Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/amd_iommu.c            | 108
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c       |   4
-rw-r--r--  arch/x86/kernel/aperture_64.c          |   2
-rw-r--r--  arch/x86/kernel/apic/io_apic.c         |  10
-rw-r--r--  arch/x86/kernel/apm_32.c               |   5
-rw-r--r--  arch/x86/kernel/cpu/amd.c              |  21
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c       |  22
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c   |  22
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 125
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c    |  19
-rw-r--r--  arch/x86/kernel/devicetree.c           |   2
-rw-r--r--  arch/x86/kernel/pci-gart_64.c          |   9
-rw-r--r--  arch/x86/kernel/pci-iommu_table.c      |  18
-rw-r--r--  arch/x86/kernel/ptrace.c               |  36
-rw-r--r--  arch/x86/kernel/reboot_32.S            |  12
-rw-r--r--  arch/x86/kernel/setup.c                |   5
16 files changed, 319 insertions(+), 101 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index d6192bcf9f09..dc5dddafe5c2 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -18,6 +18,7 @@
  */

 #include <linux/pci.h>
+#include <linux/pci-ats.h>
 #include <linux/bitmap.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>
@@ -463,6 +464,37 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
 		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
 }

+static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
+				  u64 address, size_t size)
+{
+	u64 pages;
+	int s;
+
+	pages = iommu_num_pages(address, size, PAGE_SIZE);
+	s     = 0;
+
+	if (pages > 1) {
+		/*
+		 * If we have to flush more than one page, flush all
+		 * TLB entries for this domain
+		 */
+		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+		s = 1;
+	}
+
+	address &= PAGE_MASK;
+
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->data[0]  = devid;
+	cmd->data[0] |= (qdep & 0xff) << 24;
+	cmd->data[1]  = devid;
+	cmd->data[2]  = lower_32_bits(address);
+	cmd->data[3]  = upper_32_bits(address);
+	CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
+	if (s)
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+}
+
 static void build_inv_all(struct iommu_cmd *cmd)
 {
 	memset(cmd, 0, sizeof(*cmd));
@@ -594,17 +626,47 @@ void iommu_flush_all_caches(struct amd_iommu *iommu)
 }

 /*
+ * Command send function for flushing on-device TLB
+ */
+static int device_flush_iotlb(struct device *dev, u64 address, size_t size)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct amd_iommu *iommu;
+	struct iommu_cmd cmd;
+	u16 devid;
+	int qdep;
+
+	qdep  = pci_ats_queue_depth(pdev);
+	devid = get_device_id(dev);
+	iommu = amd_iommu_rlookup_table[devid];
+
+	build_inv_iotlb_pages(&cmd, devid, qdep, address, size);
+
+	return iommu_queue_command(iommu, &cmd);
+}
+
+/*
  * Command send function for invalidating a device table entry
  */
 static int device_flush_dte(struct device *dev)
 {
 	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
 	u16 devid;
+	int ret;

+	pdev  = to_pci_dev(dev);
 	devid = get_device_id(dev);
 	iommu = amd_iommu_rlookup_table[devid];

-	return iommu_flush_dte(iommu, devid);
+	ret = iommu_flush_dte(iommu, devid);
+	if (ret)
+		return ret;
+
+	if (pci_ats_enabled(pdev))
+		ret = device_flush_iotlb(dev, 0, ~0UL);
+
+	return ret;
 }

 /*
@@ -615,6 +677,7 @@ static int device_flush_dte(struct device *dev)
 static void __domain_flush_pages(struct protection_domain *domain,
 				 u64 address, size_t size, int pde)
 {
+	struct iommu_dev_data *dev_data;
 	struct iommu_cmd cmd;
 	int ret = 0, i;

@@ -631,6 +694,15 @@ static void __domain_flush_pages(struct protection_domain *domain,
 		ret |= iommu_queue_command(amd_iommus[i], &cmd);
 	}

+	list_for_each_entry(dev_data, &domain->dev_list, list) {
+		struct pci_dev *pdev = to_pci_dev(dev_data->dev);
+
+		if (!pci_ats_enabled(pdev))
+			continue;
+
+		ret |= device_flush_iotlb(dev_data->dev, address, size);
+	}
+
 	WARN_ON(ret);
 }

@@ -1400,17 +1472,22 @@ static bool dma_ops_domain(struct protection_domain *domain)
 	return domain->flags & PD_DMA_OPS_MASK;
 }

-static void set_dte_entry(u16 devid, struct protection_domain *domain)
+static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
 {
 	u64 pte_root = virt_to_phys(domain->pt_root);
+	u32 flags = 0;

 	pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
 		    << DEV_ENTRY_MODE_SHIFT;
 	pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;

-	amd_iommu_dev_table[devid].data[2] = domain->id;
-	amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
-	amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
+	if (ats)
+		flags |= DTE_FLAG_IOTLB;
+
+	amd_iommu_dev_table[devid].data[3] |= flags;
+	amd_iommu_dev_table[devid].data[2]  = domain->id;
+	amd_iommu_dev_table[devid].data[1]  = upper_32_bits(pte_root);
+	amd_iommu_dev_table[devid].data[0]  = lower_32_bits(pte_root);
 }

 static void clear_dte_entry(u16 devid)
@@ -1427,16 +1504,22 @@ static void do_attach(struct device *dev, struct protection_domain *domain)
 {
 	struct iommu_dev_data *dev_data;
 	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	bool ats = false;
 	u16 devid;

 	devid    = get_device_id(dev);
 	iommu    = amd_iommu_rlookup_table[devid];
 	dev_data = get_dev_data(dev);
+	pdev     = to_pci_dev(dev);
+
+	if (amd_iommu_iotlb_sup)
+		ats = pci_ats_enabled(pdev);

 	/* Update data structures */
 	dev_data->domain = domain;
 	list_add(&dev_data->list, &domain->dev_list);
-	set_dte_entry(devid, domain);
+	set_dte_entry(devid, domain, ats);

 	/* Do reference counting */
 	domain->dev_iommu[iommu->index] += 1;
@@ -1450,11 +1533,13 @@ static void do_detach(struct device *dev)
 {
 	struct iommu_dev_data *dev_data;
 	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
 	u16 devid;

 	devid    = get_device_id(dev);
 	iommu    = amd_iommu_rlookup_table[devid];
 	dev_data = get_dev_data(dev);
+	pdev     = to_pci_dev(dev);

 	/* decrease reference counters */
 	dev_data->domain->dev_iommu[iommu->index] -= 1;
@@ -1529,9 +1614,13 @@ out_unlock:
 static int attach_device(struct device *dev,
 			 struct protection_domain *domain)
 {
+	struct pci_dev *pdev = to_pci_dev(dev);
 	unsigned long flags;
 	int ret;

+	if (amd_iommu_iotlb_sup)
+		pci_enable_ats(pdev, PAGE_SHIFT);
+
 	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
 	ret = __attach_device(dev, domain);
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
@@ -1588,12 +1677,16 @@ static void __detach_device(struct device *dev)
  */
 static void detach_device(struct device *dev)
 {
+	struct pci_dev *pdev = to_pci_dev(dev);
 	unsigned long flags;

 	/* lock device table */
 	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
 	__detach_device(dev);
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+
+	if (amd_iommu_iotlb_sup && pci_ats_enabled(pdev))
+		pci_disable_ats(pdev);
 }

 /*
@@ -1743,8 +1836,9 @@ static void update_device_table(struct protection_domain *domain)
 	struct iommu_dev_data *dev_data;

 	list_for_each_entry(dev_data, &domain->dev_list, list) {
+		struct pci_dev *pdev = to_pci_dev(dev_data->dev);
 		u16 devid = get_device_id(dev_data->dev);
-		set_dte_entry(devid, domain);
+		set_dte_entry(devid, domain, pci_ats_enabled(pdev));
 	}
 }

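Taken together, the amd_iommu.c hunks wire PCIe ATS into the existing attach and flush paths: attach_device() turns ATS on, set_dte_entry() marks the device table entry with DTE_FLAG_IOTLB, and every mapping flush now also invalidates the device's own IOTLB. A condensed, illustrative sketch of that flow follows; it is not part of the patch, the two wrapper names are made up, and locking and error handling are omitted. Everything else is taken from the hunks above:

	/* sketch only -- condensed from the hunks above, not part of the patch */
	static void ats_aware_attach(struct device *dev, struct protection_domain *domain)
	{
		struct pci_dev *pdev = to_pci_dev(dev);
		bool ats = false;

		if (amd_iommu_iotlb_sup)			/* every IOMMU reported IOTLB support */
			pci_enable_ats(pdev, PAGE_SHIFT);	/* let the device cache translations */

		if (amd_iommu_iotlb_sup)
			ats = pci_ats_enabled(pdev);

		/* DTE_FLAG_IOTLB makes the IOMMU answer the device's ATS requests */
		set_dte_entry(get_device_id(dev), domain, ats);
	}

	/* any mapping change must also shoot down what the devices have cached */
	static void ats_aware_flush(struct protection_domain *domain, u64 address, size_t size)
	{
		struct iommu_dev_data *dev_data;

		list_for_each_entry(dev_data, &domain->dev_list, list)
			if (pci_ats_enabled(to_pci_dev(dev_data->dev)))
				device_flush_iotlb(dev_data->dev, address, size);
	}
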
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 047905dc3e14..28b078133688 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -137,6 +137,7 @@ int amd_iommus_present;

 /* IOMMUs have a non-present cache? */
 bool amd_iommu_np_cache __read_mostly;
+bool amd_iommu_iotlb_sup __read_mostly = true;

 /*
  * The ACPI table parsing functions set this variable on an error
@@ -687,6 +688,9 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
 					MMIO_GET_LD(range));
 	iommu->evt_msi_num = MMIO_MSI_NUM(misc);

+	if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
+		amd_iommu_iotlb_sup = false;
+
 	/* read extended feature bits */
 	low  = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
 	high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 86d1ad4962a7..73fb469908c6 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -499,7 +499,7 @@ out:
 		 * Don't enable translation yet but enable GART IO and CPU
 		 * accesses and set DISTLBWALKPRB since GART table memory is UC.
 		 */
-		u32 ctl = DISTLBWALKPRB | aper_order << 1;
+		u32 ctl = aper_order << 1;

 		bus = amd_nb_bus_dev_ranges[i].bus;
 		dev_base = amd_nb_bus_dev_ranges[i].dev_base;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 68df09bba92e..45fd33d1fd3a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -128,8 +128,8 @@ static int __init parse_noapic(char *str)
 }
 early_param("noapic", parse_noapic);

-static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
-				      struct io_apic_irq_attr *attr);
+static int io_apic_setup_irq_pin(unsigned int irq, int node,
+				 struct io_apic_irq_attr *attr);

 /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
 void mp_save_irq(struct mpc_intsrc *m)
@@ -3570,7 +3570,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 }
 #endif /* CONFIG_HT_IRQ */

-int
+static int
 io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 {
 	struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
@@ -3585,8 +3585,8 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 	return ret;
 }

-static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
-				      struct io_apic_irq_attr *attr)
+int io_apic_setup_irq_pin_once(unsigned int irq, int node,
+			       struct io_apic_irq_attr *attr)
 {
 	unsigned int id = attr->ioapic, pin = attr->ioapic_pin;
 	int ret;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 0b4be431c620..adee12e0da1f 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -228,6 +228,7 @@
 #include <linux/kthread.h>
 #include <linux/jiffies.h>
 #include <linux/acpi.h>
+#include <linux/syscore_ops.h>

 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -1238,6 +1239,7 @@ static int suspend(int vetoable)

 	local_irq_disable();
 	sysdev_suspend(PMSG_SUSPEND);
+	syscore_suspend();

 	local_irq_enable();

@@ -1255,6 +1257,7 @@ static int suspend(int vetoable)
 		apm_error("suspend", err);
 	err = (err == APM_SUCCESS) ? 0 : -EIO;

+	syscore_resume();
 	sysdev_resume();
 	local_irq_enable();

@@ -1280,6 +1283,7 @@ static void standby(void)

 	local_irq_disable();
 	sysdev_suspend(PMSG_SUSPEND);
+	syscore_suspend();
 	local_irq_enable();

 	err = set_system_power_state(APM_STATE_STANDBY);
@@ -1287,6 +1291,7 @@ static void standby(void)
 		apm_error("standby", err);

 	local_irq_disable();
+	syscore_resume();
 	sysdev_resume();
 	local_irq_enable();

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 3ecece0217ef..bb9eb29a52dd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -615,6 +615,25 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	/* As a rule processors have APIC timer running in deep C states */
 	if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400))
 		set_cpu_cap(c, X86_FEATURE_ARAT);
+
+	/*
+	 * Disable GART TLB Walk Errors on Fam10h. We do this here
+	 * because this is always needed when GART is enabled, even in a
+	 * kernel which has no MCE support built in.
+	 */
+	if (c->x86 == 0x10) {
+		/*
+		 * BIOS should disable GartTlbWlk Errors themself. If
+		 * it doesn't do it here as suggested by the BKDG.
+		 *
+		 * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
+		 */
+		u64 mask;
+
+		rdmsrl(MSR_AMD64_MCx_MASK(4), mask);
+		mask |= (1 << 10);
+		wrmsrl(MSR_AMD64_MCx_MASK(4), mask);
+	}
 }

 #ifdef CONFIG_X86_32
@@ -679,7 +698,7 @@ cpu_dev_register(amd_cpu_dev);
  */

 const int amd_erratum_400[] =
-	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
+	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf),
 			    AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
 EXPORT_SYMBOL_GPL(amd_erratum_400);

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index eed3673a8656..e638689279d3 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -586,8 +586,12 @@ static int x86_setup_perfctr(struct perf_event *event)
 			return -EOPNOTSUPP;
 	}

+	/*
+	 * Do not allow config1 (extended registers) to propagate,
+	 * there's no sane user-space generalization yet:
+	 */
 	if (attr->type == PERF_TYPE_RAW)
-		return x86_pmu_extra_regs(event->attr.config, event);
+		return 0;

 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, event);
@@ -609,8 +613,8 @@ static int x86_setup_perfctr(struct perf_event *event)
 	/*
 	 * Branch tracing:
 	 */
-	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
-	    (hwc->sample_period == 1)) {
+	if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+	    !attr->freq && hwc->sample_period == 1) {
 		/* BTS is not supported by this architecture. */
 		if (!x86_pmu.bts_active)
 			return -EOPNOTSUPP;
@@ -1284,6 +1288,16 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)

 	cpuc = &__get_cpu_var(cpu_hw_events);

+	/*
+	 * Some chipsets need to unmask the LVTPC in a particular spot
+	 * inside the nmi handler.  As a result, the unmasking was pushed
+	 * into all the nmi handlers.
+	 *
+	 * This generic handler doesn't seem to have any issues where the
+	 * unmasking occurs so it was left at the top.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		if (!test_bit(idx, cpuc->active_mask)) {
 			/*
@@ -1370,8 +1384,6 @@ perf_event_nmi_handler(struct notifier_block *self,
 		return NOTIFY_DONE;
 	}

-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-
 	handled = x86_pmu.handle_irq(args->regs);
 	if (!handled)
 		return NOTIFY_DONE;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 461f62bbd774..cf4e369cea67 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -8,7 +8,7 @@ static __initconst const u64 amd_hw_cache_event_ids
  [ C(L1D) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
+		[ C(RESULT_MISS)   ] = 0x0141, /* Data Cache Misses          */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
@@ -427,7 +427,9 @@ static __initconst const struct x86_pmu amd_pmu = {
  *
  * Exceptions:
  *
+ * 0x000	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
  * 0x003	FP	PERF_CTL[3]
+ * 0x004	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
  * 0x00B	FP	PERF_CTL[3]
  * 0x00D	FP	PERF_CTL[3]
  * 0x023	DE	PERF_CTL[2:0]
@@ -448,6 +450,8 @@ static __initconst const struct x86_pmu amd_pmu = {
  * 0x0DF	LS	PERF_CTL[5:0]
  * 0x1D6	EX	PERF_CTL[5:0]
  * 0x1D8	EX	PERF_CTL[5:0]
+ *
+ * (*) depending on the umask all FPU counters may be used
  */

 static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
@@ -460,18 +464,28 @@ static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
 static struct event_constraint *
 amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
-	unsigned int event_code = amd_get_event_code(&event->hw);
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int event_code = amd_get_event_code(hwc);

 	switch (event_code & AMD_EVENT_TYPE_MASK) {
 	case AMD_EVENT_FP:
 		switch (event_code) {
+		case 0x000:
+			if (!(hwc->config & 0x0000F000ULL))
+				break;
+			if (!(hwc->config & 0x00000F00ULL))
+				break;
+			return &amd_f15_PMC3;
+		case 0x004:
+			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
+				break;
+			return &amd_f15_PMC3;
 		case 0x003:
 		case 0x00B:
 		case 0x00D:
 			return &amd_f15_PMC3;
-		default:
-			return &amd_f15_PMC53;
 		}
+		return &amd_f15_PMC53;
 	case AMD_EVENT_LS:
 	case AMD_EVENT_DC:
 	case AMD_EVENT_EX_LS:
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 8fc2b2cee1da..447a28de6f09 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -25,7 +25,7 @@ struct intel_percore {
 /*
  * Intel PerfMon, used on Core and later.
  */
-static const u64 intel_perfmon_event_map[] =
+static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
 {
 	[PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
 	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
@@ -184,26 +184,23 @@ static __initconst const u64 snb_hw_cache_event_ids
 	},
  },
  [ C(LL  ) ] = {
-	/*
-	 * TBD: Need Off-core Response Performance Monitoring support
-	 */
 	[ C(OP_READ) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_WRITE) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
  },
  [ C(DTLB) ] = {
@@ -285,26 +282,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
 	},
  [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	/*
 	 * Use RFO, not WRITEBACK, because a write miss would typically occur
 	 * on RFO.
 	 */
 	[ C(OP_WRITE) ] = {
-		/* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
-		[ C(RESULT_ACCESS) ] = 0x01bb,
-		/* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
 		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
  },
  [ C(DTLB) ] = {
@@ -352,16 +349,36 @@ static __initconst const u64 westmere_hw_cache_event_ids
 };

 /*
- * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
+ * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
+ * See IA32 SDM Vol 3B 30.6.1.3
  */

-#define DMND_DATA_RD	(1 << 0)
-#define DMND_RFO	(1 << 1)
-#define DMND_WB		(1 << 3)
-#define PF_DATA_RD	(1 << 4)
-#define PF_DATA_RFO	(1 << 5)
-#define RESP_UNCORE_HIT	(1 << 8)
-#define RESP_MISS	(0xf600)	/* non uncore hit */
+#define NHM_DMND_DATA_RD	(1 << 0)
+#define NHM_DMND_RFO		(1 << 1)
+#define NHM_DMND_IFETCH		(1 << 2)
+#define NHM_DMND_WB		(1 << 3)
+#define NHM_PF_DATA_RD		(1 << 4)
+#define NHM_PF_DATA_RFO		(1 << 5)
+#define NHM_PF_IFETCH		(1 << 6)
+#define NHM_OFFCORE_OTHER	(1 << 7)
+#define NHM_UNCORE_HIT		(1 << 8)
+#define NHM_OTHER_CORE_HIT_SNP	(1 << 9)
+#define NHM_OTHER_CORE_HITM	(1 << 10)
+				/* reserved */
+#define NHM_REMOTE_CACHE_FWD	(1 << 12)
+#define NHM_REMOTE_DRAM		(1 << 13)
+#define NHM_LOCAL_DRAM		(1 << 14)
+#define NHM_NON_DRAM		(1 << 15)
+
+#define NHM_ALL_DRAM		(NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
+
+#define NHM_DMND_READ		(NHM_DMND_DATA_RD)
+#define NHM_DMND_WRITE		(NHM_DMND_RFO|NHM_DMND_WB)
+#define NHM_DMND_PREFETCH	(NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
+
+#define NHM_L3_HIT	(NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
+#define NHM_L3_MISS	(NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_L3_ACCESS	(NHM_L3_HIT|NHM_L3_MISS)

 static __initconst const u64 nehalem_hw_cache_extra_regs
 				[PERF_COUNT_HW_CACHE_MAX]
@@ -370,16 +387,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
 {
  [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS)   ] = DMND_DATA_RD|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS)   ] = DMND_RFO|DMND_WB|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS)   ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
 	},
  }
 };
@@ -391,12 +408,12 @@ static __initconst const u64 nehalem_hw_cache_event_ids
 {
  [ C(L1D) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
-		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
-		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES      */
+		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
 	},
 	[ C(OP_PREFETCH) ] = {
 		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
@@ -933,6 +950,16 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)

 	cpuc = &__get_cpu_var(cpu_hw_events);

+	/*
+	 * Some chipsets need to unmask the LVTPC in a particular spot
+	 * inside the nmi handler.  As a result, the unmasking was pushed
+	 * into all the nmi handlers.
+	 *
+	 * This handler doesn't seem to have any issues with the unmasking
+	 * so it was left at the top.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+
 	intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
 	status = intel_pmu_get_status();
@@ -998,6 +1025,9 @@ intel_bts_constraints(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned int hw_event, bts_event;

+	if (event->attr.freq)
+		return NULL;
+
 	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
 	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);

@@ -1305,7 +1335,7 @@ static void intel_clovertown_quirks(void)
 	 * AJ106 could possibly be worked around by not allowing LBR
 	 *   usage from PEBS, including the fixup.
 	 * AJ68  could possibly be worked around by always programming
-	 * a pebs_event_reset[0] value and coping with the lost events.
+	 *   a pebs_event_reset[0] value and coping with the lost events.
 	 *
 	 * But taken together it might just make sense to not enable PEBS on
 	 * these chips.
@@ -1409,6 +1439,18 @@ static __init int intel_pmu_init(void)
 		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
+
+		if (ebx & 0x40) {
+			/*
+			 * Erratum AAJ80 detected, we work it around by using
+			 * the BR_MISP_EXEC.ANY event. This will over-count
+			 * branch-misses, but it's still much better than the
+			 * architectural event which is often completely bogus:
+			 */
+			intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+
+			pr_cont("erratum AAJ80 worked around, ");
+		}
 		pr_cont("Nehalem events, ");
 		break;

@@ -1425,6 +1467,7 @@ static __init int intel_pmu_init(void)

 	case 37: /* 32 nm nehalem, "Clarkdale" */
 	case 44: /* 32 nm nehalem, "Gulftown" */
+	case 47: /* 32 nm Xeon E7 */
 		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
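As a quick sanity check (not part of the patch, but following directly from the #defines in the hunk above), the new NHM_* composites expand to the following MSR_OFFCORE_RESPONSE values that the generic last-level-cache events now program:

	/* illustrative only: values derived from the NHM_* defines above */
	/* NHM_L3_MISS   = NON_DRAM | LOCAL_DRAM | REMOTE_DRAM | REMOTE_CACHE_FWD = 0xf000 */
	/* NHM_L3_ACCESS = NHM_L3_MISS | UNCORE_HIT | OTHER_CORE_HIT_SNP | OTHER_CORE_HITM = 0xf700 */

	/* LL read  access: NHM_DMND_READ     | NHM_L3_ACCESS = 0x0001 | 0xf700 = 0xf701 */
	/* LL read  miss:   NHM_DMND_READ     | NHM_L3_MISS   = 0x0001 | 0xf000 = 0xf001 */
	/* LL write miss:   NHM_DMND_WRITE    | NHM_L3_MISS   = 0x000a | 0xf000 = 0xf00a */
	/* LL pref  miss:   NHM_DMND_PREFETCH | NHM_L3_MISS   = 0x0030 | 0xf000 = 0xf030 */

Roughly speaking, a "miss" is now any request of the selected type whose response did not come from the local L3 (DRAM, a remote cache forward, or non-DRAM), rather than the old RESP_MISS catch-all mask.
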
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index c2520e178d32..e93fcd55fae1 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -947,14 +947,23 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 		if (!x86_perf_event_set_period(event))
 			continue;
 		if (perf_event_overflow(event, 1, &data, regs))
-			p4_pmu_disable_event(event);
+			x86_pmu_stop(event, 0);
 	}

-	if (handled) {
-		/* p4 quirk: unmask it again */
-		apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+	if (handled)
 		inc_irq_stat(apic_perf_irqs);
-	}
+
+	/*
+	 * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
+	 * been observed that the OVF bit flag has to be cleared first _before_
+	 * the LVTPC can be unmasked.
+	 *
+	 * The reason is the NMI line will continue to be asserted while the OVF
+	 * bit is set.  This causes a second NMI to generate if the LVTPC is
+	 * unmasked before the OVF bit is cleared, leading to unknown NMI
+	 * messages.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);

 	return handled;
 }
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 706a9fb46a58..e90f08458e6b 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -391,7 +391,7 @@ static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize,

 	set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);

-	return io_apic_setup_irq_pin(*out_hwirq, cpu_to_node(0), &attr);
+	return io_apic_setup_irq_pin_once(*out_hwirq, cpu_to_node(0), &attr);
 }

 static void __init ioapic_add_ofnode(struct device_node *np)
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 82ada01625b9..b117efd24f71 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -81,6 +81,9 @@ static u32 gart_unmapped_entry;
 #define AGPEXTERN
 #endif

+/* GART can only remap to physical addresses < 1TB */
+#define GART_MAX_PHYS_ADDR	(1ULL << 40)
+
 /* backdoor interface to AGP driver */
 AGPEXTERN int agp_memory_reserved;
 AGPEXTERN __u32 *agp_gatt_table;
@@ -212,9 +215,13 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
 				size_t size, int dir, unsigned long align_mask)
 {
 	unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE);
-	unsigned long iommu_page = alloc_iommu(dev, npages, align_mask);
+	unsigned long iommu_page;
 	int i;

+	if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR))
+		return bad_dma_addr;
+
+	iommu_page = alloc_iommu(dev, npages, align_mask);
 	if (iommu_page == -1) {
 		if (!nonforced_iommu(dev, phys_mem, size))
 			return phys_mem;
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c
index 55d745ec1181..35ccf75696eb 100644
--- a/arch/x86/kernel/pci-iommu_table.c
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -50,20 +50,14 @@ void __init check_iommu_entries(struct iommu_table_entry *start,
 			       struct iommu_table_entry *finish)
 {
 	struct iommu_table_entry *p, *q, *x;
-	char sym_p[KSYM_SYMBOL_LEN];
-	char sym_q[KSYM_SYMBOL_LEN];

 	/* Simple cyclic dependency checker. */
 	for (p = start; p < finish; p++) {
 		q = find_dependents_of(start, finish, p);
 		x = find_dependents_of(start, finish, q);
 		if (p == x) {
-			sprint_symbol(sym_p, (unsigned long)p->detect);
-			sprint_symbol(sym_q, (unsigned long)q->detect);
-
-			printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %s depends" \
-					" on %s and vice-versa. BREAKING IT.\n",
-					sym_p, sym_q);
+			printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %pS depends on %pS and vice-versa. BREAKING IT.\n",
+			       p->detect, q->detect);
 			/* Heavy handed way..*/
 			x->depend = 0;
 		}
@@ -72,12 +66,8 @@ void __init check_iommu_entries(struct iommu_table_entry *start,
 	for (p = start; p < finish; p++) {
 		q = find_dependents_of(p, finish, p);
 		if (q && q > p) {
-			sprint_symbol(sym_p, (unsigned long)p->detect);
-			sprint_symbol(sym_q, (unsigned long)q->detect);
-
-			printk(KERN_ERR "EXECUTION ORDER INVALID! %s "\
-					"should be called before %s!\n",
-					sym_p, sym_q);
+			printk(KERN_ERR "EXECUTION ORDER INVALID! %pS should be called before %pS!\n",
+			       p->detect, q->detect);
 		}
 	}
 }
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 45892dc4b72a..f65e5b521dbd 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -608,6 +608,9 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
 	unsigned len, type;
 	struct perf_event *bp;

+	if (ptrace_get_breakpoints(tsk) < 0)
+		return -ESRCH;
+
 	data &= ~DR_CONTROL_RESERVED;
 	old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
 restore:
@@ -655,6 +658,9 @@ restore:
 		}
 		goto restore;
 	}
+
+	ptrace_put_breakpoints(tsk);
+
 	return ((orig_ret < 0) ? orig_ret : rc);
 }

@@ -668,10 +674,17 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)

 	if (n < HBP_NUM) {
 		struct perf_event *bp;
+
+		if (ptrace_get_breakpoints(tsk) < 0)
+			return -ESRCH;
+
 		bp = thread->ptrace_bps[n];
 		if (!bp)
-			return 0;
-		val = bp->hw.info.address;
+			val = 0;
+		else
+			val = bp->hw.info.address;
+
+		ptrace_put_breakpoints(tsk);
 	} else if (n == 6) {
 		val = thread->debugreg6;
 	} else if (n == 7) {
@@ -686,6 +699,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 	struct perf_event *bp;
 	struct thread_struct *t = &tsk->thread;
 	struct perf_event_attr attr;
+	int err = 0;
+
+	if (ptrace_get_breakpoints(tsk) < 0)
+		return -ESRCH;

 	if (!t->ptrace_bps[nr]) {
 		ptrace_breakpoint_init(&attr);
@@ -709,24 +726,23 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 		 * writing for the user. And anyway this is the previous
 		 * behaviour.
 		 */
-		if (IS_ERR(bp))
-			return PTR_ERR(bp);
+		if (IS_ERR(bp)) {
+			err = PTR_ERR(bp);
+			goto put;
+		}

 		t->ptrace_bps[nr] = bp;
 	} else {
-		int err;
-
 		bp = t->ptrace_bps[nr];

 		attr = bp->attr;
 		attr.bp_addr = addr;
 		err = modify_user_hw_breakpoint(bp, &attr);
-		if (err)
-			return err;
 	}

-
-	return 0;
+put:
+	ptrace_put_breakpoints(tsk);
+	return err;
 }

 /*
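The ptrace.c hunks all follow one pattern: take a reference on the thread's breakpoint slots before touching thread->ptrace_bps[], and drop it on every exit path (hence the goto-based cleanup). A minimal sketch of the pattern these hunks enforce, with do_debugreg_op being a made-up illustrative name rather than anything in the patch:

	/* sketch only -- the bracketing pattern the hunks above introduce */
	static int do_debugreg_op(struct task_struct *tsk, int n)
	{
		struct perf_event *bp;
		int err = 0;

		if (ptrace_get_breakpoints(tsk) < 0)	/* fails if no breakpoint context exists */
			return -ESRCH;

		bp = tsk->thread.ptrace_bps[n];		/* safe to look at while the reference is held */
		if (!bp) {
			err = -EINVAL;
			goto put;			/* never leave without dropping the reference */
		}

		/* ... operate on bp ... */

	put:
		ptrace_put_breakpoints(tsk);
		return err;
	}
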
diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/kernel/reboot_32.S
index 29092b38d816..1d5c46df0d78 100644
--- a/arch/x86/kernel/reboot_32.S
+++ b/arch/x86/kernel/reboot_32.S
@@ -21,26 +21,26 @@ r_base = .
	/* Get our own relocated address */
	call	1f
1:	popl	%ebx
-	subl	$1b, %ebx
+	subl	$(1b - r_base), %ebx

	/* Compute the equivalent real-mode segment */
	movl	%ebx, %ecx
	shrl	$4, %ecx

	/* Patch post-real-mode segment jump */
-	movw	dispatch_table(%ebx,%eax,2),%ax
-	movw	%ax, 101f(%ebx)
-	movw	%cx, 102f(%ebx)
+	movw	(dispatch_table - r_base)(%ebx,%eax,2),%ax
+	movw	%ax, (101f - r_base)(%ebx)
+	movw	%cx, (102f - r_base)(%ebx)

	/* Set up the IDT for real mode. */
-	lidtl	machine_real_restart_idt(%ebx)
+	lidtl	(machine_real_restart_idt - r_base)(%ebx)

	/*
	 * Set up a GDT from which we can load segment descriptors for real
	 * mode.  The GDT is not used in real mode; it is just needed here to
	 * prepare the descriptors.
	 */
-	lgdtl	machine_real_restart_gdt(%ebx)
+	lgdtl	(machine_real_restart_gdt - r_base)(%ebx)

	/*
	 * Load the data segment registers with 16-bit compatible values
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 5a0484a95ad6..4be9b398470e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -976,6 +976,11 @@ void __init setup_arch(char **cmdline_p)
 	paging_init();
 	x86_init.paging.pagetable_setup_done(swapper_pg_dir);

+	if (boot_cpu_data.cpuid_level >= 0) {
+		/* A CPU has %cr4 if and only if it has CPUID */
+		mmu_cr4_features = read_cr4();
+	}
+
 #ifdef CONFIG_X86_32
 	/* sync back kernel address range */
 	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,