Diffstat (limited to 'arch/x86/kernel')
71 files changed, 4073 insertions, 3068 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 27ef365e757d..c2ac1b4515a0 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
| @@ -58,7 +58,6 @@ EXPORT_SYMBOL(acpi_disabled); | |||
| 58 | #ifdef CONFIG_X86_64 | 58 | #ifdef CONFIG_X86_64 |
| 59 | 59 | ||
| 60 | #include <asm/proto.h> | 60 | #include <asm/proto.h> |
| 61 | #include <asm/genapic.h> | ||
| 62 | 61 | ||
| 63 | #else /* X86 */ | 62 | #else /* X86 */ |
| 64 | 63 | ||
| @@ -97,8 +96,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; | |||
| 97 | #warning ACPI uses CMPXCHG, i486 and later hardware | 96 | #warning ACPI uses CMPXCHG, i486 and later hardware |
| 98 | #endif | 97 | #endif |
| 99 | 98 | ||
| 100 | static int acpi_mcfg_64bit_base_addr __initdata = FALSE; | ||
| 101 | |||
| 102 | /* -------------------------------------------------------------------------- | 99 | /* -------------------------------------------------------------------------- |
| 103 | Boot-time Configuration | 100 | Boot-time Configuration |
| 104 | -------------------------------------------------------------------------- */ | 101 | -------------------------------------------------------------------------- */ |
| @@ -160,6 +157,8 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) | |||
| 160 | struct acpi_mcfg_allocation *pci_mmcfg_config; | 157 | struct acpi_mcfg_allocation *pci_mmcfg_config; |
| 161 | int pci_mmcfg_config_num; | 158 | int pci_mmcfg_config_num; |
| 162 | 159 | ||
| 160 | static int acpi_mcfg_64bit_base_addr __initdata = FALSE; | ||
| 161 | |||
| 163 | static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) | 162 | static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) |
| 164 | { | 163 | { |
| 165 | if (!strcmp(mcfg->header.oem_id, "SGI")) | 164 | if (!strcmp(mcfg->header.oem_id, "SGI")) |
| @@ -253,10 +252,8 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled) | |||
| 253 | return; | 252 | return; |
| 254 | } | 253 | } |
| 255 | 254 | ||
| 256 | #ifdef CONFIG_X86_32 | ||
| 257 | if (boot_cpu_physical_apicid != -1U) | 255 | if (boot_cpu_physical_apicid != -1U) |
| 258 | ver = apic_version[boot_cpu_physical_apicid]; | 256 | ver = apic_version[boot_cpu_physical_apicid]; |
| 259 | #endif | ||
| 260 | 257 | ||
| 261 | generic_processor_info(id, ver); | 258 | generic_processor_info(id, ver); |
| 262 | } | 259 | } |
| @@ -776,10 +773,8 @@ static void __init acpi_register_lapic_address(unsigned long address) | |||
| 776 | set_fixmap_nocache(FIX_APIC_BASE, address); | 773 | set_fixmap_nocache(FIX_APIC_BASE, address); |
| 777 | if (boot_cpu_physical_apicid == -1U) { | 774 | if (boot_cpu_physical_apicid == -1U) { |
| 778 | boot_cpu_physical_apicid = read_apic_id(); | 775 | boot_cpu_physical_apicid = read_apic_id(); |
| 779 | #ifdef CONFIG_X86_32 | ||
| 780 | apic_version[boot_cpu_physical_apicid] = | 776 | apic_version[boot_cpu_physical_apicid] = |
| 781 | GET_APIC_VERSION(apic_read(APIC_LVR)); | 777 | GET_APIC_VERSION(apic_read(APIC_LVR)); |
| 782 | #endif | ||
| 783 | } | 778 | } |
| 784 | } | 779 | } |
| 785 | 780 | ||
| @@ -1607,6 +1602,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { | |||
| 1607 | */ | 1602 | */ |
| 1608 | { | 1603 | { |
| 1609 | .callback = dmi_ignore_irq0_timer_override, | 1604 | .callback = dmi_ignore_irq0_timer_override, |
| 1605 | .ident = "HP nx6115 laptop", | ||
| 1606 | .matches = { | ||
| 1607 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), | ||
| 1608 | DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6115"), | ||
| 1609 | }, | ||
| 1610 | }, | ||
| 1611 | { | ||
| 1612 | .callback = dmi_ignore_irq0_timer_override, | ||
| 1610 | .ident = "HP NX6125 laptop", | 1613 | .ident = "HP NX6125 laptop", |
| 1611 | .matches = { | 1614 | .matches = { |
| 1612 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), | 1615 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), |
| @@ -1621,6 +1624,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { | |||
| 1621 | DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), | 1624 | DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), |
| 1622 | }, | 1625 | }, |
| 1623 | }, | 1626 | }, |
| 1627 | { | ||
| 1628 | .callback = dmi_ignore_irq0_timer_override, | ||
| 1629 | .ident = "HP 6715b laptop", | ||
| 1630 | .matches = { | ||
| 1631 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), | ||
| 1632 | DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), | ||
| 1633 | }, | ||
| 1634 | }, | ||
| 1624 | {} | 1635 | {} |
| 1625 | }; | 1636 | }; |
| 1626 | 1637 | ||
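The new table entries extend acpi_dmi_table so the IRQ0 timer-override quirk also covers the HP nx6115 and the 6715b. The DMI core does the matching at boot: each entry's .matches strings are compared against the firmware's DMI data and, on a hit, the .callback runs. Below is a standalone sketch of that selection loop; the struct and function names are stand-ins, and it uses plain strcmp() where the real dmi_check_system() does substring matching.

#include <string.h>

/* Stand-ins for the vendor/product strings the firmware reports via DMI. */
struct dmi_info {
        const char *sys_vendor;
        const char *product_name;
};

struct quirk {
        const char *ident;
        const char *vendor;
        const char *product;
        int (*callback)(const struct quirk *q);
};

static int ignore_irq0_timer_override(const struct quirk *q)
{
        /* the real callback sets the skip-timer-override flag */
        return 1;
}

static const struct quirk quirks[] = {
        { "HP nx6115 laptop", "Hewlett-Packard", "HP Compaq nx6115",
          ignore_irq0_timer_override },
        { "HP 6715b laptop",  "Hewlett-Packard", "HP Compaq 6715b",
          ignore_irq0_timer_override },
        { NULL, NULL, NULL, NULL },     /* terminator, like the empty {} above */
};

/* Run the callback of every entry whose strings match the reported DMI data. */
static void apply_quirks(const struct dmi_info *dmi)
{
        const struct quirk *q;

        for (q = quirks; q->ident; q++)
                if (!strcmp(dmi->sys_vendor, q->vendor) &&
                    !strcmp(dmi->product_name, q->product))
                        q->callback(q);
}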
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 69b4d060b21c..34e4d112b1ef 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
| @@ -33,6 +33,10 @@ | |||
| 33 | 33 | ||
| 34 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); | 34 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); |
| 35 | 35 | ||
| 36 | /* A list of preallocated protection domains */ | ||
| 37 | static LIST_HEAD(iommu_pd_list); | ||
| 38 | static DEFINE_SPINLOCK(iommu_pd_list_lock); | ||
| 39 | |||
| 36 | /* | 40 | /* |
| 37 | * general struct to manage commands send to an IOMMU | 41 | * general struct to manage commands send to an IOMMU |
| 38 | */ | 42 | */ |
| @@ -51,6 +55,102 @@ static int iommu_has_npcache(struct amd_iommu *iommu) | |||
| 51 | 55 | ||
| 52 | /**************************************************************************** | 56 | /**************************************************************************** |
| 53 | * | 57 | * |
| 58 | * Interrupt handling functions | ||
| 59 | * | ||
| 60 | ****************************************************************************/ | ||
| 61 | |||
| 62 | static void iommu_print_event(void *__evt) | ||
| 63 | { | ||
| 64 | u32 *event = __evt; | ||
| 65 | int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; | ||
| 66 | int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; | ||
| 67 | int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; | ||
| 68 | int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; | ||
| 69 | u64 address = (u64)(((u64)event[3]) << 32) | event[2]; | ||
| 70 | |||
| 71 | printk(KERN_ERR "AMD IOMMU: Event logged ["); | ||
| 72 | |||
| 73 | switch (type) { | ||
| 74 | case EVENT_TYPE_ILL_DEV: | ||
| 75 | printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x " | ||
| 76 | "address=0x%016llx flags=0x%04x]\n", | ||
| 77 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 78 | address, flags); | ||
| 79 | break; | ||
| 80 | case EVENT_TYPE_IO_FAULT: | ||
| 81 | printk("IO_PAGE_FAULT device=%02x:%02x.%x " | ||
| 82 | "domain=0x%04x address=0x%016llx flags=0x%04x]\n", | ||
| 83 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 84 | domid, address, flags); | ||
| 85 | break; | ||
| 86 | case EVENT_TYPE_DEV_TAB_ERR: | ||
| 87 | printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " | ||
| 88 | "address=0x%016llx flags=0x%04x]\n", | ||
| 89 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 90 | address, flags); | ||
| 91 | break; | ||
| 92 | case EVENT_TYPE_PAGE_TAB_ERR: | ||
| 93 | printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x " | ||
| 94 | "domain=0x%04x address=0x%016llx flags=0x%04x]\n", | ||
| 95 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 96 | domid, address, flags); | ||
| 97 | break; | ||
| 98 | case EVENT_TYPE_ILL_CMD: | ||
| 99 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); | ||
| 100 | break; | ||
| 101 | case EVENT_TYPE_CMD_HARD_ERR: | ||
| 102 | printk("COMMAND_HARDWARE_ERROR address=0x%016llx " | ||
| 103 | "flags=0x%04x]\n", address, flags); | ||
| 104 | break; | ||
| 105 | case EVENT_TYPE_IOTLB_INV_TO: | ||
| 106 | printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x " | ||
| 107 | "address=0x%016llx]\n", | ||
| 108 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 109 | address); | ||
| 110 | break; | ||
| 111 | case EVENT_TYPE_INV_DEV_REQ: | ||
| 112 | printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x " | ||
| 113 | "address=0x%016llx flags=0x%04x]\n", | ||
| 114 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 115 | address, flags); | ||
| 116 | break; | ||
| 117 | default: | ||
| 118 | printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type); | ||
| 119 | } | ||
| 120 | } | ||
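Each event log entry is a 128-bit record stored as four 32-bit words; iommu_print_event() extracts the type, device ID, domain ID, flags and the 64-bit address using the shift/mask constants from amd_iommu_types.h. The standalone sketch below decodes such a record the same way; the field positions used here are illustrative stand-ins, not the authoritative values.

#include <stdint.h>
#include <stdio.h>

/* Illustrative field positions; the real ones live in amd_iommu_types.h. */
#define EVT_TYPE_SHIFT    28
#define EVT_TYPE_MASK     0x0f
#define EVT_DEVID_SHIFT   0
#define EVT_DEVID_MASK    0xffff

static void decode_event(const uint32_t evt[4])
{
        unsigned int type  = (evt[1] >> EVT_TYPE_SHIFT) & EVT_TYPE_MASK;
        unsigned int devid = (evt[0] >> EVT_DEVID_SHIFT) & EVT_DEVID_MASK;
        uint64_t address   = ((uint64_t)evt[3] << 32) | evt[2];

        /* devid packs PCI bus<<8 | devfn, so it prints as bus:slot.func */
        printf("type=0x%02x device=%02x:%02x.%x address=0x%016llx\n",
               type, devid >> 8, (devid >> 3) & 0x1f, devid & 0x7,
               (unsigned long long)address);
}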
| 121 | |||
| 122 | static void iommu_poll_events(struct amd_iommu *iommu) | ||
| 123 | { | ||
| 124 | u32 head, tail; | ||
| 125 | unsigned long flags; | ||
| 126 | |||
| 127 | spin_lock_irqsave(&iommu->lock, flags); | ||
| 128 | |||
| 129 | head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); | ||
| 130 | tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); | ||
| 131 | |||
| 132 | while (head != tail) { | ||
| 133 | iommu_print_event(iommu->evt_buf + head); | ||
| 134 | head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; | ||
| 135 | } | ||
| 136 | |||
| 137 | writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); | ||
| 138 | |||
| 139 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 140 | } | ||
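iommu_poll_events() treats the event log as a ring buffer: the hardware advances the tail pointer as it logs, the driver consumes from head to tail and then writes the new head back so the hardware knows the entries were handled. A minimal userspace model of that consume loop (fixed-size buffer, no MMIO registers, purely illustrative):

#include <stdint.h>

#define ENTRY_SIZE 16                  /* one event record, like EVENT_ENTRY_SIZE */
#define BUF_SIZE   (32 * ENTRY_SIZE)

struct evt_ring {
        uint8_t  buf[BUF_SIZE];
        uint32_t head;                 /* next entry the consumer will read */
        uint32_t tail;                 /* next slot the producer will fill  */
};

/* Consume everything between head and tail, exactly like iommu_poll_events(). */
static void poll_ring(struct evt_ring *r, void (*handle)(void *entry))
{
        while (r->head != r->tail) {
                handle(r->buf + r->head);
                r->head = (r->head + ENTRY_SIZE) % BUF_SIZE;
        }
        /* the driver would now write r->head back to MMIO_EVT_HEAD_OFFSET */
}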
| 141 | |||
| 142 | irqreturn_t amd_iommu_int_handler(int irq, void *data) | ||
| 143 | { | ||
| 144 | struct amd_iommu *iommu; | ||
| 145 | |||
| 146 | list_for_each_entry(iommu, &amd_iommu_list, list) | ||
| 147 | iommu_poll_events(iommu); | ||
| 148 | |||
| 149 | return IRQ_HANDLED; | ||
| 150 | } | ||
| 151 | |||
| 152 | /**************************************************************************** | ||
| 153 | * | ||
| 54 | * IOMMU command queuing functions | 154 | * IOMMU command queuing functions |
| 55 | * | 155 | * |
| 56 | ****************************************************************************/ | 156 | ****************************************************************************/ |
| @@ -101,10 +201,10 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | |||
| 101 | */ | 201 | */ |
| 102 | static int iommu_completion_wait(struct amd_iommu *iommu) | 202 | static int iommu_completion_wait(struct amd_iommu *iommu) |
| 103 | { | 203 | { |
| 104 | int ret, ready = 0; | 204 | int ret = 0, ready = 0; |
| 105 | unsigned status = 0; | 205 | unsigned status = 0; |
| 106 | struct iommu_cmd cmd; | 206 | struct iommu_cmd cmd; |
| 107 | unsigned long i = 0; | 207 | unsigned long flags, i = 0; |
| 108 | 208 | ||
| 109 | memset(&cmd, 0, sizeof(cmd)); | 209 | memset(&cmd, 0, sizeof(cmd)); |
| 110 | cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; | 210 | cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; |
| @@ -112,10 +212,12 @@ static int iommu_completion_wait(struct amd_iommu *iommu) | |||
| 112 | 212 | ||
| 113 | iommu->need_sync = 0; | 213 | iommu->need_sync = 0; |
| 114 | 214 | ||
| 115 | ret = iommu_queue_command(iommu, &cmd); | 215 | spin_lock_irqsave(&iommu->lock, flags); |
| 216 | |||
| 217 | ret = __iommu_queue_command(iommu, &cmd); | ||
| 116 | 218 | ||
| 117 | if (ret) | 219 | if (ret) |
| 118 | return ret; | 220 | goto out; |
| 119 | 221 | ||
| 120 | while (!ready && (i < EXIT_LOOP_COUNT)) { | 222 | while (!ready && (i < EXIT_LOOP_COUNT)) { |
| 121 | ++i; | 223 | ++i; |
| @@ -130,6 +232,8 @@ static int iommu_completion_wait(struct amd_iommu *iommu) | |||
| 130 | 232 | ||
| 131 | if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) | 233 | if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) |
| 132 | printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); | 234 | printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); |
| 235 | out: | ||
| 236 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 133 | 237 | ||
| 134 | return 0; | 238 | return 0; |
| 135 | } | 239 | } |
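The point of this change is that queueing the COMPLETION_WAIT command and polling for its status now happen under a single acquisition of iommu->lock, so no other CPU can slip a command in between the two steps. A compressed model of the pattern, with a pthread mutex standing in for the spinlock and a plain flag standing in for the MMIO status bit (all names here are hypothetical):

#include <pthread.h>
#include <stdbool.h>

#define EXIT_LOOP_COUNT 1000000

struct toy_iommu {
        pthread_mutex_t lock;
        volatile bool   comp_wait_done;   /* set asynchronously by the "hardware" */
};

/* __iommu_queue_command() equivalent: assumes the lock is already held. */
static int queue_completion_wait_locked(struct toy_iommu *iommu)
{
        iommu->comp_wait_done = false;
        return 0;
}

static int completion_wait(struct toy_iommu *iommu)
{
        unsigned long i = 0;
        int ret;

        pthread_mutex_lock(&iommu->lock);
        ret = queue_completion_wait_locked(iommu);
        if (ret)
                goto out;
        while (!iommu->comp_wait_done && i < EXIT_LOOP_COUNT)
                ++i;                       /* busy-poll the status flag */
out:
        pthread_mutex_unlock(&iommu->lock);
        return ret;
}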
| @@ -140,6 +244,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu) | |||
| 140 | static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) | 244 | static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) |
| 141 | { | 245 | { |
| 142 | struct iommu_cmd cmd; | 246 | struct iommu_cmd cmd; |
| 247 | int ret; | ||
| 143 | 248 | ||
| 144 | BUG_ON(iommu == NULL); | 249 | BUG_ON(iommu == NULL); |
| 145 | 250 | ||
| @@ -147,9 +252,11 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) | |||
| 147 | CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); | 252 | CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); |
| 148 | cmd.data[0] = devid; | 253 | cmd.data[0] = devid; |
| 149 | 254 | ||
| 255 | ret = iommu_queue_command(iommu, &cmd); | ||
| 256 | |||
| 150 | iommu->need_sync = 1; | 257 | iommu->need_sync = 1; |
| 151 | 258 | ||
| 152 | return iommu_queue_command(iommu, &cmd); | 259 | return ret; |
| 153 | } | 260 | } |
| 154 | 261 | ||
| 155 | /* | 262 | /* |
| @@ -159,6 +266,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, | |||
| 159 | u64 address, u16 domid, int pde, int s) | 266 | u64 address, u16 domid, int pde, int s) |
| 160 | { | 267 | { |
| 161 | struct iommu_cmd cmd; | 268 | struct iommu_cmd cmd; |
| 269 | int ret; | ||
| 162 | 270 | ||
| 163 | memset(&cmd, 0, sizeof(cmd)); | 271 | memset(&cmd, 0, sizeof(cmd)); |
| 164 | address &= PAGE_MASK; | 272 | address &= PAGE_MASK; |
| @@ -171,9 +279,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, | |||
| 171 | if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ | 279 | if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ |
| 172 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; | 280 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; |
| 173 | 281 | ||
| 282 | ret = iommu_queue_command(iommu, &cmd); | ||
| 283 | |||
| 174 | iommu->need_sync = 1; | 284 | iommu->need_sync = 1; |
| 175 | 285 | ||
| 176 | return iommu_queue_command(iommu, &cmd); | 286 | return ret; |
| 177 | } | 287 | } |
| 178 | 288 | ||
| 179 | /* | 289 | /* |
| @@ -203,6 +313,14 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, | |||
| 203 | return 0; | 313 | return 0; |
| 204 | } | 314 | } |
| 205 | 315 | ||
| 316 | /* Flush the whole IO/TLB for a given protection domain */ | ||
| 317 | static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) | ||
| 318 | { | ||
| 319 | u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | ||
| 320 | |||
| 321 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); | ||
| 322 | } | ||
| 323 | |||
| 206 | /**************************************************************************** | 324 | /**************************************************************************** |
| 207 | * | 325 | * |
| 208 | * The functions below are used the create the page table mappings for | 326 | * The functions below are used the create the page table mappings for |
| @@ -362,11 +480,6 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | |||
| 362 | * efficient allocator. | 480 | * efficient allocator. |
| 363 | * | 481 | * |
| 364 | ****************************************************************************/ | 482 | ****************************************************************************/ |
| 365 | static unsigned long dma_mask_to_pages(unsigned long mask) | ||
| 366 | { | ||
| 367 | return (mask >> PAGE_SHIFT) + | ||
| 368 | (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); | ||
| 369 | } | ||
| 370 | 483 | ||
| 371 | /* | 484 | /* |
| 372 | * The address allocator core function. | 485 | * The address allocator core function. |
| @@ -375,25 +488,31 @@ static unsigned long dma_mask_to_pages(unsigned long mask) | |||
| 375 | */ | 488 | */ |
| 376 | static unsigned long dma_ops_alloc_addresses(struct device *dev, | 489 | static unsigned long dma_ops_alloc_addresses(struct device *dev, |
| 377 | struct dma_ops_domain *dom, | 490 | struct dma_ops_domain *dom, |
| 378 | unsigned int pages) | 491 | unsigned int pages, |
| 492 | unsigned long align_mask, | ||
| 493 | u64 dma_mask) | ||
| 379 | { | 494 | { |
| 380 | unsigned long limit = dma_mask_to_pages(*dev->dma_mask); | 495 | unsigned long limit; |
| 381 | unsigned long address; | 496 | unsigned long address; |
| 382 | unsigned long size = dom->aperture_size >> PAGE_SHIFT; | ||
| 383 | unsigned long boundary_size; | 497 | unsigned long boundary_size; |
| 384 | 498 | ||
| 385 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, | 499 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, |
| 386 | PAGE_SIZE) >> PAGE_SHIFT; | 500 | PAGE_SIZE) >> PAGE_SHIFT; |
| 387 | limit = limit < size ? limit : size; | 501 | limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0, |
| 502 | dma_mask >> PAGE_SHIFT); | ||
| 388 | 503 | ||
| 389 | if (dom->next_bit >= limit) | 504 | if (dom->next_bit >= limit) { |
| 390 | dom->next_bit = 0; | 505 | dom->next_bit = 0; |
| 506 | dom->need_flush = true; | ||
| 507 | } | ||
| 391 | 508 | ||
| 392 | address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, | 509 | address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, |
| 393 | 0 , boundary_size, 0); | 510 | 0 , boundary_size, align_mask); |
| 394 | if (address == -1) | 511 | if (address == -1) { |
| 395 | address = iommu_area_alloc(dom->bitmap, limit, 0, pages, | 512 | address = iommu_area_alloc(dom->bitmap, limit, 0, pages, |
| 396 | 0, boundary_size, 0); | 513 | 0, boundary_size, align_mask); |
| 514 | dom->need_flush = true; | ||
| 515 | } | ||
| 397 | 516 | ||
| 398 | if (likely(address != -1)) { | 517 | if (likely(address != -1)) { |
| 399 | dom->next_bit = address + pages; | 518 | dom->next_bit = address + pages; |
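The allocator works next-fit over the aperture bitmap: it searches from dom->next_bit, and only when it wraps back to the start (or falls back to a second search from zero) can previously freed I/O addresses be handed out again, which is exactly where need_flush is set and the lazy IO/TLB flush becomes necessary. A self-contained sketch of that next-fit-with-wrap policy over a plain byte-per-page array (no boundary or alignment handling, unlike the real iommu_area_alloc()):

#include <stdbool.h>
#include <string.h>

#define APERTURE_PAGES 1024            /* illustrative aperture size in pages */

struct toy_dom {
        unsigned char bitmap[APERTURE_PAGES];  /* one byte per page for clarity */
        unsigned long next_bit;
        bool          need_flush;
};

static long find_free_run(struct toy_dom *dom, unsigned long start,
                          unsigned long limit, unsigned int pages)
{
        unsigned long i, run = 0;

        for (i = start; i < limit; i++) {
                run = dom->bitmap[i] ? 0 : run + 1;
                if (run == pages)
                        return i - pages + 1;
        }
        return -1;
}

static long alloc_pages_nextfit(struct toy_dom *dom, unsigned int pages)
{
        long addr;

        if (dom->next_bit >= APERTURE_PAGES) {          /* wrapped around */
                dom->next_bit = 0;
                dom->need_flush = true;                 /* stale IO/TLB entries may exist */
        }

        addr = find_free_run(dom, dom->next_bit, APERTURE_PAGES, pages);
        if (addr < 0) {                                 /* retry from the start */
                addr = find_free_run(dom, 0, APERTURE_PAGES, pages);
                dom->need_flush = true;
        }

        if (addr >= 0) {
                memset(dom->bitmap + addr, 1, pages);   /* mark the run allocated */
                dom->next_bit = addr + pages;
        }
        return addr;                                    /* page index, or -1 */
}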
| @@ -459,7 +578,7 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | |||
| 459 | if (start_page + pages > last_page) | 578 | if (start_page + pages > last_page) |
| 460 | pages = last_page - start_page; | 579 | pages = last_page - start_page; |
| 461 | 580 | ||
| 462 | set_bit_string(dom->bitmap, start_page, pages); | 581 | iommu_area_reserve(dom->bitmap, start_page, pages); |
| 463 | } | 582 | } |
| 464 | 583 | ||
| 465 | static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) | 584 | static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) |
| @@ -553,6 +672,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, | |||
| 553 | dma_dom->bitmap[0] = 1; | 672 | dma_dom->bitmap[0] = 1; |
| 554 | dma_dom->next_bit = 0; | 673 | dma_dom->next_bit = 0; |
| 555 | 674 | ||
| 675 | dma_dom->need_flush = false; | ||
| 676 | dma_dom->target_dev = 0xffff; | ||
| 677 | |||
| 556 | /* Intialize the exclusion range if necessary */ | 678 | /* Intialize the exclusion range if necessary */ |
| 557 | if (iommu->exclusion_start && | 679 | if (iommu->exclusion_start && |
| 558 | iommu->exclusion_start < dma_dom->aperture_size) { | 680 | iommu->exclusion_start < dma_dom->aperture_size) { |
| @@ -623,12 +745,13 @@ static void set_device_domain(struct amd_iommu *iommu, | |||
| 623 | 745 | ||
| 624 | u64 pte_root = virt_to_phys(domain->pt_root); | 746 | u64 pte_root = virt_to_phys(domain->pt_root); |
| 625 | 747 | ||
| 626 | pte_root |= (domain->mode & 0x07) << 9; | 748 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) |
| 627 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2; | 749 | << DEV_ENTRY_MODE_SHIFT; |
| 750 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | ||
| 628 | 751 | ||
| 629 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 752 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
| 630 | amd_iommu_dev_table[devid].data[0] = pte_root; | 753 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); |
| 631 | amd_iommu_dev_table[devid].data[1] = pte_root >> 32; | 754 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); |
| 632 | amd_iommu_dev_table[devid].data[2] = domain->id; | 755 | amd_iommu_dev_table[devid].data[2] = domain->id; |
| 633 | 756 | ||
| 634 | amd_iommu_pd_table[devid] = domain; | 757 | amd_iommu_pd_table[devid] = domain; |
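The first 64 bits of a device table entry carry the physical address of the page-table root plus the paging mode and the TV/IR/IW/P control bits, and the code splits that 64-bit value across two 32-bit data words with lower_32_bits()/upper_32_bits(). A standalone sketch of the same packing; the bit positions below are stand-ins for the DEV_ENTRY_* and IOMMU_PTE_* macros named in the diff:

#include <stdint.h>

/* Illustrative bit assignments, not the authoritative amd_iommu_types.h values. */
#define ENTRY_MODE_MASK   0x07ULL
#define ENTRY_MODE_SHIFT  9
#define ENTRY_TV          (1ULL << 1)   /* translation information valid */
#define ENTRY_IR          (1ULL << 61)  /* read permission  */
#define ENTRY_IW          (1ULL << 62)  /* write permission */
#define ENTRY_P           (1ULL << 0)   /* entry present    */

struct dev_table_entry {
        uint32_t data[8];               /* 256 bits per device */
};

static void set_entry(struct dev_table_entry *e, uint64_t pt_root_phys,
                      unsigned int mode, uint16_t domain_id)
{
        uint64_t pte_root = pt_root_phys;

        pte_root |= ((uint64_t)mode & ENTRY_MODE_MASK) << ENTRY_MODE_SHIFT;
        pte_root |= ENTRY_IR | ENTRY_IW | ENTRY_P | ENTRY_TV;

        e->data[0] = (uint32_t)pte_root;          /* lower_32_bits() */
        e->data[1] = (uint32_t)(pte_root >> 32);  /* upper_32_bits() */
        e->data[2] = domain_id;
}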
| @@ -646,6 +769,45 @@ static void set_device_domain(struct amd_iommu *iommu, | |||
| 646 | *****************************************************************************/ | 769 | *****************************************************************************/ |
| 647 | 770 | ||
| 648 | /* | 771 | /* |
| 772 | * This function checks if the driver got a valid device from the caller to | ||
| 773 | * avoid dereferencing invalid pointers. | ||
| 774 | */ | ||
| 775 | static bool check_device(struct device *dev) | ||
| 776 | { | ||
| 777 | if (!dev || !dev->dma_mask) | ||
| 778 | return false; | ||
| 779 | |||
| 780 | return true; | ||
| 781 | } | ||
| 782 | |||
| 783 | /* | ||
| 784 | * In this function the list of preallocated protection domains is traversed to | ||
| 785 | * find the domain for a specific device | ||
| 786 | */ | ||
| 787 | static struct dma_ops_domain *find_protection_domain(u16 devid) | ||
| 788 | { | ||
| 789 | struct dma_ops_domain *entry, *ret = NULL; | ||
| 790 | unsigned long flags; | ||
| 791 | |||
| 792 | if (list_empty(&iommu_pd_list)) | ||
| 793 | return NULL; | ||
| 794 | |||
| 795 | spin_lock_irqsave(&iommu_pd_list_lock, flags); | ||
| 796 | |||
| 797 | list_for_each_entry(entry, &iommu_pd_list, list) { | ||
| 798 | if (entry->target_dev == devid) { | ||
| 799 | ret = entry; | ||
| 800 | list_del(&ret->list); | ||
| 801 | break; | ||
| 802 | } | ||
| 803 | } | ||
| 804 | |||
| 805 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | ||
| 806 | |||
| 807 | return ret; | ||
| 808 | } | ||
| 809 | |||
| 810 | /* | ||
| 649 | * In the dma_ops path we only have the struct device. This function | 811 | * In the dma_ops path we only have the struct device. This function |
| 650 | * finds the corresponding IOMMU, the protection domain and the | 812 | * finds the corresponding IOMMU, the protection domain and the |
| 651 | * requestor id for a given device. | 813 | * requestor id for a given device. |
| @@ -661,27 +823,30 @@ static int get_device_resources(struct device *dev, | |||
| 661 | struct pci_dev *pcidev; | 823 | struct pci_dev *pcidev; |
| 662 | u16 _bdf; | 824 | u16 _bdf; |
| 663 | 825 | ||
| 664 | BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); | 826 | *iommu = NULL; |
| 827 | *domain = NULL; | ||
| 828 | *bdf = 0xffff; | ||
| 829 | |||
| 830 | if (dev->bus != &pci_bus_type) | ||
| 831 | return 0; | ||
| 665 | 832 | ||
| 666 | pcidev = to_pci_dev(dev); | 833 | pcidev = to_pci_dev(dev); |
| 667 | _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); | 834 | _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); |
| 668 | 835 | ||
| 669 | /* device not translated by any IOMMU in the system? */ | 836 | /* device not translated by any IOMMU in the system? */ |
| 670 | if (_bdf > amd_iommu_last_bdf) { | 837 | if (_bdf > amd_iommu_last_bdf) |
| 671 | *iommu = NULL; | ||
| 672 | *domain = NULL; | ||
| 673 | *bdf = 0xffff; | ||
| 674 | return 0; | 838 | return 0; |
| 675 | } | ||
| 676 | 839 | ||
| 677 | *bdf = amd_iommu_alias_table[_bdf]; | 840 | *bdf = amd_iommu_alias_table[_bdf]; |
| 678 | 841 | ||
| 679 | *iommu = amd_iommu_rlookup_table[*bdf]; | 842 | *iommu = amd_iommu_rlookup_table[*bdf]; |
| 680 | if (*iommu == NULL) | 843 | if (*iommu == NULL) |
| 681 | return 0; | 844 | return 0; |
| 682 | dma_dom = (*iommu)->default_dom; | ||
| 683 | *domain = domain_for_device(*bdf); | 845 | *domain = domain_for_device(*bdf); |
| 684 | if (*domain == NULL) { | 846 | if (*domain == NULL) { |
| 847 | dma_dom = find_protection_domain(*bdf); | ||
| 848 | if (!dma_dom) | ||
| 849 | dma_dom = (*iommu)->default_dom; | ||
| 685 | *domain = &dma_dom->domain; | 850 | *domain = &dma_dom->domain; |
| 686 | set_device_domain(*iommu, *domain, *bdf); | 851 | set_device_domain(*iommu, *domain, *bdf); |
| 687 | printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " | 852 | printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " |
| @@ -760,17 +925,24 @@ static dma_addr_t __map_single(struct device *dev, | |||
| 760 | struct dma_ops_domain *dma_dom, | 925 | struct dma_ops_domain *dma_dom, |
| 761 | phys_addr_t paddr, | 926 | phys_addr_t paddr, |
| 762 | size_t size, | 927 | size_t size, |
| 763 | int dir) | 928 | int dir, |
| 929 | bool align, | ||
| 930 | u64 dma_mask) | ||
| 764 | { | 931 | { |
| 765 | dma_addr_t offset = paddr & ~PAGE_MASK; | 932 | dma_addr_t offset = paddr & ~PAGE_MASK; |
| 766 | dma_addr_t address, start; | 933 | dma_addr_t address, start; |
| 767 | unsigned int pages; | 934 | unsigned int pages; |
| 935 | unsigned long align_mask = 0; | ||
| 768 | int i; | 936 | int i; |
| 769 | 937 | ||
| 770 | pages = iommu_num_pages(paddr, size); | 938 | pages = iommu_num_pages(paddr, size); |
| 771 | paddr &= PAGE_MASK; | 939 | paddr &= PAGE_MASK; |
| 772 | 940 | ||
| 773 | address = dma_ops_alloc_addresses(dev, dma_dom, pages); | 941 | if (align) |
| 942 | align_mask = (1UL << get_order(size)) - 1; | ||
| 943 | |||
| 944 | address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, | ||
| 945 | dma_mask); | ||
| 774 | if (unlikely(address == bad_dma_address)) | 946 | if (unlikely(address == bad_dma_address)) |
| 775 | goto out; | 947 | goto out; |
| 776 | 948 | ||
| @@ -782,6 +954,12 @@ static dma_addr_t __map_single(struct device *dev, | |||
| 782 | } | 954 | } |
| 783 | address += offset; | 955 | address += offset; |
| 784 | 956 | ||
| 957 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { | ||
| 958 | iommu_flush_tlb(iommu, dma_dom->domain.id); | ||
| 959 | dma_dom->need_flush = false; | ||
| 960 | } else if (unlikely(iommu_has_npcache(iommu))) | ||
| 961 | iommu_flush_pages(iommu, dma_dom->domain.id, address, size); | ||
| 962 | |||
| 785 | out: | 963 | out: |
| 786 | return address; | 964 | return address; |
| 787 | } | 965 | } |
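__map_single() can now force the I/O virtual address to be size-aligned: get_order(size) gives the smallest n such that 2^n pages cover the request, and (1UL << n) - 1 becomes the alignment mask handed to the allocator. So a 12 KiB request on 4 KiB pages gets order 2 and must start on a 4-page (16 KiB) boundary. A small standalone illustration of that arithmetic, with order_of() as a stand-in for the kernel's get_order():

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Smallest n with (1 << n) pages covering at least `size` bytes. */
static unsigned int order_of(unsigned long size)
{
        unsigned long pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        unsigned int order = 0;

        while ((1UL << order) < pages)
                order++;
        return order;
}

int main(void)
{
        unsigned long size = 12 * 1024;                 /* three 4 KiB pages       */
        unsigned int order = order_of(size);            /* -> 2                    */
        unsigned long align_mask = (1UL << order) - 1;  /* -> 0x3: 16 KiB alignment */

        printf("size=%lu order=%u align_mask=0x%lx\n", size, order, align_mask);
        return 0;
}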
| @@ -812,6 +990,9 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
| 812 | } | 990 | } |
| 813 | 991 | ||
| 814 | dma_ops_free_addresses(dma_dom, dma_addr, pages); | 992 | dma_ops_free_addresses(dma_dom, dma_addr, pages); |
| 993 | |||
| 994 | if (amd_iommu_unmap_flush) | ||
| 995 | iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); | ||
| 815 | } | 996 | } |
| 816 | 997 | ||
| 817 | /* | 998 | /* |
| @@ -825,6 +1006,12 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, | |||
| 825 | struct protection_domain *domain; | 1006 | struct protection_domain *domain; |
| 826 | u16 devid; | 1007 | u16 devid; |
| 827 | dma_addr_t addr; | 1008 | dma_addr_t addr; |
| 1009 | u64 dma_mask; | ||
| 1010 | |||
| 1011 | if (!check_device(dev)) | ||
| 1012 | return bad_dma_address; | ||
| 1013 | |||
| 1014 | dma_mask = *dev->dma_mask; | ||
| 828 | 1015 | ||
| 829 | get_device_resources(dev, &iommu, &domain, &devid); | 1016 | get_device_resources(dev, &iommu, &domain, &devid); |
| 830 | 1017 | ||
| @@ -833,14 +1020,12 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, | |||
| 833 | return (dma_addr_t)paddr; | 1020 | return (dma_addr_t)paddr; |
| 834 | 1021 | ||
| 835 | spin_lock_irqsave(&domain->lock, flags); | 1022 | spin_lock_irqsave(&domain->lock, flags); |
| 836 | addr = __map_single(dev, iommu, domain->priv, paddr, size, dir); | 1023 | addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, |
| 1024 | dma_mask); | ||
| 837 | if (addr == bad_dma_address) | 1025 | if (addr == bad_dma_address) |
| 838 | goto out; | 1026 | goto out; |
| 839 | 1027 | ||
| 840 | if (iommu_has_npcache(iommu)) | 1028 | if (unlikely(iommu->need_sync)) |
| 841 | iommu_flush_pages(iommu, domain->id, addr, size); | ||
| 842 | |||
| 843 | if (iommu->need_sync) | ||
| 844 | iommu_completion_wait(iommu); | 1029 | iommu_completion_wait(iommu); |
| 845 | 1030 | ||
| 846 | out: | 1031 | out: |
| @@ -860,7 +1045,8 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
| 860 | struct protection_domain *domain; | 1045 | struct protection_domain *domain; |
| 861 | u16 devid; | 1046 | u16 devid; |
| 862 | 1047 | ||
| 863 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | 1048 | if (!check_device(dev) || |
| 1049 | !get_device_resources(dev, &iommu, &domain, &devid)) | ||
| 864 | /* device not handled by any AMD IOMMU */ | 1050 | /* device not handled by any AMD IOMMU */ |
| 865 | return; | 1051 | return; |
| 866 | 1052 | ||
| @@ -868,9 +1054,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
| 868 | 1054 | ||
| 869 | __unmap_single(iommu, domain->priv, dma_addr, size, dir); | 1055 | __unmap_single(iommu, domain->priv, dma_addr, size, dir); |
| 870 | 1056 | ||
| 871 | iommu_flush_pages(iommu, domain->id, dma_addr, size); | 1057 | if (unlikely(iommu->need_sync)) |
| 872 | |||
| 873 | if (iommu->need_sync) | ||
| 874 | iommu_completion_wait(iommu); | 1058 | iommu_completion_wait(iommu); |
| 875 | 1059 | ||
| 876 | spin_unlock_irqrestore(&domain->lock, flags); | 1060 | spin_unlock_irqrestore(&domain->lock, flags); |
| @@ -909,6 +1093,12 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
| 909 | struct scatterlist *s; | 1093 | struct scatterlist *s; |
| 910 | phys_addr_t paddr; | 1094 | phys_addr_t paddr; |
| 911 | int mapped_elems = 0; | 1095 | int mapped_elems = 0; |
| 1096 | u64 dma_mask; | ||
| 1097 | |||
| 1098 | if (!check_device(dev)) | ||
| 1099 | return 0; | ||
| 1100 | |||
| 1101 | dma_mask = *dev->dma_mask; | ||
| 912 | 1102 | ||
| 913 | get_device_resources(dev, &iommu, &domain, &devid); | 1103 | get_device_resources(dev, &iommu, &domain, &devid); |
| 914 | 1104 | ||
| @@ -921,19 +1111,17 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
| 921 | paddr = sg_phys(s); | 1111 | paddr = sg_phys(s); |
| 922 | 1112 | ||
| 923 | s->dma_address = __map_single(dev, iommu, domain->priv, | 1113 | s->dma_address = __map_single(dev, iommu, domain->priv, |
| 924 | paddr, s->length, dir); | 1114 | paddr, s->length, dir, false, |
| 1115 | dma_mask); | ||
| 925 | 1116 | ||
| 926 | if (s->dma_address) { | 1117 | if (s->dma_address) { |
| 927 | s->dma_length = s->length; | 1118 | s->dma_length = s->length; |
| 928 | mapped_elems++; | 1119 | mapped_elems++; |
| 929 | } else | 1120 | } else |
| 930 | goto unmap; | 1121 | goto unmap; |
| 931 | if (iommu_has_npcache(iommu)) | ||
| 932 | iommu_flush_pages(iommu, domain->id, s->dma_address, | ||
| 933 | s->dma_length); | ||
| 934 | } | 1122 | } |
| 935 | 1123 | ||
| 936 | if (iommu->need_sync) | 1124 | if (unlikely(iommu->need_sync)) |
| 937 | iommu_completion_wait(iommu); | 1125 | iommu_completion_wait(iommu); |
| 938 | 1126 | ||
| 939 | out: | 1127 | out: |
| @@ -967,7 +1155,8 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
| 967 | u16 devid; | 1155 | u16 devid; |
| 968 | int i; | 1156 | int i; |
| 969 | 1157 | ||
| 970 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | 1158 | if (!check_device(dev) || |
| 1159 | !get_device_resources(dev, &iommu, &domain, &devid)) | ||
| 971 | return; | 1160 | return; |
| 972 | 1161 | ||
| 973 | spin_lock_irqsave(&domain->lock, flags); | 1162 | spin_lock_irqsave(&domain->lock, flags); |
| @@ -975,12 +1164,10 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
| 975 | for_each_sg(sglist, s, nelems, i) { | 1164 | for_each_sg(sglist, s, nelems, i) { |
| 976 | __unmap_single(iommu, domain->priv, s->dma_address, | 1165 | __unmap_single(iommu, domain->priv, s->dma_address, |
| 977 | s->dma_length, dir); | 1166 | s->dma_length, dir); |
| 978 | iommu_flush_pages(iommu, domain->id, s->dma_address, | ||
| 979 | s->dma_length); | ||
| 980 | s->dma_address = s->dma_length = 0; | 1167 | s->dma_address = s->dma_length = 0; |
| 981 | } | 1168 | } |
| 982 | 1169 | ||
| 983 | if (iommu->need_sync) | 1170 | if (unlikely(iommu->need_sync)) |
| 984 | iommu_completion_wait(iommu); | 1171 | iommu_completion_wait(iommu); |
| 985 | 1172 | ||
| 986 | spin_unlock_irqrestore(&domain->lock, flags); | 1173 | spin_unlock_irqrestore(&domain->lock, flags); |
| @@ -998,25 +1185,33 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
| 998 | struct protection_domain *domain; | 1185 | struct protection_domain *domain; |
| 999 | u16 devid; | 1186 | u16 devid; |
| 1000 | phys_addr_t paddr; | 1187 | phys_addr_t paddr; |
| 1188 | u64 dma_mask = dev->coherent_dma_mask; | ||
| 1189 | |||
| 1190 | if (!check_device(dev)) | ||
| 1191 | return NULL; | ||
| 1001 | 1192 | ||
| 1193 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | ||
| 1194 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
| 1195 | |||
| 1196 | flag |= __GFP_ZERO; | ||
| 1002 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); | 1197 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); |
| 1003 | if (!virt_addr) | 1198 | if (!virt_addr) |
| 1004 | return 0; | 1199 | return 0; |
| 1005 | 1200 | ||
| 1006 | memset(virt_addr, 0, size); | ||
| 1007 | paddr = virt_to_phys(virt_addr); | 1201 | paddr = virt_to_phys(virt_addr); |
| 1008 | 1202 | ||
| 1009 | get_device_resources(dev, &iommu, &domain, &devid); | ||
| 1010 | |||
| 1011 | if (!iommu || !domain) { | 1203 | if (!iommu || !domain) { |
| 1012 | *dma_addr = (dma_addr_t)paddr; | 1204 | *dma_addr = (dma_addr_t)paddr; |
| 1013 | return virt_addr; | 1205 | return virt_addr; |
| 1014 | } | 1206 | } |
| 1015 | 1207 | ||
| 1208 | if (!dma_mask) | ||
| 1209 | dma_mask = *dev->dma_mask; | ||
| 1210 | |||
| 1016 | spin_lock_irqsave(&domain->lock, flags); | 1211 | spin_lock_irqsave(&domain->lock, flags); |
| 1017 | 1212 | ||
| 1018 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, | 1213 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, |
| 1019 | size, DMA_BIDIRECTIONAL); | 1214 | size, DMA_BIDIRECTIONAL, true, dma_mask); |
| 1020 | 1215 | ||
| 1021 | if (*dma_addr == bad_dma_address) { | 1216 | if (*dma_addr == bad_dma_address) { |
| 1022 | free_pages((unsigned long)virt_addr, get_order(size)); | 1217 | free_pages((unsigned long)virt_addr, get_order(size)); |
| @@ -1024,10 +1219,7 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
| 1024 | goto out; | 1219 | goto out; |
| 1025 | } | 1220 | } |
| 1026 | 1221 | ||
| 1027 | if (iommu_has_npcache(iommu)) | 1222 | if (unlikely(iommu->need_sync)) |
| 1028 | iommu_flush_pages(iommu, domain->id, *dma_addr, size); | ||
| 1029 | |||
| 1030 | if (iommu->need_sync) | ||
| 1031 | iommu_completion_wait(iommu); | 1223 | iommu_completion_wait(iommu); |
| 1032 | 1224 | ||
| 1033 | out: | 1225 | out: |
| @@ -1038,8 +1230,6 @@ out: | |||
| 1038 | 1230 | ||
| 1039 | /* | 1231 | /* |
| 1040 | * The exported free_coherent function for dma_ops. | 1232 | * The exported free_coherent function for dma_ops. |
| 1041 | * FIXME: fix the generic x86 DMA layer so that it actually calls that | ||
| 1042 | * function. | ||
| 1043 | */ | 1233 | */ |
| 1044 | static void free_coherent(struct device *dev, size_t size, | 1234 | static void free_coherent(struct device *dev, size_t size, |
| 1045 | void *virt_addr, dma_addr_t dma_addr) | 1235 | void *virt_addr, dma_addr_t dma_addr) |
| @@ -1049,6 +1239,9 @@ static void free_coherent(struct device *dev, size_t size, | |||
| 1049 | struct protection_domain *domain; | 1239 | struct protection_domain *domain; |
| 1050 | u16 devid; | 1240 | u16 devid; |
| 1051 | 1241 | ||
| 1242 | if (!check_device(dev)) | ||
| 1243 | return; | ||
| 1244 | |||
| 1052 | get_device_resources(dev, &iommu, &domain, &devid); | 1245 | get_device_resources(dev, &iommu, &domain, &devid); |
| 1053 | 1246 | ||
| 1054 | if (!iommu || !domain) | 1247 | if (!iommu || !domain) |
| @@ -1057,9 +1250,8 @@ static void free_coherent(struct device *dev, size_t size, | |||
| 1057 | spin_lock_irqsave(&domain->lock, flags); | 1250 | spin_lock_irqsave(&domain->lock, flags); |
| 1058 | 1251 | ||
| 1059 | __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); | 1252 | __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); |
| 1060 | iommu_flush_pages(iommu, domain->id, dma_addr, size); | ||
| 1061 | 1253 | ||
| 1062 | if (iommu->need_sync) | 1254 | if (unlikely(iommu->need_sync)) |
| 1063 | iommu_completion_wait(iommu); | 1255 | iommu_completion_wait(iommu); |
| 1064 | 1256 | ||
| 1065 | spin_unlock_irqrestore(&domain->lock, flags); | 1257 | spin_unlock_irqrestore(&domain->lock, flags); |
| @@ -1069,6 +1261,30 @@ free_mem: | |||
| 1069 | } | 1261 | } |
| 1070 | 1262 | ||
| 1071 | /* | 1263 | /* |
| 1264 | * This function is called by the DMA layer to find out if we can handle a | ||
| 1265 | * particular device. It is part of the dma_ops. | ||
| 1266 | */ | ||
| 1267 | static int amd_iommu_dma_supported(struct device *dev, u64 mask) | ||
| 1268 | { | ||
| 1269 | u16 bdf; | ||
| 1270 | struct pci_dev *pcidev; | ||
| 1271 | |||
| 1272 | /* No device or no PCI device */ | ||
| 1273 | if (!dev || dev->bus != &pci_bus_type) | ||
| 1274 | return 0; | ||
| 1275 | |||
| 1276 | pcidev = to_pci_dev(dev); | ||
| 1277 | |||
| 1278 | bdf = calc_devid(pcidev->bus->number, pcidev->devfn); | ||
| 1279 | |||
| 1280 | /* Out of our scope? */ | ||
| 1281 | if (bdf > amd_iommu_last_bdf) | ||
| 1282 | return 0; | ||
| 1283 | |||
| 1284 | return 1; | ||
| 1285 | } | ||
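calc_devid() folds a PCI bus number and devfn into the 16-bit device ID that the IOMMU tables are indexed by: bus in the high byte, devfn (slot and function) in the low byte. A small worked example of that packing and of the reverse split done by PCI_BUS/PCI_SLOT/PCI_FUNC-style macros:

#include <stdint.h>
#include <stdio.h>

static uint16_t calc_devid_example(uint8_t bus, uint8_t devfn)
{
        return ((uint16_t)bus << 8) | devfn;    /* e.g. 01:05.2 -> 0x012a */
}

int main(void)
{
        uint8_t  devfn = (5 << 3) | 2;          /* slot 5, function 2 */
        uint16_t devid = calc_devid_example(0x01, devfn);

        printf("devid=0x%04x bus=%02x slot=%02x func=%x\n",
               devid, devid >> 8, (devid >> 3) & 0x1f, devid & 0x7);
        return 0;
}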
| 1286 | |||
| 1287 | /* | ||
| 1072 | * The function for pre-allocating protection domains. | 1288 | * The function for pre-allocating protection domains. |
| 1073 | * | 1289 | * |
| 1074 | * If the driver core informs the DMA layer if a driver grabs a device | 1290 | * If the driver core informs the DMA layer if a driver grabs a device |
| @@ -1097,10 +1313,9 @@ void prealloc_protection_domains(void) | |||
| 1097 | if (!dma_dom) | 1313 | if (!dma_dom) |
| 1098 | continue; | 1314 | continue; |
| 1099 | init_unity_mappings_for_device(dma_dom, devid); | 1315 | init_unity_mappings_for_device(dma_dom, devid); |
| 1100 | set_device_domain(iommu, &dma_dom->domain, devid); | 1316 | dma_dom->target_dev = devid; |
| 1101 | printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ", | 1317 | |
| 1102 | dma_dom->domain.id); | 1318 | list_add_tail(&dma_dom->list, &iommu_pd_list); |
| 1103 | print_devid(devid, 1); | ||
| 1104 | } | 1319 | } |
| 1105 | } | 1320 | } |
| 1106 | 1321 | ||
| @@ -1111,6 +1326,7 @@ static struct dma_mapping_ops amd_iommu_dma_ops = { | |||
| 1111 | .unmap_single = unmap_single, | 1326 | .unmap_single = unmap_single, |
| 1112 | .map_sg = map_sg, | 1327 | .map_sg = map_sg, |
| 1113 | .unmap_sg = unmap_sg, | 1328 | .unmap_sg = unmap_sg, |
| 1329 | .dma_supported = amd_iommu_dma_supported, | ||
| 1114 | }; | 1330 | }; |
| 1115 | 1331 | ||
| 1116 | /* | 1332 | /* |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index a69cc0f52042..148fcfe22f17 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
| @@ -22,6 +22,8 @@ | |||
| 22 | #include <linux/gfp.h> | 22 | #include <linux/gfp.h> |
| 23 | #include <linux/list.h> | 23 | #include <linux/list.h> |
| 24 | #include <linux/sysdev.h> | 24 | #include <linux/sysdev.h> |
| 25 | #include <linux/interrupt.h> | ||
| 26 | #include <linux/msi.h> | ||
| 25 | #include <asm/pci-direct.h> | 27 | #include <asm/pci-direct.h> |
| 26 | #include <asm/amd_iommu_types.h> | 28 | #include <asm/amd_iommu_types.h> |
| 27 | #include <asm/amd_iommu.h> | 29 | #include <asm/amd_iommu.h> |
| @@ -30,7 +32,6 @@ | |||
| 30 | /* | 32 | /* |
| 31 | * definitions for the ACPI scanning code | 33 | * definitions for the ACPI scanning code |
| 32 | */ | 34 | */ |
| 33 | #define PCI_BUS(x) (((x) >> 8) & 0xff) | ||
| 34 | #define IVRS_HEADER_LENGTH 48 | 35 | #define IVRS_HEADER_LENGTH 48 |
| 35 | 36 | ||
| 36 | #define ACPI_IVHD_TYPE 0x10 | 37 | #define ACPI_IVHD_TYPE 0x10 |
| @@ -121,6 +122,7 @@ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings | |||
| 121 | we find in ACPI */ | 122 | we find in ACPI */ |
| 122 | unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ | 123 | unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ |
| 123 | int amd_iommu_isolate; /* if 1, device isolation is enabled */ | 124 | int amd_iommu_isolate; /* if 1, device isolation is enabled */ |
| 125 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ | ||
| 124 | 126 | ||
| 125 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the | 127 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the |
| 126 | system */ | 128 | system */ |
| @@ -234,7 +236,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
| 234 | { | 236 | { |
| 235 | u32 ctrl; | 237 | u32 ctrl; |
| 236 | 238 | ||
| 237 | ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); | 239 | ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); |
| 238 | ctrl &= ~(1 << bit); | 240 | ctrl &= ~(1 << bit); |
| 239 | writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); | 241 | writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); |
| 240 | } | 242 | } |
| @@ -242,13 +244,23 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
| 242 | /* Function to enable the hardware */ | 244 | /* Function to enable the hardware */ |
| 243 | void __init iommu_enable(struct amd_iommu *iommu) | 245 | void __init iommu_enable(struct amd_iommu *iommu) |
| 244 | { | 246 | { |
| 245 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); | 247 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " |
| 246 | print_devid(iommu->devid, 0); | 248 | "at %02x:%02x.%x cap 0x%hx\n", |
| 247 | printk(" cap 0x%hx\n", iommu->cap_ptr); | 249 | iommu->dev->bus->number, |
| 250 | PCI_SLOT(iommu->dev->devfn), | ||
| 251 | PCI_FUNC(iommu->dev->devfn), | ||
| 252 | iommu->cap_ptr); | ||
| 248 | 253 | ||
| 249 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | 254 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); |
| 250 | } | 255 | } |
| 251 | 256 | ||
| 257 | /* Function to enable IOMMU event logging and event interrupts */ | ||
| 258 | void __init iommu_enable_event_logging(struct amd_iommu *iommu) | ||
| 259 | { | ||
| 260 | iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); | ||
| 261 | iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); | ||
| 262 | } | ||
| 263 | |||
| 252 | /* | 264 | /* |
| 253 | * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in | 265 | * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in |
| 254 | * the system has one. | 266 | * the system has one. |
| @@ -286,6 +298,14 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) | |||
| 286 | ****************************************************************************/ | 298 | ****************************************************************************/ |
| 287 | 299 | ||
| 288 | /* | 300 | /* |
| 301 | * This function calculates the length of a given IVHD entry | ||
| 302 | */ | ||
| 303 | static inline int ivhd_entry_length(u8 *ivhd) | ||
| 304 | { | ||
| 305 | return 0x04 << (*ivhd >> 6); | ||
| 306 | } | ||
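The helper encodes the IVHD rule that the top two bits of an entry's type byte select its length: 0x04 << (type >> 6) gives 4 bytes for types 0x00-0x3f, 8 bytes for 0x40-0x7f, 16 bytes for 0x80-0xbf and 32 bytes for 0xc0-0xff. A quick check of that arithmetic:

#include <assert.h>

static int ivhd_entry_length_example(unsigned char type)
{
        return 0x04 << (type >> 6);
}

int main(void)
{
        assert(ivhd_entry_length_example(0x02) == 4);   /* 0x02 >> 6 == 0 */
        assert(ivhd_entry_length_example(0x42) == 8);   /* 0x42 >> 6 == 1 */
        assert(ivhd_entry_length_example(0x80) == 16);  /* 0x80 >> 6 == 2 */
        assert(ivhd_entry_length_example(0xc0) == 32);  /* 0xc0 >> 6 == 3 */
        return 0;
}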
| 307 | |||
| 308 | /* | ||
| 289 | * This function reads the last device id the IOMMU has to handle from the PCI | 309 | * This function reads the last device id the IOMMU has to handle from the PCI |
| 290 | * capability header for this IOMMU | 310 | * capability header for this IOMMU |
| 291 | */ | 311 | */ |
| @@ -329,7 +349,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) | |||
| 329 | default: | 349 | default: |
| 330 | break; | 350 | break; |
| 331 | } | 351 | } |
| 332 | p += 0x04 << (*p >> 6); | 352 | p += ivhd_entry_length(p); |
| 333 | } | 353 | } |
| 334 | 354 | ||
| 335 | WARN_ON(p != end); | 355 | WARN_ON(p != end); |
| @@ -414,7 +434,32 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | |||
| 414 | 434 | ||
| 415 | static void __init free_command_buffer(struct amd_iommu *iommu) | 435 | static void __init free_command_buffer(struct amd_iommu *iommu) |
| 416 | { | 436 | { |
| 417 | free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); | 437 | free_pages((unsigned long)iommu->cmd_buf, |
| 438 | get_order(iommu->cmd_buf_size)); | ||
| 439 | } | ||
| 440 | |||
| 441 | /* allocates the memory where the IOMMU will log its events to */ | ||
| 442 | static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) | ||
| 443 | { | ||
| 444 | u64 entry; | ||
| 445 | iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
| 446 | get_order(EVT_BUFFER_SIZE)); | ||
| 447 | |||
| 448 | if (iommu->evt_buf == NULL) | ||
| 449 | return NULL; | ||
| 450 | |||
| 451 | entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; | ||
| 452 | memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, | ||
| 453 | &entry, sizeof(entry)); | ||
| 454 | |||
| 455 | iommu->evt_buf_size = EVT_BUFFER_SIZE; | ||
| 456 | |||
| 457 | return iommu->evt_buf; | ||
| 458 | } | ||
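alloc_event_buffer() programs the event log base register with the buffer's physical address OR'd with a length field (EVT_LEN_MASK encodes the number of entries as a power of two in the upper bits of the register). A standalone sketch of building such a register value; the bit position of the length field here is an assumption for illustration, not taken from the diff or the spec:

#include <stdint.h>
#include <stdio.h>

#define EVT_LEN_SHIFT 56        /* assumed position of the length field */

/*
 * Build the 64-bit value written to the event log base register:
 * physical address of the page-aligned buffer plus log2(entries)
 * in the upper length field.
 */
static uint64_t evt_base_reg(uint64_t buf_phys, unsigned int entries)
{
        unsigned int log2_entries = 0;

        while ((1u << log2_entries) < entries)
                log2_entries++;

        return buf_phys | ((uint64_t)log2_entries << EVT_LEN_SHIFT);
}

int main(void)
{
        /* a 512-entry buffer at physical 0x1234000 -> 0x0900000001234000 */
        printf("0x%016llx\n",
               (unsigned long long)evt_base_reg(0x1234000ULL, 512));
        return 0;
}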
| 459 | |||
| 460 | static void __init free_event_buffer(struct amd_iommu *iommu) | ||
| 461 | { | ||
| 462 | free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); | ||
| 418 | } | 463 | } |
| 419 | 464 | ||
| 420 | /* sets a specific bit in the device table entry. */ | 465 | /* sets a specific bit in the device table entry. */ |
| @@ -487,19 +532,21 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) | |||
| 487 | */ | 532 | */ |
| 488 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) | 533 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) |
| 489 | { | 534 | { |
| 490 | int bus = PCI_BUS(iommu->devid); | ||
| 491 | int dev = PCI_SLOT(iommu->devid); | ||
| 492 | int fn = PCI_FUNC(iommu->devid); | ||
| 493 | int cap_ptr = iommu->cap_ptr; | 535 | int cap_ptr = iommu->cap_ptr; |
| 494 | u32 range; | 536 | u32 range, misc; |
| 495 | 537 | ||
| 496 | iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); | 538 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, |
| 539 | &iommu->cap); | ||
| 540 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, | ||
| 541 | &range); | ||
| 542 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, | ||
| 543 | &misc); | ||
| 497 | 544 | ||
| 498 | range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); | ||
| 499 | iommu->first_device = calc_devid(MMIO_GET_BUS(range), | 545 | iommu->first_device = calc_devid(MMIO_GET_BUS(range), |
| 500 | MMIO_GET_FD(range)); | 546 | MMIO_GET_FD(range)); |
| 501 | iommu->last_device = calc_devid(MMIO_GET_BUS(range), | 547 | iommu->last_device = calc_devid(MMIO_GET_BUS(range), |
| 502 | MMIO_GET_LD(range)); | 548 | MMIO_GET_LD(range)); |
| 549 | iommu->evt_msi_num = MMIO_MSI_NUM(misc); | ||
| 503 | } | 550 | } |
| 504 | 551 | ||
| 505 | /* | 552 | /* |
| @@ -604,7 +651,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
| 604 | break; | 651 | break; |
| 605 | } | 652 | } |
| 606 | 653 | ||
| 607 | p += 0x04 << (e->type >> 6); | 654 | p += ivhd_entry_length(p); |
| 608 | } | 655 | } |
| 609 | } | 656 | } |
| 610 | 657 | ||
| @@ -622,6 +669,7 @@ static int __init init_iommu_devices(struct amd_iommu *iommu) | |||
| 622 | static void __init free_iommu_one(struct amd_iommu *iommu) | 669 | static void __init free_iommu_one(struct amd_iommu *iommu) |
| 623 | { | 670 | { |
| 624 | free_command_buffer(iommu); | 671 | free_command_buffer(iommu); |
| 672 | free_event_buffer(iommu); | ||
| 625 | iommu_unmap_mmio_space(iommu); | 673 | iommu_unmap_mmio_space(iommu); |
| 626 | } | 674 | } |
| 627 | 675 | ||
| @@ -649,8 +697,12 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | |||
| 649 | /* | 697 | /* |
| 650 | * Copy data from ACPI table entry to the iommu struct | 698 | * Copy data from ACPI table entry to the iommu struct |
| 651 | */ | 699 | */ |
| 652 | iommu->devid = h->devid; | 700 | iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff); |
| 701 | if (!iommu->dev) | ||
| 702 | return 1; | ||
| 703 | |||
| 653 | iommu->cap_ptr = h->cap_ptr; | 704 | iommu->cap_ptr = h->cap_ptr; |
| 705 | iommu->pci_seg = h->pci_seg; | ||
| 654 | iommu->mmio_phys = h->mmio_phys; | 706 | iommu->mmio_phys = h->mmio_phys; |
| 655 | iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); | 707 | iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); |
| 656 | if (!iommu->mmio_base) | 708 | if (!iommu->mmio_base) |
| @@ -661,10 +713,18 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | |||
| 661 | if (!iommu->cmd_buf) | 713 | if (!iommu->cmd_buf) |
| 662 | return -ENOMEM; | 714 | return -ENOMEM; |
| 663 | 715 | ||
| 716 | iommu->evt_buf = alloc_event_buffer(iommu); | ||
| 717 | if (!iommu->evt_buf) | ||
| 718 | return -ENOMEM; | ||
| 719 | |||
| 720 | iommu->int_enabled = false; | ||
| 721 | |||
| 664 | init_iommu_from_pci(iommu); | 722 | init_iommu_from_pci(iommu); |
| 665 | init_iommu_from_acpi(iommu, h); | 723 | init_iommu_from_acpi(iommu, h); |
| 666 | init_iommu_devices(iommu); | 724 | init_iommu_devices(iommu); |
| 667 | 725 | ||
| 726 | pci_enable_device(iommu->dev); | ||
| 727 | |||
| 668 | return 0; | 728 | return 0; |
| 669 | } | 729 | } |
| 670 | 730 | ||
| @@ -706,6 +766,95 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
| 706 | 766 | ||
| 707 | /**************************************************************************** | 767 | /**************************************************************************** |
| 708 | * | 768 | * |
| 769 | * The following functions initialize the MSI interrupts for all IOMMUs | ||
| 770 | * in the system. Its a bit challenging because there could be multiple | ||
| 771 | * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per | ||
| 772 | * pci_dev. | ||
| 773 | * | ||
| 774 | ****************************************************************************/ | ||
| 775 | |||
| 776 | static int __init iommu_setup_msix(struct amd_iommu *iommu) | ||
| 777 | { | ||
| 778 | struct amd_iommu *curr; | ||
| 779 | struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */ | ||
| 780 | int nvec = 0, i; | ||
| 781 | |||
| 782 | list_for_each_entry(curr, &amd_iommu_list, list) { | ||
| 783 | if (curr->dev == iommu->dev) { | ||
| 784 | entries[nvec].entry = curr->evt_msi_num; | ||
| 785 | entries[nvec].vector = 0; | ||
| 786 | curr->int_enabled = true; | ||
| 787 | nvec++; | ||
| 788 | } | ||
| 789 | } | ||
| 790 | |||
| 791 | if (pci_enable_msix(iommu->dev, entries, nvec)) { | ||
| 792 | pci_disable_msix(iommu->dev); | ||
| 793 | return 1; | ||
| 794 | } | ||
| 795 | |||
| 796 | for (i = 0; i < nvec; ++i) { | ||
| 797 | int r = request_irq(entries->vector, amd_iommu_int_handler, | ||
| 798 | IRQF_SAMPLE_RANDOM, | ||
| 799 | "AMD IOMMU", | ||
| 800 | NULL); | ||
| 801 | if (r) | ||
| 802 | goto out_free; | ||
| 803 | } | ||
| 804 | |||
| 805 | return 0; | ||
| 806 | |||
| 807 | out_free: | ||
| 808 | for (i -= 1; i >= 0; --i) | ||
| 809 | free_irq(entries->vector, NULL); | ||
| 810 | |||
| 811 | pci_disable_msix(iommu->dev); | ||
| 812 | |||
| 813 | return 1; | ||
| 814 | } | ||
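Several IOMMUs can sit behind one PCI function, but MSI-X may be enabled only once per pci_dev, so the loop above first gathers the event log MSI-X table indices of every IOMMU sharing the device and then enables them in one go. A standalone model of just that grouping step (struct and field names are stand-ins; it returns what the driver would pass as the vector count to pci_enable_msix()):

#include <stdbool.h>
#include <stddef.h>

#define MAX_VECTORS 32          /* the hardware exposes at most 32 MSI-X entries */

struct iommu_desc {
        const void *pci_dev;    /* stand-in for struct pci_dev *        */
        int         evt_msi_num;/* MSI-X table index for the event log  */
        bool        int_enabled;
};

/*
 * Collect the MSI-X table indices of every IOMMU that sits behind the
 * same PCI function as *target, mirroring the loop in iommu_setup_msix().
 */
static int collect_vectors(struct iommu_desc *iommus, size_t n,
                           const struct iommu_desc *target,
                           int entries[MAX_VECTORS])
{
        int nvec = 0;
        size_t i;

        for (i = 0; i < n && nvec < MAX_VECTORS; i++) {
                if (iommus[i].pci_dev != target->pci_dev)
                        continue;
                entries[nvec++] = iommus[i].evt_msi_num;
                iommus[i].int_enabled = true;
        }
        return nvec;
}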
| 815 | |||
| 816 | static int __init iommu_setup_msi(struct amd_iommu *iommu) | ||
| 817 | { | ||
| 818 | int r; | ||
| 819 | struct amd_iommu *curr; | ||
| 820 | |||
| 821 | list_for_each_entry(curr, &amd_iommu_list, list) { | ||
| 822 | if (curr->dev == iommu->dev) | ||
| 823 | curr->int_enabled = true; | ||
| 824 | } | ||
| 825 | |||
| 826 | |||
| 827 | if (pci_enable_msi(iommu->dev)) | ||
| 828 | return 1; | ||
| 829 | |||
| 830 | r = request_irq(iommu->dev->irq, amd_iommu_int_handler, | ||
| 831 | IRQF_SAMPLE_RANDOM, | ||
| 832 | "AMD IOMMU", | ||
| 833 | NULL); | ||
| 834 | |||
| 835 | if (r) { | ||
| 836 | pci_disable_msi(iommu->dev); | ||
| 837 | return 1; | ||
| 838 | } | ||
| 839 | |||
| 840 | return 0; | ||
| 841 | } | ||
| 842 | |||
| 843 | static int __init iommu_init_msi(struct amd_iommu *iommu) | ||
| 844 | { | ||
| 845 | if (iommu->int_enabled) | ||
| 846 | return 0; | ||
| 847 | |||
| 848 | if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX)) | ||
| 849 | return iommu_setup_msix(iommu); | ||
| 850 | else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) | ||
| 851 | return iommu_setup_msi(iommu); | ||
| 852 | |||
| 853 | return 1; | ||
| 854 | } | ||
| 855 | |||
| 856 | /**************************************************************************** | ||
| 857 | * | ||
| 709 | * The next functions belong to the third pass of parsing the ACPI | 858 | * The next functions belong to the third pass of parsing the ACPI |
| 710 | * table. In this last pass the memory mapping requirements are | 859 | * table. In this last pass the memory mapping requirements are |
| 711 | * gathered (like exclusion and unity mapping reanges). | 860 | * gathered (like exclusion and unity mapping reanges). |
| @@ -811,7 +960,6 @@ static void init_device_table(void) | |||
| 811 | for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { | 960 | for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { |
| 812 | set_dev_entry_bit(devid, DEV_ENTRY_VALID); | 961 | set_dev_entry_bit(devid, DEV_ENTRY_VALID); |
| 813 | set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); | 962 | set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); |
| 814 | set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT); | ||
| 815 | } | 963 | } |
| 816 | } | 964 | } |
| 817 | 965 | ||
| @@ -825,6 +973,8 @@ static void __init enable_iommus(void) | |||
| 825 | 973 | ||
| 826 | list_for_each_entry(iommu, &amd_iommu_list, list) { | 974 | list_for_each_entry(iommu, &amd_iommu_list, list) { |
| 827 | iommu_set_exclusion_range(iommu); | 975 | iommu_set_exclusion_range(iommu); |
| 976 | iommu_init_msi(iommu); | ||
| 977 | iommu_enable_event_logging(iommu); | ||
| 828 | iommu_enable(iommu); | 978 | iommu_enable(iommu); |
| 829 | } | 979 | } |
| 830 | } | 980 | } |
| @@ -995,11 +1145,17 @@ int __init amd_iommu_init(void) | |||
| 995 | else | 1145 | else |
| 996 | printk("disabled\n"); | 1146 | printk("disabled\n"); |
| 997 | 1147 | ||
| 1148 | if (amd_iommu_unmap_flush) | ||
| 1149 | printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); | ||
| 1150 | else | ||
| 1151 | printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); | ||
| 1152 | |||
| 998 | out: | 1153 | out: |
| 999 | return ret; | 1154 | return ret; |
| 1000 | 1155 | ||
| 1001 | free: | 1156 | free: |
| 1002 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); | 1157 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, |
| 1158 | get_order(MAX_DOMAIN_ID/8)); | ||
| 1003 | 1159 | ||
| 1004 | free_pages((unsigned long)amd_iommu_pd_table, | 1160 | free_pages((unsigned long)amd_iommu_pd_table, |
| 1005 | get_order(rlookup_table_size)); | 1161 | get_order(rlookup_table_size)); |
| @@ -1057,8 +1213,10 @@ void __init amd_iommu_detect(void) | |||
| 1057 | static int __init parse_amd_iommu_options(char *str) | 1213 | static int __init parse_amd_iommu_options(char *str) |
| 1058 | { | 1214 | { |
| 1059 | for (; *str; ++str) { | 1215 | for (; *str; ++str) { |
| 1060 | if (strcmp(str, "isolate") == 0) | 1216 | if (strncmp(str, "isolate", 7) == 0) |
| 1061 | amd_iommu_isolate = 1; | 1217 | amd_iommu_isolate = 1; |
| 1218 | if (strncmp(str, "fullflush", 11) == 0) | ||
| 1219 | amd_iommu_unmap_flush = true; | ||
| 1062 | } | 1220 | } |
| 1063 | 1221 | ||
| 1064 | return 1; | 1222 | return 1; |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 44e21826db11..9a32b37ee2ee 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
| @@ -455,11 +455,11 @@ out: | |||
| 455 | force_iommu || | 455 | force_iommu || |
| 456 | valid_agp || | 456 | valid_agp || |
| 457 | fallback_aper_force) { | 457 | fallback_aper_force) { |
| 458 | printk(KERN_ERR | 458 | printk(KERN_INFO |
| 459 | "Your BIOS doesn't leave a aperture memory hole\n"); | 459 | "Your BIOS doesn't leave a aperture memory hole\n"); |
| 460 | printk(KERN_ERR | 460 | printk(KERN_INFO |
| 461 | "Please enable the IOMMU option in the BIOS setup\n"); | 461 | "Please enable the IOMMU option in the BIOS setup\n"); |
| 462 | printk(KERN_ERR | 462 | printk(KERN_INFO |
| 463 | "This costs you %d MB of RAM\n", | 463 | "This costs you %d MB of RAM\n", |
| 464 | 32 << fallback_aper_order); | 464 | 32 << fallback_aper_order); |
| 465 | 465 | ||
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 584272105051..a91c57cb666a 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
| @@ -60,10 +60,8 @@ unsigned long mp_lapic_addr; | |||
| 60 | static int force_enable_local_apic; | 60 | static int force_enable_local_apic; |
| 61 | int disable_apic; | 61 | int disable_apic; |
| 62 | 62 | ||
| 63 | /* Local APIC timer verification ok */ | ||
| 64 | static int local_apic_timer_verify_ok; | ||
| 65 | /* Disable local APIC timer from the kernel commandline or via dmi quirk */ | 63 | /* Disable local APIC timer from the kernel commandline or via dmi quirk */ |
| 66 | static int local_apic_timer_disabled; | 64 | static int disable_apic_timer __cpuinitdata; |
| 67 | /* Local APIC timer works in C2 */ | 65 | /* Local APIC timer works in C2 */ |
| 68 | int local_apic_timer_c2_ok; | 66 | int local_apic_timer_c2_ok; |
| 69 | EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); | 67 | EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); |
| @@ -130,7 +128,11 @@ static inline int lapic_get_version(void) | |||
| 130 | */ | 128 | */ |
| 131 | static inline int lapic_is_integrated(void) | 129 | static inline int lapic_is_integrated(void) |
| 132 | { | 130 | { |
| 131 | #ifdef CONFIG_X86_64 | ||
| 132 | return 1; | ||
| 133 | #else | ||
| 133 | return APIC_INTEGRATED(lapic_get_version()); | 134 | return APIC_INTEGRATED(lapic_get_version()); |
| 135 | #endif | ||
| 134 | } | 136 | } |
| 135 | 137 | ||
| 136 | /* | 138 | /* |
| @@ -244,8 +246,12 @@ int lapic_get_maxlvt(void) | |||
| 244 | * Local APIC timer | 246 | * Local APIC timer |
| 245 | */ | 247 | */ |
| 246 | 248 | ||
| 247 | /* Clock divisor is set to 16 */ | 249 | /* Clock divisor */ |
| 250 | #ifdef CONFIG_X86_64 | ||
| 251 | #define APIC_DIVISOR 1 | ||
| 252 | #else | ||
| 248 | #define APIC_DIVISOR 16 | 253 | #define APIC_DIVISOR 16 |
| 254 | #endif | ||
| 249 | 255 | ||
| 250 | /* | 256 | /* |
| 251 | * This function sets up the local APIC timer, with a timeout of | 257 | * This function sets up the local APIC timer, with a timeout of |
| @@ -253,6 +259,9 @@ int lapic_get_maxlvt(void) | |||
| 253 | * this function twice on the boot CPU, once with a bogus timeout | 259 | * this function twice on the boot CPU, once with a bogus timeout |
| 254 | * value, second time for real. The other (noncalibrating) CPUs | 260 | * value, second time for real. The other (noncalibrating) CPUs |
| 255 | * call this function only once, with the real, calibrated value. | 261 | * call this function only once, with the real, calibrated value. |
| 262 | * | ||
| 263 | * We do reads before writes even if unnecessary, to get around the | ||
| 264 | * P5 APIC double write bug. | ||
| 256 | */ | 265 | */ |
| 257 | static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | 266 | static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) |
| 258 | { | 267 | { |
| @@ -274,14 +283,44 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | |||
| 274 | */ | 283 | */ |
| 275 | tmp_value = apic_read(APIC_TDCR); | 284 | tmp_value = apic_read(APIC_TDCR); |
| 276 | apic_write(APIC_TDCR, | 285 | apic_write(APIC_TDCR, |
| 277 | (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | | 286 | (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | |
| 278 | APIC_TDR_DIV_16); | 287 | APIC_TDR_DIV_16); |
| 279 | 288 | ||
| 280 | if (!oneshot) | 289 | if (!oneshot) |
| 281 | apic_write(APIC_TMICT, clocks / APIC_DIVISOR); | 290 | apic_write(APIC_TMICT, clocks / APIC_DIVISOR); |
| 282 | } | 291 | } |
| 283 | 292 | ||
| 284 | /* | 293 | /* |
| 294 | * Setup extended LVT, AMD specific (K8, family 10h) | ||
| 295 | * | ||
| 296 | * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and | ||
| 297 | * MCE interrupts are supported. Thus MCE offset must be set to 0. | ||
| 298 | */ | ||
| 299 | |||
| 300 | #define APIC_EILVT_LVTOFF_MCE 0 | ||
| 301 | #define APIC_EILVT_LVTOFF_IBS 1 | ||
| 302 | |||
| 303 | static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) | ||
| 304 | { | ||
| 305 | unsigned long reg = (lvt_off << 4) + APIC_EILVT0; | ||
| 306 | unsigned int v = (mask << 16) | (msg_type << 8) | vector; | ||
| 307 | |||
| 308 | apic_write(reg, v); | ||
| 309 | } | ||
| 310 | |||
| 311 | u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) | ||
| 312 | { | ||
| 313 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); | ||
| 314 | return APIC_EILVT_LVTOFF_MCE; | ||
| 315 | } | ||
| 316 | |||
| 317 | u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) | ||
| 318 | { | ||
| 319 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); | ||
| 320 | return APIC_EILVT_LVTOFF_IBS; | ||
| 321 | } | ||
| 322 | |||
| 323 | /* | ||
| 285 | * Program the next event, relative to now | 324 | * Program the next event, relative to now |
| 286 | */ | 325 | */ |
| 287 | static int lapic_next_event(unsigned long delta, | 326 | static int lapic_next_event(unsigned long delta, |
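		
The relocated setup_APIC_eilvt() helpers above pack an AMD extended-LVT entry from three fields and derive the register offset from the LVT index. A standalone sketch of that packing, assuming APIC_EILVT0 is 0x500 (the "APIC500" mentioned in the comment) and a made-up IBS vector; it only prints the offset and value instead of doing an apic_write():

    #include <stdio.h>
    #include <stdint.h>

    #define APIC_EILVT0            0x500   /* first extended LVT register (APIC500), assumed here */
    #define APIC_EILVT_LVTOFF_IBS  1

    int main(void)
    {
        uint8_t vector = 0xf8, msg_type = 0, mask = 0;   /* hypothetical IBS entry */
        unsigned long reg = (APIC_EILVT_LVTOFF_IBS << 4) + APIC_EILVT0;
        uint32_t v = ((uint32_t)mask << 16) | ((uint32_t)msg_type << 8) | vector;

        /* the kernel would do apic_write(reg, v) here */
        printf("EILVT%d: write 0x%08x to APIC offset 0x%lx\n",
               APIC_EILVT_LVTOFF_IBS, v, reg);
        return 0;
    }
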
| @@ -300,8 +339,8 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
| 300 | unsigned long flags; | 339 | unsigned long flags; |
| 301 | unsigned int v; | 340 | unsigned int v; |
| 302 | 341 | ||
| 303 | /* Lapic used for broadcast ? */ | 342 | /* Lapic used as dummy for broadcast ? */ |
| 304 | if (!local_apic_timer_verify_ok) | 343 | if (evt->features & CLOCK_EVT_FEAT_DUMMY) |
| 305 | return; | 344 | return; |
| 306 | 345 | ||
| 307 | local_irq_save(flags); | 346 | local_irq_save(flags); |
| @@ -514,7 +553,7 @@ static int __init calibrate_APIC_clock(void) | |||
| 514 | return -1; | 553 | return -1; |
| 515 | } | 554 | } |
| 516 | 555 | ||
| 517 | local_apic_timer_verify_ok = 1; | 556 | levt->features &= ~CLOCK_EVT_FEAT_DUMMY; |
| 518 | 557 | ||
| 519 | /* We trust the pm timer based calibration */ | 558 | /* We trust the pm timer based calibration */ |
| 520 | if (!pm_referenced) { | 559 | if (!pm_referenced) { |
| @@ -548,11 +587,11 @@ static int __init calibrate_APIC_clock(void) | |||
| 548 | if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) | 587 | if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) |
| 549 | apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); | 588 | apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); |
| 550 | else | 589 | else |
| 551 | local_apic_timer_verify_ok = 0; | 590 | levt->features |= CLOCK_EVT_FEAT_DUMMY; |
| 552 | } else | 591 | } else |
| 553 | local_irq_enable(); | 592 | local_irq_enable(); |
| 554 | 593 | ||
| 555 | if (!local_apic_timer_verify_ok) { | 594 | if (levt->features & CLOCK_EVT_FEAT_DUMMY) { |
| 556 | printk(KERN_WARNING | 595 | printk(KERN_WARNING |
| 557 | "APIC timer disabled due to verification failure.\n"); | 596 | "APIC timer disabled due to verification failure.\n"); |
| 558 | return -1; | 597 | return -1; |
| @@ -574,7 +613,8 @@ void __init setup_boot_APIC_clock(void) | |||
| 574 | * timer as a dummy clock event source on SMP systems, so the | 613 | * timer as a dummy clock event source on SMP systems, so the |
| 575 | * broadcast mechanism is used. On UP systems simply ignore it. | 614 | * broadcast mechanism is used. On UP systems simply ignore it. |
| 576 | */ | 615 | */ |
| 577 | if (local_apic_timer_disabled) { | 616 | if (disable_apic_timer) { |
| 617 | printk(KERN_INFO "Disabling APIC timer\n"); | ||
| 578 | /* No broadcast on UP ! */ | 618 | /* No broadcast on UP ! */ |
| 579 | if (num_possible_cpus() > 1) { | 619 | if (num_possible_cpus() > 1) { |
| 580 | lapic_clockevent.mult = 1; | 620 | lapic_clockevent.mult = 1; |
| @@ -643,7 +683,11 @@ static void local_apic_timer_interrupt(void) | |||
| 643 | /* | 683 | /* |
| 644 | * the NMI deadlock-detector uses this. | 684 | * the NMI deadlock-detector uses this. |
| 645 | */ | 685 | */ |
| 686 | #ifdef CONFIG_X86_64 | ||
| 687 | add_pda(apic_timer_irqs, 1); | ||
| 688 | #else | ||
| 646 | per_cpu(irq_stat, cpu).apic_timer_irqs++; | 689 | per_cpu(irq_stat, cpu).apic_timer_irqs++; |
| 690 | #endif | ||
| 647 | 691 | ||
| 648 | evt->event_handler(evt); | 692 | evt->event_handler(evt); |
| 649 | } | 693 | } |
| @@ -683,35 +727,6 @@ int setup_profiling_timer(unsigned int multiplier) | |||
| 683 | } | 727 | } |
| 684 | 728 | ||
| 685 | /* | 729 | /* |
| 686 | * Setup extended LVT, AMD specific (K8, family 10h) | ||
| 687 | * | ||
| 688 | * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and | ||
| 689 | * MCE interrupts are supported. Thus MCE offset must be set to 0. | ||
| 690 | */ | ||
| 691 | |||
| 692 | #define APIC_EILVT_LVTOFF_MCE 0 | ||
| 693 | #define APIC_EILVT_LVTOFF_IBS 1 | ||
| 694 | |||
| 695 | static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) | ||
| 696 | { | ||
| 697 | unsigned long reg = (lvt_off << 4) + APIC_EILVT0; | ||
| 698 | unsigned int v = (mask << 16) | (msg_type << 8) | vector; | ||
| 699 | apic_write(reg, v); | ||
| 700 | } | ||
| 701 | |||
| 702 | u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) | ||
| 703 | { | ||
| 704 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); | ||
| 705 | return APIC_EILVT_LVTOFF_MCE; | ||
| 706 | } | ||
| 707 | |||
| 708 | u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) | ||
| 709 | { | ||
| 710 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); | ||
| 711 | return APIC_EILVT_LVTOFF_IBS; | ||
| 712 | } | ||
| 713 | |||
| 714 | /* | ||
| 715 | * Local APIC start and shutdown | 730 | * Local APIC start and shutdown |
| 716 | */ | 731 | */ |
| 717 | 732 | ||
| @@ -756,7 +771,7 @@ void clear_local_APIC(void) | |||
| 756 | } | 771 | } |
| 757 | 772 | ||
| 758 | /* lets not touch this if we didn't frob it */ | 773 | /* lets not touch this if we didn't frob it */ |
| 759 | #ifdef CONFIG_X86_MCE_P4THERMAL | 774 | #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) |
| 760 | if (maxlvt >= 5) { | 775 | if (maxlvt >= 5) { |
| 761 | v = apic_read(APIC_LVTTHMR); | 776 | v = apic_read(APIC_LVTTHMR); |
| 762 | apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); | 777 | apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); |
| @@ -773,10 +788,6 @@ void clear_local_APIC(void) | |||
| 773 | if (maxlvt >= 4) | 788 | if (maxlvt >= 4) |
| 774 | apic_write(APIC_LVTPC, APIC_LVT_MASKED); | 789 | apic_write(APIC_LVTPC, APIC_LVT_MASKED); |
| 775 | 790 | ||
| 776 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
| 777 | if (maxlvt >= 5) | ||
| 778 | apic_write(APIC_LVTTHMR, APIC_LVT_MASKED); | ||
| 779 | #endif | ||
| 780 | /* Integrated APIC (!82489DX) ? */ | 791 | /* Integrated APIC (!82489DX) ? */ |
| 781 | if (lapic_is_integrated()) { | 792 | if (lapic_is_integrated()) { |
| 782 | if (maxlvt > 3) | 793 | if (maxlvt > 3) |
| @@ -791,7 +802,7 @@ void clear_local_APIC(void) | |||
| 791 | */ | 802 | */ |
| 792 | void disable_local_APIC(void) | 803 | void disable_local_APIC(void) |
| 793 | { | 804 | { |
| 794 | unsigned long value; | 805 | unsigned int value; |
| 795 | 806 | ||
| 796 | clear_local_APIC(); | 807 | clear_local_APIC(); |
| 797 | 808 | ||
| @@ -803,6 +814,7 @@ void disable_local_APIC(void) | |||
| 803 | value &= ~APIC_SPIV_APIC_ENABLED; | 814 | value &= ~APIC_SPIV_APIC_ENABLED; |
| 804 | apic_write(APIC_SPIV, value); | 815 | apic_write(APIC_SPIV, value); |
| 805 | 816 | ||
| 817 | #ifdef CONFIG_X86_32 | ||
| 806 | /* | 818 | /* |
| 807 | * When LAPIC was disabled by the BIOS and enabled by the kernel, | 819 | * When LAPIC was disabled by the BIOS and enabled by the kernel, |
| 808 | * restore the disabled state. | 820 | * restore the disabled state. |
| @@ -814,6 +826,7 @@ void disable_local_APIC(void) | |||
| 814 | l &= ~MSR_IA32_APICBASE_ENABLE; | 826 | l &= ~MSR_IA32_APICBASE_ENABLE; |
| 815 | wrmsr(MSR_IA32_APICBASE, l, h); | 827 | wrmsr(MSR_IA32_APICBASE, l, h); |
| 816 | } | 828 | } |
| 829 | #endif | ||
| 817 | } | 830 | } |
| 818 | 831 | ||
| 819 | /* | 832 | /* |
| @@ -830,11 +843,15 @@ void lapic_shutdown(void) | |||
| 830 | return; | 843 | return; |
| 831 | 844 | ||
| 832 | local_irq_save(flags); | 845 | local_irq_save(flags); |
| 833 | clear_local_APIC(); | ||
| 834 | 846 | ||
| 835 | if (enabled_via_apicbase) | 847 | #ifdef CONFIG_X86_32 |
| 848 | if (!enabled_via_apicbase) | ||
| 849 | clear_local_APIC(); | ||
| 850 | else | ||
| 851 | #endif | ||
| 836 | disable_local_APIC(); | 852 | disable_local_APIC(); |
| 837 | 853 | ||
| 854 | |||
| 838 | local_irq_restore(flags); | 855 | local_irq_restore(flags); |
| 839 | } | 856 | } |
| 840 | 857 | ||
| @@ -879,6 +896,12 @@ int __init verify_local_APIC(void) | |||
| 879 | */ | 896 | */ |
| 880 | reg0 = apic_read(APIC_ID); | 897 | reg0 = apic_read(APIC_ID); |
| 881 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); | 898 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); |
| 899 | apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); | ||
| 900 | reg1 = apic_read(APIC_ID); | ||
| 901 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); | ||
| 902 | apic_write(APIC_ID, reg0); | ||
| 903 | if (reg1 != (reg0 ^ APIC_ID_MASK)) | ||
| 904 | return 0; | ||
| 882 | 905 | ||
| 883 | /* | 906 | /* |
| 884 | * The next two are just to see if we have sane values. | 907 | * The next two are just to see if we have sane values. |
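
The added APIC_ID check above is a plain write/read-back test: flip the writable ID bits, confirm the flip is visible, then restore the original value. A userspace sketch with a fake register standing in for APIC_ID, assuming the xAPIC ID field mask (bits 31:24) for APIC_ID_MASK:

    #include <stdio.h>
    #include <stdint.h>

    #define APIC_ID_MASK  (0xffu << 24)   /* xAPIC ID field, bits 31:24 (assumed here) */

    static uint32_t fake_apic_id = 0x03000000;   /* hypothetical boot-time APIC_ID */

    static uint32_t apic_read(void)        { return fake_apic_id; }
    static void     apic_write(uint32_t v) { fake_apic_id = v; }

    int main(void)
    {
        uint32_t reg0 = apic_read();
        apic_write(reg0 ^ APIC_ID_MASK);   /* flip the ID bits */
        uint32_t reg1 = apic_read();
        apic_write(reg0);                  /* restore the original ID */

        printf("APIC_ID read/write test %s\n",
               reg1 == (reg0 ^ APIC_ID_MASK) ? "passed" : "failed");
        return 0;
    }
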
| @@ -904,14 +927,15 @@ void __init sync_Arb_IDs(void) | |||
| 904 | */ | 927 | */ |
| 905 | if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) | 928 | if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) |
| 906 | return; | 929 | return; |
| 930 | |||
| 907 | /* | 931 | /* |
| 908 | * Wait for idle. | 932 | * Wait for idle. |
| 909 | */ | 933 | */ |
| 910 | apic_wait_icr_idle(); | 934 | apic_wait_icr_idle(); |
| 911 | 935 | ||
| 912 | apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); | 936 | apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); |
| 913 | apic_write(APIC_ICR, | 937 | apic_write(APIC_ICR, APIC_DEST_ALLINC | |
| 914 | APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); | 938 | APIC_INT_LEVELTRIG | APIC_DM_INIT); |
| 915 | } | 939 | } |
| 916 | 940 | ||
| 917 | /* | 941 | /* |
| @@ -919,7 +943,7 @@ void __init sync_Arb_IDs(void) | |||
| 919 | */ | 943 | */ |
| 920 | void __init init_bsp_APIC(void) | 944 | void __init init_bsp_APIC(void) |
| 921 | { | 945 | { |
| 922 | unsigned long value; | 946 | unsigned int value; |
| 923 | 947 | ||
| 924 | /* | 948 | /* |
| 925 | * Don't do the setup now if we have a SMP BIOS as the | 949 | * Don't do the setup now if we have a SMP BIOS as the |
| @@ -940,11 +964,13 @@ void __init init_bsp_APIC(void) | |||
| 940 | value &= ~APIC_VECTOR_MASK; | 964 | value &= ~APIC_VECTOR_MASK; |
| 941 | value |= APIC_SPIV_APIC_ENABLED; | 965 | value |= APIC_SPIV_APIC_ENABLED; |
| 942 | 966 | ||
| 967 | #ifdef CONFIG_X86_32 | ||
| 943 | /* This bit is reserved on P4/Xeon and should be cleared */ | 968 | /* This bit is reserved on P4/Xeon and should be cleared */ |
| 944 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | 969 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && |
| 945 | (boot_cpu_data.x86 == 15)) | 970 | (boot_cpu_data.x86 == 15)) |
| 946 | value &= ~APIC_SPIV_FOCUS_DISABLED; | 971 | value &= ~APIC_SPIV_FOCUS_DISABLED; |
| 947 | else | 972 | else |
| 973 | #endif | ||
| 948 | value |= APIC_SPIV_FOCUS_DISABLED; | 974 | value |= APIC_SPIV_FOCUS_DISABLED; |
| 949 | value |= SPURIOUS_APIC_VECTOR; | 975 | value |= SPURIOUS_APIC_VECTOR; |
| 950 | apic_write(APIC_SPIV, value); | 976 | apic_write(APIC_SPIV, value); |
| @@ -963,6 +989,16 @@ static void __cpuinit lapic_setup_esr(void) | |||
| 963 | { | 989 | { |
| 964 | unsigned long oldvalue, value, maxlvt; | 990 | unsigned long oldvalue, value, maxlvt; |
| 965 | if (lapic_is_integrated() && !esr_disable) { | 991 | if (lapic_is_integrated() && !esr_disable) { |
| 992 | if (esr_disable) { | ||
| 993 | /* | ||
| 994 | * Something untraceable is creating bad interrupts on | ||
| 995 | * secondary quads ... for the moment, just leave the | ||
| 996 | * ESR disabled - we can't do anything useful with the | ||
| 997 | * errors anyway - mbligh | ||
| 998 | */ | ||
| 999 | printk(KERN_INFO "Leaving ESR disabled.\n"); | ||
| 1000 | return; | ||
| 1001 | } | ||
| 966 | /* !82489DX */ | 1002 | /* !82489DX */ |
| 967 | maxlvt = lapic_get_maxlvt(); | 1003 | maxlvt = lapic_get_maxlvt(); |
| 968 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | 1004 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
| @@ -983,16 +1019,7 @@ static void __cpuinit lapic_setup_esr(void) | |||
| 983 | "vector: 0x%08lx after: 0x%08lx\n", | 1019 | "vector: 0x%08lx after: 0x%08lx\n", |
| 984 | oldvalue, value); | 1020 | oldvalue, value); |
| 985 | } else { | 1021 | } else { |
| 986 | if (esr_disable) | 1022 | printk(KERN_INFO "No ESR for 82489DX.\n"); |
| 987 | /* | ||
| 988 | * Something untraceable is creating bad interrupts on | ||
| 989 | * secondary quads ... for the moment, just leave the | ||
| 990 | * ESR disabled - we can't do anything useful with the | ||
| 991 | * errors anyway - mbligh | ||
| 992 | */ | ||
| 993 | printk(KERN_INFO "Leaving ESR disabled.\n"); | ||
| 994 | else | ||
| 995 | printk(KERN_INFO "No ESR for 82489DX.\n"); | ||
| 996 | } | 1023 | } |
| 997 | } | 1024 | } |
| 998 | 1025 | ||
| @@ -1130,13 +1157,17 @@ void __cpuinit setup_local_APIC(void) | |||
| 1130 | 1157 | ||
| 1131 | void __cpuinit end_local_APIC_setup(void) | 1158 | void __cpuinit end_local_APIC_setup(void) |
| 1132 | { | 1159 | { |
| 1133 | unsigned long value; | ||
| 1134 | |||
| 1135 | lapic_setup_esr(); | 1160 | lapic_setup_esr(); |
| 1136 | /* Disable the local apic timer */ | 1161 | |
| 1137 | value = apic_read(APIC_LVTT); | 1162 | #ifdef CONFIG_X86_32 |
| 1138 | value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | 1163 | { |
| 1139 | apic_write(APIC_LVTT, value); | 1164 | unsigned int value; |
| 1165 | /* Disable the local apic timer */ | ||
| 1166 | value = apic_read(APIC_LVTT); | ||
| 1167 | value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | ||
| 1168 | apic_write(APIC_LVTT, value); | ||
| 1169 | } | ||
| 1170 | #endif | ||
| 1140 | 1171 | ||
| 1141 | setup_apic_nmi_watchdog(NULL); | 1172 | setup_apic_nmi_watchdog(NULL); |
| 1142 | apic_pm_activate(); | 1173 | apic_pm_activate(); |
| @@ -1367,6 +1398,7 @@ void smp_error_interrupt(struct pt_regs *regs) | |||
| 1367 | */ | 1398 | */ |
| 1368 | void __init connect_bsp_APIC(void) | 1399 | void __init connect_bsp_APIC(void) |
| 1369 | { | 1400 | { |
| 1401 | #ifdef CONFIG_X86_32 | ||
| 1370 | if (pic_mode) { | 1402 | if (pic_mode) { |
| 1371 | /* | 1403 | /* |
| 1372 | * Do not trust the local APIC being empty at bootup. | 1404 | * Do not trust the local APIC being empty at bootup. |
| @@ -1381,6 +1413,7 @@ void __init connect_bsp_APIC(void) | |||
| 1381 | outb(0x70, 0x22); | 1413 | outb(0x70, 0x22); |
| 1382 | outb(0x01, 0x23); | 1414 | outb(0x01, 0x23); |
| 1383 | } | 1415 | } |
| 1416 | #endif | ||
| 1384 | enable_apic_mode(); | 1417 | enable_apic_mode(); |
| 1385 | } | 1418 | } |
| 1386 | 1419 | ||
| @@ -1393,6 +1426,9 @@ void __init connect_bsp_APIC(void) | |||
| 1393 | */ | 1426 | */ |
| 1394 | void disconnect_bsp_APIC(int virt_wire_setup) | 1427 | void disconnect_bsp_APIC(int virt_wire_setup) |
| 1395 | { | 1428 | { |
| 1429 | unsigned int value; | ||
| 1430 | |||
| 1431 | #ifdef CONFIG_X86_32 | ||
| 1396 | if (pic_mode) { | 1432 | if (pic_mode) { |
| 1397 | /* | 1433 | /* |
| 1398 | * Put the board back into PIC mode (has an effect only on | 1434 | * Put the board back into PIC mode (has an effect only on |
| @@ -1404,54 +1440,53 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
| 1404 | "entering PIC mode.\n"); | 1440 | "entering PIC mode.\n"); |
| 1405 | outb(0x70, 0x22); | 1441 | outb(0x70, 0x22); |
| 1406 | outb(0x00, 0x23); | 1442 | outb(0x00, 0x23); |
| 1407 | } else { | 1443 | return; |
| 1408 | /* Go back to Virtual Wire compatibility mode */ | 1444 | } |
| 1409 | unsigned long value; | 1445 | #endif |
| 1410 | 1446 | ||
| 1411 | /* For the spurious interrupt use vector F, and enable it */ | 1447 | /* Go back to Virtual Wire compatibility mode */ |
| 1412 | value = apic_read(APIC_SPIV); | ||
| 1413 | value &= ~APIC_VECTOR_MASK; | ||
| 1414 | value |= APIC_SPIV_APIC_ENABLED; | ||
| 1415 | value |= 0xf; | ||
| 1416 | apic_write(APIC_SPIV, value); | ||
| 1417 | 1448 | ||
| 1418 | if (!virt_wire_setup) { | 1449 | /* For the spurious interrupt use vector F, and enable it */ |
| 1419 | /* | 1450 | value = apic_read(APIC_SPIV); |
| 1420 | * For LVT0 make it edge triggered, active high, | 1451 | value &= ~APIC_VECTOR_MASK; |
| 1421 | * external and enabled | 1452 | value |= APIC_SPIV_APIC_ENABLED; |
| 1422 | */ | 1453 | value |= 0xf; |
| 1423 | value = apic_read(APIC_LVT0); | 1454 | apic_write(APIC_SPIV, value); |
| 1424 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | | ||
| 1425 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
| 1426 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | ||
| 1427 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
| 1428 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); | ||
| 1429 | apic_write(APIC_LVT0, value); | ||
| 1430 | } else { | ||
| 1431 | /* Disable LVT0 */ | ||
| 1432 | apic_write(APIC_LVT0, APIC_LVT_MASKED); | ||
| 1433 | } | ||
| 1434 | 1455 | ||
| 1456 | if (!virt_wire_setup) { | ||
| 1435 | /* | 1457 | /* |
| 1436 | * For LVT1 make it edge triggered, active high, nmi and | 1458 | * For LVT0 make it edge triggered, active high, |
| 1437 | * enabled | 1459 | * external and enabled |
| 1438 | */ | 1460 | */ |
| 1439 | value = apic_read(APIC_LVT1); | 1461 | value = apic_read(APIC_LVT0); |
| 1440 | value &= ~( | 1462 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | |
| 1441 | APIC_MODE_MASK | APIC_SEND_PENDING | | ||
| 1442 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | 1463 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | |
| 1443 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | 1464 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); |
| 1444 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | 1465 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; |
| 1445 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); | 1466 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); |
| 1446 | apic_write(APIC_LVT1, value); | 1467 | apic_write(APIC_LVT0, value); |
| 1468 | } else { | ||
| 1469 | /* Disable LVT0 */ | ||
| 1470 | apic_write(APIC_LVT0, APIC_LVT_MASKED); | ||
| 1447 | } | 1471 | } |
| 1472 | |||
| 1473 | /* | ||
| 1474 | * For LVT1 make it edge triggered, active high, | ||
| 1475 | * nmi and enabled | ||
| 1476 | */ | ||
| 1477 | value = apic_read(APIC_LVT1); | ||
| 1478 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | | ||
| 1479 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
| 1480 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | ||
| 1481 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
| 1482 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); | ||
| 1483 | apic_write(APIC_LVT1, value); | ||
| 1448 | } | 1484 | } |
| 1449 | 1485 | ||
| 1450 | void __cpuinit generic_processor_info(int apicid, int version) | 1486 | void __cpuinit generic_processor_info(int apicid, int version) |
| 1451 | { | 1487 | { |
| 1452 | int cpu; | 1488 | int cpu; |
| 1453 | cpumask_t tmp_map; | 1489 | cpumask_t tmp_map; |
| 1454 | physid_mask_t phys_cpu; | ||
| 1455 | 1490 | ||
| 1456 | /* | 1491 | /* |
| 1457 | * Validate version | 1492 | * Validate version |
| @@ -1464,9 +1499,6 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
| 1464 | } | 1499 | } |
| 1465 | apic_version[apicid] = version; | 1500 | apic_version[apicid] = version; |
| 1466 | 1501 | ||
| 1467 | phys_cpu = apicid_to_cpu_present(apicid); | ||
| 1468 | physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); | ||
| 1469 | |||
| 1470 | if (num_processors >= NR_CPUS) { | 1502 | if (num_processors >= NR_CPUS) { |
| 1471 | printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." | 1503 | printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." |
| 1472 | " Processor ignored.\n", NR_CPUS); | 1504 | " Processor ignored.\n", NR_CPUS); |
| @@ -1477,17 +1509,19 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
| 1477 | cpus_complement(tmp_map, cpu_present_map); | 1509 | cpus_complement(tmp_map, cpu_present_map); |
| 1478 | cpu = first_cpu(tmp_map); | 1510 | cpu = first_cpu(tmp_map); |
| 1479 | 1511 | ||
| 1480 | if (apicid == boot_cpu_physical_apicid) | 1512 | physid_set(apicid, phys_cpu_present_map); |
| 1513 | if (apicid == boot_cpu_physical_apicid) { | ||
| 1481 | /* | 1514 | /* |
| 1482 | * x86_bios_cpu_apicid is required to have processors listed | 1515 | * x86_bios_cpu_apicid is required to have processors listed |
| 1483 | * in same order as logical cpu numbers. Hence the first | 1516 | * in same order as logical cpu numbers. Hence the first |
| 1484 | * entry is BSP, and so on. | 1517 | * entry is BSP, and so on. |
| 1485 | */ | 1518 | */ |
| 1486 | cpu = 0; | 1519 | cpu = 0; |
| 1487 | 1520 | } | |
| 1488 | if (apicid > max_physical_apicid) | 1521 | if (apicid > max_physical_apicid) |
| 1489 | max_physical_apicid = apicid; | 1522 | max_physical_apicid = apicid; |
| 1490 | 1523 | ||
| 1524 | #ifdef CONFIG_X86_32 | ||
| 1491 | /* | 1525 | /* |
| 1492 | * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y | 1526 | * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y |
| 1493 | * but we need to work other dependencies like SMP_SUSPEND etc | 1527 | * but we need to work other dependencies like SMP_SUSPEND etc |
| @@ -1507,7 +1541,9 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
| 1507 | def_to_bigsmp = 1; | 1541 | def_to_bigsmp = 1; |
| 1508 | } | 1542 | } |
| 1509 | } | 1543 | } |
| 1510 | #ifdef CONFIG_SMP | 1544 | #endif |
| 1545 | |||
| 1546 | #if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) | ||
| 1511 | /* are we being called early in kernel startup? */ | 1547 | /* are we being called early in kernel startup? */ |
| 1512 | if (early_per_cpu_ptr(x86_cpu_to_apicid)) { | 1548 | if (early_per_cpu_ptr(x86_cpu_to_apicid)) { |
| 1513 | u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); | 1549 | u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); |
| @@ -1520,6 +1556,7 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
| 1520 | per_cpu(x86_bios_cpu_apicid, cpu) = apicid; | 1556 | per_cpu(x86_bios_cpu_apicid, cpu) = apicid; |
| 1521 | } | 1557 | } |
| 1522 | #endif | 1558 | #endif |
| 1559 | |||
| 1523 | cpu_set(cpu, cpu_possible_map); | 1560 | cpu_set(cpu, cpu_possible_map); |
| 1524 | cpu_set(cpu, cpu_present_map); | 1561 | cpu_set(cpu, cpu_present_map); |
| 1525 | } | 1562 | } |
| @@ -1530,6 +1567,11 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
| 1530 | #ifdef CONFIG_PM | 1567 | #ifdef CONFIG_PM |
| 1531 | 1568 | ||
| 1532 | static struct { | 1569 | static struct { |
| 1570 | /* | ||
| 1571 | * 'active' is true if the local APIC was enabled by us and | ||
| 1572 | * not the BIOS; this signifies that we are also responsible | ||
| 1573 | * for disabling it before entering apm/acpi suspend | ||
| 1574 | */ | ||
| 1533 | int active; | 1575 | int active; |
| 1534 | /* r/w apic fields */ | 1576 | /* r/w apic fields */ |
| 1535 | unsigned int apic_id; | 1577 | unsigned int apic_id; |
| @@ -1570,7 +1612,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) | |||
| 1570 | apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); | 1612 | apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); |
| 1571 | apic_pm_state.apic_tmict = apic_read(APIC_TMICT); | 1613 | apic_pm_state.apic_tmict = apic_read(APIC_TMICT); |
| 1572 | apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); | 1614 | apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); |
| 1573 | #ifdef CONFIG_X86_MCE_P4THERMAL | 1615 | #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) |
| 1574 | if (maxlvt >= 5) | 1616 | if (maxlvt >= 5) |
| 1575 | apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); | 1617 | apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); |
| 1576 | #endif | 1618 | #endif |
| @@ -1594,16 +1636,23 @@ static int lapic_resume(struct sys_device *dev) | |||
| 1594 | 1636 | ||
| 1595 | local_irq_save(flags); | 1637 | local_irq_save(flags); |
| 1596 | 1638 | ||
| 1597 | /* | 1639 | #ifdef CONFIG_X86_64 |
| 1598 | * Make sure the APICBASE points to the right address | 1640 | if (x2apic) |
| 1599 | * | 1641 | enable_x2apic(); |
| 1600 | * FIXME! This will be wrong if we ever support suspend on | 1642 | else |
| 1601 | * SMP! We'll need to do this as part of the CPU restore! | 1643 | #endif |
| 1602 | */ | 1644 | { |
| 1603 | rdmsr(MSR_IA32_APICBASE, l, h); | 1645 | /* |
| 1604 | l &= ~MSR_IA32_APICBASE_BASE; | 1646 | * Make sure the APICBASE points to the right address |
| 1605 | l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; | 1647 | * |
| 1606 | wrmsr(MSR_IA32_APICBASE, l, h); | 1648 | * FIXME! This will be wrong if we ever support suspend on |
| 1649 | * SMP! We'll need to do this as part of the CPU restore! | ||
| 1650 | */ | ||
| 1651 | rdmsr(MSR_IA32_APICBASE, l, h); | ||
| 1652 | l &= ~MSR_IA32_APICBASE_BASE; | ||
| 1653 | l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; | ||
| 1654 | wrmsr(MSR_IA32_APICBASE, l, h); | ||
| 1655 | } | ||
| 1607 | 1656 | ||
| 1608 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); | 1657 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); |
| 1609 | apic_write(APIC_ID, apic_pm_state.apic_id); | 1658 | apic_write(APIC_ID, apic_pm_state.apic_id); |
| @@ -1613,7 +1662,7 @@ static int lapic_resume(struct sys_device *dev) | |||
| 1613 | apic_write(APIC_SPIV, apic_pm_state.apic_spiv); | 1662 | apic_write(APIC_SPIV, apic_pm_state.apic_spiv); |
| 1614 | apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); | 1663 | apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); |
| 1615 | apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); | 1664 | apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); |
| 1616 | #ifdef CONFIG_X86_MCE_P4THERMAL | 1665 | #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) |
| 1617 | if (maxlvt >= 5) | 1666 | if (maxlvt >= 5) |
| 1618 | apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); | 1667 | apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); |
| 1619 | #endif | 1668 | #endif |
| @@ -1627,7 +1676,9 @@ static int lapic_resume(struct sys_device *dev) | |||
| 1627 | apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); | 1676 | apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); |
| 1628 | apic_write(APIC_ESR, 0); | 1677 | apic_write(APIC_ESR, 0); |
| 1629 | apic_read(APIC_ESR); | 1678 | apic_read(APIC_ESR); |
| 1679 | |||
| 1630 | local_irq_restore(flags); | 1680 | local_irq_restore(flags); |
| 1681 | |||
| 1631 | return 0; | 1682 | return 0; |
| 1632 | } | 1683 | } |
| 1633 | 1684 | ||
| @@ -1683,20 +1734,20 @@ static int __init parse_lapic(char *arg) | |||
| 1683 | } | 1734 | } |
| 1684 | early_param("lapic", parse_lapic); | 1735 | early_param("lapic", parse_lapic); |
| 1685 | 1736 | ||
| 1686 | static int __init parse_nolapic(char *arg) | 1737 | static int __init setup_disableapic(char *arg) |
| 1687 | { | 1738 | { |
| 1688 | disable_apic = 1; | 1739 | disable_apic = 1; |
| 1689 | setup_clear_cpu_cap(X86_FEATURE_APIC); | 1740 | setup_clear_cpu_cap(X86_FEATURE_APIC); |
| 1690 | return 0; | 1741 | return 0; |
| 1691 | } | 1742 | } |
| 1692 | early_param("nolapic", parse_nolapic); | 1743 | early_param("disableapic", setup_disableapic); |
| 1693 | 1744 | ||
| 1694 | static int __init parse_disable_lapic_timer(char *arg) | 1745 | /* same as disableapic, for compatibility */ |
| 1746 | static int __init setup_nolapic(char *arg) | ||
| 1695 | { | 1747 | { |
| 1696 | local_apic_timer_disabled = 1; | 1748 | return setup_disableapic(arg); |
| 1697 | return 0; | ||
| 1698 | } | 1749 | } |
| 1699 | early_param("nolapic_timer", parse_disable_lapic_timer); | 1750 | early_param("nolapic", setup_nolapic); |
| 1700 | 1751 | ||
| 1701 | static int __init parse_lapic_timer_c2_ok(char *arg) | 1752 | static int __init parse_lapic_timer_c2_ok(char *arg) |
| 1702 | { | 1753 | { |
| @@ -1705,15 +1756,40 @@ static int __init parse_lapic_timer_c2_ok(char *arg) | |||
| 1705 | } | 1756 | } |
| 1706 | early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); | 1757 | early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); |
| 1707 | 1758 | ||
| 1759 | static int __init parse_disable_apic_timer(char *arg) | ||
| 1760 | { | ||
| 1761 | disable_apic_timer = 1; | ||
| 1762 | return 0; | ||
| 1763 | } | ||
| 1764 | early_param("noapictimer", parse_disable_apic_timer); | ||
| 1765 | |||
| 1766 | static int __init parse_nolapic_timer(char *arg) | ||
| 1767 | { | ||
| 1768 | disable_apic_timer = 1; | ||
| 1769 | return 0; | ||
| 1770 | } | ||
| 1771 | early_param("nolapic_timer", parse_nolapic_timer); | ||
| 1772 | |||
| 1708 | static int __init apic_set_verbosity(char *arg) | 1773 | static int __init apic_set_verbosity(char *arg) |
| 1709 | { | 1774 | { |
| 1710 | if (!arg) | 1775 | if (!arg) { |
| 1776 | #ifdef CONFIG_X86_64 | ||
| 1777 | skip_ioapic_setup = 0; | ||
| 1778 | ioapic_force = 1; | ||
| 1779 | return 0; | ||
| 1780 | #endif | ||
| 1711 | return -EINVAL; | 1781 | return -EINVAL; |
| 1782 | } | ||
| 1712 | 1783 | ||
| 1713 | if (strcmp(arg, "debug") == 0) | 1784 | if (strcmp("debug", arg) == 0) |
| 1714 | apic_verbosity = APIC_DEBUG; | 1785 | apic_verbosity = APIC_DEBUG; |
| 1715 | else if (strcmp(arg, "verbose") == 0) | 1786 | else if (strcmp("verbose", arg) == 0) |
| 1716 | apic_verbosity = APIC_VERBOSE; | 1787 | apic_verbosity = APIC_VERBOSE; |
| 1788 | else { | ||
| 1789 | printk(KERN_WARNING "APIC Verbosity level %s not recognised" | ||
| 1790 | " use apic=verbose or apic=debug\n", arg); | ||
| 1791 | return -EINVAL; | ||
| 1792 | } | ||
| 1717 | 1793 | ||
| 1718 | return 0; | 1794 | return 0; |
| 1719 | } | 1795 | } |
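
The command-line changes above fold several spellings into two flags: "disableapic" and "nolapic" both set disable_apic, while "noapictimer" and "nolapic_timer" both set disable_apic_timer. A rough sketch of that aliasing, with handle_early_param() standing in for the kernel's early_param machinery (illustrative only, not the real parser):

    #include <stdio.h>
    #include <string.h>

    static int disable_apic, disable_apic_timer;

    static void handle_early_param(const char *opt)
    {
        if (!strcmp(opt, "disableapic") || !strcmp(opt, "nolapic"))
            disable_apic = 1;
        if (!strcmp(opt, "noapictimer") || !strcmp(opt, "nolapic_timer"))
            disable_apic_timer = 1;
    }

    int main(void)
    {
        handle_early_param("nolapic_timer");
        printf("disable_apic=%d disable_apic_timer=%d\n",
               disable_apic, disable_apic_timer);
        return 0;
    }
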
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 1a6011855af3..53898b65a6ae 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c | |||
| @@ -45,6 +45,7 @@ | |||
| 45 | #include <mach_ipi.h> | 45 | #include <mach_ipi.h> |
| 46 | #include <mach_apic.h> | 46 | #include <mach_apic.h> |
| 47 | 47 | ||
| 48 | /* Disable local APIC timer from the kernel commandline or via dmi quirk */ | ||
| 48 | static int disable_apic_timer __cpuinitdata; | 49 | static int disable_apic_timer __cpuinitdata; |
| 49 | static int apic_calibrate_pmtmr __initdata; | 50 | static int apic_calibrate_pmtmr __initdata; |
| 50 | int disable_apic; | 51 | int disable_apic; |
| @@ -80,6 +81,9 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
| 80 | static void lapic_timer_broadcast(cpumask_t mask); | 81 | static void lapic_timer_broadcast(cpumask_t mask); |
| 81 | static void apic_pm_activate(void); | 82 | static void apic_pm_activate(void); |
| 82 | 83 | ||
| 84 | /* | ||
| 85 | * The local apic timer can be used for any function which is CPU local. | ||
| 86 | */ | ||
| 83 | static struct clock_event_device lapic_clockevent = { | 87 | static struct clock_event_device lapic_clockevent = { |
| 84 | .name = "lapic", | 88 | .name = "lapic", |
| 85 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | 89 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT |
| @@ -106,11 +110,15 @@ static inline int lapic_get_version(void) | |||
| 106 | } | 110 | } |
| 107 | 111 | ||
| 108 | /* | 112 | /* |
| 109 | * Check, if the APIC is integrated or a seperate chip | 113 | * Check, if the APIC is integrated or a separate chip |
| 110 | */ | 114 | */ |
| 111 | static inline int lapic_is_integrated(void) | 115 | static inline int lapic_is_integrated(void) |
| 112 | { | 116 | { |
| 117 | #ifdef CONFIG_X86_64 | ||
| 113 | return 1; | 118 | return 1; |
| 119 | #else | ||
| 120 | return APIC_INTEGRATED(lapic_get_version()); | ||
| 121 | #endif | ||
| 114 | } | 122 | } |
| 115 | 123 | ||
| 116 | /* | 124 | /* |
| @@ -125,6 +133,11 @@ static int modern_apic(void) | |||
| 125 | return lapic_get_version() >= 0x14; | 133 | return lapic_get_version() >= 0x14; |
| 126 | } | 134 | } |
| 127 | 135 | ||
| 136 | /* | ||
| 137 | * Paravirt kernels also might be using these below ops. So we still | ||
| 138 | * use generic apic_read()/apic_write(), which might be pointing to different | ||
| 139 | * ops in PARAVIRT case. | ||
| 140 | */ | ||
| 128 | void xapic_wait_icr_idle(void) | 141 | void xapic_wait_icr_idle(void) |
| 129 | { | 142 | { |
| 130 | while (apic_read(APIC_ICR) & APIC_ICR_BUSY) | 143 | while (apic_read(APIC_ICR) & APIC_ICR_BUSY) |
| @@ -149,7 +162,7 @@ u32 safe_xapic_wait_icr_idle(void) | |||
| 149 | 162 | ||
| 150 | void xapic_icr_write(u32 low, u32 id) | 163 | void xapic_icr_write(u32 low, u32 id) |
| 151 | { | 164 | { |
| 152 | apic_write(APIC_ICR2, id << 24); | 165 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); |
| 153 | apic_write(APIC_ICR, low); | 166 | apic_write(APIC_ICR, low); |
| 154 | } | 167 | } |
| 155 | 168 | ||
| @@ -160,7 +173,7 @@ u64 xapic_icr_read(void) | |||
| 160 | icr2 = apic_read(APIC_ICR2); | 173 | icr2 = apic_read(APIC_ICR2); |
| 161 | icr1 = apic_read(APIC_ICR); | 174 | icr1 = apic_read(APIC_ICR); |
| 162 | 175 | ||
| 163 | return (icr1 | ((u64)icr2 << 32)); | 176 | return icr1 | ((u64)icr2 << 32); |
| 164 | } | 177 | } |
| 165 | 178 | ||
| 166 | static struct apic_ops xapic_ops = { | 179 | static struct apic_ops xapic_ops = { |
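
xapic_icr_write()/xapic_icr_read() above split the 64-bit ICR across two 32-bit registers, with the destination APIC ID in the top byte of ICR2. A small sketch of that packing, assuming SET_APIC_DEST_FIELD() is simply a shift by 24 as on xAPIC; the command word and target ID are made up:

    #include <stdio.h>
    #include <stdint.h>

    #define SET_APIC_DEST_FIELD(id)  ((uint32_t)(id) << 24)  /* dest ID in ICR2 bits 31:24 */

    int main(void)
    {
        uint32_t low  = 0x00004500;               /* hypothetical INIT IPI command word */
        uint32_t icr2 = SET_APIC_DEST_FIELD(2);   /* target APIC ID 2 */

        /* same combine as xapic_icr_read(): icr1 | ((u64)icr2 << 32) */
        uint64_t icr = (uint64_t)icr2 << 32 | low;

        printf("ICR2=0x%08x ICR=0x%016llx\n", icr2, (unsigned long long)icr);
        return 0;
    }
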
| @@ -173,7 +186,6 @@ static struct apic_ops xapic_ops = { | |||
| 173 | }; | 186 | }; |
| 174 | 187 | ||
| 175 | struct apic_ops __read_mostly *apic_ops = &xapic_ops; | 188 | struct apic_ops __read_mostly *apic_ops = &xapic_ops; |
| 176 | |||
| 177 | EXPORT_SYMBOL_GPL(apic_ops); | 189 | EXPORT_SYMBOL_GPL(apic_ops); |
| 178 | 190 | ||
| 179 | static void x2apic_wait_icr_idle(void) | 191 | static void x2apic_wait_icr_idle(void) |
| @@ -243,6 +255,17 @@ int lapic_get_maxlvt(void) | |||
| 243 | } | 255 | } |
| 244 | 256 | ||
| 245 | /* | 257 | /* |
| 258 | * Local APIC timer | ||
| 259 | */ | ||
| 260 | |||
| 261 | /* Clock divisor */ | ||
| 262 | #ifdef CONFIG_X86_64 | ||
| 263 | #define APIC_DIVISOR 1 | ||
| 264 | #else | ||
| 265 | #define APIC_DIVISOR 16 | ||
| 266 | #endif | ||
| 267 | |||
| 268 | /* | ||
| 246 | * This function sets up the local APIC timer, with a timeout of | 269 | * This function sets up the local APIC timer, with a timeout of |
| 247 | * 'clocks' APIC bus clock. During calibration we actually call | 270 | * 'clocks' APIC bus clock. During calibration we actually call |
| 248 | * this function twice on the boot CPU, once with a bogus timeout | 271 | * this function twice on the boot CPU, once with a bogus timeout |
| @@ -252,7 +275,6 @@ int lapic_get_maxlvt(void) | |||
| 252 | * We do reads before writes even if unnecessary, to get around the | 275 | * We do reads before writes even if unnecessary, to get around the |
| 253 | * P5 APIC double write bug. | 276 | * P5 APIC double write bug. |
| 254 | */ | 277 | */ |
| 255 | |||
| 256 | static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | 278 | static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) |
| 257 | { | 279 | { |
| 258 | unsigned int lvtt_value, tmp_value; | 280 | unsigned int lvtt_value, tmp_value; |
| @@ -260,6 +282,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | |||
| 260 | lvtt_value = LOCAL_TIMER_VECTOR; | 282 | lvtt_value = LOCAL_TIMER_VECTOR; |
| 261 | if (!oneshot) | 283 | if (!oneshot) |
| 262 | lvtt_value |= APIC_LVT_TIMER_PERIODIC; | 284 | lvtt_value |= APIC_LVT_TIMER_PERIODIC; |
| 285 | if (!lapic_is_integrated()) | ||
| 286 | lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); | ||
| 287 | |||
| 263 | if (!irqen) | 288 | if (!irqen) |
| 264 | lvtt_value |= APIC_LVT_MASKED; | 289 | lvtt_value |= APIC_LVT_MASKED; |
| 265 | 290 | ||
| @@ -269,12 +294,12 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | |||
| 269 | * Divide PICLK by 16 | 294 | * Divide PICLK by 16 |
| 270 | */ | 295 | */ |
| 271 | tmp_value = apic_read(APIC_TDCR); | 296 | tmp_value = apic_read(APIC_TDCR); |
| 272 | apic_write(APIC_TDCR, (tmp_value | 297 | apic_write(APIC_TDCR, |
| 273 | & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | 298 | (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | |
| 274 | | APIC_TDR_DIV_16); | 299 | APIC_TDR_DIV_16); |
| 275 | 300 | ||
| 276 | if (!oneshot) | 301 | if (!oneshot) |
| 277 | apic_write(APIC_TMICT, clocks); | 302 | apic_write(APIC_TMICT, clocks / APIC_DIVISOR); |
| 278 | } | 303 | } |
| 279 | 304 | ||
| 280 | /* | 305 | /* |
| @@ -444,7 +469,7 @@ static int __init calibrate_APIC_clock(void) | |||
| 444 | lapic_clockevent.min_delta_ns = | 469 | lapic_clockevent.min_delta_ns = |
| 445 | clockevent_delta2ns(0xF, &lapic_clockevent); | 470 | clockevent_delta2ns(0xF, &lapic_clockevent); |
| 446 | 471 | ||
| 447 | calibration_result = result / HZ; | 472 | calibration_result = (result * APIC_DIVISOR) / HZ; |
| 448 | 473 | ||
| 449 | /* | 474 | /* |
| 450 | * Do a sanity check on the APIC calibration result | 475 | * Do a sanity check on the APIC calibration result |
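
The switch from result / HZ to (result * APIC_DIVISOR) / HZ above appears to mirror the new clocks / APIC_DIVISOR write in __setup_APIC_LVTT(): the count measured during calibration is in divided timer units, so scaling it back up keeps calibration_result in undivided bus clocks per tick for both the divisor-1 and divisor-16 configurations. A worked example with made-up numbers:

    #include <stdio.h>

    #define HZ            250   /* hypothetical tick rate */
    #define APIC_DIVISOR  16    /* 16 on 32-bit, 1 on 64-bit per the hunks above */

    int main(void)
    {
        /* pretend the calibration loop counted this many divided timer ticks per second */
        unsigned long result = 12500000UL;

        unsigned long per_tick = (result * APIC_DIVISOR) / HZ;

        printf("calibration_result = %lu bus clocks per timer tick\n", per_tick);
        return 0;
    }
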
| @@ -466,10 +491,10 @@ static int __init calibrate_APIC_clock(void) | |||
| 466 | void __init setup_boot_APIC_clock(void) | 491 | void __init setup_boot_APIC_clock(void) |
| 467 | { | 492 | { |
| 468 | /* | 493 | /* |
| 469 | * The local apic timer can be disabled via the kernel commandline. | 494 | * The local apic timer can be disabled via the kernel |
| 470 | * Register the lapic timer as a dummy clock event source on SMP | 495 | * commandline or from the CPU detection code. Register the lapic |
| 471 | * systems, so the broadcast mechanism is used. On UP systems simply | 496 | * timer as a dummy clock event source on SMP systems, so the |
| 472 | * ignore it. | 497 | * broadcast mechanism is used. On UP systems simply ignore it. |
| 473 | */ | 498 | */ |
| 474 | if (disable_apic_timer) { | 499 | if (disable_apic_timer) { |
| 475 | printk(KERN_INFO "Disabling APIC timer\n"); | 500 | printk(KERN_INFO "Disabling APIC timer\n"); |
| @@ -481,7 +506,9 @@ void __init setup_boot_APIC_clock(void) | |||
| 481 | return; | 506 | return; |
| 482 | } | 507 | } |
| 483 | 508 | ||
| 484 | printk(KERN_INFO "Using local APIC timer interrupts.\n"); | 509 | apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" |
| 510 | "calibrating APIC timer ...\n"); | ||
| 511 | |||
| 485 | if (calibrate_APIC_clock()) { | 512 | if (calibrate_APIC_clock()) { |
| 486 | /* No broadcast on UP ! */ | 513 | /* No broadcast on UP ! */ |
| 487 | if (num_possible_cpus() > 1) | 514 | if (num_possible_cpus() > 1) |
| @@ -500,6 +527,7 @@ void __init setup_boot_APIC_clock(void) | |||
| 500 | printk(KERN_WARNING "APIC timer registered as dummy," | 527 | printk(KERN_WARNING "APIC timer registered as dummy," |
| 501 | " due to nmi_watchdog=%d!\n", nmi_watchdog); | 528 | " due to nmi_watchdog=%d!\n", nmi_watchdog); |
| 502 | 529 | ||
| 530 | /* Setup the lapic or request the broadcast */ | ||
| 503 | setup_APIC_timer(); | 531 | setup_APIC_timer(); |
| 504 | } | 532 | } |
| 505 | 533 | ||
| @@ -538,7 +566,11 @@ static void local_apic_timer_interrupt(void) | |||
| 538 | /* | 566 | /* |
| 539 | * the NMI deadlock-detector uses this. | 567 | * the NMI deadlock-detector uses this. |
| 540 | */ | 568 | */ |
| 569 | #ifdef CONFIG_X86_64 | ||
| 541 | add_pda(apic_timer_irqs, 1); | 570 | add_pda(apic_timer_irqs, 1); |
| 571 | #else | ||
| 572 | per_cpu(irq_stat, cpu).apic_timer_irqs++; | ||
| 573 | #endif | ||
| 542 | 574 | ||
| 543 | evt->event_handler(evt); | 575 | evt->event_handler(evt); |
| 544 | } | 576 | } |
| @@ -569,6 +601,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) | |||
| 569 | irq_enter(); | 601 | irq_enter(); |
| 570 | local_apic_timer_interrupt(); | 602 | local_apic_timer_interrupt(); |
| 571 | irq_exit(); | 603 | irq_exit(); |
| 604 | |||
| 572 | set_irq_regs(old_regs); | 605 | set_irq_regs(old_regs); |
| 573 | } | 606 | } |
| 574 | 607 | ||
| @@ -622,6 +655,13 @@ void clear_local_APIC(void) | |||
| 622 | apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); | 655 | apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); |
| 623 | } | 656 | } |
| 624 | 657 | ||
| 658 | /* lets not touch this if we didn't frob it */ | ||
| 659 | #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) | ||
| 660 | if (maxlvt >= 5) { | ||
| 661 | v = apic_read(APIC_LVTTHMR); | ||
| 662 | apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); | ||
| 663 | } | ||
| 664 | #endif | ||
| 625 | /* | 665 | /* |
| 626 | * Clean APIC state for other OSs: | 666 | * Clean APIC state for other OSs: |
| 627 | */ | 667 | */ |
| @@ -632,8 +672,14 @@ void clear_local_APIC(void) | |||
| 632 | apic_write(APIC_LVTERR, APIC_LVT_MASKED); | 672 | apic_write(APIC_LVTERR, APIC_LVT_MASKED); |
| 633 | if (maxlvt >= 4) | 673 | if (maxlvt >= 4) |
| 634 | apic_write(APIC_LVTPC, APIC_LVT_MASKED); | 674 | apic_write(APIC_LVTPC, APIC_LVT_MASKED); |
| 635 | apic_write(APIC_ESR, 0); | 675 | |
| 636 | apic_read(APIC_ESR); | 676 | /* Integrated APIC (!82489DX) ? */ |
| 677 | if (lapic_is_integrated()) { | ||
| 678 | if (maxlvt > 3) | ||
| 679 | /* Clear ESR due to Pentium errata 3AP and 11AP */ | ||
| 680 | apic_write(APIC_ESR, 0); | ||
| 681 | apic_read(APIC_ESR); | ||
| 682 | } | ||
| 637 | } | 683 | } |
| 638 | 684 | ||
| 639 | /** | 685 | /** |
| @@ -652,8 +698,28 @@ void disable_local_APIC(void) | |||
| 652 | value = apic_read(APIC_SPIV); | 698 | value = apic_read(APIC_SPIV); |
| 653 | value &= ~APIC_SPIV_APIC_ENABLED; | 699 | value &= ~APIC_SPIV_APIC_ENABLED; |
| 654 | apic_write(APIC_SPIV, value); | 700 | apic_write(APIC_SPIV, value); |
| 701 | |||
| 702 | #ifdef CONFIG_X86_32 | ||
| 703 | /* | ||
| 704 | * When LAPIC was disabled by the BIOS and enabled by the kernel, | ||
| 705 | * restore the disabled state. | ||
| 706 | */ | ||
| 707 | if (enabled_via_apicbase) { | ||
| 708 | unsigned int l, h; | ||
| 709 | |||
| 710 | rdmsr(MSR_IA32_APICBASE, l, h); | ||
| 711 | l &= ~MSR_IA32_APICBASE_ENABLE; | ||
| 712 | wrmsr(MSR_IA32_APICBASE, l, h); | ||
| 713 | } | ||
| 714 | #endif | ||
| 655 | } | 715 | } |
| 656 | 716 | ||
| 717 | /* | ||
| 718 | * If Linux enabled the LAPIC against the BIOS default disable it down before | ||
| 719 | * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and | ||
| 720 | * not power-off. Additionally clear all LVT entries before disable_local_APIC | ||
| 721 | * for the case where Linux didn't enable the LAPIC. | ||
| 722 | */ | ||
| 657 | void lapic_shutdown(void) | 723 | void lapic_shutdown(void) |
| 658 | { | 724 | { |
| 659 | unsigned long flags; | 725 | unsigned long flags; |
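
When the 32-bit kernel itself enabled the LAPIC through the APICBASE MSR (enabled_via_apicbase), the code added above restores the BIOS state by clearing the enable bit before writing the MSR back. A sketch of just that bit manipulation, assuming the architectural enable bit 11 and a hypothetical APICBASE value; no rdmsr/wrmsr is performed here:

    #include <stdio.h>
    #include <stdint.h>

    #define MSR_IA32_APICBASE_ENABLE  (1u << 11)   /* architectural xAPIC global enable */

    int main(void)
    {
        /* hypothetical APICBASE contents: base 0xfee00000, BSP flag, enabled */
        uint32_t l = 0xfee00900, h = 0;

        l &= ~MSR_IA32_APICBASE_ENABLE;   /* same clearing done before wrmsr() above */

        printf("APICBASE after disable: high=0x%08x low=0x%08x\n", h, l);
        return 0;
    }
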
| @@ -663,7 +729,13 @@ void lapic_shutdown(void) | |||
| 663 | 729 | ||
| 664 | local_irq_save(flags); | 730 | local_irq_save(flags); |
| 665 | 731 | ||
| 666 | disable_local_APIC(); | 732 | #ifdef CONFIG_X86_32 |
| 733 | if (!enabled_via_apicbase) | ||
| 734 | clear_local_APIC(); | ||
| 735 | else | ||
| 736 | #endif | ||
| 737 | disable_local_APIC(); | ||
| 738 | |||
| 667 | 739 | ||
| 668 | local_irq_restore(flags); | 740 | local_irq_restore(flags); |
| 669 | } | 741 | } |
| @@ -734,8 +806,11 @@ int __init verify_local_APIC(void) | |||
| 734 | */ | 806 | */ |
| 735 | void __init sync_Arb_IDs(void) | 807 | void __init sync_Arb_IDs(void) |
| 736 | { | 808 | { |
| 737 | /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ | 809 | /* |
| 738 | if (modern_apic()) | 810 | * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not |
| 811 | * needed on AMD. | ||
| 812 | */ | ||
| 813 | if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) | ||
| 739 | return; | 814 | return; |
| 740 | 815 | ||
| 741 | /* | 816 | /* |
| @@ -744,8 +819,8 @@ void __init sync_Arb_IDs(void) | |||
| 744 | apic_wait_icr_idle(); | 819 | apic_wait_icr_idle(); |
| 745 | 820 | ||
| 746 | apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); | 821 | apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); |
| 747 | apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | 822 | apic_write(APIC_ICR, APIC_DEST_ALLINC | |
| 748 | | APIC_DM_INIT); | 823 | APIC_INT_LEVELTRIG | APIC_DM_INIT); |
| 749 | } | 824 | } |
| 750 | 825 | ||
| 751 | /* | 826 | /* |
| @@ -762,8 +837,6 @@ void __init init_bsp_APIC(void) | |||
| 762 | if (smp_found_config || !cpu_has_apic) | 837 | if (smp_found_config || !cpu_has_apic) |
| 763 | return; | 838 | return; |
| 764 | 839 | ||
| 765 | value = apic_read(APIC_LVR); | ||
| 766 | |||
| 767 | /* | 840 | /* |
| 768 | * Do not trust the local APIC being empty at bootup. | 841 | * Do not trust the local APIC being empty at bootup. |
| 769 | */ | 842 | */ |
| @@ -775,7 +848,15 @@ void __init init_bsp_APIC(void) | |||
| 775 | value = apic_read(APIC_SPIV); | 848 | value = apic_read(APIC_SPIV); |
| 776 | value &= ~APIC_VECTOR_MASK; | 849 | value &= ~APIC_VECTOR_MASK; |
| 777 | value |= APIC_SPIV_APIC_ENABLED; | 850 | value |= APIC_SPIV_APIC_ENABLED; |
| 778 | value |= APIC_SPIV_FOCUS_DISABLED; | 851 | |
| 852 | #ifdef CONFIG_X86_32 | ||
| 853 | /* This bit is reserved on P4/Xeon and should be cleared */ | ||
| 854 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | ||
| 855 | (boot_cpu_data.x86 == 15)) | ||
| 856 | value &= ~APIC_SPIV_FOCUS_DISABLED; | ||
| 857 | else | ||
| 858 | #endif | ||
| 859 | value |= APIC_SPIV_FOCUS_DISABLED; | ||
| 779 | value |= SPURIOUS_APIC_VECTOR; | 860 | value |= SPURIOUS_APIC_VECTOR; |
| 780 | apic_write(APIC_SPIV, value); | 861 | apic_write(APIC_SPIV, value); |
| 781 | 862 | ||
| @@ -784,9 +865,50 @@ void __init init_bsp_APIC(void) | |||
| 784 | */ | 865 | */ |
| 785 | apic_write(APIC_LVT0, APIC_DM_EXTINT); | 866 | apic_write(APIC_LVT0, APIC_DM_EXTINT); |
| 786 | value = APIC_DM_NMI; | 867 | value = APIC_DM_NMI; |
| 868 | if (!lapic_is_integrated()) /* 82489DX */ | ||
| 869 | value |= APIC_LVT_LEVEL_TRIGGER; | ||
| 787 | apic_write(APIC_LVT1, value); | 870 | apic_write(APIC_LVT1, value); |
| 788 | } | 871 | } |
| 789 | 872 | ||
| 873 | static void __cpuinit lapic_setup_esr(void) | ||
| 874 | { | ||
| 875 | unsigned long oldvalue, value, maxlvt; | ||
| 876 | if (lapic_is_integrated() && !esr_disable) { | ||
| 877 | if (esr_disable) { | ||
| 878 | /* | ||
| 879 | * Something untraceable is creating bad interrupts on | ||
| 880 | * secondary quads ... for the moment, just leave the | ||
| 881 | * ESR disabled - we can't do anything useful with the | ||
| 882 | * errors anyway - mbligh | ||
| 883 | */ | ||
| 884 | printk(KERN_INFO "Leaving ESR disabled.\n"); | ||
| 885 | return; | ||
| 886 | } | ||
| 887 | /* !82489DX */ | ||
| 888 | maxlvt = lapic_get_maxlvt(); | ||
| 889 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | ||
| 890 | apic_write(APIC_ESR, 0); | ||
| 891 | oldvalue = apic_read(APIC_ESR); | ||
| 892 | |||
| 893 | /* enables sending errors */ | ||
| 894 | value = ERROR_APIC_VECTOR; | ||
| 895 | apic_write(APIC_LVTERR, value); | ||
| 896 | /* | ||
| 897 | * spec says clear errors after enabling vector. | ||
| 898 | */ | ||
| 899 | if (maxlvt > 3) | ||
| 900 | apic_write(APIC_ESR, 0); | ||
| 901 | value = apic_read(APIC_ESR); | ||
| 902 | if (value != oldvalue) | ||
| 903 | apic_printk(APIC_VERBOSE, "ESR value before enabling " | ||
| 904 | "vector: 0x%08lx after: 0x%08lx\n", | ||
| 905 | oldvalue, value); | ||
| 906 | } else { | ||
| 907 | printk(KERN_INFO "No ESR for 82489DX.\n"); | ||
| 908 | } | ||
| 909 | } | ||
| 910 | |||
| 911 | |||
| 790 | /** | 912 | /** |
| 791 | * setup_local_APIC - setup the local APIC | 913 | * setup_local_APIC - setup the local APIC |
| 792 | */ | 914 | */ |
| @@ -892,21 +1014,20 @@ void __cpuinit setup_local_APIC(void) | |||
| 892 | preempt_enable(); | 1014 | preempt_enable(); |
| 893 | } | 1015 | } |
| 894 | 1016 | ||
| 895 | static void __cpuinit lapic_setup_esr(void) | ||
| 896 | { | ||
| 897 | unsigned maxlvt = lapic_get_maxlvt(); | ||
| 898 | |||
| 899 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR); | ||
| 900 | /* | ||
| 901 | * spec says clear errors after enabling vector. | ||
| 902 | */ | ||
| 903 | if (maxlvt > 3) | ||
| 904 | apic_write(APIC_ESR, 0); | ||
| 905 | } | ||
| 906 | |||
| 907 | void __cpuinit end_local_APIC_setup(void) | 1017 | void __cpuinit end_local_APIC_setup(void) |
| 908 | { | 1018 | { |
| 909 | lapic_setup_esr(); | 1019 | lapic_setup_esr(); |
| 1020 | |||
| 1021 | #ifdef CONFIG_X86_32 | ||
| 1022 | { | ||
| 1023 | unsigned int value; | ||
| 1024 | /* Disable the local apic timer */ | ||
| 1025 | value = apic_read(APIC_LVTT); | ||
| 1026 | value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | ||
| 1027 | apic_write(APIC_LVTT, value); | ||
| 1028 | } | ||
| 1029 | #endif | ||
| 1030 | |||
| 910 | setup_apic_nmi_watchdog(NULL); | 1031 | setup_apic_nmi_watchdog(NULL); |
| 911 | apic_pm_activate(); | 1032 | apic_pm_activate(); |
| 912 | } | 1033 | } |
| @@ -1108,6 +1229,8 @@ void __init init_apic_mappings(void) | |||
| 1108 | * This initializes the IO-APIC and APIC hardware if this is | 1229 | * This initializes the IO-APIC and APIC hardware if this is |
| 1109 | * a UP kernel. | 1230 | * a UP kernel. |
| 1110 | */ | 1231 | */ |
| 1232 | int apic_version[MAX_APICS]; | ||
| 1233 | |||
| 1111 | int __init APIC_init_uniprocessor(void) | 1234 | int __init APIC_init_uniprocessor(void) |
| 1112 | { | 1235 | { |
| 1113 | if (disable_apic) { | 1236 | if (disable_apic) { |
| @@ -1209,17 +1332,57 @@ asmlinkage void smp_error_interrupt(void) | |||
| 1209 | } | 1332 | } |
| 1210 | 1333 | ||
| 1211 | /** | 1334 | /** |
| 1212 | * * connect_bsp_APIC - attach the APIC to the interrupt system | 1335 | * connect_bsp_APIC - attach the APIC to the interrupt system |
| 1213 | * */ | 1336 | */ |
| 1214 | void __init connect_bsp_APIC(void) | 1337 | void __init connect_bsp_APIC(void) |
| 1215 | { | 1338 | { |
| 1339 | #ifdef CONFIG_X86_32 | ||
| 1340 | if (pic_mode) { | ||
| 1341 | /* | ||
| 1342 | * Do not trust the local APIC being empty at bootup. | ||
| 1343 | */ | ||
| 1344 | clear_local_APIC(); | ||
| 1345 | /* | ||
| 1346 | * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's | ||
| 1347 | * local APIC to INT and NMI lines. | ||
| 1348 | */ | ||
| 1349 | apic_printk(APIC_VERBOSE, "leaving PIC mode, " | ||
| 1350 | "enabling APIC mode.\n"); | ||
| 1351 | outb(0x70, 0x22); | ||
| 1352 | outb(0x01, 0x23); | ||
| 1353 | } | ||
| 1354 | #endif | ||
| 1216 | enable_apic_mode(); | 1355 | enable_apic_mode(); |
| 1217 | } | 1356 | } |
| 1218 | 1357 | ||
| 1358 | /** | ||
| 1359 | * disconnect_bsp_APIC - detach the APIC from the interrupt system | ||
| 1360 | * @virt_wire_setup: indicates, whether virtual wire mode is selected | ||
| 1361 | * | ||
| 1362 | * Virtual wire mode is necessary to deliver legacy interrupts even when the | ||
| 1363 | * APIC is disabled. | ||
| 1364 | */ | ||
| 1219 | void disconnect_bsp_APIC(int virt_wire_setup) | 1365 | void disconnect_bsp_APIC(int virt_wire_setup) |
| 1220 | { | 1366 | { |
| 1367 | unsigned int value; | ||
| 1368 | |||
| 1369 | #ifdef CONFIG_X86_32 | ||
| 1370 | if (pic_mode) { | ||
| 1371 | /* | ||
| 1372 | * Put the board back into PIC mode (has an effect only on | ||
| 1373 | * certain older boards). Note that APIC interrupts, including | ||
| 1374 | * IPIs, won't work beyond this point! The only exception are | ||
| 1375 | * INIT IPIs. | ||
| 1376 | */ | ||
| 1377 | apic_printk(APIC_VERBOSE, "disabling APIC mode, " | ||
| 1378 | "entering PIC mode.\n"); | ||
| 1379 | outb(0x70, 0x22); | ||
| 1380 | outb(0x00, 0x23); | ||
| 1381 | return; | ||
| 1382 | } | ||
| 1383 | #endif | ||
| 1384 | |||
| 1221 | /* Go back to Virtual Wire compatibility mode */ | 1385 | /* Go back to Virtual Wire compatibility mode */ |
| 1222 | unsigned long value; | ||
| 1223 | 1386 | ||
| 1224 | /* For the spurious interrupt use vector F, and enable it */ | 1387 | /* For the spurious interrupt use vector F, and enable it */ |
| 1225 | value = apic_read(APIC_SPIV); | 1388 | value = apic_read(APIC_SPIV); |
| @@ -1245,7 +1408,10 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
| 1245 | apic_write(APIC_LVT0, APIC_LVT_MASKED); | 1408 | apic_write(APIC_LVT0, APIC_LVT_MASKED); |
| 1246 | } | 1409 | } |
| 1247 | 1410 | ||
| 1248 | /* For LVT1 make it edge triggered, active high, nmi and enabled */ | 1411 | /* |
| 1412 | * For LVT1 make it edge triggered, active high, | ||
| 1413 | * nmi and enabled | ||
| 1414 | */ | ||
| 1249 | value = apic_read(APIC_LVT1); | 1415 | value = apic_read(APIC_LVT1); |
| 1250 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | | 1416 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | |
| 1251 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | 1417 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | |
| @@ -1260,9 +1426,20 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
| 1260 | int cpu; | 1426 | int cpu; |
| 1261 | cpumask_t tmp_map; | 1427 | cpumask_t tmp_map; |
| 1262 | 1428 | ||
| 1429 | /* | ||
| 1430 | * Validate version | ||
| 1431 | */ | ||
| 1432 | if (version == 0x0) { | ||
| 1433 | printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " | ||
| 1434 | "fixing up to 0x10. (tell your hw vendor)\n", | ||
| 1435 | version); | ||
| 1436 | version = 0x10; | ||
| 1437 | } | ||
| 1438 | apic_version[apicid] = version; | ||
| 1439 | |||
| 1263 | if (num_processors >= NR_CPUS) { | 1440 | if (num_processors >= NR_CPUS) { |
| 1264 | printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." | 1441 | printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." |
| 1265 | " Processor ignored.\n", NR_CPUS); | 1442 | " Processor ignored.\n", NR_CPUS); |
| 1266 | return; | 1443 | return; |
| 1267 | } | 1444 | } |
| 1268 | 1445 | ||
| @@ -1282,6 +1459,29 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
| 1282 | if (apicid > max_physical_apicid) | 1459 | if (apicid > max_physical_apicid) |
| 1283 | max_physical_apicid = apicid; | 1460 | max_physical_apicid = apicid; |
| 1284 | 1461 | ||
| 1462 | #ifdef CONFIG_X86_32 | ||
| 1463 | /* | ||
| 1464 | * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y | ||
| 1465 | * but we need to work other dependencies like SMP_SUSPEND etc | ||
| 1466 | * before this can be done without some confusion. | ||
| 1467 | * if (CPU_HOTPLUG_ENABLED || num_processors > 8) | ||
| 1468 | * - Ashok Raj <ashok.raj@intel.com> | ||
| 1469 | */ | ||
| 1470 | if (max_physical_apicid >= 8) { | ||
| 1471 | switch (boot_cpu_data.x86_vendor) { | ||
| 1472 | case X86_VENDOR_INTEL: | ||
| 1473 | if (!APIC_XAPIC(version)) { | ||
| 1474 | def_to_bigsmp = 0; | ||
| 1475 | break; | ||
| 1476 | } | ||
| 1477 | /* If P4 and above fall through */ | ||
| 1478 | case X86_VENDOR_AMD: | ||
| 1479 | def_to_bigsmp = 1; | ||
| 1480 | } | ||
| 1481 | } | ||
| 1482 | #endif | ||
| 1483 | |||
| 1484 | #if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) | ||
| 1285 | /* are we being called early in kernel startup? */ | 1485 | /* are we being called early in kernel startup? */ |
| 1286 | if (early_per_cpu_ptr(x86_cpu_to_apicid)) { | 1486 | if (early_per_cpu_ptr(x86_cpu_to_apicid)) { |
| 1287 | u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); | 1487 | u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); |
| @@ -1293,6 +1493,7 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
| 1293 | per_cpu(x86_cpu_to_apicid, cpu) = apicid; | 1493 | per_cpu(x86_cpu_to_apicid, cpu) = apicid; |
| 1294 | per_cpu(x86_bios_cpu_apicid, cpu) = apicid; | 1494 | per_cpu(x86_bios_cpu_apicid, cpu) = apicid; |
| 1295 | } | 1495 | } |
| 1496 | #endif | ||
| 1296 | 1497 | ||
| 1297 | cpu_set(cpu, cpu_possible_map); | 1498 | cpu_set(cpu, cpu_possible_map); |
| 1298 | cpu_set(cpu, cpu_present_map); | 1499 | cpu_set(cpu, cpu_present_map); |
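Two of the checks folded into generic_processor_info() above are easy to misread: a BIOS-reported APIC version of 0 is patched up to 0x10, and on 32-bit a physical APIC id of 8 or more switches Intel (xAPIC-capable) and AMD systems to the bigsmp sub-architecture while leaving pre-xAPIC Intel parts alone. A stand-alone sketch of that decision logic; APIC_XAPIC() is assumed to mean "version >= 0x14" as in apicdef.h:

  #include <stdio.h>

  enum vendor { VENDOR_INTEL, VENDOR_AMD, VENDOR_OTHER };

  #define APIC_XAPIC(ver) ((ver) >= 0x14)   /* assumed, mirrors apicdef.h */

  /* BIOSes sometimes report version 0; the kernel fixes that up to 0x10. */
  static int sanitize_apic_version(int version)
  {
      return version ? version : 0x10;
  }

  /* Does this (max apicid, vendor, version) combination need bigsmp? */
  static int needs_bigsmp(int max_physical_apicid, enum vendor v, int version)
  {
      if (max_physical_apicid < 8)
          return 0;
      if (v == VENDOR_INTEL && !APIC_XAPIC(version))
          return 0;   /* old local APICs: keep the default sub-arch */
      return v == VENDOR_INTEL || v == VENDOR_AMD;
  }

  int main(void)
  {
      printf("version 0    -> %#x\n", sanitize_apic_version(0));
      printf("16 AMD CPUs  -> bigsmp=%d\n", needs_bigsmp(15, VENDOR_AMD, 0x10));
      return 0;
  }
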
| @@ -1309,9 +1510,11 @@ int hard_smp_processor_id(void) | |||
| 1309 | #ifdef CONFIG_PM | 1510 | #ifdef CONFIG_PM |
| 1310 | 1511 | ||
| 1311 | static struct { | 1512 | static struct { |
| 1312 | /* 'active' is true if the local APIC was enabled by us and | 1513 | /* |
| 1313 | not the BIOS; this signifies that we are also responsible | 1514 | * 'active' is true if the local APIC was enabled by us and |
| 1314 | for disabling it before entering apm/acpi suspend */ | 1515 | * not the BIOS; this signifies that we are also responsible |
| 1516 | * for disabling it before entering apm/acpi suspend | ||
| 1517 | */ | ||
| 1315 | int active; | 1518 | int active; |
| 1316 | /* r/w apic fields */ | 1519 | /* r/w apic fields */ |
| 1317 | unsigned int apic_id; | 1520 | unsigned int apic_id; |
| @@ -1352,10 +1555,11 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) | |||
| 1352 | apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); | 1555 | apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); |
| 1353 | apic_pm_state.apic_tmict = apic_read(APIC_TMICT); | 1556 | apic_pm_state.apic_tmict = apic_read(APIC_TMICT); |
| 1354 | apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); | 1557 | apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); |
| 1355 | #ifdef CONFIG_X86_MCE_INTEL | 1558 | #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) |
| 1356 | if (maxlvt >= 5) | 1559 | if (maxlvt >= 5) |
| 1357 | apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); | 1560 | apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); |
| 1358 | #endif | 1561 | #endif |
| 1562 | |||
| 1359 | local_irq_save(flags); | 1563 | local_irq_save(flags); |
| 1360 | disable_local_APIC(); | 1564 | disable_local_APIC(); |
| 1361 | local_irq_restore(flags); | 1565 | local_irq_restore(flags); |
| @@ -1374,13 +1578,24 @@ static int lapic_resume(struct sys_device *dev) | |||
| 1374 | maxlvt = lapic_get_maxlvt(); | 1578 | maxlvt = lapic_get_maxlvt(); |
| 1375 | 1579 | ||
| 1376 | local_irq_save(flags); | 1580 | local_irq_save(flags); |
| 1377 | if (!x2apic) { | 1581 | |
| 1582 | #ifdef CONFIG_X86_64 | ||
| 1583 | if (x2apic) | ||
| 1584 | enable_x2apic(); | ||
| 1585 | else | ||
| 1586 | #endif | ||
| 1587 | { | ||
| 1588 | /* | ||
| 1589 | * Make sure the APICBASE points to the right address | ||
| 1590 | * | ||
| 1591 | * FIXME! This will be wrong if we ever support suspend on | ||
| 1592 | * SMP! We'll need to do this as part of the CPU restore! | ||
| 1593 | */ | ||
| 1378 | rdmsr(MSR_IA32_APICBASE, l, h); | 1594 | rdmsr(MSR_IA32_APICBASE, l, h); |
| 1379 | l &= ~MSR_IA32_APICBASE_BASE; | 1595 | l &= ~MSR_IA32_APICBASE_BASE; |
| 1380 | l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; | 1596 | l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; |
| 1381 | wrmsr(MSR_IA32_APICBASE, l, h); | 1597 | wrmsr(MSR_IA32_APICBASE, l, h); |
| 1382 | } else | 1598 | } |
| 1383 | enable_x2apic(); | ||
| 1384 | 1599 | ||
| 1385 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); | 1600 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); |
| 1386 | apic_write(APIC_ID, apic_pm_state.apic_id); | 1601 | apic_write(APIC_ID, apic_pm_state.apic_id); |
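lapic_resume() above re-enables the local APIC by rewriting MSR_IA32_APICBASE (0x1b): the old base bits are cleared, then the enable bit and mp_lapic_addr are OR-ed back in; when x2apic is active, enable_x2apic() instead sets the x2apic bit in the same MSR. The register can be inspected from user space through the msr driver; a small sketch, assuming /dev/cpu/0/msr exists and the process has the privileges to read it:

  #include <stdio.h>
  #include <stdint.h>
  #include <fcntl.h>
  #include <unistd.h>

  #define MSR_IA32_APICBASE  0x1b
  #define APICBASE_ENABLE    (1ULL << 11)   /* global enable bit */
  #define APICBASE_X2APIC    (1ULL << 10)   /* x2apic mode bit */

  int main(void)
  {
      uint64_t v;
      int fd = open("/dev/cpu/0/msr", O_RDONLY);  /* needs msr module + root */

      if (fd < 0 || pread(fd, &v, sizeof(v), MSR_IA32_APICBASE) != sizeof(v)) {
          perror("msr");
          return 1;
      }
      printf("APIC %sabled, %s mode, base %#llx\n",
             (v & APICBASE_ENABLE) ? "en" : "dis",
             (v & APICBASE_X2APIC) ? "x2apic" : "xapic",
             (unsigned long long)(v & ~0xfffULL));
      close(fd);
      return 0;
  }
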
| @@ -1390,7 +1605,7 @@ static int lapic_resume(struct sys_device *dev) | |||
| 1390 | apic_write(APIC_SPIV, apic_pm_state.apic_spiv); | 1605 | apic_write(APIC_SPIV, apic_pm_state.apic_spiv); |
| 1391 | apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); | 1606 | apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); |
| 1392 | apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); | 1607 | apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); |
| 1393 | #ifdef CONFIG_X86_MCE_INTEL | 1608 | #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) |
| 1394 | if (maxlvt >= 5) | 1609 | if (maxlvt >= 5) |
| 1395 | apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); | 1610 | apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); |
| 1396 | #endif | 1611 | #endif |
| @@ -1404,10 +1619,17 @@ static int lapic_resume(struct sys_device *dev) | |||
| 1404 | apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); | 1619 | apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); |
| 1405 | apic_write(APIC_ESR, 0); | 1620 | apic_write(APIC_ESR, 0); |
| 1406 | apic_read(APIC_ESR); | 1621 | apic_read(APIC_ESR); |
| 1622 | |||
| 1407 | local_irq_restore(flags); | 1623 | local_irq_restore(flags); |
| 1624 | |||
| 1408 | return 0; | 1625 | return 0; |
| 1409 | } | 1626 | } |
| 1410 | 1627 | ||
| 1628 | /* | ||
| 1629 | * This device has no shutdown method - fully functioning local APICs | ||
| 1630 | * are needed on every CPU up until machine_halt/restart/poweroff. | ||
| 1631 | */ | ||
| 1632 | |||
| 1411 | static struct sysdev_class lapic_sysclass = { | 1633 | static struct sysdev_class lapic_sysclass = { |
| 1412 | .name = "lapic", | 1634 | .name = "lapic", |
| 1413 | .resume = lapic_resume, | 1635 | .resume = lapic_resume, |
| @@ -1533,28 +1755,7 @@ early_param("nox2apic", setup_nox2apic); | |||
| 1533 | /* | 1755 | /* |
| 1534 | * APIC command line parameters | 1756 | * APIC command line parameters |
| 1535 | */ | 1757 | */ |
| 1536 | static int __init apic_set_verbosity(char *str) | 1758 | static int __init setup_disableapic(char *arg) |
| 1537 | { | ||
| 1538 | if (str == NULL) { | ||
| 1539 | skip_ioapic_setup = 0; | ||
| 1540 | ioapic_force = 1; | ||
| 1541 | return 0; | ||
| 1542 | } | ||
| 1543 | if (strcmp("debug", str) == 0) | ||
| 1544 | apic_verbosity = APIC_DEBUG; | ||
| 1545 | else if (strcmp("verbose", str) == 0) | ||
| 1546 | apic_verbosity = APIC_VERBOSE; | ||
| 1547 | else { | ||
| 1548 | printk(KERN_WARNING "APIC Verbosity level %s not recognised" | ||
| 1549 | " use apic=verbose or apic=debug\n", str); | ||
| 1550 | return -EINVAL; | ||
| 1551 | } | ||
| 1552 | |||
| 1553 | return 0; | ||
| 1554 | } | ||
| 1555 | early_param("apic", apic_set_verbosity); | ||
| 1556 | |||
| 1557 | static __init int setup_disableapic(char *str) | ||
| 1558 | { | 1759 | { |
| 1559 | disable_apic = 1; | 1760 | disable_apic = 1; |
| 1560 | setup_clear_cpu_cap(X86_FEATURE_APIC); | 1761 | setup_clear_cpu_cap(X86_FEATURE_APIC); |
| @@ -1563,9 +1764,9 @@ static __init int setup_disableapic(char *str) | |||
| 1563 | early_param("disableapic", setup_disableapic); | 1764 | early_param("disableapic", setup_disableapic); |
| 1564 | 1765 | ||
| 1565 | /* same as disableapic, for compatibility */ | 1766 | /* same as disableapic, for compatibility */ |
| 1566 | static __init int setup_nolapic(char *str) | 1767 | static int __init setup_nolapic(char *arg) |
| 1567 | { | 1768 | { |
| 1568 | return setup_disableapic(str); | 1769 | return setup_disableapic(arg); |
| 1569 | } | 1770 | } |
| 1570 | early_param("nolapic", setup_nolapic); | 1771 | early_param("nolapic", setup_nolapic); |
| 1571 | 1772 | ||
| @@ -1576,14 +1777,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg) | |||
| 1576 | } | 1777 | } |
| 1577 | early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); | 1778 | early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); |
| 1578 | 1779 | ||
| 1579 | static __init int setup_noapictimer(char *str) | 1780 | static int __init parse_disable_apic_timer(char *arg) |
| 1580 | { | 1781 | { |
| 1581 | if (str[0] != ' ' && str[0] != 0) | ||
| 1582 | return 0; | ||
| 1583 | disable_apic_timer = 1; | 1782 | disable_apic_timer = 1; |
| 1584 | return 1; | 1783 | return 0; |
| 1784 | } | ||
| 1785 | early_param("noapictimer", parse_disable_apic_timer); | ||
| 1786 | |||
| 1787 | static int __init parse_nolapic_timer(char *arg) | ||
| 1788 | { | ||
| 1789 | disable_apic_timer = 1; | ||
| 1790 | return 0; | ||
| 1585 | } | 1791 | } |
| 1586 | __setup("noapictimer", setup_noapictimer); | 1792 | early_param("nolapic_timer", parse_nolapic_timer); |
| 1587 | 1793 | ||
| 1588 | static __init int setup_apicpmtimer(char *s) | 1794 | static __init int setup_apicpmtimer(char *s) |
| 1589 | { | 1795 | { |
| @@ -1593,6 +1799,31 @@ static __init int setup_apicpmtimer(char *s) | |||
| 1593 | } | 1799 | } |
| 1594 | __setup("apicpmtimer", setup_apicpmtimer); | 1800 | __setup("apicpmtimer", setup_apicpmtimer); |
| 1595 | 1801 | ||
| 1802 | static int __init apic_set_verbosity(char *arg) | ||
| 1803 | { | ||
| 1804 | if (!arg) { | ||
| 1805 | #ifdef CONFIG_X86_64 | ||
| 1806 | skip_ioapic_setup = 0; | ||
| 1807 | ioapic_force = 1; | ||
| 1808 | return 0; | ||
| 1809 | #endif | ||
| 1810 | return -EINVAL; | ||
| 1811 | } | ||
| 1812 | |||
| 1813 | if (strcmp("debug", arg) == 0) | ||
| 1814 | apic_verbosity = APIC_DEBUG; | ||
| 1815 | else if (strcmp("verbose", arg) == 0) | ||
| 1816 | apic_verbosity = APIC_VERBOSE; | ||
| 1817 | else { | ||
| 1818 | printk(KERN_WARNING "APIC Verbosity level %s not recognised" | ||
| 1819 | " use apic=verbose or apic=debug\n", arg); | ||
| 1820 | return -EINVAL; | ||
| 1821 | } | ||
| 1822 | |||
| 1823 | return 0; | ||
| 1824 | } | ||
| 1825 | early_param("apic", apic_set_verbosity); | ||
| 1826 | |||
| 1596 | static int __init lapic_insert_resource(void) | 1827 | static int __init lapic_insert_resource(void) |
| 1597 | { | 1828 | { |
| 1598 | if (!apic_phys) | 1829 | if (!apic_phys) |
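The reshuffled apic= handler above also illustrates the early_param() calling convention: the handler receives only the text after '=', or NULL when the parameter is given bare, which is why the 64-bit branch treats a bare "apic" as "force the IO-APIC on". A stand-alone sketch of that split; it is not the kernel's actual parse_early_param() code, just the same idea:

  #include <stdio.h>
  #include <string.h>

  /* Match a command-line token against a parameter name the way early_param
   * handlers see it: *arg gets the value, or NULL when no '=' was given. */
  static int early_param_match(const char *token, const char *name,
                               const char **arg)
  {
      size_t n = strlen(name);

      if (strncmp(token, name, n))
          return 0;
      if (token[n] == '=') {
          *arg = token + n + 1;   /* "apic=debug" -> "debug" */
          return 1;
      }
      if (token[n] == '\0') {
          *arg = NULL;            /* bare "apic": handler sees NULL */
          return 1;
      }
      return 0;                   /* e.g. "apicpmtimer" is a different param */
  }

  int main(void)
  {
      const char *arg;

      if (early_param_match("apic=debug", "apic", &arg))
          printf("apic -> %s\n", arg);
      if (early_param_match("apic", "apic", &arg))
          printf("apic -> %s\n", arg ? arg : "(null)");
      return 0;
  }
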
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 9ee24e6bc4b0..5145a6e72bbb 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
| @@ -228,12 +228,12 @@ | |||
| 228 | #include <linux/suspend.h> | 228 | #include <linux/suspend.h> |
| 229 | #include <linux/kthread.h> | 229 | #include <linux/kthread.h> |
| 230 | #include <linux/jiffies.h> | 230 | #include <linux/jiffies.h> |
| 231 | #include <linux/smp_lock.h> | ||
| 232 | 231 | ||
| 233 | #include <asm/system.h> | 232 | #include <asm/system.h> |
| 234 | #include <asm/uaccess.h> | 233 | #include <asm/uaccess.h> |
| 235 | #include <asm/desc.h> | 234 | #include <asm/desc.h> |
| 236 | #include <asm/i8253.h> | 235 | #include <asm/i8253.h> |
| 236 | #include <asm/olpc.h> | ||
| 237 | #include <asm/paravirt.h> | 237 | #include <asm/paravirt.h> |
| 238 | #include <asm/reboot.h> | 238 | #include <asm/reboot.h> |
| 239 | 239 | ||
| @@ -2217,7 +2217,7 @@ static int __init apm_init(void) | |||
| 2217 | 2217 | ||
| 2218 | dmi_check_system(apm_dmi_table); | 2218 | dmi_check_system(apm_dmi_table); |
| 2219 | 2219 | ||
| 2220 | if (apm_info.bios.version == 0 || paravirt_enabled()) { | 2220 | if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) { |
| 2221 | printk(KERN_INFO "apm: BIOS not found.\n"); | 2221 | printk(KERN_INFO "apm: BIOS not found.\n"); |
| 2222 | return -ENODEV; | 2222 | return -ENODEV; |
| 2223 | } | 2223 | } |
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index c639bd55391c..fdd585f9c53d 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c | |||
| @@ -25,11 +25,11 @@ x86_bios_strerror(long status) | |||
| 25 | { | 25 | { |
| 26 | const char *str; | 26 | const char *str; |
| 27 | switch (status) { | 27 | switch (status) { |
| 28 | case 0: str = "Call completed without error"; break; | 28 | case 0: str = "Call completed without error"; break; |
| 29 | case -1: str = "Not implemented"; break; | 29 | case -1: str = "Not implemented"; break; |
| 30 | case -2: str = "Invalid argument"; break; | 30 | case -2: str = "Invalid argument"; break; |
| 31 | case -3: str = "Call completed with error"; break; | 31 | case -3: str = "Call completed with error"; break; |
| 32 | default: str = "Unknown BIOS status code"; break; | 32 | default: str = "Unknown BIOS status code"; break; |
| 33 | } | 33 | } |
| 34 | return str; | 34 | return str; |
| 35 | } | 35 | } |
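The bios_uv.c hunk above appears to be a whitespace-only re-flow of the switch statement; the behaviour is unchanged. For illustration, a self-contained copy with a trivial caller shows how the UV BIOS status codes map to messages:

  #include <stdio.h>

  /* Copy of the switch above, usable outside the kernel for illustration. */
  static const char *x86_bios_strerror(long status)
  {
      const char *str;

      switch (status) {
      case  0: str = "Call completed without error"; break;
      case -1: str = "Not implemented"; break;
      case -2: str = "Invalid argument"; break;
      case -3: str = "Call completed with error"; break;
      default: str = "Unknown BIOS status code"; break;
      }
      return str;
  }

  int main(void)
  {
      long s;

      for (s = 1; s >= -4; s--)
          printf("%2ld: %s\n", s, x86_bios_strerror(s));
      return 0;
  }
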
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 403e689df0b8..7f0b45a5d788 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
| @@ -3,15 +3,13 @@ | |||
| 3 | # | 3 | # |
| 4 | 4 | ||
| 5 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 5 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
| 6 | obj-y += proc.o capflags.o powerflags.o | 6 | obj-y += proc.o capflags.o powerflags.o common.o |
| 7 | 7 | ||
| 8 | obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o | 8 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
| 9 | obj-$(CONFIG_X86_64) += common_64.o bugs_64.o | 9 | obj-$(CONFIG_X86_64) += bugs_64.o |
| 10 | 10 | ||
| 11 | obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o | 11 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o |
| 12 | obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o | 12 | obj-$(CONFIG_CPU_SUP_AMD) += amd.o |
| 13 | obj-$(CONFIG_CPU_SUP_AMD_32) += amd.o | ||
| 14 | obj-$(CONFIG_CPU_SUP_AMD_64) += amd_64.o | ||
| 15 | obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o | 13 | obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o |
| 16 | obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o | 14 | obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o |
| 17 | obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o | 15 | obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d64ea6097ca7..32e73520adf7 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
| @@ -1,13 +1,22 @@ | |||
| 1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
| 2 | #include <linux/bitops.h> | 2 | #include <linux/bitops.h> |
| 3 | #include <linux/mm.h> | 3 | #include <linux/mm.h> |
| 4 | |||
| 4 | #include <asm/io.h> | 5 | #include <asm/io.h> |
| 5 | #include <asm/processor.h> | 6 | #include <asm/processor.h> |
| 6 | #include <asm/apic.h> | 7 | #include <asm/apic.h> |
| 7 | 8 | ||
| 9 | #ifdef CONFIG_X86_64 | ||
| 10 | # include <asm/numa_64.h> | ||
| 11 | # include <asm/mmconfig.h> | ||
| 12 | # include <asm/cacheflush.h> | ||
| 13 | #endif | ||
| 14 | |||
| 8 | #include <mach_apic.h> | 15 | #include <mach_apic.h> |
| 16 | |||
| 9 | #include "cpu.h" | 17 | #include "cpu.h" |
| 10 | 18 | ||
| 19 | #ifdef CONFIG_X86_32 | ||
| 11 | /* | 20 | /* |
| 12 | * B step AMD K6 before B 9730xxxx have hardware bugs that can cause | 21 | * B step AMD K6 before B 9730xxxx have hardware bugs that can cause |
| 13 | * misexecution of code under Linux. Owners of such processors should | 22 | * misexecution of code under Linux. Owners of such processors should |
| @@ -24,26 +33,273 @@ | |||
| 24 | extern void vide(void); | 33 | extern void vide(void); |
| 25 | __asm__(".align 4\nvide: ret"); | 34 | __asm__(".align 4\nvide: ret"); |
| 26 | 35 | ||
| 27 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | 36 | static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) |
| 28 | { | 37 | { |
| 29 | if (cpuid_eax(0x80000000) >= 0x80000007) { | 38 | /* |
| 30 | c->x86_power = cpuid_edx(0x80000007); | 39 | * General Systems BIOSen alias the cpu frequency registers |
| 31 | if (c->x86_power & (1<<8)) | 40 | * of the Elan at 0x000df000. Unfortunately, one of the Linux |
| 32 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 41 | * drivers subsequently pokes it, and changes the CPU speed. |
| 42 | * Workaround : Remove the unneeded alias. | ||
| 43 | */ | ||
| 44 | #define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ | ||
| 45 | #define CBAR_ENB (0x80000000) | ||
| 46 | #define CBAR_KEY (0X000000CB) | ||
| 47 | if (c->x86_model == 9 || c->x86_model == 10) { | ||
| 48 | if (inl (CBAR) & CBAR_ENB) | ||
| 49 | outl (0 | CBAR_KEY, CBAR); | ||
| 33 | } | 50 | } |
| 34 | |||
| 35 | /* Set MTRR capability flag if appropriate */ | ||
| 36 | if (c->x86_model == 13 || c->x86_model == 9 || | ||
| 37 | (c->x86_model == 8 && c->x86_mask >= 8)) | ||
| 38 | set_cpu_cap(c, X86_FEATURE_K6_MTRR); | ||
| 39 | } | 51 | } |
| 40 | 52 | ||
| 41 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | 53 | |
| 54 | static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) | ||
| 42 | { | 55 | { |
| 43 | u32 l, h; | 56 | u32 l, h; |
| 44 | int mbytes = num_physpages >> (20-PAGE_SHIFT); | 57 | int mbytes = num_physpages >> (20-PAGE_SHIFT); |
| 45 | int r; | ||
| 46 | 58 | ||
| 59 | if (c->x86_model < 6) { | ||
| 60 | /* Based on AMD doc 20734R - June 2000 */ | ||
| 61 | if (c->x86_model == 0) { | ||
| 62 | clear_cpu_cap(c, X86_FEATURE_APIC); | ||
| 63 | set_cpu_cap(c, X86_FEATURE_PGE); | ||
| 64 | } | ||
| 65 | return; | ||
| 66 | } | ||
| 67 | |||
| 68 | if (c->x86_model == 6 && c->x86_mask == 1) { | ||
| 69 | const int K6_BUG_LOOP = 1000000; | ||
| 70 | int n; | ||
| 71 | void (*f_vide)(void); | ||
| 72 | unsigned long d, d2; | ||
| 73 | |||
| 74 | printk(KERN_INFO "AMD K6 stepping B detected - "); | ||
| 75 | |||
| 76 | /* | ||
| 77 | * It looks like AMD fixed the 2.6.2 bug and improved indirect | ||
| 78 | * calls at the same time. | ||
| 79 | */ | ||
| 80 | |||
| 81 | n = K6_BUG_LOOP; | ||
| 82 | f_vide = vide; | ||
| 83 | rdtscl(d); | ||
| 84 | while (n--) | ||
| 85 | f_vide(); | ||
| 86 | rdtscl(d2); | ||
| 87 | d = d2-d; | ||
| 88 | |||
| 89 | if (d > 20*K6_BUG_LOOP) | ||
| 90 | printk("system stability may be impaired when more than 32 MB are used.\n"); | ||
| 91 | else | ||
| 92 | printk("probably OK (after B9730xxxx).\n"); | ||
| 93 | printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); | ||
| 94 | } | ||
| 95 | |||
| 96 | /* K6 with old style WHCR */ | ||
| 97 | if (c->x86_model < 8 || | ||
| 98 | (c->x86_model == 8 && c->x86_mask < 8)) { | ||
| 99 | /* We can only write allocate on the low 508Mb */ | ||
| 100 | if (mbytes > 508) | ||
| 101 | mbytes = 508; | ||
| 102 | |||
| 103 | rdmsr(MSR_K6_WHCR, l, h); | ||
| 104 | if ((l&0x0000FFFF) == 0) { | ||
| 105 | unsigned long flags; | ||
| 106 | l = (1<<0)|((mbytes/4)<<1); | ||
| 107 | local_irq_save(flags); | ||
| 108 | wbinvd(); | ||
| 109 | wrmsr(MSR_K6_WHCR, l, h); | ||
| 110 | local_irq_restore(flags); | ||
| 111 | printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", | ||
| 112 | mbytes); | ||
| 113 | } | ||
| 114 | return; | ||
| 115 | } | ||
| 116 | |||
| 117 | if ((c->x86_model == 8 && c->x86_mask > 7) || | ||
| 118 | c->x86_model == 9 || c->x86_model == 13) { | ||
| 119 | /* The more serious chips .. */ | ||
| 120 | |||
| 121 | if (mbytes > 4092) | ||
| 122 | mbytes = 4092; | ||
| 123 | |||
| 124 | rdmsr(MSR_K6_WHCR, l, h); | ||
| 125 | if ((l&0xFFFF0000) == 0) { | ||
| 126 | unsigned long flags; | ||
| 127 | l = ((mbytes>>2)<<22)|(1<<16); | ||
| 128 | local_irq_save(flags); | ||
| 129 | wbinvd(); | ||
| 130 | wrmsr(MSR_K6_WHCR, l, h); | ||
| 131 | local_irq_restore(flags); | ||
| 132 | printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", | ||
| 133 | mbytes); | ||
| 134 | } | ||
| 135 | |||
| 136 | return; | ||
| 137 | } | ||
| 138 | |||
| 139 | if (c->x86_model == 10) { | ||
| 140 | /* AMD Geode LX is model 10 */ | ||
| 141 | /* placeholder for any needed mods */ | ||
| 142 | return; | ||
| 143 | } | ||
| 144 | } | ||
| 145 | |||
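init_amd_k6() above encodes the write-allocatable memory size into MSR_K6_WHCR in two formats: pre-model-8 parts take the count of 4 MB blocks in bits 1..7 with bit 0 as the enable, while later K6-2/K6-III parts take the block count in bits 22..31 with bit 16 as the enable. A quick stand-alone check of both encodings for a given memory size; the 508 MB and 4092 MB clamps match the hunk above:

  #include <stdio.h>

  /* Old-style WHCR (K6 model < 8, or model 8 stepping < 8): limit 508 MB. */
  static unsigned int whcr_old(int mbytes)
  {
      if (mbytes > 508)
          mbytes = 508;
      return (1 << 0) | ((mbytes / 4) << 1);
  }

  /* New-style WHCR (model 8 stepping >= 8, models 9 and 13): limit 4092 MB. */
  static unsigned int whcr_new(int mbytes)
  {
      if (mbytes > 4092)
          mbytes = 4092;
      return ((mbytes >> 2) << 22) | (1 << 16);
  }

  int main(void)
  {
      printf("256 MB: old %#x, new %#x\n", whcr_old(256), whcr_new(256));
      printf("  2 GB: old %#x, new %#x\n", whcr_old(2048), whcr_new(2048));
      return 0;
  }
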
| 146 | static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) | ||
| 147 | { | ||
| 148 | u32 l, h; | ||
| 149 | |||
| 150 | /* | ||
| 151 | * Bit 15 of Athlon specific MSR 15, needs to be 0 | ||
| 152 | * to enable SSE on Palomino/Morgan/Barton CPU's. | ||
| 153 | * If the BIOS didn't enable it already, enable it here. | ||
| 154 | */ | ||
| 155 | if (c->x86_model >= 6 && c->x86_model <= 10) { | ||
| 156 | if (!cpu_has(c, X86_FEATURE_XMM)) { | ||
| 157 | printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); | ||
| 158 | rdmsr(MSR_K7_HWCR, l, h); | ||
| 159 | l &= ~0x00008000; | ||
| 160 | wrmsr(MSR_K7_HWCR, l, h); | ||
| 161 | set_cpu_cap(c, X86_FEATURE_XMM); | ||
| 162 | } | ||
| 163 | } | ||
| 164 | |||
| 165 | /* | ||
| 166 | * It's been determined by AMD that Athlons since model 8 stepping 1 | ||
| 167 | * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx | ||
| 168 | * As per AMD technical note 27212 0.2 | ||
| 169 | */ | ||
| 170 | if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { | ||
| 171 | rdmsr(MSR_K7_CLK_CTL, l, h); | ||
| 172 | if ((l & 0xfff00000) != 0x20000000) { | ||
| 173 | printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, | ||
| 174 | ((l & 0x000fffff)|0x20000000)); | ||
| 175 | wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); | ||
| 176 | } | ||
| 177 | } | ||
| 178 | |||
| 179 | set_cpu_cap(c, X86_FEATURE_K7); | ||
| 180 | } | ||
| 181 | #endif | ||
| 182 | |||
| 183 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | ||
| 184 | static int __cpuinit nearby_node(int apicid) | ||
| 185 | { | ||
| 186 | int i, node; | ||
| 187 | |||
| 188 | for (i = apicid - 1; i >= 0; i--) { | ||
| 189 | node = apicid_to_node[i]; | ||
| 190 | if (node != NUMA_NO_NODE && node_online(node)) | ||
| 191 | return node; | ||
| 192 | } | ||
| 193 | for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { | ||
| 194 | node = apicid_to_node[i]; | ||
| 195 | if (node != NUMA_NO_NODE && node_online(node)) | ||
| 196 | return node; | ||
| 197 | } | ||
| 198 | return first_node(node_online_map); /* Shouldn't happen */ | ||
| 199 | } | ||
| 200 | #endif | ||
| 201 | |||
| 202 | /* | ||
| 203 | * On an AMD dual core setup the lower bits of the APIC id distinguish the cores. | ||
| 204 | * Assumes number of cores is a power of two. | ||
| 205 | */ | ||
| 206 | static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | ||
| 207 | { | ||
| 208 | #ifdef CONFIG_X86_HT | ||
| 209 | unsigned bits; | ||
| 210 | |||
| 211 | bits = c->x86_coreid_bits; | ||
| 212 | |||
| 213 | /* Low order bits define the core id (index of core in socket) */ | ||
| 214 | c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); | ||
| 215 | /* Convert the initial APIC ID into the socket ID */ | ||
| 216 | c->phys_proc_id = c->initial_apicid >> bits; | ||
| 217 | #endif | ||
| 218 | } | ||
| 219 | |||
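amd_detect_cmp() above splits the initial APIC id into a core index and a socket (physical package) id, with x86_coreid_bits as the dividing line. In isolation the computation is just a mask and a shift; a short illustration, assuming 2 core-id bits as on a hypothetical quad-core part:

  #include <stdio.h>

  struct topo { unsigned int core_id, pkg_id; };

  /* Low 'bits' bits index the core inside the socket; the rest is the socket. */
  static struct topo split_apicid(unsigned int apicid, unsigned int bits)
  {
      struct topo t = {
          .core_id = apicid & ((1u << bits) - 1),
          .pkg_id  = apicid >> bits,
      };
      return t;
  }

  int main(void)
  {
      unsigned int apicid;

      for (apicid = 0; apicid < 8; apicid++) {
          struct topo t = split_apicid(apicid, 2);
          printf("apicid %u -> socket %u core %u\n",
                 apicid, t.pkg_id, t.core_id);
      }
      return 0;
  }
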
| 220 | static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | ||
| 221 | { | ||
| 222 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | ||
| 223 | int cpu = smp_processor_id(); | ||
| 224 | int node; | ||
| 225 | unsigned apicid = hard_smp_processor_id(); | ||
| 226 | |||
| 227 | node = c->phys_proc_id; | ||
| 228 | if (apicid_to_node[apicid] != NUMA_NO_NODE) | ||
| 229 | node = apicid_to_node[apicid]; | ||
| 230 | if (!node_online(node)) { | ||
| 231 | /* Two possibilities here: | ||
| 232 | - The CPU is missing memory and no node was created. | ||
| 233 | In that case try picking one from a nearby CPU | ||
| 234 | - The APIC IDs differ from the HyperTransport node IDs | ||
| 235 | which the K8 northbridge parsing fills in. | ||
| 236 | Assume they are all increased by a constant offset, | ||
| 237 | but in the same order as the HT nodeids. | ||
| 238 | If that doesn't result in a usable node fall back to the | ||
| 239 | path for the previous case. */ | ||
| 240 | |||
| 241 | int ht_nodeid = c->initial_apicid; | ||
| 242 | |||
| 243 | if (ht_nodeid >= 0 && | ||
| 244 | apicid_to_node[ht_nodeid] != NUMA_NO_NODE) | ||
| 245 | node = apicid_to_node[ht_nodeid]; | ||
| 246 | /* Pick a nearby node */ | ||
| 247 | if (!node_online(node)) | ||
| 248 | node = nearby_node(apicid); | ||
| 249 | } | ||
| 250 | numa_set_node(cpu, node); | ||
| 251 | |||
| 252 | printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | ||
| 253 | #endif | ||
| 254 | } | ||
| 255 | |||
| 256 | static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) | ||
| 257 | { | ||
| 258 | #ifdef CONFIG_X86_HT | ||
| 259 | unsigned bits, ecx; | ||
| 260 | |||
| 261 | /* Multi core CPU? */ | ||
| 262 | if (c->extended_cpuid_level < 0x80000008) | ||
| 263 | return; | ||
| 264 | |||
| 265 | ecx = cpuid_ecx(0x80000008); | ||
| 266 | |||
| 267 | c->x86_max_cores = (ecx & 0xff) + 1; | ||
| 268 | |||
| 269 | /* CPU telling us the core id bits shift? */ | ||
| 270 | bits = (ecx >> 12) & 0xF; | ||
| 271 | |||
| 272 | /* Otherwise recompute */ | ||
| 273 | if (bits == 0) { | ||
| 274 | while ((1 << bits) < c->x86_max_cores) | ||
| 275 | bits++; | ||
| 276 | } | ||
| 277 | |||
| 278 | c->x86_coreid_bits = bits; | ||
| 279 | #endif | ||
| 280 | } | ||
| 281 | |||
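early_init_amd_mc() above derives x86_coreid_bits from CPUID leaf 0x80000008: ECX bits 0..7 give the core count minus one and bits 12..15 give the APIC id shift, falling back to rounding the core count up to the next power of two when the shift field is zero. The same leaf can be queried directly with the compiler's cpuid.h helper; a stand-alone sketch, only meaningful on a CPU that implements the leaf:

  #include <stdio.h>
  #include <cpuid.h>

  int main(void)
  {
      unsigned int eax, ebx, ecx, edx;
      unsigned int cores, bits;

      if (!__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx))
          return 1;                 /* leaf not supported */

      cores = (ecx & 0xff) + 1;     /* number of cores per package */
      bits = (ecx >> 12) & 0xf;     /* APIC id bits reserved for cores */
      if (bits == 0)                /* older parts: round up to a power of 2 */
          while ((1u << bits) < cores)
              bits++;

      printf("%u cores, %u core-id bits\n", cores, bits);
      return 0;
  }
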
| 282 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | ||
| 283 | { | ||
| 284 | early_init_amd_mc(c); | ||
| 285 | |||
| 286 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | ||
| 287 | if (c->x86_power & (1<<8)) | ||
| 288 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
| 289 | |||
| 290 | #ifdef CONFIG_X86_64 | ||
| 291 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); | ||
| 292 | #else | ||
| 293 | /* Set MTRR capability flag if appropriate */ | ||
| 294 | if (c->x86 == 5) | ||
| 295 | if (c->x86_model == 13 || c->x86_model == 9 || | ||
| 296 | (c->x86_model == 8 && c->x86_mask >= 8)) | ||
| 297 | set_cpu_cap(c, X86_FEATURE_K6_MTRR); | ||
| 298 | #endif | ||
| 299 | } | ||
| 300 | |||
| 301 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | ||
| 302 | { | ||
| 47 | #ifdef CONFIG_SMP | 303 | #ifdef CONFIG_SMP |
| 48 | unsigned long long value; | 304 | unsigned long long value; |
| 49 | 305 | ||
| @@ -54,7 +310,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
| 54 | * Errata 63 for SH-B3 steppings | 310 | * Errata 63 for SH-B3 steppings |
| 55 | * Errata 122 for all steppings (F+ have it disabled by default) | 311 | * Errata 122 for all steppings (F+ have it disabled by default) |
| 56 | */ | 312 | */ |
| 57 | if (c->x86 == 15) { | 313 | if (c->x86 == 0xf) { |
| 58 | rdmsrl(MSR_K7_HWCR, value); | 314 | rdmsrl(MSR_K7_HWCR, value); |
| 59 | value |= 1 << 6; | 315 | value |= 1 << 6; |
| 60 | wrmsrl(MSR_K7_HWCR, value); | 316 | wrmsrl(MSR_K7_HWCR, value); |
| @@ -64,209 +320,119 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
| 64 | early_init_amd(c); | 320 | early_init_amd(c); |
| 65 | 321 | ||
| 66 | /* | 322 | /* |
| 67 | * FIXME: We should handle the K5 here. Set up the write | ||
| 68 | * range and also turn on MSR 83 bits 4 and 31 (write alloc, | ||
| 69 | * no bus pipeline) | ||
| 70 | */ | ||
| 71 | |||
| 72 | /* | ||
| 73 | * Bit 31 in normal CPUID used for nonstandard 3DNow ID; | 323 | * Bit 31 in normal CPUID used for nonstandard 3DNow ID; |
| 74 | * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway | 324 | * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway |
| 75 | */ | 325 | */ |
| 76 | clear_cpu_cap(c, 0*32+31); | 326 | clear_cpu_cap(c, 0*32+31); |
| 77 | 327 | ||
| 78 | r = get_model_name(c); | 328 | #ifdef CONFIG_X86_64 |
| 329 | /* On C+ stepping K8 rep microcode works well for copy/memset */ | ||
| 330 | if (c->x86 == 0xf) { | ||
| 331 | u32 level; | ||
| 79 | 332 | ||
| 80 | switch (c->x86) { | 333 | level = cpuid_eax(1); |
| 81 | case 4: | 334 | if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) |
| 82 | /* | 335 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
| 83 | * General Systems BIOSen alias the cpu frequency registers | ||
| 85 | * of the Elan at 0x000df000. Unfortunately, one of the Linux | ||
| 85 | * drivers subsequently pokes it, and changes the CPU speed. | ||
| 86 | * Workaround : Remove the unneeded alias. | ||
| 87 | */ | ||
| 88 | #define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ | ||
| 89 | #define CBAR_ENB (0x80000000) | ||
| 90 | #define CBAR_KEY (0X000000CB) | ||
| 91 | if (c->x86_model == 9 || c->x86_model == 10) { | ||
| 92 | if (inl (CBAR) & CBAR_ENB) | ||
| 93 | outl (0 | CBAR_KEY, CBAR); | ||
| 94 | } | ||
| 95 | break; | ||
| 96 | case 5: | ||
| 97 | if (c->x86_model < 6) { | ||
| 98 | /* Based on AMD doc 20734R - June 2000 */ | ||
| 99 | if (c->x86_model == 0) { | ||
| 100 | clear_cpu_cap(c, X86_FEATURE_APIC); | ||
| 101 | set_cpu_cap(c, X86_FEATURE_PGE); | ||
| 102 | } | ||
| 103 | break; | ||
| 104 | } | ||
| 105 | |||
| 106 | if (c->x86_model == 6 && c->x86_mask == 1) { | ||
| 107 | const int K6_BUG_LOOP = 1000000; | ||
| 108 | int n; | ||
| 109 | void (*f_vide)(void); | ||
| 110 | unsigned long d, d2; | ||
| 111 | |||
| 112 | printk(KERN_INFO "AMD K6 stepping B detected - "); | ||
| 113 | |||
| 114 | /* | ||
| 115 | * It looks like AMD fixed the 2.6.2 bug and improved indirect | ||
| 116 | * calls at the same time. | ||
| 117 | */ | ||
| 118 | |||
| 119 | n = K6_BUG_LOOP; | ||
| 120 | f_vide = vide; | ||
| 121 | rdtscl(d); | ||
| 122 | while (n--) | ||
| 123 | f_vide(); | ||
| 124 | rdtscl(d2); | ||
| 125 | d = d2-d; | ||
| 126 | |||
| 127 | if (d > 20*K6_BUG_LOOP) | ||
| 128 | printk("system stability may be impaired when more than 32 MB are used.\n"); | ||
| 129 | else | ||
| 130 | printk("probably OK (after B9730xxxx).\n"); | ||
| 131 | printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); | ||
| 132 | } | ||
| 133 | |||
| 134 | /* K6 with old style WHCR */ | ||
| 135 | if (c->x86_model < 8 || | ||
| 136 | (c->x86_model == 8 && c->x86_mask < 8)) { | ||
| 137 | /* We can only write allocate on the low 508Mb */ | ||
| 138 | if (mbytes > 508) | ||
| 139 | mbytes = 508; | ||
| 140 | |||
| 141 | rdmsr(MSR_K6_WHCR, l, h); | ||
| 142 | if ((l&0x0000FFFF) == 0) { | ||
| 143 | unsigned long flags; | ||
| 144 | l = (1<<0)|((mbytes/4)<<1); | ||
| 145 | local_irq_save(flags); | ||
| 146 | wbinvd(); | ||
| 147 | wrmsr(MSR_K6_WHCR, l, h); | ||
| 148 | local_irq_restore(flags); | ||
| 149 | printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", | ||
| 150 | mbytes); | ||
| 151 | } | ||
| 152 | break; | ||
| 153 | } | ||
| 154 | |||
| 155 | if ((c->x86_model == 8 && c->x86_mask > 7) || | ||
| 156 | c->x86_model == 9 || c->x86_model == 13) { | ||
| 157 | /* The more serious chips .. */ | ||
| 158 | |||
| 159 | if (mbytes > 4092) | ||
| 160 | mbytes = 4092; | ||
| 161 | |||
| 162 | rdmsr(MSR_K6_WHCR, l, h); | ||
| 163 | if ((l&0xFFFF0000) == 0) { | ||
| 164 | unsigned long flags; | ||
| 165 | l = ((mbytes>>2)<<22)|(1<<16); | ||
| 166 | local_irq_save(flags); | ||
| 167 | wbinvd(); | ||
| 168 | wrmsr(MSR_K6_WHCR, l, h); | ||
| 169 | local_irq_restore(flags); | ||
| 170 | printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", | ||
| 171 | mbytes); | ||
| 172 | } | ||
| 173 | |||
| 174 | break; | ||
| 175 | } | ||
| 176 | |||
| 177 | if (c->x86_model == 10) { | ||
| 178 | /* AMD Geode LX is model 10 */ | ||
| 179 | /* placeholder for any needed mods */ | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | break; | ||
| 183 | case 6: /* An Athlon/Duron */ | ||
| 184 | |||
| 185 | /* | ||
| 186 | * Bit 15 of Athlon specific MSR 15, needs to be 0 | ||
| 187 | * to enable SSE on Palomino/Morgan/Barton CPU's. | ||
| 188 | * If the BIOS didn't enable it already, enable it here. | ||
| 189 | */ | ||
| 190 | if (c->x86_model >= 6 && c->x86_model <= 10) { | ||
| 191 | if (!cpu_has(c, X86_FEATURE_XMM)) { | ||
| 192 | printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); | ||
| 193 | rdmsr(MSR_K7_HWCR, l, h); | ||
| 194 | l &= ~0x00008000; | ||
| 195 | wrmsr(MSR_K7_HWCR, l, h); | ||
| 196 | set_cpu_cap(c, X86_FEATURE_XMM); | ||
| 197 | } | ||
| 198 | } | ||
| 199 | |||
| 200 | /* | ||
| 201 | * It's been determined by AMD that Athlons since model 8 stepping 1 | ||
| 202 | * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx | ||
| 203 | * As per AMD technical note 27212 0.2 | ||
| 204 | */ | ||
| 205 | if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { | ||
| 206 | rdmsr(MSR_K7_CLK_CTL, l, h); | ||
| 207 | if ((l & 0xfff00000) != 0x20000000) { | ||
| 208 | printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, | ||
| 209 | ((l & 0x000fffff)|0x20000000)); | ||
| 210 | wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); | ||
| 211 | } | ||
| 212 | } | ||
| 213 | break; | ||
| 214 | } | 336 | } |
| 337 | if (c->x86 == 0x10 || c->x86 == 0x11) | ||
| 338 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
| 339 | #else | ||
| 340 | |||
| 341 | /* | ||
| 342 | * FIXME: We should handle the K5 here. Set up the write | ||
| 343 | * range and also turn on MSR 83 bits 4 and 31 (write alloc, | ||
| 344 | * no bus pipeline) | ||
| 345 | */ | ||
| 215 | 346 | ||
| 216 | switch (c->x86) { | 347 | switch (c->x86) { |
| 217 | case 15: | 348 | case 4: |
| 218 | /* Use K8 tuning for Fam10h and Fam11h */ | 349 | init_amd_k5(c); |
| 219 | case 0x10: | ||
| 220 | case 0x11: | ||
| 221 | set_cpu_cap(c, X86_FEATURE_K8); | ||
| 222 | break; | 350 | break; |
| 223 | case 6: | 351 | case 5: |
| 224 | set_cpu_cap(c, X86_FEATURE_K7); | 352 | init_amd_k6(c); |
| 353 | break; | ||
| 354 | case 6: /* An Athlon/Duron */ | ||
| 355 | init_amd_k7(c); | ||
| 225 | break; | 356 | break; |
| 226 | } | 357 | } |
| 358 | |||
| 359 | /* K6s report MCEs but don't actually have all the MSRs */ | ||
| 360 | if (c->x86 < 6) | ||
| 361 | clear_cpu_cap(c, X86_FEATURE_MCE); | ||
| 362 | #endif | ||
| 363 | |||
| 364 | /* Enable workaround for FXSAVE leak */ | ||
| 227 | if (c->x86 >= 6) | 365 | if (c->x86 >= 6) |
| 228 | set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); | 366 | set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); |
| 229 | 367 | ||
| 230 | display_cacheinfo(c); | 368 | if (!c->x86_model_id[0]) { |
| 231 | 369 | switch (c->x86) { | |
| 232 | if (cpuid_eax(0x80000000) >= 0x80000008) | 370 | case 0xf: |
| 233 | c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; | 371 | /* Should distinguish Models here, but this is only |
| 372 | a fallback anyways. */ | ||
| 373 | strcpy(c->x86_model_id, "Hammer"); | ||
| 374 | break; | ||
| 375 | } | ||
| 376 | } | ||
| 234 | 377 | ||
| 235 | #ifdef CONFIG_X86_HT | 378 | display_cacheinfo(c); |
| 236 | /* | ||
| 237 | * On a AMD multi core setup the lower bits of the APIC id | ||
| 238 | * distinguish the cores. | ||
| 239 | */ | ||
| 240 | if (c->x86_max_cores > 1) { | ||
| 241 | int cpu = smp_processor_id(); | ||
| 242 | unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; | ||
| 243 | 379 | ||
| 244 | if (bits == 0) { | 380 | /* Multi core CPU? */ |
| 245 | while ((1 << bits) < c->x86_max_cores) | 381 | if (c->extended_cpuid_level >= 0x80000008) { |
| 246 | bits++; | 382 | amd_detect_cmp(c); |
| 247 | } | 383 | srat_detect_node(c); |
| 248 | c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1); | ||
| 249 | c->phys_proc_id >>= bits; | ||
| 250 | printk(KERN_INFO "CPU %d(%d) -> Core %d\n", | ||
| 251 | cpu, c->x86_max_cores, c->cpu_core_id); | ||
| 252 | } | 384 | } |
| 385 | |||
| 386 | #ifdef CONFIG_X86_32 | ||
| 387 | detect_ht(c); | ||
| 253 | #endif | 388 | #endif |
| 254 | 389 | ||
| 255 | if (cpuid_eax(0x80000000) >= 0x80000006) { | 390 | if (c->extended_cpuid_level >= 0x80000006) { |
| 256 | if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) | 391 | if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) |
| 257 | num_cache_leaves = 4; | 392 | num_cache_leaves = 4; |
| 258 | else | 393 | else |
| 259 | num_cache_leaves = 3; | 394 | num_cache_leaves = 3; |
| 260 | } | 395 | } |
| 261 | 396 | ||
| 262 | /* K6s report MCEs but don't actually have all the MSRs */ | ||
| 263 | if (c->x86 < 6) | 398 | set_cpu_cap(c, X86_FEATURE_K8); |
| 264 | clear_cpu_cap(c, X86_FEATURE_MCE); | ||
| 265 | 399 | ||
| 266 | if (cpu_has_xmm2) | 400 | if (cpu_has_xmm2) { |
| 401 | /* MFENCE stops RDTSC speculation */ | ||
| 267 | set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); | 402 | set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); |
| 403 | } | ||
| 404 | |||
| 405 | #ifdef CONFIG_X86_64 | ||
| 406 | if (c->x86 == 0x10) { | ||
| 407 | /* do this for boot cpu */ | ||
| 408 | if (c == &boot_cpu_data) | ||
| 409 | check_enable_amd_mmconf_dmi(); | ||
| 410 | |||
| 411 | fam10h_check_enable_mmcfg(); | ||
| 412 | } | ||
| 413 | |||
| 414 | if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { | ||
| 415 | unsigned long long tseg; | ||
| 416 | |||
| 417 | /* | ||
| 418 | * Split up direct mapping around the TSEG SMM area. | ||
| 419 | * Don't do it for gbpages because there seems very little | ||
| 420 | * benefit in doing so. | ||
| 421 | */ | ||
| 422 | if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { | ||
| 423 | printk(KERN_DEBUG "tseg: %010llx\n", tseg); | ||
| 424 | if ((tseg>>PMD_SHIFT) < | ||
| 425 | (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || | ||
| 426 | ((tseg>>PMD_SHIFT) < | ||
| 427 | (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && | ||
| 428 | (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) | ||
| 429 | set_memory_4k((unsigned long)__va(tseg), 1); | ||
| 430 | } | ||
| 431 | } | ||
| 432 | #endif | ||
| 268 | } | 433 | } |
| 269 | 434 | ||
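The TSEG block near the end of init_amd() decides whether the SMM area falls inside the kernel's large-page direct mapping and, if so, re-maps the surrounding range with 4 KB pages: the MSR value is compared in 2 MB (PMD) units against the highest mapped low page frame and, for addresses at or above 4 GB, against the highest mapped page frame overall. A stand-alone version of just that predicate, assuming the x86-64 constants PAGE_SHIFT=12 and PMD_SHIFT=21:

  #include <stdio.h>
  #include <stdint.h>

  #define PAGE_SHIFT 12   /* assumed: 4 KB pages */
  #define PMD_SHIFT  21   /* assumed: 2 MB large pages on x86-64 */

  /* Does the TSEG base fall inside the already-mapped direct mapping?
   * The pfn limits are 4 KB page frame counts, tseg is a physical address. */
  static int tseg_in_direct_mapping(uint64_t tseg, uint64_t max_low_pfn_mapped,
                                    uint64_t max_pfn_mapped)
  {
      uint64_t tseg_pmd = tseg >> PMD_SHIFT;

      if (tseg_pmd < (max_low_pfn_mapped >> (PMD_SHIFT - PAGE_SHIFT)))
          return 1;   /* below the end of the low mapping */
      return tseg_pmd < (max_pfn_mapped >> (PMD_SHIFT - PAGE_SHIFT)) &&
             tseg_pmd >= (1ULL << (32 - PMD_SHIFT));   /* >= 4 GB case */
  }

  int main(void)
  {
      /* Hypothetical machine: 3 GB mapped below 4 GB, 8 GB mapped in total. */
      uint64_t low = 3ULL << (30 - PAGE_SHIFT), all = 8ULL << (30 - PAGE_SHIFT);

      printf("TSEG at 0xbff00000: %d\n",
             tseg_in_direct_mapping(0xbff00000ULL, low, all));
      printf("TSEG at 0xfff00000: %d\n",
             tseg_in_direct_mapping(0xfff00000ULL, low, all));
      return 0;
  }
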
| 435 | #ifdef CONFIG_X86_32 | ||
| 270 | static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) | 436 | static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) |
| 271 | { | 437 | { |
| 272 | /* AMD errata T13 (order #21922) */ | 438 | /* AMD errata T13 (order #21922) */ |
| @@ -279,10 +445,12 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int | |||
| 279 | } | 445 | } |
| 280 | return size; | 446 | return size; |
| 281 | } | 447 | } |
| 448 | #endif | ||
| 282 | 449 | ||
| 283 | static struct cpu_dev amd_cpu_dev __cpuinitdata = { | 450 | static struct cpu_dev amd_cpu_dev __cpuinitdata = { |
| 284 | .c_vendor = "AMD", | 451 | .c_vendor = "AMD", |
| 285 | .c_ident = { "AuthenticAMD" }, | 452 | .c_ident = { "AuthenticAMD" }, |
| 453 | #ifdef CONFIG_X86_32 | ||
| 286 | .c_models = { | 454 | .c_models = { |
| 287 | { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = | 455 | { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = |
| 288 | { | 456 | { |
| @@ -295,9 +463,10 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { | |||
| 295 | } | 463 | } |
| 296 | }, | 464 | }, |
| 297 | }, | 465 | }, |
| 466 | .c_size_cache = amd_size_cache, | ||
| 467 | #endif | ||
| 298 | .c_early_init = early_init_amd, | 468 | .c_early_init = early_init_amd, |
| 299 | .c_init = init_amd, | 469 | .c_init = init_amd, |
| 300 | .c_size_cache = amd_size_cache, | ||
| 301 | .c_x86_vendor = X86_VENDOR_AMD, | 470 | .c_x86_vendor = X86_VENDOR_AMD, |
| 302 | }; | 471 | }; |
| 303 | 472 | ||
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c deleted file mode 100644 index d1c721c0c49f..000000000000 --- a/arch/x86/kernel/cpu/amd_64.c +++ /dev/null | |||
| @@ -1,224 +0,0 @@ | |||
| 1 | #include <linux/init.h> | ||
| 2 | #include <linux/mm.h> | ||
| 3 | |||
| 4 | #include <asm/numa_64.h> | ||
| 5 | #include <asm/mmconfig.h> | ||
| 6 | #include <asm/cacheflush.h> | ||
| 7 | |||
| 8 | #include <mach_apic.h> | ||
| 9 | |||
| 10 | #include "cpu.h" | ||
| 11 | |||
| 12 | int force_mwait __cpuinitdata; | ||
| 13 | |||
| 14 | #ifdef CONFIG_NUMA | ||
| 15 | static int __cpuinit nearby_node(int apicid) | ||
| 16 | { | ||
| 17 | int i, node; | ||
| 18 | |||
| 19 | for (i = apicid - 1; i >= 0; i--) { | ||
| 20 | node = apicid_to_node[i]; | ||
| 21 | if (node != NUMA_NO_NODE && node_online(node)) | ||
| 22 | return node; | ||
| 23 | } | ||
| 24 | for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { | ||
| 25 | node = apicid_to_node[i]; | ||
| 26 | if (node != NUMA_NO_NODE && node_online(node)) | ||
| 27 | return node; | ||
| 28 | } | ||
| 29 | return first_node(node_online_map); /* Shouldn't happen */ | ||
| 30 | } | ||
| 31 | #endif | ||
| 32 | |||
| 33 | /* | ||
| 34 | * On an AMD dual core setup the lower bits of the APIC id distinguish the cores. | ||
| 35 | * Assumes number of cores is a power of two. | ||
| 36 | */ | ||
| 37 | static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | ||
| 38 | { | ||
| 39 | #ifdef CONFIG_SMP | ||
| 40 | unsigned bits; | ||
| 41 | #ifdef CONFIG_NUMA | ||
| 42 | int cpu = smp_processor_id(); | ||
| 43 | int node = 0; | ||
| 44 | unsigned apicid = hard_smp_processor_id(); | ||
| 45 | #endif | ||
| 46 | bits = c->x86_coreid_bits; | ||
| 47 | |||
| 48 | /* Low order bits define the core id (index of core in socket) */ | ||
| 49 | c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); | ||
| 50 | /* Convert the initial APIC ID into the socket ID */ | ||
| 51 | c->phys_proc_id = c->initial_apicid >> bits; | ||
| 52 | |||
| 53 | #ifdef CONFIG_NUMA | ||
| 54 | node = c->phys_proc_id; | ||
| 55 | if (apicid_to_node[apicid] != NUMA_NO_NODE) | ||
| 56 | node = apicid_to_node[apicid]; | ||
| 57 | if (!node_online(node)) { | ||
| 58 | /* Two possibilities here: | ||
| 59 | - The CPU is missing memory and no node was created. | ||
| 60 | In that case try picking one from a nearby CPU | ||
| 61 | - The APIC IDs differ from the HyperTransport node IDs | ||
| 62 | which the K8 northbridge parsing fills in. | ||
| 63 | Assume they are all increased by a constant offset, | ||
| 64 | but in the same order as the HT nodeids. | ||
| 65 | If that doesn't result in a usable node fall back to the | ||
| 66 | path for the previous case. */ | ||
| 67 | |||
| 68 | int ht_nodeid = c->initial_apicid; | ||
| 69 | |||
| 70 | if (ht_nodeid >= 0 && | ||
| 71 | apicid_to_node[ht_nodeid] != NUMA_NO_NODE) | ||
| 72 | node = apicid_to_node[ht_nodeid]; | ||
| 73 | /* Pick a nearby node */ | ||
| 74 | if (!node_online(node)) | ||
| 75 | node = nearby_node(apicid); | ||
| 76 | } | ||
| 77 | numa_set_node(cpu, node); | ||
| 78 | |||
| 79 | printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | ||
| 80 | #endif | ||
| 81 | #endif | ||
| 82 | } | ||
| 83 | |||
| 84 | static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) | ||
| 85 | { | ||
| 86 | #ifdef CONFIG_SMP | ||
| 87 | unsigned bits, ecx; | ||
| 88 | |||
| 89 | /* Multi core CPU? */ | ||
| 90 | if (c->extended_cpuid_level < 0x80000008) | ||
| 91 | return; | ||
| 92 | |||
| 93 | ecx = cpuid_ecx(0x80000008); | ||
| 94 | |||
| 95 | c->x86_max_cores = (ecx & 0xff) + 1; | ||
| 96 | |||
| 97 | /* CPU telling us the core id bits shift? */ | ||
| 98 | bits = (ecx >> 12) & 0xF; | ||
| 99 | |||
| 100 | /* Otherwise recompute */ | ||
| 101 | if (bits == 0) { | ||
| 102 | while ((1 << bits) < c->x86_max_cores) | ||
| 103 | bits++; | ||
| 104 | } | ||
| 105 | |||
| 106 | c->x86_coreid_bits = bits; | ||
| 107 | |||
| 108 | #endif | ||
| 109 | } | ||
| 110 | |||
| 111 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | ||
| 112 | { | ||
| 113 | early_init_amd_mc(c); | ||
| 114 | |||
| 115 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | ||
| 116 | if (c->x86_power & (1<<8)) | ||
| 117 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
| 118 | |||
| 119 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); | ||
| 120 | } | ||
| 121 | |||
| 122 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | ||
| 123 | { | ||
| 124 | unsigned level; | ||
| 125 | |||
| 126 | #ifdef CONFIG_SMP | ||
| 127 | unsigned long value; | ||
| 128 | |||
| 129 | /* | ||
| 130 | * Disable TLB flush filter by setting HWCR.FFDIS on K8 | ||
| 131 | * bit 6 of msr C001_0015 | ||
| 132 | * | ||
| 133 | * Errata 63 for SH-B3 steppings | ||
| 134 | * Errata 122 for all steppings (F+ have it disabled by default) | ||
| 135 | */ | ||
| 136 | if (c->x86 == 0xf) { | ||
| 137 | rdmsrl(MSR_K8_HWCR, value); | ||
| 138 | value |= 1 << 6; | ||
| 139 | wrmsrl(MSR_K8_HWCR, value); | ||
| 140 | } | ||
| 141 | #endif | ||
| 142 | |||
| 143 | /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; | ||
| 144 | 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ | ||
| 145 | clear_cpu_cap(c, 0*32+31); | ||
| 146 | |||
| 147 | /* On C+ stepping K8 rep microcode works well for copy/memset */ | ||
| 148 | if (c->x86 == 0xf) { | ||
| 149 | level = cpuid_eax(1); | ||
| 150 | if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) | ||
| 151 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
| 152 | } | ||
| 153 | if (c->x86 == 0x10 || c->x86 == 0x11) | ||
| 154 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
| 155 | |||
| 156 | /* Enable workaround for FXSAVE leak */ | ||
| 157 | if (c->x86 >= 6) | ||
| 158 | set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); | ||
| 159 | |||
| 160 | level = get_model_name(c); | ||
| 161 | if (!level) { | ||
| 162 | switch (c->x86) { | ||
| 163 | case 0xf: | ||
| 164 | /* Should distinguish Models here, but this is only | ||
| 165 | a fallback anyways. */ | ||
| 166 | strcpy(c->x86_model_id, "Hammer"); | ||
| 167 | break; | ||
| 168 | } | ||
| 169 | } | ||
| 170 | display_cacheinfo(c); | ||
| 171 | |||
| 172 | /* Multi core CPU? */ | ||
| 173 | if (c->extended_cpuid_level >= 0x80000008) | ||
| 174 | amd_detect_cmp(c); | ||
| 175 | |||
| 176 | if (c->extended_cpuid_level >= 0x80000006 && | ||
| 177 | (cpuid_edx(0x80000006) & 0xf000)) | ||
| 178 | num_cache_leaves = 4; | ||
| 179 | else | ||
| 180 | num_cache_leaves = 3; | ||
| 181 | |||
| 182 | if (c->x86 >= 0xf && c->x86 <= 0x11) | ||
| 183 | set_cpu_cap(c, X86_FEATURE_K8); | ||
| 184 | |||
| 185 | /* MFENCE stops RDTSC speculation */ | ||
| 186 | set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); | ||
| 187 | |||
| 188 | if (c->x86 == 0x10) { | ||
| 189 | /* do this for boot cpu */ | ||
| 190 | if (c == &boot_cpu_data) | ||
| 191 | check_enable_amd_mmconf_dmi(); | ||
| 192 | |||
| 193 | fam10h_check_enable_mmcfg(); | ||
| 194 | } | ||
| 195 | |||
| 196 | if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { | ||
| 197 | unsigned long long tseg; | ||
| 198 | |||
| 199 | /* | ||
| 200 | * Split up direct mapping around the TSEG SMM area. | ||
| 201 | * Don't do it for gbpages because there seems very little | ||
| 202 | * benefit in doing so. | ||
| 203 | */ | ||
| 204 | if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { | ||
| 205 | printk(KERN_DEBUG "tseg: %010llx\n", tseg); | ||
| 206 | if ((tseg>>PMD_SHIFT) < | ||
| 207 | (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || | ||
| 208 | ((tseg>>PMD_SHIFT) < | ||
| 209 | (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && | ||
| 210 | (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) | ||
| 211 | set_memory_4k((unsigned long)__va(tseg), 1); | ||
| 212 | } | ||
| 213 | } | ||
| 214 | } | ||
| 215 | |||
| 216 | static struct cpu_dev amd_cpu_dev __cpuinitdata = { | ||
| 217 | .c_vendor = "AMD", | ||
| 218 | .c_ident = { "AuthenticAMD" }, | ||
| 219 | .c_early_init = early_init_amd, | ||
| 220 | .c_init = init_amd, | ||
| 221 | .c_x86_vendor = X86_VENDOR_AMD, | ||
| 222 | }; | ||
| 223 | |||
| 224 | cpu_dev_register(amd_cpu_dev); | ||
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e5f6d89521bf..89bfdd9cacc6 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c | |||
| @@ -289,7 +289,6 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) | |||
| 289 | if (c->x86_model >= 6 && c->x86_model < 9) | 289 | if (c->x86_model >= 6 && c->x86_model < 9) |
| 290 | set_cpu_cap(c, X86_FEATURE_3DNOW); | 290 | set_cpu_cap(c, X86_FEATURE_3DNOW); |
| 291 | 291 | ||
| 292 | get_model_name(c); | ||
| 293 | display_cacheinfo(c); | 292 | display_cacheinfo(c); |
| 294 | } | 293 | } |
| 295 | 294 | ||
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 49cfc6d2f2fb..a1625f5a1e78 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c | |||
| @@ -16,9 +16,10 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) | |||
| 16 | 16 | ||
| 17 | static void __cpuinit init_centaur(struct cpuinfo_x86 *c) | 17 | static void __cpuinit init_centaur(struct cpuinfo_x86 *c) |
| 18 | { | 18 | { |
| 19 | early_init_centaur(c); | ||
| 20 | |||
| 19 | if (c->x86 == 0x6 && c->x86_model >= 0xf) { | 21 | if (c->x86 == 0x6 && c->x86_model >= 0xf) { |
| 20 | c->x86_cache_alignment = c->x86_clflush_size * 2; | 22 | c->x86_cache_alignment = c->x86_clflush_size * 2; |
| 21 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
| 22 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 23 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
| 23 | } | 24 | } |
| 24 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | 25 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7d5a07f0fd24..7581b62df184 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -1,29 +1,61 @@ | |||
| 1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
| 2 | #include <linux/kernel.h> | ||
| 3 | #include <linux/sched.h> | ||
| 2 | #include <linux/string.h> | 4 | #include <linux/string.h> |
| 5 | #include <linux/bootmem.h> | ||
| 6 | #include <linux/bitops.h> | ||
| 7 | #include <linux/module.h> | ||
| 8 | #include <linux/kgdb.h> | ||
| 9 | #include <linux/topology.h> | ||
| 3 | #include <linux/delay.h> | 10 | #include <linux/delay.h> |
| 4 | #include <linux/smp.h> | 11 | #include <linux/smp.h> |
| 5 | #include <linux/module.h> | ||
| 6 | #include <linux/percpu.h> | 12 | #include <linux/percpu.h> |
| 7 | #include <linux/bootmem.h> | ||
| 8 | #include <asm/processor.h> | ||
| 9 | #include <asm/i387.h> | 13 | #include <asm/i387.h> |
| 10 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
| 11 | #include <asm/io.h> | 15 | #include <asm/io.h> |
| 16 | #include <asm/linkage.h> | ||
| 12 | #include <asm/mmu_context.h> | 17 | #include <asm/mmu_context.h> |
| 13 | #include <asm/mtrr.h> | 18 | #include <asm/mtrr.h> |
| 14 | #include <asm/mce.h> | 19 | #include <asm/mce.h> |
| 15 | #include <asm/pat.h> | 20 | #include <asm/pat.h> |
| 16 | #include <asm/asm.h> | 21 | #include <asm/asm.h> |
| 22 | #include <asm/numa.h> | ||
| 17 | #ifdef CONFIG_X86_LOCAL_APIC | 23 | #ifdef CONFIG_X86_LOCAL_APIC |
| 18 | #include <asm/mpspec.h> | 24 | #include <asm/mpspec.h> |
| 19 | #include <asm/apic.h> | 25 | #include <asm/apic.h> |
| 20 | #include <mach_apic.h> | 26 | #include <mach_apic.h> |
| 27 | #include <asm/genapic.h> | ||
| 21 | #endif | 28 | #endif |
| 22 | 29 | ||
| 30 | #include <asm/pda.h> | ||
| 31 | #include <asm/pgtable.h> | ||
| 32 | #include <asm/processor.h> | ||
| 33 | #include <asm/desc.h> | ||
| 34 | #include <asm/atomic.h> | ||
| 35 | #include <asm/proto.h> | ||
| 36 | #include <asm/sections.h> | ||
| 37 | #include <asm/setup.h> | ||
| 38 | |||
| 23 | #include "cpu.h" | 39 | #include "cpu.h" |
| 24 | 40 | ||
| 25 | static struct cpu_dev *this_cpu __cpuinitdata; | 41 | static struct cpu_dev *this_cpu __cpuinitdata; |
| 26 | 42 | ||
| 43 | #ifdef CONFIG_X86_64 | ||
| 44 | /* We need valid kernel segments for data and code in long mode too | ||
| 45 | * IRET will check the segment types kkeil 2000/10/28 | ||
| 46 | * Also sysret mandates a special GDT layout | ||
| 47 | */ | ||
| 48 | /* The TLS descriptors are currently at a different place compared to i386. | ||
| 49 | Hopefully nobody expects them at a fixed place (Wine?) */ | ||
| 50 | DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { | ||
| 51 | [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, | ||
| 52 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, | ||
| 53 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, | ||
| 54 | [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, | ||
| 55 | [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, | ||
| 56 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, | ||
| 57 | } }; | ||
| 58 | #else | ||
| 27 | DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | 59 | DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { |
| 28 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, | 60 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, |
| 29 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, | 61 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, |
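The 64-bit GDT added to common.c stores each 8-byte descriptor as two 32-bit words (the { a, b } initialisers above, with a the low and b the high dword). Decoding those words follows the standard segment-descriptor layout; a small stand-alone decoder, included as an illustration of why 0x0000ffff/0x00af9b00 is a present, ring-0, long-mode code segment:

  #include <stdio.h>
  #include <stdint.h>

  /* Decode an x86 segment descriptor given as the kernel's { a, b } pair. */
  static void decode_desc(uint32_t a, uint32_t b)
  {
      uint32_t base  = (a >> 16) | ((b & 0xff) << 16) | (b & 0xff000000);
      uint32_t limit = (a & 0xffff) | (b & 0x000f0000);

      printf("base=%#x limit=%#x type=%#x dpl=%u P=%u L=%u D=%u G=%u\n",
             base, limit,
             (b >> 8) & 0xf,     /* segment type */
             (b >> 13) & 0x3,    /* descriptor privilege level */
             (b >> 15) & 0x1,    /* present */
             (b >> 21) & 0x1,    /* 64-bit (long mode) code segment */
             (b >> 22) & 0x1,    /* default operand size */
             (b >> 23) & 0x1);   /* granularity */
  }

  int main(void)
  {
      decode_desc(0x0000ffff, 0x00af9b00);   /* GDT_ENTRY_KERNEL_CS above */
      decode_desc(0x0000ffff, 0x00cf9300);   /* GDT_ENTRY_KERNEL_DS above */
      return 0;
  }
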
| @@ -58,8 +90,10 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | |||
| 58 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, | 90 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, |
| 59 | [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, | 91 | [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, |
| 60 | } }; | 92 | } }; |
| 93 | #endif | ||
| 61 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | 94 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); |
| 62 | 95 | ||
| 96 | #ifdef CONFIG_X86_32 | ||
| 63 | static int cachesize_override __cpuinitdata = -1; | 97 | static int cachesize_override __cpuinitdata = -1; |
| 64 | static int disable_x86_serial_nr __cpuinitdata = 1; | 98 | static int disable_x86_serial_nr __cpuinitdata = 1; |
| 65 | 99 | ||
| @@ -70,34 +104,6 @@ static int __init cachesize_setup(char *str) | |||
| 70 | } | 104 | } |
| 71 | __setup("cachesize=", cachesize_setup); | 105 | __setup("cachesize=", cachesize_setup); |
| 72 | 106 | ||
| 73 | /* | ||
| 74 | * Naming convention should be: <Name> [(<Codename>)] | ||
| 75 | * This table only is used unless init_<vendor>() below doesn't set it; | ||
| 76 | * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used | ||
| 77 | * | ||
| 78 | */ | ||
| 79 | |||
| 80 | /* Look up CPU names by table lookup. */ | ||
| 81 | static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) | ||
| 82 | { | ||
| 83 | struct cpu_model_info *info; | ||
| 84 | |||
| 85 | if (c->x86_model >= 16) | ||
| 86 | return NULL; /* Range check */ | ||
| 87 | |||
| 88 | if (!this_cpu) | ||
| 89 | return NULL; | ||
| 90 | |||
| 91 | info = this_cpu->c_models; | ||
| 92 | |||
| 93 | while (info && info->family) { | ||
| 94 | if (info->family == c->x86) | ||
| 95 | return info->model_names[c->x86_model]; | ||
| 96 | info++; | ||
| 97 | } | ||
| 98 | return NULL; /* Not found */ | ||
| 99 | } | ||
| 100 | |||
| 101 | static int __init x86_fxsr_setup(char *s) | 107 | static int __init x86_fxsr_setup(char *s) |
| 102 | { | 108 | { |
| 103 | setup_clear_cpu_cap(X86_FEATURE_FXSR); | 109 | setup_clear_cpu_cap(X86_FEATURE_FXSR); |
| @@ -162,6 +168,48 @@ static int __init x86_serial_nr_setup(char *s) | |||
| 162 | return 1; | 168 | return 1; |
| 163 | } | 169 | } |
| 164 | __setup("serialnumber", x86_serial_nr_setup); | 170 | __setup("serialnumber", x86_serial_nr_setup); |
| 171 | #else | ||
| 172 | static inline int flag_is_changeable_p(u32 flag) | ||
| 173 | { | ||
| 174 | return 1; | ||
| 175 | } | ||
| 176 | /* Probe for the CPUID instruction */ | ||
| 177 | static inline int have_cpuid_p(void) | ||
| 178 | { | ||
| 179 | return 1; | ||
| 180 | } | ||
| 181 | static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | ||
| 182 | { | ||
| 183 | } | ||
| 184 | #endif | ||
| 185 | |||
| 186 | /* | ||
| 187 | * Naming convention should be: <Name> [(<Codename>)] | ||
| 188 | * This table only is used unless init_<vendor>() below doesn't set it; | ||
| 189 | * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used | ||
| 190 | * | ||
| 191 | */ | ||
| 192 | |||
| 193 | /* Look up CPU names by table lookup. */ | ||
| 194 | static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) | ||
| 195 | { | ||
| 196 | struct cpu_model_info *info; | ||
| 197 | |||
| 198 | if (c->x86_model >= 16) | ||
| 199 | return NULL; /* Range check */ | ||
| 200 | |||
| 201 | if (!this_cpu) | ||
| 202 | return NULL; | ||
| 203 | |||
| 204 | info = this_cpu->c_models; | ||
| 205 | |||
| 206 | while (info && info->family) { | ||
| 207 | if (info->family == c->x86) | ||
| 208 | return info->model_names[c->x86_model]; | ||
| 209 | info++; | ||
| 210 | } | ||
| 211 | return NULL; /* Not found */ | ||
| 212 | } | ||
| 165 | 213 | ||
| 166 | __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; | 214 | __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; |
| 167 | 215 | ||
| @@ -174,13 +222,18 @@ void switch_to_new_gdt(void) | |||
| 174 | gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); | 222 | gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); |
| 175 | gdt_descr.size = GDT_SIZE - 1; | 223 | gdt_descr.size = GDT_SIZE - 1; |
| 176 | load_gdt(&gdt_descr); | 224 | load_gdt(&gdt_descr); |
| 225 | #ifdef CONFIG_X86_32 | ||
| 177 | asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); | 226 | asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); |
| 227 | #endif | ||
| 178 | } | 228 | } |
| 179 | 229 | ||
| 180 | static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; | 230 | static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; |
| 181 | 231 | ||
| 182 | static void __cpuinit default_init(struct cpuinfo_x86 *c) | 232 | static void __cpuinit default_init(struct cpuinfo_x86 *c) |
| 183 | { | 233 | { |
| 234 | #ifdef CONFIG_X86_64 | ||
| 235 | display_cacheinfo(c); | ||
| 236 | #else | ||
| 184 | /* Not much we can do here... */ | 237 | /* Not much we can do here... */ |
| 185 | /* Check if at least it has cpuid */ | 238 | /* Check if at least it has cpuid */ |
| 186 | if (c->cpuid_level == -1) { | 239 | if (c->cpuid_level == -1) { |
| @@ -190,6 +243,7 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) | |||
| 190 | else if (c->x86 == 3) | 243 | else if (c->x86 == 3) |
| 191 | strcpy(c->x86_model_id, "386"); | 244 | strcpy(c->x86_model_id, "386"); |
| 192 | } | 245 | } |
| 246 | #endif | ||
| 193 | } | 247 | } |
| 194 | 248 | ||
| 195 | static struct cpu_dev __cpuinitdata default_cpu = { | 249 | static struct cpu_dev __cpuinitdata default_cpu = { |
| @@ -198,13 +252,13 @@ static struct cpu_dev __cpuinitdata default_cpu = { | |||
| 198 | .c_x86_vendor = X86_VENDOR_UNKNOWN, | 252 | .c_x86_vendor = X86_VENDOR_UNKNOWN, |
| 199 | }; | 253 | }; |
| 200 | 254 | ||
| 201 | int __cpuinit get_model_name(struct cpuinfo_x86 *c) | 255 | static void __cpuinit get_model_name(struct cpuinfo_x86 *c) |
| 202 | { | 256 | { |
| 203 | unsigned int *v; | 257 | unsigned int *v; |
| 204 | char *p, *q; | 258 | char *p, *q; |
| 205 | 259 | ||
| 206 | if (c->extended_cpuid_level < 0x80000004) | 260 | if (c->extended_cpuid_level < 0x80000004) |
| 207 | return 0; | 261 | return; |
| 208 | 262 | ||
| 209 | v = (unsigned int *) c->x86_model_id; | 263 | v = (unsigned int *) c->x86_model_id; |
| 210 | cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); | 264 | cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); |
| @@ -223,8 +277,6 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) | |||
| 223 | while (q <= &c->x86_model_id[48]) | 277 | while (q <= &c->x86_model_id[48]) |
| 224 | *q++ = '\0'; /* Zero-pad the rest */ | 278 | *q++ = '\0'; /* Zero-pad the rest */ |
| 225 | } | 279 | } |
| 226 | |||
| 227 | return 1; | ||
| 228 | } | 280 | } |
| 229 | 281 | ||
| 230 | void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | 282 | void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) |
| @@ -238,6 +290,10 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |||
| 238 | printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", | 290 | printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", |
| 239 | edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); | 291 | edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); |
| 240 | c->x86_cache_size = (ecx>>24) + (edx>>24); | 292 | c->x86_cache_size = (ecx>>24) + (edx>>24); |
| 293 | #ifdef CONFIG_X86_64 | ||
| 294 | /* On K8 L1 TLB is inclusive, so don't count it */ | ||
| 295 | c->x86_tlbsize = 0; | ||
| 296 | #endif | ||
| 241 | } | 297 | } |
| 242 | 298 | ||
| 243 | if (n < 0x80000006) /* Some chips just have a large L1. */ | 299 | if (n < 0x80000006) /* Some chips just have a large L1. */ |
| @@ -246,6 +302,9 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |||
| 246 | cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); | 302 | cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); |
| 247 | l2size = ecx >> 16; | 303 | l2size = ecx >> 16; |
| 248 | 304 | ||
| 305 | #ifdef CONFIG_X86_64 | ||
| 306 | c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); | ||
| 307 | #else | ||
| 249 | /* do processor-specific cache resizing */ | 308 | /* do processor-specific cache resizing */ |
| 250 | if (this_cpu->c_size_cache) | 309 | if (this_cpu->c_size_cache) |
| 251 | l2size = this_cpu->c_size_cache(c, l2size); | 310 | l2size = this_cpu->c_size_cache(c, l2size); |
| @@ -256,6 +315,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |||
| 256 | 315 | ||
| 257 | if (l2size == 0) | 316 | if (l2size == 0) |
| 258 | return; /* Again, no L2 cache is possible */ | 317 | return; /* Again, no L2 cache is possible */ |
| 318 | #endif | ||
| 259 | 319 | ||
| 260 | c->x86_cache_size = l2size; | 320 | c->x86_cache_size = l2size; |
| 261 | 321 | ||
| @@ -263,9 +323,9 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |||
| 263 | l2size, ecx & 0xFF); | 323 | l2size, ecx & 0xFF); |
| 264 | } | 324 | } |
| 265 | 325 | ||
| 266 | #ifdef CONFIG_X86_HT | ||
| 267 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) | 326 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
| 268 | { | 327 | { |
| 328 | #ifdef CONFIG_X86_HT | ||
| 269 | u32 eax, ebx, ecx, edx; | 329 | u32 eax, ebx, ecx, edx; |
| 270 | int index_msb, core_bits; | 330 | int index_msb, core_bits; |
| 271 | 331 | ||
| @@ -275,6 +335,9 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
| 275 | if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) | 335 | if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) |
| 276 | goto out; | 336 | goto out; |
| 277 | 337 | ||
| 338 | if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) | ||
| 339 | return; | ||
| 340 | |||
| 278 | cpuid(1, &eax, &ebx, &ecx, &edx); | 341 | cpuid(1, &eax, &ebx, &ecx, &edx); |
| 279 | 342 | ||
| 280 | smp_num_siblings = (ebx & 0xff0000) >> 16; | 343 | smp_num_siblings = (ebx & 0xff0000) >> 16; |
| @@ -291,8 +354,11 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
| 291 | } | 354 | } |
| 292 | 355 | ||
| 293 | index_msb = get_count_order(smp_num_siblings); | 356 | index_msb = get_count_order(smp_num_siblings); |
| 357 | #ifdef CONFIG_X86_64 | ||
| 358 | c->phys_proc_id = phys_pkg_id(index_msb); | ||
| 359 | #else | ||
| 294 | c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); | 360 | c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); |
| 295 | 361 | #endif | |
| 296 | 362 | ||
| 297 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; | 363 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; |
| 298 | 364 | ||
| @@ -300,8 +366,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
| 300 | 366 | ||
| 301 | core_bits = get_count_order(c->x86_max_cores); | 367 | core_bits = get_count_order(c->x86_max_cores); |
| 302 | 368 | ||
| 369 | #ifdef CONFIG_X86_64 | ||
| 370 | c->cpu_core_id = phys_pkg_id(index_msb) & | ||
| 371 | ((1 << core_bits) - 1); | ||
| 372 | #else | ||
| 303 | c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & | 373 | c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & |
| 304 | ((1 << core_bits) - 1); | 374 | ((1 << core_bits) - 1); |
| 375 | #endif | ||
| 305 | } | 376 | } |
| 306 | 377 | ||
| 307 | out: | 378 | out: |
| @@ -311,8 +382,8 @@ out: | |||
| 311 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | 382 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", |
| 312 | c->cpu_core_id); | 383 | c->cpu_core_id); |
| 313 | } | 384 | } |
| 314 | } | ||
| 315 | #endif | 385 | #endif |
| 386 | } | ||
| 316 | 387 | ||
| 317 | static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) | 388 | static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) |
| 318 | { | 389 | { |
| @@ -335,7 +406,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) | |||
| 335 | 406 | ||
| 336 | if (!printed) { | 407 | if (!printed) { |
| 337 | printed++; | 408 | printed++; |
| 338 | printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); | 409 | printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v); |
| 339 | printk(KERN_ERR "CPU: Your system may be unstable.\n"); | 410 | printk(KERN_ERR "CPU: Your system may be unstable.\n"); |
| 340 | } | 411 | } |
| 341 | 412 | ||
| @@ -392,7 +463,47 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | |||
| 392 | c->x86_capability[6] = cpuid_ecx(0x80000001); | 463 | c->x86_capability[6] = cpuid_ecx(0x80000001); |
| 393 | } | 464 | } |
| 394 | } | 465 | } |
| 466 | |||
| 467 | #ifdef CONFIG_X86_64 | ||
| 468 | if (c->extended_cpuid_level >= 0x80000008) { | ||
| 469 | u32 eax = cpuid_eax(0x80000008); | ||
| 470 | |||
| 471 | c->x86_virt_bits = (eax >> 8) & 0xff; | ||
| 472 | c->x86_phys_bits = eax & 0xff; | ||
| 473 | } | ||
| 474 | #endif | ||
| 475 | |||
| 476 | if (c->extended_cpuid_level >= 0x80000007) | ||
| 477 | c->x86_power = cpuid_edx(0x80000007); | ||
| 478 | |||
| 395 | } | 479 | } |
| 480 | |||
| 481 | static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) | ||
| 482 | { | ||
| 483 | #ifdef CONFIG_X86_32 | ||
| 484 | int i; | ||
| 485 | |||
| 486 | /* | ||
| 487 | * First of all, decide if this is a 486 or higher | ||
| 488 | * It's a 486 if we can modify the AC flag | ||
| 489 | */ | ||
| 490 | if (flag_is_changeable_p(X86_EFLAGS_AC)) | ||
| 491 | c->x86 = 4; | ||
| 492 | else | ||
| 493 | c->x86 = 3; | ||
| 494 | |||
| 495 | for (i = 0; i < X86_VENDOR_NUM; i++) | ||
| 496 | if (cpu_devs[i] && cpu_devs[i]->c_identify) { | ||
| 497 | c->x86_vendor_id[0] = 0; | ||
| 498 | cpu_devs[i]->c_identify(c); | ||
| 499 | if (c->x86_vendor_id[0]) { | ||
| 500 | get_cpu_vendor(c); | ||
| 501 | break; | ||
| 502 | } | ||
| 503 | } | ||
| 504 | #endif | ||
| 505 | } | ||
| 506 | |||
| 396 | /* | 507 | /* |
| 397 | * Do minimum CPU detection early. | 508 | * Do minimum CPU detection early. |
| 398 | * Fields really needed: vendor, cpuid_level, family, model, mask, | 509 | * Fields really needed: vendor, cpuid_level, family, model, mask, |
| @@ -404,16 +515,23 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | |||
| 404 | */ | 515 | */ |
| 405 | static void __init early_identify_cpu(struct cpuinfo_x86 *c) | 516 | static void __init early_identify_cpu(struct cpuinfo_x86 *c) |
| 406 | { | 517 | { |
| 518 | #ifdef CONFIG_X86_64 | ||
| 519 | c->x86_clflush_size = 64; | ||
| 520 | #else | ||
| 407 | c->x86_clflush_size = 32; | 521 | c->x86_clflush_size = 32; |
| 522 | #endif | ||
| 408 | c->x86_cache_alignment = c->x86_clflush_size; | 523 | c->x86_cache_alignment = c->x86_clflush_size; |
| 409 | 524 | ||
| 410 | if (!have_cpuid_p()) | ||
| 411 | return; | ||
| 412 | |||
| 413 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | 525 | memset(&c->x86_capability, 0, sizeof c->x86_capability); |
| 414 | |||
| 415 | c->extended_cpuid_level = 0; | 526 | c->extended_cpuid_level = 0; |
| 416 | 527 | ||
| 528 | if (!have_cpuid_p()) | ||
| 529 | identify_cpu_without_cpuid(c); | ||
| 530 | |||
| 531 | /* Cyrix could have cpuid enabled via c_identify() */ | ||
| 532 | if (!have_cpuid_p()) | ||
| 533 | return; | ||
| 534 | |||
| 417 | cpu_detect(c); | 535 | cpu_detect(c); |
| 418 | 536 | ||
| 419 | get_cpu_vendor(c); | 537 | get_cpu_vendor(c); |
| @@ -454,39 +572,27 @@ void __init early_cpu_init(void) | |||
| 454 | 572 | ||
| 455 | /* | 573 | /* |
| 456 | * The NOPL instruction is supposed to exist on all CPUs with | 574 | * The NOPL instruction is supposed to exist on all CPUs with |
| 457 | * family >= 6, unfortunately, that's not true in practice because | 575 | * family >= 6; unfortunately, that's not true in practice because |
| 458 | * of early VIA chips and (more importantly) broken virtualizers that | 576 | * of early VIA chips and (more importantly) broken virtualizers that |
| 459 | * are not easy to detect. Hence, probe for it based on first | 577 | * are not easy to detect. In the latter case it doesn't even *fail* |
| 460 | * principles. | 578 | * reliably, so probing for it doesn't even work. Disable it completely |
| 579 | * unless we can find a reliable way to detect all the broken cases. | ||
| 461 | */ | 580 | */ |
| 462 | static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) | 581 | static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) |
| 463 | { | 582 | { |
| 464 | const u32 nopl_signature = 0x888c53b1; /* Random number */ | ||
| 465 | u32 has_nopl = nopl_signature; | ||
| 466 | |||
| 467 | clear_cpu_cap(c, X86_FEATURE_NOPL); | 583 | clear_cpu_cap(c, X86_FEATURE_NOPL); |
| 468 | if (c->x86 >= 6) { | ||
| 469 | asm volatile("\n" | ||
| 470 | "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ | ||
| 471 | "2:\n" | ||
| 472 | " .section .fixup,\"ax\"\n" | ||
| 473 | "3: xor %0,%0\n" | ||
| 474 | " jmp 2b\n" | ||
| 475 | " .previous\n" | ||
| 476 | _ASM_EXTABLE(1b,3b) | ||
| 477 | : "+a" (has_nopl)); | ||
| 478 | |||
| 479 | if (has_nopl == nopl_signature) | ||
| 480 | set_cpu_cap(c, X86_FEATURE_NOPL); | ||
| 481 | } | ||
| 482 | } | 584 | } |
| 483 | 585 | ||
| 484 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | 586 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) |
| 485 | { | 587 | { |
| 588 | c->extended_cpuid_level = 0; | ||
| 589 | |||
| 486 | if (!have_cpuid_p()) | 590 | if (!have_cpuid_p()) |
| 487 | return; | 591 | identify_cpu_without_cpuid(c); |
| 488 | 592 | ||
| 489 | c->extended_cpuid_level = 0; | 593 | /* Cyrix could have cpuid enabled via c_identify() */ |
| 594 | if (!have_cpuid_p()) | ||
| 595 | return; | ||
| 490 | 596 | ||
| 491 | cpu_detect(c); | 597 | cpu_detect(c); |
| 492 | 598 | ||
| @@ -496,16 +602,20 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | |||
| 496 | 602 | ||
| 497 | if (c->cpuid_level >= 0x00000001) { | 603 | if (c->cpuid_level >= 0x00000001) { |
| 498 | c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; | 604 | c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; |
| 499 | #ifdef CONFIG_X86_HT | 605 | #ifdef CONFIG_X86_32 |
| 606 | # ifdef CONFIG_X86_HT | ||
| 500 | c->apicid = phys_pkg_id(c->initial_apicid, 0); | 607 | c->apicid = phys_pkg_id(c->initial_apicid, 0); |
| 501 | c->phys_proc_id = c->initial_apicid; | 608 | # else |
| 502 | #else | ||
| 503 | c->apicid = c->initial_apicid; | 609 | c->apicid = c->initial_apicid; |
| 610 | # endif | ||
| 611 | #endif | ||
| 612 | |||
| 613 | #ifdef CONFIG_X86_HT | ||
| 614 | c->phys_proc_id = c->initial_apicid; | ||
| 504 | #endif | 615 | #endif |
| 505 | } | 616 | } |
| 506 | 617 | ||
| 507 | if (c->extended_cpuid_level >= 0x80000004) | 618 | get_model_name(c); /* Default name */ |
| 508 | get_model_name(c); /* Default name */ | ||
| 509 | 619 | ||
| 510 | init_scattered_cpuid_features(c); | 620 | init_scattered_cpuid_features(c); |
| 511 | detect_nopl(c); | 621 | detect_nopl(c); |
| @@ -521,30 +631,29 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
| 521 | c->loops_per_jiffy = loops_per_jiffy; | 631 | c->loops_per_jiffy = loops_per_jiffy; |
| 522 | c->x86_cache_size = -1; | 632 | c->x86_cache_size = -1; |
| 523 | c->x86_vendor = X86_VENDOR_UNKNOWN; | 633 | c->x86_vendor = X86_VENDOR_UNKNOWN; |
| 524 | c->cpuid_level = -1; /* CPUID not detected */ | ||
| 525 | c->x86_model = c->x86_mask = 0; /* So far unknown... */ | 634 | c->x86_model = c->x86_mask = 0; /* So far unknown... */ |
| 526 | c->x86_vendor_id[0] = '\0'; /* Unset */ | 635 | c->x86_vendor_id[0] = '\0'; /* Unset */ |
| 527 | c->x86_model_id[0] = '\0'; /* Unset */ | 636 | c->x86_model_id[0] = '\0'; /* Unset */ |
| 528 | c->x86_max_cores = 1; | 637 | c->x86_max_cores = 1; |
| 638 | c->x86_coreid_bits = 0; | ||
| 639 | #ifdef CONFIG_X86_64 | ||
| 640 | c->x86_clflush_size = 64; | ||
| 641 | #else | ||
| 642 | c->cpuid_level = -1; /* CPUID not detected */ | ||
| 529 | c->x86_clflush_size = 32; | 643 | c->x86_clflush_size = 32; |
| 644 | #endif | ||
| 645 | c->x86_cache_alignment = c->x86_clflush_size; | ||
| 530 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | 646 | memset(&c->x86_capability, 0, sizeof c->x86_capability); |
| 531 | 647 | ||
| 532 | if (!have_cpuid_p()) { | ||
| 533 | /* | ||
| 534 | * First of all, decide if this is a 486 or higher | ||
| 535 | * It's a 486 if we can modify the AC flag | ||
| 536 | */ | ||
| 537 | if (flag_is_changeable_p(X86_EFLAGS_AC)) | ||
| 538 | c->x86 = 4; | ||
| 539 | else | ||
| 540 | c->x86 = 3; | ||
| 541 | } | ||
| 542 | |||
| 543 | generic_identify(c); | 648 | generic_identify(c); |
| 544 | 649 | ||
| 545 | if (this_cpu->c_identify) | 650 | if (this_cpu->c_identify) |
| 546 | this_cpu->c_identify(c); | 651 | this_cpu->c_identify(c); |
| 547 | 652 | ||
| 653 | #ifdef CONFIG_X86_64 | ||
| 654 | c->apicid = phys_pkg_id(0); | ||
| 655 | #endif | ||
| 656 | |||
| 548 | /* | 657 | /* |
| 549 | * Vendor-specific initialization. In this section we | 658 | * Vendor-specific initialization. In this section we |
| 550 | * canonicalize the feature flags, meaning if there are | 659 | * canonicalize the feature flags, meaning if there are |
| @@ -578,6 +687,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
| 578 | c->x86, c->x86_model); | 687 | c->x86, c->x86_model); |
| 579 | } | 688 | } |
| 580 | 689 | ||
| 690 | #ifdef CONFIG_X86_64 | ||
| 691 | detect_ht(c); | ||
| 692 | #endif | ||
| 693 | |||
| 581 | /* | 694 | /* |
| 582 | * On SMP, boot_cpu_data holds the common feature set between | 695 | * On SMP, boot_cpu_data holds the common feature set between |
| 583 | * all CPUs; so make sure that we indicate which features are | 696 | * all CPUs; so make sure that we indicate which features are |
| @@ -594,24 +707,34 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
| 594 | for (i = 0; i < NCAPINTS; i++) | 707 | for (i = 0; i < NCAPINTS; i++) |
| 595 | c->x86_capability[i] &= ~cleared_cpu_caps[i]; | 708 | c->x86_capability[i] &= ~cleared_cpu_caps[i]; |
| 596 | 709 | ||
| 710 | #ifdef CONFIG_X86_MCE | ||
| 597 | /* Init Machine Check Exception if available. */ | 711 | /* Init Machine Check Exception if available. */ |
| 598 | mcheck_init(c); | 712 | mcheck_init(c); |
| 713 | #endif | ||
| 599 | 714 | ||
| 600 | select_idle_routine(c); | 715 | select_idle_routine(c); |
| 716 | |||
| 717 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | ||
| 718 | numa_add_cpu(smp_processor_id()); | ||
| 719 | #endif | ||
| 601 | } | 720 | } |
| 602 | 721 | ||
| 603 | void __init identify_boot_cpu(void) | 722 | void __init identify_boot_cpu(void) |
| 604 | { | 723 | { |
| 605 | identify_cpu(&boot_cpu_data); | 724 | identify_cpu(&boot_cpu_data); |
| 725 | #ifdef CONFIG_X86_32 | ||
| 606 | sysenter_setup(); | 726 | sysenter_setup(); |
| 607 | enable_sep_cpu(); | 727 | enable_sep_cpu(); |
| 728 | #endif | ||
| 608 | } | 729 | } |
| 609 | 730 | ||
| 610 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | 731 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
| 611 | { | 732 | { |
| 612 | BUG_ON(c == &boot_cpu_data); | 733 | BUG_ON(c == &boot_cpu_data); |
| 613 | identify_cpu(c); | 734 | identify_cpu(c); |
| 735 | #ifdef CONFIG_X86_32 | ||
| 614 | enable_sep_cpu(); | 736 | enable_sep_cpu(); |
| 737 | #endif | ||
| 615 | mtrr_ap_init(); | 738 | mtrr_ap_init(); |
| 616 | } | 739 | } |
| 617 | 740 | ||
| @@ -709,6 +832,89 @@ __setup("clearcpuid=", setup_disablecpuid); | |||
| 709 | 832 | ||
| 710 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; | 833 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; |
| 711 | 834 | ||
| 835 | #ifdef CONFIG_X86_64 | ||
| 836 | struct x8664_pda **_cpu_pda __read_mostly; | ||
| 837 | EXPORT_SYMBOL(_cpu_pda); | ||
| 838 | |||
| 839 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | ||
| 840 | |||
| 841 | char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; | ||
| 842 | |||
| 843 | void __cpuinit pda_init(int cpu) | ||
| 844 | { | ||
| 845 | struct x8664_pda *pda = cpu_pda(cpu); | ||
| 846 | |||
| 847 | /* Set up data that may be needed in __get_free_pages early */ | ||
| 848 | loadsegment(fs, 0); | ||
| 849 | loadsegment(gs, 0); | ||
| 850 | /* Memory clobbers used to order PDA accesses */ | ||
| 851 | mb(); | ||
| 852 | wrmsrl(MSR_GS_BASE, pda); | ||
| 853 | mb(); | ||
| 854 | |||
| 855 | pda->cpunumber = cpu; | ||
| 856 | pda->irqcount = -1; | ||
| 857 | pda->kernelstack = (unsigned long)stack_thread_info() - | ||
| 858 | PDA_STACKOFFSET + THREAD_SIZE; | ||
| 859 | pda->active_mm = &init_mm; | ||
| 860 | pda->mmu_state = 0; | ||
| 861 | |||
| 862 | if (cpu == 0) { | ||
| 863 | /* others are initialized in smpboot.c */ | ||
| 864 | pda->pcurrent = &init_task; | ||
| 865 | pda->irqstackptr = boot_cpu_stack; | ||
| 866 | pda->irqstackptr += IRQSTACKSIZE - 64; | ||
| 867 | } else { | ||
| 868 | if (!pda->irqstackptr) { | ||
| 869 | pda->irqstackptr = (char *) | ||
| 870 | __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); | ||
| 871 | if (!pda->irqstackptr) | ||
| 872 | panic("cannot allocate irqstack for cpu %d", | ||
| 873 | cpu); | ||
| 874 | pda->irqstackptr += IRQSTACKSIZE - 64; | ||
| 875 | } | ||
| 876 | |||
| 877 | if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) | ||
| 878 | pda->nodenumber = cpu_to_node(cpu); | ||
| 879 | } | ||
| 880 | } | ||
| 881 | |||
| 882 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + | ||
| 883 | DEBUG_STKSZ] __page_aligned_bss; | ||
| 884 | |||
| 885 | extern asmlinkage void ignore_sysret(void); | ||
| 886 | |||
| 887 | /* May not be marked __init: used by software suspend */ | ||
| 888 | void syscall_init(void) | ||
| 889 | { | ||
| 890 | /* | ||
| 891 | * LSTAR and STAR live in a somewhat strange symbiosis: | ||
| 892 | * they both write to the same internal register. STAR allows setting | ||
| 893 | * CS/DS, but only a 32-bit target; LSTAR sets the 64-bit rip. | ||
| 894 | */ | ||
| 895 | wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); | ||
| 896 | wrmsrl(MSR_LSTAR, system_call); | ||
| 897 | wrmsrl(MSR_CSTAR, ignore_sysret); | ||
| 898 | |||
| 899 | #ifdef CONFIG_IA32_EMULATION | ||
| 900 | syscall32_cpu_init(); | ||
| 901 | #endif | ||
| 902 | |||
| 903 | /* Flags to clear on syscall */ | ||
| 904 | wrmsrl(MSR_SYSCALL_MASK, | ||
| 905 | X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); | ||
| 906 | } | ||
| 907 | |||
| 908 | unsigned long kernel_eflags; | ||
| 909 | |||
| 910 | /* | ||
| 911 | * Copies of the original ist values from the tss are only accessed during | ||
| 912 | * debugging; no special alignment is required. | ||
| 913 | */ | ||
| 914 | DEFINE_PER_CPU(struct orig_ist, orig_ist); | ||
| 915 | |||
| 916 | #else | ||
| 917 | |||
| 712 | /* Make sure %fs is initialized properly in idle threads */ | 918 | /* Make sure %fs is initialized properly in idle threads */ |
| 713 | struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) | 919 | struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) |
| 714 | { | 920 | { |
| @@ -716,13 +922,136 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) | |||
| 716 | regs->fs = __KERNEL_PERCPU; | 922 | regs->fs = __KERNEL_PERCPU; |
| 717 | return regs; | 923 | return regs; |
| 718 | } | 924 | } |
| 925 | #endif | ||
| 719 | 926 | ||
| 720 | /* | 927 | /* |
| 721 | * cpu_init() initializes state that is per-CPU. Some data is already | 928 | * cpu_init() initializes state that is per-CPU. Some data is already |
| 722 | * initialized (naturally) in the bootstrap process, such as the GDT | 929 | * initialized (naturally) in the bootstrap process, such as the GDT |
| 723 | * and IDT. We reload them nevertheless, this function acts as a | 930 | * and IDT. We reload them nevertheless, this function acts as a |
| 724 | * 'CPU state barrier', nothing should get across. | 931 | * 'CPU state barrier', nothing should get across. |
| 932 | * A lot of state is already set up in PDA init for 64-bit. | ||
| 725 | */ | 933 | */ |
| 934 | #ifdef CONFIG_X86_64 | ||
| 935 | void __cpuinit cpu_init(void) | ||
| 936 | { | ||
| 937 | int cpu = stack_smp_processor_id(); | ||
| 938 | struct tss_struct *t = &per_cpu(init_tss, cpu); | ||
| 939 | struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); | ||
| 940 | unsigned long v; | ||
| 941 | char *estacks = NULL; | ||
| 942 | struct task_struct *me; | ||
| 943 | int i; | ||
| 944 | |||
| 945 | /* CPU 0 is initialised in head64.c */ | ||
| 946 | if (cpu != 0) | ||
| 947 | pda_init(cpu); | ||
| 948 | else | ||
| 949 | estacks = boot_exception_stacks; | ||
| 950 | |||
| 951 | me = current; | ||
| 952 | |||
| 953 | if (cpu_test_and_set(cpu, cpu_initialized)) | ||
| 954 | panic("CPU#%d already initialized!\n", cpu); | ||
| 955 | |||
| 956 | printk(KERN_INFO "Initializing CPU#%d\n", cpu); | ||
| 957 | |||
| 958 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | ||
| 959 | |||
| 960 | /* | ||
| 961 | * Initialize the per-CPU GDT with the boot GDT, | ||
| 962 | * and set up the GDT descriptor: | ||
| 963 | */ | ||
| 964 | |||
| 965 | switch_to_new_gdt(); | ||
| 966 | load_idt((const struct desc_ptr *)&idt_descr); | ||
| 967 | |||
| 968 | memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); | ||
| 969 | syscall_init(); | ||
| 970 | |||
| 971 | wrmsrl(MSR_FS_BASE, 0); | ||
| 972 | wrmsrl(MSR_KERNEL_GS_BASE, 0); | ||
| 973 | barrier(); | ||
| 974 | |||
| 975 | check_efer(); | ||
| 976 | if (cpu != 0 && x2apic) | ||
| 977 | enable_x2apic(); | ||
| 978 | |||
| 979 | /* | ||
| 980 | * set up and load the per-CPU TSS | ||
| 981 | */ | ||
| 982 | if (!orig_ist->ist[0]) { | ||
| 983 | static const unsigned int order[N_EXCEPTION_STACKS] = { | ||
| 984 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, | ||
| 985 | [DEBUG_STACK - 1] = DEBUG_STACK_ORDER | ||
| 986 | }; | ||
| 987 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { | ||
| 988 | if (cpu) { | ||
| 989 | estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); | ||
| 990 | if (!estacks) | ||
| 991 | panic("Cannot allocate exception " | ||
| 992 | "stack %ld %d\n", v, cpu); | ||
| 993 | } | ||
| 994 | estacks += PAGE_SIZE << order[v]; | ||
| 995 | orig_ist->ist[v] = t->x86_tss.ist[v] = | ||
| 996 | (unsigned long)estacks; | ||
| 997 | } | ||
| 998 | } | ||
| 999 | |||
| 1000 | t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | ||
| 1001 | /* | ||
| 1002 | * <= is required because the CPU will access up to | ||
| 1003 | * 8 bits beyond the end of the IO permission bitmap. | ||
| 1004 | */ | ||
| 1005 | for (i = 0; i <= IO_BITMAP_LONGS; i++) | ||
| 1006 | t->io_bitmap[i] = ~0UL; | ||
| 1007 | |||
| 1008 | atomic_inc(&init_mm.mm_count); | ||
| 1009 | me->active_mm = &init_mm; | ||
| 1010 | if (me->mm) | ||
| 1011 | BUG(); | ||
| 1012 | enter_lazy_tlb(&init_mm, me); | ||
| 1013 | |||
| 1014 | load_sp0(t, ¤t->thread); | ||
| 1015 | set_tss_desc(cpu, t); | ||
| 1016 | load_TR_desc(); | ||
| 1017 | load_LDT(&init_mm.context); | ||
| 1018 | |||
| 1019 | #ifdef CONFIG_KGDB | ||
| 1020 | /* | ||
| 1021 | * If the kgdb is connected no debug regs should be altered. This | ||
| 1022 | * is only applicable when KGDB and a KGDB I/O module are built | ||
| 1023 | * into the kernel and you are using early debugging with | ||
| 1024 | * kgdbwait. KGDB will control the kernel HW breakpoint registers. | ||
| 1025 | */ | ||
| 1026 | if (kgdb_connected && arch_kgdb_ops.correct_hw_break) | ||
| 1027 | arch_kgdb_ops.correct_hw_break(); | ||
| 1028 | else { | ||
| 1029 | #endif | ||
| 1030 | /* | ||
| 1031 | * Clear all 6 debug registers: | ||
| 1032 | */ | ||
| 1033 | |||
| 1034 | set_debugreg(0UL, 0); | ||
| 1035 | set_debugreg(0UL, 1); | ||
| 1036 | set_debugreg(0UL, 2); | ||
| 1037 | set_debugreg(0UL, 3); | ||
| 1038 | set_debugreg(0UL, 6); | ||
| 1039 | set_debugreg(0UL, 7); | ||
| 1040 | #ifdef CONFIG_KGDB | ||
| 1041 | /* If the kgdb is connected no debug regs should be altered. */ | ||
| 1042 | } | ||
| 1043 | #endif | ||
| 1044 | |||
| 1045 | fpu_init(); | ||
| 1046 | |||
| 1047 | raw_local_save_flags(kernel_eflags); | ||
| 1048 | |||
| 1049 | if (is_uv_system()) | ||
| 1050 | uv_cpu_init(); | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | #else | ||
| 1054 | |||
| 726 | void __cpuinit cpu_init(void) | 1055 | void __cpuinit cpu_init(void) |
| 727 | { | 1056 | { |
| 728 | int cpu = smp_processor_id(); | 1057 | int cpu = smp_processor_id(); |
| @@ -803,3 +1132,5 @@ void __cpuinit cpu_uninit(void) | |||
| 803 | per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; | 1132 | per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; |
| 804 | } | 1133 | } |
| 805 | #endif | 1134 | #endif |
| 1135 | |||
| 1136 | #endif | ||
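The unified detect_ht() in the common.c diff above derives the physical package and core IDs by shifting and masking the initial APIC ID. The standalone sketch below reproduces that decomposition for example values; it is illustrative only — get_count_order() is reimplemented here, and the plain right shift stands in for what phys_pkg_id() effectively does in the flat-APIC case, which is an assumption rather than the exact kernel call chain.

#include <stdio.h>

/* ceil(log2(n)) for n >= 1, mirroring the kernel's get_count_order() */
static int get_count_order(unsigned int n)
{
	int order = 0;

	while ((1u << order) < n)
		order++;
	return order;
}

int main(void)
{
	unsigned int apicid = 0x05;	/* example initial APIC ID */
	unsigned int siblings = 4;	/* logical CPUs per package (CPUID.1 EBX[23:16]) */
	unsigned int max_cores = 2;	/* cores per package */

	/* package ID: APIC ID with all sibling bits shifted out */
	int index_msb = get_count_order(siblings);
	unsigned int phys_proc_id = apicid >> index_msb;

	/* core ID: shift out the thread bits, mask to the core-bit width */
	int thread_bits = get_count_order(siblings / max_cores);
	int core_bits = get_count_order(max_cores);
	unsigned int cpu_core_id = (apicid >> thread_bits) & ((1u << core_bits) - 1);

	printf("APIC ID %#x -> package %u, core %u\n", apicid, phys_proc_id, cpu_core_id);
	return 0;
}

With the example values (4 siblings, 2 cores per package), APIC ID 0x05 resolves to package 1, core 0, matching the shift/mask arithmetic shown in the detect_ht() hunks.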
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c deleted file mode 100644 index bcb48ce05d23..000000000000 --- a/arch/x86/kernel/cpu/common_64.c +++ /dev/null | |||
| @@ -1,816 +0,0 @@ | |||
| 1 | #include <linux/init.h> | ||
| 2 | #include <linux/kernel.h> | ||
| 3 | #include <linux/sched.h> | ||
| 4 | #include <linux/string.h> | ||
| 5 | #include <linux/bootmem.h> | ||
| 6 | #include <linux/bitops.h> | ||
| 7 | #include <linux/module.h> | ||
| 8 | #include <linux/kgdb.h> | ||
| 9 | #include <linux/topology.h> | ||
| 10 | #include <linux/delay.h> | ||
| 11 | #include <linux/smp.h> | ||
| 12 | #include <linux/percpu.h> | ||
| 13 | #include <asm/i387.h> | ||
| 14 | #include <asm/msr.h> | ||
| 15 | #include <asm/io.h> | ||
| 16 | #include <asm/linkage.h> | ||
| 17 | #include <asm/mmu_context.h> | ||
| 18 | #include <asm/mtrr.h> | ||
| 19 | #include <asm/mce.h> | ||
| 20 | #include <asm/pat.h> | ||
| 21 | #include <asm/asm.h> | ||
| 22 | #include <asm/numa.h> | ||
| 23 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 24 | #include <asm/mpspec.h> | ||
| 25 | #include <asm/apic.h> | ||
| 26 | #include <mach_apic.h> | ||
| 27 | #endif | ||
| 28 | #include <asm/pda.h> | ||
| 29 | #include <asm/pgtable.h> | ||
| 30 | #include <asm/processor.h> | ||
| 31 | #include <asm/desc.h> | ||
| 32 | #include <asm/atomic.h> | ||
| 33 | #include <asm/proto.h> | ||
| 34 | #include <asm/sections.h> | ||
| 35 | #include <asm/setup.h> | ||
| 36 | #include <asm/genapic.h> | ||
| 37 | |||
| 38 | #include "cpu.h" | ||
| 39 | |||
| 40 | static struct cpu_dev *this_cpu __cpuinitdata; | ||
| 41 | |||
| 42 | /* We need valid kernel segments for data and code in long mode too | ||
| 43 | * IRET will check the segment types kkeil 2000/10/28 | ||
| 44 | * Also sysret mandates a special GDT layout | ||
| 45 | */ | ||
| 46 | /* The TLS descriptors are currently at a different place compared to i386. | ||
| 47 | Hopefully nobody expects them at a fixed place (Wine?) */ | ||
| 48 | DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { | ||
| 49 | [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, | ||
| 50 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, | ||
| 51 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, | ||
| 52 | [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, | ||
| 53 | [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, | ||
| 54 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, | ||
| 55 | } }; | ||
| 56 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | ||
| 57 | |||
| 58 | __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; | ||
| 59 | |||
| 60 | /* Current gdt points %fs at the "master" per-cpu area: after this, | ||
| 61 | * it's on the real one. */ | ||
| 62 | void switch_to_new_gdt(void) | ||
| 63 | { | ||
| 64 | struct desc_ptr gdt_descr; | ||
| 65 | |||
| 66 | gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); | ||
| 67 | gdt_descr.size = GDT_SIZE - 1; | ||
| 68 | load_gdt(&gdt_descr); | ||
| 69 | } | ||
| 70 | |||
| 71 | static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; | ||
| 72 | |||
| 73 | static void __cpuinit default_init(struct cpuinfo_x86 *c) | ||
| 74 | { | ||
| 75 | display_cacheinfo(c); | ||
| 76 | } | ||
| 77 | |||
| 78 | static struct cpu_dev __cpuinitdata default_cpu = { | ||
| 79 | .c_init = default_init, | ||
| 80 | .c_vendor = "Unknown", | ||
| 81 | .c_x86_vendor = X86_VENDOR_UNKNOWN, | ||
| 82 | }; | ||
| 83 | |||
| 84 | int __cpuinit get_model_name(struct cpuinfo_x86 *c) | ||
| 85 | { | ||
| 86 | unsigned int *v; | ||
| 87 | char *p, *q; | ||
| 88 | |||
| 89 | if (c->extended_cpuid_level < 0x80000004) | ||
| 90 | return 0; | ||
| 91 | |||
| 92 | v = (unsigned int *) c->x86_model_id; | ||
| 93 | cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); | ||
| 94 | cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); | ||
| 95 | cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); | ||
| 96 | c->x86_model_id[48] = 0; | ||
| 97 | |||
| 98 | /* Intel chips right-justify this string for some dumb reason; | ||
| 99 | undo that brain damage */ | ||
| 100 | p = q = &c->x86_model_id[0]; | ||
| 101 | while (*p == ' ') | ||
| 102 | p++; | ||
| 103 | if (p != q) { | ||
| 104 | while (*p) | ||
| 105 | *q++ = *p++; | ||
| 106 | while (q <= &c->x86_model_id[48]) | ||
| 107 | *q++ = '\0'; /* Zero-pad the rest */ | ||
| 108 | } | ||
| 109 | |||
| 110 | return 1; | ||
| 111 | } | ||
| 112 | |||
| 113 | |||
| 114 | void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | ||
| 115 | { | ||
| 116 | unsigned int n, dummy, ebx, ecx, edx, l2size; | ||
| 117 | |||
| 118 | n = c->extended_cpuid_level; | ||
| 119 | |||
| 120 | if (n >= 0x80000005) { | ||
| 121 | cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); | ||
| 122 | printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", | ||
| 123 | edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); | ||
| 124 | c->x86_cache_size = (ecx>>24) + (edx>>24); | ||
| 125 | /* On K8 L1 TLB is inclusive, so don't count it */ | ||
| 126 | c->x86_tlbsize = 0; | ||
| 127 | } | ||
| 128 | |||
| 129 | if (n < 0x80000006) /* Some chips just has a large L1. */ | ||
| 130 | return; | ||
| 131 | |||
| 132 | cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); | ||
| 133 | l2size = ecx >> 16; | ||
| 134 | c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); | ||
| 135 | |||
| 136 | c->x86_cache_size = l2size; | ||
| 137 | |||
| 138 | printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", | ||
| 139 | l2size, ecx & 0xFF); | ||
| 140 | } | ||
| 141 | |||
| 142 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) | ||
| 143 | { | ||
| 144 | #ifdef CONFIG_SMP | ||
| 145 | u32 eax, ebx, ecx, edx; | ||
| 146 | int index_msb, core_bits; | ||
| 147 | |||
| 148 | if (!cpu_has(c, X86_FEATURE_HT)) | ||
| 149 | return; | ||
| 150 | if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) | ||
| 151 | goto out; | ||
| 152 | |||
| 153 | if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) | ||
| 154 | return; | ||
| 155 | |||
| 156 | cpuid(1, &eax, &ebx, &ecx, &edx); | ||
| 157 | |||
| 158 | smp_num_siblings = (ebx & 0xff0000) >> 16; | ||
| 159 | |||
| 160 | if (smp_num_siblings == 1) { | ||
| 161 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | ||
| 162 | } else if (smp_num_siblings > 1) { | ||
| 163 | |||
| 164 | if (smp_num_siblings > NR_CPUS) { | ||
| 165 | printk(KERN_WARNING "CPU: Unsupported number of siblings %d", | ||
| 166 | smp_num_siblings); | ||
| 167 | smp_num_siblings = 1; | ||
| 168 | return; | ||
| 169 | } | ||
| 170 | |||
| 171 | index_msb = get_count_order(smp_num_siblings); | ||
| 172 | c->phys_proc_id = phys_pkg_id(index_msb); | ||
| 173 | |||
| 174 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; | ||
| 175 | |||
| 176 | index_msb = get_count_order(smp_num_siblings); | ||
| 177 | |||
| 178 | core_bits = get_count_order(c->x86_max_cores); | ||
| 179 | |||
| 180 | c->cpu_core_id = phys_pkg_id(index_msb) & | ||
| 181 | ((1 << core_bits) - 1); | ||
| 182 | } | ||
| 183 | |||
| 184 | out: | ||
| 185 | if ((c->x86_max_cores * smp_num_siblings) > 1) { | ||
| 186 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | ||
| 187 | c->phys_proc_id); | ||
| 188 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | ||
| 189 | c->cpu_core_id); | ||
| 190 | } | ||
| 191 | #endif | ||
| 192 | } | ||
| 193 | |||
| 194 | static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) | ||
| 195 | { | ||
| 196 | char *v = c->x86_vendor_id; | ||
| 197 | int i; | ||
| 198 | static int printed; | ||
| 199 | |||
| 200 | for (i = 0; i < X86_VENDOR_NUM; i++) { | ||
| 201 | if (!cpu_devs[i]) | ||
| 202 | break; | ||
| 203 | |||
| 204 | if (!strcmp(v, cpu_devs[i]->c_ident[0]) || | ||
| 205 | (cpu_devs[i]->c_ident[1] && | ||
| 206 | !strcmp(v, cpu_devs[i]->c_ident[1]))) { | ||
| 207 | this_cpu = cpu_devs[i]; | ||
| 208 | c->x86_vendor = this_cpu->c_x86_vendor; | ||
| 209 | return; | ||
| 210 | } | ||
| 211 | } | ||
| 212 | |||
| 213 | if (!printed) { | ||
| 214 | printed++; | ||
| 215 | printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); | ||
| 216 | printk(KERN_ERR "CPU: Your system may be unstable.\n"); | ||
| 217 | } | ||
| 218 | |||
| 219 | c->x86_vendor = X86_VENDOR_UNKNOWN; | ||
| 220 | this_cpu = &default_cpu; | ||
| 221 | } | ||
| 222 | |||
| 223 | void __cpuinit cpu_detect(struct cpuinfo_x86 *c) | ||
| 224 | { | ||
| 225 | /* Get vendor name */ | ||
| 226 | cpuid(0x00000000, (unsigned int *)&c->cpuid_level, | ||
| 227 | (unsigned int *)&c->x86_vendor_id[0], | ||
| 228 | (unsigned int *)&c->x86_vendor_id[8], | ||
| 229 | (unsigned int *)&c->x86_vendor_id[4]); | ||
| 230 | |||
| 231 | c->x86 = 4; | ||
| 232 | /* Intel-defined flags: level 0x00000001 */ | ||
| 233 | if (c->cpuid_level >= 0x00000001) { | ||
| 234 | u32 junk, tfms, cap0, misc; | ||
| 235 | cpuid(0x00000001, &tfms, &misc, &junk, &cap0); | ||
| 236 | c->x86 = (tfms >> 8) & 0xf; | ||
| 237 | c->x86_model = (tfms >> 4) & 0xf; | ||
| 238 | c->x86_mask = tfms & 0xf; | ||
| 239 | if (c->x86 == 0xf) | ||
| 240 | c->x86 += (tfms >> 20) & 0xff; | ||
| 241 | if (c->x86 >= 0x6) | ||
| 242 | c->x86_model += ((tfms >> 16) & 0xf) << 4; | ||
| 243 | if (cap0 & (1<<19)) { | ||
| 244 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; | ||
| 245 | c->x86_cache_alignment = c->x86_clflush_size; | ||
| 246 | } | ||
| 247 | } | ||
| 248 | } | ||
| 249 | |||
| 250 | |||
| 251 | static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | ||
| 252 | { | ||
| 253 | u32 tfms, xlvl; | ||
| 254 | u32 ebx; | ||
| 255 | |||
| 256 | /* Intel-defined flags: level 0x00000001 */ | ||
| 257 | if (c->cpuid_level >= 0x00000001) { | ||
| 258 | u32 capability, excap; | ||
| 259 | |||
| 260 | cpuid(0x00000001, &tfms, &ebx, &excap, &capability); | ||
| 261 | c->x86_capability[0] = capability; | ||
| 262 | c->x86_capability[4] = excap; | ||
| 263 | } | ||
| 264 | |||
| 265 | /* AMD-defined flags: level 0x80000001 */ | ||
| 266 | xlvl = cpuid_eax(0x80000000); | ||
| 267 | c->extended_cpuid_level = xlvl; | ||
| 268 | if ((xlvl & 0xffff0000) == 0x80000000) { | ||
| 269 | if (xlvl >= 0x80000001) { | ||
| 270 | c->x86_capability[1] = cpuid_edx(0x80000001); | ||
| 271 | c->x86_capability[6] = cpuid_ecx(0x80000001); | ||
| 272 | } | ||
| 273 | } | ||
| 274 | |||
| 275 | /* Transmeta-defined flags: level 0x80860001 */ | ||
| 276 | xlvl = cpuid_eax(0x80860000); | ||
| 277 | if ((xlvl & 0xffff0000) == 0x80860000) { | ||
| 278 | /* Don't set x86_cpuid_level here for now to not confuse. */ | ||
| 279 | if (xlvl >= 0x80860001) | ||
| 280 | c->x86_capability[2] = cpuid_edx(0x80860001); | ||
| 281 | } | ||
| 282 | |||
| 283 | if (c->extended_cpuid_level >= 0x80000007) | ||
| 284 | c->x86_power = cpuid_edx(0x80000007); | ||
| 285 | |||
| 286 | if (c->extended_cpuid_level >= 0x80000008) { | ||
| 287 | u32 eax = cpuid_eax(0x80000008); | ||
| 288 | |||
| 289 | c->x86_virt_bits = (eax >> 8) & 0xff; | ||
| 290 | c->x86_phys_bits = eax & 0xff; | ||
| 291 | } | ||
| 292 | } | ||
| 293 | |||
| 294 | /* Do some early cpuid on the boot CPU to get some parameter that are | ||
| 295 | needed before check_bugs. Everything advanced is in identify_cpu | ||
| 296 | below. */ | ||
| 297 | static void __init early_identify_cpu(struct cpuinfo_x86 *c) | ||
| 298 | { | ||
| 299 | |||
| 300 | c->x86_clflush_size = 64; | ||
| 301 | c->x86_cache_alignment = c->x86_clflush_size; | ||
| 302 | |||
| 303 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | ||
| 304 | |||
| 305 | c->extended_cpuid_level = 0; | ||
| 306 | |||
| 307 | cpu_detect(c); | ||
| 308 | |||
| 309 | get_cpu_vendor(c); | ||
| 310 | |||
| 311 | get_cpu_cap(c); | ||
| 312 | |||
| 313 | if (this_cpu->c_early_init) | ||
| 314 | this_cpu->c_early_init(c); | ||
| 315 | |||
| 316 | validate_pat_support(c); | ||
| 317 | } | ||
| 318 | |||
| 319 | void __init early_cpu_init(void) | ||
| 320 | { | ||
| 321 | struct cpu_dev **cdev; | ||
| 322 | int count = 0; | ||
| 323 | |||
| 324 | printk("KERNEL supported cpus:\n"); | ||
| 325 | for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { | ||
| 326 | struct cpu_dev *cpudev = *cdev; | ||
| 327 | unsigned int j; | ||
| 328 | |||
| 329 | if (count >= X86_VENDOR_NUM) | ||
| 330 | break; | ||
| 331 | cpu_devs[count] = cpudev; | ||
| 332 | count++; | ||
| 333 | |||
| 334 | for (j = 0; j < 2; j++) { | ||
| 335 | if (!cpudev->c_ident[j]) | ||
| 336 | continue; | ||
| 337 | printk(" %s %s\n", cpudev->c_vendor, | ||
| 338 | cpudev->c_ident[j]); | ||
| 339 | } | ||
| 340 | } | ||
| 341 | |||
| 342 | early_identify_cpu(&boot_cpu_data); | ||
| 343 | } | ||
| 344 | |||
| 345 | /* | ||
| 346 | * The NOPL instruction is supposed to exist on all CPUs with | ||
| 347 | * family >= 6, unfortunately, that's not true in practice because | ||
| 348 | * of early VIA chips and (more importantly) broken virtualizers that | ||
| 349 | * are not easy to detect. Hence, probe for it based on first | ||
| 350 | * principles. | ||
| 351 | * | ||
| 352 | * Note: no 64-bit chip is known to lack these, but put the code here | ||
| 353 | * for consistency with 32 bits, and to make it utterly trivial to | ||
| 354 | * diagnose the problem should it ever surface. | ||
| 355 | */ | ||
| 356 | static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) | ||
| 357 | { | ||
| 358 | const u32 nopl_signature = 0x888c53b1; /* Random number */ | ||
| 359 | u32 has_nopl = nopl_signature; | ||
| 360 | |||
| 361 | clear_cpu_cap(c, X86_FEATURE_NOPL); | ||
| 362 | if (c->x86 >= 6) { | ||
| 363 | asm volatile("\n" | ||
| 364 | "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ | ||
| 365 | "2:\n" | ||
| 366 | " .section .fixup,\"ax\"\n" | ||
| 367 | "3: xor %0,%0\n" | ||
| 368 | " jmp 2b\n" | ||
| 369 | " .previous\n" | ||
| 370 | _ASM_EXTABLE(1b,3b) | ||
| 371 | : "+a" (has_nopl)); | ||
| 372 | |||
| 373 | if (has_nopl == nopl_signature) | ||
| 374 | set_cpu_cap(c, X86_FEATURE_NOPL); | ||
| 375 | } | ||
| 376 | } | ||
| 377 | |||
| 378 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | ||
| 379 | { | ||
| 380 | c->extended_cpuid_level = 0; | ||
| 381 | |||
| 382 | cpu_detect(c); | ||
| 383 | |||
| 384 | get_cpu_vendor(c); | ||
| 385 | |||
| 386 | get_cpu_cap(c); | ||
| 387 | |||
| 388 | c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; | ||
| 389 | #ifdef CONFIG_SMP | ||
| 390 | c->phys_proc_id = c->initial_apicid; | ||
| 391 | #endif | ||
| 392 | |||
| 393 | if (c->extended_cpuid_level >= 0x80000004) | ||
| 394 | get_model_name(c); /* Default name */ | ||
| 395 | |||
| 396 | init_scattered_cpuid_features(c); | ||
| 397 | detect_nopl(c); | ||
| 398 | } | ||
| 399 | |||
| 400 | /* | ||
| 401 | * This does the hard work of actually picking apart the CPU stuff... | ||
| 402 | */ | ||
| 403 | static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | ||
| 404 | { | ||
| 405 | int i; | ||
| 406 | |||
| 407 | c->loops_per_jiffy = loops_per_jiffy; | ||
| 408 | c->x86_cache_size = -1; | ||
| 409 | c->x86_vendor = X86_VENDOR_UNKNOWN; | ||
| 410 | c->x86_model = c->x86_mask = 0; /* So far unknown... */ | ||
| 411 | c->x86_vendor_id[0] = '\0'; /* Unset */ | ||
| 412 | c->x86_model_id[0] = '\0'; /* Unset */ | ||
| 413 | c->x86_max_cores = 1; | ||
| 414 | c->x86_coreid_bits = 0; | ||
| 415 | c->x86_clflush_size = 64; | ||
| 416 | c->x86_cache_alignment = c->x86_clflush_size; | ||
| 417 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | ||
| 418 | |||
| 419 | generic_identify(c); | ||
| 420 | |||
| 421 | c->apicid = phys_pkg_id(0); | ||
| 422 | |||
| 423 | /* | ||
| 424 | * Vendor-specific initialization. In this section we | ||
| 425 | * canonicalize the feature flags, meaning if there are | ||
| 426 | * features a certain CPU supports which CPUID doesn't | ||
| 427 | * tell us, CPUID claiming incorrect flags, or other bugs, | ||
| 428 | * we handle them here. | ||
| 429 | * | ||
| 430 | * At the end of this section, c->x86_capability better | ||
| 431 | * indicate the features this CPU genuinely supports! | ||
| 432 | */ | ||
| 433 | if (this_cpu->c_init) | ||
| 434 | this_cpu->c_init(c); | ||
| 435 | |||
| 436 | detect_ht(c); | ||
| 437 | |||
| 438 | /* | ||
| 439 | * On SMP, boot_cpu_data holds the common feature set between | ||
| 440 | * all CPUs; so make sure that we indicate which features are | ||
| 441 | * common between the CPUs. The first time this routine gets | ||
| 442 | * executed, c == &boot_cpu_data. | ||
| 443 | */ | ||
| 444 | if (c != &boot_cpu_data) { | ||
| 445 | /* AND the already accumulated flags with these */ | ||
| 446 | for (i = 0; i < NCAPINTS; i++) | ||
| 447 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | ||
| 448 | } | ||
| 449 | |||
| 450 | /* Clear all flags overriden by options */ | ||
| 451 | for (i = 0; i < NCAPINTS; i++) | ||
| 452 | c->x86_capability[i] &= ~cleared_cpu_caps[i]; | ||
| 453 | |||
| 454 | #ifdef CONFIG_X86_MCE | ||
| 455 | mcheck_init(c); | ||
| 456 | #endif | ||
| 457 | select_idle_routine(c); | ||
| 458 | |||
| 459 | #ifdef CONFIG_NUMA | ||
| 460 | numa_add_cpu(smp_processor_id()); | ||
| 461 | #endif | ||
| 462 | |||
| 463 | } | ||
| 464 | |||
| 465 | void __init identify_boot_cpu(void) | ||
| 466 | { | ||
| 467 | identify_cpu(&boot_cpu_data); | ||
| 468 | } | ||
| 469 | |||
| 470 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | ||
| 471 | { | ||
| 472 | BUG_ON(c == &boot_cpu_data); | ||
| 473 | identify_cpu(c); | ||
| 474 | mtrr_ap_init(); | ||
| 475 | } | ||
| 476 | |||
| 477 | struct msr_range { | ||
| 478 | unsigned min; | ||
| 479 | unsigned max; | ||
| 480 | }; | ||
| 481 | |||
| 482 | static struct msr_range msr_range_array[] __cpuinitdata = { | ||
| 483 | { 0x00000000, 0x00000418}, | ||
| 484 | { 0xc0000000, 0xc000040b}, | ||
| 485 | { 0xc0010000, 0xc0010142}, | ||
| 486 | { 0xc0011000, 0xc001103b}, | ||
| 487 | }; | ||
| 488 | |||
| 489 | static void __cpuinit print_cpu_msr(void) | ||
| 490 | { | ||
| 491 | unsigned index; | ||
| 492 | u64 val; | ||
| 493 | int i; | ||
| 494 | unsigned index_min, index_max; | ||
| 495 | |||
| 496 | for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { | ||
| 497 | index_min = msr_range_array[i].min; | ||
| 498 | index_max = msr_range_array[i].max; | ||
| 499 | for (index = index_min; index < index_max; index++) { | ||
| 500 | if (rdmsrl_amd_safe(index, &val)) | ||
| 501 | continue; | ||
| 502 | printk(KERN_INFO " MSR%08x: %016llx\n", index, val); | ||
| 503 | } | ||
| 504 | } | ||
| 505 | } | ||
| 506 | |||
| 507 | static int show_msr __cpuinitdata; | ||
| 508 | static __init int setup_show_msr(char *arg) | ||
| 509 | { | ||
| 510 | int num; | ||
| 511 | |||
| 512 | get_option(&arg, &num); | ||
| 513 | |||
| 514 | if (num > 0) | ||
| 515 | show_msr = num; | ||
| 516 | return 1; | ||
| 517 | } | ||
| 518 | __setup("show_msr=", setup_show_msr); | ||
| 519 | |||
| 520 | static __init int setup_noclflush(char *arg) | ||
| 521 | { | ||
| 522 | setup_clear_cpu_cap(X86_FEATURE_CLFLSH); | ||
| 523 | return 1; | ||
| 524 | } | ||
| 525 | __setup("noclflush", setup_noclflush); | ||
| 526 | |||
| 527 | void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | ||
| 528 | { | ||
| 529 | if (c->x86_model_id[0]) | ||
| 530 | printk(KERN_CONT "%s", c->x86_model_id); | ||
| 531 | |||
| 532 | if (c->x86_mask || c->cpuid_level >= 0) | ||
| 533 | printk(KERN_CONT " stepping %02x\n", c->x86_mask); | ||
| 534 | else | ||
| 535 | printk(KERN_CONT "\n"); | ||
| 536 | |||
| 537 | #ifdef CONFIG_SMP | ||
| 538 | if (c->cpu_index < show_msr) | ||
| 539 | print_cpu_msr(); | ||
| 540 | #else | ||
| 541 | if (show_msr) | ||
| 542 | print_cpu_msr(); | ||
| 543 | #endif | ||
| 544 | } | ||
| 545 | |||
| 546 | static __init int setup_disablecpuid(char *arg) | ||
| 547 | { | ||
| 548 | int bit; | ||
| 549 | if (get_option(&arg, &bit) && bit < NCAPINTS*32) | ||
| 550 | setup_clear_cpu_cap(bit); | ||
| 551 | else | ||
| 552 | return 0; | ||
| 553 | return 1; | ||
| 554 | } | ||
| 555 | __setup("clearcpuid=", setup_disablecpuid); | ||
| 556 | |||
| 557 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; | ||
| 558 | |||
| 559 | struct x8664_pda **_cpu_pda __read_mostly; | ||
| 560 | EXPORT_SYMBOL(_cpu_pda); | ||
| 561 | |||
| 562 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | ||
| 563 | |||
| 564 | char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; | ||
| 565 | |||
| 566 | unsigned long __supported_pte_mask __read_mostly = ~0UL; | ||
| 567 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | ||
| 568 | |||
| 569 | static int do_not_nx __cpuinitdata; | ||
| 570 | |||
| 571 | /* noexec=on|off | ||
| 572 | Control non executable mappings for 64bit processes. | ||
| 573 | |||
| 574 | on Enable(default) | ||
| 575 | off Disable | ||
| 576 | */ | ||
| 577 | static int __init nonx_setup(char *str) | ||
| 578 | { | ||
| 579 | if (!str) | ||
| 580 | return -EINVAL; | ||
| 581 | if (!strncmp(str, "on", 2)) { | ||
| 582 | __supported_pte_mask |= _PAGE_NX; | ||
| 583 | do_not_nx = 0; | ||
| 584 | } else if (!strncmp(str, "off", 3)) { | ||
| 585 | do_not_nx = 1; | ||
| 586 | __supported_pte_mask &= ~_PAGE_NX; | ||
| 587 | } | ||
| 588 | return 0; | ||
| 589 | } | ||
| 590 | early_param("noexec", nonx_setup); | ||
| 591 | |||
| 592 | int force_personality32; | ||
| 593 | |||
| 594 | /* noexec32=on|off | ||
| 595 | Control non executable heap for 32bit processes. | ||
| 596 | To control the stack too use noexec=off | ||
| 597 | |||
| 598 | on PROT_READ does not imply PROT_EXEC for 32bit processes (default) | ||
| 599 | off PROT_READ implies PROT_EXEC | ||
| 600 | */ | ||
| 601 | static int __init nonx32_setup(char *str) | ||
| 602 | { | ||
| 603 | if (!strcmp(str, "on")) | ||
| 604 | force_personality32 &= ~READ_IMPLIES_EXEC; | ||
| 605 | else if (!strcmp(str, "off")) | ||
| 606 | force_personality32 |= READ_IMPLIES_EXEC; | ||
| 607 | return 1; | ||
| 608 | } | ||
| 609 | __setup("noexec32=", nonx32_setup); | ||
| 610 | |||
| 611 | void pda_init(int cpu) | ||
| 612 | { | ||
| 613 | struct x8664_pda *pda = cpu_pda(cpu); | ||
| 614 | |||
| 615 | /* Setup up data that may be needed in __get_free_pages early */ | ||
| 616 | loadsegment(fs, 0); | ||
| 617 | loadsegment(gs, 0); | ||
| 618 | /* Memory clobbers used to order PDA accessed */ | ||
| 619 | mb(); | ||
| 620 | wrmsrl(MSR_GS_BASE, pda); | ||
| 621 | mb(); | ||
| 622 | |||
| 623 | pda->cpunumber = cpu; | ||
| 624 | pda->irqcount = -1; | ||
| 625 | pda->kernelstack = (unsigned long)stack_thread_info() - | ||
| 626 | PDA_STACKOFFSET + THREAD_SIZE; | ||
| 627 | pda->active_mm = &init_mm; | ||
| 628 | pda->mmu_state = 0; | ||
| 629 | |||
| 630 | if (cpu == 0) { | ||
| 631 | /* others are initialized in smpboot.c */ | ||
| 632 | pda->pcurrent = &init_task; | ||
| 633 | pda->irqstackptr = boot_cpu_stack; | ||
| 634 | pda->irqstackptr += IRQSTACKSIZE - 64; | ||
| 635 | } else { | ||
| 636 | if (!pda->irqstackptr) { | ||
| 637 | pda->irqstackptr = (char *) | ||
| 638 | __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); | ||
| 639 | if (!pda->irqstackptr) | ||
| 640 | panic("cannot allocate irqstack for cpu %d", | ||
| 641 | cpu); | ||
| 642 | pda->irqstackptr += IRQSTACKSIZE - 64; | ||
| 643 | } | ||
| 644 | |||
| 645 | if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) | ||
| 646 | pda->nodenumber = cpu_to_node(cpu); | ||
| 647 | } | ||
| 648 | } | ||
| 649 | |||
| 650 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + | ||
| 651 | DEBUG_STKSZ] __page_aligned_bss; | ||
| 652 | |||
| 653 | extern asmlinkage void ignore_sysret(void); | ||
| 654 | |||
| 655 | /* May not be marked __init: used by software suspend */ | ||
| 656 | void syscall_init(void) | ||
| 657 | { | ||
| 658 | /* | ||
| 659 | * LSTAR and STAR live in a bit strange symbiosis. | ||
| 660 | * They both write to the same internal register. STAR allows to | ||
| 661 | * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. | ||
| 662 | */ | ||
| 663 | wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); | ||
| 664 | wrmsrl(MSR_LSTAR, system_call); | ||
| 665 | wrmsrl(MSR_CSTAR, ignore_sysret); | ||
| 666 | |||
| 667 | #ifdef CONFIG_IA32_EMULATION | ||
| 668 | syscall32_cpu_init(); | ||
| 669 | #endif | ||
| 670 | |||
| 671 | /* Flags to clear on syscall */ | ||
| 672 | wrmsrl(MSR_SYSCALL_MASK, | ||
| 673 | X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); | ||
| 674 | } | ||
| 675 | |||
| 676 | void __cpuinit check_efer(void) | ||
| 677 | { | ||
| 678 | unsigned long efer; | ||
| 679 | |||
| 680 | rdmsrl(MSR_EFER, efer); | ||
| 681 | if (!(efer & EFER_NX) || do_not_nx) | ||
| 682 | __supported_pte_mask &= ~_PAGE_NX; | ||
| 683 | } | ||
| 684 | |||
| 685 | unsigned long kernel_eflags; | ||
| 686 | |||
| 687 | /* | ||
| 688 | * Copies of the original ist values from the tss are only accessed during | ||
| 689 | * debugging, no special alignment required. | ||
| 690 | */ | ||
| 691 | DEFINE_PER_CPU(struct orig_ist, orig_ist); | ||
| 692 | |||
| 693 | /* | ||
| 694 | * cpu_init() initializes state that is per-CPU. Some data is already | ||
| 695 | * initialized (naturally) in the bootstrap process, such as the GDT | ||
| 696 | * and IDT. We reload them nevertheless, this function acts as a | ||
| 697 | * 'CPU state barrier', nothing should get across. | ||
| 698 | * A lot of state is already set up in PDA init. | ||
| 699 | */ | ||
| 700 | void __cpuinit cpu_init(void) | ||
| 701 | { | ||
| 702 | int cpu = stack_smp_processor_id(); | ||
| 703 | struct tss_struct *t = &per_cpu(init_tss, cpu); | ||
| 704 | struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); | ||
| 705 | unsigned long v; | ||
| 706 | char *estacks = NULL; | ||
| 707 | struct task_struct *me; | ||
| 708 | int i; | ||
| 709 | |||
| 710 | /* CPU 0 is initialised in head64.c */ | ||
| 711 | if (cpu != 0) | ||
| 712 | pda_init(cpu); | ||
| 713 | else | ||
| 714 | estacks = boot_exception_stacks; | ||
| 715 | |||
| 716 | me = current; | ||
| 717 | |||
| 718 | if (cpu_test_and_set(cpu, cpu_initialized)) | ||
| 719 | panic("CPU#%d already initialized!\n", cpu); | ||
| 720 | |||
| 721 | printk(KERN_INFO "Initializing CPU#%d\n", cpu); | ||
| 722 | |||
| 723 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | ||
| 724 | |||
| 725 | /* | ||
| 726 | * Initialize the per-CPU GDT with the boot GDT, | ||
| 727 | * and set up the GDT descriptor: | ||
| 728 | */ | ||
| 729 | |||
| 730 | switch_to_new_gdt(); | ||
| 731 | load_idt((const struct desc_ptr *)&idt_descr); | ||
| 732 | |||
| 733 | memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); | ||
| 734 | syscall_init(); | ||
| 735 | |||
| 736 | wrmsrl(MSR_FS_BASE, 0); | ||
| 737 | wrmsrl(MSR_KERNEL_GS_BASE, 0); | ||
| 738 | barrier(); | ||
| 739 | |||
| 740 | check_efer(); | ||
| 741 | if (cpu != 0 && x2apic) | ||
| 742 | enable_x2apic(); | ||
| 743 | |||
| 744 | /* | ||
| 745 | * set up and load the per-CPU TSS | ||
| 746 | */ | ||
| 747 | if (!orig_ist->ist[0]) { | ||
| 748 | static const unsigned int order[N_EXCEPTION_STACKS] = { | ||
| 749 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, | ||
| 750 | [DEBUG_STACK - 1] = DEBUG_STACK_ORDER | ||
| 751 | }; | ||
| 752 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { | ||
| 753 | if (cpu) { | ||
| 754 | estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); | ||
| 755 | if (!estacks) | ||
| 756 | panic("Cannot allocate exception " | ||
| 757 | "stack %ld %d\n", v, cpu); | ||
| 758 | } | ||
| 759 | estacks += PAGE_SIZE << order[v]; | ||
| 760 | orig_ist->ist[v] = t->x86_tss.ist[v] = | ||
| 761 | (unsigned long)estacks; | ||
| 762 | } | ||
| 763 | } | ||
| 764 | |||
| 765 | t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | ||
| 766 | /* | ||
| 767 | * <= is required because the CPU will access up to | ||
| 768 | * 8 bits beyond the end of the IO permission bitmap. | ||
| 769 | */ | ||
| 770 | for (i = 0; i <= IO_BITMAP_LONGS; i++) | ||
| 771 | t->io_bitmap[i] = ~0UL; | ||
| 772 | |||
| 773 | atomic_inc(&init_mm.mm_count); | ||
| 774 | me->active_mm = &init_mm; | ||
| 775 | if (me->mm) | ||
| 776 | BUG(); | ||
| 777 | enter_lazy_tlb(&init_mm, me); | ||
| 778 | |||
| 779 | load_sp0(t, ¤t->thread); | ||
| 780 | set_tss_desc(cpu, t); | ||
| 781 | load_TR_desc(); | ||
| 782 | load_LDT(&init_mm.context); | ||
| 783 | |||
| 784 | #ifdef CONFIG_KGDB | ||
| 785 | /* | ||
| 786 | * If the kgdb is connected no debug regs should be altered. This | ||
| 787 | * is only applicable when KGDB and a KGDB I/O module are built | ||
| 788 | * into the kernel and you are using early debugging with | ||
| 789 | * kgdbwait. KGDB will control the kernel HW breakpoint registers. | ||
| 790 | */ | ||
| 791 | if (kgdb_connected && arch_kgdb_ops.correct_hw_break) | ||
| 792 | arch_kgdb_ops.correct_hw_break(); | ||
| 793 | else { | ||
| 794 | #endif | ||
| 795 | /* | ||
| 796 | * Clear all 6 debug registers: | ||
| 797 | */ | ||
| 798 | |||
| 799 | set_debugreg(0UL, 0); | ||
| 800 | set_debugreg(0UL, 1); | ||
| 801 | set_debugreg(0UL, 2); | ||
| 802 | set_debugreg(0UL, 3); | ||
| 803 | set_debugreg(0UL, 6); | ||
| 804 | set_debugreg(0UL, 7); | ||
| 805 | #ifdef CONFIG_KGDB | ||
| 806 | /* If the kgdb is connected no debug regs should be altered. */ | ||
| 807 | } | ||
| 808 | #endif | ||
| 809 | |||
| 810 | fpu_init(); | ||
| 811 | |||
| 812 | raw_local_save_flags(kernel_eflags); | ||
| 813 | |||
| 814 | if (is_uv_system()) | ||
| 815 | uv_cpu_init(); | ||
| 816 | } | ||
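
A side note on the exception-stack loop above: for every IST entry the kernel allocates pages of the per-stack order and then stores the top of the allocation (base plus PAGE_SIZE << order) in both orig_ist and the TSS, because x86 stacks grow downward. The standalone sketch below mirrors only that arithmetic; the stack count and orders are made-up stand-ins, and aligned_alloc() stands in for __get_free_pages().

    /* Illustrative sketch of the IST top-of-stack bookkeeping in cpu_init();
     * the constants below are stand-ins, not the kernel's real values. */
    #include <stdio.h>
    #include <stdlib.h>

    #define PAGE_SIZE             4096UL
    #define N_EXCEPTION_STACKS    4    /* hypothetical count */
    #define EXCEPTION_STACK_ORDER 0    /* one page per stack (assumed) */
    #define DEBUG_STACK_ORDER     1    /* two pages for the debug stack (assumed) */
    #define DEBUG_STACK           4    /* hypothetical index + 1, as in the kernel array */

    int main(void)
    {
        static const unsigned int order[N_EXCEPTION_STACKS] = {
            [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
            [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
        };
        unsigned long ist[N_EXCEPTION_STACKS];

        for (int v = 0; v < N_EXCEPTION_STACKS; v++) {
            /* aligned_alloc() stands in for __get_free_pages(GFP_ATOMIC, order) */
            char *estacks = aligned_alloc(PAGE_SIZE, PAGE_SIZE << order[v]);
            if (!estacks) {
                perror("aligned_alloc");
                return 1;
            }
            /* record the top of the stack: the allocator returns the bottom,
             * but x86 stacks grow downward, so the IST entry is base + size.
             * Like the kernel, the sketch keeps the stacks and never frees them. */
            estacks += PAGE_SIZE << order[v];
            ist[v] = (unsigned long)estacks;
            printf("stack %d: top at %#lx (size %lu)\n",
                   v, ist[v], PAGE_SIZE << order[v]);
        }
        return 0;
    }
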
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 3cc9d92afd8f..de4094a39210 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h | |||
| @@ -31,7 +31,6 @@ struct cpu_dev { | |||
| 31 | 31 | ||
| 32 | extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; | 32 | extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; |
| 33 | 33 | ||
| 34 | extern int get_model_name(struct cpuinfo_x86 *c); | ||
| 35 | extern void display_cacheinfo(struct cpuinfo_x86 *c); | 34 | extern void display_cacheinfo(struct cpuinfo_x86 *c); |
| 36 | 35 | ||
| 37 | #endif | 36 | #endif |
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index dd097b835839..c24c4a487b7c 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
| @@ -256,7 +256,8 @@ static u32 get_cur_val(const cpumask_t *mask) | |||
| 256 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and | 256 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and |
| 257 | * no meaning should be associated with absolute values of these MSRs. | 257 | * no meaning should be associated with absolute values of these MSRs. |
| 258 | */ | 258 | */ |
| 259 | static unsigned int get_measured_perf(unsigned int cpu) | 259 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, |
| 260 | unsigned int cpu) | ||
| 260 | { | 261 | { |
| 261 | union { | 262 | union { |
| 262 | struct { | 263 | struct { |
| @@ -326,7 +327,7 @@ static unsigned int get_measured_perf(unsigned int cpu) | |||
| 326 | 327 | ||
| 327 | #endif | 328 | #endif |
| 328 | 329 | ||
| 329 | retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100; | 330 | retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100; |
| 330 | 331 | ||
| 331 | put_cpu(); | 332 | put_cpu(); |
| 332 | set_cpus_allowed_ptr(current, &saved_mask); | 333 | set_cpus_allowed_ptr(current, &saved_mask); |
| @@ -785,7 +786,11 @@ static int __init acpi_cpufreq_init(void) | |||
| 785 | if (ret) | 786 | if (ret) |
| 786 | return ret; | 787 | return ret; |
| 787 | 788 | ||
| 788 | return cpufreq_register_driver(&acpi_cpufreq_driver); | 789 | ret = cpufreq_register_driver(&acpi_cpufreq_driver); |
| 790 | if (ret) | ||
| 791 | free_percpu(acpi_perf_data); | ||
| 792 | |||
| 793 | return ret; | ||
| 789 | } | 794 | } |
| 790 | 795 | ||
| 791 | static void __exit acpi_cpufreq_exit(void) | 796 | static void __exit acpi_cpufreq_exit(void) |
| @@ -795,8 +800,6 @@ static void __exit acpi_cpufreq_exit(void) | |||
| 795 | cpufreq_unregister_driver(&acpi_cpufreq_driver); | 800 | cpufreq_unregister_driver(&acpi_cpufreq_driver); |
| 796 | 801 | ||
| 797 | free_percpu(acpi_perf_data); | 802 | free_percpu(acpi_perf_data); |
| 798 | |||
| 799 | return; | ||
| 800 | } | 803 | } |
| 801 | 804 | ||
| 802 | module_param(acpi_pstate_strict, uint, 0644); | 805 | module_param(acpi_pstate_strict, uint, 0644); |
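
The acpi-cpufreq change above is a leak fix: when cpufreq_register_driver() fails, the percpu performance data allocated earlier in acpi_cpufreq_init() is now freed instead of being leaked (and the pointless trailing return in the exit path is dropped). A minimal userspace sketch of that allocate/register/unwind-on-failure shape, with placeholder helpers rather than the real cpufreq API:

    /* Sketch of the init-time unwind pattern from the acpi-cpufreq fix;
     * alloc_perf_data/register_driver/free_perf_data are placeholders. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <errno.h>

    static int *perf_data;

    static int alloc_perf_data(void)
    {
        perf_data = calloc(16, sizeof(*perf_data));
        return perf_data ? 0 : -ENOMEM;
    }

    static void free_perf_data(void)
    {
        free(perf_data);
        perf_data = NULL;
    }

    /* pretend the driver core rejects us, to exercise the error path */
    static int register_driver(void)
    {
        return -ENODEV;
    }

    static int driver_init(void)
    {
        int ret = alloc_perf_data();
        if (ret)
            return ret;

        ret = register_driver();
        if (ret)
            free_perf_data();    /* undo the allocation on failure */

        return ret;
    }

    int main(void)
    {
        printf("driver_init() = %d\n", driver_init());
        return 0;
    }
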
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index e4a4bf870e94..fe613c93b366 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c | |||
| @@ -25,8 +25,8 @@ | |||
| 25 | #include <linux/cpufreq.h> | 25 | #include <linux/cpufreq.h> |
| 26 | 26 | ||
| 27 | #include <asm/msr.h> | 27 | #include <asm/msr.h> |
| 28 | #include <asm/timex.h> | 28 | #include <linux/timex.h> |
| 29 | #include <asm/io.h> | 29 | #include <linux/io.h> |
| 30 | 30 | ||
| 31 | #define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ | 31 | #define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ |
| 32 | #define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ | 32 | #define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ |
| @@ -82,7 +82,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) | |||
| 82 | u8 clockspeed_reg; /* Clock Speed Register */ | 82 | u8 clockspeed_reg; /* Clock Speed Register */ |
| 83 | 83 | ||
| 84 | local_irq_disable(); | 84 | local_irq_disable(); |
| 85 | outb_p(0x80,REG_CSCIR); | 85 | outb_p(0x80, REG_CSCIR); |
| 86 | clockspeed_reg = inb_p(REG_CSCDR); | 86 | clockspeed_reg = inb_p(REG_CSCDR); |
| 87 | local_irq_enable(); | 87 | local_irq_enable(); |
| 88 | 88 | ||
| @@ -98,10 +98,10 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) | |||
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | /* 33 MHz is not 32 MHz... */ | 100 | /* 33 MHz is not 32 MHz... */ |
| 101 | if ((clockspeed_reg & 0xE0)==0xA0) | 101 | if ((clockspeed_reg & 0xE0) == 0xA0) |
| 102 | return 33000; | 102 | return 33000; |
| 103 | 103 | ||
| 104 | return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000); | 104 | return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000; |
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | 107 | ||
| @@ -117,7 +117,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) | |||
| 117 | * There is no return value. | 117 | * There is no return value. |
| 118 | */ | 118 | */ |
| 119 | 119 | ||
| 120 | static void elanfreq_set_cpu_state (unsigned int state) | 120 | static void elanfreq_set_cpu_state(unsigned int state) |
| 121 | { | 121 | { |
| 122 | struct cpufreq_freqs freqs; | 122 | struct cpufreq_freqs freqs; |
| 123 | 123 | ||
| @@ -144,20 +144,20 @@ static void elanfreq_set_cpu_state (unsigned int state) | |||
| 144 | */ | 144 | */ |
| 145 | 145 | ||
| 146 | local_irq_disable(); | 146 | local_irq_disable(); |
| 147 | outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */ | 147 | outb_p(0x40, REG_CSCIR); /* Disable hyperspeed mode */ |
| 148 | outb_p(0x00,REG_CSCDR); | 148 | outb_p(0x00, REG_CSCDR); |
| 149 | local_irq_enable(); /* wait till internal pipelines and */ | 149 | local_irq_enable(); /* wait till internal pipelines and */ |
| 150 | udelay(1000); /* buffers have cleaned up */ | 150 | udelay(1000); /* buffers have cleaned up */ |
| 151 | 151 | ||
| 152 | local_irq_disable(); | 152 | local_irq_disable(); |
| 153 | 153 | ||
| 154 | /* now, set the CPU clock speed register (0x80) */ | 154 | /* now, set the CPU clock speed register (0x80) */ |
| 155 | outb_p(0x80,REG_CSCIR); | 155 | outb_p(0x80, REG_CSCIR); |
| 156 | outb_p(elan_multiplier[state].val80h,REG_CSCDR); | 156 | outb_p(elan_multiplier[state].val80h, REG_CSCDR); |
| 157 | 157 | ||
| 158 | /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ | 158 | /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ |
| 159 | outb_p(0x40,REG_CSCIR); | 159 | outb_p(0x40, REG_CSCIR); |
| 160 | outb_p(elan_multiplier[state].val40h,REG_CSCDR); | 160 | outb_p(elan_multiplier[state].val40h, REG_CSCDR); |
| 161 | udelay(10000); | 161 | udelay(10000); |
| 162 | local_irq_enable(); | 162 | local_irq_enable(); |
| 163 | 163 | ||
| @@ -173,12 +173,12 @@ static void elanfreq_set_cpu_state (unsigned int state) | |||
| 173 | * for the hardware supported by the driver. | 173 | * for the hardware supported by the driver. |
| 174 | */ | 174 | */ |
| 175 | 175 | ||
| 176 | static int elanfreq_verify (struct cpufreq_policy *policy) | 176 | static int elanfreq_verify(struct cpufreq_policy *policy) |
| 177 | { | 177 | { |
| 178 | return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); | 178 | return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); |
| 179 | } | 179 | } |
| 180 | 180 | ||
| 181 | static int elanfreq_target (struct cpufreq_policy *policy, | 181 | static int elanfreq_target(struct cpufreq_policy *policy, |
| 182 | unsigned int target_freq, | 182 | unsigned int target_freq, |
| 183 | unsigned int relation) | 183 | unsigned int relation) |
| 184 | { | 184 | { |
| @@ -205,7 +205,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) | |||
| 205 | 205 | ||
| 206 | /* capability check */ | 206 | /* capability check */ |
| 207 | if ((c->x86_vendor != X86_VENDOR_AMD) || | 207 | if ((c->x86_vendor != X86_VENDOR_AMD) || |
| 208 | (c->x86 != 4) || (c->x86_model!=10)) | 208 | (c->x86 != 4) || (c->x86_model != 10)) |
| 209 | return -ENODEV; | 209 | return -ENODEV; |
| 210 | 210 | ||
| 211 | /* max freq */ | 211 | /* max freq */ |
| @@ -213,7 +213,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) | |||
| 213 | max_freq = elanfreq_get_cpu_frequency(0); | 213 | max_freq = elanfreq_get_cpu_frequency(0); |
| 214 | 214 | ||
| 215 | /* table init */ | 215 | /* table init */ |
| 216 | for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { | 216 | for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { |
| 217 | if (elanfreq_table[i].frequency > max_freq) | 217 | if (elanfreq_table[i].frequency > max_freq) |
| 218 | elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; | 218 | elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; |
| 219 | } | 219 | } |
| @@ -224,7 +224,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) | |||
| 224 | 224 | ||
| 225 | result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); | 225 | result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); |
| 226 | if (result) | 226 | if (result) |
| 227 | return (result); | 227 | return result; |
| 228 | 228 | ||
| 229 | cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); | 229 | cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); |
| 230 | return 0; | 230 | return 0; |
| @@ -260,7 +260,7 @@ __setup("elanfreq=", elanfreq_setup); | |||
| 260 | #endif | 260 | #endif |
| 261 | 261 | ||
| 262 | 262 | ||
| 263 | static struct freq_attr* elanfreq_attr[] = { | 263 | static struct freq_attr *elanfreq_attr[] = { |
| 264 | &cpufreq_freq_attr_scaling_available_freqs, | 264 | &cpufreq_freq_attr_scaling_available_freqs, |
| 265 | NULL, | 265 | NULL, |
| 266 | }; | 266 | }; |
| @@ -284,9 +284,9 @@ static int __init elanfreq_init(void) | |||
| 284 | 284 | ||
| 285 | /* Test if we have the right hardware */ | 285 | /* Test if we have the right hardware */ |
| 286 | if ((c->x86_vendor != X86_VENDOR_AMD) || | 286 | if ((c->x86_vendor != X86_VENDOR_AMD) || |
| 287 | (c->x86 != 4) || (c->x86_model!=10)) { | 287 | (c->x86 != 4) || (c->x86_model != 10)) { |
| 288 | printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); | 288 | printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); |
| 289 | return -ENODEV; | 289 | return -ENODEV; |
| 290 | } | 290 | } |
| 291 | return cpufreq_register_driver(&elanfreq_driver); | 291 | return cpufreq_register_driver(&elanfreq_driver); |
| 292 | } | 292 | } |
| @@ -298,7 +298,7 @@ static void __exit elanfreq_exit(void) | |||
| 298 | } | 298 | } |
| 299 | 299 | ||
| 300 | 300 | ||
| 301 | module_param (max_freq, int, 0444); | 301 | module_param(max_freq, int, 0444); |
| 302 | 302 | ||
| 303 | MODULE_LICENSE("GPL"); | 303 | MODULE_LICENSE("GPL"); |
| 304 | MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, Sven Geggus <sven@geggus.net>"); | 304 | MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, Sven Geggus <sven@geggus.net>"); |
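
Most of the elanfreq hunk is checkpatch-style cleanup, but the frequency decoding it reformats is compact enough to be easy to misread: bits 7-5 of the chip's clock-speed register select a power-of-two number of MHz (returned in kHz), with the 0xA0 bit pattern special-cased as 33 MHz. A small standalone sketch of just that decoding, fed made-up register values:

    /* Decode the AMD Elan clock-speed register the way
     * elanfreq_get_cpu_frequency() does; the sample values are made up. */
    #include <stdio.h>

    static unsigned int decode_elan_khz(unsigned char clockspeed_reg)
    {
        /* 33 MHz is not 32 MHz... */
        if ((clockspeed_reg & 0xE0) == 0xA0)
            return 33000;

        /* bits 7-5 select 1, 2, 4, ... MHz, returned in kHz */
        return (1 << ((clockspeed_reg & 0xE0) >> 5)) * 1000;
    }

    int main(void)
    {
        unsigned char samples[] = { 0x00, 0x20, 0x40, 0x60, 0x80, 0xA0 };

        for (unsigned i = 0; i < sizeof(samples); i++)
            printf("reg 0x%02x -> %u kHz\n",
                   samples[i], decode_elan_khz(samples[i]));
        return 0;
    }
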
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index f1685fb91fbd..b8e05ee4f736 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | |||
| @@ -171,7 +171,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) | |||
| 171 | } | 171 | } |
| 172 | 172 | ||
| 173 | if (c->x86 != 0xF) { | 173 | if (c->x86 != 0xF) { |
| 174 | printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@lists.linux.org.uk>\n"); | 174 | printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n"); |
| 175 | return 0; | 175 | return 0; |
| 176 | } | 176 | } |
| 177 | 177 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index eb9b62b0830c..b5ced806a316 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c | |||
| @@ -15,12 +15,11 @@ | |||
| 15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
| 16 | 16 | ||
| 17 | #include <asm/msr.h> | 17 | #include <asm/msr.h> |
| 18 | #include <asm/timex.h> | 18 | #include <linux/timex.h> |
| 19 | #include <asm/io.h> | 19 | #include <linux/io.h> |
| 20 | 20 | ||
| 21 | 21 | #define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long | |
| 22 | #define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long | 22 | as it is unused */ |
| 23 | as it is unused */ | ||
| 24 | 23 | ||
| 25 | static unsigned int busfreq; /* FSB, in 10 kHz */ | 24 | static unsigned int busfreq; /* FSB, in 10 kHz */ |
| 26 | static unsigned int max_multiplier; | 25 | static unsigned int max_multiplier; |
| @@ -53,7 +52,7 @@ static int powernow_k6_get_cpu_multiplier(void) | |||
| 53 | 52 | ||
| 54 | msrval = POWERNOW_IOPORT + 0x1; | 53 | msrval = POWERNOW_IOPORT + 0x1; |
| 55 | wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ | 54 | wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ |
| 56 | invalue=inl(POWERNOW_IOPORT + 0x8); | 55 | invalue = inl(POWERNOW_IOPORT + 0x8); |
| 57 | msrval = POWERNOW_IOPORT + 0x0; | 56 | msrval = POWERNOW_IOPORT + 0x0; |
| 58 | wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ | 57 | wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ |
| 59 | 58 | ||
| @@ -67,9 +66,9 @@ static int powernow_k6_get_cpu_multiplier(void) | |||
| 67 | * | 66 | * |
| 68 | * Tries to change the PowerNow! multiplier | 67 | * Tries to change the PowerNow! multiplier |
| 69 | */ | 68 | */ |
| 70 | static void powernow_k6_set_state (unsigned int best_i) | 69 | static void powernow_k6_set_state(unsigned int best_i) |
| 71 | { | 70 | { |
| 72 | unsigned long outvalue=0, invalue=0; | 71 | unsigned long outvalue = 0, invalue = 0; |
| 73 | unsigned long msrval; | 72 | unsigned long msrval; |
| 74 | struct cpufreq_freqs freqs; | 73 | struct cpufreq_freqs freqs; |
| 75 | 74 | ||
| @@ -90,10 +89,10 @@ static void powernow_k6_set_state (unsigned int best_i) | |||
| 90 | 89 | ||
| 91 | msrval = POWERNOW_IOPORT + 0x1; | 90 | msrval = POWERNOW_IOPORT + 0x1; |
| 92 | wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ | 91 | wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ |
| 93 | invalue=inl(POWERNOW_IOPORT + 0x8); | 92 | invalue = inl(POWERNOW_IOPORT + 0x8); |
| 94 | invalue = invalue & 0xf; | 93 | invalue = invalue & 0xf; |
| 95 | outvalue = outvalue | invalue; | 94 | outvalue = outvalue | invalue; |
| 96 | outl(outvalue ,(POWERNOW_IOPORT + 0x8)); | 95 | outl(outvalue , (POWERNOW_IOPORT + 0x8)); |
| 97 | msrval = POWERNOW_IOPORT + 0x0; | 96 | msrval = POWERNOW_IOPORT + 0x0; |
| 98 | wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ | 97 | wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ |
| 99 | 98 | ||
| @@ -124,7 +123,7 @@ static int powernow_k6_verify(struct cpufreq_policy *policy) | |||
| 124 | * | 123 | * |
| 125 | * sets a new CPUFreq policy | 124 | * sets a new CPUFreq policy |
| 126 | */ | 125 | */ |
| 127 | static int powernow_k6_target (struct cpufreq_policy *policy, | 126 | static int powernow_k6_target(struct cpufreq_policy *policy, |
| 128 | unsigned int target_freq, | 127 | unsigned int target_freq, |
| 129 | unsigned int relation) | 128 | unsigned int relation) |
| 130 | { | 129 | { |
| @@ -152,7 +151,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) | |||
| 152 | busfreq = cpu_khz / max_multiplier; | 151 | busfreq = cpu_khz / max_multiplier; |
| 153 | 152 | ||
| 154 | /* table init */ | 153 | /* table init */ |
| 155 | for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { | 154 | for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { |
| 156 | if (clock_ratio[i].index > max_multiplier) | 155 | if (clock_ratio[i].index > max_multiplier) |
| 157 | clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; | 156 | clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; |
| 158 | else | 157 | else |
| @@ -165,7 +164,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) | |||
| 165 | 164 | ||
| 166 | result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); | 165 | result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); |
| 167 | if (result) | 166 | if (result) |
| 168 | return (result); | 167 | return result; |
| 169 | 168 | ||
| 170 | cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); | 169 | cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); |
| 171 | 170 | ||
| @@ -176,8 +175,8 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) | |||
| 176 | static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) | 175 | static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) |
| 177 | { | 176 | { |
| 178 | unsigned int i; | 177 | unsigned int i; |
| 179 | for (i=0; i<8; i++) { | 178 | for (i = 0; i < 8; i++) { |
| 180 | if (i==max_multiplier) | 179 | if (i == max_multiplier) |
| 181 | powernow_k6_set_state(i); | 180 | powernow_k6_set_state(i); |
| 182 | } | 181 | } |
| 183 | cpufreq_frequency_table_put_attr(policy->cpu); | 182 | cpufreq_frequency_table_put_attr(policy->cpu); |
| @@ -189,7 +188,7 @@ static unsigned int powernow_k6_get(unsigned int cpu) | |||
| 189 | return busfreq * powernow_k6_get_cpu_multiplier(); | 188 | return busfreq * powernow_k6_get_cpu_multiplier(); |
| 190 | } | 189 | } |
| 191 | 190 | ||
| 192 | static struct freq_attr* powernow_k6_attr[] = { | 191 | static struct freq_attr *powernow_k6_attr[] = { |
| 193 | &cpufreq_freq_attr_scaling_available_freqs, | 192 | &cpufreq_freq_attr_scaling_available_freqs, |
| 194 | NULL, | 193 | NULL, |
| 195 | }; | 194 | }; |
| @@ -227,7 +226,7 @@ static int __init powernow_k6_init(void) | |||
| 227 | } | 226 | } |
| 228 | 227 | ||
| 229 | if (cpufreq_register_driver(&powernow_k6_driver)) { | 228 | if (cpufreq_register_driver(&powernow_k6_driver)) { |
| 230 | release_region (POWERNOW_IOPORT, 16); | 229 | release_region(POWERNOW_IOPORT, 16); |
| 231 | return -EINVAL; | 230 | return -EINVAL; |
| 232 | } | 231 | } |
| 233 | 232 | ||
| @@ -243,13 +242,13 @@ static int __init powernow_k6_init(void) | |||
| 243 | static void __exit powernow_k6_exit(void) | 242 | static void __exit powernow_k6_exit(void) |
| 244 | { | 243 | { |
| 245 | cpufreq_unregister_driver(&powernow_k6_driver); | 244 | cpufreq_unregister_driver(&powernow_k6_driver); |
| 246 | release_region (POWERNOW_IOPORT, 16); | 245 | release_region(POWERNOW_IOPORT, 16); |
| 247 | } | 246 | } |
| 248 | 247 | ||
| 249 | 248 | ||
| 250 | MODULE_AUTHOR ("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>"); | 249 | MODULE_AUTHOR("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>"); |
| 251 | MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); | 250 | MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); |
| 252 | MODULE_LICENSE ("GPL"); | 251 | MODULE_LICENSE("GPL"); |
| 253 | 252 | ||
| 254 | module_init(powernow_k6_init); | 253 | module_init(powernow_k6_init); |
| 255 | module_exit(powernow_k6_exit); | 254 | module_exit(powernow_k6_exit); |
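
The powernow-k6 set_state() hunk, again largely style cleanup, shows a read-modify-write on the PowerNow! I/O port: the current value is read back, only its low nibble is preserved, the new multiplier bits are OR-ed in, and the merged value is written out. A generic sketch of that preserve-the-low-nibble pattern, using a mock register in place of inl()/outl():

    /* Read-modify-write shape from powernow_k6_set_state(), with a mock
     * register standing in for inl()/outl() on the PowerNow! I/O port. */
    #include <stdio.h>

    static unsigned long mock_port = 0x3a;    /* made-up current port contents */

    static unsigned long inl_mock(void)      { return mock_port; }
    static void outl_mock(unsigned long v)   { mock_port = v; }

    int main(void)
    {
        unsigned long outvalue = 0x50;    /* hypothetical new multiplier bits */
        unsigned long invalue;

        invalue = inl_mock();     /* read the current value */
        invalue &= 0xf;           /* keep only the low nibble */
        outvalue |= invalue;      /* merge in the new upper bits */
        outl_mock(outvalue);      /* write the merged value back */

        printf("port is now 0x%02lx\n", mock_port);
        return 0;
    }
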
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 15e13c01cc36..3b5f06423e77 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | |||
| @@ -26,7 +26,7 @@ | |||
| 26 | #include <asm/cpufeature.h> | 26 | #include <asm/cpufeature.h> |
| 27 | 27 | ||
| 28 | #define PFX "speedstep-centrino: " | 28 | #define PFX "speedstep-centrino: " |
| 29 | #define MAINTAINER "cpufreq@lists.linux.org.uk" | 29 | #define MAINTAINER "cpufreq@vger.kernel.org" |
| 30 | 30 | ||
| 31 | #define dprintk(msg...) \ | 31 | #define dprintk(msg...) \ |
| 32 | cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) | 32 | cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) |
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3f8c7283d816..ffd0f5ed071a 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c | |||
| @@ -301,7 +301,6 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | |||
| 301 | */ | 301 | */ |
| 302 | if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) | 302 | if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) |
| 303 | geode_configure(); | 303 | geode_configure(); |
| 304 | get_model_name(c); /* get CPU marketing name */ | ||
| 305 | return; | 304 | return; |
| 306 | } else { /* MediaGX */ | 305 | } else { /* MediaGX */ |
| 307 | Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; | 306 | Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 959417b8cd64..99468dbd08da 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
| @@ -15,6 +15,11 @@ | |||
| 15 | #include <asm/ds.h> | 15 | #include <asm/ds.h> |
| 16 | #include <asm/bugs.h> | 16 | #include <asm/bugs.h> |
| 17 | 17 | ||
| 18 | #ifdef CONFIG_X86_64 | ||
| 19 | #include <asm/topology.h> | ||
| 20 | #include <asm/numa_64.h> | ||
| 21 | #endif | ||
| 22 | |||
| 18 | #include "cpu.h" | 23 | #include "cpu.h" |
| 19 | 24 | ||
| 20 | #ifdef CONFIG_X86_LOCAL_APIC | 25 | #ifdef CONFIG_X86_LOCAL_APIC |
| @@ -25,14 +30,20 @@ | |||
| 25 | 30 | ||
| 26 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | 31 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) |
| 27 | { | 32 | { |
| 28 | /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ | ||
| 29 | if (c->x86 == 15 && c->x86_cache_alignment == 64) | ||
| 30 | c->x86_cache_alignment = 128; | ||
| 31 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | 33 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || |
| 32 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | 34 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) |
| 33 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 35 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
| 36 | |||
| 37 | #ifdef CONFIG_X86_64 | ||
| 38 | set_cpu_cap(c, X86_FEATURE_SYSENTER32); | ||
| 39 | #else | ||
| 40 | /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ | ||
| 41 | if (c->x86 == 15 && c->x86_cache_alignment == 64) | ||
| 42 | c->x86_cache_alignment = 128; | ||
| 43 | #endif | ||
| 34 | } | 44 | } |
| 35 | 45 | ||
| 46 | #ifdef CONFIG_X86_32 | ||
| 36 | /* | 47 | /* |
| 37 | * Early probe support logic for ppro memory erratum #50 | 48 | * Early probe support logic for ppro memory erratum #50 |
| 38 | * | 49 | * |
| @@ -52,15 +63,54 @@ int __cpuinit ppro_with_ram_bug(void) | |||
| 52 | return 0; | 63 | return 0; |
| 53 | } | 64 | } |
| 54 | 65 | ||
| 66 | #ifdef CONFIG_X86_F00F_BUG | ||
| 67 | static void __cpuinit trap_init_f00f_bug(void) | ||
| 68 | { | ||
| 69 | __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); | ||
| 55 | 70 | ||
| 56 | /* | 71 | /* |
| 57 | * P4 Xeon errata 037 workaround. | 72 | * Update the IDT descriptor and reload the IDT so that |
| 58 | * Hardware prefetcher may cause stale data to be loaded into the cache. | 73 | * it uses the read-only mapped virtual address. |
| 59 | */ | 74 | */ |
| 60 | static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) | 75 | idt_descr.address = fix_to_virt(FIX_F00F_IDT); |
| 76 | load_idt(&idt_descr); | ||
| 77 | } | ||
| 78 | #endif | ||
| 79 | |||
| 80 | static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) | ||
| 61 | { | 81 | { |
| 62 | unsigned long lo, hi; | 82 | unsigned long lo, hi; |
| 63 | 83 | ||
| 84 | #ifdef CONFIG_X86_F00F_BUG | ||
| 85 | /* | ||
| 86 | * All current models of Pentium and Pentium with MMX technology CPUs | ||
| 87 | * have the F0 0F bug, which lets nonprivileged users lock up the system. | ||
| 88 | * Note that the workaround only should be initialized once... | ||
| 89 | */ | ||
| 90 | c->f00f_bug = 0; | ||
| 91 | if (!paravirt_enabled() && c->x86 == 5) { | ||
| 92 | static int f00f_workaround_enabled; | ||
| 93 | |||
| 94 | c->f00f_bug = 1; | ||
| 95 | if (!f00f_workaround_enabled) { | ||
| 96 | trap_init_f00f_bug(); | ||
| 97 | printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); | ||
| 98 | f00f_workaround_enabled = 1; | ||
| 99 | } | ||
| 100 | } | ||
| 101 | #endif | ||
| 102 | |||
| 103 | /* | ||
| 104 | * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until | ||
| 105 | * model 3 mask 3 | ||
| 106 | */ | ||
| 107 | if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) | ||
| 108 | clear_cpu_cap(c, X86_FEATURE_SEP); | ||
| 109 | |||
| 110 | /* | ||
| 111 | * P4 Xeon errata 037 workaround. | ||
| 112 | * Hardware prefetcher may cause stale data to be loaded into the cache. | ||
| 113 | */ | ||
| 64 | if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { | 114 | if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { |
| 65 | rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); | 115 | rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); |
| 66 | if ((lo & (1<<9)) == 0) { | 116 | if ((lo & (1<<9)) == 0) { |
| @@ -70,13 +120,68 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) | |||
| 70 | wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); | 120 | wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); |
| 71 | } | 121 | } |
| 72 | } | 122 | } |
| 123 | |||
| 124 | /* | ||
| 125 | * See if we have a good local APIC by checking for buggy Pentia, | ||
| 126 | * i.e. all B steppings and the C2 stepping of P54C when using their | ||
| 127 | * integrated APIC (see 11AP erratum in "Pentium Processor | ||
| 128 | * Specification Update"). | ||
| 129 | */ | ||
| 130 | if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && | ||
| 131 | (c->x86_mask < 0x6 || c->x86_mask == 0xb)) | ||
| 132 | set_cpu_cap(c, X86_FEATURE_11AP); | ||
| 133 | |||
| 134 | |||
| 135 | #ifdef CONFIG_X86_INTEL_USERCOPY | ||
| 136 | /* | ||
| 137 | * Set up the preferred alignment for movsl bulk memory moves | ||
| 138 | */ | ||
| 139 | switch (c->x86) { | ||
| 140 | case 4: /* 486: untested */ | ||
| 141 | break; | ||
| 142 | case 5: /* Old Pentia: untested */ | ||
| 143 | break; | ||
| 144 | case 6: /* PII/PIII only like movsl with 8-byte alignment */ | ||
| 145 | movsl_mask.mask = 7; | ||
| 146 | break; | ||
| 147 | case 15: /* P4 is OK down to 8-byte alignment */ | ||
| 148 | movsl_mask.mask = 7; | ||
| 149 | break; | ||
| 150 | } | ||
| 151 | #endif | ||
| 152 | |||
| 153 | #ifdef CONFIG_X86_NUMAQ | ||
| 154 | numaq_tsc_disable(); | ||
| 155 | #endif | ||
| 156 | } | ||
| 157 | #else | ||
| 158 | static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) | ||
| 159 | { | ||
| 73 | } | 160 | } |
| 161 | #endif | ||
| 74 | 162 | ||
| 163 | static void __cpuinit srat_detect_node(void) | ||
| 164 | { | ||
| 165 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | ||
| 166 | unsigned node; | ||
| 167 | int cpu = smp_processor_id(); | ||
| 168 | int apicid = hard_smp_processor_id(); | ||
| 169 | |||
| 170 | /* Don't do the funky fallback heuristics the AMD version employs | ||
| 171 | for now. */ | ||
| 172 | node = apicid_to_node[apicid]; | ||
| 173 | if (node == NUMA_NO_NODE || !node_online(node)) | ||
| 174 | node = first_node(node_online_map); | ||
| 175 | numa_set_node(cpu, node); | ||
| 176 | |||
| 177 | printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | ||
| 178 | #endif | ||
| 179 | } | ||
| 75 | 180 | ||
| 76 | /* | 181 | /* |
| 77 | * find out the number of processor cores on the die | 182 | * find out the number of processor cores on the die |
| 78 | */ | 183 | */ |
| 79 | static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) | 184 | static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) |
| 80 | { | 185 | { |
| 81 | unsigned int eax, ebx, ecx, edx; | 186 | unsigned int eax, ebx, ecx, edx; |
| 82 | 187 | ||
| @@ -91,45 +196,51 @@ static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) | |||
| 91 | return 1; | 196 | return 1; |
| 92 | } | 197 | } |
| 93 | 198 | ||
| 94 | #ifdef CONFIG_X86_F00F_BUG | 199 | static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c) |
| 95 | static void __cpuinit trap_init_f00f_bug(void) | ||
| 96 | { | 200 | { |
| 97 | __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); | 201 | /* Intel VMX MSR indicated features */ |
| 98 | 202 | #define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 | |
| 99 | /* | 203 | #define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 |
| 100 | * Update the IDT descriptor and reload the IDT so that | 204 | #define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 |
| 101 | * it uses the read-only mapped virtual address. | 205 | #define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 |
| 102 | */ | 206 | #define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 |
| 103 | idt_descr.address = fix_to_virt(FIX_F00F_IDT); | 207 | #define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 |
| 104 | load_idt(&idt_descr); | 208 | |
| 209 | u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; | ||
| 210 | |||
| 211 | clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW); | ||
| 212 | clear_cpu_cap(c, X86_FEATURE_VNMI); | ||
| 213 | clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); | ||
| 214 | clear_cpu_cap(c, X86_FEATURE_EPT); | ||
| 215 | clear_cpu_cap(c, X86_FEATURE_VPID); | ||
| 216 | |||
| 217 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); | ||
| 218 | msr_ctl = vmx_msr_high | vmx_msr_low; | ||
| 219 | if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) | ||
| 220 | set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); | ||
| 221 | if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) | ||
| 222 | set_cpu_cap(c, X86_FEATURE_VNMI); | ||
| 223 | if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { | ||
| 224 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, | ||
| 225 | vmx_msr_low, vmx_msr_high); | ||
| 226 | msr_ctl2 = vmx_msr_high | vmx_msr_low; | ||
| 227 | if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && | ||
| 228 | (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) | ||
| 229 | set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); | ||
| 230 | if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) | ||
| 231 | set_cpu_cap(c, X86_FEATURE_EPT); | ||
| 232 | if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) | ||
| 233 | set_cpu_cap(c, X86_FEATURE_VPID); | ||
| 234 | } | ||
| 105 | } | 235 | } |
| 106 | #endif | ||
| 107 | 236 | ||
| 108 | static void __cpuinit init_intel(struct cpuinfo_x86 *c) | 237 | static void __cpuinit init_intel(struct cpuinfo_x86 *c) |
| 109 | { | 238 | { |
| 110 | unsigned int l2 = 0; | 239 | unsigned int l2 = 0; |
| 111 | char *p = NULL; | ||
| 112 | 240 | ||
| 113 | early_init_intel(c); | 241 | early_init_intel(c); |
| 114 | 242 | ||
| 115 | #ifdef CONFIG_X86_F00F_BUG | 243 | intel_workarounds(c); |
| 116 | /* | ||
| 117 | * All current models of Pentium and Pentium with MMX technology CPUs | ||
| 118 | * have the F0 0F bug, which lets nonprivileged users lock up the system. | ||
| 119 | * Note that the workaround only should be initialized once... | ||
| 120 | */ | ||
| 121 | c->f00f_bug = 0; | ||
| 122 | if (!paravirt_enabled() && c->x86 == 5) { | ||
| 123 | static int f00f_workaround_enabled; | ||
| 124 | |||
| 125 | c->f00f_bug = 1; | ||
| 126 | if (!f00f_workaround_enabled) { | ||
| 127 | trap_init_f00f_bug(); | ||
| 128 | printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); | ||
| 129 | f00f_workaround_enabled = 1; | ||
| 130 | } | ||
| 131 | } | ||
| 132 | #endif | ||
| 133 | 244 | ||
| 134 | l2 = init_intel_cacheinfo(c); | 245 | l2 = init_intel_cacheinfo(c); |
| 135 | if (c->cpuid_level > 9) { | 246 | if (c->cpuid_level > 9) { |
| @@ -139,16 +250,32 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
| 139 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | 250 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); |
| 140 | } | 251 | } |
| 141 | 252 | ||
| 142 | /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ | 253 | if (cpu_has_xmm2) |
| 143 | if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) | 254 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); |
| 144 | clear_cpu_cap(c, X86_FEATURE_SEP); | 255 | if (cpu_has_ds) { |
| 256 | unsigned int l1; | ||
| 257 | rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); | ||
| 258 | if (!(l1 & (1<<11))) | ||
| 259 | set_cpu_cap(c, X86_FEATURE_BTS); | ||
| 260 | if (!(l1 & (1<<12))) | ||
| 261 | set_cpu_cap(c, X86_FEATURE_PEBS); | ||
| 262 | ds_init_intel(c); | ||
| 263 | } | ||
| 145 | 264 | ||
| 265 | #ifdef CONFIG_X86_64 | ||
| 266 | if (c->x86 == 15) | ||
| 267 | c->x86_cache_alignment = c->x86_clflush_size * 2; | ||
| 268 | if (c->x86 == 6) | ||
| 269 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
| 270 | #else | ||
| 146 | /* | 271 | /* |
| 147 | * Names for the Pentium II/Celeron processors | 272 | * Names for the Pentium II/Celeron processors |
| 148 | * detectable only by also checking the cache size. | 273 | * detectable only by also checking the cache size. |
| 149 | * Dixon is NOT a Celeron. | 274 | * Dixon is NOT a Celeron. |
| 150 | */ | 275 | */ |
| 151 | if (c->x86 == 6) { | 276 | if (c->x86 == 6) { |
| 277 | char *p = NULL; | ||
| 278 | |||
| 152 | switch (c->x86_model) { | 279 | switch (c->x86_model) { |
| 153 | case 5: | 280 | case 5: |
| 154 | if (c->x86_mask == 0) { | 281 | if (c->x86_mask == 0) { |
| @@ -171,77 +298,41 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
| 171 | p = "Celeron (Coppermine)"; | 298 | p = "Celeron (Coppermine)"; |
| 172 | break; | 299 | break; |
| 173 | } | 300 | } |
| 301 | |||
| 302 | if (p) | ||
| 303 | strcpy(c->x86_model_id, p); | ||
| 174 | } | 304 | } |
| 175 | 305 | ||
| 176 | if (p) | 306 | if (c->x86 == 15) |
| 177 | strcpy(c->x86_model_id, p); | 307 | set_cpu_cap(c, X86_FEATURE_P4); |
| 308 | if (c->x86 == 6) | ||
| 309 | set_cpu_cap(c, X86_FEATURE_P3); | ||
| 178 | 310 | ||
| 179 | detect_extended_topology(c); | 311 | if (cpu_has_bts) |
| 312 | ptrace_bts_init_intel(c); | ||
| 180 | 313 | ||
| 314 | #endif | ||
| 315 | |||
| 316 | detect_extended_topology(c); | ||
| 181 | if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { | 317 | if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { |
| 182 | /* | 318 | /* |
| 183 | * let's use the legacy cpuid vector 0x1 and 0x4 for topology | 319 | * let's use the legacy cpuid vector 0x1 and 0x4 for topology |
| 184 | * detection. | 320 | * detection. |
| 185 | */ | 321 | */ |
| 186 | c->x86_max_cores = num_cpu_cores(c); | 322 | c->x86_max_cores = intel_num_cpu_cores(c); |
| 323 | #ifdef CONFIG_X86_32 | ||
| 187 | detect_ht(c); | 324 | detect_ht(c); |
| 188 | } | ||
| 189 | |||
| 190 | /* Work around errata */ | ||
| 191 | Intel_errata_workarounds(c); | ||
| 192 | |||
| 193 | #ifdef CONFIG_X86_INTEL_USERCOPY | ||
| 194 | /* | ||
| 195 | * Set up the preferred alignment for movsl bulk memory moves | ||
| 196 | */ | ||
| 197 | switch (c->x86) { | ||
| 198 | case 4: /* 486: untested */ | ||
| 199 | break; | ||
| 200 | case 5: /* Old Pentia: untested */ | ||
| 201 | break; | ||
| 202 | case 6: /* PII/PIII only like movsl with 8-byte alignment */ | ||
| 203 | movsl_mask.mask = 7; | ||
| 204 | break; | ||
| 205 | case 15: /* P4 is OK down to 8-byte alignment */ | ||
| 206 | movsl_mask.mask = 7; | ||
| 207 | break; | ||
| 208 | } | ||
| 209 | #endif | 325 | #endif |
| 210 | |||
| 211 | if (cpu_has_xmm2) | ||
| 212 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | ||
| 213 | if (c->x86 == 15) { | ||
| 214 | set_cpu_cap(c, X86_FEATURE_P4); | ||
| 215 | } | 326 | } |
| 216 | if (c->x86 == 6) | ||
| 217 | set_cpu_cap(c, X86_FEATURE_P3); | ||
| 218 | if (cpu_has_ds) { | ||
| 219 | unsigned int l1; | ||
| 220 | rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); | ||
| 221 | if (!(l1 & (1<<11))) | ||
| 222 | set_cpu_cap(c, X86_FEATURE_BTS); | ||
| 223 | if (!(l1 & (1<<12))) | ||
| 224 | set_cpu_cap(c, X86_FEATURE_PEBS); | ||
| 225 | } | ||
| 226 | |||
| 227 | if (cpu_has_bts) | ||
| 228 | ds_init_intel(c); | ||
| 229 | 327 | ||
| 230 | /* | 328 | /* Work around errata */ |
| 231 | * See if we have a good local APIC by checking for buggy Pentia, | 329 | srat_detect_node(); |
| 232 | * i.e. all B steppings and the C2 stepping of P54C when using their | ||
| 233 | * integrated APIC (see 11AP erratum in "Pentium Processor | ||
| 234 | * Specification Update"). | ||
| 235 | */ | ||
| 236 | if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && | ||
| 237 | (c->x86_mask < 0x6 || c->x86_mask == 0xb)) | ||
| 238 | set_cpu_cap(c, X86_FEATURE_11AP); | ||
| 239 | 330 | ||
| 240 | #ifdef CONFIG_X86_NUMAQ | 331 | if (cpu_has(c, X86_FEATURE_VMX)) |
| 241 | numaq_tsc_disable(); | 332 | detect_vmx_virtcap(c); |
| 242 | #endif | ||
| 243 | } | 333 | } |
| 244 | 334 | ||
| 335 | #ifdef CONFIG_X86_32 | ||
| 245 | static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) | 336 | static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) |
| 246 | { | 337 | { |
| 247 | /* | 338 | /* |
| @@ -254,10 +345,12 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i | |||
| 254 | size = 256; | 345 | size = 256; |
| 255 | return size; | 346 | return size; |
| 256 | } | 347 | } |
| 348 | #endif | ||
| 257 | 349 | ||
| 258 | static struct cpu_dev intel_cpu_dev __cpuinitdata = { | 350 | static struct cpu_dev intel_cpu_dev __cpuinitdata = { |
| 259 | .c_vendor = "Intel", | 351 | .c_vendor = "Intel", |
| 260 | .c_ident = { "GenuineIntel" }, | 352 | .c_ident = { "GenuineIntel" }, |
| 353 | #ifdef CONFIG_X86_32 | ||
| 261 | .c_models = { | 354 | .c_models = { |
| 262 | { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = | 355 | { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = |
| 263 | { | 356 | { |
| @@ -307,13 +400,12 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { | |||
| 307 | } | 400 | } |
| 308 | }, | 401 | }, |
| 309 | }, | 402 | }, |
| 403 | .c_size_cache = intel_size_cache, | ||
| 404 | #endif | ||
| 310 | .c_early_init = early_init_intel, | 405 | .c_early_init = early_init_intel, |
| 311 | .c_init = init_intel, | 406 | .c_init = init_intel, |
| 312 | .c_size_cache = intel_size_cache, | ||
| 313 | .c_x86_vendor = X86_VENDOR_INTEL, | 407 | .c_x86_vendor = X86_VENDOR_INTEL, |
| 314 | }; | 408 | }; |
| 315 | 409 | ||
| 316 | cpu_dev_register(intel_cpu_dev); | 410 | cpu_dev_register(intel_cpu_dev); |
| 317 | 411 | ||
| 318 | /* arch_initcall(intel_cpu_init); */ | ||
| 319 | |||
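
The new detect_vmx_virtcap() in the intel.c hunk reads MSR_IA32_VMX_PROCBASED_CTLS, ORs its two 32-bit halves together, and tests individual control bits to decide which VMX feature flags to set; the secondary-controls MSR gates virtual-APIC (FlexPriority), EPT and VPID the same way. The sketch below reproduces only that bit decoding with hard-coded stand-in values for the MSR halves; it never touches real MSRs.

    /* Decode VMX capability bits the way detect_vmx_virtcap() does, using
     * hard-coded stand-in values instead of rdmsr(). */
    #include <stdio.h>
    #include <stdint.h>

    #define PROC_CTLS_TPR_SHADOW   0x00200000u
    #define PROC_CTLS_VNMI         0x00400000u
    #define PROC_CTLS_2ND_CTLS     0x80000000u
    #define PROC_CTLS2_VIRT_APIC   0x00000001u
    #define PROC_CTLS2_EPT         0x00000002u
    #define PROC_CTLS2_VPID        0x00000020u

    int main(void)
    {
        /* stand-ins for the low/high halves returned by rdmsr() */
        uint32_t ctls_lo  = 0x0020001eu, ctls_hi  = 0x80600000u;
        uint32_t ctls2_lo = 0x00000003u, ctls2_hi = 0x00000020u;

        uint32_t msr_ctl = ctls_hi | ctls_lo;

        if (msr_ctl & PROC_CTLS_TPR_SHADOW)
            puts("tpr_shadow");
        if (msr_ctl & PROC_CTLS_VNMI)
            puts("vnmi");
        if (msr_ctl & PROC_CTLS_2ND_CTLS) {
            uint32_t msr_ctl2 = ctls2_hi | ctls2_lo;

            /* FlexPriority needs both TPR shadow and virtual-APIC access */
            if ((msr_ctl2 & PROC_CTLS2_VIRT_APIC) &&
                (msr_ctl & PROC_CTLS_TPR_SHADOW))
                puts("flexpriority");
            if (msr_ctl2 & PROC_CTLS2_EPT)
                puts("ept");
            if (msr_ctl2 & PROC_CTLS2_VPID)
                puts("vpid");
        }
        return 0;
    }
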
diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c deleted file mode 100644 index 0c0a58dfe099..000000000000 --- a/arch/x86/kernel/cpu/intel_64.c +++ /dev/null | |||
| @@ -1,99 +0,0 @@ | |||
| 1 | #include <linux/init.h> | ||
| 2 | #include <linux/smp.h> | ||
| 3 | #include <asm/processor.h> | ||
| 4 | #include <asm/ptrace.h> | ||
| 5 | #include <asm/topology.h> | ||
| 6 | #include <asm/numa_64.h> | ||
| 7 | |||
| 8 | #include "cpu.h" | ||
| 9 | |||
| 10 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | ||
| 11 | { | ||
| 12 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | ||
| 13 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | ||
| 14 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
| 15 | |||
| 16 | set_cpu_cap(c, X86_FEATURE_SYSENTER32); | ||
| 17 | } | ||
| 18 | |||
| 19 | /* | ||
| 20 | * find out the number of processor cores on the die | ||
| 21 | */ | ||
| 22 | static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) | ||
| 23 | { | ||
| 24 | unsigned int eax, t; | ||
| 25 | |||
| 26 | if (c->cpuid_level < 4) | ||
| 27 | return 1; | ||
| 28 | |||
| 29 | cpuid_count(4, 0, &eax, &t, &t, &t); | ||
| 30 | |||
| 31 | if (eax & 0x1f) | ||
| 32 | return ((eax >> 26) + 1); | ||
| 33 | else | ||
| 34 | return 1; | ||
| 35 | } | ||
| 36 | |||
| 37 | static void __cpuinit srat_detect_node(void) | ||
| 38 | { | ||
| 39 | #ifdef CONFIG_NUMA | ||
| 40 | unsigned node; | ||
| 41 | int cpu = smp_processor_id(); | ||
| 42 | int apicid = hard_smp_processor_id(); | ||
| 43 | |||
| 44 | /* Don't do the funky fallback heuristics the AMD version employs | ||
| 45 | for now. */ | ||
| 46 | node = apicid_to_node[apicid]; | ||
| 47 | if (node == NUMA_NO_NODE || !node_online(node)) | ||
| 48 | node = first_node(node_online_map); | ||
| 49 | numa_set_node(cpu, node); | ||
| 50 | |||
| 51 | printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | ||
| 52 | #endif | ||
| 53 | } | ||
| 54 | |||
| 55 | static void __cpuinit init_intel(struct cpuinfo_x86 *c) | ||
| 56 | { | ||
| 57 | init_intel_cacheinfo(c); | ||
| 58 | if (c->cpuid_level > 9) { | ||
| 59 | unsigned eax = cpuid_eax(10); | ||
| 60 | /* Check for version and the number of counters */ | ||
| 61 | if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) | ||
| 62 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | ||
| 63 | } | ||
| 64 | |||
| 65 | if (cpu_has_ds) { | ||
| 66 | unsigned int l1, l2; | ||
| 67 | rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); | ||
| 68 | if (!(l1 & (1<<11))) | ||
| 69 | set_cpu_cap(c, X86_FEATURE_BTS); | ||
| 70 | if (!(l1 & (1<<12))) | ||
| 71 | set_cpu_cap(c, X86_FEATURE_PEBS); | ||
| 72 | } | ||
| 73 | |||
| 74 | |||
| 75 | if (cpu_has_bts) | ||
| 76 | ds_init_intel(c); | ||
| 77 | |||
| 78 | if (c->x86 == 15) | ||
| 79 | c->x86_cache_alignment = c->x86_clflush_size * 2; | ||
| 80 | if (c->x86 == 6) | ||
| 81 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
| 82 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | ||
| 83 | |||
| 84 | detect_extended_topology(c); | ||
| 85 | if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) | ||
| 86 | c->x86_max_cores = intel_num_cpu_cores(c); | ||
| 87 | |||
| 88 | srat_detect_node(); | ||
| 89 | } | ||
| 90 | |||
| 91 | static struct cpu_dev intel_cpu_dev __cpuinitdata = { | ||
| 92 | .c_vendor = "Intel", | ||
| 93 | .c_ident = { "GenuineIntel" }, | ||
| 94 | .c_early_init = early_init_intel, | ||
| 95 | .c_init = init_intel, | ||
| 96 | .c_x86_vendor = X86_VENDOR_INTEL, | ||
| 97 | }; | ||
| 98 | |||
| 99 | cpu_dev_register(intel_cpu_dev); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 726a5fcdf341..4b031a4ac856 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c | |||
| @@ -860,7 +860,7 @@ error: | |||
| 860 | return err; | 860 | return err; |
| 861 | } | 861 | } |
| 862 | 862 | ||
| 863 | static void mce_remove_device(unsigned int cpu) | 863 | static __cpuinit void mce_remove_device(unsigned int cpu) |
| 864 | { | 864 | { |
| 865 | int i; | 865 | int i; |
| 866 | 866 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index cb7d3b6a80eb..4e8d77f01eeb 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
| @@ -401,12 +401,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
| 401 | tmp |= ~((1<<(hi - 1)) - 1); | 401 | tmp |= ~((1<<(hi - 1)) - 1); |
| 402 | 402 | ||
| 403 | if (tmp != mask_lo) { | 403 | if (tmp != mask_lo) { |
| 404 | static int once = 1; | 404 | WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); |
| 405 | |||
| 406 | if (once) { | ||
| 407 | printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); | ||
| 408 | once = 0; | ||
| 409 | } | ||
| 410 | mask_lo = tmp; | 405 | mask_lo = tmp; |
| 411 | } | 406 | } |
| 412 | } | 407 | } |
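
The mtrr/generic.c hunk swaps a hand-rolled print-once latch (a static flag wrapped around printk) for WARN_ONCE(), which emits the message plus a backtrace exactly once. For comparison, a userspace approximation of the print-once idiom using a static local inside a macro; warn_once() here is a local helper written for this sketch, not a kernel API:

    /* A userspace approximation of the "warn only once" idiom that
     * WARN_ONCE() provides in the kernel. */
    #include <stdio.h>

    #define warn_once(fmt, ...)                              \
        do {                                                 \
            static int warned_once;                          \
            if (!warned_once) {                              \
                warned_once = 1;                             \
                fprintf(stderr, fmt, ##__VA_ARGS__);         \
            }                                                \
        } while (0)

    int main(void)
    {
        for (int i = 0; i < 3; i++)
            warn_once("mask mismatch seen (printed once), i=%d\n", i);
        return 0;
    }
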
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 84c480bb3715..4c4214690dd1 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c | |||
| @@ -405,9 +405,9 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) | |||
| 405 | } | 405 | } |
| 406 | /* RED-PEN: base can be > 32bit */ | 406 | /* RED-PEN: base can be > 32bit */ |
| 407 | len += seq_printf(seq, | 407 | len += seq_printf(seq, |
| 408 | "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", | 408 | "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n", |
| 409 | i, base, base >> (20 - PAGE_SHIFT), size, factor, | 409 | i, base, base >> (20 - PAGE_SHIFT), size, factor, |
| 410 | mtrr_attrib_to_str(type), mtrr_usage_table[i]); | 410 | mtrr_usage_table[i], mtrr_attrib_to_str(type)); |
| 411 | } | 411 | } |
| 412 | } | 412 | } |
| 413 | return 0; | 413 | return 0; |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 58ac5d3d4361..c78c04821ea1 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
| @@ -759,7 +759,8 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
| 759 | /* take out UC ranges */ | 759 | /* take out UC ranges */ |
| 760 | for (i = 0; i < num_var_ranges; i++) { | 760 | for (i = 0; i < num_var_ranges; i++) { |
| 761 | type = range_state[i].type; | 761 | type = range_state[i].type; |
| 762 | if (type != MTRR_TYPE_UNCACHABLE) | 762 | if (type != MTRR_TYPE_UNCACHABLE && |
| 763 | type != MTRR_TYPE_WRPROT) | ||
| 763 | continue; | 764 | continue; |
| 764 | size = range_state[i].size_pfn; | 765 | size = range_state[i].size_pfn; |
| 765 | if (!size) | 766 | if (!size) |
| @@ -834,7 +835,14 @@ static int __init enable_mtrr_cleanup_setup(char *str) | |||
| 834 | enable_mtrr_cleanup = 1; | 835 | enable_mtrr_cleanup = 1; |
| 835 | return 0; | 836 | return 0; |
| 836 | } | 837 | } |
| 837 | early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); | 838 | early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); |
| 839 | |||
| 840 | static int __init mtrr_cleanup_debug_setup(char *str) | ||
| 841 | { | ||
| 842 | debug_print = 1; | ||
| 843 | return 0; | ||
| 844 | } | ||
| 845 | early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); | ||
| 838 | 846 | ||
| 839 | struct var_mtrr_state { | 847 | struct var_mtrr_state { |
| 840 | unsigned long range_startk; | 848 | unsigned long range_startk; |
| @@ -898,6 +906,27 @@ set_var_mtrr_all(unsigned int address_bits) | |||
| 898 | } | 906 | } |
| 899 | } | 907 | } |
| 900 | 908 | ||
| 909 | static unsigned long to_size_factor(unsigned long sizek, char *factorp) | ||
| 910 | { | ||
| 911 | char factor; | ||
| 912 | unsigned long base = sizek; | ||
| 913 | |||
| 914 | if (base & ((1<<10) - 1)) { | ||
| 915 | /* not MB alignment */ | ||
| 916 | factor = 'K'; | ||
| 917 | } else if (base & ((1<<20) - 1)){ | ||
| 918 | factor = 'M'; | ||
| 919 | base >>= 10; | ||
| 920 | } else { | ||
| 921 | factor = 'G'; | ||
| 922 | base >>= 20; | ||
| 923 | } | ||
| 924 | |||
| 925 | *factorp = factor; | ||
| 926 | |||
| 927 | return base; | ||
| 928 | } | ||
| 929 | |||
| 901 | static unsigned int __init | 930 | static unsigned int __init |
| 902 | range_to_mtrr(unsigned int reg, unsigned long range_startk, | 931 | range_to_mtrr(unsigned int reg, unsigned long range_startk, |
| 903 | unsigned long range_sizek, unsigned char type) | 932 | unsigned long range_sizek, unsigned char type) |
| @@ -919,13 +948,21 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, | |||
| 919 | align = max_align; | 948 | align = max_align; |
| 920 | 949 | ||
| 921 | sizek = 1 << align; | 950 | sizek = 1 << align; |
| 922 | if (debug_print) | 951 | if (debug_print) { |
| 952 | char start_factor = 'K', size_factor = 'K'; | ||
| 953 | unsigned long start_base, size_base; | ||
| 954 | |||
| 955 | start_base = to_size_factor(range_startk, &start_factor), | ||
| 956 | size_base = to_size_factor(sizek, &size_factor), | ||
| 957 | |||
| 923 | printk(KERN_DEBUG "Setting variable MTRR %d, " | 958 | printk(KERN_DEBUG "Setting variable MTRR %d, " |
| 924 | "base: %ldMB, range: %ldMB, type %s\n", | 959 | "base: %ld%cB, range: %ld%cB, type %s\n", |
| 925 | reg, range_startk >> 10, sizek >> 10, | 960 | reg, start_base, start_factor, |
| 961 | size_base, size_factor, | ||
| 926 | (type == MTRR_TYPE_UNCACHABLE)?"UC": | 962 | (type == MTRR_TYPE_UNCACHABLE)?"UC": |
| 927 | ((type == MTRR_TYPE_WRBACK)?"WB":"Other") | 963 | ((type == MTRR_TYPE_WRBACK)?"WB":"Other") |
| 928 | ); | 964 | ); |
| 965 | } | ||
| 929 | save_var_mtrr(reg++, range_startk, sizek, type); | 966 | save_var_mtrr(reg++, range_startk, sizek, type); |
| 930 | range_startk += sizek; | 967 | range_startk += sizek; |
| 931 | range_sizek -= sizek; | 968 | range_sizek -= sizek; |
| @@ -970,6 +1007,8 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | |||
| 970 | /* try to append some small hole */ | 1007 | /* try to append some small hole */ |
| 971 | range0_basek = state->range_startk; | 1008 | range0_basek = state->range_startk; |
| 972 | range0_sizek = ALIGN(state->range_sizek, chunk_sizek); | 1009 | range0_sizek = ALIGN(state->range_sizek, chunk_sizek); |
| 1010 | |||
| 1011 | /* no increase */ | ||
| 973 | if (range0_sizek == state->range_sizek) { | 1012 | if (range0_sizek == state->range_sizek) { |
| 974 | if (debug_print) | 1013 | if (debug_print) |
| 975 | printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", | 1014 | printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", |
| @@ -980,13 +1019,40 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | |||
| 980 | return 0; | 1019 | return 0; |
| 981 | } | 1020 | } |
| 982 | 1021 | ||
| 983 | range0_sizek -= chunk_sizek; | 1022 | /* only cut back, when it is not the last */ |
| 984 | if (range0_sizek && sizek) { | 1023 | if (sizek) { |
| 985 | while (range0_basek + range0_sizek > (basek + sizek)) { | 1024 | while (range0_basek + range0_sizek > (basek + sizek)) { |
| 986 | range0_sizek -= chunk_sizek; | 1025 | if (range0_sizek >= chunk_sizek) |
| 987 | if (!range0_sizek) | 1026 | range0_sizek -= chunk_sizek; |
| 988 | break; | 1027 | else |
| 989 | } | 1028 | range0_sizek = 0; |
| 1029 | |||
| 1030 | if (!range0_sizek) | ||
| 1031 | break; | ||
| 1032 | } | ||
| 1033 | } | ||
| 1034 | |||
| 1035 | second_try: | ||
| 1036 | range_basek = range0_basek + range0_sizek; | ||
| 1037 | |||
| 1038 | /* one hole in the middle */ | ||
| 1039 | if (range_basek > basek && range_basek <= (basek + sizek)) | ||
| 1040 | second_sizek = range_basek - basek; | ||
| 1041 | |||
| 1042 | if (range0_sizek > state->range_sizek) { | ||
| 1043 | |||
| 1044 | /* one hole in middle or at end */ | ||
| 1045 | hole_sizek = range0_sizek - state->range_sizek - second_sizek; | ||
| 1046 | |||
| 1047 | /* hole size should be less than half of range0 size */ | ||
| 1048 | if (hole_sizek >= (range0_sizek >> 1) && | ||
| 1049 | range0_sizek >= chunk_sizek) { | ||
| 1050 | range0_sizek -= chunk_sizek; | ||
| 1051 | second_sizek = 0; | ||
| 1052 | hole_sizek = 0; | ||
| 1053 | |||
| 1054 | goto second_try; | ||
| 1055 | } | ||
| 990 | } | 1056 | } |
| 991 | 1057 | ||
| 992 | if (range0_sizek) { | 1058 | if (range0_sizek) { |
| @@ -996,50 +1062,28 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | |||
| 996 | (range0_basek + range0_sizek)<<10); | 1062 | (range0_basek + range0_sizek)<<10); |
| 997 | state->reg = range_to_mtrr(state->reg, range0_basek, | 1063 | state->reg = range_to_mtrr(state->reg, range0_basek, |
| 998 | range0_sizek, MTRR_TYPE_WRBACK); | 1064 | range0_sizek, MTRR_TYPE_WRBACK); |
| 999 | |||
| 1000 | } | ||
| 1001 | |||
| 1002 | range_basek = range0_basek + range0_sizek; | ||
| 1003 | range_sizek = chunk_sizek; | ||
| 1004 | |||
| 1005 | if (range_basek + range_sizek > basek && | ||
| 1006 | range_basek + range_sizek <= (basek + sizek)) { | ||
| 1007 | /* one hole */ | ||
| 1008 | second_basek = basek; | ||
| 1009 | second_sizek = range_basek + range_sizek - basek; | ||
| 1010 | } | 1065 | } |
| 1011 | 1066 | ||
| 1012 | /* if last piece, only could one hole near end */ | 1067 | if (range0_sizek < state->range_sizek) { |
| 1013 | if ((second_basek || !basek) && | 1068 | /* need to handle left over */ |
| 1014 | range_sizek - (state->range_sizek - range0_sizek) - second_sizek < | ||
| 1015 | (chunk_sizek >> 1)) { | ||
| 1016 | /* | ||
| 1017 | * one hole in middle (second_sizek is 0) or at end | ||
| 1018 | * (second_sizek is 0 ) | ||
| 1019 | */ | ||
| 1020 | hole_sizek = range_sizek - (state->range_sizek - range0_sizek) | ||
| 1021 | - second_sizek; | ||
| 1022 | hole_basek = range_basek + range_sizek - hole_sizek | ||
| 1023 | - second_sizek; | ||
| 1024 | } else { | ||
| 1025 | /* fallback for big hole, or several holes */ | ||
| 1026 | range_sizek = state->range_sizek - range0_sizek; | 1069 | range_sizek = state->range_sizek - range0_sizek; |
| 1027 | second_basek = 0; | 1070 | |
| 1028 | second_sizek = 0; | 1071 | if (debug_print) |
| 1072 | printk(KERN_DEBUG "range: %016lx - %016lx\n", | ||
| 1073 | range_basek<<10, | ||
| 1074 | (range_basek + range_sizek)<<10); | ||
| 1075 | state->reg = range_to_mtrr(state->reg, range_basek, | ||
| 1076 | range_sizek, MTRR_TYPE_WRBACK); | ||
| 1029 | } | 1077 | } |
| 1030 | 1078 | ||
| 1031 | if (debug_print) | ||
| 1032 | printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, | ||
| 1033 | (range_basek + range_sizek)<<10); | ||
| 1034 | state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, | ||
| 1035 | MTRR_TYPE_WRBACK); | ||
| 1036 | if (hole_sizek) { | 1079 | if (hole_sizek) { |
| 1080 | hole_basek = range_basek - hole_sizek - second_sizek; | ||
| 1037 | if (debug_print) | 1081 | if (debug_print) |
| 1038 | printk(KERN_DEBUG "hole: %016lx - %016lx\n", | 1082 | printk(KERN_DEBUG "hole: %016lx - %016lx\n", |
| 1039 | hole_basek<<10, (hole_basek + hole_sizek)<<10); | 1083 | hole_basek<<10, |
| 1040 | state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, | 1084 | (hole_basek + hole_sizek)<<10); |
| 1041 | MTRR_TYPE_UNCACHABLE); | 1085 | state->reg = range_to_mtrr(state->reg, hole_basek, |
| 1042 | 1086 | hole_sizek, MTRR_TYPE_UNCACHABLE); | |
| 1043 | } | 1087 | } |
| 1044 | 1088 | ||
| 1045 | return second_sizek; | 1089 | return second_sizek; |
| @@ -1154,11 +1198,11 @@ struct mtrr_cleanup_result { | |||
| 1154 | }; | 1198 | }; |
| 1155 | 1199 | ||
| 1156 | /* | 1200 | /* |
| 1157 | * gran_size: 1M, 2M, ..., 2G | 1201 | * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G |
| 1158 | * chunk size: gran_size, ..., 4G | 1202 | * chunk size: gran_size, ..., 2G |
| 1159 | * so we need (2+13)*6 | 1203 | * so we need (1+16)*8 |
| 1160 | */ | 1204 | */ |
| 1161 | #define NUM_RESULT 90 | 1205 | #define NUM_RESULT 136 |
| 1162 | #define PSHIFT (PAGE_SHIFT - 10) | 1206 | #define PSHIFT (PAGE_SHIFT - 10) |
| 1163 | 1207 | ||
| 1164 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; | 1208 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; |
| @@ -1168,13 +1212,14 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM]; | |||
| 1168 | static int __init mtrr_cleanup(unsigned address_bits) | 1212 | static int __init mtrr_cleanup(unsigned address_bits) |
| 1169 | { | 1213 | { |
| 1170 | unsigned long extra_remove_base, extra_remove_size; | 1214 | unsigned long extra_remove_base, extra_remove_size; |
| 1171 | unsigned long i, base, size, def, dummy; | 1215 | unsigned long base, size, def, dummy; |
| 1172 | mtrr_type type; | 1216 | mtrr_type type; |
| 1173 | int nr_range, nr_range_new; | 1217 | int nr_range, nr_range_new; |
| 1174 | u64 chunk_size, gran_size; | 1218 | u64 chunk_size, gran_size; |
| 1175 | unsigned long range_sums, range_sums_new; | 1219 | unsigned long range_sums, range_sums_new; |
| 1176 | int index_good; | 1220 | int index_good; |
| 1177 | int num_reg_good; | 1221 | int num_reg_good; |
| 1222 | int i; | ||
| 1178 | 1223 | ||
| 1179 | /* extra one for all 0 */ | 1224 | /* extra one for all 0 */ |
| 1180 | int num[MTRR_NUM_TYPES + 1]; | 1225 | int num[MTRR_NUM_TYPES + 1]; |
| @@ -1204,6 +1249,8 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
| 1204 | continue; | 1249 | continue; |
| 1205 | if (!size) | 1250 | if (!size) |
| 1206 | type = MTRR_NUM_TYPES; | 1251 | type = MTRR_NUM_TYPES; |
| 1252 | if (type == MTRR_TYPE_WRPROT) | ||
| 1253 | type = MTRR_TYPE_UNCACHABLE; | ||
| 1207 | num[type]++; | 1254 | num[type]++; |
| 1208 | } | 1255 | } |
| 1209 | 1256 | ||
| @@ -1216,23 +1263,57 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
| 1216 | num_var_ranges - num[MTRR_NUM_TYPES]) | 1263 | num_var_ranges - num[MTRR_NUM_TYPES]) |
| 1217 | return 0; | 1264 | return 0; |
| 1218 | 1265 | ||
| 1266 | /* print original var MTRRs at first, for debugging: */ | ||
| 1267 | printk(KERN_DEBUG "original variable MTRRs\n"); | ||
| 1268 | for (i = 0; i < num_var_ranges; i++) { | ||
| 1269 | char start_factor = 'K', size_factor = 'K'; | ||
| 1270 | unsigned long start_base, size_base; | ||
| 1271 | |||
| 1272 | size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); | ||
| 1273 | if (!size_base) | ||
| 1274 | continue; | ||
| 1275 | |||
| 1276 | size_base = to_size_factor(size_base, &size_factor), | ||
| 1277 | start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); | ||
| 1278 | start_base = to_size_factor(start_base, &start_factor), | ||
| 1279 | type = range_state[i].type; | ||
| 1280 | |||
| 1281 | printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", | ||
| 1282 | i, start_base, start_factor, | ||
| 1283 | size_base, size_factor, | ||
| 1284 | (type == MTRR_TYPE_UNCACHABLE) ? "UC" : | ||
| 1285 | ((type == MTRR_TYPE_WRPROT) ? "WP" : | ||
| 1286 | ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) | ||
| 1287 | ); | ||
| 1288 | } | ||
| 1289 | |||
| 1219 | memset(range, 0, sizeof(range)); | 1290 | memset(range, 0, sizeof(range)); |
| 1220 | extra_remove_size = 0; | 1291 | extra_remove_size = 0; |
| 1221 | if (mtrr_tom2) { | 1292 | extra_remove_base = 1 << (32 - PAGE_SHIFT); |
| 1222 | extra_remove_base = 1 << (32 - PAGE_SHIFT); | 1293 | if (mtrr_tom2) |
| 1223 | extra_remove_size = | 1294 | extra_remove_size = |
| 1224 | (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; | 1295 | (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; |
| 1225 | } | ||
| 1226 | nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, | 1296 | nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, |
| 1227 | extra_remove_size); | 1297 | extra_remove_size); |
| 1298 | /* | ||
| 1299 | * [0, 1M) should always be covered by var mtrr with WB | ||
| 1300 | * and fixed mtrrs should take effect before var mtrr for it | ||
| 1301 | */ | ||
| 1302 | nr_range = add_range_with_merge(range, nr_range, 0, | ||
| 1303 | (1ULL<<(20 - PAGE_SHIFT)) - 1); | ||
| 1304 | /* sort the ranges */ | ||
| 1305 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | ||
| 1306 | |||
| 1228 | range_sums = sum_ranges(range, nr_range); | 1307 | range_sums = sum_ranges(range, nr_range); |
| 1229 | printk(KERN_INFO "total RAM coverred: %ldM\n", | 1308 | printk(KERN_INFO "total RAM coverred: %ldM\n", |
| 1230 | range_sums >> (20 - PAGE_SHIFT)); | 1309 | range_sums >> (20 - PAGE_SHIFT)); |
| 1231 | 1310 | ||
| 1232 | if (mtrr_chunk_size && mtrr_gran_size) { | 1311 | if (mtrr_chunk_size && mtrr_gran_size) { |
| 1233 | int num_reg; | 1312 | int num_reg; |
| 1313 | char gran_factor, chunk_factor, lose_factor; | ||
| 1314 | unsigned long gran_base, chunk_base, lose_base; | ||
| 1234 | 1315 | ||
| 1235 | debug_print = 1; | 1316 | debug_print++; |
| 1236 | /* convert ranges to var ranges state */ | 1317 | /* convert ranges to var ranges state */ |
| 1237 | num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, | 1318 | num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, |
| 1238 | mtrr_gran_size); | 1319 | mtrr_gran_size); |
| @@ -1256,34 +1337,48 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
| 1256 | result[i].lose_cover_sizek = | 1337 | result[i].lose_cover_sizek = |
| 1257 | (range_sums - range_sums_new) << PSHIFT; | 1338 | (range_sums - range_sums_new) << PSHIFT; |
| 1258 | 1339 | ||
| 1259 | printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", | 1340 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), |
| 1260 | result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10, | 1341 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), |
| 1261 | result[i].chunk_sizek >> 10); | 1342 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), |
| 1262 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", | 1343 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", |
| 1344 | result[i].bad?"*BAD*":" ", | ||
| 1345 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
| 1346 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
| 1263 | result[i].num_reg, result[i].bad?"-":"", | 1347 | result[i].num_reg, result[i].bad?"-":"", |
| 1264 | result[i].lose_cover_sizek >> 10); | 1348 | lose_base, lose_factor); |
| 1265 | if (!result[i].bad) { | 1349 | if (!result[i].bad) { |
| 1266 | set_var_mtrr_all(address_bits); | 1350 | set_var_mtrr_all(address_bits); |
| 1267 | return 1; | 1351 | return 1; |
| 1268 | } | 1352 | } |
| 1269 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " | 1353 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " |
| 1270 | "will find optimal one\n"); | 1354 | "will find optimal one\n"); |
| 1271 | debug_print = 0; | 1355 | debug_print--; |
| 1272 | memset(result, 0, sizeof(result[0])); | 1356 | memset(result, 0, sizeof(result[0])); |
| 1273 | } | 1357 | } |
| 1274 | 1358 | ||
| 1275 | i = 0; | 1359 | i = 0; |
| 1276 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); | 1360 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); |
| 1277 | memset(result, 0, sizeof(result)); | 1361 | memset(result, 0, sizeof(result)); |
| 1278 | for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { | 1362 | for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { |
| 1279 | for (chunk_size = gran_size; chunk_size < (1ULL<<33); | 1363 | char gran_factor; |
| 1364 | unsigned long gran_base; | ||
| 1365 | |||
| 1366 | if (debug_print) | ||
| 1367 | gran_base = to_size_factor(gran_size >> 10, &gran_factor); | ||
| 1368 | |||
| 1369 | for (chunk_size = gran_size; chunk_size < (1ULL<<32); | ||
| 1280 | chunk_size <<= 1) { | 1370 | chunk_size <<= 1) { |
| 1281 | int num_reg; | 1371 | int num_reg; |
| 1282 | 1372 | ||
| 1283 | if (debug_print) | 1373 | if (debug_print) { |
| 1284 | printk(KERN_INFO | 1374 | char chunk_factor; |
| 1285 | "\ngran_size: %lldM chunk_size_size: %lldM\n", | 1375 | unsigned long chunk_base; |
| 1286 | gran_size >> 20, chunk_size >> 20); | 1376 | |
| 1377 | chunk_base = to_size_factor(chunk_size>>10, &chunk_factor), | ||
| 1378 | printk(KERN_INFO "\n"); | ||
| 1379 | printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n", | ||
| 1380 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
| 1381 | } | ||
| 1287 | if (i >= NUM_RESULT) | 1382 | if (i >= NUM_RESULT) |
| 1288 | continue; | 1383 | continue; |
| 1289 | 1384 | ||
| @@ -1326,12 +1421,18 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
| 1326 | 1421 | ||
| 1327 | /* print out all */ | 1422 | /* print out all */ |
| 1328 | for (i = 0; i < NUM_RESULT; i++) { | 1423 | for (i = 0; i < NUM_RESULT; i++) { |
| 1329 | printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", | 1424 | char gran_factor, chunk_factor, lose_factor; |
| 1330 | result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, | 1425 | unsigned long gran_base, chunk_base, lose_base; |
| 1331 | result[i].chunk_sizek >> 10); | 1426 | |
| 1332 | printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n", | 1427 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), |
| 1333 | result[i].num_reg, result[i].bad?"-":"", | 1428 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), |
| 1334 | result[i].lose_cover_sizek >> 10); | 1429 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), |
| 1430 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | ||
| 1431 | result[i].bad?"*BAD*":" ", | ||
| 1432 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
| 1433 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
| 1434 | result[i].num_reg, result[i].bad?"-":"", | ||
| 1435 | lose_base, lose_factor); | ||
| 1335 | } | 1436 | } |
| 1336 | 1437 | ||
| 1337 | /* try to find the optimal index */ | 1438 | /* try to find the optimal index */ |
| @@ -1339,10 +1440,8 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
| 1339 | nr_mtrr_spare_reg = num_var_ranges - 1; | 1440 | nr_mtrr_spare_reg = num_var_ranges - 1; |
| 1340 | num_reg_good = -1; | 1441 | num_reg_good = -1; |
| 1341 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { | 1442 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { |
| 1342 | if (!min_loss_pfn[i]) { | 1443 | if (!min_loss_pfn[i]) |
| 1343 | num_reg_good = i; | 1444 | num_reg_good = i; |
| 1344 | break; | ||
| 1345 | } | ||
| 1346 | } | 1445 | } |
| 1347 | 1446 | ||
| 1348 | index_good = -1; | 1447 | index_good = -1; |
| @@ -1358,21 +1457,26 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
| 1358 | } | 1457 | } |
| 1359 | 1458 | ||
| 1360 | if (index_good != -1) { | 1459 | if (index_good != -1) { |
| 1460 | char gran_factor, chunk_factor, lose_factor; | ||
| 1461 | unsigned long gran_base, chunk_base, lose_base; | ||
| 1462 | |||
| 1361 | printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); | 1463 | printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); |
| 1362 | i = index_good; | 1464 | i = index_good; |
| 1363 | printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", | 1465 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), |
| 1364 | result[i].gran_sizek >> 10, | 1466 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), |
| 1365 | result[i].chunk_sizek >> 10); | 1467 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), |
| 1366 | printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n", | 1468 | printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t", |
| 1367 | result[i].num_reg, | 1469 | gran_base, gran_factor, chunk_base, chunk_factor); |
| 1368 | result[i].lose_cover_sizek >> 10); | 1470 | printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n", |
| 1471 | result[i].num_reg, lose_base, lose_factor); | ||
| 1369 | /* convert ranges to var ranges state */ | 1472 | /* convert ranges to var ranges state */ |
| 1370 | chunk_size = result[i].chunk_sizek; | 1473 | chunk_size = result[i].chunk_sizek; |
| 1371 | chunk_size <<= 10; | 1474 | chunk_size <<= 10; |
| 1372 | gran_size = result[i].gran_sizek; | 1475 | gran_size = result[i].gran_sizek; |
| 1373 | gran_size <<= 10; | 1476 | gran_size <<= 10; |
| 1374 | debug_print = 1; | 1477 | debug_print++; |
| 1375 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); | 1478 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); |
| 1479 | debug_print--; | ||
| 1376 | set_var_mtrr_all(address_bits); | 1480 | set_var_mtrr_all(address_bits); |
| 1377 | return 1; | 1481 | return 1; |
| 1378 | } | 1482 | } |
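The %ld%c format strings introduced above rely on a small to_size_factor() helper that the same patch adds elsewhere in this file but that does not appear in these hunks. A minimal sketch of what such a helper looks like, reconstructed for illustration only (the input is a size in KiB, the output is the scaled value plus a 'K'/'M'/'G' suffix):

        static unsigned long to_size_factor(unsigned long sizek, char *factorp)
        {
                unsigned long base = sizek;
                char factor;

                if (base & ((1 << 10) - 1)) {
                        /* not a whole number of MiB: keep the value in KiB */
                        factor = 'K';
                } else if (base & ((1 << 20) - 1)) {
                        /* whole MiB but not whole GiB: scale down to MiB */
                        factor = 'M';
                        base >>= 10;
                } else {
                        /* whole GiB: scale down to GiB */
                        factor = 'G';
                        base >>= 20;
                }

                *factorp = factor;
                return base;
        }

With that, a 2097152 KiB range prints as 2G and a 3072 KiB range prints as 3M, matching the reg/base/range lines emitted by the debug printks above.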
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 05cc22dbd4ff..6bff382094f5 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
| @@ -295,13 +295,19 @@ static int setup_k7_watchdog(unsigned nmi_hz) | |||
| 295 | /* setup the timer */ | 295 | /* setup the timer */ |
| 296 | wrmsr(evntsel_msr, evntsel, 0); | 296 | wrmsr(evntsel_msr, evntsel, 0); |
| 297 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); | 297 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); |
| 298 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 299 | evntsel |= K7_EVNTSEL_ENABLE; | ||
| 300 | wrmsr(evntsel_msr, evntsel, 0); | ||
| 301 | 298 | ||
| 299 | /* initialize the wd struct before enabling */ | ||
| 302 | wd->perfctr_msr = perfctr_msr; | 300 | wd->perfctr_msr = perfctr_msr; |
| 303 | wd->evntsel_msr = evntsel_msr; | 301 | wd->evntsel_msr = evntsel_msr; |
| 304 | wd->cccr_msr = 0; /* unused */ | 302 | wd->cccr_msr = 0; /* unused */ |
| 303 | |||
| 304 | /* ok, everything is initialized, announce that we're set */ | ||
| 305 | cpu_nmi_set_wd_enabled(); | ||
| 306 | |||
| 307 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 308 | evntsel |= K7_EVNTSEL_ENABLE; | ||
| 309 | wrmsr(evntsel_msr, evntsel, 0); | ||
| 310 | |||
| 305 | return 1; | 311 | return 1; |
| 306 | } | 312 | } |
| 307 | 313 | ||
| @@ -379,13 +385,19 @@ static int setup_p6_watchdog(unsigned nmi_hz) | |||
| 379 | wrmsr(evntsel_msr, evntsel, 0); | 385 | wrmsr(evntsel_msr, evntsel, 0); |
| 380 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | 386 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
| 381 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); | 387 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); |
| 382 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 383 | evntsel |= P6_EVNTSEL0_ENABLE; | ||
| 384 | wrmsr(evntsel_msr, evntsel, 0); | ||
| 385 | 388 | ||
| 389 | /* initialize the wd struct before enabling */ | ||
| 386 | wd->perfctr_msr = perfctr_msr; | 390 | wd->perfctr_msr = perfctr_msr; |
| 387 | wd->evntsel_msr = evntsel_msr; | 391 | wd->evntsel_msr = evntsel_msr; |
| 388 | wd->cccr_msr = 0; /* unused */ | 392 | wd->cccr_msr = 0; /* unused */ |
| 393 | |||
| 394 | /* ok, everything is initialized, announce that we're set */ | ||
| 395 | cpu_nmi_set_wd_enabled(); | ||
| 396 | |||
| 397 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 398 | evntsel |= P6_EVNTSEL0_ENABLE; | ||
| 399 | wrmsr(evntsel_msr, evntsel, 0); | ||
| 400 | |||
| 389 | return 1; | 401 | return 1; |
| 390 | } | 402 | } |
| 391 | 403 | ||
| @@ -432,6 +444,27 @@ static const struct wd_ops p6_wd_ops = { | |||
| 432 | #define P4_CCCR_ENABLE (1 << 12) | 444 | #define P4_CCCR_ENABLE (1 << 12) |
| 433 | #define P4_CCCR_OVF (1 << 31) | 445 | #define P4_CCCR_OVF (1 << 31) |
| 434 | 446 | ||
| 447 | #define P4_CONTROLS 18 | ||
| 448 | static unsigned int p4_controls[18] = { | ||
| 449 | MSR_P4_BPU_CCCR0, | ||
| 450 | MSR_P4_BPU_CCCR1, | ||
| 451 | MSR_P4_BPU_CCCR2, | ||
| 452 | MSR_P4_BPU_CCCR3, | ||
| 453 | MSR_P4_MS_CCCR0, | ||
| 454 | MSR_P4_MS_CCCR1, | ||
| 455 | MSR_P4_MS_CCCR2, | ||
| 456 | MSR_P4_MS_CCCR3, | ||
| 457 | MSR_P4_FLAME_CCCR0, | ||
| 458 | MSR_P4_FLAME_CCCR1, | ||
| 459 | MSR_P4_FLAME_CCCR2, | ||
| 460 | MSR_P4_FLAME_CCCR3, | ||
| 461 | MSR_P4_IQ_CCCR0, | ||
| 462 | MSR_P4_IQ_CCCR1, | ||
| 463 | MSR_P4_IQ_CCCR2, | ||
| 464 | MSR_P4_IQ_CCCR3, | ||
| 465 | MSR_P4_IQ_CCCR4, | ||
| 466 | MSR_P4_IQ_CCCR5, | ||
| 467 | }; | ||
| 435 | /* | 468 | /* |
| 436 | * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | 469 | * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter |
| 437 | * CRU_ESCR0 (with any non-null event selector) through a complemented | 470 | * CRU_ESCR0 (with any non-null event selector) through a complemented |
| @@ -473,6 +506,26 @@ static int setup_p4_watchdog(unsigned nmi_hz) | |||
| 473 | evntsel_msr = MSR_P4_CRU_ESCR0; | 506 | evntsel_msr = MSR_P4_CRU_ESCR0; |
| 474 | cccr_msr = MSR_P4_IQ_CCCR0; | 507 | cccr_msr = MSR_P4_IQ_CCCR0; |
| 475 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | 508 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); |
| 509 | |||
| 510 | /* | ||
| 511 | * If we're on the kdump kernel or other situation, we may | ||
| 512 | * still have other performance counter registers set to | ||
| 513 | * interrupt and they'll keep interrupting forever because | ||
| 514 | * of the P4_CCCR_OVF quirk. So we need to ACK all the | ||
| 515 | * pending interrupts and disable all the registers here, | ||
| 516 | * before reenabling the NMI delivery. Refer to p4_rearm() | ||
| 517 | * about the P4_CCCR_OVF quirk. | ||
| 518 | */ | ||
| 519 | if (reset_devices) { | ||
| 520 | unsigned int low, high; | ||
| 521 | int i; | ||
| 522 | |||
| 523 | for (i = 0; i < P4_CONTROLS; i++) { | ||
| 524 | rdmsr(p4_controls[i], low, high); | ||
| 525 | low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF); | ||
| 526 | wrmsr(p4_controls[i], low, high); | ||
| 527 | } | ||
| 528 | } | ||
| 476 | } else { | 529 | } else { |
| 477 | /* logical cpu 1 */ | 530 | /* logical cpu 1 */ |
| 478 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | 531 | perfctr_msr = MSR_P4_IQ_PERFCTR1; |
| @@ -499,12 +552,17 @@ static int setup_p4_watchdog(unsigned nmi_hz) | |||
| 499 | wrmsr(evntsel_msr, evntsel, 0); | 552 | wrmsr(evntsel_msr, evntsel, 0); |
| 500 | wrmsr(cccr_msr, cccr_val, 0); | 553 | wrmsr(cccr_msr, cccr_val, 0); |
| 501 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); | 554 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); |
| 502 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 555 | |
| 503 | cccr_val |= P4_CCCR_ENABLE; | ||
| 504 | wrmsr(cccr_msr, cccr_val, 0); | ||
| 505 | wd->perfctr_msr = perfctr_msr; | 556 | wd->perfctr_msr = perfctr_msr; |
| 506 | wd->evntsel_msr = evntsel_msr; | 557 | wd->evntsel_msr = evntsel_msr; |
| 507 | wd->cccr_msr = cccr_msr; | 558 | wd->cccr_msr = cccr_msr; |
| 559 | |||
| 560 | /* ok, everything is initialized, announce that we're set */ | ||
| 561 | cpu_nmi_set_wd_enabled(); | ||
| 562 | |||
| 563 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 564 | cccr_val |= P4_CCCR_ENABLE; | ||
| 565 | wrmsr(cccr_msr, cccr_val, 0); | ||
| 508 | return 1; | 566 | return 1; |
| 509 | } | 567 | } |
| 510 | 568 | ||
| @@ -620,13 +678,17 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) | |||
| 620 | wrmsr(evntsel_msr, evntsel, 0); | 678 | wrmsr(evntsel_msr, evntsel, 0); |
| 621 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | 679 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
| 622 | write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); | 680 | write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); |
| 623 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 624 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
| 625 | wrmsr(evntsel_msr, evntsel, 0); | ||
| 626 | 681 | ||
| 627 | wd->perfctr_msr = perfctr_msr; | 682 | wd->perfctr_msr = perfctr_msr; |
| 628 | wd->evntsel_msr = evntsel_msr; | 683 | wd->evntsel_msr = evntsel_msr; |
| 629 | wd->cccr_msr = 0; /* unused */ | 684 | wd->cccr_msr = 0; /* unused */ |
| 685 | |||
| 686 | /* ok, everything is initialized, announce that we're set */ | ||
| 687 | cpu_nmi_set_wd_enabled(); | ||
| 688 | |||
| 689 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 690 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
| 691 | wrmsr(evntsel_msr, evntsel, 0); | ||
| 630 | intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); | 692 | intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); |
| 631 | return 1; | 693 | return 1; |
| 632 | } | 694 | } |
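All four setup_*_watchdog() hunks above make the same ordering change: the wd fields are filled in and cpu_nmi_set_wd_enabled() is called before the LVTPC entry is unmasked and the enable bit is written, so a watchdog NMI can no longer arrive while the handler still sees the watchdog as disabled. A toy sketch of that ordering (only cpu_nmi_set_wd_enabled(), apic_write() and the APIC_* constants come from the patch; the struct and the remaining steps are placeholders):

        /* toy model of the ordering only, not a drop-in replacement for the code above */
        struct wd_toy_state {
                unsigned int perfctr_msr;
                unsigned int evntsel_msr;
                unsigned int cccr_msr;
        };

        static int setup_watchdog_toy(struct wd_toy_state *wd,
                                      unsigned int perfctr, unsigned int evntsel)
        {
                /* ... program the event select and the initial count, counter still disabled ... */

                wd->perfctr_msr = perfctr;              /* 1) publish the complete state */
                wd->evntsel_msr = evntsel;
                wd->cccr_msr = 0;

                cpu_nmi_set_wd_enabled();               /* 2) only now tell the NMI handler */

                apic_write(APIC_LVTPC, APIC_DM_NMI);    /* 3) unmask NMI delivery */
                /* 4) finally set the ENABLE bit in the event-select/CCCR MSR */

                return 1;
        }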
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 7c46e6ecedca..52b3fefbd5af 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c | |||
| @@ -5,6 +5,18 @@ | |||
| 5 | #include <asm/msr.h> | 5 | #include <asm/msr.h> |
| 6 | #include "cpu.h" | 6 | #include "cpu.h" |
| 7 | 7 | ||
| 8 | static void __cpuinit early_init_transmeta(struct cpuinfo_x86 *c) | ||
| 9 | { | ||
| 10 | u32 xlvl; | ||
| 11 | |||
| 12 | /* Transmeta-defined flags: level 0x80860001 */ | ||
| 13 | xlvl = cpuid_eax(0x80860000); | ||
| 14 | if ((xlvl & 0xffff0000) == 0x80860000) { | ||
| 15 | if (xlvl >= 0x80860001) | ||
| 16 | c->x86_capability[2] = cpuid_edx(0x80860001); | ||
| 17 | } | ||
| 18 | } | ||
| 19 | |||
| 8 | static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | 20 | static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) |
| 9 | { | 21 | { |
| 10 | unsigned int cap_mask, uk, max, dummy; | 22 | unsigned int cap_mask, uk, max, dummy; |
| @@ -12,7 +24,8 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | |||
| 12 | unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; | 24 | unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; |
| 13 | char cpu_info[65]; | 25 | char cpu_info[65]; |
| 14 | 26 | ||
| 15 | get_model_name(c); /* Same as AMD/Cyrix */ | 27 | early_init_transmeta(c); |
| 28 | |||
| 16 | display_cacheinfo(c); | 29 | display_cacheinfo(c); |
| 17 | 30 | ||
| 18 | /* Print CMS and CPU revision */ | 31 | /* Print CMS and CPU revision */ |
| @@ -85,23 +98,11 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | |||
| 85 | #endif | 98 | #endif |
| 86 | } | 99 | } |
| 87 | 100 | ||
| 88 | static void __cpuinit transmeta_identify(struct cpuinfo_x86 *c) | ||
| 89 | { | ||
| 90 | u32 xlvl; | ||
| 91 | |||
| 92 | /* Transmeta-defined flags: level 0x80860001 */ | ||
| 93 | xlvl = cpuid_eax(0x80860000); | ||
| 94 | if ((xlvl & 0xffff0000) == 0x80860000) { | ||
| 95 | if (xlvl >= 0x80860001) | ||
| 96 | c->x86_capability[2] = cpuid_edx(0x80860001); | ||
| 97 | } | ||
| 98 | } | ||
| 99 | |||
| 100 | static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { | 101 | static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { |
| 101 | .c_vendor = "Transmeta", | 102 | .c_vendor = "Transmeta", |
| 102 | .c_ident = { "GenuineTMx86", "TransmetaCPU" }, | 103 | .c_ident = { "GenuineTMx86", "TransmetaCPU" }, |
| 104 | .c_early_init = early_init_transmeta, | ||
| 103 | .c_init = init_transmeta, | 105 | .c_init = init_transmeta, |
| 104 | .c_identify = transmeta_identify, | ||
| 105 | .c_x86_vendor = X86_VENDOR_TRANSMETA, | 106 | .c_x86_vendor = X86_VENDOR_TRANSMETA, |
| 106 | }; | 107 | }; |
| 107 | 108 | ||
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 8e9cd6a8ec12..6a44d6465991 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
| @@ -36,7 +36,6 @@ | |||
| 36 | #include <linux/smp_lock.h> | 36 | #include <linux/smp_lock.h> |
| 37 | #include <linux/major.h> | 37 | #include <linux/major.h> |
| 38 | #include <linux/fs.h> | 38 | #include <linux/fs.h> |
| 39 | #include <linux/smp_lock.h> | ||
| 40 | #include <linux/device.h> | 39 | #include <linux/device.h> |
| 41 | #include <linux/cpu.h> | 40 | #include <linux/cpu.h> |
| 42 | #include <linux/notifier.h> | 41 | #include <linux/notifier.h> |
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 15e6c6bc4a46..e90a60ef10c2 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c | |||
| @@ -7,9 +7,8 @@ | |||
| 7 | 7 | ||
| 8 | #include <linux/errno.h> | 8 | #include <linux/errno.h> |
| 9 | #include <linux/crash_dump.h> | 9 | #include <linux/crash_dump.h> |
| 10 | 10 | #include <linux/uaccess.h> | |
| 11 | #include <asm/uaccess.h> | 11 | #include <linux/io.h> |
| 12 | #include <asm/io.h> | ||
| 13 | 12 | ||
| 14 | /** | 13 | /** |
| 15 | * copy_oldmem_page - copy one page from "oldmem" | 14 | * copy_oldmem_page - copy one page from "oldmem" |
| @@ -25,7 +24,7 @@ | |||
| 25 | * in the current kernel. We stitch up a pte, similar to kmap_atomic. | 24 | * in the current kernel. We stitch up a pte, similar to kmap_atomic. |
| 26 | */ | 25 | */ |
| 27 | ssize_t copy_oldmem_page(unsigned long pfn, char *buf, | 26 | ssize_t copy_oldmem_page(unsigned long pfn, char *buf, |
| 28 | size_t csize, unsigned long offset, int userbuf) | 27 | size_t csize, unsigned long offset, int userbuf) |
| 29 | { | 28 | { |
| 30 | void *vaddr; | 29 | void *vaddr; |
| 31 | 30 | ||
| @@ -33,14 +32,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, | |||
| 33 | return 0; | 32 | return 0; |
| 34 | 33 | ||
| 35 | vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); | 34 | vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); |
| 35 | if (!vaddr) | ||
| 36 | return -ENOMEM; | ||
| 36 | 37 | ||
| 37 | if (userbuf) { | 38 | if (userbuf) { |
| 38 | if (copy_to_user(buf, (vaddr + offset), csize)) { | 39 | if (copy_to_user(buf, vaddr + offset, csize)) { |
| 39 | iounmap(vaddr); | 40 | iounmap(vaddr); |
| 40 | return -EFAULT; | 41 | return -EFAULT; |
| 41 | } | 42 | } |
| 42 | } else | 43 | } else |
| 43 | memcpy(buf, (vaddr + offset), csize); | 44 | memcpy(buf, vaddr + offset, csize); |
| 44 | 45 | ||
| 45 | iounmap(vaddr); | 46 | iounmap(vaddr); |
| 46 | return csize; | 47 | return csize; |
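The NULL check added above matters because copy_oldmem_page() is driven page by page by the vmcore code; roughly what read_from_oldmem() in fs/proc/vmcore.c does, shown here as a simplified, hypothetical sketch (names and error handling trimmed):

        /* simplified caller loop, for illustration only */
        static ssize_t read_oldmem_range(char *buf, size_t count, u64 *ppos, int userbuf)
        {
                size_t read = 0;

                while (count) {
                        unsigned long pfn = (unsigned long)(*ppos >> PAGE_SHIFT);
                        unsigned long offset = (unsigned long)(*ppos & (PAGE_SIZE - 1));
                        size_t nr = min(count, (size_t)(PAGE_SIZE - offset));
                        ssize_t tmp;

                        tmp = copy_oldmem_page(pfn, buf, nr, offset, userbuf);
                        if (tmp < 0)
                                return tmp;     /* -ENOMEM or -EFAULT from the hunk above */

                        buf += nr;
                        *ppos += nr;
                        count -= nr;
                        read += nr;
                }

                return read;
        }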
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 11c11b8ec48d..2b69994fd3a8 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c | |||
| @@ -2,26 +2,49 @@ | |||
| 2 | * Debug Store support | 2 | * Debug Store support |
| 3 | * | 3 | * |
| 4 | * This provides a low-level interface to the hardware's Debug Store | 4 | * This provides a low-level interface to the hardware's Debug Store |
| 5 | * feature that is used for last branch recording (LBR) and | 5 | * feature that is used for branch trace store (BTS) and |
| 6 | * precise-event based sampling (PEBS). | 6 | * precise-event based sampling (PEBS). |
| 7 | * | 7 | * |
| 8 | * Different architectures use a different DS layout/pointer size. | 8 | * It manages: |
| 9 | * The below functions therefore work on a void*. | 9 | * - per-thread and per-cpu allocation of BTS and PEBS |
| 10 | * - buffer memory allocation (optional) | ||
| 11 | * - buffer overflow handling | ||
| 12 | * - buffer access | ||
| 10 | * | 13 | * |
| 14 | * It assumes: | ||
| 15 | * - get_task_struct on all parameter tasks | ||
| 16 | * - current is allowed to trace parameter tasks | ||
| 11 | * | 17 | * |
| 12 | * Since there is no user for PEBS, yet, only LBR (or branch | ||
| 13 | * trace store, BTS) is supported. | ||
| 14 | * | 18 | * |
| 15 | * | 19 | * Copyright (C) 2007-2008 Intel Corporation. |
| 16 | * Copyright (C) 2007 Intel Corporation. | 20 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 |
| 17 | * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 | ||
| 18 | */ | 21 | */ |
| 19 | 22 | ||
| 23 | |||
| 24 | #ifdef CONFIG_X86_DS | ||
| 25 | |||
| 20 | #include <asm/ds.h> | 26 | #include <asm/ds.h> |
| 21 | 27 | ||
| 22 | #include <linux/errno.h> | 28 | #include <linux/errno.h> |
| 23 | #include <linux/string.h> | 29 | #include <linux/string.h> |
| 24 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
| 31 | #include <linux/sched.h> | ||
| 32 | #include <linux/mm.h> | ||
| 33 | |||
| 34 | |||
| 35 | /* | ||
| 36 | * The configuration for a particular DS hardware implementation. | ||
| 37 | */ | ||
| 38 | struct ds_configuration { | ||
| 39 | /* the size of the DS structure in bytes */ | ||
| 40 | unsigned char sizeof_ds; | ||
| 41 | /* the size of one pointer-typed field in the DS structure in bytes; | ||
| 42 | this covers the first 8 fields related to buffer management. */ | ||
| 43 | unsigned char sizeof_field; | ||
| 44 | /* the size of a BTS/PEBS record in bytes */ | ||
| 45 | unsigned char sizeof_rec[2]; | ||
| 46 | }; | ||
| 47 | static struct ds_configuration ds_cfg; | ||
| 25 | 48 | ||
| 26 | 49 | ||
| 27 | /* | 50 | /* |
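Before the large hunk that follows, it may help to see how the reworked interface is meant to be driven. A hypothetical in-kernel tracer using only the functions added further down (the buffer is allocated by ds.c itself since base is NULL, and ovfl must be NULL because overflow notification is not implemented yet) could look like this:

        /* hypothetical caller of the new DS interface; sketch only, assumes <asm/ds.h> */
        static int trace_task_bts(struct task_struct *task)
        {
                const void *rec;
                size_t end, i;
                int err;

                /* let ds.c allocate a 4 KiB BTS buffer for 'task'; no overflow callback */
                err = ds_request_bts(task, /* base = */ NULL, 4096, /* ovfl = */ NULL);
                if (err < 0)
                        return err;

                err = ds_get_bts_end(task, &end);       /* buffer capacity in records */
                if (err < 0)
                        goto out;

                for (i = 0; i < end; i++) {
                        if (ds_access_bts(task, i, &rec) < 0)
                                break;
                        /* rec points at one raw record of ds_cfg.sizeof_rec[ds_bts] bytes */
                }
                err = 0;
        out:
                ds_release_bts(task);
                return err;
        }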
| @@ -44,378 +67,747 @@ | |||
| 44 | * (interrupt occurs when write pointer passes interrupt pointer) | 67 | * (interrupt occurs when write pointer passes interrupt pointer) |
| 45 | * - value to which counter is reset following counter overflow | 68 | * - value to which counter is reset following counter overflow |
| 46 | * | 69 | * |
| 47 | * On later architectures, the last branch recording hardware uses | 70 | * Later architectures use 64bit pointers throughout, whereas earlier |
| 48 | * 64bit pointers even in 32bit mode. | 71 | * architectures use 32bit pointers in 32bit mode. |
| 49 | * | ||
| 50 | * | ||
| 51 | * Branch Trace Store (BTS) records store information about control | ||
| 52 | * flow changes. They at least provide the following information: | ||
| 53 | * - source linear address | ||
| 54 | * - destination linear address | ||
| 55 | * | 72 | * |
| 56 | * Netburst supported a predicated bit that had been dropped in later | ||
| 57 | * architectures. We do not suppor it. | ||
| 58 | * | 73 | * |
| 74 | * We compute the base address for the first 8 fields based on: | ||
| 75 | * - the field size stored in the DS configuration | ||
| 76 | * - the relative field position | ||
| 77 | * - an offset giving the start of the respective region | ||
| 59 | * | 78 | * |
| 60 | * In order to abstract from the actual DS and BTS layout, we describe | 79 | * This offset is further used to index various arrays holding |
| 61 | * the access to the relevant fields. | 80 | * information for BTS and PEBS at the respective index. |
| 62 | * Thanks to Andi Kleen for proposing this design. | ||
| 63 | * | 81 | * |
| 64 | * The implementation, however, is not as general as it might seem. In | 82 | * On later 32bit processors, we only access the lower 32bit of the |
| 65 | * order to stay somewhat simple and efficient, we assume an | 83 | * 64bit pointer fields. The upper halves will be zeroed out. |
| 66 | * underlying unsigned type (mostly a pointer type) and we expect the | ||
| 67 | * field to be at least as big as that type. | ||
| 68 | */ | 84 | */ |
| 69 | 85 | ||
| 70 | /* | 86 | enum ds_field { |
| 71 | * A special from_ip address to indicate that the BTS record is an | 87 | ds_buffer_base = 0, |
| 72 | * info record that needs to be interpreted or skipped. | 88 | ds_index, |
| 73 | */ | 89 | ds_absolute_maximum, |
| 74 | #define BTS_ESCAPE_ADDRESS (-1) | 90 | ds_interrupt_threshold, |
| 91 | }; | ||
| 75 | 92 | ||
| 76 | /* | 93 | enum ds_qualifier { |
| 77 | * A field access descriptor | 94 | ds_bts = 0, |
| 78 | */ | 95 | ds_pebs |
| 79 | struct access_desc { | ||
| 80 | unsigned char offset; | ||
| 81 | unsigned char size; | ||
| 82 | }; | 96 | }; |
| 83 | 97 | ||
| 98 | static inline unsigned long ds_get(const unsigned char *base, | ||
| 99 | enum ds_qualifier qual, enum ds_field field) | ||
| 100 | { | ||
| 101 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | ||
| 102 | return *(unsigned long *)base; | ||
| 103 | } | ||
| 104 | |||
| 105 | static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | ||
| 106 | enum ds_field field, unsigned long value) | ||
| 107 | { | ||
| 108 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | ||
| 109 | (*(unsigned long *)base) = value; | ||
| 110 | } | ||
| 111 | |||
| 112 | |||
| 84 | /* | 113 | /* |
| 85 | * The configuration for a particular DS/BTS hardware implementation. | 114 | * Locking is done only for allocating BTS or PEBS resources and for |
| 115 | * guarding context and buffer memory allocation. | ||
| 116 | * | ||
| 117 | * Most functions require the current task to own the ds context part | ||
| 118 | * they are going to access. All the locking is done when validating | ||
| 119 | * access to the context. | ||
| 86 | */ | 120 | */ |
| 87 | struct ds_configuration { | 121 | static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); |
| 88 | /* the DS configuration */ | ||
| 89 | unsigned char sizeof_ds; | ||
| 90 | struct access_desc bts_buffer_base; | ||
| 91 | struct access_desc bts_index; | ||
| 92 | struct access_desc bts_absolute_maximum; | ||
| 93 | struct access_desc bts_interrupt_threshold; | ||
| 94 | /* the BTS configuration */ | ||
| 95 | unsigned char sizeof_bts; | ||
| 96 | struct access_desc from_ip; | ||
| 97 | struct access_desc to_ip; | ||
| 98 | /* BTS variants used to store additional information like | ||
| 99 | timestamps */ | ||
| 100 | struct access_desc info_type; | ||
| 101 | struct access_desc info_data; | ||
| 102 | unsigned long debugctl_mask; | ||
| 103 | }; | ||
| 104 | 122 | ||
| 105 | /* | 123 | /* |
| 106 | * The global configuration used by the below accessor functions | 124 | * Validate that the current task is allowed to access the BTS/PEBS |
| 125 | * buffer of the parameter task. | ||
| 126 | * | ||
| 127 | * Returns 0, if access is granted; -Eerrno, otherwise. | ||
| 107 | */ | 128 | */ |
| 108 | static struct ds_configuration ds_cfg; | 129 | static inline int ds_validate_access(struct ds_context *context, |
| 130 | enum ds_qualifier qual) | ||
| 131 | { | ||
| 132 | if (!context) | ||
| 133 | return -EPERM; | ||
| 134 | |||
| 135 | if (context->owner[qual] == current) | ||
| 136 | return 0; | ||
| 137 | |||
| 138 | return -EPERM; | ||
| 139 | } | ||
| 140 | |||
| 109 | 141 | ||
| 110 | /* | 142 | /* |
| 111 | * Accessor functions for some DS and BTS fields using the above | 143 | * We either support (system-wide) per-cpu or per-thread allocation. |
| 112 | * global ptrace_bts_cfg. | 144 | * We distinguish the two based on the task_struct pointer, where a |
| 145 | * NULL pointer indicates per-cpu allocation for the current cpu. | ||
| 146 | * | ||
| 147 | * Allocations are use-counted. As soon as resources are allocated, | ||
| 148 | * further allocations must be of the same type (per-cpu or | ||
| 149 | * per-thread). We model this by counting allocations (i.e. the number | ||
| 150 | * of tracers of a certain type) for one type negatively: | ||
| 151 | * =0 no tracers | ||
| 152 | * >0 number of per-thread tracers | ||
| 153 | * <0 number of per-cpu tracers | ||
| 154 | * | ||
| 155 | * The below functions to get and put tracers and to check the | ||
| 156 | * allocation type require the ds_lock to be held by the caller. | ||
| 157 | * | ||
| 158 | * Tracers essentially give the number of ds contexts for a certain | ||
| 159 | * type of allocation. | ||
| 113 | */ | 160 | */ |
| 114 | static inline unsigned long get_bts_buffer_base(char *base) | 161 | static long tracers; |
| 162 | |||
| 163 | static inline void get_tracer(struct task_struct *task) | ||
| 115 | { | 164 | { |
| 116 | return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); | 165 | tracers += (task ? 1 : -1); |
| 117 | } | 166 | } |
| 118 | static inline void set_bts_buffer_base(char *base, unsigned long value) | 167 | |
| 168 | static inline void put_tracer(struct task_struct *task) | ||
| 119 | { | 169 | { |
| 120 | (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; | 170 | tracers -= (task ? 1 : -1); |
| 121 | } | 171 | } |
| 122 | static inline unsigned long get_bts_index(char *base) | 172 | |
| 173 | static inline int check_tracer(struct task_struct *task) | ||
| 123 | { | 174 | { |
| 124 | return *(unsigned long *)(base + ds_cfg.bts_index.offset); | 175 | return (task ? (tracers >= 0) : (tracers <= 0)); |
| 125 | } | 176 | } |
| 126 | static inline void set_bts_index(char *base, unsigned long value) | 177 | |
| 178 | |||
| 179 | /* | ||
| 180 | * The DS context is either attached to a thread or to a cpu: | ||
| 181 | * - in the former case, the thread_struct contains a pointer to the | ||
| 182 | * attached context. | ||
| 183 | * - in the latter case, we use a static array of per-cpu context | ||
| 184 | * pointers. | ||
| 185 | * | ||
| 186 | * Contexts are use-counted. They are allocated on first access and | ||
| 187 | * deallocated when the last user puts the context. | ||
| 188 | * | ||
| 189 | * We distinguish between an allocating and a non-allocating get of a | ||
| 190 | * context: | ||
| 191 | * - the allocating get is used for requesting BTS/PEBS resources. It | ||
| 192 | * requires the caller to hold the global ds_lock. | ||
| 193 | * - the non-allocating get is used for all other cases. A | ||
| 194 | * non-existing context indicates an error. It acquires and releases | ||
| 195 | * the ds_lock itself for obtaining the context. | ||
| 196 | * | ||
| 197 | * A context and its DS configuration are allocated and deallocated | ||
| 198 | * together. A context always has a DS configuration of the | ||
| 199 | * appropriate size. | ||
| 200 | */ | ||
| 201 | static DEFINE_PER_CPU(struct ds_context *, system_context); | ||
| 202 | |||
| 203 | #define this_system_context per_cpu(system_context, smp_processor_id()) | ||
| 204 | |||
| 205 | /* | ||
| 206 | * Returns the pointer to the parameter task's context or to the | ||
| 207 | * system-wide context, if task is NULL. | ||
| 208 | * | ||
| 209 | * Increases the use count of the returned context, if not NULL. | ||
| 210 | */ | ||
| 211 | static inline struct ds_context *ds_get_context(struct task_struct *task) | ||
| 127 | { | 212 | { |
| 128 | (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; | 213 | struct ds_context *context; |
| 214 | |||
| 215 | spin_lock(&ds_lock); | ||
| 216 | |||
| 217 | context = (task ? task->thread.ds_ctx : this_system_context); | ||
| 218 | if (context) | ||
| 219 | context->count++; | ||
| 220 | |||
| 221 | spin_unlock(&ds_lock); | ||
| 222 | |||
| 223 | return context; | ||
| 129 | } | 224 | } |
| 130 | static inline unsigned long get_bts_absolute_maximum(char *base) | 225 | |
| 226 | /* | ||
| 227 | * Same as ds_get_context, but allocates the context and its DS | ||
| 228 | * structure, if necessary; returns NULL if out of memory. | ||
| 229 | * | ||
| 230 | * pre: requires ds_lock to be held | ||
| 231 | */ | ||
| 232 | static inline struct ds_context *ds_alloc_context(struct task_struct *task) | ||
| 131 | { | 233 | { |
| 132 | return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); | 234 | struct ds_context **p_context = |
| 235 | (task ? &task->thread.ds_ctx : &this_system_context); | ||
| 236 | struct ds_context *context = *p_context; | ||
| 237 | |||
| 238 | if (!context) { | ||
| 239 | context = kzalloc(sizeof(*context), GFP_KERNEL); | ||
| 240 | |||
| 241 | if (!context) | ||
| 242 | return NULL; | ||
| 243 | |||
| 244 | context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | ||
| 245 | if (!context->ds) { | ||
| 246 | kfree(context); | ||
| 247 | return NULL; | ||
| 248 | } | ||
| 249 | |||
| 250 | *p_context = context; | ||
| 251 | |||
| 252 | context->this = p_context; | ||
| 253 | context->task = task; | ||
| 254 | |||
| 255 | if (task) | ||
| 256 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); | ||
| 257 | |||
| 258 | if (!task || (task == current)) | ||
| 259 | wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); | ||
| 260 | |||
| 261 | get_tracer(task); | ||
| 262 | } | ||
| 263 | |||
| 264 | context->count++; | ||
| 265 | |||
| 266 | return context; | ||
| 133 | } | 267 | } |
| 134 | static inline void set_bts_absolute_maximum(char *base, unsigned long value) | 268 | |
| 269 | /* | ||
| 270 | * Decreases the use count of the parameter context, if not NULL. | ||
| 271 | * Deallocates the context, if the use count reaches zero. | ||
| 272 | */ | ||
| 273 | static inline void ds_put_context(struct ds_context *context) | ||
| 135 | { | 274 | { |
| 136 | (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; | 275 | if (!context) |
| 276 | return; | ||
| 277 | |||
| 278 | spin_lock(&ds_lock); | ||
| 279 | |||
| 280 | if (--context->count) | ||
| 281 | goto out; | ||
| 282 | |||
| 283 | *(context->this) = NULL; | ||
| 284 | |||
| 285 | if (context->task) | ||
| 286 | clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | ||
| 287 | |||
| 288 | if (!context->task || (context->task == current)) | ||
| 289 | wrmsrl(MSR_IA32_DS_AREA, 0); | ||
| 290 | |||
| 291 | put_tracer(context->task); | ||
| 292 | |||
| 293 | /* free any leftover buffers from tracers that did not | ||
| 294 | * deallocate them properly. */ | ||
| 295 | kfree(context->buffer[ds_bts]); | ||
| 296 | kfree(context->buffer[ds_pebs]); | ||
| 297 | kfree(context->ds); | ||
| 298 | kfree(context); | ||
| 299 | out: | ||
| 300 | spin_unlock(&ds_lock); | ||
| 137 | } | 301 | } |
| 138 | static inline unsigned long get_bts_interrupt_threshold(char *base) | 302 | |
| 303 | |||
| 304 | /* | ||
| 305 | * Handle a buffer overflow | ||
| 306 | * | ||
| 307 | * task: the task whose buffers are overflowing; | ||
| 308 | * NULL for a buffer overflow on the current cpu | ||
| 309 | * context: the ds context | ||
| 310 | * qual: the buffer type | ||
| 311 | */ | ||
| 312 | static void ds_overflow(struct task_struct *task, struct ds_context *context, | ||
| 313 | enum ds_qualifier qual) | ||
| 139 | { | 314 | { |
| 140 | return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); | 315 | if (!context) |
| 316 | return; | ||
| 317 | |||
| 318 | if (context->callback[qual]) | ||
| 319 | (*context->callback[qual])(task); | ||
| 320 | |||
| 321 | /* todo: do some more overflow handling */ | ||
| 141 | } | 322 | } |
| 142 | static inline void set_bts_interrupt_threshold(char *base, unsigned long value) | 323 | |
| 324 | |||
| 325 | /* | ||
| 326 | * Allocate a non-pageable buffer of the parameter size. | ||
| 327 | * Checks the memory and the locked memory rlimit. | ||
| 328 | * | ||
| 329 | * Returns the buffer, if successful; | ||
| 330 | * NULL, if out of memory or rlimit exceeded. | ||
| 331 | * | ||
| 332 | * size: the requested buffer size in bytes | ||
| 333 | * pages (out): if not NULL, contains the number of pages reserved | ||
| 334 | */ | ||
| 335 | static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) | ||
| 143 | { | 336 | { |
| 144 | (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; | 337 | unsigned long rlim, vm, pgsz; |
| 338 | void *buffer; | ||
| 339 | |||
| 340 | pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; | ||
| 341 | |||
| 342 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | ||
| 343 | vm = current->mm->total_vm + pgsz; | ||
| 344 | if (rlim < vm) | ||
| 345 | return NULL; | ||
| 346 | |||
| 347 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | ||
| 348 | vm = current->mm->locked_vm + pgsz; | ||
| 349 | if (rlim < vm) | ||
| 350 | return NULL; | ||
| 351 | |||
| 352 | buffer = kzalloc(size, GFP_KERNEL); | ||
| 353 | if (!buffer) | ||
| 354 | return NULL; | ||
| 355 | |||
| 356 | current->mm->total_vm += pgsz; | ||
| 357 | current->mm->locked_vm += pgsz; | ||
| 358 | |||
| 359 | if (pages) | ||
| 360 | *pages = pgsz; | ||
| 361 | |||
| 362 | return buffer; | ||
| 145 | } | 363 | } |
| 146 | static inline unsigned long get_from_ip(char *base) | 364 | |
| 365 | static int ds_request(struct task_struct *task, void *base, size_t size, | ||
| 366 | ds_ovfl_callback_t ovfl, enum ds_qualifier qual) | ||
| 147 | { | 367 | { |
| 148 | return *(unsigned long *)(base + ds_cfg.from_ip.offset); | 368 | struct ds_context *context; |
| 369 | unsigned long buffer, adj; | ||
| 370 | const unsigned long alignment = (1 << 3); | ||
| 371 | int error = 0; | ||
| 372 | |||
| 373 | if (!ds_cfg.sizeof_ds) | ||
| 374 | return -EOPNOTSUPP; | ||
| 375 | |||
| 376 | /* we require some space to do alignment adjustments below */ | ||
| 377 | if (size < (alignment + ds_cfg.sizeof_rec[qual])) | ||
| 378 | return -EINVAL; | ||
| 379 | |||
| 380 | /* buffer overflow notification is not yet implemented */ | ||
| 381 | if (ovfl) | ||
| 382 | return -EOPNOTSUPP; | ||
| 383 | |||
| 384 | |||
| 385 | spin_lock(&ds_lock); | ||
| 386 | |||
| 387 | if (!check_tracer(task)) | ||
| 388 | return -EPERM; | ||
| 389 | |||
| 390 | error = -ENOMEM; | ||
| 391 | context = ds_alloc_context(task); | ||
| 392 | if (!context) | ||
| 393 | goto out_unlock; | ||
| 394 | |||
| 395 | error = -EALREADY; | ||
| 396 | if (context->owner[qual] == current) | ||
| 397 | goto out_unlock; | ||
| 398 | error = -EPERM; | ||
| 399 | if (context->owner[qual] != NULL) | ||
| 400 | goto out_unlock; | ||
| 401 | context->owner[qual] = current; | ||
| 402 | |||
| 403 | spin_unlock(&ds_lock); | ||
| 404 | |||
| 405 | |||
| 406 | error = -ENOMEM; | ||
| 407 | if (!base) { | ||
| 408 | base = ds_allocate_buffer(size, &context->pages[qual]); | ||
| 409 | if (!base) | ||
| 410 | goto out_release; | ||
| 411 | |||
| 412 | context->buffer[qual] = base; | ||
| 413 | } | ||
| 414 | error = 0; | ||
| 415 | |||
| 416 | context->callback[qual] = ovfl; | ||
| 417 | |||
| 418 | /* adjust the buffer address and size to meet alignment | ||
| 419 | * constraints: | ||
| 420 | * - buffer is double-word aligned | ||
| 421 | * - size is multiple of record size | ||
| 422 | * | ||
| 423 | * We checked the size at the very beginning; we have enough | ||
| 424 | * space to do the adjustment. | ||
| 425 | */ | ||
| 426 | buffer = (unsigned long)base; | ||
| 427 | |||
| 428 | adj = ALIGN(buffer, alignment) - buffer; | ||
| 429 | buffer += adj; | ||
| 430 | size -= adj; | ||
| 431 | |||
| 432 | size /= ds_cfg.sizeof_rec[qual]; | ||
| 433 | size *= ds_cfg.sizeof_rec[qual]; | ||
| 434 | |||
| 435 | ds_set(context->ds, qual, ds_buffer_base, buffer); | ||
| 436 | ds_set(context->ds, qual, ds_index, buffer); | ||
| 437 | ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); | ||
| 438 | |||
| 439 | if (ovfl) { | ||
| 440 | /* todo: select a suitable interrupt threshold */ | ||
| 441 | } else | ||
| 442 | ds_set(context->ds, qual, | ||
| 443 | ds_interrupt_threshold, buffer + size + 1); | ||
| 444 | |||
| 445 | /* we keep the context until ds_release */ | ||
| 446 | return error; | ||
| 447 | |||
| 448 | out_release: | ||
| 449 | context->owner[qual] = NULL; | ||
| 450 | ds_put_context(context); | ||
| 451 | return error; | ||
| 452 | |||
| 453 | out_unlock: | ||
| 454 | spin_unlock(&ds_lock); | ||
| 455 | ds_put_context(context); | ||
| 456 | return error; | ||
| 149 | } | 457 | } |
| 150 | static inline void set_from_ip(char *base, unsigned long value) | 458 | |
| 459 | int ds_request_bts(struct task_struct *task, void *base, size_t size, | ||
| 460 | ds_ovfl_callback_t ovfl) | ||
| 151 | { | 461 | { |
| 152 | (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; | 462 | return ds_request(task, base, size, ovfl, ds_bts); |
| 153 | } | 463 | } |
| 154 | static inline unsigned long get_to_ip(char *base) | 464 | |
| 465 | int ds_request_pebs(struct task_struct *task, void *base, size_t size, | ||
| 466 | ds_ovfl_callback_t ovfl) | ||
| 155 | { | 467 | { |
| 156 | return *(unsigned long *)(base + ds_cfg.to_ip.offset); | 468 | return ds_request(task, base, size, ovfl, ds_pebs); |
| 157 | } | 469 | } |
| 158 | static inline void set_to_ip(char *base, unsigned long value) | 470 | |
| 471 | static int ds_release(struct task_struct *task, enum ds_qualifier qual) | ||
| 159 | { | 472 | { |
| 160 | (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; | 473 | struct ds_context *context; |
| 474 | int error; | ||
| 475 | |||
| 476 | context = ds_get_context(task); | ||
| 477 | error = ds_validate_access(context, qual); | ||
| 478 | if (error < 0) | ||
| 479 | goto out; | ||
| 480 | |||
| 481 | kfree(context->buffer[qual]); | ||
| 482 | context->buffer[qual] = NULL; | ||
| 483 | |||
| 484 | current->mm->total_vm -= context->pages[qual]; | ||
| 485 | current->mm->locked_vm -= context->pages[qual]; | ||
| 486 | context->pages[qual] = 0; | ||
| 487 | context->owner[qual] = NULL; | ||
| 488 | |||
| 489 | /* | ||
| 490 | * we put the context twice: | ||
| 491 | * once for the ds_get_context | ||
| 492 | * once for the corresponding ds_request | ||
| 493 | */ | ||
| 494 | ds_put_context(context); | ||
| 495 | out: | ||
| 496 | ds_put_context(context); | ||
| 497 | return error; | ||
| 161 | } | 498 | } |
| 162 | static inline unsigned char get_info_type(char *base) | 499 | |
| 500 | int ds_release_bts(struct task_struct *task) | ||
| 163 | { | 501 | { |
| 164 | return *(unsigned char *)(base + ds_cfg.info_type.offset); | 502 | return ds_release(task, ds_bts); |
| 165 | } | 503 | } |
| 166 | static inline void set_info_type(char *base, unsigned char value) | 504 | |
| 505 | int ds_release_pebs(struct task_struct *task) | ||
| 167 | { | 506 | { |
| 168 | (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; | 507 | return ds_release(task, ds_pebs); |
| 169 | } | 508 | } |
| 170 | static inline unsigned long get_info_data(char *base) | 509 | |
| 510 | static int ds_get_index(struct task_struct *task, size_t *pos, | ||
| 511 | enum ds_qualifier qual) | ||
| 171 | { | 512 | { |
| 172 | return *(unsigned long *)(base + ds_cfg.info_data.offset); | 513 | struct ds_context *context; |
| 514 | unsigned long base, index; | ||
| 515 | int error; | ||
| 516 | |||
| 517 | context = ds_get_context(task); | ||
| 518 | error = ds_validate_access(context, qual); | ||
| 519 | if (error < 0) | ||
| 520 | goto out; | ||
| 521 | |||
| 522 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
| 523 | index = ds_get(context->ds, qual, ds_index); | ||
| 524 | |||
| 525 | error = ((index - base) / ds_cfg.sizeof_rec[qual]); | ||
| 526 | if (pos) | ||
| 527 | *pos = error; | ||
| 528 | out: | ||
| 529 | ds_put_context(context); | ||
| 530 | return error; | ||
| 173 | } | 531 | } |
| 174 | static inline void set_info_data(char *base, unsigned long value) | 532 | |
| 533 | int ds_get_bts_index(struct task_struct *task, size_t *pos) | ||
| 175 | { | 534 | { |
| 176 | (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; | 535 | return ds_get_index(task, pos, ds_bts); |
| 177 | } | 536 | } |
| 178 | 537 | ||
| 538 | int ds_get_pebs_index(struct task_struct *task, size_t *pos) | ||
| 539 | { | ||
| 540 | return ds_get_index(task, pos, ds_pebs); | ||
| 541 | } | ||
| 179 | 542 | ||
| 180 | int ds_allocate(void **dsp, size_t bts_size_in_bytes) | 543 | static int ds_get_end(struct task_struct *task, size_t *pos, |
| 544 | enum ds_qualifier qual) | ||
| 181 | { | 545 | { |
| 182 | size_t bts_size_in_records; | 546 | struct ds_context *context; |
| 183 | unsigned long bts; | 547 | unsigned long base, end; |
| 184 | void *ds; | 548 | int error; |
| 549 | |||
| 550 | context = ds_get_context(task); | ||
| 551 | error = ds_validate_access(context, qual); | ||
| 552 | if (error < 0) | ||
| 553 | goto out; | ||
| 554 | |||
| 555 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
| 556 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
| 557 | |||
| 558 | error = ((end - base) / ds_cfg.sizeof_rec[qual]); | ||
| 559 | if (pos) | ||
| 560 | *pos = error; | ||
| 561 | out: | ||
| 562 | ds_put_context(context); | ||
| 563 | return error; | ||
| 564 | } | ||
| 185 | 565 | ||
| 186 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | 566 | int ds_get_bts_end(struct task_struct *task, size_t *pos) |
| 187 | return -EOPNOTSUPP; | 567 | { |
| 568 | return ds_get_end(task, pos, ds_bts); | ||
| 569 | } | ||
| 188 | 570 | ||
| 189 | if (bts_size_in_bytes < 0) | 571 | int ds_get_pebs_end(struct task_struct *task, size_t *pos) |
| 190 | return -EINVAL; | 572 | { |
| 573 | return ds_get_end(task, pos, ds_pebs); | ||
| 574 | } | ||
| 191 | 575 | ||
| 192 | bts_size_in_records = | 576 | static int ds_access(struct task_struct *task, size_t index, |
| 193 | bts_size_in_bytes / ds_cfg.sizeof_bts; | 577 | const void **record, enum ds_qualifier qual) |
| 194 | bts_size_in_bytes = | 578 | { |
| 195 | bts_size_in_records * ds_cfg.sizeof_bts; | 579 | struct ds_context *context; |
| 580 | unsigned long base, idx; | ||
| 581 | int error; | ||
| 196 | 582 | ||
| 197 | if (bts_size_in_bytes <= 0) | 583 | if (!record) |
| 198 | return -EINVAL; | 584 | return -EINVAL; |
| 199 | 585 | ||
| 200 | bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); | 586 | context = ds_get_context(task); |
| 201 | 587 | error = ds_validate_access(context, qual); | |
| 202 | if (!bts) | 588 | if (error < 0) |
| 203 | return -ENOMEM; | 589 | goto out; |
| 204 | 590 | ||
| 205 | ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | 591 | base = ds_get(context->ds, qual, ds_buffer_base); |
| 592 | idx = base + (index * ds_cfg.sizeof_rec[qual]); | ||
| 206 | 593 | ||
| 207 | if (!ds) { | 594 | error = -EINVAL; |
| 208 | kfree((void *)bts); | 595 | if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) |
| 209 | return -ENOMEM; | 596 | goto out; |
| 210 | } | ||
| 211 | |||
| 212 | set_bts_buffer_base(ds, bts); | ||
| 213 | set_bts_index(ds, bts); | ||
| 214 | set_bts_absolute_maximum(ds, bts + bts_size_in_bytes); | ||
| 215 | set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1); | ||
| 216 | 597 | ||
| 217 | *dsp = ds; | 598 | *record = (const void *)idx; |
| 218 | return 0; | 599 | error = ds_cfg.sizeof_rec[qual]; |
| 600 | out: | ||
| 601 | ds_put_context(context); | ||
| 602 | return error; | ||
| 219 | } | 603 | } |
| 220 | 604 | ||
| 221 | int ds_free(void **dsp) | 605 | int ds_access_bts(struct task_struct *task, size_t index, const void **record) |
| 222 | { | 606 | { |
| 223 | if (*dsp) { | 607 | return ds_access(task, index, record, ds_bts); |
| 224 | kfree((void *)get_bts_buffer_base(*dsp)); | ||
| 225 | kfree(*dsp); | ||
| 226 | *dsp = NULL; | ||
| 227 | } | ||
| 228 | return 0; | ||
| 229 | } | 608 | } |
| 230 | 609 | ||
| 231 | int ds_get_bts_size(void *ds) | 610 | int ds_access_pebs(struct task_struct *task, size_t index, const void **record) |
| 232 | { | 611 | { |
| 233 | int size_in_bytes; | 612 | return ds_access(task, index, record, ds_pebs); |
| 234 | |||
| 235 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
| 236 | return -EOPNOTSUPP; | ||
| 237 | |||
| 238 | if (!ds) | ||
| 239 | return 0; | ||
| 240 | |||
| 241 | size_in_bytes = | ||
| 242 | get_bts_absolute_maximum(ds) - | ||
| 243 | get_bts_buffer_base(ds); | ||
| 244 | return size_in_bytes; | ||
| 245 | } | 613 | } |
| 246 | 614 | ||
| 247 | int ds_get_bts_end(void *ds) | 615 | static int ds_write(struct task_struct *task, const void *record, size_t size, |
| 616 | enum ds_qualifier qual, int force) | ||
| 248 | { | 617 | { |
| 249 | int size_in_bytes = ds_get_bts_size(ds); | 618 | struct ds_context *context; |
| 250 | 619 | int error; | |
| 251 | if (size_in_bytes <= 0) | ||
| 252 | return size_in_bytes; | ||
| 253 | 620 | ||
| 254 | return size_in_bytes / ds_cfg.sizeof_bts; | 621 | if (!record) |
| 255 | } | 622 | return -EINVAL; |
| 256 | 623 | ||
| 257 | int ds_get_bts_index(void *ds) | 624 | error = -EPERM; |
| 258 | { | 625 | context = ds_get_context(task); |
| 259 | int index_offset_in_bytes; | 626 | if (!context) |
| 627 | goto out; | ||
| 260 | 628 | ||
| 261 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | 629 | if (!force) { |
| 262 | return -EOPNOTSUPP; | 630 | error = ds_validate_access(context, qual); |
| 631 | if (error < 0) | ||
| 632 | goto out; | ||
| 633 | } | ||
| 263 | 634 | ||
| 264 | index_offset_in_bytes = | 635 | error = 0; |
| 265 | get_bts_index(ds) - | 636 | while (size) { |
| 266 | get_bts_buffer_base(ds); | 637 | unsigned long base, index, end, write_end, int_th; |
| 638 | unsigned long write_size, adj_write_size; | ||
| 639 | |||
| 640 | /* | ||
| 641 | * write as much as possible without producing an | ||
| 642 | * overflow interrupt. | ||
| 643 | * | ||
| 644 | * interrupt_threshold must either be | ||
| 645 | * - bigger than absolute_maximum or | ||
| 646 | * - point to a record between buffer_base and absolute_maximum | ||
| 647 | * | ||
| 648 | * index points to a valid record. | ||
| 649 | */ | ||
| 650 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
| 651 | index = ds_get(context->ds, qual, ds_index); | ||
| 652 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
| 653 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
| 654 | |||
| 655 | write_end = min(end, int_th); | ||
| 656 | |||
| 657 | /* if we are already beyond the interrupt threshold, | ||
| 658 | * we fill the entire buffer */ | ||
| 659 | if (write_end <= index) | ||
| 660 | write_end = end; | ||
| 661 | |||
| 662 | if (write_end <= index) | ||
| 663 | goto out; | ||
| 664 | |||
| 665 | write_size = min((unsigned long) size, write_end - index); | ||
| 666 | memcpy((void *)index, record, write_size); | ||
| 667 | |||
| 668 | record = (const char *)record + write_size; | ||
| 669 | size -= write_size; | ||
| 670 | error += write_size; | ||
| 671 | |||
| 672 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | ||
| 673 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | ||
| 674 | |||
| 675 | /* zero out trailing bytes */ | ||
| 676 | memset((char *)index + write_size, 0, | ||
| 677 | adj_write_size - write_size); | ||
| 678 | index += adj_write_size; | ||
| 679 | |||
| 680 | if (index >= end) | ||
| 681 | index = base; | ||
| 682 | ds_set(context->ds, qual, ds_index, index); | ||
| 683 | |||
| 684 | if (index >= int_th) | ||
| 685 | ds_overflow(task, context, qual); | ||
| 686 | } | ||
| 267 | 687 | ||
| 268 | return index_offset_in_bytes / ds_cfg.sizeof_bts; | 688 | out: |
| 689 | ds_put_context(context); | ||
| 690 | return error; | ||
| 269 | } | 691 | } |
| 270 | 692 | ||
| 271 | int ds_set_overflow(void *ds, int method) | 693 | int ds_write_bts(struct task_struct *task, const void *record, size_t size) |
| 272 | { | 694 | { |
| 273 | switch (method) { | 695 | return ds_write(task, record, size, ds_bts, /* force = */ 0); |
| 274 | case DS_O_SIGNAL: | ||
| 275 | return -EOPNOTSUPP; | ||
| 276 | case DS_O_WRAP: | ||
| 277 | return 0; | ||
| 278 | default: | ||
| 279 | return -EINVAL; | ||
| 280 | } | ||
| 281 | } | 696 | } |
| 282 | 697 | ||
| 283 | int ds_get_overflow(void *ds) | 698 | int ds_write_pebs(struct task_struct *task, const void *record, size_t size) |
| 284 | { | 699 | { |
| 285 | return DS_O_WRAP; | 700 | return ds_write(task, record, size, ds_pebs, /* force = */ 0); |
| 286 | } | 701 | } |
| 287 | 702 | ||
| 288 | int ds_clear(void *ds) | 703 | int ds_unchecked_write_bts(struct task_struct *task, |
| 704 | const void *record, size_t size) | ||
| 289 | { | 705 | { |
| 290 | int bts_size = ds_get_bts_size(ds); | 706 | return ds_write(task, record, size, ds_bts, /* force = */ 1); |
| 291 | unsigned long bts_base; | ||
| 292 | |||
| 293 | if (bts_size <= 0) | ||
| 294 | return bts_size; | ||
| 295 | |||
| 296 | bts_base = get_bts_buffer_base(ds); | ||
| 297 | memset((void *)bts_base, 0, bts_size); | ||
| 298 | |||
| 299 | set_bts_index(ds, bts_base); | ||
| 300 | return 0; | ||
| 301 | } | 707 | } |
| 302 | 708 | ||
| 303 | int ds_read_bts(void *ds, int index, struct bts_struct *out) | 709 | int ds_unchecked_write_pebs(struct task_struct *task, |
| 710 | const void *record, size_t size) | ||
| 304 | { | 711 | { |
| 305 | void *bts; | 712 | return ds_write(task, record, size, ds_pebs, /* force = */ 1); |
| 713 | } | ||
| 306 | 714 | ||
| 307 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | 715 | static int ds_reset_or_clear(struct task_struct *task, |
| 308 | return -EOPNOTSUPP; | 716 | enum ds_qualifier qual, int clear) |
| 717 | { | ||
| 718 | struct ds_context *context; | ||
| 719 | unsigned long base, end; | ||
| 720 | int error; | ||
| 309 | 721 | ||
| 310 | if (index < 0) | 722 | context = ds_get_context(task); |
| 311 | return -EINVAL; | 723 | error = ds_validate_access(context, qual); |
| 724 | if (error < 0) | ||
| 725 | goto out; | ||
| 312 | 726 | ||
| 313 | if (index >= ds_get_bts_size(ds)) | 727 | base = ds_get(context->ds, qual, ds_buffer_base); |
| 314 | return -EINVAL; | 728 | end = ds_get(context->ds, qual, ds_absolute_maximum); |
| 315 | 729 | ||
| 316 | bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); | 730 | if (clear) |
| 731 | memset((void *)base, 0, end - base); | ||
| 317 | 732 | ||
| 318 | memset(out, 0, sizeof(*out)); | 733 | ds_set(context->ds, qual, ds_index, base); |
| 319 | if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) { | ||
| 320 | out->qualifier = get_info_type(bts); | ||
| 321 | out->variant.jiffies = get_info_data(bts); | ||
| 322 | } else { | ||
| 323 | out->qualifier = BTS_BRANCH; | ||
| 324 | out->variant.lbr.from_ip = get_from_ip(bts); | ||
| 325 | out->variant.lbr.to_ip = get_to_ip(bts); | ||
| 326 | } | ||
| 327 | 734 | ||
| 328 | return sizeof(*out);; | 735 | error = 0; |
| 736 | out: | ||
| 737 | ds_put_context(context); | ||
| 738 | return error; | ||
| 329 | } | 739 | } |
| 330 | 740 | ||
| 331 | int ds_write_bts(void *ds, const struct bts_struct *in) | 741 | int ds_reset_bts(struct task_struct *task) |
| 332 | { | 742 | { |
| 333 | unsigned long bts; | 743 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); |
| 334 | 744 | } | |
| 335 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
| 336 | return -EOPNOTSUPP; | ||
| 337 | |||
| 338 | if (ds_get_bts_size(ds) <= 0) | ||
| 339 | return -ENXIO; | ||
| 340 | 745 | ||
| 341 | bts = get_bts_index(ds); | 746 | int ds_reset_pebs(struct task_struct *task) |
| 747 | { | ||
| 748 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); | ||
| 749 | } | ||
| 342 | 750 | ||
| 343 | memset((void *)bts, 0, ds_cfg.sizeof_bts); | 751 | int ds_clear_bts(struct task_struct *task) |
| 344 | switch (in->qualifier) { | 752 | { |
| 345 | case BTS_INVALID: | 753 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); |
| 346 | break; | 754 | } |
| 347 | 755 | ||
| 348 | case BTS_BRANCH: | 756 | int ds_clear_pebs(struct task_struct *task) |
| 349 | set_from_ip((void *)bts, in->variant.lbr.from_ip); | 757 | { |
| 350 | set_to_ip((void *)bts, in->variant.lbr.to_ip); | 758 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); |
| 351 | break; | 759 | } |
| 352 | 760 | ||
| 353 | case BTS_TASK_ARRIVES: | 761 | int ds_get_pebs_reset(struct task_struct *task, u64 *value) |
| 354 | case BTS_TASK_DEPARTS: | 762 | { |
| 355 | set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); | 763 | struct ds_context *context; |
| 356 | set_info_type((void *)bts, in->qualifier); | 764 | int error; |
| 357 | set_info_data((void *)bts, in->variant.jiffies); | ||
| 358 | break; | ||
| 359 | 765 | ||
| 360 | default: | 766 | if (!value) |
| 361 | return -EINVAL; | 767 | return -EINVAL; |
| 362 | } | ||
| 363 | 768 | ||
| 364 | bts = bts + ds_cfg.sizeof_bts; | 769 | context = ds_get_context(task); |
| 365 | if (bts >= get_bts_absolute_maximum(ds)) | 770 | error = ds_validate_access(context, ds_pebs); |
| 366 | bts = get_bts_buffer_base(ds); | 771 | if (error < 0) |
| 367 | set_bts_index(ds, bts); | 772 | goto out; |
| 368 | 773 | ||
| 369 | return ds_cfg.sizeof_bts; | 774 | *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); |
| 775 | |||
| 776 | error = 0; | ||
| 777 | out: | ||
| 778 | ds_put_context(context); | ||
| 779 | return error; | ||
| 370 | } | 780 | } |
| 371 | 781 | ||
| 372 | unsigned long ds_debugctl_mask(void) | 782 | int ds_set_pebs_reset(struct task_struct *task, u64 value) |
| 373 | { | 783 | { |
| 374 | return ds_cfg.debugctl_mask; | 784 | struct ds_context *context; |
| 375 | } | 785 | int error; |
| 376 | 786 | ||
| 377 | #ifdef __i386__ | 787 | context = ds_get_context(task); |
| 378 | static const struct ds_configuration ds_cfg_netburst = { | 788 | error = ds_validate_access(context, ds_pebs); |
| 379 | .sizeof_ds = 9 * 4, | 789 | if (error < 0) |
| 380 | .bts_buffer_base = { 0, 4 }, | 790 | goto out; |
| 381 | .bts_index = { 4, 4 }, | ||
| 382 | .bts_absolute_maximum = { 8, 4 }, | ||
| 383 | .bts_interrupt_threshold = { 12, 4 }, | ||
| 384 | .sizeof_bts = 3 * 4, | ||
| 385 | .from_ip = { 0, 4 }, | ||
| 386 | .to_ip = { 4, 4 }, | ||
| 387 | .info_type = { 4, 1 }, | ||
| 388 | .info_data = { 8, 4 }, | ||
| 389 | .debugctl_mask = (1<<2)|(1<<3) | ||
| 390 | }; | ||
| 391 | 791 | ||
| 392 | static const struct ds_configuration ds_cfg_pentium_m = { | 792 | *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; |
| 393 | .sizeof_ds = 9 * 4, | 793 | |
| 394 | .bts_buffer_base = { 0, 4 }, | 794 | error = 0; |
| 395 | .bts_index = { 4, 4 }, | 795 | out: |
| 396 | .bts_absolute_maximum = { 8, 4 }, | 796 | ds_put_context(context); |
| 397 | .bts_interrupt_threshold = { 12, 4 }, | 797 | return error; |
| 398 | .sizeof_bts = 3 * 4, | 798 | } |
| 399 | .from_ip = { 0, 4 }, | 799 | |
| 400 | .to_ip = { 4, 4 }, | 800 | static const struct ds_configuration ds_cfg_var = { |
| 401 | .info_type = { 4, 1 }, | 801 | .sizeof_ds = sizeof(long) * 12, |
| 402 | .info_data = { 8, 4 }, | 802 | .sizeof_field = sizeof(long), |
| 403 | .debugctl_mask = (1<<6)|(1<<7) | 803 | .sizeof_rec[ds_bts] = sizeof(long) * 3, |
| 804 | .sizeof_rec[ds_pebs] = sizeof(long) * 10 | ||
| 404 | }; | 805 | }; |
| 405 | #endif /* _i386_ */ | 806 | static const struct ds_configuration ds_cfg_64 = { |
| 406 | 807 | .sizeof_ds = 8 * 12, | |
| 407 | static const struct ds_configuration ds_cfg_core2 = { | 808 | .sizeof_field = 8, |
| 408 | .sizeof_ds = 9 * 8, | 809 | .sizeof_rec[ds_bts] = 8 * 3, |
| 409 | .bts_buffer_base = { 0, 8 }, | 810 | .sizeof_rec[ds_pebs] = 8 * 10 |
| 410 | .bts_index = { 8, 8 }, | ||
| 411 | .bts_absolute_maximum = { 16, 8 }, | ||
| 412 | .bts_interrupt_threshold = { 24, 8 }, | ||
| 413 | .sizeof_bts = 3 * 8, | ||
| 414 | .from_ip = { 0, 8 }, | ||
| 415 | .to_ip = { 8, 8 }, | ||
| 416 | .info_type = { 8, 1 }, | ||
| 417 | .info_data = { 16, 8 }, | ||
| 418 | .debugctl_mask = (1<<6)|(1<<7)|(1<<9) | ||
| 419 | }; | 811 | }; |
| 420 | 812 | ||
| 421 | static inline void | 813 | static inline void |
| @@ -429,14 +821,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
| 429 | switch (c->x86) { | 821 | switch (c->x86) { |
| 430 | case 0x6: | 822 | case 0x6: |
| 431 | switch (c->x86_model) { | 823 | switch (c->x86_model) { |
| 432 | #ifdef __i386__ | ||
| 433 | case 0xD: | 824 | case 0xD: |
| 434 | case 0xE: /* Pentium M */ | 825 | case 0xE: /* Pentium M */ |
| 435 | ds_configure(&ds_cfg_pentium_m); | 826 | ds_configure(&ds_cfg_var); |
| 436 | break; | 827 | break; |
| 437 | #endif /* _i386_ */ | ||
| 438 | case 0xF: /* Core2 */ | 828 | case 0xF: /* Core2 */ |
| 439 | ds_configure(&ds_cfg_core2); | 829 | case 0x1C: /* Atom */ |
| 830 | ds_configure(&ds_cfg_64); | ||
| 440 | break; | 831 | break; |
| 441 | default: | 832 | default: |
| 442 | /* sorry, don't know about them */ | 833 | /* sorry, don't know about them */ |
| @@ -445,13 +836,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
| 445 | break; | 836 | break; |
| 446 | case 0xF: | 837 | case 0xF: |
| 447 | switch (c->x86_model) { | 838 | switch (c->x86_model) { |
| 448 | #ifdef __i386__ | ||
| 449 | case 0x0: | 839 | case 0x0: |
| 450 | case 0x1: | 840 | case 0x1: |
| 451 | case 0x2: /* Netburst */ | 841 | case 0x2: /* Netburst */ |
| 452 | ds_configure(&ds_cfg_netburst); | 842 | ds_configure(&ds_cfg_var); |
| 453 | break; | 843 | break; |
| 454 | #endif /* _i386_ */ | ||
| 455 | default: | 844 | default: |
| 456 | /* sorry, don't know about them */ | 845 | /* sorry, don't know about them */ |
| 457 | break; | 846 | break; |
| @@ -462,3 +851,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
| 462 | break; | 851 | break; |
| 463 | } | 852 | } |
| 464 | } | 853 | } |
| 854 | |||
| 855 | void ds_free(struct ds_context *context) | ||
| 856 | { | ||
| 857 | /* This is called when the task owning the parameter context | ||
| 858 | * is dying. There should not be any user of that context left | ||
| 859 | * to disturb us, anymore. */ | ||
| 860 | unsigned long leftovers = context->count; | ||
| 861 | while (leftovers--) | ||
| 862 | ds_put_context(context); | ||
| 863 | } | ||
| 864 | #endif /* CONFIG_X86_DS */ | ||
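The rewritten ds_write() above treats the BTS/PEBS area as a circular buffer: it copies records up to the interrupt threshold (or up to the absolute maximum once the threshold has been passed), wraps the index back to the buffer base when the end is reached, and calls ds_overflow() when the threshold is crossed. The standalone sketch below mirrors that index arithmetic only; the struct, sizes and names are illustrative, not the kernel's.

    #include <stdio.h>
    #include <string.h>

    /* Illustrative stand-in for one DS area; the fields play the roles of
     * ds_buffer_base / ds_index / ds_absolute_maximum / ds_interrupt_threshold. */
    struct demo_area {
        char buf[64];
        size_t index;   /* next free byte */
        size_t end;     /* absolute maximum */
        size_t int_th;  /* interrupt threshold */
    };

    /* Same shape as the ds_write() loop: fill up to the threshold, or to the
     * end if the index is already past it, wrap to the base, and report when
     * the threshold is crossed (ds_overflow() in the kernel). */
    static size_t demo_write(struct demo_area *a, const char *rec, size_t size)
    {
        size_t done = 0;

        while (size) {
            size_t write_end = a->int_th < a->end ? a->int_th : a->end;
            size_t chunk;

            if (write_end <= a->index)
                write_end = a->end;   /* past threshold: fill to the end */
            if (write_end <= a->index)
                break;                /* no room at all */

            chunk = size < write_end - a->index ? size : write_end - a->index;
            memcpy(a->buf + a->index, rec, chunk);
            a->index += chunk;
            rec += chunk;
            size -= chunk;
            done += chunk;

            if (a->index >= a->end)
                a->index = 0;         /* wrap, like ds_index = base */
            if (a->index >= a->int_th)
                printf("threshold crossed: overflow notification\n");
        }
        return done;
    }

    int main(void)
    {
        struct demo_area a = { .index = 0, .end = 48, .int_th = 40 };
        char rec[24] = "branch-record-payload";

        demo_write(&a, rec, sizeof(rec));
        demo_write(&a, rec, sizeof(rec));  /* crosses the threshold and wraps */
        printf("index now %zu\n", a.index);
        return 0;
    }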
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index e24d1bc47b46..78e642feac30 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
| @@ -1206,7 +1206,7 @@ static int __init parse_memmap_opt(char *p) | |||
| 1206 | if (!p) | 1206 | if (!p) |
| 1207 | return -EINVAL; | 1207 | return -EINVAL; |
| 1208 | 1208 | ||
| 1209 | if (!strcmp(p, "exactmap")) { | 1209 | if (!strncmp(p, "exactmap", 8)) { |
| 1210 | #ifdef CONFIG_CRASH_DUMP | 1210 | #ifdef CONFIG_CRASH_DUMP |
| 1211 | /* | 1211 | /* |
| 1212 | * If we are doing a crash dump, we still need to know | 1212 | * If we are doing a crash dump, we still need to know |
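Switching from strcmp() to strncmp(..., 8) lets the parser accept "exactmap" at the start of the argument even when more text follows it (presumably because several memmap= fragments can end up concatenated in the same string), while a shorter prefix such as "exact" still fails because the comparison hits the terminating NUL first. A quick standalone check of that behaviour, purely for illustration:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        const char *args[] = { "exactmap", "exactmap@64M", "exact" };
        size_t i;

        for (i = 0; i < sizeof(args) / sizeof(args[0]); i++)
            printf("\"%s\": strcmp match=%d, strncmp(.,8) match=%d\n",
                   args[i],
                   strcmp(args[i], "exactmap") == 0,
                   strncmp(args[i], "exactmap", 8) == 0);
        return 0;
    }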
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 4353cf5e6fac..24bb5faf5efa 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
| @@ -95,6 +95,20 @@ static void __init nvidia_bugs(int num, int slot, int func) | |||
| 95 | 95 | ||
| 96 | } | 96 | } |
| 97 | 97 | ||
| 98 | #ifdef CONFIG_DMAR | ||
| 99 | static void __init intel_g33_dmar(int num, int slot, int func) | ||
| 100 | { | ||
| 101 | struct acpi_table_header *dmar_tbl; | ||
| 102 | acpi_status status; | ||
| 103 | |||
| 104 | status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl); | ||
| 105 | if (ACPI_SUCCESS(status)) { | ||
| 106 | printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n"); | ||
| 107 | dmar_disabled = 1; | ||
| 108 | } | ||
| 109 | } | ||
| 110 | #endif | ||
| 111 | |||
| 98 | #define QFLAG_APPLY_ONCE 0x1 | 112 | #define QFLAG_APPLY_ONCE 0x1 |
| 99 | #define QFLAG_APPLIED 0x2 | 113 | #define QFLAG_APPLIED 0x2 |
| 100 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) | 114 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) |
| @@ -114,6 +128,10 @@ static struct chipset early_qrk[] __initdata = { | |||
| 114 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, | 128 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, |
| 115 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, | 129 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, |
| 116 | PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, | 130 | PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, |
| 131 | #ifdef CONFIG_DMAR | ||
| 132 | { PCI_VENDOR_ID_INTEL, 0x29c0, | ||
| 133 | PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar }, | ||
| 134 | #endif | ||
| 117 | {} | 135 | {} |
| 118 | }; | 136 | }; |
| 119 | 137 | ||
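The new quirk hooks into the early PCI scan: the table entry matches vendor 0x8086, device 0x29c0 (the G33/G31 host bridge) and the host-bridge class, and the handler simply flips dmar_disabled if the BIOS exposes a DMAR table on that chipset. The sketch below shows the general wildcard-match pattern such a quirk table relies on; it is a standalone illustration, not the kernel's scanner, and only the PCI class and vendor/device codes are real.

    #include <stdint.h>
    #include <stdio.h>

    #define ANY_ID 0xffff   /* stand-in for PCI_ANY_ID */

    struct quirk {
        uint16_t vendor, device;
        uint16_t class;     /* upper 16 bits of the PCI class code */
        void (*hook)(void);
    };

    static void g33_dmar_hook(void) { puts("would check for an ACPI DMAR table here"); }

    static const struct quirk quirks[] = {
        { 0x8086, 0x29c0, 0x0600 /* host bridge */, g33_dmar_hook },
        { 0 }
    };

    /* Minimal table walk in the spirit of the early-quirk scan: wildcard
     * fields match anything, otherwise the IDs must be equal. */
    static void run_quirks(uint16_t ven, uint16_t dev, uint16_t class)
    {
        const struct quirk *q;

        for (q = quirks; q->hook; q++)
            if ((q->vendor == ANY_ID || q->vendor == ven) &&
                (q->device == ANY_ID || q->device == dev) &&
                (q->class  == ANY_ID || q->class  == class))
                q->hook();
    }

    int main(void)
    {
        run_quirks(0x8086, 0x29c0, 0x0600);  /* Intel G33 host bridge */
        return 0;
    }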
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 06cc8d4254b1..945a31cdd81f 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
| @@ -414,9 +414,11 @@ void __init efi_init(void) | |||
| 414 | if (memmap.map == NULL) | 414 | if (memmap.map == NULL) |
| 415 | printk(KERN_ERR "Could not map the EFI memory map!\n"); | 415 | printk(KERN_ERR "Could not map the EFI memory map!\n"); |
| 416 | memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); | 416 | memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); |
| 417 | |||
| 417 | if (memmap.desc_size != sizeof(efi_memory_desc_t)) | 418 | if (memmap.desc_size != sizeof(efi_memory_desc_t)) |
| 418 | printk(KERN_WARNING "Kernel-defined memdesc" | 419 | printk(KERN_WARNING |
| 419 | "doesn't match the one from EFI!\n"); | 420 | "Kernel-defined memdesc doesn't match the one from EFI!\n"); |
| 421 | |||
| 420 | if (add_efi_memmap) | 422 | if (add_efi_memmap) |
| 421 | do_add_efi_memmap(); | 423 | do_add_efi_memmap(); |
| 422 | 424 | ||
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 9bfc4d72fb2e..d16084f90649 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
| @@ -108,12 +108,11 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
| 108 | } | 108 | } |
| 109 | load_idt((const struct desc_ptr *)&idt_descr); | 109 | load_idt((const struct desc_ptr *)&idt_descr); |
| 110 | 110 | ||
| 111 | early_printk("Kernel alive\n"); | 111 | if (console_loglevel == 10) |
| 112 | early_printk("Kernel alive\n"); | ||
| 112 | 113 | ||
| 113 | x86_64_init_pda(); | 114 | x86_64_init_pda(); |
| 114 | 115 | ||
| 115 | early_printk("Kernel really alive\n"); | ||
| 116 | |||
| 117 | x86_64_start_reservations(real_mode_data); | 116 | x86_64_start_reservations(real_mode_data); |
| 118 | } | 117 | } |
| 119 | 118 | ||
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index a7010c3a377a..e835b4eea70b 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
| @@ -172,10 +172,6 @@ num_subarch_entries = (. - subarch_entries) / 4 | |||
| 172 | * | 172 | * |
| 173 | * Note that the stack is not yet set up! | 173 | * Note that the stack is not yet set up! |
| 174 | */ | 174 | */ |
| 175 | #define PTE_ATTR 0x007 /* PRESENT+RW+USER */ | ||
| 176 | #define PDE_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ | ||
| 177 | #define PGD_ATTR 0x001 /* PRESENT (no other attributes) */ | ||
| 178 | |||
| 179 | default_entry: | 175 | default_entry: |
| 180 | #ifdef CONFIG_X86_PAE | 176 | #ifdef CONFIG_X86_PAE |
| 181 | 177 | ||
| @@ -196,9 +192,9 @@ default_entry: | |||
| 196 | movl $pa(pg0), %edi | 192 | movl $pa(pg0), %edi |
| 197 | movl %edi, pa(init_pg_tables_start) | 193 | movl %edi, pa(init_pg_tables_start) |
| 198 | movl $pa(swapper_pg_pmd), %edx | 194 | movl $pa(swapper_pg_pmd), %edx |
| 199 | movl $PTE_ATTR, %eax | 195 | movl $PTE_IDENT_ATTR, %eax |
| 200 | 10: | 196 | 10: |
| 201 | leal PDE_ATTR(%edi),%ecx /* Create PMD entry */ | 197 | leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ |
| 202 | movl %ecx,(%edx) /* Store PMD entry */ | 198 | movl %ecx,(%edx) /* Store PMD entry */ |
| 203 | /* Upper half already zero */ | 199 | /* Upper half already zero */ |
| 204 | addl $8,%edx | 200 | addl $8,%edx |
| @@ -215,7 +211,7 @@ default_entry: | |||
| 215 | * End condition: we must map up to and including INIT_MAP_BEYOND_END | 211 | * End condition: we must map up to and including INIT_MAP_BEYOND_END |
| 216 | * bytes beyond the end of our own page tables. | 212 | * bytes beyond the end of our own page tables. |
| 217 | */ | 213 | */ |
| 218 | leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp | 214 | leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp |
| 219 | cmpl %ebp,%eax | 215 | cmpl %ebp,%eax |
| 220 | jb 10b | 216 | jb 10b |
| 221 | 1: | 217 | 1: |
| @@ -224,7 +220,7 @@ default_entry: | |||
| 224 | movl %eax, pa(max_pfn_mapped) | 220 | movl %eax, pa(max_pfn_mapped) |
| 225 | 221 | ||
| 226 | /* Do early initialization of the fixmap area */ | 222 | /* Do early initialization of the fixmap area */ |
| 227 | movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax | 223 | movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax |
| 228 | movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) | 224 | movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) |
| 229 | #else /* Not PAE */ | 225 | #else /* Not PAE */ |
| 230 | 226 | ||
| @@ -233,9 +229,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
| 233 | movl $pa(pg0), %edi | 229 | movl $pa(pg0), %edi |
| 234 | movl %edi, pa(init_pg_tables_start) | 230 | movl %edi, pa(init_pg_tables_start) |
| 235 | movl $pa(swapper_pg_dir), %edx | 231 | movl $pa(swapper_pg_dir), %edx |
| 236 | movl $PTE_ATTR, %eax | 232 | movl $PTE_IDENT_ATTR, %eax |
| 237 | 10: | 233 | 10: |
| 238 | leal PDE_ATTR(%edi),%ecx /* Create PDE entry */ | 234 | leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ |
| 239 | movl %ecx,(%edx) /* Store identity PDE entry */ | 235 | movl %ecx,(%edx) /* Store identity PDE entry */ |
| 240 | movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ | 236 | movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ |
| 241 | addl $4,%edx | 237 | addl $4,%edx |
| @@ -249,7 +245,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
| 249 | * bytes beyond the end of our own page tables; the +0x007 is | 245 | * bytes beyond the end of our own page tables; the +0x007 is |
| 250 | * the attribute bits | 246 | * the attribute bits |
| 251 | */ | 247 | */ |
| 252 | leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp | 248 | leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp |
| 253 | cmpl %ebp,%eax | 249 | cmpl %ebp,%eax |
| 254 | jb 10b | 250 | jb 10b |
| 255 | movl %edi,pa(init_pg_tables_end) | 251 | movl %edi,pa(init_pg_tables_end) |
| @@ -257,7 +253,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
| 257 | movl %eax, pa(max_pfn_mapped) | 253 | movl %eax, pa(max_pfn_mapped) |
| 258 | 254 | ||
| 259 | /* Do early initialization of the fixmap area */ | 255 | /* Do early initialization of the fixmap area */ |
| 260 | movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax | 256 | movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax |
| 261 | movl %eax,pa(swapper_pg_dir+0xffc) | 257 | movl %eax,pa(swapper_pg_dir+0xffc) |
| 262 | #endif | 258 | #endif |
| 263 | jmp 3f | 259 | jmp 3f |
| @@ -634,19 +630,19 @@ ENTRY(empty_zero_page) | |||
| 634 | /* Page-aligned for the benefit of paravirt? */ | 630 | /* Page-aligned for the benefit of paravirt? */ |
| 635 | .align PAGE_SIZE_asm | 631 | .align PAGE_SIZE_asm |
| 636 | ENTRY(swapper_pg_dir) | 632 | ENTRY(swapper_pg_dir) |
| 637 | .long pa(swapper_pg_pmd+PGD_ATTR),0 /* low identity map */ | 633 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ |
| 638 | # if KPMDS == 3 | 634 | # if KPMDS == 3 |
| 639 | .long pa(swapper_pg_pmd+PGD_ATTR),0 | 635 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 |
| 640 | .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 | 636 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 |
| 641 | .long pa(swapper_pg_pmd+PGD_ATTR+0x2000),0 | 637 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x2000),0 |
| 642 | # elif KPMDS == 2 | 638 | # elif KPMDS == 2 |
| 643 | .long 0,0 | 639 | .long 0,0 |
| 644 | .long pa(swapper_pg_pmd+PGD_ATTR),0 | 640 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 |
| 645 | .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 | 641 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 |
| 646 | # elif KPMDS == 1 | 642 | # elif KPMDS == 1 |
| 647 | .long 0,0 | 643 | .long 0,0 |
| 648 | .long 0,0 | 644 | .long 0,0 |
| 649 | .long pa(swapper_pg_pmd+PGD_ATTR),0 | 645 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 |
| 650 | # else | 646 | # else |
| 651 | # error "Kernel PMDs should be 1, 2 or 3" | 647 | # error "Kernel PMDs should be 1, 2 or 3" |
| 652 | # endif | 648 | # endif |
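head_32.S stops carrying its own PTE_ATTR/PDE_ATTR/PGD_ATTR defines and switches to shared *_IDENT_ATTR names; judging by the removed lines the values are unchanged (0x007, 0x067 and 0x001, i.e. present/RW/user, plus accessed and dirty for the PMD entries), presumably now provided by a common page-table header. A small user-space decoder of those bit patterns, assuming the usual x86 page-table-entry bit layout:

    #include <stdio.h>

    /* Values taken from the removed head_32.S defines; bit names assume the
     * standard x86 PTE layout (bit 0 present, 1 RW, 2 user, 5 accessed, 6 dirty). */
    #define PTE_IDENT_ATTR 0x007   /* PRESENT | RW | USER */
    #define PDE_IDENT_ATTR 0x067   /* PRESENT | RW | USER | ACCESSED | DIRTY */
    #define PGD_IDENT_ATTR 0x001   /* PRESENT only */

    static void decode(const char *name, unsigned long attr)
    {
        printf("%s = %#05lx:%s%s%s%s%s\n", name, attr,
               attr & 0x01 ? " present"  : "",
               attr & 0x02 ? " rw"       : "",
               attr & 0x04 ? " user"     : "",
               attr & 0x20 ? " accessed" : "",
               attr & 0x40 ? " dirty"    : "");
    }

    int main(void)
    {
        decode("PTE_IDENT_ATTR", PTE_IDENT_ATTR);
        decode("PDE_IDENT_ATTR", PDE_IDENT_ATTR);
        decode("PGD_IDENT_ATTR", PGD_IDENT_ATTR);
        return 0;
    }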
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index db3280afe886..26cfdc1d7c7f 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
| @@ -110,7 +110,7 @@ startup_64: | |||
| 110 | movq %rdi, %rax | 110 | movq %rdi, %rax |
| 111 | shrq $PMD_SHIFT, %rax | 111 | shrq $PMD_SHIFT, %rax |
| 112 | andq $(PTRS_PER_PMD - 1), %rax | 112 | andq $(PTRS_PER_PMD - 1), %rax |
| 113 | leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx | 113 | leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx |
| 114 | leaq level2_spare_pgt(%rip), %rbx | 114 | leaq level2_spare_pgt(%rip), %rbx |
| 115 | movq %rdx, 0(%rbx, %rax, 8) | 115 | movq %rdx, 0(%rbx, %rax, 8) |
| 116 | ident_complete: | 116 | ident_complete: |
| @@ -374,7 +374,7 @@ NEXT_PAGE(level2_ident_pgt) | |||
| 374 | /* Since I easily can, map the first 1G. | 374 | /* Since I easily can, map the first 1G. |
| 375 | * Don't set NX because code runs from these pages. | 375 | * Don't set NX because code runs from these pages. |
| 376 | */ | 376 | */ |
| 377 | PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD) | 377 | PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) |
| 378 | 378 | ||
| 379 | NEXT_PAGE(level2_kernel_pgt) | 379 | NEXT_PAGE(level2_kernel_pgt) |
| 380 | /* | 380 | /* |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 59fd3b6b1303..73deaffadd03 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
| @@ -210,8 +210,8 @@ static void hpet_legacy_clockevent_register(void) | |||
| 210 | /* Calculate the min / max delta */ | 210 | /* Calculate the min / max delta */ |
| 211 | hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, | 211 | hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, |
| 212 | &hpet_clockevent); | 212 | &hpet_clockevent); |
| 213 | hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30, | 213 | /* 5 usec minimum reprogramming delta. */ |
| 214 | &hpet_clockevent); | 214 | hpet_clockevent.min_delta_ns = 5000; |
| 215 | 215 | ||
| 216 | /* | 216 | /* |
| 217 | * Start hpet with the boot cpu mask and make it | 217 | * Start hpet with the boot cpu mask and make it |
| @@ -270,15 +270,22 @@ static void hpet_legacy_set_mode(enum clock_event_mode mode, | |||
| 270 | } | 270 | } |
| 271 | 271 | ||
| 272 | static int hpet_legacy_next_event(unsigned long delta, | 272 | static int hpet_legacy_next_event(unsigned long delta, |
| 273 | struct clock_event_device *evt) | 273 | struct clock_event_device *evt) |
| 274 | { | 274 | { |
| 275 | unsigned long cnt; | 275 | u32 cnt; |
| 276 | 276 | ||
| 277 | cnt = hpet_readl(HPET_COUNTER); | 277 | cnt = hpet_readl(HPET_COUNTER); |
| 278 | cnt += delta; | 278 | cnt += (u32) delta; |
| 279 | hpet_writel(cnt, HPET_T0_CMP); | 279 | hpet_writel(cnt, HPET_T0_CMP); |
| 280 | 280 | ||
| 281 | return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0) ? -ETIME : 0; | 281 | /* |
| 282 | * We need to read back the CMP register to make sure that | ||
| 283 | * what we wrote hit the chip before we compare it to the | ||
| 284 | * counter. | ||
| 285 | */ | ||
| 286 | WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt); | ||
| 287 | |||
| 288 | return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; | ||
| 282 | } | 289 | } |
| 283 | 290 | ||
| 284 | /* | 291 | /* |
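Two changes here: the minimum reprogramming delta becomes a fixed 5 us instead of a delta2ns conversion of 0x30 counter ticks, and hpet_legacy_next_event() now does its comparator arithmetic in u32 so the "did the counter already pass the comparator?" test keeps working across a 32-bit counter wrap; the added read-back WARN_ON catches writes that never reached the chip. The wrap-safe test in isolation, as a standalone sketch:

    #include <stdint.h>
    #include <stdio.h>

    /* Same idea as the return statement in hpet_legacy_next_event(): do the
     * subtraction in u32 and look at the sign of the 32-bit result instead
     * of comparing magnitudes, so a wrapped counter still compares correctly. */
    static int already_expired(uint32_t counter, uint32_t cnt)
    {
        return (int32_t)(counter - cnt) >= 0;
    }

    int main(void)
    {
        /* comparator programmed just before a 32-bit wrap: 0xfffffff0 + 0x30 */
        uint32_t cnt = 0xfffffff0u + 0x30;   /* truncates to 0x20 */

        printf("counter=0xfffffff8 expired=%d (not yet)\n",
               already_expired(0xfffffff8u, cnt));
        printf("counter=0x00000025 expired=%d (yes, after the wrap)\n",
               already_expired(0x00000025u, cnt));
        return 0;
    }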
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index 1c3a66a67f83..720d2607aacb 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c | |||
| @@ -92,6 +92,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = { | |||
| 92 | DMI_MATCH(DMI_BOARD_NAME, "30BF") | 92 | DMI_MATCH(DMI_BOARD_NAME, "30BF") |
| 93 | } | 93 | } |
| 94 | }, | 94 | }, |
| 95 | { | ||
| 96 | .callback = dmi_io_delay_0xed_port, | ||
| 97 | .ident = "Presario F700", | ||
| 98 | .matches = { | ||
| 99 | DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), | ||
| 100 | DMI_MATCH(DMI_BOARD_NAME, "30D3") | ||
| 101 | } | ||
| 102 | }, | ||
| 95 | { } | 103 | { } |
| 96 | }; | 104 | }; |
| 97 | 105 | ||
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 1cf8c1fcc088..b71e02d42f4f 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
| @@ -325,7 +325,7 @@ skip: | |||
| 325 | for_each_online_cpu(j) | 325 | for_each_online_cpu(j) |
| 326 | seq_printf(p, "%10u ", | 326 | seq_printf(p, "%10u ", |
| 327 | per_cpu(irq_stat,j).irq_call_count); | 327 | per_cpu(irq_stat,j).irq_call_count); |
| 328 | seq_printf(p, " function call interrupts\n"); | 328 | seq_printf(p, " Function call interrupts\n"); |
| 329 | seq_printf(p, "TLB: "); | 329 | seq_printf(p, "TLB: "); |
| 330 | for_each_online_cpu(j) | 330 | for_each_online_cpu(j) |
| 331 | seq_printf(p, "%10u ", | 331 | seq_printf(p, "%10u ", |
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 1f78b238d8d2..f065fe9071b9 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
| @@ -129,7 +129,7 @@ skip: | |||
| 129 | seq_printf(p, "CAL: "); | 129 | seq_printf(p, "CAL: "); |
| 130 | for_each_online_cpu(j) | 130 | for_each_online_cpu(j) |
| 131 | seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); | 131 | seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); |
| 132 | seq_printf(p, " function call interrupts\n"); | 132 | seq_printf(p, " Function call interrupts\n"); |
| 133 | seq_printf(p, "TLB: "); | 133 | seq_printf(p, "TLB: "); |
| 134 | for_each_online_cpu(j) | 134 | for_each_online_cpu(j) |
| 135 | seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); | 135 | seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); |
diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c index 7377ccb21335..304d8bad6559 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/k8.c | |||
| @@ -16,8 +16,9 @@ EXPORT_SYMBOL(num_k8_northbridges); | |||
| 16 | static u32 *flush_words; | 16 | static u32 *flush_words; |
| 17 | 17 | ||
| 18 | struct pci_device_id k8_nb_ids[] = { | 18 | struct pci_device_id k8_nb_ids[] = { |
| 19 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, | 19 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, |
| 20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, | 20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, |
| 21 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) }, | ||
| 21 | {} | 22 | {} |
| 22 | }; | 23 | }; |
| 23 | EXPORT_SYMBOL(k8_nb_ids); | 24 | EXPORT_SYMBOL(k8_nb_ids); |
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index f2d43bc75514..ff7d3b0124f1 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c | |||
| @@ -139,6 +139,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) | |||
| 139 | if (PageHighMem(pg)) { | 139 | if (PageHighMem(pg)) { |
| 140 | data = ioremap_cache(pa_data, sizeof(*data)); | 140 | data = ioremap_cache(pa_data, sizeof(*data)); |
| 141 | if (!data) { | 141 | if (!data) { |
| 142 | kfree(node); | ||
| 142 | error = -ENXIO; | 143 | error = -ENXIO; |
| 143 | goto err_dir; | 144 | goto err_dir; |
| 144 | } | 145 | } |
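The added kfree(node) plugs a small leak: when ioremap_cache() fails, the node allocated a few lines earlier was previously never released before jumping to the error label. The pattern, reduced to a user-space sketch with stand-in allocation calls:

    #include <stdlib.h>

    struct node { void *data; };

    /* Shape of the fixed error path: everything allocated before the failing
     * step is released before bailing out (names and allocators illustrative). */
    static int create_node(void)
    {
        struct node *node = malloc(sizeof(*node));
        if (!node)
            return -1;

        node->data = malloc(64);    /* stands in for ioremap_cache() */
        if (!node->data) {
            free(node);             /* the added kfree(node) */
            return -1;
        }

        free(node->data);
        free(node);
        return 0;
    }

    int main(void) { return create_node(); }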
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index f47f0eb886b8..10435a120d22 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
| @@ -69,6 +69,9 @@ static int gdb_x86vector = -1; | |||
| 69 | */ | 69 | */ |
| 70 | void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | 70 | void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) |
| 71 | { | 71 | { |
| 72 | #ifndef CONFIG_X86_32 | ||
| 73 | u32 *gdb_regs32 = (u32 *)gdb_regs; | ||
| 74 | #endif | ||
| 72 | gdb_regs[GDB_AX] = regs->ax; | 75 | gdb_regs[GDB_AX] = regs->ax; |
| 73 | gdb_regs[GDB_BX] = regs->bx; | 76 | gdb_regs[GDB_BX] = regs->bx; |
| 74 | gdb_regs[GDB_CX] = regs->cx; | 77 | gdb_regs[GDB_CX] = regs->cx; |
| @@ -76,9 +79,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
| 76 | gdb_regs[GDB_SI] = regs->si; | 79 | gdb_regs[GDB_SI] = regs->si; |
| 77 | gdb_regs[GDB_DI] = regs->di; | 80 | gdb_regs[GDB_DI] = regs->di; |
| 78 | gdb_regs[GDB_BP] = regs->bp; | 81 | gdb_regs[GDB_BP] = regs->bp; |
| 79 | gdb_regs[GDB_PS] = regs->flags; | ||
| 80 | gdb_regs[GDB_PC] = regs->ip; | 82 | gdb_regs[GDB_PC] = regs->ip; |
| 81 | #ifdef CONFIG_X86_32 | 83 | #ifdef CONFIG_X86_32 |
| 84 | gdb_regs[GDB_PS] = regs->flags; | ||
| 82 | gdb_regs[GDB_DS] = regs->ds; | 85 | gdb_regs[GDB_DS] = regs->ds; |
| 83 | gdb_regs[GDB_ES] = regs->es; | 86 | gdb_regs[GDB_ES] = regs->es; |
| 84 | gdb_regs[GDB_CS] = regs->cs; | 87 | gdb_regs[GDB_CS] = regs->cs; |
| @@ -94,6 +97,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
| 94 | gdb_regs[GDB_R13] = regs->r13; | 97 | gdb_regs[GDB_R13] = regs->r13; |
| 95 | gdb_regs[GDB_R14] = regs->r14; | 98 | gdb_regs[GDB_R14] = regs->r14; |
| 96 | gdb_regs[GDB_R15] = regs->r15; | 99 | gdb_regs[GDB_R15] = regs->r15; |
| 100 | gdb_regs32[GDB_PS] = regs->flags; | ||
| 101 | gdb_regs32[GDB_CS] = regs->cs; | ||
| 102 | gdb_regs32[GDB_SS] = regs->ss; | ||
| 97 | #endif | 103 | #endif |
| 98 | gdb_regs[GDB_SP] = regs->sp; | 104 | gdb_regs[GDB_SP] = regs->sp; |
| 99 | } | 105 | } |
| @@ -112,6 +118,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
| 112 | */ | 118 | */ |
| 113 | void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | 119 | void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) |
| 114 | { | 120 | { |
| 121 | #ifndef CONFIG_X86_32 | ||
| 122 | u32 *gdb_regs32 = (u32 *)gdb_regs; | ||
| 123 | #endif | ||
| 115 | gdb_regs[GDB_AX] = 0; | 124 | gdb_regs[GDB_AX] = 0; |
| 116 | gdb_regs[GDB_BX] = 0; | 125 | gdb_regs[GDB_BX] = 0; |
| 117 | gdb_regs[GDB_CX] = 0; | 126 | gdb_regs[GDB_CX] = 0; |
| @@ -129,8 +138,10 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | |||
| 129 | gdb_regs[GDB_FS] = 0xFFFF; | 138 | gdb_regs[GDB_FS] = 0xFFFF; |
| 130 | gdb_regs[GDB_GS] = 0xFFFF; | 139 | gdb_regs[GDB_GS] = 0xFFFF; |
| 131 | #else | 140 | #else |
| 132 | gdb_regs[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); | 141 | gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); |
| 133 | gdb_regs[GDB_PC] = 0; | 142 | gdb_regs32[GDB_CS] = __KERNEL_CS; |
| 143 | gdb_regs32[GDB_SS] = __KERNEL_DS; | ||
| 144 | gdb_regs[GDB_PC] = p->thread.ip; | ||
| 134 | gdb_regs[GDB_R8] = 0; | 145 | gdb_regs[GDB_R8] = 0; |
| 135 | gdb_regs[GDB_R9] = 0; | 146 | gdb_regs[GDB_R9] = 0; |
| 136 | gdb_regs[GDB_R10] = 0; | 147 | gdb_regs[GDB_R10] = 0; |
| @@ -153,6 +164,9 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | |||
| 153 | */ | 164 | */ |
| 154 | void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) | 165 | void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) |
| 155 | { | 166 | { |
| 167 | #ifndef CONFIG_X86_32 | ||
| 168 | u32 *gdb_regs32 = (u32 *)gdb_regs; | ||
| 169 | #endif | ||
| 156 | regs->ax = gdb_regs[GDB_AX]; | 170 | regs->ax = gdb_regs[GDB_AX]; |
| 157 | regs->bx = gdb_regs[GDB_BX]; | 171 | regs->bx = gdb_regs[GDB_BX]; |
| 158 | regs->cx = gdb_regs[GDB_CX]; | 172 | regs->cx = gdb_regs[GDB_CX]; |
| @@ -160,9 +174,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
| 160 | regs->si = gdb_regs[GDB_SI]; | 174 | regs->si = gdb_regs[GDB_SI]; |
| 161 | regs->di = gdb_regs[GDB_DI]; | 175 | regs->di = gdb_regs[GDB_DI]; |
| 162 | regs->bp = gdb_regs[GDB_BP]; | 176 | regs->bp = gdb_regs[GDB_BP]; |
| 163 | regs->flags = gdb_regs[GDB_PS]; | ||
| 164 | regs->ip = gdb_regs[GDB_PC]; | 177 | regs->ip = gdb_regs[GDB_PC]; |
| 165 | #ifdef CONFIG_X86_32 | 178 | #ifdef CONFIG_X86_32 |
| 179 | regs->flags = gdb_regs[GDB_PS]; | ||
| 166 | regs->ds = gdb_regs[GDB_DS]; | 180 | regs->ds = gdb_regs[GDB_DS]; |
| 167 | regs->es = gdb_regs[GDB_ES]; | 181 | regs->es = gdb_regs[GDB_ES]; |
| 168 | regs->cs = gdb_regs[GDB_CS]; | 182 | regs->cs = gdb_regs[GDB_CS]; |
| @@ -175,6 +189,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
| 175 | regs->r13 = gdb_regs[GDB_R13]; | 189 | regs->r13 = gdb_regs[GDB_R13]; |
| 176 | regs->r14 = gdb_regs[GDB_R14]; | 190 | regs->r14 = gdb_regs[GDB_R14]; |
| 177 | regs->r15 = gdb_regs[GDB_R15]; | 191 | regs->r15 = gdb_regs[GDB_R15]; |
| 192 | regs->flags = gdb_regs32[GDB_PS]; | ||
| 193 | regs->cs = gdb_regs32[GDB_CS]; | ||
| 194 | regs->ss = gdb_regs32[GDB_SS]; | ||
| 178 | #endif | 195 | #endif |
| 179 | } | 196 | } |
| 180 | 197 | ||
| @@ -378,10 +395,8 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, | |||
| 378 | if (remcomInBuffer[0] == 's') { | 395 | if (remcomInBuffer[0] == 's') { |
| 379 | linux_regs->flags |= X86_EFLAGS_TF; | 396 | linux_regs->flags |= X86_EFLAGS_TF; |
| 380 | kgdb_single_step = 1; | 397 | kgdb_single_step = 1; |
| 381 | if (kgdb_contthread) { | 398 | atomic_set(&kgdb_cpu_doing_single_step, |
| 382 | atomic_set(&kgdb_cpu_doing_single_step, | 399 | raw_smp_processor_id()); |
| 383 | raw_smp_processor_id()); | ||
| 384 | } | ||
| 385 | } | 400 | } |
| 386 | 401 | ||
| 387 | get_debugreg(dr6, 6); | 402 | get_debugreg(dr6, 6); |
| @@ -440,12 +455,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) | |||
| 440 | return NOTIFY_DONE; | 455 | return NOTIFY_DONE; |
| 441 | 456 | ||
| 442 | case DIE_NMI_IPI: | 457 | case DIE_NMI_IPI: |
| 443 | if (atomic_read(&kgdb_active) != -1) { | 458 | /* Just ignore, we will handle the roundup on DIE_NMI. */ |
| 444 | /* KGDB CPU roundup */ | ||
| 445 | kgdb_nmicallback(raw_smp_processor_id(), regs); | ||
| 446 | was_in_debug_nmi[raw_smp_processor_id()] = 1; | ||
| 447 | touch_nmi_watchdog(); | ||
| 448 | } | ||
| 449 | return NOTIFY_DONE; | 459 | return NOTIFY_DONE; |
| 450 | 460 | ||
| 451 | case DIE_NMIUNKNOWN: | 461 | case DIE_NMIUNKNOWN: |
| @@ -466,9 +476,15 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) | |||
| 466 | 476 | ||
| 467 | case DIE_DEBUG: | 477 | case DIE_DEBUG: |
| 468 | if (atomic_read(&kgdb_cpu_doing_single_step) == | 478 | if (atomic_read(&kgdb_cpu_doing_single_step) == |
| 469 | raw_smp_processor_id() && | 479 | raw_smp_processor_id()) { |
| 470 | user_mode(regs)) | 480 | if (user_mode(regs)) |
| 471 | return single_step_cont(regs, args); | 481 | return single_step_cont(regs, args); |
| 482 | break; | ||
| 483 | } else if (test_thread_flag(TIF_SINGLESTEP)) | ||
| 484 | /* This means a user thread is single stepping | ||
| 485 | * a system call which should be ignored | ||
| 486 | */ | ||
| 487 | return NOTIFY_DONE; | ||
| 472 | /* fall through */ | 488 | /* fall through */ |
| 473 | default: | 489 | default: |
| 474 | if (user_mode(regs)) | 490 | if (user_mode(regs)) |
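On x86-64 the gdb remote protocol carries EFLAGS, CS and SS as 32-bit quantities at the tail of the register file, so the patch overlays a u32 view (gdb_regs32) on the same array instead of widening them to full longs; sleeping threads also start reporting their saved thread.ip and the kernel CS/SS rather than a zero PC. A standalone illustration of the overlay idea; the indices are made up, not the real GDB_* numbering, and the pointer cast mirrors the one in the patch:

    #include <stdint.h>
    #include <stdio.h>

    enum { DEMO_R15 = 15, DEMO_PS32 = 32, DEMO_CS32 = 33, DEMO_SS32 = 34 };

    int main(void)
    {
        uint64_t gdb_regs[24] = { 0 };               /* 64-bit slots ... */
        uint32_t *gdb_regs32 = (uint32_t *)gdb_regs; /* ... viewed as u32 slots */

        gdb_regs[DEMO_R15] = 0xdeadbeefcafef00dULL;  /* ordinary 64-bit register */
        gdb_regs32[DEMO_PS32] = 0x246;               /* flags as a 32-bit slot */
        gdb_regs32[DEMO_CS32] = 0x10;
        gdb_regs32[DEMO_SS32] = 0x18;

        printf("r15=%#llx flags=%#x cs=%#x ss=%#x\n",
               (unsigned long long)gdb_regs[DEMO_R15], gdb_regs32[DEMO_PS32],
               gdb_regs32[DEMO_CS32], gdb_regs32[DEMO_SS32]);
        return 0;
    }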
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8b7a3cf37d2b..478bca986eca 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
| @@ -178,7 +178,7 @@ static void kvm_flush_tlb(void) | |||
| 178 | kvm_deferred_mmu_op(&ftlb, sizeof ftlb); | 178 | kvm_deferred_mmu_op(&ftlb, sizeof ftlb); |
| 179 | } | 179 | } |
| 180 | 180 | ||
| 181 | static void kvm_release_pt(u32 pfn) | 181 | static void kvm_release_pt(unsigned long pfn) |
| 182 | { | 182 | { |
| 183 | struct kvm_mmu_op_release_pt rpt = { | 183 | struct kvm_mmu_op_release_pt rpt = { |
| 184 | .header.op = KVM_MMU_OP_RELEASE_PT, | 184 | .header.op = KVM_MMU_OP_RELEASE_PT, |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index abb78a2cc4ad..2c97f07f1c2c 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
| @@ -299,6 +299,15 @@ void acpi_nmi_disable(void) | |||
| 299 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | 299 | on_each_cpu(__acpi_nmi_disable, NULL, 1); |
| 300 | } | 300 | } |
| 301 | 301 | ||
| 302 | /* | ||
| 303 | * This function is called as soon the LAPIC NMI watchdog driver has everything | ||
| 304 | * in place and it's ready to check if the NMIs belong to the NMI watchdog | ||
| 305 | */ | ||
| 306 | void cpu_nmi_set_wd_enabled(void) | ||
| 307 | { | ||
| 308 | __get_cpu_var(wd_enabled) = 1; | ||
| 309 | } | ||
| 310 | |||
| 302 | void setup_apic_nmi_watchdog(void *unused) | 311 | void setup_apic_nmi_watchdog(void *unused) |
| 303 | { | 312 | { |
| 304 | if (__get_cpu_var(wd_enabled)) | 313 | if (__get_cpu_var(wd_enabled)) |
| @@ -311,8 +320,6 @@ void setup_apic_nmi_watchdog(void *unused) | |||
| 311 | 320 | ||
| 312 | switch (nmi_watchdog) { | 321 | switch (nmi_watchdog) { |
| 313 | case NMI_LOCAL_APIC: | 322 | case NMI_LOCAL_APIC: |
| 314 | /* enable it before to avoid race with handler */ | ||
| 315 | __get_cpu_var(wd_enabled) = 1; | ||
| 316 | if (lapic_watchdog_init(nmi_hz) < 0) { | 323 | if (lapic_watchdog_init(nmi_hz) < 0) { |
| 317 | __get_cpu_var(wd_enabled) = 0; | 324 | __get_cpu_var(wd_enabled) = 0; |
| 318 | return; | 325 | return; |
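Previously wd_enabled was flipped on before lapic_watchdog_init() had run, so an NMI landing in that window could be counted as a watchdog tick for hardware that was not yet programmed. The new cpu_nmi_set_wd_enabled() helper lets the flag be raised only once setup is complete, presumably from inside the LAPIC watchdog driver, which is outside this hunk. A minimal sketch of the corrected ordering, with stand-in names:

    #include <stdio.h>

    static int wd_enabled;   /* per-CPU in the kernel; a plain int here */

    /* Raised only when the watchdog hardware is ready to generate NMIs. */
    static void cpu_nmi_set_wd_enabled(void) { wd_enabled = 1; }

    /* Stand-in for lapic_watchdog_init(): program the counters first,
     * announce readiness last. */
    static int demo_lapic_watchdog_init(void)
    {
        /* ... program performance counters for NMI delivery ... */
        cpu_nmi_set_wd_enabled();
        return 0;
    }

    int main(void)
    {
        if (demo_lapic_watchdog_init() < 0) {
            wd_enabled = 0;
            return 1;
        }
        printf("wd_enabled=%d\n", wd_enabled);
        return 0;
    }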
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 3e6672274807..7a13fac63a1f 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c | |||
| @@ -190,12 +190,12 @@ EXPORT_SYMBOL_GPL(olpc_ec_cmd); | |||
| 190 | static void __init platform_detect(void) | 190 | static void __init platform_detect(void) |
| 191 | { | 191 | { |
| 192 | size_t propsize; | 192 | size_t propsize; |
| 193 | u32 rev; | 193 | __be32 rev; |
| 194 | 194 | ||
| 195 | if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4, | 195 | if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4, |
| 196 | &propsize) || propsize != 4) { | 196 | &propsize) || propsize != 4) { |
| 197 | printk(KERN_ERR "ofw: getprop call failed!\n"); | 197 | printk(KERN_ERR "ofw: getprop call failed!\n"); |
| 198 | rev = 0; | 198 | rev = cpu_to_be32(0); |
| 199 | } | 199 | } |
| 200 | olpc_platform_info.boardrev = be32_to_cpu(rev); | 200 | olpc_platform_info.boardrev = be32_to_cpu(rev); |
| 201 | } | 201 | } |
| @@ -203,7 +203,7 @@ static void __init platform_detect(void) | |||
| 203 | static void __init platform_detect(void) | 203 | static void __init platform_detect(void) |
| 204 | { | 204 | { |
| 205 | /* stopgap until OFW support is added to the kernel */ | 205 | /* stopgap until OFW support is added to the kernel */ |
| 206 | olpc_platform_info.boardrev = be32_to_cpu(0xc2); | 206 | olpc_platform_info.boardrev = 0xc2; |
| 207 | } | 207 | } |
| 208 | #endif | 208 | #endif |
| 209 | 209 | ||
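The firmware hands back the board revision in big-endian byte order, so the buffer is now typed __be32 and converted exactly once with be32_to_cpu(); the fallback path likewise stops running a plain constant through a byte swap it never needed. For reference, this is what the conversion has to do on a little-endian host; a portable re-implementation for illustration only, not the kernel helper:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* What be32_to_cpu() achieves: interpret four bytes as a big-endian
     * number regardless of host byte order. */
    static uint32_t demo_be32_to_cpu(uint32_t be)
    {
        const uint8_t *p = (const uint8_t *)&be;

        return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
               ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
    }

    int main(void)
    {
        uint8_t wire[4] = { 0x00, 0x00, 0x00, 0xc2 };  /* board revision 0xc2 */
        uint32_t raw;

        memcpy(&raw, wire, sizeof(raw));   /* what the getprop call fills in */
        printf("boardrev = %#x\n", demo_be32_to_cpu(raw));
        return 0;
    }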
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 58262218781b..9fe644f4861d 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c | |||
| @@ -23,7 +23,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | |||
| 23 | start = start_##ops##_##x; \ | 23 | start = start_##ops##_##x; \ |
| 24 | end = end_##ops##_##x; \ | 24 | end = end_##ops##_##x; \ |
| 25 | goto patch_site | 25 | goto patch_site |
| 26 | switch(type) { | 26 | switch (type) { |
| 27 | PATCH_SITE(pv_irq_ops, irq_disable); | 27 | PATCH_SITE(pv_irq_ops, irq_disable); |
| 28 | PATCH_SITE(pv_irq_ops, irq_enable); | 28 | PATCH_SITE(pv_irq_ops, irq_enable); |
| 29 | PATCH_SITE(pv_irq_ops, restore_fl); | 29 | PATCH_SITE(pv_irq_ops, restore_fl); |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index dcdac6c826e9..080d1d27f37a 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
| @@ -261,7 +261,7 @@ static void iommu_range_reserve(struct iommu_table *tbl, | |||
| 261 | badbit, tbl, start_addr, npages); | 261 | badbit, tbl, start_addr, npages); |
| 262 | } | 262 | } |
| 263 | 263 | ||
| 264 | set_bit_string(tbl->it_map, index, npages); | 264 | iommu_area_reserve(tbl->it_map, index, npages); |
| 265 | 265 | ||
| 266 | spin_unlock_irqrestore(&tbl->it_lock, flags); | 266 | spin_unlock_irqrestore(&tbl->it_lock, flags); |
| 267 | } | 267 | } |
| @@ -491,6 +491,8 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, | |||
| 491 | npages = size >> PAGE_SHIFT; | 491 | npages = size >> PAGE_SHIFT; |
| 492 | order = get_order(size); | 492 | order = get_order(size); |
| 493 | 493 | ||
| 494 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
| 495 | |||
| 494 | /* alloc enough pages (and possibly more) */ | 496 | /* alloc enough pages (and possibly more) */ |
| 495 | ret = (void *)__get_free_pages(flag, order); | 497 | ret = (void *)__get_free_pages(flag, order); |
| 496 | if (!ret) | 498 | if (!ret) |
| @@ -510,8 +512,22 @@ error: | |||
| 510 | return ret; | 512 | return ret; |
| 511 | } | 513 | } |
| 512 | 514 | ||
| 515 | static void calgary_free_coherent(struct device *dev, size_t size, | ||
| 516 | void *vaddr, dma_addr_t dma_handle) | ||
| 517 | { | ||
| 518 | unsigned int npages; | ||
| 519 | struct iommu_table *tbl = find_iommu_table(dev); | ||
| 520 | |||
| 521 | size = PAGE_ALIGN(size); | ||
| 522 | npages = size >> PAGE_SHIFT; | ||
| 523 | |||
| 524 | iommu_free(tbl, dma_handle, npages); | ||
| 525 | free_pages((unsigned long)vaddr, get_order(size)); | ||
| 526 | } | ||
| 527 | |||
| 513 | static struct dma_mapping_ops calgary_dma_ops = { | 528 | static struct dma_mapping_ops calgary_dma_ops = { |
| 514 | .alloc_coherent = calgary_alloc_coherent, | 529 | .alloc_coherent = calgary_alloc_coherent, |
| 530 | .free_coherent = calgary_free_coherent, | ||
| 515 | .map_single = calgary_map_single, | 531 | .map_single = calgary_map_single, |
| 516 | .unmap_single = calgary_unmap_single, | 532 | .unmap_single = calgary_unmap_single, |
| 517 | .map_sg = calgary_map_sg, | 533 | .map_sg = calgary_map_sg, |
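Two separate fixes in the Calgary code: calgary_alloc_coherent() now strips the caller's zone modifiers from the gfp mask (the IOMMU remaps the pages anyway, so a zone restriction only makes the allocation more likely to fail), and dma_mapping_ops gains a free_coherent hook so the IOMMU entries are released together with the pages. The new free path rounds the size up to whole pages before computing how many entries to drop; the rounding in isolation, with stand-in constants for PAGE_SIZE and PAGE_SHIFT:

    #include <stdio.h>

    #define DEMO_PAGE_SHIFT 12
    #define DEMO_PAGE_SIZE  (1UL << DEMO_PAGE_SHIFT)
    #define DEMO_PAGE_ALIGN(x) (((x) + DEMO_PAGE_SIZE - 1) & ~(DEMO_PAGE_SIZE - 1))

    int main(void)
    {
        size_t size = 6000;   /* whatever the driver originally mapped */
        size_t npages = DEMO_PAGE_ALIGN(size) >> DEMO_PAGE_SHIFT;

        printf("%zu bytes -> %zu IOMMU page(s) to free\n", size, npages);
        return 0;
    }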
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 87d4d6964ec2..0a3824e837b4 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
| @@ -41,11 +41,12 @@ EXPORT_SYMBOL(bad_dma_address); | |||
| 41 | /* Dummy device used for NULL arguments (normally ISA). Better would | 41 | /* Dummy device used for NULL arguments (normally ISA). Better would |
| 42 | be probably a smaller DMA mask, but this is bug-to-bug compatible | 42 | be probably a smaller DMA mask, but this is bug-to-bug compatible |
| 43 | to older i386. */ | 43 | to older i386. */ |
| 44 | struct device fallback_dev = { | 44 | struct device x86_dma_fallback_dev = { |
| 45 | .bus_id = "fallback device", | 45 | .bus_id = "fallback device", |
| 46 | .coherent_dma_mask = DMA_32BIT_MASK, | 46 | .coherent_dma_mask = DMA_32BIT_MASK, |
| 47 | .dma_mask = &fallback_dev.coherent_dma_mask, | 47 | .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, |
| 48 | }; | 48 | }; |
| 49 | EXPORT_SYMBOL(x86_dma_fallback_dev); | ||
| 49 | 50 | ||
| 50 | int dma_set_mask(struct device *dev, u64 mask) | 51 | int dma_set_mask(struct device *dev, u64 mask) |
| 51 | { | 52 | { |
| @@ -82,7 +83,7 @@ void __init dma32_reserve_bootmem(void) | |||
| 82 | * using 512M as goal | 83 | * using 512M as goal |
| 83 | */ | 84 | */ |
| 84 | align = 64ULL<<20; | 85 | align = 64ULL<<20; |
| 85 | size = round_up(dma32_bootmem_size, align); | 86 | size = roundup(dma32_bootmem_size, align); |
| 86 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, | 87 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, |
| 87 | 512ULL<<20); | 88 | 512ULL<<20); |
| 88 | if (dma32_bootmem_ptr) | 89 | if (dma32_bootmem_ptr) |
| @@ -133,6 +134,37 @@ unsigned long iommu_num_pages(unsigned long addr, unsigned long len) | |||
| 133 | EXPORT_SYMBOL(iommu_num_pages); | 134 | EXPORT_SYMBOL(iommu_num_pages); |
| 134 | #endif | 135 | #endif |
| 135 | 136 | ||
| 137 | void *dma_generic_alloc_coherent(struct device *dev, size_t size, | ||
| 138 | dma_addr_t *dma_addr, gfp_t flag) | ||
| 139 | { | ||
| 140 | unsigned long dma_mask; | ||
| 141 | struct page *page; | ||
| 142 | dma_addr_t addr; | ||
| 143 | |||
| 144 | dma_mask = dma_alloc_coherent_mask(dev, flag); | ||
| 145 | |||
| 146 | flag |= __GFP_ZERO; | ||
| 147 | again: | ||
| 148 | page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); | ||
| 149 | if (!page) | ||
| 150 | return NULL; | ||
| 151 | |||
| 152 | addr = page_to_phys(page); | ||
| 153 | if (!is_buffer_dma_capable(dma_mask, addr, size)) { | ||
| 154 | __free_pages(page, get_order(size)); | ||
| 155 | |||
| 156 | if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) { | ||
| 157 | flag = (flag & ~GFP_DMA32) | GFP_DMA; | ||
| 158 | goto again; | ||
| 159 | } | ||
| 160 | |||
| 161 | return NULL; | ||
| 162 | } | ||
| 163 | |||
| 164 | *dma_addr = addr; | ||
| 165 | return page_address(page); | ||
| 166 | } | ||
| 167 | |||
| 136 | /* | 168 | /* |
| 137 | * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter | 169 | * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter |
| 138 | * documentation. | 170 | * documentation. |
| @@ -241,147 +273,6 @@ int dma_supported(struct device *dev, u64 mask) | |||
| 241 | } | 273 | } |
| 242 | EXPORT_SYMBOL(dma_supported); | 274 | EXPORT_SYMBOL(dma_supported); |
| 243 | 275 | ||
| 244 | /* Allocate DMA memory on node near device */ | ||
| 245 | static noinline struct page * | ||
| 246 | dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) | ||
| 247 | { | ||
| 248 | int node; | ||
| 249 | |||
| 250 | node = dev_to_node(dev); | ||
| 251 | |||
| 252 | return alloc_pages_node(node, gfp, order); | ||
| 253 | } | ||
| 254 | |||
| 255 | /* | ||
| 256 | * Allocate memory for a coherent mapping. | ||
| 257 | */ | ||
| 258 | void * | ||
| 259 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | ||
| 260 | gfp_t gfp) | ||
| 261 | { | ||
| 262 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
| 263 | void *memory = NULL; | ||
| 264 | struct page *page; | ||
| 265 | unsigned long dma_mask = 0; | ||
| 266 | dma_addr_t bus; | ||
| 267 | int noretry = 0; | ||
| 268 | |||
| 269 | /* ignore region specifiers */ | ||
| 270 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
| 271 | |||
| 272 | if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) | ||
| 273 | return memory; | ||
| 274 | |||
| 275 | if (!dev) { | ||
| 276 | dev = &fallback_dev; | ||
| 277 | gfp |= GFP_DMA; | ||
| 278 | } | ||
| 279 | dma_mask = dev->coherent_dma_mask; | ||
| 280 | if (dma_mask == 0) | ||
| 281 | dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK; | ||
| 282 | |||
| 283 | /* Device not DMA able */ | ||
| 284 | if (dev->dma_mask == NULL) | ||
| 285 | return NULL; | ||
| 286 | |||
| 287 | /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ | ||
| 288 | if (gfp & __GFP_DMA) | ||
| 289 | noretry = 1; | ||
| 290 | |||
| 291 | #ifdef CONFIG_X86_64 | ||
| 292 | /* Why <=? Even when the mask is smaller than 4GB it is often | ||
| 293 | larger than 16MB and in this case we have a chance of | ||
| 294 | finding fitting memory in the next higher zone first. If | ||
| 295 | not retry with true GFP_DMA. -AK */ | ||
| 296 | if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) { | ||
| 297 | gfp |= GFP_DMA32; | ||
| 298 | if (dma_mask < DMA_32BIT_MASK) | ||
| 299 | noretry = 1; | ||
| 300 | } | ||
| 301 | #endif | ||
| 302 | |||
| 303 | again: | ||
| 304 | page = dma_alloc_pages(dev, | ||
| 305 | noretry ? gfp | __GFP_NORETRY : gfp, get_order(size)); | ||
| 306 | if (page == NULL) | ||
| 307 | return NULL; | ||
| 308 | |||
| 309 | { | ||
| 310 | int high, mmu; | ||
| 311 | bus = page_to_phys(page); | ||
| 312 | memory = page_address(page); | ||
| 313 | high = (bus + size) >= dma_mask; | ||
| 314 | mmu = high; | ||
| 315 | if (force_iommu && !(gfp & GFP_DMA)) | ||
| 316 | mmu = 1; | ||
| 317 | else if (high) { | ||
| 318 | free_pages((unsigned long)memory, | ||
| 319 | get_order(size)); | ||
| 320 | |||
| 321 | /* Don't use the 16MB ZONE_DMA unless absolutely | ||
| 322 | needed. It's better to use remapping first. */ | ||
| 323 | if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { | ||
| 324 | gfp = (gfp & ~GFP_DMA32) | GFP_DMA; | ||
| 325 | goto again; | ||
| 326 | } | ||
| 327 | |||
| 328 | /* Let low level make its own zone decisions */ | ||
| 329 | gfp &= ~(GFP_DMA32|GFP_DMA); | ||
| 330 | |||
| 331 | if (ops->alloc_coherent) | ||
| 332 | return ops->alloc_coherent(dev, size, | ||
| 333 | dma_handle, gfp); | ||
| 334 | return NULL; | ||
| 335 | } | ||
| 336 | |||
| 337 | memset(memory, 0, size); | ||
| 338 | if (!mmu) { | ||
| 339 | *dma_handle = bus; | ||
| 340 | return memory; | ||
| 341 | } | ||
| 342 | } | ||
| 343 | |||
| 344 | if (ops->alloc_coherent) { | ||
| 345 | free_pages((unsigned long)memory, get_order(size)); | ||
| 346 | gfp &= ~(GFP_DMA|GFP_DMA32); | ||
| 347 | return ops->alloc_coherent(dev, size, dma_handle, gfp); | ||
| 348 | } | ||
| 349 | |||
| 350 | if (ops->map_simple) { | ||
| 351 | *dma_handle = ops->map_simple(dev, virt_to_phys(memory), | ||
| 352 | size, | ||
| 353 | PCI_DMA_BIDIRECTIONAL); | ||
| 354 | if (*dma_handle != bad_dma_address) | ||
| 355 | return memory; | ||
| 356 | } | ||
| 357 | |||
| 358 | if (panic_on_overflow) | ||
| 359 | panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", | ||
| 360 | (unsigned long)size); | ||
| 361 | free_pages((unsigned long)memory, get_order(size)); | ||
| 362 | return NULL; | ||
| 363 | } | ||
| 364 | EXPORT_SYMBOL(dma_alloc_coherent); | ||
| 365 | |||
| 366 | /* | ||
| 367 | * Unmap coherent memory. | ||
| 368 | * The caller must ensure that the device has finished accessing the mapping. | ||
| 369 | */ | ||
| 370 | void dma_free_coherent(struct device *dev, size_t size, | ||
| 371 | void *vaddr, dma_addr_t bus) | ||
| 372 | { | ||
| 373 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
| 374 | |||
| 375 | int order = get_order(size); | ||
| 376 | WARN_ON(irqs_disabled()); /* for portability */ | ||
| 377 | if (dma_release_from_coherent(dev, order, vaddr)) | ||
| 378 | return; | ||
| 379 | if (ops->unmap_single) | ||
| 380 | ops->unmap_single(dev, bus, size, 0); | ||
| 381 | free_pages((unsigned long)vaddr, order); | ||
| 382 | } | ||
| 383 | EXPORT_SYMBOL(dma_free_coherent); | ||
| 384 | |||
| 385 | static int __init pci_iommu_init(void) | 276 | static int __init pci_iommu_init(void) |
| 386 | { | 277 | { |
| 387 | calgary_iommu_init(); | 278 | calgary_iommu_init(); |
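The open-coded dma_alloc_coherent()/dma_free_coherent() pair moves out of this file; in its place dma_generic_alloc_coherent() picks an allocation mask, allocates zeroed pages near the device, and, if the resulting physical address is not reachable by the device, frees them and retries once with GFP_DMA before giving up. A reduced model of that retry loop; the allocator and constants below are fake, only the control flow mirrors the new helper:

    #include <stdint.h>
    #include <stdio.h>

    #define DEMO_DMA_24BIT_MASK 0x0000000000ffffffULL
    #define DEMO_DMA_32BIT_MASK 0x00000000ffffffffULL

    /* Stand-in allocator: the first attempt "lands" high, the low-zone
     * retry lands under 16 MiB. */
    static uint64_t demo_alloc(int low_zone)
    {
        return low_zone ? 0x0000000000ff0000ULL : 0x0000000080000000ULL;
    }

    static int buffer_dma_capable(uint64_t mask, uint64_t addr, size_t size)
    {
        return addr + size - 1 <= mask;
    }

    /* Same shape as dma_generic_alloc_coherent(): allocate, check the bus
     * address against the device mask, retry once in the low zone for
     * devices with a sub-32-bit mask, otherwise fail. */
    static uint64_t demo_alloc_coherent(uint64_t dma_mask, size_t size)
    {
        int low_zone = 0;
        uint64_t addr;
    again:
        addr = demo_alloc(low_zone);
        if (!buffer_dma_capable(dma_mask, addr, size)) {
            if (dma_mask < DEMO_DMA_32BIT_MASK && !low_zone) {
                low_zone = 1;   /* like (flag & ~GFP_DMA32) | GFP_DMA */
                goto again;
            }
            return 0;
        }
        return addr;
    }

    int main(void)
    {
        uint64_t bus = demo_alloc_coherent(DEMO_DMA_24BIT_MASK, 4096);

        printf("ISA-style device: bus address %#llx\n", (unsigned long long)bus);
        return 0;
    }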
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 49285f8fd4d5..145f1c83369f 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
| @@ -27,8 +27,8 @@ | |||
| 27 | #include <linux/scatterlist.h> | 27 | #include <linux/scatterlist.h> |
| 28 | #include <linux/iommu-helper.h> | 28 | #include <linux/iommu-helper.h> |
| 29 | #include <linux/sysdev.h> | 29 | #include <linux/sysdev.h> |
| 30 | #include <linux/io.h> | ||
| 30 | #include <asm/atomic.h> | 31 | #include <asm/atomic.h> |
| 31 | #include <asm/io.h> | ||
| 32 | #include <asm/mtrr.h> | 32 | #include <asm/mtrr.h> |
| 33 | #include <asm/pgtable.h> | 33 | #include <asm/pgtable.h> |
| 34 | #include <asm/proto.h> | 34 | #include <asm/proto.h> |
| @@ -80,9 +80,10 @@ AGPEXTERN int agp_memory_reserved; | |||
| 80 | AGPEXTERN __u32 *agp_gatt_table; | 80 | AGPEXTERN __u32 *agp_gatt_table; |
| 81 | 81 | ||
| 82 | static unsigned long next_bit; /* protected by iommu_bitmap_lock */ | 82 | static unsigned long next_bit; /* protected by iommu_bitmap_lock */ |
| 83 | static int need_flush; /* global flush state. set for each gart wrap */ | 83 | static bool need_flush; /* global flush state. set for each gart wrap */ |
| 84 | 84 | ||
| 85 | static unsigned long alloc_iommu(struct device *dev, int size) | 85 | static unsigned long alloc_iommu(struct device *dev, int size, |
| 86 | unsigned long align_mask) | ||
| 86 | { | 87 | { |
| 87 | unsigned long offset, flags; | 88 | unsigned long offset, flags; |
| 88 | unsigned long boundary_size; | 89 | unsigned long boundary_size; |
| @@ -90,26 +91,27 @@ static unsigned long alloc_iommu(struct device *dev, int size) | |||
| 90 | 91 | ||
| 91 | base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), | 92 | base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), |
| 92 | PAGE_SIZE) >> PAGE_SHIFT; | 93 | PAGE_SIZE) >> PAGE_SHIFT; |
| 93 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, | 94 | boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, |
| 94 | PAGE_SIZE) >> PAGE_SHIFT; | 95 | PAGE_SIZE) >> PAGE_SHIFT; |
| 95 | 96 | ||
| 96 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 97 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
| 97 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, | 98 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, |
| 98 | size, base_index, boundary_size, 0); | 99 | size, base_index, boundary_size, align_mask); |
| 99 | if (offset == -1) { | 100 | if (offset == -1) { |
| 100 | need_flush = 1; | 101 | need_flush = true; |
| 101 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, | 102 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, |
| 102 | size, base_index, boundary_size, 0); | 103 | size, base_index, boundary_size, |
| 104 | align_mask); | ||
| 103 | } | 105 | } |
| 104 | if (offset != -1) { | 106 | if (offset != -1) { |
| 105 | next_bit = offset+size; | 107 | next_bit = offset+size; |
| 106 | if (next_bit >= iommu_pages) { | 108 | if (next_bit >= iommu_pages) { |
| 107 | next_bit = 0; | 109 | next_bit = 0; |
| 108 | need_flush = 1; | 110 | need_flush = true; |
| 109 | } | 111 | } |
| 110 | } | 112 | } |
| 111 | if (iommu_fullflush) | 113 | if (iommu_fullflush) |
| 112 | need_flush = 1; | 114 | need_flush = true; |
| 113 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 115 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
| 114 | 116 | ||
| 115 | return offset; | 117 | return offset; |
| @@ -134,7 +136,7 @@ static void flush_gart(void) | |||
| 134 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 136 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
| 135 | if (need_flush) { | 137 | if (need_flush) { |
| 136 | k8_flush_garts(); | 138 | k8_flush_garts(); |
| 137 | need_flush = 0; | 139 | need_flush = false; |
| 138 | } | 140 | } |
| 139 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 141 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
| 140 | } | 142 | } |
| @@ -173,7 +175,8 @@ static void dump_leak(void) | |||
| 173 | iommu_leak_pages); | 175 | iommu_leak_pages); |
| 174 | for (i = 0; i < iommu_leak_pages; i += 2) { | 176 | for (i = 0; i < iommu_leak_pages; i += 2) { |
| 175 | printk(KERN_DEBUG "%lu: ", iommu_pages-i); | 177 | printk(KERN_DEBUG "%lu: ", iommu_pages-i); |
| 176 | printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0); | 178 | printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], |
| 179 | 0); | ||
| 177 | printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); | 180 | printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); |
| 178 | } | 181 | } |
| 179 | printk(KERN_DEBUG "\n"); | 182 | printk(KERN_DEBUG "\n"); |
| @@ -212,34 +215,24 @@ static void iommu_full(struct device *dev, size_t size, int dir) | |||
| 212 | static inline int | 215 | static inline int |
| 213 | need_iommu(struct device *dev, unsigned long addr, size_t size) | 216 | need_iommu(struct device *dev, unsigned long addr, size_t size) |
| 214 | { | 217 | { |
| 215 | u64 mask = *dev->dma_mask; | 218 | return force_iommu || |
| 216 | int high = addr + size > mask; | 219 | !is_buffer_dma_capable(*dev->dma_mask, addr, size); |
| 217 | int mmu = high; | ||
| 218 | |||
| 219 | if (force_iommu) | ||
| 220 | mmu = 1; | ||
| 221 | |||
| 222 | return mmu; | ||
| 223 | } | 220 | } |
| 224 | 221 | ||
| 225 | static inline int | 222 | static inline int |
| 226 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) | 223 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) |
| 227 | { | 224 | { |
| 228 | u64 mask = *dev->dma_mask; | 225 | return !is_buffer_dma_capable(*dev->dma_mask, addr, size); |
| 229 | int high = addr + size > mask; | ||
| 230 | int mmu = high; | ||
| 231 | |||
| 232 | return mmu; | ||
| 233 | } | 226 | } |
| 234 | 227 | ||
| 235 | /* Map a single continuous physical area into the IOMMU. | 228 | /* Map a single continuous physical area into the IOMMU. |
| 236 | * Caller needs to check if the iommu is needed and flush. | 229 | * Caller needs to check if the iommu is needed and flush. |
| 237 | */ | 230 | */ |
| 238 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | 231 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, |
| 239 | size_t size, int dir) | 232 | size_t size, int dir, unsigned long align_mask) |
| 240 | { | 233 | { |
| 241 | unsigned long npages = iommu_num_pages(phys_mem, size); | 234 | unsigned long npages = iommu_num_pages(phys_mem, size); |
| 242 | unsigned long iommu_page = alloc_iommu(dev, npages); | 235 | unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); |
| 243 | int i; | 236 | int i; |
| 244 | 237 | ||
| 245 | if (iommu_page == -1) { | 238 | if (iommu_page == -1) { |
| @@ -259,16 +252,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | |||
| 259 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); | 252 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); |
| 260 | } | 253 | } |
| 261 | 254 | ||
| 262 | static dma_addr_t | ||
| 263 | gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) | ||
| 264 | { | ||
| 265 | dma_addr_t map = dma_map_area(dev, paddr, size, dir); | ||
| 266 | |||
| 267 | flush_gart(); | ||
| 268 | |||
| 269 | return map; | ||
| 270 | } | ||
| 271 | |||
| 272 | /* Map a single area into the IOMMU */ | 255 | /* Map a single area into the IOMMU */ |
| 273 | static dma_addr_t | 256 | static dma_addr_t |
| 274 | gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) | 257 | gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) |
| @@ -276,12 +259,13 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) | |||
| 276 | unsigned long bus; | 259 | unsigned long bus; |
| 277 | 260 | ||
| 278 | if (!dev) | 261 | if (!dev) |
| 279 | dev = &fallback_dev; | 262 | dev = &x86_dma_fallback_dev; |
| 280 | 263 | ||
| 281 | if (!need_iommu(dev, paddr, size)) | 264 | if (!need_iommu(dev, paddr, size)) |
| 282 | return paddr; | 265 | return paddr; |
| 283 | 266 | ||
| 284 | bus = gart_map_simple(dev, paddr, size, dir); | 267 | bus = dma_map_area(dev, paddr, size, dir, 0); |
| 268 | flush_gart(); | ||
| 285 | 269 | ||
| 286 | return bus; | 270 | return bus; |
| 287 | } | 271 | } |
| @@ -340,7 +324,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | |||
| 340 | unsigned long addr = sg_phys(s); | 324 | unsigned long addr = sg_phys(s); |
| 341 | 325 | ||
| 342 | if (nonforced_iommu(dev, addr, s->length)) { | 326 | if (nonforced_iommu(dev, addr, s->length)) { |
| 343 | addr = dma_map_area(dev, addr, s->length, dir); | 327 | addr = dma_map_area(dev, addr, s->length, dir, 0); |
| 344 | if (addr == bad_dma_address) { | 328 | if (addr == bad_dma_address) { |
| 345 | if (i > 0) | 329 | if (i > 0) |
| 346 | gart_unmap_sg(dev, sg, i, dir); | 330 | gart_unmap_sg(dev, sg, i, dir); |
| @@ -362,7 +346,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, | |||
| 362 | int nelems, struct scatterlist *sout, | 346 | int nelems, struct scatterlist *sout, |
| 363 | unsigned long pages) | 347 | unsigned long pages) |
| 364 | { | 348 | { |
| 365 | unsigned long iommu_start = alloc_iommu(dev, pages); | 349 | unsigned long iommu_start = alloc_iommu(dev, pages, 0); |
| 366 | unsigned long iommu_page = iommu_start; | 350 | unsigned long iommu_page = iommu_start; |
| 367 | struct scatterlist *s; | 351 | struct scatterlist *s; |
| 368 | int i; | 352 | int i; |
| @@ -427,7 +411,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | |||
| 427 | return 0; | 411 | return 0; |
| 428 | 412 | ||
| 429 | if (!dev) | 413 | if (!dev) |
| 430 | dev = &fallback_dev; | 414 | dev = &x86_dma_fallback_dev; |
| 431 | 415 | ||
| 432 | out = 0; | 416 | out = 0; |
| 433 | start = 0; | 417 | start = 0; |
| @@ -499,6 +483,46 @@ error: | |||
| 499 | return 0; | 483 | return 0; |
| 500 | } | 484 | } |
| 501 | 485 | ||
| 486 | /* allocate and map a coherent mapping */ | ||
| 487 | static void * | ||
| 488 | gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, | ||
| 489 | gfp_t flag) | ||
| 490 | { | ||
| 491 | dma_addr_t paddr; | ||
| 492 | unsigned long align_mask; | ||
| 493 | struct page *page; | ||
| 494 | |||
| 495 | if (force_iommu && !(flag & GFP_DMA)) { | ||
| 496 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
| 497 | page = alloc_pages(flag | __GFP_ZERO, get_order(size)); | ||
| 498 | if (!page) | ||
| 499 | return NULL; | ||
| 500 | |||
| 501 | align_mask = (1UL << get_order(size)) - 1; | ||
| 502 | paddr = dma_map_area(dev, page_to_phys(page), size, | ||
| 503 | DMA_BIDIRECTIONAL, align_mask); | ||
| 504 | |||
| 505 | flush_gart(); | ||
| 506 | if (paddr != bad_dma_address) { | ||
| 507 | *dma_addr = paddr; | ||
| 508 | return page_address(page); | ||
| 509 | } | ||
| 510 | __free_pages(page, get_order(size)); | ||
| 511 | } else | ||
| 512 | return dma_generic_alloc_coherent(dev, size, dma_addr, flag); | ||
| 513 | |||
| 514 | return NULL; | ||
| 515 | } | ||
| 516 | |||
| 517 | /* free a coherent mapping */ | ||
| 518 | static void | ||
| 519 | gart_free_coherent(struct device *dev, size_t size, void *vaddr, | ||
| 520 | dma_addr_t dma_addr) | ||
| 521 | { | ||
| 522 | gart_unmap_single(dev, dma_addr, size, DMA_BIDIRECTIONAL); | ||
| 523 | free_pages((unsigned long)vaddr, get_order(size)); | ||
| 524 | } | ||
| 525 | |||
| 502 | static int no_agp; | 526 | static int no_agp; |
| 503 | 527 | ||
| 504 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) | 528 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) |
| @@ -626,7 +650,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
| 626 | struct pci_dev *dev; | 650 | struct pci_dev *dev; |
| 627 | void *gatt; | 651 | void *gatt; |
| 628 | int i, error; | 652 | int i, error; |
| 629 | unsigned long start_pfn, end_pfn; | ||
| 630 | 653 | ||
| 631 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); | 654 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); |
| 632 | aper_size = aper_base = info->aper_size = 0; | 655 | aper_size = aper_base = info->aper_size = 0; |
| @@ -650,13 +673,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
| 650 | info->aper_size = aper_size >> 20; | 673 | info->aper_size = aper_size >> 20; |
| 651 | 674 | ||
| 652 | gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); | 675 | gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); |
| 653 | gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); | 676 | gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, |
| 677 | get_order(gatt_size)); | ||
| 654 | if (!gatt) | 678 | if (!gatt) |
| 655 | panic("Cannot allocate GATT table"); | 679 | panic("Cannot allocate GATT table"); |
| 656 | if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) | 680 | if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) |
| 657 | panic("Could not set GART PTEs to uncacheable pages"); | 681 | panic("Could not set GART PTEs to uncacheable pages"); |
| 658 | 682 | ||
| 659 | memset(gatt, 0, gatt_size); | ||
| 660 | agp_gatt_table = gatt; | 683 | agp_gatt_table = gatt; |
| 661 | 684 | ||
| 662 | enable_gart_translations(); | 685 | enable_gart_translations(); |
| @@ -665,19 +688,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
| 665 | if (!error) | 688 | if (!error) |
| 666 | error = sysdev_register(&device_gart); | 689 | error = sysdev_register(&device_gart); |
| 667 | if (error) | 690 | if (error) |
| 668 | panic("Could not register gart_sysdev -- would corrupt data on next suspend"); | 691 | panic("Could not register gart_sysdev -- " |
| 692 | "would corrupt data on next suspend"); | ||
| 669 | 693 | ||
| 670 | flush_gart(); | 694 | flush_gart(); |
| 671 | 695 | ||
| 672 | printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", | 696 | printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", |
| 673 | aper_base, aper_size>>10); | 697 | aper_base, aper_size>>10); |
| 674 | 698 | ||
| 675 | /* need to map that range */ | ||
| 676 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); | ||
| 677 | if (end_pfn > max_low_pfn_mapped) { | ||
| 678 | start_pfn = (aper_base>>PAGE_SHIFT); | ||
| 679 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
| 680 | } | ||
| 681 | return 0; | 699 | return 0; |
| 682 | 700 | ||
| 683 | nommu: | 701 | nommu: |
| @@ -687,20 +705,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
| 687 | return -1; | 705 | return -1; |
| 688 | } | 706 | } |
| 689 | 707 | ||
| 690 | extern int agp_amd64_init(void); | ||
| 691 | |||
| 692 | static struct dma_mapping_ops gart_dma_ops = { | 708 | static struct dma_mapping_ops gart_dma_ops = { |
| 693 | .map_single = gart_map_single, | 709 | .map_single = gart_map_single, |
| 694 | .map_simple = gart_map_simple, | ||
| 695 | .unmap_single = gart_unmap_single, | 710 | .unmap_single = gart_unmap_single, |
| 696 | .sync_single_for_cpu = NULL, | ||
| 697 | .sync_single_for_device = NULL, | ||
| 698 | .sync_single_range_for_cpu = NULL, | ||
| 699 | .sync_single_range_for_device = NULL, | ||
| 700 | .sync_sg_for_cpu = NULL, | ||
| 701 | .sync_sg_for_device = NULL, | ||
| 702 | .map_sg = gart_map_sg, | 711 | .map_sg = gart_map_sg, |
| 703 | .unmap_sg = gart_unmap_sg, | 712 | .unmap_sg = gart_unmap_sg, |
| 713 | .alloc_coherent = gart_alloc_coherent, | ||
| 714 | .free_coherent = gart_free_coherent, | ||
| 704 | }; | 715 | }; |
| 705 | 716 | ||
| 706 | void gart_iommu_shutdown(void) | 717 | void gart_iommu_shutdown(void) |
| @@ -727,7 +738,8 @@ void __init gart_iommu_init(void) | |||
| 727 | { | 738 | { |
| 728 | struct agp_kern_info info; | 739 | struct agp_kern_info info; |
| 729 | unsigned long iommu_start; | 740 | unsigned long iommu_start; |
| 730 | unsigned long aper_size; | 741 | unsigned long aper_base, aper_size; |
| 742 | unsigned long start_pfn, end_pfn; | ||
| 731 | unsigned long scratch; | 743 | unsigned long scratch; |
| 732 | long i; | 744 | long i; |
| 733 | 745 | ||
| @@ -759,30 +771,35 @@ void __init gart_iommu_init(void) | |||
| 759 | (no_agp && init_k8_gatt(&info) < 0)) { | 771 | (no_agp && init_k8_gatt(&info) < 0)) { |
| 760 | if (max_pfn > MAX_DMA32_PFN) { | 772 | if (max_pfn > MAX_DMA32_PFN) { |
| 761 | printk(KERN_WARNING "More than 4GB of memory " | 773 | printk(KERN_WARNING "More than 4GB of memory " |
| 762 | "but GART IOMMU not available.\n" | 774 | "but GART IOMMU not available.\n"); |
| 763 | KERN_WARNING "falling back to iommu=soft.\n"); | 775 | printk(KERN_WARNING "falling back to iommu=soft.\n"); |
| 764 | } | 776 | } |
| 765 | return; | 777 | return; |
| 766 | } | 778 | } |
| 767 | 779 | ||
| 780 | /* need to map that range */ | ||
| 781 | aper_size = info.aper_size << 20; | ||
| 782 | aper_base = info.aper_base; | ||
| 783 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); | ||
| 784 | if (end_pfn > max_low_pfn_mapped) { | ||
| 785 | start_pfn = (aper_base>>PAGE_SHIFT); | ||
| 786 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
| 787 | } | ||
| 788 | |||
| 768 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); | 789 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); |
| 769 | aper_size = info.aper_size * 1024 * 1024; | ||
| 770 | iommu_size = check_iommu_size(info.aper_base, aper_size); | 790 | iommu_size = check_iommu_size(info.aper_base, aper_size); |
| 771 | iommu_pages = iommu_size >> PAGE_SHIFT; | 791 | iommu_pages = iommu_size >> PAGE_SHIFT; |
| 772 | 792 | ||
| 773 | iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL, | 793 | iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, |
| 774 | get_order(iommu_pages/8)); | 794 | get_order(iommu_pages/8)); |
| 775 | if (!iommu_gart_bitmap) | 795 | if (!iommu_gart_bitmap) |
| 776 | panic("Cannot allocate iommu bitmap\n"); | 796 | panic("Cannot allocate iommu bitmap\n"); |
| 777 | memset(iommu_gart_bitmap, 0, iommu_pages/8); | ||
| 778 | 797 | ||
| 779 | #ifdef CONFIG_IOMMU_LEAK | 798 | #ifdef CONFIG_IOMMU_LEAK |
| 780 | if (leak_trace) { | 799 | if (leak_trace) { |
| 781 | iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, | 800 | iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, |
| 782 | get_order(iommu_pages*sizeof(void *))); | 801 | get_order(iommu_pages*sizeof(void *))); |
| 783 | if (iommu_leak_tab) | 802 | if (!iommu_leak_tab) |
| 784 | memset(iommu_leak_tab, 0, iommu_pages * 8); | ||
| 785 | else | ||
| 786 | printk(KERN_DEBUG | 803 | printk(KERN_DEBUG |
| 787 | "PCI-DMA: Cannot allocate leak trace area\n"); | 804 | "PCI-DMA: Cannot allocate leak trace area\n"); |
| 788 | } | 805 | } |
| @@ -792,7 +809,7 @@ void __init gart_iommu_init(void) | |||
| 792 | * Out of IOMMU space handling. | 809 | * Out of IOMMU space handling. |
| 793 | * Reserve some invalid pages at the beginning of the GART. | 810 | * Reserve some invalid pages at the beginning of the GART. |
| 794 | */ | 811 | */ |
| 795 | set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); | 812 | iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); |
| 796 | 813 | ||
| 797 | agp_memory_reserved = iommu_size; | 814 | agp_memory_reserved = iommu_size; |
| 798 | printk(KERN_INFO | 815 | printk(KERN_INFO |
| @@ -850,7 +867,8 @@ void __init gart_parse_options(char *p) | |||
| 850 | if (!strncmp(p, "leak", 4)) { | 867 | if (!strncmp(p, "leak", 4)) { |
| 851 | leak_trace = 1; | 868 | leak_trace = 1; |
| 852 | p += 4; | 869 | p += 4; |
| 853 | if (*p == '=') ++p; | 870 | if (*p == '=') |
| 871 | ++p; | ||
| 854 | if (isdigit(*p) && get_option(&p, &arg)) | 872 | if (isdigit(*p) && get_option(&p, &arg)) |
| 855 | iommu_leak_pages = arg; | 873 | iommu_leak_pages = arg; |
| 856 | } | 874 | } |
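A note on the gart_alloc_coherent() addition earlier in this file's diff: when force_iommu is set, the buffer is mapped through dma_map_area() with an align_mask derived from the allocation order, so the chosen IOMMU slot mirrors the natural alignment of the underlying page allocation. The snippet below is only a standalone sketch of that arithmetic; PAGE_SHIFT and get_order() are re-declared locally (assumed 4KB pages, not the kernel headers) so it compiles on its own.

```c
#include <stdio.h>

#define PAGE_SHIFT 12			/* assumed 4KB pages, as on x86 */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Local stand-in for the kernel's get_order(): smallest order whose
 * page count covers 'size'. */
static int get_order(unsigned long size)
{
	unsigned long pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	int order = 0;

	while ((1UL << order) < pages)
		order++;
	return order;
}

int main(void)
{
	unsigned long size = 3 * PAGE_SIZE + 100;	/* rounds up to 4 pages */
	unsigned long align_mask = (1UL << get_order(size)) - 1;

	/* An order-2 (4 page) allocation yields align_mask == 3, i.e. the
	 * slot picked by alloc_iommu() must start on a 4-page boundary. */
	printf("order=%d align_mask=%#lx\n", get_order(size), align_mask);
	return 0;
}
```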
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 3f91f71cdc3e..c70ab5a5d4c8 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
| @@ -14,7 +14,7 @@ | |||
| 14 | static int | 14 | static int |
| 15 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) | 15 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) |
| 16 | { | 16 | { |
| 17 | if (hwdev && bus + size > *hwdev->dma_mask) { | 17 | if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) { |
| 18 | if (*hwdev->dma_mask >= DMA_32BIT_MASK) | 18 | if (*hwdev->dma_mask >= DMA_32BIT_MASK) |
| 19 | printk(KERN_ERR | 19 | printk(KERN_ERR |
| 20 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", | 20 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", |
| @@ -72,7 +72,15 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, | |||
| 72 | return nents; | 72 | return nents; |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, | ||
| 76 | dma_addr_t dma_addr) | ||
| 77 | { | ||
| 78 | free_pages((unsigned long)vaddr, get_order(size)); | ||
| 79 | } | ||
| 80 | |||
| 75 | struct dma_mapping_ops nommu_dma_ops = { | 81 | struct dma_mapping_ops nommu_dma_ops = { |
| 82 | .alloc_coherent = dma_generic_alloc_coherent, | ||
| 83 | .free_coherent = nommu_free_coherent, | ||
| 76 | .map_single = nommu_map_single, | 84 | .map_single = nommu_map_single, |
| 77 | .map_sg = nommu_map_sg, | 85 | .map_sg = nommu_map_sg, |
| 78 | .is_phys = 1, | 86 | .is_phys = 1, |
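The pci-nommu.c hunk swaps the open-coded "bus + size > *hwdev->dma_mask" overflow test for is_buffer_dma_capable(). As a hedged restatement (not a copy of the kernel header), the helper reduces to a single bounds check on the buffer's last byte:

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uint64_t dma_addr_t;	/* stand-in for the kernel typedef */

/* Illustrative equivalent of the check used above: a buffer is DMA-capable
 * for a device iff it ends at or below the device's DMA mask. */
static bool is_buffer_dma_capable(uint64_t mask, dma_addr_t addr, size_t size)
{
	return addr + size <= mask;
}
```

With a 32-bit mask (0xffffffff), a 4KB buffer starting at 0xfffff800 fails this check, which is exactly the case the existing overflow printk in check_addr() reports.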
diff --git a/arch/x86/kernel/pcspeaker.c b/arch/x86/kernel/pcspeaker.c index bc1f2d3ea277..a311ffcaad16 100644 --- a/arch/x86/kernel/pcspeaker.c +++ b/arch/x86/kernel/pcspeaker.c | |||
| @@ -1,20 +1,13 @@ | |||
| 1 | #include <linux/platform_device.h> | 1 | #include <linux/platform_device.h> |
| 2 | #include <linux/errno.h> | 2 | #include <linux/err.h> |
| 3 | #include <linux/init.h> | 3 | #include <linux/init.h> |
| 4 | 4 | ||
| 5 | static __init int add_pcspkr(void) | 5 | static __init int add_pcspkr(void) |
| 6 | { | 6 | { |
| 7 | struct platform_device *pd; | 7 | struct platform_device *pd; |
| 8 | int ret; | ||
| 9 | 8 | ||
| 10 | pd = platform_device_alloc("pcspkr", -1); | 9 | pd = platform_device_register_simple("pcspkr", -1, NULL, 0); |
| 11 | if (!pd) | ||
| 12 | return -ENOMEM; | ||
| 13 | 10 | ||
| 14 | ret = platform_device_add(pd); | 11 | return IS_ERR(pd) ? PTR_ERR(pd) : 0; |
| 15 | if (ret) | ||
| 16 | platform_device_put(pd); | ||
| 17 | |||
| 18 | return ret; | ||
| 19 | } | 12 | } |
| 20 | device_initcall(add_pcspkr); | 13 | device_initcall(add_pcspkr); |
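The pcspkr initcall now leans on platform_device_register_simple(), which reports failure through an ERR_PTR-encoded pointer rather than NULL, hence the IS_ERR()/PTR_ERR() pair. A minimal sketch of that convention with local stand-in macros (the real definitions live in <linux/err.h>):

```c
#include <stdio.h>

/* Simplified stand-ins: the kernel encodes small negative errnos in the
 * top of the pointer range, so "pointer or error" fits one return value. */
#define MAX_ERRNO	4095
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)
#define PTR_ERR(ptr)	((long)(ptr))
#define ERR_PTR(err)	((void *)(long)(err))

int main(void)
{
	void *pd = ERR_PTR(-12);	/* e.g. -ENOMEM from a failed register */

	/* Mirrors "return IS_ERR(pd) ? PTR_ERR(pd) : 0;" in add_pcspkr(). */
	printf("%ld\n", IS_ERR(pd) ? PTR_ERR(pd) : 0);
	return 0;
}
```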
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7fc4d5b0a6a0..c622772744d8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
| @@ -15,7 +15,6 @@ unsigned long idle_nomwait; | |||
| 15 | EXPORT_SYMBOL(idle_nomwait); | 15 | EXPORT_SYMBOL(idle_nomwait); |
| 16 | 16 | ||
| 17 | struct kmem_cache *task_xstate_cachep; | 17 | struct kmem_cache *task_xstate_cachep; |
| 18 | static int force_mwait __cpuinitdata; | ||
| 19 | 18 | ||
| 20 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | 19 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) |
| 21 | { | 20 | { |
| @@ -185,7 +184,8 @@ static void mwait_idle(void) | |||
| 185 | static void poll_idle(void) | 184 | static void poll_idle(void) |
| 186 | { | 185 | { |
| 187 | local_irq_enable(); | 186 | local_irq_enable(); |
| 188 | cpu_relax(); | 187 | while (!need_resched()) |
| 188 | cpu_relax(); | ||
| 189 | } | 189 | } |
| 190 | 190 | ||
| 191 | /* | 191 | /* |
| @@ -246,6 +246,14 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | |||
| 246 | return 1; | 246 | return 1; |
| 247 | } | 247 | } |
| 248 | 248 | ||
| 249 | static cpumask_t c1e_mask = CPU_MASK_NONE; | ||
| 250 | static int c1e_detected; | ||
| 251 | |||
| 252 | void c1e_remove_cpu(int cpu) | ||
| 253 | { | ||
| 254 | cpu_clear(cpu, c1e_mask); | ||
| 255 | } | ||
| 256 | |||
| 249 | /* | 257 | /* |
| 250 | * C1E aware idle routine. We check for C1E active in the interrupt | 258 | * C1E aware idle routine. We check for C1E active in the interrupt |
| 251 | * pending message MSR. If we detect C1E, then we handle it the same | 259 | * pending message MSR. If we detect C1E, then we handle it the same |
| @@ -253,9 +261,6 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | |||
| 253 | */ | 261 | */ |
| 254 | static void c1e_idle(void) | 262 | static void c1e_idle(void) |
| 255 | { | 263 | { |
| 256 | static cpumask_t c1e_mask = CPU_MASK_NONE; | ||
| 257 | static int c1e_detected; | ||
| 258 | |||
| 259 | if (need_resched()) | 264 | if (need_resched()) |
| 260 | return; | 265 | return; |
| 261 | 266 | ||
| @@ -265,8 +270,10 @@ static void c1e_idle(void) | |||
| 265 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); | 270 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); |
| 266 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { | 271 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { |
| 267 | c1e_detected = 1; | 272 | c1e_detected = 1; |
| 268 | mark_tsc_unstable("TSC halt in C1E"); | 273 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) |
| 269 | printk(KERN_INFO "System has C1E enabled\n"); | 274 | mark_tsc_unstable("TSC halt in AMD C1E"); |
| 275 | printk(KERN_INFO "System has AMD C1E enabled\n"); | ||
| 276 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); | ||
| 270 | } | 277 | } |
| 271 | } | 278 | } |
| 272 | 279 | ||
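With c1e_mask and c1e_detected promoted out of c1e_idle(), the CPU-offline paths shown further below (cpu_exit_clear() on 32-bit, play_dead() on 64-bit) can call c1e_remove_cpu(), so an offlined CPU's "already switched to the broadcast timer" bit does not survive a later online. A toy sketch of that bookkeeping, with a plain bitmask standing in for cpumask_t:

```c
#include <stdio.h>

/* Toy stand-in for cpumask_t: one bit per CPU id, enough for the sketch. */
static unsigned long c1e_mask;

static void c1e_mark_cpu(int cpu)   { c1e_mask |=  1UL << cpu; }
static void c1e_remove_cpu(int cpu) { c1e_mask &= ~(1UL << cpu); }
static int  c1e_cpu_marked(int cpu) { return !!(c1e_mask & (1UL << cpu)); }

int main(void)
{
	c1e_mark_cpu(2);	/* CPU 2 took the C1E idle path once */
	c1e_remove_cpu(2);	/* CPU 2 goes offline: forget its state */
	printf("cpu2 still marked: %d\n", c1e_cpu_marked(2));	/* prints 0 */
	return 0;
}
```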
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 2c9abc95e026..205188db9626 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
| @@ -37,6 +37,7 @@ | |||
| 37 | #include <linux/tick.h> | 37 | #include <linux/tick.h> |
| 38 | #include <linux/percpu.h> | 38 | #include <linux/percpu.h> |
| 39 | #include <linux/prctl.h> | 39 | #include <linux/prctl.h> |
| 40 | #include <linux/dmi.h> | ||
| 40 | 41 | ||
| 41 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
| 42 | #include <asm/pgtable.h> | 43 | #include <asm/pgtable.h> |
| @@ -55,6 +56,7 @@ | |||
| 55 | #include <asm/tlbflush.h> | 56 | #include <asm/tlbflush.h> |
| 56 | #include <asm/cpu.h> | 57 | #include <asm/cpu.h> |
| 57 | #include <asm/kdebug.h> | 58 | #include <asm/kdebug.h> |
| 59 | #include <asm/idle.h> | ||
| 58 | #include <asm/syscalls.h> | 60 | #include <asm/syscalls.h> |
| 59 | #include <asm/smp.h> | 61 | #include <asm/smp.h> |
| 60 | 62 | ||
| @@ -90,6 +92,7 @@ static void cpu_exit_clear(void) | |||
| 90 | cpu_clear(cpu, cpu_callin_map); | 92 | cpu_clear(cpu, cpu_callin_map); |
| 91 | 93 | ||
| 92 | numa_remove_cpu(cpu); | 94 | numa_remove_cpu(cpu); |
| 95 | c1e_remove_cpu(cpu); | ||
| 93 | } | 96 | } |
| 94 | 97 | ||
| 95 | /* We don't actually take CPU down, just spin without interrupts. */ | 98 | /* We don't actually take CPU down, just spin without interrupts. */ |
| @@ -161,6 +164,7 @@ void __show_registers(struct pt_regs *regs, int all) | |||
| 161 | unsigned long d0, d1, d2, d3, d6, d7; | 164 | unsigned long d0, d1, d2, d3, d6, d7; |
| 162 | unsigned long sp; | 165 | unsigned long sp; |
| 163 | unsigned short ss, gs; | 166 | unsigned short ss, gs; |
| 167 | const char *board; | ||
| 164 | 168 | ||
| 165 | if (user_mode_vm(regs)) { | 169 | if (user_mode_vm(regs)) { |
| 166 | sp = regs->sp; | 170 | sp = regs->sp; |
| @@ -173,11 +177,15 @@ void __show_registers(struct pt_regs *regs, int all) | |||
| 173 | } | 177 | } |
| 174 | 178 | ||
| 175 | printk("\n"); | 179 | printk("\n"); |
| 176 | printk("Pid: %d, comm: %s %s (%s %.*s)\n", | 180 | |
| 181 | board = dmi_get_system_info(DMI_PRODUCT_NAME); | ||
| 182 | if (!board) | ||
| 183 | board = ""; | ||
| 184 | printk("Pid: %d, comm: %s %s (%s %.*s) %s\n", | ||
| 177 | task_pid_nr(current), current->comm, | 185 | task_pid_nr(current), current->comm, |
| 178 | print_tainted(), init_utsname()->release, | 186 | print_tainted(), init_utsname()->release, |
| 179 | (int)strcspn(init_utsname()->version, " "), | 187 | (int)strcspn(init_utsname()->version, " "), |
| 180 | init_utsname()->version); | 188 | init_utsname()->version, board); |
| 181 | 189 | ||
| 182 | printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", | 190 | printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", |
| 183 | (u16)regs->cs, regs->ip, regs->flags, | 191 | (u16)regs->cs, regs->ip, regs->flags, |
| @@ -277,6 +285,14 @@ void exit_thread(void) | |||
| 277 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; | 285 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; |
| 278 | put_cpu(); | 286 | put_cpu(); |
| 279 | } | 287 | } |
| 288 | #ifdef CONFIG_X86_DS | ||
| 289 | /* Free any DS contexts that have not been properly released. */ | ||
| 290 | if (unlikely(current->thread.ds_ctx)) { | ||
| 291 | /* we clear debugctl to make sure DS is not used. */ | ||
| 292 | update_debugctlmsr(0); | ||
| 293 | ds_free(current->thread.ds_ctx); | ||
| 294 | } | ||
| 295 | #endif /* CONFIG_X86_DS */ | ||
| 280 | } | 296 | } |
| 281 | 297 | ||
| 282 | void flush_thread(void) | 298 | void flush_thread(void) |
| @@ -438,6 +454,35 @@ int set_tsc_mode(unsigned int val) | |||
| 438 | return 0; | 454 | return 0; |
| 439 | } | 455 | } |
| 440 | 456 | ||
| 457 | #ifdef CONFIG_X86_DS | ||
| 458 | static int update_debugctl(struct thread_struct *prev, | ||
| 459 | struct thread_struct *next, unsigned long debugctl) | ||
| 460 | { | ||
| 461 | unsigned long ds_prev = 0; | ||
| 462 | unsigned long ds_next = 0; | ||
| 463 | |||
| 464 | if (prev->ds_ctx) | ||
| 465 | ds_prev = (unsigned long)prev->ds_ctx->ds; | ||
| 466 | if (next->ds_ctx) | ||
| 467 | ds_next = (unsigned long)next->ds_ctx->ds; | ||
| 468 | |||
| 469 | if (ds_next != ds_prev) { | ||
| 470 | /* we clear debugctl to make sure DS | ||
| 471 | * is not in use when we change it */ | ||
| 472 | debugctl = 0; | ||
| 473 | update_debugctlmsr(0); | ||
| 474 | wrmsr(MSR_IA32_DS_AREA, ds_next, 0); | ||
| 475 | } | ||
| 476 | return debugctl; | ||
| 477 | } | ||
| 478 | #else | ||
| 479 | static int update_debugctl(struct thread_struct *prev, | ||
| 480 | struct thread_struct *next, unsigned long debugctl) | ||
| 481 | { | ||
| 482 | return debugctl; | ||
| 483 | } | ||
| 484 | #endif /* CONFIG_X86_DS */ | ||
| 485 | |||
| 441 | static noinline void | 486 | static noinline void |
| 442 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | 487 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, |
| 443 | struct tss_struct *tss) | 488 | struct tss_struct *tss) |
| @@ -448,14 +493,7 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
| 448 | prev = &prev_p->thread; | 493 | prev = &prev_p->thread; |
| 449 | next = &next_p->thread; | 494 | next = &next_p->thread; |
| 450 | 495 | ||
| 451 | debugctl = prev->debugctlmsr; | 496 | debugctl = update_debugctl(prev, next, prev->debugctlmsr); |
| 452 | if (next->ds_area_msr != prev->ds_area_msr) { | ||
| 453 | /* we clear debugctl to make sure DS | ||
| 454 | * is not in use when we change it */ | ||
| 455 | debugctl = 0; | ||
| 456 | update_debugctlmsr(0); | ||
| 457 | wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); | ||
| 458 | } | ||
| 459 | 497 | ||
| 460 | if (next->debugctlmsr != debugctl) | 498 | if (next->debugctlmsr != debugctl) |
| 461 | update_debugctlmsr(next->debugctlmsr); | 499 | update_debugctlmsr(next->debugctlmsr); |
| @@ -479,13 +517,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
| 479 | hard_enable_TSC(); | 517 | hard_enable_TSC(); |
| 480 | } | 518 | } |
| 481 | 519 | ||
| 482 | #ifdef X86_BTS | 520 | #ifdef CONFIG_X86_PTRACE_BTS |
| 483 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | 521 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) |
| 484 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | 522 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); |
| 485 | 523 | ||
| 486 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | 524 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) |
| 487 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | 525 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); |
| 488 | #endif | 526 | #endif /* CONFIG_X86_PTRACE_BTS */ |
| 489 | 527 | ||
| 490 | 528 | ||
| 491 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { | 529 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { |
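The __switch_to_xtra() rework above replaces the ds_area_msr comparison with per-thread DS contexts but keeps the same ordering: tracing is disabled (debugctl written as 0) before MSR_IA32_DS_AREA is repointed, so the CPU never logs BTS records into a half-switched buffer. A pseudo-MSR illustration of that ordering only (hypothetical variables, not the real wrmsr/update_debugctlmsr):

```c
#include <stdio.h>

/* Hypothetical stand-ins for the two MSRs involved; the point is only the
 * order of the writes, mirroring update_debugctl() in the hunk above. */
static unsigned long msr_debugctl;	/* would be MSR_IA32_DEBUGCTLMSR */
static unsigned long msr_ds_area;	/* would be MSR_IA32_DS_AREA */

static void switch_ds(unsigned long next_ds, unsigned long next_debugctl)
{
	if (next_ds != msr_ds_area) {
		msr_debugctl = 0;	/* stop BTS/PEBS first */
		msr_ds_area = next_ds;	/* then swap the buffer pointer */
	}
	if (msr_debugctl != next_debugctl)
		msr_debugctl = next_debugctl;	/* re-enable for the next task */
}

int main(void)
{
	switch_ds(0x1000, 0x40);	/* fake DS buffer and BTS enable bit */
	printf("ds_area=%#lx debugctl=%#lx\n", msr_ds_area, msr_debugctl);
	return 0;
}
```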
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 00263c9e6500..2a8ccb9238b4 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -37,11 +37,11 @@ | |||
| 37 | #include <linux/kdebug.h> | 37 | #include <linux/kdebug.h> |
| 38 | #include <linux/tick.h> | 38 | #include <linux/tick.h> |
| 39 | #include <linux/prctl.h> | 39 | #include <linux/prctl.h> |
| 40 | #include <linux/uaccess.h> | ||
| 41 | #include <linux/io.h> | ||
| 40 | 42 | ||
| 41 | #include <asm/uaccess.h> | ||
| 42 | #include <asm/pgtable.h> | 43 | #include <asm/pgtable.h> |
| 43 | #include <asm/system.h> | 44 | #include <asm/system.h> |
| 44 | #include <asm/io.h> | ||
| 45 | #include <asm/processor.h> | 45 | #include <asm/processor.h> |
| 46 | #include <asm/i387.h> | 46 | #include <asm/i387.h> |
| 47 | #include <asm/mmu_context.h> | 47 | #include <asm/mmu_context.h> |
| @@ -89,11 +89,13 @@ void exit_idle(void) | |||
| 89 | #ifdef CONFIG_HOTPLUG_CPU | 89 | #ifdef CONFIG_HOTPLUG_CPU |
| 90 | DECLARE_PER_CPU(int, cpu_state); | 90 | DECLARE_PER_CPU(int, cpu_state); |
| 91 | 91 | ||
| 92 | #include <asm/nmi.h> | 92 | #include <linux/nmi.h> |
| 93 | /* We halt the CPU with physical CPU hotplug */ | 93 | /* We halt the CPU with physical CPU hotplug */ |
| 94 | static inline void play_dead(void) | 94 | static inline void play_dead(void) |
| 95 | { | 95 | { |
| 96 | idle_task_exit(); | 96 | idle_task_exit(); |
| 97 | c1e_remove_cpu(raw_smp_processor_id()); | ||
| 98 | |||
| 97 | mb(); | 99 | mb(); |
| 98 | /* Ack it */ | 100 | /* Ack it */ |
| 99 | __get_cpu_var(cpu_state) = CPU_DEAD; | 101 | __get_cpu_var(cpu_state) = CPU_DEAD; |
| @@ -152,7 +154,7 @@ void cpu_idle(void) | |||
| 152 | } | 154 | } |
| 153 | 155 | ||
| 154 | /* Prints also some state that isn't saved in the pt_regs */ | 156 | /* Prints also some state that isn't saved in the pt_regs */ |
| 155 | void __show_regs(struct pt_regs * regs) | 157 | void __show_regs(struct pt_regs *regs) |
| 156 | { | 158 | { |
| 157 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; | 159 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; |
| 158 | unsigned long d0, d1, d2, d3, d6, d7; | 160 | unsigned long d0, d1, d2, d3, d6, d7; |
| @@ -161,59 +163,61 @@ void __show_regs(struct pt_regs * regs) | |||
| 161 | 163 | ||
| 162 | printk("\n"); | 164 | printk("\n"); |
| 163 | print_modules(); | 165 | print_modules(); |
| 164 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | 166 | printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", |
| 165 | current->pid, current->comm, print_tainted(), | 167 | current->pid, current->comm, print_tainted(), |
| 166 | init_utsname()->release, | 168 | init_utsname()->release, |
| 167 | (int)strcspn(init_utsname()->version, " "), | 169 | (int)strcspn(init_utsname()->version, " "), |
| 168 | init_utsname()->version); | 170 | init_utsname()->version); |
| 169 | printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); | 171 | printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); |
| 170 | printk_address(regs->ip, 1); | 172 | printk_address(regs->ip, 1); |
| 171 | printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, | 173 | printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, |
| 172 | regs->flags); | 174 | regs->sp, regs->flags); |
| 173 | printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", | 175 | printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", |
| 174 | regs->ax, regs->bx, regs->cx); | 176 | regs->ax, regs->bx, regs->cx); |
| 175 | printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", | 177 | printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", |
| 176 | regs->dx, regs->si, regs->di); | 178 | regs->dx, regs->si, regs->di); |
| 177 | printk("RBP: %016lx R08: %016lx R09: %016lx\n", | 179 | printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", |
| 178 | regs->bp, regs->r8, regs->r9); | 180 | regs->bp, regs->r8, regs->r9); |
| 179 | printk("R10: %016lx R11: %016lx R12: %016lx\n", | 181 | printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", |
| 180 | regs->r10, regs->r11, regs->r12); | 182 | regs->r10, regs->r11, regs->r12); |
| 181 | printk("R13: %016lx R14: %016lx R15: %016lx\n", | 183 | printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", |
| 182 | regs->r13, regs->r14, regs->r15); | 184 | regs->r13, regs->r14, regs->r15); |
| 183 | 185 | ||
| 184 | asm("movl %%ds,%0" : "=r" (ds)); | 186 | asm("movl %%ds,%0" : "=r" (ds)); |
| 185 | asm("movl %%cs,%0" : "=r" (cs)); | 187 | asm("movl %%cs,%0" : "=r" (cs)); |
| 186 | asm("movl %%es,%0" : "=r" (es)); | 188 | asm("movl %%es,%0" : "=r" (es)); |
| 187 | asm("movl %%fs,%0" : "=r" (fsindex)); | 189 | asm("movl %%fs,%0" : "=r" (fsindex)); |
| 188 | asm("movl %%gs,%0" : "=r" (gsindex)); | 190 | asm("movl %%gs,%0" : "=r" (gsindex)); |
| 189 | 191 | ||
| 190 | rdmsrl(MSR_FS_BASE, fs); | 192 | rdmsrl(MSR_FS_BASE, fs); |
| 191 | rdmsrl(MSR_GS_BASE, gs); | 193 | rdmsrl(MSR_GS_BASE, gs); |
| 192 | rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); | 194 | rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); |
| 193 | 195 | ||
| 194 | cr0 = read_cr0(); | 196 | cr0 = read_cr0(); |
| 195 | cr2 = read_cr2(); | 197 | cr2 = read_cr2(); |
| 196 | cr3 = read_cr3(); | 198 | cr3 = read_cr3(); |
| 197 | cr4 = read_cr4(); | 199 | cr4 = read_cr4(); |
| 198 | 200 | ||
| 199 | printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", | 201 | printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", |
| 200 | fs,fsindex,gs,gsindex,shadowgs); | 202 | fs, fsindex, gs, gsindex, shadowgs); |
| 201 | printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); | 203 | printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, |
| 202 | printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); | 204 | es, cr0); |
| 205 | printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, | ||
| 206 | cr4); | ||
| 203 | 207 | ||
| 204 | get_debugreg(d0, 0); | 208 | get_debugreg(d0, 0); |
| 205 | get_debugreg(d1, 1); | 209 | get_debugreg(d1, 1); |
| 206 | get_debugreg(d2, 2); | 210 | get_debugreg(d2, 2); |
| 207 | printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); | 211 | printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); |
| 208 | get_debugreg(d3, 3); | 212 | get_debugreg(d3, 3); |
| 209 | get_debugreg(d6, 6); | 213 | get_debugreg(d6, 6); |
| 210 | get_debugreg(d7, 7); | 214 | get_debugreg(d7, 7); |
| 211 | printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); | 215 | printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); |
| 212 | } | 216 | } |
| 213 | 217 | ||
| 214 | void show_regs(struct pt_regs *regs) | 218 | void show_regs(struct pt_regs *regs) |
| 215 | { | 219 | { |
| 216 | printk("CPU %d:", smp_processor_id()); | 220 | printk(KERN_INFO "CPU %d:", smp_processor_id()); |
| 217 | __show_regs(regs); | 221 | __show_regs(regs); |
| 218 | show_trace(NULL, regs, (void *)(regs + 1), regs->bp); | 222 | show_trace(NULL, regs, (void *)(regs + 1), regs->bp); |
| 219 | } | 223 | } |
| @@ -239,6 +243,14 @@ void exit_thread(void) | |||
| 239 | t->io_bitmap_max = 0; | 243 | t->io_bitmap_max = 0; |
| 240 | put_cpu(); | 244 | put_cpu(); |
| 241 | } | 245 | } |
| 246 | #ifdef CONFIG_X86_DS | ||
| 247 | /* Free any DS contexts that have not been properly released. */ | ||
| 248 | if (unlikely(t->ds_ctx)) { | ||
| 249 | /* we clear debugctl to make sure DS is not used. */ | ||
| 250 | update_debugctlmsr(0); | ||
| 251 | ds_free(t->ds_ctx); | ||
| 252 | } | ||
| 253 | #endif /* CONFIG_X86_DS */ | ||
| 242 | } | 254 | } |
| 243 | 255 | ||
| 244 | void flush_thread(void) | 256 | void flush_thread(void) |
| @@ -314,10 +326,10 @@ void prepare_to_copy(struct task_struct *tsk) | |||
| 314 | 326 | ||
| 315 | int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | 327 | int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, |
| 316 | unsigned long unused, | 328 | unsigned long unused, |
| 317 | struct task_struct * p, struct pt_regs * regs) | 329 | struct task_struct *p, struct pt_regs *regs) |
| 318 | { | 330 | { |
| 319 | int err; | 331 | int err; |
| 320 | struct pt_regs * childregs; | 332 | struct pt_regs *childregs; |
| 321 | struct task_struct *me = current; | 333 | struct task_struct *me = current; |
| 322 | 334 | ||
| 323 | childregs = ((struct pt_regs *) | 335 | childregs = ((struct pt_regs *) |
| @@ -362,10 +374,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | |||
| 362 | if (test_thread_flag(TIF_IA32)) | 374 | if (test_thread_flag(TIF_IA32)) |
| 363 | err = do_set_thread_area(p, -1, | 375 | err = do_set_thread_area(p, -1, |
| 364 | (struct user_desc __user *)childregs->si, 0); | 376 | (struct user_desc __user *)childregs->si, 0); |
| 365 | else | 377 | else |
| 366 | #endif | 378 | #endif |
| 367 | err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); | 379 | err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); |
| 368 | if (err) | 380 | if (err) |
| 369 | goto out; | 381 | goto out; |
| 370 | } | 382 | } |
| 371 | err = 0; | 383 | err = 0; |
| @@ -472,13 +484,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
| 472 | next = &next_p->thread; | 484 | next = &next_p->thread; |
| 473 | 485 | ||
| 474 | debugctl = prev->debugctlmsr; | 486 | debugctl = prev->debugctlmsr; |
| 475 | if (next->ds_area_msr != prev->ds_area_msr) { | 487 | |
| 476 | /* we clear debugctl to make sure DS | 488 | #ifdef CONFIG_X86_DS |
| 477 | * is not in use when we change it */ | 489 | { |
| 478 | debugctl = 0; | 490 | unsigned long ds_prev = 0, ds_next = 0; |
| 479 | update_debugctlmsr(0); | 491 | |
| 480 | wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); | 492 | if (prev->ds_ctx) |
| 493 | ds_prev = (unsigned long)prev->ds_ctx->ds; | ||
| 494 | if (next->ds_ctx) | ||
| 495 | ds_next = (unsigned long)next->ds_ctx->ds; | ||
| 496 | |||
| 497 | if (ds_next != ds_prev) { | ||
| 498 | /* | ||
| 499 | * We clear debugctl to make sure DS | ||
| 500 | * is not in use when we change it: | ||
| 501 | */ | ||
| 502 | debugctl = 0; | ||
| 503 | update_debugctlmsr(0); | ||
| 504 | wrmsrl(MSR_IA32_DS_AREA, ds_next); | ||
| 505 | } | ||
| 481 | } | 506 | } |
| 507 | #endif /* CONFIG_X86_DS */ | ||
| 482 | 508 | ||
| 483 | if (next->debugctlmsr != debugctl) | 509 | if (next->debugctlmsr != debugctl) |
| 484 | update_debugctlmsr(next->debugctlmsr); | 510 | update_debugctlmsr(next->debugctlmsr); |
| @@ -516,13 +542,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
| 516 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | 542 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
| 517 | } | 543 | } |
| 518 | 544 | ||
| 519 | #ifdef X86_BTS | 545 | #ifdef CONFIG_X86_PTRACE_BTS |
| 520 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | 546 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) |
| 521 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | 547 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); |
| 522 | 548 | ||
| 523 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | 549 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) |
| 524 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | 550 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); |
| 525 | #endif | 551 | #endif /* CONFIG_X86_PTRACE_BTS */ |
| 526 | } | 552 | } |
| 527 | 553 | ||
| 528 | /* | 554 | /* |
| @@ -544,7 +570,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 544 | unsigned fsindex, gsindex; | 570 | unsigned fsindex, gsindex; |
| 545 | 571 | ||
| 546 | /* we're going to use this soon, after a few expensive things */ | 572 | /* we're going to use this soon, after a few expensive things */ |
| 547 | if (next_p->fpu_counter>5) | 573 | if (next_p->fpu_counter > 5) |
| 548 | prefetch(next->xstate); | 574 | prefetch(next->xstate); |
| 549 | 575 | ||
| 550 | /* | 576 | /* |
| @@ -552,13 +578,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 552 | */ | 578 | */ |
| 553 | load_sp0(tss, next); | 579 | load_sp0(tss, next); |
| 554 | 580 | ||
| 555 | /* | 581 | /* |
| 556 | * Switch DS and ES. | 582 | * Switch DS and ES. |
| 557 | * This won't pick up thread selector changes, but I guess that is ok. | 583 | * This won't pick up thread selector changes, but I guess that is ok. |
| 558 | */ | 584 | */ |
| 559 | savesegment(es, prev->es); | 585 | savesegment(es, prev->es); |
| 560 | if (unlikely(next->es | prev->es)) | 586 | if (unlikely(next->es | prev->es)) |
| 561 | loadsegment(es, next->es); | 587 | loadsegment(es, next->es); |
| 562 | 588 | ||
| 563 | savesegment(ds, prev->ds); | 589 | savesegment(ds, prev->ds); |
| 564 | if (unlikely(next->ds | prev->ds)) | 590 | if (unlikely(next->ds | prev->ds)) |
| @@ -584,7 +610,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 584 | */ | 610 | */ |
| 585 | arch_leave_lazy_cpu_mode(); | 611 | arch_leave_lazy_cpu_mode(); |
| 586 | 612 | ||
| 587 | /* | 613 | /* |
| 588 | * Switch FS and GS. | 614 | * Switch FS and GS. |
| 589 | * | 615 | * |
| 590 | * Segment register != 0 always requires a reload. Also | 616 | * Segment register != 0 always requires a reload. Also |
| @@ -593,13 +619,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 593 | */ | 619 | */ |
| 594 | if (unlikely(fsindex | next->fsindex | prev->fs)) { | 620 | if (unlikely(fsindex | next->fsindex | prev->fs)) { |
| 595 | loadsegment(fs, next->fsindex); | 621 | loadsegment(fs, next->fsindex); |
| 596 | /* | 622 | /* |
| 597 | * Check if the user used a selector != 0; if yes | 623 | * Check if the user used a selector != 0; if yes |
| 598 | * clear 64bit base, since overloaded base is always | 624 | * clear 64bit base, since overloaded base is always |
| 599 | * mapped to the Null selector | 625 | * mapped to the Null selector |
| 600 | */ | 626 | */ |
| 601 | if (fsindex) | 627 | if (fsindex) |
| 602 | prev->fs = 0; | 628 | prev->fs = 0; |
| 603 | } | 629 | } |
| 604 | /* when next process has a 64bit base use it */ | 630 | /* when next process has a 64bit base use it */ |
| 605 | if (next->fs) | 631 | if (next->fs) |
| @@ -609,7 +635,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 609 | if (unlikely(gsindex | next->gsindex | prev->gs)) { | 635 | if (unlikely(gsindex | next->gsindex | prev->gs)) { |
| 610 | load_gs_index(next->gsindex); | 636 | load_gs_index(next->gsindex); |
| 611 | if (gsindex) | 637 | if (gsindex) |
| 612 | prev->gs = 0; | 638 | prev->gs = 0; |
| 613 | } | 639 | } |
| 614 | if (next->gs) | 640 | if (next->gs) |
| 615 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); | 641 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); |
| @@ -618,12 +644,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 618 | /* Must be after DS reload */ | 644 | /* Must be after DS reload */ |
| 619 | unlazy_fpu(prev_p); | 645 | unlazy_fpu(prev_p); |
| 620 | 646 | ||
| 621 | /* | 647 | /* |
| 622 | * Switch the PDA and FPU contexts. | 648 | * Switch the PDA and FPU contexts. |
| 623 | */ | 649 | */ |
| 624 | prev->usersp = read_pda(oldrsp); | 650 | prev->usersp = read_pda(oldrsp); |
| 625 | write_pda(oldrsp, next->usersp); | 651 | write_pda(oldrsp, next->usersp); |
| 626 | write_pda(pcurrent, next_p); | 652 | write_pda(pcurrent, next_p); |
| 627 | 653 | ||
| 628 | write_pda(kernelstack, | 654 | write_pda(kernelstack, |
| 629 | (unsigned long)task_stack_page(next_p) + | 655 | (unsigned long)task_stack_page(next_p) + |
| @@ -664,7 +690,7 @@ long sys_execve(char __user *name, char __user * __user *argv, | |||
| 664 | char __user * __user *envp, struct pt_regs *regs) | 690 | char __user * __user *envp, struct pt_regs *regs) |
| 665 | { | 691 | { |
| 666 | long error; | 692 | long error; |
| 667 | char * filename; | 693 | char *filename; |
| 668 | 694 | ||
| 669 | filename = getname(name); | 695 | filename = getname(name); |
| 670 | error = PTR_ERR(filename); | 696 | error = PTR_ERR(filename); |
| @@ -722,55 +748,55 @@ asmlinkage long sys_vfork(struct pt_regs *regs) | |||
| 722 | unsigned long get_wchan(struct task_struct *p) | 748 | unsigned long get_wchan(struct task_struct *p) |
| 723 | { | 749 | { |
| 724 | unsigned long stack; | 750 | unsigned long stack; |
| 725 | u64 fp,ip; | 751 | u64 fp, ip; |
| 726 | int count = 0; | 752 | int count = 0; |
| 727 | 753 | ||
| 728 | if (!p || p == current || p->state==TASK_RUNNING) | 754 | if (!p || p == current || p->state == TASK_RUNNING) |
| 729 | return 0; | 755 | return 0; |
| 730 | stack = (unsigned long)task_stack_page(p); | 756 | stack = (unsigned long)task_stack_page(p); |
| 731 | if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) | 757 | if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) |
| 732 | return 0; | 758 | return 0; |
| 733 | fp = *(u64 *)(p->thread.sp); | 759 | fp = *(u64 *)(p->thread.sp); |
| 734 | do { | 760 | do { |
| 735 | if (fp < (unsigned long)stack || | 761 | if (fp < (unsigned long)stack || |
| 736 | fp > (unsigned long)stack+THREAD_SIZE) | 762 | fp > (unsigned long)stack+THREAD_SIZE) |
| 737 | return 0; | 763 | return 0; |
| 738 | ip = *(u64 *)(fp+8); | 764 | ip = *(u64 *)(fp+8); |
| 739 | if (!in_sched_functions(ip)) | 765 | if (!in_sched_functions(ip)) |
| 740 | return ip; | 766 | return ip; |
| 741 | fp = *(u64 *)fp; | 767 | fp = *(u64 *)fp; |
| 742 | } while (count++ < 16); | 768 | } while (count++ < 16); |
| 743 | return 0; | 769 | return 0; |
| 744 | } | 770 | } |
| 745 | 771 | ||
| 746 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | 772 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) |
| 747 | { | 773 | { |
| 748 | int ret = 0; | 774 | int ret = 0; |
| 749 | int doit = task == current; | 775 | int doit = task == current; |
| 750 | int cpu; | 776 | int cpu; |
| 751 | 777 | ||
| 752 | switch (code) { | 778 | switch (code) { |
| 753 | case ARCH_SET_GS: | 779 | case ARCH_SET_GS: |
| 754 | if (addr >= TASK_SIZE_OF(task)) | 780 | if (addr >= TASK_SIZE_OF(task)) |
| 755 | return -EPERM; | 781 | return -EPERM; |
| 756 | cpu = get_cpu(); | 782 | cpu = get_cpu(); |
| 757 | /* handle small bases via the GDT because that's faster to | 783 | /* handle small bases via the GDT because that's faster to |
| 758 | switch. */ | 784 | switch. */ |
| 759 | if (addr <= 0xffffffff) { | 785 | if (addr <= 0xffffffff) { |
| 760 | set_32bit_tls(task, GS_TLS, addr); | 786 | set_32bit_tls(task, GS_TLS, addr); |
| 761 | if (doit) { | 787 | if (doit) { |
| 762 | load_TLS(&task->thread, cpu); | 788 | load_TLS(&task->thread, cpu); |
| 763 | load_gs_index(GS_TLS_SEL); | 789 | load_gs_index(GS_TLS_SEL); |
| 764 | } | 790 | } |
| 765 | task->thread.gsindex = GS_TLS_SEL; | 791 | task->thread.gsindex = GS_TLS_SEL; |
| 766 | task->thread.gs = 0; | 792 | task->thread.gs = 0; |
| 767 | } else { | 793 | } else { |
| 768 | task->thread.gsindex = 0; | 794 | task->thread.gsindex = 0; |
| 769 | task->thread.gs = addr; | 795 | task->thread.gs = addr; |
| 770 | if (doit) { | 796 | if (doit) { |
| 771 | load_gs_index(0); | 797 | load_gs_index(0); |
| 772 | ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); | 798 | ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); |
| 773 | } | 799 | } |
| 774 | } | 800 | } |
| 775 | put_cpu(); | 801 | put_cpu(); |
| 776 | break; | 802 | break; |
| @@ -824,8 +850,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
| 824 | rdmsrl(MSR_KERNEL_GS_BASE, base); | 850 | rdmsrl(MSR_KERNEL_GS_BASE, base); |
| 825 | else | 851 | else |
| 826 | base = task->thread.gs; | 852 | base = task->thread.gs; |
| 827 | } | 853 | } else |
| 828 | else | ||
| 829 | base = task->thread.gs; | 854 | base = task->thread.gs; |
| 830 | ret = put_user(base, (unsigned long __user *)addr); | 855 | ret = put_user(base, (unsigned long __user *)addr); |
| 831 | break; | 856 | break; |
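The ptrace.c diff that follows reworks branch-trace-store (BTS) handling around a bts_configuration descriptor, in which each record field is bts_cfg.sizeof_field bytes wide. As a rough standalone illustration of that field addressing (fabricated record data; memcpy is used here instead of the kernel's direct unsigned long load):

```c
#include <stdio.h>
#include <string.h>

enum bts_field { bts_from = 0, bts_to, bts_flags };

/* Field i of a raw BTS record starts at offset i * sizeof_field. */
static unsigned long bts_get(const char *base, enum bts_field field,
			     unsigned char sizeof_field)
{
	unsigned long val = 0;

	memcpy(&val, base + sizeof_field * field, sizeof_field);
	return val;
}

int main(void)
{
	/* Fabricated 3-field record: from, to, flags. */
	unsigned long rec[3] = { 0x400123, 0x400456, 0 };

	printf("branch %#lx -> %#lx\n",
	       bts_get((const char *)rec, bts_from, sizeof(rec[0])),
	       bts_get((const char *)rec, bts_to, sizeof(rec[0])));
	return 0;
}
```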
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index fc3e8dcd9da6..e375b658efc3 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <linux/errno.h> | 14 | #include <linux/errno.h> |
| 15 | #include <linux/ptrace.h> | 15 | #include <linux/ptrace.h> |
| 16 | #include <linux/regset.h> | 16 | #include <linux/regset.h> |
| 17 | #include <linux/tracehook.h> | ||
| 17 | #include <linux/user.h> | 18 | #include <linux/user.h> |
| 18 | #include <linux/elf.h> | 19 | #include <linux/elf.h> |
| 19 | #include <linux/security.h> | 20 | #include <linux/security.h> |
| @@ -554,45 +555,115 @@ static int ptrace_set_debugreg(struct task_struct *child, | |||
| 554 | return 0; | 555 | return 0; |
| 555 | } | 556 | } |
| 556 | 557 | ||
| 557 | #ifdef X86_BTS | 558 | #ifdef CONFIG_X86_PTRACE_BTS |
| 559 | /* | ||
| 560 | * The configuration for a particular BTS hardware implementation. | ||
| 561 | */ | ||
| 562 | struct bts_configuration { | ||
| 563 | /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */ | ||
| 564 | unsigned char sizeof_bts; | ||
| 565 | /* the size of a field in the BTS record in bytes */ | ||
| 566 | unsigned char sizeof_field; | ||
| 567 | /* a bitmask to enable/disable BTS in DEBUGCTL MSR */ | ||
| 568 | unsigned long debugctl_mask; | ||
| 569 | }; | ||
| 570 | static struct bts_configuration bts_cfg; | ||
| 571 | |||
| 572 | #define BTS_MAX_RECORD_SIZE (8 * 3) | ||
| 573 | |||
| 574 | |||
| 575 | /* | ||
| 576 | * Branch Trace Store (BTS) uses the following format. Different | ||
| 577 | * architectures vary in the size of those fields. | ||
| 578 | * - source linear address | ||
| 579 | * - destination linear address | ||
| 580 | * - flags | ||
| 581 | * | ||
| 582 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
| 583 | * architectures use 32bit pointers in 32bit mode. | ||
| 584 | * | ||
| 585 | * We compute the base address for the first 8 fields based on: | ||
| 586 | * - the field size stored in the DS configuration | ||
| 587 | * - the relative field position | ||
| 588 | * | ||
| 589 | * In order to store additional information in the BTS buffer, we use | ||
| 590 | * a special source address to indicate that the record requires | ||
| 591 | * special interpretation. | ||
| 592 | * | ||
| 593 | * Netburst indicated via a bit in the flags field whether the branch | ||
| 594 | * was predicted; this is ignored. | ||
| 595 | */ | ||
| 596 | |||
| 597 | enum bts_field { | ||
| 598 | bts_from = 0, | ||
| 599 | bts_to, | ||
| 600 | bts_flags, | ||
| 601 | |||
| 602 | bts_escape = (unsigned long)-1, | ||
| 603 | bts_qual = bts_to, | ||
| 604 | bts_jiffies = bts_flags | ||
| 605 | }; | ||
| 606 | |||
| 607 | static inline unsigned long bts_get(const char *base, enum bts_field field) | ||
| 608 | { | ||
| 609 | base += (bts_cfg.sizeof_field * field); | ||
| 610 | return *(unsigned long *)base; | ||
| 611 | } | ||
| 558 | 612 | ||
| 559 | static int ptrace_bts_get_size(struct task_struct *child) | 613 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) |
| 560 | { | 614 | { |
| 561 | if (!child->thread.ds_area_msr) | 615 | base += (bts_cfg.sizeof_field * field); |
| 562 | return -ENXIO; | 616 | (*(unsigned long *)base) = val; |
| 617 | } | ||
| 563 | 618 | ||
| 564 | return ds_get_bts_index((void *)child->thread.ds_area_msr); | 619 | /* |
| 620 | * Translate a BTS record from the raw format into the bts_struct format | ||
| 621 | * | ||
| 622 | * out (out): bts_struct interpretation | ||
| 623 | * raw: raw BTS record | ||
| 624 | */ | ||
| 625 | static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw) | ||
| 626 | { | ||
| 627 | memset(out, 0, sizeof(*out)); | ||
| 628 | if (bts_get(raw, bts_from) == bts_escape) { | ||
| 629 | out->qualifier = bts_get(raw, bts_qual); | ||
| 630 | out->variant.jiffies = bts_get(raw, bts_jiffies); | ||
| 631 | } else { | ||
| 632 | out->qualifier = BTS_BRANCH; | ||
| 633 | out->variant.lbr.from_ip = bts_get(raw, bts_from); | ||
| 634 | out->variant.lbr.to_ip = bts_get(raw, bts_to); | ||
| 635 | } | ||
| 565 | } | 636 | } |
| 566 | 637 | ||
| 567 | static int ptrace_bts_read_record(struct task_struct *child, | 638 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, |
| 568 | long index, | ||
| 569 | struct bts_struct __user *out) | 639 | struct bts_struct __user *out) |
| 570 | { | 640 | { |
| 571 | struct bts_struct ret; | 641 | struct bts_struct ret; |
| 572 | int retval; | 642 | const void *bts_record; |
| 573 | int bts_end; | 643 | size_t bts_index, bts_end; |
| 574 | int bts_index; | 644 | int error; |
| 575 | |||
| 576 | if (!child->thread.ds_area_msr) | ||
| 577 | return -ENXIO; | ||
| 578 | 645 | ||
| 579 | if (index < 0) | 646 | error = ds_get_bts_end(child, &bts_end); |
| 580 | return -EINVAL; | 647 | if (error < 0) |
| 648 | return error; | ||
| 581 | 649 | ||
| 582 | bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr); | ||
| 583 | if (bts_end <= index) | 650 | if (bts_end <= index) |
| 584 | return -EINVAL; | 651 | return -EINVAL; |
| 585 | 652 | ||
| 653 | error = ds_get_bts_index(child, &bts_index); | ||
| 654 | if (error < 0) | ||
| 655 | return error; | ||
| 656 | |||
| 586 | /* translate the ptrace bts index into the ds bts index */ | 657 | /* translate the ptrace bts index into the ds bts index */ |
| 587 | bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); | 658 | bts_index += bts_end - (index + 1); |
| 588 | bts_index -= (index + 1); | 659 | if (bts_end <= bts_index) |
| 589 | if (bts_index < 0) | 660 | bts_index -= bts_end; |
| 590 | bts_index += bts_end; | ||
| 591 | 661 | ||
| 592 | retval = ds_read_bts((void *)child->thread.ds_area_msr, | 662 | error = ds_access_bts(child, bts_index, &bts_record); |
| 593 | bts_index, &ret); | 663 | if (error < 0) |
| 594 | if (retval < 0) | 664 | return error; |
| 595 | return retval; | 665 | |
| 666 | ptrace_bts_translate_record(&ret, bts_record); | ||
| 596 | 667 | ||
| 597 | if (copy_to_user(out, &ret, sizeof(ret))) | 668 | if (copy_to_user(out, &ret, sizeof(ret))) |
| 598 | return -EFAULT; | 669 | return -EFAULT; |
| @@ -600,101 +671,106 @@ static int ptrace_bts_read_record(struct task_struct *child, | |||
| 600 | return sizeof(ret); | 671 | return sizeof(ret); |
| 601 | } | 672 | } |
| 602 | 673 | ||
| 603 | static int ptrace_bts_clear(struct task_struct *child) | ||
| 604 | { | ||
| 605 | if (!child->thread.ds_area_msr) | ||
| 606 | return -ENXIO; | ||
| 607 | |||
| 608 | return ds_clear((void *)child->thread.ds_area_msr); | ||
| 609 | } | ||
| 610 | |||
| 611 | static int ptrace_bts_drain(struct task_struct *child, | 674 | static int ptrace_bts_drain(struct task_struct *child, |
| 612 | long size, | 675 | long size, |
| 613 | struct bts_struct __user *out) | 676 | struct bts_struct __user *out) |
| 614 | { | 677 | { |
| 615 | int end, i; | 678 | struct bts_struct ret; |
| 616 | void *ds = (void *)child->thread.ds_area_msr; | 679 | const unsigned char *raw; |
| 617 | 680 | size_t end, i; | |
| 618 | if (!ds) | 681 | int error; |
| 619 | return -ENXIO; | ||
| 620 | 682 | ||
| 621 | end = ds_get_bts_index(ds); | 683 | error = ds_get_bts_index(child, &end); |
| 622 | if (end <= 0) | 684 | if (error < 0) |
| 623 | return end; | 685 | return error; |
| 624 | 686 | ||
| 625 | if (size < (end * sizeof(struct bts_struct))) | 687 | if (size < (end * sizeof(struct bts_struct))) |
| 626 | return -EIO; | 688 | return -EIO; |
| 627 | 689 | ||
| 628 | for (i = 0; i < end; i++, out++) { | 690 | error = ds_access_bts(child, 0, (const void **)&raw); |
| 629 | struct bts_struct ret; | 691 | if (error < 0) |
| 630 | int retval; | 692 | return error; |
| 631 | 693 | ||
| 632 | retval = ds_read_bts(ds, i, &ret); | 694 | for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { |
| 633 | if (retval < 0) | 695 | ptrace_bts_translate_record(&ret, raw); |
| 634 | return retval; | ||
| 635 | 696 | ||
| 636 | if (copy_to_user(out, &ret, sizeof(ret))) | 697 | if (copy_to_user(out, &ret, sizeof(ret))) |
| 637 | return -EFAULT; | 698 | return -EFAULT; |
| 638 | } | 699 | } |
| 639 | 700 | ||
| 640 | ds_clear(ds); | 701 | error = ds_clear_bts(child); |
| 702 | if (error < 0) | ||
| 703 | return error; | ||
| 641 | 704 | ||
| 642 | return end; | 705 | return end; |
| 643 | } | 706 | } |
| 644 | 707 | ||
| 708 | static void ptrace_bts_ovfl(struct task_struct *child) | ||
| 709 | { | ||
| 710 | send_sig(child->thread.bts_ovfl_signal, child, 0); | ||
| 711 | } | ||
| 712 | |||
| 645 | static int ptrace_bts_config(struct task_struct *child, | 713 | static int ptrace_bts_config(struct task_struct *child, |
| 646 | long cfg_size, | 714 | long cfg_size, |
| 647 | const struct ptrace_bts_config __user *ucfg) | 715 | const struct ptrace_bts_config __user *ucfg) |
| 648 | { | 716 | { |
| 649 | struct ptrace_bts_config cfg; | 717 | struct ptrace_bts_config cfg; |
| 650 | int bts_size, ret = 0; | 718 | int error = 0; |
| 651 | void *ds; | 719 | |
| 720 | error = -EOPNOTSUPP; | ||
| 721 | if (!bts_cfg.sizeof_bts) | ||
| 722 | goto errout; | ||
| 652 | 723 | ||
| 724 | error = -EIO; | ||
| 653 | if (cfg_size < sizeof(cfg)) | 725 | if (cfg_size < sizeof(cfg)) |
| 654 | return -EIO; | 726 | goto errout; |
| 655 | 727 | ||
| 728 | error = -EFAULT; | ||
| 656 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) | 729 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) |
| 657 | return -EFAULT; | 730 | goto errout; |
| 658 | 731 | ||
| 659 | if ((int)cfg.size < 0) | 732 | error = -EINVAL; |
| 660 | return -EINVAL; | 733 | if ((cfg.flags & PTRACE_BTS_O_SIGNAL) && |
| 734 | !(cfg.flags & PTRACE_BTS_O_ALLOC)) | ||
| 735 | goto errout; | ||
| 661 | 736 | ||
| 662 | bts_size = 0; | 737 | if (cfg.flags & PTRACE_BTS_O_ALLOC) { |
| 663 | ds = (void *)child->thread.ds_area_msr; | 738 | ds_ovfl_callback_t ovfl = NULL; |
| 664 | if (ds) { | 739 | unsigned int sig = 0; |
| 665 | bts_size = ds_get_bts_size(ds); | 740 | |
| 666 | if (bts_size < 0) | 741 | /* we ignore the error in case we were not tracing child */ |
| 667 | return bts_size; | 742 | (void)ds_release_bts(child); |
| 668 | } | ||
| 669 | cfg.size = PAGE_ALIGN(cfg.size); | ||
| 670 | 743 | ||
| 671 | if (bts_size != cfg.size) { | 744 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { |
| 672 | ret = ptrace_bts_realloc(child, cfg.size, | 745 | if (!cfg.signal) |
| 673 | cfg.flags & PTRACE_BTS_O_CUT_SIZE); | 746 | goto errout; |
| 674 | if (ret < 0) | 747 | |
| 748 | sig = cfg.signal; | ||
| 749 | ovfl = ptrace_bts_ovfl; | ||
| 750 | } | ||
| 751 | |||
| 752 | error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); | ||
| 753 | if (error < 0) | ||
| 675 | goto errout; | 754 | goto errout; |
| 676 | 755 | ||
| 677 | ds = (void *)child->thread.ds_area_msr; | 756 | child->thread.bts_ovfl_signal = sig; |
| 678 | } | 757 | } |
| 679 | 758 | ||
| 680 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) | 759 | error = -EINVAL; |
| 681 | ret = ds_set_overflow(ds, DS_O_SIGNAL); | 760 | if (!child->thread.ds_ctx && cfg.flags) |
| 682 | else | ||
| 683 | ret = ds_set_overflow(ds, DS_O_WRAP); | ||
| 684 | if (ret < 0) | ||
| 685 | goto errout; | 761 | goto errout; |
| 686 | 762 | ||
| 687 | if (cfg.flags & PTRACE_BTS_O_TRACE) | 763 | if (cfg.flags & PTRACE_BTS_O_TRACE) |
| 688 | child->thread.debugctlmsr |= ds_debugctl_mask(); | 764 | child->thread.debugctlmsr |= bts_cfg.debugctl_mask; |
| 689 | else | 765 | else |
| 690 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); | 766 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; |
| 691 | 767 | ||
| 692 | if (cfg.flags & PTRACE_BTS_O_SCHED) | 768 | if (cfg.flags & PTRACE_BTS_O_SCHED) |
| 693 | set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 769 | set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); |
| 694 | else | 770 | else |
| 695 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 771 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); |
| 696 | 772 | ||
| 697 | ret = sizeof(cfg); | 773 | error = sizeof(cfg); |
| 698 | 774 | ||
| 699 | out: | 775 | out: |
| 700 | if (child->thread.debugctlmsr) | 776 | if (child->thread.debugctlmsr) |
| @@ -702,10 +778,10 @@ out: | |||
| 702 | else | 778 | else |
| 703 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 779 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); |
| 704 | 780 | ||
| 705 | return ret; | 781 | return error; |
| 706 | 782 | ||
| 707 | errout: | 783 | errout: |
| 708 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); | 784 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; |
| 709 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 785 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); |
| 710 | goto out; | 786 | goto out; |
| 711 | } | 787 | } |
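The reworked ptrace_bts_config() above pushes buffer management down into the ds.c layer: a tracer asks for a buffer with PTRACE_BTS_O_ALLOC and, optionally, an overflow signal, instead of the old page-aligned realloc dance. A minimal sketch of how a tracer might drive this from user space follows; the request code and flags are taken from the hunks in this file, but the header exporting struct ptrace_bts_config to user space (asm/ptrace-abi.h here) and the glibc-level ptrace() casts are assumptions, not part of this patch.

	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <signal.h>
	#include <asm/ptrace-abi.h>	/* assumed location of struct ptrace_bts_config */

	static long bts_enable(pid_t pid)
	{
		struct ptrace_bts_config cfg = {
			.size	= 4096,			/* bytes of BTS buffer to allocate */
			.flags	= PTRACE_BTS_O_ALLOC |	/* (re)allocate the buffer        */
				  PTRACE_BTS_O_TRACE |	/* and start branch tracing       */
				  PTRACE_BTS_O_SIGNAL,	/* notify the tracee on overflow  */
			.signal	= SIGUSR1,
		};

		/* arch_ptrace() takes addr as the config pointer and data as its size;
		 * on success the call returns sizeof(cfg). */
		return ptrace(PTRACE_BTS_CONFIG, pid, &cfg, (void *)sizeof(cfg));
	}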
| @@ -714,29 +790,40 @@ static int ptrace_bts_status(struct task_struct *child, | |||
| 714 | long cfg_size, | 790 | long cfg_size, |
| 715 | struct ptrace_bts_config __user *ucfg) | 791 | struct ptrace_bts_config __user *ucfg) |
| 716 | { | 792 | { |
| 717 | void *ds = (void *)child->thread.ds_area_msr; | ||
| 718 | struct ptrace_bts_config cfg; | 793 | struct ptrace_bts_config cfg; |
| 794 | size_t end; | ||
| 795 | const void *base, *max; | ||
| 796 | int error; | ||
| 719 | 797 | ||
| 720 | if (cfg_size < sizeof(cfg)) | 798 | if (cfg_size < sizeof(cfg)) |
| 721 | return -EIO; | 799 | return -EIO; |
| 722 | 800 | ||
| 723 | memset(&cfg, 0, sizeof(cfg)); | 801 | error = ds_get_bts_end(child, &end); |
| 802 | if (error < 0) | ||
| 803 | return error; | ||
| 724 | 804 | ||
| 725 | if (ds) { | 805 | error = ds_access_bts(child, /* index = */ 0, &base); |
| 726 | cfg.size = ds_get_bts_size(ds); | 806 | if (error < 0) |
| 807 | return error; | ||
| 727 | 808 | ||
| 728 | if (ds_get_overflow(ds) == DS_O_SIGNAL) | 809 | error = ds_access_bts(child, /* index = */ end, &max); |
| 729 | cfg.flags |= PTRACE_BTS_O_SIGNAL; | 810 | if (error < 0) |
| 811 | return error; | ||
| 730 | 812 | ||
| 731 | if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && | 813 | memset(&cfg, 0, sizeof(cfg)); |
| 732 | child->thread.debugctlmsr & ds_debugctl_mask()) | 814 | cfg.size = (max - base); |
| 733 | cfg.flags |= PTRACE_BTS_O_TRACE; | 815 | cfg.signal = child->thread.bts_ovfl_signal; |
| 816 | cfg.bts_size = sizeof(struct bts_struct); | ||
| 734 | 817 | ||
| 735 | if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) | 818 | if (cfg.signal) |
| 736 | cfg.flags |= PTRACE_BTS_O_SCHED; | 819 | cfg.flags |= PTRACE_BTS_O_SIGNAL; |
| 737 | } | ||
| 738 | 820 | ||
| 739 | cfg.bts_size = sizeof(struct bts_struct); | 821 | if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && |
| 822 | child->thread.debugctlmsr & bts_cfg.debugctl_mask) | ||
| 823 | cfg.flags |= PTRACE_BTS_O_TRACE; | ||
| 824 | |||
| 825 | if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) | ||
| 826 | cfg.flags |= PTRACE_BTS_O_SCHED; | ||
| 740 | 827 | ||
| 741 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) | 828 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) |
| 742 | return -EFAULT; | 829 | return -EFAULT; |
| @@ -744,89 +831,38 @@ static int ptrace_bts_status(struct task_struct *child, | |||
| 744 | return sizeof(cfg); | 831 | return sizeof(cfg); |
| 745 | } | 832 | } |
| 746 | 833 | ||
| 747 | |||
| 748 | static int ptrace_bts_write_record(struct task_struct *child, | 834 | static int ptrace_bts_write_record(struct task_struct *child, |
| 749 | const struct bts_struct *in) | 835 | const struct bts_struct *in) |
| 750 | { | 836 | { |
| 751 | int retval; | 837 | unsigned char bts_record[BTS_MAX_RECORD_SIZE]; |
| 752 | 838 | ||
| 753 | if (!child->thread.ds_area_msr) | 839 | BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts); |
| 754 | return -ENXIO; | ||
| 755 | 840 | ||
| 756 | retval = ds_write_bts((void *)child->thread.ds_area_msr, in); | 841 | memset(bts_record, 0, bts_cfg.sizeof_bts); |
| 757 | if (retval) | 842 | switch (in->qualifier) { |
| 758 | return retval; | 843 | case BTS_INVALID: |
| 844 | break; | ||
| 759 | 845 | ||
| 760 | return sizeof(*in); | 846 | case BTS_BRANCH: |
| 761 | } | 847 | bts_set(bts_record, bts_from, in->variant.lbr.from_ip); |
| 848 | bts_set(bts_record, bts_to, in->variant.lbr.to_ip); | ||
| 849 | break; | ||
| 762 | 850 | ||
| 763 | static int ptrace_bts_realloc(struct task_struct *child, | 851 | case BTS_TASK_ARRIVES: |
| 764 | int size, int reduce_size) | 852 | case BTS_TASK_DEPARTS: |
| 765 | { | 853 | bts_set(bts_record, bts_from, bts_escape); |
| 766 | unsigned long rlim, vm; | 854 | bts_set(bts_record, bts_qual, in->qualifier); |
| 767 | int ret, old_size; | 855 | bts_set(bts_record, bts_jiffies, in->variant.jiffies); |
| 856 | break; | ||
| 768 | 857 | ||
| 769 | if (size < 0) | 858 | default: |
| 770 | return -EINVAL; | 859 | return -EINVAL; |
| 771 | |||
| 772 | old_size = ds_get_bts_size((void *)child->thread.ds_area_msr); | ||
| 773 | if (old_size < 0) | ||
| 774 | return old_size; | ||
| 775 | |||
| 776 | ret = ds_free((void **)&child->thread.ds_area_msr); | ||
| 777 | if (ret < 0) | ||
| 778 | goto out; | ||
| 779 | |||
| 780 | size >>= PAGE_SHIFT; | ||
| 781 | old_size >>= PAGE_SHIFT; | ||
| 782 | |||
| 783 | current->mm->total_vm -= old_size; | ||
| 784 | current->mm->locked_vm -= old_size; | ||
| 785 | |||
| 786 | if (size == 0) | ||
| 787 | goto out; | ||
| 788 | |||
| 789 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | ||
| 790 | vm = current->mm->total_vm + size; | ||
| 791 | if (rlim < vm) { | ||
| 792 | ret = -ENOMEM; | ||
| 793 | |||
| 794 | if (!reduce_size) | ||
| 795 | goto out; | ||
| 796 | |||
| 797 | size = rlim - current->mm->total_vm; | ||
| 798 | if (size <= 0) | ||
| 799 | goto out; | ||
| 800 | } | ||
| 801 | |||
| 802 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | ||
| 803 | vm = current->mm->locked_vm + size; | ||
| 804 | if (rlim < vm) { | ||
| 805 | ret = -ENOMEM; | ||
| 806 | |||
| 807 | if (!reduce_size) | ||
| 808 | goto out; | ||
| 809 | |||
| 810 | size = rlim - current->mm->locked_vm; | ||
| 811 | if (size <= 0) | ||
| 812 | goto out; | ||
| 813 | } | 860 | } |
| 814 | 861 | ||
| 815 | ret = ds_allocate((void **)&child->thread.ds_area_msr, | 862 | /* The writing task will be the switched-to task on a context |
| 816 | size << PAGE_SHIFT); | 863 | * switch. It needs to write into the switched-from task's BTS |
| 817 | if (ret < 0) | 864 | * buffer. */ |
| 818 | goto out; | 865 | return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts); |
| 819 | |||
| 820 | current->mm->total_vm += size; | ||
| 821 | current->mm->locked_vm += size; | ||
| 822 | |||
| 823 | out: | ||
| 824 | if (child->thread.ds_area_msr) | ||
| 825 | set_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
| 826 | else | ||
| 827 | clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
| 828 | |||
| 829 | return ret; | ||
| 830 | } | 866 | } |
| 831 | 867 | ||
| 832 | void ptrace_bts_take_timestamp(struct task_struct *tsk, | 868 | void ptrace_bts_take_timestamp(struct task_struct *tsk, |
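ptrace_bts_write_record() no longer hands a struct bts_struct straight to the DS layer; it assembles a raw record field by field and writes it with ds_unchecked_write_bts(). The implied layout is one record of bts_cfg.sizeof_bts bytes built from equally sized fields of bts_cfg.sizeof_field bytes each, addressed by a small index (bts_from, bts_to, bts_qual, bts_jiffies in the hunk above). The sketch below shows what the bts_set()/bts_get() accessors presumably look like under that assumption; the real helpers are defined earlier in this file and may differ in detail.

	static inline unsigned long bts_get(const char *base, unsigned long field)
	{
		base += (bts_cfg.sizeof_field * field);	/* fields are fixed-width slots */
		return *(const unsigned long *)base;
	}

	static inline void bts_set(char *base, unsigned long field, unsigned long val)
	{
		base += (bts_cfg.sizeof_field * field);
		*(unsigned long *)base = val;
	}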
| @@ -839,7 +875,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk, | |||
| 839 | 875 | ||
| 840 | ptrace_bts_write_record(tsk, &rec); | 876 | ptrace_bts_write_record(tsk, &rec); |
| 841 | } | 877 | } |
| 842 | #endif /* X86_BTS */ | 878 | |
| 879 | static const struct bts_configuration bts_cfg_netburst = { | ||
| 880 | .sizeof_bts = sizeof(long) * 3, | ||
| 881 | .sizeof_field = sizeof(long), | ||
| 882 | .debugctl_mask = (1<<2)|(1<<3)|(1<<5) | ||
| 883 | }; | ||
| 884 | |||
| 885 | static const struct bts_configuration bts_cfg_pentium_m = { | ||
| 886 | .sizeof_bts = sizeof(long) * 3, | ||
| 887 | .sizeof_field = sizeof(long), | ||
| 888 | .debugctl_mask = (1<<6)|(1<<7) | ||
| 889 | }; | ||
| 890 | |||
| 891 | static const struct bts_configuration bts_cfg_core2 = { | ||
| 892 | .sizeof_bts = 8 * 3, | ||
| 893 | .sizeof_field = 8, | ||
| 894 | .debugctl_mask = (1<<6)|(1<<7)|(1<<9) | ||
| 895 | }; | ||
| 896 | |||
| 897 | static inline void bts_configure(const struct bts_configuration *cfg) | ||
| 898 | { | ||
| 899 | bts_cfg = *cfg; | ||
| 900 | } | ||
| 901 | |||
| 902 | void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) | ||
| 903 | { | ||
| 904 | switch (c->x86) { | ||
| 905 | case 0x6: | ||
| 906 | switch (c->x86_model) { | ||
| 907 | case 0xD: | ||
| 908 | case 0xE: /* Pentium M */ | ||
| 909 | bts_configure(&bts_cfg_pentium_m); | ||
| 910 | break; | ||
| 911 | case 0xF: /* Core2 */ | ||
| 912 | case 0x1C: /* Atom */ | ||
| 913 | bts_configure(&bts_cfg_core2); | ||
| 914 | break; | ||
| 915 | default: | ||
| 916 | /* sorry, don't know about them */ | ||
| 917 | break; | ||
| 918 | } | ||
| 919 | break; | ||
| 920 | case 0xF: | ||
| 921 | switch (c->x86_model) { | ||
| 922 | case 0x0: | ||
| 923 | case 0x1: | ||
| 924 | case 0x2: /* Netburst */ | ||
| 925 | bts_configure(&bts_cfg_netburst); | ||
| 926 | break; | ||
| 927 | default: | ||
| 928 | /* sorry, don't know about them */ | ||
| 929 | break; | ||
| 930 | } | ||
| 931 | break; | ||
| 932 | default: | ||
| 933 | /* sorry, don't know about them */ | ||
| 934 | break; | ||
| 935 | } | ||
| 936 | } | ||
| 937 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 843 | 938 | ||
| 844 | /* | 939 | /* |
| 845 | * Called by kernel/ptrace.c when detaching.. | 940 | * Called by kernel/ptrace.c when detaching.. |
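The three bts_configuration tables replace the old ds_debugctl_mask() helper and the hard-coded record size: Netburst and Pentium M use three pointer-sized fields per record with their respective DEBUGCTL bit masks, Core2 and Atom use three 8-byte fields. ptrace_bts_init_intel() selects one of them during CPU identification; the hookup below is only a sketch of how that presumably happens in the Intel setup path (arch/x86/kernel/cpu/intel.c), it is not part of this hunk, and the cpu_has_bts feature test is an assumption.

	static void __cpuinit init_intel(struct cpuinfo_x86 *c)
	{
		/* ... existing Intel feature setup ... */

		if (cpu_has_bts)			/* CPU advertises the BTS/DS feature */
			ptrace_bts_init_intel(c);	/* pick bts_cfg_* for this family/model */

		/* ... */
	}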
| @@ -852,15 +947,15 @@ void ptrace_disable(struct task_struct *child) | |||
| 852 | #ifdef TIF_SYSCALL_EMU | 947 | #ifdef TIF_SYSCALL_EMU |
| 853 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | 948 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); |
| 854 | #endif | 949 | #endif |
| 855 | if (child->thread.ds_area_msr) { | 950 | #ifdef CONFIG_X86_PTRACE_BTS |
| 856 | #ifdef X86_BTS | 951 | (void)ds_release_bts(child); |
| 857 | ptrace_bts_realloc(child, 0, 0); | 952 | |
| 858 | #endif | 953 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; |
| 859 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); | 954 | if (!child->thread.debugctlmsr) |
| 860 | if (!child->thread.debugctlmsr) | 955 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); |
| 861 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 956 | |
| 862 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 957 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); |
| 863 | } | 958 | #endif /* CONFIG_X86_PTRACE_BTS */ |
| 864 | } | 959 | } |
| 865 | 960 | ||
| 866 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 961 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
| @@ -980,7 +1075,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
| 980 | /* | 1075 | /* |
| 981 | * These bits need more cooking - not enabled yet: | 1076 | * These bits need more cooking - not enabled yet: |
| 982 | */ | 1077 | */ |
| 983 | #ifdef X86_BTS | 1078 | #ifdef CONFIG_X86_PTRACE_BTS |
| 984 | case PTRACE_BTS_CONFIG: | 1079 | case PTRACE_BTS_CONFIG: |
| 985 | ret = ptrace_bts_config | 1080 | ret = ptrace_bts_config |
| 986 | (child, data, (struct ptrace_bts_config __user *)addr); | 1081 | (child, data, (struct ptrace_bts_config __user *)addr); |
| @@ -992,7 +1087,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
| 992 | break; | 1087 | break; |
| 993 | 1088 | ||
| 994 | case PTRACE_BTS_SIZE: | 1089 | case PTRACE_BTS_SIZE: |
| 995 | ret = ptrace_bts_get_size(child); | 1090 | ret = ds_get_bts_index(child, /* pos = */ NULL); |
| 996 | break; | 1091 | break; |
| 997 | 1092 | ||
| 998 | case PTRACE_BTS_GET: | 1093 | case PTRACE_BTS_GET: |
| @@ -1001,14 +1096,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
| 1001 | break; | 1096 | break; |
| 1002 | 1097 | ||
| 1003 | case PTRACE_BTS_CLEAR: | 1098 | case PTRACE_BTS_CLEAR: |
| 1004 | ret = ptrace_bts_clear(child); | 1099 | ret = ds_clear_bts(child); |
| 1005 | break; | 1100 | break; |
| 1006 | 1101 | ||
| 1007 | case PTRACE_BTS_DRAIN: | 1102 | case PTRACE_BTS_DRAIN: |
| 1008 | ret = ptrace_bts_drain | 1103 | ret = ptrace_bts_drain |
| 1009 | (child, data, (struct bts_struct __user *) addr); | 1104 | (child, data, (struct bts_struct __user *) addr); |
| 1010 | break; | 1105 | break; |
| 1011 | #endif | 1106 | #endif /* CONFIG_X86_PTRACE_BTS */ |
| 1012 | 1107 | ||
| 1013 | default: | 1108 | default: |
| 1014 | ret = ptrace_request(child, request, addr, data); | 1109 | ret = ptrace_request(child, request, addr, data); |
| @@ -1375,30 +1470,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) | |||
| 1375 | force_sig_info(SIGTRAP, &info, tsk); | 1470 | force_sig_info(SIGTRAP, &info, tsk); |
| 1376 | } | 1471 | } |
| 1377 | 1472 | ||
| 1378 | static void syscall_trace(struct pt_regs *regs) | ||
| 1379 | { | ||
| 1380 | if (!(current->ptrace & PT_PTRACED)) | ||
| 1381 | return; | ||
| 1382 | |||
| 1383 | #if 0 | ||
| 1384 | printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", | ||
| 1385 | current->comm, | ||
| 1386 | regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0), | ||
| 1387 | current_thread_info()->flags, current->ptrace); | ||
| 1388 | #endif | ||
| 1389 | |||
| 1390 | ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) | ||
| 1391 | ? 0x80 : 0)); | ||
| 1392 | /* | ||
| 1393 | * this isn't the same as continuing with a signal, but it will do | ||
| 1394 | * for normal use. strace only continues with a signal if the | ||
| 1395 | * stopping signal is not SIGTRAP. -brl | ||
| 1396 | */ | ||
| 1397 | if (current->exit_code) { | ||
| 1398 | send_sig(current->exit_code, current, 1); | ||
| 1399 | current->exit_code = 0; | ||
| 1400 | } | ||
| 1401 | } | ||
| 1402 | 1473 | ||
| 1403 | #ifdef CONFIG_X86_32 | 1474 | #ifdef CONFIG_X86_32 |
| 1404 | # define IS_IA32 1 | 1475 | # define IS_IA32 1 |
| @@ -1432,8 +1503,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) | |||
| 1432 | if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) | 1503 | if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) |
| 1433 | ret = -1L; | 1504 | ret = -1L; |
| 1434 | 1505 | ||
| 1435 | if (ret || test_thread_flag(TIF_SYSCALL_TRACE)) | 1506 | if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && |
| 1436 | syscall_trace(regs); | 1507 | tracehook_report_syscall_entry(regs)) |
| 1508 | ret = -1L; | ||
| 1437 | 1509 | ||
| 1438 | if (unlikely(current->audit_context)) { | 1510 | if (unlikely(current->audit_context)) { |
| 1439 | if (IS_IA32) | 1511 | if (IS_IA32) |
| @@ -1459,7 +1531,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) | |||
| 1459 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | 1531 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); |
| 1460 | 1532 | ||
| 1461 | if (test_thread_flag(TIF_SYSCALL_TRACE)) | 1533 | if (test_thread_flag(TIF_SYSCALL_TRACE)) |
| 1462 | syscall_trace(regs); | 1534 | tracehook_report_syscall_exit(regs, 0); |
| 1463 | 1535 | ||
| 1464 | /* | 1536 | /* |
| 1465 | * If TIF_SYSCALL_EMU is set, we only get here because of | 1537 | * If TIF_SYSCALL_EMU is set, we only get here because of |
| @@ -1475,6 +1547,6 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) | |||
| 1475 | * system call instruction. | 1547 | * system call instruction. |
| 1476 | */ | 1548 | */ |
| 1477 | if (test_thread_flag(TIF_SINGLESTEP) && | 1549 | if (test_thread_flag(TIF_SINGLESTEP) && |
| 1478 | (current->ptrace & PT_PTRACED)) | 1550 | tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL)) |
| 1479 | send_sigtrap(current, regs, 0); | 1551 | send_sigtrap(current, regs, 0); |
| 1480 | } | 1552 | } |
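With the open-coded syscall_trace() removed, syscall entry and exit now report through the generic tracehook_report_syscall_entry()/_exit() hooks, and the post-syscall SIGTRAP for single-stepping is gated on tracehook_consider_fatal_signal(). From user space nothing changes: a PTRACE_SYSCALL tracer still gets one stop at entry and one at exit, with bit 0x80 set in the stop signal once PTRACE_O_TRACESYSGOOD is enabled. A minimal sketch of such a tracer, using only long-standing ptrace interfaces:

	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <signal.h>

	static void trace_syscalls(pid_t child)
	{
		int status;

		/* child did PTRACE_TRACEME and stopped (e.g. on execve) before this */
		waitpid(child, &status, 0);
		ptrace(PTRACE_SETOPTIONS, child, 0, (void *)PTRACE_O_TRACESYSGOOD);

		for (;;) {
			ptrace(PTRACE_SYSCALL, child, 0, 0);	/* run to next entry/exit stop */
			if (waitpid(child, &status, 0) < 0 || WIFEXITED(status))
				break;
			if (WSTOPSIG(status) == (SIGTRAP | 0x80))
				;	/* syscall entry or exit: inspect registers here */
		}
	}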
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 724adfc63cb9..f4c93f1cfc19 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
| @@ -29,7 +29,11 @@ EXPORT_SYMBOL(pm_power_off); | |||
| 29 | 29 | ||
| 30 | static const struct desc_ptr no_idt = {}; | 30 | static const struct desc_ptr no_idt = {}; |
| 31 | static int reboot_mode; | 31 | static int reboot_mode; |
| 32 | enum reboot_type reboot_type = BOOT_KBD; | 32 | /* |
| 33 | * Keyboard reset and triple fault may result in INIT, not RESET, which | ||
| 34 | * doesn't work when we're in vmx root mode. Try ACPI first. | ||
| 35 | */ | ||
| 36 | enum reboot_type reboot_type = BOOT_ACPI; | ||
| 33 | int reboot_force; | 37 | int reboot_force; |
| 34 | 38 | ||
| 35 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) | 39 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) |
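The default reboot method moves from the keyboard controller to ACPI because a KBC reset or a triple fault can be delivered as INIT rather than RESET while the CPU is in VMX root mode, which leaves a KVM host hanging instead of rebooting. The old behaviour should still be selectable on the kernel command line, e.g. reboot=kbd (or just reboot=k), assuming the reboot= parameter keeps accepting the same method letters as before.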
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 673f12cf6eb0..46c98efbbf8d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
| @@ -223,6 +223,9 @@ unsigned long saved_video_mode; | |||
| 223 | #define RAMDISK_LOAD_FLAG 0x4000 | 223 | #define RAMDISK_LOAD_FLAG 0x4000 |
| 224 | 224 | ||
| 225 | static char __initdata command_line[COMMAND_LINE_SIZE]; | 225 | static char __initdata command_line[COMMAND_LINE_SIZE]; |
| 226 | #ifdef CONFIG_CMDLINE_BOOL | ||
| 227 | static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; | ||
| 228 | #endif | ||
| 226 | 229 | ||
| 227 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) | 230 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) |
| 228 | struct edd edd; | 231 | struct edd edd; |
| @@ -665,11 +668,28 @@ void __init setup_arch(char **cmdline_p) | |||
| 665 | bss_resource.start = virt_to_phys(&__bss_start); | 668 | bss_resource.start = virt_to_phys(&__bss_start); |
| 666 | bss_resource.end = virt_to_phys(&__bss_stop)-1; | 669 | bss_resource.end = virt_to_phys(&__bss_stop)-1; |
| 667 | 670 | ||
| 671 | #ifdef CONFIG_CMDLINE_BOOL | ||
| 672 | #ifdef CONFIG_CMDLINE_OVERRIDE | ||
| 673 | strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); | ||
| 674 | #else | ||
| 675 | if (builtin_cmdline[0]) { | ||
| 676 | /* append boot loader cmdline to builtin */ | ||
| 677 | strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); | ||
| 678 | strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); | ||
| 679 | strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); | ||
| 680 | } | ||
| 681 | #endif | ||
| 682 | #endif | ||
| 683 | |||
| 668 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | 684 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
| 669 | *cmdline_p = command_line; | 685 | *cmdline_p = command_line; |
| 670 | 686 | ||
| 671 | parse_early_param(); | 687 | parse_early_param(); |
| 672 | 688 | ||
| 689 | #ifdef CONFIG_X86_64 | ||
| 690 | check_efer(); | ||
| 691 | #endif | ||
| 692 | |||
| 673 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_32) | 693 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_32) |
| 674 | /* | 694 | /* |
| 675 | * Must be before kernel pagetables are setup | 695 | * Must be before kernel pagetables are setup |
| @@ -738,7 +758,6 @@ void __init setup_arch(char **cmdline_p) | |||
| 738 | #else | 758 | #else |
| 739 | num_physpages = max_pfn; | 759 | num_physpages = max_pfn; |
| 740 | 760 | ||
| 741 | check_efer(); | ||
| 742 | if (cpu_has_x2apic) | 761 | if (cpu_has_x2apic) |
| 743 | check_x2apic(); | 762 | check_x2apic(); |
| 744 | 763 | ||
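setup_arch() now honours a command line compiled into the kernel: CONFIG_CMDLINE_BOOL enables it, CONFIG_CMDLINE supplies the text, and CONFIG_CMDLINE_OVERRIDE decides whether the boot loader's line is appended after the built-in one or discarded entirely; check_efer() also moves up so it runs right after early parameters are parsed on 64-bit. A hedged .config fragment illustrating the append case (option names are from the hunk above, the values are invented for the example):

	CONFIG_CMDLINE_BOOL=y
	CONFIG_CMDLINE="console=ttyS0,115200 earlyprintk=serial"
	# CONFIG_CMDLINE_OVERRIDE is not set

With OVERRIDE unset and a boot loader line of "root=/dev/sda1 ro", the resulting boot_command_line is "console=ttyS0,115200 earlyprintk=serial root=/dev/sda1 ro"; with OVERRIDE set, the boot loader line would be ignored.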
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h index 6dd7e2b70a4b..cc673aa55ce4 100644 --- a/arch/x86/kernel/sigframe.h +++ b/arch/x86/kernel/sigframe.h | |||
| @@ -34,4 +34,9 @@ struct rt_sigframe { | |||
| 34 | struct siginfo info; | 34 | struct siginfo info; |
| 35 | /* fp state follows here */ | 35 | /* fp state follows here */ |
| 36 | }; | 36 | }; |
| 37 | |||
| 38 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
| 39 | sigset_t *set, struct pt_regs *regs); | ||
| 40 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
| 41 | sigset_t *set, struct pt_regs *regs); | ||
| 37 | #endif | 42 | #endif |
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 8d380b699c0c..b21070ea33a4 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <linux/errno.h> | 17 | #include <linux/errno.h> |
| 18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
| 19 | #include <linux/wait.h> | 19 | #include <linux/wait.h> |
| 20 | #include <linux/tracehook.h> | ||
| 20 | #include <linux/elf.h> | 21 | #include <linux/elf.h> |
| 21 | #include <linux/smp.h> | 22 | #include <linux/smp.h> |
| 22 | #include <linux/mm.h> | 23 | #include <linux/mm.h> |
| @@ -556,8 +557,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
| 556 | * handler too. | 557 | * handler too. |
| 557 | */ | 558 | */ |
| 558 | regs->flags &= ~X86_EFLAGS_TF; | 559 | regs->flags &= ~X86_EFLAGS_TF; |
| 559 | if (test_thread_flag(TIF_SINGLESTEP)) | ||
| 560 | ptrace_notify(SIGTRAP); | ||
| 561 | 560 | ||
| 562 | spin_lock_irq(¤t->sighand->siglock); | 561 | spin_lock_irq(¤t->sighand->siglock); |
| 563 | sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); | 562 | sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); |
| @@ -566,6 +565,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
| 566 | recalc_sigpending(); | 565 | recalc_sigpending(); |
| 567 | spin_unlock_irq(¤t->sighand->siglock); | 566 | spin_unlock_irq(¤t->sighand->siglock); |
| 568 | 567 | ||
| 568 | tracehook_signal_handler(sig, info, ka, regs, | ||
| 569 | test_thread_flag(TIF_SINGLESTEP)); | ||
| 570 | |||
| 569 | return 0; | 571 | return 0; |
| 570 | } | 572 | } |
| 571 | 573 | ||
| @@ -659,5 +661,10 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
| 659 | if (thread_info_flags & _TIF_SIGPENDING) | 661 | if (thread_info_flags & _TIF_SIGPENDING) |
| 660 | do_signal(regs); | 662 | do_signal(regs); |
| 661 | 663 | ||
| 664 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { | ||
| 665 | clear_thread_flag(TIF_NOTIFY_RESUME); | ||
| 666 | tracehook_notify_resume(regs); | ||
| 667 | } | ||
| 668 | |||
| 662 | clear_thread_flag(TIF_IRET); | 669 | clear_thread_flag(TIF_IRET); |
| 663 | } | 670 | } |
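The 32-bit signal path drops its hand-rolled ptrace_notify(SIGTRAP) for the single-step case and instead reports through tracehook_signal_handler() after the signal frame is set up; do_notify_resume() additionally learns to clear TIF_NOTIFY_RESUME and call tracehook_notify_resume(). For a ptraced, single-stepping task the new hook reduces to the old behaviour; the sketch below is what the generic helper in linux/tracehook.h presumably boils down to, written from memory of that interface rather than quoted from this patch.

	static inline void tracehook_signal_handler(int sig, siginfo_t *info,
						    const struct k_sigaction *ka,
						    struct pt_regs *regs, int stepping)
	{
		if (stepping)
			ptrace_notify(SIGTRAP);	/* same notification the removed lines sent */
	}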
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 4665b598a376..823a55bf8c39 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c | |||
| @@ -15,17 +15,20 @@ | |||
| 15 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
| 16 | #include <linux/wait.h> | 16 | #include <linux/wait.h> |
| 17 | #include <linux/ptrace.h> | 17 | #include <linux/ptrace.h> |
| 18 | #include <linux/tracehook.h> | ||
| 18 | #include <linux/unistd.h> | 19 | #include <linux/unistd.h> |
| 19 | #include <linux/stddef.h> | 20 | #include <linux/stddef.h> |
| 20 | #include <linux/personality.h> | 21 | #include <linux/personality.h> |
| 21 | #include <linux/compiler.h> | 22 | #include <linux/compiler.h> |
| 23 | #include <linux/uaccess.h> | ||
| 24 | |||
| 22 | #include <asm/processor.h> | 25 | #include <asm/processor.h> |
| 23 | #include <asm/ucontext.h> | 26 | #include <asm/ucontext.h> |
| 24 | #include <asm/uaccess.h> | ||
| 25 | #include <asm/i387.h> | 27 | #include <asm/i387.h> |
| 26 | #include <asm/proto.h> | 28 | #include <asm/proto.h> |
| 27 | #include <asm/ia32_unistd.h> | 29 | #include <asm/ia32_unistd.h> |
| 28 | #include <asm/mce.h> | 30 | #include <asm/mce.h> |
| 31 | #include <asm/syscall.h> | ||
| 29 | #include <asm/syscalls.h> | 32 | #include <asm/syscalls.h> |
| 30 | #include "sigframe.h" | 33 | #include "sigframe.h" |
| 31 | 34 | ||
| @@ -42,11 +45,6 @@ | |||
| 42 | # define FIX_EFLAGS __FIX_EFLAGS | 45 | # define FIX_EFLAGS __FIX_EFLAGS |
| 43 | #endif | 46 | #endif |
| 44 | 47 | ||
| 45 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
| 46 | sigset_t *set, struct pt_regs * regs); | ||
| 47 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
| 48 | sigset_t *set, struct pt_regs * regs); | ||
| 49 | |||
| 50 | asmlinkage long | 48 | asmlinkage long |
| 51 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | 49 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, |
| 52 | struct pt_regs *regs) | 50 | struct pt_regs *regs) |
| @@ -66,7 +64,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
| 66 | /* Always make any pending restarted system calls return -EINTR */ | 64 | /* Always make any pending restarted system calls return -EINTR */ |
| 67 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | 65 | current_thread_info()->restart_block.fn = do_no_restart_syscall; |
| 68 | 66 | ||
| 69 | #define COPY(x) err |= __get_user(regs->x, &sc->x) | 67 | #define COPY(x) (err |= __get_user(regs->x, &sc->x)) |
| 70 | 68 | ||
| 71 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); | 69 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); |
| 72 | COPY(dx); COPY(cx); COPY(ip); | 70 | COPY(dx); COPY(cx); COPY(ip); |
| @@ -96,7 +94,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
| 96 | } | 94 | } |
| 97 | 95 | ||
| 98 | { | 96 | { |
| 99 | struct _fpstate __user * buf; | 97 | struct _fpstate __user *buf; |
| 100 | err |= __get_user(buf, &sc->fpstate); | 98 | err |= __get_user(buf, &sc->fpstate); |
| 101 | err |= restore_i387_xstate(buf); | 99 | err |= restore_i387_xstate(buf); |
| 102 | } | 100 | } |
| @@ -122,7 +120,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | |||
| 122 | current->blocked = set; | 120 | current->blocked = set; |
| 123 | recalc_sigpending(); | 121 | recalc_sigpending(); |
| 124 | spin_unlock_irq(¤t->sighand->siglock); | 122 | spin_unlock_irq(¤t->sighand->siglock); |
| 125 | 123 | ||
| 126 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 124 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) |
| 127 | goto badframe; | 125 | goto badframe; |
| 128 | 126 | ||
| @@ -132,16 +130,17 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | |||
| 132 | return ax; | 130 | return ax; |
| 133 | 131 | ||
| 134 | badframe: | 132 | badframe: |
| 135 | signal_fault(regs,frame,"sigreturn"); | 133 | signal_fault(regs, frame, "sigreturn"); |
| 136 | return 0; | 134 | return 0; |
| 137 | } | 135 | } |
| 138 | 136 | ||
| 139 | /* | 137 | /* |
| 140 | * Set up a signal frame. | 138 | * Set up a signal frame. |
| 141 | */ | 139 | */ |
| 142 | 140 | ||
| 143 | static inline int | 141 | static inline int |
| 144 | setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me) | 142 | setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, |
| 143 | unsigned long mask, struct task_struct *me) | ||
| 145 | { | 144 | { |
| 146 | int err = 0; | 145 | int err = 0; |
| 147 | 146 | ||
| @@ -197,7 +196,7 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) | |||
| 197 | } | 196 | } |
| 198 | 197 | ||
| 199 | static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 198 | static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
| 200 | sigset_t *set, struct pt_regs * regs) | 199 | sigset_t *set, struct pt_regs *regs) |
| 201 | { | 200 | { |
| 202 | struct rt_sigframe __user *frame; | 201 | struct rt_sigframe __user *frame; |
| 203 | void __user *fp = NULL; | 202 | void __user *fp = NULL; |
| @@ -210,19 +209,19 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
| 210 | (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; | 209 | (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; |
| 211 | 210 | ||
| 212 | if (save_i387_xstate(fp) < 0) | 211 | if (save_i387_xstate(fp) < 0) |
| 213 | err |= -1; | 212 | err |= -1; |
| 214 | } else | 213 | } else |
| 215 | frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; | 214 | frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; |
| 216 | 215 | ||
| 217 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | 216 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) |
| 218 | goto give_sigsegv; | 217 | goto give_sigsegv; |
| 219 | 218 | ||
| 220 | if (ka->sa.sa_flags & SA_SIGINFO) { | 219 | if (ka->sa.sa_flags & SA_SIGINFO) { |
| 221 | err |= copy_siginfo_to_user(&frame->info, info); | 220 | err |= copy_siginfo_to_user(&frame->info, info); |
| 222 | if (err) | 221 | if (err) |
| 223 | goto give_sigsegv; | 222 | goto give_sigsegv; |
| 224 | } | 223 | } |
| 225 | 224 | ||
| 226 | /* Create the ucontext. */ | 225 | /* Create the ucontext. */ |
| 227 | if (cpu_has_xsave) | 226 | if (cpu_has_xsave) |
| 228 | err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); | 227 | err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); |
| @@ -235,9 +234,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
| 235 | err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | 234 | err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); |
| 236 | err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); | 235 | err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); |
| 237 | err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); | 236 | err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); |
| 238 | if (sizeof(*set) == 16) { | 237 | if (sizeof(*set) == 16) { |
| 239 | __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); | 238 | __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); |
| 240 | __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); | 239 | __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); |
| 241 | } else | 240 | } else |
| 242 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | 241 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); |
| 243 | 242 | ||
| @@ -248,7 +247,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
| 248 | err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); | 247 | err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); |
| 249 | } else { | 248 | } else { |
| 250 | /* could use a vstub here */ | 249 | /* could use a vstub here */ |
| 251 | goto give_sigsegv; | 250 | goto give_sigsegv; |
| 252 | } | 251 | } |
| 253 | 252 | ||
| 254 | if (err) | 253 | if (err) |
| @@ -256,7 +255,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
| 256 | 255 | ||
| 257 | /* Set up registers for signal handler */ | 256 | /* Set up registers for signal handler */ |
| 258 | regs->di = sig; | 257 | regs->di = sig; |
| 259 | /* In case the signal handler was declared without prototypes */ | 258 | /* In case the signal handler was declared without prototypes */ |
| 260 | regs->ax = 0; | 259 | regs->ax = 0; |
| 261 | 260 | ||
| 262 | /* This also works for non SA_SIGINFO handlers because they expect the | 261 | /* This also works for non SA_SIGINFO handlers because they expect the |
| @@ -279,37 +278,8 @@ give_sigsegv: | |||
| 279 | } | 278 | } |
| 280 | 279 | ||
| 281 | /* | 280 | /* |
| 282 | * Return -1L or the syscall number that @regs is executing. | ||
| 283 | */ | ||
| 284 | static long current_syscall(struct pt_regs *regs) | ||
| 285 | { | ||
| 286 | /* | ||
| 287 | * We always sign-extend a -1 value being set here, | ||
| 288 | * so this is always either -1L or a syscall number. | ||
| 289 | */ | ||
| 290 | return regs->orig_ax; | ||
| 291 | } | ||
| 292 | |||
| 293 | /* | ||
| 294 | * Return a value that is -EFOO if the system call in @regs->orig_ax | ||
| 295 | * returned an error. This only works for @regs from @current. | ||
| 296 | */ | ||
| 297 | static long current_syscall_ret(struct pt_regs *regs) | ||
| 298 | { | ||
| 299 | #ifdef CONFIG_IA32_EMULATION | ||
| 300 | if (test_thread_flag(TIF_IA32)) | ||
| 301 | /* | ||
| 302 | * Sign-extend the value so (int)-EFOO becomes (long)-EFOO | ||
| 303 | * and will match correctly in comparisons. | ||
| 304 | */ | ||
| 305 | return (int) regs->ax; | ||
| 306 | #endif | ||
| 307 | return regs->ax; | ||
| 308 | } | ||
| 309 | |||
| 310 | /* | ||
| 311 | * OK, we're invoking a handler | 281 | * OK, we're invoking a handler |
| 312 | */ | 282 | */ |
| 313 | 283 | ||
| 314 | static int | 284 | static int |
| 315 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | 285 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, |
| @@ -318,9 +288,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
| 318 | int ret; | 288 | int ret; |
| 319 | 289 | ||
| 320 | /* Are we from a system call? */ | 290 | /* Are we from a system call? */ |
| 321 | if (current_syscall(regs) >= 0) { | 291 | if (syscall_get_nr(current, regs) >= 0) { |
| 322 | /* If so, check system call restarting.. */ | 292 | /* If so, check system call restarting.. */ |
| 323 | switch (current_syscall_ret(regs)) { | 293 | switch (syscall_get_error(current, regs)) { |
| 324 | case -ERESTART_RESTARTBLOCK: | 294 | case -ERESTART_RESTARTBLOCK: |
| 325 | case -ERESTARTNOHAND: | 295 | case -ERESTARTNOHAND: |
| 326 | regs->ax = -EINTR; | 296 | regs->ax = -EINTR; |
| @@ -353,7 +323,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
| 353 | ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs); | 323 | ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs); |
| 354 | else | 324 | else |
| 355 | ret = ia32_setup_frame(sig, ka, oldset, regs); | 325 | ret = ia32_setup_frame(sig, ka, oldset, regs); |
| 356 | } else | 326 | } else |
| 357 | #endif | 327 | #endif |
| 358 | ret = setup_rt_frame(sig, ka, info, oldset, regs); | 328 | ret = setup_rt_frame(sig, ka, info, oldset, regs); |
| 359 | 329 | ||
| @@ -377,15 +347,16 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
| 377 | * handler too. | 347 | * handler too. |
| 378 | */ | 348 | */ |
| 379 | regs->flags &= ~X86_EFLAGS_TF; | 349 | regs->flags &= ~X86_EFLAGS_TF; |
| 380 | if (test_thread_flag(TIF_SINGLESTEP)) | ||
| 381 | ptrace_notify(SIGTRAP); | ||
| 382 | 350 | ||
| 383 | spin_lock_irq(¤t->sighand->siglock); | 351 | spin_lock_irq(¤t->sighand->siglock); |
| 384 | sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); | 352 | sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); |
| 385 | if (!(ka->sa.sa_flags & SA_NODEFER)) | 353 | if (!(ka->sa.sa_flags & SA_NODEFER)) |
| 386 | sigaddset(¤t->blocked,sig); | 354 | sigaddset(¤t->blocked, sig); |
| 387 | recalc_sigpending(); | 355 | recalc_sigpending(); |
| 388 | spin_unlock_irq(¤t->sighand->siglock); | 356 | spin_unlock_irq(¤t->sighand->siglock); |
| 357 | |||
| 358 | tracehook_signal_handler(sig, info, ka, regs, | ||
| 359 | test_thread_flag(TIF_SINGLESTEP)); | ||
| 389 | } | 360 | } |
| 390 | 361 | ||
| 391 | return ret; | 362 | return ret; |
| @@ -442,9 +413,9 @@ static void do_signal(struct pt_regs *regs) | |||
| 442 | } | 413 | } |
| 443 | 414 | ||
| 444 | /* Did we come from a system call? */ | 415 | /* Did we come from a system call? */ |
| 445 | if (current_syscall(regs) >= 0) { | 416 | if (syscall_get_nr(current, regs) >= 0) { |
| 446 | /* Restart the system call - no handlers present */ | 417 | /* Restart the system call - no handlers present */ |
| 447 | switch (current_syscall_ret(regs)) { | 418 | switch (syscall_get_error(current, regs)) { |
| 448 | case -ERESTARTNOHAND: | 419 | case -ERESTARTNOHAND: |
| 449 | case -ERESTARTSYS: | 420 | case -ERESTARTSYS: |
| 450 | case -ERESTARTNOINTR: | 421 | case -ERESTARTNOINTR: |
| @@ -482,17 +453,23 @@ void do_notify_resume(struct pt_regs *regs, void *unused, | |||
| 482 | /* deal with pending signal delivery */ | 453 | /* deal with pending signal delivery */ |
| 483 | if (thread_info_flags & _TIF_SIGPENDING) | 454 | if (thread_info_flags & _TIF_SIGPENDING) |
| 484 | do_signal(regs); | 455 | do_signal(regs); |
| 456 | |||
| 457 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { | ||
| 458 | clear_thread_flag(TIF_NOTIFY_RESUME); | ||
| 459 | tracehook_notify_resume(regs); | ||
| 460 | } | ||
| 485 | } | 461 | } |
| 486 | 462 | ||
| 487 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | 463 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) |
| 488 | { | 464 | { |
| 489 | struct task_struct *me = current; | 465 | struct task_struct *me = current; |
| 490 | if (show_unhandled_signals && printk_ratelimit()) { | 466 | if (show_unhandled_signals && printk_ratelimit()) { |
| 491 | printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", | 467 | printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", |
| 492 | me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax); | 468 | me->comm, me->pid, where, frame, regs->ip, |
| 469 | regs->sp, regs->orig_ax); | ||
| 493 | print_vma_addr(" in ", regs->ip); | 470 | print_vma_addr(" in ", regs->ip); |
| 494 | printk("\n"); | 471 | printk("\n"); |
| 495 | } | 472 | } |
| 496 | 473 | ||
| 497 | force_sig(SIGSEGV, me); | 474 | force_sig(SIGSEGV, me); |
| 498 | } | 475 | } |
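On 64-bit the private current_syscall()/current_syscall_ret() helpers give way to the generic syscall_get_nr()/syscall_get_error() accessors from asm/syscall.h, which keep the ia32 sign-extension so the -ERESTART* comparisons above still match. A sketch of what such an accessor plausibly looks like; the TS_COMPAT test and exact spelling are assumptions about that interface, not code from this patch.

	static inline long syscall_get_error(struct task_struct *task,
					     struct pt_regs *regs)
	{
		unsigned long error = regs->ax;

	#ifdef CONFIG_IA32_EMULATION
		/* Sign-extend so (int)-EFOO becomes (long)-EFOO for ia32 tasks,
		 * exactly what the removed current_syscall_ret() did. */
		if (task_thread_info(task)->status & TS_COMPAT)
			error = (long) (int) error;
	#endif
		/* 0 if the syscall succeeded, the negative errno otherwise */
		return IS_ERR_VALUE(error) ? error : 0;
	}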
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index aa804c64b167..9056f7e272c0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
| @@ -258,6 +258,7 @@ static void __cpuinit smp_callin(void) | |||
| 258 | end_local_APIC_setup(); | 258 | end_local_APIC_setup(); |
| 259 | map_cpu_to_logical_apicid(); | 259 | map_cpu_to_logical_apicid(); |
| 260 | 260 | ||
| 261 | notify_cpu_starting(cpuid); | ||
| 261 | /* | 262 | /* |
| 262 | * Get our bogomips. | 263 | * Get our bogomips. |
| 263 | * | 264 | * |
| @@ -1313,16 +1314,13 @@ __init void prefill_possible_map(void) | |||
| 1313 | if (!num_processors) | 1314 | if (!num_processors) |
| 1314 | num_processors = 1; | 1315 | num_processors = 1; |
| 1315 | 1316 | ||
| 1316 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 1317 | if (additional_cpus == -1) { | 1317 | if (additional_cpus == -1) { |
| 1318 | if (disabled_cpus > 0) | 1318 | if (disabled_cpus > 0) |
| 1319 | additional_cpus = disabled_cpus; | 1319 | additional_cpus = disabled_cpus; |
| 1320 | else | 1320 | else |
| 1321 | additional_cpus = 0; | 1321 | additional_cpus = 0; |
| 1322 | } | 1322 | } |
| 1323 | #else | 1323 | |
| 1324 | additional_cpus = 0; | ||
| 1325 | #endif | ||
| 1326 | possible = num_processors + additional_cpus; | 1324 | possible = num_processors + additional_cpus; |
| 1327 | if (possible > NR_CPUS) | 1325 | if (possible > NR_CPUS) |
| 1328 | possible = NR_CPUS; | 1326 | possible = NR_CPUS; |
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index c9288c883e20..6bc211accf08 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
| @@ -13,16 +13,17 @@ | |||
| 13 | #include <linux/utsname.h> | 13 | #include <linux/utsname.h> |
| 14 | #include <linux/personality.h> | 14 | #include <linux/personality.h> |
| 15 | #include <linux/random.h> | 15 | #include <linux/random.h> |
| 16 | #include <linux/uaccess.h> | ||
| 16 | 17 | ||
| 17 | #include <asm/uaccess.h> | ||
| 18 | #include <asm/ia32.h> | 18 | #include <asm/ia32.h> |
| 19 | #include <asm/syscalls.h> | 19 | #include <asm/syscalls.h> |
| 20 | 20 | ||
| 21 | asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, | 21 | asmlinkage long sys_mmap(unsigned long addr, unsigned long len, |
| 22 | unsigned long fd, unsigned long off) | 22 | unsigned long prot, unsigned long flags, |
| 23 | unsigned long fd, unsigned long off) | ||
| 23 | { | 24 | { |
| 24 | long error; | 25 | long error; |
| 25 | struct file * file; | 26 | struct file *file; |
| 26 | 27 | ||
| 27 | error = -EINVAL; | 28 | error = -EINVAL; |
| 28 | if (off & ~PAGE_MASK) | 29 | if (off & ~PAGE_MASK) |
| @@ -57,9 +58,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin, | |||
| 57 | unmapped base down for this case. This can give | 58 | unmapped base down for this case. This can give |
| 58 | conflicts with the heap, but we assume that glibc | 59 | conflicts with the heap, but we assume that glibc |
| 59 | malloc knows how to fall back to mmap. Give it 1GB | 60 | malloc knows how to fall back to mmap. Give it 1GB |
| 60 | of playground for now. -AK */ | 61 | of playground for now. -AK */ |
| 61 | *begin = 0x40000000; | 62 | *begin = 0x40000000; |
| 62 | *end = 0x80000000; | 63 | *end = 0x80000000; |
| 63 | if (current->flags & PF_RANDOMIZE) { | 64 | if (current->flags & PF_RANDOMIZE) { |
| 64 | new_begin = randomize_range(*begin, *begin + 0x02000000, 0); | 65 | new_begin = randomize_range(*begin, *begin + 0x02000000, 0); |
| 65 | if (new_begin) | 66 | if (new_begin) |
| @@ -67,9 +68,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin, | |||
| 67 | } | 68 | } |
| 68 | } else { | 69 | } else { |
| 69 | *begin = TASK_UNMAPPED_BASE; | 70 | *begin = TASK_UNMAPPED_BASE; |
| 70 | *end = TASK_SIZE; | 71 | *end = TASK_SIZE; |
| 71 | } | 72 | } |
| 72 | } | 73 | } |
| 73 | 74 | ||
| 74 | unsigned long | 75 | unsigned long |
| 75 | arch_get_unmapped_area(struct file *filp, unsigned long addr, | 76 | arch_get_unmapped_area(struct file *filp, unsigned long addr, |
| @@ -79,11 +80,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
| 79 | struct vm_area_struct *vma; | 80 | struct vm_area_struct *vma; |
| 80 | unsigned long start_addr; | 81 | unsigned long start_addr; |
| 81 | unsigned long begin, end; | 82 | unsigned long begin, end; |
| 82 | 83 | ||
| 83 | if (flags & MAP_FIXED) | 84 | if (flags & MAP_FIXED) |
| 84 | return addr; | 85 | return addr; |
| 85 | 86 | ||
| 86 | find_start_end(flags, &begin, &end); | 87 | find_start_end(flags, &begin, &end); |
| 87 | 88 | ||
| 88 | if (len > end) | 89 | if (len > end) |
| 89 | return -ENOMEM; | 90 | return -ENOMEM; |
| @@ -97,12 +98,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
| 97 | } | 98 | } |
| 98 | if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)) | 99 | if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)) |
| 99 | && len <= mm->cached_hole_size) { | 100 | && len <= mm->cached_hole_size) { |
| 100 | mm->cached_hole_size = 0; | 101 | mm->cached_hole_size = 0; |
| 101 | mm->free_area_cache = begin; | 102 | mm->free_area_cache = begin; |
| 102 | } | 103 | } |
| 103 | addr = mm->free_area_cache; | 104 | addr = mm->free_area_cache; |
| 104 | if (addr < begin) | 105 | if (addr < begin) |
| 105 | addr = begin; | 106 | addr = begin; |
| 106 | start_addr = addr; | 107 | start_addr = addr; |
| 107 | 108 | ||
| 108 | full_search: | 109 | full_search: |
| @@ -128,7 +129,7 @@ full_search: | |||
| 128 | return addr; | 129 | return addr; |
| 129 | } | 130 | } |
| 130 | if (addr + mm->cached_hole_size < vma->vm_start) | 131 | if (addr + mm->cached_hole_size < vma->vm_start) |
| 131 | mm->cached_hole_size = vma->vm_start - addr; | 132 | mm->cached_hole_size = vma->vm_start - addr; |
| 132 | 133 | ||
| 133 | addr = vma->vm_end; | 134 | addr = vma->vm_end; |
| 134 | } | 135 | } |
| @@ -178,7 +179,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
| 178 | vma = find_vma(mm, addr-len); | 179 | vma = find_vma(mm, addr-len); |
| 179 | if (!vma || addr <= vma->vm_start) | 180 | if (!vma || addr <= vma->vm_start) |
| 180 | /* remember the address as a hint for next time */ | 181 | /* remember the address as a hint for next time */ |
| 181 | return (mm->free_area_cache = addr-len); | 182 | return mm->free_area_cache = addr-len; |
| 182 | } | 183 | } |
| 183 | 184 | ||
| 184 | if (mm->mmap_base < len) | 185 | if (mm->mmap_base < len) |
| @@ -195,7 +196,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
| 195 | vma = find_vma(mm, addr); | 196 | vma = find_vma(mm, addr); |
| 196 | if (!vma || addr+len <= vma->vm_start) | 197 | if (!vma || addr+len <= vma->vm_start) |
| 197 | /* remember the address as a hint for next time */ | 198 | /* remember the address as a hint for next time */ |
| 198 | return (mm->free_area_cache = addr); | 199 | return mm->free_area_cache = addr; |
| 199 | 200 | ||
| 200 | /* remember the largest hole we saw so far */ | 201 | /* remember the largest hole we saw so far */ |
| 201 | if (addr + mm->cached_hole_size < vma->vm_start) | 202 | if (addr + mm->cached_hole_size < vma->vm_start) |
| @@ -225,13 +226,13 @@ bottomup: | |||
| 225 | } | 226 | } |
| 226 | 227 | ||
| 227 | 228 | ||
| 228 | asmlinkage long sys_uname(struct new_utsname __user * name) | 229 | asmlinkage long sys_uname(struct new_utsname __user *name) |
| 229 | { | 230 | { |
| 230 | int err; | 231 | int err; |
| 231 | down_read(&uts_sem); | 232 | down_read(&uts_sem); |
| 232 | err = copy_to_user(name, utsname(), sizeof (*name)); | 233 | err = copy_to_user(name, utsname(), sizeof(*name)); |
| 233 | up_read(&uts_sem); | 234 | up_read(&uts_sem); |
| 234 | if (personality(current->personality) == PER_LINUX32) | 235 | if (personality(current->personality) == PER_LINUX32) |
| 235 | err |= copy_to_user(&name->machine, "i686", 5); | 236 | err |= copy_to_user(&name->machine, "i686", 5); |
| 236 | return err ? -EFAULT : 0; | 237 | return err ? -EFAULT : 0; |
| 237 | } | 238 | } |
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index b42068fb7b76..2887a789e38f 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c | |||
| @@ -32,6 +32,8 @@ | |||
| 32 | #include <linux/bug.h> | 32 | #include <linux/bug.h> |
| 33 | #include <linux/nmi.h> | 33 | #include <linux/nmi.h> |
| 34 | #include <linux/mm.h> | 34 | #include <linux/mm.h> |
| 35 | #include <linux/smp.h> | ||
| 36 | #include <linux/io.h> | ||
| 35 | 37 | ||
| 36 | #if defined(CONFIG_EDAC) | 38 | #if defined(CONFIG_EDAC) |
| 37 | #include <linux/edac.h> | 39 | #include <linux/edac.h> |
| @@ -45,9 +47,6 @@ | |||
| 45 | #include <asm/unwind.h> | 47 | #include <asm/unwind.h> |
| 46 | #include <asm/desc.h> | 48 | #include <asm/desc.h> |
| 47 | #include <asm/i387.h> | 49 | #include <asm/i387.h> |
| 48 | #include <asm/nmi.h> | ||
| 49 | #include <asm/smp.h> | ||
| 50 | #include <asm/io.h> | ||
| 51 | #include <asm/pgalloc.h> | 50 | #include <asm/pgalloc.h> |
| 52 | #include <asm/proto.h> | 51 | #include <asm/proto.h> |
| 53 | #include <asm/pda.h> | 52 | #include <asm/pda.h> |
| @@ -85,7 +84,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) | |||
| 85 | 84 | ||
| 86 | void printk_address(unsigned long address, int reliable) | 85 | void printk_address(unsigned long address, int reliable) |
| 87 | { | 86 | { |
| 88 | printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address); | 87 | printk(" [<%016lx>] %s%pS\n", |
| 88 | address, reliable ? "" : "? ", (void *) address); | ||
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 91 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
| @@ -98,7 +98,8 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
| 98 | [STACKFAULT_STACK - 1] = "#SS", | 98 | [STACKFAULT_STACK - 1] = "#SS", |
| 99 | [MCE_STACK - 1] = "#MC", | 99 | [MCE_STACK - 1] = "#MC", |
| 100 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | 100 | #if DEBUG_STKSZ > EXCEPTION_STKSZ |
| 101 | [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" | 101 | [N_EXCEPTION_STACKS ... |
| 102 | N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" | ||
| 102 | #endif | 103 | #endif |
| 103 | }; | 104 | }; |
| 104 | unsigned k; | 105 | unsigned k; |
| @@ -163,7 +164,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
| 163 | } | 164 | } |
| 164 | 165 | ||
| 165 | /* | 166 | /* |
| 166 | * x86-64 can have up to three kernel stacks: | 167 | * x86-64 can have up to three kernel stacks: |
| 167 | * process stack | 168 | * process stack |
| 168 | * interrupt stack | 169 | * interrupt stack |
| 169 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | 170 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
| @@ -219,7 +220,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
| 219 | const struct stacktrace_ops *ops, void *data) | 220 | const struct stacktrace_ops *ops, void *data) |
| 220 | { | 221 | { |
| 221 | const unsigned cpu = get_cpu(); | 222 | const unsigned cpu = get_cpu(); |
| 222 | unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; | 223 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; |
| 223 | unsigned used = 0; | 224 | unsigned used = 0; |
| 224 | struct thread_info *tinfo; | 225 | struct thread_info *tinfo; |
| 225 | 226 | ||
| @@ -237,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
| 237 | if (!bp) { | 238 | if (!bp) { |
| 238 | if (task == current) { | 239 | if (task == current) { |
| 239 | /* Grab bp right from our regs */ | 240 | /* Grab bp right from our regs */ |
| 240 | asm("movq %%rbp, %0" : "=r" (bp) :); | 241 | asm("movq %%rbp, %0" : "=r" (bp) : ); |
| 241 | } else { | 242 | } else { |
| 242 | /* bp is the last reg pushed by switch_to */ | 243 | /* bp is the last reg pushed by switch_to */ |
| 243 | bp = *(unsigned long *) task->thread.sp; | 244 | bp = *(unsigned long *) task->thread.sp; |
| @@ -356,11 +357,15 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
| 356 | unsigned long *stack; | 357 | unsigned long *stack; |
| 357 | int i; | 358 | int i; |
| 358 | const int cpu = smp_processor_id(); | 359 | const int cpu = smp_processor_id(); |
| 359 | unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); | 360 | unsigned long *irqstack_end = |
| 360 | unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); | 361 | (unsigned long *) (cpu_pda(cpu)->irqstackptr); |
| 362 | unsigned long *irqstack = | ||
| 363 | (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); | ||
| 361 | 364 | ||
| 362 | // debugging aid: "show_stack(NULL, NULL);" prints the | 365 | /* |
| 363 | // back trace for this cpu. | 366 | * debugging aid: "show_stack(NULL, NULL);" prints the |
| 367 | * back trace for this cpu. | ||
| 368 | */ | ||
| 364 | 369 | ||
| 365 | if (sp == NULL) { | 370 | if (sp == NULL) { |
| 366 | if (task) | 371 | if (task) |
| @@ -404,7 +409,7 @@ void dump_stack(void) | |||
| 404 | 409 | ||
| 405 | #ifdef CONFIG_FRAME_POINTER | 410 | #ifdef CONFIG_FRAME_POINTER |
| 406 | if (!bp) | 411 | if (!bp) |
| 407 | asm("movq %%rbp, %0" : "=r" (bp):); | 412 | asm("movq %%rbp, %0" : "=r" (bp) : ); |
| 408 | #endif | 413 | #endif |
| 409 | 414 | ||
| 410 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | 415 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", |
| @@ -414,7 +419,6 @@ void dump_stack(void) | |||
| 414 | init_utsname()->version); | 419 | init_utsname()->version); |
| 415 | show_trace(NULL, NULL, &stack, bp); | 420 | show_trace(NULL, NULL, &stack, bp); |
| 416 | } | 421 | } |
| 417 | |||
| 418 | EXPORT_SYMBOL(dump_stack); | 422 | EXPORT_SYMBOL(dump_stack); |
| 419 | 423 | ||
| 420 | void show_registers(struct pt_regs *regs) | 424 | void show_registers(struct pt_regs *regs) |
| @@ -492,7 +496,7 @@ unsigned __kprobes long oops_begin(void) | |||
| 492 | raw_local_irq_save(flags); | 496 | raw_local_irq_save(flags); |
| 493 | cpu = smp_processor_id(); | 497 | cpu = smp_processor_id(); |
| 494 | if (!__raw_spin_trylock(&die_lock)) { | 498 | if (!__raw_spin_trylock(&die_lock)) { |
| 495 | if (cpu == die_owner) | 499 | if (cpu == die_owner) |
| 496 | /* nested oops. should stop eventually */; | 500 | /* nested oops. should stop eventually */; |
| 497 | else | 501 | else |
| 498 | __raw_spin_lock(&die_lock); | 502 | __raw_spin_lock(&die_lock); |
| @@ -637,7 +641,7 @@ kernel_trap: | |||
| 637 | } | 641 | } |
| 638 | 642 | ||
| 639 | #define DO_ERROR(trapnr, signr, str, name) \ | 643 | #define DO_ERROR(trapnr, signr, str, name) \ |
| 640 | asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | 644 | asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ |
| 641 | { \ | 645 | { \ |
| 642 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 646 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
| 643 | == NOTIFY_STOP) \ | 647 | == NOTIFY_STOP) \ |
| @@ -647,7 +651,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | |||
| 647 | } | 651 | } |
| 648 | 652 | ||
| 649 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | 653 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ |
| 650 | asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | 654 | asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ |
| 651 | { \ | 655 | { \ |
| 652 | siginfo_t info; \ | 656 | siginfo_t info; \ |
| 653 | info.si_signo = signr; \ | 657 | info.si_signo = signr; \ |
| @@ -682,7 +686,7 @@ asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) | |||
| 682 | preempt_conditional_cli(regs); | 686 | preempt_conditional_cli(regs); |
| 683 | } | 687 | } |
| 684 | 688 | ||
| 685 | asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) | 689 | asmlinkage void do_double_fault(struct pt_regs *regs, long error_code) |
| 686 | { | 690 | { |
| 687 | static const char str[] = "double fault"; | 691 | static const char str[] = "double fault"; |
| 688 | struct task_struct *tsk = current; | 692 | struct task_struct *tsk = current; |
| @@ -777,9 +781,10 @@ io_check_error(unsigned char reason, struct pt_regs *regs) | |||
| 777 | } | 781 | } |
| 778 | 782 | ||
| 779 | static notrace __kprobes void | 783 | static notrace __kprobes void |
| 780 | unknown_nmi_error(unsigned char reason, struct pt_regs * regs) | 784 | unknown_nmi_error(unsigned char reason, struct pt_regs *regs) |
| 781 | { | 785 | { |
| 782 | if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | 786 | if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == |
| 787 | NOTIFY_STOP) | ||
| 783 | return; | 788 | return; |
| 784 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", | 789 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", |
| 785 | reason); | 790 | reason); |
| @@ -881,7 +886,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
| 881 | else if (user_mode(eregs)) | 886 | else if (user_mode(eregs)) |
| 882 | regs = task_pt_regs(current); | 887 | regs = task_pt_regs(current); |
| 883 | /* Exception from kernel and interrupts are enabled. Move to | 888 | /* Exception from kernel and interrupts are enabled. Move to |
| 884 | kernel process stack. */ | 889 | kernel process stack. */ |
| 885 | else if (eregs->flags & X86_EFLAGS_IF) | 890 | else if (eregs->flags & X86_EFLAGS_IF) |
| 886 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); | 891 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); |
| 887 | if (eregs != regs) | 892 | if (eregs != regs) |
| @@ -890,7 +895,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
| 890 | } | 895 | } |
| 891 | 896 | ||
| 892 | /* runs on IST stack. */ | 897 | /* runs on IST stack. */ |
| 893 | asmlinkage void __kprobes do_debug(struct pt_regs * regs, | 898 | asmlinkage void __kprobes do_debug(struct pt_regs *regs, |
| 894 | unsigned long error_code) | 899 | unsigned long error_code) |
| 895 | { | 900 | { |
| 896 | struct task_struct *tsk = current; | 901 | struct task_struct *tsk = current; |
| @@ -1034,7 +1039,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) | |||
| 1034 | 1039 | ||
| 1035 | asmlinkage void bad_intr(void) | 1040 | asmlinkage void bad_intr(void) |
| 1036 | { | 1041 | { |
| 1037 | printk("bad interrupt"); | 1042 | printk("bad interrupt"); |
| 1038 | } | 1043 | } |
| 1039 | 1044 | ||
| 1040 | asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) | 1045 | asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) |
| @@ -1046,7 +1051,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) | |||
| 1046 | 1051 | ||
| 1047 | conditional_sti(regs); | 1052 | conditional_sti(regs); |
| 1048 | if (!user_mode(regs) && | 1053 | if (!user_mode(regs) && |
| 1049 | kernel_math_error(regs, "kernel simd math error", 19)) | 1054 | kernel_math_error(regs, "kernel simd math error", 19)) |
| 1050 | return; | 1055 | return; |
| 1051 | 1056 | ||
| 1052 | /* | 1057 | /* |
| @@ -1091,7 +1096,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) | |||
| 1091 | force_sig_info(SIGFPE, &info, task); | 1096 | force_sig_info(SIGFPE, &info, task); |
| 1092 | } | 1097 | } |
| 1093 | 1098 | ||
| 1094 | asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs) | 1099 | asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs) |
| 1095 | { | 1100 | { |
| 1096 | } | 1101 | } |
| 1097 | 1102 | ||
| @@ -1148,8 +1153,10 @@ void __init trap_init(void) | |||
| 1148 | set_intr_gate(0, &divide_error); | 1153 | set_intr_gate(0, &divide_error); |
| 1149 | set_intr_gate_ist(1, &debug, DEBUG_STACK); | 1154 | set_intr_gate_ist(1, &debug, DEBUG_STACK); |
| 1150 | set_intr_gate_ist(2, &nmi, NMI_STACK); | 1155 | set_intr_gate_ist(2, &nmi, NMI_STACK); |
| 1151 | set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */ | 1156 | /* int3 can be called from all */ |
| 1152 | set_system_gate(4, &overflow); /* int4 can be called from all */ | 1157 | set_system_gate_ist(3, &int3, DEBUG_STACK); |
| 1158 | /* int4 can be called from all */ | ||
| 1159 | set_system_gate(4, &overflow); | ||
| 1153 | set_intr_gate(5, &bounds); | 1160 | set_intr_gate(5, &bounds); |
| 1154 | set_intr_gate(6, &invalid_op); | 1161 | set_intr_gate(6, &invalid_op); |
| 1155 | set_intr_gate(7, &device_not_available); | 1162 | set_intr_gate(7, &device_not_available); |
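The DO_ERROR_INFO macro whose opening lines appear in the first hunk above is a token-pasting generator: each invocation stamps out a complete asmlinkage do_<name>(struct pt_regs *, long) handler that fills in a siginfo_t and delivers the signal. The following is a minimal user-space analogue of that pattern, purely illustrative (it is not the kernel macro; the handler names are borrowed from traps_64.c for flavour):

#include <stdio.h>

/* Illustrative analogue of DO_ERROR_INFO: stamp out one handler per trap. */
#define DEFINE_TRAP_HANDLER(trapnr, str, name)				\
static void do_##name(long error_code)					\
{									\
	printf("trap %d (%s), error_code=%ld\n", trapnr, str, error_code); \
}

DEFINE_TRAP_HANDLER(0, "divide error", divide_error)
DEFINE_TRAP_HANDLER(6, "invalid opcode", invalid_op)

int main(void)
{
	do_divide_error(0);
	do_invalid_op(0);
	return 0;
}

The real macro differs in that the generated handlers take a struct pt_regs * as shown above and hand the filled-in siginfo_t to the common trap delivery code.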
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 346cae5ac423..161bb850fc47 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
| @@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); | |||
| 104 | /* | 104 | /* |
| 105 | * Read TSC and the reference counters. Take care of SMI disturbance | 105 | * Read TSC and the reference counters. Take care of SMI disturbance |
| 106 | */ | 106 | */ |
| 107 | static u64 tsc_read_refs(u64 *pm, u64 *hpet) | 107 | static u64 tsc_read_refs(u64 *p, int hpet) |
| 108 | { | 108 | { |
| 109 | u64 t1, t2; | 109 | u64 t1, t2; |
| 110 | int i; | 110 | int i; |
| @@ -112,9 +112,9 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) | |||
| 112 | for (i = 0; i < MAX_RETRIES; i++) { | 112 | for (i = 0; i < MAX_RETRIES; i++) { |
| 113 | t1 = get_cycles(); | 113 | t1 = get_cycles(); |
| 114 | if (hpet) | 114 | if (hpet) |
| 115 | *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; | 115 | *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; |
| 116 | else | 116 | else |
| 117 | *pm = acpi_pm_read_early(); | 117 | *p = acpi_pm_read_early(); |
| 118 | t2 = get_cycles(); | 118 | t2 = get_cycles(); |
| 119 | if ((t2 - t1) < SMI_TRESHOLD) | 119 | if ((t2 - t1) < SMI_TRESHOLD) |
| 120 | return t2; | 120 | return t2; |
| @@ -123,13 +123,59 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) | |||
| 123 | } | 123 | } |
| 124 | 124 | ||
| 125 | /* | 125 | /* |
| 126 | * Calculate the TSC frequency from HPET reference | ||
| 127 | */ | ||
| 128 | static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) | ||
| 129 | { | ||
| 130 | u64 tmp; | ||
| 131 | |||
| 132 | if (hpet2 < hpet1) | ||
| 133 | hpet2 += 0x100000000ULL; | ||
| 134 | hpet2 -= hpet1; | ||
| 135 | tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); | ||
| 136 | do_div(tmp, 1000000); | ||
| 137 | do_div(deltatsc, tmp); | ||
| 138 | |||
| 139 | return (unsigned long) deltatsc; | ||
| 140 | } | ||
| 141 | |||
| 142 | /* | ||
| 143 | * Calculate the TSC frequency from PMTimer reference | ||
| 144 | */ | ||
| 145 | static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) | ||
| 146 | { | ||
| 147 | u64 tmp; | ||
| 148 | |||
| 149 | if (!pm1 && !pm2) | ||
| 150 | return ULONG_MAX; | ||
| 151 | |||
| 152 | if (pm2 < pm1) | ||
| 153 | pm2 += (u64)ACPI_PM_OVRRUN; | ||
| 154 | pm2 -= pm1; | ||
| 155 | tmp = pm2 * 1000000000LL; | ||
| 156 | do_div(tmp, PMTMR_TICKS_PER_SEC); | ||
| 157 | do_div(deltatsc, tmp); | ||
| 158 | |||
| 159 | return (unsigned long) deltatsc; | ||
| 160 | } | ||
| 161 | |||
| 162 | #define CAL_MS 10 | ||
| 163 | #define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) | ||
| 164 | #define CAL_PIT_LOOPS 1000 | ||
| 165 | |||
| 166 | #define CAL2_MS 50 | ||
| 167 | #define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS)) | ||
| 168 | #define CAL2_PIT_LOOPS 5000 | ||
| 169 | |||
| 170 | |||
| 171 | /* | ||
| 126 | * Try to calibrate the TSC against the Programmable | 172 | * Try to calibrate the TSC against the Programmable |
| 127 | * Interrupt Timer and return the frequency of the TSC | 173 | * Interrupt Timer and return the frequency of the TSC |
| 128 | * in kHz. | 174 | * in kHz. |
| 129 | * | 175 | * |
| 130 | * Return ULONG_MAX on failure to calibrate. | 176 | * Return ULONG_MAX on failure to calibrate. |
| 131 | */ | 177 | */ |
| 132 | static unsigned long pit_calibrate_tsc(void) | 178 | static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) |
| 133 | { | 179 | { |
| 134 | u64 tsc, t1, t2, delta; | 180 | u64 tsc, t1, t2, delta; |
| 135 | unsigned long tscmin, tscmax; | 181 | unsigned long tscmin, tscmax; |
| @@ -144,8 +190,8 @@ static unsigned long pit_calibrate_tsc(void) | |||
| 144 | * (LSB then MSB) to begin countdown. | 190 | * (LSB then MSB) to begin countdown. |
| 145 | */ | 191 | */ |
| 146 | outb(0xb0, 0x43); | 192 | outb(0xb0, 0x43); |
| 147 | outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); | 193 | outb(latch & 0xff, 0x42); |
| 148 | outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); | 194 | outb(latch >> 8, 0x42); |
| 149 | 195 | ||
| 150 | tsc = t1 = t2 = get_cycles(); | 196 | tsc = t1 = t2 = get_cycles(); |
| 151 | 197 | ||
| @@ -166,31 +212,154 @@ static unsigned long pit_calibrate_tsc(void) | |||
| 166 | /* | 212 | /* |
| 167 | * Sanity checks: | 213 | * Sanity checks: |
| 168 | * | 214 | * |
| 169 | * If we were not able to read the PIT more than 5000 | 215 | * If we were not able to read the PIT more than loopmin |
| 170 | * times, then we have been hit by a massive SMI | 216 | * times, then we have been hit by a massive SMI |
| 171 | * | 217 | * |
| 172 | * If the maximum is 10 times larger than the minimum, | 218 | * If the maximum is 10 times larger than the minimum, |
| 173 | * then we got hit by an SMI as well. | 219 | * then we got hit by an SMI as well. |
| 174 | */ | 220 | */ |
| 175 | if (pitcnt < 5000 || tscmax > 10 * tscmin) | 221 | if (pitcnt < loopmin || tscmax > 10 * tscmin) |
| 176 | return ULONG_MAX; | 222 | return ULONG_MAX; |
| 177 | 223 | ||
| 178 | /* Calculate the PIT value */ | 224 | /* Calculate the PIT value */ |
| 179 | delta = t2 - t1; | 225 | delta = t2 - t1; |
| 180 | do_div(delta, 50); | 226 | do_div(delta, ms); |
| 181 | return delta; | 227 | return delta; |
| 182 | } | 228 | } |
| 183 | 229 | ||
| 230 | /* | ||
| 231 | * This reads the current MSB of the PIT counter, and | ||
| 232 | * checks if we are running on sufficiently fast and | ||
| 233 | * non-virtualized hardware. | ||
| 234 | * | ||
| 235 | * Our expectations are: | ||
| 236 | * | ||
| 237 | * - the PIT is running at roughly 1.19MHz | ||
| 238 | * | ||
| 239 | * - each IO is going to take about 1us on real hardware, | ||
| 240 | * but we allow it to be much faster (by a factor of 10) or | ||
| 241 | * _slightly_ slower (ie we allow up to a 2us read+counter | ||
| 242 | * update - anything else implies a unacceptably slow CPU | ||
| 243 | * update - anything else implies an unacceptably slow CPU | ||
| 244 | * or PIT for the fast calibration to work). | ||
| 245 | * - with 256 PIT ticks to read the value, we have 214us to | ||
| 246 | * see the same MSB (and overhead like doing a single TSC | ||
| 247 | * read per MSB value etc). | ||
| 248 | * | ||
| 249 | * - We're doing 2 reads per loop (LSB, MSB), and we expect | ||
| 250 | * them each to take about a microsecond on real hardware. | ||
| 251 | * So we expect a count value of around 100. But we'll be | ||
| 252 | * generous, and accept anything over 50. | ||
| 253 | * | ||
| 254 | * - if the PIT is stuck, and we see *many* more reads, we | ||
| 255 | * return early (and the next caller of pit_expect_msb() | ||
| 256 | * will then consider it a failure when it doesn't see the | ||
| 257 | * next expected value). | ||
| 258 | * | ||
| 259 | * These expectations mean that we know that we have seen the | ||
| 260 | * transition from one expected value to another with a fairly | ||
| 261 | * high accuracy, and we didn't miss any events. We can thus | ||
| 262 | * use the TSC value at the transitions to calculate a pretty | ||
| 263 | * good value for the TSC frequency. | ||
| 264 | */ | ||
| 265 | static inline int pit_expect_msb(unsigned char val) | ||
| 266 | { | ||
| 267 | int count = 0; | ||
| 268 | |||
| 269 | for (count = 0; count < 50000; count++) { | ||
| 270 | /* Ignore LSB */ | ||
| 271 | inb(0x42); | ||
| 272 | if (inb(0x42) != val) | ||
| 273 | break; | ||
| 274 | } | ||
| 275 | return count > 50; | ||
| 276 | } | ||
| 277 | |||
| 278 | /* | ||
| 279 | * How many MSB values do we want to see? We aim for a | ||
| 280 | * 15ms calibration, which assuming a 2us counter read | ||
| 281 | * error should give us roughly 150 ppm precision for | ||
| 282 | * the calibration. | ||
| 283 | */ | ||
| 284 | #define QUICK_PIT_MS 15 | ||
| 285 | #define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) | ||
| 286 | |||
| 287 | static unsigned long quick_pit_calibrate(void) | ||
| 288 | { | ||
| 289 | /* Set the Gate high, disable speaker */ | ||
| 290 | outb((inb(0x61) & ~0x02) | 0x01, 0x61); | ||
| 291 | |||
| 292 | /* | ||
| 293 | * Counter 2, mode 0 (one-shot), binary count | ||
| 294 | * | ||
| 295 | * NOTE! Mode 2 decrements by two (and then the | ||
| 296 | * output is flipped each time, giving the same | ||
| 297 | * final output frequency as a decrement-by-one), | ||
| 298 | * so mode 0 is much better when looking at the | ||
| 299 | * individual counts. | ||
| 300 | */ | ||
| 301 | outb(0xb0, 0x43); | ||
| 302 | |||
| 303 | /* Start at 0xffff */ | ||
| 304 | outb(0xff, 0x42); | ||
| 305 | outb(0xff, 0x42); | ||
| 306 | |||
| 307 | if (pit_expect_msb(0xff)) { | ||
| 308 | int i; | ||
| 309 | u64 t1, t2, delta; | ||
| 310 | unsigned char expect = 0xfe; | ||
| 311 | |||
| 312 | t1 = get_cycles(); | ||
| 313 | for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) { | ||
| 314 | if (!pit_expect_msb(expect)) | ||
| 315 | goto failed; | ||
| 316 | } | ||
| 317 | t2 = get_cycles(); | ||
| 318 | |||
| 319 | /* | ||
| 320 | * Make sure we can rely on the second TSC timestamp: | ||
| 321 | */ | ||
| 322 | if (!pit_expect_msb(expect)) | ||
| 323 | goto failed; | ||
| 324 | |||
| 325 | /* | ||
| 326 | * Ok, if we get here, then we've seen the | ||
| 327 | * MSB of the PIT decrement QUICK_PIT_ITERATIONS | ||
| 328 | * times, and each MSB had many hits, so we never | ||
| 329 | * had any sudden jumps. | ||
| 330 | * | ||
| 331 | * As a result, we can depend on there not being | ||
| 332 | * any odd delays anywhere, and the TSC reads are | ||
| 333 | * reliable. | ||
| 334 | * | ||
| 335 | * kHz = ticks / time-in-seconds / 1000; | ||
| 336 | * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000 | ||
| 337 | * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000) | ||
| 338 | */ | ||
| 339 | delta = (t2 - t1)*PIT_TICK_RATE; | ||
| 340 | do_div(delta, QUICK_PIT_ITERATIONS*256*1000); | ||
| 341 | printk("Fast TSC calibration using PIT\n"); | ||
| 342 | return delta; | ||
| 343 | } | ||
| 344 | failed: | ||
| 345 | return 0; | ||
| 346 | } | ||
| 184 | 347 | ||
| 185 | /** | 348 | /** |
| 186 | * native_calibrate_tsc - calibrate the tsc on boot | 349 | * native_calibrate_tsc - calibrate the tsc on boot |
| 187 | */ | 350 | */ |
| 188 | unsigned long native_calibrate_tsc(void) | 351 | unsigned long native_calibrate_tsc(void) |
| 189 | { | 352 | { |
| 190 | u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2; | 353 | u64 tsc1, tsc2, delta, ref1, ref2; |
| 191 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; | 354 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; |
| 192 | unsigned long flags; | 355 | unsigned long flags, latch, ms, fast_calibrate; |
| 193 | int hpet = is_hpet_enabled(), i; | 356 | int hpet = is_hpet_enabled(), i, loopmin; |
| 357 | |||
| 358 | local_irq_save(flags); | ||
| 359 | fast_calibrate = quick_pit_calibrate(); | ||
| 360 | local_irq_restore(flags); | ||
| 361 | if (fast_calibrate) | ||
| 362 | return fast_calibrate; | ||
| 194 | 363 | ||
| 195 | /* | 364 | /* |
| 196 | * Run 5 calibration loops to get the lowest frequency value | 365 | * Run 5 calibration loops to get the lowest frequency value |
| @@ -216,7 +385,13 @@ unsigned long native_calibrate_tsc(void) | |||
| 216 | * calibration delay loop as we have to wait for a certain | 385 | * calibration delay loop as we have to wait for a certain |
| 217 | * amount of time anyway. | 386 | * amount of time anyway. |
| 218 | */ | 387 | */ |
| 219 | for (i = 0; i < 5; i++) { | 388 | |
| 389 | /* Preset PIT loop values */ | ||
| 390 | latch = CAL_LATCH; | ||
| 391 | ms = CAL_MS; | ||
| 392 | loopmin = CAL_PIT_LOOPS; | ||
| 393 | |||
| 394 | for (i = 0; i < 3; i++) { | ||
| 220 | unsigned long tsc_pit_khz; | 395 | unsigned long tsc_pit_khz; |
| 221 | 396 | ||
| 222 | /* | 397 | /* |
| @@ -226,16 +401,16 @@ unsigned long native_calibrate_tsc(void) | |||
| 226 | * read the end value. | 401 | * read the end value. |
| 227 | */ | 402 | */ |
| 228 | local_irq_save(flags); | 403 | local_irq_save(flags); |
| 229 | tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); | 404 | tsc1 = tsc_read_refs(&ref1, hpet); |
| 230 | tsc_pit_khz = pit_calibrate_tsc(); | 405 | tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); |
| 231 | tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); | 406 | tsc2 = tsc_read_refs(&ref2, hpet); |
| 232 | local_irq_restore(flags); | 407 | local_irq_restore(flags); |
| 233 | 408 | ||
| 234 | /* Pick the lowest PIT TSC calibration so far */ | 409 | /* Pick the lowest PIT TSC calibration so far */ |
| 235 | tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); | 410 | tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); |
| 236 | 411 | ||
| 237 | /* hpet or pmtimer available ? */ | 412 | /* hpet or pmtimer available ? */ |
| 238 | if (!hpet && !pm1 && !pm2) | 413 | if (!hpet && !ref1 && !ref2) |
| 239 | continue; | 414 | continue; |
| 240 | 415 | ||
| 241 | /* Check, whether the sampling was disturbed by an SMI */ | 416 | /* Check, whether the sampling was disturbed by an SMI */ |
| @@ -243,23 +418,41 @@ unsigned long native_calibrate_tsc(void) | |||
| 243 | continue; | 418 | continue; |
| 244 | 419 | ||
| 245 | tsc2 = (tsc2 - tsc1) * 1000000LL; | 420 | tsc2 = (tsc2 - tsc1) * 1000000LL; |
| 421 | if (hpet) | ||
| 422 | tsc2 = calc_hpet_ref(tsc2, ref1, ref2); | ||
| 423 | else | ||
| 424 | tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); | ||
| 246 | 425 | ||
| 247 | if (hpet) { | 426 | tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); |
| 248 | if (hpet2 < hpet1) | 427 | |
| 249 | hpet2 += 0x100000000ULL; | 428 | /* Check the reference deviation */ |
| 250 | hpet2 -= hpet1; | 429 | delta = ((u64) tsc_pit_min) * 100; |
| 251 | tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); | 430 | do_div(delta, tsc_ref_min); |
| 252 | do_div(tsc1, 1000000); | 431 | |
| 253 | } else { | 432 | /* |
| 254 | if (pm2 < pm1) | 433 | * If both calibration results are inside a 10% window |
| 255 | pm2 += (u64)ACPI_PM_OVRRUN; | 434 | * then we can be sure, that the calibration |
| 256 | pm2 -= pm1; | 435 | * succeeded. We break out of the loop right away. We |
| 257 | tsc1 = pm2 * 1000000000LL; | 436 | * use the reference value, as it is more precise. |
| 258 | do_div(tsc1, PMTMR_TICKS_PER_SEC); | 437 | */ |
| 438 | if (delta >= 90 && delta <= 110) { | ||
| 439 | printk(KERN_INFO | ||
| 440 | "TSC: PIT calibration matches %s. %d loops\n", | ||
| 441 | hpet ? "HPET" : "PMTIMER", i + 1); | ||
| 442 | return tsc_ref_min; | ||
| 259 | } | 443 | } |
| 260 | 444 | ||
| 261 | do_div(tsc2, tsc1); | 445 | /* |
| 262 | tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); | 446 | * Check whether PIT failed more than once. This |
| 447 | * happens in virtualized environments. We need to | ||
| 448 | * give the virtual PC a slightly longer timeframe for | ||
| 449 | * the HPET/PMTIMER to make the result precise. | ||
| 450 | */ | ||
| 451 | if (i == 1 && tsc_pit_min == ULONG_MAX) { | ||
| 452 | latch = CAL2_LATCH; | ||
| 453 | ms = CAL2_MS; | ||
| 454 | loopmin = CAL2_PIT_LOOPS; | ||
| 455 | } | ||
| 263 | } | 456 | } |
| 264 | 457 | ||
| 265 | /* | 458 | /* |
| @@ -267,11 +460,10 @@ unsigned long native_calibrate_tsc(void) | |||
| 267 | */ | 460 | */ |
| 268 | if (tsc_pit_min == ULONG_MAX) { | 461 | if (tsc_pit_min == ULONG_MAX) { |
| 269 | /* PIT gave no useful value */ | 462 | /* PIT gave no useful value */ |
| 270 | printk(KERN_WARNING "TSC: PIT calibration failed due to " | 463 | printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); |
| 271 | "SMI disturbance.\n"); | ||
| 272 | 464 | ||
| 273 | /* We don't have an alternative source, disable TSC */ | 465 | /* We don't have an alternative source, disable TSC */ |
| 274 | if (!hpet && !pm1 && !pm2) { | 466 | if (!hpet && !ref1 && !ref2) { |
| 275 | printk("TSC: No reference (HPET/PMTIMER) available\n"); | 467 | printk("TSC: No reference (HPET/PMTIMER) available\n"); |
| 276 | return 0; | 468 | return 0; |
| 277 | } | 469 | } |
| @@ -279,7 +471,7 @@ unsigned long native_calibrate_tsc(void) | |||
| 279 | /* The alternative source failed as well, disable TSC */ | 471 | /* The alternative source failed as well, disable TSC */ |
| 280 | if (tsc_ref_min == ULONG_MAX) { | 472 | if (tsc_ref_min == ULONG_MAX) { |
| 281 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " | 473 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " |
| 282 | "failed due to SMI disturbance.\n"); | 474 | "failed.\n"); |
| 283 | return 0; | 475 | return 0; |
| 284 | } | 476 | } |
| 285 | 477 | ||
| @@ -291,44 +483,25 @@ unsigned long native_calibrate_tsc(void) | |||
| 291 | } | 483 | } |
| 292 | 484 | ||
| 293 | /* We don't have an alternative source, use the PIT calibration value */ | 485 | /* We don't have an alternative source, use the PIT calibration value */ |
| 294 | if (!hpet && !pm1 && !pm2) { | 486 | if (!hpet && !ref1 && !ref2) { |
| 295 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); | 487 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); |
| 296 | return tsc_pit_min; | 488 | return tsc_pit_min; |
| 297 | } | 489 | } |
| 298 | 490 | ||
| 299 | /* The alternative source failed, use the PIT calibration value */ | 491 | /* The alternative source failed, use the PIT calibration value */ |
| 300 | if (tsc_ref_min == ULONG_MAX) { | 492 | if (tsc_ref_min == ULONG_MAX) { |
| 301 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due " | 493 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " |
| 302 | "to SMI disturbance. Using PIT calibration\n"); | 494 | "Using PIT calibration\n"); |
| 303 | return tsc_pit_min; | 495 | return tsc_pit_min; |
| 304 | } | 496 | } |
| 305 | 497 | ||
| 306 | /* Check the reference deviation */ | ||
| 307 | delta = ((u64) tsc_pit_min) * 100; | ||
| 308 | do_div(delta, tsc_ref_min); | ||
| 309 | |||
| 310 | /* | ||
| 311 | * If both calibration results are inside a 5% window, then we | ||
| 312 | * use the lower frequency of those as it is probably the | ||
| 313 | * closest estimate. | ||
| 314 | */ | ||
| 315 | if (delta >= 95 && delta <= 105) { | ||
| 316 | printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n", | ||
| 317 | hpet ? "HPET" : "PMTIMER"); | ||
| 318 | printk(KERN_INFO "TSC: using %s calibration value\n", | ||
| 319 | tsc_pit_min <= tsc_ref_min ? "PIT" : | ||
| 320 | hpet ? "HPET" : "PMTIMER"); | ||
| 321 | return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min; | ||
| 322 | } | ||
| 323 | |||
| 324 | printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", | ||
| 325 | hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); | ||
| 326 | |||
| 327 | /* | 498 | /* |
| 328 | * The calibration values differ too much. In doubt, we use | 499 | * The calibration values differ too much. In doubt, we use |
| 329 | * the PIT value as we know that there are PMTIMERs around | 500 | * the PIT value as we know that there are PMTIMERs around |
| 330 | * running at double speed. | 501 | * running at double speed. At least we let the user know: |
| 331 | */ | 502 | */ |
| 503 | printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", | ||
| 504 | hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); | ||
| 332 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); | 505 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); |
| 333 | return tsc_pit_min; | 506 | return tsc_pit_min; |
| 334 | } | 507 | } |
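The kHz arithmetic at the end of quick_pit_calibrate() above is easy to sanity-check in user space. The sketch below is illustrative only: the two timestamps are made-up values, and PIT_TICK_RATE is assumed to be the standard 1.193182 MHz i8253 input clock.

#include <stdio.h>
#include <stdint.h>

#define PIT_TICK_RATE        1193182ULL	/* assumed i8253 input clock, Hz */
#define QUICK_PIT_MS         15
#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)

int main(void)
{
	/* Hypothetical TSC readings taken around the MSB countdown */
	uint64_t t1 = 1000000ULL;
	uint64_t t2 = t1 + 42000000ULL;	/* ~42M cycles in ~15 ms => ~2.8 GHz */

	/* kHz = ((t2 - t1) * PIT_TICK_RATE) / (QUICK_PIT_ITERATIONS * 256 * 1000) */
	uint64_t khz = (t2 - t1) * PIT_TICK_RATE;
	khz /= QUICK_PIT_ITERATIONS * 256ULL * 1000ULL;

	printf("TSC ~ %llu kHz\n", (unsigned long long)khz);
	return 0;
}

The roughly 150 ppm figure quoted in the QUICK_PIT_MS comment follows from the same numbers: a worst-case 2us read error over a ~15 ms window is about 2/15000, i.e. roughly 133 ppm.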
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 594ef47f0a63..61a97e616f70 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
| @@ -25,45 +25,31 @@ | |||
| 25 | #include <asm/visws/cobalt.h> | 25 | #include <asm/visws/cobalt.h> |
| 26 | #include <asm/visws/piix4.h> | 26 | #include <asm/visws/piix4.h> |
| 27 | #include <asm/arch_hooks.h> | 27 | #include <asm/arch_hooks.h> |
| 28 | #include <asm/io_apic.h> | ||
| 28 | #include <asm/fixmap.h> | 29 | #include <asm/fixmap.h> |
| 29 | #include <asm/reboot.h> | 30 | #include <asm/reboot.h> |
| 30 | #include <asm/setup.h> | 31 | #include <asm/setup.h> |
| 31 | #include <asm/e820.h> | 32 | #include <asm/e820.h> |
| 32 | #include <asm/smp.h> | ||
| 33 | #include <asm/io.h> | 33 | #include <asm/io.h> |
| 34 | 34 | ||
| 35 | #include <mach_ipi.h> | 35 | #include <mach_ipi.h> |
| 36 | 36 | ||
| 37 | #include "mach_apic.h" | 37 | #include "mach_apic.h" |
| 38 | 38 | ||
| 39 | #include <linux/init.h> | ||
| 40 | #include <linux/smp.h> | ||
| 41 | |||
| 42 | #include <linux/kernel_stat.h> | 39 | #include <linux/kernel_stat.h> |
| 43 | #include <linux/interrupt.h> | ||
| 44 | #include <linux/init.h> | ||
| 45 | 40 | ||
| 46 | #include <asm/io.h> | ||
| 47 | #include <asm/apic.h> | ||
| 48 | #include <asm/i8259.h> | 41 | #include <asm/i8259.h> |
| 49 | #include <asm/irq_vectors.h> | 42 | #include <asm/irq_vectors.h> |
| 50 | #include <asm/visws/cobalt.h> | ||
| 51 | #include <asm/visws/lithium.h> | 43 | #include <asm/visws/lithium.h> |
| 52 | #include <asm/visws/piix4.h> | ||
| 53 | 44 | ||
| 54 | #include <linux/sched.h> | 45 | #include <linux/sched.h> |
| 55 | #include <linux/kernel.h> | 46 | #include <linux/kernel.h> |
| 56 | #include <linux/init.h> | ||
| 57 | #include <linux/pci.h> | 47 | #include <linux/pci.h> |
| 58 | #include <linux/pci_ids.h> | 48 | #include <linux/pci_ids.h> |
| 59 | 49 | ||
| 60 | extern int no_broadcast; | 50 | extern int no_broadcast; |
| 61 | 51 | ||
| 62 | #include <asm/io.h> | ||
| 63 | #include <asm/apic.h> | 52 | #include <asm/apic.h> |
| 64 | #include <asm/arch_hooks.h> | ||
| 65 | #include <asm/visws/cobalt.h> | ||
| 66 | #include <asm/visws/lithium.h> | ||
| 67 | 53 | ||
| 68 | char visws_board_type = -1; | 54 | char visws_board_type = -1; |
| 69 | char visws_board_rev = -1; | 55 | char visws_board_rev = -1; |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 61531d5c9507..8b6c393ab9fd 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
| @@ -235,7 +235,7 @@ static void vmi_write_ldt_entry(struct desc_struct *dt, int entry, | |||
| 235 | const void *desc) | 235 | const void *desc) |
| 236 | { | 236 | { |
| 237 | u32 *ldt_entry = (u32 *)desc; | 237 | u32 *ldt_entry = (u32 *)desc; |
| 238 | vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); | 238 | vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); |
| 239 | } | 239 | } |
| 240 | 240 | ||
| 241 | static void vmi_load_sp0(struct tss_struct *tss, | 241 | static void vmi_load_sp0(struct tss_struct *tss, |
| @@ -393,13 +393,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | |||
| 393 | } | 393 | } |
| 394 | #endif | 394 | #endif |
| 395 | 395 | ||
| 396 | static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn) | 396 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) |
| 397 | { | 397 | { |
| 398 | vmi_set_page_type(pfn, VMI_PAGE_L1); | 398 | vmi_set_page_type(pfn, VMI_PAGE_L1); |
| 399 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | 399 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); |
| 400 | } | 400 | } |
| 401 | 401 | ||
| 402 | static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) | 402 | static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn) |
| 403 | { | 403 | { |
| 404 | /* | 404 | /* |
| 405 | * This call comes in very early, before mem_map is setup. | 405 | * This call comes in very early, before mem_map is setup. |
| @@ -410,20 +410,20 @@ static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) | |||
| 410 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); | 410 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); |
| 411 | } | 411 | } |
| 412 | 412 | ||
| 413 | static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) | 413 | static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) |
| 414 | { | 414 | { |
| 415 | vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); | 415 | vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); |
| 416 | vmi_check_page_type(clonepfn, VMI_PAGE_L2); | 416 | vmi_check_page_type(clonepfn, VMI_PAGE_L2); |
| 417 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); | 417 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); |
| 418 | } | 418 | } |
| 419 | 419 | ||
| 420 | static void vmi_release_pte(u32 pfn) | 420 | static void vmi_release_pte(unsigned long pfn) |
| 421 | { | 421 | { |
| 422 | vmi_ops.release_page(pfn, VMI_PAGE_L1); | 422 | vmi_ops.release_page(pfn, VMI_PAGE_L1); |
| 423 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | 423 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); |
| 424 | } | 424 | } |
| 425 | 425 | ||
| 426 | static void vmi_release_pmd(u32 pfn) | 426 | static void vmi_release_pmd(unsigned long pfn) |
| 427 | { | 427 | { |
| 428 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | 428 | vmi_ops.release_page(pfn, VMI_PAGE_L2); |
| 429 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | 429 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); |
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 0c029e8959c7..7766d36983fc 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c | |||
| @@ -61,7 +61,7 @@ static void vsmp_irq_enable(void) | |||
| 61 | native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); | 61 | native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); |
| 62 | } | 62 | } |
| 63 | 63 | ||
| 64 | static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf, | 64 | static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, |
| 65 | unsigned long addr, unsigned len) | 65 | unsigned long addr, unsigned len) |
| 66 | { | 66 | { |
| 67 | switch (type) { | 67 | switch (type) { |
