author    Ingo Molnar <mingo@elte.hu>    2009-09-04 08:44:16 -0400
committer Ingo Molnar <mingo@elte.hu>    2009-09-04 08:44:16 -0400
commit    695a461296e5df148c99ac087b9e1cb380f4db15 (patch)
tree      951893036fdc0b7bae0e17bc739ac8ffe909781d /arch/x86/kernel
parent    c7084b35eb1a4d3353a501508baf9d3d82822c93 (diff)
parent    2b681fafcc50fea6304ed418667c9d04282acb73 (diff)
Merge branch 'amd-iommu/2.6.32' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu into core/iommu
Diffstat (limited to 'arch/x86/kernel')
26 files changed, 671 insertions, 342 deletions
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 6c99f5037801..98f230f6a28d 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -41,9 +41,13 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); | |||
41 | static LIST_HEAD(iommu_pd_list); | 41 | static LIST_HEAD(iommu_pd_list); |
42 | static DEFINE_SPINLOCK(iommu_pd_list_lock); | 42 | static DEFINE_SPINLOCK(iommu_pd_list_lock); |
43 | 43 | ||
44 | #ifdef CONFIG_IOMMU_API | 44 | /* |
45 | * Domain for untranslated devices - only allocated | ||
46 | * if iommu=pt passed on kernel cmd line. | ||
47 | */ | ||
48 | static struct protection_domain *pt_domain; | ||
49 | |||
45 | static struct iommu_ops amd_iommu_ops; | 50 | static struct iommu_ops amd_iommu_ops; |
46 | #endif | ||
47 | 51 | ||
48 | /* | 52 | /* |
49 | * general struct to manage commands send to an IOMMU | 53 | * general struct to manage commands send to an IOMMU |
@@ -55,16 +59,16 @@ struct iommu_cmd { | |||
55 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | 59 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, |
56 | struct unity_map_entry *e); | 60 | struct unity_map_entry *e); |
57 | static struct dma_ops_domain *find_protection_domain(u16 devid); | 61 | static struct dma_ops_domain *find_protection_domain(u16 devid); |
58 | static u64* alloc_pte(struct protection_domain *dom, | 62 | static u64 *alloc_pte(struct protection_domain *domain, |
59 | unsigned long address, u64 | 63 | unsigned long address, int end_lvl, |
60 | **pte_page, gfp_t gfp); | 64 | u64 **pte_page, gfp_t gfp); |
61 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | 65 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, |
62 | unsigned long start_page, | 66 | unsigned long start_page, |
63 | unsigned int pages); | 67 | unsigned int pages); |
64 | 68 | static void reset_iommu_command_buffer(struct amd_iommu *iommu); | |
65 | #ifndef BUS_NOTIFY_UNBOUND_DRIVER | 69 | static u64 *fetch_pte(struct protection_domain *domain, |
66 | #define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 | 70 | unsigned long address, int map_size); |
67 | #endif | 71 | static void update_domain(struct protection_domain *domain); |
68 | 72 | ||
69 | #ifdef CONFIG_AMD_IOMMU_STATS | 73 | #ifdef CONFIG_AMD_IOMMU_STATS |
70 | 74 | ||
@@ -138,7 +142,25 @@ static int iommu_has_npcache(struct amd_iommu *iommu) | |||
138 | * | 142 | * |
139 | ****************************************************************************/ | 143 | ****************************************************************************/ |
140 | 144 | ||
141 | static void iommu_print_event(void *__evt) | 145 | static void dump_dte_entry(u16 devid) |
146 | { | ||
147 | int i; | ||
148 | |||
149 | for (i = 0; i < 8; ++i) | ||
150 | pr_err("AMD-Vi: DTE[%d]: %08x\n", i, | ||
151 | amd_iommu_dev_table[devid].data[i]); | ||
152 | } | ||
153 | |||
154 | static void dump_command(unsigned long phys_addr) | ||
155 | { | ||
156 | struct iommu_cmd *cmd = phys_to_virt(phys_addr); | ||
157 | int i; | ||
158 | |||
159 | for (i = 0; i < 4; ++i) | ||
160 | pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); | ||
161 | } | ||
162 | |||
163 | static void iommu_print_event(struct amd_iommu *iommu, void *__evt) | ||
142 | { | 164 | { |
143 | u32 *event = __evt; | 165 | u32 *event = __evt; |
144 | int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; | 166 | int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; |
@@ -147,7 +169,7 @@ static void iommu_print_event(void *__evt) | |||
147 | int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; | 169 | int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; |
148 | u64 address = (u64)(((u64)event[3]) << 32) | event[2]; | 170 | u64 address = (u64)(((u64)event[3]) << 32) | event[2]; |
149 | 171 | ||
150 | printk(KERN_ERR "AMD IOMMU: Event logged ["); | 172 | printk(KERN_ERR "AMD-Vi: Event logged ["); |
151 | 173 | ||
152 | switch (type) { | 174 | switch (type) { |
153 | case EVENT_TYPE_ILL_DEV: | 175 | case EVENT_TYPE_ILL_DEV: |
@@ -155,6 +177,7 @@ static void iommu_print_event(void *__evt) | |||
155 | "address=0x%016llx flags=0x%04x]\n", | 177 | "address=0x%016llx flags=0x%04x]\n", |
156 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | 178 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), |
157 | address, flags); | 179 | address, flags); |
180 | dump_dte_entry(devid); | ||
158 | break; | 181 | break; |
159 | case EVENT_TYPE_IO_FAULT: | 182 | case EVENT_TYPE_IO_FAULT: |
160 | printk("IO_PAGE_FAULT device=%02x:%02x.%x " | 183 | printk("IO_PAGE_FAULT device=%02x:%02x.%x " |
@@ -176,6 +199,8 @@ static void iommu_print_event(void *__evt) | |||
176 | break; | 199 | break; |
177 | case EVENT_TYPE_ILL_CMD: | 200 | case EVENT_TYPE_ILL_CMD: |
178 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); | 201 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); |
202 | reset_iommu_command_buffer(iommu); | ||
203 | dump_command(address); | ||
179 | break; | 204 | break; |
180 | case EVENT_TYPE_CMD_HARD_ERR: | 205 | case EVENT_TYPE_CMD_HARD_ERR: |
181 | printk("COMMAND_HARDWARE_ERROR address=0x%016llx " | 206 | printk("COMMAND_HARDWARE_ERROR address=0x%016llx " |
@@ -209,7 +234,7 @@ static void iommu_poll_events(struct amd_iommu *iommu) | |||
209 | tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); | 234 | tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); |
210 | 235 | ||
211 | while (head != tail) { | 236 | while (head != tail) { |
212 | iommu_print_event(iommu->evt_buf + head); | 237 | iommu_print_event(iommu, iommu->evt_buf + head); |
213 | head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; | 238 | head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; |
214 | } | 239 | } |
215 | 240 | ||
@@ -296,8 +321,11 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu) | |||
296 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; | 321 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; |
297 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | 322 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); |
298 | 323 | ||
299 | if (unlikely(i == EXIT_LOOP_COUNT)) | 324 | if (unlikely(i == EXIT_LOOP_COUNT)) { |
300 | panic("AMD IOMMU: Completion wait loop failed\n"); | 325 | spin_unlock(&iommu->lock); |
326 | reset_iommu_command_buffer(iommu); | ||
327 | spin_lock(&iommu->lock); | ||
328 | } | ||
301 | } | 329 | } |
302 | 330 | ||
303 | /* | 331 | /* |
@@ -445,47 +473,78 @@ static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) | |||
445 | } | 473 | } |
446 | 474 | ||
447 | /* | 475 | /* |
476 | * This function flushes one domain on one IOMMU | ||
477 | */ | ||
478 | static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) | ||
479 | { | ||
480 | struct iommu_cmd cmd; | ||
481 | unsigned long flags; | ||
482 | |||
483 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | ||
484 | domid, 1, 1); | ||
485 | |||
486 | spin_lock_irqsave(&iommu->lock, flags); | ||
487 | __iommu_queue_command(iommu, &cmd); | ||
488 | __iommu_completion_wait(iommu); | ||
489 | __iommu_wait_for_completion(iommu); | ||
490 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
491 | } | ||
492 | |||
493 | static void flush_all_domains_on_iommu(struct amd_iommu *iommu) | ||
494 | { | ||
495 | int i; | ||
496 | |||
497 | for (i = 1; i < MAX_DOMAIN_ID; ++i) { | ||
498 | if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) | ||
499 | continue; | ||
500 | flush_domain_on_iommu(iommu, i); | ||
501 | } | ||
502 | |||
503 | } | ||
504 | |||
505 | /* | ||
448 | * This function is used to flush the IO/TLB for a given protection domain | 506 | * This function is used to flush the IO/TLB for a given protection domain |
449 | * on every IOMMU in the system | 507 | * on every IOMMU in the system |
450 | */ | 508 | */ |
451 | static void iommu_flush_domain(u16 domid) | 509 | static void iommu_flush_domain(u16 domid) |
452 | { | 510 | { |
453 | unsigned long flags; | ||
454 | struct amd_iommu *iommu; | 511 | struct amd_iommu *iommu; |
455 | struct iommu_cmd cmd; | ||
456 | 512 | ||
457 | INC_STATS_COUNTER(domain_flush_all); | 513 | INC_STATS_COUNTER(domain_flush_all); |
458 | 514 | ||
459 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | 515 | for_each_iommu(iommu) |
460 | domid, 1, 1); | 516 | flush_domain_on_iommu(iommu, domid); |
461 | |||
462 | for_each_iommu(iommu) { | ||
463 | spin_lock_irqsave(&iommu->lock, flags); | ||
464 | __iommu_queue_command(iommu, &cmd); | ||
465 | __iommu_completion_wait(iommu); | ||
466 | __iommu_wait_for_completion(iommu); | ||
467 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
468 | } | ||
469 | } | 517 | } |
470 | 518 | ||
471 | void amd_iommu_flush_all_domains(void) | 519 | void amd_iommu_flush_all_domains(void) |
472 | { | 520 | { |
521 | struct amd_iommu *iommu; | ||
522 | |||
523 | for_each_iommu(iommu) | ||
524 | flush_all_domains_on_iommu(iommu); | ||
525 | } | ||
526 | |||
527 | static void flush_all_devices_for_iommu(struct amd_iommu *iommu) | ||
528 | { | ||
473 | int i; | 529 | int i; |
474 | 530 | ||
475 | for (i = 1; i < MAX_DOMAIN_ID; ++i) { | 531 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { |
476 | if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) | 532 | if (iommu != amd_iommu_rlookup_table[i]) |
477 | continue; | 533 | continue; |
478 | iommu_flush_domain(i); | 534 | |
535 | iommu_queue_inv_dev_entry(iommu, i); | ||
536 | iommu_completion_wait(iommu); | ||
479 | } | 537 | } |
480 | } | 538 | } |
481 | 539 | ||
482 | void amd_iommu_flush_all_devices(void) | 540 | static void flush_devices_by_domain(struct protection_domain *domain) |
483 | { | 541 | { |
484 | struct amd_iommu *iommu; | 542 | struct amd_iommu *iommu; |
485 | int i; | 543 | int i; |
486 | 544 | ||
487 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | 545 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { |
488 | if (amd_iommu_pd_table[i] == NULL) | 546 | if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || |
547 | (amd_iommu_pd_table[i] != domain)) | ||
489 | continue; | 548 | continue; |
490 | 549 | ||
491 | iommu = amd_iommu_rlookup_table[i]; | 550 | iommu = amd_iommu_rlookup_table[i]; |
@@ -497,6 +556,27 @@ void amd_iommu_flush_all_devices(void) | |||
497 | } | 556 | } |
498 | } | 557 | } |
499 | 558 | ||
559 | static void reset_iommu_command_buffer(struct amd_iommu *iommu) | ||
560 | { | ||
561 | pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); | ||
562 | |||
563 | if (iommu->reset_in_progress) | ||
564 | panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); | ||
565 | |||
566 | iommu->reset_in_progress = true; | ||
567 | |||
568 | amd_iommu_reset_cmd_buffer(iommu); | ||
569 | flush_all_devices_for_iommu(iommu); | ||
570 | flush_all_domains_on_iommu(iommu); | ||
571 | |||
572 | iommu->reset_in_progress = false; | ||
573 | } | ||
574 | |||
575 | void amd_iommu_flush_all_devices(void) | ||
576 | { | ||
577 | flush_devices_by_domain(NULL); | ||
578 | } | ||
579 | |||
500 | /**************************************************************************** | 580 | /**************************************************************************** |
501 | * | 581 | * |
502 | * The functions below are used the create the page table mappings for | 582 | * The functions below are used the create the page table mappings for |
@@ -514,18 +594,21 @@ void amd_iommu_flush_all_devices(void) | |||
514 | static int iommu_map_page(struct protection_domain *dom, | 594 | static int iommu_map_page(struct protection_domain *dom, |
515 | unsigned long bus_addr, | 595 | unsigned long bus_addr, |
516 | unsigned long phys_addr, | 596 | unsigned long phys_addr, |
517 | int prot) | 597 | int prot, |
598 | int map_size) | ||
518 | { | 599 | { |
519 | u64 __pte, *pte; | 600 | u64 __pte, *pte; |
520 | 601 | ||
521 | bus_addr = PAGE_ALIGN(bus_addr); | 602 | bus_addr = PAGE_ALIGN(bus_addr); |
522 | phys_addr = PAGE_ALIGN(phys_addr); | 603 | phys_addr = PAGE_ALIGN(phys_addr); |
523 | 604 | ||
524 | /* only support 512GB address spaces for now */ | 605 | BUG_ON(!PM_ALIGNED(map_size, bus_addr)); |
525 | if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) | 606 | BUG_ON(!PM_ALIGNED(map_size, phys_addr)); |
607 | |||
608 | if (!(prot & IOMMU_PROT_MASK)) | ||
526 | return -EINVAL; | 609 | return -EINVAL; |
527 | 610 | ||
528 | pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL); | 611 | pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL); |
529 | 612 | ||
530 | if (IOMMU_PTE_PRESENT(*pte)) | 613 | if (IOMMU_PTE_PRESENT(*pte)) |
531 | return -EBUSY; | 614 | return -EBUSY; |
@@ -538,29 +621,18 @@ static int iommu_map_page(struct protection_domain *dom, | |||
538 | 621 | ||
539 | *pte = __pte; | 622 | *pte = __pte; |
540 | 623 | ||
624 | update_domain(dom); | ||
625 | |||
541 | return 0; | 626 | return 0; |
542 | } | 627 | } |
543 | 628 | ||
544 | static void iommu_unmap_page(struct protection_domain *dom, | 629 | static void iommu_unmap_page(struct protection_domain *dom, |
545 | unsigned long bus_addr) | 630 | unsigned long bus_addr, int map_size) |
546 | { | 631 | { |
547 | u64 *pte; | 632 | u64 *pte = fetch_pte(dom, bus_addr, map_size); |
548 | |||
549 | pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; | ||
550 | |||
551 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
552 | return; | ||
553 | |||
554 | pte = IOMMU_PTE_PAGE(*pte); | ||
555 | pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; | ||
556 | 633 | ||
557 | if (!IOMMU_PTE_PRESENT(*pte)) | 634 | if (pte) |
558 | return; | 635 | *pte = 0; |
559 | |||
560 | pte = IOMMU_PTE_PAGE(*pte); | ||
561 | pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; | ||
562 | |||
563 | *pte = 0; | ||
564 | } | 636 | } |
565 | 637 | ||
566 | /* | 638 | /* |
@@ -615,7 +687,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
615 | 687 | ||
616 | for (addr = e->address_start; addr < e->address_end; | 688 | for (addr = e->address_start; addr < e->address_end; |
617 | addr += PAGE_SIZE) { | 689 | addr += PAGE_SIZE) { |
618 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot); | 690 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, |
691 | PM_MAP_4k); | ||
619 | if (ret) | 692 | if (ret) |
620 | return ret; | 693 | return ret; |
621 | /* | 694 | /* |
@@ -670,24 +743,29 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | |||
670 | * This function checks if there is a PTE for a given dma address. If | 743 | * This function checks if there is a PTE for a given dma address. If |
671 | * there is one, it returns the pointer to it. | 744 | * there is one, it returns the pointer to it. |
672 | */ | 745 | */ |
673 | static u64* fetch_pte(struct protection_domain *domain, | 746 | static u64 *fetch_pte(struct protection_domain *domain, |
674 | unsigned long address) | 747 | unsigned long address, int map_size) |
675 | { | 748 | { |
749 | int level; | ||
676 | u64 *pte; | 750 | u64 *pte; |
677 | 751 | ||
678 | pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)]; | 752 | level = domain->mode - 1; |
753 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | ||
679 | 754 | ||
680 | if (!IOMMU_PTE_PRESENT(*pte)) | 755 | while (level > map_size) { |
681 | return NULL; | 756 | if (!IOMMU_PTE_PRESENT(*pte)) |
757 | return NULL; | ||
682 | 758 | ||
683 | pte = IOMMU_PTE_PAGE(*pte); | 759 | level -= 1; |
684 | pte = &pte[IOMMU_PTE_L1_INDEX(address)]; | ||
685 | 760 | ||
686 | if (!IOMMU_PTE_PRESENT(*pte)) | 761 | pte = IOMMU_PTE_PAGE(*pte); |
687 | return NULL; | 762 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
688 | 763 | ||
689 | pte = IOMMU_PTE_PAGE(*pte); | 764 | if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { |
690 | pte = &pte[IOMMU_PTE_L0_INDEX(address)]; | 765 | pte = NULL; |
766 | break; | ||
767 | } | ||
768 | } | ||
691 | 769 | ||
692 | return pte; | 770 | return pte; |
693 | } | 771 | } |
@@ -727,7 +805,7 @@ static int alloc_new_range(struct amd_iommu *iommu, | |||
727 | u64 *pte, *pte_page; | 805 | u64 *pte, *pte_page; |
728 | 806 | ||
729 | for (i = 0; i < num_ptes; ++i) { | 807 | for (i = 0; i < num_ptes; ++i) { |
730 | pte = alloc_pte(&dma_dom->domain, address, | 808 | pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k, |
731 | &pte_page, gfp); | 809 | &pte_page, gfp); |
732 | if (!pte) | 810 | if (!pte) |
733 | goto out_free; | 811 | goto out_free; |
@@ -760,16 +838,20 @@ static int alloc_new_range(struct amd_iommu *iommu, | |||
760 | for (i = dma_dom->aperture[index]->offset; | 838 | for (i = dma_dom->aperture[index]->offset; |
761 | i < dma_dom->aperture_size; | 839 | i < dma_dom->aperture_size; |
762 | i += PAGE_SIZE) { | 840 | i += PAGE_SIZE) { |
763 | u64 *pte = fetch_pte(&dma_dom->domain, i); | 841 | u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k); |
764 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) | 842 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) |
765 | continue; | 843 | continue; |
766 | 844 | ||
767 | dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); | 845 | dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); |
768 | } | 846 | } |
769 | 847 | ||
848 | update_domain(&dma_dom->domain); | ||
849 | |||
770 | return 0; | 850 | return 0; |
771 | 851 | ||
772 | out_free: | 852 | out_free: |
853 | update_domain(&dma_dom->domain); | ||
854 | |||
773 | free_page((unsigned long)dma_dom->aperture[index]->bitmap); | 855 | free_page((unsigned long)dma_dom->aperture[index]->bitmap); |
774 | 856 | ||
775 | kfree(dma_dom->aperture[index]); | 857 | kfree(dma_dom->aperture[index]); |
@@ -1009,7 +1091,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) | |||
1009 | dma_dom->domain.id = domain_id_alloc(); | 1091 | dma_dom->domain.id = domain_id_alloc(); |
1010 | if (dma_dom->domain.id == 0) | 1092 | if (dma_dom->domain.id == 0) |
1011 | goto free_dma_dom; | 1093 | goto free_dma_dom; |
1012 | dma_dom->domain.mode = PAGE_MODE_3_LEVEL; | 1094 | dma_dom->domain.mode = PAGE_MODE_2_LEVEL; |
1013 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); | 1095 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); |
1014 | dma_dom->domain.flags = PD_DMA_OPS_MASK; | 1096 | dma_dom->domain.flags = PD_DMA_OPS_MASK; |
1015 | dma_dom->domain.priv = dma_dom; | 1097 | dma_dom->domain.priv = dma_dom; |
@@ -1063,6 +1145,41 @@ static struct protection_domain *domain_for_device(u16 devid) | |||
1063 | return dom; | 1145 | return dom; |
1064 | } | 1146 | } |
1065 | 1147 | ||
1148 | static void set_dte_entry(u16 devid, struct protection_domain *domain) | ||
1149 | { | ||
1150 | u64 pte_root = virt_to_phys(domain->pt_root); | ||
1151 | |||
1152 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) | ||
1153 | << DEV_ENTRY_MODE_SHIFT; | ||
1154 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | ||
1155 | |||
1156 | amd_iommu_dev_table[devid].data[2] = domain->id; | ||
1157 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | ||
1158 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | ||
1159 | |||
1160 | amd_iommu_pd_table[devid] = domain; | ||
1161 | } | ||
1162 | |||
1163 | /* | ||
1164 | * If a device is not yet associated with a domain, this function does | ||
1165 | * assigns it visible for the hardware | ||
1166 | */ | ||
1167 | static void __attach_device(struct amd_iommu *iommu, | ||
1168 | struct protection_domain *domain, | ||
1169 | u16 devid) | ||
1170 | { | ||
1171 | /* lock domain */ | ||
1172 | spin_lock(&domain->lock); | ||
1173 | |||
1174 | /* update DTE entry */ | ||
1175 | set_dte_entry(devid, domain); | ||
1176 | |||
1177 | domain->dev_cnt += 1; | ||
1178 | |||
1179 | /* ready */ | ||
1180 | spin_unlock(&domain->lock); | ||
1181 | } | ||
1182 | |||
1066 | /* | 1183 | /* |
1067 | * If a device is not yet associated with a domain, this function does | 1184 | * If a device is not yet associated with a domain, this function does |
1068 | * assigns it visible for the hardware | 1185 | * assigns it visible for the hardware |
@@ -1072,27 +1189,16 @@ static void attach_device(struct amd_iommu *iommu, | |||
1072 | u16 devid) | 1189 | u16 devid) |
1073 | { | 1190 | { |
1074 | unsigned long flags; | 1191 | unsigned long flags; |
1075 | u64 pte_root = virt_to_phys(domain->pt_root); | ||
1076 | |||
1077 | domain->dev_cnt += 1; | ||
1078 | |||
1079 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) | ||
1080 | << DEV_ENTRY_MODE_SHIFT; | ||
1081 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | ||
1082 | 1192 | ||
1083 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 1193 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
1084 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | 1194 | __attach_device(iommu, domain, devid); |
1085 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | ||
1086 | amd_iommu_dev_table[devid].data[2] = domain->id; | ||
1087 | |||
1088 | amd_iommu_pd_table[devid] = domain; | ||
1089 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1195 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1090 | 1196 | ||
1091 | /* | 1197 | /* |
1092 | * We might boot into a crash-kernel here. The crashed kernel | 1198 | * We might boot into a crash-kernel here. The crashed kernel |
1093 | * left the caches in the IOMMU dirty. So we have to flush | 1199 | * left the caches in the IOMMU dirty. So we have to flush |
1094 | * here to evict all dirty stuff. | 1200 | * here to evict all dirty stuff. |
1095 | */ | 1201 | */ |
1096 | iommu_queue_inv_dev_entry(iommu, devid); | 1202 | iommu_queue_inv_dev_entry(iommu, devid); |
1097 | iommu_flush_tlb_pde(iommu, domain->id); | 1203 | iommu_flush_tlb_pde(iommu, domain->id); |
1098 | } | 1204 | } |
@@ -1119,6 +1225,15 @@ static void __detach_device(struct protection_domain *domain, u16 devid) | |||
1119 | 1225 | ||
1120 | /* ready */ | 1226 | /* ready */ |
1121 | spin_unlock(&domain->lock); | 1227 | spin_unlock(&domain->lock); |
1228 | |||
1229 | /* | ||
1230 | * If we run in passthrough mode the device must be assigned to the | ||
1231 | * passthrough domain if it is detached from any other domain | ||
1232 | */ | ||
1233 | if (iommu_pass_through) { | ||
1234 | struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; | ||
1235 | __attach_device(iommu, pt_domain, devid); | ||
1236 | } | ||
1122 | } | 1237 | } |
1123 | 1238 | ||
1124 | /* | 1239 | /* |
@@ -1164,6 +1279,8 @@ static int device_change_notifier(struct notifier_block *nb, | |||
1164 | case BUS_NOTIFY_UNBOUND_DRIVER: | 1279 | case BUS_NOTIFY_UNBOUND_DRIVER: |
1165 | if (!domain) | 1280 | if (!domain) |
1166 | goto out; | 1281 | goto out; |
1282 | if (iommu_pass_through) | ||
1283 | break; | ||
1167 | detach_device(domain, devid); | 1284 | detach_device(domain, devid); |
1168 | break; | 1285 | break; |
1169 | case BUS_NOTIFY_ADD_DEVICE: | 1286 | case BUS_NOTIFY_ADD_DEVICE: |
@@ -1292,39 +1409,91 @@ static int get_device_resources(struct device *dev, | |||
1292 | return 1; | 1409 | return 1; |
1293 | } | 1410 | } |
1294 | 1411 | ||
1412 | static void update_device_table(struct protection_domain *domain) | ||
1413 | { | ||
1414 | unsigned long flags; | ||
1415 | int i; | ||
1416 | |||
1417 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | ||
1418 | if (amd_iommu_pd_table[i] != domain) | ||
1419 | continue; | ||
1420 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1421 | set_dte_entry(i, domain); | ||
1422 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1423 | } | ||
1424 | } | ||
1425 | |||
1426 | static void update_domain(struct protection_domain *domain) | ||
1427 | { | ||
1428 | if (!domain->updated) | ||
1429 | return; | ||
1430 | |||
1431 | update_device_table(domain); | ||
1432 | flush_devices_by_domain(domain); | ||
1433 | iommu_flush_domain(domain->id); | ||
1434 | |||
1435 | domain->updated = false; | ||
1436 | } | ||
1437 | |||
1295 | /* | 1438 | /* |
1296 | * If the pte_page is not yet allocated this function is called | 1439 | * This function is used to add another level to an IO page table. Adding |
1440 | * another level increases the size of the address space by 9 bits to a size up | ||
1441 | * to 64 bits. | ||
1297 | */ | 1442 | */ |
1298 | static u64* alloc_pte(struct protection_domain *dom, | 1443 | static bool increase_address_space(struct protection_domain *domain, |
1299 | unsigned long address, u64 **pte_page, gfp_t gfp) | 1444 | gfp_t gfp) |
1445 | { | ||
1446 | u64 *pte; | ||
1447 | |||
1448 | if (domain->mode == PAGE_MODE_6_LEVEL) | ||
1449 | /* address space already 64 bit large */ | ||
1450 | return false; | ||
1451 | |||
1452 | pte = (void *)get_zeroed_page(gfp); | ||
1453 | if (!pte) | ||
1454 | return false; | ||
1455 | |||
1456 | *pte = PM_LEVEL_PDE(domain->mode, | ||
1457 | virt_to_phys(domain->pt_root)); | ||
1458 | domain->pt_root = pte; | ||
1459 | domain->mode += 1; | ||
1460 | domain->updated = true; | ||
1461 | |||
1462 | return true; | ||
1463 | } | ||
1464 | |||
1465 | static u64 *alloc_pte(struct protection_domain *domain, | ||
1466 | unsigned long address, | ||
1467 | int end_lvl, | ||
1468 | u64 **pte_page, | ||
1469 | gfp_t gfp) | ||
1300 | { | 1470 | { |
1301 | u64 *pte, *page; | 1471 | u64 *pte, *page; |
1472 | int level; | ||
1302 | 1473 | ||
1303 | pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)]; | 1474 | while (address > PM_LEVEL_SIZE(domain->mode)) |
1475 | increase_address_space(domain, gfp); | ||
1304 | 1476 | ||
1305 | if (!IOMMU_PTE_PRESENT(*pte)) { | 1477 | level = domain->mode - 1; |
1306 | page = (u64 *)get_zeroed_page(gfp); | 1478 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; |
1307 | if (!page) | ||
1308 | return NULL; | ||
1309 | *pte = IOMMU_L2_PDE(virt_to_phys(page)); | ||
1310 | } | ||
1311 | 1479 | ||
1312 | pte = IOMMU_PTE_PAGE(*pte); | 1480 | while (level > end_lvl) { |
1313 | pte = &pte[IOMMU_PTE_L1_INDEX(address)]; | 1481 | if (!IOMMU_PTE_PRESENT(*pte)) { |
1482 | page = (u64 *)get_zeroed_page(gfp); | ||
1483 | if (!page) | ||
1484 | return NULL; | ||
1485 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); | ||
1486 | } | ||
1314 | 1487 | ||
1315 | if (!IOMMU_PTE_PRESENT(*pte)) { | 1488 | level -= 1; |
1316 | page = (u64 *)get_zeroed_page(gfp); | ||
1317 | if (!page) | ||
1318 | return NULL; | ||
1319 | *pte = IOMMU_L1_PDE(virt_to_phys(page)); | ||
1320 | } | ||
1321 | 1489 | ||
1322 | pte = IOMMU_PTE_PAGE(*pte); | 1490 | pte = IOMMU_PTE_PAGE(*pte); |
1323 | 1491 | ||
1324 | if (pte_page) | 1492 | if (pte_page && level == end_lvl) |
1325 | *pte_page = pte; | 1493 | *pte_page = pte; |
1326 | 1494 | ||
1327 | pte = &pte[IOMMU_PTE_L0_INDEX(address)]; | 1495 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
1496 | } | ||
1328 | 1497 | ||
1329 | return pte; | 1498 | return pte; |
1330 | } | 1499 | } |
@@ -1344,10 +1513,13 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, | |||
1344 | 1513 | ||
1345 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; | 1514 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; |
1346 | if (!pte) { | 1515 | if (!pte) { |
1347 | pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC); | 1516 | pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page, |
1517 | GFP_ATOMIC); | ||
1348 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; | 1518 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; |
1349 | } else | 1519 | } else |
1350 | pte += IOMMU_PTE_L0_INDEX(address); | 1520 | pte += PM_LEVEL_INDEX(0, address); |
1521 | |||
1522 | update_domain(&dom->domain); | ||
1351 | 1523 | ||
1352 | return pte; | 1524 | return pte; |
1353 | } | 1525 | } |
@@ -1409,7 +1581,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, | |||
1409 | if (!pte) | 1581 | if (!pte) |
1410 | return; | 1582 | return; |
1411 | 1583 | ||
1412 | pte += IOMMU_PTE_L0_INDEX(address); | 1584 | pte += PM_LEVEL_INDEX(0, address); |
1413 | 1585 | ||
1414 | WARN_ON(!*pte); | 1586 | WARN_ON(!*pte); |
1415 | 1587 | ||
@@ -1988,19 +2160,47 @@ static void cleanup_domain(struct protection_domain *domain) | |||
1988 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 2160 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1989 | } | 2161 | } |
1990 | 2162 | ||
1991 | static int amd_iommu_domain_init(struct iommu_domain *dom) | 2163 | static void protection_domain_free(struct protection_domain *domain) |
2164 | { | ||
2165 | if (!domain) | ||
2166 | return; | ||
2167 | |||
2168 | if (domain->id) | ||
2169 | domain_id_free(domain->id); | ||
2170 | |||
2171 | kfree(domain); | ||
2172 | } | ||
2173 | |||
2174 | static struct protection_domain *protection_domain_alloc(void) | ||
1992 | { | 2175 | { |
1993 | struct protection_domain *domain; | 2176 | struct protection_domain *domain; |
1994 | 2177 | ||
1995 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); | 2178 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); |
1996 | if (!domain) | 2179 | if (!domain) |
1997 | return -ENOMEM; | 2180 | return NULL; |
1998 | 2181 | ||
1999 | spin_lock_init(&domain->lock); | 2182 | spin_lock_init(&domain->lock); |
2000 | domain->mode = PAGE_MODE_3_LEVEL; | ||
2001 | domain->id = domain_id_alloc(); | 2183 | domain->id = domain_id_alloc(); |
2002 | if (!domain->id) | 2184 | if (!domain->id) |
2185 | goto out_err; | ||
2186 | |||
2187 | return domain; | ||
2188 | |||
2189 | out_err: | ||
2190 | kfree(domain); | ||
2191 | |||
2192 | return NULL; | ||
2193 | } | ||
2194 | |||
2195 | static int amd_iommu_domain_init(struct iommu_domain *dom) | ||
2196 | { | ||
2197 | struct protection_domain *domain; | ||
2198 | |||
2199 | domain = protection_domain_alloc(); | ||
2200 | if (!domain) | ||
2003 | goto out_free; | 2201 | goto out_free; |
2202 | |||
2203 | domain->mode = PAGE_MODE_3_LEVEL; | ||
2004 | domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); | 2204 | domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); |
2005 | if (!domain->pt_root) | 2205 | if (!domain->pt_root) |
2006 | goto out_free; | 2206 | goto out_free; |
@@ -2010,7 +2210,7 @@ static int amd_iommu_domain_init(struct iommu_domain *dom) | |||
2010 | return 0; | 2210 | return 0; |
2011 | 2211 | ||
2012 | out_free: | 2212 | out_free: |
2013 | kfree(domain); | 2213 | protection_domain_free(domain); |
2014 | 2214 | ||
2015 | return -ENOMEM; | 2215 | return -ENOMEM; |
2016 | } | 2216 | } |
@@ -2115,7 +2315,7 @@ static int amd_iommu_map_range(struct iommu_domain *dom, | |||
2115 | paddr &= PAGE_MASK; | 2315 | paddr &= PAGE_MASK; |
2116 | 2316 | ||
2117 | for (i = 0; i < npages; ++i) { | 2317 | for (i = 0; i < npages; ++i) { |
2118 | ret = iommu_map_page(domain, iova, paddr, prot); | 2318 | ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); |
2119 | if (ret) | 2319 | if (ret) |
2120 | return ret; | 2320 | return ret; |
2121 | 2321 | ||
@@ -2136,7 +2336,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, | |||
2136 | iova &= PAGE_MASK; | 2336 | iova &= PAGE_MASK; |
2137 | 2337 | ||
2138 | for (i = 0; i < npages; ++i) { | 2338 | for (i = 0; i < npages; ++i) { |
2139 | iommu_unmap_page(domain, iova); | 2339 | iommu_unmap_page(domain, iova, PM_MAP_4k); |
2140 | iova += PAGE_SIZE; | 2340 | iova += PAGE_SIZE; |
2141 | } | 2341 | } |
2142 | 2342 | ||
@@ -2151,21 +2351,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | |||
2151 | phys_addr_t paddr; | 2351 | phys_addr_t paddr; |
2152 | u64 *pte; | 2352 | u64 *pte; |
2153 | 2353 | ||
2154 | pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)]; | 2354 | pte = fetch_pte(domain, iova, PM_MAP_4k); |
2155 | |||
2156 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
2157 | return 0; | ||
2158 | |||
2159 | pte = IOMMU_PTE_PAGE(*pte); | ||
2160 | pte = &pte[IOMMU_PTE_L1_INDEX(iova)]; | ||
2161 | |||
2162 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
2163 | return 0; | ||
2164 | |||
2165 | pte = IOMMU_PTE_PAGE(*pte); | ||
2166 | pte = &pte[IOMMU_PTE_L0_INDEX(iova)]; | ||
2167 | 2355 | ||
2168 | if (!IOMMU_PTE_PRESENT(*pte)) | 2356 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) |
2169 | return 0; | 2357 | return 0; |
2170 | 2358 | ||
2171 | paddr = *pte & IOMMU_PAGE_MASK; | 2359 | paddr = *pte & IOMMU_PAGE_MASK; |
@@ -2191,3 +2379,46 @@ static struct iommu_ops amd_iommu_ops = { | |||
2191 | .domain_has_cap = amd_iommu_domain_has_cap, | 2379 | .domain_has_cap = amd_iommu_domain_has_cap, |
2192 | }; | 2380 | }; |
2193 | 2381 | ||
2382 | /***************************************************************************** | ||
2383 | * | ||
2384 | * The next functions do a basic initialization of IOMMU for pass through | ||
2385 | * mode | ||
2386 | * | ||
2387 | * In passthrough mode the IOMMU is initialized and enabled but not used for | ||
2388 | * DMA-API translation. | ||
2389 | * | ||
2390 | *****************************************************************************/ | ||
2391 | |||
2392 | int __init amd_iommu_init_passthrough(void) | ||
2393 | { | ||
2394 | struct pci_dev *dev = NULL; | ||
2395 | u16 devid, devid2; | ||
2396 | |||
2397 | /* allocate passthroug domain */ | ||
2398 | pt_domain = protection_domain_alloc(); | ||
2399 | if (!pt_domain) | ||
2400 | return -ENOMEM; | ||
2401 | |||
2402 | pt_domain->mode |= PAGE_MODE_NONE; | ||
2403 | |||
2404 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | ||
2405 | struct amd_iommu *iommu; | ||
2406 | |||
2407 | devid = calc_devid(dev->bus->number, dev->devfn); | ||
2408 | if (devid > amd_iommu_last_bdf) | ||
2409 | continue; | ||
2410 | |||
2411 | devid2 = amd_iommu_alias_table[devid]; | ||
2412 | |||
2413 | iommu = amd_iommu_rlookup_table[devid2]; | ||
2414 | if (!iommu) | ||
2415 | continue; | ||
2416 | |||
2417 | __attach_device(iommu, pt_domain, devid); | ||
2418 | __attach_device(iommu, pt_domain, devid2); | ||
2419 | } | ||
2420 | |||
2421 | pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); | ||
2422 | |||
2423 | return 0; | ||
2424 | } | ||
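The largest conceptual change in the amd_iommu.c hunks above is the move from a fixed three-level page-table walk (the old IOMMU_PTE_L2/L1/L0_INDEX chain) to a variable-depth walk driven by domain->mode, which increase_address_space() can grow on demand. The sketch below illustrates only that indexing scheme; it is a simplified stand-in, not the kernel code, and LEVEL_INDEX, PTE_PRESENT and pte_page are substitutes for the kernel's PM_LEVEL_INDEX, IOMMU_PTE_PRESENT and IOMMU_PTE_PAGE helpers.

```c
#include <stdint.h>

/* Each level translates 9 bits of the IO virtual address; leaf pages
 * are 4 KiB.  These mirror the roles of the kernel's PM_* macros. */
#define PAGE_SHIFT        12
#define LEVEL_BITS        9
#define LEVEL_SHIFT(l)    (PAGE_SHIFT + (l) * LEVEL_BITS)
#define LEVEL_INDEX(l, a) (((a) >> LEVEL_SHIFT(l)) & 0x1ffULL)
#define PTE_PRESENT(p)    ((p) & 1ULL)

/* In this sketch a present entry holds a host pointer to the next-level
 * table in its upper bits; the kernel stores a physical address and uses
 * IOMMU_PTE_PAGE() to turn it back into a virtual pointer. */
static uint64_t *pte_page(uint64_t pte)
{
        return (uint64_t *)(uintptr_t)(pte & ~0xfffULL);
}

/* Walk a variable-depth table down to end_lvl (0 is the 4k leaf level,
 * the role PM_MAP_4k plays in the patch).  'mode' is the number of
 * levels, analogous to domain->mode.  Returns NULL when a level on the
 * path is not present, just like the reworked fetch_pte(). */
static uint64_t *walk_pte(uint64_t *root, int mode,
                          uint64_t address, int end_lvl)
{
        int level = mode - 1;
        uint64_t *pte = &root[LEVEL_INDEX(level, address)];

        while (level > end_lvl) {
                if (!PTE_PRESENT(*pte))
                        return NULL;
                level -= 1;
                pte = &pte_page(*pte)[LEVEL_INDEX(level, address)];
        }

        return pte;
}
```

alloc_pte() in the patch follows the same loop but installs a freshly zeroed page whenever an intermediate level is missing, and increase_address_space() bolts a new top level onto the table (raising domain->mode) when an address exceeds PM_LEVEL_SIZE(domain->mode).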
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c1b17e97252e..b4b61d462dcc 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -252,7 +252,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
252 | /* Function to enable the hardware */ | 252 | /* Function to enable the hardware */ |
253 | static void iommu_enable(struct amd_iommu *iommu) | 253 | static void iommu_enable(struct amd_iommu *iommu) |
254 | { | 254 | { |
255 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", | 255 | printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n", |
256 | dev_name(&iommu->dev->dev), iommu->cap_ptr); | 256 | dev_name(&iommu->dev->dev), iommu->cap_ptr); |
257 | 257 | ||
258 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | 258 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); |
@@ -435,6 +435,20 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | |||
435 | } | 435 | } |
436 | 436 | ||
437 | /* | 437 | /* |
438 | * This function resets the command buffer if the IOMMU stopped fetching | ||
439 | * commands from it. | ||
440 | */ | ||
441 | void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) | ||
442 | { | ||
443 | iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); | ||
444 | |||
445 | writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | ||
446 | writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | ||
447 | |||
448 | iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); | ||
449 | } | ||
450 | |||
451 | /* | ||
438 | * This function writes the command buffer address to the hardware and | 452 | * This function writes the command buffer address to the hardware and |
439 | * enables it. | 453 | * enables it. |
440 | */ | 454 | */ |
@@ -450,11 +464,7 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) | |||
450 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, | 464 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, |
451 | &entry, sizeof(entry)); | 465 | &entry, sizeof(entry)); |
452 | 466 | ||
453 | /* set head and tail to zero manually */ | 467 | amd_iommu_reset_cmd_buffer(iommu); |
454 | writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | ||
455 | writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | ||
456 | |||
457 | iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); | ||
458 | } | 468 | } |
459 | 469 | ||
460 | static void __init free_command_buffer(struct amd_iommu *iommu) | 470 | static void __init free_command_buffer(struct amd_iommu *iommu) |
@@ -858,7 +868,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
858 | switch (*p) { | 868 | switch (*p) { |
859 | case ACPI_IVHD_TYPE: | 869 | case ACPI_IVHD_TYPE: |
860 | 870 | ||
861 | DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x " | 871 | DUMP_printk("device: %02x:%02x.%01x cap: %04x " |
862 | "seg: %d flags: %01x info %04x\n", | 872 | "seg: %d flags: %01x info %04x\n", |
863 | PCI_BUS(h->devid), PCI_SLOT(h->devid), | 873 | PCI_BUS(h->devid), PCI_SLOT(h->devid), |
864 | PCI_FUNC(h->devid), h->cap_ptr, | 874 | PCI_FUNC(h->devid), h->cap_ptr, |
@@ -902,7 +912,7 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu) | |||
902 | 912 | ||
903 | r = request_irq(iommu->dev->irq, amd_iommu_int_handler, | 913 | r = request_irq(iommu->dev->irq, amd_iommu_int_handler, |
904 | IRQF_SAMPLE_RANDOM, | 914 | IRQF_SAMPLE_RANDOM, |
905 | "AMD IOMMU", | 915 | "AMD-Vi", |
906 | NULL); | 916 | NULL); |
907 | 917 | ||
908 | if (r) { | 918 | if (r) { |
@@ -1150,7 +1160,7 @@ int __init amd_iommu_init(void) | |||
1150 | 1160 | ||
1151 | 1161 | ||
1152 | if (no_iommu) { | 1162 | if (no_iommu) { |
1153 | printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n"); | 1163 | printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); |
1154 | return 0; | 1164 | return 0; |
1155 | } | 1165 | } |
1156 | 1166 | ||
@@ -1242,22 +1252,28 @@ int __init amd_iommu_init(void) | |||
1242 | if (ret) | 1252 | if (ret) |
1243 | goto free; | 1253 | goto free; |
1244 | 1254 | ||
1245 | ret = amd_iommu_init_dma_ops(); | 1255 | if (iommu_pass_through) |
1256 | ret = amd_iommu_init_passthrough(); | ||
1257 | else | ||
1258 | ret = amd_iommu_init_dma_ops(); | ||
1246 | if (ret) | 1259 | if (ret) |
1247 | goto free; | 1260 | goto free; |
1248 | 1261 | ||
1249 | enable_iommus(); | 1262 | enable_iommus(); |
1250 | 1263 | ||
1251 | printk(KERN_INFO "AMD IOMMU: device isolation "); | 1264 | if (iommu_pass_through) |
1265 | goto out; | ||
1266 | |||
1267 | printk(KERN_INFO "AMD-Vi: device isolation "); | ||
1252 | if (amd_iommu_isolate) | 1268 | if (amd_iommu_isolate) |
1253 | printk("enabled\n"); | 1269 | printk("enabled\n"); |
1254 | else | 1270 | else |
1255 | printk("disabled\n"); | 1271 | printk("disabled\n"); |
1256 | 1272 | ||
1257 | if (amd_iommu_unmap_flush) | 1273 | if (amd_iommu_unmap_flush) |
1258 | printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); | 1274 | printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); |
1259 | else | 1275 | else |
1260 | printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); | 1276 | printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); |
1261 | 1277 | ||
1262 | out: | 1278 | out: |
1263 | return ret; | 1279 | return ret; |
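Taken together, the amd_iommu_init.c hunks wire pass-through mode into boot-time selection: when iommu_pass_through is set, amd_iommu_init() now calls amd_iommu_init_passthrough() instead of amd_iommu_init_dma_ops(), and the device-isolation/flush banner is skipped. On x86 the flag is driven by the iommu=pt kernel parameter (the same option named in the pt_domain comment above); a purely illustrative boot entry would look like the following, with the image path and root device as placeholders:

```
linux /boot/vmlinuz-2.6.32 root=/dev/sda1 ro iommu=pt
```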
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 2284a4812b68..d2ed6c5ddc80 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3793,6 +3793,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
3793 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | 3793 | mmr_pnode = uv_blade_to_pnode(mmr_blade); |
3794 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | 3794 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); |
3795 | 3795 | ||
3796 | if (cfg->move_in_progress) | ||
3797 | send_cleanup_vector(cfg); | ||
3798 | |||
3796 | return irq; | 3799 | return irq; |
3797 | } | 3800 | } |
3798 | 3801 | ||
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index dbf5445727a9..6ef00ba4c886 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -106,6 +106,9 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) | |||
106 | unsigned long mask = cpumask_bits(cpumask)[0]; | 106 | unsigned long mask = cpumask_bits(cpumask)[0]; |
107 | unsigned long flags; | 107 | unsigned long flags; |
108 | 108 | ||
109 | if (WARN_ONCE(!mask, "empty IPI mask")) | ||
110 | return; | ||
111 | |||
109 | local_irq_save(flags); | 112 | local_irq_save(flags); |
110 | WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); | 113 | WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); |
111 | __default_send_IPI_dest_field(mask, vector, apic->dest_logical); | 114 | __default_send_IPI_dest_field(mask, vector, apic->dest_logical); |
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index bc3e880f9b82..fcec2f1d34a1 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -44,6 +44,11 @@ static struct apic *apic_probe[] __initdata = { | |||
44 | NULL, | 44 | NULL, |
45 | }; | 45 | }; |
46 | 46 | ||
47 | static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) | ||
48 | { | ||
49 | return hard_smp_processor_id() >> index_msb; | ||
50 | } | ||
51 | |||
47 | /* | 52 | /* |
48 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. | 53 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. |
49 | */ | 54 | */ |
@@ -69,6 +74,11 @@ void __init default_setup_apic_routing(void) | |||
69 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); | 74 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); |
70 | } | 75 | } |
71 | 76 | ||
77 | if (is_vsmp_box()) { | ||
78 | /* need to update phys_pkg_id */ | ||
79 | apic->phys_pkg_id = apicid_phys_pkg_id; | ||
80 | } | ||
81 | |||
72 | /* | 82 | /* |
73 | * Now that apic routing model is selected, configure the | 83 | * Now that apic routing model is selected, configure the |
74 | * fault handling for intr remapping. | 84 | * fault handling for intr remapping. |
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 8e4cbb255c38..a5371ec36776 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -17,11 +17,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
17 | return x2apic_enabled(); | 17 | return x2apic_enabled(); |
18 | } | 18 | } |
19 | 19 | ||
20 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 20 | /* |
21 | 21 | * need to use more than cpu 0, because we need more vectors when | |
22 | * MSI-X are used. | ||
23 | */ | ||
22 | static const struct cpumask *x2apic_target_cpus(void) | 24 | static const struct cpumask *x2apic_target_cpus(void) |
23 | { | 25 | { |
24 | return cpumask_of(0); | 26 | return cpu_online_mask; |
25 | } | 27 | } |
26 | 28 | ||
27 | /* | 29 | /* |
@@ -170,7 +172,7 @@ static unsigned long set_apic_id(unsigned int id) | |||
170 | 172 | ||
171 | static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) | 173 | static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) |
172 | { | 174 | { |
173 | return current_cpu_data.initial_apicid >> index_msb; | 175 | return initial_apicid >> index_msb; |
174 | } | 176 | } |
175 | 177 | ||
176 | static void x2apic_send_IPI_self(int vector) | 178 | static void x2apic_send_IPI_self(int vector) |
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index a284359627e7..a8989aadc99a 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -27,11 +27,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
27 | return 0; | 27 | return 0; |
28 | } | 28 | } |
29 | 29 | ||
30 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 30 | /* |
31 | 31 | * need to use more than cpu 0, because we need more vectors when | |
32 | * MSI-X are used. | ||
33 | */ | ||
32 | static const struct cpumask *x2apic_target_cpus(void) | 34 | static const struct cpumask *x2apic_target_cpus(void) |
33 | { | 35 | { |
34 | return cpumask_of(0); | 36 | return cpu_online_mask; |
35 | } | 37 | } |
36 | 38 | ||
37 | static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) | 39 | static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) |
@@ -162,7 +164,7 @@ static unsigned long set_apic_id(unsigned int id) | |||
162 | 164 | ||
163 | static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) | 165 | static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) |
164 | { | 166 | { |
165 | return current_cpu_data.initial_apicid >> index_msb; | 167 | return initial_apicid >> index_msb; |
166 | } | 168 | } |
167 | 169 | ||
168 | static void x2apic_send_IPI_self(int vector) | 170 | static void x2apic_send_IPI_self(int vector) |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 096d19aea2f7..601159374e87 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -46,7 +46,7 @@ static int early_get_nodeid(void) | |||
46 | return node_id.s.node_id; | 46 | return node_id.s.node_id; |
47 | } | 47 | } |
48 | 48 | ||
49 | static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 49 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
50 | { | 50 | { |
51 | if (!strcmp(oem_id, "SGI")) { | 51 | if (!strcmp(oem_id, "SGI")) { |
52 | if (!strcmp(oem_table_id, "UVL")) | 52 | if (!strcmp(oem_table_id, "UVL")) |
@@ -253,7 +253,7 @@ static void uv_send_IPI_self(int vector) | |||
253 | apic_write(APIC_SELF_IPI, vector); | 253 | apic_write(APIC_SELF_IPI, vector); |
254 | } | 254 | } |
255 | 255 | ||
256 | struct apic apic_x2apic_uv_x = { | 256 | struct apic __refdata apic_x2apic_uv_x = { |
257 | 257 | ||
258 | .name = "UV large system", | 258 | .name = "UV large system", |
259 | .probe = NULL, | 259 | .probe = NULL, |
@@ -261,7 +261,7 @@ struct apic apic_x2apic_uv_x = { | |||
261 | .apic_id_registered = uv_apic_id_registered, | 261 | .apic_id_registered = uv_apic_id_registered, |
262 | 262 | ||
263 | .irq_delivery_mode = dest_Fixed, | 263 | .irq_delivery_mode = dest_Fixed, |
264 | .irq_dest_mode = 1, /* logical */ | 264 | .irq_dest_mode = 0, /* physical */ |
265 | 265 | ||
266 | .target_cpus = uv_target_cpus, | 266 | .target_cpus = uv_target_cpus, |
267 | .disable_esr = 0, | 267 | .disable_esr = 0, |
@@ -362,12 +362,6 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) | |||
362 | BUG(); | 362 | BUG(); |
363 | } | 363 | } |
364 | 364 | ||
365 | static __init void map_low_mmrs(void) | ||
366 | { | ||
367 | init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); | ||
368 | init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); | ||
369 | } | ||
370 | |||
371 | enum map_type {map_wb, map_uc}; | 365 | enum map_type {map_wb, map_uc}; |
372 | 366 | ||
373 | static __init void map_high(char *id, unsigned long base, int shift, | 367 | static __init void map_high(char *id, unsigned long base, int shift, |
@@ -395,26 +389,6 @@ static __init void map_gru_high(int max_pnode) | |||
395 | map_high("GRU", gru.s.base, shift, max_pnode, map_wb); | 389 | map_high("GRU", gru.s.base, shift, max_pnode, map_wb); |
396 | } | 390 | } |
397 | 391 | ||
398 | static __init void map_config_high(int max_pnode) | ||
399 | { | ||
400 | union uvh_rh_gam_cfg_overlay_config_mmr_u cfg; | ||
401 | int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT; | ||
402 | |||
403 | cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); | ||
404 | if (cfg.s.enable) | ||
405 | map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc); | ||
406 | } | ||
407 | |||
408 | static __init void map_mmr_high(int max_pnode) | ||
409 | { | ||
410 | union uvh_rh_gam_mmr_overlay_config_mmr_u mmr; | ||
411 | int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT; | ||
412 | |||
413 | mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); | ||
414 | if (mmr.s.enable) | ||
415 | map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); | ||
416 | } | ||
417 | |||
418 | static __init void map_mmioh_high(int max_pnode) | 392 | static __init void map_mmioh_high(int max_pnode) |
419 | { | 393 | { |
420 | union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; | 394 | union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; |
@@ -566,8 +540,6 @@ void __init uv_system_init(void) | |||
566 | unsigned long mmr_base, present, paddr; | 540 | unsigned long mmr_base, present, paddr; |
567 | unsigned short pnode_mask; | 541 | unsigned short pnode_mask; |
568 | 542 | ||
569 | map_low_mmrs(); | ||
570 | |||
571 | m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); | 543 | m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); |
572 | m_val = m_n_config.s.m_skt; | 544 | m_val = m_n_config.s.m_skt; |
573 | n_val = m_n_config.s.n_skt; | 545 | n_val = m_n_config.s.n_skt; |
@@ -591,6 +563,8 @@ void __init uv_system_init(void) | |||
591 | bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); | 563 | bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); |
592 | uv_blade_info = kmalloc(bytes, GFP_KERNEL); | 564 | uv_blade_info = kmalloc(bytes, GFP_KERNEL); |
593 | BUG_ON(!uv_blade_info); | 565 | BUG_ON(!uv_blade_info); |
566 | for (blade = 0; blade < uv_num_possible_blades(); blade++) | ||
567 | uv_blade_info[blade].memory_nid = -1; | ||
594 | 568 | ||
595 | get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); | 569 | get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); |
596 | 570 | ||
@@ -629,6 +603,9 @@ void __init uv_system_init(void) | |||
629 | lcpu = uv_blade_info[blade].nr_possible_cpus; | 603 | lcpu = uv_blade_info[blade].nr_possible_cpus; |
630 | uv_blade_info[blade].nr_possible_cpus++; | 604 | uv_blade_info[blade].nr_possible_cpus++; |
631 | 605 | ||
606 | /* Any node on the blade, else will contain -1. */ | ||
607 | uv_blade_info[blade].memory_nid = nid; | ||
608 | |||
632 | uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; | 609 | uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; |
633 | uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; | 610 | uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; |
634 | uv_cpu_hub_info(cpu)->m_val = m_val; | 611 | uv_cpu_hub_info(cpu)->m_val = m_val; |
@@ -662,11 +639,10 @@ void __init uv_system_init(void) | |||
662 | pnode = (paddr >> m_val) & pnode_mask; | 639 | pnode = (paddr >> m_val) & pnode_mask; |
663 | blade = boot_pnode_to_blade(pnode); | 640 | blade = boot_pnode_to_blade(pnode); |
664 | uv_node_to_blade[nid] = blade; | 641 | uv_node_to_blade[nid] = blade; |
642 | max_pnode = max(pnode, max_pnode); | ||
665 | } | 643 | } |
666 | 644 | ||
667 | map_gru_high(max_pnode); | 645 | map_gru_high(max_pnode); |
668 | map_mmr_high(max_pnode); | ||
669 | map_config_high(max_pnode); | ||
670 | map_mmioh_high(max_pnode); | 646 | map_mmioh_high(max_pnode); |
671 | 647 | ||
672 | uv_cpu_init(); | 648 | uv_cpu_init(); |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 79302e9a33a4..442b5508893f 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -811,7 +811,7 @@ static int apm_do_idle(void) | |||
811 | u8 ret = 0; | 811 | u8 ret = 0; |
812 | int idled = 0; | 812 | int idled = 0; |
813 | int polling; | 813 | int polling; |
814 | int err; | 814 | int err = 0; |
815 | 815 | ||
816 | polling = !!(current_thread_info()->status & TS_POLLING); | 816 | polling = !!(current_thread_info()->status & TS_POLLING); |
817 | if (polling) { | 817 | if (polling) { |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 3efcb2b96a15..c1f253dac155 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -7,6 +7,10 @@ ifdef CONFIG_FUNCTION_TRACER | |||
7 | CFLAGS_REMOVE_common.o = -pg | 7 | CFLAGS_REMOVE_common.o = -pg |
8 | endif | 8 | endif |
9 | 9 | ||
10 | # Make sure load_percpu_segment has no stackprotector | ||
11 | nostackp := $(call cc-option, -fno-stack-protector) | ||
12 | CFLAGS_common.o := $(nostackp) | ||
13 | |||
10 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 14 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
11 | obj-y += proc.o capflags.o powerflags.o common.o | 15 | obj-y += proc.o capflags.o powerflags.o common.o |
12 | obj-y += vmware.o hypervisor.o | 16 | obj-y += vmware.o hypervisor.o |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e2485b03f1cf..63fddcd082cd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -400,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
400 | level = cpuid_eax(1); | 400 | level = cpuid_eax(1); |
401 | if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) | 401 | if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) |
402 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 402 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
403 | |||
404 | /* | ||
405 | * Some BIOSes incorrectly force this feature, but only K8 | ||
406 | * revision D (model = 0x14) and later actually support it. | ||
407 | */ | ||
408 | if (c->x86_model < 0x14) | ||
409 | clear_cpu_cap(c, X86_FEATURE_LAHF_LM); | ||
403 | } | 410 | } |
404 | if (c->x86 == 0x10 || c->x86 == 0x11) | 411 | if (c->x86 == 0x10 || c->x86 == 0x11) |
405 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 412 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f1961c07af9a..5ce60a88027b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void) | |||
59 | alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); | 59 | alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); |
60 | } | 60 | } |
61 | 61 | ||
62 | static const struct cpu_dev *this_cpu __cpuinitdata; | 62 | static void __cpuinit default_init(struct cpuinfo_x86 *c) |
63 | { | ||
64 | #ifdef CONFIG_X86_64 | ||
65 | display_cacheinfo(c); | ||
66 | #else | ||
67 | /* Not much we can do here... */ | ||
68 | /* Check if at least it has cpuid */ | ||
69 | if (c->cpuid_level == -1) { | ||
70 | /* No cpuid. It must be an ancient CPU */ | ||
71 | if (c->x86 == 4) | ||
72 | strcpy(c->x86_model_id, "486"); | ||
73 | else if (c->x86 == 3) | ||
74 | strcpy(c->x86_model_id, "386"); | ||
75 | } | ||
76 | #endif | ||
77 | } | ||
78 | |||
79 | static const struct cpu_dev __cpuinitconst default_cpu = { | ||
80 | .c_init = default_init, | ||
81 | .c_vendor = "Unknown", | ||
82 | .c_x86_vendor = X86_VENDOR_UNKNOWN, | ||
83 | }; | ||
84 | |||
85 | static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; | ||
63 | 86 | ||
64 | DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | 87 | DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { |
65 | #ifdef CONFIG_X86_64 | 88 | #ifdef CONFIG_X86_64 |
@@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu) | |||
332 | 355 | ||
333 | static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; | 356 | static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; |
334 | 357 | ||
335 | static void __cpuinit default_init(struct cpuinfo_x86 *c) | ||
336 | { | ||
337 | #ifdef CONFIG_X86_64 | ||
338 | display_cacheinfo(c); | ||
339 | #else | ||
340 | /* Not much we can do here... */ | ||
341 | /* Check if at least it has cpuid */ | ||
342 | if (c->cpuid_level == -1) { | ||
343 | /* No cpuid. It must be an ancient CPU */ | ||
344 | if (c->x86 == 4) | ||
345 | strcpy(c->x86_model_id, "486"); | ||
346 | else if (c->x86 == 3) | ||
347 | strcpy(c->x86_model_id, "386"); | ||
348 | } | ||
349 | #endif | ||
350 | } | ||
351 | |||
352 | static const struct cpu_dev __cpuinitconst default_cpu = { | ||
353 | .c_init = default_init, | ||
354 | .c_vendor = "Unknown", | ||
355 | .c_x86_vendor = X86_VENDOR_UNKNOWN, | ||
356 | }; | ||
357 | |||
358 | static void __cpuinit get_model_name(struct cpuinfo_x86 *c) | 358 | static void __cpuinit get_model_name(struct cpuinfo_x86 *c) |
359 | { | 359 | { |
360 | unsigned int *v; | 360 | unsigned int *v; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 1cfb623ce11c..01213048f62f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -1226,8 +1226,13 @@ static void mce_init(void) | |||
1226 | } | 1226 | } |
1227 | 1227 | ||
1228 | /* Add per CPU specific workarounds here */ | 1228 | /* Add per CPU specific workarounds here */ |
1229 | static void mce_cpu_quirks(struct cpuinfo_x86 *c) | 1229 | static int mce_cpu_quirks(struct cpuinfo_x86 *c) |
1230 | { | 1230 | { |
1231 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { | ||
1232 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); | ||
1233 | return -EOPNOTSUPP; | ||
1234 | } | ||
1235 | |||
1231 | /* This should be disabled by the BIOS, but isn't always */ | 1236 | /* This should be disabled by the BIOS, but isn't always */ |
1232 | if (c->x86_vendor == X86_VENDOR_AMD) { | 1237 | if (c->x86_vendor == X86_VENDOR_AMD) { |
1233 | if (c->x86 == 15 && banks > 4) { | 1238 | if (c->x86 == 15 && banks > 4) { |
@@ -1273,11 +1278,20 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1273 | if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && | 1278 | if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && |
1274 | monarch_timeout < 0) | 1279 | monarch_timeout < 0) |
1275 | monarch_timeout = USEC_PER_SEC; | 1280 | monarch_timeout = USEC_PER_SEC; |
1281 | |||
1282 | /* | ||
1283 | * There are also broken BIOSes on some Pentium M and | ||
1284 | * earlier systems: | ||
1285 | */ | ||
1286 | if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0) | ||
1287 | mce_bootlog = 0; | ||
1276 | } | 1288 | } |
1277 | if (monarch_timeout < 0) | 1289 | if (monarch_timeout < 0) |
1278 | monarch_timeout = 0; | 1290 | monarch_timeout = 0; |
1279 | if (mce_bootlog != 0) | 1291 | if (mce_bootlog != 0) |
1280 | mce_panic_timeout = 30; | 1292 | mce_panic_timeout = 30; |
1293 | |||
1294 | return 0; | ||
1281 | } | 1295 | } |
1282 | 1296 | ||
1283 | static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | 1297 | static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) |
@@ -1338,11 +1352,10 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | |||
1338 | if (!mce_available(c)) | 1352 | if (!mce_available(c)) |
1339 | return; | 1353 | return; |
1340 | 1354 | ||
1341 | if (mce_cap_init() < 0) { | 1355 | if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { |
1342 | mce_disabled = 1; | 1356 | mce_disabled = 1; |
1343 | return; | 1357 | return; |
1344 | } | 1358 | } |
1345 | mce_cpu_quirks(c); | ||
1346 | 1359 | ||
1347 | machine_check_vector = do_machine_check; | 1360 | machine_check_vector = do_machine_check; |
1348 | 1361 | ||
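mce_cpu_quirks() now reports failure instead of returning void, and mcheck_init() treats that exactly like a failed capability probe. A toy sketch of the pattern, with invented function names:

    #include <stdio.h>
    #include <errno.h>

    static int cap_init(void)               { return 0; }  /* pretend caps probe fine */
    static int cpu_quirks(int vendor_known) { return vendor_known ? 0 : -EOPNOTSUPP; }

    int main(void)
    {
        int enabled = 1;

        /* Either probe failing disables the feature, as in mcheck_init(). */
        if (cap_init() < 0 || cpu_quirks(0) < 0)
            enabled = 0;

        printf("MCE %s\n", enabled ? "enabled" : "disabled");
        return 0;
    }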
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index bff8dd191dd5..5957a93e5173 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -36,6 +36,7 @@ | |||
36 | 36 | ||
37 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; | 37 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; |
38 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); | 38 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); |
39 | static DEFINE_PER_CPU(bool, thermal_throttle_active); | ||
39 | 40 | ||
40 | static atomic_t therm_throt_en = ATOMIC_INIT(0); | 41 | static atomic_t therm_throt_en = ATOMIC_INIT(0); |
41 | 42 | ||
@@ -96,27 +97,33 @@ static int therm_throt_process(int curr) | |||
96 | { | 97 | { |
97 | unsigned int cpu = smp_processor_id(); | 98 | unsigned int cpu = smp_processor_id(); |
98 | __u64 tmp_jiffs = get_jiffies_64(); | 99 | __u64 tmp_jiffs = get_jiffies_64(); |
100 | bool was_throttled = __get_cpu_var(thermal_throttle_active); | ||
101 | bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr; | ||
99 | 102 | ||
100 | if (curr) | 103 | if (is_throttled) |
101 | __get_cpu_var(thermal_throttle_count)++; | 104 | __get_cpu_var(thermal_throttle_count)++; |
102 | 105 | ||
103 | if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) | 106 | if (!(was_throttled ^ is_throttled) && |
107 | time_before64(tmp_jiffs, __get_cpu_var(next_check))) | ||
104 | return 0; | 108 | return 0; |
105 | 109 | ||
106 | __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; | 110 | __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; |
107 | 111 | ||
108 | /* if we just entered the thermal event */ | 112 | /* if we just entered the thermal event */ |
109 | if (curr) { | 113 | if (is_throttled) { |
110 | printk(KERN_CRIT "CPU%d: Temperature above threshold, " | 114 | printk(KERN_CRIT "CPU%d: Temperature above threshold, " |
111 | "cpu clock throttled (total events = %lu)\n", cpu, | 115 | "cpu clock throttled (total events = %lu)\n", |
112 | __get_cpu_var(thermal_throttle_count)); | 116 | cpu, __get_cpu_var(thermal_throttle_count)); |
113 | 117 | ||
114 | add_taint(TAINT_MACHINE_CHECK); | 118 | add_taint(TAINT_MACHINE_CHECK); |
115 | } else { | 119 | return 1; |
116 | printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); | 120 | } |
121 | if (was_throttled) { | ||
122 | printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); | ||
123 | return 1; | ||
117 | } | 124 | } |
118 | 125 | ||
119 | return 1; | 126 | return 0; |
120 | } | 127 | } |
121 | 128 | ||
122 | #ifdef CONFIG_SYSFS | 129 | #ifdef CONFIG_SYSFS |
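The rewritten therm_throt_process() logs only when the per-CPU throttle state actually changes, while keeping the old rate limit for repeated events in the same state. Roughly the same logic in a self-contained user-space sketch (one set of globals stands in for the per-CPU variables, and a plain counter stands in for jiffies):

    #include <stdbool.h>
    #include <stdio.h>

    #define CHECK_INTERVAL 4096ULL          /* stand-in for the jiffies-based window */

    static bool throttle_active;
    static unsigned long long next_check;
    static unsigned long throttle_count;

    /* Returns 1 when an event was logged, 0 otherwise. */
    static int throttle_event(bool curr, unsigned long long now)
    {
        bool was = throttle_active;
        bool is  = throttle_active = curr;

        if (is)
            throttle_count++;

        /* Same state and still inside the rate-limit window: stay quiet. */
        if (!(was ^ is) && now < next_check)
            return 0;

        next_check = now + CHECK_INTERVAL;

        if (is) {
            printf("throttled (total events = %lu)\n", throttle_count);
            return 1;
        }
        if (was) {
            printf("back to normal\n");
            return 1;
        }
        return 0;
    }

    int main(void)
    {
        throttle_event(true, 0);            /* logs: entered throttling */
        throttle_event(true, 10);           /* silent: same state, window not expired */
        throttle_event(false, 20);          /* logs: left throttling */
        return 0;
    }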
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a7aa8f900954..900332b800f8 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -55,6 +55,7 @@ struct x86_pmu { | |||
55 | int num_counters_fixed; | 55 | int num_counters_fixed; |
56 | int counter_bits; | 56 | int counter_bits; |
57 | u64 counter_mask; | 57 | u64 counter_mask; |
58 | int apic; | ||
58 | u64 max_period; | 59 | u64 max_period; |
59 | u64 intel_ctrl; | 60 | u64 intel_ctrl; |
60 | }; | 61 | }; |
@@ -72,8 +73,8 @@ static const u64 p6_perfmon_event_map[] = | |||
72 | { | 73 | { |
73 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, | 74 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, |
74 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | 75 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, |
75 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000, | 76 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, |
76 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0000, | 77 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, |
77 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | 78 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, |
78 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | 79 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, |
79 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | 80 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, |
@@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex); | |||
613 | 614 | ||
614 | static bool reserve_pmc_hardware(void) | 615 | static bool reserve_pmc_hardware(void) |
615 | { | 616 | { |
617 | #ifdef CONFIG_X86_LOCAL_APIC | ||
616 | int i; | 618 | int i; |
617 | 619 | ||
618 | if (nmi_watchdog == NMI_LOCAL_APIC) | 620 | if (nmi_watchdog == NMI_LOCAL_APIC) |
@@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void) | |||
627 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) | 629 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) |
628 | goto eventsel_fail; | 630 | goto eventsel_fail; |
629 | } | 631 | } |
632 | #endif | ||
630 | 633 | ||
631 | return true; | 634 | return true; |
632 | 635 | ||
636 | #ifdef CONFIG_X86_LOCAL_APIC | ||
633 | eventsel_fail: | 637 | eventsel_fail: |
634 | for (i--; i >= 0; i--) | 638 | for (i--; i >= 0; i--) |
635 | release_evntsel_nmi(x86_pmu.eventsel + i); | 639 | release_evntsel_nmi(x86_pmu.eventsel + i); |
@@ -644,10 +648,12 @@ perfctr_fail: | |||
644 | enable_lapic_nmi_watchdog(); | 648 | enable_lapic_nmi_watchdog(); |
645 | 649 | ||
646 | return false; | 650 | return false; |
651 | #endif | ||
647 | } | 652 | } |
648 | 653 | ||
649 | static void release_pmc_hardware(void) | 654 | static void release_pmc_hardware(void) |
650 | { | 655 | { |
656 | #ifdef CONFIG_X86_LOCAL_APIC | ||
651 | int i; | 657 | int i; |
652 | 658 | ||
653 | for (i = 0; i < x86_pmu.num_counters; i++) { | 659 | for (i = 0; i < x86_pmu.num_counters; i++) { |
@@ -657,6 +663,7 @@ static void release_pmc_hardware(void) | |||
657 | 663 | ||
658 | if (nmi_watchdog == NMI_LOCAL_APIC) | 664 | if (nmi_watchdog == NMI_LOCAL_APIC) |
659 | enable_lapic_nmi_watchdog(); | 665 | enable_lapic_nmi_watchdog(); |
666 | #endif | ||
660 | } | 667 | } |
661 | 668 | ||
662 | static void hw_perf_counter_destroy(struct perf_counter *counter) | 669 | static void hw_perf_counter_destroy(struct perf_counter *counter) |
@@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
748 | hwc->sample_period = x86_pmu.max_period; | 755 | hwc->sample_period = x86_pmu.max_period; |
749 | hwc->last_period = hwc->sample_period; | 756 | hwc->last_period = hwc->sample_period; |
750 | atomic64_set(&hwc->period_left, hwc->sample_period); | 757 | atomic64_set(&hwc->period_left, hwc->sample_period); |
758 | } else { | ||
759 | /* | ||
760 | * If we have a PMU initialized but no APIC | ||
761 | * interrupts, we cannot sample hardware | ||
762 | * counters (user-space has to fall back and | ||
763 | * sample via a hrtimer based software counter): | ||
764 | */ | ||
765 | if (!x86_pmu.apic) | ||
766 | return -EOPNOTSUPP; | ||
751 | } | 767 | } |
752 | 768 | ||
753 | counter->destroy = hw_perf_counter_destroy; | 769 | counter->destroy = hw_perf_counter_destroy; |
@@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs) | |||
1449 | 1465 | ||
1450 | void set_perf_counter_pending(void) | 1466 | void set_perf_counter_pending(void) |
1451 | { | 1467 | { |
1468 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1452 | apic->send_IPI_self(LOCAL_PENDING_VECTOR); | 1469 | apic->send_IPI_self(LOCAL_PENDING_VECTOR); |
1470 | #endif | ||
1453 | } | 1471 | } |
1454 | 1472 | ||
1455 | void perf_counters_lapic_init(void) | 1473 | void perf_counters_lapic_init(void) |
1456 | { | 1474 | { |
1457 | if (!x86_pmu_initialized()) | 1475 | #ifdef CONFIG_X86_LOCAL_APIC |
1476 | if (!x86_pmu.apic || !x86_pmu_initialized()) | ||
1458 | return; | 1477 | return; |
1459 | 1478 | ||
1460 | /* | 1479 | /* |
1461 | * Always use NMI for PMU | 1480 | * Always use NMI for PMU |
1462 | */ | 1481 | */ |
1463 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1482 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1483 | #endif | ||
1464 | } | 1484 | } |
1465 | 1485 | ||
1466 | static int __kprobes | 1486 | static int __kprobes |
@@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self, | |||
1484 | 1504 | ||
1485 | regs = args->regs; | 1505 | regs = args->regs; |
1486 | 1506 | ||
1507 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1487 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1508 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1509 | #endif | ||
1488 | /* | 1510 | /* |
1489 | * Can't rely on the handled return value to say it was our NMI, two | 1511 | * Can't rely on the handled return value to say it was our NMI, two |
1490 | * counters could trigger 'simultaneously' raising two back-to-back NMIs. | 1512 | * counters could trigger 'simultaneously' raising two back-to-back NMIs. |
@@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = { | |||
1515 | .event_map = p6_pmu_event_map, | 1537 | .event_map = p6_pmu_event_map, |
1516 | .raw_event = p6_pmu_raw_event, | 1538 | .raw_event = p6_pmu_raw_event, |
1517 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | 1539 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), |
1540 | .apic = 1, | ||
1518 | .max_period = (1ULL << 31) - 1, | 1541 | .max_period = (1ULL << 31) - 1, |
1519 | .version = 0, | 1542 | .version = 0, |
1520 | .num_counters = 2, | 1543 | .num_counters = 2, |
@@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = { | |||
1541 | .event_map = intel_pmu_event_map, | 1564 | .event_map = intel_pmu_event_map, |
1542 | .raw_event = intel_pmu_raw_event, | 1565 | .raw_event = intel_pmu_raw_event, |
1543 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | 1566 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
1567 | .apic = 1, | ||
1544 | /* | 1568 | /* |
1545 | * Intel PMCs cannot be accessed sanely above 32 bit width, | 1569 | * Intel PMCs cannot be accessed sanely above 32 bit width, |
1546 | * so we install an artificial 1<<31 period regardless of | 1570 | * so we install an artificial 1<<31 period regardless of |
@@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = { | |||
1564 | .num_counters = 4, | 1588 | .num_counters = 4, |
1565 | .counter_bits = 48, | 1589 | .counter_bits = 48, |
1566 | .counter_mask = (1ULL << 48) - 1, | 1590 | .counter_mask = (1ULL << 48) - 1, |
1591 | .apic = 1, | ||
1567 | /* use highest bit to detect overflow */ | 1592 | /* use highest bit to detect overflow */ |
1568 | .max_period = (1ULL << 47) - 1, | 1593 | .max_period = (1ULL << 47) - 1, |
1569 | }; | 1594 | }; |
@@ -1589,13 +1614,14 @@ static int p6_pmu_init(void) | |||
1589 | return -ENODEV; | 1614 | return -ENODEV; |
1590 | } | 1615 | } |
1591 | 1616 | ||
1617 | x86_pmu = p6_pmu; | ||
1618 | |||
1592 | if (!cpu_has_apic) { | 1619 | if (!cpu_has_apic) { |
1593 | pr_info("no Local APIC, try rebooting with lapic"); | 1620 | pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); |
1594 | return -ENODEV; | 1621 | pr_info("no hardware sampling interrupt available.\n"); |
1622 | x86_pmu.apic = 0; | ||
1595 | } | 1623 | } |
1596 | 1624 | ||
1597 | x86_pmu = p6_pmu; | ||
1598 | |||
1599 | return 0; | 1625 | return 0; |
1600 | } | 1626 | } |
1601 | 1627 | ||
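The new .apic field lets the PMU keep counting when no local APIC is usable while refusing sampling, since sampling depends on the PMU overflow interrupt. A simplified stand-alone sketch of that decision (struct and function names are stand-ins, not the kernel API):

    #include <errno.h>
    #include <stdio.h>

    struct pmu_caps {
        int apic;                           /* 1 if PMU interrupts are available */
    };

    static struct pmu_caps pmu = { .apic = 0 };   /* pretend the APIC is absent */

    static int counter_init(unsigned long sample_period)
    {
        /* Pure counting works without interrupts... */
        if (!sample_period)
            return 0;

        /* ...but sampling needs the overflow interrupt, so refuse it. */
        if (!pmu.apic)
            return -EOPNOTSUPP;
        return 0;
    }

    int main(void)
    {
        printf("counting: %d\n", counter_init(0));        /* 0 */
        printf("sampling: %d\n", counter_init(100000));   /* -EOPNOTSUPP */
        return 0;
    }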
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 96f7ac0bbf01..fe26ba3e3451 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
@@ -354,7 +354,7 @@ void __init efi_init(void) | |||
354 | */ | 354 | */ |
355 | c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); | 355 | c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); |
356 | if (c16) { | 356 | if (c16) { |
357 | for (i = 0; i < sizeof(vendor) && *c16; ++i) | 357 | for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) |
358 | vendor[i] = *c16++; | 358 | vendor[i] = *c16++; |
359 | vendor[i] = '\0'; | 359 | vendor[i] = '\0'; |
360 | } else | 360 | } else |
@@ -512,7 +512,7 @@ void __init efi_enter_virtual_mode(void) | |||
512 | && end_pfn <= max_pfn_mapped)) | 512 | && end_pfn <= max_pfn_mapped)) |
513 | va = __va(md->phys_addr); | 513 | va = __va(md->phys_addr); |
514 | else | 514 | else |
515 | va = efi_ioremap(md->phys_addr, size); | 515 | va = efi_ioremap(md->phys_addr, size, md->type); |
516 | 516 | ||
517 | md->virt_addr = (u64) (unsigned long) va; | 517 | md->virt_addr = (u64) (unsigned long) va; |
518 | 518 | ||
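The first efi.c hunk is an off-by-one fix: the copy loop has to stop at sizeof(vendor) - 1 so that the NUL terminator written after the loop stays inside the buffer. A standalone illustration with a deliberately small buffer (the firmware string is faked with a 16-bit array):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint16_t fw_vendor[] = { 'A', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ',
                                 'M', 'e', 'g', 'a', 't', 'r', 'e', 'n', 'd', 's', 0 };
        char vendor[8];                     /* deliberately too small */
        const uint16_t *c16 = fw_vendor;
        unsigned int i;

        /* Stop one short of the buffer size so vendor[i] = '\0' below is in bounds. */
        for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
            vendor[i] = *c16++;
        vendor[i] = '\0';

        printf("%s\n", vendor);             /* prints "America" */
        return 0;
    }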
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index 22c3b7828c50..ac0621a7ac3d 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c | |||
@@ -98,10 +98,14 @@ void __init efi_call_phys_epilog(void) | |||
98 | early_runtime_code_mapping_set_exec(0); | 98 | early_runtime_code_mapping_set_exec(0); |
99 | } | 99 | } |
100 | 100 | ||
101 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) | 101 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, |
102 | u32 type) | ||
102 | { | 103 | { |
103 | unsigned long last_map_pfn; | 104 | unsigned long last_map_pfn; |
104 | 105 | ||
106 | if (type == EFI_MEMORY_MAPPED_IO) | ||
107 | return ioremap(phys_addr, size); | ||
108 | |||
105 | last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); | 109 | last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); |
106 | if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) | 110 | if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) |
107 | return NULL; | 111 | return NULL; |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 8663afb56535..cc827ac9e8d3 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -261,9 +261,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
261 | * which will be freed later | 261 | * which will be freed later |
262 | */ | 262 | */ |
263 | 263 | ||
264 | #ifndef CONFIG_HOTPLUG_CPU | 264 | __CPUINIT |
265 | .section .init.text,"ax",@progbits | ||
266 | #endif | ||
267 | 265 | ||
268 | #ifdef CONFIG_SMP | 266 | #ifdef CONFIG_SMP |
269 | ENTRY(startup_32_smp) | 267 | ENTRY(startup_32_smp) |
@@ -602,7 +600,7 @@ ignore_int: | |||
602 | #endif | 600 | #endif |
603 | iret | 601 | iret |
604 | 602 | ||
605 | .section .cpuinit.data,"wa" | 603 | __REFDATA |
606 | .align 4 | 604 | .align 4 |
607 | ENTRY(initial_code) | 605 | ENTRY(initial_code) |
608 | .long i386_start_kernel | 606 | .long i386_start_kernel |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 3c945c0b3501..8fb4ce35bea2 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -32,7 +32,14 @@ int no_iommu __read_mostly; | |||
32 | /* Set this to 1 if there is a HW IOMMU in the system */ | 32 | /* Set this to 1 if there is a HW IOMMU in the system */ |
33 | int iommu_detected __read_mostly = 0; | 33 | int iommu_detected __read_mostly = 0; |
34 | 34 | ||
35 | int iommu_pass_through; | 35 | /* |
36 | * This variable becomes 1 if iommu=pt is passed on the kernel command line. | ||
37 | * If this variable is 1, IOMMU implementations do no DMA translation for | ||
38 | * devices and allow every device to access the whole physical memory. This is | ||
39 | * useful if a user wants to use an IOMMU only for KVM device assignment to | ||
40 | * guests and not for driver DMA translation. | ||
41 | */ | ||
42 | int iommu_pass_through __read_mostly; | ||
36 | 43 | ||
37 | dma_addr_t bad_dma_address __read_mostly = 0; | 44 | dma_addr_t bad_dma_address __read_mostly = 0; |
38 | EXPORT_SYMBOL(bad_dma_address); | 45 | EXPORT_SYMBOL(bad_dma_address); |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 994dd6a4a2a0..071166a4ba83 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -519,16 +519,12 @@ static void c1e_idle(void) | |||
519 | if (!cpumask_test_cpu(cpu, c1e_mask)) { | 519 | if (!cpumask_test_cpu(cpu, c1e_mask)) { |
520 | cpumask_set_cpu(cpu, c1e_mask); | 520 | cpumask_set_cpu(cpu, c1e_mask); |
521 | /* | 521 | /* |
522 | * Force broadcast so ACPI can not interfere. Needs | 522 | * Force broadcast so ACPI can not interfere. |
523 | * to run with interrupts enabled as it uses | ||
524 | * smp_function_call. | ||
525 | */ | 523 | */ |
526 | local_irq_enable(); | ||
527 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, | 524 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, |
528 | &cpu); | 525 | &cpu); |
529 | printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", | 526 | printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", |
530 | cpu); | 527 | cpu); |
531 | local_irq_disable(); | ||
532 | } | 528 | } |
533 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); | 529 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); |
534 | 530 | ||
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 508e982dd072..a06e8d101844 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/init.h> | 3 | #include <linux/init.h> |
4 | #include <linux/pm.h> | 4 | #include <linux/pm.h> |
5 | #include <linux/efi.h> | 5 | #include <linux/efi.h> |
6 | #include <linux/dmi.h> | ||
6 | #include <acpi/reboot.h> | 7 | #include <acpi/reboot.h> |
7 | #include <asm/io.h> | 8 | #include <asm/io.h> |
8 | #include <asm/apic.h> | 9 | #include <asm/apic.h> |
@@ -17,7 +18,6 @@ | |||
17 | #include <asm/cpu.h> | 18 | #include <asm/cpu.h> |
18 | 19 | ||
19 | #ifdef CONFIG_X86_32 | 20 | #ifdef CONFIG_X86_32 |
20 | # include <linux/dmi.h> | ||
21 | # include <linux/ctype.h> | 21 | # include <linux/ctype.h> |
22 | # include <linux/mc146818rtc.h> | 22 | # include <linux/mc146818rtc.h> |
23 | #else | 23 | #else |
@@ -404,6 +404,46 @@ EXPORT_SYMBOL(machine_real_restart); | |||
404 | 404 | ||
405 | #endif /* CONFIG_X86_32 */ | 405 | #endif /* CONFIG_X86_32 */ |
406 | 406 | ||
407 | /* | ||
408 | * Some Apple MacBook and MacBookPro models need reboot=p to be able to reboot | ||
409 | */ | ||
410 | static int __init set_pci_reboot(const struct dmi_system_id *d) | ||
411 | { | ||
412 | if (reboot_type != BOOT_CF9) { | ||
413 | reboot_type = BOOT_CF9; | ||
414 | printk(KERN_INFO "%s series board detected. " | ||
415 | "Selecting PCI-method for reboots.\n", d->ident); | ||
416 | } | ||
417 | return 0; | ||
418 | } | ||
419 | |||
420 | static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { | ||
421 | { /* Handle problems with rebooting on Apple MacBook5 */ | ||
422 | .callback = set_pci_reboot, | ||
423 | .ident = "Apple MacBook5", | ||
424 | .matches = { | ||
425 | DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), | ||
426 | DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"), | ||
427 | }, | ||
428 | }, | ||
429 | { /* Handle problems with rebooting on Apple MacBookPro5 */ | ||
430 | .callback = set_pci_reboot, | ||
431 | .ident = "Apple MacBookPro5", | ||
432 | .matches = { | ||
433 | DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), | ||
434 | DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"), | ||
435 | }, | ||
436 | }, | ||
437 | { } | ||
438 | }; | ||
439 | |||
440 | static int __init pci_reboot_init(void) | ||
441 | { | ||
442 | dmi_check_system(pci_reboot_dmi_table); | ||
443 | return 0; | ||
444 | } | ||
445 | core_initcall(pci_reboot_init); | ||
446 | |||
407 | static inline void kb_wait(void) | 447 | static inline void kb_wait(void) |
408 | { | 448 | { |
409 | int i; | 449 | int i; |
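The added table follows the usual DMI quirk pattern: match the system's vendor and product strings against a table and run the callback of every matching entry. A user-space analogue of how such a table is walked (matching is reduced to substring checks here):

    #include <stdio.h>
    #include <string.h>

    struct quirk {
        int (*callback)(const struct quirk *q);
        const char *ident;
        const char *vendor;
        const char *product;
    };

    static int use_pci_reboot;

    static int set_pci_reboot(const struct quirk *q)
    {
        use_pci_reboot = 1;
        printf("%s detected, rebooting via PCI port 0xcf9\n", q->ident);
        return 0;
    }

    static const struct quirk quirks[] = {
        { set_pci_reboot, "Apple MacBook5",    "Apple Inc.", "MacBook5"    },
        { set_pci_reboot, "Apple MacBookPro5", "Apple Inc.", "MacBookPro5" },
        { 0 }
    };

    static void check_quirks(const char *vendor, const char *product)
    {
        const struct quirk *q;

        for (q = quirks; q->callback; q++)
            if (strstr(vendor, q->vendor) && strstr(product, q->product))
                q->callback(q);
    }

    int main(void)
    {
        check_quirks("Apple Inc.", "MacBookPro5,3");
        return use_pci_reboot ? 0 : 1;
    }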
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 29a3eef7cf4a..07d81916f212 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -165,7 +165,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | |||
165 | 165 | ||
166 | if (!chosen) { | 166 | if (!chosen) { |
167 | size_t vm_size = VMALLOC_END - VMALLOC_START; | 167 | size_t vm_size = VMALLOC_END - VMALLOC_START; |
168 | size_t tot_size = num_possible_cpus() * PMD_SIZE; | 168 | size_t tot_size = nr_cpu_ids * PMD_SIZE; |
169 | 169 | ||
170 | /* on non-NUMA, embedding is better */ | 170 | /* on non-NUMA, embedding is better */ |
171 | if (!pcpu_need_numa()) | 171 | if (!pcpu_need_numa()) |
@@ -199,7 +199,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | |||
199 | dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; | 199 | dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; |
200 | 200 | ||
201 | /* allocate pointer array and alloc large pages */ | 201 | /* allocate pointer array and alloc large pages */ |
202 | map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); | 202 | map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0])); |
203 | pcpul_map = alloc_bootmem(map_size); | 203 | pcpul_map = alloc_bootmem(map_size); |
204 | 204 | ||
205 | for_each_possible_cpu(cpu) { | 205 | for_each_possible_cpu(cpu) { |
@@ -228,7 +228,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | |||
228 | 228 | ||
229 | /* allocate address and map */ | 229 | /* allocate address and map */ |
230 | pcpul_vm.flags = VM_ALLOC; | 230 | pcpul_vm.flags = VM_ALLOC; |
231 | pcpul_vm.size = num_possible_cpus() * PMD_SIZE; | 231 | pcpul_vm.size = nr_cpu_ids * PMD_SIZE; |
232 | vm_area_register_early(&pcpul_vm, PMD_SIZE); | 232 | vm_area_register_early(&pcpul_vm, PMD_SIZE); |
233 | 233 | ||
234 | for_each_possible_cpu(cpu) { | 234 | for_each_possible_cpu(cpu) { |
@@ -250,8 +250,8 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | |||
250 | PMD_SIZE, pcpul_vm.addr, NULL); | 250 | PMD_SIZE, pcpul_vm.addr, NULL); |
251 | 251 | ||
252 | /* sort pcpul_map array for pcpu_lpage_remapped() */ | 252 | /* sort pcpul_map array for pcpu_lpage_remapped() */ |
253 | for (i = 0; i < num_possible_cpus() - 1; i++) | 253 | for (i = 0; i < nr_cpu_ids - 1; i++) |
254 | for (j = i + 1; j < num_possible_cpus(); j++) | 254 | for (j = i + 1; j < nr_cpu_ids; j++) |
255 | if (pcpul_map[i].ptr > pcpul_map[j].ptr) { | 255 | if (pcpul_map[i].ptr > pcpul_map[j].ptr) { |
256 | struct pcpul_ent tmp = pcpul_map[i]; | 256 | struct pcpul_ent tmp = pcpul_map[i]; |
257 | pcpul_map[i] = pcpul_map[j]; | 257 | pcpul_map[i] = pcpul_map[j]; |
@@ -288,7 +288,7 @@ void *pcpu_lpage_remapped(void *kaddr) | |||
288 | { | 288 | { |
289 | void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); | 289 | void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); |
290 | unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; | 290 | unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; |
291 | int left = 0, right = num_possible_cpus() - 1; | 291 | int left = 0, right = nr_cpu_ids - 1; |
292 | int pos; | 292 | int pos; |
293 | 293 | ||
294 | /* pcpul in use at all? */ | 294 | /* pcpul in use at all? */ |
@@ -377,7 +377,7 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) | |||
377 | pcpu4k_nr_static_pages = PFN_UP(static_size); | 377 | pcpu4k_nr_static_pages = PFN_UP(static_size); |
378 | 378 | ||
379 | /* unaligned allocations can't be freed, round up to page size */ | 379 | /* unaligned allocations can't be freed, round up to page size */ |
380 | pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() | 380 | pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids |
381 | * sizeof(pcpu4k_pages[0])); | 381 | * sizeof(pcpu4k_pages[0])); |
382 | pcpu4k_pages = alloc_bootmem(pages_size); | 382 | pcpu4k_pages = alloc_bootmem(pages_size); |
383 | 383 | ||
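The num_possible_cpus() to nr_cpu_ids substitutions matter when possible CPU numbering is sparse: num_possible_cpus() counts the possible CPUs, while nr_cpu_ids is one past the highest possible CPU id, and arrays indexed by CPU id (such as pcpul_map[]) must be sized by the latter. A toy illustration with a hypothetical possible-mask, assuming a 64-bit unsigned long:

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical "possible" mask: CPUs 0, 1 and 6 are possible. */
        unsigned long possible = (1UL << 0) | (1UL << 1) | (1UL << 6);

        int num_possible = __builtin_popcountl(possible);   /* 3 */
        int nr_cpu_ids   = 64 - __builtin_clzl(possible);   /* 7 */

        printf("num_possible_cpus() = %d, nr_cpu_ids = %d\n",
               num_possible, nr_cpu_ids);

        /*
         * An array indexed by CPU id needs nr_cpu_ids entries; sizing it
         * with num_possible_cpus() would let CPU 6 index past the end.
         */
        return 0;
    }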
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 8ccabb8a2f6a..77b9689f8edb 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -744,6 +744,7 @@ uv_activation_descriptor_init(int node, int pnode) | |||
744 | * note that base_dest_nodeid is actually a nasid. | 744 | * note that base_dest_nodeid is actually a nasid. |
745 | */ | 745 | */ |
746 | ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1; | 746 | ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1; |
747 | ad2->header.dest_subnodeid = 0x10; /* the LB */ | ||
747 | ad2->header.command = UV_NET_ENDPOINT_INTD; | 748 | ad2->header.command = UV_NET_ENDPOINT_INTD; |
748 | ad2->header.int_both = 1; | 749 | ad2->header.int_both = 1; |
749 | /* | 750 | /* |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6e1a368d21d4..71f4368b357e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -275,15 +275,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) | |||
275 | * use the TSC value at the transitions to calculate a pretty | 275 | * use the TSC value at the transitions to calculate a pretty |
276 | * good value for the TSC frequency. | 276 | * good value for the TSC frequency. |
277 | */ | 277 | */ |
278 | static inline int pit_verify_msb(unsigned char val) | ||
279 | { | ||
280 | /* Ignore LSB */ | ||
281 | inb(0x42); | ||
282 | return inb(0x42) == val; | ||
283 | } | ||
284 | |||
278 | static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) | 285 | static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) |
279 | { | 286 | { |
280 | int count; | 287 | int count; |
281 | u64 tsc = 0; | 288 | u64 tsc = 0; |
282 | 289 | ||
283 | for (count = 0; count < 50000; count++) { | 290 | for (count = 0; count < 50000; count++) { |
284 | /* Ignore LSB */ | 291 | if (!pit_verify_msb(val)) |
285 | inb(0x42); | ||
286 | if (inb(0x42) != val) | ||
287 | break; | 292 | break; |
288 | tsc = get_cycles(); | 293 | tsc = get_cycles(); |
289 | } | 294 | } |
@@ -336,8 +341,7 @@ static unsigned long quick_pit_calibrate(void) | |||
336 | * to do that is to just read back the 16-bit counter | 341 | * to do that is to just read back the 16-bit counter |
337 | * once from the PIT. | 342 | * once from the PIT. |
338 | */ | 343 | */ |
339 | inb(0x42); | 344 | pit_verify_msb(0); |
340 | inb(0x42); | ||
341 | 345 | ||
342 | if (pit_expect_msb(0xff, &tsc, &d1)) { | 346 | if (pit_expect_msb(0xff, &tsc, &d1)) { |
343 | for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { | 347 | for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { |
@@ -348,8 +352,19 @@ static unsigned long quick_pit_calibrate(void) | |||
348 | * Iterate until the error is less than 500 ppm | 352 | * Iterate until the error is less than 500 ppm |
349 | */ | 353 | */ |
350 | delta -= tsc; | 354 | delta -= tsc; |
351 | if (d1+d2 < delta >> 11) | 355 | if (d1+d2 >= delta >> 11) |
352 | goto success; | 356 | continue; |
357 | |||
358 | /* | ||
359 | * Check the PIT one more time to verify that | ||
360 | * all TSC reads were stable wrt the PIT. | ||
361 | * | ||
362 | * This also guarantees serialization of the | ||
363 | * last cycle read ('d2') in pit_expect_msb. | ||
364 | */ | ||
365 | if (!pit_verify_msb(0xfe - i)) | ||
366 | break; | ||
367 | goto success; | ||
353 | } | 368 | } |
354 | } | 369 | } |
355 | printk("Fast TSC calibration failed\n"); | 370 | printk("Fast TSC calibration failed\n"); |
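The error bound in the calibration loop is plain integer arithmetic: delta >> 11 is delta / 2048, about 488 ppm, so the result is only accepted once the read uncertainty d1 + d2 drops below roughly 0.05% of the elapsed cycles. A quick numeric check with made-up values:

    #include <stdio.h>

    int main(void)
    {
        unsigned long delta = 3000000;      /* cycles elapsed over the PIT window */
        unsigned long d1 = 600, d2 = 700;

        /* Same test as quick_pit_calibrate(): keep iterating while too noisy. */
        if (d1 + d2 >= delta >> 11)
            printf("still too noisy: %lu >= %lu\n", d1 + d2, delta >> 11);
        else
            printf("accepted: error below ~500 ppm (%lu < %lu)\n",
                   d1 + d2, delta >> 11);

        return 0;
    }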
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index b263423fbe2a..95a7289e4b0c 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -441,7 +441,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, | |||
441 | ap.ds = __USER_DS; | 441 | ap.ds = __USER_DS; |
442 | ap.es = __USER_DS; | 442 | ap.es = __USER_DS; |
443 | ap.fs = __KERNEL_PERCPU; | 443 | ap.fs = __KERNEL_PERCPU; |
444 | ap.gs = 0; | 444 | ap.gs = __KERNEL_STACK_CANARY; |
445 | 445 | ||
446 | ap.eflags = 0; | 446 | ap.eflags = 0; |
447 | 447 | ||
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 59f31d2dd435..9fc178255c04 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -46,11 +46,10 @@ PHDRS { | |||
46 | data PT_LOAD FLAGS(7); /* RWE */ | 46 | data PT_LOAD FLAGS(7); /* RWE */ |
47 | #ifdef CONFIG_X86_64 | 47 | #ifdef CONFIG_X86_64 |
48 | user PT_LOAD FLAGS(7); /* RWE */ | 48 | user PT_LOAD FLAGS(7); /* RWE */ |
49 | data.init PT_LOAD FLAGS(7); /* RWE */ | ||
50 | #ifdef CONFIG_SMP | 49 | #ifdef CONFIG_SMP |
51 | percpu PT_LOAD FLAGS(7); /* RWE */ | 50 | percpu PT_LOAD FLAGS(7); /* RWE */ |
52 | #endif | 51 | #endif |
53 | data.init2 PT_LOAD FLAGS(7); /* RWE */ | 52 | init PT_LOAD FLAGS(7); /* RWE */ |
54 | #endif | 53 | #endif |
55 | note PT_NOTE FLAGS(0); /* ___ */ | 54 | note PT_NOTE FLAGS(0); /* ___ */ |
56 | } | 55 | } |
@@ -103,65 +102,43 @@ SECTIONS | |||
103 | __stop___ex_table = .; | 102 | __stop___ex_table = .; |
104 | } :text = 0x9090 | 103 | } :text = 0x9090 |
105 | 104 | ||
106 | RODATA | 105 | RO_DATA(PAGE_SIZE) |
107 | 106 | ||
108 | /* Data */ | 107 | /* Data */ |
109 | . = ALIGN(PAGE_SIZE); | ||
110 | .data : AT(ADDR(.data) - LOAD_OFFSET) { | 108 | .data : AT(ADDR(.data) - LOAD_OFFSET) { |
111 | /* Start of data section */ | 109 | /* Start of data section */ |
112 | _sdata = .; | 110 | _sdata = .; |
113 | DATA_DATA | 111 | |
114 | CONSTRUCTORS | 112 | /* init_task */ |
115 | } :data | 113 | INIT_TASK_DATA(THREAD_SIZE) |
116 | 114 | ||
117 | #ifdef CONFIG_X86_32 | 115 | #ifdef CONFIG_X86_32 |
118 | /* 32 bit has nosave before _edata */ | 116 | /* 32 bit has nosave before _edata */ |
119 | . = ALIGN(PAGE_SIZE); | 117 | NOSAVE_DATA |
120 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { | ||
121 | __nosave_begin = .; | ||
122 | *(.data.nosave) | ||
123 | . = ALIGN(PAGE_SIZE); | ||
124 | __nosave_end = .; | ||
125 | } | ||
126 | #endif | 118 | #endif |
127 | 119 | ||
128 | . = ALIGN(PAGE_SIZE); | 120 | PAGE_ALIGNED_DATA(PAGE_SIZE) |
129 | .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { | ||
130 | *(.data.page_aligned) | ||
131 | *(.data.idt) | 121 | *(.data.idt) |
132 | } | ||
133 | 122 | ||
134 | #ifdef CONFIG_X86_32 | 123 | CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES) |
135 | . = ALIGN(32); | ||
136 | #else | ||
137 | . = ALIGN(PAGE_SIZE); | ||
138 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | ||
139 | #endif | ||
140 | .data.cacheline_aligned : | ||
141 | AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { | ||
142 | *(.data.cacheline_aligned) | ||
143 | } | ||
144 | 124 | ||
145 | /* rarely changed data like cpu maps */ | 125 | DATA_DATA |
146 | #ifdef CONFIG_X86_32 | 126 | CONSTRUCTORS |
147 | . = ALIGN(32); | 127 | |
148 | #else | 128 | /* rarely changed data like cpu maps */ |
149 | . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); | 129 | READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES) |
150 | #endif | ||
151 | .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { | ||
152 | *(.data.read_mostly) | ||
153 | 130 | ||
154 | /* End of data section */ | 131 | /* End of data section */ |
155 | _edata = .; | 132 | _edata = .; |
156 | } | 133 | } :data |
157 | 134 | ||
158 | #ifdef CONFIG_X86_64 | 135 | #ifdef CONFIG_X86_64 |
159 | 136 | ||
160 | #define VSYSCALL_ADDR (-10*1024*1024) | 137 | #define VSYSCALL_ADDR (-10*1024*1024) |
161 | #define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \ | 138 | #define VSYSCALL_PHYS_ADDR ((LOADADDR(.data) + SIZEOF(.data) + \ |
162 | SIZEOF(.data.read_mostly) + 4095) & ~(4095)) | 139 | PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) |
163 | #define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \ | 140 | #define VSYSCALL_VIRT_ADDR ((ADDR(.data) + SIZEOF(.data) + \ |
164 | SIZEOF(.data.read_mostly) + 4095) & ~(4095)) | 141 | PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) |
165 | 142 | ||
166 | #define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) | 143 | #define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) |
167 | #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) | 144 | #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) |
@@ -227,35 +204,29 @@ SECTIONS | |||
227 | 204 | ||
228 | #endif /* CONFIG_X86_64 */ | 205 | #endif /* CONFIG_X86_64 */ |
229 | 206 | ||
230 | /* init_task */ | 207 | /* Init code and data - will be freed after init */ |
231 | . = ALIGN(THREAD_SIZE); | 208 | . = ALIGN(PAGE_SIZE); |
232 | .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { | 209 | .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) { |
233 | *(.data.init_task) | 210 | __init_begin = .; /* paired with __init_end */ |
234 | } | 211 | } |
235 | #ifdef CONFIG_X86_64 | ||
236 | :data.init | ||
237 | #endif | ||
238 | 212 | ||
213 | #if defined(CONFIG_X86_64) && defined(CONFIG_SMP) | ||
239 | /* | 214 | /* |
240 | * smp_locks might be freed after init | 215 | * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the |
241 | * start/end must be page aligned | 216 | * output PHDR, so the next output section - .init.text - should |
217 | * start another segment - init. | ||
242 | */ | 218 | */ |
243 | . = ALIGN(PAGE_SIZE); | 219 | PERCPU_VADDR(0, :percpu) |
244 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { | 220 | #endif |
245 | __smp_locks = .; | ||
246 | *(.smp_locks) | ||
247 | __smp_locks_end = .; | ||
248 | . = ALIGN(PAGE_SIZE); | ||
249 | } | ||
250 | 221 | ||
251 | /* Init code and data - will be freed after init */ | ||
252 | . = ALIGN(PAGE_SIZE); | ||
253 | .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { | 222 | .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { |
254 | __init_begin = .; /* paired with __init_end */ | ||
255 | _sinittext = .; | 223 | _sinittext = .; |
256 | INIT_TEXT | 224 | INIT_TEXT |
257 | _einittext = .; | 225 | _einittext = .; |
258 | } | 226 | } |
227 | #ifdef CONFIG_X86_64 | ||
228 | :init | ||
229 | #endif | ||
259 | 230 | ||
260 | .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { | 231 | .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { |
261 | INIT_DATA | 232 | INIT_DATA |
@@ -326,17 +297,7 @@ SECTIONS | |||
326 | } | 297 | } |
327 | #endif | 298 | #endif |
328 | 299 | ||
329 | #if defined(CONFIG_X86_64) && defined(CONFIG_SMP) | 300 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) |
330 | /* | ||
331 | * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the | ||
332 | * output PHDR, so the next output section - __data_nosave - should | ||
333 | * start another section data.init2. Also, pda should be at the head of | ||
334 | * percpu area. Preallocate it and define the percpu offset symbol | ||
335 | * so that it can be accessed as a percpu variable. | ||
336 | */ | ||
337 | . = ALIGN(PAGE_SIZE); | ||
338 | PERCPU_VADDR(0, :percpu) | ||
339 | #else | ||
340 | PERCPU(PAGE_SIZE) | 301 | PERCPU(PAGE_SIZE) |
341 | #endif | 302 | #endif |
342 | 303 | ||
@@ -347,15 +308,22 @@ SECTIONS | |||
347 | __init_end = .; | 308 | __init_end = .; |
348 | } | 309 | } |
349 | 310 | ||
311 | /* | ||
312 | * smp_locks might be freed after init | ||
313 | * start/end must be page aligned | ||
314 | */ | ||
315 | . = ALIGN(PAGE_SIZE); | ||
316 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { | ||
317 | __smp_locks = .; | ||
318 | *(.smp_locks) | ||
319 | __smp_locks_end = .; | ||
320 | . = ALIGN(PAGE_SIZE); | ||
321 | } | ||
322 | |||
350 | #ifdef CONFIG_X86_64 | 323 | #ifdef CONFIG_X86_64 |
351 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { | 324 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { |
352 | . = ALIGN(PAGE_SIZE); | 325 | NOSAVE_DATA |
353 | __nosave_begin = .; | 326 | } |
354 | *(.data.nosave) | ||
355 | . = ALIGN(PAGE_SIZE); | ||
356 | __nosave_end = .; | ||
357 | } :data.init2 | ||
358 | /* use another section data.init2, see PERCPU_VADDR() above */ | ||
359 | #endif | 327 | #endif |
360 | 328 | ||
361 | /* BSS */ | 329 | /* BSS */ |
@@ -393,8 +361,8 @@ SECTIONS | |||
393 | 361 | ||
394 | 362 | ||
395 | #ifdef CONFIG_X86_32 | 363 | #ifdef CONFIG_X86_32 |
396 | ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), | 364 | . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), |
397 | "kernel image bigger than KERNEL_IMAGE_SIZE") | 365 | "kernel image bigger than KERNEL_IMAGE_SIZE"); |
398 | #else | 366 | #else |
399 | /* | 367 | /* |
400 | * Per-cpu symbols which need to be offset from __per_cpu_load | 368 | * Per-cpu symbols which need to be offset from __per_cpu_load |
@@ -407,12 +375,12 @@ INIT_PER_CPU(irq_stack_union); | |||
407 | /* | 375 | /* |
408 | * Build-time check on the image size: | 376 | * Build-time check on the image size: |
409 | */ | 377 | */ |
410 | ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), | 378 | . = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), |
411 | "kernel image bigger than KERNEL_IMAGE_SIZE") | 379 | "kernel image bigger than KERNEL_IMAGE_SIZE"); |
412 | 380 | ||
413 | #ifdef CONFIG_SMP | 381 | #ifdef CONFIG_SMP |
414 | ASSERT((per_cpu__irq_stack_union == 0), | 382 | . = ASSERT((per_cpu__irq_stack_union == 0), |
415 | "irq_stack_union is not at start of per-cpu area"); | 383 | "irq_stack_union is not at start of per-cpu area"); |
416 | #endif | 384 | #endif |
417 | 385 | ||
418 | #endif /* CONFIG_X86_32 */ | 386 | #endif /* CONFIG_X86_32 */ |
@@ -420,7 +388,7 @@ ASSERT((per_cpu__irq_stack_union == 0), | |||
420 | #ifdef CONFIG_KEXEC | 388 | #ifdef CONFIG_KEXEC |
421 | #include <asm/kexec.h> | 389 | #include <asm/kexec.h> |
422 | 390 | ||
423 | ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, | 391 | . = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, |
424 | "kexec control code size is too big") | 392 | "kexec control code size is too big"); |
425 | #endif | 393 | #endif |
426 | 394 | ||