diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/amd_iommu.c | 489 | ||||
-rw-r--r-- | arch/x86/kernel/amd_iommu_init.c | 42 | ||||
-rw-r--r-- | arch/x86/kernel/aperture_64.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/apic/nmi.c | 20 | ||||
-rw-r--r-- | arch/x86/kernel/asm-offsets_64.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_counter.c | 329 | ||||
-rw-r--r-- | arch/x86/kernel/ftrace.c | 51 | ||||
-rw-r--r-- | arch/x86/kernel/pci-dma.c | 17 | ||||
-rw-r--r-- | arch/x86/kernel/pci-gart_64.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/pci-nommu.c | 29 | ||||
-rw-r--r-- | arch/x86/kernel/pci-swiotlb.c | 25 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 13 | ||||
-rw-r--r-- | arch/x86/kernel/signal.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/sys_x86_64.c | 8 |
14 files changed, 818 insertions, 219 deletions
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6c99f5037801..98f230f6a28d 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -41,9 +41,13 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); | |||
41 | static LIST_HEAD(iommu_pd_list); | 41 | static LIST_HEAD(iommu_pd_list); |
42 | static DEFINE_SPINLOCK(iommu_pd_list_lock); | 42 | static DEFINE_SPINLOCK(iommu_pd_list_lock); |
43 | 43 | ||
44 | #ifdef CONFIG_IOMMU_API | 44 | /* |
45 | * Domain for untranslated devices - only allocated | ||
46 | * if iommu=pt passed on kernel cmd line. | ||
47 | */ | ||
48 | static struct protection_domain *pt_domain; | ||
49 | |||
45 | static struct iommu_ops amd_iommu_ops; | 50 | static struct iommu_ops amd_iommu_ops; |
46 | #endif | ||
47 | 51 | ||
48 | /* | 52 | /* |
49 | * general struct to manage commands send to an IOMMU | 53 | * general struct to manage commands send to an IOMMU |
@@ -55,16 +59,16 @@ struct iommu_cmd { | |||
55 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | 59 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, |
56 | struct unity_map_entry *e); | 60 | struct unity_map_entry *e); |
57 | static struct dma_ops_domain *find_protection_domain(u16 devid); | 61 | static struct dma_ops_domain *find_protection_domain(u16 devid); |
58 | static u64* alloc_pte(struct protection_domain *dom, | 62 | static u64 *alloc_pte(struct protection_domain *domain, |
59 | unsigned long address, u64 | 63 | unsigned long address, int end_lvl, |
60 | **pte_page, gfp_t gfp); | 64 | u64 **pte_page, gfp_t gfp); |
61 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | 65 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, |
62 | unsigned long start_page, | 66 | unsigned long start_page, |
63 | unsigned int pages); | 67 | unsigned int pages); |
64 | 68 | static void reset_iommu_command_buffer(struct amd_iommu *iommu); | |
65 | #ifndef BUS_NOTIFY_UNBOUND_DRIVER | 69 | static u64 *fetch_pte(struct protection_domain *domain, |
66 | #define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 | 70 | unsigned long address, int map_size); |
67 | #endif | 71 | static void update_domain(struct protection_domain *domain); |
68 | 72 | ||
69 | #ifdef CONFIG_AMD_IOMMU_STATS | 73 | #ifdef CONFIG_AMD_IOMMU_STATS |
70 | 74 | ||
@@ -138,7 +142,25 @@ static int iommu_has_npcache(struct amd_iommu *iommu) | |||
138 | * | 142 | * |
139 | ****************************************************************************/ | 143 | ****************************************************************************/ |
140 | 144 | ||
141 | static void iommu_print_event(void *__evt) | 145 | static void dump_dte_entry(u16 devid) |
146 | { | ||
147 | int i; | ||
148 | |||
149 | for (i = 0; i < 8; ++i) | ||
150 | pr_err("AMD-Vi: DTE[%d]: %08x\n", i, | ||
151 | amd_iommu_dev_table[devid].data[i]); | ||
152 | } | ||
153 | |||
154 | static void dump_command(unsigned long phys_addr) | ||
155 | { | ||
156 | struct iommu_cmd *cmd = phys_to_virt(phys_addr); | ||
157 | int i; | ||
158 | |||
159 | for (i = 0; i < 4; ++i) | ||
160 | pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); | ||
161 | } | ||
162 | |||
163 | static void iommu_print_event(struct amd_iommu *iommu, void *__evt) | ||
142 | { | 164 | { |
143 | u32 *event = __evt; | 165 | u32 *event = __evt; |
144 | int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; | 166 | int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; |
@@ -147,7 +169,7 @@ static void iommu_print_event(void *__evt) | |||
147 | int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; | 169 | int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; |
148 | u64 address = (u64)(((u64)event[3]) << 32) | event[2]; | 170 | u64 address = (u64)(((u64)event[3]) << 32) | event[2]; |
149 | 171 | ||
150 | printk(KERN_ERR "AMD IOMMU: Event logged ["); | 172 | printk(KERN_ERR "AMD-Vi: Event logged ["); |
151 | 173 | ||
152 | switch (type) { | 174 | switch (type) { |
153 | case EVENT_TYPE_ILL_DEV: | 175 | case EVENT_TYPE_ILL_DEV: |
@@ -155,6 +177,7 @@ static void iommu_print_event(void *__evt) | |||
155 | "address=0x%016llx flags=0x%04x]\n", | 177 | "address=0x%016llx flags=0x%04x]\n", |
156 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | 178 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), |
157 | address, flags); | 179 | address, flags); |
180 | dump_dte_entry(devid); | ||
158 | break; | 181 | break; |
159 | case EVENT_TYPE_IO_FAULT: | 182 | case EVENT_TYPE_IO_FAULT: |
160 | printk("IO_PAGE_FAULT device=%02x:%02x.%x " | 183 | printk("IO_PAGE_FAULT device=%02x:%02x.%x " |
@@ -176,6 +199,8 @@ static void iommu_print_event(void *__evt) | |||
176 | break; | 199 | break; |
177 | case EVENT_TYPE_ILL_CMD: | 200 | case EVENT_TYPE_ILL_CMD: |
178 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); | 201 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); |
202 | reset_iommu_command_buffer(iommu); | ||
203 | dump_command(address); | ||
179 | break; | 204 | break; |
180 | case EVENT_TYPE_CMD_HARD_ERR: | 205 | case EVENT_TYPE_CMD_HARD_ERR: |
181 | printk("COMMAND_HARDWARE_ERROR address=0x%016llx " | 206 | printk("COMMAND_HARDWARE_ERROR address=0x%016llx " |
@@ -209,7 +234,7 @@ static void iommu_poll_events(struct amd_iommu *iommu) | |||
209 | tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); | 234 | tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); |
210 | 235 | ||
211 | while (head != tail) { | 236 | while (head != tail) { |
212 | iommu_print_event(iommu->evt_buf + head); | 237 | iommu_print_event(iommu, iommu->evt_buf + head); |
213 | head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; | 238 | head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; |
214 | } | 239 | } |
215 | 240 | ||
@@ -296,8 +321,11 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu) | |||
296 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; | 321 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; |
297 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | 322 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); |
298 | 323 | ||
299 | if (unlikely(i == EXIT_LOOP_COUNT)) | 324 | if (unlikely(i == EXIT_LOOP_COUNT)) { |
300 | panic("AMD IOMMU: Completion wait loop failed\n"); | 325 | spin_unlock(&iommu->lock); |
326 | reset_iommu_command_buffer(iommu); | ||
327 | spin_lock(&iommu->lock); | ||
328 | } | ||
301 | } | 329 | } |
302 | 330 | ||
303 | /* | 331 | /* |
@@ -445,47 +473,78 @@ static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) | |||
445 | } | 473 | } |
446 | 474 | ||
447 | /* | 475 | /* |
476 | * This function flushes one domain on one IOMMU | ||
477 | */ | ||
478 | static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) | ||
479 | { | ||
480 | struct iommu_cmd cmd; | ||
481 | unsigned long flags; | ||
482 | |||
483 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | ||
484 | domid, 1, 1); | ||
485 | |||
486 | spin_lock_irqsave(&iommu->lock, flags); | ||
487 | __iommu_queue_command(iommu, &cmd); | ||
488 | __iommu_completion_wait(iommu); | ||
489 | __iommu_wait_for_completion(iommu); | ||
490 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
491 | } | ||
492 | |||
493 | static void flush_all_domains_on_iommu(struct amd_iommu *iommu) | ||
494 | { | ||
495 | int i; | ||
496 | |||
497 | for (i = 1; i < MAX_DOMAIN_ID; ++i) { | ||
498 | if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) | ||
499 | continue; | ||
500 | flush_domain_on_iommu(iommu, i); | ||
501 | } | ||
502 | |||
503 | } | ||
504 | |||
505 | /* | ||
448 | * This function is used to flush the IO/TLB for a given protection domain | 506 | * This function is used to flush the IO/TLB for a given protection domain |
449 | * on every IOMMU in the system | 507 | * on every IOMMU in the system |
450 | */ | 508 | */ |
451 | static void iommu_flush_domain(u16 domid) | 509 | static void iommu_flush_domain(u16 domid) |
452 | { | 510 | { |
453 | unsigned long flags; | ||
454 | struct amd_iommu *iommu; | 511 | struct amd_iommu *iommu; |
455 | struct iommu_cmd cmd; | ||
456 | 512 | ||
457 | INC_STATS_COUNTER(domain_flush_all); | 513 | INC_STATS_COUNTER(domain_flush_all); |
458 | 514 | ||
459 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | 515 | for_each_iommu(iommu) |
460 | domid, 1, 1); | 516 | flush_domain_on_iommu(iommu, domid); |
461 | |||
462 | for_each_iommu(iommu) { | ||
463 | spin_lock_irqsave(&iommu->lock, flags); | ||
464 | __iommu_queue_command(iommu, &cmd); | ||
465 | __iommu_completion_wait(iommu); | ||
466 | __iommu_wait_for_completion(iommu); | ||
467 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
468 | } | ||
469 | } | 517 | } |
470 | 518 | ||
471 | void amd_iommu_flush_all_domains(void) | 519 | void amd_iommu_flush_all_domains(void) |
472 | { | 520 | { |
521 | struct amd_iommu *iommu; | ||
522 | |||
523 | for_each_iommu(iommu) | ||
524 | flush_all_domains_on_iommu(iommu); | ||
525 | } | ||
526 | |||
527 | static void flush_all_devices_for_iommu(struct amd_iommu *iommu) | ||
528 | { | ||
473 | int i; | 529 | int i; |
474 | 530 | ||
475 | for (i = 1; i < MAX_DOMAIN_ID; ++i) { | 531 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { |
476 | if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) | 532 | if (iommu != amd_iommu_rlookup_table[i]) |
477 | continue; | 533 | continue; |
478 | iommu_flush_domain(i); | 534 | |
535 | iommu_queue_inv_dev_entry(iommu, i); | ||
536 | iommu_completion_wait(iommu); | ||
479 | } | 537 | } |
480 | } | 538 | } |
481 | 539 | ||
482 | void amd_iommu_flush_all_devices(void) | 540 | static void flush_devices_by_domain(struct protection_domain *domain) |
483 | { | 541 | { |
484 | struct amd_iommu *iommu; | 542 | struct amd_iommu *iommu; |
485 | int i; | 543 | int i; |
486 | 544 | ||
487 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | 545 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { |
488 | if (amd_iommu_pd_table[i] == NULL) | 546 | if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || |
547 | (amd_iommu_pd_table[i] != domain)) | ||
489 | continue; | 548 | continue; |
490 | 549 | ||
491 | iommu = amd_iommu_rlookup_table[i]; | 550 | iommu = amd_iommu_rlookup_table[i]; |
@@ -497,6 +556,27 @@ void amd_iommu_flush_all_devices(void) | |||
497 | } | 556 | } |
498 | } | 557 | } |
499 | 558 | ||
559 | static void reset_iommu_command_buffer(struct amd_iommu *iommu) | ||
560 | { | ||
561 | pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); | ||
562 | |||
563 | if (iommu->reset_in_progress) | ||
564 | panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); | ||
565 | |||
566 | iommu->reset_in_progress = true; | ||
567 | |||
568 | amd_iommu_reset_cmd_buffer(iommu); | ||
569 | flush_all_devices_for_iommu(iommu); | ||
570 | flush_all_domains_on_iommu(iommu); | ||
571 | |||
572 | iommu->reset_in_progress = false; | ||
573 | } | ||
574 | |||
575 | void amd_iommu_flush_all_devices(void) | ||
576 | { | ||
577 | flush_devices_by_domain(NULL); | ||
578 | } | ||
579 | |||
500 | /**************************************************************************** | 580 | /**************************************************************************** |
501 | * | 581 | * |
502 | * The functions below are used the create the page table mappings for | 582 | * The functions below are used the create the page table mappings for |
@@ -514,18 +594,21 @@ void amd_iommu_flush_all_devices(void) | |||
514 | static int iommu_map_page(struct protection_domain *dom, | 594 | static int iommu_map_page(struct protection_domain *dom, |
515 | unsigned long bus_addr, | 595 | unsigned long bus_addr, |
516 | unsigned long phys_addr, | 596 | unsigned long phys_addr, |
517 | int prot) | 597 | int prot, |
598 | int map_size) | ||
518 | { | 599 | { |
519 | u64 __pte, *pte; | 600 | u64 __pte, *pte; |
520 | 601 | ||
521 | bus_addr = PAGE_ALIGN(bus_addr); | 602 | bus_addr = PAGE_ALIGN(bus_addr); |
522 | phys_addr = PAGE_ALIGN(phys_addr); | 603 | phys_addr = PAGE_ALIGN(phys_addr); |
523 | 604 | ||
524 | /* only support 512GB address spaces for now */ | 605 | BUG_ON(!PM_ALIGNED(map_size, bus_addr)); |
525 | if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) | 606 | BUG_ON(!PM_ALIGNED(map_size, phys_addr)); |
607 | |||
608 | if (!(prot & IOMMU_PROT_MASK)) | ||
526 | return -EINVAL; | 609 | return -EINVAL; |
527 | 610 | ||
528 | pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL); | 611 | pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL); |
529 | 612 | ||
530 | if (IOMMU_PTE_PRESENT(*pte)) | 613 | if (IOMMU_PTE_PRESENT(*pte)) |
531 | return -EBUSY; | 614 | return -EBUSY; |
@@ -538,29 +621,18 @@ static int iommu_map_page(struct protection_domain *dom, | |||
538 | 621 | ||
539 | *pte = __pte; | 622 | *pte = __pte; |
540 | 623 | ||
624 | update_domain(dom); | ||
625 | |||
541 | return 0; | 626 | return 0; |
542 | } | 627 | } |
543 | 628 | ||
544 | static void iommu_unmap_page(struct protection_domain *dom, | 629 | static void iommu_unmap_page(struct protection_domain *dom, |
545 | unsigned long bus_addr) | 630 | unsigned long bus_addr, int map_size) |
546 | { | 631 | { |
547 | u64 *pte; | 632 | u64 *pte = fetch_pte(dom, bus_addr, map_size); |
548 | |||
549 | pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; | ||
550 | |||
551 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
552 | return; | ||
553 | |||
554 | pte = IOMMU_PTE_PAGE(*pte); | ||
555 | pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; | ||
556 | 633 | ||
557 | if (!IOMMU_PTE_PRESENT(*pte)) | 634 | if (pte) |
558 | return; | 635 | *pte = 0; |
559 | |||
560 | pte = IOMMU_PTE_PAGE(*pte); | ||
561 | pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; | ||
562 | |||
563 | *pte = 0; | ||
564 | } | 636 | } |
565 | 637 | ||
566 | /* | 638 | /* |
@@ -615,7 +687,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
615 | 687 | ||
616 | for (addr = e->address_start; addr < e->address_end; | 688 | for (addr = e->address_start; addr < e->address_end; |
617 | addr += PAGE_SIZE) { | 689 | addr += PAGE_SIZE) { |
618 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot); | 690 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, |
691 | PM_MAP_4k); | ||
619 | if (ret) | 692 | if (ret) |
620 | return ret; | 693 | return ret; |
621 | /* | 694 | /* |
@@ -670,24 +743,29 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | |||
670 | * This function checks if there is a PTE for a given dma address. If | 743 | * This function checks if there is a PTE for a given dma address. If |
671 | * there is one, it returns the pointer to it. | 744 | * there is one, it returns the pointer to it. |
672 | */ | 745 | */ |
673 | static u64* fetch_pte(struct protection_domain *domain, | 746 | static u64 *fetch_pte(struct protection_domain *domain, |
674 | unsigned long address) | 747 | unsigned long address, int map_size) |
675 | { | 748 | { |
749 | int level; | ||
676 | u64 *pte; | 750 | u64 *pte; |
677 | 751 | ||
678 | pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)]; | 752 | level = domain->mode - 1; |
753 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | ||
679 | 754 | ||
680 | if (!IOMMU_PTE_PRESENT(*pte)) | 755 | while (level > map_size) { |
681 | return NULL; | 756 | if (!IOMMU_PTE_PRESENT(*pte)) |
757 | return NULL; | ||
682 | 758 | ||
683 | pte = IOMMU_PTE_PAGE(*pte); | 759 | level -= 1; |
684 | pte = &pte[IOMMU_PTE_L1_INDEX(address)]; | ||
685 | 760 | ||
686 | if (!IOMMU_PTE_PRESENT(*pte)) | 761 | pte = IOMMU_PTE_PAGE(*pte); |
687 | return NULL; | 762 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
688 | 763 | ||
689 | pte = IOMMU_PTE_PAGE(*pte); | 764 | if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { |
690 | pte = &pte[IOMMU_PTE_L0_INDEX(address)]; | 765 | pte = NULL; |
766 | break; | ||
767 | } | ||
768 | } | ||
691 | 769 | ||
692 | return pte; | 770 | return pte; |
693 | } | 771 | } |
@@ -727,7 +805,7 @@ static int alloc_new_range(struct amd_iommu *iommu, | |||
727 | u64 *pte, *pte_page; | 805 | u64 *pte, *pte_page; |
728 | 806 | ||
729 | for (i = 0; i < num_ptes; ++i) { | 807 | for (i = 0; i < num_ptes; ++i) { |
730 | pte = alloc_pte(&dma_dom->domain, address, | 808 | pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k, |
731 | &pte_page, gfp); | 809 | &pte_page, gfp); |
732 | if (!pte) | 810 | if (!pte) |
733 | goto out_free; | 811 | goto out_free; |
@@ -760,16 +838,20 @@ static int alloc_new_range(struct amd_iommu *iommu, | |||
760 | for (i = dma_dom->aperture[index]->offset; | 838 | for (i = dma_dom->aperture[index]->offset; |
761 | i < dma_dom->aperture_size; | 839 | i < dma_dom->aperture_size; |
762 | i += PAGE_SIZE) { | 840 | i += PAGE_SIZE) { |
763 | u64 *pte = fetch_pte(&dma_dom->domain, i); | 841 | u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k); |
764 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) | 842 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) |
765 | continue; | 843 | continue; |
766 | 844 | ||
767 | dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); | 845 | dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); |
768 | } | 846 | } |
769 | 847 | ||
848 | update_domain(&dma_dom->domain); | ||
849 | |||
770 | return 0; | 850 | return 0; |
771 | 851 | ||
772 | out_free: | 852 | out_free: |
853 | update_domain(&dma_dom->domain); | ||
854 | |||
773 | free_page((unsigned long)dma_dom->aperture[index]->bitmap); | 855 | free_page((unsigned long)dma_dom->aperture[index]->bitmap); |
774 | 856 | ||
775 | kfree(dma_dom->aperture[index]); | 857 | kfree(dma_dom->aperture[index]); |
@@ -1009,7 +1091,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) | |||
1009 | dma_dom->domain.id = domain_id_alloc(); | 1091 | dma_dom->domain.id = domain_id_alloc(); |
1010 | if (dma_dom->domain.id == 0) | 1092 | if (dma_dom->domain.id == 0) |
1011 | goto free_dma_dom; | 1093 | goto free_dma_dom; |
1012 | dma_dom->domain.mode = PAGE_MODE_3_LEVEL; | 1094 | dma_dom->domain.mode = PAGE_MODE_2_LEVEL; |
1013 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); | 1095 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); |
1014 | dma_dom->domain.flags = PD_DMA_OPS_MASK; | 1096 | dma_dom->domain.flags = PD_DMA_OPS_MASK; |
1015 | dma_dom->domain.priv = dma_dom; | 1097 | dma_dom->domain.priv = dma_dom; |
@@ -1063,6 +1145,41 @@ static struct protection_domain *domain_for_device(u16 devid) | |||
1063 | return dom; | 1145 | return dom; |
1064 | } | 1146 | } |
1065 | 1147 | ||
1148 | static void set_dte_entry(u16 devid, struct protection_domain *domain) | ||
1149 | { | ||
1150 | u64 pte_root = virt_to_phys(domain->pt_root); | ||
1151 | |||
1152 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) | ||
1153 | << DEV_ENTRY_MODE_SHIFT; | ||
1154 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | ||
1155 | |||
1156 | amd_iommu_dev_table[devid].data[2] = domain->id; | ||
1157 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | ||
1158 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | ||
1159 | |||
1160 | amd_iommu_pd_table[devid] = domain; | ||
1161 | } | ||
1162 | |||
1163 | /* | ||
1164 | * If a device is not yet associated with a domain, this function does | ||
1165 | * assigns it visible for the hardware | ||
1166 | */ | ||
1167 | static void __attach_device(struct amd_iommu *iommu, | ||
1168 | struct protection_domain *domain, | ||
1169 | u16 devid) | ||
1170 | { | ||
1171 | /* lock domain */ | ||
1172 | spin_lock(&domain->lock); | ||
1173 | |||
1174 | /* update DTE entry */ | ||
1175 | set_dte_entry(devid, domain); | ||
1176 | |||
1177 | domain->dev_cnt += 1; | ||
1178 | |||
1179 | /* ready */ | ||
1180 | spin_unlock(&domain->lock); | ||
1181 | } | ||
1182 | |||
1066 | /* | 1183 | /* |
1067 | * If a device is not yet associated with a domain, this function does | 1184 | * If a device is not yet associated with a domain, this function does |
1068 | * assigns it visible for the hardware | 1185 | * assigns it visible for the hardware |
@@ -1072,27 +1189,16 @@ static void attach_device(struct amd_iommu *iommu, | |||
1072 | u16 devid) | 1189 | u16 devid) |
1073 | { | 1190 | { |
1074 | unsigned long flags; | 1191 | unsigned long flags; |
1075 | u64 pte_root = virt_to_phys(domain->pt_root); | ||
1076 | |||
1077 | domain->dev_cnt += 1; | ||
1078 | |||
1079 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) | ||
1080 | << DEV_ENTRY_MODE_SHIFT; | ||
1081 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | ||
1082 | 1192 | ||
1083 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 1193 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
1084 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | 1194 | __attach_device(iommu, domain, devid); |
1085 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | ||
1086 | amd_iommu_dev_table[devid].data[2] = domain->id; | ||
1087 | |||
1088 | amd_iommu_pd_table[devid] = domain; | ||
1089 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1195 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1090 | 1196 | ||
1091 | /* | 1197 | /* |
1092 | * We might boot into a crash-kernel here. The crashed kernel | 1198 | * We might boot into a crash-kernel here. The crashed kernel |
1093 | * left the caches in the IOMMU dirty. So we have to flush | 1199 | * left the caches in the IOMMU dirty. So we have to flush |
1094 | * here to evict all dirty stuff. | 1200 | * here to evict all dirty stuff. |
1095 | */ | 1201 | */ |
1096 | iommu_queue_inv_dev_entry(iommu, devid); | 1202 | iommu_queue_inv_dev_entry(iommu, devid); |
1097 | iommu_flush_tlb_pde(iommu, domain->id); | 1203 | iommu_flush_tlb_pde(iommu, domain->id); |
1098 | } | 1204 | } |
@@ -1119,6 +1225,15 @@ static void __detach_device(struct protection_domain *domain, u16 devid) | |||
1119 | 1225 | ||
1120 | /* ready */ | 1226 | /* ready */ |
1121 | spin_unlock(&domain->lock); | 1227 | spin_unlock(&domain->lock); |
1228 | |||
1229 | /* | ||
1230 | * If we run in passthrough mode the device must be assigned to the | ||
1231 | * passthrough domain if it is detached from any other domain | ||
1232 | */ | ||
1233 | if (iommu_pass_through) { | ||
1234 | struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; | ||
1235 | __attach_device(iommu, pt_domain, devid); | ||
1236 | } | ||
1122 | } | 1237 | } |
1123 | 1238 | ||
1124 | /* | 1239 | /* |
@@ -1164,6 +1279,8 @@ static int device_change_notifier(struct notifier_block *nb, | |||
1164 | case BUS_NOTIFY_UNBOUND_DRIVER: | 1279 | case BUS_NOTIFY_UNBOUND_DRIVER: |
1165 | if (!domain) | 1280 | if (!domain) |
1166 | goto out; | 1281 | goto out; |
1282 | if (iommu_pass_through) | ||
1283 | break; | ||
1167 | detach_device(domain, devid); | 1284 | detach_device(domain, devid); |
1168 | break; | 1285 | break; |
1169 | case BUS_NOTIFY_ADD_DEVICE: | 1286 | case BUS_NOTIFY_ADD_DEVICE: |
@@ -1292,39 +1409,91 @@ static int get_device_resources(struct device *dev, | |||
1292 | return 1; | 1409 | return 1; |
1293 | } | 1410 | } |
1294 | 1411 | ||
1412 | static void update_device_table(struct protection_domain *domain) | ||
1413 | { | ||
1414 | unsigned long flags; | ||
1415 | int i; | ||
1416 | |||
1417 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | ||
1418 | if (amd_iommu_pd_table[i] != domain) | ||
1419 | continue; | ||
1420 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1421 | set_dte_entry(i, domain); | ||
1422 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1423 | } | ||
1424 | } | ||
1425 | |||
1426 | static void update_domain(struct protection_domain *domain) | ||
1427 | { | ||
1428 | if (!domain->updated) | ||
1429 | return; | ||
1430 | |||
1431 | update_device_table(domain); | ||
1432 | flush_devices_by_domain(domain); | ||
1433 | iommu_flush_domain(domain->id); | ||
1434 | |||
1435 | domain->updated = false; | ||
1436 | } | ||
1437 | |||
1295 | /* | 1438 | /* |
1296 | * If the pte_page is not yet allocated this function is called | 1439 | * This function is used to add another level to an IO page table. Adding |
1440 | * another level increases the size of the address space by 9 bits to a size up | ||
1441 | * to 64 bits. | ||
1297 | */ | 1442 | */ |
1298 | static u64* alloc_pte(struct protection_domain *dom, | 1443 | static bool increase_address_space(struct protection_domain *domain, |
1299 | unsigned long address, u64 **pte_page, gfp_t gfp) | 1444 | gfp_t gfp) |
1445 | { | ||
1446 | u64 *pte; | ||
1447 | |||
1448 | if (domain->mode == PAGE_MODE_6_LEVEL) | ||
1449 | /* address space already 64 bit large */ | ||
1450 | return false; | ||
1451 | |||
1452 | pte = (void *)get_zeroed_page(gfp); | ||
1453 | if (!pte) | ||
1454 | return false; | ||
1455 | |||
1456 | *pte = PM_LEVEL_PDE(domain->mode, | ||
1457 | virt_to_phys(domain->pt_root)); | ||
1458 | domain->pt_root = pte; | ||
1459 | domain->mode += 1; | ||
1460 | domain->updated = true; | ||
1461 | |||
1462 | return true; | ||
1463 | } | ||
1464 | |||
1465 | static u64 *alloc_pte(struct protection_domain *domain, | ||
1466 | unsigned long address, | ||
1467 | int end_lvl, | ||
1468 | u64 **pte_page, | ||
1469 | gfp_t gfp) | ||
1300 | { | 1470 | { |
1301 | u64 *pte, *page; | 1471 | u64 *pte, *page; |
1472 | int level; | ||
1302 | 1473 | ||
1303 | pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)]; | 1474 | while (address > PM_LEVEL_SIZE(domain->mode)) |
1475 | increase_address_space(domain, gfp); | ||
1304 | 1476 | ||
1305 | if (!IOMMU_PTE_PRESENT(*pte)) { | 1477 | level = domain->mode - 1; |
1306 | page = (u64 *)get_zeroed_page(gfp); | 1478 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; |
1307 | if (!page) | ||
1308 | return NULL; | ||
1309 | *pte = IOMMU_L2_PDE(virt_to_phys(page)); | ||
1310 | } | ||
1311 | 1479 | ||
1312 | pte = IOMMU_PTE_PAGE(*pte); | 1480 | while (level > end_lvl) { |
1313 | pte = &pte[IOMMU_PTE_L1_INDEX(address)]; | 1481 | if (!IOMMU_PTE_PRESENT(*pte)) { |
1482 | page = (u64 *)get_zeroed_page(gfp); | ||
1483 | if (!page) | ||
1484 | return NULL; | ||
1485 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); | ||
1486 | } | ||
1314 | 1487 | ||
1315 | if (!IOMMU_PTE_PRESENT(*pte)) { | 1488 | level -= 1; |
1316 | page = (u64 *)get_zeroed_page(gfp); | ||
1317 | if (!page) | ||
1318 | return NULL; | ||
1319 | *pte = IOMMU_L1_PDE(virt_to_phys(page)); | ||
1320 | } | ||
1321 | 1489 | ||
1322 | pte = IOMMU_PTE_PAGE(*pte); | 1490 | pte = IOMMU_PTE_PAGE(*pte); |
1323 | 1491 | ||
1324 | if (pte_page) | 1492 | if (pte_page && level == end_lvl) |
1325 | *pte_page = pte; | 1493 | *pte_page = pte; |
1326 | 1494 | ||
1327 | pte = &pte[IOMMU_PTE_L0_INDEX(address)]; | 1495 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
1496 | } | ||
1328 | 1497 | ||
1329 | return pte; | 1498 | return pte; |
1330 | } | 1499 | } |
@@ -1344,10 +1513,13 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, | |||
1344 | 1513 | ||
1345 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; | 1514 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; |
1346 | if (!pte) { | 1515 | if (!pte) { |
1347 | pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC); | 1516 | pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page, |
1517 | GFP_ATOMIC); | ||
1348 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; | 1518 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; |
1349 | } else | 1519 | } else |
1350 | pte += IOMMU_PTE_L0_INDEX(address); | 1520 | pte += PM_LEVEL_INDEX(0, address); |
1521 | |||
1522 | update_domain(&dom->domain); | ||
1351 | 1523 | ||
1352 | return pte; | 1524 | return pte; |
1353 | } | 1525 | } |
@@ -1409,7 +1581,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, | |||
1409 | if (!pte) | 1581 | if (!pte) |
1410 | return; | 1582 | return; |
1411 | 1583 | ||
1412 | pte += IOMMU_PTE_L0_INDEX(address); | 1584 | pte += PM_LEVEL_INDEX(0, address); |
1413 | 1585 | ||
1414 | WARN_ON(!*pte); | 1586 | WARN_ON(!*pte); |
1415 | 1587 | ||
@@ -1988,19 +2160,47 @@ static void cleanup_domain(struct protection_domain *domain) | |||
1988 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 2160 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1989 | } | 2161 | } |
1990 | 2162 | ||
1991 | static int amd_iommu_domain_init(struct iommu_domain *dom) | 2163 | static void protection_domain_free(struct protection_domain *domain) |
2164 | { | ||
2165 | if (!domain) | ||
2166 | return; | ||
2167 | |||
2168 | if (domain->id) | ||
2169 | domain_id_free(domain->id); | ||
2170 | |||
2171 | kfree(domain); | ||
2172 | } | ||
2173 | |||
2174 | static struct protection_domain *protection_domain_alloc(void) | ||
1992 | { | 2175 | { |
1993 | struct protection_domain *domain; | 2176 | struct protection_domain *domain; |
1994 | 2177 | ||
1995 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); | 2178 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); |
1996 | if (!domain) | 2179 | if (!domain) |
1997 | return -ENOMEM; | 2180 | return NULL; |
1998 | 2181 | ||
1999 | spin_lock_init(&domain->lock); | 2182 | spin_lock_init(&domain->lock); |
2000 | domain->mode = PAGE_MODE_3_LEVEL; | ||
2001 | domain->id = domain_id_alloc(); | 2183 | domain->id = domain_id_alloc(); |
2002 | if (!domain->id) | 2184 | if (!domain->id) |
2185 | goto out_err; | ||
2186 | |||
2187 | return domain; | ||
2188 | |||
2189 | out_err: | ||
2190 | kfree(domain); | ||
2191 | |||
2192 | return NULL; | ||
2193 | } | ||
2194 | |||
2195 | static int amd_iommu_domain_init(struct iommu_domain *dom) | ||
2196 | { | ||
2197 | struct protection_domain *domain; | ||
2198 | |||
2199 | domain = protection_domain_alloc(); | ||
2200 | if (!domain) | ||
2003 | goto out_free; | 2201 | goto out_free; |
2202 | |||
2203 | domain->mode = PAGE_MODE_3_LEVEL; | ||
2004 | domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); | 2204 | domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); |
2005 | if (!domain->pt_root) | 2205 | if (!domain->pt_root) |
2006 | goto out_free; | 2206 | goto out_free; |
@@ -2010,7 +2210,7 @@ static int amd_iommu_domain_init(struct iommu_domain *dom) | |||
2010 | return 0; | 2210 | return 0; |
2011 | 2211 | ||
2012 | out_free: | 2212 | out_free: |
2013 | kfree(domain); | 2213 | protection_domain_free(domain); |
2014 | 2214 | ||
2015 | return -ENOMEM; | 2215 | return -ENOMEM; |
2016 | } | 2216 | } |
@@ -2115,7 +2315,7 @@ static int amd_iommu_map_range(struct iommu_domain *dom, | |||
2115 | paddr &= PAGE_MASK; | 2315 | paddr &= PAGE_MASK; |
2116 | 2316 | ||
2117 | for (i = 0; i < npages; ++i) { | 2317 | for (i = 0; i < npages; ++i) { |
2118 | ret = iommu_map_page(domain, iova, paddr, prot); | 2318 | ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); |
2119 | if (ret) | 2319 | if (ret) |
2120 | return ret; | 2320 | return ret; |
2121 | 2321 | ||
@@ -2136,7 +2336,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, | |||
2136 | iova &= PAGE_MASK; | 2336 | iova &= PAGE_MASK; |
2137 | 2337 | ||
2138 | for (i = 0; i < npages; ++i) { | 2338 | for (i = 0; i < npages; ++i) { |
2139 | iommu_unmap_page(domain, iova); | 2339 | iommu_unmap_page(domain, iova, PM_MAP_4k); |
2140 | iova += PAGE_SIZE; | 2340 | iova += PAGE_SIZE; |
2141 | } | 2341 | } |
2142 | 2342 | ||
@@ -2151,21 +2351,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | |||
2151 | phys_addr_t paddr; | 2351 | phys_addr_t paddr; |
2152 | u64 *pte; | 2352 | u64 *pte; |
2153 | 2353 | ||
2154 | pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)]; | 2354 | pte = fetch_pte(domain, iova, PM_MAP_4k); |
2155 | |||
2156 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
2157 | return 0; | ||
2158 | |||
2159 | pte = IOMMU_PTE_PAGE(*pte); | ||
2160 | pte = &pte[IOMMU_PTE_L1_INDEX(iova)]; | ||
2161 | |||
2162 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
2163 | return 0; | ||
2164 | |||
2165 | pte = IOMMU_PTE_PAGE(*pte); | ||
2166 | pte = &pte[IOMMU_PTE_L0_INDEX(iova)]; | ||
2167 | 2355 | ||
2168 | if (!IOMMU_PTE_PRESENT(*pte)) | 2356 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) |
2169 | return 0; | 2357 | return 0; |
2170 | 2358 | ||
2171 | paddr = *pte & IOMMU_PAGE_MASK; | 2359 | paddr = *pte & IOMMU_PAGE_MASK; |
@@ -2191,3 +2379,46 @@ static struct iommu_ops amd_iommu_ops = { | |||
2191 | .domain_has_cap = amd_iommu_domain_has_cap, | 2379 | .domain_has_cap = amd_iommu_domain_has_cap, |
2192 | }; | 2380 | }; |
2193 | 2381 | ||
2382 | /***************************************************************************** | ||
2383 | * | ||
2384 | * The next functions do a basic initialization of IOMMU for pass through | ||
2385 | * mode | ||
2386 | * | ||
2387 | * In passthrough mode the IOMMU is initialized and enabled but not used for | ||
2388 | * DMA-API translation. | ||
2389 | * | ||
2390 | *****************************************************************************/ | ||
2391 | |||
2392 | int __init amd_iommu_init_passthrough(void) | ||
2393 | { | ||
2394 | struct pci_dev *dev = NULL; | ||
2395 | u16 devid, devid2; | ||
2396 | |||
2397 | /* allocate passthroug domain */ | ||
2398 | pt_domain = protection_domain_alloc(); | ||
2399 | if (!pt_domain) | ||
2400 | return -ENOMEM; | ||
2401 | |||
2402 | pt_domain->mode |= PAGE_MODE_NONE; | ||
2403 | |||
2404 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | ||
2405 | struct amd_iommu *iommu; | ||
2406 | |||
2407 | devid = calc_devid(dev->bus->number, dev->devfn); | ||
2408 | if (devid > amd_iommu_last_bdf) | ||
2409 | continue; | ||
2410 | |||
2411 | devid2 = amd_iommu_alias_table[devid]; | ||
2412 | |||
2413 | iommu = amd_iommu_rlookup_table[devid2]; | ||
2414 | if (!iommu) | ||
2415 | continue; | ||
2416 | |||
2417 | __attach_device(iommu, pt_domain, devid); | ||
2418 | __attach_device(iommu, pt_domain, devid2); | ||
2419 | } | ||
2420 | |||
2421 | pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); | ||
2422 | |||
2423 | return 0; | ||
2424 | } | ||
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c1b17e97252e..b4b61d462dcc 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -252,7 +252,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
252 | /* Function to enable the hardware */ | 252 | /* Function to enable the hardware */ |
253 | static void iommu_enable(struct amd_iommu *iommu) | 253 | static void iommu_enable(struct amd_iommu *iommu) |
254 | { | 254 | { |
255 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", | 255 | printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n", |
256 | dev_name(&iommu->dev->dev), iommu->cap_ptr); | 256 | dev_name(&iommu->dev->dev), iommu->cap_ptr); |
257 | 257 | ||
258 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | 258 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); |
@@ -435,6 +435,20 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | |||
435 | } | 435 | } |
436 | 436 | ||
437 | /* | 437 | /* |
438 | * This function resets the command buffer if the IOMMU stopped fetching | ||
439 | * commands from it. | ||
440 | */ | ||
441 | void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) | ||
442 | { | ||
443 | iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); | ||
444 | |||
445 | writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | ||
446 | writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | ||
447 | |||
448 | iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); | ||
449 | } | ||
450 | |||
451 | /* | ||
438 | * This function writes the command buffer address to the hardware and | 452 | * This function writes the command buffer address to the hardware and |
439 | * enables it. | 453 | * enables it. |
440 | */ | 454 | */ |
@@ -450,11 +464,7 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) | |||
450 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, | 464 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, |
451 | &entry, sizeof(entry)); | 465 | &entry, sizeof(entry)); |
452 | 466 | ||
453 | /* set head and tail to zero manually */ | 467 | amd_iommu_reset_cmd_buffer(iommu); |
454 | writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | ||
455 | writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | ||
456 | |||
457 | iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); | ||
458 | } | 468 | } |
459 | 469 | ||
460 | static void __init free_command_buffer(struct amd_iommu *iommu) | 470 | static void __init free_command_buffer(struct amd_iommu *iommu) |
@@ -858,7 +868,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
858 | switch (*p) { | 868 | switch (*p) { |
859 | case ACPI_IVHD_TYPE: | 869 | case ACPI_IVHD_TYPE: |
860 | 870 | ||
861 | DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x " | 871 | DUMP_printk("device: %02x:%02x.%01x cap: %04x " |
862 | "seg: %d flags: %01x info %04x\n", | 872 | "seg: %d flags: %01x info %04x\n", |
863 | PCI_BUS(h->devid), PCI_SLOT(h->devid), | 873 | PCI_BUS(h->devid), PCI_SLOT(h->devid), |
864 | PCI_FUNC(h->devid), h->cap_ptr, | 874 | PCI_FUNC(h->devid), h->cap_ptr, |
@@ -902,7 +912,7 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu) | |||
902 | 912 | ||
903 | r = request_irq(iommu->dev->irq, amd_iommu_int_handler, | 913 | r = request_irq(iommu->dev->irq, amd_iommu_int_handler, |
904 | IRQF_SAMPLE_RANDOM, | 914 | IRQF_SAMPLE_RANDOM, |
905 | "AMD IOMMU", | 915 | "AMD-Vi", |
906 | NULL); | 916 | NULL); |
907 | 917 | ||
908 | if (r) { | 918 | if (r) { |
@@ -1150,7 +1160,7 @@ int __init amd_iommu_init(void) | |||
1150 | 1160 | ||
1151 | 1161 | ||
1152 | if (no_iommu) { | 1162 | if (no_iommu) { |
1153 | printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n"); | 1163 | printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); |
1154 | return 0; | 1164 | return 0; |
1155 | } | 1165 | } |
1156 | 1166 | ||
@@ -1242,22 +1252,28 @@ int __init amd_iommu_init(void) | |||
1242 | if (ret) | 1252 | if (ret) |
1243 | goto free; | 1253 | goto free; |
1244 | 1254 | ||
1245 | ret = amd_iommu_init_dma_ops(); | 1255 | if (iommu_pass_through) |
1256 | ret = amd_iommu_init_passthrough(); | ||
1257 | else | ||
1258 | ret = amd_iommu_init_dma_ops(); | ||
1246 | if (ret) | 1259 | if (ret) |
1247 | goto free; | 1260 | goto free; |
1248 | 1261 | ||
1249 | enable_iommus(); | 1262 | enable_iommus(); |
1250 | 1263 | ||
1251 | printk(KERN_INFO "AMD IOMMU: device isolation "); | 1264 | if (iommu_pass_through) |
1265 | goto out; | ||
1266 | |||
1267 | printk(KERN_INFO "AMD-Vi: device isolation "); | ||
1252 | if (amd_iommu_isolate) | 1268 | if (amd_iommu_isolate) |
1253 | printk("enabled\n"); | 1269 | printk("enabled\n"); |
1254 | else | 1270 | else |
1255 | printk("disabled\n"); | 1271 | printk("disabled\n"); |
1256 | 1272 | ||
1257 | if (amd_iommu_unmap_flush) | 1273 | if (amd_iommu_unmap_flush) |
1258 | printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); | 1274 | printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); |
1259 | else | 1275 | else |
1260 | printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); | 1276 | printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); |
1261 | 1277 | ||
1262 | out: | 1278 | out: |
1263 | return ret; | 1279 | return ret; |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 676debfc1702..128111d8ffe0 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/bitops.h> | 20 | #include <linux/bitops.h> |
21 | #include <linux/ioport.h> | 21 | #include <linux/ioport.h> |
22 | #include <linux/suspend.h> | 22 | #include <linux/suspend.h> |
23 | #include <linux/kmemleak.h> | ||
23 | #include <asm/e820.h> | 24 | #include <asm/e820.h> |
24 | #include <asm/io.h> | 25 | #include <asm/io.h> |
25 | #include <asm/iommu.h> | 26 | #include <asm/iommu.h> |
@@ -94,6 +95,11 @@ static u32 __init allocate_aperture(void) | |||
94 | * code for safe | 95 | * code for safe |
95 | */ | 96 | */ |
96 | p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); | 97 | p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); |
98 | /* | ||
99 | * Kmemleak should not scan this block as it may not be mapped via the | ||
100 | * kernel direct mapping. | ||
101 | */ | ||
102 | kmemleak_ignore(p); | ||
97 | if (!p || __pa(p)+aper_size > 0xffffffff) { | 103 | if (!p || __pa(p)+aper_size > 0xffffffff) { |
98 | printk(KERN_ERR | 104 | printk(KERN_ERR |
99 | "Cannot allocate aperture memory hole (%p,%uK)\n", | 105 | "Cannot allocate aperture memory hole (%p,%uK)\n", |
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index b3025b43b63a..db7220220d09 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
@@ -39,7 +39,7 @@ | |||
39 | int unknown_nmi_panic; | 39 | int unknown_nmi_panic; |
40 | int nmi_watchdog_enabled; | 40 | int nmi_watchdog_enabled; |
41 | 41 | ||
42 | static cpumask_var_t backtrace_mask; | 42 | static cpumask_t backtrace_mask __read_mostly; |
43 | 43 | ||
44 | /* nmi_active: | 44 | /* nmi_active: |
45 | * >0: the lapic NMI watchdog is active, but can be disabled | 45 | * >0: the lapic NMI watchdog is active, but can be disabled |
@@ -138,7 +138,6 @@ int __init check_nmi_watchdog(void) | |||
138 | if (!prev_nmi_count) | 138 | if (!prev_nmi_count) |
139 | goto error; | 139 | goto error; |
140 | 140 | ||
141 | alloc_cpumask_var(&backtrace_mask, GFP_KERNEL|__GFP_ZERO); | ||
142 | printk(KERN_INFO "Testing NMI watchdog ... "); | 141 | printk(KERN_INFO "Testing NMI watchdog ... "); |
143 | 142 | ||
144 | #ifdef CONFIG_SMP | 143 | #ifdef CONFIG_SMP |
@@ -415,14 +414,17 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
415 | } | 414 | } |
416 | 415 | ||
417 | /* We can be called before check_nmi_watchdog, hence NULL check. */ | 416 | /* We can be called before check_nmi_watchdog, hence NULL check. */ |
418 | if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) { | 417 | if (cpumask_test_cpu(cpu, &backtrace_mask)) { |
419 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ | 418 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ |
420 | 419 | ||
421 | spin_lock(&lock); | 420 | spin_lock(&lock); |
422 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); | 421 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); |
422 | show_regs(regs); | ||
423 | dump_stack(); | 423 | dump_stack(); |
424 | spin_unlock(&lock); | 424 | spin_unlock(&lock); |
425 | cpumask_clear_cpu(cpu, backtrace_mask); | 425 | cpumask_clear_cpu(cpu, &backtrace_mask); |
426 | |||
427 | rc = 1; | ||
426 | } | 428 | } |
427 | 429 | ||
428 | /* Could check oops_in_progress here too, but it's safer not to */ | 430 | /* Could check oops_in_progress here too, but it's safer not to */ |
@@ -552,14 +554,18 @@ int do_nmi_callback(struct pt_regs *regs, int cpu) | |||
552 | return 0; | 554 | return 0; |
553 | } | 555 | } |
554 | 556 | ||
555 | void __trigger_all_cpu_backtrace(void) | 557 | void arch_trigger_all_cpu_backtrace(void) |
556 | { | 558 | { |
557 | int i; | 559 | int i; |
558 | 560 | ||
559 | cpumask_copy(backtrace_mask, cpu_online_mask); | 561 | cpumask_copy(&backtrace_mask, cpu_online_mask); |
562 | |||
563 | printk(KERN_INFO "sending NMI to all CPUs:\n"); | ||
564 | apic->send_IPI_all(NMI_VECTOR); | ||
565 | |||
560 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | 566 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ |
561 | for (i = 0; i < 10 * 1000; i++) { | 567 | for (i = 0; i < 10 * 1000; i++) { |
562 | if (cpumask_empty(backtrace_mask)) | 568 | if (cpumask_empty(&backtrace_mask)) |
563 | break; | 569 | break; |
564 | mdelay(1); | 570 | mdelay(1); |
565 | } | 571 | } |
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 898ecc47e129..4a6aeedcd965 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -3,6 +3,7 @@ | |||
3 | * This code generates raw asm output which is post-processed to extract | 3 | * This code generates raw asm output which is post-processed to extract |
4 | * and format the required data. | 4 | * and format the required data. |
5 | */ | 5 | */ |
6 | #define COMPILE_OFFSETS | ||
6 | 7 | ||
7 | #include <linux/crypto.h> | 8 | #include <linux/crypto.h> |
8 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 900332b800f8..f9cd0849bd42 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -6,6 +6,7 @@ | |||
6 | * Copyright (C) 2009 Jaswinder Singh Rajput | 6 | * Copyright (C) 2009 Jaswinder Singh Rajput |
7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | 7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter |
8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> |
9 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> | ||
9 | * | 10 | * |
10 | * For licencing details see kernel-base/COPYING | 11 | * For licencing details see kernel-base/COPYING |
11 | */ | 12 | */ |
@@ -20,6 +21,7 @@ | |||
20 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
21 | #include <linux/uaccess.h> | 22 | #include <linux/uaccess.h> |
22 | #include <linux/highmem.h> | 23 | #include <linux/highmem.h> |
24 | #include <linux/cpu.h> | ||
23 | 25 | ||
24 | #include <asm/apic.h> | 26 | #include <asm/apic.h> |
25 | #include <asm/stacktrace.h> | 27 | #include <asm/stacktrace.h> |
@@ -27,12 +29,52 @@ | |||
27 | 29 | ||
28 | static u64 perf_counter_mask __read_mostly; | 30 | static u64 perf_counter_mask __read_mostly; |
29 | 31 | ||
32 | /* The maximal number of PEBS counters: */ | ||
33 | #define MAX_PEBS_COUNTERS 4 | ||
34 | |||
35 | /* The size of a BTS record in bytes: */ | ||
36 | #define BTS_RECORD_SIZE 24 | ||
37 | |||
38 | /* The size of a per-cpu BTS buffer in bytes: */ | ||
39 | #define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024) | ||
40 | |||
41 | /* The BTS overflow threshold in bytes from the end of the buffer: */ | ||
42 | #define BTS_OVFL_TH (BTS_RECORD_SIZE * 64) | ||
43 | |||
44 | |||
45 | /* | ||
46 | * Bits in the debugctlmsr controlling branch tracing. | ||
47 | */ | ||
48 | #define X86_DEBUGCTL_TR (1 << 6) | ||
49 | #define X86_DEBUGCTL_BTS (1 << 7) | ||
50 | #define X86_DEBUGCTL_BTINT (1 << 8) | ||
51 | #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) | ||
52 | #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) | ||
53 | |||
54 | /* | ||
55 | * A debug store configuration. | ||
56 | * | ||
57 | * We only support architectures that use 64bit fields. | ||
58 | */ | ||
59 | struct debug_store { | ||
60 | u64 bts_buffer_base; | ||
61 | u64 bts_index; | ||
62 | u64 bts_absolute_maximum; | ||
63 | u64 bts_interrupt_threshold; | ||
64 | u64 pebs_buffer_base; | ||
65 | u64 pebs_index; | ||
66 | u64 pebs_absolute_maximum; | ||
67 | u64 pebs_interrupt_threshold; | ||
68 | u64 pebs_counter_reset[MAX_PEBS_COUNTERS]; | ||
69 | }; | ||
70 | |||
30 | struct cpu_hw_counters { | 71 | struct cpu_hw_counters { |
31 | struct perf_counter *counters[X86_PMC_IDX_MAX]; | 72 | struct perf_counter *counters[X86_PMC_IDX_MAX]; |
32 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 73 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
33 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 74 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
34 | unsigned long interrupts; | 75 | unsigned long interrupts; |
35 | int enabled; | 76 | int enabled; |
77 | struct debug_store *ds; | ||
36 | }; | 78 | }; |
37 | 79 | ||
38 | /* | 80 | /* |
@@ -58,6 +100,8 @@ struct x86_pmu { | |||
58 | int apic; | 100 | int apic; |
59 | u64 max_period; | 101 | u64 max_period; |
60 | u64 intel_ctrl; | 102 | u64 intel_ctrl; |
103 | void (*enable_bts)(u64 config); | ||
104 | void (*disable_bts)(void); | ||
61 | }; | 105 | }; |
62 | 106 | ||
63 | static struct x86_pmu x86_pmu __read_mostly; | 107 | static struct x86_pmu x86_pmu __read_mostly; |
@@ -577,6 +621,9 @@ x86_perf_counter_update(struct perf_counter *counter, | |||
577 | u64 prev_raw_count, new_raw_count; | 621 | u64 prev_raw_count, new_raw_count; |
578 | s64 delta; | 622 | s64 delta; |
579 | 623 | ||
624 | if (idx == X86_PMC_IDX_FIXED_BTS) | ||
625 | return 0; | ||
626 | |||
580 | /* | 627 | /* |
581 | * Careful: an NMI might modify the previous counter value. | 628 | * Careful: an NMI might modify the previous counter value. |
582 | * | 629 | * |
@@ -666,10 +713,110 @@ static void release_pmc_hardware(void) | |||
666 | #endif | 713 | #endif |
667 | } | 714 | } |
668 | 715 | ||
716 | static inline bool bts_available(void) | ||
717 | { | ||
718 | return x86_pmu.enable_bts != NULL; | ||
719 | } | ||
720 | |||
721 | static inline void init_debug_store_on_cpu(int cpu) | ||
722 | { | ||
723 | struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; | ||
724 | |||
725 | if (!ds) | ||
726 | return; | ||
727 | |||
728 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
729 | (u32)((u64)(unsigned long)ds), | ||
730 | (u32)((u64)(unsigned long)ds >> 32)); | ||
731 | } | ||
732 | |||
733 | static inline void fini_debug_store_on_cpu(int cpu) | ||
734 | { | ||
735 | if (!per_cpu(cpu_hw_counters, cpu).ds) | ||
736 | return; | ||
737 | |||
738 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
739 | } | ||
740 | |||
741 | static void release_bts_hardware(void) | ||
742 | { | ||
743 | int cpu; | ||
744 | |||
745 | if (!bts_available()) | ||
746 | return; | ||
747 | |||
748 | get_online_cpus(); | ||
749 | |||
750 | for_each_online_cpu(cpu) | ||
751 | fini_debug_store_on_cpu(cpu); | ||
752 | |||
753 | for_each_possible_cpu(cpu) { | ||
754 | struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; | ||
755 | |||
756 | if (!ds) | ||
757 | continue; | ||
758 | |||
759 | per_cpu(cpu_hw_counters, cpu).ds = NULL; | ||
760 | |||
761 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
762 | kfree(ds); | ||
763 | } | ||
764 | |||
765 | put_online_cpus(); | ||
766 | } | ||
767 | |||
768 | static int reserve_bts_hardware(void) | ||
769 | { | ||
770 | int cpu, err = 0; | ||
771 | |||
772 | if (!bts_available()) | ||
773 | return 0; | ||
774 | |||
775 | get_online_cpus(); | ||
776 | |||
777 | for_each_possible_cpu(cpu) { | ||
778 | struct debug_store *ds; | ||
779 | void *buffer; | ||
780 | |||
781 | err = -ENOMEM; | ||
782 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
783 | if (unlikely(!buffer)) | ||
784 | break; | ||
785 | |||
786 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
787 | if (unlikely(!ds)) { | ||
788 | kfree(buffer); | ||
789 | break; | ||
790 | } | ||
791 | |||
792 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
793 | ds->bts_index = ds->bts_buffer_base; | ||
794 | ds->bts_absolute_maximum = | ||
795 | ds->bts_buffer_base + BTS_BUFFER_SIZE; | ||
796 | ds->bts_interrupt_threshold = | ||
797 | ds->bts_absolute_maximum - BTS_OVFL_TH; | ||
798 | |||
799 | per_cpu(cpu_hw_counters, cpu).ds = ds; | ||
800 | err = 0; | ||
801 | } | ||
802 | |||
803 | if (err) | ||
804 | release_bts_hardware(); | ||
805 | else { | ||
806 | for_each_online_cpu(cpu) | ||
807 | init_debug_store_on_cpu(cpu); | ||
808 | } | ||
809 | |||
810 | put_online_cpus(); | ||
811 | |||
812 | return err; | ||
813 | } | ||
814 | |||
669 | static void hw_perf_counter_destroy(struct perf_counter *counter) | 815 | static void hw_perf_counter_destroy(struct perf_counter *counter) |
670 | { | 816 | { |
671 | if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { | 817 | if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { |
672 | release_pmc_hardware(); | 818 | release_pmc_hardware(); |
819 | release_bts_hardware(); | ||
673 | mutex_unlock(&pmc_reserve_mutex); | 820 | mutex_unlock(&pmc_reserve_mutex); |
674 | } | 821 | } |
675 | } | 822 | } |
@@ -712,6 +859,42 @@ set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) | |||
712 | return 0; | 859 | return 0; |
713 | } | 860 | } |
714 | 861 | ||
862 | static void intel_pmu_enable_bts(u64 config) | ||
863 | { | ||
864 | unsigned long debugctlmsr; | ||
865 | |||
866 | debugctlmsr = get_debugctlmsr(); | ||
867 | |||
868 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
869 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
870 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
871 | |||
872 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
873 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
874 | |||
875 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
876 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
877 | |||
878 | update_debugctlmsr(debugctlmsr); | ||
879 | } | ||
880 | |||
881 | static void intel_pmu_disable_bts(void) | ||
882 | { | ||
883 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
884 | unsigned long debugctlmsr; | ||
885 | |||
886 | if (!cpuc->ds) | ||
887 | return; | ||
888 | |||
889 | debugctlmsr = get_debugctlmsr(); | ||
890 | |||
891 | debugctlmsr &= | ||
892 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
893 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
894 | |||
895 | update_debugctlmsr(debugctlmsr); | ||
896 | } | ||
897 | |||
715 | /* | 898 | /* |
716 | * Setup the hardware configuration for a given attr_type | 899 | * Setup the hardware configuration for a given attr_type |
717 | */ | 900 | */ |
@@ -728,9 +911,13 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
728 | err = 0; | 911 | err = 0; |
729 | if (!atomic_inc_not_zero(&active_counters)) { | 912 | if (!atomic_inc_not_zero(&active_counters)) { |
730 | mutex_lock(&pmc_reserve_mutex); | 913 | mutex_lock(&pmc_reserve_mutex); |
731 | if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware()) | 914 | if (atomic_read(&active_counters) == 0) { |
732 | err = -EBUSY; | 915 | if (!reserve_pmc_hardware()) |
733 | else | 916 | err = -EBUSY; |
917 | else | ||
918 | err = reserve_bts_hardware(); | ||
919 | } | ||
920 | if (!err) | ||
734 | atomic_inc(&active_counters); | 921 | atomic_inc(&active_counters); |
735 | mutex_unlock(&pmc_reserve_mutex); | 922 | mutex_unlock(&pmc_reserve_mutex); |
736 | } | 923 | } |
@@ -793,6 +980,20 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
793 | if (config == -1LL) | 980 | if (config == -1LL) |
794 | return -EINVAL; | 981 | return -EINVAL; |
795 | 982 | ||
983 | /* | ||
984 | * Branch tracing: | ||
985 | */ | ||
986 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | ||
987 | (hwc->sample_period == 1)) { | ||
988 | /* BTS is not supported by this architecture. */ | ||
989 | if (!bts_available()) | ||
990 | return -EOPNOTSUPP; | ||
991 | |||
992 | /* BTS is currently only allowed for user-mode. */ | ||
993 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
994 | return -EOPNOTSUPP; | ||
995 | } | ||
996 | |||
796 | hwc->config |= config; | 997 | hwc->config |= config; |
797 | 998 | ||
798 | return 0; | 999 | return 0; |
@@ -817,7 +1018,18 @@ static void p6_pmu_disable_all(void) | |||
817 | 1018 | ||
818 | static void intel_pmu_disable_all(void) | 1019 | static void intel_pmu_disable_all(void) |
819 | { | 1020 | { |
1021 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1022 | |||
1023 | if (!cpuc->enabled) | ||
1024 | return; | ||
1025 | |||
1026 | cpuc->enabled = 0; | ||
1027 | barrier(); | ||
1028 | |||
820 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | 1029 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); |
1030 | |||
1031 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | ||
1032 | intel_pmu_disable_bts(); | ||
821 | } | 1033 | } |
822 | 1034 | ||
823 | static void amd_pmu_disable_all(void) | 1035 | static void amd_pmu_disable_all(void) |
@@ -875,7 +1087,25 @@ static void p6_pmu_enable_all(void) | |||
875 | 1087 | ||
876 | static void intel_pmu_enable_all(void) | 1088 | static void intel_pmu_enable_all(void) |
877 | { | 1089 | { |
1090 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1091 | |||
1092 | if (cpuc->enabled) | ||
1093 | return; | ||
1094 | |||
1095 | cpuc->enabled = 1; | ||
1096 | barrier(); | ||
1097 | |||
878 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 1098 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); |
1099 | |||
1100 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | ||
1101 | struct perf_counter *counter = | ||
1102 | cpuc->counters[X86_PMC_IDX_FIXED_BTS]; | ||
1103 | |||
1104 | if (WARN_ON_ONCE(!counter)) | ||
1105 | return; | ||
1106 | |||
1107 | intel_pmu_enable_bts(counter->hw.config); | ||
1108 | } | ||
879 | } | 1109 | } |
880 | 1110 | ||
881 | static void amd_pmu_enable_all(void) | 1111 | static void amd_pmu_enable_all(void) |
@@ -962,6 +1192,11 @@ p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | |||
962 | static inline void | 1192 | static inline void |
963 | intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | 1193 | intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) |
964 | { | 1194 | { |
1195 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1196 | intel_pmu_disable_bts(); | ||
1197 | return; | ||
1198 | } | ||
1199 | |||
965 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 1200 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
966 | intel_pmu_disable_fixed(hwc, idx); | 1201 | intel_pmu_disable_fixed(hwc, idx); |
967 | return; | 1202 | return; |
@@ -990,6 +1225,9 @@ x86_perf_counter_set_period(struct perf_counter *counter, | |||
990 | s64 period = hwc->sample_period; | 1225 | s64 period = hwc->sample_period; |
991 | int err, ret = 0; | 1226 | int err, ret = 0; |
992 | 1227 | ||
1228 | if (idx == X86_PMC_IDX_FIXED_BTS) | ||
1229 | return 0; | ||
1230 | |||
993 | /* | 1231 | /* |
994 | * If we are way outside a reasoable range then just skip forward: | 1232 | * If we are way outside a reasoable range then just skip forward: |
995 | */ | 1233 | */ |
@@ -1072,6 +1310,14 @@ static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | |||
1072 | 1310 | ||
1073 | static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | 1311 | static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) |
1074 | { | 1312 | { |
1313 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1314 | if (!__get_cpu_var(cpu_hw_counters).enabled) | ||
1315 | return; | ||
1316 | |||
1317 | intel_pmu_enable_bts(hwc->config); | ||
1318 | return; | ||
1319 | } | ||
1320 | |||
1075 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 1321 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
1076 | intel_pmu_enable_fixed(hwc, idx); | 1322 | intel_pmu_enable_fixed(hwc, idx); |
1077 | return; | 1323 | return; |
@@ -1093,11 +1339,16 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) | |||
1093 | { | 1339 | { |
1094 | unsigned int event; | 1340 | unsigned int event; |
1095 | 1341 | ||
1342 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; | ||
1343 | |||
1344 | if (unlikely((event == | ||
1345 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | ||
1346 | (hwc->sample_period == 1))) | ||
1347 | return X86_PMC_IDX_FIXED_BTS; | ||
1348 | |||
1096 | if (!x86_pmu.num_counters_fixed) | 1349 | if (!x86_pmu.num_counters_fixed) |
1097 | return -1; | 1350 | return -1; |
1098 | 1351 | ||
1099 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; | ||
1100 | |||
1101 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | 1352 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) |
1102 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; | 1353 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; |
1103 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) | 1354 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) |
@@ -1118,7 +1369,15 @@ static int x86_pmu_enable(struct perf_counter *counter) | |||
1118 | int idx; | 1369 | int idx; |
1119 | 1370 | ||
1120 | idx = fixed_mode_idx(counter, hwc); | 1371 | idx = fixed_mode_idx(counter, hwc); |
1121 | if (idx >= 0) { | 1372 | if (idx == X86_PMC_IDX_FIXED_BTS) { |
1373 | /* BTS is already occupied. */ | ||
1374 | if (test_and_set_bit(idx, cpuc->used_mask)) | ||
1375 | return -EAGAIN; | ||
1376 | |||
1377 | hwc->config_base = 0; | ||
1378 | hwc->counter_base = 0; | ||
1379 | hwc->idx = idx; | ||
1380 | } else if (idx >= 0) { | ||
1122 | /* | 1381 | /* |
1123 | * Try to get the fixed counter, if that is already taken | 1382 | * Try to get the fixed counter, if that is already taken |
1124 | * then try to get a generic counter: | 1383 | * then try to get a generic counter: |
@@ -1229,6 +1488,44 @@ void perf_counter_print_debug(void) | |||
1229 | local_irq_restore(flags); | 1488 | local_irq_restore(flags); |
1230 | } | 1489 | } |
1231 | 1490 | ||
1491 | static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, | ||
1492 | struct perf_sample_data *data) | ||
1493 | { | ||
1494 | struct debug_store *ds = cpuc->ds; | ||
1495 | struct bts_record { | ||
1496 | u64 from; | ||
1497 | u64 to; | ||
1498 | u64 flags; | ||
1499 | }; | ||
1500 | struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; | ||
1501 | unsigned long orig_ip = data->regs->ip; | ||
1502 | struct bts_record *at, *top; | ||
1503 | |||
1504 | if (!counter) | ||
1505 | return; | ||
1506 | |||
1507 | if (!ds) | ||
1508 | return; | ||
1509 | |||
1510 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
1511 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
1512 | |||
1513 | ds->bts_index = ds->bts_buffer_base; | ||
1514 | |||
1515 | for (; at < top; at++) { | ||
1516 | data->regs->ip = at->from; | ||
1517 | data->addr = at->to; | ||
1518 | |||
1519 | perf_counter_output(counter, 1, data); | ||
1520 | } | ||
1521 | |||
1522 | data->regs->ip = orig_ip; | ||
1523 | data->addr = 0; | ||
1524 | |||
1525 | /* There's new data available. */ | ||
1526 | counter->pending_kill = POLL_IN; | ||
1527 | } | ||
1528 | |||
1232 | static void x86_pmu_disable(struct perf_counter *counter) | 1529 | static void x86_pmu_disable(struct perf_counter *counter) |
1233 | { | 1530 | { |
1234 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | 1531 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); |
@@ -1253,6 +1550,15 @@ static void x86_pmu_disable(struct perf_counter *counter) | |||
1253 | * that we are disabling: | 1550 | * that we are disabling: |
1254 | */ | 1551 | */ |
1255 | x86_perf_counter_update(counter, hwc, idx); | 1552 | x86_perf_counter_update(counter, hwc, idx); |
1553 | |||
1554 | /* Drain the remaining BTS records. */ | ||
1555 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1556 | struct perf_sample_data data; | ||
1557 | struct pt_regs regs; | ||
1558 | |||
1559 | data.regs = ®s; | ||
1560 | intel_pmu_drain_bts_buffer(cpuc, &data); | ||
1561 | } | ||
1256 | cpuc->counters[idx] = NULL; | 1562 | cpuc->counters[idx] = NULL; |
1257 | clear_bit(idx, cpuc->used_mask); | 1563 | clear_bit(idx, cpuc->used_mask); |
1258 | 1564 | ||
@@ -1280,6 +1586,7 @@ static int intel_pmu_save_and_restart(struct perf_counter *counter) | |||
1280 | 1586 | ||
1281 | static void intel_pmu_reset(void) | 1587 | static void intel_pmu_reset(void) |
1282 | { | 1588 | { |
1589 | struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds; | ||
1283 | unsigned long flags; | 1590 | unsigned long flags; |
1284 | int idx; | 1591 | int idx; |
1285 | 1592 | ||
@@ -1297,6 +1604,8 @@ static void intel_pmu_reset(void) | |||
1297 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { | 1604 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { |
1298 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | 1605 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
1299 | } | 1606 | } |
1607 | if (ds) | ||
1608 | ds->bts_index = ds->bts_buffer_base; | ||
1300 | 1609 | ||
1301 | local_irq_restore(flags); | 1610 | local_irq_restore(flags); |
1302 | } | 1611 | } |
@@ -1362,6 +1671,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
1362 | cpuc = &__get_cpu_var(cpu_hw_counters); | 1671 | cpuc = &__get_cpu_var(cpu_hw_counters); |
1363 | 1672 | ||
1364 | perf_disable(); | 1673 | perf_disable(); |
1674 | intel_pmu_drain_bts_buffer(cpuc, &data); | ||
1365 | status = intel_pmu_get_status(); | 1675 | status = intel_pmu_get_status(); |
1366 | if (!status) { | 1676 | if (!status) { |
1367 | perf_enable(); | 1677 | perf_enable(); |
@@ -1571,6 +1881,8 @@ static struct x86_pmu intel_pmu = { | |||
1571 | * the generic counter period: | 1881 | * the generic counter period: |
1572 | */ | 1882 | */ |
1573 | .max_period = (1ULL << 31) - 1, | 1883 | .max_period = (1ULL << 31) - 1, |
1884 | .enable_bts = intel_pmu_enable_bts, | ||
1885 | .disable_bts = intel_pmu_disable_bts, | ||
1574 | }; | 1886 | }; |
1575 | 1887 | ||
1576 | static struct x86_pmu amd_pmu = { | 1888 | static struct x86_pmu amd_pmu = { |
@@ -1962,3 +2274,8 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
1962 | 2274 | ||
1963 | return entry; | 2275 | return entry; |
1964 | } | 2276 | } |
2277 | |||
2278 | void hw_perf_counter_setup_online(int cpu) | ||
2279 | { | ||
2280 | init_debug_store_on_cpu(cpu); | ||
2281 | } | ||
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index d94e1ea3b9fe..9dbb527e1652 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -417,10 +417,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, | |||
417 | unsigned long return_hooker = (unsigned long) | 417 | unsigned long return_hooker = (unsigned long) |
418 | &return_to_handler; | 418 | &return_to_handler; |
419 | 419 | ||
420 | /* Nmi's are currently unsupported */ | ||
421 | if (unlikely(in_nmi())) | ||
422 | return; | ||
423 | |||
424 | if (unlikely(atomic_read(¤t->tracing_graph_pause))) | 420 | if (unlikely(atomic_read(¤t->tracing_graph_pause))) |
425 | return; | 421 | return; |
426 | 422 | ||
@@ -498,37 +494,56 @@ static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) | |||
498 | 494 | ||
499 | struct syscall_metadata *syscall_nr_to_meta(int nr) | 495 | struct syscall_metadata *syscall_nr_to_meta(int nr) |
500 | { | 496 | { |
501 | if (!syscalls_metadata || nr >= FTRACE_SYSCALL_MAX || nr < 0) | 497 | if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) |
502 | return NULL; | 498 | return NULL; |
503 | 499 | ||
504 | return syscalls_metadata[nr]; | 500 | return syscalls_metadata[nr]; |
505 | } | 501 | } |
506 | 502 | ||
507 | void arch_init_ftrace_syscalls(void) | 503 | int syscall_name_to_nr(char *name) |
504 | { | ||
505 | int i; | ||
506 | |||
507 | if (!syscalls_metadata) | ||
508 | return -1; | ||
509 | |||
510 | for (i = 0; i < NR_syscalls; i++) { | ||
511 | if (syscalls_metadata[i]) { | ||
512 | if (!strcmp(syscalls_metadata[i]->name, name)) | ||
513 | return i; | ||
514 | } | ||
515 | } | ||
516 | return -1; | ||
517 | } | ||
518 | |||
519 | void set_syscall_enter_id(int num, int id) | ||
520 | { | ||
521 | syscalls_metadata[num]->enter_id = id; | ||
522 | } | ||
523 | |||
524 | void set_syscall_exit_id(int num, int id) | ||
525 | { | ||
526 | syscalls_metadata[num]->exit_id = id; | ||
527 | } | ||
528 | |||
529 | static int __init arch_init_ftrace_syscalls(void) | ||
508 | { | 530 | { |
509 | int i; | 531 | int i; |
510 | struct syscall_metadata *meta; | 532 | struct syscall_metadata *meta; |
511 | unsigned long **psys_syscall_table = &sys_call_table; | 533 | unsigned long **psys_syscall_table = &sys_call_table; |
512 | static atomic_t refs; | ||
513 | |||
514 | if (atomic_inc_return(&refs) != 1) | ||
515 | goto end; | ||
516 | 534 | ||
517 | syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * | 535 | syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * |
518 | FTRACE_SYSCALL_MAX, GFP_KERNEL); | 536 | NR_syscalls, GFP_KERNEL); |
519 | if (!syscalls_metadata) { | 537 | if (!syscalls_metadata) { |
520 | WARN_ON(1); | 538 | WARN_ON(1); |
521 | return; | 539 | return -ENOMEM; |
522 | } | 540 | } |
523 | 541 | ||
524 | for (i = 0; i < FTRACE_SYSCALL_MAX; i++) { | 542 | for (i = 0; i < NR_syscalls; i++) { |
525 | meta = find_syscall_meta(psys_syscall_table[i]); | 543 | meta = find_syscall_meta(psys_syscall_table[i]); |
526 | syscalls_metadata[i] = meta; | 544 | syscalls_metadata[i] = meta; |
527 | } | 545 | } |
528 | return; | 546 | return 0; |
529 | |||
530 | /* Paranoid: avoid overflow */ | ||
531 | end: | ||
532 | atomic_dec(&refs); | ||
533 | } | 547 | } |
548 | arch_initcall(arch_init_ftrace_syscalls); | ||
534 | #endif | 549 | #endif |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1a041bcf506b..d71c8655905b 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/dmar.h> | 3 | #include <linux/dmar.h> |
4 | #include <linux/bootmem.h> | 4 | #include <linux/bootmem.h> |
5 | #include <linux/pci.h> | 5 | #include <linux/pci.h> |
6 | #include <linux/kmemleak.h> | ||
6 | 7 | ||
7 | #include <asm/proto.h> | 8 | #include <asm/proto.h> |
8 | #include <asm/dma.h> | 9 | #include <asm/dma.h> |
@@ -32,7 +33,14 @@ int no_iommu __read_mostly; | |||
32 | /* Set this to 1 if there is a HW IOMMU in the system */ | 33 | /* Set this to 1 if there is a HW IOMMU in the system */ |
33 | int iommu_detected __read_mostly = 0; | 34 | int iommu_detected __read_mostly = 0; |
34 | 35 | ||
35 | int iommu_pass_through; | 36 | /* |
37 | * This variable becomes 1 if iommu=pt is passed on the kernel command line. | ||
38 | * If this variable is 1, IOMMU implementations do no DMA ranslation for | ||
39 | * devices and allow every device to access to whole physical memory. This is | ||
40 | * useful if a user want to use an IOMMU only for KVM device assignment to | ||
41 | * guests and not for driver dma translation. | ||
42 | */ | ||
43 | int iommu_pass_through __read_mostly; | ||
36 | 44 | ||
37 | dma_addr_t bad_dma_address __read_mostly = 0; | 45 | dma_addr_t bad_dma_address __read_mostly = 0; |
38 | EXPORT_SYMBOL(bad_dma_address); | 46 | EXPORT_SYMBOL(bad_dma_address); |
@@ -88,6 +96,11 @@ void __init dma32_reserve_bootmem(void) | |||
88 | size = roundup(dma32_bootmem_size, align); | 96 | size = roundup(dma32_bootmem_size, align); |
89 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, | 97 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, |
90 | 512ULL<<20); | 98 | 512ULL<<20); |
99 | /* | ||
100 | * Kmemleak should not scan this block as it may not be mapped via the | ||
101 | * kernel direct mapping. | ||
102 | */ | ||
103 | kmemleak_ignore(dma32_bootmem_ptr); | ||
91 | if (dma32_bootmem_ptr) | 104 | if (dma32_bootmem_ptr) |
92 | dma32_bootmem_size = size; | 105 | dma32_bootmem_size = size; |
93 | else | 106 | else |
@@ -147,7 +160,7 @@ again: | |||
147 | return NULL; | 160 | return NULL; |
148 | 161 | ||
149 | addr = page_to_phys(page); | 162 | addr = page_to_phys(page); |
150 | if (!is_buffer_dma_capable(dma_mask, addr, size)) { | 163 | if (addr + size > dma_mask) { |
151 | __free_pages(page, get_order(size)); | 164 | __free_pages(page, get_order(size)); |
152 | 165 | ||
153 | if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { | 166 | if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { |
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index d2e56b8f48e7..98a827ee9ed7 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -190,14 +190,13 @@ static void iommu_full(struct device *dev, size_t size, int dir) | |||
190 | static inline int | 190 | static inline int |
191 | need_iommu(struct device *dev, unsigned long addr, size_t size) | 191 | need_iommu(struct device *dev, unsigned long addr, size_t size) |
192 | { | 192 | { |
193 | return force_iommu || | 193 | return force_iommu || !dma_capable(dev, addr, size); |
194 | !is_buffer_dma_capable(*dev->dma_mask, addr, size); | ||
195 | } | 194 | } |
196 | 195 | ||
197 | static inline int | 196 | static inline int |
198 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) | 197 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) |
199 | { | 198 | { |
200 | return !is_buffer_dma_capable(*dev->dma_mask, addr, size); | 199 | return !dma_capable(dev, addr, size); |
201 | } | 200 | } |
202 | 201 | ||
203 | /* Map a single continuous physical area into the IOMMU. | 202 | /* Map a single continuous physical area into the IOMMU. |
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 71d412a09f30..a3933d4330cd 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
@@ -14,7 +14,7 @@ | |||
14 | static int | 14 | static int |
15 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) | 15 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) |
16 | { | 16 | { |
17 | if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) { | 17 | if (hwdev && !dma_capable(hwdev, bus, size)) { |
18 | if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) | 18 | if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) |
19 | printk(KERN_ERR | 19 | printk(KERN_ERR |
20 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", | 20 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", |
@@ -79,12 +79,29 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, | |||
79 | free_pages((unsigned long)vaddr, get_order(size)); | 79 | free_pages((unsigned long)vaddr, get_order(size)); |
80 | } | 80 | } |
81 | 81 | ||
82 | static void nommu_sync_single_for_device(struct device *dev, | ||
83 | dma_addr_t addr, size_t size, | ||
84 | enum dma_data_direction dir) | ||
85 | { | ||
86 | flush_write_buffers(); | ||
87 | } | ||
88 | |||
89 | |||
90 | static void nommu_sync_sg_for_device(struct device *dev, | ||
91 | struct scatterlist *sg, int nelems, | ||
92 | enum dma_data_direction dir) | ||
93 | { | ||
94 | flush_write_buffers(); | ||
95 | } | ||
96 | |||
82 | struct dma_map_ops nommu_dma_ops = { | 97 | struct dma_map_ops nommu_dma_ops = { |
83 | .alloc_coherent = dma_generic_alloc_coherent, | 98 | .alloc_coherent = dma_generic_alloc_coherent, |
84 | .free_coherent = nommu_free_coherent, | 99 | .free_coherent = nommu_free_coherent, |
85 | .map_sg = nommu_map_sg, | 100 | .map_sg = nommu_map_sg, |
86 | .map_page = nommu_map_page, | 101 | .map_page = nommu_map_page, |
87 | .is_phys = 1, | 102 | .sync_single_for_device = nommu_sync_single_for_device, |
103 | .sync_sg_for_device = nommu_sync_sg_for_device, | ||
104 | .is_phys = 1, | ||
88 | }; | 105 | }; |
89 | 106 | ||
90 | void __init no_iommu_init(void) | 107 | void __init no_iommu_init(void) |
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 6af96ee44200..e8a35016115f 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c | |||
@@ -13,31 +13,6 @@ | |||
13 | 13 | ||
14 | int swiotlb __read_mostly; | 14 | int swiotlb __read_mostly; |
15 | 15 | ||
16 | void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) | ||
17 | { | ||
18 | return alloc_bootmem_low_pages(size); | ||
19 | } | ||
20 | |||
21 | void *swiotlb_alloc(unsigned order, unsigned long nslabs) | ||
22 | { | ||
23 | return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); | ||
24 | } | ||
25 | |||
26 | dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) | ||
27 | { | ||
28 | return paddr; | ||
29 | } | ||
30 | |||
31 | phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) | ||
32 | { | ||
33 | return baddr; | ||
34 | } | ||
35 | |||
36 | int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) | ||
37 | { | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, | 16 | static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, |
42 | dma_addr_t *dma_handle, gfp_t flags) | 17 | dma_addr_t *dma_handle, gfp_t flags) |
43 | { | 18 | { |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 09ecbde91c13..8d7d5c9c1be3 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -35,10 +35,11 @@ | |||
35 | #include <asm/proto.h> | 35 | #include <asm/proto.h> |
36 | #include <asm/ds.h> | 36 | #include <asm/ds.h> |
37 | 37 | ||
38 | #include <trace/syscall.h> | ||
39 | |||
40 | #include "tls.h" | 38 | #include "tls.h" |
41 | 39 | ||
40 | #define CREATE_TRACE_POINTS | ||
41 | #include <trace/events/syscalls.h> | ||
42 | |||
42 | enum x86_regset { | 43 | enum x86_regset { |
43 | REGSET_GENERAL, | 44 | REGSET_GENERAL, |
44 | REGSET_FP, | 45 | REGSET_FP, |
@@ -1497,8 +1498,8 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) | |||
1497 | tracehook_report_syscall_entry(regs)) | 1498 | tracehook_report_syscall_entry(regs)) |
1498 | ret = -1L; | 1499 | ret = -1L; |
1499 | 1500 | ||
1500 | if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) | 1501 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
1501 | ftrace_syscall_enter(regs); | 1502 | trace_sys_enter(regs, regs->orig_ax); |
1502 | 1503 | ||
1503 | if (unlikely(current->audit_context)) { | 1504 | if (unlikely(current->audit_context)) { |
1504 | if (IS_IA32) | 1505 | if (IS_IA32) |
@@ -1523,8 +1524,8 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) | |||
1523 | if (unlikely(current->audit_context)) | 1524 | if (unlikely(current->audit_context)) |
1524 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | 1525 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); |
1525 | 1526 | ||
1526 | if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) | 1527 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
1527 | ftrace_syscall_exit(regs); | 1528 | trace_sys_exit(regs, regs->ax); |
1528 | 1529 | ||
1529 | if (test_thread_flag(TIF_SYSCALL_TRACE)) | 1530 | if (test_thread_flag(TIF_SYSCALL_TRACE)) |
1530 | tracehook_report_syscall_exit(regs, 0); | 1531 | tracehook_report_syscall_exit(regs, 0); |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4c578751e94e..81e58238c4ce 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -869,6 +869,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
869 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { | 869 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { |
870 | clear_thread_flag(TIF_NOTIFY_RESUME); | 870 | clear_thread_flag(TIF_NOTIFY_RESUME); |
871 | tracehook_notify_resume(regs); | 871 | tracehook_notify_resume(regs); |
872 | if (current->replacement_session_keyring) | ||
873 | key_replace_session_keyring(); | ||
872 | } | 874 | } |
873 | 875 | ||
874 | #ifdef CONFIG_X86_32 | 876 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 6bc211accf08..45e00eb09c3a 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
@@ -18,9 +18,9 @@ | |||
18 | #include <asm/ia32.h> | 18 | #include <asm/ia32.h> |
19 | #include <asm/syscalls.h> | 19 | #include <asm/syscalls.h> |
20 | 20 | ||
21 | asmlinkage long sys_mmap(unsigned long addr, unsigned long len, | 21 | SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, |
22 | unsigned long prot, unsigned long flags, | 22 | unsigned long, prot, unsigned long, flags, |
23 | unsigned long fd, unsigned long off) | 23 | unsigned long, fd, unsigned long, off) |
24 | { | 24 | { |
25 | long error; | 25 | long error; |
26 | struct file *file; | 26 | struct file *file; |
@@ -226,7 +226,7 @@ bottomup: | |||
226 | } | 226 | } |
227 | 227 | ||
228 | 228 | ||
229 | asmlinkage long sys_uname(struct new_utsname __user *name) | 229 | SYSCALL_DEFINE1(uname, struct new_utsname __user *, name) |
230 | { | 230 | { |
231 | int err; | 231 | int err; |
232 | down_read(&uts_sem); | 232 | down_read(&uts_sem); |