diff options
| author | David S. Miller <davem@davemloft.net> | 2009-09-11 23:35:13 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2009-09-11 23:35:13 -0400 |
| commit | cabc5c0f7fa1342049042d6e147db5a73773955b (patch) | |
| tree | 2be09ae1777d580c7dfe05d6d5b76e57281ec447 /arch/x86 | |
| parent | b73d884756303316ead4cd7dad51236b2a515a1a (diff) | |
| parent | 86d710146fb9975f04c505ec78caa43d227c1018 (diff) | |
Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
Conflicts:
arch/sparc/Kconfig
Diffstat (limited to 'arch/x86')
36 files changed, 1554 insertions, 679 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 13ffa5df37d7..fc20fdc0f7f2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
| @@ -38,7 +38,7 @@ config X86 | |||
| 38 | select HAVE_FUNCTION_GRAPH_FP_TEST | 38 | select HAVE_FUNCTION_GRAPH_FP_TEST |
| 39 | select HAVE_FUNCTION_TRACE_MCOUNT_TEST | 39 | select HAVE_FUNCTION_TRACE_MCOUNT_TEST |
| 40 | select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE | 40 | select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE |
| 41 | select HAVE_FTRACE_SYSCALLS | 41 | select HAVE_SYSCALL_TRACEPOINTS |
| 42 | select HAVE_KVM | 42 | select HAVE_KVM |
| 43 | select HAVE_ARCH_KGDB | 43 | select HAVE_ARCH_KGDB |
| 44 | select HAVE_ARCH_TRACEHOOK | 44 | select HAVE_ARCH_TRACEHOOK |
| @@ -586,7 +586,6 @@ config GART_IOMMU | |||
| 586 | bool "GART IOMMU support" if EMBEDDED | 586 | bool "GART IOMMU support" if EMBEDDED |
| 587 | default y | 587 | default y |
| 588 | select SWIOTLB | 588 | select SWIOTLB |
| 589 | select AGP | ||
| 590 | depends on X86_64 && PCI | 589 | depends on X86_64 && PCI |
| 591 | ---help--- | 590 | ---help--- |
| 592 | Support for full DMA access of devices with 32bit memory access only | 591 | Support for full DMA access of devices with 32bit memory access only |
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index edb992ebef92..d28fad19654a 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig | |||
| @@ -2355,7 +2355,7 @@ CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y | |||
| 2355 | CONFIG_HAVE_DYNAMIC_FTRACE=y | 2355 | CONFIG_HAVE_DYNAMIC_FTRACE=y |
| 2356 | CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y | 2356 | CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y |
| 2357 | CONFIG_HAVE_HW_BRANCH_TRACER=y | 2357 | CONFIG_HAVE_HW_BRANCH_TRACER=y |
| 2358 | CONFIG_HAVE_FTRACE_SYSCALLS=y | 2358 | CONFIG_HAVE_SYSCALL_TRACEPOINTS=y |
| 2359 | CONFIG_RING_BUFFER=y | 2359 | CONFIG_RING_BUFFER=y |
| 2360 | CONFIG_TRACING=y | 2360 | CONFIG_TRACING=y |
| 2361 | CONFIG_TRACING_SUPPORT=y | 2361 | CONFIG_TRACING_SUPPORT=y |
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index cee1dd2e69b2..6c86acd847a4 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig | |||
| @@ -2329,7 +2329,7 @@ CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y | |||
| 2329 | CONFIG_HAVE_DYNAMIC_FTRACE=y | 2329 | CONFIG_HAVE_DYNAMIC_FTRACE=y |
| 2330 | CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y | 2330 | CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y |
| 2331 | CONFIG_HAVE_HW_BRANCH_TRACER=y | 2331 | CONFIG_HAVE_HW_BRANCH_TRACER=y |
| 2332 | CONFIG_HAVE_FTRACE_SYSCALLS=y | 2332 | CONFIG_HAVE_SYSCALL_TRACEPOINTS=y |
| 2333 | CONFIG_RING_BUFFER=y | 2333 | CONFIG_RING_BUFFER=y |
| 2334 | CONFIG_TRACING=y | 2334 | CONFIG_TRACING=y |
| 2335 | CONFIG_TRACING_SUPPORT=y | 2335 | CONFIG_TRACING_SUPPORT=y |
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index c580c5ec1cad..d3ec8d588d4b 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
| @@ -636,7 +636,7 @@ static int __init aesni_init(void) | |||
| 636 | int err; | 636 | int err; |
| 637 | 637 | ||
| 638 | if (!cpu_has_aes) { | 638 | if (!cpu_has_aes) { |
| 639 | printk(KERN_ERR "Intel AES-NI instructions are not detected.\n"); | 639 | printk(KERN_INFO "Intel AES-NI instructions are not detected.\n"); |
| 640 | return -ENODEV; | 640 | return -ENODEV; |
| 641 | } | 641 | } |
| 642 | if ((err = crypto_register_alg(&aesni_alg))) | 642 | if ((err = crypto_register_alg(&aesni_alg))) |
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index bdf96f119f06..ac95995b7bad 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #ifdef CONFIG_AMD_IOMMU | 25 | #ifdef CONFIG_AMD_IOMMU |
| 26 | extern int amd_iommu_init(void); | 26 | extern int amd_iommu_init(void); |
| 27 | extern int amd_iommu_init_dma_ops(void); | 27 | extern int amd_iommu_init_dma_ops(void); |
| 28 | extern int amd_iommu_init_passthrough(void); | ||
| 28 | extern void amd_iommu_detect(void); | 29 | extern void amd_iommu_detect(void); |
| 29 | extern irqreturn_t amd_iommu_int_handler(int irq, void *data); | 30 | extern irqreturn_t amd_iommu_int_handler(int irq, void *data); |
| 30 | extern void amd_iommu_flush_all_domains(void); | 31 | extern void amd_iommu_flush_all_domains(void); |
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 0c878caaa0a2..2a2cc7a78a81 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h | |||
| @@ -143,22 +143,29 @@ | |||
| 143 | #define EVT_BUFFER_SIZE 8192 /* 512 entries */ | 143 | #define EVT_BUFFER_SIZE 8192 /* 512 entries */ |
| 144 | #define EVT_LEN_MASK (0x9ULL << 56) | 144 | #define EVT_LEN_MASK (0x9ULL << 56) |
| 145 | 145 | ||
| 146 | #define PAGE_MODE_NONE 0x00 | ||
| 146 | #define PAGE_MODE_1_LEVEL 0x01 | 147 | #define PAGE_MODE_1_LEVEL 0x01 |
| 147 | #define PAGE_MODE_2_LEVEL 0x02 | 148 | #define PAGE_MODE_2_LEVEL 0x02 |
| 148 | #define PAGE_MODE_3_LEVEL 0x03 | 149 | #define PAGE_MODE_3_LEVEL 0x03 |
| 149 | 150 | #define PAGE_MODE_4_LEVEL 0x04 | |
| 150 | #define IOMMU_PDE_NL_0 0x000ULL | 151 | #define PAGE_MODE_5_LEVEL 0x05 |
| 151 | #define IOMMU_PDE_NL_1 0x200ULL | 152 | #define PAGE_MODE_6_LEVEL 0x06 |
| 152 | #define IOMMU_PDE_NL_2 0x400ULL | 153 | |
| 153 | #define IOMMU_PDE_NL_3 0x600ULL | 154 | #define PM_LEVEL_SHIFT(x) (12 + ((x) * 9)) |
| 154 | 155 | #define PM_LEVEL_SIZE(x) (((x) < 6) ? \ | |
| 155 | #define IOMMU_PTE_L2_INDEX(address) (((address) >> 30) & 0x1ffULL) | 156 | ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \ |
| 156 | #define IOMMU_PTE_L1_INDEX(address) (((address) >> 21) & 0x1ffULL) | 157 | (0xffffffffffffffffULL)) |
| 157 | #define IOMMU_PTE_L0_INDEX(address) (((address) >> 12) & 0x1ffULL) | 158 | #define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL) |
| 158 | 159 | #define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL) | |
| 159 | #define IOMMU_MAP_SIZE_L1 (1ULL << 21) | 160 | #define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \ |
| 160 | #define IOMMU_MAP_SIZE_L2 (1ULL << 30) | 161 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) |
| 161 | #define IOMMU_MAP_SIZE_L3 (1ULL << 39) | 162 | #define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL) |
| 163 | |||
| 164 | #define PM_MAP_4k 0 | ||
| 165 | #define PM_ADDR_MASK 0x000ffffffffff000ULL | ||
| 166 | #define PM_MAP_MASK(lvl) (PM_ADDR_MASK & \ | ||
| 167 | (~((1ULL << (12 + ((lvl) * 9))) - 1))) | ||
| 168 | #define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) | ||
| 162 | 169 | ||
| 163 | #define IOMMU_PTE_P (1ULL << 0) | 170 | #define IOMMU_PTE_P (1ULL << 0) |
| 164 | #define IOMMU_PTE_TV (1ULL << 1) | 171 | #define IOMMU_PTE_TV (1ULL << 1) |
| @@ -167,11 +174,6 @@ | |||
| 167 | #define IOMMU_PTE_IR (1ULL << 61) | 174 | #define IOMMU_PTE_IR (1ULL << 61) |
| 168 | #define IOMMU_PTE_IW (1ULL << 62) | 175 | #define IOMMU_PTE_IW (1ULL << 62) |
| 169 | 176 | ||
| 170 | #define IOMMU_L1_PDE(address) \ | ||
| 171 | ((address) | IOMMU_PDE_NL_1 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) | ||
| 172 | #define IOMMU_L2_PDE(address) \ | ||
| 173 | ((address) | IOMMU_PDE_NL_2 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) | ||
| 174 | |||
| 175 | #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) | 177 | #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) |
| 176 | #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) | 178 | #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) |
| 177 | #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) | 179 | #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) |
| @@ -194,11 +196,14 @@ | |||
| 194 | #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ | 196 | #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ |
| 195 | #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops | 197 | #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops |
| 196 | domain for an IOMMU */ | 198 | domain for an IOMMU */ |
| 199 | #define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page | ||
| 200 | translation */ | ||
| 201 | |||
| 197 | extern bool amd_iommu_dump; | 202 | extern bool amd_iommu_dump; |
| 198 | #define DUMP_printk(format, arg...) \ | 203 | #define DUMP_printk(format, arg...) \ |
| 199 | do { \ | 204 | do { \ |
| 200 | if (amd_iommu_dump) \ | 205 | if (amd_iommu_dump) \ |
| 201 | printk(KERN_INFO "AMD IOMMU: " format, ## arg); \ | 206 | printk(KERN_INFO "AMD-Vi: " format, ## arg); \ |
| 202 | } while(0); | 207 | } while(0); |
| 203 | 208 | ||
| 204 | /* | 209 | /* |
| @@ -226,6 +231,7 @@ struct protection_domain { | |||
| 226 | int mode; /* paging mode (0-6 levels) */ | 231 | int mode; /* paging mode (0-6 levels) */ |
| 227 | u64 *pt_root; /* page table root pointer */ | 232 | u64 *pt_root; /* page table root pointer */ |
| 228 | unsigned long flags; /* flags to find out type of domain */ | 233 | unsigned long flags; /* flags to find out type of domain */ |
| 234 | bool updated; /* complete domain flush required */ | ||
| 229 | unsigned dev_cnt; /* devices assigned to this domain */ | 235 | unsigned dev_cnt; /* devices assigned to this domain */ |
| 230 | void *priv; /* private data */ | 236 | void *priv; /* private data */ |
| 231 | }; | 237 | }; |
| @@ -337,6 +343,9 @@ struct amd_iommu { | |||
| 337 | /* if one, we need to send a completion wait command */ | 343 | /* if one, we need to send a completion wait command */ |
| 338 | bool need_sync; | 344 | bool need_sync; |
| 339 | 345 | ||
| 346 | /* becomes true if a command buffer reset is running */ | ||
| 347 | bool reset_in_progress; | ||
| 348 | |||
| 340 | /* default dma_ops domain for that IOMMU */ | 349 | /* default dma_ops domain for that IOMMU */ |
| 341 | struct dma_ops_domain *default_dom; | 350 | struct dma_ops_domain *default_dom; |
| 342 | }; | 351 | }; |
| @@ -457,4 +466,7 @@ static inline void amd_iommu_stats_init(void) { } | |||
| 457 | 466 | ||
| 458 | #endif /* CONFIG_AMD_IOMMU_STATS */ | 467 | #endif /* CONFIG_AMD_IOMMU_STATS */ |
| 459 | 468 | ||
| 469 | /* some function prototypes */ | ||
| 470 | extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); | ||
| 471 | |||
| 460 | #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ | 472 | #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ |
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 1c3f9435f1c9..0ee770d23d0e 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h | |||
| @@ -55,6 +55,24 @@ extern int dma_set_mask(struct device *dev, u64 mask); | |||
| 55 | extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, | 55 | extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, |
| 56 | dma_addr_t *dma_addr, gfp_t flag); | 56 | dma_addr_t *dma_addr, gfp_t flag); |
| 57 | 57 | ||
| 58 | static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) | ||
| 59 | { | ||
| 60 | if (!dev->dma_mask) | ||
| 61 | return 0; | ||
| 62 | |||
| 63 | return addr + size <= *dev->dma_mask; | ||
| 64 | } | ||
| 65 | |||
| 66 | static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) | ||
| 67 | { | ||
| 68 | return paddr; | ||
| 69 | } | ||
| 70 | |||
| 71 | static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) | ||
| 72 | { | ||
| 73 | return daddr; | ||
| 74 | } | ||
| 75 | |||
| 58 | static inline void | 76 | static inline void |
| 59 | dma_cache_sync(struct device *dev, void *vaddr, size_t size, | 77 | dma_cache_sync(struct device *dev, void *vaddr, size_t size, |
| 60 | enum dma_data_direction dir) | 78 | enum dma_data_direction dir) |
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index bd2c6511c887..db24c2278be0 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h | |||
| @@ -28,13 +28,6 @@ | |||
| 28 | 28 | ||
| 29 | #endif | 29 | #endif |
| 30 | 30 | ||
| 31 | /* FIXME: I don't want to stay hardcoded */ | ||
| 32 | #ifdef CONFIG_X86_64 | ||
| 33 | # define FTRACE_SYSCALL_MAX 296 | ||
| 34 | #else | ||
| 35 | # define FTRACE_SYSCALL_MAX 333 | ||
| 36 | #endif | ||
| 37 | |||
| 38 | #ifdef CONFIG_FUNCTION_TRACER | 31 | #ifdef CONFIG_FUNCTION_TRACER |
| 39 | #define MCOUNT_ADDR ((long)(mcount)) | 32 | #define MCOUNT_ADDR ((long)(mcount)) |
| 40 | #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ | 33 | #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ |
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c86e5ed4af51..e63cf7d441e1 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h | |||
| @@ -45,8 +45,8 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, | |||
| 45 | void __user *, size_t *, loff_t *); | 45 | void __user *, size_t *, loff_t *); |
| 46 | extern int unknown_nmi_panic; | 46 | extern int unknown_nmi_panic; |
| 47 | 47 | ||
| 48 | void __trigger_all_cpu_backtrace(void); | 48 | void arch_trigger_all_cpu_backtrace(void); |
| 49 | #define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() | 49 | #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace |
| 50 | 50 | ||
| 51 | static inline void localise_nmi_watchdog(void) | 51 | static inline void localise_nmi_watchdog(void) |
| 52 | { | 52 | { |
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index fa64e401589d..e7b7c938ae27 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h | |||
| @@ -84,6 +84,16 @@ union cpuid10_edx { | |||
| 84 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b | 84 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b |
| 85 | #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) | 85 | #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) |
| 86 | 86 | ||
| 87 | /* | ||
| 88 | * We model BTS tracing as another fixed-mode PMC. | ||
| 89 | * | ||
| 90 | * We choose a value in the middle of the fixed counter range, since lower | ||
| 91 | * values are used by actual fixed counters and higher values are used | ||
| 92 | * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. | ||
| 93 | */ | ||
| 94 | #define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) | ||
| 95 | |||
| 96 | |||
| 87 | #ifdef CONFIG_PERF_COUNTERS | 97 | #ifdef CONFIG_PERF_COUNTERS |
| 88 | extern void init_hw_perf_counters(void); | 98 | extern void init_hw_perf_counters(void); |
| 89 | extern void perf_counters_lapic_init(void); | 99 | extern void perf_counters_lapic_init(void); |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index fad7d40b75f8..6f7786aea4fc 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
| @@ -95,7 +95,7 @@ struct thread_info { | |||
| 95 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ | 95 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ |
| 96 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ | 96 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ |
| 97 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ | 97 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ |
| 98 | #define TIF_SYSCALL_FTRACE 28 /* for ftrace syscall instrumentation */ | 98 | #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ |
| 99 | 99 | ||
| 100 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | 100 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) |
| 101 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | 101 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) |
| @@ -118,17 +118,17 @@ struct thread_info { | |||
| 118 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) | 118 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) |
| 119 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) | 119 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) |
| 120 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) | 120 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) |
| 121 | #define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE) | 121 | #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) |
| 122 | 122 | ||
| 123 | /* work to do in syscall_trace_enter() */ | 123 | /* work to do in syscall_trace_enter() */ |
| 124 | #define _TIF_WORK_SYSCALL_ENTRY \ | 124 | #define _TIF_WORK_SYSCALL_ENTRY \ |
| 125 | (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_FTRACE | \ | 125 | (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ |
| 126 | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP) | 126 | _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT) |
| 127 | 127 | ||
| 128 | /* work to do in syscall_trace_leave() */ | 128 | /* work to do in syscall_trace_leave() */ |
| 129 | #define _TIF_WORK_SYSCALL_EXIT \ | 129 | #define _TIF_WORK_SYSCALL_EXIT \ |
| 130 | (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ | 130 | (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ |
| 131 | _TIF_SYSCALL_FTRACE) | 131 | _TIF_SYSCALL_TRACEPOINT) |
| 132 | 132 | ||
| 133 | /* work to do on interrupt/exception return */ | 133 | /* work to do on interrupt/exception return */ |
| 134 | #define _TIF_WORK_MASK \ | 134 | #define _TIF_WORK_MASK \ |
| @@ -137,7 +137,8 @@ struct thread_info { | |||
| 137 | _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU)) | 137 | _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU)) |
| 138 | 138 | ||
| 139 | /* work to do on any return to user space */ | 139 | /* work to do on any return to user space */ |
| 140 | #define _TIF_ALLWORK_MASK ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_FTRACE) | 140 | #define _TIF_ALLWORK_MASK \ |
| 141 | ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT) | ||
| 141 | 142 | ||
| 142 | /* Only used for 64 bit */ | 143 | /* Only used for 64 bit */ |
| 143 | #define _TIF_DO_NOTIFY_MASK \ | 144 | #define _TIF_DO_NOTIFY_MASK \ |
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 066ef590d7e0..26d06e052a18 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
| @@ -129,25 +129,34 @@ extern unsigned long node_remap_size[]; | |||
| 129 | #endif | 129 | #endif |
| 130 | 130 | ||
| 131 | /* sched_domains SD_NODE_INIT for NUMA machines */ | 131 | /* sched_domains SD_NODE_INIT for NUMA machines */ |
| 132 | #define SD_NODE_INIT (struct sched_domain) { \ | 132 | #define SD_NODE_INIT (struct sched_domain) { \ |
| 133 | .min_interval = 8, \ | 133 | .min_interval = 8, \ |
| 134 | .max_interval = 32, \ | 134 | .max_interval = 32, \ |
| 135 | .busy_factor = 32, \ | 135 | .busy_factor = 32, \ |
| 136 | .imbalance_pct = 125, \ | 136 | .imbalance_pct = 125, \ |
| 137 | .cache_nice_tries = SD_CACHE_NICE_TRIES, \ | 137 | .cache_nice_tries = SD_CACHE_NICE_TRIES, \ |
| 138 | .busy_idx = 3, \ | 138 | .busy_idx = 3, \ |
| 139 | .idle_idx = SD_IDLE_IDX, \ | 139 | .idle_idx = SD_IDLE_IDX, \ |
| 140 | .newidle_idx = SD_NEWIDLE_IDX, \ | 140 | .newidle_idx = SD_NEWIDLE_IDX, \ |
| 141 | .wake_idx = 1, \ | 141 | .wake_idx = 1, \ |
| 142 | .forkexec_idx = SD_FORKEXEC_IDX, \ | 142 | .forkexec_idx = SD_FORKEXEC_IDX, \ |
| 143 | .flags = SD_LOAD_BALANCE \ | 143 | \ |
| 144 | | SD_BALANCE_EXEC \ | 144 | .flags = 1*SD_LOAD_BALANCE \ |
| 145 | | SD_BALANCE_FORK \ | 145 | | 1*SD_BALANCE_NEWIDLE \ |
| 146 | | SD_WAKE_AFFINE \ | 146 | | 1*SD_BALANCE_EXEC \ |
| 147 | | SD_WAKE_BALANCE \ | 147 | | 1*SD_BALANCE_FORK \ |
| 148 | | SD_SERIALIZE, \ | 148 | | 0*SD_WAKE_IDLE \ |
| 149 | .last_balance = jiffies, \ | 149 | | 1*SD_WAKE_AFFINE \ |
| 150 | .balance_interval = 1, \ | 150 | | 1*SD_WAKE_BALANCE \ |
| 151 | | 0*SD_SHARE_CPUPOWER \ | ||
| 152 | | 0*SD_POWERSAVINGS_BALANCE \ | ||
| 153 | | 0*SD_SHARE_PKG_RESOURCES \ | ||
| 154 | | 1*SD_SERIALIZE \ | ||
| 155 | | 1*SD_WAKE_IDLE_FAR \ | ||
| 156 | | 0*SD_PREFER_SIBLING \ | ||
| 157 | , \ | ||
| 158 | .last_balance = jiffies, \ | ||
| 159 | .balance_interval = 1, \ | ||
| 151 | } | 160 | } |
| 152 | 161 | ||
| 153 | #ifdef CONFIG_X86_64_ACPI_NUMA | 162 | #ifdef CONFIG_X86_64_ACPI_NUMA |
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 732a30706153..8deaada61bc8 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
| @@ -345,6 +345,8 @@ | |||
| 345 | 345 | ||
| 346 | #ifdef __KERNEL__ | 346 | #ifdef __KERNEL__ |
| 347 | 347 | ||
| 348 | #define NR_syscalls 337 | ||
| 349 | |||
| 348 | #define __ARCH_WANT_IPC_PARSE_VERSION | 350 | #define __ARCH_WANT_IPC_PARSE_VERSION |
| 349 | #define __ARCH_WANT_OLD_READDIR | 351 | #define __ARCH_WANT_OLD_READDIR |
| 350 | #define __ARCH_WANT_OLD_STAT | 352 | #define __ARCH_WANT_OLD_STAT |
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 900e1617e672..b9f3c60de5f7 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
| @@ -688,6 +688,12 @@ __SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) | |||
| 688 | #endif /* __NO_STUBS */ | 688 | #endif /* __NO_STUBS */ |
| 689 | 689 | ||
| 690 | #ifdef __KERNEL__ | 690 | #ifdef __KERNEL__ |
| 691 | |||
| 692 | #ifndef COMPILE_OFFSETS | ||
| 693 | #include <asm/asm-offsets.h> | ||
| 694 | #define NR_syscalls (__NR_syscall_max + 1) | ||
| 695 | #endif | ||
| 696 | |||
| 691 | /* | 697 | /* |
| 692 | * "Conditional" syscalls | 698 | * "Conditional" syscalls |
| 693 | * | 699 | * |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6c99f5037801..98f230f6a28d 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
| @@ -41,9 +41,13 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); | |||
| 41 | static LIST_HEAD(iommu_pd_list); | 41 | static LIST_HEAD(iommu_pd_list); |
| 42 | static DEFINE_SPINLOCK(iommu_pd_list_lock); | 42 | static DEFINE_SPINLOCK(iommu_pd_list_lock); |
| 43 | 43 | ||
| 44 | #ifdef CONFIG_IOMMU_API | 44 | /* |
| 45 | * Domain for untranslated devices - only allocated | ||
| 46 | * if iommu=pt passed on kernel cmd line. | ||
| 47 | */ | ||
| 48 | static struct protection_domain *pt_domain; | ||
| 49 | |||
| 45 | static struct iommu_ops amd_iommu_ops; | 50 | static struct iommu_ops amd_iommu_ops; |
| 46 | #endif | ||
| 47 | 51 | ||
| 48 | /* | 52 | /* |
| 49 | * general struct to manage commands send to an IOMMU | 53 | * general struct to manage commands send to an IOMMU |
| @@ -55,16 +59,16 @@ struct iommu_cmd { | |||
| 55 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | 59 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, |
| 56 | struct unity_map_entry *e); | 60 | struct unity_map_entry *e); |
| 57 | static struct dma_ops_domain *find_protection_domain(u16 devid); | 61 | static struct dma_ops_domain *find_protection_domain(u16 devid); |
| 58 | static u64* alloc_pte(struct protection_domain *dom, | 62 | static u64 *alloc_pte(struct protection_domain *domain, |
| 59 | unsigned long address, u64 | 63 | unsigned long address, int end_lvl, |
| 60 | **pte_page, gfp_t gfp); | 64 | u64 **pte_page, gfp_t gfp); |
| 61 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | 65 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, |
| 62 | unsigned long start_page, | 66 | unsigned long start_page, |
| 63 | unsigned int pages); | 67 | unsigned int pages); |
| 64 | 68 | static void reset_iommu_command_buffer(struct amd_iommu *iommu); | |
| 65 | #ifndef BUS_NOTIFY_UNBOUND_DRIVER | 69 | static u64 *fetch_pte(struct protection_domain *domain, |
| 66 | #define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 | 70 | unsigned long address, int map_size); |
| 67 | #endif | 71 | static void update_domain(struct protection_domain *domain); |
| 68 | 72 | ||
| 69 | #ifdef CONFIG_AMD_IOMMU_STATS | 73 | #ifdef CONFIG_AMD_IOMMU_STATS |
| 70 | 74 | ||
| @@ -138,7 +142,25 @@ static int iommu_has_npcache(struct amd_iommu *iommu) | |||
| 138 | * | 142 | * |
| 139 | ****************************************************************************/ | 143 | ****************************************************************************/ |
| 140 | 144 | ||
| 141 | static void iommu_print_event(void *__evt) | 145 | static void dump_dte_entry(u16 devid) |
| 146 | { | ||
| 147 | int i; | ||
| 148 | |||
| 149 | for (i = 0; i < 8; ++i) | ||
| 150 | pr_err("AMD-Vi: DTE[%d]: %08x\n", i, | ||
| 151 | amd_iommu_dev_table[devid].data[i]); | ||
| 152 | } | ||
| 153 | |||
| 154 | static void dump_command(unsigned long phys_addr) | ||
| 155 | { | ||
| 156 | struct iommu_cmd *cmd = phys_to_virt(phys_addr); | ||
| 157 | int i; | ||
| 158 | |||
| 159 | for (i = 0; i < 4; ++i) | ||
| 160 | pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); | ||
| 161 | } | ||
| 162 | |||
| 163 | static void iommu_print_event(struct amd_iommu *iommu, void *__evt) | ||
| 142 | { | 164 | { |
| 143 | u32 *event = __evt; | 165 | u32 *event = __evt; |
| 144 | int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; | 166 | int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; |
| @@ -147,7 +169,7 @@ static void iommu_print_event(void *__evt) | |||
| 147 | int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; | 169 | int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; |
| 148 | u64 address = (u64)(((u64)event[3]) << 32) | event[2]; | 170 | u64 address = (u64)(((u64)event[3]) << 32) | event[2]; |
| 149 | 171 | ||
| 150 | printk(KERN_ERR "AMD IOMMU: Event logged ["); | 172 | printk(KERN_ERR "AMD-Vi: Event logged ["); |
| 151 | 173 | ||
| 152 | switch (type) { | 174 | switch (type) { |
| 153 | case EVENT_TYPE_ILL_DEV: | 175 | case EVENT_TYPE_ILL_DEV: |
| @@ -155,6 +177,7 @@ static void iommu_print_event(void *__evt) | |||
| 155 | "address=0x%016llx flags=0x%04x]\n", | 177 | "address=0x%016llx flags=0x%04x]\n", |
| 156 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | 178 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), |
| 157 | address, flags); | 179 | address, flags); |
| 180 | dump_dte_entry(devid); | ||
| 158 | break; | 181 | break; |
| 159 | case EVENT_TYPE_IO_FAULT: | 182 | case EVENT_TYPE_IO_FAULT: |
| 160 | printk("IO_PAGE_FAULT device=%02x:%02x.%x " | 183 | printk("IO_PAGE_FAULT device=%02x:%02x.%x " |
| @@ -176,6 +199,8 @@ static void iommu_print_event(void *__evt) | |||
| 176 | break; | 199 | break; |
| 177 | case EVENT_TYPE_ILL_CMD: | 200 | case EVENT_TYPE_ILL_CMD: |
| 178 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); | 201 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); |
| 202 | reset_iommu_command_buffer(iommu); | ||
| 203 | dump_command(address); | ||
| 179 | break; | 204 | break; |
| 180 | case EVENT_TYPE_CMD_HARD_ERR: | 205 | case EVENT_TYPE_CMD_HARD_ERR: |
| 181 | printk("COMMAND_HARDWARE_ERROR address=0x%016llx " | 206 | printk("COMMAND_HARDWARE_ERROR address=0x%016llx " |
| @@ -209,7 +234,7 @@ static void iommu_poll_events(struct amd_iommu *iommu) | |||
| 209 | tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); | 234 | tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); |
| 210 | 235 | ||
| 211 | while (head != tail) { | 236 | while (head != tail) { |
| 212 | iommu_print_event(iommu->evt_buf + head); | 237 | iommu_print_event(iommu, iommu->evt_buf + head); |
| 213 | head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; | 238 | head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; |
| 214 | } | 239 | } |
| 215 | 240 | ||
| @@ -296,8 +321,11 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu) | |||
| 296 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; | 321 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; |
| 297 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | 322 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); |
| 298 | 323 | ||
| 299 | if (unlikely(i == EXIT_LOOP_COUNT)) | 324 | if (unlikely(i == EXIT_LOOP_COUNT)) { |
| 300 | panic("AMD IOMMU: Completion wait loop failed\n"); | 325 | spin_unlock(&iommu->lock); |
| 326 | reset_iommu_command_buffer(iommu); | ||
| 327 | spin_lock(&iommu->lock); | ||
| 328 | } | ||
| 301 | } | 329 | } |
| 302 | 330 | ||
| 303 | /* | 331 | /* |
| @@ -445,47 +473,78 @@ static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) | |||
| 445 | } | 473 | } |
| 446 | 474 | ||
| 447 | /* | 475 | /* |
| 476 | * This function flushes one domain on one IOMMU | ||
| 477 | */ | ||
| 478 | static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) | ||
| 479 | { | ||
| 480 | struct iommu_cmd cmd; | ||
| 481 | unsigned long flags; | ||
| 482 | |||
| 483 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | ||
| 484 | domid, 1, 1); | ||
| 485 | |||
| 486 | spin_lock_irqsave(&iommu->lock, flags); | ||
| 487 | __iommu_queue_command(iommu, &cmd); | ||
| 488 | __iommu_completion_wait(iommu); | ||
| 489 | __iommu_wait_for_completion(iommu); | ||
| 490 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 491 | } | ||
| 492 | |||
| 493 | static void flush_all_domains_on_iommu(struct amd_iommu *iommu) | ||
| 494 | { | ||
| 495 | int i; | ||
| 496 | |||
| 497 | for (i = 1; i < MAX_DOMAIN_ID; ++i) { | ||
| 498 | if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) | ||
| 499 | continue; | ||
| 500 | flush_domain_on_iommu(iommu, i); | ||
| 501 | } | ||
| 502 | |||
| 503 | } | ||
| 504 | |||
| 505 | /* | ||
| 448 | * This function is used to flush the IO/TLB for a given protection domain | 506 | * This function is used to flush the IO/TLB for a given protection domain |
| 449 | * on every IOMMU in the system | 507 | * on every IOMMU in the system |
| 450 | */ | 508 | */ |
| 451 | static void iommu_flush_domain(u16 domid) | 509 | static void iommu_flush_domain(u16 domid) |
| 452 | { | 510 | { |
| 453 | unsigned long flags; | ||
| 454 | struct amd_iommu *iommu; | 511 | struct amd_iommu *iommu; |
| 455 | struct iommu_cmd cmd; | ||
| 456 | 512 | ||
| 457 | INC_STATS_COUNTER(domain_flush_all); | 513 | INC_STATS_COUNTER(domain_flush_all); |
| 458 | 514 | ||
| 459 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | 515 | for_each_iommu(iommu) |
| 460 | domid, 1, 1); | 516 | flush_domain_on_iommu(iommu, domid); |
| 461 | |||
| 462 | for_each_iommu(iommu) { | ||
| 463 | spin_lock_irqsave(&iommu->lock, flags); | ||
| 464 | __iommu_queue_command(iommu, &cmd); | ||
| 465 | __iommu_completion_wait(iommu); | ||
| 466 | __iommu_wait_for_completion(iommu); | ||
| 467 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 468 | } | ||
| 469 | } | 517 | } |
| 470 | 518 | ||
| 471 | void amd_iommu_flush_all_domains(void) | 519 | void amd_iommu_flush_all_domains(void) |
| 472 | { | 520 | { |
| 521 | struct amd_iommu *iommu; | ||
| 522 | |||
| 523 | for_each_iommu(iommu) | ||
| 524 | flush_all_domains_on_iommu(iommu); | ||
| 525 | } | ||
| 526 | |||
| 527 | static void flush_all_devices_for_iommu(struct amd_iommu *iommu) | ||
| 528 | { | ||
| 473 | int i; | 529 | int i; |
| 474 | 530 | ||
| 475 | for (i = 1; i < MAX_DOMAIN_ID; ++i) { | 531 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { |
| 476 | if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) | 532 | if (iommu != amd_iommu_rlookup_table[i]) |
| 477 | continue; | 533 | continue; |
| 478 | iommu_flush_domain(i); | 534 | |
| 535 | iommu_queue_inv_dev_entry(iommu, i); | ||
| 536 | iommu_completion_wait(iommu); | ||
| 479 | } | 537 | } |
| 480 | } | 538 | } |
| 481 | 539 | ||
| 482 | void amd_iommu_flush_all_devices(void) | 540 | static void flush_devices_by_domain(struct protection_domain *domain) |
| 483 | { | 541 | { |
| 484 | struct amd_iommu *iommu; | 542 | struct amd_iommu *iommu; |
| 485 | int i; | 543 | int i; |
| 486 | 544 | ||
| 487 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | 545 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { |
| 488 | if (amd_iommu_pd_table[i] == NULL) | 546 | if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || |
| 547 | (amd_iommu_pd_table[i] != domain)) | ||
| 489 | continue; | 548 | continue; |
| 490 | 549 | ||
| 491 | iommu = amd_iommu_rlookup_table[i]; | 550 | iommu = amd_iommu_rlookup_table[i]; |
| @@ -497,6 +556,27 @@ void amd_iommu_flush_all_devices(void) | |||
| 497 | } | 556 | } |
| 498 | } | 557 | } |
| 499 | 558 | ||
| 559 | static void reset_iommu_command_buffer(struct amd_iommu *iommu) | ||
| 560 | { | ||
| 561 | pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); | ||
| 562 | |||
| 563 | if (iommu->reset_in_progress) | ||
| 564 | panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); | ||
| 565 | |||
| 566 | iommu->reset_in_progress = true; | ||
| 567 | |||
| 568 | amd_iommu_reset_cmd_buffer(iommu); | ||
| 569 | flush_all_devices_for_iommu(iommu); | ||
| 570 | flush_all_domains_on_iommu(iommu); | ||
| 571 | |||
| 572 | iommu->reset_in_progress = false; | ||
| 573 | } | ||
| 574 | |||
| 575 | void amd_iommu_flush_all_devices(void) | ||
| 576 | { | ||
| 577 | flush_devices_by_domain(NULL); | ||
| 578 | } | ||
| 579 | |||
| 500 | /**************************************************************************** | 580 | /**************************************************************************** |
| 501 | * | 581 | * |
| 502 | * The functions below are used the create the page table mappings for | 582 | * The functions below are used the create the page table mappings for |
| @@ -514,18 +594,21 @@ void amd_iommu_flush_all_devices(void) | |||
| 514 | static int iommu_map_page(struct protection_domain *dom, | 594 | static int iommu_map_page(struct protection_domain *dom, |
| 515 | unsigned long bus_addr, | 595 | unsigned long bus_addr, |
| 516 | unsigned long phys_addr, | 596 | unsigned long phys_addr, |
| 517 | int prot) | 597 | int prot, |
| 598 | int map_size) | ||
| 518 | { | 599 | { |
| 519 | u64 __pte, *pte; | 600 | u64 __pte, *pte; |
| 520 | 601 | ||
| 521 | bus_addr = PAGE_ALIGN(bus_addr); | 602 | bus_addr = PAGE_ALIGN(bus_addr); |
| 522 | phys_addr = PAGE_ALIGN(phys_addr); | 603 | phys_addr = PAGE_ALIGN(phys_addr); |
| 523 | 604 | ||
| 524 | /* only support 512GB address spaces for now */ | 605 | BUG_ON(!PM_ALIGNED(map_size, bus_addr)); |
| 525 | if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) | 606 | BUG_ON(!PM_ALIGNED(map_size, phys_addr)); |
| 607 | |||
| 608 | if (!(prot & IOMMU_PROT_MASK)) | ||
| 526 | return -EINVAL; | 609 | return -EINVAL; |
| 527 | 610 | ||
| 528 | pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL); | 611 | pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL); |
| 529 | 612 | ||
| 530 | if (IOMMU_PTE_PRESENT(*pte)) | 613 | if (IOMMU_PTE_PRESENT(*pte)) |
| 531 | return -EBUSY; | 614 | return -EBUSY; |
| @@ -538,29 +621,18 @@ static int iommu_map_page(struct protection_domain *dom, | |||
| 538 | 621 | ||
| 539 | *pte = __pte; | 622 | *pte = __pte; |
| 540 | 623 | ||
| 624 | update_domain(dom); | ||
| 625 | |||
| 541 | return 0; | 626 | return 0; |
| 542 | } | 627 | } |
| 543 | 628 | ||
| 544 | static void iommu_unmap_page(struct protection_domain *dom, | 629 | static void iommu_unmap_page(struct protection_domain *dom, |
| 545 | unsigned long bus_addr) | 630 | unsigned long bus_addr, int map_size) |
| 546 | { | 631 | { |
| 547 | u64 *pte; | 632 | u64 *pte = fetch_pte(dom, bus_addr, map_size); |
| 548 | |||
| 549 | pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; | ||
| 550 | |||
| 551 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
| 552 | return; | ||
| 553 | |||
| 554 | pte = IOMMU_PTE_PAGE(*pte); | ||
| 555 | pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; | ||
| 556 | 633 | ||
| 557 | if (!IOMMU_PTE_PRESENT(*pte)) | 634 | if (pte) |
| 558 | return; | 635 | *pte = 0; |
| 559 | |||
| 560 | pte = IOMMU_PTE_PAGE(*pte); | ||
| 561 | pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; | ||
| 562 | |||
| 563 | *pte = 0; | ||
| 564 | } | 636 | } |
| 565 | 637 | ||
| 566 | /* | 638 | /* |
| @@ -615,7 +687,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
| 615 | 687 | ||
| 616 | for (addr = e->address_start; addr < e->address_end; | 688 | for (addr = e->address_start; addr < e->address_end; |
| 617 | addr += PAGE_SIZE) { | 689 | addr += PAGE_SIZE) { |
| 618 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot); | 690 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, |
| 691 | PM_MAP_4k); | ||
| 619 | if (ret) | 692 | if (ret) |
| 620 | return ret; | 693 | return ret; |
| 621 | /* | 694 | /* |
| @@ -670,24 +743,29 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | |||
| 670 | * This function checks if there is a PTE for a given dma address. If | 743 | * This function checks if there is a PTE for a given dma address. If |
| 671 | * there is one, it returns the pointer to it. | 744 | * there is one, it returns the pointer to it. |
| 672 | */ | 745 | */ |
| 673 | static u64* fetch_pte(struct protection_domain *domain, | 746 | static u64 *fetch_pte(struct protection_domain *domain, |
| 674 | unsigned long address) | 747 | unsigned long address, int map_size) |
| 675 | { | 748 | { |
| 749 | int level; | ||
| 676 | u64 *pte; | 750 | u64 *pte; |
| 677 | 751 | ||
| 678 | pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)]; | 752 | level = domain->mode - 1; |
| 753 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | ||
| 679 | 754 | ||
| 680 | if (!IOMMU_PTE_PRESENT(*pte)) | 755 | while (level > map_size) { |
| 681 | return NULL; | 756 | if (!IOMMU_PTE_PRESENT(*pte)) |
| 757 | return NULL; | ||
| 682 | 758 | ||
| 683 | pte = IOMMU_PTE_PAGE(*pte); | 759 | level -= 1; |
| 684 | pte = &pte[IOMMU_PTE_L1_INDEX(address)]; | ||
| 685 | 760 | ||
| 686 | if (!IOMMU_PTE_PRESENT(*pte)) | 761 | pte = IOMMU_PTE_PAGE(*pte); |
| 687 | return NULL; | 762 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
| 688 | 763 | ||
| 689 | pte = IOMMU_PTE_PAGE(*pte); | 764 | if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { |
| 690 | pte = &pte[IOMMU_PTE_L0_INDEX(address)]; | 765 | pte = NULL; |
| 766 | break; | ||
| 767 | } | ||
| 768 | } | ||
| 691 | 769 | ||
| 692 | return pte; | 770 | return pte; |
| 693 | } | 771 | } |
| @@ -727,7 +805,7 @@ static int alloc_new_range(struct amd_iommu *iommu, | |||
| 727 | u64 *pte, *pte_page; | 805 | u64 *pte, *pte_page; |
| 728 | 806 | ||
| 729 | for (i = 0; i < num_ptes; ++i) { | 807 | for (i = 0; i < num_ptes; ++i) { |
| 730 | pte = alloc_pte(&dma_dom->domain, address, | 808 | pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k, |
| 731 | &pte_page, gfp); | 809 | &pte_page, gfp); |
| 732 | if (!pte) | 810 | if (!pte) |
| 733 | goto out_free; | 811 | goto out_free; |
| @@ -760,16 +838,20 @@ static int alloc_new_range(struct amd_iommu *iommu, | |||
| 760 | for (i = dma_dom->aperture[index]->offset; | 838 | for (i = dma_dom->aperture[index]->offset; |
| 761 | i < dma_dom->aperture_size; | 839 | i < dma_dom->aperture_size; |
| 762 | i += PAGE_SIZE) { | 840 | i += PAGE_SIZE) { |
| 763 | u64 *pte = fetch_pte(&dma_dom->domain, i); | 841 | u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k); |
| 764 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) | 842 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) |
| 765 | continue; | 843 | continue; |
| 766 | 844 | ||
| 767 | dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); | 845 | dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); |
| 768 | } | 846 | } |
| 769 | 847 | ||
| 848 | update_domain(&dma_dom->domain); | ||
| 849 | |||
| 770 | return 0; | 850 | return 0; |
| 771 | 851 | ||
| 772 | out_free: | 852 | out_free: |
| 853 | update_domain(&dma_dom->domain); | ||
| 854 | |||
| 773 | free_page((unsigned long)dma_dom->aperture[index]->bitmap); | 855 | free_page((unsigned long)dma_dom->aperture[index]->bitmap); |
| 774 | 856 | ||
| 775 | kfree(dma_dom->aperture[index]); | 857 | kfree(dma_dom->aperture[index]); |
| @@ -1009,7 +1091,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) | |||
| 1009 | dma_dom->domain.id = domain_id_alloc(); | 1091 | dma_dom->domain.id = domain_id_alloc(); |
| 1010 | if (dma_dom->domain.id == 0) | 1092 | if (dma_dom->domain.id == 0) |
| 1011 | goto free_dma_dom; | 1093 | goto free_dma_dom; |
| 1012 | dma_dom->domain.mode = PAGE_MODE_3_LEVEL; | 1094 | dma_dom->domain.mode = PAGE_MODE_2_LEVEL; |
| 1013 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); | 1095 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); |
| 1014 | dma_dom->domain.flags = PD_DMA_OPS_MASK; | 1096 | dma_dom->domain.flags = PD_DMA_OPS_MASK; |
| 1015 | dma_dom->domain.priv = dma_dom; | 1097 | dma_dom->domain.priv = dma_dom; |
| @@ -1063,6 +1145,41 @@ static struct protection_domain *domain_for_device(u16 devid) | |||
| 1063 | return dom; | 1145 | return dom; |
| 1064 | } | 1146 | } |
| 1065 | 1147 | ||
| 1148 | static void set_dte_entry(u16 devid, struct protection_domain *domain) | ||
| 1149 | { | ||
| 1150 | u64 pte_root = virt_to_phys(domain->pt_root); | ||
| 1151 | |||
| 1152 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) | ||
| 1153 | << DEV_ENTRY_MODE_SHIFT; | ||
| 1154 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | ||
| 1155 | |||
| 1156 | amd_iommu_dev_table[devid].data[2] = domain->id; | ||
| 1157 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | ||
| 1158 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | ||
| 1159 | |||
| 1160 | amd_iommu_pd_table[devid] = domain; | ||
| 1161 | } | ||
| 1162 | |||
| 1163 | /* | ||
| 1164 | * If a device is not yet associated with a domain, this function does | ||
| 1165 | * assigns it visible for the hardware | ||
| 1166 | */ | ||
| 1167 | static void __attach_device(struct amd_iommu *iommu, | ||
| 1168 | struct protection_domain *domain, | ||
| 1169 | u16 devid) | ||
| 1170 | { | ||
| 1171 | /* lock domain */ | ||
| 1172 | spin_lock(&domain->lock); | ||
| 1173 | |||
| 1174 | /* update DTE entry */ | ||
| 1175 | set_dte_entry(devid, domain); | ||
| 1176 | |||
| 1177 | domain->dev_cnt += 1; | ||
| 1178 | |||
| 1179 | /* ready */ | ||
| 1180 | spin_unlock(&domain->lock); | ||
| 1181 | } | ||
| 1182 | |||
| 1066 | /* | 1183 | /* |
| 1067 | * If a device is not yet associated with a domain, this function does | 1184 | * If a device is not yet associated with a domain, this function does |
| 1068 | * assigns it visible for the hardware | 1185 | * assigns it visible for the hardware |
| @@ -1072,27 +1189,16 @@ static void attach_device(struct amd_iommu *iommu, | |||
| 1072 | u16 devid) | 1189 | u16 devid) |
| 1073 | { | 1190 | { |
| 1074 | unsigned long flags; | 1191 | unsigned long flags; |
| 1075 | u64 pte_root = virt_to_phys(domain->pt_root); | ||
| 1076 | |||
| 1077 | domain->dev_cnt += 1; | ||
| 1078 | |||
| 1079 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) | ||
| 1080 | << DEV_ENTRY_MODE_SHIFT; | ||
| 1081 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | ||
| 1082 | 1192 | ||
| 1083 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 1193 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
| 1084 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | 1194 | __attach_device(iommu, domain, devid); |
| 1085 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | ||
| 1086 | amd_iommu_dev_table[devid].data[2] = domain->id; | ||
| 1087 | |||
| 1088 | amd_iommu_pd_table[devid] = domain; | ||
| 1089 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1195 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
| 1090 | 1196 | ||
| 1091 | /* | 1197 | /* |
| 1092 | * We might boot into a crash-kernel here. The crashed kernel | 1198 | * We might boot into a crash-kernel here. The crashed kernel |
| 1093 | * left the caches in the IOMMU dirty. So we have to flush | 1199 | * left the caches in the IOMMU dirty. So we have to flush |
| 1094 | * here to evict all dirty stuff. | 1200 | * here to evict all dirty stuff. |
| 1095 | */ | 1201 | */ |
| 1096 | iommu_queue_inv_dev_entry(iommu, devid); | 1202 | iommu_queue_inv_dev_entry(iommu, devid); |
| 1097 | iommu_flush_tlb_pde(iommu, domain->id); | 1203 | iommu_flush_tlb_pde(iommu, domain->id); |
| 1098 | } | 1204 | } |
| @@ -1119,6 +1225,15 @@ static void __detach_device(struct protection_domain *domain, u16 devid) | |||
| 1119 | 1225 | ||
| 1120 | /* ready */ | 1226 | /* ready */ |
| 1121 | spin_unlock(&domain->lock); | 1227 | spin_unlock(&domain->lock); |
| 1228 | |||
| 1229 | /* | ||
| 1230 | * If we run in passthrough mode the device must be assigned to the | ||
| 1231 | * passthrough domain if it is detached from any other domain | ||
| 1232 | */ | ||
| 1233 | if (iommu_pass_through) { | ||
| 1234 | struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; | ||
| 1235 | __attach_device(iommu, pt_domain, devid); | ||
| 1236 | } | ||
| 1122 | } | 1237 | } |
| 1123 | 1238 | ||
| 1124 | /* | 1239 | /* |
| @@ -1164,6 +1279,8 @@ static int device_change_notifier(struct notifier_block *nb, | |||
| 1164 | case BUS_NOTIFY_UNBOUND_DRIVER: | 1279 | case BUS_NOTIFY_UNBOUND_DRIVER: |
| 1165 | if (!domain) | 1280 | if (!domain) |
| 1166 | goto out; | 1281 | goto out; |
| 1282 | if (iommu_pass_through) | ||
| 1283 | break; | ||
| 1167 | detach_device(domain, devid); | 1284 | detach_device(domain, devid); |
| 1168 | break; | 1285 | break; |
| 1169 | case BUS_NOTIFY_ADD_DEVICE: | 1286 | case BUS_NOTIFY_ADD_DEVICE: |
| @@ -1292,39 +1409,91 @@ static int get_device_resources(struct device *dev, | |||
| 1292 | return 1; | 1409 | return 1; |
| 1293 | } | 1410 | } |
| 1294 | 1411 | ||
| 1412 | static void update_device_table(struct protection_domain *domain) | ||
| 1413 | { | ||
| 1414 | unsigned long flags; | ||
| 1415 | int i; | ||
| 1416 | |||
| 1417 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | ||
| 1418 | if (amd_iommu_pd_table[i] != domain) | ||
| 1419 | continue; | ||
| 1420 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
| 1421 | set_dte_entry(i, domain); | ||
| 1422 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
| 1423 | } | ||
| 1424 | } | ||
| 1425 | |||
| 1426 | static void update_domain(struct protection_domain *domain) | ||
| 1427 | { | ||
| 1428 | if (!domain->updated) | ||
| 1429 | return; | ||
| 1430 | |||
| 1431 | update_device_table(domain); | ||
| 1432 | flush_devices_by_domain(domain); | ||
| 1433 | iommu_flush_domain(domain->id); | ||
| 1434 | |||
| 1435 | domain->updated = false; | ||
| 1436 | } | ||
| 1437 | |||
| 1295 | /* | 1438 | /* |
| 1296 | * If the pte_page is not yet allocated this function is called | 1439 | * This function is used to add another level to an IO page table. Adding |
| 1440 | * another level increases the size of the address space by 9 bits to a size up | ||
| 1441 | * to 64 bits. | ||
| 1297 | */ | 1442 | */ |
| 1298 | static u64* alloc_pte(struct protection_domain *dom, | 1443 | static bool increase_address_space(struct protection_domain *domain, |
| 1299 | unsigned long address, u64 **pte_page, gfp_t gfp) | 1444 | gfp_t gfp) |
| 1445 | { | ||
| 1446 | u64 *pte; | ||
| 1447 | |||
| 1448 | if (domain->mode == PAGE_MODE_6_LEVEL) | ||
| 1449 | /* address space already 64 bit large */ | ||
| 1450 | return false; | ||
| 1451 | |||
| 1452 | pte = (void *)get_zeroed_page(gfp); | ||
| 1453 | if (!pte) | ||
| 1454 | return false; | ||
| 1455 | |||
| 1456 | *pte = PM_LEVEL_PDE(domain->mode, | ||
| 1457 | virt_to_phys(domain->pt_root)); | ||
| 1458 | domain->pt_root = pte; | ||
| 1459 | domain->mode += 1; | ||
| 1460 | domain->updated = true; | ||
| 1461 | |||
| 1462 | return true; | ||
| 1463 | } | ||
| 1464 | |||
| 1465 | static u64 *alloc_pte(struct protection_domain *domain, | ||
| 1466 | unsigned long address, | ||
| 1467 | int end_lvl, | ||
| 1468 | u64 **pte_page, | ||
| 1469 | gfp_t gfp) | ||
| 1300 | { | 1470 | { |
| 1301 | u64 *pte, *page; | 1471 | u64 *pte, *page; |
| 1472 | int level; | ||
| 1302 | 1473 | ||
| 1303 | pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)]; | 1474 | while (address > PM_LEVEL_SIZE(domain->mode)) |
| 1475 | increase_address_space(domain, gfp); | ||
| 1304 | 1476 | ||
| 1305 | if (!IOMMU_PTE_PRESENT(*pte)) { | 1477 | level = domain->mode - 1; |
| 1306 | page = (u64 *)get_zeroed_page(gfp); | 1478 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; |
| 1307 | if (!page) | ||
| 1308 | return NULL; | ||
| 1309 | *pte = IOMMU_L2_PDE(virt_to_phys(page)); | ||
| 1310 | } | ||
| 1311 | 1479 | ||
| 1312 | pte = IOMMU_PTE_PAGE(*pte); | 1480 | while (level > end_lvl) { |
| 1313 | pte = &pte[IOMMU_PTE_L1_INDEX(address)]; | 1481 | if (!IOMMU_PTE_PRESENT(*pte)) { |
| 1482 | page = (u64 *)get_zeroed_page(gfp); | ||
| 1483 | if (!page) | ||
| 1484 | return NULL; | ||
| 1485 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); | ||
| 1486 | } | ||
| 1314 | 1487 | ||
| 1315 | if (!IOMMU_PTE_PRESENT(*pte)) { | 1488 | level -= 1; |
| 1316 | page = (u64 *)get_zeroed_page(gfp); | ||
| 1317 | if (!page) | ||
| 1318 | return NULL; | ||
| 1319 | *pte = IOMMU_L1_PDE(virt_to_phys(page)); | ||
| 1320 | } | ||
| 1321 | 1489 | ||
| 1322 | pte = IOMMU_PTE_PAGE(*pte); | 1490 | pte = IOMMU_PTE_PAGE(*pte); |
| 1323 | 1491 | ||
| 1324 | if (pte_page) | 1492 | if (pte_page && level == end_lvl) |
| 1325 | *pte_page = pte; | 1493 | *pte_page = pte; |
| 1326 | 1494 | ||
| 1327 | pte = &pte[IOMMU_PTE_L0_INDEX(address)]; | 1495 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
| 1496 | } | ||
| 1328 | 1497 | ||
| 1329 | return pte; | 1498 | return pte; |
| 1330 | } | 1499 | } |
| @@ -1344,10 +1513,13 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, | |||
| 1344 | 1513 | ||
| 1345 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; | 1514 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; |
| 1346 | if (!pte) { | 1515 | if (!pte) { |
| 1347 | pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC); | 1516 | pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page, |
| 1517 | GFP_ATOMIC); | ||
| 1348 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; | 1518 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; |
| 1349 | } else | 1519 | } else |
| 1350 | pte += IOMMU_PTE_L0_INDEX(address); | 1520 | pte += PM_LEVEL_INDEX(0, address); |
| 1521 | |||
| 1522 | update_domain(&dom->domain); | ||
| 1351 | 1523 | ||
| 1352 | return pte; | 1524 | return pte; |
| 1353 | } | 1525 | } |
| @@ -1409,7 +1581,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, | |||
| 1409 | if (!pte) | 1581 | if (!pte) |
| 1410 | return; | 1582 | return; |
| 1411 | 1583 | ||
| 1412 | pte += IOMMU_PTE_L0_INDEX(address); | 1584 | pte += PM_LEVEL_INDEX(0, address); |
| 1413 | 1585 | ||
| 1414 | WARN_ON(!*pte); | 1586 | WARN_ON(!*pte); |
| 1415 | 1587 | ||
| @@ -1988,19 +2160,47 @@ static void cleanup_domain(struct protection_domain *domain) | |||
| 1988 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 2160 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
| 1989 | } | 2161 | } |
| 1990 | 2162 | ||
| 1991 | static int amd_iommu_domain_init(struct iommu_domain *dom) | 2163 | static void protection_domain_free(struct protection_domain *domain) |
| 2164 | { | ||
| 2165 | if (!domain) | ||
| 2166 | return; | ||
| 2167 | |||
| 2168 | if (domain->id) | ||
| 2169 | domain_id_free(domain->id); | ||
| 2170 | |||
| 2171 | kfree(domain); | ||
| 2172 | } | ||
| 2173 | |||
| 2174 | static struct protection_domain *protection_domain_alloc(void) | ||
| 1992 | { | 2175 | { |
| 1993 | struct protection_domain *domain; | 2176 | struct protection_domain *domain; |
| 1994 | 2177 | ||
| 1995 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); | 2178 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); |
| 1996 | if (!domain) | 2179 | if (!domain) |
| 1997 | return -ENOMEM; | 2180 | return NULL; |
| 1998 | 2181 | ||
| 1999 | spin_lock_init(&domain->lock); | 2182 | spin_lock_init(&domain->lock); |
| 2000 | domain->mode = PAGE_MODE_3_LEVEL; | ||
| 2001 | domain->id = domain_id_alloc(); | 2183 | domain->id = domain_id_alloc(); |
| 2002 | if (!domain->id) | 2184 | if (!domain->id) |
| 2185 | goto out_err; | ||
| 2186 | |||
| 2187 | return domain; | ||
| 2188 | |||
| 2189 | out_err: | ||
| 2190 | kfree(domain); | ||
| 2191 | |||
| 2192 | return NULL; | ||
| 2193 | } | ||
| 2194 | |||
| 2195 | static int amd_iommu_domain_init(struct iommu_domain *dom) | ||
| 2196 | { | ||
| 2197 | struct protection_domain *domain; | ||
| 2198 | |||
| 2199 | domain = protection_domain_alloc(); | ||
| 2200 | if (!domain) | ||
| 2003 | goto out_free; | 2201 | goto out_free; |
| 2202 | |||
| 2203 | domain->mode = PAGE_MODE_3_LEVEL; | ||
| 2004 | domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); | 2204 | domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); |
| 2005 | if (!domain->pt_root) | 2205 | if (!domain->pt_root) |
| 2006 | goto out_free; | 2206 | goto out_free; |
| @@ -2010,7 +2210,7 @@ static int amd_iommu_domain_init(struct iommu_domain *dom) | |||
| 2010 | return 0; | 2210 | return 0; |
| 2011 | 2211 | ||
| 2012 | out_free: | 2212 | out_free: |
| 2013 | kfree(domain); | 2213 | protection_domain_free(domain); |
| 2014 | 2214 | ||
| 2015 | return -ENOMEM; | 2215 | return -ENOMEM; |
| 2016 | } | 2216 | } |
| @@ -2115,7 +2315,7 @@ static int amd_iommu_map_range(struct iommu_domain *dom, | |||
| 2115 | paddr &= PAGE_MASK; | 2315 | paddr &= PAGE_MASK; |
| 2116 | 2316 | ||
| 2117 | for (i = 0; i < npages; ++i) { | 2317 | for (i = 0; i < npages; ++i) { |
| 2118 | ret = iommu_map_page(domain, iova, paddr, prot); | 2318 | ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); |
| 2119 | if (ret) | 2319 | if (ret) |
| 2120 | return ret; | 2320 | return ret; |
| 2121 | 2321 | ||
| @@ -2136,7 +2336,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, | |||
| 2136 | iova &= PAGE_MASK; | 2336 | iova &= PAGE_MASK; |
| 2137 | 2337 | ||
| 2138 | for (i = 0; i < npages; ++i) { | 2338 | for (i = 0; i < npages; ++i) { |
| 2139 | iommu_unmap_page(domain, iova); | 2339 | iommu_unmap_page(domain, iova, PM_MAP_4k); |
| 2140 | iova += PAGE_SIZE; | 2340 | iova += PAGE_SIZE; |
| 2141 | } | 2341 | } |
| 2142 | 2342 | ||
| @@ -2151,21 +2351,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | |||
| 2151 | phys_addr_t paddr; | 2351 | phys_addr_t paddr; |
| 2152 | u64 *pte; | 2352 | u64 *pte; |
| 2153 | 2353 | ||
| 2154 | pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)]; | 2354 | pte = fetch_pte(domain, iova, PM_MAP_4k); |
| 2155 | |||
| 2156 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
| 2157 | return 0; | ||
| 2158 | |||
| 2159 | pte = IOMMU_PTE_PAGE(*pte); | ||
| 2160 | pte = &pte[IOMMU_PTE_L1_INDEX(iova)]; | ||
| 2161 | |||
| 2162 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
| 2163 | return 0; | ||
| 2164 | |||
| 2165 | pte = IOMMU_PTE_PAGE(*pte); | ||
| 2166 | pte = &pte[IOMMU_PTE_L0_INDEX(iova)]; | ||
| 2167 | 2355 | ||
| 2168 | if (!IOMMU_PTE_PRESENT(*pte)) | 2356 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) |
| 2169 | return 0; | 2357 | return 0; |
| 2170 | 2358 | ||
| 2171 | paddr = *pte & IOMMU_PAGE_MASK; | 2359 | paddr = *pte & IOMMU_PAGE_MASK; |
| @@ -2191,3 +2379,46 @@ static struct iommu_ops amd_iommu_ops = { | |||
| 2191 | .domain_has_cap = amd_iommu_domain_has_cap, | 2379 | .domain_has_cap = amd_iommu_domain_has_cap, |
| 2192 | }; | 2380 | }; |
| 2193 | 2381 | ||
| 2382 | /***************************************************************************** | ||
| 2383 | * | ||
| 2384 | * The next functions do a basic initialization of IOMMU for pass through | ||
| 2385 | * mode | ||
| 2386 | * | ||
| 2387 | * In passthrough mode the IOMMU is initialized and enabled but not used for | ||
| 2388 | * DMA-API translation. | ||
| 2389 | * | ||
| 2390 | *****************************************************************************/ | ||
| 2391 | |||
| 2392 | int __init amd_iommu_init_passthrough(void) | ||
| 2393 | { | ||
| 2394 | struct pci_dev *dev = NULL; | ||
| 2395 | u16 devid, devid2; | ||
| 2396 | |||
| 2397 | /* allocate passthroug domain */ | ||
| 2398 | pt_domain = protection_domain_alloc(); | ||
| 2399 | if (!pt_domain) | ||
| 2400 | return -ENOMEM; | ||
| 2401 | |||
| 2402 | pt_domain->mode |= PAGE_MODE_NONE; | ||
| 2403 | |||
| 2404 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | ||
| 2405 | struct amd_iommu *iommu; | ||
| 2406 | |||
| 2407 | devid = calc_devid(dev->bus->number, dev->devfn); | ||
| 2408 | if (devid > amd_iommu_last_bdf) | ||
| 2409 | continue; | ||
| 2410 | |||
| 2411 | devid2 = amd_iommu_alias_table[devid]; | ||
| 2412 | |||
| 2413 | iommu = amd_iommu_rlookup_table[devid2]; | ||
| 2414 | if (!iommu) | ||
| 2415 | continue; | ||
| 2416 | |||
| 2417 | __attach_device(iommu, pt_domain, devid); | ||
| 2418 | __attach_device(iommu, pt_domain, devid2); | ||
| 2419 | } | ||
| 2420 | |||
| 2421 | pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); | ||
| 2422 | |||
| 2423 | return 0; | ||
| 2424 | } | ||
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c1b17e97252e..b4b61d462dcc 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
| @@ -252,7 +252,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
| 252 | /* Function to enable the hardware */ | 252 | /* Function to enable the hardware */ |
| 253 | static void iommu_enable(struct amd_iommu *iommu) | 253 | static void iommu_enable(struct amd_iommu *iommu) |
| 254 | { | 254 | { |
| 255 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", | 255 | printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n", |
| 256 | dev_name(&iommu->dev->dev), iommu->cap_ptr); | 256 | dev_name(&iommu->dev->dev), iommu->cap_ptr); |
| 257 | 257 | ||
| 258 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | 258 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); |
| @@ -435,6 +435,20 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | |||
| 435 | } | 435 | } |
| 436 | 436 | ||
| 437 | /* | 437 | /* |
| 438 | * This function resets the command buffer if the IOMMU stopped fetching | ||
| 439 | * commands from it. | ||
| 440 | */ | ||
| 441 | void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) | ||
| 442 | { | ||
| 443 | iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); | ||
| 444 | |||
| 445 | writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | ||
| 446 | writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | ||
| 447 | |||
| 448 | iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); | ||
| 449 | } | ||
| 450 | |||
| 451 | /* | ||
| 438 | * This function writes the command buffer address to the hardware and | 452 | * This function writes the command buffer address to the hardware and |
| 439 | * enables it. | 453 | * enables it. |
| 440 | */ | 454 | */ |
| @@ -450,11 +464,7 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) | |||
| 450 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, | 464 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, |
| 451 | &entry, sizeof(entry)); | 465 | &entry, sizeof(entry)); |
| 452 | 466 | ||
| 453 | /* set head and tail to zero manually */ | 467 | amd_iommu_reset_cmd_buffer(iommu); |
| 454 | writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | ||
| 455 | writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | ||
| 456 | |||
| 457 | iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); | ||
| 458 | } | 468 | } |
| 459 | 469 | ||
| 460 | static void __init free_command_buffer(struct amd_iommu *iommu) | 470 | static void __init free_command_buffer(struct amd_iommu *iommu) |
| @@ -858,7 +868,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
| 858 | switch (*p) { | 868 | switch (*p) { |
| 859 | case ACPI_IVHD_TYPE: | 869 | case ACPI_IVHD_TYPE: |
| 860 | 870 | ||
| 861 | DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x " | 871 | DUMP_printk("device: %02x:%02x.%01x cap: %04x " |
| 862 | "seg: %d flags: %01x info %04x\n", | 872 | "seg: %d flags: %01x info %04x\n", |
| 863 | PCI_BUS(h->devid), PCI_SLOT(h->devid), | 873 | PCI_BUS(h->devid), PCI_SLOT(h->devid), |
| 864 | PCI_FUNC(h->devid), h->cap_ptr, | 874 | PCI_FUNC(h->devid), h->cap_ptr, |
| @@ -902,7 +912,7 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu) | |||
| 902 | 912 | ||
| 903 | r = request_irq(iommu->dev->irq, amd_iommu_int_handler, | 913 | r = request_irq(iommu->dev->irq, amd_iommu_int_handler, |
| 904 | IRQF_SAMPLE_RANDOM, | 914 | IRQF_SAMPLE_RANDOM, |
| 905 | "AMD IOMMU", | 915 | "AMD-Vi", |
| 906 | NULL); | 916 | NULL); |
| 907 | 917 | ||
| 908 | if (r) { | 918 | if (r) { |
| @@ -1150,7 +1160,7 @@ int __init amd_iommu_init(void) | |||
| 1150 | 1160 | ||
| 1151 | 1161 | ||
| 1152 | if (no_iommu) { | 1162 | if (no_iommu) { |
| 1153 | printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n"); | 1163 | printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); |
| 1154 | return 0; | 1164 | return 0; |
| 1155 | } | 1165 | } |
| 1156 | 1166 | ||
| @@ -1242,22 +1252,28 @@ int __init amd_iommu_init(void) | |||
| 1242 | if (ret) | 1252 | if (ret) |
| 1243 | goto free; | 1253 | goto free; |
| 1244 | 1254 | ||
| 1245 | ret = amd_iommu_init_dma_ops(); | 1255 | if (iommu_pass_through) |
| 1256 | ret = amd_iommu_init_passthrough(); | ||
| 1257 | else | ||
| 1258 | ret = amd_iommu_init_dma_ops(); | ||
| 1246 | if (ret) | 1259 | if (ret) |
| 1247 | goto free; | 1260 | goto free; |
| 1248 | 1261 | ||
| 1249 | enable_iommus(); | 1262 | enable_iommus(); |
| 1250 | 1263 | ||
| 1251 | printk(KERN_INFO "AMD IOMMU: device isolation "); | 1264 | if (iommu_pass_through) |
| 1265 | goto out; | ||
| 1266 | |||
| 1267 | printk(KERN_INFO "AMD-Vi: device isolation "); | ||
| 1252 | if (amd_iommu_isolate) | 1268 | if (amd_iommu_isolate) |
| 1253 | printk("enabled\n"); | 1269 | printk("enabled\n"); |
| 1254 | else | 1270 | else |
| 1255 | printk("disabled\n"); | 1271 | printk("disabled\n"); |
| 1256 | 1272 | ||
| 1257 | if (amd_iommu_unmap_flush) | 1273 | if (amd_iommu_unmap_flush) |
| 1258 | printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); | 1274 | printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); |
| 1259 | else | 1275 | else |
| 1260 | printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); | 1276 | printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); |
| 1261 | 1277 | ||
| 1262 | out: | 1278 | out: |
| 1263 | return ret; | 1279 | return ret; |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 676debfc1702..128111d8ffe0 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <linux/bitops.h> | 20 | #include <linux/bitops.h> |
| 21 | #include <linux/ioport.h> | 21 | #include <linux/ioport.h> |
| 22 | #include <linux/suspend.h> | 22 | #include <linux/suspend.h> |
| 23 | #include <linux/kmemleak.h> | ||
| 23 | #include <asm/e820.h> | 24 | #include <asm/e820.h> |
| 24 | #include <asm/io.h> | 25 | #include <asm/io.h> |
| 25 | #include <asm/iommu.h> | 26 | #include <asm/iommu.h> |
| @@ -94,6 +95,11 @@ static u32 __init allocate_aperture(void) | |||
| 94 | * code for safe | 95 | * code for safe |
| 95 | */ | 96 | */ |
| 96 | p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); | 97 | p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); |
| 98 | /* | ||
| 99 | * Kmemleak should not scan this block as it may not be mapped via the | ||
| 100 | * kernel direct mapping. | ||
| 101 | */ | ||
| 102 | kmemleak_ignore(p); | ||
| 97 | if (!p || __pa(p)+aper_size > 0xffffffff) { | 103 | if (!p || __pa(p)+aper_size > 0xffffffff) { |
| 98 | printk(KERN_ERR | 104 | printk(KERN_ERR |
| 99 | "Cannot allocate aperture memory hole (%p,%uK)\n", | 105 | "Cannot allocate aperture memory hole (%p,%uK)\n", |
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index b3025b43b63a..db7220220d09 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
| @@ -39,7 +39,7 @@ | |||
| 39 | int unknown_nmi_panic; | 39 | int unknown_nmi_panic; |
| 40 | int nmi_watchdog_enabled; | 40 | int nmi_watchdog_enabled; |
| 41 | 41 | ||
| 42 | static cpumask_var_t backtrace_mask; | 42 | static cpumask_t backtrace_mask __read_mostly; |
| 43 | 43 | ||
| 44 | /* nmi_active: | 44 | /* nmi_active: |
| 45 | * >0: the lapic NMI watchdog is active, but can be disabled | 45 | * >0: the lapic NMI watchdog is active, but can be disabled |
| @@ -138,7 +138,6 @@ int __init check_nmi_watchdog(void) | |||
| 138 | if (!prev_nmi_count) | 138 | if (!prev_nmi_count) |
| 139 | goto error; | 139 | goto error; |
| 140 | 140 | ||
| 141 | alloc_cpumask_var(&backtrace_mask, GFP_KERNEL|__GFP_ZERO); | ||
| 142 | printk(KERN_INFO "Testing NMI watchdog ... "); | 141 | printk(KERN_INFO "Testing NMI watchdog ... "); |
| 143 | 142 | ||
| 144 | #ifdef CONFIG_SMP | 143 | #ifdef CONFIG_SMP |
| @@ -415,14 +414,17 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
| 415 | } | 414 | } |
| 416 | 415 | ||
| 417 | /* We can be called before check_nmi_watchdog, hence NULL check. */ | 416 | /* We can be called before check_nmi_watchdog, hence NULL check. */ |
| 418 | if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) { | 417 | if (cpumask_test_cpu(cpu, &backtrace_mask)) { |
| 419 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ | 418 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ |
| 420 | 419 | ||
| 421 | spin_lock(&lock); | 420 | spin_lock(&lock); |
| 422 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); | 421 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); |
| 422 | show_regs(regs); | ||
| 423 | dump_stack(); | 423 | dump_stack(); |
| 424 | spin_unlock(&lock); | 424 | spin_unlock(&lock); |
| 425 | cpumask_clear_cpu(cpu, backtrace_mask); | 425 | cpumask_clear_cpu(cpu, &backtrace_mask); |
| 426 | |||
| 427 | rc = 1; | ||
| 426 | } | 428 | } |
| 427 | 429 | ||
| 428 | /* Could check oops_in_progress here too, but it's safer not to */ | 430 | /* Could check oops_in_progress here too, but it's safer not to */ |
| @@ -552,14 +554,18 @@ int do_nmi_callback(struct pt_regs *regs, int cpu) | |||
| 552 | return 0; | 554 | return 0; |
| 553 | } | 555 | } |
| 554 | 556 | ||
| 555 | void __trigger_all_cpu_backtrace(void) | 557 | void arch_trigger_all_cpu_backtrace(void) |
| 556 | { | 558 | { |
| 557 | int i; | 559 | int i; |
| 558 | 560 | ||
| 559 | cpumask_copy(backtrace_mask, cpu_online_mask); | 561 | cpumask_copy(&backtrace_mask, cpu_online_mask); |
| 562 | |||
| 563 | printk(KERN_INFO "sending NMI to all CPUs:\n"); | ||
| 564 | apic->send_IPI_all(NMI_VECTOR); | ||
| 565 | |||
| 560 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | 566 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ |
| 561 | for (i = 0; i < 10 * 1000; i++) { | 567 | for (i = 0; i < 10 * 1000; i++) { |
| 562 | if (cpumask_empty(backtrace_mask)) | 568 | if (cpumask_empty(&backtrace_mask)) |
| 563 | break; | 569 | break; |
| 564 | mdelay(1); | 570 | mdelay(1); |
| 565 | } | 571 | } |
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 898ecc47e129..4a6aeedcd965 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | * This code generates raw asm output which is post-processed to extract | 3 | * This code generates raw asm output which is post-processed to extract |
| 4 | * and format the required data. | 4 | * and format the required data. |
| 5 | */ | 5 | */ |
| 6 | #define COMPILE_OFFSETS | ||
| 6 | 7 | ||
| 7 | #include <linux/crypto.h> | 8 | #include <linux/crypto.h> |
| 8 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 900332b800f8..f9cd0849bd42 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | * Copyright (C) 2009 Jaswinder Singh Rajput | 6 | * Copyright (C) 2009 Jaswinder Singh Rajput |
| 7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | 7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter |
| 8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> |
| 9 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> | ||
| 9 | * | 10 | * |
| 10 | * For licencing details see kernel-base/COPYING | 11 | * For licencing details see kernel-base/COPYING |
| 11 | */ | 12 | */ |
| @@ -20,6 +21,7 @@ | |||
| 20 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
| 21 | #include <linux/uaccess.h> | 22 | #include <linux/uaccess.h> |
| 22 | #include <linux/highmem.h> | 23 | #include <linux/highmem.h> |
| 24 | #include <linux/cpu.h> | ||
| 23 | 25 | ||
| 24 | #include <asm/apic.h> | 26 | #include <asm/apic.h> |
| 25 | #include <asm/stacktrace.h> | 27 | #include <asm/stacktrace.h> |
| @@ -27,12 +29,52 @@ | |||
| 27 | 29 | ||
| 28 | static u64 perf_counter_mask __read_mostly; | 30 | static u64 perf_counter_mask __read_mostly; |
| 29 | 31 | ||
| 32 | /* The maximal number of PEBS counters: */ | ||
| 33 | #define MAX_PEBS_COUNTERS 4 | ||
| 34 | |||
| 35 | /* The size of a BTS record in bytes: */ | ||
| 36 | #define BTS_RECORD_SIZE 24 | ||
| 37 | |||
| 38 | /* The size of a per-cpu BTS buffer in bytes: */ | ||
| 39 | #define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024) | ||
| 40 | |||
| 41 | /* The BTS overflow threshold in bytes from the end of the buffer: */ | ||
| 42 | #define BTS_OVFL_TH (BTS_RECORD_SIZE * 64) | ||
| 43 | |||
| 44 | |||
| 45 | /* | ||
| 46 | * Bits in the debugctlmsr controlling branch tracing. | ||
| 47 | */ | ||
| 48 | #define X86_DEBUGCTL_TR (1 << 6) | ||
| 49 | #define X86_DEBUGCTL_BTS (1 << 7) | ||
| 50 | #define X86_DEBUGCTL_BTINT (1 << 8) | ||
| 51 | #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) | ||
| 52 | #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) | ||
| 53 | |||
| 54 | /* | ||
| 55 | * A debug store configuration. | ||
| 56 | * | ||
| 57 | * We only support architectures that use 64bit fields. | ||
| 58 | */ | ||
| 59 | struct debug_store { | ||
| 60 | u64 bts_buffer_base; | ||
| 61 | u64 bts_index; | ||
| 62 | u64 bts_absolute_maximum; | ||
| 63 | u64 bts_interrupt_threshold; | ||
| 64 | u64 pebs_buffer_base; | ||
| 65 | u64 pebs_index; | ||
| 66 | u64 pebs_absolute_maximum; | ||
| 67 | u64 pebs_interrupt_threshold; | ||
| 68 | u64 pebs_counter_reset[MAX_PEBS_COUNTERS]; | ||
| 69 | }; | ||
| 70 | |||
| 30 | struct cpu_hw_counters { | 71 | struct cpu_hw_counters { |
| 31 | struct perf_counter *counters[X86_PMC_IDX_MAX]; | 72 | struct perf_counter *counters[X86_PMC_IDX_MAX]; |
| 32 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 73 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
| 33 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 74 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
| 34 | unsigned long interrupts; | 75 | unsigned long interrupts; |
| 35 | int enabled; | 76 | int enabled; |
| 77 | struct debug_store *ds; | ||
| 36 | }; | 78 | }; |
| 37 | 79 | ||
| 38 | /* | 80 | /* |
| @@ -58,6 +100,8 @@ struct x86_pmu { | |||
| 58 | int apic; | 100 | int apic; |
| 59 | u64 max_period; | 101 | u64 max_period; |
| 60 | u64 intel_ctrl; | 102 | u64 intel_ctrl; |
| 103 | void (*enable_bts)(u64 config); | ||
| 104 | void (*disable_bts)(void); | ||
| 61 | }; | 105 | }; |
| 62 | 106 | ||
| 63 | static struct x86_pmu x86_pmu __read_mostly; | 107 | static struct x86_pmu x86_pmu __read_mostly; |
| @@ -577,6 +621,9 @@ x86_perf_counter_update(struct perf_counter *counter, | |||
| 577 | u64 prev_raw_count, new_raw_count; | 621 | u64 prev_raw_count, new_raw_count; |
| 578 | s64 delta; | 622 | s64 delta; |
| 579 | 623 | ||
| 624 | if (idx == X86_PMC_IDX_FIXED_BTS) | ||
| 625 | return 0; | ||
| 626 | |||
| 580 | /* | 627 | /* |
| 581 | * Careful: an NMI might modify the previous counter value. | 628 | * Careful: an NMI might modify the previous counter value. |
| 582 | * | 629 | * |
| @@ -666,10 +713,110 @@ static void release_pmc_hardware(void) | |||
| 666 | #endif | 713 | #endif |
| 667 | } | 714 | } |
| 668 | 715 | ||
| 716 | static inline bool bts_available(void) | ||
| 717 | { | ||
| 718 | return x86_pmu.enable_bts != NULL; | ||
| 719 | } | ||
| 720 | |||
| 721 | static inline void init_debug_store_on_cpu(int cpu) | ||
| 722 | { | ||
| 723 | struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; | ||
| 724 | |||
| 725 | if (!ds) | ||
| 726 | return; | ||
| 727 | |||
| 728 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
| 729 | (u32)((u64)(unsigned long)ds), | ||
| 730 | (u32)((u64)(unsigned long)ds >> 32)); | ||
| 731 | } | ||
| 732 | |||
| 733 | static inline void fini_debug_store_on_cpu(int cpu) | ||
| 734 | { | ||
| 735 | if (!per_cpu(cpu_hw_counters, cpu).ds) | ||
| 736 | return; | ||
| 737 | |||
| 738 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
| 739 | } | ||
| 740 | |||
| 741 | static void release_bts_hardware(void) | ||
| 742 | { | ||
| 743 | int cpu; | ||
| 744 | |||
| 745 | if (!bts_available()) | ||
| 746 | return; | ||
| 747 | |||
| 748 | get_online_cpus(); | ||
| 749 | |||
| 750 | for_each_online_cpu(cpu) | ||
| 751 | fini_debug_store_on_cpu(cpu); | ||
| 752 | |||
| 753 | for_each_possible_cpu(cpu) { | ||
| 754 | struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; | ||
| 755 | |||
| 756 | if (!ds) | ||
| 757 | continue; | ||
| 758 | |||
| 759 | per_cpu(cpu_hw_counters, cpu).ds = NULL; | ||
| 760 | |||
| 761 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
| 762 | kfree(ds); | ||
| 763 | } | ||
| 764 | |||
| 765 | put_online_cpus(); | ||
| 766 | } | ||
| 767 | |||
| 768 | static int reserve_bts_hardware(void) | ||
| 769 | { | ||
| 770 | int cpu, err = 0; | ||
| 771 | |||
| 772 | if (!bts_available()) | ||
| 773 | return 0; | ||
| 774 | |||
| 775 | get_online_cpus(); | ||
| 776 | |||
| 777 | for_each_possible_cpu(cpu) { | ||
| 778 | struct debug_store *ds; | ||
| 779 | void *buffer; | ||
| 780 | |||
| 781 | err = -ENOMEM; | ||
| 782 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
| 783 | if (unlikely(!buffer)) | ||
| 784 | break; | ||
| 785 | |||
| 786 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
| 787 | if (unlikely(!ds)) { | ||
| 788 | kfree(buffer); | ||
| 789 | break; | ||
| 790 | } | ||
| 791 | |||
| 792 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
| 793 | ds->bts_index = ds->bts_buffer_base; | ||
| 794 | ds->bts_absolute_maximum = | ||
| 795 | ds->bts_buffer_base + BTS_BUFFER_SIZE; | ||
| 796 | ds->bts_interrupt_threshold = | ||
| 797 | ds->bts_absolute_maximum - BTS_OVFL_TH; | ||
| 798 | |||
| 799 | per_cpu(cpu_hw_counters, cpu).ds = ds; | ||
| 800 | err = 0; | ||
| 801 | } | ||
| 802 | |||
| 803 | if (err) | ||
| 804 | release_bts_hardware(); | ||
| 805 | else { | ||
| 806 | for_each_online_cpu(cpu) | ||
| 807 | init_debug_store_on_cpu(cpu); | ||
| 808 | } | ||
| 809 | |||
| 810 | put_online_cpus(); | ||
| 811 | |||
| 812 | return err; | ||
| 813 | } | ||
| 814 | |||
| 669 | static void hw_perf_counter_destroy(struct perf_counter *counter) | 815 | static void hw_perf_counter_destroy(struct perf_counter *counter) |
| 670 | { | 816 | { |
| 671 | if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { | 817 | if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { |
| 672 | release_pmc_hardware(); | 818 | release_pmc_hardware(); |
| 819 | release_bts_hardware(); | ||
| 673 | mutex_unlock(&pmc_reserve_mutex); | 820 | mutex_unlock(&pmc_reserve_mutex); |
| 674 | } | 821 | } |
| 675 | } | 822 | } |
| @@ -712,6 +859,42 @@ set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) | |||
| 712 | return 0; | 859 | return 0; |
| 713 | } | 860 | } |
| 714 | 861 | ||
| 862 | static void intel_pmu_enable_bts(u64 config) | ||
| 863 | { | ||
| 864 | unsigned long debugctlmsr; | ||
| 865 | |||
| 866 | debugctlmsr = get_debugctlmsr(); | ||
| 867 | |||
| 868 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
| 869 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
| 870 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
| 871 | |||
| 872 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
| 873 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
| 874 | |||
| 875 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
| 876 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
| 877 | |||
| 878 | update_debugctlmsr(debugctlmsr); | ||
| 879 | } | ||
| 880 | |||
| 881 | static void intel_pmu_disable_bts(void) | ||
| 882 | { | ||
| 883 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
| 884 | unsigned long debugctlmsr; | ||
| 885 | |||
| 886 | if (!cpuc->ds) | ||
| 887 | return; | ||
| 888 | |||
| 889 | debugctlmsr = get_debugctlmsr(); | ||
| 890 | |||
| 891 | debugctlmsr &= | ||
| 892 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
| 893 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
| 894 | |||
| 895 | update_debugctlmsr(debugctlmsr); | ||
| 896 | } | ||
| 897 | |||
| 715 | /* | 898 | /* |
| 716 | * Setup the hardware configuration for a given attr_type | 899 | * Setup the hardware configuration for a given attr_type |
| 717 | */ | 900 | */ |
| @@ -728,9 +911,13 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
| 728 | err = 0; | 911 | err = 0; |
| 729 | if (!atomic_inc_not_zero(&active_counters)) { | 912 | if (!atomic_inc_not_zero(&active_counters)) { |
| 730 | mutex_lock(&pmc_reserve_mutex); | 913 | mutex_lock(&pmc_reserve_mutex); |
| 731 | if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware()) | 914 | if (atomic_read(&active_counters) == 0) { |
| 732 | err = -EBUSY; | 915 | if (!reserve_pmc_hardware()) |
| 733 | else | 916 | err = -EBUSY; |
| 917 | else | ||
| 918 | err = reserve_bts_hardware(); | ||
| 919 | } | ||
| 920 | if (!err) | ||
| 734 | atomic_inc(&active_counters); | 921 | atomic_inc(&active_counters); |
| 735 | mutex_unlock(&pmc_reserve_mutex); | 922 | mutex_unlock(&pmc_reserve_mutex); |
| 736 | } | 923 | } |
| @@ -793,6 +980,20 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
| 793 | if (config == -1LL) | 980 | if (config == -1LL) |
| 794 | return -EINVAL; | 981 | return -EINVAL; |
| 795 | 982 | ||
| 983 | /* | ||
| 984 | * Branch tracing: | ||
| 985 | */ | ||
| 986 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | ||
| 987 | (hwc->sample_period == 1)) { | ||
| 988 | /* BTS is not supported by this architecture. */ | ||
| 989 | if (!bts_available()) | ||
| 990 | return -EOPNOTSUPP; | ||
| 991 | |||
| 992 | /* BTS is currently only allowed for user-mode. */ | ||
| 993 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
| 994 | return -EOPNOTSUPP; | ||
| 995 | } | ||
| 996 | |||
| 796 | hwc->config |= config; | 997 | hwc->config |= config; |
| 797 | 998 | ||
| 798 | return 0; | 999 | return 0; |
| @@ -817,7 +1018,18 @@ static void p6_pmu_disable_all(void) | |||
| 817 | 1018 | ||
| 818 | static void intel_pmu_disable_all(void) | 1019 | static void intel_pmu_disable_all(void) |
| 819 | { | 1020 | { |
| 1021 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
| 1022 | |||
| 1023 | if (!cpuc->enabled) | ||
| 1024 | return; | ||
| 1025 | |||
| 1026 | cpuc->enabled = 0; | ||
| 1027 | barrier(); | ||
| 1028 | |||
| 820 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | 1029 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); |
| 1030 | |||
| 1031 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | ||
| 1032 | intel_pmu_disable_bts(); | ||
| 821 | } | 1033 | } |
| 822 | 1034 | ||
| 823 | static void amd_pmu_disable_all(void) | 1035 | static void amd_pmu_disable_all(void) |
| @@ -875,7 +1087,25 @@ static void p6_pmu_enable_all(void) | |||
| 875 | 1087 | ||
| 876 | static void intel_pmu_enable_all(void) | 1088 | static void intel_pmu_enable_all(void) |
| 877 | { | 1089 | { |
| 1090 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
| 1091 | |||
| 1092 | if (cpuc->enabled) | ||
| 1093 | return; | ||
| 1094 | |||
| 1095 | cpuc->enabled = 1; | ||
| 1096 | barrier(); | ||
| 1097 | |||
| 878 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 1098 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); |
| 1099 | |||
| 1100 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | ||
| 1101 | struct perf_counter *counter = | ||
| 1102 | cpuc->counters[X86_PMC_IDX_FIXED_BTS]; | ||
| 1103 | |||
| 1104 | if (WARN_ON_ONCE(!counter)) | ||
| 1105 | return; | ||
| 1106 | |||
| 1107 | intel_pmu_enable_bts(counter->hw.config); | ||
| 1108 | } | ||
| 879 | } | 1109 | } |
| 880 | 1110 | ||
| 881 | static void amd_pmu_enable_all(void) | 1111 | static void amd_pmu_enable_all(void) |
| @@ -962,6 +1192,11 @@ p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | |||
| 962 | static inline void | 1192 | static inline void |
| 963 | intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | 1193 | intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) |
| 964 | { | 1194 | { |
| 1195 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
| 1196 | intel_pmu_disable_bts(); | ||
| 1197 | return; | ||
| 1198 | } | ||
| 1199 | |||
| 965 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 1200 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
| 966 | intel_pmu_disable_fixed(hwc, idx); | 1201 | intel_pmu_disable_fixed(hwc, idx); |
| 967 | return; | 1202 | return; |
| @@ -990,6 +1225,9 @@ x86_perf_counter_set_period(struct perf_counter *counter, | |||
| 990 | s64 period = hwc->sample_period; | 1225 | s64 period = hwc->sample_period; |
| 991 | int err, ret = 0; | 1226 | int err, ret = 0; |
| 992 | 1227 | ||
| 1228 | if (idx == X86_PMC_IDX_FIXED_BTS) | ||
| 1229 | return 0; | ||
| 1230 | |||
| 993 | /* | 1231 | /* |
| 994 | * If we are way outside a reasonable range then just skip forward: | 1232 | * If we are way outside a reasonable range then just skip forward: |
| 995 | */ | 1233 | */ |
| @@ -1072,6 +1310,14 @@ static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | |||
| 1072 | 1310 | ||
| 1073 | static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | 1311 | static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) |
| 1074 | { | 1312 | { |
| 1313 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
| 1314 | if (!__get_cpu_var(cpu_hw_counters).enabled) | ||
| 1315 | return; | ||
| 1316 | |||
| 1317 | intel_pmu_enable_bts(hwc->config); | ||
| 1318 | return; | ||
| 1319 | } | ||
| 1320 | |||
| 1075 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 1321 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
| 1076 | intel_pmu_enable_fixed(hwc, idx); | 1322 | intel_pmu_enable_fixed(hwc, idx); |
| 1077 | return; | 1323 | return; |
| @@ -1093,11 +1339,16 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) | |||
| 1093 | { | 1339 | { |
| 1094 | unsigned int event; | 1340 | unsigned int event; |
| 1095 | 1341 | ||
| 1342 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; | ||
| 1343 | |||
| 1344 | if (unlikely((event == | ||
| 1345 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | ||
| 1346 | (hwc->sample_period == 1))) | ||
| 1347 | return X86_PMC_IDX_FIXED_BTS; | ||
| 1348 | |||
| 1096 | if (!x86_pmu.num_counters_fixed) | 1349 | if (!x86_pmu.num_counters_fixed) |
| 1097 | return -1; | 1350 | return -1; |
| 1098 | 1351 | ||
| 1099 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; | ||
| 1100 | |||
| 1101 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | 1352 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) |
| 1102 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; | 1353 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; |
| 1103 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) | 1354 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) |
| @@ -1118,7 +1369,15 @@ static int x86_pmu_enable(struct perf_counter *counter) | |||
| 1118 | int idx; | 1369 | int idx; |
| 1119 | 1370 | ||
| 1120 | idx = fixed_mode_idx(counter, hwc); | 1371 | idx = fixed_mode_idx(counter, hwc); |
| 1121 | if (idx >= 0) { | 1372 | if (idx == X86_PMC_IDX_FIXED_BTS) { |
| 1373 | /* BTS is already occupied. */ | ||
| 1374 | if (test_and_set_bit(idx, cpuc->used_mask)) | ||
| 1375 | return -EAGAIN; | ||
| 1376 | |||
| 1377 | hwc->config_base = 0; | ||
| 1378 | hwc->counter_base = 0; | ||
| 1379 | hwc->idx = idx; | ||
| 1380 | } else if (idx >= 0) { | ||
| 1122 | /* | 1381 | /* |
| 1123 | * Try to get the fixed counter, if that is already taken | 1382 | * Try to get the fixed counter, if that is already taken |
| 1124 | * then try to get a generic counter: | 1383 | * then try to get a generic counter: |
| @@ -1229,6 +1488,44 @@ void perf_counter_print_debug(void) | |||
| 1229 | local_irq_restore(flags); | 1488 | local_irq_restore(flags); |
| 1230 | } | 1489 | } |
| 1231 | 1490 | ||
| 1491 | static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, | ||
| 1492 | struct perf_sample_data *data) | ||
| 1493 | { | ||
| 1494 | struct debug_store *ds = cpuc->ds; | ||
| 1495 | struct bts_record { | ||
| 1496 | u64 from; | ||
| 1497 | u64 to; | ||
| 1498 | u64 flags; | ||
| 1499 | }; | ||
| 1500 | struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; | ||
| 1501 | unsigned long orig_ip = data->regs->ip; | ||
| 1502 | struct bts_record *at, *top; | ||
| 1503 | |||
| 1504 | if (!counter) | ||
| 1505 | return; | ||
| 1506 | |||
| 1507 | if (!ds) | ||
| 1508 | return; | ||
| 1509 | |||
| 1510 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
| 1511 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
| 1512 | |||
| 1513 | ds->bts_index = ds->bts_buffer_base; | ||
| 1514 | |||
| 1515 | for (; at < top; at++) { | ||
| 1516 | data->regs->ip = at->from; | ||
| 1517 | data->addr = at->to; | ||
| 1518 | |||
| 1519 | perf_counter_output(counter, 1, data); | ||
| 1520 | } | ||
| 1521 | |||
| 1522 | data->regs->ip = orig_ip; | ||
| 1523 | data->addr = 0; | ||
| 1524 | |||
| 1525 | /* There's new data available. */ | ||
| 1526 | counter->pending_kill = POLL_IN; | ||
| 1527 | } | ||
| 1528 | |||
| 1232 | static void x86_pmu_disable(struct perf_counter *counter) | 1529 | static void x86_pmu_disable(struct perf_counter *counter) |
| 1233 | { | 1530 | { |
| 1234 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | 1531 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); |
| @@ -1253,6 +1550,15 @@ static void x86_pmu_disable(struct perf_counter *counter) | |||
| 1253 | * that we are disabling: | 1550 | * that we are disabling: |
| 1254 | */ | 1551 | */ |
| 1255 | x86_perf_counter_update(counter, hwc, idx); | 1552 | x86_perf_counter_update(counter, hwc, idx); |
| 1553 | |||
| 1554 | /* Drain the remaining BTS records. */ | ||
| 1555 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
| 1556 | struct perf_sample_data data; | ||
| 1557 | struct pt_regs regs; | ||
| 1558 | |||
| 1559 | data.regs = ®s; | ||
| 1560 | intel_pmu_drain_bts_buffer(cpuc, &data); | ||
| 1561 | } | ||
| 1256 | cpuc->counters[idx] = NULL; | 1562 | cpuc->counters[idx] = NULL; |
| 1257 | clear_bit(idx, cpuc->used_mask); | 1563 | clear_bit(idx, cpuc->used_mask); |
| 1258 | 1564 | ||
| @@ -1280,6 +1586,7 @@ static int intel_pmu_save_and_restart(struct perf_counter *counter) | |||
| 1280 | 1586 | ||
| 1281 | static void intel_pmu_reset(void) | 1587 | static void intel_pmu_reset(void) |
| 1282 | { | 1588 | { |
| 1589 | struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds; | ||
| 1283 | unsigned long flags; | 1590 | unsigned long flags; |
| 1284 | int idx; | 1591 | int idx; |
| 1285 | 1592 | ||
| @@ -1297,6 +1604,8 @@ static void intel_pmu_reset(void) | |||
| 1297 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { | 1604 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { |
| 1298 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | 1605 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
| 1299 | } | 1606 | } |
| 1607 | if (ds) | ||
| 1608 | ds->bts_index = ds->bts_buffer_base; | ||
| 1300 | 1609 | ||
| 1301 | local_irq_restore(flags); | 1610 | local_irq_restore(flags); |
| 1302 | } | 1611 | } |
| @@ -1362,6 +1671,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
| 1362 | cpuc = &__get_cpu_var(cpu_hw_counters); | 1671 | cpuc = &__get_cpu_var(cpu_hw_counters); |
| 1363 | 1672 | ||
| 1364 | perf_disable(); | 1673 | perf_disable(); |
| 1674 | intel_pmu_drain_bts_buffer(cpuc, &data); | ||
| 1365 | status = intel_pmu_get_status(); | 1675 | status = intel_pmu_get_status(); |
| 1366 | if (!status) { | 1676 | if (!status) { |
| 1367 | perf_enable(); | 1677 | perf_enable(); |
| @@ -1571,6 +1881,8 @@ static struct x86_pmu intel_pmu = { | |||
| 1571 | * the generic counter period: | 1881 | * the generic counter period: |
| 1572 | */ | 1882 | */ |
| 1573 | .max_period = (1ULL << 31) - 1, | 1883 | .max_period = (1ULL << 31) - 1, |
| 1884 | .enable_bts = intel_pmu_enable_bts, | ||
| 1885 | .disable_bts = intel_pmu_disable_bts, | ||
| 1574 | }; | 1886 | }; |
| 1575 | 1887 | ||
| 1576 | static struct x86_pmu amd_pmu = { | 1888 | static struct x86_pmu amd_pmu = { |
| @@ -1962,3 +2274,8 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
| 1962 | 2274 | ||
| 1963 | return entry; | 2275 | return entry; |
| 1964 | } | 2276 | } |
| 2277 | |||
| 2278 | void hw_perf_counter_setup_online(int cpu) | ||
| 2279 | { | ||
| 2280 | init_debug_store_on_cpu(cpu); | ||
| 2281 | } | ||
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index d94e1ea3b9fe..9dbb527e1652 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
| @@ -417,10 +417,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, | |||
| 417 | unsigned long return_hooker = (unsigned long) | 417 | unsigned long return_hooker = (unsigned long) |
| 418 | &return_to_handler; | 418 | &return_to_handler; |
| 419 | 419 | ||
| 420 | /* Nmi's are currently unsupported */ | ||
| 421 | if (unlikely(in_nmi())) | ||
| 422 | return; | ||
| 423 | |||
| 424 | if (unlikely(atomic_read(¤t->tracing_graph_pause))) | 420 | if (unlikely(atomic_read(¤t->tracing_graph_pause))) |
| 425 | return; | 421 | return; |
| 426 | 422 | ||
| @@ -498,37 +494,56 @@ static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) | |||
| 498 | 494 | ||
| 499 | struct syscall_metadata *syscall_nr_to_meta(int nr) | 495 | struct syscall_metadata *syscall_nr_to_meta(int nr) |
| 500 | { | 496 | { |
| 501 | if (!syscalls_metadata || nr >= FTRACE_SYSCALL_MAX || nr < 0) | 497 | if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) |
| 502 | return NULL; | 498 | return NULL; |
| 503 | 499 | ||
| 504 | return syscalls_metadata[nr]; | 500 | return syscalls_metadata[nr]; |
| 505 | } | 501 | } |
| 506 | 502 | ||
| 507 | void arch_init_ftrace_syscalls(void) | 503 | int syscall_name_to_nr(char *name) |
| 504 | { | ||
| 505 | int i; | ||
| 506 | |||
| 507 | if (!syscalls_metadata) | ||
| 508 | return -1; | ||
| 509 | |||
| 510 | for (i = 0; i < NR_syscalls; i++) { | ||
| 511 | if (syscalls_metadata[i]) { | ||
| 512 | if (!strcmp(syscalls_metadata[i]->name, name)) | ||
| 513 | return i; | ||
| 514 | } | ||
| 515 | } | ||
| 516 | return -1; | ||
| 517 | } | ||
| 518 | |||
| 519 | void set_syscall_enter_id(int num, int id) | ||
| 520 | { | ||
| 521 | syscalls_metadata[num]->enter_id = id; | ||
| 522 | } | ||
| 523 | |||
| 524 | void set_syscall_exit_id(int num, int id) | ||
| 525 | { | ||
| 526 | syscalls_metadata[num]->exit_id = id; | ||
| 527 | } | ||
| 528 | |||
| 529 | static int __init arch_init_ftrace_syscalls(void) | ||
| 508 | { | 530 | { |
| 509 | int i; | 531 | int i; |
| 510 | struct syscall_metadata *meta; | 532 | struct syscall_metadata *meta; |
| 511 | unsigned long **psys_syscall_table = &sys_call_table; | 533 | unsigned long **psys_syscall_table = &sys_call_table; |
| 512 | static atomic_t refs; | ||
| 513 | |||
| 514 | if (atomic_inc_return(&refs) != 1) | ||
| 515 | goto end; | ||
| 516 | 534 | ||
| 517 | syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * | 535 | syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * |
| 518 | FTRACE_SYSCALL_MAX, GFP_KERNEL); | 536 | NR_syscalls, GFP_KERNEL); |
| 519 | if (!syscalls_metadata) { | 537 | if (!syscalls_metadata) { |
| 520 | WARN_ON(1); | 538 | WARN_ON(1); |
| 521 | return; | 539 | return -ENOMEM; |
| 522 | } | 540 | } |
| 523 | 541 | ||
| 524 | for (i = 0; i < FTRACE_SYSCALL_MAX; i++) { | 542 | for (i = 0; i < NR_syscalls; i++) { |
| 525 | meta = find_syscall_meta(psys_syscall_table[i]); | 543 | meta = find_syscall_meta(psys_syscall_table[i]); |
| 526 | syscalls_metadata[i] = meta; | 544 | syscalls_metadata[i] = meta; |
| 527 | } | 545 | } |
| 528 | return; | 546 | return 0; |
| 529 | |||
| 530 | /* Paranoid: avoid overflow */ | ||
| 531 | end: | ||
| 532 | atomic_dec(&refs); | ||
| 533 | } | 547 | } |
| 548 | arch_initcall(arch_init_ftrace_syscalls); | ||
| 534 | #endif | 549 | #endif |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1a041bcf506b..d71c8655905b 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | #include <linux/dmar.h> | 3 | #include <linux/dmar.h> |
| 4 | #include <linux/bootmem.h> | 4 | #include <linux/bootmem.h> |
| 5 | #include <linux/pci.h> | 5 | #include <linux/pci.h> |
| 6 | #include <linux/kmemleak.h> | ||
| 6 | 7 | ||
| 7 | #include <asm/proto.h> | 8 | #include <asm/proto.h> |
| 8 | #include <asm/dma.h> | 9 | #include <asm/dma.h> |
| @@ -32,7 +33,14 @@ int no_iommu __read_mostly; | |||
| 32 | /* Set this to 1 if there is a HW IOMMU in the system */ | 33 | /* Set this to 1 if there is a HW IOMMU in the system */ |
| 33 | int iommu_detected __read_mostly = 0; | 34 | int iommu_detected __read_mostly = 0; |
| 34 | 35 | ||
| 35 | int iommu_pass_through; | 36 | /* |
| 37 | * This variable becomes 1 if iommu=pt is passed on the kernel command line. | ||
| 38 | * If this variable is 1, IOMMU implementations do no DMA translation for | ||
| 39 | * devices and allow every device to access the whole physical memory. This is | ||
| 40 | * useful if a user wants to use an IOMMU only for KVM device assignment to | ||
| 41 | * guests and not for driver DMA translation. | ||
| 42 | */ | ||
| 43 | int iommu_pass_through __read_mostly; | ||
| 36 | 44 | ||
| 37 | dma_addr_t bad_dma_address __read_mostly = 0; | 45 | dma_addr_t bad_dma_address __read_mostly = 0; |
| 38 | EXPORT_SYMBOL(bad_dma_address); | 46 | EXPORT_SYMBOL(bad_dma_address); |
| @@ -88,6 +96,11 @@ void __init dma32_reserve_bootmem(void) | |||
| 88 | size = roundup(dma32_bootmem_size, align); | 96 | size = roundup(dma32_bootmem_size, align); |
| 89 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, | 97 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, |
| 90 | 512ULL<<20); | 98 | 512ULL<<20); |
| 99 | /* | ||
| 100 | * Kmemleak should not scan this block as it may not be mapped via the | ||
| 101 | * kernel direct mapping. | ||
| 102 | */ | ||
| 103 | kmemleak_ignore(dma32_bootmem_ptr); | ||
| 91 | if (dma32_bootmem_ptr) | 104 | if (dma32_bootmem_ptr) |
| 92 | dma32_bootmem_size = size; | 105 | dma32_bootmem_size = size; |
| 93 | else | 106 | else |
| @@ -147,7 +160,7 @@ again: | |||
| 147 | return NULL; | 160 | return NULL; |
| 148 | 161 | ||
| 149 | addr = page_to_phys(page); | 162 | addr = page_to_phys(page); |
| 150 | if (!is_buffer_dma_capable(dma_mask, addr, size)) { | 163 | if (addr + size > dma_mask) { |
| 151 | __free_pages(page, get_order(size)); | 164 | __free_pages(page, get_order(size)); |
| 152 | 165 | ||
| 153 | if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { | 166 | if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { |
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index d2e56b8f48e7..98a827ee9ed7 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
| @@ -190,14 +190,13 @@ static void iommu_full(struct device *dev, size_t size, int dir) | |||
| 190 | static inline int | 190 | static inline int |
| 191 | need_iommu(struct device *dev, unsigned long addr, size_t size) | 191 | need_iommu(struct device *dev, unsigned long addr, size_t size) |
| 192 | { | 192 | { |
| 193 | return force_iommu || | 193 | return force_iommu || !dma_capable(dev, addr, size); |
| 194 | !is_buffer_dma_capable(*dev->dma_mask, addr, size); | ||
| 195 | } | 194 | } |
| 196 | 195 | ||
| 197 | static inline int | 196 | static inline int |
| 198 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) | 197 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) |
| 199 | { | 198 | { |
| 200 | return !is_buffer_dma_capable(*dev->dma_mask, addr, size); | 199 | return !dma_capable(dev, addr, size); |
| 201 | } | 200 | } |
| 202 | 201 | ||
| 203 | /* Map a single continuous physical area into the IOMMU. | 202 | /* Map a single continuous physical area into the IOMMU. |
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 71d412a09f30..a3933d4330cd 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
| @@ -14,7 +14,7 @@ | |||
| 14 | static int | 14 | static int |
| 15 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) | 15 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) |
| 16 | { | 16 | { |
| 17 | if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) { | 17 | if (hwdev && !dma_capable(hwdev, bus, size)) { |
| 18 | if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) | 18 | if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) |
| 19 | printk(KERN_ERR | 19 | printk(KERN_ERR |
| 20 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", | 20 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", |
| @@ -79,12 +79,29 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, | |||
| 79 | free_pages((unsigned long)vaddr, get_order(size)); | 79 | free_pages((unsigned long)vaddr, get_order(size)); |
| 80 | } | 80 | } |
| 81 | 81 | ||
| 82 | static void nommu_sync_single_for_device(struct device *dev, | ||
| 83 | dma_addr_t addr, size_t size, | ||
| 84 | enum dma_data_direction dir) | ||
| 85 | { | ||
| 86 | flush_write_buffers(); | ||
| 87 | } | ||
| 88 | |||
| 89 | |||
| 90 | static void nommu_sync_sg_for_device(struct device *dev, | ||
| 91 | struct scatterlist *sg, int nelems, | ||
| 92 | enum dma_data_direction dir) | ||
| 93 | { | ||
| 94 | flush_write_buffers(); | ||
| 95 | } | ||
| 96 | |||
| 82 | struct dma_map_ops nommu_dma_ops = { | 97 | struct dma_map_ops nommu_dma_ops = { |
| 83 | .alloc_coherent = dma_generic_alloc_coherent, | 98 | .alloc_coherent = dma_generic_alloc_coherent, |
| 84 | .free_coherent = nommu_free_coherent, | 99 | .free_coherent = nommu_free_coherent, |
| 85 | .map_sg = nommu_map_sg, | 100 | .map_sg = nommu_map_sg, |
| 86 | .map_page = nommu_map_page, | 101 | .map_page = nommu_map_page, |
| 87 | .is_phys = 1, | 102 | .sync_single_for_device = nommu_sync_single_for_device, |
| 103 | .sync_sg_for_device = nommu_sync_sg_for_device, | ||
| 104 | .is_phys = 1, | ||
| 88 | }; | 105 | }; |
| 89 | 106 | ||
| 90 | void __init no_iommu_init(void) | 107 | void __init no_iommu_init(void) |
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 6af96ee44200..e8a35016115f 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c | |||
| @@ -13,31 +13,6 @@ | |||
| 13 | 13 | ||
| 14 | int swiotlb __read_mostly; | 14 | int swiotlb __read_mostly; |
| 15 | 15 | ||
| 16 | void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) | ||
| 17 | { | ||
| 18 | return alloc_bootmem_low_pages(size); | ||
| 19 | } | ||
| 20 | |||
| 21 | void *swiotlb_alloc(unsigned order, unsigned long nslabs) | ||
| 22 | { | ||
| 23 | return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); | ||
| 24 | } | ||
| 25 | |||
| 26 | dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) | ||
| 27 | { | ||
| 28 | return paddr; | ||
| 29 | } | ||
| 30 | |||
| 31 | phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) | ||
| 32 | { | ||
| 33 | return baddr; | ||
| 34 | } | ||
| 35 | |||
| 36 | int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) | ||
| 37 | { | ||
| 38 | return 0; | ||
| 39 | } | ||
| 40 | |||
| 41 | static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, | 16 | static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, |
| 42 | dma_addr_t *dma_handle, gfp_t flags) | 17 | dma_addr_t *dma_handle, gfp_t flags) |
| 43 | { | 18 | { |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 09ecbde91c13..8d7d5c9c1be3 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
| @@ -35,10 +35,11 @@ | |||
| 35 | #include <asm/proto.h> | 35 | #include <asm/proto.h> |
| 36 | #include <asm/ds.h> | 36 | #include <asm/ds.h> |
| 37 | 37 | ||
| 38 | #include <trace/syscall.h> | ||
| 39 | |||
| 40 | #include "tls.h" | 38 | #include "tls.h" |
| 41 | 39 | ||
| 40 | #define CREATE_TRACE_POINTS | ||
| 41 | #include <trace/events/syscalls.h> | ||
| 42 | |||
| 42 | enum x86_regset { | 43 | enum x86_regset { |
| 43 | REGSET_GENERAL, | 44 | REGSET_GENERAL, |
| 44 | REGSET_FP, | 45 | REGSET_FP, |
| @@ -1497,8 +1498,8 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) | |||
| 1497 | tracehook_report_syscall_entry(regs)) | 1498 | tracehook_report_syscall_entry(regs)) |
| 1498 | ret = -1L; | 1499 | ret = -1L; |
| 1499 | 1500 | ||
| 1500 | if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) | 1501 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
| 1501 | ftrace_syscall_enter(regs); | 1502 | trace_sys_enter(regs, regs->orig_ax); |
| 1502 | 1503 | ||
| 1503 | if (unlikely(current->audit_context)) { | 1504 | if (unlikely(current->audit_context)) { |
| 1504 | if (IS_IA32) | 1505 | if (IS_IA32) |
| @@ -1523,8 +1524,8 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) | |||
| 1523 | if (unlikely(current->audit_context)) | 1524 | if (unlikely(current->audit_context)) |
| 1524 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | 1525 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); |
| 1525 | 1526 | ||
| 1526 | if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) | 1527 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
| 1527 | ftrace_syscall_exit(regs); | 1528 | trace_sys_exit(regs, regs->ax); |
| 1528 | 1529 | ||
| 1529 | if (test_thread_flag(TIF_SYSCALL_TRACE)) | 1530 | if (test_thread_flag(TIF_SYSCALL_TRACE)) |
| 1530 | tracehook_report_syscall_exit(regs, 0); | 1531 | tracehook_report_syscall_exit(regs, 0); |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4c578751e94e..81e58238c4ce 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
| @@ -869,6 +869,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
| 869 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { | 869 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { |
| 870 | clear_thread_flag(TIF_NOTIFY_RESUME); | 870 | clear_thread_flag(TIF_NOTIFY_RESUME); |
| 871 | tracehook_notify_resume(regs); | 871 | tracehook_notify_resume(regs); |
| 872 | if (current->replacement_session_keyring) | ||
| 873 | key_replace_session_keyring(); | ||
| 872 | } | 874 | } |
| 873 | 875 | ||
| 874 | #ifdef CONFIG_X86_32 | 876 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 6bc211accf08..45e00eb09c3a 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
| @@ -18,9 +18,9 @@ | |||
| 18 | #include <asm/ia32.h> | 18 | #include <asm/ia32.h> |
| 19 | #include <asm/syscalls.h> | 19 | #include <asm/syscalls.h> |
| 20 | 20 | ||
| 21 | asmlinkage long sys_mmap(unsigned long addr, unsigned long len, | 21 | SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, |
| 22 | unsigned long prot, unsigned long flags, | 22 | unsigned long, prot, unsigned long, flags, |
| 23 | unsigned long fd, unsigned long off) | 23 | unsigned long, fd, unsigned long, off) |
| 24 | { | 24 | { |
| 25 | long error; | 25 | long error; |
| 26 | struct file *file; | 26 | struct file *file; |
| @@ -226,7 +226,7 @@ bottomup: | |||
| 226 | } | 226 | } |
| 227 | 227 | ||
| 228 | 228 | ||
| 229 | asmlinkage long sys_uname(struct new_utsname __user *name) | 229 | SYSCALL_DEFINE1(uname, struct new_utsname __user *, name) |
| 230 | { | 230 | { |
| 231 | int err; | 231 | int err; |
| 232 | down_read(&uts_sem); | 232 | down_read(&uts_sem); |
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index 2c55ed098654..528bf954eb74 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c | |||
| @@ -331,6 +331,20 @@ static void kmemcheck_read_strict(struct pt_regs *regs, | |||
| 331 | kmemcheck_shadow_set(shadow, size); | 331 | kmemcheck_shadow_set(shadow, size); |
| 332 | } | 332 | } |
| 333 | 333 | ||
| 334 | bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) | ||
| 335 | { | ||
| 336 | enum kmemcheck_shadow status; | ||
| 337 | void *shadow; | ||
| 338 | |||
| 339 | shadow = kmemcheck_shadow_lookup(addr); | ||
| 340 | if (!shadow) | ||
| 341 | return true; | ||
| 342 | |||
| 343 | status = kmemcheck_shadow_test(shadow, size); | ||
| 344 | |||
| 345 | return status == KMEMCHECK_SHADOW_INITIALIZED; | ||
| 346 | } | ||
| 347 | |||
| 334 | /* Access may cross page boundary */ | 348 | /* Access may cross page boundary */ |
| 335 | static void kmemcheck_read(struct pt_regs *regs, | 349 | static void kmemcheck_read(struct pt_regs *regs, |
| 336 | unsigned long addr, unsigned int size) | 350 | unsigned long addr, unsigned int size) |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 89b9a5cd63da..cb88b1a0bd5f 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
| @@ -1,11 +1,14 @@ | |||
| 1 | /** | 1 | /** |
| 2 | * @file nmi_int.c | 2 | * @file nmi_int.c |
| 3 | * | 3 | * |
| 4 | * @remark Copyright 2002-2008 OProfile authors | 4 | * @remark Copyright 2002-2009 OProfile authors |
| 5 | * @remark Read the file COPYING | 5 | * @remark Read the file COPYING |
| 6 | * | 6 | * |
| 7 | * @author John Levon <levon@movementarian.org> | 7 | * @author John Levon <levon@movementarian.org> |
| 8 | * @author Robert Richter <robert.richter@amd.com> | 8 | * @author Robert Richter <robert.richter@amd.com> |
| 9 | * @author Barry Kasindorf <barry.kasindorf@amd.com> | ||
| 10 | * @author Jason Yeh <jason.yeh@amd.com> | ||
| 11 | * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> | ||
| 9 | */ | 12 | */ |
| 10 | 13 | ||
| 11 | #include <linux/init.h> | 14 | #include <linux/init.h> |
| @@ -24,13 +27,35 @@ | |||
| 24 | #include "op_counter.h" | 27 | #include "op_counter.h" |
| 25 | #include "op_x86_model.h" | 28 | #include "op_x86_model.h" |
| 26 | 29 | ||
| 27 | static struct op_x86_model_spec const *model; | 30 | static struct op_x86_model_spec *model; |
| 28 | static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); | 31 | static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); |
| 29 | static DEFINE_PER_CPU(unsigned long, saved_lvtpc); | 32 | static DEFINE_PER_CPU(unsigned long, saved_lvtpc); |
| 30 | 33 | ||
| 31 | /* 0 == registered but off, 1 == registered and on */ | 34 | /* 0 == registered but off, 1 == registered and on */ |
| 32 | static int nmi_enabled = 0; | 35 | static int nmi_enabled = 0; |
| 33 | 36 | ||
| 37 | struct op_counter_config counter_config[OP_MAX_COUNTER]; | ||
| 38 | |||
| 39 | /* common functions */ | ||
| 40 | |||
| 41 | u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, | ||
| 42 | struct op_counter_config *counter_config) | ||
| 43 | { | ||
| 44 | u64 val = 0; | ||
| 45 | u16 event = (u16)counter_config->event; | ||
| 46 | |||
| 47 | val |= ARCH_PERFMON_EVENTSEL_INT; | ||
| 48 | val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0; | ||
| 49 | val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0; | ||
| 50 | val |= (counter_config->unit_mask & 0xFF) << 8; | ||
| 51 | event &= model->event_mask ? model->event_mask : 0xFF; | ||
| 52 | val |= event & 0xFF; | ||
| 53 | val |= (event & 0x0F00) << 24; | ||
| 54 | |||
| 55 | return val; | ||
| 56 | } | ||
| 57 | |||
| 58 | |||
| 34 | static int profile_exceptions_notify(struct notifier_block *self, | 59 | static int profile_exceptions_notify(struct notifier_block *self, |
| 35 | unsigned long val, void *data) | 60 | unsigned long val, void *data) |
| 36 | { | 61 | { |
| @@ -52,36 +77,214 @@ static int profile_exceptions_notify(struct notifier_block *self, | |||
| 52 | 77 | ||
| 53 | static void nmi_cpu_save_registers(struct op_msrs *msrs) | 78 | static void nmi_cpu_save_registers(struct op_msrs *msrs) |
| 54 | { | 79 | { |
| 55 | unsigned int const nr_ctrs = model->num_counters; | ||
| 56 | unsigned int const nr_ctrls = model->num_controls; | ||
| 57 | struct op_msr *counters = msrs->counters; | 80 | struct op_msr *counters = msrs->counters; |
| 58 | struct op_msr *controls = msrs->controls; | 81 | struct op_msr *controls = msrs->controls; |
| 59 | unsigned int i; | 82 | unsigned int i; |
| 60 | 83 | ||
| 61 | for (i = 0; i < nr_ctrs; ++i) { | 84 | for (i = 0; i < model->num_counters; ++i) { |
| 62 | if (counters[i].addr) { | 85 | if (counters[i].addr) |
| 63 | rdmsr(counters[i].addr, | 86 | rdmsrl(counters[i].addr, counters[i].saved); |
| 64 | counters[i].saved.low, | 87 | } |
| 65 | counters[i].saved.high); | 88 | |
| 66 | } | 89 | for (i = 0; i < model->num_controls; ++i) { |
| 90 | if (controls[i].addr) | ||
| 91 | rdmsrl(controls[i].addr, controls[i].saved); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | static void nmi_cpu_start(void *dummy) | ||
| 96 | { | ||
| 97 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); | ||
| 98 | model->start(msrs); | ||
| 99 | } | ||
| 100 | |||
| 101 | static int nmi_start(void) | ||
| 102 | { | ||
| 103 | on_each_cpu(nmi_cpu_start, NULL, 1); | ||
| 104 | return 0; | ||
| 105 | } | ||
| 106 | |||
| 107 | static void nmi_cpu_stop(void *dummy) | ||
| 108 | { | ||
| 109 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); | ||
| 110 | model->stop(msrs); | ||
| 111 | } | ||
| 112 | |||
| 113 | static void nmi_stop(void) | ||
| 114 | { | ||
| 115 | on_each_cpu(nmi_cpu_stop, NULL, 1); | ||
| 116 | } | ||
| 117 | |||
| 118 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
| 119 | |||
| 120 | static DEFINE_PER_CPU(int, switch_index); | ||
| 121 | |||
| 122 | static inline int has_mux(void) | ||
| 123 | { | ||
| 124 | return !!model->switch_ctrl; | ||
| 125 | } | ||
| 126 | |||
| 127 | inline int op_x86_phys_to_virt(int phys) | ||
| 128 | { | ||
| 129 | return __get_cpu_var(switch_index) + phys; | ||
| 130 | } | ||
| 131 | |||
| 132 | inline int op_x86_virt_to_phys(int virt) | ||
| 133 | { | ||
| 134 | return virt % model->num_counters; | ||
| 135 | } | ||
| 136 | |||
| 137 | static void nmi_shutdown_mux(void) | ||
| 138 | { | ||
| 139 | int i; | ||
| 140 | |||
| 141 | if (!has_mux()) | ||
| 142 | return; | ||
| 143 | |||
| 144 | for_each_possible_cpu(i) { | ||
| 145 | kfree(per_cpu(cpu_msrs, i).multiplex); | ||
| 146 | per_cpu(cpu_msrs, i).multiplex = NULL; | ||
| 147 | per_cpu(switch_index, i) = 0; | ||
| 67 | } | 148 | } |
| 149 | } | ||
| 150 | |||
| 151 | static int nmi_setup_mux(void) | ||
| 152 | { | ||
| 153 | size_t multiplex_size = | ||
| 154 | sizeof(struct op_msr) * model->num_virt_counters; | ||
| 155 | int i; | ||
| 156 | |||
| 157 | if (!has_mux()) | ||
| 158 | return 1; | ||
| 159 | |||
| 160 | for_each_possible_cpu(i) { | ||
| 161 | per_cpu(cpu_msrs, i).multiplex = | ||
| 162 | kmalloc(multiplex_size, GFP_KERNEL); | ||
| 163 | if (!per_cpu(cpu_msrs, i).multiplex) | ||
| 164 | return 0; | ||
| 165 | } | ||
| 166 | |||
| 167 | return 1; | ||
| 168 | } | ||
| 169 | |||
| 170 | static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) | ||
| 171 | { | ||
| 172 | int i; | ||
| 173 | struct op_msr *multiplex = msrs->multiplex; | ||
| 174 | |||
| 175 | if (!has_mux()) | ||
| 176 | return; | ||
| 68 | 177 | ||
| 69 | for (i = 0; i < nr_ctrls; ++i) { | 178 | for (i = 0; i < model->num_virt_counters; ++i) { |
| 70 | if (controls[i].addr) { | 179 | if (counter_config[i].enabled) { |
| 71 | rdmsr(controls[i].addr, | 180 | multiplex[i].saved = -(u64)counter_config[i].count; |
| 72 | controls[i].saved.low, | 181 | } else { |
| 73 | controls[i].saved.high); | 182 | multiplex[i].addr = 0; |
| 183 | multiplex[i].saved = 0; | ||
| 74 | } | 184 | } |
| 75 | } | 185 | } |
| 186 | |||
| 187 | per_cpu(switch_index, cpu) = 0; | ||
| 188 | } | ||
| 189 | |||
| 190 | static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) | ||
| 191 | { | ||
| 192 | struct op_msr *multiplex = msrs->multiplex; | ||
| 193 | int i; | ||
| 194 | |||
| 195 | for (i = 0; i < model->num_counters; ++i) { | ||
| 196 | int virt = op_x86_phys_to_virt(i); | ||
| 197 | if (multiplex[virt].addr) | ||
| 198 | rdmsrl(multiplex[virt].addr, multiplex[virt].saved); | ||
| 199 | } | ||
| 200 | } | ||
| 201 | |||
| 202 | static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) | ||
| 203 | { | ||
| 204 | struct op_msr *multiplex = msrs->multiplex; | ||
| 205 | int i; | ||
| 206 | |||
| 207 | for (i = 0; i < model->num_counters; ++i) { | ||
| 208 | int virt = op_x86_phys_to_virt(i); | ||
| 209 | if (multiplex[virt].addr) | ||
| 210 | wrmsrl(multiplex[virt].addr, multiplex[virt].saved); | ||
| 211 | } | ||
| 76 | } | 212 | } |
| 77 | 213 | ||
| 78 | static void nmi_save_registers(void *dummy) | 214 | static void nmi_cpu_switch(void *dummy) |
| 79 | { | 215 | { |
| 80 | int cpu = smp_processor_id(); | 216 | int cpu = smp_processor_id(); |
| 217 | int si = per_cpu(switch_index, cpu); | ||
| 81 | struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); | 218 | struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); |
| 82 | nmi_cpu_save_registers(msrs); | 219 | |
| 220 | nmi_cpu_stop(NULL); | ||
| 221 | nmi_cpu_save_mpx_registers(msrs); | ||
| 222 | |||
| 223 | /* move to next set */ | ||
| 224 | si += model->num_counters; | ||
| 225 | if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) | ||
| 226 | per_cpu(switch_index, cpu) = 0; | ||
| 227 | else | ||
| 228 | per_cpu(switch_index, cpu) = si; | ||
| 229 | |||
| 230 | model->switch_ctrl(model, msrs); | ||
| 231 | nmi_cpu_restore_mpx_registers(msrs); | ||
| 232 | |||
| 233 | nmi_cpu_start(NULL); | ||
| 234 | } | ||
| 235 | |||
| 236 | |||
| 237 | /* | ||
| 238 | * Quick check to see if multiplexing is necessary. | ||
| 239 | * The check should be sufficient since counters are used | ||
| 240 | * in order. | ||
| 241 | */ | ||
| 242 | static int nmi_multiplex_on(void) | ||
| 243 | { | ||
| 244 | return counter_config[model->num_counters].count ? 0 : -EINVAL; | ||
| 245 | } | ||
| 246 | |||
| 247 | static int nmi_switch_event(void) | ||
| 248 | { | ||
| 249 | if (!has_mux()) | ||
| 250 | return -ENOSYS; /* not implemented */ | ||
| 251 | if (nmi_multiplex_on() < 0) | ||
| 252 | return -EINVAL; /* not necessary */ | ||
| 253 | |||
| 254 | on_each_cpu(nmi_cpu_switch, NULL, 1); | ||
| 255 | |||
| 256 | return 0; | ||
| 257 | } | ||
| 258 | |||
| 259 | static inline void mux_init(struct oprofile_operations *ops) | ||
| 260 | { | ||
| 261 | if (has_mux()) | ||
| 262 | ops->switch_events = nmi_switch_event; | ||
| 263 | } | ||
| 264 | |||
| 265 | static void mux_clone(int cpu) | ||
| 266 | { | ||
| 267 | if (!has_mux()) | ||
| 268 | return; | ||
| 269 | |||
| 270 | memcpy(per_cpu(cpu_msrs, cpu).multiplex, | ||
| 271 | per_cpu(cpu_msrs, 0).multiplex, | ||
| 272 | sizeof(struct op_msr) * model->num_virt_counters); | ||
| 83 | } | 273 | } |
| 84 | 274 | ||
| 275 | #else | ||
| 276 | |||
| 277 | inline int op_x86_phys_to_virt(int phys) { return phys; } | ||
| 278 | inline int op_x86_virt_to_phys(int virt) { return virt; } | ||
| 279 | static inline void nmi_shutdown_mux(void) { } | ||
| 280 | static inline int nmi_setup_mux(void) { return 1; } | ||
| 281 | static inline void | ||
| 282 | nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { } | ||
| 283 | static inline void mux_init(struct oprofile_operations *ops) { } | ||
| 284 | static void mux_clone(int cpu) { } | ||
| 285 | |||
| 286 | #endif | ||
| 287 | |||
| 85 | static void free_msrs(void) | 288 | static void free_msrs(void) |
| 86 | { | 289 | { |
| 87 | int i; | 290 | int i; |
| @@ -95,38 +298,32 @@ static void free_msrs(void) | |||
| 95 | 298 | ||
| 96 | static int allocate_msrs(void) | 299 | static int allocate_msrs(void) |
| 97 | { | 300 | { |
| 98 | int success = 1; | ||
| 99 | size_t controls_size = sizeof(struct op_msr) * model->num_controls; | 301 | size_t controls_size = sizeof(struct op_msr) * model->num_controls; |
| 100 | size_t counters_size = sizeof(struct op_msr) * model->num_counters; | 302 | size_t counters_size = sizeof(struct op_msr) * model->num_counters; |
| 101 | 303 | ||
| 102 | int i; | 304 | int i; |
| 103 | for_each_possible_cpu(i) { | 305 | for_each_possible_cpu(i) { |
| 104 | per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, | 306 | per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, |
| 105 | GFP_KERNEL); | 307 | GFP_KERNEL); |
| 106 | if (!per_cpu(cpu_msrs, i).counters) { | 308 | if (!per_cpu(cpu_msrs, i).counters) |
| 107 | success = 0; | 309 | return 0; |
| 108 | break; | ||
| 109 | } | ||
| 110 | per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, | 310 | per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, |
| 111 | GFP_KERNEL); | 311 | GFP_KERNEL); |
| 112 | if (!per_cpu(cpu_msrs, i).controls) { | 312 | if (!per_cpu(cpu_msrs, i).controls) |
| 113 | success = 0; | 313 | return 0; |
| 114 | break; | ||
| 115 | } | ||
| 116 | } | 314 | } |
| 117 | 315 | ||
| 118 | if (!success) | 316 | return 1; |
| 119 | free_msrs(); | ||
| 120 | |||
| 121 | return success; | ||
| 122 | } | 317 | } |
| 123 | 318 | ||
| 124 | static void nmi_cpu_setup(void *dummy) | 319 | static void nmi_cpu_setup(void *dummy) |
| 125 | { | 320 | { |
| 126 | int cpu = smp_processor_id(); | 321 | int cpu = smp_processor_id(); |
| 127 | struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); | 322 | struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); |
| 323 | nmi_cpu_save_registers(msrs); | ||
| 128 | spin_lock(&oprofilefs_lock); | 324 | spin_lock(&oprofilefs_lock); |
| 129 | model->setup_ctrs(msrs); | 325 | model->setup_ctrs(model, msrs); |
| 326 | nmi_cpu_setup_mux(cpu, msrs); | ||
| 130 | spin_unlock(&oprofilefs_lock); | 327 | spin_unlock(&oprofilefs_lock); |
| 131 | per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); | 328 | per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); |
| 132 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 329 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
| @@ -144,11 +341,15 @@ static int nmi_setup(void) | |||
| 144 | int cpu; | 341 | int cpu; |
| 145 | 342 | ||
| 146 | if (!allocate_msrs()) | 343 | if (!allocate_msrs()) |
| 147 | return -ENOMEM; | 344 | err = -ENOMEM; |
| 345 | else if (!nmi_setup_mux()) | ||
| 346 | err = -ENOMEM; | ||
| 347 | else | ||
| 348 | err = register_die_notifier(&profile_exceptions_nb); | ||
| 148 | 349 | ||
| 149 | err = register_die_notifier(&profile_exceptions_nb); | ||
| 150 | if (err) { | 350 | if (err) { |
| 151 | free_msrs(); | 351 | free_msrs(); |
| 352 | nmi_shutdown_mux(); | ||
| 152 | return err; | 353 | return err; |
| 153 | } | 354 | } |
| 154 | 355 | ||
| @@ -159,45 +360,38 @@ static int nmi_setup(void) | |||
| 159 | /* Assume saved/restored counters are the same on all CPUs */ | 360 | /* Assume saved/restored counters are the same on all CPUs */ |
| 160 | model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); | 361 | model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); |
| 161 | for_each_possible_cpu(cpu) { | 362 | for_each_possible_cpu(cpu) { |
| 162 | if (cpu != 0) { | 363 | if (!cpu) |
| 163 | memcpy(per_cpu(cpu_msrs, cpu).counters, | 364 | continue; |
| 164 | per_cpu(cpu_msrs, 0).counters, | 365 | |
| 165 | sizeof(struct op_msr) * model->num_counters); | 366 | memcpy(per_cpu(cpu_msrs, cpu).counters, |
| 166 | 367 | per_cpu(cpu_msrs, 0).counters, | |
| 167 | memcpy(per_cpu(cpu_msrs, cpu).controls, | 368 | sizeof(struct op_msr) * model->num_counters); |
| 168 | per_cpu(cpu_msrs, 0).controls, | 369 | |
| 169 | sizeof(struct op_msr) * model->num_controls); | 370 | memcpy(per_cpu(cpu_msrs, cpu).controls, |
| 170 | } | 371 | per_cpu(cpu_msrs, 0).controls, |
| 372 | sizeof(struct op_msr) * model->num_controls); | ||
| 171 | 373 | ||
| 374 | mux_clone(cpu); | ||
| 172 | } | 375 | } |
| 173 | on_each_cpu(nmi_save_registers, NULL, 1); | ||
| 174 | on_each_cpu(nmi_cpu_setup, NULL, 1); | 376 | on_each_cpu(nmi_cpu_setup, NULL, 1); |
| 175 | nmi_enabled = 1; | 377 | nmi_enabled = 1; |
| 176 | return 0; | 378 | return 0; |
| 177 | } | 379 | } |
| 178 | 380 | ||
| 179 | static void nmi_restore_registers(struct op_msrs *msrs) | 381 | static void nmi_cpu_restore_registers(struct op_msrs *msrs) |
| 180 | { | 382 | { |
| 181 | unsigned int const nr_ctrs = model->num_counters; | ||
| 182 | unsigned int const nr_ctrls = model->num_controls; | ||
| 183 | struct op_msr *counters = msrs->counters; | 383 | struct op_msr *counters = msrs->counters; |
| 184 | struct op_msr *controls = msrs->controls; | 384 | struct op_msr *controls = msrs->controls; |
| 185 | unsigned int i; | 385 | unsigned int i; |
| 186 | 386 | ||
| 187 | for (i = 0; i < nr_ctrls; ++i) { | 387 | for (i = 0; i < model->num_controls; ++i) { |
| 188 | if (controls[i].addr) { | 388 | if (controls[i].addr) |
| 189 | wrmsr(controls[i].addr, | 389 | wrmsrl(controls[i].addr, controls[i].saved); |
| 190 | controls[i].saved.low, | ||
| 191 | controls[i].saved.high); | ||
| 192 | } | ||
| 193 | } | 390 | } |
| 194 | 391 | ||
| 195 | for (i = 0; i < nr_ctrs; ++i) { | 392 | for (i = 0; i < model->num_counters; ++i) { |
| 196 | if (counters[i].addr) { | 393 | if (counters[i].addr) |
| 197 | wrmsr(counters[i].addr, | 394 | wrmsrl(counters[i].addr, counters[i].saved); |
| 198 | counters[i].saved.low, | ||
| 199 | counters[i].saved.high); | ||
| 200 | } | ||
| 201 | } | 395 | } |
| 202 | } | 396 | } |
| 203 | 397 | ||
| @@ -205,7 +399,7 @@ static void nmi_cpu_shutdown(void *dummy) | |||
| 205 | { | 399 | { |
| 206 | unsigned int v; | 400 | unsigned int v; |
| 207 | int cpu = smp_processor_id(); | 401 | int cpu = smp_processor_id(); |
| 208 | struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); | 402 | struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); |
| 209 | 403 | ||
| 210 | /* restoring APIC_LVTPC can trigger an apic error because the delivery | 404 | /* restoring APIC_LVTPC can trigger an apic error because the delivery |
| 211 | * mode and vector nr combination can be illegal. That's by design: on | 405 | * mode and vector nr combination can be illegal. That's by design: on |
| @@ -216,7 +410,7 @@ static void nmi_cpu_shutdown(void *dummy) | |||
| 216 | apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); | 410 | apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); |
| 217 | apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); | 411 | apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); |
| 218 | apic_write(APIC_LVTERR, v); | 412 | apic_write(APIC_LVTERR, v); |
| 219 | nmi_restore_registers(msrs); | 413 | nmi_cpu_restore_registers(msrs); |
| 220 | } | 414 | } |
| 221 | 415 | ||
| 222 | static void nmi_shutdown(void) | 416 | static void nmi_shutdown(void) |
| @@ -226,42 +420,18 @@ static void nmi_shutdown(void) | |||
| 226 | nmi_enabled = 0; | 420 | nmi_enabled = 0; |
| 227 | on_each_cpu(nmi_cpu_shutdown, NULL, 1); | 421 | on_each_cpu(nmi_cpu_shutdown, NULL, 1); |
| 228 | unregister_die_notifier(&profile_exceptions_nb); | 422 | unregister_die_notifier(&profile_exceptions_nb); |
| 423 | nmi_shutdown_mux(); | ||
| 229 | msrs = &get_cpu_var(cpu_msrs); | 424 | msrs = &get_cpu_var(cpu_msrs); |
| 230 | model->shutdown(msrs); | 425 | model->shutdown(msrs); |
| 231 | free_msrs(); | 426 | free_msrs(); |
| 232 | put_cpu_var(cpu_msrs); | 427 | put_cpu_var(cpu_msrs); |
| 233 | } | 428 | } |
| 234 | 429 | ||
| 235 | static void nmi_cpu_start(void *dummy) | ||
| 236 | { | ||
| 237 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); | ||
| 238 | model->start(msrs); | ||
| 239 | } | ||
| 240 | |||
| 241 | static int nmi_start(void) | ||
| 242 | { | ||
| 243 | on_each_cpu(nmi_cpu_start, NULL, 1); | ||
| 244 | return 0; | ||
| 245 | } | ||
| 246 | |||
| 247 | static void nmi_cpu_stop(void *dummy) | ||
| 248 | { | ||
| 249 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); | ||
| 250 | model->stop(msrs); | ||
| 251 | } | ||
| 252 | |||
| 253 | static void nmi_stop(void) | ||
| 254 | { | ||
| 255 | on_each_cpu(nmi_cpu_stop, NULL, 1); | ||
| 256 | } | ||
| 257 | |||
| 258 | struct op_counter_config counter_config[OP_MAX_COUNTER]; | ||
| 259 | |||
| 260 | static int nmi_create_files(struct super_block *sb, struct dentry *root) | 430 | static int nmi_create_files(struct super_block *sb, struct dentry *root) |
| 261 | { | 431 | { |
| 262 | unsigned int i; | 432 | unsigned int i; |
| 263 | 433 | ||
| 264 | for (i = 0; i < model->num_counters; ++i) { | 434 | for (i = 0; i < model->num_virt_counters; ++i) { |
| 265 | struct dentry *dir; | 435 | struct dentry *dir; |
| 266 | char buf[4]; | 436 | char buf[4]; |
| 267 | 437 | ||
| @@ -270,7 +440,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) | |||
| 270 | * NOTE: assumes 1:1 mapping here (that counters are organized | 440 | * NOTE: assumes 1:1 mapping here (that counters are organized |
| 271 | * sequentially in their struct assignment). | 441 | * sequentially in their struct assignment). |
| 272 | */ | 442 | */ |
| 273 | if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i))) | 443 | if (!avail_to_resrv_perfctr_nmi_bit(op_x86_virt_to_phys(i))) |
| 274 | continue; | 444 | continue; |
| 275 | 445 | ||
| 276 | snprintf(buf, sizeof(buf), "%d", i); | 446 | snprintf(buf, sizeof(buf), "%d", i); |
| @@ -402,6 +572,7 @@ module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); | |||
| 402 | static int __init ppro_init(char **cpu_type) | 572 | static int __init ppro_init(char **cpu_type) |
| 403 | { | 573 | { |
| 404 | __u8 cpu_model = boot_cpu_data.x86_model; | 574 | __u8 cpu_model = boot_cpu_data.x86_model; |
| 575 | struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ | ||
| 405 | 576 | ||
| 406 | if (force_arch_perfmon && cpu_has_arch_perfmon) | 577 | if (force_arch_perfmon && cpu_has_arch_perfmon) |
| 407 | return 0; | 578 | return 0; |
| @@ -428,7 +599,7 @@ static int __init ppro_init(char **cpu_type) | |||
| 428 | *cpu_type = "i386/core_2"; | 599 | *cpu_type = "i386/core_2"; |
| 429 | break; | 600 | break; |
| 430 | case 26: | 601 | case 26: |
| 431 | arch_perfmon_setup_counters(); | 602 | spec = &op_arch_perfmon_spec; |
| 432 | *cpu_type = "i386/core_i7"; | 603 | *cpu_type = "i386/core_i7"; |
| 433 | break; | 604 | break; |
| 434 | case 28: | 605 | case 28: |
| @@ -439,17 +610,7 @@ static int __init ppro_init(char **cpu_type) | |||
| 439 | return 0; | 610 | return 0; |
| 440 | } | 611 | } |
| 441 | 612 | ||
| 442 | model = &op_ppro_spec; | 613 | model = spec; |
| 443 | return 1; | ||
| 444 | } | ||
| 445 | |||
| 446 | static int __init arch_perfmon_init(char **cpu_type) | ||
| 447 | { | ||
| 448 | if (!cpu_has_arch_perfmon) | ||
| 449 | return 0; | ||
| 450 | *cpu_type = "i386/arch_perfmon"; | ||
| 451 | model = &op_arch_perfmon_spec; | ||
| 452 | arch_perfmon_setup_counters(); | ||
| 453 | return 1; | 614 | return 1; |
| 454 | } | 615 | } |
| 455 | 616 | ||
| @@ -471,27 +632,26 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
| 471 | /* Needs to be at least an Athlon (or hammer in 32bit mode) */ | 632 | /* Needs to be at least an Athlon (or hammer in 32bit mode) */ |
| 472 | 633 | ||
| 473 | switch (family) { | 634 | switch (family) { |
| 474 | default: | ||
| 475 | return -ENODEV; | ||
| 476 | case 6: | 635 | case 6: |
| 477 | model = &op_amd_spec; | ||
| 478 | cpu_type = "i386/athlon"; | 636 | cpu_type = "i386/athlon"; |
| 479 | break; | 637 | break; |
| 480 | case 0xf: | 638 | case 0xf: |
| 481 | model = &op_amd_spec; | 639 | /* |
| 482 | /* Actually it could be i386/hammer too, but give | 640 | * Actually it could be i386/hammer too, but |
| 483 | user space an consistent name. */ | 641 | * give user space an consistent name. |
| 642 | */ | ||
| 484 | cpu_type = "x86-64/hammer"; | 643 | cpu_type = "x86-64/hammer"; |
| 485 | break; | 644 | break; |
| 486 | case 0x10: | 645 | case 0x10: |
| 487 | model = &op_amd_spec; | ||
| 488 | cpu_type = "x86-64/family10"; | 646 | cpu_type = "x86-64/family10"; |
| 489 | break; | 647 | break; |
| 490 | case 0x11: | 648 | case 0x11: |
| 491 | model = &op_amd_spec; | ||
| 492 | cpu_type = "x86-64/family11h"; | 649 | cpu_type = "x86-64/family11h"; |
| 493 | break; | 650 | break; |
| 651 | default: | ||
| 652 | return -ENODEV; | ||
| 494 | } | 653 | } |
| 654 | model = &op_amd_spec; | ||
| 495 | break; | 655 | break; |
| 496 | 656 | ||
| 497 | case X86_VENDOR_INTEL: | 657 | case X86_VENDOR_INTEL: |
| @@ -510,8 +670,15 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
| 510 | break; | 670 | break; |
| 511 | } | 671 | } |
| 512 | 672 | ||
| 513 | if (!cpu_type && !arch_perfmon_init(&cpu_type)) | 673 | if (cpu_type) |
| 674 | break; | ||
| 675 | |||
| 676 | if (!cpu_has_arch_perfmon) | ||
| 514 | return -ENODEV; | 677 | return -ENODEV; |
| 678 | |||
| 679 | /* use arch perfmon as fallback */ | ||
| 680 | cpu_type = "i386/arch_perfmon"; | ||
| 681 | model = &op_arch_perfmon_spec; | ||
| 515 | break; | 682 | break; |
| 516 | 683 | ||
| 517 | default: | 684 | default: |
| @@ -522,18 +689,23 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
| 522 | register_cpu_notifier(&oprofile_cpu_nb); | 689 | register_cpu_notifier(&oprofile_cpu_nb); |
| 523 | #endif | 690 | #endif |
| 524 | /* default values, can be overwritten by model */ | 691 | /* default values, can be overwritten by model */ |
| 525 | ops->create_files = nmi_create_files; | 692 | ops->create_files = nmi_create_files; |
| 526 | ops->setup = nmi_setup; | 693 | ops->setup = nmi_setup; |
| 527 | ops->shutdown = nmi_shutdown; | 694 | ops->shutdown = nmi_shutdown; |
| 528 | ops->start = nmi_start; | 695 | ops->start = nmi_start; |
| 529 | ops->stop = nmi_stop; | 696 | ops->stop = nmi_stop; |
| 530 | ops->cpu_type = cpu_type; | 697 | ops->cpu_type = cpu_type; |
| 531 | 698 | ||
| 532 | if (model->init) | 699 | if (model->init) |
| 533 | ret = model->init(ops); | 700 | ret = model->init(ops); |
| 534 | if (ret) | 701 | if (ret) |
| 535 | return ret; | 702 | return ret; |
| 536 | 703 | ||
| 704 | if (!model->num_virt_counters) | ||
| 705 | model->num_virt_counters = model->num_counters; | ||
| 706 | |||
| 707 | mux_init(ops); | ||
| 708 | |||
| 537 | init_sysfs(); | 709 | init_sysfs(); |
| 538 | using_nmi = 1; | 710 | using_nmi = 1; |
| 539 | printk(KERN_INFO "oprofile: using NMI interrupt.\n"); | 711 | printk(KERN_INFO "oprofile: using NMI interrupt.\n"); |
diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index 91b6a116165e..e28398df0df2 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h | |||
| @@ -10,7 +10,7 @@ | |||
| 10 | #ifndef OP_COUNTER_H | 10 | #ifndef OP_COUNTER_H |
| 11 | #define OP_COUNTER_H | 11 | #define OP_COUNTER_H |
| 12 | 12 | ||
| 13 | #define OP_MAX_COUNTER 8 | 13 | #define OP_MAX_COUNTER 32 |
| 14 | 14 | ||
| 15 | /* Per-perfctr configuration as set via | 15 | /* Per-perfctr configuration as set via |
| 16 | * oprofilefs. | 16 | * oprofilefs. |
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 8fdf06e4edf9..39686c29f03a 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
| @@ -9,12 +9,15 @@ | |||
| 9 | * @author Philippe Elie | 9 | * @author Philippe Elie |
| 10 | * @author Graydon Hoare | 10 | * @author Graydon Hoare |
| 11 | * @author Robert Richter <robert.richter@amd.com> | 11 | * @author Robert Richter <robert.richter@amd.com> |
| 12 | * @author Barry Kasindorf | 12 | * @author Barry Kasindorf <barry.kasindorf@amd.com> |
| 13 | * @author Jason Yeh <jason.yeh@amd.com> | ||
| 14 | * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> | ||
| 13 | */ | 15 | */ |
| 14 | 16 | ||
| 15 | #include <linux/oprofile.h> | 17 | #include <linux/oprofile.h> |
| 16 | #include <linux/device.h> | 18 | #include <linux/device.h> |
| 17 | #include <linux/pci.h> | 19 | #include <linux/pci.h> |
| 20 | #include <linux/percpu.h> | ||
| 18 | 21 | ||
| 19 | #include <asm/ptrace.h> | 22 | #include <asm/ptrace.h> |
| 20 | #include <asm/msr.h> | 23 | #include <asm/msr.h> |
| @@ -25,43 +28,36 @@ | |||
| 25 | 28 | ||
| 26 | #define NUM_COUNTERS 4 | 29 | #define NUM_COUNTERS 4 |
| 27 | #define NUM_CONTROLS 4 | 30 | #define NUM_CONTROLS 4 |
| 31 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
| 32 | #define NUM_VIRT_COUNTERS 32 | ||
| 33 | #define NUM_VIRT_CONTROLS 32 | ||
| 34 | #else | ||
| 35 | #define NUM_VIRT_COUNTERS NUM_COUNTERS | ||
| 36 | #define NUM_VIRT_CONTROLS NUM_CONTROLS | ||
| 37 | #endif | ||
| 38 | |||
| 39 | #define OP_EVENT_MASK 0x0FFF | ||
| 40 | #define OP_CTR_OVERFLOW (1ULL<<31) | ||
| 28 | 41 | ||
| 29 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) | 42 | #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) |
| 30 | #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) | 43 | |
| 31 | #define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0) | 44 | static unsigned long reset_value[NUM_VIRT_COUNTERS]; |
| 32 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) | ||
| 33 | |||
| 34 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) | ||
| 35 | #define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) | ||
| 36 | #define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) | ||
| 37 | #define CTRL_SET_ACTIVE(n) (n |= (1<<22)) | ||
| 38 | #define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) | ||
| 39 | #define CTRL_CLEAR_LO(x) (x &= (1<<21)) | ||
| 40 | #define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0) | ||
| 41 | #define CTRL_SET_ENABLE(val) (val |= 1<<20) | ||
| 42 | #define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) | ||
| 43 | #define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) | ||
| 44 | #define CTRL_SET_UM(val, m) (val |= (m << 8)) | ||
| 45 | #define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff)) | ||
| 46 | #define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf)) | ||
| 47 | #define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) | ||
| 48 | #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) | ||
| 49 | |||
| 50 | static unsigned long reset_value[NUM_COUNTERS]; | ||
| 51 | 45 | ||
| 52 | #ifdef CONFIG_OPROFILE_IBS | 46 | #ifdef CONFIG_OPROFILE_IBS |
| 53 | 47 | ||
| 54 | /* IbsFetchCtl bits/masks */ | 48 | /* IbsFetchCtl bits/masks */ |
| 55 | #define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */ | 49 | #define IBS_FETCH_RAND_EN (1ULL<<57) |
| 56 | #define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */ | 50 | #define IBS_FETCH_VAL (1ULL<<49) |
| 57 | #define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */ | 51 | #define IBS_FETCH_ENABLE (1ULL<<48) |
| 52 | #define IBS_FETCH_CNT_MASK 0xFFFF0000ULL | ||
| 58 | 53 | ||
| 59 | /*IbsOpCtl bits */ | 54 | /*IbsOpCtl bits */ |
| 60 | #define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ | 55 | #define IBS_OP_CNT_CTL (1ULL<<19) |
| 61 | #define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ | 56 | #define IBS_OP_VAL (1ULL<<18) |
| 57 | #define IBS_OP_ENABLE (1ULL<<17) | ||
| 62 | 58 | ||
| 63 | #define IBS_FETCH_SIZE 6 | 59 | #define IBS_FETCH_SIZE 6 |
| 64 | #define IBS_OP_SIZE 12 | 60 | #define IBS_OP_SIZE 12 |
| 65 | 61 | ||
| 66 | static int has_ibs; /* AMD Family10h and later */ | 62 | static int has_ibs; /* AMD Family10h and later */ |
| 67 | 63 | ||
| @@ -78,6 +74,45 @@ static struct op_ibs_config ibs_config; | |||
| 78 | 74 | ||
| 79 | #endif | 75 | #endif |
| 80 | 76 | ||
| 77 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
| 78 | |||
| 79 | static void op_mux_fill_in_addresses(struct op_msrs * const msrs) | ||
| 80 | { | ||
| 81 | int i; | ||
| 82 | |||
| 83 | for (i = 0; i < NUM_VIRT_COUNTERS; i++) { | ||
| 84 | int hw_counter = op_x86_virt_to_phys(i); | ||
| 85 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | ||
| 86 | msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; | ||
| 87 | else | ||
| 88 | msrs->multiplex[i].addr = 0; | ||
| 89 | } | ||
| 90 | } | ||
| 91 | |||
| 92 | static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | ||
| 93 | struct op_msrs const * const msrs) | ||
| 94 | { | ||
| 95 | u64 val; | ||
| 96 | int i; | ||
| 97 | |||
| 98 | /* enable active counters */ | ||
| 99 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
| 100 | int virt = op_x86_phys_to_virt(i); | ||
| 101 | if (!counter_config[virt].enabled) | ||
| 102 | continue; | ||
| 103 | rdmsrl(msrs->controls[i].addr, val); | ||
| 104 | val &= model->reserved; | ||
| 105 | val |= op_x86_get_ctrl(model, &counter_config[virt]); | ||
| 106 | wrmsrl(msrs->controls[i].addr, val); | ||
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 | #else | ||
| 111 | |||
| 112 | static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { } | ||
| 113 | |||
| 114 | #endif | ||
| 115 | |||
| 81 | /* functions for op_amd_spec */ | 116 | /* functions for op_amd_spec */ |
| 82 | 117 | ||
| 83 | static void op_amd_fill_in_addresses(struct op_msrs * const msrs) | 118 | static void op_amd_fill_in_addresses(struct op_msrs * const msrs) |
| @@ -97,150 +132,174 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) | |||
| 97 | else | 132 | else |
| 98 | msrs->controls[i].addr = 0; | 133 | msrs->controls[i].addr = 0; |
| 99 | } | 134 | } |
| 100 | } | ||
| 101 | 135 | ||
| 136 | op_mux_fill_in_addresses(msrs); | ||
| 137 | } | ||
| 102 | 138 | ||
| 103 | static void op_amd_setup_ctrs(struct op_msrs const * const msrs) | 139 | static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, |
| 140 | struct op_msrs const * const msrs) | ||
| 104 | { | 141 | { |
| 105 | unsigned int low, high; | 142 | u64 val; |
| 106 | int i; | 143 | int i; |
| 107 | 144 | ||
| 145 | /* setup reset_value */ | ||
| 146 | for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { | ||
| 147 | if (counter_config[i].enabled) | ||
| 148 | reset_value[i] = counter_config[i].count; | ||
| 149 | else | ||
| 150 | reset_value[i] = 0; | ||
| 151 | } | ||
| 152 | |||
| 108 | /* clear all counters */ | 153 | /* clear all counters */ |
| 109 | for (i = 0 ; i < NUM_CONTROLS; ++i) { | 154 | for (i = 0; i < NUM_CONTROLS; ++i) { |
| 110 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) | 155 | if (unlikely(!msrs->controls[i].addr)) |
| 111 | continue; | 156 | continue; |
| 112 | CTRL_READ(low, high, msrs, i); | 157 | rdmsrl(msrs->controls[i].addr, val); |
| 113 | CTRL_CLEAR_LO(low); | 158 | val &= model->reserved; |
| 114 | CTRL_CLEAR_HI(high); | 159 | wrmsrl(msrs->controls[i].addr, val); |
| 115 | CTRL_WRITE(low, high, msrs, i); | ||
| 116 | } | 160 | } |
| 117 | 161 | ||
| 118 | /* avoid a false detection of ctr overflows in NMI handler */ | 162 | /* avoid a false detection of ctr overflows in NMI handler */ |
| 119 | for (i = 0; i < NUM_COUNTERS; ++i) { | 163 | for (i = 0; i < NUM_COUNTERS; ++i) { |
| 120 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) | 164 | if (unlikely(!msrs->counters[i].addr)) |
| 121 | continue; | 165 | continue; |
| 122 | CTR_WRITE(1, msrs, i); | 166 | wrmsrl(msrs->counters[i].addr, -1LL); |
| 123 | } | 167 | } |
| 124 | 168 | ||
| 125 | /* enable active counters */ | 169 | /* enable active counters */ |
| 126 | for (i = 0; i < NUM_COUNTERS; ++i) { | 170 | for (i = 0; i < NUM_COUNTERS; ++i) { |
| 127 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { | 171 | int virt = op_x86_phys_to_virt(i); |
| 128 | reset_value[i] = counter_config[i].count; | 172 | if (!counter_config[virt].enabled) |
| 173 | continue; | ||
| 174 | if (!msrs->counters[i].addr) | ||
| 175 | continue; | ||
| 129 | 176 | ||
| 130 | CTR_WRITE(counter_config[i].count, msrs, i); | 177 | /* setup counter registers */ |
| 131 | 178 | wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); | |
| 132 | CTRL_READ(low, high, msrs, i); | 179 | |
| 133 | CTRL_CLEAR_LO(low); | 180 | /* setup control registers */ |
| 134 | CTRL_CLEAR_HI(high); | 181 | rdmsrl(msrs->controls[i].addr, val); |
| 135 | CTRL_SET_ENABLE(low); | 182 | val &= model->reserved; |
| 136 | CTRL_SET_USR(low, counter_config[i].user); | 183 | val |= op_x86_get_ctrl(model, &counter_config[virt]); |
| 137 | CTRL_SET_KERN(low, counter_config[i].kernel); | 184 | wrmsrl(msrs->controls[i].addr, val); |
| 138 | CTRL_SET_UM(low, counter_config[i].unit_mask); | ||
| 139 | CTRL_SET_EVENT_LOW(low, counter_config[i].event); | ||
| 140 | CTRL_SET_EVENT_HIGH(high, counter_config[i].event); | ||
| 141 | CTRL_SET_HOST_ONLY(high, 0); | ||
| 142 | CTRL_SET_GUEST_ONLY(high, 0); | ||
| 143 | |||
| 144 | CTRL_WRITE(low, high, msrs, i); | ||
| 145 | } else { | ||
| 146 | reset_value[i] = 0; | ||
| 147 | } | ||
| 148 | } | 185 | } |
| 149 | } | 186 | } |
| 150 | 187 | ||
| 151 | #ifdef CONFIG_OPROFILE_IBS | 188 | #ifdef CONFIG_OPROFILE_IBS |
| 152 | 189 | ||
| 153 | static inline int | 190 | static inline void |
| 154 | op_amd_handle_ibs(struct pt_regs * const regs, | 191 | op_amd_handle_ibs(struct pt_regs * const regs, |
| 155 | struct op_msrs const * const msrs) | 192 | struct op_msrs const * const msrs) |
| 156 | { | 193 | { |
| 157 | u32 low, high; | 194 | u64 val, ctl; |
| 158 | u64 msr; | ||
| 159 | struct op_entry entry; | 195 | struct op_entry entry; |
| 160 | 196 | ||
| 161 | if (!has_ibs) | 197 | if (!has_ibs) |
| 162 | return 1; | 198 | return; |
| 163 | 199 | ||
| 164 | if (ibs_config.fetch_enabled) { | 200 | if (ibs_config.fetch_enabled) { |
| 165 | rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); | 201 | rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl); |
| 166 | if (high & IBS_FETCH_HIGH_VALID_BIT) { | 202 | if (ctl & IBS_FETCH_VAL) { |
| 167 | rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr); | 203 | rdmsrl(MSR_AMD64_IBSFETCHLINAD, val); |
| 168 | oprofile_write_reserve(&entry, regs, msr, | 204 | oprofile_write_reserve(&entry, regs, val, |
| 169 | IBS_FETCH_CODE, IBS_FETCH_SIZE); | 205 | IBS_FETCH_CODE, IBS_FETCH_SIZE); |
| 170 | oprofile_add_data(&entry, (u32)msr); | 206 | oprofile_add_data64(&entry, val); |
| 171 | oprofile_add_data(&entry, (u32)(msr >> 32)); | 207 | oprofile_add_data64(&entry, ctl); |
| 172 | oprofile_add_data(&entry, low); | 208 | rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val); |
| 173 | oprofile_add_data(&entry, high); | 209 | oprofile_add_data64(&entry, val); |
| 174 | rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr); | ||
| 175 | oprofile_add_data(&entry, (u32)msr); | ||
| 176 | oprofile_add_data(&entry, (u32)(msr >> 32)); | ||
| 177 | oprofile_write_commit(&entry); | 210 | oprofile_write_commit(&entry); |
| 178 | 211 | ||
| 179 | /* reenable the IRQ */ | 212 | /* reenable the IRQ */ |
| 180 | high &= ~IBS_FETCH_HIGH_VALID_BIT; | 213 | ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK); |
| 181 | high |= IBS_FETCH_HIGH_ENABLE; | 214 | ctl |= IBS_FETCH_ENABLE; |
| 182 | low &= IBS_FETCH_LOW_MAX_CNT_MASK; | 215 | wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl); |
| 183 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); | ||
| 184 | } | 216 | } |
| 185 | } | 217 | } |
| 186 | 218 | ||
| 187 | if (ibs_config.op_enabled) { | 219 | if (ibs_config.op_enabled) { |
| 188 | rdmsr(MSR_AMD64_IBSOPCTL, low, high); | 220 | rdmsrl(MSR_AMD64_IBSOPCTL, ctl); |
| 189 | if (low & IBS_OP_LOW_VALID_BIT) { | 221 | if (ctl & IBS_OP_VAL) { |
| 190 | rdmsrl(MSR_AMD64_IBSOPRIP, msr); | 222 | rdmsrl(MSR_AMD64_IBSOPRIP, val); |
| 191 | oprofile_write_reserve(&entry, regs, msr, | 223 | oprofile_write_reserve(&entry, regs, val, |
| 192 | IBS_OP_CODE, IBS_OP_SIZE); | 224 | IBS_OP_CODE, IBS_OP_SIZE); |
| 193 | oprofile_add_data(&entry, (u32)msr); | 225 | oprofile_add_data64(&entry, val); |
| 194 | oprofile_add_data(&entry, (u32)(msr >> 32)); | 226 | rdmsrl(MSR_AMD64_IBSOPDATA, val); |
| 195 | rdmsrl(MSR_AMD64_IBSOPDATA, msr); | 227 | oprofile_add_data64(&entry, val); |
| 196 | oprofile_add_data(&entry, (u32)msr); | 228 | rdmsrl(MSR_AMD64_IBSOPDATA2, val); |
| 197 | oprofile_add_data(&entry, (u32)(msr >> 32)); | 229 | oprofile_add_data64(&entry, val); |
| 198 | rdmsrl(MSR_AMD64_IBSOPDATA2, msr); | 230 | rdmsrl(MSR_AMD64_IBSOPDATA3, val); |
| 199 | oprofile_add_data(&entry, (u32)msr); | 231 | oprofile_add_data64(&entry, val); |
| 200 | oprofile_add_data(&entry, (u32)(msr >> 32)); | 232 | rdmsrl(MSR_AMD64_IBSDCLINAD, val); |
| 201 | rdmsrl(MSR_AMD64_IBSOPDATA3, msr); | 233 | oprofile_add_data64(&entry, val); |
| 202 | oprofile_add_data(&entry, (u32)msr); | 234 | rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); |
| 203 | oprofile_add_data(&entry, (u32)(msr >> 32)); | 235 | oprofile_add_data64(&entry, val); |
| 204 | rdmsrl(MSR_AMD64_IBSDCLINAD, msr); | ||
| 205 | oprofile_add_data(&entry, (u32)msr); | ||
| 206 | oprofile_add_data(&entry, (u32)(msr >> 32)); | ||
| 207 | rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr); | ||
| 208 | oprofile_add_data(&entry, (u32)msr); | ||
| 209 | oprofile_add_data(&entry, (u32)(msr >> 32)); | ||
| 210 | oprofile_write_commit(&entry); | 236 | oprofile_write_commit(&entry); |
| 211 | 237 | ||
| 212 | /* reenable the IRQ */ | 238 | /* reenable the IRQ */ |
| 213 | high = 0; | 239 | ctl &= ~IBS_OP_VAL & 0xFFFFFFFF; |
| 214 | low &= ~IBS_OP_LOW_VALID_BIT; | 240 | ctl |= IBS_OP_ENABLE; |
| 215 | low |= IBS_OP_LOW_ENABLE; | 241 | wrmsrl(MSR_AMD64_IBSOPCTL, ctl); |
| 216 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
| 217 | } | 242 | } |
| 218 | } | 243 | } |
| 244 | } | ||
| 219 | 245 | ||
| 220 | return 1; | 246 | static inline void op_amd_start_ibs(void) |
| 247 | { | ||
| 248 | u64 val; | ||
| 249 | if (has_ibs && ibs_config.fetch_enabled) { | ||
| 250 | val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; | ||
| 251 | val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; | ||
| 252 | val |= IBS_FETCH_ENABLE; | ||
| 253 | wrmsrl(MSR_AMD64_IBSFETCHCTL, val); | ||
| 254 | } | ||
| 255 | |||
| 256 | if (has_ibs && ibs_config.op_enabled) { | ||
| 257 | val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; | ||
| 258 | val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; | ||
| 259 | val |= IBS_OP_ENABLE; | ||
| 260 | wrmsrl(MSR_AMD64_IBSOPCTL, val); | ||
| 261 | } | ||
| 262 | } | ||
| 263 | |||
| 264 | static void op_amd_stop_ibs(void) | ||
| 265 | { | ||
| 266 | if (has_ibs && ibs_config.fetch_enabled) | ||
| 267 | /* clear max count and enable */ | ||
| 268 | wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); | ||
| 269 | |||
| 270 | if (has_ibs && ibs_config.op_enabled) | ||
| 271 | /* clear max count and enable */ | ||
| 272 | wrmsrl(MSR_AMD64_IBSOPCTL, 0); | ||
| 221 | } | 273 | } |
| 222 | 274 | ||
| 275 | #else | ||
| 276 | |||
| 277 | static inline void op_amd_handle_ibs(struct pt_regs * const regs, | ||
| 278 | struct op_msrs const * const msrs) { } | ||
| 279 | static inline void op_amd_start_ibs(void) { } | ||
| 280 | static inline void op_amd_stop_ibs(void) { } | ||
| 281 | |||
| 223 | #endif | 282 | #endif |
| 224 | 283 | ||
| 225 | static int op_amd_check_ctrs(struct pt_regs * const regs, | 284 | static int op_amd_check_ctrs(struct pt_regs * const regs, |
| 226 | struct op_msrs const * const msrs) | 285 | struct op_msrs const * const msrs) |
| 227 | { | 286 | { |
| 228 | unsigned int low, high; | 287 | u64 val; |
| 229 | int i; | 288 | int i; |
| 230 | 289 | ||
| 231 | for (i = 0 ; i < NUM_COUNTERS; ++i) { | 290 | for (i = 0; i < NUM_COUNTERS; ++i) { |
| 232 | if (!reset_value[i]) | 291 | int virt = op_x86_phys_to_virt(i); |
| 292 | if (!reset_value[virt]) | ||
| 233 | continue; | 293 | continue; |
| 234 | CTR_READ(low, high, msrs, i); | 294 | rdmsrl(msrs->counters[i].addr, val); |
| 235 | if (CTR_OVERFLOWED(low)) { | 295 | /* bit is clear if overflowed: */ |
| 236 | oprofile_add_sample(regs, i); | 296 | if (val & OP_CTR_OVERFLOW) |
| 237 | CTR_WRITE(reset_value[i], msrs, i); | 297 | continue; |
| 238 | } | 298 | oprofile_add_sample(regs, virt); |
| 299 | wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); | ||
| 239 | } | 300 | } |
| 240 | 301 | ||
| 241 | #ifdef CONFIG_OPROFILE_IBS | ||
| 242 | op_amd_handle_ibs(regs, msrs); | 302 | op_amd_handle_ibs(regs, msrs); |
| 243 | #endif | ||
| 244 | 303 | ||
| 245 | /* See op_model_ppro.c */ | 304 | /* See op_model_ppro.c */ |
| 246 | return 1; | 305 | return 1; |
| @@ -248,79 +307,50 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, | |||
| 248 | 307 | ||
| 249 | static void op_amd_start(struct op_msrs const * const msrs) | 308 | static void op_amd_start(struct op_msrs const * const msrs) |
| 250 | { | 309 | { |
| 251 | unsigned int low, high; | 310 | u64 val; |
| 252 | int i; | 311 | int i; |
| 253 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | ||
| 254 | if (reset_value[i]) { | ||
| 255 | CTRL_READ(low, high, msrs, i); | ||
| 256 | CTRL_SET_ACTIVE(low); | ||
| 257 | CTRL_WRITE(low, high, msrs, i); | ||
| 258 | } | ||
| 259 | } | ||
| 260 | 312 | ||
| 261 | #ifdef CONFIG_OPROFILE_IBS | 313 | for (i = 0; i < NUM_COUNTERS; ++i) { |
| 262 | if (has_ibs && ibs_config.fetch_enabled) { | 314 | if (!reset_value[op_x86_phys_to_virt(i)]) |
| 263 | low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; | 315 | continue; |
| 264 | high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */ | 316 | rdmsrl(msrs->controls[i].addr, val); |
| 265 | + IBS_FETCH_HIGH_ENABLE; | 317 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
| 266 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); | 318 | wrmsrl(msrs->controls[i].addr, val); |
| 267 | } | 319 | } |
| 268 | 320 | ||
| 269 | if (has_ibs && ibs_config.op_enabled) { | 321 | op_amd_start_ibs(); |
| 270 | low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) | ||
| 271 | + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */ | ||
| 272 | + IBS_OP_LOW_ENABLE; | ||
| 273 | high = 0; | ||
| 274 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
| 275 | } | ||
| 276 | #endif | ||
| 277 | } | 322 | } |
| 278 | 323 | ||
| 279 | |||
| 280 | static void op_amd_stop(struct op_msrs const * const msrs) | 324 | static void op_amd_stop(struct op_msrs const * const msrs) |
| 281 | { | 325 | { |
| 282 | unsigned int low, high; | 326 | u64 val; |
| 283 | int i; | 327 | int i; |
| 284 | 328 | ||
| 285 | /* | 329 | /* |
| 286 | * Subtle: stop on all counters to avoid race with setting our | 330 | * Subtle: stop on all counters to avoid race with setting our |
| 287 | * pm callback | 331 | * pm callback |
| 288 | */ | 332 | */ |
| 289 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | 333 | for (i = 0; i < NUM_COUNTERS; ++i) { |
| 290 | if (!reset_value[i]) | 334 | if (!reset_value[op_x86_phys_to_virt(i)]) |
| 291 | continue; | 335 | continue; |
| 292 | CTRL_READ(low, high, msrs, i); | 336 | rdmsrl(msrs->controls[i].addr, val); |
| 293 | CTRL_SET_INACTIVE(low); | 337 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; |
| 294 | CTRL_WRITE(low, high, msrs, i); | 338 | wrmsrl(msrs->controls[i].addr, val); |
| 295 | } | ||
| 296 | |||
| 297 | #ifdef CONFIG_OPROFILE_IBS | ||
| 298 | if (has_ibs && ibs_config.fetch_enabled) { | ||
| 299 | /* clear max count and enable */ | ||
| 300 | low = 0; | ||
| 301 | high = 0; | ||
| 302 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); | ||
| 303 | } | 339 | } |
| 304 | 340 | ||
| 305 | if (has_ibs && ibs_config.op_enabled) { | 341 | op_amd_stop_ibs(); |
| 306 | /* clear max count and enable */ | ||
| 307 | low = 0; | ||
| 308 | high = 0; | ||
| 309 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
| 310 | } | ||
| 311 | #endif | ||
| 312 | } | 342 | } |
| 313 | 343 | ||
| 314 | static void op_amd_shutdown(struct op_msrs const * const msrs) | 344 | static void op_amd_shutdown(struct op_msrs const * const msrs) |
| 315 | { | 345 | { |
| 316 | int i; | 346 | int i; |
| 317 | 347 | ||
| 318 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | 348 | for (i = 0; i < NUM_COUNTERS; ++i) { |
| 319 | if (CTR_IS_RESERVED(msrs, i)) | 349 | if (msrs->counters[i].addr) |
| 320 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | 350 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); |
| 321 | } | 351 | } |
| 322 | for (i = 0 ; i < NUM_CONTROLS ; ++i) { | 352 | for (i = 0; i < NUM_CONTROLS; ++i) { |
| 323 | if (CTRL_IS_RESERVED(msrs, i)) | 353 | if (msrs->controls[i].addr) |
| 324 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | 354 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); |
| 325 | } | 355 | } |
| 326 | } | 356 | } |
| @@ -490,15 +520,21 @@ static void op_amd_exit(void) {} | |||
| 490 | 520 | ||
| 491 | #endif /* CONFIG_OPROFILE_IBS */ | 521 | #endif /* CONFIG_OPROFILE_IBS */ |
| 492 | 522 | ||
| 493 | struct op_x86_model_spec const op_amd_spec = { | 523 | struct op_x86_model_spec op_amd_spec = { |
| 494 | .init = op_amd_init, | ||
| 495 | .exit = op_amd_exit, | ||
| 496 | .num_counters = NUM_COUNTERS, | 524 | .num_counters = NUM_COUNTERS, |
| 497 | .num_controls = NUM_CONTROLS, | 525 | .num_controls = NUM_CONTROLS, |
| 526 | .num_virt_counters = NUM_VIRT_COUNTERS, | ||
| 527 | .reserved = MSR_AMD_EVENTSEL_RESERVED, | ||
| 528 | .event_mask = OP_EVENT_MASK, | ||
| 529 | .init = op_amd_init, | ||
| 530 | .exit = op_amd_exit, | ||
| 498 | .fill_in_addresses = &op_amd_fill_in_addresses, | 531 | .fill_in_addresses = &op_amd_fill_in_addresses, |
| 499 | .setup_ctrs = &op_amd_setup_ctrs, | 532 | .setup_ctrs = &op_amd_setup_ctrs, |
| 500 | .check_ctrs = &op_amd_check_ctrs, | 533 | .check_ctrs = &op_amd_check_ctrs, |
| 501 | .start = &op_amd_start, | 534 | .start = &op_amd_start, |
| 502 | .stop = &op_amd_stop, | 535 | .stop = &op_amd_stop, |
| 503 | .shutdown = &op_amd_shutdown | 536 | .shutdown = &op_amd_shutdown, |
| 537 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
| 538 | .switch_ctrl = &op_mux_switch_ctrl, | ||
| 539 | #endif | ||
| 504 | }; | 540 | }; |
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 819b131fd752..ac6b354becdf 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c | |||
| @@ -32,6 +32,8 @@ | |||
| 32 | #define NUM_CCCRS_HT2 9 | 32 | #define NUM_CCCRS_HT2 9 |
| 33 | #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) | 33 | #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) |
| 34 | 34 | ||
| 35 | #define OP_CTR_OVERFLOW (1ULL<<31) | ||
| 36 | |||
| 35 | static unsigned int num_counters = NUM_COUNTERS_NON_HT; | 37 | static unsigned int num_counters = NUM_COUNTERS_NON_HT; |
| 36 | static unsigned int num_controls = NUM_CONTROLS_NON_HT; | 38 | static unsigned int num_controls = NUM_CONTROLS_NON_HT; |
| 37 | 39 | ||
| @@ -350,8 +352,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { | |||
| 350 | #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) | 352 | #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) |
| 351 | #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) | 353 | #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) |
| 352 | #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) | 354 | #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) |
| 353 | #define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) | ||
| 354 | #define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) | ||
| 355 | 355 | ||
| 356 | #define CCCR_RESERVED_BITS 0x38030FFF | 356 | #define CCCR_RESERVED_BITS 0x38030FFF |
| 357 | #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) | 357 | #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) |
| @@ -361,17 +361,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { | |||
| 361 | #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) | 361 | #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) |
| 362 | #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) | 362 | #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) |
| 363 | #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) | 363 | #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) |
| 364 | #define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) | ||
| 365 | #define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) | ||
| 366 | #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) | 364 | #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) |
| 367 | #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) | 365 | #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) |
| 368 | 366 | ||
| 369 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) | ||
| 370 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) | ||
| 371 | #define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0) | ||
| 372 | #define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0) | ||
| 373 | #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) | ||
| 374 | |||
| 375 | 367 | ||
| 376 | /* this assigns a "stagger" to the current CPU, which is used throughout | 368 | /* this assigns a "stagger" to the current CPU, which is used throughout |
| 377 | the code in this module as an extra array offset, to select the "even" | 369 | the code in this module as an extra array offset, to select the "even" |
| @@ -515,7 +507,7 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) | |||
| 515 | if (ev->bindings[i].virt_counter & counter_bit) { | 507 | if (ev->bindings[i].virt_counter & counter_bit) { |
| 516 | 508 | ||
| 517 | /* modify ESCR */ | 509 | /* modify ESCR */ |
| 518 | ESCR_READ(escr, high, ev, i); | 510 | rdmsr(ev->bindings[i].escr_address, escr, high); |
| 519 | ESCR_CLEAR(escr); | 511 | ESCR_CLEAR(escr); |
| 520 | if (stag == 0) { | 512 | if (stag == 0) { |
| 521 | ESCR_SET_USR_0(escr, counter_config[ctr].user); | 513 | ESCR_SET_USR_0(escr, counter_config[ctr].user); |
| @@ -526,10 +518,11 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) | |||
| 526 | } | 518 | } |
| 527 | ESCR_SET_EVENT_SELECT(escr, ev->event_select); | 519 | ESCR_SET_EVENT_SELECT(escr, ev->event_select); |
| 528 | ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); | 520 | ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); |
| 529 | ESCR_WRITE(escr, high, ev, i); | 521 | wrmsr(ev->bindings[i].escr_address, escr, high); |
| 530 | 522 | ||
| 531 | /* modify CCCR */ | 523 | /* modify CCCR */ |
| 532 | CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); | 524 | rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, |
| 525 | cccr, high); | ||
| 533 | CCCR_CLEAR(cccr); | 526 | CCCR_CLEAR(cccr); |
| 534 | CCCR_SET_REQUIRED_BITS(cccr); | 527 | CCCR_SET_REQUIRED_BITS(cccr); |
| 535 | CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); | 528 | CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); |
| @@ -537,7 +530,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) | |||
| 537 | CCCR_SET_PMI_OVF_0(cccr); | 530 | CCCR_SET_PMI_OVF_0(cccr); |
| 538 | else | 531 | else |
| 539 | CCCR_SET_PMI_OVF_1(cccr); | 532 | CCCR_SET_PMI_OVF_1(cccr); |
| 540 | CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); | 533 | wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, |
| 534 | cccr, high); | ||
| 541 | return; | 535 | return; |
| 542 | } | 536 | } |
| 543 | } | 537 | } |
| @@ -548,7 +542,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) | |||
| 548 | } | 542 | } |
| 549 | 543 | ||
| 550 | 544 | ||
| 551 | static void p4_setup_ctrs(struct op_msrs const * const msrs) | 545 | static void p4_setup_ctrs(struct op_x86_model_spec const *model, |
| 546 | struct op_msrs const * const msrs) | ||
| 552 | { | 547 | { |
| 553 | unsigned int i; | 548 | unsigned int i; |
| 554 | unsigned int low, high; | 549 | unsigned int low, high; |
| @@ -563,8 +558,8 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) | |||
| 563 | } | 558 | } |
| 564 | 559 | ||
| 565 | /* clear the cccrs we will use */ | 560 | /* clear the cccrs we will use */ |
| 566 | for (i = 0 ; i < num_counters ; i++) { | 561 | for (i = 0; i < num_counters; i++) { |
| 567 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) | 562 | if (unlikely(!msrs->controls[i].addr)) |
| 568 | continue; | 563 | continue; |
| 569 | rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); | 564 | rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); |
| 570 | CCCR_CLEAR(low); | 565 | CCCR_CLEAR(low); |
| @@ -574,17 +569,18 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) | |||
| 574 | 569 | ||
| 575 | /* clear all escrs (including those outside our concern) */ | 570 | /* clear all escrs (including those outside our concern) */ |
| 576 | for (i = num_counters; i < num_controls; i++) { | 571 | for (i = num_counters; i < num_controls; i++) { |
| 577 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) | 572 | if (unlikely(!msrs->controls[i].addr)) |
| 578 | continue; | 573 | continue; |
| 579 | wrmsr(msrs->controls[i].addr, 0, 0); | 574 | wrmsr(msrs->controls[i].addr, 0, 0); |
| 580 | } | 575 | } |
| 581 | 576 | ||
| 582 | /* setup all counters */ | 577 | /* setup all counters */ |
| 583 | for (i = 0 ; i < num_counters ; ++i) { | 578 | for (i = 0; i < num_counters; ++i) { |
| 584 | if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) { | 579 | if (counter_config[i].enabled && msrs->controls[i].addr) { |
| 585 | reset_value[i] = counter_config[i].count; | 580 | reset_value[i] = counter_config[i].count; |
| 586 | pmc_setup_one_p4_counter(i); | 581 | pmc_setup_one_p4_counter(i); |
| 587 | CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); | 582 | wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address, |
| 583 | -(u64)counter_config[i].count); | ||
| 588 | } else { | 584 | } else { |
| 589 | reset_value[i] = 0; | 585 | reset_value[i] = 0; |
| 590 | } | 586 | } |
| @@ -624,14 +620,16 @@ static int p4_check_ctrs(struct pt_regs * const regs, | |||
| 624 | 620 | ||
| 625 | real = VIRT_CTR(stag, i); | 621 | real = VIRT_CTR(stag, i); |
| 626 | 622 | ||
| 627 | CCCR_READ(low, high, real); | 623 | rdmsr(p4_counters[real].cccr_address, low, high); |
| 628 | CTR_READ(ctr, high, real); | 624 | rdmsr(p4_counters[real].counter_address, ctr, high); |
| 629 | if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { | 625 | if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) { |
| 630 | oprofile_add_sample(regs, i); | 626 | oprofile_add_sample(regs, i); |
| 631 | CTR_WRITE(reset_value[i], real); | 627 | wrmsrl(p4_counters[real].counter_address, |
| 628 | -(u64)reset_value[i]); | ||
| 632 | CCCR_CLEAR_OVF(low); | 629 | CCCR_CLEAR_OVF(low); |
| 633 | CCCR_WRITE(low, high, real); | 630 | wrmsr(p4_counters[real].cccr_address, low, high); |
| 634 | CTR_WRITE(reset_value[i], real); | 631 | wrmsrl(p4_counters[real].counter_address, |
| 632 | -(u64)reset_value[i]); | ||
| 635 | } | 633 | } |
| 636 | } | 634 | } |
| 637 | 635 | ||
| @@ -653,9 +651,9 @@ static void p4_start(struct op_msrs const * const msrs) | |||
| 653 | for (i = 0; i < num_counters; ++i) { | 651 | for (i = 0; i < num_counters; ++i) { |
| 654 | if (!reset_value[i]) | 652 | if (!reset_value[i]) |
| 655 | continue; | 653 | continue; |
| 656 | CCCR_READ(low, high, VIRT_CTR(stag, i)); | 654 | rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); |
| 657 | CCCR_SET_ENABLE(low); | 655 | CCCR_SET_ENABLE(low); |
| 658 | CCCR_WRITE(low, high, VIRT_CTR(stag, i)); | 656 | wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); |
| 659 | } | 657 | } |
| 660 | } | 658 | } |
| 661 | 659 | ||
| @@ -670,9 +668,9 @@ static void p4_stop(struct op_msrs const * const msrs) | |||
| 670 | for (i = 0; i < num_counters; ++i) { | 668 | for (i = 0; i < num_counters; ++i) { |
| 671 | if (!reset_value[i]) | 669 | if (!reset_value[i]) |
| 672 | continue; | 670 | continue; |
| 673 | CCCR_READ(low, high, VIRT_CTR(stag, i)); | 671 | rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); |
| 674 | CCCR_SET_DISABLE(low); | 672 | CCCR_SET_DISABLE(low); |
| 675 | CCCR_WRITE(low, high, VIRT_CTR(stag, i)); | 673 | wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); |
| 676 | } | 674 | } |
| 677 | } | 675 | } |
| 678 | 676 | ||
| @@ -680,8 +678,8 @@ static void p4_shutdown(struct op_msrs const * const msrs) | |||
| 680 | { | 678 | { |
| 681 | int i; | 679 | int i; |
| 682 | 680 | ||
| 683 | for (i = 0 ; i < num_counters ; ++i) { | 681 | for (i = 0; i < num_counters; ++i) { |
| 684 | if (CTR_IS_RESERVED(msrs, i)) | 682 | if (msrs->counters[i].addr) |
| 685 | release_perfctr_nmi(msrs->counters[i].addr); | 683 | release_perfctr_nmi(msrs->counters[i].addr); |
| 686 | } | 684 | } |
| 687 | /* | 685 | /* |
| @@ -689,15 +687,15 @@ static void p4_shutdown(struct op_msrs const * const msrs) | |||
| 689 | * conjunction with the counter registers (hence the starting offset). | 687 | * conjunction with the counter registers (hence the starting offset). |
| 690 | * This saves a few bits. | 688 | * This saves a few bits. |
| 691 | */ | 689 | */ |
| 692 | for (i = num_counters ; i < num_controls ; ++i) { | 690 | for (i = num_counters; i < num_controls; ++i) { |
| 693 | if (CTRL_IS_RESERVED(msrs, i)) | 691 | if (msrs->controls[i].addr) |
| 694 | release_evntsel_nmi(msrs->controls[i].addr); | 692 | release_evntsel_nmi(msrs->controls[i].addr); |
| 695 | } | 693 | } |
| 696 | } | 694 | } |
| 697 | 695 | ||
| 698 | 696 | ||
| 699 | #ifdef CONFIG_SMP | 697 | #ifdef CONFIG_SMP |
| 700 | struct op_x86_model_spec const op_p4_ht2_spec = { | 698 | struct op_x86_model_spec op_p4_ht2_spec = { |
| 701 | .num_counters = NUM_COUNTERS_HT2, | 699 | .num_counters = NUM_COUNTERS_HT2, |
| 702 | .num_controls = NUM_CONTROLS_HT2, | 700 | .num_controls = NUM_CONTROLS_HT2, |
| 703 | .fill_in_addresses = &p4_fill_in_addresses, | 701 | .fill_in_addresses = &p4_fill_in_addresses, |
| @@ -709,7 +707,7 @@ struct op_x86_model_spec const op_p4_ht2_spec = { | |||
| 709 | }; | 707 | }; |
| 710 | #endif | 708 | #endif |
| 711 | 709 | ||
| 712 | struct op_x86_model_spec const op_p4_spec = { | 710 | struct op_x86_model_spec op_p4_spec = { |
| 713 | .num_counters = NUM_COUNTERS_NON_HT, | 711 | .num_counters = NUM_COUNTERS_NON_HT, |
| 714 | .num_controls = NUM_CONTROLS_NON_HT, | 712 | .num_controls = NUM_CONTROLS_NON_HT, |
| 715 | .fill_in_addresses = &p4_fill_in_addresses, | 713 | .fill_in_addresses = &p4_fill_in_addresses, |
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 4da7230b3d17..4899215999de 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | * @author Philippe Elie | 10 | * @author Philippe Elie |
| 11 | * @author Graydon Hoare | 11 | * @author Graydon Hoare |
| 12 | * @author Andi Kleen | 12 | * @author Andi Kleen |
| 13 | * @author Robert Richter <robert.richter@amd.com> | ||
| 13 | */ | 14 | */ |
| 14 | 15 | ||
| 15 | #include <linux/oprofile.h> | 16 | #include <linux/oprofile.h> |
| @@ -18,7 +19,6 @@ | |||
| 18 | #include <asm/msr.h> | 19 | #include <asm/msr.h> |
| 19 | #include <asm/apic.h> | 20 | #include <asm/apic.h> |
| 20 | #include <asm/nmi.h> | 21 | #include <asm/nmi.h> |
| 21 | #include <asm/perf_counter.h> | ||
| 22 | 22 | ||
| 23 | #include "op_x86_model.h" | 23 | #include "op_x86_model.h" |
| 24 | #include "op_counter.h" | 24 | #include "op_counter.h" |
| @@ -26,20 +26,7 @@ | |||
| 26 | static int num_counters = 2; | 26 | static int num_counters = 2; |
| 27 | static int counter_width = 32; | 27 | static int counter_width = 32; |
| 28 | 28 | ||
| 29 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) | 29 | #define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21)) |
| 30 | #define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1)))) | ||
| 31 | |||
| 32 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) | ||
| 33 | #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) | ||
| 34 | #define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) | ||
| 35 | #define CTRL_SET_ACTIVE(n) (n |= (1<<22)) | ||
| 36 | #define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) | ||
| 37 | #define CTRL_CLEAR(x) (x &= (1<<21)) | ||
| 38 | #define CTRL_SET_ENABLE(val) (val |= 1<<20) | ||
| 39 | #define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) | ||
| 40 | #define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) | ||
| 41 | #define CTRL_SET_UM(val, m) (val |= (m << 8)) | ||
| 42 | #define CTRL_SET_EVENT(val, e) (val |= e) | ||
| 43 | 30 | ||
| 44 | static u64 *reset_value; | 31 | static u64 *reset_value; |
| 45 | 32 | ||
| @@ -63,9 +50,10 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs) | |||
| 63 | } | 50 | } |
| 64 | 51 | ||
| 65 | 52 | ||
| 66 | static void ppro_setup_ctrs(struct op_msrs const * const msrs) | 53 | static void ppro_setup_ctrs(struct op_x86_model_spec const *model, |
| 54 | struct op_msrs const * const msrs) | ||
| 67 | { | 55 | { |
| 68 | unsigned int low, high; | 56 | u64 val; |
| 69 | int i; | 57 | int i; |
| 70 | 58 | ||
| 71 | if (!reset_value) { | 59 | if (!reset_value) { |
| @@ -93,36 +81,30 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) | |||
| 93 | } | 81 | } |
| 94 | 82 | ||
| 95 | /* clear all counters */ | 83 | /* clear all counters */ |
| 96 | for (i = 0 ; i < num_counters; ++i) { | 84 | for (i = 0; i < num_counters; ++i) { |
| 97 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) | 85 | if (unlikely(!msrs->controls[i].addr)) |
| 98 | continue; | 86 | continue; |
| 99 | CTRL_READ(low, high, msrs, i); | 87 | rdmsrl(msrs->controls[i].addr, val); |
| 100 | CTRL_CLEAR(low); | 88 | val &= model->reserved; |
| 101 | CTRL_WRITE(low, high, msrs, i); | 89 | wrmsrl(msrs->controls[i].addr, val); |
| 102 | } | 90 | } |
| 103 | 91 | ||
| 104 | /* avoid a false detection of ctr overflows in NMI handler */ | 92 | /* avoid a false detection of ctr overflows in NMI handler */ |
| 105 | for (i = 0; i < num_counters; ++i) { | 93 | for (i = 0; i < num_counters; ++i) { |
| 106 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) | 94 | if (unlikely(!msrs->counters[i].addr)) |
| 107 | continue; | 95 | continue; |
| 108 | wrmsrl(msrs->counters[i].addr, -1LL); | 96 | wrmsrl(msrs->counters[i].addr, -1LL); |
| 109 | } | 97 | } |
| 110 | 98 | ||
| 111 | /* enable active counters */ | 99 | /* enable active counters */ |
| 112 | for (i = 0; i < num_counters; ++i) { | 100 | for (i = 0; i < num_counters; ++i) { |
| 113 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { | 101 | if (counter_config[i].enabled && msrs->counters[i].addr) { |
| 114 | reset_value[i] = counter_config[i].count; | 102 | reset_value[i] = counter_config[i].count; |
| 115 | |||
| 116 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); | 103 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); |
| 117 | 104 | rdmsrl(msrs->controls[i].addr, val); | |
| 118 | CTRL_READ(low, high, msrs, i); | 105 | val &= model->reserved; |
| 119 | CTRL_CLEAR(low); | 106 | val |= op_x86_get_ctrl(model, &counter_config[i]); |
| 120 | CTRL_SET_ENABLE(low); | 107 | wrmsrl(msrs->controls[i].addr, val); |
| 121 | CTRL_SET_USR(low, counter_config[i].user); | ||
| 122 | CTRL_SET_KERN(low, counter_config[i].kernel); | ||
| 123 | CTRL_SET_UM(low, counter_config[i].unit_mask); | ||
| 124 | CTRL_SET_EVENT(low, counter_config[i].event); | ||
| 125 | CTRL_WRITE(low, high, msrs, i); | ||
| 126 | } else { | 108 | } else { |
| 127 | reset_value[i] = 0; | 109 | reset_value[i] = 0; |
| 128 | } | 110 | } |
| @@ -143,14 +125,14 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
| 143 | if (unlikely(!reset_value)) | 125 | if (unlikely(!reset_value)) |
| 144 | goto out; | 126 | goto out; |
| 145 | 127 | ||
| 146 | for (i = 0 ; i < num_counters; ++i) { | 128 | for (i = 0; i < num_counters; ++i) { |
| 147 | if (!reset_value[i]) | 129 | if (!reset_value[i]) |
| 148 | continue; | 130 | continue; |
| 149 | rdmsrl(msrs->counters[i].addr, val); | 131 | rdmsrl(msrs->counters[i].addr, val); |
| 150 | if (CTR_OVERFLOWED(val)) { | 132 | if (val & (1ULL << (counter_width - 1))) |
| 151 | oprofile_add_sample(regs, i); | 133 | continue; |
| 152 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); | 134 | oprofile_add_sample(regs, i); |
| 153 | } | 135 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); |
| 154 | } | 136 | } |
| 155 | 137 | ||
| 156 | out: | 138 | out: |
| @@ -171,16 +153,16 @@ out: | |||
| 171 | 153 | ||
| 172 | static void ppro_start(struct op_msrs const * const msrs) | 154 | static void ppro_start(struct op_msrs const * const msrs) |
| 173 | { | 155 | { |
| 174 | unsigned int low, high; | 156 | u64 val; |
| 175 | int i; | 157 | int i; |
| 176 | 158 | ||
| 177 | if (!reset_value) | 159 | if (!reset_value) |
| 178 | return; | 160 | return; |
| 179 | for (i = 0; i < num_counters; ++i) { | 161 | for (i = 0; i < num_counters; ++i) { |
| 180 | if (reset_value[i]) { | 162 | if (reset_value[i]) { |
| 181 | CTRL_READ(low, high, msrs, i); | 163 | rdmsrl(msrs->controls[i].addr, val); |
| 182 | CTRL_SET_ACTIVE(low); | 164 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
| 183 | CTRL_WRITE(low, high, msrs, i); | 165 | wrmsrl(msrs->controls[i].addr, val); |
| 184 | } | 166 | } |
| 185 | } | 167 | } |
| 186 | } | 168 | } |
| @@ -188,7 +170,7 @@ static void ppro_start(struct op_msrs const * const msrs) | |||
| 188 | 170 | ||
| 189 | static void ppro_stop(struct op_msrs const * const msrs) | 171 | static void ppro_stop(struct op_msrs const * const msrs) |
| 190 | { | 172 | { |
| 191 | unsigned int low, high; | 173 | u64 val; |
| 192 | int i; | 174 | int i; |
| 193 | 175 | ||
| 194 | if (!reset_value) | 176 | if (!reset_value) |
| @@ -196,9 +178,9 @@ static void ppro_stop(struct op_msrs const * const msrs) | |||
| 196 | for (i = 0; i < num_counters; ++i) { | 178 | for (i = 0; i < num_counters; ++i) { |
| 197 | if (!reset_value[i]) | 179 | if (!reset_value[i]) |
| 198 | continue; | 180 | continue; |
| 199 | CTRL_READ(low, high, msrs, i); | 181 | rdmsrl(msrs->controls[i].addr, val); |
| 200 | CTRL_SET_INACTIVE(low); | 182 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; |
| 201 | CTRL_WRITE(low, high, msrs, i); | 183 | wrmsrl(msrs->controls[i].addr, val); |
| 202 | } | 184 | } |
| 203 | } | 185 | } |
| 204 | 186 | ||
| @@ -206,12 +188,12 @@ static void ppro_shutdown(struct op_msrs const * const msrs) | |||
| 206 | { | 188 | { |
| 207 | int i; | 189 | int i; |
| 208 | 190 | ||
| 209 | for (i = 0 ; i < num_counters ; ++i) { | 191 | for (i = 0; i < num_counters; ++i) { |
| 210 | if (CTR_IS_RESERVED(msrs, i)) | 192 | if (msrs->counters[i].addr) |
| 211 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); | 193 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); |
| 212 | } | 194 | } |
| 213 | for (i = 0 ; i < num_counters ; ++i) { | 195 | for (i = 0; i < num_counters; ++i) { |
| 214 | if (CTRL_IS_RESERVED(msrs, i)) | 196 | if (msrs->controls[i].addr) |
| 215 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); | 197 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); |
| 216 | } | 198 | } |
| 217 | if (reset_value) { | 199 | if (reset_value) { |
| @@ -222,8 +204,9 @@ static void ppro_shutdown(struct op_msrs const * const msrs) | |||
| 222 | 204 | ||
| 223 | 205 | ||
| 224 | struct op_x86_model_spec op_ppro_spec = { | 206 | struct op_x86_model_spec op_ppro_spec = { |
| 225 | .num_counters = 2, /* can be overriden */ | 207 | .num_counters = 2, |
| 226 | .num_controls = 2, /* dito */ | 208 | .num_controls = 2, |
| 209 | .reserved = MSR_PPRO_EVENTSEL_RESERVED, | ||
| 227 | .fill_in_addresses = &ppro_fill_in_addresses, | 210 | .fill_in_addresses = &ppro_fill_in_addresses, |
| 228 | .setup_ctrs = &ppro_setup_ctrs, | 211 | .setup_ctrs = &ppro_setup_ctrs, |
| 229 | .check_ctrs = &ppro_check_ctrs, | 212 | .check_ctrs = &ppro_check_ctrs, |
| @@ -241,7 +224,7 @@ struct op_x86_model_spec op_ppro_spec = { | |||
| 241 | * the specific CPU. | 224 | * the specific CPU. |
| 242 | */ | 225 | */ |
| 243 | 226 | ||
| 244 | void arch_perfmon_setup_counters(void) | 227 | static void arch_perfmon_setup_counters(void) |
| 245 | { | 228 | { |
| 246 | union cpuid10_eax eax; | 229 | union cpuid10_eax eax; |
| 247 | 230 | ||
| @@ -259,11 +242,17 @@ void arch_perfmon_setup_counters(void) | |||
| 259 | 242 | ||
| 260 | op_arch_perfmon_spec.num_counters = num_counters; | 243 | op_arch_perfmon_spec.num_counters = num_counters; |
| 261 | op_arch_perfmon_spec.num_controls = num_counters; | 244 | op_arch_perfmon_spec.num_controls = num_counters; |
| 262 | op_ppro_spec.num_counters = num_counters; | 245 | } |
| 263 | op_ppro_spec.num_controls = num_counters; | 246 | |
| 247 | static int arch_perfmon_init(struct oprofile_operations *ignore) | ||
| 248 | { | ||
| 249 | arch_perfmon_setup_counters(); | ||
| 250 | return 0; | ||
| 264 | } | 251 | } |
| 265 | 252 | ||
| 266 | struct op_x86_model_spec op_arch_perfmon_spec = { | 253 | struct op_x86_model_spec op_arch_perfmon_spec = { |
| 254 | .reserved = MSR_PPRO_EVENTSEL_RESERVED, | ||
| 255 | .init = &arch_perfmon_init, | ||
| 267 | /* num_counters/num_controls filled in at runtime */ | 256 | /* num_counters/num_controls filled in at runtime */ |
| 268 | .fill_in_addresses = &ppro_fill_in_addresses, | 257 | .fill_in_addresses = &ppro_fill_in_addresses, |
| 269 | /* user space does the cpuid check for available events */ | 258 | /* user space does the cpuid check for available events */ |
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 825e79064d64..b83776180c7f 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h | |||
| @@ -6,51 +6,66 @@ | |||
| 6 | * @remark Read the file COPYING | 6 | * @remark Read the file COPYING |
| 7 | * | 7 | * |
| 8 | * @author Graydon Hoare | 8 | * @author Graydon Hoare |
| 9 | * @author Robert Richter <robert.richter@amd.com> | ||
| 9 | */ | 10 | */ |
| 10 | 11 | ||
| 11 | #ifndef OP_X86_MODEL_H | 12 | #ifndef OP_X86_MODEL_H |
| 12 | #define OP_X86_MODEL_H | 13 | #define OP_X86_MODEL_H |
| 13 | 14 | ||
| 14 | struct op_saved_msr { | 15 | #include <asm/types.h> |
| 15 | unsigned int high; | 16 | #include <asm/perf_counter.h> |
| 16 | unsigned int low; | ||
| 17 | }; | ||
| 18 | 17 | ||
| 19 | struct op_msr { | 18 | struct op_msr { |
| 20 | unsigned long addr; | 19 | unsigned long addr; |
| 21 | struct op_saved_msr saved; | 20 | u64 saved; |
| 22 | }; | 21 | }; |
| 23 | 22 | ||
| 24 | struct op_msrs { | 23 | struct op_msrs { |
| 25 | struct op_msr *counters; | 24 | struct op_msr *counters; |
| 26 | struct op_msr *controls; | 25 | struct op_msr *controls; |
| 26 | struct op_msr *multiplex; | ||
| 27 | }; | 27 | }; |
| 28 | 28 | ||
| 29 | struct pt_regs; | 29 | struct pt_regs; |
| 30 | 30 | ||
| 31 | struct oprofile_operations; | ||
| 32 | |||
| 31 | /* The model vtable abstracts the differences between | 33 | /* The model vtable abstracts the differences between |
| 32 | * various x86 CPU models' perfctr support. | 34 | * various x86 CPU models' perfctr support. |
| 33 | */ | 35 | */ |
| 34 | struct op_x86_model_spec { | 36 | struct op_x86_model_spec { |
| 35 | int (*init)(struct oprofile_operations *ops); | 37 | unsigned int num_counters; |
| 36 | void (*exit)(void); | 38 | unsigned int num_controls; |
| 37 | unsigned int num_counters; | 39 | unsigned int num_virt_counters; |
| 38 | unsigned int num_controls; | 40 | u64 reserved; |
| 39 | void (*fill_in_addresses)(struct op_msrs * const msrs); | 41 | u16 event_mask; |
| 40 | void (*setup_ctrs)(struct op_msrs const * const msrs); | 42 | int (*init)(struct oprofile_operations *ops); |
| 41 | int (*check_ctrs)(struct pt_regs * const regs, | 43 | void (*exit)(void); |
| 42 | struct op_msrs const * const msrs); | 44 | void (*fill_in_addresses)(struct op_msrs * const msrs); |
| 43 | void (*start)(struct op_msrs const * const msrs); | 45 | void (*setup_ctrs)(struct op_x86_model_spec const *model, |
| 44 | void (*stop)(struct op_msrs const * const msrs); | 46 | struct op_msrs const * const msrs); |
| 45 | void (*shutdown)(struct op_msrs const * const msrs); | 47 | int (*check_ctrs)(struct pt_regs * const regs, |
| 48 | struct op_msrs const * const msrs); | ||
| 49 | void (*start)(struct op_msrs const * const msrs); | ||
| 50 | void (*stop)(struct op_msrs const * const msrs); | ||
| 51 | void (*shutdown)(struct op_msrs const * const msrs); | ||
| 52 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
| 53 | void (*switch_ctrl)(struct op_x86_model_spec const *model, | ||
| 54 | struct op_msrs const * const msrs); | ||
| 55 | #endif | ||
| 46 | }; | 56 | }; |
| 47 | 57 | ||
| 58 | struct op_counter_config; | ||
| 59 | |||
| 60 | extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, | ||
| 61 | struct op_counter_config *counter_config); | ||
| 62 | extern int op_x86_phys_to_virt(int phys); | ||
| 63 | extern int op_x86_virt_to_phys(int virt); | ||
| 64 | |||
| 48 | extern struct op_x86_model_spec op_ppro_spec; | 65 | extern struct op_x86_model_spec op_ppro_spec; |
| 49 | extern struct op_x86_model_spec const op_p4_spec; | 66 | extern struct op_x86_model_spec op_p4_spec; |
| 50 | extern struct op_x86_model_spec const op_p4_ht2_spec; | 67 | extern struct op_x86_model_spec op_p4_ht2_spec; |
| 51 | extern struct op_x86_model_spec const op_amd_spec; | 68 | extern struct op_x86_model_spec op_amd_spec; |
| 52 | extern struct op_x86_model_spec op_arch_perfmon_spec; | 69 | extern struct op_x86_model_spec op_arch_perfmon_spec; |
| 53 | 70 | ||
| 54 | extern void arch_perfmon_setup_counters(void); | ||
| 55 | |||
| 56 | #endif /* OP_X86_MODEL_H */ | 71 | #endif /* OP_X86_MODEL_H */ |
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index bd13c3e4c6db..347d882b3bb3 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c | |||
| @@ -192,13 +192,14 @@ struct pci_raw_ops pci_direct_conf2 = { | |||
| 192 | static int __init pci_sanity_check(struct pci_raw_ops *o) | 192 | static int __init pci_sanity_check(struct pci_raw_ops *o) |
| 193 | { | 193 | { |
| 194 | u32 x = 0; | 194 | u32 x = 0; |
| 195 | int devfn; | 195 | int year, devfn; |
| 196 | 196 | ||
| 197 | if (pci_probe & PCI_NO_CHECKS) | 197 | if (pci_probe & PCI_NO_CHECKS) |
| 198 | return 1; | 198 | return 1; |
| 199 | /* Assume Type 1 works for newer systems. | 199 | /* Assume Type 1 works for newer systems. |
| 200 | This handles machines that don't have anything on PCI Bus 0. */ | 200 | This handles machines that don't have anything on PCI Bus 0. */ |
| 201 | if (dmi_get_year(DMI_BIOS_DATE) >= 2001) | 201 | dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL); |
| 202 | if (year >= 2001) | ||
| 202 | return 1; | 203 | return 1; |
| 203 | 204 | ||
| 204 | for (devfn = 0; devfn < 0x100; devfn++) { | 205 | for (devfn = 0; devfn < 0x100; devfn++) { |
