diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-02 19:15:23 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-02 19:15:23 -0400 |
commit | f0bb4c0ab064a8aeeffbda1cee380151a594eaab (patch) | |
tree | 14d55a89c5db455aa10ff9a96ca14c474a9c4d55 /drivers/iommu | |
parent | a4883ef6af5e513a1e8c2ab9aab721604aa3a4f5 (diff) | |
parent | 983433b5812c5cf33a9008fa38c6f9b407fedb76 (diff) |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"Kernel improvements:
- watchdog driver improvements by Li Zefan
- Power7 CPI stack events related improvements by Sukadev Bhattiprolu
- event multiplexing via hrtimers and other improvements by Stephane
Eranian
- kernel stack use optimization by Andrew Hunter
- AMD IOMMU uncore PMU support by Suravee Suthikulpanit
- NMI handling rate-limits by Dave Hansen
- various hw_breakpoint fixes by Oleg Nesterov
- hw_breakpoint overflow period sampling and related signal handling
fixes by Jiri Olsa
- Intel Haswell PMU support by Andi Kleen
Tooling improvements:
- Reset SIGTERM handler in workload child process, fix from David
Ahern.
- Makefile reorganization, prep work for Kconfig patches, from Jiri
Olsa.
- Add automated make test suite, from Jiri Olsa.
- Add --percent-limit option to 'top' and 'report', from Namhyung
Kim.
- Sorting improvements, from Namhyung Kim.
- Expand definition of sysfs format attribute, from Michael Ellerman.
Tooling fixes:
- 'perf tests' fixes from Jiri Olsa.
- Make Power7 CPI stack events available in sysfs, from Sukadev
Bhattiprolu.
- Handle death by SIGTERM in 'perf record', fix from David Ahern.
- Fix printing of perf_event_paranoid message, from David Ahern.
- Handle realloc failures in 'perf kvm', from David Ahern.
- Fix divide by 0 in variance, from David Ahern.
- Save parent pid in thread struct, from David Ahern.
- Handle JITed code in shared memory, from Andi Kleen.
- Fixes for 'perf diff', from Jiri Olsa.
- Remove some unused struct members, from Jiri Olsa.
- Add missing liblk.a dependency for python/perf.so, fix from Jiri
Olsa.
- Respect CROSS_COMPILE in liblk.a, from Rabin Vincent.
- No need to do locking when adding hists in perf report, only 'top'
needs that, from Namhyung Kim.
- Fix alignment of symbol column in the hists browser (top,
report) when -v is given, from Namhyung Kim.
- Fix 'perf top' -E option behavior, from Namhyung Kim.
- Fix bug in isupper() and islower(), from Sukadev Bhattiprolu.
- Fix compile errors in bp_signal 'perf test', from Sukadev
Bhattiprolu.
... and more things"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (102 commits)
perf/x86: Disable PEBS-LL in intel_pmu_pebs_disable()
perf/x86: Fix shared register mutual exclusion enforcement
perf/x86/intel: Support full width counting
x86: Add NMI duration tracepoints
perf: Drop sample rate when sampling is too slow
x86: Warn when NMI handlers take large amounts of time
hw_breakpoint: Introduce "struct bp_cpuinfo"
hw_breakpoint: Simplify *register_wide_hw_breakpoint()
hw_breakpoint: Introduce cpumask_of_bp()
hw_breakpoint: Simplify the "weight" usage in toggle_bp_slot() paths
hw_breakpoint: Simplify list/idx mess in toggle_bp_slot() paths
perf/x86/intel: Add mem-loads/stores support for Haswell
perf/x86/intel: Support Haswell/v4 LBR format
perf/x86/intel: Move NMI clearing to end of PMI handler
perf/x86/intel: Add Haswell PEBS support
perf/x86/intel: Add simple Haswell PMU support
perf/x86/intel: Add Haswell PEBS record support
perf/x86/intel: Fix sparse warning
perf/x86/amd: AMD IOMMU Performance Counter PERF uncore PMU implementation
perf/x86/amd: Add IOMMU Performance Counter resource management
...
Diffstat (limited to 'drivers/iommu')
-rw-r--r-- | drivers/iommu/amd_iommu_init.c | 140 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_proto.h | 7 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_types.h | 15 |
3 files changed, 150 insertions, 12 deletions
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index bf51abb78dee..7acbf351e9af 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c | |||
@@ -99,7 +99,7 @@ struct ivhd_header { | |||
99 | u64 mmio_phys; | 99 | u64 mmio_phys; |
100 | u16 pci_seg; | 100 | u16 pci_seg; |
101 | u16 info; | 101 | u16 info; |
102 | u32 reserved; | 102 | u32 efr; |
103 | } __attribute__((packed)); | 103 | } __attribute__((packed)); |
104 | 104 | ||
105 | /* | 105 | /* |
@@ -154,6 +154,7 @@ bool amd_iommu_iotlb_sup __read_mostly = true; | |||
154 | u32 amd_iommu_max_pasids __read_mostly = ~0; | 154 | u32 amd_iommu_max_pasids __read_mostly = ~0; |
155 | 155 | ||
156 | bool amd_iommu_v2_present __read_mostly; | 156 | bool amd_iommu_v2_present __read_mostly; |
157 | bool amd_iommu_pc_present __read_mostly; | ||
157 | 158 | ||
158 | bool amd_iommu_force_isolation __read_mostly; | 159 | bool amd_iommu_force_isolation __read_mostly; |
159 | 160 | ||
@@ -369,23 +370,23 @@ static void iommu_disable(struct amd_iommu *iommu) | |||
369 | * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in | 370 | * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in |
370 | * the system has one. | 371 | * the system has one. |
371 | */ | 372 | */ |
372 | static u8 __iomem * __init iommu_map_mmio_space(u64 address) | 373 | static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end) |
373 | { | 374 | { |
374 | if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) { | 375 | if (!request_mem_region(address, end, "amd_iommu")) { |
375 | pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n", | 376 | pr_err("AMD-Vi: Can not reserve memory region %llx-%llx for mmio\n", |
376 | address); | 377 | address, end); |
377 | pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n"); | 378 | pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n"); |
378 | return NULL; | 379 | return NULL; |
379 | } | 380 | } |
380 | 381 | ||
381 | return (u8 __iomem *)ioremap_nocache(address, MMIO_REGION_LENGTH); | 382 | return (u8 __iomem *)ioremap_nocache(address, end); |
382 | } | 383 | } |
383 | 384 | ||
384 | static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) | 385 | static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) |
385 | { | 386 | { |
386 | if (iommu->mmio_base) | 387 | if (iommu->mmio_base) |
387 | iounmap(iommu->mmio_base); | 388 | iounmap(iommu->mmio_base); |
388 | release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); | 389 | release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end); |
389 | } | 390 | } |
390 | 391 | ||
391 | /**************************************************************************** | 392 | /**************************************************************************** |
@@ -1085,7 +1086,18 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | |||
1085 | iommu->cap_ptr = h->cap_ptr; | 1086 | iommu->cap_ptr = h->cap_ptr; |
1086 | iommu->pci_seg = h->pci_seg; | 1087 | iommu->pci_seg = h->pci_seg; |
1087 | iommu->mmio_phys = h->mmio_phys; | 1088 | iommu->mmio_phys = h->mmio_phys; |
1088 | iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); | 1089 | |
1090 | /* Check if IVHD EFR contains proper max banks/counters */ | ||
1091 | if ((h->efr != 0) && | ||
1092 | ((h->efr & (0xF << 13)) != 0) && | ||
1093 | ((h->efr & (0x3F << 17)) != 0)) { | ||
1094 | iommu->mmio_phys_end = MMIO_REG_END_OFFSET; | ||
1095 | } else { | ||
1096 | iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; | ||
1097 | } | ||
1098 | |||
1099 | iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys, | ||
1100 | iommu->mmio_phys_end); | ||
1089 | if (!iommu->mmio_base) | 1101 | if (!iommu->mmio_base) |
1090 | return -ENOMEM; | 1102 | return -ENOMEM; |
1091 | 1103 | ||
@@ -1160,6 +1172,33 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
1160 | return 0; | 1172 | return 0; |
1161 | } | 1173 | } |
1162 | 1174 | ||
1175 | |||
1176 | static void init_iommu_perf_ctr(struct amd_iommu *iommu) | ||
1177 | { | ||
1178 | u64 val = 0xabcd, val2 = 0; | ||
1179 | |||
1180 | if (!iommu_feature(iommu, FEATURE_PC)) | ||
1181 | return; | ||
1182 | |||
1183 | amd_iommu_pc_present = true; | ||
1184 | |||
1185 | /* Check if the performance counters can be written to */ | ||
1186 | if ((0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val, true)) || | ||
1187 | (0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val2, false)) || | ||
1188 | (val != val2)) { | ||
1189 | pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n"); | ||
1190 | amd_iommu_pc_present = false; | ||
1191 | return; | ||
1192 | } | ||
1193 | |||
1194 | pr_info("AMD-Vi: IOMMU performance counters supported\n"); | ||
1195 | |||
1196 | val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); | ||
1197 | iommu->max_banks = (u8) ((val >> 12) & 0x3f); | ||
1198 | iommu->max_counters = (u8) ((val >> 7) & 0xf); | ||
1199 | } | ||
1200 | |||
1201 | |||
1163 | static int iommu_init_pci(struct amd_iommu *iommu) | 1202 | static int iommu_init_pci(struct amd_iommu *iommu) |
1164 | { | 1203 | { |
1165 | int cap_ptr = iommu->cap_ptr; | 1204 | int cap_ptr = iommu->cap_ptr; |
@@ -1226,6 +1265,8 @@ static int iommu_init_pci(struct amd_iommu *iommu) | |||
1226 | if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) | 1265 | if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) |
1227 | amd_iommu_np_cache = true; | 1266 | amd_iommu_np_cache = true; |
1228 | 1267 | ||
1268 | init_iommu_perf_ctr(iommu); | ||
1269 | |||
1229 | if (is_rd890_iommu(iommu->dev)) { | 1270 | if (is_rd890_iommu(iommu->dev)) { |
1230 | int i, j; | 1271 | int i, j; |
1231 | 1272 | ||
@@ -1278,7 +1319,7 @@ static void print_iommu_info(void) | |||
1278 | if (iommu_feature(iommu, (1ULL << i))) | 1319 | if (iommu_feature(iommu, (1ULL << i))) |
1279 | pr_cont(" %s", feat_str[i]); | 1320 | pr_cont(" %s", feat_str[i]); |
1280 | } | 1321 | } |
1281 | pr_cont("\n"); | 1322 | pr_cont("\n"); |
1282 | } | 1323 | } |
1283 | } | 1324 | } |
1284 | if (irq_remapping_enabled) | 1325 | if (irq_remapping_enabled) |
@@ -2232,3 +2273,84 @@ bool amd_iommu_v2_supported(void) | |||
2232 | return amd_iommu_v2_present; | 2273 | return amd_iommu_v2_present; |
2233 | } | 2274 | } |
2234 | EXPORT_SYMBOL(amd_iommu_v2_supported); | 2275 | EXPORT_SYMBOL(amd_iommu_v2_supported); |
2276 | |||
2277 | /**************************************************************************** | ||
2278 | * | ||
2279 | * IOMMU EFR Performance Counter support functionality. This code allows | ||
2280 | * access to the IOMMU PC functionality. | ||
2281 | * | ||
2282 | ****************************************************************************/ | ||
2283 | |||
2284 | u8 amd_iommu_pc_get_max_banks(u16 devid) | ||
2285 | { | ||
2286 | struct amd_iommu *iommu; | ||
2287 | u8 ret = 0; | ||
2288 | |||
2289 | /* locate the iommu governing the devid */ | ||
2290 | iommu = amd_iommu_rlookup_table[devid]; | ||
2291 | if (iommu) | ||
2292 | ret = iommu->max_banks; | ||
2293 | |||
2294 | return ret; | ||
2295 | } | ||
2296 | EXPORT_SYMBOL(amd_iommu_pc_get_max_banks); | ||
2297 | |||
2298 | bool amd_iommu_pc_supported(void) | ||
2299 | { | ||
2300 | return amd_iommu_pc_present; | ||
2301 | } | ||
2302 | EXPORT_SYMBOL(amd_iommu_pc_supported); | ||
2303 | |||
2304 | u8 amd_iommu_pc_get_max_counters(u16 devid) | ||
2305 | { | ||
2306 | struct amd_iommu *iommu; | ||
2307 | u8 ret = 0; | ||
2308 | |||
2309 | /* locate the iommu governing the devid */ | ||
2310 | iommu = amd_iommu_rlookup_table[devid]; | ||
2311 | if (iommu) | ||
2312 | ret = iommu->max_counters; | ||
2313 | |||
2314 | return ret; | ||
2315 | } | ||
2316 | EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); | ||
2317 | |||
2318 | int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, | ||
2319 | u64 *value, bool is_write) | ||
2320 | { | ||
2321 | struct amd_iommu *iommu; | ||
2322 | u32 offset; | ||
2323 | u32 max_offset_lim; | ||
2324 | |||
2325 | /* Make sure the IOMMU PC resource is available */ | ||
2326 | if (!amd_iommu_pc_present) | ||
2327 | return -ENODEV; | ||
2328 | |||
2329 | /* Locate the iommu associated with the device ID */ | ||
2330 | iommu = amd_iommu_rlookup_table[devid]; | ||
2331 | |||
2332 | /* Check for valid iommu and pc register indexing */ | ||
2333 | if (WARN_ON((iommu == NULL) || (fxn > 0x28) || (fxn & 7))) | ||
2334 | return -ENODEV; | ||
2335 | |||
2336 | offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn); | ||
2337 | |||
2338 | /* Limit the offset to the hw defined mmio region aperture */ | ||
2339 | max_offset_lim = (u32)(((0x40|iommu->max_banks) << 12) | | ||
2340 | (iommu->max_counters << 8) | 0x28); | ||
2341 | if ((offset < MMIO_CNTR_REG_OFFSET) || | ||
2342 | (offset > max_offset_lim)) | ||
2343 | return -EINVAL; | ||
2344 | |||
2345 | if (is_write) { | ||
2346 | writel((u32)*value, iommu->mmio_base + offset); | ||
2347 | writel((*value >> 32), iommu->mmio_base + offset + 4); | ||
2348 | } else { | ||
2349 | *value = readl(iommu->mmio_base + offset + 4); | ||
2350 | *value <<= 32; | ||
2351 | *value = readl(iommu->mmio_base + offset); | ||
2352 | } | ||
2353 | |||
2354 | return 0; | ||
2355 | } | ||
2356 | EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val); | ||
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index c294961bdd36..95ed6deae47f 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h | |||
@@ -56,6 +56,13 @@ extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, | |||
56 | extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid); | 56 | extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid); |
57 | extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); | 57 | extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); |
58 | 58 | ||
59 | /* IOMMU Performance Counter functions */ | ||
60 | extern bool amd_iommu_pc_supported(void); | ||
61 | extern u8 amd_iommu_pc_get_max_banks(u16 devid); | ||
62 | extern u8 amd_iommu_pc_get_max_counters(u16 devid); | ||
63 | extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, | ||
64 | u64 *value, bool is_write); | ||
65 | |||
59 | #define PPR_SUCCESS 0x0 | 66 | #define PPR_SUCCESS 0x0 |
60 | #define PPR_INVALID 0x1 | 67 | #define PPR_INVALID 0x1 |
61 | #define PPR_FAILURE 0xf | 68 | #define PPR_FAILURE 0xf |
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 0285a215df16..e400fbe411de 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h | |||
@@ -38,9 +38,6 @@ | |||
38 | #define ALIAS_TABLE_ENTRY_SIZE 2 | 38 | #define ALIAS_TABLE_ENTRY_SIZE 2 |
39 | #define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) | 39 | #define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) |
40 | 40 | ||
41 | /* Length of the MMIO region for the AMD IOMMU */ | ||
42 | #define MMIO_REGION_LENGTH 0x4000 | ||
43 | |||
44 | /* Capability offsets used by the driver */ | 41 | /* Capability offsets used by the driver */ |
45 | #define MMIO_CAP_HDR_OFFSET 0x00 | 42 | #define MMIO_CAP_HDR_OFFSET 0x00 |
46 | #define MMIO_RANGE_OFFSET 0x0c | 43 | #define MMIO_RANGE_OFFSET 0x0c |
@@ -78,6 +75,10 @@ | |||
78 | #define MMIO_STATUS_OFFSET 0x2020 | 75 | #define MMIO_STATUS_OFFSET 0x2020 |
79 | #define MMIO_PPR_HEAD_OFFSET 0x2030 | 76 | #define MMIO_PPR_HEAD_OFFSET 0x2030 |
80 | #define MMIO_PPR_TAIL_OFFSET 0x2038 | 77 | #define MMIO_PPR_TAIL_OFFSET 0x2038 |
78 | #define MMIO_CNTR_CONF_OFFSET 0x4000 | ||
79 | #define MMIO_CNTR_REG_OFFSET 0x40000 | ||
80 | #define MMIO_REG_END_OFFSET 0x80000 | ||
81 | |||
81 | 82 | ||
82 | 83 | ||
83 | /* Extended Feature Bits */ | 84 | /* Extended Feature Bits */ |
@@ -507,6 +508,10 @@ struct amd_iommu { | |||
507 | 508 | ||
508 | /* physical address of MMIO space */ | 509 | /* physical address of MMIO space */ |
509 | u64 mmio_phys; | 510 | u64 mmio_phys; |
511 | |||
512 | /* physical end address of MMIO space */ | ||
513 | u64 mmio_phys_end; | ||
514 | |||
510 | /* virtual address of MMIO space */ | 515 | /* virtual address of MMIO space */ |
511 | u8 __iomem *mmio_base; | 516 | u8 __iomem *mmio_base; |
512 | 517 | ||
@@ -584,6 +589,10 @@ struct amd_iommu { | |||
584 | 589 | ||
585 | /* The l2 indirect registers */ | 590 | /* The l2 indirect registers */ |
586 | u32 stored_l2[0x83]; | 591 | u32 stored_l2[0x83]; |
592 | |||
593 | /* The maximum PC banks and counters/bank (PCSup=1) */ | ||
594 | u8 max_banks; | ||
595 | u8 max_counters; | ||
587 | }; | 596 | }; |
588 | 597 | ||
589 | struct devid_map { | 598 | struct devid_map { |