diff options
author | Barry Kasindorf <barry.kasindorf@amd.com> | 2008-07-22 15:08:55 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-07-26 05:48:05 -0400 |
commit | 56784f11df473b4c1d9d0e37777fd7c0b77b6bca (patch) | |
tree | 32cb2b67ec2c60acba23eec4dde5a40da1c74e79 | |
parent | 345c25730d085c45622ac779da4dbd97dc3a10fe (diff) |
x86/oprofile: add IBS support for AMD CPUs, model specific code
This patchset supports the new profiling hardware available in the
latest AMD CPUs in the oProfile driver.
Signed-off-by: Barry Kasindorf <barry.kasindorf@amd.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: oprofile-list <oprofile-list@lists.sourceforge.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/oprofile/op_model_athlon.c | 257 |
1 files changed, 257 insertions, 0 deletions
diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index 40ecb020c7d2..229e0b4e21e3 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c | |||
@@ -9,9 +9,13 @@ | |||
9 | * @author Philippe Elie | 9 | * @author Philippe Elie |
10 | * @author Graydon Hoare | 10 | * @author Graydon Hoare |
11 | * @author Robert Richter <robert.richter@amd.com> | 11 | * @author Robert Richter <robert.richter@amd.com> |
12 | * @author Barry Kasindorf | ||
12 | */ | 13 | */ |
13 | 14 | ||
14 | #include <linux/oprofile.h> | 15 | #include <linux/oprofile.h> |
16 | #include <linux/device.h> | ||
17 | #include <linux/pci.h> | ||
18 | |||
15 | #include <asm/ptrace.h> | 19 | #include <asm/ptrace.h> |
16 | #include <asm/msr.h> | 20 | #include <asm/msr.h> |
17 | #include <asm/nmi.h> | 21 | #include <asm/nmi.h> |
@@ -43,7 +47,83 @@ | |||
43 | #define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) | 47 | #define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) |
44 | #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) | 48 | #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) |
45 | 49 | ||
50 | #define IBS_FETCH_CTL_HIGH_MASK 0xFFFFFFFF | ||
51 | /* bit 17 of the high dword, i.e. IbsFetchCtl[bit 49]; tested before a fetch sample is read */ | ||
52 | #define IBS_FETCH_VALID_BIT (1UL << 17) | ||
53 | /* bit 20 of the high dword, i.e. IbsFetchCtl[bit 52] */ | ||
54 | #define IBS_FETCH_PHY_ADDR_VALID_BIT (1UL << 20) | ||
55 | /* bit 16 of the high dword, i.e. IbsFetchCtl[bit 48]; set to start or reenable fetch sampling */ | ||
56 | #define IBS_FETCH_ENABLE (1UL << 16) | ||
57 | |||
58 | #define IBS_FETCH_CTL_CNT_MASK 0x00000000FFFF0000UL | ||
59 | #define IBS_FETCH_CTL_MAX_CNT_MASK 0x000000000000FFFFUL /* applied to the low dword when fetch sampling is reenabled */ | ||
60 | |||
61 | /* IbsOpCtl masks/bits (both live in the low dword) */ | ||
62 | #define IBS_OP_VALID_BIT (1ULL<<18) /* IbsOpCtl[bit18] */ | ||
63 | #define IBS_OP_ENABLE (1ULL<<17) /* IbsOpCtl[bit17] */ | ||
64 | |||
65 | /* Codes used in cpu_buffer.c; passed as the last argument of oprofile_add_ibs_sample() */ | ||
66 | #define IBS_FETCH_BEGIN 3 | ||
67 | #define IBS_OP_BEGIN 4 | ||
68 | |||
69 | /* IbsCtl masks: compared against the low dword read from MSR_AMD64_IBSCTL */ | ||
70 | #define IBS_CTL_LVT_OFFSET_VALID_BIT (1ULL<<8) | ||
71 | |||
72 | /* PCI Extended Configuration Constants */ | ||
73 | /* PCI config-space offset written to set the IBS control register APIC LVT offset */ | ||
74 | #define IBS_LVT_OFFSET_PCI 0x1CC | ||
75 | |||
76 | struct ibs_fetch_sample { /* one IBS fetch sample: three MSRs, each kept as the low/high halves returned by rdmsr() */ | ||
77 | /* MSRC001_1031 IBS Fetch Linear Address Register */ | ||
78 | unsigned int ibs_fetch_lin_addr_low; | ||
79 | unsigned int ibs_fetch_lin_addr_high; | ||
80 | /* MSRC001_1030 IBS Fetch Control Register */ | ||
81 | unsigned int ibs_fetch_ctl_low; | ||
82 | unsigned int ibs_fetch_ctl_high; | ||
83 | /* MSRC001_1032 IBS Fetch Physical Address Register */ | ||
84 | unsigned int ibs_fetch_phys_addr_low; | ||
85 | unsigned int ibs_fetch_phys_addr_high; | ||
86 | }; /* handed to oprofile_add_ibs_sample() through an (unsigned int *) cast, so the field order is part of the layout */ | ||
87 | |||
88 | struct ibs_op_sample { /* one IBS op sample: six MSRs, each kept as the low/high halves returned by rdmsr() */ | ||
89 | /* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */ | ||
90 | unsigned int ibs_op_rip_low; | ||
91 | unsigned int ibs_op_rip_high; | ||
92 | /* MSRC001_1035 IBS Op Data Register */ | ||
93 | unsigned int ibs_op_data1_low; | ||
94 | unsigned int ibs_op_data1_high; | ||
95 | /* MSRC001_1036 IBS Op Data 2 Register */ | ||
96 | unsigned int ibs_op_data2_low; | ||
97 | unsigned int ibs_op_data2_high; | ||
98 | /* MSRC001_1037 IBS Op Data 3 Register */ | ||
99 | unsigned int ibs_op_data3_low; | ||
100 | unsigned int ibs_op_data3_high; | ||
101 | /* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */ | ||
102 | unsigned int ibs_dc_linear_low; | ||
103 | unsigned int ibs_dc_linear_high; | ||
104 | /* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */ | ||
105 | unsigned int ibs_dc_phys_low; | ||
106 | unsigned int ibs_dc_phys_high; | ||
107 | }; /* handed to oprofile_add_ibs_sample() through an (unsigned int *) cast, so the field order is part of the layout */ | ||
108 | |||
109 | /* | ||
110 | * uninitialize the APIC for the IBS interrupts if needed on AMD Family10h+ | ||
111 | */ | ||
112 | static void clear_ibs_nmi(void); | ||
113 | |||
46 | static unsigned long reset_value[NUM_COUNTERS]; | 114 | static unsigned long reset_value[NUM_COUNTERS]; |
115 | static int ibs_allowed; /* AMD Family10h and later */ | ||
116 | |||
117 | struct op_ibs_config { /* IBS settings; every field is exported as an oprofilefs ulong file by setup_ibs_files() */ | ||
118 | unsigned long op_enabled; /* file "ibs_uops/enable"; gates the IBS op paths in start, stop and check_ctrs */ | ||
119 | unsigned long fetch_enabled; /* file "ibs_fetch/enable"; gates the IBS fetch paths in start, stop and check_ctrs */ | ||
120 | unsigned long max_cnt_fetch; /* file "ibs_fetch/max_count"; op_amd_start() programs (value >> 4) & 0xFFFF */ | ||
121 | unsigned long max_cnt_op; /* file "ibs_uops/max_count"; op_amd_start() programs (value >> 4) & 0xFFFF */ | ||
122 | unsigned long rand_en; /* file "ibs_fetch/rand_enable"; not read by any code visible in this patch */ | ||
123 | unsigned long dispatched_ops; /* file "ibs_uops/dispatched_ops"; not read by any code visible in this patch */ | ||
124 | }; | ||
125 | |||
126 | static struct op_ibs_config ibs_config; | ||
47 | 127 | ||
48 | /* functions for op_amd_spec */ | 128 | /* functions for op_amd_spec */ |
49 | 129 | ||
@@ -121,6 +201,8 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, | |||
121 | { | 201 | { |
122 | unsigned int low, high; | 202 | unsigned int low, high; |
123 | int i; | 203 | int i; |
204 | struct ibs_fetch_sample ibs_fetch; | ||
205 | struct ibs_op_sample ibs_op; | ||
124 | 206 | ||
125 | for (i = 0 ; i < NUM_COUNTERS; ++i) { | 207 | for (i = 0 ; i < NUM_COUNTERS; ++i) { |
126 | if (!reset_value[i]) | 208 | if (!reset_value[i]) |
@@ -132,6 +214,65 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, | |||
132 | } | 214 | } |
133 | } | 215 | } |
134 | 216 | ||
217 | /*If AMD and IBS is available */ | ||
218 | if (ibs_allowed && ibs_config.fetch_enabled) { | ||
219 | rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); | ||
220 | if (high & IBS_FETCH_VALID_BIT) { | ||
221 | ibs_fetch.ibs_fetch_ctl_high = high; | ||
222 | ibs_fetch.ibs_fetch_ctl_low = low; | ||
223 | rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high); | ||
224 | ibs_fetch.ibs_fetch_lin_addr_high = high; | ||
225 | ibs_fetch.ibs_fetch_lin_addr_low = low; | ||
226 | rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high); | ||
227 | ibs_fetch.ibs_fetch_phys_addr_high = high; | ||
228 | ibs_fetch.ibs_fetch_phys_addr_low = low; | ||
229 | |||
230 | oprofile_add_ibs_sample(regs, | ||
231 | (unsigned int *)&ibs_fetch, | ||
232 | IBS_FETCH_BEGIN); | ||
233 | |||
234 | /*reenable the IRQ */ | ||
235 | rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); | ||
236 | high &= ~(IBS_FETCH_VALID_BIT); | ||
237 | high |= IBS_FETCH_ENABLE; | ||
238 | low &= IBS_FETCH_CTL_MAX_CNT_MASK; | ||
239 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); | ||
240 | } | ||
241 | } | ||
242 | |||
243 | if (ibs_allowed && ibs_config.op_enabled) { | ||
244 | rdmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
245 | if (low & IBS_OP_VALID_BIT) { | ||
246 | rdmsr(MSR_AMD64_IBSOPRIP, low, high); | ||
247 | ibs_op.ibs_op_rip_low = low; | ||
248 | ibs_op.ibs_op_rip_high = high; | ||
249 | rdmsr(MSR_AMD64_IBSOPDATA, low, high); | ||
250 | ibs_op.ibs_op_data1_low = low; | ||
251 | ibs_op.ibs_op_data1_high = high; | ||
252 | rdmsr(MSR_AMD64_IBSOPDATA2, low, high); | ||
253 | ibs_op.ibs_op_data2_low = low; | ||
254 | ibs_op.ibs_op_data2_high = high; | ||
255 | rdmsr(MSR_AMD64_IBSOPDATA3, low, high); | ||
256 | ibs_op.ibs_op_data3_low = low; | ||
257 | ibs_op.ibs_op_data3_high = high; | ||
258 | rdmsr(MSR_AMD64_IBSDCLINAD, low, high); | ||
259 | ibs_op.ibs_dc_linear_low = low; | ||
260 | ibs_op.ibs_dc_linear_high = high; | ||
261 | rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high); | ||
262 | ibs_op.ibs_dc_phys_low = low; | ||
263 | ibs_op.ibs_dc_phys_high = high; | ||
264 | |||
265 | /* add the sample, then reenable the IRQ */ | ||
266 | oprofile_add_ibs_sample(regs, | ||
267 | (unsigned int *)&ibs_op, | ||
268 | IBS_OP_BEGIN); | ||
269 | rdmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
270 | low &= ~(IBS_OP_VALID_BIT); | ||
271 | low |= IBS_OP_ENABLE; | ||
272 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
273 | } | ||
274 | } | ||
275 | |||
135 | /* See op_model_ppro.c */ | 276 | /* See op_model_ppro.c */ |
136 | return 1; | 277 | return 1; |
137 | } | 278 | } |
@@ -148,6 +289,17 @@ static void op_amd_start(struct op_msrs const * const msrs) | |||
148 | CTRL_WRITE(low, high, msrs, i); | 289 | CTRL_WRITE(low, high, msrs, i); |
149 | } | 290 | } |
150 | } | 291 | } |
292 | if (ibs_allowed && ibs_config.fetch_enabled) { | ||
293 | low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; | ||
294 | high = IBS_FETCH_ENABLE; | ||
295 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); | ||
296 | } | ||
297 | |||
298 | if (ibs_allowed && ibs_config.op_enabled) { | ||
299 | low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + IBS_OP_ENABLE; | ||
300 | high = 0; | ||
301 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
302 | } | ||
151 | } | 303 | } |
152 | 304 | ||
153 | 305 | ||
@@ -165,6 +317,18 @@ static void op_amd_stop(struct op_msrs const * const msrs) | |||
165 | CTRL_SET_INACTIVE(low); | 317 | CTRL_SET_INACTIVE(low); |
166 | CTRL_WRITE(low, high, msrs, i); | 318 | CTRL_WRITE(low, high, msrs, i); |
167 | } | 319 | } |
320 | |||
321 | if (ibs_allowed && ibs_config.fetch_enabled) { | ||
322 | low = 0; /* clear max count and enable */ | ||
323 | high = 0; | ||
324 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); | ||
325 | } | ||
326 | |||
327 | if (ibs_allowed && ibs_config.op_enabled) { | ||
328 | low = 0; /* clear max count and enable */ | ||
329 | high = 0; | ||
330 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
331 | } | ||
168 | } | 332 | } |
169 | 333 | ||
170 | static void op_amd_shutdown(struct op_msrs const * const msrs) | 334 | static void op_amd_shutdown(struct op_msrs const * const msrs) |
@@ -181,6 +345,99 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) | |||
181 | } | 345 | } |
182 | } | 346 | } |
183 | 347 | ||
348 | static inline void apic_init_ibs_nmi_per_cpu(void *arg) /* callback that setup_ibs() runs through on_each_cpu(); arg is unused */ | ||
349 | { | ||
350 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); /* selects the NMI message type; NOTE(review): the trailing 0 is presumably the mask flag (unmasked) - confirm against setup_APIC_eilvt_ibs() */ | ||
351 | } | ||
352 | |||
353 | static inline void apic_clear_ibs_nmi_per_cpu(void *arg) /* callback that clear_ibs_nmi() runs through on_each_cpu(); arg is unused */ | ||
354 | { | ||
355 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); /* switches the message type away from NMI; NOTE(review): the trailing 1 is presumably the mask flag (masked) - confirm against setup_APIC_eilvt_ibs() */ | ||
356 | } | ||
357 | |||
358 | /* | ||
359 | * initialize the APIC for the IBS interrupts | ||
360 | * if needed on AMD Family10h rev B0 and later | ||
361 | */ | ||
362 | static void setup_ibs(void) /* detects IBS, makes sure the IBS LVT offset is programmed, then runs the NMI setup callback on every CPU */ | ||
363 | { | ||
364 | struct pci_dev *gh_device = NULL; /* NULL starts the pci_get_device() iteration below */ | ||
365 | u32 low, high; | ||
366 | u8 vector; | ||
367 | |||
368 | ibs_allowed = boot_cpu_has(X86_FEATURE_IBS); /* ibs_allowed gates every IBS path in this file */ | ||
369 | |||
370 | if (!ibs_allowed) | ||
371 | return; /* no IBS: nothing is programmed and no per-CPU callback is run */ | ||
372 | |||
373 | /* This gets the APIC_EILVT_LVTOFF_IBS value */ | ||
374 | vector = setup_APIC_eilvt_ibs(0, 0, 1); | ||
375 | |||
376 | /*see if the IBS control register is already set correctly*/ | ||
377 | /*remove this when we know for sure it is done | ||
378 | in the kernel init*/ | ||
379 | rdmsr(MSR_AMD64_IBSCTL, low, high); /* only the low dword is examined; high is not used afterwards */ | ||
380 | if ((low & (IBS_CTL_LVT_OFFSET_VALID_BIT | vector)) != | ||
381 | (IBS_CTL_LVT_OFFSET_VALID_BIT | vector)) { /* taken when the valid bit or any set bit of vector is missing from low */ | ||
382 | |||
383 | /**** Be sure to run loop until NULL is returned to | ||
384 | decrement reference count on any pci_dev structures | ||
385 | returned ****/ | ||
386 | while ((gh_device = pci_get_device(PCI_VENDOR_ID_AMD, | ||
387 | PCI_DEVICE_ID_AMD_10H_NB_MISC, gh_device)) | ||
388 | != NULL) { | ||
389 | /* This code may change if we can find a proper | ||
390 | * way to get at the PCI extended config space */ | ||
391 | pci_write_config_dword( | ||
392 | gh_device, IBS_LVT_OFFSET_PCI, | ||
393 | (vector | IBS_CTL_LVT_OFFSET_VALID_BIT)); /* same value the check above looks for; the write's return value is ignored */ | ||
394 | } | ||
395 | } | ||
396 | on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 1, 1); /* runs whether or not the PCI write loop above was needed */ | ||
397 | } | ||
398 | |||
399 | |||
400 | /* | ||
401 | * uninitialize the APIC for the IBS interrupts if needed on AMD Family10h | ||
402 | * rev B0 and later */ | ||
403 | static void clear_ibs_nmi(void) /* runs apic_clear_ibs_nmi_per_cpu on every CPU when IBS is in use */ | ||
404 | { | ||
405 | if (ibs_allowed) /* in the code visible here, only setup_ibs() assigns ibs_allowed */ | ||
406 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1, 1); | ||
407 | } | ||
408 | |||
409 | static void setup_ibs_files(struct super_block *sb, struct dentry *root) /* sets ibs_config defaults and creates the "ibs_fetch" and "ibs_uops" directories with their ulong files under root */ | ||
410 | { | ||
411 | char buf[12]; /* scratch for the directory name; "ibs_fetch" and "ibs_uops" both fit with their terminator */ | ||
412 | struct dentry *dir; | ||
413 | |||
414 | if (!ibs_allowed) | ||
415 | return; /* without IBS no defaults are set and no files are created */ | ||
416 | |||
417 | /* setup some reasonable defaults */ | ||
418 | ibs_config.max_cnt_fetch = 250000; | ||
419 | ibs_config.fetch_enabled = 0; | ||
420 | ibs_config.max_cnt_op = 250000; | ||
421 | ibs_config.op_enabled = 0; | ||
422 | ibs_config.dispatched_ops = 1; /* rand_en is the one field that gets no explicit default here */ | ||
423 | snprintf(buf, sizeof(buf), "ibs_fetch"); | ||
424 | dir = oprofilefs_mkdir(sb, root, buf); /* NOTE(review): dir is used below without a check - confirm oprofilefs_mkdir() cannot fail here */ | ||
425 | oprofilefs_create_ulong(sb, dir, "rand_enable", | ||
426 | &ibs_config.rand_en); | ||
427 | oprofilefs_create_ulong(sb, dir, "enable", | ||
428 | &ibs_config.fetch_enabled); | ||
429 | oprofilefs_create_ulong(sb, dir, "max_count", | ||
430 | &ibs_config.max_cnt_fetch); | ||
431 | snprintf(buf, sizeof(buf), "ibs_uops"); /* buf is reused for the second directory name */ | ||
432 | dir = oprofilefs_mkdir(sb, root, buf); /* NOTE(review): same unchecked return value as the first oprofilefs_mkdir() call */ | ||
433 | oprofilefs_create_ulong(sb, dir, "enable", | ||
434 | &ibs_config.op_enabled); | ||
435 | oprofilefs_create_ulong(sb, dir, "max_count", | ||
436 | &ibs_config.max_cnt_op); | ||
437 | oprofilefs_create_ulong(sb, dir, "dispatched_ops", | ||
438 | &ibs_config.dispatched_ops); | ||
439 | } | ||
440 | |||
184 | static int op_amd_init(struct oprofile_operations *ops) | 441 | static int op_amd_init(struct oprofile_operations *ops) |
185 | { | 442 | { |
186 | return 0; | 443 | return 0; |