diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-27 21:48:00 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-27 21:48:00 -0400 |
commit | a042e26137d7674ac04b1cd2d5c06b9ebc1ee2d5 (patch) | |
tree | c1a7a8bda41b99caa4b4a0fe320fc73278879f7d /arch/x86 | |
parent | f66dd539feb849a3a00f7fac67c026e0935e373a (diff) | |
parent | e25804a0327dad954f7d43803178fdef2fd35b4e (diff) |
Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (50 commits)
perf python scripting: Add futex-contention script
perf python scripting: Fixup cut'n'paste error in sctop script
perf scripting: Shut up 'perf record' final status
perf record: Remove newline character from perror() argument
perf python scripting: Support fedora 11 (audit 1.7.17)
perf python scripting: Improve the syscalls-by-pid script
perf python scripting: print the syscall name on sctop
perf python scripting: Improve the syscalls-counts script
perf python scripting: Improve the failed-syscalls-by-pid script
kprobes: Remove redundant text_mutex lock in optimize
x86/oprofile: Fix uninitialized variable use in debug printk
tracing: Fix 'faild' -> 'failed' typo
perf probe: Fix format specified for Dwarf_Off parameter
perf trace: Fix detection of script extension
perf trace: Use $PERF_EXEC_PATH in canned report scripts
perf tools: Document event modifiers
perf tools: Remove direct slang.h include
perf_events: Fix for transaction recovery in group_sched_in()
perf_events: Revert: Fix transaction recovery in group_sched_in()
perf, x86: Use NUMA aware allocations for PEBS/BTS/DS allocations
...
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/msr-index.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/perf_event.h | 19 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 21 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_ds.c | 216 | ||||
-rw-r--r-- | arch/x86/oprofile/nmi_int.c | 6 | ||||
-rw-r--r-- | arch/x86/oprofile/op_model_amd.c | 146 |
6 files changed, 271 insertions, 138 deletions
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 83c4bb1d917d..3ea3dc487047 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -121,6 +121,7 @@ | |||
121 | #define MSR_AMD64_IBSDCLINAD 0xc0011038 | 121 | #define MSR_AMD64_IBSDCLINAD 0xc0011038 |
122 | #define MSR_AMD64_IBSDCPHYSAD 0xc0011039 | 122 | #define MSR_AMD64_IBSDCPHYSAD 0xc0011039 |
123 | #define MSR_AMD64_IBSCTL 0xc001103a | 123 | #define MSR_AMD64_IBSCTL 0xc001103a |
124 | #define MSR_AMD64_IBSBRTARGET 0xc001103b | ||
124 | 125 | ||
125 | /* Fam 10h MSRs */ | 126 | /* Fam 10h MSRs */ |
126 | #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 | 127 | #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 |
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 6e742cc4251b..550e26b1dbb3 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -111,17 +111,18 @@ union cpuid10_edx { | |||
111 | #define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) | 111 | #define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) |
112 | 112 | ||
113 | /* IbsFetchCtl bits/masks */ | 113 | /* IbsFetchCtl bits/masks */ |
114 | #define IBS_FETCH_RAND_EN (1ULL<<57) | 114 | #define IBS_FETCH_RAND_EN (1ULL<<57) |
115 | #define IBS_FETCH_VAL (1ULL<<49) | 115 | #define IBS_FETCH_VAL (1ULL<<49) |
116 | #define IBS_FETCH_ENABLE (1ULL<<48) | 116 | #define IBS_FETCH_ENABLE (1ULL<<48) |
117 | #define IBS_FETCH_CNT 0xFFFF0000ULL | 117 | #define IBS_FETCH_CNT 0xFFFF0000ULL |
118 | #define IBS_FETCH_MAX_CNT 0x0000FFFFULL | 118 | #define IBS_FETCH_MAX_CNT 0x0000FFFFULL |
119 | 119 | ||
120 | /* IbsOpCtl bits */ | 120 | /* IbsOpCtl bits */ |
121 | #define IBS_OP_CNT_CTL (1ULL<<19) | 121 | #define IBS_OP_CNT_CTL (1ULL<<19) |
122 | #define IBS_OP_VAL (1ULL<<18) | 122 | #define IBS_OP_VAL (1ULL<<18) |
123 | #define IBS_OP_ENABLE (1ULL<<17) | 123 | #define IBS_OP_ENABLE (1ULL<<17) |
124 | #define IBS_OP_MAX_CNT 0x0000FFFFULL | 124 | #define IBS_OP_MAX_CNT 0x0000FFFFULL |
125 | #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ | ||
125 | 126 | ||
126 | #ifdef CONFIG_PERF_EVENTS | 127 | #ifdef CONFIG_PERF_EVENTS |
127 | extern void init_hw_perf_events(void); | 128 | extern void init_hw_perf_events(void); |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c1e8c7a51164..ed6310183efb 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -237,6 +237,7 @@ struct x86_pmu { | |||
237 | * Intel DebugStore bits | 237 | * Intel DebugStore bits |
238 | */ | 238 | */ |
239 | int bts, pebs; | 239 | int bts, pebs; |
240 | int bts_active, pebs_active; | ||
240 | int pebs_record_size; | 241 | int pebs_record_size; |
241 | void (*drain_pebs)(struct pt_regs *regs); | 242 | void (*drain_pebs)(struct pt_regs *regs); |
242 | struct event_constraint *pebs_constraints; | 243 | struct event_constraint *pebs_constraints; |
@@ -380,7 +381,7 @@ static void release_pmc_hardware(void) {} | |||
380 | 381 | ||
381 | #endif | 382 | #endif |
382 | 383 | ||
383 | static int reserve_ds_buffers(void); | 384 | static void reserve_ds_buffers(void); |
384 | static void release_ds_buffers(void); | 385 | static void release_ds_buffers(void); |
385 | 386 | ||
386 | static void hw_perf_event_destroy(struct perf_event *event) | 387 | static void hw_perf_event_destroy(struct perf_event *event) |
@@ -477,7 +478,7 @@ static int x86_setup_perfctr(struct perf_event *event) | |||
477 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | 478 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && |
478 | (hwc->sample_period == 1)) { | 479 | (hwc->sample_period == 1)) { |
479 | /* BTS is not supported by this architecture. */ | 480 | /* BTS is not supported by this architecture. */ |
480 | if (!x86_pmu.bts) | 481 | if (!x86_pmu.bts_active) |
481 | return -EOPNOTSUPP; | 482 | return -EOPNOTSUPP; |
482 | 483 | ||
483 | /* BTS is currently only allowed for user-mode. */ | 484 | /* BTS is currently only allowed for user-mode. */ |
@@ -496,12 +497,13 @@ static int x86_pmu_hw_config(struct perf_event *event) | |||
496 | int precise = 0; | 497 | int precise = 0; |
497 | 498 | ||
498 | /* Support for constant skid */ | 499 | /* Support for constant skid */ |
499 | if (x86_pmu.pebs) | 500 | if (x86_pmu.pebs_active) { |
500 | precise++; | 501 | precise++; |
501 | 502 | ||
502 | /* Support for IP fixup */ | 503 | /* Support for IP fixup */ |
503 | if (x86_pmu.lbr_nr) | 504 | if (x86_pmu.lbr_nr) |
504 | precise++; | 505 | precise++; |
506 | } | ||
505 | 507 | ||
506 | if (event->attr.precise_ip > precise) | 508 | if (event->attr.precise_ip > precise) |
507 | return -EOPNOTSUPP; | 509 | return -EOPNOTSUPP; |
@@ -543,11 +545,8 @@ static int __x86_pmu_event_init(struct perf_event *event) | |||
543 | if (atomic_read(&active_events) == 0) { | 545 | if (atomic_read(&active_events) == 0) { |
544 | if (!reserve_pmc_hardware()) | 546 | if (!reserve_pmc_hardware()) |
545 | err = -EBUSY; | 547 | err = -EBUSY; |
546 | else { | 548 | else |
547 | err = reserve_ds_buffers(); | 549 | reserve_ds_buffers(); |
548 | if (err) | ||
549 | release_pmc_hardware(); | ||
550 | } | ||
551 | } | 550 | } |
552 | if (!err) | 551 | if (!err) |
553 | atomic_inc(&active_events); | 552 | atomic_inc(&active_events); |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 4977f9c400e5..b7dcd9f2b8a0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -74,6 +74,107 @@ static void fini_debug_store_on_cpu(int cpu) | |||
74 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | 74 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); |
75 | } | 75 | } |
76 | 76 | ||
77 | static int alloc_pebs_buffer(int cpu) | ||
78 | { | ||
79 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
80 | int node = cpu_to_node(cpu); | ||
81 | int max, thresh = 1; /* always use a single PEBS record */ | ||
82 | void *buffer; | ||
83 | |||
84 | if (!x86_pmu.pebs) | ||
85 | return 0; | ||
86 | |||
87 | buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node); | ||
88 | if (unlikely(!buffer)) | ||
89 | return -ENOMEM; | ||
90 | |||
91 | max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; | ||
92 | |||
93 | ds->pebs_buffer_base = (u64)(unsigned long)buffer; | ||
94 | ds->pebs_index = ds->pebs_buffer_base; | ||
95 | ds->pebs_absolute_maximum = ds->pebs_buffer_base + | ||
96 | max * x86_pmu.pebs_record_size; | ||
97 | |||
98 | ds->pebs_interrupt_threshold = ds->pebs_buffer_base + | ||
99 | thresh * x86_pmu.pebs_record_size; | ||
100 | |||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | static void release_pebs_buffer(int cpu) | ||
105 | { | ||
106 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
107 | |||
108 | if (!ds || !x86_pmu.pebs) | ||
109 | return; | ||
110 | |||
111 | kfree((void *)(unsigned long)ds->pebs_buffer_base); | ||
112 | ds->pebs_buffer_base = 0; | ||
113 | } | ||
114 | |||
115 | static int alloc_bts_buffer(int cpu) | ||
116 | { | ||
117 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
118 | int node = cpu_to_node(cpu); | ||
119 | int max, thresh; | ||
120 | void *buffer; | ||
121 | |||
122 | if (!x86_pmu.bts) | ||
123 | return 0; | ||
124 | |||
125 | buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node); | ||
126 | if (unlikely(!buffer)) | ||
127 | return -ENOMEM; | ||
128 | |||
129 | max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; | ||
130 | thresh = max / 16; | ||
131 | |||
132 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
133 | ds->bts_index = ds->bts_buffer_base; | ||
134 | ds->bts_absolute_maximum = ds->bts_buffer_base + | ||
135 | max * BTS_RECORD_SIZE; | ||
136 | ds->bts_interrupt_threshold = ds->bts_absolute_maximum - | ||
137 | thresh * BTS_RECORD_SIZE; | ||
138 | |||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | static void release_bts_buffer(int cpu) | ||
143 | { | ||
144 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
145 | |||
146 | if (!ds || !x86_pmu.bts) | ||
147 | return; | ||
148 | |||
149 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
150 | ds->bts_buffer_base = 0; | ||
151 | } | ||
152 | |||
153 | static int alloc_ds_buffer(int cpu) | ||
154 | { | ||
155 | int node = cpu_to_node(cpu); | ||
156 | struct debug_store *ds; | ||
157 | |||
158 | ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node); | ||
159 | if (unlikely(!ds)) | ||
160 | return -ENOMEM; | ||
161 | |||
162 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
163 | |||
164 | return 0; | ||
165 | } | ||
166 | |||
167 | static void release_ds_buffer(int cpu) | ||
168 | { | ||
169 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
170 | |||
171 | if (!ds) | ||
172 | return; | ||
173 | |||
174 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
175 | kfree(ds); | ||
176 | } | ||
177 | |||
77 | static void release_ds_buffers(void) | 178 | static void release_ds_buffers(void) |
78 | { | 179 | { |
79 | int cpu; | 180 | int cpu; |
@@ -82,93 +183,77 @@ static void release_ds_buffers(void) | |||
82 | return; | 183 | return; |
83 | 184 | ||
84 | get_online_cpus(); | 185 | get_online_cpus(); |
85 | |||
86 | for_each_online_cpu(cpu) | 186 | for_each_online_cpu(cpu) |
87 | fini_debug_store_on_cpu(cpu); | 187 | fini_debug_store_on_cpu(cpu); |
88 | 188 | ||
89 | for_each_possible_cpu(cpu) { | 189 | for_each_possible_cpu(cpu) { |
90 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | 190 | release_pebs_buffer(cpu); |
91 | 191 | release_bts_buffer(cpu); | |
92 | if (!ds) | 192 | release_ds_buffer(cpu); |
93 | continue; | ||
94 | |||
95 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
96 | |||
97 | kfree((void *)(unsigned long)ds->pebs_buffer_base); | ||
98 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
99 | kfree(ds); | ||
100 | } | 193 | } |
101 | |||
102 | put_online_cpus(); | 194 | put_online_cpus(); |
103 | } | 195 | } |
104 | 196 | ||
105 | static int reserve_ds_buffers(void) | 197 | static void reserve_ds_buffers(void) |
106 | { | 198 | { |
107 | int cpu, err = 0; | 199 | int bts_err = 0, pebs_err = 0; |
200 | int cpu; | ||
201 | |||
202 | x86_pmu.bts_active = 0; | ||
203 | x86_pmu.pebs_active = 0; | ||
108 | 204 | ||
109 | if (!x86_pmu.bts && !x86_pmu.pebs) | 205 | if (!x86_pmu.bts && !x86_pmu.pebs) |
110 | return 0; | 206 | return; |
207 | |||
208 | if (!x86_pmu.bts) | ||
209 | bts_err = 1; | ||
210 | |||
211 | if (!x86_pmu.pebs) | ||
212 | pebs_err = 1; | ||
111 | 213 | ||
112 | get_online_cpus(); | 214 | get_online_cpus(); |
113 | 215 | ||
114 | for_each_possible_cpu(cpu) { | 216 | for_each_possible_cpu(cpu) { |
115 | struct debug_store *ds; | 217 | if (alloc_ds_buffer(cpu)) { |
116 | void *buffer; | 218 | bts_err = 1; |
117 | int max, thresh; | 219 | pebs_err = 1; |
220 | } | ||
221 | |||
222 | if (!bts_err && alloc_bts_buffer(cpu)) | ||
223 | bts_err = 1; | ||
118 | 224 | ||
119 | err = -ENOMEM; | 225 | if (!pebs_err && alloc_pebs_buffer(cpu)) |
120 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | 226 | pebs_err = 1; |
121 | if (unlikely(!ds)) | 227 | |
228 | if (bts_err && pebs_err) | ||
122 | break; | 229 | break; |
123 | per_cpu(cpu_hw_events, cpu).ds = ds; | 230 | } |
124 | |||
125 | if (x86_pmu.bts) { | ||
126 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
127 | if (unlikely(!buffer)) | ||
128 | break; | ||
129 | |||
130 | max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; | ||
131 | thresh = max / 16; | ||
132 | |||
133 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
134 | ds->bts_index = ds->bts_buffer_base; | ||
135 | ds->bts_absolute_maximum = ds->bts_buffer_base + | ||
136 | max * BTS_RECORD_SIZE; | ||
137 | ds->bts_interrupt_threshold = ds->bts_absolute_maximum - | ||
138 | thresh * BTS_RECORD_SIZE; | ||
139 | } | ||
140 | 231 | ||
141 | if (x86_pmu.pebs) { | 232 | if (bts_err) { |
142 | buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); | 233 | for_each_possible_cpu(cpu) |
143 | if (unlikely(!buffer)) | 234 | release_bts_buffer(cpu); |
144 | break; | 235 | } |
145 | |||
146 | max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; | ||
147 | |||
148 | ds->pebs_buffer_base = (u64)(unsigned long)buffer; | ||
149 | ds->pebs_index = ds->pebs_buffer_base; | ||
150 | ds->pebs_absolute_maximum = ds->pebs_buffer_base + | ||
151 | max * x86_pmu.pebs_record_size; | ||
152 | /* | ||
153 | * Always use single record PEBS | ||
154 | */ | ||
155 | ds->pebs_interrupt_threshold = ds->pebs_buffer_base + | ||
156 | x86_pmu.pebs_record_size; | ||
157 | } | ||
158 | 236 | ||
159 | err = 0; | 237 | if (pebs_err) { |
238 | for_each_possible_cpu(cpu) | ||
239 | release_pebs_buffer(cpu); | ||
160 | } | 240 | } |
161 | 241 | ||
162 | if (err) | 242 | if (bts_err && pebs_err) { |
163 | release_ds_buffers(); | 243 | for_each_possible_cpu(cpu) |
164 | else { | 244 | release_ds_buffer(cpu); |
245 | } else { | ||
246 | if (x86_pmu.bts && !bts_err) | ||
247 | x86_pmu.bts_active = 1; | ||
248 | |||
249 | if (x86_pmu.pebs && !pebs_err) | ||
250 | x86_pmu.pebs_active = 1; | ||
251 | |||
165 | for_each_online_cpu(cpu) | 252 | for_each_online_cpu(cpu) |
166 | init_debug_store_on_cpu(cpu); | 253 | init_debug_store_on_cpu(cpu); |
167 | } | 254 | } |
168 | 255 | ||
169 | put_online_cpus(); | 256 | put_online_cpus(); |
170 | |||
171 | return err; | ||
172 | } | 257 | } |
173 | 258 | ||
174 | /* | 259 | /* |
@@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void) | |||
233 | if (!event) | 318 | if (!event) |
234 | return 0; | 319 | return 0; |
235 | 320 | ||
236 | if (!ds) | 321 | if (!x86_pmu.bts_active) |
237 | return 0; | 322 | return 0; |
238 | 323 | ||
239 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | 324 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; |
@@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | |||
503 | struct pebs_record_core *at, *top; | 588 | struct pebs_record_core *at, *top; |
504 | int n; | 589 | int n; |
505 | 590 | ||
506 | if (!ds || !x86_pmu.pebs) | 591 | if (!x86_pmu.pebs_active) |
507 | return; | 592 | return; |
508 | 593 | ||
509 | at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; | 594 | at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; |
@@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | |||
545 | u64 status = 0; | 630 | u64 status = 0; |
546 | int bit, n; | 631 | int bit, n; |
547 | 632 | ||
548 | if (!ds || !x86_pmu.pebs) | 633 | if (!x86_pmu.pebs_active) |
549 | return; | 634 | return; |
550 | 635 | ||
551 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; | 636 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; |
@@ -630,9 +715,8 @@ static void intel_ds_init(void) | |||
630 | 715 | ||
631 | #else /* CONFIG_CPU_SUP_INTEL */ | 716 | #else /* CONFIG_CPU_SUP_INTEL */ |
632 | 717 | ||
633 | static int reserve_ds_buffers(void) | 718 | static void reserve_ds_buffers(void) |
634 | { | 719 | { |
635 | return 0; | ||
636 | } | 720 | } |
637 | 721 | ||
638 | static void release_ds_buffers(void) | 722 | static void release_ds_buffers(void) |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index bd1489c3ce09..4e8baad36d37 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -726,6 +726,12 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
726 | case 0x11: | 726 | case 0x11: |
727 | cpu_type = "x86-64/family11h"; | 727 | cpu_type = "x86-64/family11h"; |
728 | break; | 728 | break; |
729 | case 0x12: | ||
730 | cpu_type = "x86-64/family12h"; | ||
731 | break; | ||
732 | case 0x14: | ||
733 | cpu_type = "x86-64/family14h"; | ||
734 | break; | ||
729 | default: | 735 | default: |
730 | return -ENODEV; | 736 | return -ENODEV; |
731 | } | 737 | } |
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 42fb46f83883..a011bcc0f943 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -48,17 +48,24 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS]; | |||
48 | 48 | ||
49 | static u32 ibs_caps; | 49 | static u32 ibs_caps; |
50 | 50 | ||
51 | struct op_ibs_config { | 51 | struct ibs_config { |
52 | unsigned long op_enabled; | 52 | unsigned long op_enabled; |
53 | unsigned long fetch_enabled; | 53 | unsigned long fetch_enabled; |
54 | unsigned long max_cnt_fetch; | 54 | unsigned long max_cnt_fetch; |
55 | unsigned long max_cnt_op; | 55 | unsigned long max_cnt_op; |
56 | unsigned long rand_en; | 56 | unsigned long rand_en; |
57 | unsigned long dispatched_ops; | 57 | unsigned long dispatched_ops; |
58 | unsigned long branch_target; | ||
58 | }; | 59 | }; |
59 | 60 | ||
60 | static struct op_ibs_config ibs_config; | 61 | struct ibs_state { |
61 | static u64 ibs_op_ctl; | 62 | u64 ibs_op_ctl; |
63 | int branch_target; | ||
64 | unsigned long sample_size; | ||
65 | }; | ||
66 | |||
67 | static struct ibs_config ibs_config; | ||
68 | static struct ibs_state ibs_state; | ||
62 | 69 | ||
63 | /* | 70 | /* |
64 | * IBS cpuid feature detection | 71 | * IBS cpuid feature detection |
@@ -71,8 +78,16 @@ static u64 ibs_op_ctl; | |||
71 | * bit 0 is used to indicate the existence of IBS. | 78 | * bit 0 is used to indicate the existence of IBS. |
72 | */ | 79 | */ |
73 | #define IBS_CAPS_AVAIL (1U<<0) | 80 | #define IBS_CAPS_AVAIL (1U<<0) |
81 | #define IBS_CAPS_FETCHSAM (1U<<1) | ||
82 | #define IBS_CAPS_OPSAM (1U<<2) | ||
74 | #define IBS_CAPS_RDWROPCNT (1U<<3) | 83 | #define IBS_CAPS_RDWROPCNT (1U<<3) |
75 | #define IBS_CAPS_OPCNT (1U<<4) | 84 | #define IBS_CAPS_OPCNT (1U<<4) |
85 | #define IBS_CAPS_BRNTRGT (1U<<5) | ||
86 | #define IBS_CAPS_OPCNTEXT (1U<<6) | ||
87 | |||
88 | #define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ | ||
89 | | IBS_CAPS_FETCHSAM \ | ||
90 | | IBS_CAPS_OPSAM) | ||
76 | 91 | ||
77 | /* | 92 | /* |
78 | * IBS APIC setup | 93 | * IBS APIC setup |
@@ -99,12 +114,12 @@ static u32 get_ibs_caps(void) | |||
99 | /* check IBS cpuid feature flags */ | 114 | /* check IBS cpuid feature flags */ |
100 | max_level = cpuid_eax(0x80000000); | 115 | max_level = cpuid_eax(0x80000000); |
101 | if (max_level < IBS_CPUID_FEATURES) | 116 | if (max_level < IBS_CPUID_FEATURES) |
102 | return IBS_CAPS_AVAIL; | 117 | return IBS_CAPS_DEFAULT; |
103 | 118 | ||
104 | ibs_caps = cpuid_eax(IBS_CPUID_FEATURES); | 119 | ibs_caps = cpuid_eax(IBS_CPUID_FEATURES); |
105 | if (!(ibs_caps & IBS_CAPS_AVAIL)) | 120 | if (!(ibs_caps & IBS_CAPS_AVAIL)) |
106 | /* cpuid flags not valid */ | 121 | /* cpuid flags not valid */ |
107 | return IBS_CAPS_AVAIL; | 122 | return IBS_CAPS_DEFAULT; |
108 | 123 | ||
109 | return ibs_caps; | 124 | return ibs_caps; |
110 | } | 125 | } |
@@ -197,8 +212,8 @@ op_amd_handle_ibs(struct pt_regs * const regs, | |||
197 | rdmsrl(MSR_AMD64_IBSOPCTL, ctl); | 212 | rdmsrl(MSR_AMD64_IBSOPCTL, ctl); |
198 | if (ctl & IBS_OP_VAL) { | 213 | if (ctl & IBS_OP_VAL) { |
199 | rdmsrl(MSR_AMD64_IBSOPRIP, val); | 214 | rdmsrl(MSR_AMD64_IBSOPRIP, val); |
200 | oprofile_write_reserve(&entry, regs, val, | 215 | oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE, |
201 | IBS_OP_CODE, IBS_OP_SIZE); | 216 | ibs_state.sample_size); |
202 | oprofile_add_data64(&entry, val); | 217 | oprofile_add_data64(&entry, val); |
203 | rdmsrl(MSR_AMD64_IBSOPDATA, val); | 218 | rdmsrl(MSR_AMD64_IBSOPDATA, val); |
204 | oprofile_add_data64(&entry, val); | 219 | oprofile_add_data64(&entry, val); |
@@ -210,10 +225,14 @@ op_amd_handle_ibs(struct pt_regs * const regs, | |||
210 | oprofile_add_data64(&entry, val); | 225 | oprofile_add_data64(&entry, val); |
211 | rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); | 226 | rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); |
212 | oprofile_add_data64(&entry, val); | 227 | oprofile_add_data64(&entry, val); |
228 | if (ibs_state.branch_target) { | ||
229 | rdmsrl(MSR_AMD64_IBSBRTARGET, val); | ||
230 | oprofile_add_data(&entry, (unsigned long)val); | ||
231 | } | ||
213 | oprofile_write_commit(&entry); | 232 | oprofile_write_commit(&entry); |
214 | 233 | ||
215 | /* reenable the IRQ */ | 234 | /* reenable the IRQ */ |
216 | ctl = op_amd_randomize_ibs_op(ibs_op_ctl); | 235 | ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl); |
217 | wrmsrl(MSR_AMD64_IBSOPCTL, ctl); | 236 | wrmsrl(MSR_AMD64_IBSOPCTL, ctl); |
218 | } | 237 | } |
219 | } | 238 | } |
@@ -226,21 +245,32 @@ static inline void op_amd_start_ibs(void) | |||
226 | if (!ibs_caps) | 245 | if (!ibs_caps) |
227 | return; | 246 | return; |
228 | 247 | ||
248 | memset(&ibs_state, 0, sizeof(ibs_state)); | ||
249 | |||
250 | /* | ||
251 | * Note: Since the max count settings may out of range we | ||
252 | * write back the actual used values so that userland can read | ||
253 | * it. | ||
254 | */ | ||
255 | |||
229 | if (ibs_config.fetch_enabled) { | 256 | if (ibs_config.fetch_enabled) { |
230 | val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT; | 257 | val = ibs_config.max_cnt_fetch >> 4; |
258 | val = min(val, IBS_FETCH_MAX_CNT); | ||
259 | ibs_config.max_cnt_fetch = val << 4; | ||
231 | val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; | 260 | val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; |
232 | val |= IBS_FETCH_ENABLE; | 261 | val |= IBS_FETCH_ENABLE; |
233 | wrmsrl(MSR_AMD64_IBSFETCHCTL, val); | 262 | wrmsrl(MSR_AMD64_IBSFETCHCTL, val); |
234 | } | 263 | } |
235 | 264 | ||
236 | if (ibs_config.op_enabled) { | 265 | if (ibs_config.op_enabled) { |
237 | ibs_op_ctl = ibs_config.max_cnt_op >> 4; | 266 | val = ibs_config.max_cnt_op >> 4; |
238 | if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) { | 267 | if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) { |
239 | /* | 268 | /* |
240 | * IbsOpCurCnt not supported. See | 269 | * IbsOpCurCnt not supported. See |
241 | * op_amd_randomize_ibs_op() for details. | 270 | * op_amd_randomize_ibs_op() for details. |
242 | */ | 271 | */ |
243 | ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL); | 272 | val = clamp(val, 0x0081ULL, 0xFF80ULL); |
273 | ibs_config.max_cnt_op = val << 4; | ||
244 | } else { | 274 | } else { |
245 | /* | 275 | /* |
246 | * The start value is randomized with a | 276 | * The start value is randomized with a |
@@ -248,13 +278,24 @@ static inline void op_amd_start_ibs(void) | |||
248 | * with the half of the randomized range. Also | 278 | * with the half of the randomized range. Also |
249 | * avoid underflows. | 279 | * avoid underflows. |
250 | */ | 280 | */ |
251 | ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET, | 281 | val += IBS_RANDOM_MAXCNT_OFFSET; |
252 | IBS_OP_MAX_CNT); | 282 | if (ibs_caps & IBS_CAPS_OPCNTEXT) |
283 | val = min(val, IBS_OP_MAX_CNT_EXT); | ||
284 | else | ||
285 | val = min(val, IBS_OP_MAX_CNT); | ||
286 | ibs_config.max_cnt_op = | ||
287 | (val - IBS_RANDOM_MAXCNT_OFFSET) << 4; | ||
288 | } | ||
289 | val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT); | ||
290 | val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; | ||
291 | val |= IBS_OP_ENABLE; | ||
292 | ibs_state.ibs_op_ctl = val; | ||
293 | ibs_state.sample_size = IBS_OP_SIZE; | ||
294 | if (ibs_config.branch_target) { | ||
295 | ibs_state.branch_target = 1; | ||
296 | ibs_state.sample_size++; | ||
253 | } | 297 | } |
254 | if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops) | 298 | val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl); |
255 | ibs_op_ctl |= IBS_OP_CNT_CTL; | ||
256 | ibs_op_ctl |= IBS_OP_ENABLE; | ||
257 | val = op_amd_randomize_ibs_op(ibs_op_ctl); | ||
258 | wrmsrl(MSR_AMD64_IBSOPCTL, val); | 299 | wrmsrl(MSR_AMD64_IBSOPCTL, val); |
259 | } | 300 | } |
260 | } | 301 | } |
@@ -281,29 +322,25 @@ static inline int eilvt_is_available(int offset) | |||
281 | 322 | ||
282 | static inline int ibs_eilvt_valid(void) | 323 | static inline int ibs_eilvt_valid(void) |
283 | { | 324 | { |
284 | u64 val; | ||
285 | int offset; | 325 | int offset; |
326 | u64 val; | ||
286 | 327 | ||
287 | rdmsrl(MSR_AMD64_IBSCTL, val); | 328 | rdmsrl(MSR_AMD64_IBSCTL, val); |
329 | offset = val & IBSCTL_LVT_OFFSET_MASK; | ||
330 | |||
288 | if (!(val & IBSCTL_LVT_OFFSET_VALID)) { | 331 | if (!(val & IBSCTL_LVT_OFFSET_VALID)) { |
289 | pr_err(FW_BUG "cpu %d, invalid IBS " | 332 | pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", |
290 | "interrupt offset %d (MSR%08X=0x%016llx)", | 333 | smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); |
291 | smp_processor_id(), offset, | ||
292 | MSR_AMD64_IBSCTL, val); | ||
293 | return 0; | 334 | return 0; |
294 | } | 335 | } |
295 | 336 | ||
296 | offset = val & IBSCTL_LVT_OFFSET_MASK; | 337 | if (!eilvt_is_available(offset)) { |
297 | 338 | pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", | |
298 | if (eilvt_is_available(offset)) | 339 | smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); |
299 | return !0; | 340 | return 0; |
300 | 341 | } | |
301 | pr_err(FW_BUG "cpu %d, IBS interrupt offset %d " | ||
302 | "not available (MSR%08X=0x%016llx)", | ||
303 | smp_processor_id(), offset, | ||
304 | MSR_AMD64_IBSCTL, val); | ||
305 | 342 | ||
306 | return 0; | 343 | return 1; |
307 | } | 344 | } |
308 | 345 | ||
309 | static inline int get_ibs_offset(void) | 346 | static inline int get_ibs_offset(void) |
@@ -630,28 +667,33 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
630 | /* model specific files */ | 667 | /* model specific files */ |
631 | 668 | ||
632 | /* setup some reasonable defaults */ | 669 | /* setup some reasonable defaults */ |
670 | memset(&ibs_config, 0, sizeof(ibs_config)); | ||
633 | ibs_config.max_cnt_fetch = 250000; | 671 | ibs_config.max_cnt_fetch = 250000; |
634 | ibs_config.fetch_enabled = 0; | ||
635 | ibs_config.max_cnt_op = 250000; | 672 | ibs_config.max_cnt_op = 250000; |
636 | ibs_config.op_enabled = 0; | 673 | |
637 | ibs_config.dispatched_ops = 0; | 674 | if (ibs_caps & IBS_CAPS_FETCHSAM) { |
638 | 675 | dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); | |
639 | dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); | 676 | oprofilefs_create_ulong(sb, dir, "enable", |
640 | oprofilefs_create_ulong(sb, dir, "enable", | 677 | &ibs_config.fetch_enabled); |
641 | &ibs_config.fetch_enabled); | 678 | oprofilefs_create_ulong(sb, dir, "max_count", |
642 | oprofilefs_create_ulong(sb, dir, "max_count", | 679 | &ibs_config.max_cnt_fetch); |
643 | &ibs_config.max_cnt_fetch); | 680 | oprofilefs_create_ulong(sb, dir, "rand_enable", |
644 | oprofilefs_create_ulong(sb, dir, "rand_enable", | 681 | &ibs_config.rand_en); |
645 | &ibs_config.rand_en); | 682 | } |
646 | 683 | ||
647 | dir = oprofilefs_mkdir(sb, root, "ibs_op"); | 684 | if (ibs_caps & IBS_CAPS_OPSAM) { |
648 | oprofilefs_create_ulong(sb, dir, "enable", | 685 | dir = oprofilefs_mkdir(sb, root, "ibs_op"); |
649 | &ibs_config.op_enabled); | 686 | oprofilefs_create_ulong(sb, dir, "enable", |
650 | oprofilefs_create_ulong(sb, dir, "max_count", | 687 | &ibs_config.op_enabled); |
651 | &ibs_config.max_cnt_op); | 688 | oprofilefs_create_ulong(sb, dir, "max_count", |
652 | if (ibs_caps & IBS_CAPS_OPCNT) | 689 | &ibs_config.max_cnt_op); |
653 | oprofilefs_create_ulong(sb, dir, "dispatched_ops", | 690 | if (ibs_caps & IBS_CAPS_OPCNT) |
654 | &ibs_config.dispatched_ops); | 691 | oprofilefs_create_ulong(sb, dir, "dispatched_ops", |
692 | &ibs_config.dispatched_ops); | ||
693 | if (ibs_caps & IBS_CAPS_BRNTRGT) | ||
694 | oprofilefs_create_ulong(sb, dir, "branch_target", | ||
695 | &ibs_config.branch_target); | ||
696 | } | ||
655 | 697 | ||
656 | return 0; | 698 | return 0; |
657 | } | 699 | } |