author	Linus Torvalds <torvalds@linux-foundation.org>	2010-10-27 21:48:00 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-10-27 21:48:00 -0400
commit	a042e26137d7674ac04b1cd2d5c06b9ebc1ee2d5 (patch)
tree	c1a7a8bda41b99caa4b4a0fe320fc73278879f7d /arch/x86
parent	f66dd539feb849a3a00f7fac67c026e0935e373a (diff)
parent	e25804a0327dad954f7d43803178fdef2fd35b4e (diff)
Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (50 commits)
  perf python scripting: Add futex-contention script
  perf python scripting: Fixup cut'n'paste error in sctop script
  perf scripting: Shut up 'perf record' final status
  perf record: Remove newline character from perror() argument
  perf python scripting: Support fedora 11 (audit 1.7.17)
  perf python scripting: Improve the syscalls-by-pid script
  perf python scripting: print the syscall name on sctop
  perf python scripting: Improve the syscalls-counts script
  perf python scripting: Improve the failed-syscalls-by-pid script
  kprobes: Remove redundant text_mutex lock in optimize
  x86/oprofile: Fix uninitialized variable use in debug printk
  tracing: Fix 'faild' -> 'failed' typo
  perf probe: Fix format specified for Dwarf_Off parameter
  perf trace: Fix detection of script extension
  perf trace: Use $PERF_EXEC_PATH in canned report scripts
  perf tools: Document event modifiers
  perf tools: Remove direct slang.h include
  perf_events: Fix for transaction recovery in group_sched_in()
  perf_events: Revert: Fix transaction recovery in group_sched_in()
  perf, x86: Use NUMA aware allocations for PEBS/BTS/DS allocations
  ...
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/include/asm/msr-index.h	1
-rw-r--r--	arch/x86/include/asm/perf_event.h	19
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	21
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_ds.c	216
-rw-r--r--	arch/x86/oprofile/nmi_int.c	6
-rw-r--r--	arch/x86/oprofile/op_model_amd.c	146
6 files changed, 271 insertions(+), 138 deletions(-)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 83c4bb1d917d..3ea3dc487047 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -121,6 +121,7 @@
 #define MSR_AMD64_IBSDCLINAD		0xc0011038
 #define MSR_AMD64_IBSDCPHYSAD		0xc0011039
 #define MSR_AMD64_IBSCTL		0xc001103a
+#define MSR_AMD64_IBSBRTARGET		0xc001103b

 /* Fam 10h MSRs */
 #define MSR_FAM10H_MMIO_CONF_BASE	0xc0010058
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 6e742cc4251b..550e26b1dbb3 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -111,17 +111,18 @@ union cpuid10_edx {
 #define X86_PMC_IDX_FIXED_BTS	(X86_PMC_IDX_FIXED + 16)

 /* IbsFetchCtl bits/masks */
 #define IBS_FETCH_RAND_EN	(1ULL<<57)
 #define IBS_FETCH_VAL		(1ULL<<49)
 #define IBS_FETCH_ENABLE	(1ULL<<48)
 #define IBS_FETCH_CNT		0xFFFF0000ULL
 #define IBS_FETCH_MAX_CNT	0x0000FFFFULL

 /* IbsOpCtl bits */
 #define IBS_OP_CNT_CTL		(1ULL<<19)
 #define IBS_OP_VAL		(1ULL<<18)
 #define IBS_OP_ENABLE		(1ULL<<17)
 #define IBS_OP_MAX_CNT		0x0000FFFFULL
+#define IBS_OP_MAX_CNT_EXT	0x007FFFFFULL	/* not a register bit mask */

 #ifdef CONFIG_PERF_EVENTS
 extern void init_hw_perf_events(void);
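
Note (editorial, not part of the commit): IBS_OP_MAX_CNT_EXT is marked "not a register bit mask" because the extended max count is a 23-bit software value that is split across two IbsOpCtl fields rather than OR-ed into the register directly. A minimal sketch of the split, mirroring the encoding op_amd_start_ibs() uses in the op_model_amd.c hunk below (the helper name is hypothetical):

	/*
	 * 'val' is the desired max count divided by 16, at most 23 bits:
	 *   val[15:0]  -> IbsOpCtl[15:0]  (the classic IbsOpMaxCnt field)
	 *   val[22:16] -> IbsOpCtl[26:20] (the OpCntExt extension)
	 */
	static inline u64 ibs_op_ctl_from_max_cnt(u64 val)
	{
		return ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
	}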
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c1e8c7a51164..ed6310183efb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -237,6 +237,7 @@ struct x86_pmu {
 	 * Intel DebugStore bits
 	 */
 	int		bts, pebs;
+	int		bts_active, pebs_active;
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
@@ -380,7 +381,7 @@ static void release_pmc_hardware(void) {}
380 381
381#endif 382#endif
382 383
383static int reserve_ds_buffers(void); 384static void reserve_ds_buffers(void);
384static void release_ds_buffers(void); 385static void release_ds_buffers(void);
385 386
386static void hw_perf_event_destroy(struct perf_event *event) 387static void hw_perf_event_destroy(struct perf_event *event)
@@ -477,7 +478,7 @@ static int x86_setup_perfctr(struct perf_event *event)
 	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
 	    (hwc->sample_period == 1)) {
 		/* BTS is not supported by this architecture. */
-		if (!x86_pmu.bts)
+		if (!x86_pmu.bts_active)
 			return -EOPNOTSUPP;

 		/* BTS is currently only allowed for user-mode. */
@@ -496,12 +497,13 @@ static int x86_pmu_hw_config(struct perf_event *event)
 		int precise = 0;

 		/* Support for constant skid */
-		if (x86_pmu.pebs)
+		if (x86_pmu.pebs_active) {
 			precise++;

-		/* Support for IP fixup */
-		if (x86_pmu.lbr_nr)
-			precise++;
+			/* Support for IP fixup */
+			if (x86_pmu.lbr_nr)
+				precise++;
+		}

 		if (event->attr.precise_ip > precise)
 			return -EOPNOTSUPP;
@@ -543,11 +545,8 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	if (atomic_read(&active_events) == 0) {
 		if (!reserve_pmc_hardware())
 			err = -EBUSY;
-		else {
-			err = reserve_ds_buffers();
-			if (err)
-				release_pmc_hardware();
-		}
+		else
+			reserve_ds_buffers();
 	}
 	if (!err)
 		atomic_inc(&active_events);
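
Note (editorial, not part of the commit): the net effect of this hunk is that DS buffer allocation can no longer fail PMU initialization. reserve_ds_buffers() is now void; on allocation failure it simply leaves x86_pmu.bts_active and x86_pmu.pebs_active at zero, and only events that actually require BTS or PEBS are refused later, as in the x86_setup_perfctr() hunk above. Sketch of the resulting per-event check (wants_bts() is a hypothetical stand-in for the real condition):

	if (wants_bts(event) && !x86_pmu.bts_active)
		return -EOPNOTSUPP;	/* this event fails; the PMU itself stays usable */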
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 4977f9c400e5..b7dcd9f2b8a0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -74,6 +74,107 @@ static void fini_debug_store_on_cpu(int cpu)
 	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
 }

+static int alloc_pebs_buffer(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	int node = cpu_to_node(cpu);
+	int max, thresh = 1; /* always use a single PEBS record */
+	void *buffer;
+
+	if (!x86_pmu.pebs)
+		return 0;
+
+	buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
+	if (unlikely(!buffer))
+		return -ENOMEM;
+
+	max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
+
+	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+	ds->pebs_index = ds->pebs_buffer_base;
+	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
+		max * x86_pmu.pebs_record_size;
+
+	ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
+		thresh * x86_pmu.pebs_record_size;
+
+	return 0;
+}
+
+static void release_pebs_buffer(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+	if (!ds || !x86_pmu.pebs)
+		return;
+
+	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	ds->pebs_buffer_base = 0;
+}
+
+static int alloc_bts_buffer(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	int node = cpu_to_node(cpu);
+	int max, thresh;
+	void *buffer;
+
+	if (!x86_pmu.bts)
+		return 0;
+
+	buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
+	if (unlikely(!buffer))
+		return -ENOMEM;
+
+	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
+	thresh = max / 16;
+
+	ds->bts_buffer_base = (u64)(unsigned long)buffer;
+	ds->bts_index = ds->bts_buffer_base;
+	ds->bts_absolute_maximum = ds->bts_buffer_base +
+		max * BTS_RECORD_SIZE;
+	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
+		thresh * BTS_RECORD_SIZE;
+
+	return 0;
+}
+
+static void release_bts_buffer(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+	if (!ds || !x86_pmu.bts)
+		return;
+
+	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	ds->bts_buffer_base = 0;
+}
+
+static int alloc_ds_buffer(int cpu)
+{
+	int node = cpu_to_node(cpu);
+	struct debug_store *ds;
+
+	ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
+	if (unlikely(!ds))
+		return -ENOMEM;
+
+	per_cpu(cpu_hw_events, cpu).ds = ds;
+
+	return 0;
+}
+
+static void release_ds_buffer(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+	if (!ds)
+		return;
+
+	per_cpu(cpu_hw_events, cpu).ds = NULL;
+	kfree(ds);
+}
+
 static void release_ds_buffers(void)
 {
 	int cpu;
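
Note (editorial, not part of the commit): these new helpers are where the shortlog's "Use NUMA aware allocations for PEBS/BTS/DS allocations" lands. The old kzalloc() calls become node-local allocations, so each CPU's debug store and sampling buffers sit on that CPU's own memory node. The pattern, reduced to one line:

	/* zeroed, node-local allocation for the buffers of 'cpu' */
	buffer = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));

Since the hardware writes these buffers from the owning CPU on every sample, keeping them node-local avoids remote memory traffic on the sampling hot path.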
@@ -82,93 +183,77 @@ static void release_ds_buffers(void)
 		return;

 	get_online_cpus();
-
 	for_each_online_cpu(cpu)
 		fini_debug_store_on_cpu(cpu);

 	for_each_possible_cpu(cpu) {
-		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-		if (!ds)
-			continue;
-
-		per_cpu(cpu_hw_events, cpu).ds = NULL;
-
-		kfree((void *)(unsigned long)ds->pebs_buffer_base);
-		kfree((void *)(unsigned long)ds->bts_buffer_base);
-		kfree(ds);
+		release_pebs_buffer(cpu);
+		release_bts_buffer(cpu);
+		release_ds_buffer(cpu);
 	}
-
 	put_online_cpus();
 }

-static int reserve_ds_buffers(void)
+static void reserve_ds_buffers(void)
 {
-	int cpu, err = 0;
+	int bts_err = 0, pebs_err = 0;
+	int cpu;
+
+	x86_pmu.bts_active = 0;
+	x86_pmu.pebs_active = 0;

 	if (!x86_pmu.bts && !x86_pmu.pebs)
-		return 0;
+		return;
+
+	if (!x86_pmu.bts)
+		bts_err = 1;
+
+	if (!x86_pmu.pebs)
+		pebs_err = 1;

 	get_online_cpus();

 	for_each_possible_cpu(cpu) {
-		struct debug_store *ds;
-		void *buffer;
-		int max, thresh;
+		if (alloc_ds_buffer(cpu)) {
+			bts_err = 1;
+			pebs_err = 1;
+		}

+		if (!bts_err && alloc_bts_buffer(cpu))
+			bts_err = 1;
+
+		if (!pebs_err && alloc_pebs_buffer(cpu))
+			pebs_err = 1;
+
-		err = -ENOMEM;
-		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
-		if (unlikely(!ds))
+		if (bts_err && pebs_err)
 			break;
-		per_cpu(cpu_hw_events, cpu).ds = ds;
-
-		if (x86_pmu.bts) {
-			buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
-			if (unlikely(!buffer))
-				break;
-
-			max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-			thresh = max / 16;
-
-			ds->bts_buffer_base = (u64)(unsigned long)buffer;
-			ds->bts_index = ds->bts_buffer_base;
-			ds->bts_absolute_maximum = ds->bts_buffer_base +
-				max * BTS_RECORD_SIZE;
-			ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-				thresh * BTS_RECORD_SIZE;
-		}
+	}

-		if (x86_pmu.pebs) {
-			buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
-			if (unlikely(!buffer))
-				break;
-
-			max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
-
-			ds->pebs_buffer_base = (u64)(unsigned long)buffer;
-			ds->pebs_index = ds->pebs_buffer_base;
-			ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-				max * x86_pmu.pebs_record_size;
-			/*
-			 * Always use single record PEBS
-			 */
-			ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
-				x86_pmu.pebs_record_size;
-		}
+	if (bts_err) {
+		for_each_possible_cpu(cpu)
+			release_bts_buffer(cpu);
+	}

-		err = 0;
+	if (pebs_err) {
+		for_each_possible_cpu(cpu)
+			release_pebs_buffer(cpu);
 	}

-	if (err)
-		release_ds_buffers();
-	else {
+	if (bts_err && pebs_err) {
+		for_each_possible_cpu(cpu)
+			release_ds_buffer(cpu);
+	} else {
+		if (x86_pmu.bts && !bts_err)
+			x86_pmu.bts_active = 1;
+
+		if (x86_pmu.pebs && !pebs_err)
+			x86_pmu.pebs_active = 1;
+
 		for_each_online_cpu(cpu)
 			init_debug_store_on_cpu(cpu);
 	}

 	put_online_cpus();
-
-	return err;
 }

 /*
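Note (editorial, not part of the commit): the rewritten reserve_ds_buffers() degrades per feature instead of all-or-nothing. bts_err and pebs_err start out set when the corresponding feature is absent, any per-CPU allocation failure sets them, and cleanup is scoped to whichever feature failed. Condensed outcome matrix:

	/*
	 *  bts_err  pebs_err   result
	 *  0        0          both active, DS initialized on all CPUs
	 *  0        1          PEBS buffers freed, BTS still comes up
	 *  1        0          BTS buffers freed, PEBS still comes up
	 *  1        1          everything freed, including the debug_store
	 */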
@@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void)
 	if (!event)
 		return 0;

-	if (!ds)
+	if (!x86_pmu.bts_active)
 		return 0;

 	at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
@@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	struct pebs_record_core *at, *top;
 	int n;

-	if (!ds || !x86_pmu.pebs)
+	if (!x86_pmu.pebs_active)
 		return;

 	at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
@@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	u64 status = 0;
 	int bit, n;

-	if (!ds || !x86_pmu.pebs)
+	if (!x86_pmu.pebs_active)
 		return;

 	at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
@@ -630,9 +715,8 @@ static void intel_ds_init(void)

 #else /* CONFIG_CPU_SUP_INTEL */

-static int reserve_ds_buffers(void)
+static void reserve_ds_buffers(void)
 {
-	return 0;
 }

 static void release_ds_buffers(void)
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index bd1489c3ce09..4e8baad36d37 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -726,6 +726,12 @@ int __init op_nmi_init(struct oprofile_operations *ops)
 	case 0x11:
 		cpu_type = "x86-64/family11h";
 		break;
+	case 0x12:
+		cpu_type = "x86-64/family12h";
+		break;
+	case 0x14:
+		cpu_type = "x86-64/family14h";
+		break;
 	default:
 		return -ENODEV;
 	}
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 42fb46f83883..a011bcc0f943 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -48,17 +48,24 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS];

 static u32 ibs_caps;

-struct op_ibs_config {
+struct ibs_config {
 	unsigned long op_enabled;
 	unsigned long fetch_enabled;
 	unsigned long max_cnt_fetch;
 	unsigned long max_cnt_op;
 	unsigned long rand_en;
 	unsigned long dispatched_ops;
+	unsigned long branch_target;
 };

-static struct op_ibs_config ibs_config;
-static u64 ibs_op_ctl;
+struct ibs_state {
+	u64		ibs_op_ctl;
+	int		branch_target;
+	unsigned long	sample_size;
+};
+
+static struct ibs_config ibs_config;
+static struct ibs_state ibs_state;

 /*
  * IBS cpuid feature detection
@@ -71,8 +78,16 @@ static u64 ibs_op_ctl;
  * bit 0 is used to indicate the existence of IBS.
  */
 #define IBS_CAPS_AVAIL		(1U<<0)
+#define IBS_CAPS_FETCHSAM	(1U<<1)
+#define IBS_CAPS_OPSAM		(1U<<2)
 #define IBS_CAPS_RDWROPCNT	(1U<<3)
 #define IBS_CAPS_OPCNT		(1U<<4)
+#define IBS_CAPS_BRNTRGT	(1U<<5)
+#define IBS_CAPS_OPCNTEXT	(1U<<6)
+
+#define IBS_CAPS_DEFAULT	(IBS_CAPS_AVAIL		\
+				 | IBS_CAPS_FETCHSAM	\
+				 | IBS_CAPS_OPSAM)

 /*
  * IBS APIC setup
@@ -99,12 +114,12 @@ static u32 get_ibs_caps(void)
 	/* check IBS cpuid feature flags */
 	max_level = cpuid_eax(0x80000000);
 	if (max_level < IBS_CPUID_FEATURES)
-		return IBS_CAPS_AVAIL;
+		return IBS_CAPS_DEFAULT;

 	ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
 	if (!(ibs_caps & IBS_CAPS_AVAIL))
 		/* cpuid flags not valid */
-		return IBS_CAPS_AVAIL;
+		return IBS_CAPS_DEFAULT;

 	return ibs_caps;
 }
@@ -197,8 +212,8 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 	rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
 	if (ctl & IBS_OP_VAL) {
 		rdmsrl(MSR_AMD64_IBSOPRIP, val);
-		oprofile_write_reserve(&entry, regs, val,
-				       IBS_OP_CODE, IBS_OP_SIZE);
+		oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
+				       ibs_state.sample_size);
 		oprofile_add_data64(&entry, val);
 		rdmsrl(MSR_AMD64_IBSOPDATA, val);
 		oprofile_add_data64(&entry, val);
@@ -210,10 +225,14 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 		oprofile_add_data64(&entry, val);
 		rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
 		oprofile_add_data64(&entry, val);
+		if (ibs_state.branch_target) {
+			rdmsrl(MSR_AMD64_IBSBRTARGET, val);
+			oprofile_add_data(&entry, (unsigned long)val);
+		}
 		oprofile_write_commit(&entry);

 		/* reenable the IRQ */
-		ctl = op_amd_randomize_ibs_op(ibs_op_ctl);
+		ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
 		wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
 	}
 }
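
Note (editorial, not part of the commit): the handler now reserves ibs_state.sample_size entries instead of the fixed IBS_OP_SIZE because an op sample optionally carries one extra word, the MSR_AMD64_IBSBRTARGET value added just before oprofile_write_commit(). The reserve and the subsequent adds must stay in sync; op_amd_start_ibs() in the next hunks precomputes the size once:

	ibs_state.sample_size = IBS_OP_SIZE;
	if (ibs_config.branch_target)
		ibs_state.sample_size++;	/* room for the IBSBRTARGET word */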
@@ -226,21 +245,32 @@ static inline void op_amd_start_ibs(void)
 	if (!ibs_caps)
 		return;

+	memset(&ibs_state, 0, sizeof(ibs_state));
+
+	/*
+	 * Note: Since the max count settings may be out of range we
+	 * write back the actual values used so that userland can read
+	 * them.
+	 */
+
 	if (ibs_config.fetch_enabled) {
-		val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT;
+		val = ibs_config.max_cnt_fetch >> 4;
+		val = min(val, IBS_FETCH_MAX_CNT);
+		ibs_config.max_cnt_fetch = val << 4;
 		val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
 		val |= IBS_FETCH_ENABLE;
 		wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
 	}

 	if (ibs_config.op_enabled) {
-		ibs_op_ctl = ibs_config.max_cnt_op >> 4;
+		val = ibs_config.max_cnt_op >> 4;
 		if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
 			/*
 			 * IbsOpCurCnt not supported. See
 			 * op_amd_randomize_ibs_op() for details.
 			 */
-			ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL);
+			val = clamp(val, 0x0081ULL, 0xFF80ULL);
+			ibs_config.max_cnt_op = val << 4;
 		} else {
 			/*
 			 * The start value is randomized with a
@@ -248,13 +278,24 @@ static inline void op_amd_start_ibs(void)
 			 * with the half of the randomized range. Also
 			 * avoid underflows.
 			 */
-			ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET,
-					 IBS_OP_MAX_CNT);
+			val += IBS_RANDOM_MAXCNT_OFFSET;
+			if (ibs_caps & IBS_CAPS_OPCNTEXT)
+				val = min(val, IBS_OP_MAX_CNT_EXT);
+			else
+				val = min(val, IBS_OP_MAX_CNT);
+			ibs_config.max_cnt_op =
+				(val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
 		}
-		if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
-			ibs_op_ctl |= IBS_OP_CNT_CTL;
-		ibs_op_ctl |= IBS_OP_ENABLE;
-		val = op_amd_randomize_ibs_op(ibs_op_ctl);
+		val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
+		val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
+		val |= IBS_OP_ENABLE;
+		ibs_state.ibs_op_ctl = val;
+		ibs_state.sample_size = IBS_OP_SIZE;
+		if (ibs_config.branch_target) {
+			ibs_state.branch_target = 1;
+			ibs_state.sample_size++;
+		}
+		val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
 		wrmsrl(MSR_AMD64_IBSOPCTL, val);
 	}
 }
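
Note (editorial, not part of the commit): a worked example of the new max-count handling, ignoring the IBS_RANDOM_MAXCNT_OFFSET adjustment. Suppose userland writes max_cnt_op = 0x1500000 (about 22 million ops) on a CPU with IBS_CAPS_OPCNTEXT:

	u64 val = 0x1500000 >> 4;	/* 0x150000: a 21-bit value, <= IBS_OP_MAX_CNT_EXT */
	u64 ctl = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
	/*
	 * ctl == 0x1500000: val[22:16] == 0x15 landed in IbsOpCtl[26:20],
	 * and val[15:0] == 0 stayed in IbsOpCtl[15:0].
	 */

Without OPCNTEXT the same request is capped via min(val, IBS_OP_MAX_CNT), limiting the period to 0xFFFF * 16 ops, and the capped value is written back to ibs_config.max_cnt_op so userland can see what was actually programmed.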
@@ -281,29 +322,25 @@ static inline int eilvt_is_available(int offset)

 static inline int ibs_eilvt_valid(void)
 {
-	u64 val;
 	int offset;
+	u64 val;

 	rdmsrl(MSR_AMD64_IBSCTL, val);
+	offset = val & IBSCTL_LVT_OFFSET_MASK;
+
 	if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
-		pr_err(FW_BUG "cpu %d, invalid IBS "
-		       "interrupt offset %d (MSR%08X=0x%016llx)",
-		       smp_processor_id(), offset,
-		       MSR_AMD64_IBSCTL, val);
+		pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
+		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
 		return 0;
 	}

-	offset = val & IBSCTL_LVT_OFFSET_MASK;
-
-	if (eilvt_is_available(offset))
-		return !0;
-
-	pr_err(FW_BUG "cpu %d, IBS interrupt offset %d "
-	       "not available (MSR%08X=0x%016llx)",
-	       smp_processor_id(), offset,
-	       MSR_AMD64_IBSCTL, val);
+	if (!eilvt_is_available(offset)) {
+		pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
+		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+		return 0;
+	}

-	return 0;
+	return 1;
 }

 static inline int get_ibs_offset(void)
@@ -630,28 +667,33 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
 	/* model specific files */

 	/* setup some reasonable defaults */
+	memset(&ibs_config, 0, sizeof(ibs_config));
 	ibs_config.max_cnt_fetch = 250000;
-	ibs_config.fetch_enabled = 0;
 	ibs_config.max_cnt_op = 250000;
-	ibs_config.op_enabled = 0;
-	ibs_config.dispatched_ops = 0;

-	dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
-	oprofilefs_create_ulong(sb, dir, "enable",
-				&ibs_config.fetch_enabled);
-	oprofilefs_create_ulong(sb, dir, "max_count",
-				&ibs_config.max_cnt_fetch);
-	oprofilefs_create_ulong(sb, dir, "rand_enable",
-				&ibs_config.rand_en);
+	if (ibs_caps & IBS_CAPS_FETCHSAM) {
+		dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
+		oprofilefs_create_ulong(sb, dir, "enable",
+					&ibs_config.fetch_enabled);
+		oprofilefs_create_ulong(sb, dir, "max_count",
+					&ibs_config.max_cnt_fetch);
+		oprofilefs_create_ulong(sb, dir, "rand_enable",
+					&ibs_config.rand_en);
+	}

-	dir = oprofilefs_mkdir(sb, root, "ibs_op");
-	oprofilefs_create_ulong(sb, dir, "enable",
-				&ibs_config.op_enabled);
-	oprofilefs_create_ulong(sb, dir, "max_count",
-				&ibs_config.max_cnt_op);
-	if (ibs_caps & IBS_CAPS_OPCNT)
-		oprofilefs_create_ulong(sb, dir, "dispatched_ops",
-					&ibs_config.dispatched_ops);
+	if (ibs_caps & IBS_CAPS_OPSAM) {
+		dir = oprofilefs_mkdir(sb, root, "ibs_op");
+		oprofilefs_create_ulong(sb, dir, "enable",
+					&ibs_config.op_enabled);
+		oprofilefs_create_ulong(sb, dir, "max_count",
+					&ibs_config.max_cnt_op);
+		if (ibs_caps & IBS_CAPS_OPCNT)
+			oprofilefs_create_ulong(sb, dir, "dispatched_ops",
+						&ibs_config.dispatched_ops);
+		if (ibs_caps & IBS_CAPS_BRNTRGT)
+			oprofilefs_create_ulong(sb, dir, "branch_target",
+						&ibs_config.branch_target);
+	}

 	return 0;
 }