aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Robert Richter <robert.richter@amd.com> 2009-01-05 04:35:31 -0500
committer Robert Richter <robert.richter@amd.com> 2009-01-07 16:47:23 -0500
commit 1acda878e20ea0cd3708ba66dca67d52eaafdd2b (patch)
tree 97eb93396efc357f91dbd0ce080a5df51f29fd9b
parent bd7dc46f770d317ada1348294ff1f319243b803b (diff)
oprofile: use new data sample format for ibs
The new ring buffer implementation allows the storage of samples of different sizes. This patch implements the usage of the new sample format to store ibs samples in the cpu buffer. Until now, writing to the cpu buffer could lead to incomplete sampling sequences since IBS samples were transferred in multiple samples. Due to a full buffer, data could be lost at any time. This can't happen any more since the complete data is reserved in advance and then stored in a single sample.

Signed-off-by: Robert Richter <robert.richter@amd.com>
-rw-r--r-- arch/x86/oprofile/op_model_amd.c 119
-rw-r--r-- drivers/oprofile/buffer_sync.c 53
-rw-r--r-- drivers/oprofile/cpu_buffer.c 39
-rw-r--r-- drivers/oprofile/cpu_buffer.h 2
4 files changed, 76 insertions, 137 deletions
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index f101724db80a..cf310aeb462c 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -22,6 +22,7 @@
22 22
23#include "op_x86_model.h" 23#include "op_x86_model.h"
24#include "op_counter.h" 24#include "op_counter.h"
25#include "../../../drivers/oprofile/cpu_buffer.h"
25 26
26#define NUM_COUNTERS 4 27#define NUM_COUNTERS 4
27#define NUM_CONTROLS 4 28#define NUM_CONTROLS 4
@@ -60,51 +61,16 @@ static unsigned long reset_value[NUM_COUNTERS];
60#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ 61#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */
61#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ 62#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */
62 63
63/* Codes used in cpu_buffer.c */
64/* This produces duplicate code, need to be fixed */
65#define IBS_FETCH_BEGIN (1UL << 4)
66#define IBS_OP_BEGIN (1UL << 5)
67
68/* 64/*
69 * The function interface needs to be fixed, something like add 65 * The function interface needs to be fixed, something like add
70 * data. Should then be added to linux/oprofile.h. 66 * data. Should then be added to linux/oprofile.h.
71 */ 67 */
72extern void 68extern
73oprofile_add_ibs_sample(struct pt_regs * const regs, 69void oprofile_add_data(struct op_entry *entry, struct pt_regs * const regs,
74 unsigned int * const ibs_sample, int ibs_code); 70 unsigned long pc, int code, int size);
75
76struct ibs_fetch_sample {
77 /* MSRC001_1031 IBS Fetch Linear Address Register */
78 unsigned int ibs_fetch_lin_addr_low;
79 unsigned int ibs_fetch_lin_addr_high;
80 /* MSRC001_1030 IBS Fetch Control Register */
81 unsigned int ibs_fetch_ctl_low;
82 unsigned int ibs_fetch_ctl_high;
83 /* MSRC001_1032 IBS Fetch Physical Address Register */
84 unsigned int ibs_fetch_phys_addr_low;
85 unsigned int ibs_fetch_phys_addr_high;
86};
87 71
88struct ibs_op_sample { 72#define IBS_FETCH_SIZE 6
89 /* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */ 73#define IBS_OP_SIZE 12
90 unsigned int ibs_op_rip_low;
91 unsigned int ibs_op_rip_high;
92 /* MSRC001_1035 IBS Op Data Register */
93 unsigned int ibs_op_data1_low;
94 unsigned int ibs_op_data1_high;
95 /* MSRC001_1036 IBS Op Data 2 Register */
96 unsigned int ibs_op_data2_low;
97 unsigned int ibs_op_data2_high;
98 /* MSRC001_1037 IBS Op Data 3 Register */
99 unsigned int ibs_op_data3_low;
100 unsigned int ibs_op_data3_high;
101 /* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */
102 unsigned int ibs_dc_linear_low;
103 unsigned int ibs_dc_linear_high;
104 /* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */
105 unsigned int ibs_dc_phys_low;
106 unsigned int ibs_dc_phys_high;
107};
108 74
109static int has_ibs; /* AMD Family10h and later */ 75static int has_ibs; /* AMD Family10h and later */
110 76
@@ -197,9 +163,9 @@ static inline int
197op_amd_handle_ibs(struct pt_regs * const regs, 163op_amd_handle_ibs(struct pt_regs * const regs,
198 struct op_msrs const * const msrs) 164 struct op_msrs const * const msrs)
199{ 165{
200 unsigned int low, high; 166 u32 low, high;
201 struct ibs_fetch_sample ibs_fetch; 167 u64 msr;
202 struct ibs_op_sample ibs_op; 168 struct op_entry entry;
203 169
204 if (!has_ibs) 170 if (!has_ibs)
205 return 1; 171 return 1;
@@ -207,21 +173,19 @@ op_amd_handle_ibs(struct pt_regs * const regs,
207 if (ibs_config.fetch_enabled) { 173 if (ibs_config.fetch_enabled) {
208 rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); 174 rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
209 if (high & IBS_FETCH_HIGH_VALID_BIT) { 175 if (high & IBS_FETCH_HIGH_VALID_BIT) {
210 ibs_fetch.ibs_fetch_ctl_high = high; 176 rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr);
211 ibs_fetch.ibs_fetch_ctl_low = low; 177 oprofile_add_data(&entry, regs, msr, IBS_FETCH_CODE,
212 rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high); 178 IBS_FETCH_SIZE);
213 ibs_fetch.ibs_fetch_lin_addr_high = high; 179 op_cpu_buffer_add_data(&entry, (u32)msr);
214 ibs_fetch.ibs_fetch_lin_addr_low = low; 180 op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
215 rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high); 181 op_cpu_buffer_add_data(&entry, low);
216 ibs_fetch.ibs_fetch_phys_addr_high = high; 182 op_cpu_buffer_add_data(&entry, high);
217 ibs_fetch.ibs_fetch_phys_addr_low = low; 183 rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr);
218 184 op_cpu_buffer_add_data(&entry, (u32)msr);
219 oprofile_add_ibs_sample(regs, 185 op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
220 (unsigned int *)&ibs_fetch, 186 op_cpu_buffer_write_commit(&entry);
221 IBS_FETCH_BEGIN);
222 187
223 /* reenable the IRQ */ 188 /* reenable the IRQ */
224 rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
225 high &= ~IBS_FETCH_HIGH_VALID_BIT; 189 high &= ~IBS_FETCH_HIGH_VALID_BIT;
226 high |= IBS_FETCH_HIGH_ENABLE; 190 high |= IBS_FETCH_HIGH_ENABLE;
227 low &= IBS_FETCH_LOW_MAX_CNT_MASK; 191 low &= IBS_FETCH_LOW_MAX_CNT_MASK;
@@ -232,30 +196,29 @@ op_amd_handle_ibs(struct pt_regs * const regs,
232 if (ibs_config.op_enabled) { 196 if (ibs_config.op_enabled) {
233 rdmsr(MSR_AMD64_IBSOPCTL, low, high); 197 rdmsr(MSR_AMD64_IBSOPCTL, low, high);
234 if (low & IBS_OP_LOW_VALID_BIT) { 198 if (low & IBS_OP_LOW_VALID_BIT) {
235 rdmsr(MSR_AMD64_IBSOPRIP, low, high); 199 rdmsrl(MSR_AMD64_IBSOPRIP, msr);
236 ibs_op.ibs_op_rip_low = low; 200 oprofile_add_data(&entry, regs, msr, IBS_OP_CODE,
237 ibs_op.ibs_op_rip_high = high; 201 IBS_OP_SIZE);
238 rdmsr(MSR_AMD64_IBSOPDATA, low, high); 202 op_cpu_buffer_add_data(&entry, (u32)msr);
239 ibs_op.ibs_op_data1_low = low; 203 op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
240 ibs_op.ibs_op_data1_high = high; 204 rdmsrl(MSR_AMD64_IBSOPDATA, msr);
241 rdmsr(MSR_AMD64_IBSOPDATA2, low, high); 205 op_cpu_buffer_add_data(&entry, (u32)msr);
242 ibs_op.ibs_op_data2_low = low; 206 op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
243 ibs_op.ibs_op_data2_high = high; 207 rdmsrl(MSR_AMD64_IBSOPDATA2, msr);
244 rdmsr(MSR_AMD64_IBSOPDATA3, low, high); 208 op_cpu_buffer_add_data(&entry, (u32)msr);
245 ibs_op.ibs_op_data3_low = low; 209 op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
246 ibs_op.ibs_op_data3_high = high; 210 rdmsrl(MSR_AMD64_IBSOPDATA3, msr);
247 rdmsr(MSR_AMD64_IBSDCLINAD, low, high); 211 op_cpu_buffer_add_data(&entry, (u32)msr);
248 ibs_op.ibs_dc_linear_low = low; 212 op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
249 ibs_op.ibs_dc_linear_high = high; 213 rdmsrl(MSR_AMD64_IBSDCLINAD, msr);
250 rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high); 214 op_cpu_buffer_add_data(&entry, (u32)msr);
251 ibs_op.ibs_dc_phys_low = low; 215 op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
252 ibs_op.ibs_dc_phys_high = high; 216 rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr);
217 op_cpu_buffer_add_data(&entry, (u32)msr);
218 op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
219 op_cpu_buffer_write_commit(&entry);
253 220
254 /* reenable the IRQ */ 221 /* reenable the IRQ */
255 oprofile_add_ibs_sample(regs,
256 (unsigned int *)&ibs_op,
257 IBS_OP_BEGIN);
258 rdmsr(MSR_AMD64_IBSOPCTL, low, high);
259 high = 0; 222 high = 0;
260 low &= ~IBS_OP_LOW_VALID_BIT; 223 low &= ~IBS_OP_LOW_VALID_BIT;
261 low |= IBS_OP_LOW_ENABLE; 224 low |= IBS_OP_LOW_ENABLE;
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index f9031d31eeb7..d692fdc1a211 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -318,29 +318,18 @@ static void add_trace_begin(void)
318 318
319#ifdef CONFIG_OPROFILE_IBS 319#ifdef CONFIG_OPROFILE_IBS
320 320
321#define IBS_FETCH_CODE_SIZE 2 321static void add_data(struct op_entry *entry, struct mm_struct *mm)
322#define IBS_OP_CODE_SIZE 5
323
324/*
325 * Add IBS fetch and op entries to event buffer
326 */
327static void add_ibs_begin(int cpu, int code, struct mm_struct *mm)
328{ 322{
329 unsigned long pc; 323 unsigned long code, pc, val;
330 int i, count; 324 unsigned long cookie;
331 unsigned long cookie = 0;
332 off_t offset; 325 off_t offset;
333 struct op_entry entry;
334 struct op_sample *sample;
335 326
336 sample = op_cpu_buffer_read_entry(&entry, cpu); 327 if (!op_cpu_buffer_get_data(entry, &code))
337 if (!sample) 328 return;
329 if (!op_cpu_buffer_get_data(entry, &pc))
330 return;
331 if (!op_cpu_buffer_get_size(entry))
338 return; 332 return;
339 pc = sample->eip;
340
341#ifdef __LP64__
342 pc += sample->event << 32;
343#endif
344 333
345 if (mm) { 334 if (mm) {
346 cookie = lookup_dcookie(mm, pc, &offset); 335 cookie = lookup_dcookie(mm, pc, &offset);
@@ -362,24 +351,8 @@ static void add_ibs_begin(int cpu, int code, struct mm_struct *mm)
362 add_event_entry(code); 351 add_event_entry(code);
363 add_event_entry(offset); /* Offset from Dcookie */ 352 add_event_entry(offset); /* Offset from Dcookie */
364 353
365 /* we send the Dcookie offset, but send the raw Linear Add also*/ 354 while (op_cpu_buffer_get_data(entry, &val))
366 add_event_entry(sample->eip); 355 add_event_entry(val);
367 add_event_entry(sample->event);
368
369 if (code == IBS_FETCH_CODE)
370 count = IBS_FETCH_CODE_SIZE; /*IBS FETCH is 2 int64s*/
371 else
372 count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/
373
374 for (i = 0; i < count; i++) {
375 sample = op_cpu_buffer_read_entry(&entry, cpu);
376 if (!sample)
377 return;
378 add_event_entry(sample->eip);
379 add_event_entry(sample->event);
380 }
381
382 return;
383} 356}
384 357
385#endif 358#endif
@@ -572,10 +545,8 @@ void sync_buffer(int cpu)
572 add_user_ctx_switch(new, cookie); 545 add_user_ctx_switch(new, cookie);
573 } 546 }
574#ifdef CONFIG_OPROFILE_IBS 547#ifdef CONFIG_OPROFILE_IBS
575 if (flags & IBS_FETCH_BEGIN) 548 if (op_cpu_buffer_get_size(&entry))
576 add_ibs_begin(cpu, IBS_FETCH_CODE, mm); 549 add_data(&entry, mm);
577 if (flags & IBS_OP_BEGIN)
578 add_ibs_begin(cpu, IBS_OP_CODE, mm);
579#endif 550#endif
580 continue; 551 continue;
581 } 552 }
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 1b6590746be4..ddba9d01f09b 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -363,31 +363,38 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
363 363
364#ifdef CONFIG_OPROFILE_IBS 364#ifdef CONFIG_OPROFILE_IBS
365 365
366void oprofile_add_ibs_sample(struct pt_regs * const regs, 366/*
367 unsigned int * const ibs_sample, int ibs_code) 367 * Add samples with data to the ring buffer.
368 *
369 * Use op_cpu_buffer_add_data(&entry, val) to add data and
370 * op_cpu_buffer_write_commit(&entry) to commit the sample.
371 */
372void oprofile_add_data(struct op_entry *entry, struct pt_regs * const regs,
373 unsigned long pc, int code, int size)
368{ 374{
375 struct op_sample *sample;
369 int is_kernel = !user_mode(regs); 376 int is_kernel = !user_mode(regs);
370 struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); 377 struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
371 int fail = 0;
372 378
373 cpu_buf->sample_received++; 379 cpu_buf->sample_received++;
374 380
375 /* backtraces disabled for ibs */ 381 /* no backtraces for samples with data */
376 fail = fail || op_add_code(cpu_buf, 0, is_kernel, current); 382 if (op_add_code(cpu_buf, 0, is_kernel, current))
383 goto fail;
377 384
378 fail = fail || op_add_sample(cpu_buf, ESCAPE_CODE, ibs_code); 385 sample = op_cpu_buffer_write_reserve(entry, size + 2);
379 fail = fail || op_add_sample(cpu_buf, ibs_sample[0], ibs_sample[1]); 386 if (!sample)
380 fail = fail || op_add_sample(cpu_buf, ibs_sample[2], ibs_sample[3]); 387 goto fail;
381 fail = fail || op_add_sample(cpu_buf, ibs_sample[4], ibs_sample[5]); 388 sample->eip = ESCAPE_CODE;
389 sample->event = 0; /* no flags */
382 390
383 if (ibs_code == IBS_OP_BEGIN) { 391 op_cpu_buffer_add_data(entry, code);
384 fail = fail || op_add_sample(cpu_buf, ibs_sample[6], ibs_sample[7]); 392 op_cpu_buffer_add_data(entry, pc);
385 fail = fail || op_add_sample(cpu_buf, ibs_sample[8], ibs_sample[9]); 393
386 fail = fail || op_add_sample(cpu_buf, ibs_sample[10], ibs_sample[11]); 394 return;
387 }
388 395
389 if (fail) 396fail:
390 cpu_buf->sample_lost_overflow++; 397 cpu_buf->sample_lost_overflow++;
391} 398}
392 399
393#endif 400#endif
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index f34376046573..525cc4d13d8d 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -115,7 +115,5 @@ int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val)
115#define IS_KERNEL (1UL << 1) 115#define IS_KERNEL (1UL << 1)
116#define TRACE_BEGIN (1UL << 2) 116#define TRACE_BEGIN (1UL << 2)
117#define USER_CTX_SWITCH (1UL << 3) 117#define USER_CTX_SWITCH (1UL << 3)
118#define IBS_FETCH_BEGIN (1UL << 4)
119#define IBS_OP_BEGIN (1UL << 5)
120 118
121#endif /* OPROFILE_CPU_BUFFER_H */ 119#endif /* OPROFILE_CPU_BUFFER_H */