diff options
author | Robert Richter <robert.richter@amd.com> | 2009-01-05 04:35:31 -0500 |
---|---|---|
committer | Robert Richter <robert.richter@amd.com> | 2009-01-07 16:47:23 -0500 |
commit | 1acda878e20ea0cd3708ba66dca67d52eaafdd2b (patch) | |
tree | 97eb93396efc357f91dbd0ce080a5df51f29fd9b | |
parent | bd7dc46f770d317ada1348294ff1f319243b803b (diff) |
oprofile: use new data sample format for ibs
The new ring buffer implementation allows the storage of samples with
different size. This patch implements the usage of the new sample
format to store ibs samples in the cpu buffer. Until now, writing to
the cpu buffer could lead to incomplete sampling sequences since IBS
samples were transferred in multiple samples. Due to a full buffer,
data could be lost at any time. This can't happen any more since the
complete data is reserved in advance and then stored in a single
sample.
Signed-off-by: Robert Richter <robert.richter@amd.com>
-rw-r--r-- | arch/x86/oprofile/op_model_amd.c | 119 | ||||
-rw-r--r-- | drivers/oprofile/buffer_sync.c | 53 | ||||
-rw-r--r-- | drivers/oprofile/cpu_buffer.c | 39 | ||||
-rw-r--r-- | drivers/oprofile/cpu_buffer.h | 2 |
4 files changed, 76 insertions, 137 deletions
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index f101724db80a..cf310aeb462c 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -22,6 +22,7 @@ | |||
22 | 22 | ||
23 | #include "op_x86_model.h" | 23 | #include "op_x86_model.h" |
24 | #include "op_counter.h" | 24 | #include "op_counter.h" |
25 | #include "../../../drivers/oprofile/cpu_buffer.h" | ||
25 | 26 | ||
26 | #define NUM_COUNTERS 4 | 27 | #define NUM_COUNTERS 4 |
27 | #define NUM_CONTROLS 4 | 28 | #define NUM_CONTROLS 4 |
@@ -60,51 +61,16 @@ static unsigned long reset_value[NUM_COUNTERS]; | |||
60 | #define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ | 61 | #define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ |
61 | #define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ | 62 | #define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ |
62 | 63 | ||
63 | /* Codes used in cpu_buffer.c */ | ||
64 | /* This produces duplicate code, need to be fixed */ | ||
65 | #define IBS_FETCH_BEGIN (1UL << 4) | ||
66 | #define IBS_OP_BEGIN (1UL << 5) | ||
67 | |||
68 | /* | 64 | /* |
69 | * The function interface needs to be fixed, something like add | 65 | * The function interface needs to be fixed, something like add |
70 | * data. Should then be added to linux/oprofile.h. | 66 | * data. Should then be added to linux/oprofile.h. |
71 | */ | 67 | */ |
72 | extern void | 68 | extern |
73 | oprofile_add_ibs_sample(struct pt_regs * const regs, | 69 | void oprofile_add_data(struct op_entry *entry, struct pt_regs * const regs, |
74 | unsigned int * const ibs_sample, int ibs_code); | 70 | unsigned long pc, int code, int size); |
75 | |||
76 | struct ibs_fetch_sample { | ||
77 | /* MSRC001_1031 IBS Fetch Linear Address Register */ | ||
78 | unsigned int ibs_fetch_lin_addr_low; | ||
79 | unsigned int ibs_fetch_lin_addr_high; | ||
80 | /* MSRC001_1030 IBS Fetch Control Register */ | ||
81 | unsigned int ibs_fetch_ctl_low; | ||
82 | unsigned int ibs_fetch_ctl_high; | ||
83 | /* MSRC001_1032 IBS Fetch Physical Address Register */ | ||
84 | unsigned int ibs_fetch_phys_addr_low; | ||
85 | unsigned int ibs_fetch_phys_addr_high; | ||
86 | }; | ||
87 | 71 | ||
88 | struct ibs_op_sample { | 72 | #define IBS_FETCH_SIZE 6 |
89 | /* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */ | 73 | #define IBS_OP_SIZE 12 |
90 | unsigned int ibs_op_rip_low; | ||
91 | unsigned int ibs_op_rip_high; | ||
92 | /* MSRC001_1035 IBS Op Data Register */ | ||
93 | unsigned int ibs_op_data1_low; | ||
94 | unsigned int ibs_op_data1_high; | ||
95 | /* MSRC001_1036 IBS Op Data 2 Register */ | ||
96 | unsigned int ibs_op_data2_low; | ||
97 | unsigned int ibs_op_data2_high; | ||
98 | /* MSRC001_1037 IBS Op Data 3 Register */ | ||
99 | unsigned int ibs_op_data3_low; | ||
100 | unsigned int ibs_op_data3_high; | ||
101 | /* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */ | ||
102 | unsigned int ibs_dc_linear_low; | ||
103 | unsigned int ibs_dc_linear_high; | ||
104 | /* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */ | ||
105 | unsigned int ibs_dc_phys_low; | ||
106 | unsigned int ibs_dc_phys_high; | ||
107 | }; | ||
108 | 74 | ||
109 | static int has_ibs; /* AMD Family10h and later */ | 75 | static int has_ibs; /* AMD Family10h and later */ |
110 | 76 | ||
@@ -197,9 +163,9 @@ static inline int | |||
197 | op_amd_handle_ibs(struct pt_regs * const regs, | 163 | op_amd_handle_ibs(struct pt_regs * const regs, |
198 | struct op_msrs const * const msrs) | 164 | struct op_msrs const * const msrs) |
199 | { | 165 | { |
200 | unsigned int low, high; | 166 | u32 low, high; |
201 | struct ibs_fetch_sample ibs_fetch; | 167 | u64 msr; |
202 | struct ibs_op_sample ibs_op; | 168 | struct op_entry entry; |
203 | 169 | ||
204 | if (!has_ibs) | 170 | if (!has_ibs) |
205 | return 1; | 171 | return 1; |
@@ -207,21 +173,19 @@ op_amd_handle_ibs(struct pt_regs * const regs, | |||
207 | if (ibs_config.fetch_enabled) { | 173 | if (ibs_config.fetch_enabled) { |
208 | rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); | 174 | rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); |
209 | if (high & IBS_FETCH_HIGH_VALID_BIT) { | 175 | if (high & IBS_FETCH_HIGH_VALID_BIT) { |
210 | ibs_fetch.ibs_fetch_ctl_high = high; | 176 | rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr); |
211 | ibs_fetch.ibs_fetch_ctl_low = low; | 177 | oprofile_add_data(&entry, regs, msr, IBS_FETCH_CODE, |
212 | rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high); | 178 | IBS_FETCH_SIZE); |
213 | ibs_fetch.ibs_fetch_lin_addr_high = high; | 179 | op_cpu_buffer_add_data(&entry, (u32)msr); |
214 | ibs_fetch.ibs_fetch_lin_addr_low = low; | 180 | op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); |
215 | rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high); | 181 | op_cpu_buffer_add_data(&entry, low); |
216 | ibs_fetch.ibs_fetch_phys_addr_high = high; | 182 | op_cpu_buffer_add_data(&entry, high); |
217 | ibs_fetch.ibs_fetch_phys_addr_low = low; | 183 | rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr); |
218 | 184 | op_cpu_buffer_add_data(&entry, (u32)msr); | |
219 | oprofile_add_ibs_sample(regs, | 185 | op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); |
220 | (unsigned int *)&ibs_fetch, | 186 | op_cpu_buffer_write_commit(&entry); |
221 | IBS_FETCH_BEGIN); | ||
222 | 187 | ||
223 | /* reenable the IRQ */ | 188 | /* reenable the IRQ */ |
224 | rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); | ||
225 | high &= ~IBS_FETCH_HIGH_VALID_BIT; | 189 | high &= ~IBS_FETCH_HIGH_VALID_BIT; |
226 | high |= IBS_FETCH_HIGH_ENABLE; | 190 | high |= IBS_FETCH_HIGH_ENABLE; |
227 | low &= IBS_FETCH_LOW_MAX_CNT_MASK; | 191 | low &= IBS_FETCH_LOW_MAX_CNT_MASK; |
@@ -232,30 +196,29 @@ op_amd_handle_ibs(struct pt_regs * const regs, | |||
232 | if (ibs_config.op_enabled) { | 196 | if (ibs_config.op_enabled) { |
233 | rdmsr(MSR_AMD64_IBSOPCTL, low, high); | 197 | rdmsr(MSR_AMD64_IBSOPCTL, low, high); |
234 | if (low & IBS_OP_LOW_VALID_BIT) { | 198 | if (low & IBS_OP_LOW_VALID_BIT) { |
235 | rdmsr(MSR_AMD64_IBSOPRIP, low, high); | 199 | rdmsrl(MSR_AMD64_IBSOPRIP, msr); |
236 | ibs_op.ibs_op_rip_low = low; | 200 | oprofile_add_data(&entry, regs, msr, IBS_OP_CODE, |
237 | ibs_op.ibs_op_rip_high = high; | 201 | IBS_OP_SIZE); |
238 | rdmsr(MSR_AMD64_IBSOPDATA, low, high); | 202 | op_cpu_buffer_add_data(&entry, (u32)msr); |
239 | ibs_op.ibs_op_data1_low = low; | 203 | op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); |
240 | ibs_op.ibs_op_data1_high = high; | 204 | rdmsrl(MSR_AMD64_IBSOPDATA, msr); |
241 | rdmsr(MSR_AMD64_IBSOPDATA2, low, high); | 205 | op_cpu_buffer_add_data(&entry, (u32)msr); |
242 | ibs_op.ibs_op_data2_low = low; | 206 | op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); |
243 | ibs_op.ibs_op_data2_high = high; | 207 | rdmsrl(MSR_AMD64_IBSOPDATA2, msr); |
244 | rdmsr(MSR_AMD64_IBSOPDATA3, low, high); | 208 | op_cpu_buffer_add_data(&entry, (u32)msr); |
245 | ibs_op.ibs_op_data3_low = low; | 209 | op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); |
246 | ibs_op.ibs_op_data3_high = high; | 210 | rdmsrl(MSR_AMD64_IBSOPDATA3, msr); |
247 | rdmsr(MSR_AMD64_IBSDCLINAD, low, high); | 211 | op_cpu_buffer_add_data(&entry, (u32)msr); |
248 | ibs_op.ibs_dc_linear_low = low; | 212 | op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); |
249 | ibs_op.ibs_dc_linear_high = high; | 213 | rdmsrl(MSR_AMD64_IBSDCLINAD, msr); |
250 | rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high); | 214 | op_cpu_buffer_add_data(&entry, (u32)msr); |
251 | ibs_op.ibs_dc_phys_low = low; | 215 | op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); |
252 | ibs_op.ibs_dc_phys_high = high; | 216 | rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr); |
217 | op_cpu_buffer_add_data(&entry, (u32)msr); | ||
218 | op_cpu_buffer_add_data(&entry, (u32)(msr >> 32)); | ||
219 | op_cpu_buffer_write_commit(&entry); | ||
253 | 220 | ||
254 | /* reenable the IRQ */ | 221 | /* reenable the IRQ */ |
255 | oprofile_add_ibs_sample(regs, | ||
256 | (unsigned int *)&ibs_op, | ||
257 | IBS_OP_BEGIN); | ||
258 | rdmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
259 | high = 0; | 222 | high = 0; |
260 | low &= ~IBS_OP_LOW_VALID_BIT; | 223 | low &= ~IBS_OP_LOW_VALID_BIT; |
261 | low |= IBS_OP_LOW_ENABLE; | 224 | low |= IBS_OP_LOW_ENABLE; |
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index f9031d31eeb7..d692fdc1a211 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c | |||
@@ -318,29 +318,18 @@ static void add_trace_begin(void) | |||
318 | 318 | ||
319 | #ifdef CONFIG_OPROFILE_IBS | 319 | #ifdef CONFIG_OPROFILE_IBS |
320 | 320 | ||
321 | #define IBS_FETCH_CODE_SIZE 2 | 321 | static void add_data(struct op_entry *entry, struct mm_struct *mm) |
322 | #define IBS_OP_CODE_SIZE 5 | ||
323 | |||
324 | /* | ||
325 | * Add IBS fetch and op entries to event buffer | ||
326 | */ | ||
327 | static void add_ibs_begin(int cpu, int code, struct mm_struct *mm) | ||
328 | { | 322 | { |
329 | unsigned long pc; | 323 | unsigned long code, pc, val; |
330 | int i, count; | 324 | unsigned long cookie; |
331 | unsigned long cookie = 0; | ||
332 | off_t offset; | 325 | off_t offset; |
333 | struct op_entry entry; | ||
334 | struct op_sample *sample; | ||
335 | 326 | ||
336 | sample = op_cpu_buffer_read_entry(&entry, cpu); | 327 | if (!op_cpu_buffer_get_data(entry, &code)) |
337 | if (!sample) | 328 | return; |
329 | if (!op_cpu_buffer_get_data(entry, &pc)) | ||
330 | return; | ||
331 | if (!op_cpu_buffer_get_size(entry)) | ||
338 | return; | 332 | return; |
339 | pc = sample->eip; | ||
340 | |||
341 | #ifdef __LP64__ | ||
342 | pc += sample->event << 32; | ||
343 | #endif | ||
344 | 333 | ||
345 | if (mm) { | 334 | if (mm) { |
346 | cookie = lookup_dcookie(mm, pc, &offset); | 335 | cookie = lookup_dcookie(mm, pc, &offset); |
@@ -362,24 +351,8 @@ static void add_ibs_begin(int cpu, int code, struct mm_struct *mm) | |||
362 | add_event_entry(code); | 351 | add_event_entry(code); |
363 | add_event_entry(offset); /* Offset from Dcookie */ | 352 | add_event_entry(offset); /* Offset from Dcookie */ |
364 | 353 | ||
365 | /* we send the Dcookie offset, but send the raw Linear Add also*/ | 354 | while (op_cpu_buffer_get_data(entry, &val)) |
366 | add_event_entry(sample->eip); | 355 | add_event_entry(val); |
367 | add_event_entry(sample->event); | ||
368 | |||
369 | if (code == IBS_FETCH_CODE) | ||
370 | count = IBS_FETCH_CODE_SIZE; /*IBS FETCH is 2 int64s*/ | ||
371 | else | ||
372 | count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/ | ||
373 | |||
374 | for (i = 0; i < count; i++) { | ||
375 | sample = op_cpu_buffer_read_entry(&entry, cpu); | ||
376 | if (!sample) | ||
377 | return; | ||
378 | add_event_entry(sample->eip); | ||
379 | add_event_entry(sample->event); | ||
380 | } | ||
381 | |||
382 | return; | ||
383 | } | 356 | } |
384 | 357 | ||
385 | #endif | 358 | #endif |
@@ -572,10 +545,8 @@ void sync_buffer(int cpu) | |||
572 | add_user_ctx_switch(new, cookie); | 545 | add_user_ctx_switch(new, cookie); |
573 | } | 546 | } |
574 | #ifdef CONFIG_OPROFILE_IBS | 547 | #ifdef CONFIG_OPROFILE_IBS |
575 | if (flags & IBS_FETCH_BEGIN) | 548 | if (op_cpu_buffer_get_size(&entry)) |
576 | add_ibs_begin(cpu, IBS_FETCH_CODE, mm); | 549 | add_data(&entry, mm); |
577 | if (flags & IBS_OP_BEGIN) | ||
578 | add_ibs_begin(cpu, IBS_OP_CODE, mm); | ||
579 | #endif | 550 | #endif |
580 | continue; | 551 | continue; |
581 | } | 552 | } |
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 1b6590746be4..ddba9d01f09b 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c | |||
@@ -363,31 +363,38 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) | |||
363 | 363 | ||
364 | #ifdef CONFIG_OPROFILE_IBS | 364 | #ifdef CONFIG_OPROFILE_IBS |
365 | 365 | ||
366 | void oprofile_add_ibs_sample(struct pt_regs * const regs, | 366 | /* |
367 | unsigned int * const ibs_sample, int ibs_code) | 367 | * Add samples with data to the ring buffer. |
368 | * | ||
369 | * Use op_cpu_buffer_add_data(&entry, val) to add data and | ||
370 | * op_cpu_buffer_write_commit(&entry) to commit the sample. | ||
371 | */ | ||
372 | void oprofile_add_data(struct op_entry *entry, struct pt_regs * const regs, | ||
373 | unsigned long pc, int code, int size) | ||
368 | { | 374 | { |
375 | struct op_sample *sample; | ||
369 | int is_kernel = !user_mode(regs); | 376 | int is_kernel = !user_mode(regs); |
370 | struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); | 377 | struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); |
371 | int fail = 0; | ||
372 | 378 | ||
373 | cpu_buf->sample_received++; | 379 | cpu_buf->sample_received++; |
374 | 380 | ||
375 | /* backtraces disabled for ibs */ | 381 | /* no backtraces for samples with data */ |
376 | fail = fail || op_add_code(cpu_buf, 0, is_kernel, current); | 382 | if (op_add_code(cpu_buf, 0, is_kernel, current)) |
383 | goto fail; | ||
377 | 384 | ||
378 | fail = fail || op_add_sample(cpu_buf, ESCAPE_CODE, ibs_code); | 385 | sample = op_cpu_buffer_write_reserve(entry, size + 2); |
379 | fail = fail || op_add_sample(cpu_buf, ibs_sample[0], ibs_sample[1]); | 386 | if (!sample) |
380 | fail = fail || op_add_sample(cpu_buf, ibs_sample[2], ibs_sample[3]); | 387 | goto fail; |
381 | fail = fail || op_add_sample(cpu_buf, ibs_sample[4], ibs_sample[5]); | 388 | sample->eip = ESCAPE_CODE; |
389 | sample->event = 0; /* no flags */ | ||
382 | 390 | ||
383 | if (ibs_code == IBS_OP_BEGIN) { | 391 | op_cpu_buffer_add_data(entry, code); |
384 | fail = fail || op_add_sample(cpu_buf, ibs_sample[6], ibs_sample[7]); | 392 | op_cpu_buffer_add_data(entry, pc); |
385 | fail = fail || op_add_sample(cpu_buf, ibs_sample[8], ibs_sample[9]); | 393 | |
386 | fail = fail || op_add_sample(cpu_buf, ibs_sample[10], ibs_sample[11]); | 394 | return; |
387 | } | ||
388 | 395 | ||
389 | if (fail) | 396 | fail: |
390 | cpu_buf->sample_lost_overflow++; | 397 | cpu_buf->sample_lost_overflow++; |
391 | } | 398 | } |
392 | 399 | ||
393 | #endif | 400 | #endif |
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index f34376046573..525cc4d13d8d 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h | |||
@@ -115,7 +115,5 @@ int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val) | |||
115 | #define IS_KERNEL (1UL << 1) | 115 | #define IS_KERNEL (1UL << 1) |
116 | #define TRACE_BEGIN (1UL << 2) | 116 | #define TRACE_BEGIN (1UL << 2) |
117 | #define USER_CTX_SWITCH (1UL << 3) | 117 | #define USER_CTX_SWITCH (1UL << 3) |
118 | #define IBS_FETCH_BEGIN (1UL << 4) | ||
119 | #define IBS_OP_BEGIN (1UL << 5) | ||
120 | 118 | ||
121 | #endif /* OPROFILE_CPU_BUFFER_H */ | 119 | #endif /* OPROFILE_CPU_BUFFER_H */ |