author     Linus Torvalds <torvalds@linux-foundation.org>    2010-10-27 21:48:00 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2010-10-27 21:48:00 -0400
commit     a042e26137d7674ac04b1cd2d5c06b9ebc1ee2d5 (patch)
tree       c1a7a8bda41b99caa4b4a0fe320fc73278879f7d /kernel
parent     f66dd539feb849a3a00f7fac67c026e0935e373a (diff)
parent     e25804a0327dad954f7d43803178fdef2fd35b4e (diff)
Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (50 commits)
perf python scripting: Add futex-contention script
perf python scripting: Fixup cut'n'paste error in sctop script
perf scripting: Shut up 'perf record' final status
perf record: Remove newline character from perror() argument
perf python scripting: Support fedora 11 (audit 1.7.17)
perf python scripting: Improve the syscalls-by-pid script
perf python scripting: print the syscall name on sctop
perf python scripting: Improve the syscalls-counts script
perf python scripting: Improve the failed-syscalls-by-pid script
kprobes: Remove redundant text_mutex lock in optimize
x86/oprofile: Fix uninitialized variable use in debug printk
tracing: Fix 'faild' -> 'failed' typo
perf probe: Fix format specified for Dwarf_Off parameter
perf trace: Fix detection of script extension
perf trace: Use $PERF_EXEC_PATH in canned report scripts
perf tools: Document event modifiers
perf tools: Remove direct slang.h include
perf_events: Fix for transaction recovery in group_sched_in()
perf_events: Revert: Fix transaction recovery in group_sched_in()
perf, x86: Use NUMA aware allocations for PEBS/BTS/DS allocations
...
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/kprobes.c           |   7
-rw-r--r--  kernel/perf_event.c        |  94
-rw-r--r--  kernel/softirq.c           |  16
-rw-r--r--  kernel/trace/ring_buffer.c | 335
-rw-r--r--  kernel/trace/trace.c       |   8
5 files changed, 218 insertions(+), 242 deletions(-)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 56a891914273..99865c33a60d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -74,7 +74,8 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
 /* NOTE: change this value only with kprobe_mutex held */
 static bool kprobes_all_disarmed;
 
-static DEFINE_MUTEX(kprobe_mutex);	/* Protects kprobe_table */
+/* This protects kprobe_table and optimizing_list */
+static DEFINE_MUTEX(kprobe_mutex);
 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
 static struct {
 	spinlock_t lock ____cacheline_aligned_in_smp;
@@ -595,6 +596,7 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
 }
 
 #ifdef CONFIG_SYSCTL
+/* This should be called with kprobe_mutex locked */
 static void __kprobes optimize_all_kprobes(void)
 {
 	struct hlist_head *head;
@@ -607,17 +609,16 @@ static void __kprobes optimize_all_kprobes(void)
 		return;
 
 	kprobes_allow_optimization = true;
-	mutex_lock(&text_mutex);
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
 		hlist_for_each_entry_rcu(p, node, head, hlist)
 			if (!kprobe_disabled(p))
 				optimize_kprobe(p);
 	}
-	mutex_unlock(&text_mutex);
 	printk(KERN_INFO "Kprobes globally optimized\n");
 }
 
+/* This should be called with kprobe_mutex locked */
 static void __kprobes unoptimize_all_kprobes(void)
 {
 	struct hlist_head *head;
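
The kprobes hunks above drop text_mutex from optimize_all_kprobes() and instead document that the function runs under kprobe_mutex. The rationale (per the "Remove redundant text_mutex lock in optimize" commit in the shortlog) appears to be that optimize_kprobe() only queues the probe on optimizing_list; the actual instruction patching is deferred to the optimizer worker, which serializes on text_mutex itself. A minimal userspace model of that split, with illustrative names only, not kernel source:

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t kprobe_lock = PTHREAD_MUTEX_INITIALIZER; /* ~kprobe_mutex */
static pthread_mutex_t text_lock = PTHREAD_MUTEX_INITIALIZER;   /* ~text_mutex */
static int pending;     /* stand-in for optimizing_list */

/* analogous to optimize_all_kprobes(): caller already holds kprobe_lock */
static void optimize_all(void)
{
	pending++;      /* just queue work; no text_lock needed here */
}

/* analogous to the deferred optimizer worker */
static void *optimizer(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&text_lock);  /* patching is serialized here */
	if (pending)
		puts("patching queued probes");
	pthread_mutex_unlock(&text_lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_mutex_lock(&kprobe_lock);
	optimize_all();
	pthread_mutex_unlock(&kprobe_lock);

	pthread_create(&t, NULL, optimizer, NULL);
	pthread_join(t, NULL);
	return 0;
}
```

The point is that the queueing step needs no text lock because nothing is patched until the worker runs.
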
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index f309e8014c78..517d827f4982 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -417,8 +417,8 @@ event_filter_match(struct perf_event *event)
 	return event->cpu == -1 || event->cpu == smp_processor_id();
 }
 
-static int
-__event_sched_out(struct perf_event *event,
+static void
+event_sched_out(struct perf_event *event,
 		  struct perf_cpu_context *cpuctx,
 		  struct perf_event_context *ctx)
 {
@@ -437,13 +437,14 @@ __event_sched_out(struct perf_event *event,
 	}
 
 	if (event->state != PERF_EVENT_STATE_ACTIVE)
-		return 0;
+		return;
 
 	event->state = PERF_EVENT_STATE_INACTIVE;
 	if (event->pending_disable) {
 		event->pending_disable = 0;
 		event->state = PERF_EVENT_STATE_OFF;
 	}
+	event->tstamp_stopped = ctx->time;
 	event->pmu->del(event, 0);
 	event->oncpu = -1;
 
@@ -452,19 +453,6 @@ __event_sched_out(struct perf_event *event,
 		ctx->nr_active--;
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
-	return 1;
-}
-
-static void
-event_sched_out(struct perf_event *event,
-		  struct perf_cpu_context *cpuctx,
-		  struct perf_event_context *ctx)
-{
-	int ret;
-
-	ret = __event_sched_out(event, cpuctx, ctx);
-	if (ret)
-		event->tstamp_stopped = ctx->time;
 }
 
 static void
@@ -664,7 +652,7 @@ retry:
 }
 
 static int
-__event_sched_in(struct perf_event *event,
+event_sched_in(struct perf_event *event,
 		struct perf_cpu_context *cpuctx,
 		struct perf_event_context *ctx)
 {
@@ -684,6 +672,8 @@ __event_sched_in(struct perf_event *event,
 		return -EAGAIN;
 	}
 
+	event->tstamp_running += ctx->time - event->tstamp_stopped;
+
 	if (!is_software_event(event))
 		cpuctx->active_oncpu++;
 	ctx->nr_active++;
@@ -694,35 +684,6 @@ __event_sched_in(struct perf_event *event,
 	return 0;
 }
 
-static inline int
-event_sched_in(struct perf_event *event,
-		struct perf_cpu_context *cpuctx,
-		struct perf_event_context *ctx)
-{
-	int ret = __event_sched_in(event, cpuctx, ctx);
-	if (ret)
-		return ret;
-	event->tstamp_running += ctx->time - event->tstamp_stopped;
-	return 0;
-}
-
-static void
-group_commit_event_sched_in(struct perf_event *group_event,
-	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx)
-{
-	struct perf_event *event;
-	u64 now = ctx->time;
-
-	group_event->tstamp_running += now - group_event->tstamp_stopped;
-	/*
-	 * Schedule in siblings as one group (if any):
-	 */
-	list_for_each_entry(event, &group_event->sibling_list, group_entry) {
-		event->tstamp_running += now - event->tstamp_stopped;
-	}
-}
-
 static int
 group_sched_in(struct perf_event *group_event,
 	       struct perf_cpu_context *cpuctx,
@@ -730,19 +691,15 @@ group_sched_in(struct perf_event *group_event,
 {
 	struct perf_event *event, *partial_group = NULL;
 	struct pmu *pmu = group_event->pmu;
+	u64 now = ctx->time;
+	bool simulate = false;
 
 	if (group_event->state == PERF_EVENT_STATE_OFF)
 		return 0;
 
 	pmu->start_txn(pmu);
 
-	/*
-	 * use __event_sched_in() to delay updating tstamp_running
-	 * until the transaction is committed. In case of failure
-	 * we will keep an unmodified tstamp_running which is a
-	 * requirement to get correct timing information
-	 */
-	if (__event_sched_in(group_event, cpuctx, ctx)) {
+	if (event_sched_in(group_event, cpuctx, ctx)) {
 		pmu->cancel_txn(pmu);
 		return -EAGAIN;
 	}
@@ -751,31 +708,42 @@ group_sched_in(struct perf_event *group_event,
 	 * Schedule in siblings as one group (if any):
 	 */
 	list_for_each_entry(event, &group_event->sibling_list, group_entry) {
-		if (__event_sched_in(event, cpuctx, ctx)) {
+		if (event_sched_in(event, cpuctx, ctx)) {
 			partial_group = event;
 			goto group_error;
 		}
 	}
 
-	if (!pmu->commit_txn(pmu)) {
-		/* commit tstamp_running */
-		group_commit_event_sched_in(group_event, cpuctx, ctx);
+	if (!pmu->commit_txn(pmu))
 		return 0;
-	}
+
 group_error:
 	/*
 	 * Groups can be scheduled in as one unit only, so undo any
 	 * partial group before returning:
+	 * The events up to the failed event are scheduled out normally,
+	 * tstamp_stopped will be updated.
 	 *
-	 * use __event_sched_out() to avoid updating tstamp_stopped
-	 * because the event never actually ran
+	 * The failed events and the remaining siblings need to have
+	 * their timings updated as if they had gone thru event_sched_in()
+	 * and event_sched_out(). This is required to get consistent timings
+	 * across the group. This also takes care of the case where the group
+	 * could never be scheduled by ensuring tstamp_stopped is set to mark
+	 * the time the event was actually stopped, such that time delta
+	 * calculation in update_event_times() is correct.
 	 */
 	list_for_each_entry(event, &group_event->sibling_list, group_entry) {
 		if (event == partial_group)
-			break;
-		__event_sched_out(event, cpuctx, ctx);
+			simulate = true;
+
+		if (simulate) {
+			event->tstamp_running += now - event->tstamp_stopped;
+			event->tstamp_stopped = now;
+		} else {
+			event_sched_out(event, cpuctx, ctx);
+		}
 	}
-	__event_sched_out(group_event, cpuctx, ctx);
+	event_sched_out(group_event, cpuctx, ctx);
 
 	pmu->cancel_txn(pmu);
 
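
The group_sched_in() rework above replaces the __event_sched_in()/group_commit pair with a "simulate" pass over the siblings: members that never actually ran still get tstamp_running advanced and tstamp_stopped pinned to now, so a later enabled-time delta computed from tstamp_stopped comes out as zero instead of a bogus interval. A standalone sketch of just that bookkeeping (plain C with illustrative values):

```c
#include <stdio.h>

struct evt {
	unsigned long long tstamp_running;
	unsigned long long tstamp_stopped;
};

/* what group_sched_in() now does for siblings at/after the failed one */
static void simulate_sched_in_out(struct evt *e, unsigned long long now)
{
	e->tstamp_running += now - e->tstamp_stopped;
	e->tstamp_stopped = now;
}

int main(void)
{
	struct evt e = { .tstamp_running = 100, .tstamp_stopped = 400 };
	unsigned long long now = 1000;

	simulate_sched_in_out(&e, now);
	/* a later delta of (now - tstamp_stopped) is 0: no phantom runtime */
	printf("running=%llu stopped=%llu\n",
	       e.tstamp_running, e.tstamp_stopped);
	return 0;
}
```
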
diff --git a/kernel/softirq.c b/kernel/softirq.c
index f02a9dfa19bc..18f4be0d5fe0 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -229,18 +229,20 @@ restart:
 
 	do {
 		if (pending & 1) {
+			unsigned int vec_nr = h - softirq_vec;
 			int prev_count = preempt_count();
-			kstat_incr_softirqs_this_cpu(h - softirq_vec);
 
-			trace_softirq_entry(h, softirq_vec);
+			kstat_incr_softirqs_this_cpu(vec_nr);
+
+			trace_softirq_entry(vec_nr);
 			h->action(h);
-			trace_softirq_exit(h, softirq_vec);
+			trace_softirq_exit(vec_nr);
 			if (unlikely(prev_count != preempt_count())) {
-				printk(KERN_ERR "huh, entered softirq %td %s %p"
+				printk(KERN_ERR "huh, entered softirq %u %s %p"
 				       "with preempt_count %08x,"
-				       " exited with %08x?\n", h - softirq_vec,
-				       softirq_to_name[h - softirq_vec],
-				       h->action, prev_count, preempt_count());
+				       " exited with %08x?\n", vec_nr,
+				       softirq_to_name[vec_nr], h->action,
+				       prev_count, preempt_count());
 				preempt_count() = prev_count;
 			}
 
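
The softirq hunk caches the vector index once as vec_nr instead of recomputing h - softirq_vec at every use; because the pointer subtraction yields a small element index, the printk specifier also changes from %td (ptrdiff_t) to %u. A standalone illustration of that pointer arithmetic, not kernel code:

```c
#include <stdio.h>

struct softirq_action { void (*action)(struct softirq_action *); };

static struct softirq_action softirq_vec[10];

int main(void)
{
	struct softirq_action *h = &softirq_vec[3];
	/* pointer subtraction yields an element index, not a byte offset */
	unsigned int vec_nr = h - softirq_vec;

	printf("softirq vector %u\n", vec_nr);
	return 0;
}
```
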
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c3dab054d18e..9ed509a015d8 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -224,6 +224,9 @@ enum {
 	RB_LEN_TIME_STAMP = 16,
 };
 
+#define skip_time_extend(event) \
+	((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
+
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
 	return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event)
 	return length + RB_EVNT_HDR_SIZE;
 }
 
-/* inline for ring buffer fast paths */
-static unsigned
+/*
+ * Return the length of the given event. Will return
+ * the length of the time extend if the event is a
+ * time extend.
+ */
+static inline unsigned
 rb_event_length(struct ring_buffer_event *event)
 {
 	switch (event->type_len) {
@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event)
 	return 0;
 }
 
+/*
+ * Return total length of time extend and data,
+ * or just the event length for all other events.
+ */
+static inline unsigned
+rb_event_ts_length(struct ring_buffer_event *event)
+{
+	unsigned len = 0;
+
+	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+		/* time extends include the data event after it */
+		len = RB_LEN_TIME_EXTEND;
+		event = skip_time_extend(event);
+	}
+	return len + rb_event_length(event);
+}
+
 /**
  * ring_buffer_event_length - return the length of the event
  * @event: the event to get the length of
+ *
+ * Returns the size of the data load of a data event.
+ * If the event is something other than a data event, it
+ * returns the size of the event itself. With the exception
+ * of a TIME EXTEND, where it still returns the size of the
+ * data load of the data event after it.
  */
 unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 {
-	unsigned length = rb_event_length(event);
+	unsigned length;
+
+	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+		event = skip_time_extend(event);
+
+	length = rb_event_length(event);
 	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 		return length;
 	length -= RB_EVNT_HDR_SIZE;
@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
 static void *
 rb_event_data(struct ring_buffer_event *event)
 {
+	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+		event = skip_time_extend(event);
 	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
 	/* If length is in len field, then array[0] has the data */
 	if (event->type_len)
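
The new skip_time_extend() macro and rb_event_ts_length() rely on a time extend being a fixed-size pseudo event placed immediately before the data event it stamps, so stepping over it is plain pointer arithmetic. A standalone model of that layout (the 8-byte size mirrors RB_LEN_TIME_EXTEND as a 4-byte header plus a 4-byte array[0], but treat that width as an assumption hard-coded for the example):

```c
#include <stdio.h>

#define LEN_TIME_EXTEND 8       /* assumed size of a time-extend event */

struct event {
	unsigned int header;    /* stands in for the type_len/time_delta bits */
	unsigned int array[1];
};

#define skip_time_extend(e) \
	((struct event *)((char *)(e) + LEN_TIME_EXTEND))

int main(void)
{
	unsigned char buf[32] = { 0 };
	struct event *extend = (struct event *)buf;
	struct event *data = skip_time_extend(extend);

	printf("extend at offset 0, data at offset %td\n",
	       (char *)data - (char *)buf);
	return 0;
}
```
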
@@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta)
 /* Max payload is BUF_PAGE_SIZE - header (8bytes) */
 #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
 
-/* Max number of timestamps that can fit on a page */
-#define RB_TIMESTAMPS_PER_PAGE	(BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND)
-
 int ring_buffer_print_page_header(struct trace_seq *s)
 {
 	struct buffer_data_page field;
@@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
 	iter->head = 0;
 }
 
+/* Slow path, do not inline */
+static noinline struct ring_buffer_event *
+rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
+{
+	event->type_len = RINGBUF_TYPE_TIME_EXTEND;
+
+	/* Not the first event on the page? */
+	if (rb_event_index(event)) {
+		event->time_delta = delta & TS_MASK;
+		event->array[0] = delta >> TS_SHIFT;
+	} else {
+		/* nope, just zero it */
+		event->time_delta = 0;
+		event->array[0] = 0;
+	}
+
+	return skip_time_extend(event);
+}
+
 /**
  * ring_buffer_update_event - update event type and data
  * @event: the even to update
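
rb_add_time_stamp() stores an oversized delta split across the event: the low TS_SHIFT bits go in the time_delta field and the high bits in array[0]. A standalone round-trip sketch (TS_SHIFT/TS_MASK are redefined locally to match the ring buffer's 27-bit time_delta field; treat the exact width as an assumption):

```c
#include <stdio.h>

#define TS_SHIFT 27
#define TS_MASK  ((1ULL << TS_SHIFT) - 1)

int main(void)
{
	unsigned long long delta = (5ULL << TS_SHIFT) + 12345; /* > 27 bits */
	unsigned int time_delta = delta & TS_MASK;  /* low bits, header field */
	unsigned int high = delta >> TS_SHIFT;      /* high bits, array[0]    */

	unsigned long long restored =
		((unsigned long long)high << TS_SHIFT) + time_delta;

	printf("delta=%llu restored=%llu\n", delta, restored);
	return 0;
}
```
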
@@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
  * data field.
  */
 static void
-rb_update_event(struct ring_buffer_event *event,
-	       unsigned type, unsigned length)
+rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
+		struct ring_buffer_event *event, unsigned length,
+		int add_timestamp, u64 delta)
 {
-	event->type_len = type;
-
-	switch (type) {
-
-	case RINGBUF_TYPE_PADDING:
-	case RINGBUF_TYPE_TIME_EXTEND:
-	case RINGBUF_TYPE_TIME_STAMP:
-		break;
+	/* Only a commit updates the timestamp */
+	if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
+		delta = 0;
 
-	case 0:
-		length -= RB_EVNT_HDR_SIZE;
-		if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
-			event->array[0] = length;
-		else
-			event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
-		break;
-	default:
-		BUG();
+	/*
+	 * If we need to add a timestamp, then we
+	 * add it to the start of the resevered space.
+	 */
+	if (unlikely(add_timestamp)) {
+		event = rb_add_time_stamp(event, delta);
+		length -= RB_LEN_TIME_EXTEND;
+		delta = 0;
 	}
+
+	event->time_delta = delta;
+	length -= RB_EVNT_HDR_SIZE;
+	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+		event->type_len = 0;
+		event->array[0] = length;
+	} else
+		event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
 }
 
 /*
@@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 		local_sub(length, &tail_page->write);
 }
 
-static struct ring_buffer_event *
+/*
+ * This is the slow path, force gcc not to inline it.
+ */
+static noinline struct ring_buffer_event *
 rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	     unsigned long length, unsigned long tail,
-	     struct buffer_page *tail_page, u64 *ts)
+	     struct buffer_page *tail_page, u64 ts)
 {
 	struct buffer_page *commit_page = cpu_buffer->commit_page;
 	struct ring_buffer *buffer = cpu_buffer->buffer;
@@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 		 * Nested commits always have zero deltas, so
 		 * just reread the time stamp
 		 */
-		*ts = rb_time_stamp(buffer);
-		next_page->page->time_stamp = *ts;
+		ts = rb_time_stamp(buffer);
+		next_page->page->time_stamp = ts;
 	}
 
 out_again:
@@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 
 static struct ring_buffer_event *
 __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
-		  unsigned type, unsigned long length, u64 *ts)
+		  unsigned long length, u64 ts,
+		  u64 delta, int add_timestamp)
 {
 	struct buffer_page *tail_page;
 	struct ring_buffer_event *event;
 	unsigned long tail, write;
 
+	/*
+	 * If the time delta since the last event is too big to
+	 * hold in the time field of the event, then we append a
+	 * TIME EXTEND event ahead of the data event.
+	 */
+	if (unlikely(add_timestamp))
+		length += RB_LEN_TIME_EXTEND;
+
 	tail_page = cpu_buffer->tail_page;
 	write = local_add_return(length, &tail_page->write);
 
@@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	tail = write - length;
 
 	/* See if we shot pass the end of this buffer page */
-	if (write > BUF_PAGE_SIZE)
+	if (unlikely(write > BUF_PAGE_SIZE))
 		return rb_move_tail(cpu_buffer, length, tail,
 				    tail_page, ts);
 
@@ -1951,18 +2019,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 
 	event = __rb_page_index(tail_page, tail);
 	kmemcheck_annotate_bitfield(event, bitfield);
-	rb_update_event(event, type, length);
+	rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
 
-	/* The passed in type is zero for DATA */
-	if (likely(!type))
-		local_inc(&tail_page->entries);
+	local_inc(&tail_page->entries);
 
 	/*
 	 * If this is the first commit on the page, then update
 	 * its timestamp.
 	 */
 	if (!tail)
-		tail_page->page->time_stamp = *ts;
+		tail_page->page->time_stamp = ts;
 
 	return event;
 }
@@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
 	unsigned long addr;
 
 	new_index = rb_event_index(event);
-	old_index = new_index + rb_event_length(event);
+	old_index = new_index + rb_event_ts_length(event);
 	addr = (unsigned long)event;
 	addr &= PAGE_MASK;
 
@@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
 	return 0;
 }
 
-static int
-rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
-		  u64 *ts, u64 *delta)
-{
-	struct ring_buffer_event *event;
-	int ret;
-
-	WARN_ONCE(*delta > (1ULL << 59),
-		  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
-		  (unsigned long long)*delta,
-		  (unsigned long long)*ts,
-		  (unsigned long long)cpu_buffer->write_stamp);
-
-	/*
-	 * The delta is too big, we to add a
-	 * new timestamp.
-	 */
-	event = __rb_reserve_next(cpu_buffer,
-				  RINGBUF_TYPE_TIME_EXTEND,
-				  RB_LEN_TIME_EXTEND,
-				  ts);
-	if (!event)
-		return -EBUSY;
-
-	if (PTR_ERR(event) == -EAGAIN)
-		return -EAGAIN;
-
-	/* Only a commited time event can update the write stamp */
-	if (rb_event_is_commit(cpu_buffer, event)) {
-		/*
-		 * If this is the first on the page, then it was
-		 * updated with the page itself. Try to discard it
-		 * and if we can't just make it zero.
-		 */
-		if (rb_event_index(event)) {
-			event->time_delta = *delta & TS_MASK;
-			event->array[0] = *delta >> TS_SHIFT;
-		} else {
-			/* try to discard, since we do not need this */
-			if (!rb_try_to_discard(cpu_buffer, event)) {
-				/* nope, just zero it */
-				event->time_delta = 0;
-				event->array[0] = 0;
-			}
-		}
-		cpu_buffer->write_stamp = *ts;
-		/* let the caller know this was the commit */
-		ret = 1;
-	} else {
-		/* Try to discard the event */
-		if (!rb_try_to_discard(cpu_buffer, event)) {
-			/* Darn, this is just wasted space */
-			event->time_delta = 0;
-			event->array[0] = 0;
-		}
-		ret = 0;
-	}
-
-	*delta = 0;
-
-	return ret;
-}
-
 static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	local_inc(&cpu_buffer->committing);
 	local_inc(&cpu_buffer->commits);
 }
 
-static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	unsigned long commits;
 
@@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 		   unsigned long length)
 {
 	struct ring_buffer_event *event;
-	u64 ts, delta = 0;
-	int commit = 0;
+	u64 ts, delta;
 	int nr_loops = 0;
+	int add_timestamp;
+	u64 diff;
 
 	rb_start_commit(cpu_buffer);
 
@@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 
 	length = rb_calculate_event_length(length);
  again:
+	add_timestamp = 0;
+	delta = 0;
+
 	/*
 	 * We allow for interrupts to reenter here and do a trace.
 	 * If one does, it will cause this original code to loop
@@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 		goto out_fail;
 
 	ts = rb_time_stamp(cpu_buffer->buffer);
+	diff = ts - cpu_buffer->write_stamp;
 
-	/*
-	 * Only the first commit can update the timestamp.
-	 * Yes there is a race here. If an interrupt comes in
-	 * just after the conditional and it traces too, then it
-	 * will also check the deltas. More than one timestamp may
-	 * also be made. But only the entry that did the actual
-	 * commit will be something other than zero.
-	 */
-	if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
-		   rb_page_write(cpu_buffer->tail_page) ==
-		   rb_commit_index(cpu_buffer))) {
-		u64 diff;
-
-		diff = ts - cpu_buffer->write_stamp;
-
-		/* make sure this diff is calculated here */
-		barrier();
-
-		/* Did the write stamp get updated already? */
-		if (unlikely(ts < cpu_buffer->write_stamp))
-			goto get_event;
+	/* make sure this diff is calculated here */
+	barrier();
 
+	/* Did the write stamp get updated already? */
+	if (likely(ts >= cpu_buffer->write_stamp)) {
 		delta = diff;
 		if (unlikely(test_time_stamp(delta))) {
-
-			commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
-			if (commit == -EBUSY)
-				goto out_fail;
-
-			if (commit == -EAGAIN)
-				goto again;
-
-			RB_WARN_ON(cpu_buffer, commit < 0);
+			WARN_ONCE(delta > (1ULL << 59),
+				  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
+				  (unsigned long long)delta,
+				  (unsigned long long)ts,
+				  (unsigned long long)cpu_buffer->write_stamp);
+			add_timestamp = 1;
 		}
 	}
 
-get_event:
-	event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
+	event = __rb_reserve_next(cpu_buffer, length, ts,
+				  delta, add_timestamp);
 	if (unlikely(PTR_ERR(event) == -EAGAIN))
 		goto again;
 
 	if (!event)
 		goto out_fail;
 
-	if (!rb_event_is_commit(cpu_buffer, event))
-		delta = 0;
-
-	event->time_delta = delta;
-
 	return event;
 
 out_fail:
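
With this restructuring, rb_reserve_next_event() computes the delta unconditionally and merely flags add_timestamp when the delta overflows the event's time field, leaving __rb_reserve_next() and rb_update_event() to prepend the TIME EXTEND. A standalone model of just that decision (test_time_stamp() is approximated here as a field-width check, an assumption based on the surrounding code):

```c
#include <stdio.h>

#define TS_SHIFT 27
#define TS_MASK  ((1ULL << TS_SHIFT) - 1)

static int test_time_stamp(unsigned long long delta)
{
	return (delta & ~TS_MASK) != 0;  /* needs more than 27 bits? */
}

int main(void)
{
	unsigned long long write_stamp = 1000;
	unsigned long long ts = 1000 + (1ULL << 30); /* huge gap */
	unsigned long long delta = 0;
	int add_timestamp = 0;

	if (ts >= write_stamp) {
		delta = ts - write_stamp;
		if (test_time_stamp(delta))
			add_timestamp = 1; /* reserve room for a TIME EXTEND */
	}
	printf("delta=%llu add_timestamp=%d\n", delta, add_timestamp);
	return 0;
}
```
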
@@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 
 #define TRACE_RECURSIVE_DEPTH 16
 
-static int trace_recursive_lock(void)
+/* Keep this code out of the fast path cache */
+static noinline void trace_recursive_fail(void)
 {
-	current->trace_recursion++;
-
-	if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
-		return 0;
-
 	/* Disable all tracing before we do anything else */
 	tracing_off_permanent();
 
@@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void)
 		    in_nmi());
 
 	WARN_ON_ONCE(1);
+}
+
+static inline int trace_recursive_lock(void)
+{
+	current->trace_recursion++;
+
+	if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
+		return 0;
+
+	trace_recursive_fail();
+
 	return -1;
 }
 
-static void trace_recursive_unlock(void)
+static inline void trace_recursive_unlock(void)
 {
 	WARN_ON_ONCE(!current->trace_recursion);
 
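
The trace_recursive_lock() split is a classic hot/cold separation: the common case stays a small inlinable increment-and-test, while the failure reporting moves to a noinline function so it does not pollute the fast path's instruction cache. A standalone illustration (GCC/Clang attribute syntax):

```c
#include <stdio.h>

static int recursion;

/* cold path: kept out of line, like trace_recursive_fail() above */
__attribute__((noinline)) static void recursive_fail(void)
{
	fprintf(stderr, "recursion limit hit at depth %d\n", recursion);
}

static inline int recursive_lock(void)
{
	if (++recursion < 16)   /* fast path: one increment and a test */
		return 0;
	recursive_fail();       /* cold path, not inlined */
	return -1;
}

int main(void)
{
	while (!recursive_lock())
		;               /* fails on the 16th acquisition */
	return 0;
}
```
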
@@ -2308,12 +2298,28 @@ static void
 rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		      struct ring_buffer_event *event)
 {
+	u64 delta;
+
 	/*
 	 * The event first in the commit queue updates the
 	 * time stamp.
 	 */
-	if (rb_event_is_commit(cpu_buffer, event))
-		cpu_buffer->write_stamp += event->time_delta;
+	if (rb_event_is_commit(cpu_buffer, event)) {
+		/*
+		 * A commit event that is first on a page
+		 * updates the write timestamp with the page stamp
+		 */
+		if (!rb_event_index(event))
+			cpu_buffer->write_stamp =
+				cpu_buffer->commit_page->page->time_stamp;
+		else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+			delta = event->array[0];
+			delta <<= TS_SHIFT;
+			delta += event->time_delta;
+			cpu_buffer->write_stamp += delta;
+		} else
+			cpu_buffer->write_stamp += event->time_delta;
+	}
 }
 
 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
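
When the committed event is a time extend, rb_update_write_stamp() recombines the two halves stored by rb_add_time_stamp() before advancing write_stamp; this is the counterpart of the split shown earlier. A standalone sketch of the reassembly:

```c
#include <stdio.h>

#define TS_SHIFT 27     /* assumed width of the time_delta field */

int main(void)
{
	unsigned int array0 = 5;         /* high bits, from event->array[0] */
	unsigned int time_delta = 12345; /* low bits, from the event header */
	unsigned long long write_stamp = 1000;

	unsigned long long delta = (unsigned long long)array0 << TS_SHIFT;
	delta += time_delta;
	write_stamp += delta;

	printf("write_stamp advanced to %llu\n", write_stamp);
	return 0;
}
```
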
@@ -2353,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
 
 static inline void rb_event_discard(struct ring_buffer_event *event)
 {
+	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+		event = skip_time_extend(event);
+
 	/* array[0] holds the actual length for the discarded event */
 	event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
 	event->type_len = RINGBUF_TYPE_PADDING;
@@ -3049,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
 
 again:
 	/*
-	 * We repeat when a timestamp is encountered. It is possible
-	 * to get multiple timestamps from an interrupt entering just
-	 * as one timestamp is about to be written, or from discarded
-	 * commits. The most that we can have is the number on a single page.
+	 * We repeat when a time extend is encountered.
+	 * Since the time extend is always attached to a data event,
+	 * we should never loop more than once.
+	 * (We never hit the following condition more than twice).
 	 */
-	if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
 		return NULL;
 
 	reader = rb_get_reader_page(cpu_buffer);
@@ -3130,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 		return NULL;
 
 	/*
-	 * We repeat when a timestamp is encountered.
-	 * We can get multiple timestamps by nested interrupts or also
-	 * if filtering is on (discarding commits). Since discarding
-	 * commits can be frequent we can get a lot of timestamps.
-	 * But we limit them by not adding timestamps if they begin
-	 * at the start of a page.
+	 * We repeat when a time extend is encountered.
+	 * Since the time extend is always attached to a data event,
+	 * we should never loop more than once.
+	 * (We never hit the following condition more than twice).
 	 */
-	if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
 		return NULL;
 
 	if (rb_per_cpu_empty(cpu_buffer))
@@ -3835,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 		if (len > (commit - read))
 			len = (commit - read);
 
-		size = rb_event_length(event);
+		/* Always keep the time extend and data together */
+		size = rb_event_ts_length(event);
 
 		if (len < size)
 			goto out_unlock;
@@ -3857,7 +3865,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 				break;
 
 			event = rb_reader_event(cpu_buffer);
-			size = rb_event_length(event);
+			/* Always keep the time extend and data together */
+			size = rb_event_ts_length(event);
 		} while (len > size);
 
 		/* update bpage */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 001bcd2ccf4a..82d9b8106cd0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3996,13 +3996,9 @@ static void tracing_init_debugfs_percpu(long cpu)
 {
 	struct dentry *d_percpu = tracing_dentry_percpu();
 	struct dentry *d_cpu;
-	/* strlen(cpu) + MAX(log10(cpu)) + '\0' */
-	char cpu_dir[7];
+	char cpu_dir[30]; /* 30 characters should be more than enough */
 
-	if (cpu > 999 || cpu < 0)
-		return;
-
-	sprintf(cpu_dir, "cpu%ld", cpu);
+	snprintf(cpu_dir, 30, "cpu%ld", cpu);
 	d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
 	if (!d_cpu) {
 		pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
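The trace.c change swaps a manually bounds-checked sprintf() pattern for snprintf(), which removes both the artificial cpu > 999 limit and the tightly sized 7-byte buffer. A standalone demonstration:

```c
#include <stdio.h>

int main(void)
{
	char cpu_dir[30];
	long cpu = 123456;      /* any CPU id now formats safely */

	/* snprintf() truncates rather than overflowing the buffer */
	snprintf(cpu_dir, sizeof(cpu_dir), "cpu%ld", cpu);
	puts(cpu_dir);
	return 0;
}
```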