author		Frederic Weisbecker <fweisbec@gmail.com>	2010-04-23 18:04:12 -0400
committer	Frederic Weisbecker <fweisbec@gmail.com>	2010-04-23 21:49:58 -0400
commit		c61e52ee705f938596d307625dce00cc4345aaf0 (patch)
tree		6bb8a1d2662790c6b5ee8d09e0b94d91c97d1da0	/tools/perf/builtin-lock.c
parent		5710fcad7c367adefe5634dc998f1f88780a8457 (diff)
perf: Generalize perf lock's sample event reordering to the session layer
The sample events recorded by perf record are not time ordered, because we have one buffer per cpu for each event (even demultiplexed per task/per cpu for task bound events). But when we read trace events we want them ordered by time, because many state machines are involved.

There are currently two ways perf tools deal with that:

- use -M to multiplex all buffers (perf sched, perf kmem).
  But this creates a lot of contention on SMP machines at record time.

- use post-processing time reordering (perf timechart, perf lock).
  The reordering used by timechart is simple but doesn't scale well with a
  huge flow of events, in terms of both performance and memory use
  (unusable with perf lock, for example). Perf lock has its own sample
  reordering that flushes its memory use on a regular basis and sorts
  based on the previously queued event (a newly queued event is close to
  the previous one most of the time).

This patch exports perf lock's sample reordering facility to the session layer that reads the events. If a tool wants time-ordered sample events, it only needs to set its struct perf_event_ops::ordered_samples to true.

This prepares tracing-based perf tools to get rid of the need to multiplex buffers (-M) or to implement their own reordering.

Also lower the flush period to 2 seconds, as that is already sufficient.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
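For a tool author, the whole opt-in is the new ordered_samples flag in struct perf_event_ops, as the builtin-lock.c hunks below illustrate. The following is a minimal sketch of the consumer side; the sample handler and the ops struct mirror this patch, while the includes, the read_events() driver and the exact perf_session__new()/perf_session__process_events() signatures are assumptions based on the tools/perf tree of that era, not part of this change.

/*
 * Sketch only: a tool opting into session-layer sample reordering.
 * Headers and the read_events() driver are assumed from the tools/perf
 * tree of that era; the ops struct mirrors this patch.
 */
#include <fcntl.h>

#include "util/util.h"
#include "util/event.h"
#include "util/session.h"

static const char *input_name = "perf.data";

static int process_sample_event(event_t *self, struct perf_session *s)
{
	struct sample_data data;

	bzero(&data, sizeof(data));
	event__parse_sample(self, s->sample_type, &data);

	/*
	 * Samples are delivered here already sorted by data.time, so the
	 * tool can feed its state machine directly (builtin-lock.c calls
	 * its process_raw_event() at this point).
	 */
	return 0;
}

static struct perf_event_ops eops = {
	.sample			= process_sample_event,
	.comm			= event__process_comm,
	.ordered_samples	= true,	/* ask the session layer to reorder by time */
};

static int read_events(void)
{
	struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);

	if (!session)
		die("Initializing perf session failed\n");

	return perf_session__process_events(session, &eops);
}

With ordered_samples set, the session layer takes over the queue-and-flush bookkeeping that the hunks below remove from perf lock (queue_raw_event(), flush_raw_event_queue() and friends).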
Diffstat (limited to 'tools/perf/builtin-lock.c')
-rw-r--r--	tools/perf/builtin-lock.c	197
1 file changed, 22 insertions, 175 deletions
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 716d8c544a56..ce276750b140 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -316,8 +316,6 @@ alloc_failed:
 
 static char const *input_name = "perf.data";
 
-static int profile_cpu = -1;
-
 struct raw_event_sample {
 	u32 size;
 	char data[0];
@@ -697,8 +695,7 @@ process_lock_release_event(void *data,
 }
 
 static void
-process_raw_event(void *data, int cpu __used,
-		  u64 timestamp __used, struct thread *thread __used)
+process_raw_event(void *data, int cpu, u64 timestamp, struct thread *thread)
 {
 	struct event *event;
 	int type;
@@ -716,176 +713,6 @@ process_raw_event(void *data, int cpu __used,
 		process_lock_release_event(data, event, cpu, timestamp, thread);
 }
 
-struct raw_event_queue {
-	u64 timestamp;
-	int cpu;
-	void *data;
-	struct thread *thread;
-	struct list_head list;
-};
-
-static LIST_HEAD(raw_event_head);
-
-#define FLUSH_PERIOD (5 * NSEC_PER_SEC)
-
-static u64 flush_limit = ULLONG_MAX;
-static u64 last_flush = 0;
-struct raw_event_queue *last_inserted;
-
-static void flush_raw_event_queue(u64 limit)
-{
-	struct raw_event_queue *tmp, *iter;
-
-	list_for_each_entry_safe(iter, tmp, &raw_event_head, list) {
-		if (iter->timestamp > limit)
-			return;
-
-		if (iter == last_inserted)
-			last_inserted = NULL;
-
-		process_raw_event(iter->data, iter->cpu, iter->timestamp,
-				  iter->thread);
-
-		last_flush = iter->timestamp;
-		list_del(&iter->list);
-		free(iter->data);
-		free(iter);
-	}
-}
-
-static void __queue_raw_event_end(struct raw_event_queue *new)
-{
-	struct raw_event_queue *iter;
-
-	list_for_each_entry_reverse(iter, &raw_event_head, list) {
-		if (iter->timestamp < new->timestamp) {
-			list_add(&new->list, &iter->list);
-			return;
-		}
-	}
-
-	list_add(&new->list, &raw_event_head);
-}
-
-static void __queue_raw_event_before(struct raw_event_queue *new,
-				     struct raw_event_queue *iter)
-{
-	list_for_each_entry_continue_reverse(iter, &raw_event_head, list) {
-		if (iter->timestamp < new->timestamp) {
-			list_add(&new->list, &iter->list);
-			return;
-		}
-	}
-
-	list_add(&new->list, &raw_event_head);
-}
-
-static void __queue_raw_event_after(struct raw_event_queue *new,
-				    struct raw_event_queue *iter)
-{
-	list_for_each_entry_continue(iter, &raw_event_head, list) {
-		if (iter->timestamp > new->timestamp) {
-			list_add_tail(&new->list, &iter->list);
-			return;
-		}
-	}
-	list_add_tail(&new->list, &raw_event_head);
-}
-
-/* The queue is ordered by time */
-static void __queue_raw_event(struct raw_event_queue *new)
-{
-	if (!last_inserted) {
-		__queue_raw_event_end(new);
-		return;
-	}
-
-	/*
-	 * Most of the time the current event has a timestamp
-	 * very close to the last event inserted, unless we just switched
-	 * to another event buffer. Having a sorting based on a list and
-	 * on the last inserted event that is close to the current one is
-	 * probably more efficient than an rbtree based sorting.
-	 */
-	if (last_inserted->timestamp >= new->timestamp)
-		__queue_raw_event_before(new, last_inserted);
-	else
-		__queue_raw_event_after(new, last_inserted);
-}
-
-static void queue_raw_event(void *data, int raw_size, int cpu,
-			    u64 timestamp, struct thread *thread)
-{
-	struct raw_event_queue *new;
-
-	if (flush_limit == ULLONG_MAX)
-		flush_limit = timestamp + FLUSH_PERIOD;
-
-	if (timestamp < last_flush) {
-		printf("Warning: Timestamp below last timeslice flush\n");
-		return;
-	}
-
-	new = malloc(sizeof(*new));
-	if (!new)
-		die("Not enough memory\n");
-
-	new->timestamp = timestamp;
-	new->cpu = cpu;
-	new->thread = thread;
-
-	new->data = malloc(raw_size);
-	if (!new->data)
-		die("Not enough memory\n");
-
-	memcpy(new->data, data, raw_size);
-
-	__queue_raw_event(new);
-	last_inserted = new;
-
-	/*
-	 * We want to have a slice of events covering 2 * FLUSH_PERIOD
-	 * If FLUSH_PERIOD is big enough, it ensures every events that occured
-	 * in the first half of the timeslice have all been buffered and there
-	 * are none remaining (we need that because of the weakly ordered
-	 * event recording we have). Then once we reach the 2 * FLUSH_PERIOD
-	 * timeslice, we flush the first half to be gentle with the memory
-	 * (the second half can still get new events in the middle, so wait
-	 * another period to flush it)
-	 */
-	if (new->timestamp > flush_limit &&
-	    new->timestamp - flush_limit > FLUSH_PERIOD) {
-		flush_limit += FLUSH_PERIOD;
-		flush_raw_event_queue(flush_limit);
-	}
-}
-
-static int process_sample_event(event_t *event, struct perf_session *s)
-{
-	struct thread *thread;
-	struct sample_data data;
-
-	bzero(&data, sizeof(struct sample_data));
-	event__parse_sample(event, s->sample_type, &data);
-	/* CAUTION: using tid as thread.pid */
-	thread = perf_session__findnew(s, data.tid);
-
-	if (thread == NULL) {
-		pr_debug("problem processing %d event, skipping it.\n",
-			 event->header.type);
-		return -1;
-	}
-
-	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
-
-	if (profile_cpu != -1 && profile_cpu != (int) data.cpu)
-		return 0;
-
-	queue_raw_event(data.raw_data, data.raw_size, data.cpu, data.time, thread);
-
-	return 0;
-}
-
 /* TODO: various way to print, coloring, nano or milli sec */
 static void print_result(void)
 {
@@ -963,9 +790,30 @@ static void dump_map(void)
 	}
 }
 
+static int process_sample_event(event_t *self, struct perf_session *s)
+{
+	struct sample_data data;
+	struct thread *thread;
+
+	bzero(&data, sizeof(data));
+	event__parse_sample(self, s->sample_type, &data);
+
+	thread = perf_session__findnew(s, data.tid);
+	if (thread == NULL) {
+		pr_debug("problem processing %d event, skipping it.\n",
+			 self->header.type);
+		return -1;
+	}
+
+	process_raw_event(data.raw_data, data.cpu, data.time, thread);
+
+	return 0;
+}
+
 static struct perf_event_ops eops = {
 	.sample			= process_sample_event,
 	.comm			= event__process_comm,
+	.ordered_samples	= true,
 };
 
 static int read_events(void)
@@ -994,7 +842,6 @@ static void __cmd_report(void)
 	setup_pager();
 	select_key();
 	read_events();
-	flush_raw_event_queue(ULLONG_MAX);
 	sort_result();
 	print_result();
 }