author		Arnaldo Carvalho de Melo <acme@redhat.com>	2017-10-06 09:31:47 -0400
committer	Arnaldo Carvalho de Melo <acme@redhat.com>	2017-10-23 10:20:53 -0400
commit		1695849735752d2ace22d8c424ba579e33df691c (patch)
tree		83133ee193eafdaf8802fc0465c85964adf47974
parent		ead81ee4f887c2e6205dacb83ab2e24367a003c1 (diff)
perf mmap: Move perf_mmap and methods to separate mmap.[ch] files
To better organize the sources, and because we may end up using it
directly, without evlists and evsels.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-oiqrm7grflurnnzo2ovfnslg@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--	tools/perf/util/Build			|   1
-rw-r--r--	tools/perf/util/evlist.c		| 248
-rw-r--r--	tools/perf/util/evlist.h		|  76
-rw-r--r--	tools/perf/util/mmap.c			| 252
-rw-r--r--	tools/perf/util/mmap.h			|  94
-rw-r--r--	tools/perf/util/python-ext-sources	|   1
6 files changed, 349 insertions(+), 323 deletions(-)
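
As a rough illustration of the "using it directly, without evlists and evsels" note above, a caller holding a single already-mmap'ed ring buffer could drain it with just the perf_mmap API moved here. This is a sketch only: drain_one_ring() and its loop are hypothetical and not part of this commit, while the perf_mmap__* calls are the ones declared in the new mmap.h.

#include "util/mmap.h"	/* struct perf_mmap, perf_mmap__read_forward(), perf_mmap__consume() */

/* Hypothetical helper: consume every pending event from one forward ring buffer. */
static void drain_one_ring(struct perf_mmap *map)
{
	union perf_event *event;

	/* check_messup == true: warn, truncate and restart if the writer overtook us */
	while ((event = perf_mmap__read_forward(map, true)) != NULL) {
		/* ... inspect event->header.type / event->header.size here ... */

		/* overwrite == false: publish the new data_tail back to the kernel */
		perf_mmap__consume(map, false);
	}
}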
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 369c3163e68c..a3de7916fe63 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -13,6 +13,7 @@ libperf-y += find_bit.o
 libperf-y += kallsyms.o
 libperf-y += levenshtein.o
 libperf-y += llvm-utils.o
+libperf-y += mmap.o
 libperf-y += memswap.o
 libperf-y += parse-events.o
 libperf-y += perf_regs.o
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6a0d7ffbeba0..c6c891e154a6 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -33,9 +33,6 @@
 #include <linux/log2.h>
 #include <linux/err.h>
 
-static void perf_mmap__munmap(struct perf_mmap *map);
-static void perf_mmap__put(struct perf_mmap *map);
-
 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
 
@@ -704,129 +701,6 @@ static int perf_evlist__resume(struct perf_evlist *evlist)
 	return perf_evlist__set_paused(evlist, false);
 }
 
-/* When check_messup is true, 'end' must points to a good entry */
-static union perf_event *
-perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start,
-		u64 end, u64 *prev)
-{
-	unsigned char *data = md->base + page_size;
-	union perf_event *event = NULL;
-	int diff = end - start;
-
-	if (check_messup) {
-		/*
-		 * If we're further behind than half the buffer, there's a chance
-		 * the writer will bite our tail and mess up the samples under us.
-		 *
-		 * If we somehow ended up ahead of the 'end', we got messed up.
-		 *
-		 * In either case, truncate and restart at 'end'.
-		 */
-		if (diff > md->mask / 2 || diff < 0) {
-			fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
-
-			/*
-			 * 'end' points to a known good entry, start there.
-			 */
-			start = end;
-			diff = 0;
-		}
-	}
-
-	if (diff >= (int)sizeof(event->header)) {
-		size_t size;
-
-		event = (union perf_event *)&data[start & md->mask];
-		size = event->header.size;
-
-		if (size < sizeof(event->header) || diff < (int)size) {
-			event = NULL;
-			goto broken_event;
-		}
-
-		/*
-		 * Event straddles the mmap boundary -- header should always
-		 * be inside due to u64 alignment of output.
-		 */
-		if ((start & md->mask) + size != ((start + size) & md->mask)) {
-			unsigned int offset = start;
-			unsigned int len = min(sizeof(*event), size), cpy;
-			void *dst = md->event_copy;
-
-			do {
-				cpy = min(md->mask + 1 - (offset & md->mask), len);
-				memcpy(dst, &data[offset & md->mask], cpy);
-				offset += cpy;
-				dst += cpy;
-				len -= cpy;
-			} while (len);
-
-			event = (union perf_event *) md->event_copy;
-		}
-
-		start += size;
-	}
-
-broken_event:
-	if (prev)
-		*prev = start;
-
-	return event;
-}
-
-union perf_event *perf_mmap__read_forward(struct perf_mmap *md, bool check_messup)
-{
-	u64 head;
-	u64 old = md->prev;
-
-	/*
-	 * Check if event was unmapped due to a POLLHUP/POLLERR.
-	 */
-	if (!refcount_read(&md->refcnt))
-		return NULL;
-
-	head = perf_mmap__read_head(md);
-
-	return perf_mmap__read(md, check_messup, old, head, &md->prev);
-}
-
-union perf_event *
-perf_mmap__read_backward(struct perf_mmap *md)
-{
-	u64 head, end;
-	u64 start = md->prev;
-
-	/*
-	 * Check if event was unmapped due to a POLLHUP/POLLERR.
-	 */
-	if (!refcount_read(&md->refcnt))
-		return NULL;
-
-	head = perf_mmap__read_head(md);
-	if (!head)
-		return NULL;
-
-	/*
-	 * 'head' pointer starts from 0. Kernel minus sizeof(record) form
-	 * it each time when kernel writes to it, so in fact 'head' is
-	 * negative. 'end' pointer is made manually by adding the size of
-	 * the ring buffer to 'head' pointer, means the validate data can
-	 * read is the whole ring buffer. If 'end' is positive, the ring
-	 * buffer has not fully filled, so we must adjust 'end' to 0.
-	 *
-	 * However, since both 'head' and 'end' is unsigned, we can't
-	 * simply compare 'end' against 0. Here we compare '-head' and
-	 * the size of the ring buffer, where -head is the number of bytes
-	 * kernel write to the ring buffer.
-	 */
-	if (-head < (u64)(md->mask + 1))
-		end = 0;
-	else
-		end = head + md->mask + 1;
-
-	return perf_mmap__read(md, false, start, end, &md->prev);
-}
-
 union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx)
 {
 	struct perf_mmap *md = &evlist->mmap[idx];
@@ -857,96 +731,16 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 	return perf_evlist__mmap_read_forward(evlist, idx);
 }
 
-void perf_mmap__read_catchup(struct perf_mmap *md)
-{
-	u64 head;
-
-	if (!refcount_read(&md->refcnt))
-		return;
-
-	head = perf_mmap__read_head(md);
-	md->prev = head;
-}
-
 void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
 {
 	perf_mmap__read_catchup(&evlist->mmap[idx]);
 }
 
-static bool perf_mmap__empty(struct perf_mmap *md)
-{
-	return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
-}
-
-static void perf_mmap__get(struct perf_mmap *map)
-{
-	refcount_inc(&map->refcnt);
-}
-
-static void perf_mmap__put(struct perf_mmap *md)
-{
-	BUG_ON(md->base && refcount_read(&md->refcnt) == 0);
-
-	if (refcount_dec_and_test(&md->refcnt))
-		perf_mmap__munmap(md);
-}
-
-void perf_mmap__consume(struct perf_mmap *md, bool overwrite)
-{
-	if (!overwrite) {
-		u64 old = md->prev;
-
-		perf_mmap__write_tail(md, old);
-	}
-
-	if (refcount_read(&md->refcnt) == 1 && perf_mmap__empty(md))
-		perf_mmap__put(md);
-}
-
 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
 {
 	perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite);
 }
 
-int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
-			       struct auxtrace_mmap_params *mp __maybe_unused,
-			       void *userpg __maybe_unused,
-			       int fd __maybe_unused)
-{
-	return 0;
-}
-
-void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
-{
-}
-
-void __weak auxtrace_mmap_params__init(
-			struct auxtrace_mmap_params *mp __maybe_unused,
-			off_t auxtrace_offset __maybe_unused,
-			unsigned int auxtrace_pages __maybe_unused,
-			bool auxtrace_overwrite __maybe_unused)
-{
-}
-
-void __weak auxtrace_mmap_params__set_idx(
-			struct auxtrace_mmap_params *mp __maybe_unused,
-			struct perf_evlist *evlist __maybe_unused,
-			int idx __maybe_unused,
-			bool per_cpu __maybe_unused)
-{
-}
-
-static void perf_mmap__munmap(struct perf_mmap *map)
-{
-	if (map->base != NULL) {
-		munmap(map->base, perf_mmap__mmap_len(map));
-		map->base = NULL;
-		map->fd = -1;
-		refcount_set(&map->refcnt, 0);
-	}
-	auxtrace_mmap__munmap(&map->auxtrace_mmap);
-}
-
 static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
 {
 	int i;
@@ -995,48 +789,6 @@ static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist)
 	return map;
 }
 
-struct mmap_params {
-	int prot;
-	int mask;
-	struct auxtrace_mmap_params auxtrace_mp;
-};
-
-static int perf_mmap__mmap(struct perf_mmap *map,
-			   struct mmap_params *mp, int fd)
-{
-	/*
-	 * The last one will be done at perf_evlist__mmap_consume(), so that we
-	 * make sure we don't prevent tools from consuming every last event in
-	 * the ring buffer.
-	 *
-	 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
-	 * anymore, but the last events for it are still in the ring buffer,
-	 * waiting to be consumed.
-	 *
-	 * Tools can chose to ignore this at their own discretion, but the
-	 * evlist layer can't just drop it when filtering events in
-	 * perf_evlist__filter_pollfd().
-	 */
-	refcount_set(&map->refcnt, 2);
-	map->prev = 0;
-	map->mask = mp->mask;
-	map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
-			 MAP_SHARED, fd, 0);
-	if (map->base == MAP_FAILED) {
-		pr_debug2("failed to mmap perf event ring buffer, error %d\n",
-			  errno);
-		map->base = NULL;
-		return -1;
-	}
-	map->fd = fd;
-
-	if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
-				&mp->auxtrace_mp, map->base, fd))
-		return -1;
-
-	return 0;
-}
-
 static bool
 perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
 			 struct perf_evsel *evsel)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index b1c14f1fdc27..8c433e95bd9a 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -11,8 +11,8 @@
 #include "../perf.h"
 #include "event.h"
 #include "evsel.h"
+#include "mmap.h"
 #include "util.h"
-#include "auxtrace.h"
 #include <signal.h>
 #include <unistd.h>
 
@@ -24,55 +24,6 @@ struct record_opts;
 #define PERF_EVLIST__HLIST_BITS 8
 #define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
 
-/**
- * struct perf_mmap - perf's ring buffer mmap details
- *
- * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
- */
-struct perf_mmap {
-	void		 *base;
-	int		 mask;
-	int		 fd;
-	refcount_t	 refcnt;
-	u64		 prev;
-	struct auxtrace_mmap auxtrace_mmap;
-	char		 event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
-};
-
-static inline size_t
-perf_mmap__mmap_len(struct perf_mmap *map)
-{
-	return map->mask + 1 + page_size;
-}
-
-/*
- * State machine of bkw_mmap_state:
- *
- *                     .________________(forbid)_____________.
- *                     |                                      V
- *  NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
- *                     ^  ^              |   ^               |
- *                     |  |__(forbid)____/   |___(forbid)___/|
- *                     |                                     |
- *                      \_________________(3)_______________/
- *
- * NOTREADY     : Backward ring buffers are not ready
- * RUNNING      : Backward ring buffers are recording
- * DATA_PENDING : We are required to collect data from backward ring buffers
- * EMPTY        : We have collected data from backward ring buffers.
- *
- * (0): Setup backward ring buffer
- * (1): Pause ring buffers for reading
- * (2): Read from ring buffers
- * (3): Resume ring buffers for recording
- */
-enum bkw_mmap_state {
-	BKW_MMAP_NOTREADY,
-	BKW_MMAP_RUNNING,
-	BKW_MMAP_DATA_PENDING,
-	BKW_MMAP_EMPTY,
-};
-
 struct perf_evlist {
 	struct list_head entries;
 	struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
@@ -177,12 +128,6 @@ struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id);
 
 void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, enum bkw_mmap_state state);
 
-union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup);
-union perf_event *perf_mmap__read_backward(struct perf_mmap *map);
-
-void perf_mmap__read_catchup(struct perf_mmap *md);
-void perf_mmap__consume(struct perf_mmap *md, bool overwrite);
-
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
 
 union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist,
@@ -286,25 +231,6 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp);
 int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size);
 int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size);
 
-static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
-{
-	struct perf_event_mmap_page *pc = mm->base;
-	u64 head = ACCESS_ONCE(pc->data_head);
-	rmb();
-	return head;
-}
-
-static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
-{
-	struct perf_event_mmap_page *pc = md->base;
-
-	/*
-	 * ensure all reads are done before we write the tail out.
-	 */
-	mb();
-	pc->data_tail = tail;
-}
-
 bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str);
 void perf_evlist__to_front(struct perf_evlist *evlist,
 			   struct perf_evsel *move_evsel);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
new file mode 100644
index 000000000000..dfc4a007f2c6
--- /dev/null
+++ b/tools/perf/util/mmap.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright (C) 2011-2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from evlist.c builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <sys/mman.h>
+#include "event.h"
+#include "mmap.h"
+#include "util.h" /* page_size */
+
+size_t perf_mmap__mmap_len(struct perf_mmap *map)
+{
+	return map->mask + 1 + page_size;
+}
+
+/* When check_messup is true, 'end' must points to a good entry */
+static union perf_event *perf_mmap__read(struct perf_mmap *map, bool check_messup,
+					 u64 start, u64 end, u64 *prev)
+{
+	unsigned char *data = map->base + page_size;
+	union perf_event *event = NULL;
+	int diff = end - start;
+
+	if (check_messup) {
+		/*
+		 * If we're further behind than half the buffer, there's a chance
+		 * the writer will bite our tail and mess up the samples under us.
+		 *
+		 * If we somehow ended up ahead of the 'end', we got messed up.
+		 *
+		 * In either case, truncate and restart at 'end'.
+		 */
+		if (diff > map->mask / 2 || diff < 0) {
+			fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
+
+			/*
+			 * 'end' points to a known good entry, start there.
+			 */
+			start = end;
+			diff = 0;
+		}
+	}
+
+	if (diff >= (int)sizeof(event->header)) {
+		size_t size;
+
+		event = (union perf_event *)&data[start & map->mask];
+		size = event->header.size;
+
+		if (size < sizeof(event->header) || diff < (int)size) {
+			event = NULL;
+			goto broken_event;
+		}
+
+		/*
+		 * Event straddles the mmap boundary -- header should always
+		 * be inside due to u64 alignment of output.
+		 */
+		if ((start & map->mask) + size != ((start + size) & map->mask)) {
+			unsigned int offset = start;
+			unsigned int len = min(sizeof(*event), size), cpy;
+			void *dst = map->event_copy;
+
+			do {
+				cpy = min(map->mask + 1 - (offset & map->mask), len);
+				memcpy(dst, &data[offset & map->mask], cpy);
+				offset += cpy;
+				dst += cpy;
+				len -= cpy;
+			} while (len);
+
+			event = (union perf_event *)map->event_copy;
+		}
+
+		start += size;
+	}
+
+broken_event:
+	if (prev)
+		*prev = start;
+
+	return event;
+}
+
+union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup)
+{
+	u64 head;
+	u64 old = map->prev;
+
+	/*
+	 * Check if event was unmapped due to a POLLHUP/POLLERR.
+	 */
+	if (!refcount_read(&map->refcnt))
+		return NULL;
+
+	head = perf_mmap__read_head(map);
+
+	return perf_mmap__read(map, check_messup, old, head, &map->prev);
+}
+
+union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
+{
+	u64 head, end;
+	u64 start = map->prev;
+
+	/*
+	 * Check if event was unmapped due to a POLLHUP/POLLERR.
+	 */
+	if (!refcount_read(&map->refcnt))
+		return NULL;
+
+	head = perf_mmap__read_head(map);
+	if (!head)
+		return NULL;
+
+	/*
+	 * 'head' pointer starts from 0. Kernel minus sizeof(record) form
+	 * it each time when kernel writes to it, so in fact 'head' is
+	 * negative. 'end' pointer is made manually by adding the size of
+	 * the ring buffer to 'head' pointer, means the validate data can
+	 * read is the whole ring buffer. If 'end' is positive, the ring
+	 * buffer has not fully filled, so we must adjust 'end' to 0.
+	 *
+	 * However, since both 'head' and 'end' is unsigned, we can't
+	 * simply compare 'end' against 0. Here we compare '-head' and
+	 * the size of the ring buffer, where -head is the number of bytes
+	 * kernel write to the ring buffer.
+	 */
+	if (-head < (u64)(map->mask + 1))
+		end = 0;
+	else
+		end = head + map->mask + 1;
+
+	return perf_mmap__read(map, false, start, end, &map->prev);
+}
+
+void perf_mmap__read_catchup(struct perf_mmap *map)
+{
+	u64 head;
+
+	if (!refcount_read(&map->refcnt))
+		return;
+
+	head = perf_mmap__read_head(map);
+	map->prev = head;
+}
+
+static bool perf_mmap__empty(struct perf_mmap *map)
+{
+	return perf_mmap__read_head(map) == map->prev && !map->auxtrace_mmap.base;
+}
+
+void perf_mmap__get(struct perf_mmap *map)
+{
+	refcount_inc(&map->refcnt);
+}
+
+void perf_mmap__put(struct perf_mmap *map)
+{
+	BUG_ON(map->base && refcount_read(&map->refcnt) == 0);
+
+	if (refcount_dec_and_test(&map->refcnt))
+		perf_mmap__munmap(map);
+}
+
+void perf_mmap__consume(struct perf_mmap *map, bool overwrite)
+{
+	if (!overwrite) {
+		u64 old = map->prev;
+
+		perf_mmap__write_tail(map, old);
+	}
+
+	if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
+		perf_mmap__put(map);
+}
+
+int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
+			       struct auxtrace_mmap_params *mp __maybe_unused,
+			       void *userpg __maybe_unused,
+			       int fd __maybe_unused)
+{
+	return 0;
+}
+
+void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
+{
+}
+
+void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_unused,
+				       off_t auxtrace_offset __maybe_unused,
+				       unsigned int auxtrace_pages __maybe_unused,
+				       bool auxtrace_overwrite __maybe_unused)
+{
+}
+
+void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused,
+					  struct perf_evlist *evlist __maybe_unused,
+					  int idx __maybe_unused,
+					  bool per_cpu __maybe_unused)
+{
+}
+
+void perf_mmap__munmap(struct perf_mmap *map)
+{
+	if (map->base != NULL) {
+		munmap(map->base, perf_mmap__mmap_len(map));
+		map->base = NULL;
+		map->fd = -1;
+		refcount_set(&map->refcnt, 0);
+	}
+	auxtrace_mmap__munmap(&map->auxtrace_mmap);
+}
+
+int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd)
+{
+	/*
+	 * The last one will be done at perf_evlist__mmap_consume(), so that we
+	 * make sure we don't prevent tools from consuming every last event in
+	 * the ring buffer.
+	 *
+	 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
+	 * anymore, but the last events for it are still in the ring buffer,
+	 * waiting to be consumed.
+	 *
+	 * Tools can chose to ignore this at their own discretion, but the
+	 * evlist layer can't just drop it when filtering events in
+	 * perf_evlist__filter_pollfd().
+	 */
+	refcount_set(&map->refcnt, 2);
+	map->prev = 0;
+	map->mask = mp->mask;
+	map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
+			 MAP_SHARED, fd, 0);
+	if (map->base == MAP_FAILED) {
+		pr_debug2("failed to mmap perf event ring buffer, error %d\n",
+			  errno);
+		map->base = NULL;
+		return -1;
+	}
+	map->fd = fd;
+
+	if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
+				&mp->auxtrace_mp, map->base, fd))
+		return -1;
+
+	return 0;
+}
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
new file mode 100644
index 000000000000..f37ff45c8ec1
--- /dev/null
+++ b/tools/perf/util/mmap.h
@@ -0,0 +1,94 @@
+#ifndef __PERF_MMAP_H
+#define __PERF_MMAP_H 1
+
+#include <linux/compiler.h>
+#include <linux/refcount.h>
+#include <linux/types.h>
+#include <asm/barrier.h>
+#include <stdbool.h>
+#include "auxtrace.h"
+#include "event.h"
+
+/**
+ * struct perf_mmap - perf's ring buffer mmap details
+ *
+ * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
+ */
+struct perf_mmap {
+	void		 *base;
+	int		 mask;
+	int		 fd;
+	refcount_t	 refcnt;
+	u64		 prev;
+	struct auxtrace_mmap auxtrace_mmap;
+	char		 event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+};
+
+/*
+ * State machine of bkw_mmap_state:
+ *
+ *                     .________________(forbid)_____________.
+ *                     |                                      V
+ *  NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
+ *                     ^  ^              |   ^               |
+ *                     |  |__(forbid)____/   |___(forbid)___/|
+ *                     |                                     |
+ *                      \_________________(3)_______________/
+ *
+ * NOTREADY     : Backward ring buffers are not ready
+ * RUNNING      : Backward ring buffers are recording
+ * DATA_PENDING : We are required to collect data from backward ring buffers
+ * EMPTY        : We have collected data from backward ring buffers.
+ *
+ * (0): Setup backward ring buffer
+ * (1): Pause ring buffers for reading
+ * (2): Read from ring buffers
+ * (3): Resume ring buffers for recording
+ */
+enum bkw_mmap_state {
+	BKW_MMAP_NOTREADY,
+	BKW_MMAP_RUNNING,
+	BKW_MMAP_DATA_PENDING,
+	BKW_MMAP_EMPTY,
+};
+
+struct mmap_params {
+	int prot, mask;
+	struct auxtrace_mmap_params auxtrace_mp;
+};
+
+int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd);
+void perf_mmap__munmap(struct perf_mmap *map);
+
+void perf_mmap__get(struct perf_mmap *map);
+void perf_mmap__put(struct perf_mmap *map);
+
+void perf_mmap__consume(struct perf_mmap *map, bool overwrite);
+
+void perf_mmap__read_catchup(struct perf_mmap *md);
+
+static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
+{
+	struct perf_event_mmap_page *pc = mm->base;
+	u64 head = ACCESS_ONCE(pc->data_head);
+	rmb();
+	return head;
+}
+
+static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
+{
+	struct perf_event_mmap_page *pc = md->base;
+
+	/*
+	 * ensure all reads are done before we write the tail out.
+	 */
+	mb();
+	pc->data_tail = tail;
+}
+
+union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup);
+union perf_event *perf_mmap__read_backward(struct perf_mmap *map);
+
+size_t perf_mmap__mmap_len(struct perf_mmap *map);
+
+#endif /*__PERF_MMAP_H */
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index e66dc495809a..b4f2f06722a7 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -10,6 +10,7 @@ util/ctype.c
 util/evlist.c
 util/evsel.c
 util/cpumap.c
+util/mmap.c
 util/namespaces.c
 ../lib/bitmap.c
 ../lib/find_bit.c