diff options
112 files changed, 20892 insertions, 111 deletions
diff --git a/Documentation/perf_counter/.gitignore b/Documentation/perf_counter/.gitignore new file mode 100644 index 000000000000..41c0b20a76ce --- /dev/null +++ b/Documentation/perf_counter/.gitignore | |||
@@ -0,0 +1,179 @@ | |||
1 | GIT-BUILD-OPTIONS | ||
2 | GIT-CFLAGS | ||
3 | GIT-GUI-VARS | ||
4 | GIT-VERSION-FILE | ||
5 | git | ||
6 | git-add | ||
7 | git-add--interactive | ||
8 | git-am | ||
9 | git-annotate | ||
10 | git-apply | ||
11 | git-archimport | ||
12 | git-archive | ||
13 | git-bisect | ||
14 | git-bisect--helper | ||
15 | git-blame | ||
16 | git-branch | ||
17 | git-bundle | ||
18 | git-cat-file | ||
19 | git-check-attr | ||
20 | git-check-ref-format | ||
21 | git-checkout | ||
22 | git-checkout-index | ||
23 | git-cherry | ||
24 | git-cherry-pick | ||
25 | git-clean | ||
26 | git-clone | ||
27 | git-commit | ||
28 | git-commit-tree | ||
29 | git-config | ||
30 | git-count-objects | ||
31 | git-cvsexportcommit | ||
32 | git-cvsimport | ||
33 | git-cvsserver | ||
34 | git-daemon | ||
35 | git-diff | ||
36 | git-diff-files | ||
37 | git-diff-index | ||
38 | git-diff-tree | ||
39 | git-difftool | ||
40 | git-difftool--helper | ||
41 | git-describe | ||
42 | git-fast-export | ||
43 | git-fast-import | ||
44 | git-fetch | ||
45 | git-fetch--tool | ||
46 | git-fetch-pack | ||
47 | git-filter-branch | ||
48 | git-fmt-merge-msg | ||
49 | git-for-each-ref | ||
50 | git-format-patch | ||
51 | git-fsck | ||
52 | git-fsck-objects | ||
53 | git-gc | ||
54 | git-get-tar-commit-id | ||
55 | git-grep | ||
56 | git-hash-object | ||
57 | git-help | ||
58 | git-http-fetch | ||
59 | git-http-push | ||
60 | git-imap-send | ||
61 | git-index-pack | ||
62 | git-init | ||
63 | git-init-db | ||
64 | git-instaweb | ||
65 | git-log | ||
66 | git-lost-found | ||
67 | git-ls-files | ||
68 | git-ls-remote | ||
69 | git-ls-tree | ||
70 | git-mailinfo | ||
71 | git-mailsplit | ||
72 | git-merge | ||
73 | git-merge-base | ||
74 | git-merge-index | ||
75 | git-merge-file | ||
76 | git-merge-tree | ||
77 | git-merge-octopus | ||
78 | git-merge-one-file | ||
79 | git-merge-ours | ||
80 | git-merge-recursive | ||
81 | git-merge-resolve | ||
82 | git-merge-subtree | ||
83 | git-mergetool | ||
84 | git-mergetool--lib | ||
85 | git-mktag | ||
86 | git-mktree | ||
87 | git-name-rev | ||
88 | git-mv | ||
89 | git-pack-redundant | ||
90 | git-pack-objects | ||
91 | git-pack-refs | ||
92 | git-parse-remote | ||
93 | git-patch-id | ||
94 | git-peek-remote | ||
95 | git-prune | ||
96 | git-prune-packed | ||
97 | git-pull | ||
98 | git-push | ||
99 | git-quiltimport | ||
100 | git-read-tree | ||
101 | git-rebase | ||
102 | git-rebase--interactive | ||
103 | git-receive-pack | ||
104 | git-reflog | ||
105 | git-relink | ||
106 | git-remote | ||
107 | git-repack | ||
108 | git-repo-config | ||
109 | git-request-pull | ||
110 | git-rerere | ||
111 | git-reset | ||
112 | git-rev-list | ||
113 | git-rev-parse | ||
114 | git-revert | ||
115 | git-rm | ||
116 | git-send-email | ||
117 | git-send-pack | ||
118 | git-sh-setup | ||
119 | git-shell | ||
120 | git-shortlog | ||
121 | git-show | ||
122 | git-show-branch | ||
123 | git-show-index | ||
124 | git-show-ref | ||
125 | git-stage | ||
126 | git-stash | ||
127 | git-status | ||
128 | git-stripspace | ||
129 | git-submodule | ||
130 | git-svn | ||
131 | git-symbolic-ref | ||
132 | git-tag | ||
133 | git-tar-tree | ||
134 | git-unpack-file | ||
135 | git-unpack-objects | ||
136 | git-update-index | ||
137 | git-update-ref | ||
138 | git-update-server-info | ||
139 | git-upload-archive | ||
140 | git-upload-pack | ||
141 | git-var | ||
142 | git-verify-pack | ||
143 | git-verify-tag | ||
144 | git-web--browse | ||
145 | git-whatchanged | ||
146 | git-write-tree | ||
147 | git-core-*/?* | ||
148 | gitk-wish | ||
149 | gitweb/gitweb.cgi | ||
150 | test-chmtime | ||
151 | test-ctype | ||
152 | test-date | ||
153 | test-delta | ||
154 | test-dump-cache-tree | ||
155 | test-genrandom | ||
156 | test-match-trees | ||
157 | test-parse-options | ||
158 | test-path-utils | ||
159 | test-sha1 | ||
160 | test-sigchain | ||
161 | common-cmds.h | ||
162 | *.tar.gz | ||
163 | *.dsc | ||
164 | *.deb | ||
165 | git.spec | ||
166 | *.exe | ||
167 | *.[aos] | ||
168 | *.py[co] | ||
169 | config.mak | ||
170 | autom4te.cache | ||
171 | config.cache | ||
172 | config.log | ||
173 | config.status | ||
174 | config.mak.autogen | ||
175 | config.mak.append | ||
176 | configure | ||
177 | tags | ||
178 | TAGS | ||
179 | cscope* | ||
diff --git a/Documentation/perf_counter/Documentation/perf-help.txt b/Documentation/perf_counter/Documentation/perf-help.txt new file mode 100644 index 000000000000..f85fed5a7edb --- /dev/null +++ b/Documentation/perf_counter/Documentation/perf-help.txt | |||
@@ -0,0 +1,38 @@ | |||
1 | perf-help(1) | ||
2 | =========== | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-help - display help information about perf | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | 'perf help' [-a|--all] [COMMAND] | ||
11 | |||
12 | DESCRIPTION | ||
13 | ----------- | ||
14 | |||
15 | With no options and no COMMAND given, the synopsis of the 'perf' | ||
16 | command and a list of the most commonly used perf commands are printed | ||
17 | on the standard output. | ||
18 | |||
19 | If the option '--all' or '-a' is given, then all available commands are | ||
20 | printed on the standard output. | ||
21 | |||
22 | If a perf command is named, a manual page for that command is brought | ||
23 | up. The 'man' program is used by default for this purpose, but this | ||
24 | can be overridden by other options or configuration variables. | ||
25 | |||
26 | Note that `perf --help ...` is identical to `perf help ...` because the | ||
27 | former is internally converted into the latter. | ||
28 | |||
29 | OPTIONS | ||
30 | ------- | ||
31 | -a:: | ||
32 | --all:: | ||
33 | Prints all the available commands on the standard output. This | ||
34 | option supersedes any other option. | ||
35 | |||
36 | PERF | ||
37 | ---- | ||
38 | Part of the linkperf:perf[1] suite | ||
diff --git a/Documentation/perf_counter/Documentation/perf-record.txt b/Documentation/perf_counter/Documentation/perf-record.txt new file mode 100644 index 000000000000..d07700e35eb2 --- /dev/null +++ b/Documentation/perf_counter/Documentation/perf-record.txt | |||
@@ -0,0 +1,63 @@ | |||
1 | perf-record(1) | ||
2 | ============== | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-record - Run a command and record its profile into output.perf | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | This command runs a command and gathers a performance counter profile | ||
16 | from it, into output.perf - without displaying anything. | ||
17 | |||
18 | This file can then be inspected later on, using 'perf report'. | ||
19 | |||
20 | |||
21 | OPTIONS | ||
22 | ------- | ||
23 | <command>...:: | ||
24 | Any command you can specify in a shell. | ||
25 | |||
26 | -e:: | ||
27 | --event=:: | ||
28 | 0:0: cpu-cycles | ||
29 | 0:0: cycles | ||
30 | 0:1: instructions | ||
31 | 0:2: cache-references | ||
32 | 0:3: cache-misses | ||
33 | 0:4: branch-instructions | ||
34 | 0:4: branches | ||
35 | 0:5: branch-misses | ||
36 | 0:6: bus-cycles | ||
37 | 1:0: cpu-clock | ||
38 | 1:1: task-clock | ||
39 | 1:2: page-faults | ||
40 | 1:2: faults | ||
41 | 1:5: minor-faults | ||
42 | 1:6: major-faults | ||
43 | 1:3: context-switches | ||
44 | 1:3: cs | ||
45 | 1:4: cpu-migrations | ||
46 | 1:4: migrations | ||
47 | rNNN: raw PMU events (eventsel+umask) | ||
48 | |||
49 | -a:: | ||
50 | system-wide collection | ||
51 | |||
52 | -l:: | ||
53 | scale counter values | ||
54 | |||
55 | Configuration | ||
56 | ------------- | ||
57 | |||
58 | EXAMPLES | ||
59 | -------- | ||
60 | |||
61 | SEE ALSO | ||
62 | -------- | ||
63 | linkperf:perf-stat[1] | ||
diff --git a/Documentation/perf_counter/Documentation/perf-stat.txt b/Documentation/perf_counter/Documentation/perf-stat.txt new file mode 100644 index 000000000000..7fcab271e570 --- /dev/null +++ b/Documentation/perf_counter/Documentation/perf-stat.txt | |||
@@ -0,0 +1,76 @@ | |||
1 | perf-stat(1) | ||
2 | ============ | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-stat - Run a command and gather performance counter statistics | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | This command runs a command and gathers performance counter statistics | ||
16 | from it. | ||
17 | |||
18 | |||
19 | OPTIONS | ||
20 | ------- | ||
21 | <command>...:: | ||
22 | Any command you can specify in a shell. | ||
23 | |||
24 | -e:: | ||
25 | --event=:: | ||
26 | 0:0: cpu-cycles | ||
27 | 0:0: cycles | ||
28 | 0:1: instructions | ||
29 | 0:2: cache-references | ||
30 | 0:3: cache-misses | ||
31 | 0:4: branch-instructions | ||
32 | 0:4: branches | ||
33 | 0:5: branch-misses | ||
34 | 0:6: bus-cycles | ||
35 | 1:0: cpu-clock | ||
36 | 1:1: task-clock | ||
37 | 1:2: page-faults | ||
38 | 1:2: faults | ||
39 | 1:5: minor-faults | ||
40 | 1:6: major-faults | ||
41 | 1:3: context-switches | ||
42 | 1:3: cs | ||
43 | 1:4: cpu-migrations | ||
44 | 1:4: migrations | ||
45 | rNNN: raw PMU events (eventsel+umask) | ||
46 | |||
47 | -a:: | ||
48 | system-wide collection | ||
49 | |||
50 | -l:: | ||
51 | scale counter values | ||
52 | |||
53 | Configuration | ||
54 | ------------- | ||
55 | |||
56 | EXAMPLES | ||
57 | -------- | ||
58 | |||
59 | $ perf stat sleep 1 | ||
60 | |||
61 | Performance counter stats for 'sleep': | ||
62 | |||
63 | 0.678356 task clock ticks (msecs) | ||
64 | 7 context switches (events) | ||
65 | 4 CPU migrations (events) | ||
66 | 232 pagefaults (events) | ||
67 | 1810403 CPU cycles (events) | ||
68 | 946759 instructions (events) | ||
69 | 18952 cache references (events) | ||
70 | 4885 cache misses (events) | ||
71 | |||
72 | Wall-clock time elapsed: 1001.252894 msecs | ||
73 | |||
74 | SEE ALSO | ||
75 | -------- | ||
76 | linkperf:perf-top[1] | ||
diff --git a/Documentation/perf_counter/Documentation/perf-top.txt b/Documentation/perf_counter/Documentation/perf-top.txt new file mode 100644 index 000000000000..057333b72534 --- /dev/null +++ b/Documentation/perf_counter/Documentation/perf-top.txt | |||
@@ -0,0 +1,61 @@ | |||
1 | perf-top(1) | ||
2 | ========== | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-top - Run a command and profile it | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | This command runs a command and gathers a performance counter profile | ||
16 | from it. | ||
17 | |||
18 | |||
19 | OPTIONS | ||
20 | ------- | ||
21 | <command>...:: | ||
22 | Any command you can specify in a shell. | ||
23 | |||
24 | -e:: | ||
25 | --event=:: | ||
26 | 0:0: cpu-cycles | ||
27 | 0:0: cycles | ||
28 | 0:1: instructions | ||
29 | 0:2: cache-references | ||
30 | 0:3: cache-misses | ||
31 | 0:4: branch-instructions | ||
32 | 0:4: branches | ||
33 | 0:5: branch-misses | ||
34 | 0:6: bus-cycles | ||
35 | 1:0: cpu-clock | ||
36 | 1:1: task-clock | ||
37 | 1:2: page-faults | ||
38 | 1:2: faults | ||
39 | 1:5: minor-faults | ||
40 | 1:6: major-faults | ||
41 | 1:3: context-switches | ||
42 | 1:3: cs | ||
43 | 1:4: cpu-migrations | ||
44 | 1:4: migrations | ||
45 | rNNN: raw PMU events (eventsel+umask) | ||
46 | |||
47 | -a:: | ||
48 | system-wide collection | ||
49 | |||
50 | -l:: | ||
51 | scale counter values | ||
52 | |||
53 | Configuration | ||
54 | ------------- | ||
55 | |||
56 | EXAMPLES | ||
57 | -------- | ||
58 | |||
59 | SEE ALSO | ||
60 | -------- | ||
61 | linkperf:perf-stat[1] | ||
diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile new file mode 100644 index 000000000000..481e4c26cd45 --- /dev/null +++ b/Documentation/perf_counter/Makefile | |||
@@ -0,0 +1,849 @@ | |||
1 | # The default target of this Makefile is... | ||
2 | all:: | ||
3 | |||
4 | # Define V=1 to have a more verbose compile. | ||
5 | # | ||
6 | # Define SNPRINTF_RETURNS_BOGUS if you are on a system whose snprintf() | ||
7 | # or vsnprintf() returns -1 instead of the number of characters which would | ||
8 | # have been written to the final string if enough space had been available. | ||
9 | # | ||
10 | # Define FREAD_READS_DIRECTORIES if you are on a system which succeeds | ||
11 | # when attempting to read from an fopen'ed directory. | ||
12 | # | ||
13 | # Define NO_OPENSSL environment variable if you do not have OpenSSL. | ||
14 | # This also implies MOZILLA_SHA1. | ||
15 | # | ||
16 | # Define CURLDIR=/foo/bar if your curl header and library files are in | ||
17 | # /foo/bar/include and /foo/bar/lib directories. | ||
18 | # | ||
19 | # Define EXPATDIR=/foo/bar if your expat header and library files are in | ||
20 | # /foo/bar/include and /foo/bar/lib directories. | ||
21 | # | ||
22 | # Define NO_D_INO_IN_DIRENT if you don't have d_ino in your struct dirent. | ||
23 | # | ||
24 | # Define NO_D_TYPE_IN_DIRENT if your platform defines DT_UNKNOWN but lacks | ||
25 | # d_type in struct dirent (latest Cygwin -- will be fixed soonish). | ||
26 | # | ||
27 | # Define NO_C99_FORMAT if your formatted IO functions (printf/scanf et al.) | ||
28 | # do not support the 'size specifiers' introduced by C99, namely ll, hh, | ||
29 | # j, z, t (representing long long int, char, intmax_t, size_t, ptrdiff_t). | ||
30 | # Some C compilers supported these specifiers prior to C99 as an extension. | ||
31 | # | ||
32 | # Define NO_STRCASESTR if you don't have strcasestr. | ||
33 | # | ||
34 | # Define NO_MEMMEM if you don't have memmem. | ||
35 | # | ||
36 | # Define NO_STRTOUMAX if you don't have strtoumax in the C library. | ||
37 | # If your compiler also does not support long long or does not have | ||
38 | # strtoull, define NO_STRTOULL. | ||
39 | # | ||
40 | # Define NO_SETENV if you don't have setenv in the C library. | ||
41 | # | ||
42 | # Define NO_UNSETENV if you don't have unsetenv in the C library. | ||
43 | # | ||
44 | # Define NO_MKDTEMP if you don't have mkdtemp in the C library. | ||
45 | # | ||
46 | # Define NO_SYS_SELECT_H if you don't have sys/select.h. | ||
47 | # | ||
48 | # Define NO_SYMLINK_HEAD if you never want .perf/HEAD to be a symbolic link. | ||
49 | # Enable it on Windows. By default, symrefs are still used. | ||
50 | # | ||
51 | # Define NO_SVN_TESTS if you want to skip time-consuming SVN interoperability | ||
52 | # tests. These tests take up a significant amount of the total test time | ||
53 | # but are not needed unless you plan to talk to SVN repos. | ||
54 | # | ||
55 | # Define NO_FINK if you are building on Darwin/Mac OS X, have Fink | ||
56 | # installed in /sw, but don't want PERF to link against any libraries | ||
57 | # installed there. If defined you may specify your own (or Fink's) | ||
58 | # include directories and library directories by defining CFLAGS | ||
59 | # and LDFLAGS appropriately. | ||
60 | # | ||
61 | # Define NO_DARWIN_PORTS if you are building on Darwin/Mac OS X, | ||
62 | # have DarwinPorts installed in /opt/local, but don't want PERF to | ||
63 | # link against any libraries installed there. If defined you may | ||
64 | # specify your own (or DarwinPort's) include directories and | ||
65 | # library directories by defining CFLAGS and LDFLAGS appropriately. | ||
66 | # | ||
67 | # Define PPC_SHA1 environment variable when running make to make use of | ||
68 | # a bundled SHA1 routine optimized for PowerPC. | ||
69 | # | ||
70 | # Define ARM_SHA1 environment variable when running make to make use of | ||
71 | # a bundled SHA1 routine optimized for ARM. | ||
72 | # | ||
73 | # Define MOZILLA_SHA1 environment variable when running make to make use of | ||
74 | # a bundled SHA1 routine coming from Mozilla. It is GPL'd and should be fast | ||
75 | # on non-x86 architectures (e.g. PowerPC), while the OpenSSL version (default | ||
76 | # choice) has a very fast version optimized for i586. | ||
77 | # | ||
78 | # Define NEEDS_SSL_WITH_CRYPTO if you need -lcrypto with -lssl (Darwin). | ||
79 | # | ||
80 | # Define NEEDS_LIBICONV if linking with libc is not enough (Darwin). | ||
81 | # | ||
82 | # Define NEEDS_SOCKET if linking with libc is not enough (SunOS, | ||
83 | # Patrick Mauritz). | ||
84 | # | ||
85 | # Define NO_MMAP if you want to avoid mmap. | ||
86 | # | ||
87 | # Define NO_PTHREADS if you do not have or do not want to use Pthreads. | ||
88 | # | ||
89 | # Define NO_PREAD if you have a problem with pread() system call (e.g. | ||
90 | # cygwin.dll before v1.5.22). | ||
91 | # | ||
92 | # Define NO_FAST_WORKING_DIRECTORY if accessing objects in pack files is | ||
93 | # generally faster on your platform than accessing the working directory. | ||
94 | # | ||
95 | # Define NO_TRUSTABLE_FILEMODE if your filesystem may claim to support | ||
96 | # the executable mode bit, but doesn't really do so. | ||
97 | # | ||
98 | # Define NO_IPV6 if you lack IPv6 support and getaddrinfo(). | ||
99 | # | ||
100 | # Define NO_SOCKADDR_STORAGE if your platform does not have struct | ||
101 | # sockaddr_storage. | ||
102 | # | ||
103 | # Define NO_ICONV if your libc does not properly support iconv. | ||
104 | # | ||
105 | # Define OLD_ICONV if your library has an old iconv(), where the second | ||
106 | # (input buffer pointer) parameter is declared with type (const char **). | ||
107 | # | ||
108 | # Define NO_DEFLATE_BOUND if your zlib does not have deflateBound. | ||
109 | # | ||
110 | # Define NO_R_TO_GCC_LINKER if your gcc does not like "-R/path/lib" | ||
111 | # that tells runtime paths to dynamic libraries; | ||
112 | # "-Wl,-rpath=/path/lib" is used instead. | ||
113 | # | ||
114 | # Define USE_NSEC below if you want perf to care about sub-second file mtimes | ||
115 | # and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and | ||
116 | # it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely | ||
117 | # randomly break unless your underlying filesystem supports those sub-second | ||
118 | # times (my ext3 doesn't). | ||
119 | # | ||
120 | # Define USE_ST_TIMESPEC if your "struct stat" uses "st_ctimespec" instead of | ||
121 | # "st_ctim" | ||
122 | # | ||
123 | # Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec" | ||
124 | # available. This automatically turns USE_NSEC off. | ||
125 | # | ||
126 | # Define USE_STDEV below if you want perf to care about the underlying device | ||
127 | # change being considered an inode change from the update-index perspective. | ||
128 | # | ||
129 | # Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks | ||
130 | # field that counts the on-disk footprint in 512-byte blocks. | ||
131 | # | ||
132 | # Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8 | ||
133 | # | ||
134 | # Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72. | ||
135 | # | ||
136 | # Define NO_PERL_MAKEMAKER if you cannot use Makefiles generated by perl's | ||
137 | # MakeMaker (e.g. using ActiveState under Cygwin). | ||
138 | # | ||
139 | # Define NO_PERL if you do not want Perl scripts or libraries at all. | ||
140 | # | ||
141 | # Define INTERNAL_QSORT to use Git's implementation of qsort(), which | ||
142 | # is a simplified version of the merge sort used in glibc. This is | ||
143 | # recommended if Git triggers O(n^2) behavior in your platform's qsort(). | ||
144 | # | ||
145 | # Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call | ||
146 | # your external grep (e.g., if your system lacks grep, if its grep is | ||
147 | # broken, or spawning an external process is slower than perf's built-in grep). | ||
148 | |||
149 | PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE | ||
150 | @$(SHELL_PATH) util/PERF-VERSION-GEN | ||
151 | -include PERF-VERSION-FILE | ||
152 | |||
153 | uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') | ||
154 | uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not') | ||
155 | uname_O := $(shell sh -c 'uname -o 2>/dev/null || echo not') | ||
156 | uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not') | ||
157 | uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') | ||
158 | uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') | ||
159 | |||
160 | # CFLAGS and LDFLAGS are for the users to override from the command line. | ||
161 | |||
162 | CFLAGS = -g -O2 -Wall | ||
163 | LDFLAGS = -lpthread -lrt | ||
164 | ALL_CFLAGS = $(CFLAGS) | ||
165 | ALL_LDFLAGS = $(LDFLAGS) | ||
166 | STRIP ?= strip | ||
167 | |||
168 | # Among the variables below, these: | ||
169 | # perfexecdir | ||
170 | # template_dir | ||
171 | # mandir | ||
172 | # infodir | ||
173 | # htmldir | ||
174 | # ETC_PERFCONFIG (but not sysconfdir) | ||
175 | # can be specified as a relative path some/where/else; | ||
176 | # this is interpreted as relative to $(prefix) and "perf" at | ||
177 | # runtime figures out where they are based on the path to the executable. | ||
178 | # This can help installing the suite in a relocatable way. | ||
179 | |||
180 | prefix = $(HOME) | ||
181 | bindir_relative = bin | ||
182 | bindir = $(prefix)/$(bindir_relative) | ||
183 | mandir = share/man | ||
184 | infodir = share/info | ||
185 | perfexecdir = libexec/perf-core | ||
186 | sharedir = $(prefix)/share | ||
187 | template_dir = share/perf-core/templates | ||
188 | htmldir = share/doc/perf-doc | ||
189 | ifeq ($(prefix),/usr) | ||
190 | sysconfdir = /etc | ||
191 | ETC_PERFCONFIG = $(sysconfdir)/perfconfig | ||
192 | else | ||
193 | sysconfdir = $(prefix)/etc | ||
194 | ETC_PERFCONFIG = etc/perfconfig | ||
195 | endif | ||
196 | lib = lib | ||
197 | # DESTDIR= | ||
198 | |||
199 | export prefix bindir sharedir sysconfdir | ||
200 | |||
201 | CC = gcc | ||
202 | AR = ar | ||
203 | RM = rm -f | ||
204 | TAR = tar | ||
205 | FIND = find | ||
206 | INSTALL = install | ||
207 | RPMBUILD = rpmbuild | ||
208 | PTHREAD_LIBS = -lpthread | ||
209 | |||
210 | # sparse is architecture-neutral, which means that we need to tell it | ||
211 | # explicitly what architecture to check for. Fix this up for yours.. | ||
212 | SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ | ||
213 | |||
214 | |||
215 | |||
216 | ### --- END CONFIGURATION SECTION --- | ||
217 | |||
218 | # Those must not be GNU-specific; they are shared with perl/ which may | ||
219 | # be built by a different compiler. (Note that this is an artifact now | ||
220 | # but it still might be nice to keep that distinction.) | ||
221 | BASIC_CFLAGS = | ||
222 | BASIC_LDFLAGS = | ||
223 | |||
224 | # Guard against environment variables | ||
225 | BUILTIN_OBJS = | ||
226 | BUILT_INS = | ||
227 | COMPAT_CFLAGS = | ||
228 | COMPAT_OBJS = | ||
229 | LIB_H = | ||
230 | LIB_OBJS = | ||
231 | PROGRAMS = perf-report | ||
232 | SCRIPT_PERL = | ||
233 | SCRIPT_SH = | ||
234 | TEST_PROGRAMS = | ||
235 | |||
236 | # | ||
237 | # No scripts right now: | ||
238 | # | ||
239 | |||
240 | # SCRIPT_SH += perf-am.sh | ||
241 | |||
242 | # | ||
243 | # No Perl scripts right now: | ||
244 | # | ||
245 | |||
246 | # SCRIPT_PERL += perf-add--interactive.perl | ||
247 | |||
248 | SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \ | ||
249 | $(patsubst %.perl,%,$(SCRIPT_PERL)) | ||
250 | |||
251 | # Empty... | ||
252 | EXTRA_PROGRAMS = | ||
253 | |||
254 | # ... and all the rest that could be moved out of bindir to perfexecdir | ||
255 | PROGRAMS += $(EXTRA_PROGRAMS) | ||
256 | |||
257 | # | ||
258 | # Single 'perf' binary right now: | ||
259 | # | ||
260 | PROGRAMS += perf | ||
261 | |||
262 | # List built-in command $C whose implementation cmd_$C() is not in | ||
263 | # builtin-$C.o but is linked in as part of some other command. | ||
264 | BUILT_INS += $(patsubst builtin-%.o,perf-%$X,$(BUILTIN_OBJS)) | ||
265 | |||
266 | # | ||
267 | # None right now: | ||
268 | # | ||
269 | # BUILT_INS += perf-init $X | ||
270 | |||
271 | # what 'all' will build and 'install' will install, in perfexecdir | ||
272 | ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) | ||
273 | |||
274 | # what 'all' will build but not install in perfexecdir | ||
275 | OTHER_PROGRAMS = perf$X | ||
276 | |||
277 | # Set paths to tools early so that they can be used for version tests. | ||
278 | ifndef SHELL_PATH | ||
279 | SHELL_PATH = /bin/sh | ||
280 | endif | ||
281 | ifndef PERL_PATH | ||
282 | PERL_PATH = /usr/bin/perl | ||
283 | endif | ||
284 | |||
285 | export PERL_PATH | ||
286 | |||
287 | LIB_FILE=libperf.a | ||
288 | |||
289 | LIB_H += ../../include/linux/perf_counter.h | ||
290 | LIB_H += perf.h | ||
291 | LIB_H += util/levenshtein.h | ||
292 | LIB_H += util/parse-options.h | ||
293 | LIB_H += util/quote.h | ||
294 | LIB_H += util/util.h | ||
295 | LIB_H += util/help.h | ||
296 | LIB_H += util/strbuf.h | ||
297 | LIB_H += util/run-command.h | ||
298 | |||
299 | LIB_OBJS += util/abspath.o | ||
300 | LIB_OBJS += util/alias.o | ||
301 | LIB_OBJS += util/config.o | ||
302 | LIB_OBJS += util/ctype.o | ||
303 | LIB_OBJS += util/exec_cmd.o | ||
304 | LIB_OBJS += util/help.o | ||
305 | LIB_OBJS += util/levenshtein.o | ||
306 | LIB_OBJS += util/parse-options.o | ||
307 | LIB_OBJS += util/path.o | ||
308 | LIB_OBJS += util/run-command.o | ||
309 | LIB_OBJS += util/quote.o | ||
310 | LIB_OBJS += util/strbuf.o | ||
311 | LIB_OBJS += util/usage.o | ||
312 | LIB_OBJS += util/wrapper.o | ||
313 | |||
314 | BUILTIN_OBJS += builtin-help.o | ||
315 | BUILTIN_OBJS += builtin-record.o | ||
316 | BUILTIN_OBJS += builtin-stat.o | ||
317 | BUILTIN_OBJS += builtin-top.o | ||
318 | |||
319 | PERFLIBS = $(LIB_FILE) | ||
320 | EXTLIBS = | ||
321 | |||
322 | # | ||
323 | # Platform specific tweaks | ||
324 | # | ||
325 | |||
326 | # We choose to avoid "if .. else if .. else .. endif endif" | ||
327 | # because maintaining the nesting to match is a pain. If | ||
328 | # we had "elif" things would have been much nicer... | ||
329 | |||
330 | -include config.mak.autogen | ||
331 | -include config.mak | ||
332 | |||
333 | ifeq ($(uname_S),Darwin) | ||
334 | ifndef NO_FINK | ||
335 | ifeq ($(shell test -d /sw/lib && echo y),y) | ||
336 | BASIC_CFLAGS += -I/sw/include | ||
337 | BASIC_LDFLAGS += -L/sw/lib | ||
338 | endif | ||
339 | endif | ||
340 | ifndef NO_DARWIN_PORTS | ||
341 | ifeq ($(shell test -d /opt/local/lib && echo y),y) | ||
342 | BASIC_CFLAGS += -I/opt/local/include | ||
343 | BASIC_LDFLAGS += -L/opt/local/lib | ||
344 | endif | ||
345 | endif | ||
346 | PTHREAD_LIBS = | ||
347 | endif | ||
348 | |||
349 | ifndef CC_LD_DYNPATH | ||
350 | ifdef NO_R_TO_GCC_LINKER | ||
351 | # Some gcc versions do not accept and pass -R to the linker to specify | ||
352 | # the runtime dynamic library path. | ||
353 | CC_LD_DYNPATH = -Wl,-rpath, | ||
354 | else | ||
355 | CC_LD_DYNPATH = -R | ||
356 | endif | ||
357 | endif | ||
358 | |||
359 | ifdef ZLIB_PATH | ||
360 | BASIC_CFLAGS += -I$(ZLIB_PATH)/include | ||
361 | EXTLIBS += -L$(ZLIB_PATH)/$(lib) $(CC_LD_DYNPATH)$(ZLIB_PATH)/$(lib) | ||
362 | endif | ||
363 | EXTLIBS += -lz | ||
364 | |||
365 | ifdef NEEDS_SOCKET | ||
366 | EXTLIBS += -lsocket | ||
367 | endif | ||
368 | ifdef NEEDS_NSL | ||
369 | EXTLIBS += -lnsl | ||
370 | endif | ||
371 | ifdef NO_D_TYPE_IN_DIRENT | ||
372 | BASIC_CFLAGS += -DNO_D_TYPE_IN_DIRENT | ||
373 | endif | ||
374 | ifdef NO_D_INO_IN_DIRENT | ||
375 | BASIC_CFLAGS += -DNO_D_INO_IN_DIRENT | ||
376 | endif | ||
377 | ifdef NO_ST_BLOCKS_IN_STRUCT_STAT | ||
378 | BASIC_CFLAGS += -DNO_ST_BLOCKS_IN_STRUCT_STAT | ||
379 | endif | ||
380 | ifdef USE_NSEC | ||
381 | BASIC_CFLAGS += -DUSE_NSEC | ||
382 | endif | ||
383 | ifdef USE_ST_TIMESPEC | ||
384 | BASIC_CFLAGS += -DUSE_ST_TIMESPEC | ||
385 | endif | ||
386 | ifdef NO_NSEC | ||
387 | BASIC_CFLAGS += -DNO_NSEC | ||
388 | endif | ||
389 | ifdef NO_C99_FORMAT | ||
390 | BASIC_CFLAGS += -DNO_C99_FORMAT | ||
391 | endif | ||
392 | ifdef SNPRINTF_RETURNS_BOGUS | ||
393 | COMPAT_CFLAGS += -DSNPRINTF_RETURNS_BOGUS | ||
394 | COMPAT_OBJS += compat/snprintf.o | ||
395 | endif | ||
396 | ifdef FREAD_READS_DIRECTORIES | ||
397 | COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES | ||
398 | COMPAT_OBJS += compat/fopen.o | ||
399 | endif | ||
400 | ifdef NO_SYMLINK_HEAD | ||
401 | BASIC_CFLAGS += -DNO_SYMLINK_HEAD | ||
402 | endif | ||
403 | ifdef NO_STRCASESTR | ||
404 | COMPAT_CFLAGS += -DNO_STRCASESTR | ||
405 | COMPAT_OBJS += compat/strcasestr.o | ||
406 | endif | ||
407 | ifdef NO_STRTOUMAX | ||
408 | COMPAT_CFLAGS += -DNO_STRTOUMAX | ||
409 | COMPAT_OBJS += compat/strtoumax.o | ||
410 | endif | ||
411 | ifdef NO_STRTOULL | ||
412 | COMPAT_CFLAGS += -DNO_STRTOULL | ||
413 | endif | ||
414 | ifdef NO_SETENV | ||
415 | COMPAT_CFLAGS += -DNO_SETENV | ||
416 | COMPAT_OBJS += compat/setenv.o | ||
417 | endif | ||
418 | ifdef NO_MKDTEMP | ||
419 | COMPAT_CFLAGS += -DNO_MKDTEMP | ||
420 | COMPAT_OBJS += compat/mkdtemp.o | ||
421 | endif | ||
422 | ifdef NO_UNSETENV | ||
423 | COMPAT_CFLAGS += -DNO_UNSETENV | ||
424 | COMPAT_OBJS += compat/unsetenv.o | ||
425 | endif | ||
426 | ifdef NO_SYS_SELECT_H | ||
427 | BASIC_CFLAGS += -DNO_SYS_SELECT_H | ||
428 | endif | ||
429 | ifdef NO_MMAP | ||
430 | COMPAT_CFLAGS += -DNO_MMAP | ||
431 | COMPAT_OBJS += compat/mmap.o | ||
432 | else | ||
433 | ifdef USE_WIN32_MMAP | ||
434 | COMPAT_CFLAGS += -DUSE_WIN32_MMAP | ||
435 | COMPAT_OBJS += compat/win32mmap.o | ||
436 | endif | ||
437 | endif | ||
438 | ifdef NO_PREAD | ||
439 | COMPAT_CFLAGS += -DNO_PREAD | ||
440 | COMPAT_OBJS += compat/pread.o | ||
441 | endif | ||
442 | ifdef NO_FAST_WORKING_DIRECTORY | ||
443 | BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY | ||
444 | endif | ||
445 | ifdef NO_TRUSTABLE_FILEMODE | ||
446 | BASIC_CFLAGS += -DNO_TRUSTABLE_FILEMODE | ||
447 | endif | ||
448 | ifdef NO_IPV6 | ||
449 | BASIC_CFLAGS += -DNO_IPV6 | ||
450 | endif | ||
451 | ifdef NO_UINTMAX_T | ||
452 | BASIC_CFLAGS += -Duintmax_t=uint32_t | ||
453 | endif | ||
454 | ifdef NO_SOCKADDR_STORAGE | ||
455 | ifdef NO_IPV6 | ||
456 | BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in | ||
457 | else | ||
458 | BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in6 | ||
459 | endif | ||
460 | endif | ||
461 | ifdef NO_INET_NTOP | ||
462 | LIB_OBJS += compat/inet_ntop.o | ||
463 | endif | ||
464 | ifdef NO_INET_PTON | ||
465 | LIB_OBJS += compat/inet_pton.o | ||
466 | endif | ||
467 | |||
468 | ifdef NO_ICONV | ||
469 | BASIC_CFLAGS += -DNO_ICONV | ||
470 | endif | ||
471 | |||
472 | ifdef OLD_ICONV | ||
473 | BASIC_CFLAGS += -DOLD_ICONV | ||
474 | endif | ||
475 | |||
476 | ifdef NO_DEFLATE_BOUND | ||
477 | BASIC_CFLAGS += -DNO_DEFLATE_BOUND | ||
478 | endif | ||
479 | |||
480 | ifdef PPC_SHA1 | ||
481 | SHA1_HEADER = "ppc/sha1.h" | ||
482 | LIB_OBJS += ppc/sha1.o ppc/sha1ppc.o | ||
483 | else | ||
484 | ifdef ARM_SHA1 | ||
485 | SHA1_HEADER = "arm/sha1.h" | ||
486 | LIB_OBJS += arm/sha1.o arm/sha1_arm.o | ||
487 | else | ||
488 | ifdef MOZILLA_SHA1 | ||
489 | SHA1_HEADER = "mozilla-sha1/sha1.h" | ||
490 | LIB_OBJS += mozilla-sha1/sha1.o | ||
491 | else | ||
492 | SHA1_HEADER = <openssl/sha.h> | ||
493 | EXTLIBS += $(LIB_4_CRYPTO) | ||
494 | endif | ||
495 | endif | ||
496 | endif | ||
497 | ifdef NO_PERL_MAKEMAKER | ||
498 | export NO_PERL_MAKEMAKER | ||
499 | endif | ||
500 | ifdef NO_HSTRERROR | ||
501 | COMPAT_CFLAGS += -DNO_HSTRERROR | ||
502 | COMPAT_OBJS += compat/hstrerror.o | ||
503 | endif | ||
504 | ifdef NO_MEMMEM | ||
505 | COMPAT_CFLAGS += -DNO_MEMMEM | ||
506 | COMPAT_OBJS += compat/memmem.o | ||
507 | endif | ||
508 | ifdef INTERNAL_QSORT | ||
509 | COMPAT_CFLAGS += -DINTERNAL_QSORT | ||
510 | COMPAT_OBJS += compat/qsort.o | ||
511 | endif | ||
512 | ifdef RUNTIME_PREFIX | ||
513 | COMPAT_CFLAGS += -DRUNTIME_PREFIX | ||
514 | endif | ||
515 | |||
516 | ifdef DIR_HAS_BSD_GROUP_SEMANTICS | ||
517 | COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS | ||
518 | endif | ||
519 | ifdef NO_EXTERNAL_GREP | ||
520 | BASIC_CFLAGS += -DNO_EXTERNAL_GREP | ||
521 | endif | ||
522 | |||
523 | ifeq ($(PERL_PATH),) | ||
524 | NO_PERL=NoThanks | ||
525 | endif | ||
526 | |||
527 | QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir | ||
528 | QUIET_SUBDIR1 = | ||
529 | |||
530 | ifneq ($(findstring $(MAKEFLAGS),w),w) | ||
531 | PRINT_DIR = --no-print-directory | ||
532 | else # "make -w" | ||
533 | NO_SUBDIR = : | ||
534 | endif | ||
535 | |||
536 | ifneq ($(findstring $(MAKEFLAGS),s),s) | ||
537 | ifndef V | ||
538 | QUIET_CC = @echo ' ' CC $@; | ||
539 | QUIET_AR = @echo ' ' AR $@; | ||
540 | QUIET_LINK = @echo ' ' LINK $@; | ||
541 | QUIET_BUILT_IN = @echo ' ' BUILTIN $@; | ||
542 | QUIET_GEN = @echo ' ' GEN $@; | ||
543 | QUIET_SUBDIR0 = +@subdir= | ||
544 | QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ | ||
545 | $(MAKE) $(PRINT_DIR) -C $$subdir | ||
546 | export V | ||
547 | export QUIET_GEN | ||
548 | export QUIET_BUILT_IN | ||
549 | endif | ||
550 | endif | ||
551 | |||
552 | ifdef ASCIIDOC8 | ||
553 | export ASCIIDOC8 | ||
554 | endif | ||
555 | |||
556 | # Shell quote (do not use $(call) to accommodate ancient setups); | ||
557 | |||
558 | SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER)) | ||
559 | ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG)) | ||
560 | |||
561 | DESTDIR_SQ = $(subst ','\'',$(DESTDIR)) | ||
562 | bindir_SQ = $(subst ','\'',$(bindir)) | ||
563 | bindir_relative_SQ = $(subst ','\'',$(bindir_relative)) | ||
564 | mandir_SQ = $(subst ','\'',$(mandir)) | ||
565 | infodir_SQ = $(subst ','\'',$(infodir)) | ||
566 | perfexecdir_SQ = $(subst ','\'',$(perfexecdir)) | ||
567 | template_dir_SQ = $(subst ','\'',$(template_dir)) | ||
568 | htmldir_SQ = $(subst ','\'',$(htmldir)) | ||
569 | prefix_SQ = $(subst ','\'',$(prefix)) | ||
570 | |||
571 | SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) | ||
572 | PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) | ||
573 | |||
574 | LIBS = $(PERFLIBS) $(EXTLIBS) | ||
575 | |||
576 | BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \ | ||
577 | $(COMPAT_CFLAGS) | ||
578 | LIB_OBJS += $(COMPAT_OBJS) | ||
579 | |||
580 | ALL_CFLAGS += $(BASIC_CFLAGS) | ||
581 | ALL_LDFLAGS += $(BASIC_LDFLAGS) | ||
582 | |||
583 | export TAR INSTALL DESTDIR SHELL_PATH | ||
584 | |||
585 | |||
586 | ### Build rules | ||
587 | |||
588 | SHELL = $(SHELL_PATH) | ||
589 | |||
590 | all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) PERF-BUILD-OPTIONS | ||
591 | ifneq (,$X) | ||
592 | $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';) | ||
593 | endif | ||
594 | |||
595 | all:: | ||
596 | |||
597 | please_set_SHELL_PATH_to_a_more_modern_shell: | ||
598 | @$$(:) | ||
599 | |||
600 | shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell | ||
601 | |||
602 | strip: $(PROGRAMS) perf$X | ||
603 | $(STRIP) $(STRIP_OPTS) $(PROGRAMS) perf$X | ||
604 | |||
605 | perf.o: perf.c common-cmds.h PERF-CFLAGS | ||
606 | $(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \ | ||
607 | '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ | ||
608 | $(ALL_CFLAGS) -c $(filter %.c,$^) | ||
609 | |||
610 | perf$X: perf.o $(BUILTIN_OBJS) $(PERFLIBS) | ||
611 | $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ perf.o \ | ||
612 | $(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS) | ||
613 | |||
614 | builtin-help.o: builtin-help.c common-cmds.h PERF-CFLAGS | ||
615 | $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \ | ||
616 | '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ | ||
617 | '-DPERF_MAN_PATH="$(mandir_SQ)"' \ | ||
618 | '-DPERF_INFO_PATH="$(infodir_SQ)"' $< | ||
619 | |||
620 | $(BUILT_INS): perf$X | ||
621 | $(QUIET_BUILT_IN)$(RM) $@ && \ | ||
622 | ln perf$X $@ 2>/dev/null || \ | ||
623 | ln -s perf$X $@ 2>/dev/null || \ | ||
624 | cp perf$X $@ | ||
625 | |||
626 | common-cmds.h: util/generate-cmdlist.sh command-list.txt | ||
627 | |||
628 | common-cmds.h: $(wildcard Documentation/perf-*.txt) | ||
629 | $(QUIET_GEN)util/generate-cmdlist.sh > $@+ && mv $@+ $@ | ||
630 | |||
631 | $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh | ||
632 | $(QUIET_GEN)$(RM) $@ $@+ && \ | ||
633 | sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ | ||
634 | -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \ | ||
635 | -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \ | ||
636 | -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ | ||
637 | -e 's/@@NO_CURL@@/$(NO_CURL)/g' \ | ||
638 | $@.sh >$@+ && \ | ||
639 | chmod +x $@+ && \ | ||
640 | mv $@+ $@ | ||
641 | |||
642 | configure: configure.ac | ||
643 | $(QUIET_GEN)$(RM) $@ $<+ && \ | ||
644 | sed -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ | ||
645 | $< > $<+ && \ | ||
646 | autoconf -o $@ $<+ && \ | ||
647 | $(RM) $<+ | ||
648 | |||
649 | # These can record PERF_VERSION | ||
650 | perf.o perf.spec \ | ||
651 | $(patsubst %.sh,%,$(SCRIPT_SH)) \ | ||
652 | $(patsubst %.perl,%,$(SCRIPT_PERL)) \ | ||
653 | : PERF-VERSION-FILE | ||
654 | |||
655 | %.o: %.c PERF-CFLAGS | ||
656 | $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $< | ||
657 | %.s: %.c PERF-CFLAGS | ||
658 | $(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $< | ||
659 | %.o: %.S | ||
660 | $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $< | ||
661 | |||
662 | util/exec_cmd.o: util/exec_cmd.c PERF-CFLAGS | ||
663 | $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \ | ||
664 | '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \ | ||
665 | '-DBINDIR="$(bindir_relative_SQ)"' \ | ||
666 | '-DPREFIX="$(prefix_SQ)"' \ | ||
667 | $< | ||
668 | |||
669 | builtin-init-db.o: builtin-init-db.c PERF-CFLAGS | ||
670 | $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $< | ||
671 | |||
672 | util/config.o: util/config.c PERF-CFLAGS | ||
673 | $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< | ||
674 | |||
675 | perf-%$X: %.o $(PERFLIBS) | ||
676 | $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) | ||
677 | |||
678 | $(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) | ||
679 | $(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) | ||
680 | builtin-revert.o wt-status.o: wt-status.h | ||
681 | |||
682 | $(LIB_FILE): $(LIB_OBJS) | ||
683 | $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) | ||
684 | |||
685 | TAGS: | ||
686 | $(RM) TAGS | ||
687 | $(FIND) . -name '*.[hcS]' -print | xargs etags -a | ||
688 | |||
689 | tags: | ||
690 | $(RM) tags | ||
691 | $(FIND) . -name '*.[hcS]' -print | xargs ctags -a | ||
692 | |||
693 | cscope: | ||
694 | $(RM) cscope* | ||
695 | $(FIND) . -name '*.[hcS]' -print | xargs cscope -b | ||
696 | |||
697 | ### Detect prefix changes | ||
698 | TRACK_CFLAGS = $(subst ','\'',$(ALL_CFLAGS)):\ | ||
699 | $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ) | ||
700 | |||
701 | PERF-CFLAGS: .FORCE-PERF-CFLAGS | ||
702 | @FLAGS='$(TRACK_CFLAGS)'; \ | ||
703 | if test x"$$FLAGS" != x"`cat PERF-CFLAGS 2>/dev/null`" ; then \ | ||
704 | echo 1>&2 " * new build flags or prefix"; \ | ||
705 | echo "$$FLAGS" >PERF-CFLAGS; \ | ||
706 | fi | ||
707 | |||
708 | # We need to apply sq twice, once to protect from the shell | ||
709 | # that runs PERF-BUILD-OPTIONS, and then again to protect it | ||
710 | # and the first level quoting from the shell that runs "echo". | ||
711 | PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS | ||
712 | @echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@ | ||
713 | @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@ | ||
714 | @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@ | ||
715 | @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@ | ||
716 | |||
717 | ### Testing rules | ||
718 | |||
719 | # | ||
720 | # None right now: | ||
721 | # | ||
722 | # TEST_PROGRAMS += test-something$X | ||
723 | |||
724 | all:: $(TEST_PROGRAMS) | ||
725 | |||
726 | # GNU make supports exporting all variables by "export" without parameters. | ||
727 | # However, the environment gets quite big, and some programs have problems | ||
728 | # with that. | ||
729 | |||
730 | export NO_SVN_TESTS | ||
731 | |||
# Run the sparse static checker over every C source file (top level and
# one directory deep).  If the `sparse` command does not succeed, print a
# hint on stderr and fail.
check: common-cmds.h
	if sparse; \
	then \
		for i in *.c */*.c; \
		do \
			sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
		done; \
	else \
		echo 1>&2 "Did you mean 'make test'?"; \
		exit 1; \
	fi
743 | |||
744 | remove-dashes: | ||
745 | ./fixup-builtins $(BUILT_INS) $(PROGRAMS) $(SCRIPTS) | ||
746 | |||
747 | ### Installation rules | ||
748 | |||
749 | ifneq ($(filter /%,$(firstword $(template_dir))),) | ||
750 | template_instdir = $(template_dir) | ||
751 | else | ||
752 | template_instdir = $(prefix)/$(template_dir) | ||
753 | endif | ||
754 | export template_instdir | ||
755 | |||
756 | ifneq ($(filter /%,$(firstword $(perfexecdir))),) | ||
757 | perfexec_instdir = $(perfexecdir) | ||
758 | else | ||
759 | perfexec_instdir = $(prefix)/$(perfexecdir) | ||
760 | endif | ||
761 | perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) | ||
762 | export perfexec_instdir | ||
763 | |||
764 | install: all | ||
765 | $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' | ||
766 | $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' | ||
767 | $(INSTALL) $(ALL_PROGRAMS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' | ||
768 | ifneq (,$X) | ||
769 | $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';) | ||
770 | endif | ||
771 | |||
772 | ### Maintainer's dist rules | ||
773 | |||
774 | perf.spec: perf.spec.in | ||
775 | sed -e 's/@@VERSION@@/$(PERF_VERSION)/g' < $< > $@+ | ||
776 | mv $@+ $@ | ||
777 | |||
778 | PERF_TARNAME=perf-$(PERF_VERSION) | ||
779 | dist: perf.spec perf-archive$(X) configure | ||
780 | ./perf-archive --format=tar \ | ||
781 | --prefix=$(PERF_TARNAME)/ HEAD^{tree} > $(PERF_TARNAME).tar | ||
782 | @mkdir -p $(PERF_TARNAME) | ||
783 | @cp perf.spec configure $(PERF_TARNAME) | ||
784 | @echo $(PERF_VERSION) > $(PERF_TARNAME)/version | ||
785 | $(TAR) rf $(PERF_TARNAME).tar \ | ||
786 | $(PERF_TARNAME)/perf.spec \ | ||
787 | $(PERF_TARNAME)/configure \ | ||
788 | $(PERF_TARNAME)/version | ||
789 | @$(RM) -r $(PERF_TARNAME) | ||
790 | gzip -f -9 $(PERF_TARNAME).tar | ||
791 | |||
792 | rpm: dist | ||
793 | $(RPMBUILD) -ta $(PERF_TARNAME).tar.gz | ||
794 | |||
795 | ### Cleaning rules | ||
796 | |||
797 | distclean: clean | ||
798 | $(RM) configure | ||
799 | |||
800 | clean: | ||
801 | $(RM) *.o */*.o $(LIB_FILE) | ||
802 | $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X | ||
803 | $(RM) $(TEST_PROGRAMS) | ||
804 | $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope* | ||
805 | $(RM) -r autom4te.cache | ||
806 | $(RM) config.log config.mak.autogen config.mak.append config.status config.cache | ||
807 | $(RM) -r $(PERF_TARNAME) .doc-tmp-dir | ||
808 | $(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz | ||
809 | $(RM) $(htmldocs).tar.gz $(manpages).tar.gz | ||
810 | $(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-BUILD-OPTIONS | ||
811 | |||
# temporary hack:
# Libraries are listed after the source file so the linker still resolves
# the program's references when it only keeps libraries that are needed
# at the point they appear on the command line (e.g. --as-needed).
perf-report: perf-report.cc ../../include/linux/perf_counter.h Makefile
	g++ -g -O2 -Wall -o $@ $< -lrt
815 | |||
816 | .PHONY: all install clean strip | ||
817 | .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell | ||
818 | .PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS | ||
819 | .PHONY: .FORCE-PERF-BUILD-OPTIONS | ||
820 | |||
821 | ### Make sure built-ins have no duplicates and are listed in perf.c | ||
822 | # | ||
823 | check-builtins:: | ||
824 | ./check-builtins.sh | ||
825 | |||
826 | ### Test suite coverage testing | ||
827 | # | ||
828 | .PHONY: coverage coverage-clean coverage-build coverage-report | ||
829 | |||
830 | coverage: | ||
831 | $(MAKE) coverage-build | ||
832 | $(MAKE) coverage-report | ||
833 | |||
# Remove stale gcov data.  The coverage-report rule runs gcov over */*.c
# as well as *.c, so data files in subdirectories are removed too.
coverage-clean:
	rm -f *.gcda *.gcno */*.gcda */*.gcno
836 | |||
837 | COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs | ||
838 | COVERAGE_LDFLAGS = $(CFLAGS) -O0 -lgcov | ||
839 | |||
840 | coverage-build: coverage-clean | ||
841 | $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all | ||
842 | $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" \ | ||
843 | -j1 test | ||
844 | |||
845 | coverage-report: | ||
846 | gcov -b *.c */*.c | ||
847 | grep '^function.*called 0 ' *.c.gcov */*.c.gcov \ | ||
848 | | sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \ | ||
849 | | tee coverage-untested-functions | ||
diff --git a/Documentation/perf_counter/builtin-help.c b/Documentation/perf_counter/builtin-help.c new file mode 100644 index 000000000000..6616de0ef053 --- /dev/null +++ b/Documentation/perf_counter/builtin-help.c | |||
@@ -0,0 +1,461 @@ | |||
1 | /* | ||
2 | * builtin-help.c | ||
3 | * | ||
4 | * Builtin help command | ||
5 | */ | ||
6 | #include "util/cache.h" | ||
7 | #include "builtin.h" | ||
8 | #include "util/exec_cmd.h" | ||
9 | #include "common-cmds.h" | ||
10 | #include "util/parse-options.h" | ||
11 | #include "util/run-command.h" | ||
12 | #include "util/help.h" | ||
13 | |||
14 | static struct man_viewer_list { | ||
15 | struct man_viewer_list *next; | ||
16 | char name[FLEX_ARRAY]; | ||
17 | } *man_viewer_list; | ||
18 | |||
19 | static struct man_viewer_info_list { | ||
20 | struct man_viewer_info_list *next; | ||
21 | const char *info; | ||
22 | char name[FLEX_ARRAY]; | ||
23 | } *man_viewer_info_list; | ||
24 | |||
25 | enum help_format { | ||
26 | HELP_FORMAT_MAN, | ||
27 | HELP_FORMAT_INFO, | ||
28 | HELP_FORMAT_WEB, | ||
29 | }; | ||
30 | |||
31 | static int show_all = 0; | ||
32 | static enum help_format help_format = HELP_FORMAT_MAN; | ||
33 | static struct option builtin_help_options[] = { | ||
34 | OPT_BOOLEAN('a', "all", &show_all, "print all available commands"), | ||
35 | OPT_SET_INT('m', "man", &help_format, "show man page", HELP_FORMAT_MAN), | ||
36 | OPT_SET_INT('w', "web", &help_format, "show manual in web browser", | ||
37 | HELP_FORMAT_WEB), | ||
38 | OPT_SET_INT('i', "info", &help_format, "show info page", | ||
39 | HELP_FORMAT_INFO), | ||
40 | OPT_END(), | ||
41 | }; | ||
42 | |||
43 | static const char * const builtin_help_usage[] = { | ||
44 | "perf help [--all] [--man|--web|--info] [command]", | ||
45 | NULL | ||
46 | }; | ||
47 | |||
48 | static enum help_format parse_help_format(const char *format) | ||
49 | { | ||
50 | if (!strcmp(format, "man")) | ||
51 | return HELP_FORMAT_MAN; | ||
52 | if (!strcmp(format, "info")) | ||
53 | return HELP_FORMAT_INFO; | ||
54 | if (!strcmp(format, "web") || !strcmp(format, "html")) | ||
55 | return HELP_FORMAT_WEB; | ||
56 | die("unrecognized help format '%s'", format); | ||
57 | } | ||
58 | |||
59 | static const char *get_man_viewer_info(const char *name) | ||
60 | { | ||
61 | struct man_viewer_info_list *viewer; | ||
62 | |||
63 | for (viewer = man_viewer_info_list; viewer; viewer = viewer->next) | ||
64 | { | ||
65 | if (!strcasecmp(name, viewer->name)) | ||
66 | return viewer->info; | ||
67 | } | ||
68 | return NULL; | ||
69 | } | ||
70 | |||
71 | static int check_emacsclient_version(void) | ||
72 | { | ||
73 | struct strbuf buffer = STRBUF_INIT; | ||
74 | struct child_process ec_process; | ||
75 | const char *argv_ec[] = { "emacsclient", "--version", NULL }; | ||
76 | int version; | ||
77 | |||
78 | /* emacsclient prints its version number on stderr */ | ||
79 | memset(&ec_process, 0, sizeof(ec_process)); | ||
80 | ec_process.argv = argv_ec; | ||
81 | ec_process.err = -1; | ||
82 | ec_process.stdout_to_stderr = 1; | ||
83 | if (start_command(&ec_process)) { | ||
84 | fprintf(stderr, "Failed to start emacsclient.\n"); | ||
85 | return -1; | ||
86 | } | ||
87 | strbuf_read(&buffer, ec_process.err, 20); | ||
88 | close(ec_process.err); | ||
89 | |||
90 | /* | ||
91 | * Don't bother checking return value, because "emacsclient --version" | ||
92 | * seems to always exits with code 1. | ||
93 | */ | ||
94 | finish_command(&ec_process); | ||
95 | |||
96 | if (prefixcmp(buffer.buf, "emacsclient")) { | ||
97 | fprintf(stderr, "Failed to parse emacsclient version.\n"); | ||
98 | strbuf_release(&buffer); | ||
99 | return -1; | ||
100 | } | ||
101 | |||
102 | strbuf_remove(&buffer, 0, strlen("emacsclient")); | ||
103 | version = atoi(buffer.buf); | ||
104 | |||
105 | if (version < 22) { | ||
106 | fprintf(stderr, | ||
107 | "emacsclient version '%d' too old (< 22).\n", | ||
108 | version); | ||
109 | strbuf_release(&buffer); | ||
110 | return -1; | ||
111 | } | ||
112 | |||
113 | strbuf_release(&buffer); | ||
114 | return 0; | ||
115 | } | ||
116 | |||
117 | static void exec_woman_emacs(const char* path, const char *page) | ||
118 | { | ||
119 | if (!check_emacsclient_version()) { | ||
120 | /* This works only with emacsclient version >= 22. */ | ||
121 | struct strbuf man_page = STRBUF_INIT; | ||
122 | |||
123 | if (!path) | ||
124 | path = "emacsclient"; | ||
125 | strbuf_addf(&man_page, "(woman \"%s\")", page); | ||
126 | execlp(path, "emacsclient", "-e", man_page.buf, NULL); | ||
127 | warning("failed to exec '%s': %s", path, strerror(errno)); | ||
128 | } | ||
129 | } | ||
130 | |||
131 | static void exec_man_konqueror(const char* path, const char *page) | ||
132 | { | ||
133 | const char *display = getenv("DISPLAY"); | ||
134 | if (display && *display) { | ||
135 | struct strbuf man_page = STRBUF_INIT; | ||
136 | const char *filename = "kfmclient"; | ||
137 | |||
138 | /* It's simpler to launch konqueror using kfmclient. */ | ||
139 | if (path) { | ||
140 | const char *file = strrchr(path, '/'); | ||
141 | if (file && !strcmp(file + 1, "konqueror")) { | ||
142 | char *new = strdup(path); | ||
143 | char *dest = strrchr(new, '/'); | ||
144 | |||
145 | /* strlen("konqueror") == strlen("kfmclient") */ | ||
146 | strcpy(dest + 1, "kfmclient"); | ||
147 | path = new; | ||
148 | } | ||
149 | if (file) | ||
150 | filename = file; | ||
151 | } else | ||
152 | path = "kfmclient"; | ||
153 | strbuf_addf(&man_page, "man:%s(1)", page); | ||
154 | execlp(path, filename, "newTab", man_page.buf, NULL); | ||
155 | warning("failed to exec '%s': %s", path, strerror(errno)); | ||
156 | } | ||
157 | } | ||
158 | |||
/*
 * Replace the current process with man(1) showing `page`.  `path` names
 * the binary to run; NULL means "man" looked up in $PATH.  Returns only
 * if exec failed, after printing a warning.
 */
static void exec_man_man(const char* path, const char *page)
{
	const char *program = path ? path : "man";

	execlp(program, "man", page, NULL);
	warning("failed to exec '%s': %s", program, strerror(errno));
}
166 | |||
167 | static void exec_man_cmd(const char *cmd, const char *page) | ||
168 | { | ||
169 | struct strbuf shell_cmd = STRBUF_INIT; | ||
170 | strbuf_addf(&shell_cmd, "%s %s", cmd, page); | ||
171 | execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL); | ||
172 | warning("failed to exec '%s': %s", cmd, strerror(errno)); | ||
173 | } | ||
174 | |||
175 | static void add_man_viewer(const char *name) | ||
176 | { | ||
177 | struct man_viewer_list **p = &man_viewer_list; | ||
178 | size_t len = strlen(name); | ||
179 | |||
180 | while (*p) | ||
181 | p = &((*p)->next); | ||
182 | *p = calloc(1, (sizeof(**p) + len + 1)); | ||
183 | strncpy((*p)->name, name, len); | ||
184 | } | ||
185 | |||
/*
 * Tell whether the first `len` bytes of `name` spell one of the viewers
 * this file knows how to launch itself ("man", "woman", "konqueror"),
 * ignoring case.
 *
 * The length must match exactly: comparing only `len` bytes would also
 * accept any prefix of a viewer name (e.g. "m" or "wo") and the empty
 * name.
 *
 * Returns 1 for a supported viewer, 0 otherwise.
 */
static int supported_man_viewer(const char *name, size_t len)
{
	static const char * const viewers[] = { "man", "woman", "konqueror" };
	size_t i;

	for (i = 0; i < sizeof(viewers) / sizeof(viewers[0]); i++) {
		if (strlen(viewers[i]) == len &&
		    !strncasecmp(viewers[i], name, len))
			return 1;
	}
	return 0;
}
192 | |||
193 | static void do_add_man_viewer_info(const char *name, | ||
194 | size_t len, | ||
195 | const char *value) | ||
196 | { | ||
197 | struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1); | ||
198 | |||
199 | strncpy(new->name, name, len); | ||
200 | new->info = strdup(value); | ||
201 | new->next = man_viewer_info_list; | ||
202 | man_viewer_info_list = new; | ||
203 | } | ||
204 | |||
205 | static int add_man_viewer_path(const char *name, | ||
206 | size_t len, | ||
207 | const char *value) | ||
208 | { | ||
209 | if (supported_man_viewer(name, len)) | ||
210 | do_add_man_viewer_info(name, len, value); | ||
211 | else | ||
212 | warning("'%s': path for unsupported man viewer.\n" | ||
213 | "Please consider using 'man.<tool>.cmd' instead.", | ||
214 | name); | ||
215 | |||
216 | return 0; | ||
217 | } | ||
218 | |||
219 | static int add_man_viewer_cmd(const char *name, | ||
220 | size_t len, | ||
221 | const char *value) | ||
222 | { | ||
223 | if (supported_man_viewer(name, len)) | ||
224 | warning("'%s': cmd for supported man viewer.\n" | ||
225 | "Please consider using 'man.<tool>.path' instead.", | ||
226 | name); | ||
227 | else | ||
228 | do_add_man_viewer_info(name, len, value); | ||
229 | |||
230 | return 0; | ||
231 | } | ||
232 | |||
233 | static int add_man_viewer_info(const char *var, const char *value) | ||
234 | { | ||
235 | const char *name = var + 4; | ||
236 | const char *subkey = strrchr(name, '.'); | ||
237 | |||
238 | if (!subkey) | ||
239 | return error("Config with no key for man viewer: %s", name); | ||
240 | |||
241 | if (!strcmp(subkey, ".path")) { | ||
242 | if (!value) | ||
243 | return config_error_nonbool(var); | ||
244 | return add_man_viewer_path(name, subkey - name, value); | ||
245 | } | ||
246 | if (!strcmp(subkey, ".cmd")) { | ||
247 | if (!value) | ||
248 | return config_error_nonbool(var); | ||
249 | return add_man_viewer_cmd(name, subkey - name, value); | ||
250 | } | ||
251 | |||
252 | warning("'%s': unsupported man viewer sub key.", subkey); | ||
253 | return 0; | ||
254 | } | ||
255 | |||
256 | static int perf_help_config(const char *var, const char *value, void *cb) | ||
257 | { | ||
258 | if (!strcmp(var, "help.format")) { | ||
259 | if (!value) | ||
260 | return config_error_nonbool(var); | ||
261 | help_format = parse_help_format(value); | ||
262 | return 0; | ||
263 | } | ||
264 | if (!strcmp(var, "man.viewer")) { | ||
265 | if (!value) | ||
266 | return config_error_nonbool(var); | ||
267 | add_man_viewer(value); | ||
268 | return 0; | ||
269 | } | ||
270 | if (!prefixcmp(var, "man.")) | ||
271 | return add_man_viewer_info(var, value); | ||
272 | |||
273 | return perf_default_config(var, value, cb); | ||
274 | } | ||
275 | |||
276 | static struct cmdnames main_cmds, other_cmds; | ||
277 | |||
278 | void list_common_cmds_help(void) | ||
279 | { | ||
280 | int i, longest = 0; | ||
281 | |||
282 | for (i = 0; i < ARRAY_SIZE(common_cmds); i++) { | ||
283 | if (longest < strlen(common_cmds[i].name)) | ||
284 | longest = strlen(common_cmds[i].name); | ||
285 | } | ||
286 | |||
287 | puts("The most commonly used perf commands are:"); | ||
288 | for (i = 0; i < ARRAY_SIZE(common_cmds); i++) { | ||
289 | printf(" %s ", common_cmds[i].name); | ||
290 | mput_char(' ', longest - strlen(common_cmds[i].name)); | ||
291 | puts(common_cmds[i].help); | ||
292 | } | ||
293 | } | ||
294 | |||
295 | static int is_perf_command(const char *s) | ||
296 | { | ||
297 | return is_in_cmdlist(&main_cmds, s) || | ||
298 | is_in_cmdlist(&other_cmds, s); | ||
299 | } | ||
300 | |||
/*
 * Return a newly allocated string holding `prefix` immediately followed
 * by `cmd`.  The caller owns the result.  Dies if the allocation fails.
 */
static const char *prepend(const char *prefix, const char *cmd)
{
	size_t pre_len = strlen(prefix);
	size_t cmd_len = strlen(cmd);
	char *joined = malloc(pre_len + cmd_len + 1);

	if (!joined)
		die("out of memory");

	memcpy(joined, prefix, pre_len);
	/* Copies the terminating NUL of cmd as well. */
	memcpy(joined + pre_len, cmd, cmd_len + 1);
	return joined;
}
310 | |||
/*
 * Turn a command name given to "perf help" into a manual page name:
 *   NULL              -> "perf"
 *   starts with perf  -> returned unchanged
 *   a known command   -> "perf-" + name
 *   anything else     -> "perf" + name
 */
static const char *cmd_to_page(const char *perf_cmd)
{
	if (!perf_cmd)
		return "perf";
	if (!prefixcmp(perf_cmd, "perf"))
		return perf_cmd;

	return prepend(is_perf_command(perf_cmd) ? "perf-" : "perf",
		       perf_cmd);
}
322 | |||
323 | static void setup_man_path(void) | ||
324 | { | ||
325 | struct strbuf new_path = STRBUF_INIT; | ||
326 | const char *old_path = getenv("MANPATH"); | ||
327 | |||
328 | /* We should always put ':' after our path. If there is no | ||
329 | * old_path, the ':' at the end will let 'man' to try | ||
330 | * system-wide paths after ours to find the manual page. If | ||
331 | * there is old_path, we need ':' as delimiter. */ | ||
332 | strbuf_addstr(&new_path, system_path(PERF_MAN_PATH)); | ||
333 | strbuf_addch(&new_path, ':'); | ||
334 | if (old_path) | ||
335 | strbuf_addstr(&new_path, old_path); | ||
336 | |||
337 | setenv("MANPATH", new_path.buf, 1); | ||
338 | |||
339 | strbuf_release(&new_path); | ||
340 | } | ||
341 | |||
/*
 * Try to show `page` with the viewer called `name`.  The three built-in
 * viewers receive any configured info string as their binary path; any
 * other name needs a configured command.  Returns when the viewer could
 * not be started.
 */
static void exec_viewer(const char *name, const char *page)
{
	const char *info = get_man_viewer_info(name);

	if (!strcasecmp(name, "man")) {
		exec_man_man(info, page);
		return;
	}
	if (!strcasecmp(name, "woman")) {
		exec_woman_emacs(info, page);
		return;
	}
	if (!strcasecmp(name, "konqueror")) {
		exec_man_konqueror(info, page);
		return;
	}
	if (!info) {
		warning("'%s': unknown man viewer.", name);
		return;
	}
	exec_man_cmd(info, page);
}
357 | |||
358 | static void show_man_page(const char *perf_cmd) | ||
359 | { | ||
360 | struct man_viewer_list *viewer; | ||
361 | const char *page = cmd_to_page(perf_cmd); | ||
362 | const char *fallback = getenv("PERF_MAN_VIEWER"); | ||
363 | |||
364 | setup_man_path(); | ||
365 | for (viewer = man_viewer_list; viewer; viewer = viewer->next) | ||
366 | { | ||
367 | exec_viewer(viewer->name, page); /* will return when unable */ | ||
368 | } | ||
369 | if (fallback) | ||
370 | exec_viewer(fallback, page); | ||
371 | exec_viewer("man", page); | ||
372 | die("no man viewer handled the request"); | ||
373 | } | ||
374 | |||
375 | static void show_info_page(const char *perf_cmd) | ||
376 | { | ||
377 | const char *page = cmd_to_page(perf_cmd); | ||
378 | setenv("INFOPATH", system_path(PERF_INFO_PATH), 1); | ||
379 | execlp("info", "info", "perfman", page, NULL); | ||
380 | } | ||
381 | |||
382 | static void get_html_page_path(struct strbuf *page_path, const char *page) | ||
383 | { | ||
384 | struct stat st; | ||
385 | const char *html_path = system_path(PERF_HTML_PATH); | ||
386 | |||
387 | /* Check that we have a perf documentation directory. */ | ||
388 | if (stat(mkpath("%s/perf.html", html_path), &st) | ||
389 | || !S_ISREG(st.st_mode)) | ||
390 | die("'%s': not a documentation directory.", html_path); | ||
391 | |||
392 | strbuf_init(page_path, 0); | ||
393 | strbuf_addf(page_path, "%s/%s.html", html_path, page); | ||
394 | } | ||
395 | |||
396 | /* | ||
397 | * If open_html is not defined in a platform-specific way (see for | ||
398 | * example compat/mingw.h), we use the script web--browse to display | ||
399 | * HTML. | ||
400 | */ | ||
401 | #ifndef open_html | ||
402 | void open_html(const char *path) | ||
403 | { | ||
404 | execl_perf_cmd("web--browse", "-c", "help.browser", path, NULL); | ||
405 | } | ||
406 | #endif | ||
407 | |||
408 | static void show_html_page(const char *perf_cmd) | ||
409 | { | ||
410 | const char *page = cmd_to_page(perf_cmd); | ||
411 | struct strbuf page_path; /* it leaks but we exec bellow */ | ||
412 | |||
413 | get_html_page_path(&page_path, page); | ||
414 | |||
415 | open_html(page_path.buf); | ||
416 | } | ||
417 | |||
418 | int cmd_help(int argc, const char **argv, const char *prefix) | ||
419 | { | ||
420 | const char *alias; | ||
421 | load_command_list("perf-", &main_cmds, &other_cmds); | ||
422 | |||
423 | perf_config(perf_help_config, NULL); | ||
424 | |||
425 | argc = parse_options(argc, argv, builtin_help_options, | ||
426 | builtin_help_usage, 0); | ||
427 | |||
428 | if (show_all) { | ||
429 | printf("usage: %s\n\n", perf_usage_string); | ||
430 | list_commands("perf commands", &main_cmds, &other_cmds); | ||
431 | printf("%s\n", perf_more_info_string); | ||
432 | return 0; | ||
433 | } | ||
434 | |||
435 | if (!argv[0]) { | ||
436 | printf("usage: %s\n\n", perf_usage_string); | ||
437 | list_common_cmds_help(); | ||
438 | printf("\n%s\n", perf_more_info_string); | ||
439 | return 0; | ||
440 | } | ||
441 | |||
442 | alias = alias_lookup(argv[0]); | ||
443 | if (alias && !is_perf_command(argv[0])) { | ||
444 | printf("`perf %s' is aliased to `%s'\n", argv[0], alias); | ||
445 | return 0; | ||
446 | } | ||
447 | |||
448 | switch (help_format) { | ||
449 | case HELP_FORMAT_MAN: | ||
450 | show_man_page(argv[0]); | ||
451 | break; | ||
452 | case HELP_FORMAT_INFO: | ||
453 | show_info_page(argv[0]); | ||
454 | break; | ||
455 | case HELP_FORMAT_WEB: | ||
456 | show_html_page(argv[0]); | ||
457 | break; | ||
458 | } | ||
459 | |||
460 | return 0; | ||
461 | } | ||
diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c new file mode 100644 index 000000000000..efb87595f3cb --- /dev/null +++ b/Documentation/perf_counter/builtin-record.c | |||
@@ -0,0 +1,613 @@ | |||
1 | |||
2 | |||
3 | #include "util/util.h" | ||
4 | |||
5 | #include <sys/types.h> | ||
6 | #include <sys/stat.h> | ||
7 | #include <sys/time.h> | ||
8 | #include <unistd.h> | ||
9 | #include <stdint.h> | ||
10 | #include <stdlib.h> | ||
11 | #include <string.h> | ||
12 | #include <limits.h> | ||
13 | #include <getopt.h> | ||
14 | #include <assert.h> | ||
15 | #include <fcntl.h> | ||
16 | #include <stdio.h> | ||
17 | #include <errno.h> | ||
18 | #include <time.h> | ||
19 | #include <sched.h> | ||
20 | #include <pthread.h> | ||
21 | |||
22 | #include <sys/syscall.h> | ||
23 | #include <sys/ioctl.h> | ||
24 | #include <sys/poll.h> | ||
25 | #include <sys/prctl.h> | ||
26 | #include <sys/wait.h> | ||
27 | #include <sys/uio.h> | ||
28 | #include <sys/mman.h> | ||
29 | |||
30 | #include <linux/unistd.h> | ||
31 | #include <linux/types.h> | ||
32 | |||
33 | #include "../../include/linux/perf_counter.h" | ||
34 | |||
35 | #include "perf.h" | ||
36 | |||
/*
 * Round x up to the next multiple of a (a must be a power of two).
 *
 * __typeof__ is used instead of typeof so the macro also works when the
 * compiler runs in a strict ISO mode (-std=c99, -std=c11), where the bare
 * typeof keyword is not recognised.
 */
#define __ALIGN_MASK(x, mask)	(((x) + (mask)) & ~(mask))
#define ALIGN(x, a)		__ALIGN_MASK(x, (__typeof__(x))(a) - 1)
39 | |||
40 | static int nr_counters = 0; | ||
41 | static __u64 event_id[MAX_COUNTERS] = { }; | ||
42 | static int default_interval = 100000; | ||
43 | static int event_count[MAX_COUNTERS]; | ||
44 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
45 | static int nr_cpus = 0; | ||
46 | static unsigned int page_size; | ||
47 | static unsigned int mmap_pages = 16; | ||
48 | static int output; | ||
49 | static char *output_name = "output.perf"; | ||
50 | static int group = 0; | ||
51 | static unsigned int realtime_prio = 0; | ||
52 | static int system_wide = 0; | ||
53 | static pid_t target_pid = -1; | ||
54 | static int inherit = 1; | ||
55 | static int nmi = 1; | ||
56 | |||
57 | const unsigned int default_count[] = { | ||
58 | 1000000, | ||
59 | 1000000, | ||
60 | 10000, | ||
61 | 10000, | ||
62 | 1000000, | ||
63 | 10000, | ||
64 | }; | ||
65 | |||
66 | struct event_symbol { | ||
67 | __u64 event; | ||
68 | char *symbol; | ||
69 | }; | ||
70 | |||
71 | static struct event_symbol event_symbols[] = { | ||
72 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", }, | ||
73 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", }, | ||
74 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", }, | ||
75 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", }, | ||
76 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", }, | ||
77 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", }, | ||
78 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", }, | ||
79 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", }, | ||
80 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", }, | ||
81 | |||
82 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", }, | ||
83 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", }, | ||
84 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", }, | ||
85 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", }, | ||
86 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", }, | ||
87 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", }, | ||
88 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", }, | ||
89 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", }, | ||
90 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", }, | ||
91 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", }, | ||
92 | }; | ||
93 | |||
94 | /* | ||
95 | * Each event can have multiple symbolic names. | ||
96 | * Symbolic names are (almost) exactly matched. | ||
97 | */ | ||
98 | static __u64 match_event_symbols(char *str) | ||
99 | { | ||
100 | __u64 config, id; | ||
101 | int type; | ||
102 | unsigned int i; | ||
103 | |||
104 | if (sscanf(str, "r%llx", &config) == 1) | ||
105 | return config | PERF_COUNTER_RAW_MASK; | ||
106 | |||
107 | if (sscanf(str, "%d:%llu", &type, &id) == 2) | ||
108 | return EID(type, id); | ||
109 | |||
110 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | ||
111 | if (!strncmp(str, event_symbols[i].symbol, | ||
112 | strlen(event_symbols[i].symbol))) | ||
113 | return event_symbols[i].event; | ||
114 | } | ||
115 | |||
116 | return ~0ULL; | ||
117 | } | ||
118 | |||
119 | static int parse_events(char *str) | ||
120 | { | ||
121 | __u64 config; | ||
122 | |||
123 | again: | ||
124 | if (nr_counters == MAX_COUNTERS) | ||
125 | return -1; | ||
126 | |||
127 | config = match_event_symbols(str); | ||
128 | if (config == ~0ULL) | ||
129 | return -1; | ||
130 | |||
131 | event_id[nr_counters] = config; | ||
132 | nr_counters++; | ||
133 | |||
134 | str = strstr(str, ","); | ||
135 | if (str) { | ||
136 | str++; | ||
137 | goto again; | ||
138 | } | ||
139 | |||
140 | return 0; | ||
141 | } | ||
142 | |||
/*
 * Extract the bit field called "name" from an encoded event config, using
 * the PERF_COUNTER_<name>_MASK and PERF_COUNTER_<name>_SHIFT constants.
 *
 * The argument is parenthesized so that expressions such as `a | b` are
 * masked as a whole rather than being split by operator precedence.
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
150 | |||
151 | static void display_events_help(void) | ||
152 | { | ||
153 | unsigned int i; | ||
154 | __u64 e; | ||
155 | |||
156 | printf( | ||
157 | " -e EVENT --event=EVENT # symbolic-name abbreviations"); | ||
158 | |||
159 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | ||
160 | int type, id; | ||
161 | |||
162 | e = event_symbols[i].event; | ||
163 | type = PERF_COUNTER_TYPE(e); | ||
164 | id = PERF_COUNTER_ID(e); | ||
165 | |||
166 | printf("\n %d:%d: %-20s", | ||
167 | type, id, event_symbols[i].symbol); | ||
168 | } | ||
169 | |||
170 | printf("\n" | ||
171 | " rNNN: raw PMU events (eventsel+umask)\n\n"); | ||
172 | } | ||
173 | |||
174 | static void display_help(void) | ||
175 | { | ||
176 | printf( | ||
177 | "Usage: perf-record [<options>] <cmd>\n" | ||
178 | "perf-record Options (up to %d event types can be specified at once):\n\n", | ||
179 | MAX_COUNTERS); | ||
180 | |||
181 | display_events_help(); | ||
182 | |||
183 | printf( | ||
184 | " -c CNT --count=CNT # event period to sample\n" | ||
185 | " -m pages --mmap_pages=<pages> # number of mmap data pages\n" | ||
186 | " -o file --output=<file> # output file\n" | ||
187 | " -p pid --pid=<pid> # record events on existing pid\n" | ||
188 | " -r prio --realtime=<prio> # use RT prio\n" | ||
189 | " -s --system # system wide profiling\n" | ||
190 | ); | ||
191 | |||
192 | exit(0); | ||
193 | } | ||
194 | |||
195 | static void process_options(int argc, const char *argv[]) | ||
196 | { | ||
197 | int error = 0, counter; | ||
198 | |||
199 | for (;;) { | ||
200 | int option_index = 0; | ||
201 | /** Options for getopt */ | ||
202 | static struct option long_options[] = { | ||
203 | {"count", required_argument, NULL, 'c'}, | ||
204 | {"event", required_argument, NULL, 'e'}, | ||
205 | {"mmap_pages", required_argument, NULL, 'm'}, | ||
206 | {"output", required_argument, NULL, 'o'}, | ||
207 | {"pid", required_argument, NULL, 'p'}, | ||
208 | {"realtime", required_argument, NULL, 'r'}, | ||
209 | {"system", no_argument, NULL, 's'}, | ||
210 | {"inherit", no_argument, NULL, 'i'}, | ||
211 | {"nmi", no_argument, NULL, 'n'}, | ||
212 | {NULL, 0, NULL, 0 } | ||
213 | }; | ||
214 | int c = getopt_long(argc, argv, "+:c:e:m:o:p:r:sin", | ||
215 | long_options, &option_index); | ||
216 | if (c == -1) | ||
217 | break; | ||
218 | |||
219 | switch (c) { | ||
220 | case 'c': default_interval = atoi(optarg); break; | ||
221 | case 'e': error = parse_events(optarg); break; | ||
222 | case 'm': mmap_pages = atoi(optarg); break; | ||
223 | case 'o': output_name = strdup(optarg); break; | ||
224 | case 'p': target_pid = atoi(optarg); break; | ||
225 | case 'r': realtime_prio = atoi(optarg); break; | ||
226 | case 's': system_wide ^= 1; break; | ||
227 | case 'i': inherit ^= 1; break; | ||
228 | case 'n': nmi ^= 1; break; | ||
229 | default: error = 1; break; | ||
230 | } | ||
231 | } | ||
232 | |||
233 | if (argc - optind == 0 && target_pid == -1) | ||
234 | error = 1; | ||
235 | |||
236 | if (error) | ||
237 | display_help(); | ||
238 | |||
239 | if (!nr_counters) { | ||
240 | nr_counters = 1; | ||
241 | event_id[0] = 0; | ||
242 | } | ||
243 | |||
244 | for (counter = 0; counter < nr_counters; counter++) { | ||
245 | if (event_count[counter]) | ||
246 | continue; | ||
247 | |||
248 | event_count[counter] = default_interval; | ||
249 | } | ||
250 | } | ||
251 | |||
/* Read-side state of one counter's mmap()ed buffer. */
struct mmap_data {
	int		counter;	/* index of the counter this buffer belongs to */
	void		*base;		/* start of the mapping (control page first) */
	unsigned int	mask;		/* size of the data area minus one */
	unsigned int	prev;		/* head position consumed so far */
};
258 | |||
259 | static unsigned int mmap_read_head(struct mmap_data *md) | ||
260 | { | ||
261 | struct perf_counter_mmap_page *pc = md->base; | ||
262 | int head; | ||
263 | |||
264 | head = pc->data_head; | ||
265 | rmb(); | ||
266 | |||
267 | return head; | ||
268 | } | ||
269 | |||
270 | static long events; | ||
271 | static struct timeval last_read, this_read; | ||
272 | |||
273 | static void mmap_read(struct mmap_data *md) | ||
274 | { | ||
275 | unsigned int head = mmap_read_head(md); | ||
276 | unsigned int old = md->prev; | ||
277 | unsigned char *data = md->base + page_size; | ||
278 | unsigned long size; | ||
279 | void *buf; | ||
280 | int diff; | ||
281 | |||
282 | gettimeofday(&this_read, NULL); | ||
283 | |||
284 | /* | ||
285 | * If we're further behind than half the buffer, there's a chance | ||
286 | * the writer will bite our tail and screw up the events under us. | ||
287 | * | ||
288 | * If we somehow ended up ahead of the head, we got messed up. | ||
289 | * | ||
290 | * In either case, truncate and restart at head. | ||
291 | */ | ||
292 | diff = head - old; | ||
293 | if (diff > md->mask / 2 || diff < 0) { | ||
294 | struct timeval iv; | ||
295 | unsigned long msecs; | ||
296 | |||
297 | timersub(&this_read, &last_read, &iv); | ||
298 | msecs = iv.tv_sec*1000 + iv.tv_usec/1000; | ||
299 | |||
300 | fprintf(stderr, "WARNING: failed to keep up with mmap data." | ||
301 | " Last read %lu msecs ago.\n", msecs); | ||
302 | |||
303 | /* | ||
304 | * head points to a known good entry, start there. | ||
305 | */ | ||
306 | old = head; | ||
307 | } | ||
308 | |||
309 | last_read = this_read; | ||
310 | |||
311 | if (old != head) | ||
312 | events++; | ||
313 | |||
314 | size = head - old; | ||
315 | |||
316 | if ((old & md->mask) + size != (head & md->mask)) { | ||
317 | buf = &data[old & md->mask]; | ||
318 | size = md->mask + 1 - (old & md->mask); | ||
319 | old += size; | ||
320 | while (size) { | ||
321 | int ret = write(output, buf, size); | ||
322 | if (ret < 0) { | ||
323 | perror("failed to write"); | ||
324 | exit(-1); | ||
325 | } | ||
326 | size -= ret; | ||
327 | buf += ret; | ||
328 | } | ||
329 | } | ||
330 | |||
331 | buf = &data[old & md->mask]; | ||
332 | size = head - old; | ||
333 | old += size; | ||
334 | while (size) { | ||
335 | int ret = write(output, buf, size); | ||
336 | if (ret < 0) { | ||
337 | perror("failed to write"); | ||
338 | exit(-1); | ||
339 | } | ||
340 | size -= ret; | ||
341 | buf += ret; | ||
342 | } | ||
343 | |||
344 | md->prev = old; | ||
345 | } | ||
346 | |||
/* Set by sig_handler(); polled by the main loop to know when to stop. */
static volatile int done = 0;

/* Signal handler: only raises the done flag. */
static void sig_handler(int sig)
{
	(void)sig;	/* the signal number is not needed */
	done = 1;
}
353 | |||
354 | static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; | ||
355 | static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; | ||
356 | |||
357 | static int nr_poll; | ||
358 | static int nr_cpu; | ||
359 | |||
360 | struct mmap_event { | ||
361 | struct perf_event_header header; | ||
362 | __u32 pid, tid; | ||
363 | __u64 start; | ||
364 | __u64 len; | ||
365 | __u64 pgoff; | ||
366 | char filename[PATH_MAX]; | ||
367 | }; | ||
368 | struct comm_event { | ||
369 | struct perf_event_header header; | ||
370 | __u32 pid,tid; | ||
371 | char comm[16]; | ||
372 | }; | ||
373 | |||
374 | static pid_t pid_synthesize_comm_event(pid_t pid) | ||
375 | { | ||
376 | char filename[PATH_MAX]; | ||
377 | char bf[BUFSIZ]; | ||
378 | struct comm_event comm_ev; | ||
379 | size_t size; | ||
380 | int fd; | ||
381 | |||
382 | snprintf(filename, sizeof(filename), "/proc/%d/stat", pid); | ||
383 | |||
384 | fd = open(filename, O_RDONLY); | ||
385 | if (fd < 0) { | ||
386 | fprintf(stderr, "couldn't open %s\n", filename); | ||
387 | exit(EXIT_FAILURE); | ||
388 | } | ||
389 | if (read(fd, bf, sizeof(bf)) < 0) { | ||
390 | fprintf(stderr, "couldn't read %s\n", filename); | ||
391 | exit(EXIT_FAILURE); | ||
392 | } | ||
393 | close(fd); | ||
394 | |||
395 | pid_t spid, ppid; | ||
396 | char state; | ||
397 | char comm[18]; | ||
398 | |||
399 | memset(&comm_ev, 0, sizeof(comm_ev)); | ||
400 | int nr = sscanf(bf, "%d %s %c %d %d ", | ||
401 | &spid, comm, &state, &ppid, &comm_ev.pid); | ||
402 | if (nr != 5) { | ||
403 | fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n", | ||
404 | filename); | ||
405 | exit(EXIT_FAILURE); | ||
406 | } | ||
407 | comm_ev.header.type = PERF_EVENT_COMM; | ||
408 | comm_ev.tid = pid; | ||
409 | size = strlen(comm); | ||
410 | comm[--size] = '\0'; /* Remove the ')' at the end */ | ||
411 | --size; /* Remove the '(' at the begin */ | ||
412 | memcpy(comm_ev.comm, comm + 1, size); | ||
413 | size = ALIGN(size, sizeof(uint64_t)); | ||
414 | comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); | ||
415 | int ret = write(output, &comm_ev, comm_ev.header.size); | ||
416 | if (ret < 0) { | ||
417 | perror("failed to write"); | ||
418 | exit(-1); | ||
419 | } | ||
420 | return comm_ev.pid; | ||
421 | } | ||
422 | |||
423 | static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid) | ||
424 | { | ||
425 | char filename[PATH_MAX]; | ||
426 | FILE *fp; | ||
427 | |||
428 | snprintf(filename, sizeof(filename), "/proc/%d/maps", pid); | ||
429 | |||
430 | fp = fopen(filename, "r"); | ||
431 | if (fp == NULL) { | ||
432 | fprintf(stderr, "couldn't open %s\n", filename); | ||
433 | exit(EXIT_FAILURE); | ||
434 | } | ||
435 | while (1) { | ||
436 | char bf[BUFSIZ]; | ||
437 | unsigned char vm_read, vm_write, vm_exec, vm_mayshare; | ||
438 | struct mmap_event mmap_ev = { | ||
439 | .header.type = PERF_EVENT_MMAP, | ||
440 | }; | ||
441 | unsigned long ino; | ||
442 | int major, minor; | ||
443 | size_t size; | ||
444 | if (fgets(bf, sizeof(bf), fp) == NULL) | ||
445 | break; | ||
446 | |||
447 | /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ | ||
448 | sscanf(bf, "%llx-%llx %c%c%c%c %llx %x:%x %lu", | ||
449 | &mmap_ev.start, &mmap_ev.len, | ||
450 | &vm_read, &vm_write, &vm_exec, &vm_mayshare, | ||
451 | &mmap_ev.pgoff, &major, &minor, &ino); | ||
452 | if (vm_exec == 'x') { | ||
453 | char *execname = strrchr(bf, ' '); | ||
454 | |||
455 | if (execname == NULL || execname[1] != '/') | ||
456 | continue; | ||
457 | |||
458 | execname += 1; | ||
459 | size = strlen(execname); | ||
460 | execname[size - 1] = '\0'; /* Remove \n */ | ||
461 | memcpy(mmap_ev.filename, execname, size); | ||
462 | size = ALIGN(size, sizeof(uint64_t)); | ||
463 | mmap_ev.len -= mmap_ev.start; | ||
464 | mmap_ev.header.size = (sizeof(mmap_ev) - | ||
465 | (sizeof(mmap_ev.filename) - size)); | ||
466 | mmap_ev.pid = pgid; | ||
467 | mmap_ev.tid = pid; | ||
468 | |||
469 | if (write(output, &mmap_ev, mmap_ev.header.size) < 0) { | ||
470 | perror("failed to write"); | ||
471 | exit(-1); | ||
472 | } | ||
473 | } | ||
474 | } | ||
475 | |||
476 | fclose(fp); | ||
477 | } | ||
478 | |||
479 | static void open_counters(int cpu, pid_t pid) | ||
480 | { | ||
481 | struct perf_counter_hw_event hw_event; | ||
482 | int counter, group_fd; | ||
483 | int track = 1; | ||
484 | |||
485 | if (pid > 0) { | ||
486 | pid_t pgid = pid_synthesize_comm_event(pid); | ||
487 | pid_synthesize_mmap_events(pid, pgid); | ||
488 | } | ||
489 | |||
490 | group_fd = -1; | ||
491 | for (counter = 0; counter < nr_counters; counter++) { | ||
492 | |||
493 | memset(&hw_event, 0, sizeof(hw_event)); | ||
494 | hw_event.config = event_id[counter]; | ||
495 | hw_event.irq_period = event_count[counter]; | ||
496 | hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID; | ||
497 | hw_event.nmi = nmi; | ||
498 | hw_event.mmap = track; | ||
499 | hw_event.comm = track; | ||
500 | hw_event.inherit = (cpu < 0) && inherit; | ||
501 | |||
502 | track = 0; // only the first counter needs these | ||
503 | |||
504 | fd[nr_cpu][counter] = | ||
505 | sys_perf_counter_open(&hw_event, pid, cpu, group_fd, 0); | ||
506 | |||
507 | if (fd[nr_cpu][counter] < 0) { | ||
508 | int err = errno; | ||
509 | printf("kerneltop error: syscall returned with %d (%s)\n", | ||
510 | fd[nr_cpu][counter], strerror(err)); | ||
511 | if (err == EPERM) | ||
512 | printf("Are you root?\n"); | ||
513 | exit(-1); | ||
514 | } | ||
515 | assert(fd[nr_cpu][counter] >= 0); | ||
516 | fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK); | ||
517 | |||
518 | /* | ||
519 | * First counter acts as the group leader: | ||
520 | */ | ||
521 | if (group && group_fd == -1) | ||
522 | group_fd = fd[nr_cpu][counter]; | ||
523 | |||
524 | event_array[nr_poll].fd = fd[nr_cpu][counter]; | ||
525 | event_array[nr_poll].events = POLLIN; | ||
526 | nr_poll++; | ||
527 | |||
528 | mmap_array[nr_cpu][counter].counter = counter; | ||
529 | mmap_array[nr_cpu][counter].prev = 0; | ||
530 | mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; | ||
531 | mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, | ||
532 | PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0); | ||
533 | if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { | ||
534 | printf("kerneltop error: failed to mmap with %d (%s)\n", | ||
535 | errno, strerror(errno)); | ||
536 | exit(-1); | ||
537 | } | ||
538 | } | ||
539 | nr_cpu++; | ||
540 | } | ||
541 | |||
542 | int cmd_record(int argc, const char **argv) | ||
543 | { | ||
544 | int i, counter; | ||
545 | pid_t pid; | ||
546 | int ret; | ||
547 | |||
548 | page_size = sysconf(_SC_PAGE_SIZE); | ||
549 | |||
550 | process_options(argc, argv); | ||
551 | |||
552 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | ||
553 | assert(nr_cpus <= MAX_NR_CPUS); | ||
554 | assert(nr_cpus >= 0); | ||
555 | |||
556 | output = open(output_name, O_CREAT|O_RDWR, S_IRWXU); | ||
557 | if (output < 0) { | ||
558 | perror("failed to create output file"); | ||
559 | exit(-1); | ||
560 | } | ||
561 | |||
562 | argc -= optind; | ||
563 | argv += optind; | ||
564 | |||
565 | if (!system_wide) { | ||
566 | open_counters(-1, target_pid != -1 ? target_pid : 0); | ||
567 | } else for (i = 0; i < nr_cpus; i++) | ||
568 | open_counters(i, target_pid); | ||
569 | |||
570 | signal(SIGCHLD, sig_handler); | ||
571 | signal(SIGINT, sig_handler); | ||
572 | |||
573 | if (target_pid == -1) { | ||
574 | pid = fork(); | ||
575 | if (pid < 0) | ||
576 | perror("failed to fork"); | ||
577 | |||
578 | if (!pid) { | ||
579 | if (execvp(argv[0], argv)) { | ||
580 | perror(argv[0]); | ||
581 | exit(-1); | ||
582 | } | ||
583 | } | ||
584 | } | ||
585 | |||
586 | if (realtime_prio) { | ||
587 | struct sched_param param; | ||
588 | |||
589 | param.sched_priority = realtime_prio; | ||
590 | if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { | ||
591 | printf("Could not set realtime priority.\n"); | ||
592 | exit(-1); | ||
593 | } | ||
594 | } | ||
595 | |||
596 | /* | ||
597 | * TODO: store the current /proc/$/maps information somewhere | ||
598 | */ | ||
599 | |||
600 | while (!done) { | ||
601 | int hits = events; | ||
602 | |||
603 | for (i = 0; i < nr_cpu; i++) { | ||
604 | for (counter = 0; counter < nr_counters; counter++) | ||
605 | mmap_read(&mmap_array[i][counter]); | ||
606 | } | ||
607 | |||
608 | if (hits == events) | ||
609 | ret = poll(event_array, nr_poll, 100); | ||
610 | } | ||
611 | |||
612 | return 0; | ||
613 | } | ||
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c new file mode 100644 index 000000000000..03518d75d864 --- /dev/null +++ b/Documentation/perf_counter/builtin-stat.c | |||
@@ -0,0 +1,568 @@ | |||
1 | /* | ||
2 | * kerneltop.c: show top kernel functions - performance counters showcase | ||
3 | |||
4 | Build with: | ||
5 | |||
6 | cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt | ||
7 | |||
8 | Sample output: | ||
9 | |||
10 | ------------------------------------------------------------------------------ | ||
11 | KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2) | ||
12 | ------------------------------------------------------------------------------ | ||
13 | |||
14 | weight RIP kernel function | ||
15 | ______ ________________ _______________ | ||
16 | |||
17 | 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev | ||
18 | 33.00 - ffffffff804cb740 : sock_alloc_send_skb | ||
19 | 31.26 - ffffffff804ce808 : skb_push | ||
20 | 22.43 - ffffffff80510004 : tcp_established_options | ||
21 | 19.00 - ffffffff8027d250 : find_get_page | ||
22 | 15.76 - ffffffff804e4fc9 : eth_type_trans | ||
23 | 15.20 - ffffffff804d8baa : dst_release | ||
24 | 14.86 - ffffffff804cf5d8 : skb_release_head_state | ||
25 | 14.00 - ffffffff802217d5 : read_hpet | ||
26 | 12.00 - ffffffff804ffb7f : __ip_local_out | ||
27 | 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish | ||
28 | 8.54 - ffffffff805001a3 : ip_queue_xmit | ||
29 | */ | ||
30 | |||
31 | /* | ||
32 | * perfstat: /usr/bin/time -alike performance counter statistics utility | ||
33 | |||
34 | It summarizes the counter events of all tasks (and child tasks), | ||
35 | covering all CPUs that the command (or workload) executes on. | ||
36 | It only counts the per-task events of the workload started, | ||
37 | independent of how many other tasks run on those CPUs. | ||
38 | |||
39 | Sample output: | ||
40 | |||
41 | $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null | ||
42 | |||
43 | Performance counter stats for 'ls': | ||
44 | |||
45 | 163516953 instructions | ||
46 | 2295 cache-misses | ||
47 | 2855182 branch-misses | ||
48 | */ | ||
49 | |||
50 | /* | ||
51 | * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> | ||
52 | * | ||
53 | * Improvements and fixes by: | ||
54 | * | ||
55 | * Arjan van de Ven <arjan@linux.intel.com> | ||
56 | * Yanmin Zhang <yanmin.zhang@intel.com> | ||
57 | * Wu Fengguang <fengguang.wu@intel.com> | ||
58 | * Mike Galbraith <efault@gmx.de> | ||
59 | * Paul Mackerras <paulus@samba.org> | ||
60 | * | ||
61 | * Released under the GPL v2. (and only v2, not any later version) | ||
62 | */ | ||
63 | |||
64 | #include "util/util.h" | ||
65 | |||
66 | #include <getopt.h> | ||
67 | #include <assert.h> | ||
68 | #include <fcntl.h> | ||
69 | #include <stdio.h> | ||
70 | #include <errno.h> | ||
71 | #include <time.h> | ||
72 | #include <sched.h> | ||
73 | #include <pthread.h> | ||
74 | |||
75 | #include <sys/syscall.h> | ||
76 | #include <sys/ioctl.h> | ||
77 | #include <sys/poll.h> | ||
78 | #include <sys/prctl.h> | ||
79 | #include <sys/wait.h> | ||
80 | #include <sys/uio.h> | ||
81 | #include <sys/mman.h> | ||
82 | |||
83 | #include <linux/unistd.h> | ||
84 | #include <linux/types.h> | ||
85 | |||
86 | #include "../../include/linux/perf_counter.h" | ||
87 | |||
88 | #include "perf.h" | ||
89 | |||
90 | #define EVENT_MASK_KERNEL 1 | ||
91 | #define EVENT_MASK_USER 2 | ||
92 | |||
93 | static int system_wide = 0; | ||
94 | |||
95 | static int nr_counters = 0; | ||
96 | static __u64 event_id[MAX_COUNTERS] = { | ||
97 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), | ||
98 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), | ||
99 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), | ||
100 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), | ||
101 | |||
102 | EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), | ||
103 | EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), | ||
104 | EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), | ||
105 | EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), | ||
106 | }; | ||
107 | static int default_interval = 100000; | ||
108 | static int event_count[MAX_COUNTERS]; | ||
109 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
110 | static int event_mask[MAX_COUNTERS]; | ||
111 | |||
112 | static int tid = -1; | ||
113 | static int profile_cpu = -1; | ||
114 | static int nr_cpus = 0; | ||
115 | static int nmi = 1; | ||
116 | static int group = 0; | ||
117 | static unsigned int page_size; | ||
118 | |||
119 | static int zero; | ||
120 | |||
121 | static int scale = 1; | ||
122 | |||
/* NOTE(review): not referenced in the visible part of this file. */
static const unsigned int default_count[] = {
	1000000,
	1000000,
	  10000,
	  10000,
	1000000,
	  10000,
};

/* Display names of the hardware events, indexed by event id. */
static char *hw_event_names[] = {
	"CPU cycles",
	"instructions",
	"cache references",
	"cache misses",
	"branches",
	"branch misses",
	"bus cycles",
};

/* Display names of the software events, indexed by event id. */
static char *sw_event_names[] = {
	"cpu clock ticks",
	"task clock ticks",
	"pagefaults",
	"context switches",
	"CPU migrations",
	"minor faults",
	"major faults",
};
151 | |||
152 | struct event_symbol { | ||
153 | __u64 event; | ||
154 | char *symbol; | ||
155 | }; | ||
156 | |||
157 | static struct event_symbol event_symbols[] = { | ||
158 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", }, | ||
159 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", }, | ||
160 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", }, | ||
161 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", }, | ||
162 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", }, | ||
163 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", }, | ||
164 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", }, | ||
165 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", }, | ||
166 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", }, | ||
167 | |||
168 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", }, | ||
169 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", }, | ||
170 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", }, | ||
171 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", }, | ||
172 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", }, | ||
173 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", }, | ||
174 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", }, | ||
175 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", }, | ||
176 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", }, | ||
177 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", }, | ||
178 | }; | ||
179 | |||
/*
 * Extract the bit field called "name" from an encoded event config, using
 * the PERF_COUNTER_<name>_MASK and PERF_COUNTER_<name>_SHIFT constants.
 *
 * The argument is parenthesized so that expressions such as `a | b` are
 * masked as a whole rather than being split by operator precedence.
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
187 | |||
188 | static void display_events_help(void) | ||
189 | { | ||
190 | unsigned int i; | ||
191 | __u64 e; | ||
192 | |||
193 | printf( | ||
194 | " -e EVENT --event=EVENT # symbolic-name abbreviations"); | ||
195 | |||
196 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | ||
197 | int type, id; | ||
198 | |||
199 | e = event_symbols[i].event; | ||
200 | type = PERF_COUNTER_TYPE(e); | ||
201 | id = PERF_COUNTER_ID(e); | ||
202 | |||
203 | printf("\n %d:%d: %-20s", | ||
204 | type, id, event_symbols[i].symbol); | ||
205 | } | ||
206 | |||
207 | printf("\n" | ||
208 | " rNNN: raw PMU events (eventsel+umask)\n\n"); | ||
209 | } | ||
210 | |||
211 | static void display_help(void) | ||
212 | { | ||
213 | printf( | ||
214 | "Usage: perfstat [<events...>] <cmd...>\n\n" | ||
215 | "PerfStat Options (up to %d event types can be specified):\n\n", | ||
216 | MAX_COUNTERS); | ||
217 | |||
218 | display_events_help(); | ||
219 | |||
220 | printf( | ||
221 | " -l # scale counter values\n" | ||
222 | " -a # system-wide collection\n"); | ||
223 | exit(0); | ||
224 | } | ||
225 | |||
226 | static char *event_name(int ctr) | ||
227 | { | ||
228 | __u64 config = event_id[ctr]; | ||
229 | int type = PERF_COUNTER_TYPE(config); | ||
230 | int id = PERF_COUNTER_ID(config); | ||
231 | static char buf[32]; | ||
232 | |||
233 | if (PERF_COUNTER_RAW(config)) { | ||
234 | sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config)); | ||
235 | return buf; | ||
236 | } | ||
237 | |||
238 | switch (type) { | ||
239 | case PERF_TYPE_HARDWARE: | ||
240 | if (id < PERF_HW_EVENTS_MAX) | ||
241 | return hw_event_names[id]; | ||
242 | return "unknown-hardware"; | ||
243 | |||
244 | case PERF_TYPE_SOFTWARE: | ||
245 | if (id < PERF_SW_EVENTS_MAX) | ||
246 | return sw_event_names[id]; | ||
247 | return "unknown-software"; | ||
248 | |||
249 | default: | ||
250 | break; | ||
251 | } | ||
252 | |||
253 | return "unknown"; | ||
254 | } | ||
255 | |||
256 | /* | ||
257 | * Each event can have multiple symbolic names. | ||
258 | * Symbolic names are (almost) exactly matched. | ||
259 | */ | ||
260 | static __u64 match_event_symbols(char *str) | ||
261 | { | ||
262 | __u64 config, id; | ||
263 | int type; | ||
264 | unsigned int i; | ||
265 | char mask_str[4]; | ||
266 | |||
267 | if (sscanf(str, "r%llx", &config) == 1) | ||
268 | return config | PERF_COUNTER_RAW_MASK; | ||
269 | |||
270 | switch (sscanf(str, "%d:%llu:%2s", &type, &id, mask_str)) { | ||
271 | case 3: | ||
272 | if (strchr(mask_str, 'u')) | ||
273 | event_mask[nr_counters] |= EVENT_MASK_USER; | ||
274 | if (strchr(mask_str, 'k')) | ||
275 | event_mask[nr_counters] |= EVENT_MASK_KERNEL; | ||
276 | case 2: | ||
277 | return EID(type, id); | ||
278 | |||
279 | default: | ||
280 | break; | ||
281 | } | ||
282 | |||
283 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | ||
284 | if (!strncmp(str, event_symbols[i].symbol, | ||
285 | strlen(event_symbols[i].symbol))) | ||
286 | return event_symbols[i].event; | ||
287 | } | ||
288 | |||
289 | return ~0ULL; | ||
290 | } | ||
291 | |||
292 | static int parse_events(char *str) | ||
293 | { | ||
294 | __u64 config; | ||
295 | |||
296 | again: | ||
297 | if (nr_counters == MAX_COUNTERS) | ||
298 | return -1; | ||
299 | |||
300 | config = match_event_symbols(str); | ||
301 | if (config == ~0ULL) | ||
302 | return -1; | ||
303 | |||
304 | event_id[nr_counters] = config; | ||
305 | nr_counters++; | ||
306 | |||
307 | str = strstr(str, ","); | ||
308 | if (str) { | ||
309 | str++; | ||
310 | goto again; | ||
311 | } | ||
312 | |||
313 | return 0; | ||
314 | } | ||
315 | |||
316 | |||
317 | /* | ||
318 | * perfstat | ||
319 | */ | ||
320 | |||
321 | char fault_here[1000000]; | ||
322 | |||
323 | static void create_perfstat_counter(int counter) | ||
324 | { | ||
325 | struct perf_counter_hw_event hw_event; | ||
326 | |||
327 | memset(&hw_event, 0, sizeof(hw_event)); | ||
328 | hw_event.config = event_id[counter]; | ||
329 | hw_event.record_type = 0; | ||
330 | hw_event.nmi = 0; | ||
331 | hw_event.exclude_kernel = event_mask[counter] & EVENT_MASK_KERNEL; | ||
332 | hw_event.exclude_user = event_mask[counter] & EVENT_MASK_USER; | ||
333 | |||
334 | if (scale) | ||
335 | hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | ||
336 | PERF_FORMAT_TOTAL_TIME_RUNNING; | ||
337 | |||
338 | if (system_wide) { | ||
339 | int cpu; | ||
340 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | ||
341 | fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0); | ||
342 | if (fd[cpu][counter] < 0) { | ||
343 | printf("perfstat error: syscall returned with %d (%s)\n", | ||
344 | fd[cpu][counter], strerror(errno)); | ||
345 | exit(-1); | ||
346 | } | ||
347 | } | ||
348 | } else { | ||
349 | hw_event.inherit = 1; | ||
350 | hw_event.disabled = 1; | ||
351 | |||
352 | fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0); | ||
353 | if (fd[0][counter] < 0) { | ||
354 | printf("perfstat error: syscall returned with %d (%s)\n", | ||
355 | fd[0][counter], strerror(errno)); | ||
356 | exit(-1); | ||
357 | } | ||
358 | } | ||
359 | } | ||
360 | |||
361 | int do_perfstat(int argc, char *argv[]) | ||
362 | { | ||
363 | unsigned long long t0, t1; | ||
364 | int counter; | ||
365 | ssize_t res; | ||
366 | int status; | ||
367 | int pid; | ||
368 | |||
369 | if (!system_wide) | ||
370 | nr_cpus = 1; | ||
371 | |||
372 | for (counter = 0; counter < nr_counters; counter++) | ||
373 | create_perfstat_counter(counter); | ||
374 | |||
375 | argc -= optind; | ||
376 | argv += optind; | ||
377 | |||
378 | if (!argc) | ||
379 | display_help(); | ||
380 | |||
381 | /* | ||
382 | * Enable counters and exec the command: | ||
383 | */ | ||
384 | t0 = rdclock(); | ||
385 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | ||
386 | |||
387 | if ((pid = fork()) < 0) | ||
388 | perror("failed to fork"); | ||
389 | if (!pid) { | ||
390 | if (execvp(argv[0], argv)) { | ||
391 | perror(argv[0]); | ||
392 | exit(-1); | ||
393 | } | ||
394 | } | ||
395 | while (wait(&status) >= 0) | ||
396 | ; | ||
397 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | ||
398 | t1 = rdclock(); | ||
399 | |||
400 | fflush(stdout); | ||
401 | |||
402 | fprintf(stderr, "\n"); | ||
403 | fprintf(stderr, " Performance counter stats for \'%s\':\n", | ||
404 | argv[0]); | ||
405 | fprintf(stderr, "\n"); | ||
406 | |||
407 | for (counter = 0; counter < nr_counters; counter++) { | ||
408 | int cpu, nv; | ||
409 | __u64 count[3], single_count[3]; | ||
410 | int scaled; | ||
411 | |||
412 | count[0] = count[1] = count[2] = 0; | ||
413 | nv = scale ? 3 : 1; | ||
414 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | ||
415 | res = read(fd[cpu][counter], | ||
416 | single_count, nv * sizeof(__u64)); | ||
417 | assert(res == nv * sizeof(__u64)); | ||
418 | |||
419 | count[0] += single_count[0]; | ||
420 | if (scale) { | ||
421 | count[1] += single_count[1]; | ||
422 | count[2] += single_count[2]; | ||
423 | } | ||
424 | } | ||
425 | |||
426 | scaled = 0; | ||
427 | if (scale) { | ||
428 | if (count[2] == 0) { | ||
429 | fprintf(stderr, " %14s %-20s\n", | ||
430 | "<not counted>", event_name(counter)); | ||
431 | continue; | ||
432 | } | ||
433 | if (count[2] < count[1]) { | ||
434 | scaled = 1; | ||
435 | count[0] = (unsigned long long) | ||
436 | ((double)count[0] * count[1] / count[2] + 0.5); | ||
437 | } | ||
438 | } | ||
439 | |||
440 | if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) || | ||
441 | event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) { | ||
442 | |||
443 | double msecs = (double)count[0] / 1000000; | ||
444 | |||
445 | fprintf(stderr, " %14.6f %-20s (msecs)", | ||
446 | msecs, event_name(counter)); | ||
447 | } else { | ||
448 | fprintf(stderr, " %14Ld %-20s (events)", | ||
449 | count[0], event_name(counter)); | ||
450 | } | ||
451 | if (scaled) | ||
452 | fprintf(stderr, " (scaled from %.2f%%)", | ||
453 | (double) count[2] / count[1] * 100); | ||
454 | fprintf(stderr, "\n"); | ||
455 | } | ||
456 | fprintf(stderr, "\n"); | ||
457 | fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", | ||
458 | (double)(t1-t0)/1e6); | ||
459 | fprintf(stderr, "\n"); | ||
460 | |||
461 | return 0; | ||
462 | } | ||
463 | |||
464 | static void process_options(int argc, char **argv) | ||
465 | { | ||
466 | int error = 0, counter; | ||
467 | |||
468 | for (;;) { | ||
469 | int option_index = 0; | ||
470 | /** Options for getopt */ | ||
471 | static struct option long_options[] = { | ||
472 | {"count", required_argument, NULL, 'c'}, | ||
473 | {"cpu", required_argument, NULL, 'C'}, | ||
474 | {"delay", required_argument, NULL, 'd'}, | ||
475 | {"dump_symtab", no_argument, NULL, 'D'}, | ||
476 | {"event", required_argument, NULL, 'e'}, | ||
477 | {"filter", required_argument, NULL, 'f'}, | ||
478 | {"group", required_argument, NULL, 'g'}, | ||
479 | {"help", no_argument, NULL, 'h'}, | ||
480 | {"nmi", required_argument, NULL, 'n'}, | ||
481 | {"munmap_info", no_argument, NULL, 'U'}, | ||
482 | {"pid", required_argument, NULL, 'p'}, | ||
483 | {"realtime", required_argument, NULL, 'r'}, | ||
484 | {"scale", no_argument, NULL, 'l'}, | ||
485 | {"symbol", required_argument, NULL, 's'}, | ||
486 | {"stat", no_argument, NULL, 'S'}, | ||
487 | {"vmlinux", required_argument, NULL, 'x'}, | ||
488 | {"zero", no_argument, NULL, 'z'}, | ||
489 | {NULL, 0, NULL, 0 } | ||
490 | }; | ||
491 | int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU", | ||
492 | long_options, &option_index); | ||
493 | if (c == -1) | ||
494 | break; | ||
495 | |||
496 | switch (c) { | ||
497 | case 'a': system_wide = 1; break; | ||
498 | case 'c': default_interval = atoi(optarg); break; | ||
499 | case 'C': | ||
500 | /* CPU and PID are mutually exclusive */ | ||
501 | if (tid != -1) { | ||
502 | printf("WARNING: CPU switch overriding PID\n"); | ||
503 | sleep(1); | ||
504 | tid = -1; | ||
505 | } | ||
506 | profile_cpu = atoi(optarg); break; | ||
507 | |||
508 | case 'e': error = parse_events(optarg); break; | ||
509 | |||
510 | case 'g': group = atoi(optarg); break; | ||
511 | case 'h': display_help(); break; | ||
512 | case 'l': scale = 1; break; | ||
513 | case 'n': nmi = atoi(optarg); break; | ||
514 | case 'p': | ||
515 | /* CPU and PID are mutually exclusive */ | ||
516 | if (profile_cpu != -1) { | ||
517 | printf("WARNING: PID switch overriding CPU\n"); | ||
518 | sleep(1); | ||
519 | profile_cpu = -1; | ||
520 | } | ||
521 | tid = atoi(optarg); break; | ||
522 | case 'z': zero = 1; break; | ||
523 | default: error = 1; break; | ||
524 | } | ||
525 | } | ||
526 | if (error) | ||
527 | display_help(); | ||
528 | |||
529 | if (!nr_counters) { | ||
530 | nr_counters = 8; | ||
531 | } | ||
532 | |||
533 | for (counter = 0; counter < nr_counters; counter++) { | ||
534 | if (event_count[counter]) | ||
535 | continue; | ||
536 | |||
537 | event_count[counter] = default_interval; | ||
538 | } | ||
539 | } | ||
540 | |||
/*
 * Signal handler that intentionally does nothing: installing it keeps
 * the signal from terminating this process.
 */
static void skip_signal(int signo)
{
	(void)signo;
}
544 | |||
545 | int cmd_stat(int argc, char **argv, const char *prefix) | ||
546 | { | ||
547 | sigset_t blocked; | ||
548 | |||
549 | page_size = sysconf(_SC_PAGE_SIZE); | ||
550 | |||
551 | process_options(argc, argv); | ||
552 | |||
553 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | ||
554 | assert(nr_cpus <= MAX_NR_CPUS); | ||
555 | assert(nr_cpus >= 0); | ||
556 | |||
557 | /* | ||
558 | * We dont want to block the signals - that would cause | ||
559 | * child tasks to inherit that and Ctrl-C would not work. | ||
560 | * What we want is for Ctrl-C to work in the exec()-ed | ||
561 | * task, but being ignored by perf stat itself: | ||
562 | */ | ||
563 | signal(SIGINT, skip_signal); | ||
564 | signal(SIGALRM, skip_signal); | ||
565 | signal(SIGABRT, skip_signal); | ||
566 | |||
567 | return do_perfstat(argc, argv); | ||
568 | } | ||
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c new file mode 100644 index 000000000000..814b2e4925e3 --- /dev/null +++ b/Documentation/perf_counter/builtin-top.c | |||
@@ -0,0 +1,1146 @@ | |||
1 | /* | ||
2 | * kerneltop.c: show top kernel functions - performance counters showcase | ||
3 | |||
4 | Build with: | ||
5 | |||
6 | make -C Documentation/perf_counter/ | ||
7 | |||
8 | Sample output: | ||
9 | |||
10 | ------------------------------------------------------------------------------ | ||
11 | KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2) | ||
12 | ------------------------------------------------------------------------------ | ||
13 | |||
14 | weight RIP kernel function | ||
15 | ______ ________________ _______________ | ||
16 | |||
17 | 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev | ||
18 | 33.00 - ffffffff804cb740 : sock_alloc_send_skb | ||
19 | 31.26 - ffffffff804ce808 : skb_push | ||
20 | 22.43 - ffffffff80510004 : tcp_established_options | ||
21 | 19.00 - ffffffff8027d250 : find_get_page | ||
22 | 15.76 - ffffffff804e4fc9 : eth_type_trans | ||
23 | 15.20 - ffffffff804d8baa : dst_release | ||
24 | 14.86 - ffffffff804cf5d8 : skb_release_head_state | ||
25 | 14.00 - ffffffff802217d5 : read_hpet | ||
26 | 12.00 - ffffffff804ffb7f : __ip_local_out | ||
27 | 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish | ||
28 | 8.54 - ffffffff805001a3 : ip_queue_xmit | ||
29 | */ | ||
30 | |||
31 | /* | ||
32 | * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> | ||
33 | * | ||
34 | * Improvements and fixes by: | ||
35 | * | ||
36 | * Arjan van de Ven <arjan@linux.intel.com> | ||
37 | * Yanmin Zhang <yanmin.zhang@intel.com> | ||
38 | * Wu Fengguang <fengguang.wu@intel.com> | ||
39 | * Mike Galbraith <efault@gmx.de> | ||
40 | * Paul Mackerras <paulus@samba.org> | ||
41 | * | ||
42 | * Released under the GPL v2. (and only v2, not any later version) | ||
43 | */ | ||
44 | |||
45 | #include "util/util.h" | ||
46 | |||
47 | #include <getopt.h> | ||
48 | #include <assert.h> | ||
49 | #include <fcntl.h> | ||
50 | #include <stdio.h> | ||
51 | #include <errno.h> | ||
52 | #include <time.h> | ||
53 | #include <sched.h> | ||
54 | #include <pthread.h> | ||
55 | |||
56 | #include <sys/syscall.h> | ||
57 | #include <sys/ioctl.h> | ||
58 | #include <sys/poll.h> | ||
59 | #include <sys/prctl.h> | ||
60 | #include <sys/wait.h> | ||
61 | #include <sys/uio.h> | ||
62 | #include <sys/mman.h> | ||
63 | |||
64 | #include <linux/unistd.h> | ||
65 | #include <linux/types.h> | ||
66 | |||
67 | #include "../../include/linux/perf_counter.h" | ||
68 | |||
69 | #include "perf.h" | ||
70 | |||
/* NOTE(review): not referenced in the visible part of this file; presumably selects all-CPU profiling - confirm. */
71 | static int system_wide = 0; | ||
72 | |||
/* Number of valid entries in event_id[] / event_count[]. */
73 | static int nr_counters = 0; | ||
/* Counter configurations; pre-filled with eight default events, overwritten from index 0 by parse_events(). */
74 | static __u64 event_id[MAX_COUNTERS] = { | ||
75 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), | ||
76 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), | ||
77 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), | ||
78 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), | ||
79 | |||
80 | EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), | ||
81 | EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), | ||
82 | EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), | ||
83 | EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), | ||
84 | }; | ||
/* NOTE(review): presumably the sample period applied to counters without an explicit one - confirm against the option parser. */
85 | static int default_interval = 100000; | ||
/* Per-counter period; event_count[0] is printed in the header when a single counter is active. */
86 | static int event_count[MAX_COUNTERS]; | ||
/* NOTE(review): presumably counter file descriptors indexed [cpu][counter] - not used in the visible part. */
87 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
88 | |||
/* Minimum count for a symbol (print_sym_table) or source line (show_details) to be displayed. */
89 | static __u64 count_filter = 100; | ||
90 | |||
/* Task and CPU shown in the header; -1 is reported as "all" / omitted. */
91 | static int tid = -1; | ||
92 | static int profile_cpu = -1; | ||
/* CPU count printed in the header when neither tid nor profile_cpu is set. */
93 | static int nr_cpus = 0; | ||
/* Non-zero makes the header say "NMI", zero "IRQ". */
94 | static int nmi = 1; | ||
/* NOTE(review): the settings from here to 'freq' are not used in the visible part of this file. */
95 | static unsigned int realtime_prio = 0; | ||
96 | static int group = 0; | ||
97 | static unsigned int page_size; | ||
98 | static unsigned int mmap_pages = 16; | ||
99 | static int use_mmap = 0; | ||
100 | static int use_munmap = 0; | ||
101 | static int freq = 0; | ||
102 | |||
/* NOTE(review): presumably the vmlinux path later handed to parse_vmlinux() - confirm. */
103 | static char *vmlinux; | ||
104 | |||
/* Name of the symbol to annotate; read_symbol() records its address range in filter_start/filter_end. */
105 | static char *sym_filter; | ||
106 | static unsigned long filter_start; | ||
107 | static unsigned long filter_end; | ||
108 | |||
/* Seconds between two refreshes of the symbol table display. */
109 | static int delay_secs = 2; | ||
/* Non-zero: reset the counts after each display instead of decaying them by 7/8. */
110 | static int zero; | ||
/* Non-zero: parse_symbols() dumps the sorted symbol table to stderr. */
111 | static int dump_symtab; | ||
112 | |||
/* NOTE(review): not used in the visible part of this file. */
113 | static int scale; | ||
114 | |||
/* One line of objdump output, kept in a singly linked list in output order. */
115 | struct source_line { | ||
/* Address parsed from the start of the line by parse_vmlinux(); stays 0 when no address was parsed. */
116 | uint64_t EIP; | ||
/* Hits recorded for this address by record_precise_ip(); reset by show_details(). */
117 | unsigned long count; | ||
/* Text of the line, allocated by getline(), trailing newline removed. */
118 | char *line; | ||
119 | struct source_line *next; | ||
120 | }; | ||
121 | |||
/* Head of the list and the address of the slot where parse_vmlinux() links the next line. */
122 | static struct source_line *lines; | ||
123 | static struct source_line **lines_tail; | ||
124 | |||
/* NOTE(review): not referenced in the visible part of this file; presumably per-event default sample periods - confirm. */
125 | static const unsigned int default_count[] = { | ||
126 | 1000000, | ||
127 | 1000000, | ||
128 | 10000, | ||
129 | 10000, | ||
130 | 1000000, | ||
131 | 10000, | ||
132 | }; | ||
133 | |||
/* Display names for PERF_TYPE_HARDWARE events; event_name() indexes this table by event id. */
134 | static char *hw_event_names[] = { | ||
135 | "CPU cycles", | ||
136 | "instructions", | ||
137 | "cache references", | ||
138 | "cache misses", | ||
139 | "branches", | ||
140 | "branch misses", | ||
141 | "bus cycles", | ||
142 | }; | ||
143 | |||
/* Display names for PERF_TYPE_SOFTWARE events; event_name() indexes this table by event id. */
144 | static char *sw_event_names[] = { | ||
145 | "cpu clock ticks", | ||
146 | "task clock ticks", | ||
147 | "pagefaults", | ||
148 | "context switches", | ||
149 | "CPU migrations", | ||
150 | "minor faults", | ||
151 | "major faults", | ||
152 | }; | ||
153 | |||
/* Maps a symbolic event name accepted by -e to its counter configuration. */
154 | struct event_symbol { | ||
155 | __u64 event; | ||
156 | char *symbol; | ||
157 | }; | ||
158 | |||
/*
 * Known symbolic names; several names may map to the same event.
 * match_event_symbols() scans this table in order and compares only the
 * length of each table entry, so the first entry that is a prefix of the
 * user's string wins.
 */
159 | static struct event_symbol event_symbols[] = { | ||
160 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", }, | ||
161 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", }, | ||
162 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", }, | ||
163 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", }, | ||
164 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", }, | ||
165 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", }, | ||
166 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", }, | ||
167 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", }, | ||
168 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", }, | ||
169 | |||
170 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", }, | ||
171 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", }, | ||
172 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", }, | ||
173 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", }, | ||
174 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", }, | ||
175 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", }, | ||
176 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", }, | ||
177 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", }, | ||
178 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", }, | ||
179 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", }, | ||
180 | }; | ||
181 | |||
/*
 * Extract one field of a counter config word: mask it with
 * PERF_COUNTER_<name>_MASK, then shift it down by
 * PERF_COUNTER_<name>_SHIFT.  The argument is parenthesised so that an
 * expression such as `a | b` is masked as a whole.
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

/* Accessors for the RAW, CONFIG, TYPE and EVENT fields of a config word. */
#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
189 | |||
190 | static void display_events_help(void) | ||
191 | { | ||
192 | unsigned int i; | ||
193 | __u64 e; | ||
194 | |||
195 | printf( | ||
196 | " -e EVENT --event=EVENT # symbolic-name abbreviations"); | ||
197 | |||
198 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | ||
199 | int type, id; | ||
200 | |||
201 | e = event_symbols[i].event; | ||
202 | type = PERF_COUNTER_TYPE(e); | ||
203 | id = PERF_COUNTER_ID(e); | ||
204 | |||
205 | printf("\n %d:%d: %-20s", | ||
206 | type, id, event_symbols[i].symbol); | ||
207 | } | ||
208 | |||
209 | printf("\n" | ||
210 | " rNNN: raw PMU events (eventsel+umask)\n\n"); | ||
211 | } | ||
212 | |||
/*
 * Print the complete usage text (general header, event list, remaining
 * options) to stdout and terminate the process with exit status 0.
 * This function never returns.
 */
213 | static void display_help(void) | ||
214 | { | ||
215 | printf( | ||
216 | "Usage: kerneltop [<options>]\n" | ||
217 | " Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n" | ||
218 | "KernelTop Options (up to %d event types can be specified at once):\n\n", | ||
219 | MAX_COUNTERS); | ||
220 | |||
221 | display_events_help(); | ||
222 | |||
223 | printf( | ||
224 | " -c CNT --count=CNT # event period to sample\n\n" | ||
225 | " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n" | ||
226 | " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n" | ||
227 | " -l # show scale factor for RR events\n" | ||
228 | " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n" | ||
229 | " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" | ||
230 | " -r prio --realtime=<prio> # event acquisition runs with SCHED_FIFO policy\n" | ||
231 | " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n" | ||
232 | " -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n" | ||
233 | " -z --zero # zero counts after display\n" | ||
234 | " -D --dump_symtab # dump symbol table to stderr on startup\n" | ||
235 | " -m pages --mmap_pages=<pages> # number of mmap data pages\n" | ||
236 | " -M --mmap_info # print mmap info stream\n" | ||
237 | " -U --munmap_info # print munmap info stream\n" | ||
238 | ); | ||
239 | |||
/* NOTE(review): status 0 is used even when help is shown because of an option error. */
240 | exit(0); | ||
241 | } | ||
242 | |||
243 | static char *event_name(int ctr) | ||
244 | { | ||
245 | __u64 config = event_id[ctr]; | ||
246 | int type = PERF_COUNTER_TYPE(config); | ||
247 | int id = PERF_COUNTER_ID(config); | ||
248 | static char buf[32]; | ||
249 | |||
250 | if (PERF_COUNTER_RAW(config)) { | ||
251 | sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config)); | ||
252 | return buf; | ||
253 | } | ||
254 | |||
255 | switch (type) { | ||
256 | case PERF_TYPE_HARDWARE: | ||
257 | if (id < PERF_HW_EVENTS_MAX) | ||
258 | return hw_event_names[id]; | ||
259 | return "unknown-hardware"; | ||
260 | |||
261 | case PERF_TYPE_SOFTWARE: | ||
262 | if (id < PERF_SW_EVENTS_MAX) | ||
263 | return sw_event_names[id]; | ||
264 | return "unknown-software"; | ||
265 | |||
266 | default: | ||
267 | break; | ||
268 | } | ||
269 | |||
270 | return "unknown"; | ||
271 | } | ||
272 | |||
273 | /* | ||
274 | * Each event can have multiple symbolic names. | ||
275 | * Symbolic names are (almost) exactly matched. | ||
276 | */ | ||
277 | static __u64 match_event_symbols(char *str) | ||
278 | { | ||
279 | __u64 config, id; | ||
280 | int type; | ||
281 | unsigned int i; | ||
282 | |||
283 | if (sscanf(str, "r%llx", &config) == 1) | ||
284 | return config | PERF_COUNTER_RAW_MASK; | ||
285 | |||
286 | if (sscanf(str, "%d:%llu", &type, &id) == 2) | ||
287 | return EID(type, id); | ||
288 | |||
289 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | ||
290 | if (!strncmp(str, event_symbols[i].symbol, | ||
291 | strlen(event_symbols[i].symbol))) | ||
292 | return event_symbols[i].event; | ||
293 | } | ||
294 | |||
295 | return ~0ULL; | ||
296 | } | ||
297 | |||
298 | static int parse_events(char *str) | ||
299 | { | ||
300 | __u64 config; | ||
301 | |||
302 | again: | ||
303 | if (nr_counters == MAX_COUNTERS) | ||
304 | return -1; | ||
305 | |||
306 | config = match_event_symbols(str); | ||
307 | if (config == ~0ULL) | ||
308 | return -1; | ||
309 | |||
310 | event_id[nr_counters] = config; | ||
311 | nr_counters++; | ||
312 | |||
313 | str = strstr(str, ","); | ||
314 | if (str) { | ||
315 | str++; | ||
316 | goto again; | ||
317 | } | ||
318 | |||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * Symbols | ||
324 | */ | ||
325 | |||
/* Lowest and highest symbol address; set by parse_symbols() and asserted against in record_ip(). */
326 | static uint64_t min_ip; | ||
327 | static uint64_t max_ip = -1ll; | ||
328 | |||
/* One kernel text symbol together with its per-counter hit counts. */
329 | struct sym_entry { | ||
/* Start address read from /proc/kallsyms. */
330 | unsigned long long addr; | ||
/* Symbol name; heap copy made by read_symbol(), or the literal "<end>" for the sentinel. */
331 | char *sym; | ||
/* Hits per counter; decayed or zeroed by print_sym_table() after every refresh. */
332 | unsigned long count[MAX_COUNTERS]; | ||
/* Set to 1 by read_symbol() for the idle-loop functions. */
333 | int skip; | ||
/* First objdump line of this symbol, cached by lookup_sym_in_vmlinux(). */
334 | struct source_line *source; | ||
335 | }; | ||
336 | |||
/* Capacity of sym_table[] and tmp[]. */
337 | #define MAX_SYMS 100000 | ||
338 | |||
/* Number of entries currently in sym_table[]. */
339 | static int sym_table_count; | ||
340 | |||
/* Entry whose name equals sym_filter, or NULL; print_sym_table() shows its annotated source. */
341 | struct sym_entry *sym_filter_entry; | ||
342 | |||
/* Symbol table, sorted by address by sort_symbol_table(). */
343 | static struct sym_entry sym_table[MAX_SYMS]; | ||
344 | |||
345 | static void show_details(struct sym_entry *sym); | ||
346 | |||
347 | /* | ||
348 | * Ordering weight: count-1 * count-2 * ... / count-n | ||
349 | */ | ||
350 | static double sym_weight(const struct sym_entry *sym) | ||
351 | { | ||
352 | double weight; | ||
353 | int counter; | ||
354 | |||
355 | weight = sym->count[0]; | ||
356 | |||
357 | for (counter = 1; counter < nr_counters-1; counter++) | ||
358 | weight *= sym->count[counter]; | ||
359 | |||
360 | weight /= (sym->count[counter] + 1); | ||
361 | |||
362 | return weight; | ||
363 | } | ||
364 | |||
/*
 * qsort() comparator ordering symbols by descending sym_weight().
 *
 * Returns a negative, zero or positive value as qsort() requires; a
 * comparator that only ever returns 0 or 1 is inconsistent and leaves
 * the resulting order unspecified.
 */
static int compare(const void *__sym1, const void *__sym2)
{
	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
	double weight1 = sym_weight(sym1);
	double weight2 = sym_weight(sym2);

	if (weight1 < weight2)
		return 1;
	if (weight1 > weight2)
		return -1;
	return 0;
}
371 | |||
/* Events seen since the last refresh, and how many of them were outside the kernel. */
static long events;
static long userspace_events;
/*
 * ANSI sequence "cursor home" + "erase display".  The ESC bytes are
 * spelled as \033 so they survive editors and transports that drop raw
 * control characters.
 */
static const char CONSOLE_CLEAR[] = "\033[H\033[2J";
375 | |||
/* Scratch copy of sym_table[] that print_sym_table() sorts by weight, leaving sym_table[] in address order. */
376 | static struct sym_entry tmp[MAX_SYMS]; | ||
377 | |||
378 | static void print_sym_table(void) | ||
379 | { | ||
380 | int i, printed; | ||
381 | int counter; | ||
382 | float events_per_sec = events/delay_secs; | ||
383 | float kevents_per_sec = (events-userspace_events)/delay_secs; | ||
384 | float sum_kevents = 0.0; | ||
385 | |||
386 | events = userspace_events = 0; | ||
387 | memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count); | ||
388 | qsort(tmp, sym_table_count, sizeof(tmp[0]), compare); | ||
389 | |||
390 | for (i = 0; i < sym_table_count && tmp[i].count[0]; i++) | ||
391 | sum_kevents += tmp[i].count[0]; | ||
392 | |||
393 | write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR)); | ||
394 | |||
395 | printf( | ||
396 | "------------------------------------------------------------------------------\n"); | ||
397 | printf( " KernelTop:%8.0f irqs/sec kernel:%4.1f%% [%s, ", | ||
398 | events_per_sec, | ||
399 | 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)), | ||
400 | nmi ? "NMI" : "IRQ"); | ||
401 | |||
402 | if (nr_counters == 1) | ||
403 | printf("%d ", event_count[0]); | ||
404 | |||
405 | for (counter = 0; counter < nr_counters; counter++) { | ||
406 | if (counter) | ||
407 | printf("/"); | ||
408 | |||
409 | printf("%s", event_name(counter)); | ||
410 | } | ||
411 | |||
412 | printf( "], "); | ||
413 | |||
414 | if (tid != -1) | ||
415 | printf(" (tid: %d", tid); | ||
416 | else | ||
417 | printf(" (all"); | ||
418 | |||
419 | if (profile_cpu != -1) | ||
420 | printf(", cpu: %d)\n", profile_cpu); | ||
421 | else { | ||
422 | if (tid != -1) | ||
423 | printf(")\n"); | ||
424 | else | ||
425 | printf(", %d CPUs)\n", nr_cpus); | ||
426 | } | ||
427 | |||
428 | printf("------------------------------------------------------------------------------\n\n"); | ||
429 | |||
430 | if (nr_counters == 1) | ||
431 | printf(" events pcnt"); | ||
432 | else | ||
433 | printf(" weight events pcnt"); | ||
434 | |||
435 | printf(" RIP kernel function\n" | ||
436 | " ______ ______ _____ ________________ _______________\n\n" | ||
437 | ); | ||
438 | |||
439 | for (i = 0, printed = 0; i < sym_table_count; i++) { | ||
440 | float pcnt; | ||
441 | int count; | ||
442 | |||
443 | if (printed <= 18 && tmp[i].count[0] >= count_filter) { | ||
444 | pcnt = 100.0 - (100.0*((sum_kevents-tmp[i].count[0])/sum_kevents)); | ||
445 | |||
446 | if (nr_counters == 1) | ||
447 | printf("%19.2f - %4.1f%% - %016llx : %s\n", | ||
448 | sym_weight(tmp + i), | ||
449 | pcnt, tmp[i].addr, tmp[i].sym); | ||
450 | else | ||
451 | printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n", | ||
452 | sym_weight(tmp + i), | ||
453 | tmp[i].count[0], | ||
454 | pcnt, tmp[i].addr, tmp[i].sym); | ||
455 | printed++; | ||
456 | } | ||
457 | /* | ||
458 | * Add decay to the counts: | ||
459 | */ | ||
460 | for (count = 0; count < nr_counters; count++) | ||
461 | sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8; | ||
462 | } | ||
463 | |||
464 | if (sym_filter_entry) | ||
465 | show_details(sym_filter_entry); | ||
466 | |||
467 | { | ||
468 | struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; | ||
469 | |||
470 | if (poll(&stdin_poll, 1, 0) == 1) { | ||
471 | printf("key pressed - exiting.\n"); | ||
472 | exit(0); | ||
473 | } | ||
474 | } | ||
475 | } | ||
476 | |||
477 | static void *display_thread(void *arg) | ||
478 | { | ||
479 | printf("KernelTop refresh period: %d seconds\n", delay_secs); | ||
480 | |||
481 | while (!sleep(delay_secs)) | ||
482 | print_sym_table(); | ||
483 | |||
484 | return NULL; | ||
485 | } | ||
486 | |||
487 | static int read_symbol(FILE *in, struct sym_entry *s) | ||
488 | { | ||
489 | static int filter_match = 0; | ||
490 | char *sym, stype; | ||
491 | char str[500]; | ||
492 | int rc, pos; | ||
493 | |||
494 | rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str); | ||
495 | if (rc == EOF) | ||
496 | return -1; | ||
497 | |||
498 | assert(rc == 3); | ||
499 | |||
500 | /* skip until end of line: */ | ||
501 | pos = strlen(str); | ||
502 | do { | ||
503 | rc = fgetc(in); | ||
504 | if (rc == '\n' || rc == EOF || pos >= 499) | ||
505 | break; | ||
506 | str[pos] = rc; | ||
507 | pos++; | ||
508 | } while (1); | ||
509 | str[pos] = 0; | ||
510 | |||
511 | sym = str; | ||
512 | |||
513 | /* Filter out known duplicates and non-text symbols. */ | ||
514 | if (!strcmp(sym, "_text")) | ||
515 | return 1; | ||
516 | if (!min_ip && !strcmp(sym, "_stext")) | ||
517 | return 1; | ||
518 | if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext")) | ||
519 | return 1; | ||
520 | if (stype != 'T' && stype != 't') | ||
521 | return 1; | ||
522 | if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14)) | ||
523 | return 1; | ||
524 | if (strstr(sym, "_text_start") || strstr(sym, "_text_end")) | ||
525 | return 1; | ||
526 | |||
527 | s->sym = malloc(strlen(str)+1); | ||
528 | assert(s->sym); | ||
529 | |||
530 | strcpy((char *)s->sym, str); | ||
531 | s->skip = 0; | ||
532 | |||
533 | /* Tag events to be skipped. */ | ||
534 | if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym)) | ||
535 | s->skip = 1; | ||
536 | else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym)) | ||
537 | s->skip = 1; | ||
538 | else if (!strcmp("mwait_idle", s->sym)) | ||
539 | s->skip = 1; | ||
540 | |||
541 | if (filter_match == 1) { | ||
542 | filter_end = s->addr; | ||
543 | filter_match = -1; | ||
544 | if (filter_end - filter_start > 10000) { | ||
545 | printf("hm, too large filter symbol <%s> - skipping.\n", | ||
546 | sym_filter); | ||
547 | printf("symbol filter start: %016lx\n", filter_start); | ||
548 | printf(" end: %016lx\n", filter_end); | ||
549 | filter_end = filter_start = 0; | ||
550 | sym_filter = NULL; | ||
551 | sleep(1); | ||
552 | } | ||
553 | } | ||
554 | if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) { | ||
555 | filter_match = 1; | ||
556 | filter_start = s->addr; | ||
557 | } | ||
558 | |||
559 | return 0; | ||
560 | } | ||
561 | |||
562 | static int compare_addr(const void *__sym1, const void *__sym2) | ||
563 | { | ||
564 | const struct sym_entry *sym1 = __sym1, *sym2 = __sym2; | ||
565 | |||
566 | return sym1->addr > sym2->addr; | ||
567 | } | ||
568 | |||
569 | static void sort_symbol_table(void) | ||
570 | { | ||
571 | int i, dups; | ||
572 | |||
573 | do { | ||
574 | qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr); | ||
575 | for (i = 0, dups = 0; i < sym_table_count; i++) { | ||
576 | if (sym_table[i].addr == sym_table[i+1].addr) { | ||
577 | sym_table[i+1].addr = -1ll; | ||
578 | dups++; | ||
579 | } | ||
580 | } | ||
581 | sym_table_count -= dups; | ||
582 | } while(dups); | ||
583 | } | ||
584 | |||
585 | static void parse_symbols(void) | ||
586 | { | ||
587 | struct sym_entry *last; | ||
588 | |||
589 | FILE *kallsyms = fopen("/proc/kallsyms", "r"); | ||
590 | |||
591 | if (!kallsyms) { | ||
592 | printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n"); | ||
593 | exit(-1); | ||
594 | } | ||
595 | |||
596 | while (!feof(kallsyms)) { | ||
597 | if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) { | ||
598 | sym_table_count++; | ||
599 | assert(sym_table_count <= MAX_SYMS); | ||
600 | } | ||
601 | } | ||
602 | |||
603 | sort_symbol_table(); | ||
604 | min_ip = sym_table[0].addr; | ||
605 | max_ip = sym_table[sym_table_count-1].addr; | ||
606 | last = sym_table + sym_table_count++; | ||
607 | |||
608 | last->addr = -1ll; | ||
609 | last->sym = "<end>"; | ||
610 | |||
611 | if (filter_end) { | ||
612 | int count; | ||
613 | for (count=0; count < sym_table_count; count ++) { | ||
614 | if (!strcmp(sym_table[count].sym, sym_filter)) { | ||
615 | sym_filter_entry = &sym_table[count]; | ||
616 | break; | ||
617 | } | ||
618 | } | ||
619 | } | ||
620 | if (dump_symtab) { | ||
621 | int i; | ||
622 | |||
623 | for (i = 0; i < sym_table_count; i++) | ||
624 | fprintf(stderr, "%llx %s\n", | ||
625 | sym_table[i].addr, sym_table[i].sym); | ||
626 | } | ||
627 | } | ||
628 | |||
629 | /* | ||
630 | * Source lines | ||
631 | */ | ||
632 | |||
633 | static void parse_vmlinux(char *filename) | ||
634 | { | ||
635 | FILE *file; | ||
636 | char command[PATH_MAX*2]; | ||
637 | if (!filename) | ||
638 | return; | ||
639 | |||
640 | sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename); | ||
641 | |||
642 | file = popen(command, "r"); | ||
643 | if (!file) | ||
644 | return; | ||
645 | |||
646 | lines_tail = &lines; | ||
647 | while (!feof(file)) { | ||
648 | struct source_line *src; | ||
649 | size_t dummy = 0; | ||
650 | char *c; | ||
651 | |||
652 | src = malloc(sizeof(struct source_line)); | ||
653 | assert(src != NULL); | ||
654 | memset(src, 0, sizeof(struct source_line)); | ||
655 | |||
656 | if (getline(&src->line, &dummy, file) < 0) | ||
657 | break; | ||
658 | if (!src->line) | ||
659 | break; | ||
660 | |||
661 | c = strchr(src->line, '\n'); | ||
662 | if (c) | ||
663 | *c = 0; | ||
664 | |||
665 | src->next = NULL; | ||
666 | *lines_tail = src; | ||
667 | lines_tail = &src->next; | ||
668 | |||
669 | if (strlen(src->line)>8 && src->line[8] == ':') | ||
670 | src->EIP = strtoull(src->line, NULL, 16); | ||
671 | if (strlen(src->line)>8 && src->line[16] == ':') | ||
672 | src->EIP = strtoull(src->line, NULL, 16); | ||
673 | } | ||
674 | pclose(file); | ||
675 | } | ||
676 | |||
677 | static void record_precise_ip(uint64_t ip) | ||
678 | { | ||
679 | struct source_line *line; | ||
680 | |||
681 | for (line = lines; line; line = line->next) { | ||
682 | if (line->EIP == ip) | ||
683 | line->count++; | ||
684 | if (line->EIP > ip) | ||
685 | break; | ||
686 | } | ||
687 | } | ||
688 | |||
689 | static void lookup_sym_in_vmlinux(struct sym_entry *sym) | ||
690 | { | ||
691 | struct source_line *line; | ||
692 | char pattern[PATH_MAX]; | ||
693 | sprintf(pattern, "<%s>:", sym->sym); | ||
694 | |||
695 | for (line = lines; line; line = line->next) { | ||
696 | if (strstr(line->line, pattern)) { | ||
697 | sym->source = line; | ||
698 | break; | ||
699 | } | ||
700 | } | ||
701 | } | ||
702 | |||
703 | static void show_lines(struct source_line *line_queue, int line_queue_count) | ||
704 | { | ||
705 | int i; | ||
706 | struct source_line *line; | ||
707 | |||
708 | line = line_queue; | ||
709 | for (i = 0; i < line_queue_count; i++) { | ||
710 | printf("%8li\t%s\n", line->count, line->line); | ||
711 | line = line->next; | ||
712 | } | ||
713 | } | ||
714 | |||
715 | #define TRACE_COUNT 3 | ||
716 | |||
717 | static void show_details(struct sym_entry *sym) | ||
718 | { | ||
719 | struct source_line *line; | ||
720 | struct source_line *line_queue = NULL; | ||
721 | int displayed = 0; | ||
722 | int line_queue_count = 0; | ||
723 | |||
724 | if (!sym->source) | ||
725 | lookup_sym_in_vmlinux(sym); | ||
726 | if (!sym->source) | ||
727 | return; | ||
728 | |||
729 | printf("Showing details for %s\n", sym->sym); | ||
730 | |||
731 | line = sym->source; | ||
732 | while (line) { | ||
733 | if (displayed && strstr(line->line, ">:")) | ||
734 | break; | ||
735 | |||
736 | if (!line_queue_count) | ||
737 | line_queue = line; | ||
738 | line_queue_count ++; | ||
739 | |||
740 | if (line->count >= count_filter) { | ||
741 | show_lines(line_queue, line_queue_count); | ||
742 | line_queue_count = 0; | ||
743 | line_queue = NULL; | ||
744 | } else if (line_queue_count > TRACE_COUNT) { | ||
745 | line_queue = line_queue->next; | ||
746 | line_queue_count --; | ||
747 | } | ||
748 | |||
749 | line->count = 0; | ||
750 | displayed++; | ||
751 | if (displayed > 300) | ||
752 | break; | ||
753 | line = line->next; | ||
754 | } | ||
755 | } | ||
756 | |||
757 | /* | ||
758 | * Binary search in the histogram table and record the hit: | ||
759 | */ | ||
760 | static void record_ip(uint64_t ip, int counter) | ||
761 | { | ||
762 | int left_idx, middle_idx, right_idx, idx; | ||
763 | unsigned long left, middle, right; | ||
764 | |||
765 | record_precise_ip(ip); | ||
766 | |||
767 | left_idx = 0; | ||
768 | right_idx = sym_table_count-1; | ||
769 | assert(ip <= max_ip && ip >= min_ip); | ||
770 | |||
771 | while (left_idx + 1 < right_idx) { | ||
772 | middle_idx = (left_idx + right_idx) / 2; | ||
773 | |||
774 | left = sym_table[ left_idx].addr; | ||
775 | middle = sym_table[middle_idx].addr; | ||
776 | right = sym_table[ right_idx].addr; | ||
777 | |||
778 | if (!(left <= middle && middle <= right)) { | ||
779 | printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right); | ||
780 | printf("%d %d %d\n", left_idx, middle_idx, right_idx); | ||
781 | } | ||
782 | assert(left <= middle && middle <= right); | ||
783 | if (!(left <= ip && ip <= right)) { | ||
784 | printf(" left: %016lx\n", left); | ||
785 | printf(" ip: %016lx\n", (unsigned long)ip); | ||
786 | printf("right: %016lx\n", right); | ||
787 | } | ||
788 | assert(left <= ip && ip <= right); | ||
789 | /* | ||
790 | * [ left .... target .... middle .... right ] | ||
791 | * => right := middle | ||
792 | */ | ||
793 | if (ip < middle) { | ||
794 | right_idx = middle_idx; | ||
795 | continue; | ||
796 | } | ||
797 | /* | ||
798 | * [ left .... middle ... target ... right ] | ||
799 | * => left := middle | ||
800 | */ | ||
801 | left_idx = middle_idx; | ||
802 | } | ||
803 | |||
804 | idx = left_idx; | ||
805 | |||
806 | if (!sym_table[idx].skip) | ||
807 | sym_table[idx].count[counter]++; | ||
808 | else events--; | ||
809 | } | ||
810 | |||
811 | static void process_event(uint64_t ip, int counter) | ||
812 | { | ||
813 | events++; | ||
814 | |||
815 | if (ip < min_ip || ip > max_ip) { | ||
816 | userspace_events++; | ||
817 | return; | ||
818 | } | ||
819 | |||
820 | record_ip(ip, counter); | ||
821 | } | ||
822 | |||
823 | static void process_options(int argc, char **argv) | ||
824 | { | ||
825 | int error = 0, counter; | ||
826 | |||
827 | for (;;) { | ||
828 | int option_index = 0; | ||
829 | /** Options for getopt */ | ||
830 | static struct option long_options[] = { | ||
831 | {"count", required_argument, NULL, 'c'}, | ||
832 | {"cpu", required_argument, NULL, 'C'}, | ||
833 | {"delay", required_argument, NULL, 'd'}, | ||
834 | {"dump_symtab", no_argument, NULL, 'D'}, | ||
835 | {"event", required_argument, NULL, 'e'}, | ||
836 | {"filter", required_argument, NULL, 'f'}, | ||
837 | {"group", required_argument, NULL, 'g'}, | ||
838 | {"help", no_argument, NULL, 'h'}, | ||
839 | {"nmi", required_argument, NULL, 'n'}, | ||
840 | {"mmap_info", no_argument, NULL, 'M'}, | ||
841 | {"mmap_pages", required_argument, NULL, 'm'}, | ||
842 | {"munmap_info", no_argument, NULL, 'U'}, | ||
843 | {"pid", required_argument, NULL, 'p'}, | ||
844 | {"realtime", required_argument, NULL, 'r'}, | ||
845 | {"scale", no_argument, NULL, 'l'}, | ||
846 | {"symbol", required_argument, NULL, 's'}, | ||
847 | {"stat", no_argument, NULL, 'S'}, | ||
848 | {"vmlinux", required_argument, NULL, 'x'}, | ||
849 | {"zero", no_argument, NULL, 'z'}, | ||
850 | {"freq", required_argument, NULL, 'F'}, | ||
851 | {NULL, 0, NULL, 0 } | ||
852 | }; | ||
853 | int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMUF:", | ||
854 | long_options, &option_index); | ||
855 | if (c == -1) | ||
856 | break; | ||
857 | |||
858 | switch (c) { | ||
859 | case 'a': system_wide = 1; break; | ||
860 | case 'c': default_interval = atoi(optarg); break; | ||
861 | case 'C': | ||
862 | /* CPU and PID are mutually exclusive */ | ||
863 | if (tid != -1) { | ||
864 | printf("WARNING: CPU switch overriding PID\n"); | ||
865 | sleep(1); | ||
866 | tid = -1; | ||
867 | } | ||
868 | profile_cpu = atoi(optarg); break; | ||
869 | case 'd': delay_secs = atoi(optarg); break; | ||
870 | case 'D': dump_symtab = 1; break; | ||
871 | |||
872 | case 'e': error = parse_events(optarg); break; | ||
873 | |||
874 | case 'f': count_filter = atoi(optarg); break; | ||
875 | case 'g': group = atoi(optarg); break; | ||
876 | case 'h': display_help(); break; | ||
877 | case 'l': scale = 1; break; | ||
878 | case 'n': nmi = atoi(optarg); break; | ||
879 | case 'p': | ||
880 | /* CPU and PID are mutually exclusive */ | ||
881 | if (profile_cpu != -1) { | ||
882 | printf("WARNING: PID switch overriding CPU\n"); | ||
883 | sleep(1); | ||
884 | profile_cpu = -1; | ||
885 | } | ||
886 | tid = atoi(optarg); break; | ||
887 | case 'r': realtime_prio = atoi(optarg); break; | ||
888 | case 's': sym_filter = strdup(optarg); break; | ||
889 | case 'x': vmlinux = strdup(optarg); break; | ||
890 | case 'z': zero = 1; break; | ||
891 | case 'm': mmap_pages = atoi(optarg); break; | ||
892 | case 'M': use_mmap = 1; break; | ||
893 | case 'U': use_munmap = 1; break; | ||
894 | case 'F': freq = 1; default_interval = atoi(optarg); break; | ||
895 | default: error = 1; break; | ||
896 | } | ||
897 | } | ||
898 | if (error) | ||
899 | display_help(); | ||
900 | |||
901 | if (!nr_counters) { | ||
902 | nr_counters = 1; | ||
903 | event_id[0] = 0; | ||
904 | } | ||
905 | |||
906 | for (counter = 0; counter < nr_counters; counter++) { | ||
907 | if (event_count[counter]) | ||
908 | continue; | ||
909 | |||
910 | event_count[counter] = default_interval; | ||
911 | } | ||
912 | } | ||
913 | |||
/*
 * Reader-side state for one mmap()ed counter buffer.
 */
struct mmap_data {
	/* Index of the counter this buffer belongs to; passed on with each sample. */
	int			counter;
	/* Start of the mapping: one control page, then the data area at base + page_size. */
	void			*base;
	/* Size of the data area minus one; offsets are wrapped with '& mask'. */
	unsigned int		mask;
	/* Position up to which the data area has already been consumed. */
	unsigned int		prev;
};
920 | |||
921 | static unsigned int mmap_read_head(struct mmap_data *md) | ||
922 | { | ||
923 | struct perf_counter_mmap_page *pc = md->base; | ||
924 | int head; | ||
925 | |||
926 | head = pc->data_head; | ||
927 | rmb(); | ||
928 | |||
929 | return head; | ||
930 | } | ||
931 | |||
932 | struct timeval last_read, this_read; | ||
933 | |||
934 | static void mmap_read(struct mmap_data *md) | ||
935 | { | ||
936 | unsigned int head = mmap_read_head(md); | ||
937 | unsigned int old = md->prev; | ||
938 | unsigned char *data = md->base + page_size; | ||
939 | int diff; | ||
940 | |||
941 | gettimeofday(&this_read, NULL); | ||
942 | |||
943 | /* | ||
944 | * If we're further behind than half the buffer, there's a chance | ||
945 | * the writer will bite our tail and screw up the events under us. | ||
946 | * | ||
947 | * If we somehow ended up ahead of the head, we got messed up. | ||
948 | * | ||
949 | * In either case, truncate and restart at head. | ||
950 | */ | ||
951 | diff = head - old; | ||
952 | if (diff > md->mask / 2 || diff < 0) { | ||
953 | struct timeval iv; | ||
954 | unsigned long msecs; | ||
955 | |||
956 | timersub(&this_read, &last_read, &iv); | ||
957 | msecs = iv.tv_sec*1000 + iv.tv_usec/1000; | ||
958 | |||
959 | fprintf(stderr, "WARNING: failed to keep up with mmap data." | ||
960 | " Last read %lu msecs ago.\n", msecs); | ||
961 | |||
962 | /* | ||
963 | * head points to a known good entry, start there. | ||
964 | */ | ||
965 | old = head; | ||
966 | } | ||
967 | |||
968 | last_read = this_read; | ||
969 | |||
970 | for (; old != head;) { | ||
971 | struct ip_event { | ||
972 | struct perf_event_header header; | ||
973 | __u64 ip; | ||
974 | __u32 pid, tid; | ||
975 | }; | ||
976 | struct mmap_event { | ||
977 | struct perf_event_header header; | ||
978 | __u32 pid, tid; | ||
979 | __u64 start; | ||
980 | __u64 len; | ||
981 | __u64 pgoff; | ||
982 | char filename[PATH_MAX]; | ||
983 | }; | ||
984 | |||
985 | typedef union event_union { | ||
986 | struct perf_event_header header; | ||
987 | struct ip_event ip; | ||
988 | struct mmap_event mmap; | ||
989 | } event_t; | ||
990 | |||
991 | event_t *event = (event_t *)&data[old & md->mask]; | ||
992 | |||
993 | event_t event_copy; | ||
994 | |||
995 | size_t size = event->header.size; | ||
996 | |||
997 | /* | ||
998 | * Event straddles the mmap boundary -- header should always | ||
999 | * be inside due to u64 alignment of output. | ||
1000 | */ | ||
1001 | if ((old & md->mask) + size != ((old + size) & md->mask)) { | ||
1002 | unsigned int offset = old; | ||
1003 | unsigned int len = min(sizeof(*event), size), cpy; | ||
1004 | void *dst = &event_copy; | ||
1005 | |||
1006 | do { | ||
1007 | cpy = min(md->mask + 1 - (offset & md->mask), len); | ||
1008 | memcpy(dst, &data[offset & md->mask], cpy); | ||
1009 | offset += cpy; | ||
1010 | dst += cpy; | ||
1011 | len -= cpy; | ||
1012 | } while (len); | ||
1013 | |||
1014 | event = &event_copy; | ||
1015 | } | ||
1016 | |||
1017 | old += size; | ||
1018 | |||
1019 | if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) { | ||
1020 | if (event->header.type & PERF_RECORD_IP) | ||
1021 | process_event(event->ip.ip, md->counter); | ||
1022 | } else { | ||
1023 | switch (event->header.type) { | ||
1024 | case PERF_EVENT_MMAP: | ||
1025 | case PERF_EVENT_MUNMAP: | ||
1026 | printf("%s: %Lu %Lu %Lu %s\n", | ||
1027 | event->header.type == PERF_EVENT_MMAP | ||
1028 | ? "mmap" : "munmap", | ||
1029 | event->mmap.start, | ||
1030 | event->mmap.len, | ||
1031 | event->mmap.pgoff, | ||
1032 | event->mmap.filename); | ||
1033 | break; | ||
1034 | } | ||
1035 | } | ||
1036 | } | ||
1037 | |||
1038 | md->prev = old; | ||
1039 | } | ||
1040 | |||
1041 | int cmd_top(int argc, char **argv, const char *prefix) | ||
1042 | { | ||
1043 | struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; | ||
1044 | struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; | ||
1045 | struct perf_counter_hw_event hw_event; | ||
1046 | pthread_t thread; | ||
1047 | int i, counter, group_fd, nr_poll = 0; | ||
1048 | unsigned int cpu; | ||
1049 | int ret; | ||
1050 | |||
1051 | page_size = sysconf(_SC_PAGE_SIZE); | ||
1052 | |||
1053 | process_options(argc, argv); | ||
1054 | |||
1055 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | ||
1056 | assert(nr_cpus <= MAX_NR_CPUS); | ||
1057 | assert(nr_cpus >= 0); | ||
1058 | |||
1059 | if (tid != -1 || profile_cpu != -1) | ||
1060 | nr_cpus = 1; | ||
1061 | |||
1062 | parse_symbols(); | ||
1063 | if (vmlinux && sym_filter_entry) | ||
1064 | parse_vmlinux(vmlinux); | ||
1065 | |||
1066 | for (i = 0; i < nr_cpus; i++) { | ||
1067 | group_fd = -1; | ||
1068 | for (counter = 0; counter < nr_counters; counter++) { | ||
1069 | |||
1070 | cpu = profile_cpu; | ||
1071 | if (tid == -1 && profile_cpu == -1) | ||
1072 | cpu = i; | ||
1073 | |||
1074 | memset(&hw_event, 0, sizeof(hw_event)); | ||
1075 | hw_event.config = event_id[counter]; | ||
1076 | hw_event.irq_period = event_count[counter]; | ||
1077 | hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID; | ||
1078 | hw_event.nmi = nmi; | ||
1079 | hw_event.mmap = use_mmap; | ||
1080 | hw_event.munmap = use_munmap; | ||
1081 | hw_event.freq = freq; | ||
1082 | |||
1083 | fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0); | ||
1084 | if (fd[i][counter] < 0) { | ||
1085 | int err = errno; | ||
1086 | printf("kerneltop error: syscall returned with %d (%s)\n", | ||
1087 | fd[i][counter], strerror(err)); | ||
1088 | if (err == EPERM) | ||
1089 | printf("Are you root?\n"); | ||
1090 | exit(-1); | ||
1091 | } | ||
1092 | assert(fd[i][counter] >= 0); | ||
1093 | fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); | ||
1094 | |||
1095 | /* | ||
1096 | * First counter acts as the group leader: | ||
1097 | */ | ||
1098 | if (group && group_fd == -1) | ||
1099 | group_fd = fd[i][counter]; | ||
1100 | |||
1101 | event_array[nr_poll].fd = fd[i][counter]; | ||
1102 | event_array[nr_poll].events = POLLIN; | ||
1103 | nr_poll++; | ||
1104 | |||
1105 | mmap_array[i][counter].counter = counter; | ||
1106 | mmap_array[i][counter].prev = 0; | ||
1107 | mmap_array[i][counter].mask = mmap_pages*page_size - 1; | ||
1108 | mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, | ||
1109 | PROT_READ, MAP_SHARED, fd[i][counter], 0); | ||
1110 | if (mmap_array[i][counter].base == MAP_FAILED) { | ||
1111 | printf("kerneltop error: failed to mmap with %d (%s)\n", | ||
1112 | errno, strerror(errno)); | ||
1113 | exit(-1); | ||
1114 | } | ||
1115 | } | ||
1116 | } | ||
1117 | |||
1118 | if (pthread_create(&thread, NULL, display_thread, NULL)) { | ||
1119 | printf("Could not create display thread.\n"); | ||
1120 | exit(-1); | ||
1121 | } | ||
1122 | |||
1123 | if (realtime_prio) { | ||
1124 | struct sched_param param; | ||
1125 | |||
1126 | param.sched_priority = realtime_prio; | ||
1127 | if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { | ||
1128 | printf("Could not set realtime priority.\n"); | ||
1129 | exit(-1); | ||
1130 | } | ||
1131 | } | ||
1132 | |||
1133 | while (1) { | ||
1134 | int hits = events; | ||
1135 | |||
1136 | for (i = 0; i < nr_cpus; i++) { | ||
1137 | for (counter = 0; counter < nr_counters; counter++) | ||
1138 | mmap_read(&mmap_array[i][counter]); | ||
1139 | } | ||
1140 | |||
1141 | if (hits == events) | ||
1142 | ret = poll(event_array, nr_poll, 100); | ||
1143 | } | ||
1144 | |||
1145 | return 0; | ||
1146 | } | ||
diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h new file mode 100644 index 000000000000..d32318aed8cf --- /dev/null +++ b/Documentation/perf_counter/builtin.h | |||
@@ -0,0 +1,22 @@ | |||
1 | #ifndef BUILTIN_H | ||
2 | #define BUILTIN_H | ||
3 | |||
4 | #include "util/util.h" | ||
5 | #include "util/strbuf.h" | ||
6 | |||
7 | extern const char perf_version_string[]; | ||
8 | extern const char perf_usage_string[]; | ||
9 | extern const char perf_more_info_string[]; | ||
10 | |||
11 | extern void list_common_cmds_help(void); | ||
12 | extern const char *help_unknown_cmd(const char *cmd); | ||
13 | extern void prune_packed_objects(int); | ||
14 | extern int read_line_with_nul(char *buf, int size, FILE *file); | ||
15 | extern int check_pager_config(const char *cmd); | ||
16 | |||
17 | extern int cmd_help(int argc, const char **argv, const char *prefix); | ||
18 | extern int cmd_record(int argc, const char **argv, const char *prefix); | ||
19 | extern int cmd_stat(int argc, const char **argv, const char *prefix); | ||
20 | extern int cmd_top(int argc, const char **argv, const char *prefix); | ||
21 | extern int cmd_version(int argc, const char **argv, const char *prefix); | ||
22 | #endif | ||
diff --git a/Documentation/perf_counter/command-list.txt b/Documentation/perf_counter/command-list.txt new file mode 100644 index 000000000000..d15210aa0cae --- /dev/null +++ b/Documentation/perf_counter/command-list.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | # List of known perf commands. | ||
2 | # command name category [deprecated] [common] | ||
3 | perf-record mainporcelain common | ||
4 | perf-stat mainporcelain common | ||
5 | perf-top mainporcelain common | ||
6 | |||
diff --git a/Documentation/perf_counter/design.txt b/Documentation/perf_counter/design.txt new file mode 100644 index 000000000000..9930c4bddc6f --- /dev/null +++ b/Documentation/perf_counter/design.txt | |||
@@ -0,0 +1,449 @@ | |||
1 | |||
2 | Performance Counters for Linux | ||
3 | ------------------------------ | ||
4 | |||
5 | Performance counters are special hardware registers available on most modern | ||
6 | CPUs. These registers count the number of certain types of hw events: such | ||
7 | as instructions executed, cache misses suffered, or branches mis-predicted - | ||
8 | without slowing down the kernel or applications. These registers can also | ||
9 | trigger interrupts when a threshold number of events have passed - and can | ||
10 | thus be used to profile the code that runs on that CPU. | ||
11 | |||
12 | The Linux Performance Counter subsystem provides an abstraction of these | ||
13 | hardware capabilities. It provides per task and per CPU counters, counter | ||
14 | groups, and it provides event capabilities on top of those. It | ||
15 | provides "virtual" 64-bit counters, regardless of the width of the | ||
16 | underlying hardware counters. | ||
17 | |||
18 | Performance counters are accessed via special file descriptors. | ||
19 | There's one file descriptor per virtual counter used. | ||
20 | |||
21 | The special file descriptor is opened via the perf_counter_open() | ||
22 | system call: | ||
23 | |||
24 | int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr, | ||
25 | pid_t pid, int cpu, int group_fd, | ||
26 | unsigned long flags); | ||
27 | |||
28 | The syscall returns the new fd. The fd can be used via the normal | ||
29 | VFS system calls: read() can be used to read the counter, fcntl() | ||
30 | can be used to set the blocking mode, etc. | ||
31 | |||
32 | Multiple counters can be kept open at a time, and the counters | ||
33 | can be poll()ed. | ||
34 | |||
35 | When creating a new counter fd, 'perf_counter_hw_event' is: | ||
36 | |||
37 | struct perf_counter_hw_event { | ||
38 | /* | ||
39 | * The MSB of the config word signifies if the rest contains cpu | ||
40 | * specific (raw) counter configuration data, if unset, the next | ||
41 | * 7 bits are an event type and the rest of the bits are the event | ||
42 | * identifier. | ||
43 | */ | ||
44 | __u64 config; | ||
45 | |||
46 | __u64 irq_period; | ||
47 | __u32 record_type; | ||
48 | __u32 read_format; | ||
49 | |||
50 | __u64 disabled : 1, /* off by default */ | ||
51 | nmi : 1, /* NMI sampling */ | ||
52 | inherit : 1, /* children inherit it */ | ||
53 | pinned : 1, /* must always be on PMU */ | ||
54 | exclusive : 1, /* only group on PMU */ | ||
55 | exclude_user : 1, /* don't count user */ | ||
56 | exclude_kernel : 1, /* ditto kernel */ | ||
57 | exclude_hv : 1, /* ditto hypervisor */ | ||
58 | exclude_idle : 1, /* don't count when idle */ | ||
59 | mmap : 1, /* include mmap data */ | ||
60 | munmap : 1, /* include munmap data */ | ||
61 | comm : 1, /* include comm data */ | ||
62 | |||
63 | __reserved_1 : 52; | ||
64 | |||
65 | __u32 extra_config_len; | ||
66 | __u32 wakeup_events; /* wakeup every n events */ | ||
67 | |||
68 | __u64 __reserved_2; | ||
69 | __u64 __reserved_3; | ||
70 | }; | ||
71 | |||
72 | The 'config' field specifies what the counter should count. It | ||
73 | is divided into 3 bit-fields: | ||
74 | |||
75 | raw_type: 1 bit (most significant bit) 0x8000_0000_0000_0000 | ||
76 | type: 7 bits (next most significant) 0x7f00_0000_0000_0000 | ||
77 | event_id: 56 bits (least significant) 0x00ff_ffff_ffff_ffff | ||
78 | |||
79 | If 'raw_type' is 1, then the counter will count a hardware event | ||
80 | specified by the remaining 63 bits of 'config'. The encoding is | ||
81 | machine-specific. | ||
82 | |||
83 | If 'raw_type' is 0, then the 'type' field says what kind of counter | ||
84 | this is, with the following encoding: | ||
85 | |||
86 | enum perf_event_types { | ||
87 | PERF_TYPE_HARDWARE = 0, | ||
88 | PERF_TYPE_SOFTWARE = 1, | ||
89 | PERF_TYPE_TRACEPOINT = 2, | ||
90 | }; | ||
91 | |||
92 | A counter of PERF_TYPE_HARDWARE will count the hardware event | ||
93 | specified by 'event_id': | ||
94 | |||
95 | /* | ||
96 | * Generalized performance counter event types, used by the hw_event.event_id | ||
97 | * parameter of the sys_perf_counter_open() syscall: | ||
98 | */ | ||
99 | enum hw_event_ids { | ||
100 | /* | ||
101 | * Common hardware events, generalized by the kernel: | ||
102 | */ | ||
103 | PERF_COUNT_CPU_CYCLES = 0, | ||
104 | PERF_COUNT_INSTRUCTIONS = 1, | ||
105 | PERF_COUNT_CACHE_REFERENCES = 2, | ||
106 | PERF_COUNT_CACHE_MISSES = 3, | ||
107 | PERF_COUNT_BRANCH_INSTRUCTIONS = 4, | ||
108 | PERF_COUNT_BRANCH_MISSES = 5, | ||
109 | PERF_COUNT_BUS_CYCLES = 6, | ||
110 | }; | ||
111 | |||
112 | These are standardized types of events that work relatively uniformly | ||
113 | on all CPUs that implement Performance Counters support under Linux, | ||
114 | although there may be variations (e.g., different CPUs might count | ||
115 | cache references and misses at different levels of the cache hierarchy). | ||
116 | If a CPU is not able to count the selected event, then the system call | ||
117 | will return -EINVAL. | ||
118 | |||
119 | More hw_event_types are supported as well, but they are CPU-specific | ||
120 | and accessed as raw events. For example, to count "External bus | ||
121 | cycles while bus lock signal asserted" events on Intel Core CPUs, pass | ||
122 | in a 0x4064 event_id value and set hw_event.raw_type to 1. | ||
123 | |||
124 | A counter of type PERF_TYPE_SOFTWARE will count one of the available | ||
125 | software events, selected by 'event_id': | ||
126 | |||
127 | /* | ||
128 | * Special "software" counters provided by the kernel, even if the hardware | ||
129 | * does not support performance counters. These counters measure various | ||
130 | * physical and sw events of the kernel (and allow the profiling of them as | ||
131 | * well): | ||
132 | */ | ||
133 | enum sw_event_ids { | ||
134 | PERF_COUNT_CPU_CLOCK = 0, | ||
135 | PERF_COUNT_TASK_CLOCK = 1, | ||
136 | PERF_COUNT_PAGE_FAULTS = 2, | ||
137 | PERF_COUNT_CONTEXT_SWITCHES = 3, | ||
138 | PERF_COUNT_CPU_MIGRATIONS = 4, | ||
139 | PERF_COUNT_PAGE_FAULTS_MIN = 5, | ||
140 | PERF_COUNT_PAGE_FAULTS_MAJ = 6, | ||
141 | }; | ||
142 | |||
143 | Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event | ||
144 | tracer is available, and event_id values can be obtained from | ||
145 | /debug/tracing/events/*/*/id | ||
146 | |||
147 | |||
148 | Counters come in two flavours: counting counters and sampling | ||
149 | counters. A "counting" counter is one that is used for counting the | ||
150 | number of events that occur, and is characterised by having | ||
151 | irq_period = 0. | ||
152 | |||
153 | |||
154 | A read() on a counter returns the current value of the counter and possibly | ||
155 | additional values as specified by 'read_format'; each value is a u64 (8 bytes) | ||
156 | in size. | ||
157 | |||
158 | /* | ||
159 | * Bits that can be set in hw_event.read_format to request that | ||
160 | * reads on the counter should return the indicated quantities, | ||
161 | * in increasing order of bit value, after the counter value. | ||
162 | */ | ||
163 | enum perf_counter_read_format { | ||
164 | PERF_FORMAT_TOTAL_TIME_ENABLED = 1, | ||
165 | PERF_FORMAT_TOTAL_TIME_RUNNING = 2, | ||
166 | }; | ||
167 | |||
168 | Using these additional values one can establish the overcommit ratio for a | ||
169 | particular counter allowing one to take the round-robin scheduling effect | ||
170 | into account. | ||
171 | |||
172 | |||
173 | A "sampling" counter is one that is set up to generate an interrupt | ||
174 | every N events, where N is given by 'irq_period'. A sampling counter | ||
175 | has irq_period > 0. The record_type controls what data is recorded on each | ||
176 | interrupt: | ||
177 | |||
178 | /* | ||
179 | * Bits that can be set in hw_event.record_type to request information | ||
180 | * in the overflow packets. | ||
181 | */ | ||
182 | enum perf_counter_record_format { | ||
183 | PERF_RECORD_IP = 1U << 0, | ||
184 | PERF_RECORD_TID = 1U << 1, | ||
185 | PERF_RECORD_TIME = 1U << 2, | ||
186 | PERF_RECORD_ADDR = 1U << 3, | ||
187 | PERF_RECORD_GROUP = 1U << 4, | ||
188 | PERF_RECORD_CALLCHAIN = 1U << 5, | ||
189 | }; | ||
190 | |||
191 | Such (and other) events will be recorded in a ring-buffer, which is | ||
192 | available to user-space using mmap() (see below). | ||
193 | |||
194 | The 'disabled' bit specifies whether the counter starts out disabled | ||
195 | or enabled. If it is initially disabled, it can be enabled by ioctl | ||
196 | or prctl (see below). | ||
197 | |||
198 | The 'nmi' bit specifies, for hardware events, whether the counter | ||
199 | should be set up to request non-maskable interrupts (NMIs) or normal | ||
200 | interrupts. This bit is ignored if the user doesn't have | ||
201 | CAP_SYS_ADMIN privilege (i.e. is not root) or if the CPU doesn't | ||
202 | generate NMIs from hardware counters. | ||
203 | |||
204 | The 'inherit' bit, if set, specifies that this counter should count | ||
205 | events on descendant tasks as well as the task specified. This only | ||
206 | applies to new descendants, not to any existing descendants at the | ||
207 | time the counter is created (nor to any new descendants of existing | ||
208 | descendants). | ||
209 | |||
210 | The 'pinned' bit, if set, specifies that the counter should always be | ||
211 | on the CPU if at all possible. It only applies to hardware counters | ||
212 | and only to group leaders. If a pinned counter cannot be put onto the | ||
213 | CPU (e.g. because there are not enough hardware counters or because of | ||
214 | a conflict with some other event), then the counter goes into an | ||
215 | 'error' state, where reads return end-of-file (i.e. read() returns 0) | ||
216 | until the counter is subsequently enabled or disabled. | ||
217 | |||
218 | The 'exclusive' bit, if set, specifies that when this counter's group | ||
219 | is on the CPU, it should be the only group using the CPU's counters. | ||
220 | In future, this will allow sophisticated monitoring programs to supply | ||
221 | extra configuration information via 'extra_config_len' to exploit | ||
222 | advanced features of the CPU's Performance Monitor Unit (PMU) that are | ||
223 | not otherwise accessible and that might disrupt other hardware | ||
224 | counters. | ||
225 | |||
226 | The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a | ||
227 | way to request that counting of events be restricted to times when the | ||
228 | CPU is in user, kernel and/or hypervisor mode. | ||
229 | |||
230 | The 'mmap' and 'munmap' bits allow recording of PROT_EXEC mmap/munmap | ||
231 | operations. These can be used to relate userspace IP addresses to actual | ||
232 | code, even after the mapping (or even the whole process) is gone. | ||
233 | These events are recorded in the ring-buffer (see below). | ||
234 | |||
235 | The 'comm' bit allows tracking of process comm data on process creation. | ||
236 | This too is recorded in the ring-buffer (see below). | ||
237 | |||
238 | The 'pid' parameter to the perf_counter_open() system call allows the | ||
239 | counter to be specific to a task: | ||
240 | |||
241 | pid == 0: if the pid parameter is zero, the counter is attached to the | ||
242 | current task. | ||
243 | |||
244 | pid > 0: the counter is attached to a specific task (if the current task | ||
245 | has sufficient privilege to do so) | ||
246 | |||
247 | pid < 0: all tasks are counted (per cpu counters) | ||
248 | |||
249 | The 'cpu' parameter allows a counter to be made specific to a CPU: | ||
250 | |||
251 | cpu >= 0: the counter is restricted to a specific CPU | ||
252 | cpu == -1: the counter counts on all CPUs | ||
253 | |||
254 | (Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.) | ||
255 | |||
256 | A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts | ||
257 | events of that task and 'follows' that task to whatever CPU the task | ||
258 | gets scheduled to. Per task counters can be created by any user, for | ||
259 | their own tasks. | ||
260 | |||
261 | A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts | ||
262 | all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege. | ||
263 | |||
264 | The 'flags' parameter is currently unused and must be zero. | ||
265 | |||
266 | The 'group_fd' parameter allows counter "groups" to be set up. A | ||
267 | counter group has one counter which is the group "leader". The leader | ||
268 | is created first, with group_fd = -1 in the perf_counter_open call | ||
269 | that creates it. The rest of the group members are created | ||
270 | subsequently, with group_fd giving the fd of the group leader. | ||
271 | (A single counter on its own is created with group_fd = -1 and is | ||
272 | considered to be a group with only 1 member.) | ||
273 | |||
274 | A counter group is scheduled onto the CPU as a unit, that is, it will | ||
275 | only be put onto the CPU if all of the counters in the group can be | ||
276 | put onto the CPU. This means that the values of the member counters | ||
277 | can be meaningfully compared, added, divided (to get ratios), etc., | ||
278 | with each other, since they have counted events for the same set of | ||
279 | executed instructions. | ||
280 | |||
281 | |||
282 | Like stated, asynchronous events, like counter overflow or PROT_EXEC mmap | ||
283 | tracking are logged into a ring-buffer. This ring-buffer is created and | ||
284 | accessed through mmap(). | ||
285 | |||
286 | The mmap size should be 1+2^n pages, where the first page is a meta-data page | ||
287 | (struct perf_counter_mmap_page) that contains various bits of information such | ||
288 | as where the ring-buffer head is. | ||
289 | |||
290 | /* | ||
291 | * Structure of the page that can be mapped via mmap | ||
292 | */ | ||
293 | struct perf_counter_mmap_page { | ||
294 | __u32 version; /* version number of this structure */ | ||
295 | __u32 compat_version; /* lowest version this is compat with */ | ||
296 | |||
297 | /* | ||
298 | * Bits needed to read the hw counters in user-space. | ||
299 | * | ||
300 | * u32 seq; | ||
301 | * s64 count; | ||
302 | * | ||
303 | * do { | ||
304 | * seq = pc->lock; | ||
305 | * | ||
306 | * barrier() | ||
307 | * if (pc->index) { | ||
308 | * count = pmc_read(pc->index - 1); | ||
309 | * count += pc->offset; | ||
310 | * } else | ||
311 | * goto regular_read; | ||
312 | * | ||
313 | * barrier(); | ||
314 | * } while (pc->lock != seq); | ||
315 | * | ||
316 | * NOTE: for obvious reason this only works on self-monitoring | ||
317 | * processes. | ||
318 | */ | ||
319 | __u32 lock; /* seqlock for synchronization */ | ||
320 | __u32 index; /* hardware counter identifier */ | ||
321 | __s64 offset; /* add to hardware counter value */ | ||
322 | |||
323 | /* | ||
324 | * Control data for the mmap() data buffer. | ||
325 | * | ||
326 | * User-space reading this value should issue an rmb(), on SMP capable | ||
327 | * platforms, after reading this value -- see perf_counter_wakeup(). | ||
328 | */ | ||
329 | __u32 data_head; /* head in the data section */ | ||
330 | }; | ||
331 | |||
332 | NOTE: the hw-counter userspace bits are arch specific and are currently only | ||
333 | implemented on powerpc. | ||
334 | |||
335 | The following 2^n pages are the ring-buffer which contains events of the form: | ||
336 | |||
337 | #define PERF_EVENT_MISC_KERNEL (1 << 0) | ||
338 | #define PERF_EVENT_MISC_USER (1 << 1) | ||
339 | #define PERF_EVENT_MISC_OVERFLOW (1 << 2) | ||
340 | |||
341 | struct perf_event_header { | ||
342 | __u32 type; | ||
343 | __u16 misc; | ||
344 | __u16 size; | ||
345 | }; | ||
346 | |||
347 | enum perf_event_type { | ||
348 | |||
349 | /* | ||
350 | * The MMAP events record the PROT_EXEC mappings so that we can | ||
351 | * correlate userspace IPs to code. They have the following structure: | ||
352 | * | ||
353 | * struct { | ||
354 | * struct perf_event_header header; | ||
355 | * | ||
356 | * u32 pid, tid; | ||
357 | * u64 addr; | ||
358 | * u64 len; | ||
359 | * u64 pgoff; | ||
360 | * char filename[]; | ||
361 | * }; | ||
362 | */ | ||
363 | PERF_EVENT_MMAP = 1, | ||
364 | PERF_EVENT_MUNMAP = 2, | ||
365 | |||
366 | /* | ||
367 | * struct { | ||
368 | * struct perf_event_header header; | ||
369 | * | ||
370 | * u32 pid, tid; | ||
371 | * char comm[]; | ||
372 | * }; | ||
373 | */ | ||
374 | PERF_EVENT_COMM = 3, | ||
375 | |||
376 | /* | ||
377 | * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field | ||
378 | * will be PERF_RECORD_* | ||
379 | * | ||
380 | * struct { | ||
381 | * struct perf_event_header header; | ||
382 | * | ||
383 | * { u64 ip; } && PERF_RECORD_IP | ||
384 | * { u32 pid, tid; } && PERF_RECORD_TID | ||
385 | * { u64 time; } && PERF_RECORD_TIME | ||
386 | * { u64 addr; } && PERF_RECORD_ADDR | ||
387 | * | ||
388 | * { u64 nr; | ||
389 | * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP | ||
390 | * | ||
391 | * { u16 nr, | ||
392 | * hv, | ||
393 | * kernel, | ||
394 | * user; | ||
395 | * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN | ||
396 | * }; | ||
397 | */ | ||
398 | }; | ||
399 | |||
400 | NOTE: PERF_RECORD_CALLCHAIN is arch specific and currently only implemented | ||
401 | on x86. | ||
402 | |||
403 | Notification of new events is possible through poll()/select()/epoll() and | ||
404 | fcntl() managing signals. | ||
405 | |||
406 | Normally a notification is generated for every page filled, however one can | ||
407 | additionally set perf_counter_hw_event.wakeup_events to generate one every | ||
408 | so many counter overflow events. | ||
409 | |||
410 | Future work will include a splice() interface to the ring-buffer. | ||
411 | |||
412 | |||
413 | Counters can be enabled and disabled in two ways: via ioctl and via | ||
414 | prctl. When a counter is disabled, it doesn't count or generate | ||
415 | events but does continue to exist and maintain its count value. | ||
416 | |||
417 | An individual counter or counter group can be enabled with | ||
418 | |||
419 | ioctl(fd, PERF_COUNTER_IOC_ENABLE); | ||
420 | |||
421 | or disabled with | ||
422 | |||
423 | ioctl(fd, PERF_COUNTER_IOC_DISABLE); | ||
424 | |||
425 | Enabling or disabling the leader of a group enables or disables the | ||
426 | whole group; that is, while the group leader is disabled, none of the | ||
427 | counters in the group will count. Enabling or disabling a member of a | ||
428 | group other than the leader only affects that counter - disabling a | ||
429 | non-leader stops that counter from counting but doesn't affect any | ||
430 | other counter. | ||
431 | |||
432 | Additionally, non-inherited overflow counters can use | ||
433 | |||
434 | ioctl(fd, PERF_COUNTER_IOC_REFRESH, nr); | ||
435 | |||
436 | to enable a counter for 'nr' events, after which it gets disabled again. | ||
437 | |||
438 | A process can enable or disable all the counter groups that are | ||
439 | attached to it, using prctl: | ||
440 | |||
441 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | ||
442 | |||
443 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | ||
444 | |||
445 | This applies to all counters on the current process, whether created | ||
446 | by this process or by another, and doesn't affect any counters that | ||
447 | this process has created on other processes. It only enables or | ||
448 | disables the group leaders, not any other members in the groups. | ||
449 | |||
diff --git a/Documentation/perf_counter/perf-report.cc b/Documentation/perf_counter/perf-report.cc new file mode 100644 index 000000000000..8855107fe6b3 --- /dev/null +++ b/Documentation/perf_counter/perf-report.cc | |||
@@ -0,0 +1,515 @@ | |||
1 | #define _GNU_SOURCE | ||
2 | #include <sys/types.h> | ||
3 | #include <sys/stat.h> | ||
4 | #include <sys/time.h> | ||
5 | #include <unistd.h> | ||
6 | #include <stdint.h> | ||
7 | #include <stdlib.h> | ||
8 | #include <string.h> | ||
9 | #include <limits.h> | ||
10 | #include <fcntl.h> | ||
11 | #include <stdio.h> | ||
12 | #include <errno.h> | ||
13 | #include <ctype.h> | ||
14 | #include <time.h> | ||
15 | #include <getopt.h> | ||
16 | #include <assert.h> | ||
17 | |||
18 | #include <sys/ioctl.h> | ||
19 | #include <sys/poll.h> | ||
20 | #include <sys/prctl.h> | ||
21 | #include <sys/wait.h> | ||
22 | #include <sys/mman.h> | ||
23 | #include <sys/types.h> | ||
24 | #include <sys/stat.h> | ||
25 | |||
26 | #include <linux/unistd.h> | ||
27 | #include <linux/types.h> | ||
28 | |||
29 | #include "../../include/linux/perf_counter.h" | ||
30 | |||
31 | #include <set> | ||
32 | #include <map> | ||
33 | #include <string> | ||
34 | |||
35 | |||
36 | #define SHOW_KERNEL 1 | ||
37 | #define SHOW_USER 2 | ||
38 | #define SHOW_HV 4 | ||
39 | |||
40 | static char const *input_name = "output.perf"; | ||
41 | static int input; | ||
42 | static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; | ||
43 | |||
44 | static unsigned long page_size; | ||
45 | static unsigned long mmap_window = 32; | ||
46 | |||
47 | struct ip_event { | ||
48 | struct perf_event_header header; | ||
49 | __u64 ip; | ||
50 | __u32 pid, tid; | ||
51 | }; | ||
52 | struct mmap_event { | ||
53 | struct perf_event_header header; | ||
54 | __u32 pid, tid; | ||
55 | __u64 start; | ||
56 | __u64 len; | ||
57 | __u64 pgoff; | ||
58 | char filename[PATH_MAX]; | ||
59 | }; | ||
60 | struct comm_event { | ||
61 | struct perf_event_header header; | ||
62 | __u32 pid,tid; | ||
63 | char comm[16]; | ||
64 | }; | ||
65 | |||
66 | typedef union event_union { | ||
67 | struct perf_event_header header; | ||
68 | struct ip_event ip; | ||
69 | struct mmap_event mmap; | ||
70 | struct comm_event comm; | ||
71 | } event_t; | ||
72 | |||
/*
 * One section of a DSO as parsed from readelf output.  Sections are kept
 * in a std::set ordered by their end address, so upper_bound() on a
 * key yields the first section that ends above that address.
 */
struct section {
	uint64_t	start;
	uint64_t	end;
	uint64_t	offset;
	std::string	name;

	/* Leaves every numeric field unset. */
	section()
	{
	}

	/* Lookup key: only 'end' is filled in, which is all operator< reads. */
	section(uint64_t stab)
		: end(stab)
	{
	}

	section(uint64_t start, uint64_t size, uint64_t offset, std::string name)
		: start(start), end(start + size), offset(offset), name(name)
	{
	}

	/* Strict ordering on the end address only. */
	bool operator < (const struct section &s) const
	{
		return s.end > end;
	}
};
93 | |||
94 | typedef std::set<struct section> sections_t; | ||
95 | |||
/*
 * One symbol covering [start, end).  Symbols are kept in a std::set
 * ordered by start address.
 */
struct symbol {
	uint64_t	start;
	uint64_t	end;
	std::string	name;

	/* Leaves the address fields unset. */
	symbol()
	{
	}

	/* Lookup key: only 'start' is filled in, which is all operator< reads. */
	symbol(uint64_t ip)
		: start(ip)
	{
	}

	symbol(uint64_t start, uint64_t len, std::string name)
		: start(start), end(start + len), name(name)
	{
	}

	/* Strict ordering on the start address only. */
	bool operator < (const struct symbol &s) const
	{
		return s.start > start;
	}
};
114 | |||
115 | typedef std::set<struct symbol> symbols_t; | ||
116 | |||
117 | struct dso { | ||
118 | sections_t sections; | ||
119 | symbols_t syms; | ||
120 | }; | ||
121 | |||
122 | static std::map<std::string, struct dso> dsos; | ||
123 | |||
124 | static void load_dso_sections(std::string dso_name) | ||
125 | { | ||
126 | struct dso &dso = dsos[dso_name]; | ||
127 | |||
128 | std::string cmd = "readelf -DSW " + dso_name; | ||
129 | |||
130 | FILE *file = popen(cmd.c_str(), "r"); | ||
131 | if (!file) { | ||
132 | perror("failed to open pipe"); | ||
133 | exit(-1); | ||
134 | } | ||
135 | |||
136 | char *line = NULL; | ||
137 | size_t n = 0; | ||
138 | |||
139 | while (!feof(file)) { | ||
140 | uint64_t addr, off, size; | ||
141 | char name[32]; | ||
142 | |||
143 | if (getline(&line, &n, file) < 0) | ||
144 | break; | ||
145 | if (!line) | ||
146 | break; | ||
147 | |||
148 | if (sscanf(line, " [%*2d] %16s %*14s %Lx %Lx %Lx", | ||
149 | name, &addr, &off, &size) == 4) { | ||
150 | |||
151 | dso.sections.insert(section(addr, size, addr - off, name)); | ||
152 | } | ||
153 | #if 0 | ||
154 | /* | ||
155 | * for reading readelf symbols (-s), however these don't seem | ||
156 | * to include nearly everything, so use nm for that. | ||
157 | */ | ||
158 | if (sscanf(line, " %*4d %*3d: %Lx %5Lu %*7s %*6s %*7s %3d %s", | ||
159 | &start, &size, §ion, sym) == 4) { | ||
160 | |||
161 | start -= dso.section_offsets[section]; | ||
162 | |||
163 | dso.syms.insert(symbol(start, size, std::string(sym))); | ||
164 | } | ||
165 | #endif | ||
166 | } | ||
167 | pclose(file); | ||
168 | } | ||
169 | |||
170 | static void load_dso_symbols(std::string dso_name, std::string args) | ||
171 | { | ||
172 | struct dso &dso = dsos[dso_name]; | ||
173 | |||
174 | std::string cmd = "nm -nSC " + args + " " + dso_name; | ||
175 | |||
176 | FILE *file = popen(cmd.c_str(), "r"); | ||
177 | if (!file) { | ||
178 | perror("failed to open pipe"); | ||
179 | exit(-1); | ||
180 | } | ||
181 | |||
182 | char *line = NULL; | ||
183 | size_t n = 0; | ||
184 | |||
185 | while (!feof(file)) { | ||
186 | uint64_t start, size; | ||
187 | char c; | ||
188 | char sym[1024]; | ||
189 | |||
190 | if (getline(&line, &n, file) < 0) | ||
191 | break; | ||
192 | if (!line) | ||
193 | break; | ||
194 | |||
195 | |||
196 | if (sscanf(line, "%Lx %Lx %c %s", &start, &size, &c, sym) == 4) { | ||
197 | sections_t::const_iterator si = | ||
198 | dso.sections.upper_bound(section(start)); | ||
199 | if (si == dso.sections.end()) { | ||
200 | printf("symbol in unknown section: %s\n", sym); | ||
201 | continue; | ||
202 | } | ||
203 | |||
204 | start -= si->offset; | ||
205 | |||
206 | dso.syms.insert(symbol(start, size, sym)); | ||
207 | } | ||
208 | } | ||
209 | pclose(file); | ||
210 | } | ||
211 | |||
212 | static void load_dso(std::string dso_name) | ||
213 | { | ||
214 | load_dso_sections(dso_name); | ||
215 | load_dso_symbols(dso_name, "-D"); /* dynamic symbols */ | ||
216 | load_dso_symbols(dso_name, ""); /* regular ones */ | ||
217 | } | ||
218 | |||
219 | void load_kallsyms(void) | ||
220 | { | ||
221 | struct dso &dso = dsos["[kernel]"]; | ||
222 | |||
223 | FILE *file = fopen("/proc/kallsyms", "r"); | ||
224 | if (!file) { | ||
225 | perror("failed to open kallsyms"); | ||
226 | exit(-1); | ||
227 | } | ||
228 | |||
229 | char *line; | ||
230 | size_t n; | ||
231 | |||
232 | while (!feof(file)) { | ||
233 | uint64_t start; | ||
234 | char c; | ||
235 | char sym[1024000]; | ||
236 | |||
237 | if (getline(&line, &n, file) < 0) | ||
238 | break; | ||
239 | if (!line) | ||
240 | break; | ||
241 | |||
242 | if (sscanf(line, "%Lx %c %s", &start, &c, sym) == 3) | ||
243 | dso.syms.insert(symbol(start, 0x1000000, std::string(sym))); | ||
244 | } | ||
245 | fclose(file); | ||
246 | } | ||
247 | |||
248 | struct map { | ||
249 | uint64_t start; | ||
250 | uint64_t end; | ||
251 | uint64_t pgoff; | ||
252 | |||
253 | std::string dso; | ||
254 | |||
255 | map() { }; | ||
256 | |||
257 | map(uint64_t ip) : end(ip) { } | ||
258 | |||
259 | map(mmap_event *mmap) { | ||
260 | start = mmap->start; | ||
261 | end = mmap->start + mmap->len; | ||
262 | pgoff = mmap->pgoff; | ||
263 | |||
264 | dso = std::string(mmap->filename); | ||
265 | |||
266 | if (dsos.find(dso) == dsos.end()) | ||
267 | load_dso(dso); | ||
268 | }; | ||
269 | |||
270 | bool operator < (const struct map &m) const { | ||
271 | return end < m.end; | ||
272 | }; | ||
273 | }; | ||
274 | |||
275 | typedef std::set<struct map> maps_t; | ||
276 | |||
277 | static std::map<int, maps_t> maps; | ||
278 | |||
279 | static std::map<int, std::string> comms; | ||
280 | |||
281 | static std::map<std::string, int> hist; | ||
282 | static std::multimap<int, std::string> rev_hist; | ||
283 | |||
284 | static std::string resolve_comm(int pid) | ||
285 | { | ||
286 | std::string comm; | ||
287 | |||
288 | std::map<int, std::string>::const_iterator ci = comms.find(pid); | ||
289 | if (ci != comms.end()) { | ||
290 | comm = ci->second; | ||
291 | } else { | ||
292 | char pid_str[30]; | ||
293 | |||
294 | sprintf(pid_str, ":%d", pid); | ||
295 | comm = pid_str; | ||
296 | } | ||
297 | |||
298 | return comm; | ||
299 | } | ||
300 | |||
301 | static std::string resolve_user_symbol(int pid, uint64_t ip) | ||
302 | { | ||
303 | std::string sym = "<unknown>"; | ||
304 | |||
305 | maps_t &m = maps[pid]; | ||
306 | maps_t::const_iterator mi = m.upper_bound(map(ip)); | ||
307 | if (mi == m.end()) | ||
308 | return sym; | ||
309 | |||
310 | ip -= mi->start + mi->pgoff; | ||
311 | |||
312 | symbols_t &s = dsos[mi->dso].syms; | ||
313 | symbols_t::const_iterator si = s.upper_bound(symbol(ip)); | ||
314 | |||
315 | sym = mi->dso + ": <unknown>"; | ||
316 | |||
317 | if (si == s.begin()) | ||
318 | return sym; | ||
319 | si--; | ||
320 | |||
321 | if (si->start <= ip && ip < si->end) | ||
322 | sym = mi->dso + ": " + si->name; | ||
323 | #if 0 | ||
324 | else if (si->start <= ip) | ||
325 | sym = mi->dso + ": ?" + si->name; | ||
326 | #endif | ||
327 | |||
328 | return sym; | ||
329 | } | ||
330 | |||
331 | static std::string resolve_kernel_symbol(uint64_t ip) | ||
332 | { | ||
333 | std::string sym = "<unknown>"; | ||
334 | |||
335 | symbols_t &s = dsos["[kernel]"].syms; | ||
336 | symbols_t::const_iterator si = s.upper_bound(symbol(ip)); | ||
337 | |||
338 | if (si == s.begin()) | ||
339 | return sym; | ||
340 | si--; | ||
341 | |||
342 | if (si->start <= ip && ip < si->end) | ||
343 | sym = si->name; | ||
344 | |||
345 | return sym; | ||
346 | } | ||
347 | |||
/*
 * Print the usage summary on stdout and exit with status 0.
 * Lists every option process_options() accepts.
 */
static void display_help(void)
{
	printf(
	"Usage: perf-report [<options>]\n"
	" -i file --input=<file> # input file\n"
	" -k --no-kernel # do not show kernel samples\n"
	" -u --no-user # do not show user samples\n"
	" -h --no-hv # do not show hypervisor samples\n"
	);

	exit(0);
}
357 | |||
358 | static void process_options(int argc, char *argv[]) | ||
359 | { | ||
360 | int error = 0; | ||
361 | |||
362 | for (;;) { | ||
363 | int option_index = 0; | ||
364 | /** Options for getopt */ | ||
365 | static struct option long_options[] = { | ||
366 | {"input", required_argument, NULL, 'i'}, | ||
367 | {"no-user", no_argument, NULL, 'u'}, | ||
368 | {"no-kernel", no_argument, NULL, 'k'}, | ||
369 | {"no-hv", no_argument, NULL, 'h'}, | ||
370 | {NULL, 0, NULL, 0 } | ||
371 | }; | ||
372 | int c = getopt_long(argc, argv, "+:i:kuh", | ||
373 | long_options, &option_index); | ||
374 | if (c == -1) | ||
375 | break; | ||
376 | |||
377 | switch (c) { | ||
378 | case 'i': input_name = strdup(optarg); break; | ||
379 | case 'k': show_mask &= ~SHOW_KERNEL; break; | ||
380 | case 'u': show_mask &= ~SHOW_USER; break; | ||
381 | case 'h': show_mask &= ~SHOW_HV; break; | ||
382 | default: error = 1; break; | ||
383 | } | ||
384 | } | ||
385 | |||
386 | if (error) | ||
387 | display_help(); | ||
388 | } | ||
389 | |||
390 | int main(int argc, char *argv[]) | ||
391 | { | ||
392 | unsigned long offset = 0; | ||
393 | unsigned long head = 0; | ||
394 | struct stat stat; | ||
395 | char *buf; | ||
396 | event_t *event; | ||
397 | int ret; | ||
398 | unsigned long total = 0; | ||
399 | |||
400 | page_size = getpagesize(); | ||
401 | |||
402 | process_options(argc, argv); | ||
403 | |||
404 | input = open(input_name, O_RDONLY); | ||
405 | if (input < 0) { | ||
406 | perror("failed to open file"); | ||
407 | exit(-1); | ||
408 | } | ||
409 | |||
410 | ret = fstat(input, &stat); | ||
411 | if (ret < 0) { | ||
412 | perror("failed to stat file"); | ||
413 | exit(-1); | ||
414 | } | ||
415 | |||
416 | if (!stat.st_size) { | ||
417 | fprintf(stderr, "zero-sized file, nothing to do!\n"); | ||
418 | exit(0); | ||
419 | } | ||
420 | |||
421 | load_kallsyms(); | ||
422 | |||
423 | remap: | ||
424 | buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, | ||
425 | MAP_SHARED, input, offset); | ||
426 | if (buf == MAP_FAILED) { | ||
427 | perror("failed to mmap file"); | ||
428 | exit(-1); | ||
429 | } | ||
430 | |||
431 | more: | ||
432 | event = (event_t *)(buf + head); | ||
433 | |||
434 | if (head + event->header.size >= page_size * mmap_window) { | ||
435 | unsigned long shift = page_size * (head / page_size); | ||
436 | int ret; | ||
437 | |||
438 | ret = munmap(buf, page_size * mmap_window); | ||
439 | assert(ret == 0); | ||
440 | |||
441 | offset += shift; | ||
442 | head -= shift; | ||
443 | goto remap; | ||
444 | } | ||
445 | |||
446 | |||
447 | if (!event->header.size) { | ||
448 | fprintf(stderr, "zero-sized event at file offset %ld\n", offset + head); | ||
449 | fprintf(stderr, "skipping %ld bytes of events.\n", stat.st_size - offset - head); | ||
450 | goto done; | ||
451 | } | ||
452 | |||
453 | head += event->header.size; | ||
454 | |||
455 | if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) { | ||
456 | std::string comm, sym, level; | ||
457 | int show = 0; | ||
458 | char output[1024]; | ||
459 | |||
460 | if (event->header.misc & PERF_EVENT_MISC_KERNEL) { | ||
461 | show |= SHOW_KERNEL; | ||
462 | level = " [k] "; | ||
463 | sym = resolve_kernel_symbol(event->ip.ip); | ||
464 | } else if (event->header.misc & PERF_EVENT_MISC_USER) { | ||
465 | show |= SHOW_USER; | ||
466 | level = " [.] "; | ||
467 | sym = resolve_user_symbol(event->ip.pid, event->ip.ip); | ||
468 | } else { | ||
469 | show |= SHOW_HV; | ||
470 | level = " [H] "; | ||
471 | } | ||
472 | |||
473 | if (show & show_mask) { | ||
474 | comm = resolve_comm(event->ip.pid); | ||
475 | snprintf(output, sizeof(output), "%16s %s %s", | ||
476 | comm.c_str(), level.c_str(), sym.c_str()); | ||
477 | hist[output]++; | ||
478 | } | ||
479 | |||
480 | total++; | ||
481 | |||
482 | } else switch (event->header.type) { | ||
483 | case PERF_EVENT_MMAP: | ||
484 | maps[event->mmap.pid].insert(map(&event->mmap)); | ||
485 | break; | ||
486 | |||
487 | case PERF_EVENT_COMM: | ||
488 | comms[event->comm.pid] = std::string(event->comm.comm); | ||
489 | break; | ||
490 | } | ||
491 | |||
492 | if (offset + head < stat.st_size) | ||
493 | goto more; | ||
494 | |||
495 | done: | ||
496 | |||
497 | close(input); | ||
498 | |||
499 | std::map<std::string, int>::iterator hi = hist.begin(); | ||
500 | |||
501 | while (hi != hist.end()) { | ||
502 | rev_hist.insert(std::pair<int, std::string>(hi->second, hi->first)); | ||
503 | hist.erase(hi++); | ||
504 | } | ||
505 | |||
506 | std::multimap<int, std::string>::const_iterator ri = rev_hist.begin(); | ||
507 | |||
508 | while (ri != rev_hist.end()) { | ||
509 | printf(" %5.2f %s\n", (100.0 * ri->first)/total, ri->second.c_str()); | ||
510 | ri++; | ||
511 | } | ||
512 | |||
513 | return 0; | ||
514 | } | ||
515 | |||
diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c new file mode 100644 index 000000000000..594d270be390 --- /dev/null +++ b/Documentation/perf_counter/perf.c | |||
@@ -0,0 +1,414 @@ | |||
1 | #include "builtin.h" | ||
2 | #include "util/exec_cmd.h" | ||
3 | #include "util/cache.h" | ||
4 | #include "util/quote.h" | ||
5 | #include "util/run-command.h" | ||
6 | |||
7 | const char perf_usage_string[] = | ||
8 | "perf [--version] [--help] COMMAND [ARGS]"; | ||
9 | |||
10 | const char perf_more_info_string[] = | ||
11 | "See 'perf help COMMAND' for more information on a specific command."; | ||
12 | |||
13 | static int use_pager = -1; | ||
/*
 * Carries a command name into pager_command_config() and the pager
 * setting found for it back out.
 */
struct pager_config {
	const char *cmd;	/* name matched against "pager.<cmd>" */
	int val;		/* result slot written by the callback */
};
18 | |||
19 | static int pager_command_config(const char *var, const char *value, void *data) | ||
20 | { | ||
21 | struct pager_config *c = data; | ||
22 | if (!prefixcmp(var, "pager.") && !strcmp(var + 6, c->cmd)) | ||
23 | c->val = perf_config_bool(var, value); | ||
24 | return 0; | ||
25 | } | ||
26 | |||
27 | /* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */ | ||
28 | int check_pager_config(const char *cmd) | ||
29 | { | ||
30 | struct pager_config c; | ||
31 | c.cmd = cmd; | ||
32 | c.val = -1; | ||
33 | perf_config(pager_command_config, &c); | ||
34 | return c.val; | ||
35 | } | ||
36 | |||
37 | static void commit_pager_choice(void) { | ||
38 | switch (use_pager) { | ||
39 | case 0: | ||
40 | setenv("PERF_PAGER", "cat", 1); | ||
41 | break; | ||
42 | case 1: | ||
43 | /* setup_pager(); */ | ||
44 | break; | ||
45 | default: | ||
46 | break; | ||
47 | } | ||
48 | } | ||
49 | |||
50 | static int handle_options(const char*** argv, int* argc, int* envchanged) | ||
51 | { | ||
52 | int handled = 0; | ||
53 | |||
54 | while (*argc > 0) { | ||
55 | const char *cmd = (*argv)[0]; | ||
56 | if (cmd[0] != '-') | ||
57 | break; | ||
58 | |||
59 | /* | ||
60 | * For legacy reasons, the "version" and "help" | ||
61 | * commands can be written with "--" prepended | ||
62 | * to make them look like flags. | ||
63 | */ | ||
64 | if (!strcmp(cmd, "--help") || !strcmp(cmd, "--version")) | ||
65 | break; | ||
66 | |||
67 | /* | ||
68 | * Check remaining flags. | ||
69 | */ | ||
70 | if (!prefixcmp(cmd, "--exec-path")) { | ||
71 | cmd += 11; | ||
72 | if (*cmd == '=') | ||
73 | perf_set_argv_exec_path(cmd + 1); | ||
74 | else { | ||
75 | puts(perf_exec_path()); | ||
76 | exit(0); | ||
77 | } | ||
78 | } else if (!strcmp(cmd, "--html-path")) { | ||
79 | puts(system_path(PERF_HTML_PATH)); | ||
80 | exit(0); | ||
81 | } else if (!strcmp(cmd, "-p") || !strcmp(cmd, "--paginate")) { | ||
82 | use_pager = 1; | ||
83 | } else if (!strcmp(cmd, "--no-pager")) { | ||
84 | use_pager = 0; | ||
85 | if (envchanged) | ||
86 | *envchanged = 1; | ||
87 | } else if (!strcmp(cmd, "--perf-dir")) { | ||
88 | if (*argc < 2) { | ||
89 | fprintf(stderr, "No directory given for --perf-dir.\n" ); | ||
90 | usage(perf_usage_string); | ||
91 | } | ||
92 | setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1); | ||
93 | if (envchanged) | ||
94 | *envchanged = 1; | ||
95 | (*argv)++; | ||
96 | (*argc)--; | ||
97 | handled++; | ||
98 | } else if (!prefixcmp(cmd, "--perf-dir=")) { | ||
99 | setenv(PERF_DIR_ENVIRONMENT, cmd + 10, 1); | ||
100 | if (envchanged) | ||
101 | *envchanged = 1; | ||
102 | } else if (!strcmp(cmd, "--work-tree")) { | ||
103 | if (*argc < 2) { | ||
104 | fprintf(stderr, "No directory given for --work-tree.\n" ); | ||
105 | usage(perf_usage_string); | ||
106 | } | ||
107 | setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1); | ||
108 | if (envchanged) | ||
109 | *envchanged = 1; | ||
110 | (*argv)++; | ||
111 | (*argc)--; | ||
112 | } else if (!prefixcmp(cmd, "--work-tree=")) { | ||
113 | setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + 12, 1); | ||
114 | if (envchanged) | ||
115 | *envchanged = 1; | ||
116 | } else { | ||
117 | fprintf(stderr, "Unknown option: %s\n", cmd); | ||
118 | usage(perf_usage_string); | ||
119 | } | ||
120 | |||
121 | (*argv)++; | ||
122 | (*argc)--; | ||
123 | handled++; | ||
124 | } | ||
125 | return handled; | ||
126 | } | ||
127 | |||
128 | static int handle_alias(int *argcp, const char ***argv) | ||
129 | { | ||
130 | int envchanged = 0, ret = 0, saved_errno = errno; | ||
131 | int count, option_count; | ||
132 | const char** new_argv; | ||
133 | const char *alias_command; | ||
134 | char *alias_string; | ||
135 | |||
136 | alias_command = (*argv)[0]; | ||
137 | alias_string = alias_lookup(alias_command); | ||
138 | if (alias_string) { | ||
139 | if (alias_string[0] == '!') { | ||
140 | if (*argcp > 1) { | ||
141 | struct strbuf buf; | ||
142 | |||
143 | strbuf_init(&buf, PATH_MAX); | ||
144 | strbuf_addstr(&buf, alias_string); | ||
145 | sq_quote_argv(&buf, (*argv) + 1, PATH_MAX); | ||
146 | free(alias_string); | ||
147 | alias_string = buf.buf; | ||
148 | } | ||
149 | ret = system(alias_string + 1); | ||
150 | if (ret >= 0 && WIFEXITED(ret) && | ||
151 | WEXITSTATUS(ret) != 127) | ||
152 | exit(WEXITSTATUS(ret)); | ||
153 | die("Failed to run '%s' when expanding alias '%s'", | ||
154 | alias_string + 1, alias_command); | ||
155 | } | ||
156 | count = split_cmdline(alias_string, &new_argv); | ||
157 | if (count < 0) | ||
158 | die("Bad alias.%s string", alias_command); | ||
159 | option_count = handle_options(&new_argv, &count, &envchanged); | ||
160 | if (envchanged) | ||
161 | die("alias '%s' changes environment variables\n" | ||
162 | "You can use '!perf' in the alias to do this.", | ||
163 | alias_command); | ||
164 | memmove(new_argv - option_count, new_argv, | ||
165 | count * sizeof(char *)); | ||
166 | new_argv -= option_count; | ||
167 | |||
168 | if (count < 1) | ||
169 | die("empty alias for %s", alias_command); | ||
170 | |||
171 | if (!strcmp(alias_command, new_argv[0])) | ||
172 | die("recursive alias: %s", alias_command); | ||
173 | |||
174 | new_argv = realloc(new_argv, sizeof(char*) * | ||
175 | (count + *argcp + 1)); | ||
176 | /* insert after command name */ | ||
177 | memcpy(new_argv + count, *argv + 1, sizeof(char*) * *argcp); | ||
178 | new_argv[count+*argcp] = NULL; | ||
179 | |||
180 | *argv = new_argv; | ||
181 | *argcp += count - 1; | ||
182 | |||
183 | ret = 1; | ||
184 | } | ||
185 | |||
186 | errno = saved_errno; | ||
187 | |||
188 | return ret; | ||
189 | } | ||
190 | |||
191 | const char perf_version_string[] = PERF_VERSION; | ||
192 | |||
193 | #define RUN_SETUP (1<<0) | ||
194 | #define USE_PAGER (1<<1) | ||
195 | /* | ||
196 | * require working tree to be present -- anything that uses this needs | ||
197 | * RUN_SETUP for reading from the configuration file. | ||
198 | */ | ||
199 | #define NEED_WORK_TREE (1<<2) | ||
200 | |||
/* One row of the builtin command table. */
struct cmd_struct {
	const char *cmd;				/* name typed by the user */
	int (*fn)(int, const char **, const char *);	/* called as fn(argc, argv, prefix) */
	int option;					/* RUN_SETUP / USE_PAGER / NEED_WORK_TREE bits */
};
206 | |||
207 | static int run_builtin(struct cmd_struct *p, int argc, const char **argv) | ||
208 | { | ||
209 | int status; | ||
210 | struct stat st; | ||
211 | const char *prefix; | ||
212 | |||
213 | prefix = NULL; | ||
214 | if (p->option & RUN_SETUP) | ||
215 | prefix = NULL; /* setup_perf_directory(); */ | ||
216 | |||
217 | if (use_pager == -1 && p->option & RUN_SETUP) | ||
218 | use_pager = check_pager_config(p->cmd); | ||
219 | if (use_pager == -1 && p->option & USE_PAGER) | ||
220 | use_pager = 1; | ||
221 | commit_pager_choice(); | ||
222 | |||
223 | if (p->option & NEED_WORK_TREE) | ||
224 | /* setup_work_tree() */; | ||
225 | |||
226 | status = p->fn(argc, argv, prefix); | ||
227 | if (status) | ||
228 | return status & 0xff; | ||
229 | |||
230 | /* Somebody closed stdout? */ | ||
231 | if (fstat(fileno(stdout), &st)) | ||
232 | return 0; | ||
233 | /* Ignore write errors for pipes and sockets.. */ | ||
234 | if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode)) | ||
235 | return 0; | ||
236 | |||
237 | /* Check for ENOSPC and EIO errors.. */ | ||
238 | if (fflush(stdout)) | ||
239 | die("write failure on standard output: %s", strerror(errno)); | ||
240 | if (ferror(stdout)) | ||
241 | die("unknown write failure on standard output"); | ||
242 | if (fclose(stdout)) | ||
243 | die("close failed on standard output: %s", strerror(errno)); | ||
244 | return 0; | ||
245 | } | ||
246 | |||
247 | static void handle_internal_command(int argc, const char **argv) | ||
248 | { | ||
249 | const char *cmd = argv[0]; | ||
250 | static struct cmd_struct commands[] = { | ||
251 | { "help", cmd_help, 0 }, | ||
252 | { "record", cmd_record, 0 }, | ||
253 | { "stat", cmd_stat, 0 }, | ||
254 | { "top", cmd_top, 0 }, | ||
255 | { "version", cmd_version, 0 }, | ||
256 | }; | ||
257 | int i; | ||
258 | static const char ext[] = STRIP_EXTENSION; | ||
259 | |||
260 | if (sizeof(ext) > 1) { | ||
261 | i = strlen(argv[0]) - strlen(ext); | ||
262 | if (i > 0 && !strcmp(argv[0] + i, ext)) { | ||
263 | char *argv0 = strdup(argv[0]); | ||
264 | argv[0] = cmd = argv0; | ||
265 | argv0[i] = '\0'; | ||
266 | } | ||
267 | } | ||
268 | |||
269 | /* Turn "perf cmd --help" into "perf help cmd" */ | ||
270 | if (argc > 1 && !strcmp(argv[1], "--help")) { | ||
271 | argv[1] = argv[0]; | ||
272 | argv[0] = cmd = "help"; | ||
273 | } | ||
274 | |||
275 | for (i = 0; i < ARRAY_SIZE(commands); i++) { | ||
276 | struct cmd_struct *p = commands+i; | ||
277 | if (strcmp(p->cmd, cmd)) | ||
278 | continue; | ||
279 | exit(run_builtin(p, argc, argv)); | ||
280 | } | ||
281 | } | ||
282 | |||
283 | static void execv_dashed_external(const char **argv) | ||
284 | { | ||
285 | struct strbuf cmd = STRBUF_INIT; | ||
286 | const char *tmp; | ||
287 | int status; | ||
288 | |||
289 | strbuf_addf(&cmd, "perf-%s", argv[0]); | ||
290 | |||
291 | /* | ||
292 | * argv[0] must be the perf command, but the argv array | ||
293 | * belongs to the caller, and may be reused in | ||
294 | * subsequent loop iterations. Save argv[0] and | ||
295 | * restore it on error. | ||
296 | */ | ||
297 | tmp = argv[0]; | ||
298 | argv[0] = cmd.buf; | ||
299 | |||
300 | /* | ||
301 | * if we fail because the command is not found, it is | ||
302 | * OK to return. Otherwise, we just pass along the status code. | ||
303 | */ | ||
304 | status = run_command_v_opt(argv, 0); | ||
305 | if (status != -ERR_RUN_COMMAND_EXEC) { | ||
306 | if (IS_RUN_COMMAND_ERR(status)) | ||
307 | die("unable to run '%s'", argv[0]); | ||
308 | exit(-status); | ||
309 | } | ||
310 | errno = ENOENT; /* as if we called execvp */ | ||
311 | |||
312 | argv[0] = tmp; | ||
313 | |||
314 | strbuf_release(&cmd); | ||
315 | } | ||
316 | |||
/*
 * Try to run *argv as a builtin, then as an external "perf-<cmd>"
 * program, then -- once only -- expand it as an alias and retry both.
 * Both runners exit the process when they succeed, so getting back here
 * means nothing could be run.
 *
 * Returns 1 if an alias expansion was performed, 0 otherwise.
 */
static int run_argv(int *argcp, const char ***argv)
{
	int done_alias = 0;

	for (;;) {
		/* See if it's an internal command */
		handle_internal_command(*argcp, *argv);

		/* .. then try the external ones */
		execv_dashed_external(*argv);

		/* It could be an alias -- this works around the insanity
		 * of overriding "perf log" with "perf show" by having
		 * alias.log = show
		 */
		if (done_alias)
			break;
		if (!handle_alias(argcp, argv))
			break;
		done_alias = 1;
	}

	return done_alias;
}
339 | |||
340 | |||
341 | int main(int argc, const char **argv) | ||
342 | { | ||
343 | const char *cmd; | ||
344 | |||
345 | cmd = perf_extract_argv0_path(argv[0]); | ||
346 | if (!cmd) | ||
347 | cmd = "perf-help"; | ||
348 | |||
349 | /* | ||
350 | * "perf-xxxx" is the same as "perf xxxx", but we obviously: | ||
351 | * | ||
352 | * - cannot take flags in between the "perf" and the "xxxx". | ||
353 | * - cannot execute it externally (since it would just do | ||
354 | * the same thing over again) | ||
355 | * | ||
356 | * So we just directly call the internal command handler, and | ||
357 | * die if that one cannot handle it. | ||
358 | */ | ||
359 | if (!prefixcmp(cmd, "perf-")) { | ||
360 | cmd += 4; | ||
361 | argv[0] = cmd; | ||
362 | handle_internal_command(argc, argv); | ||
363 | die("cannot handle %s internally", cmd); | ||
364 | } | ||
365 | |||
366 | /* Look for flags.. */ | ||
367 | argv++; | ||
368 | argc--; | ||
369 | handle_options(&argv, &argc, NULL); | ||
370 | commit_pager_choice(); | ||
371 | if (argc > 0) { | ||
372 | if (!prefixcmp(argv[0], "--")) | ||
373 | argv[0] += 2; | ||
374 | } else { | ||
375 | /* The user didn't specify a command; give them help */ | ||
376 | printf("usage: %s\n\n", perf_usage_string); | ||
377 | list_common_cmds_help(); | ||
378 | printf("\n%s\n", perf_more_info_string); | ||
379 | exit(1); | ||
380 | } | ||
381 | cmd = argv[0]; | ||
382 | |||
383 | /* | ||
384 | * We use PATH to find perf commands, but we prepend some higher | ||
385 | * precidence paths: the "--exec-path" option, the PERF_EXEC_PATH | ||
386 | * environment, and the $(perfexecdir) from the Makefile at build | ||
387 | * time. | ||
388 | */ | ||
389 | setup_path(); | ||
390 | |||
391 | while (1) { | ||
392 | static int done_help = 0; | ||
393 | static int was_alias = 0; | ||
394 | was_alias = run_argv(&argc, &argv); | ||
395 | if (errno != ENOENT) | ||
396 | break; | ||
397 | if (was_alias) { | ||
398 | fprintf(stderr, "Expansion of alias '%s' failed; " | ||
399 | "'%s' is not a perf-command\n", | ||
400 | cmd, argv[0]); | ||
401 | exit(1); | ||
402 | } | ||
403 | if (!done_help) { | ||
404 | cmd = argv[0] = help_unknown_cmd(cmd); | ||
405 | done_help = 1; | ||
406 | } else | ||
407 | break; | ||
408 | } | ||
409 | |||
410 | fprintf(stderr, "Failed to run command '%s': %s\n", | ||
411 | cmd, strerror(errno)); | ||
412 | |||
413 | return 1; | ||
414 | } | ||
diff --git a/Documentation/perf_counter/perf.h b/Documentation/perf_counter/perf.h new file mode 100644 index 000000000000..6fa3656399f4 --- /dev/null +++ b/Documentation/perf_counter/perf.h | |||
@@ -0,0 +1,62 @@ | |||
1 | #ifndef _PERF_PERF_H | ||
2 | #define _PERF_PERF_H | ||
3 | |||
4 | /* | ||
5 | * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all | ||
6 | * counters in the current task. | ||
7 | */ | ||
8 | #define PR_TASK_PERF_COUNTERS_DISABLE 31 | ||
9 | #define PR_TASK_PERF_COUNTERS_ENABLE 32 | ||
10 | |||
11 | #ifndef NSEC_PER_SEC | ||
12 | # define NSEC_PER_SEC 1000000000ULL | ||
13 | #endif | ||
14 | |||
15 | static inline unsigned long long rdclock(void) | ||
16 | { | ||
17 | struct timespec ts; | ||
18 | |||
19 | clock_gettime(CLOCK_MONOTONIC, &ts); | ||
20 | return ts.tv_sec * 1000000000ULL + ts.tv_nsec; | ||
21 | } | ||
22 | |||
23 | /* | ||
24 | * Pick up some kernel type conventions: | ||
25 | */ | ||
26 | #define __user | ||
27 | #define asmlinkage | ||
28 | |||
29 | #if defined(__x86_64__) || defined(__i386__) | ||
30 | #include "../../arch/x86/include/asm/unistd.h" | ||
31 | #define rmb() asm volatile("lfence" ::: "memory") | ||
32 | #define cpu_relax() asm volatile("rep; nop" ::: "memory"); | ||
33 | #endif | ||
34 | |||
#ifdef __powerpc__
#include "../../arch/powerpc/include/asm/unistd.h"
/*
 * rmb() expands to a "sync" and cpu_relax() to an empty asm, both with
 * a "memory" clobber.  Neither expansion ends in ';' so that
 * "if (x) cpu_relax(); else ..." parses as intended.
 */
#define rmb()		asm volatile ("sync" ::: "memory")
#define cpu_relax()	asm volatile ("" ::: "memory")
#endif
40 | |||
41 | #define unlikely(x) __builtin_expect(!!(x), 0) | ||
/*
 * Smaller of two values; each argument is evaluated exactly once.
 * The pointer comparison exists only so the compiler complains when the
 * two arguments have different types.  Uses __typeof__ rather than
 * typeof so the macro also works in strict ISO modes (-std=c99/c11).
 */
#define min(x, y) ({				\
	__typeof__(x) _min1 = (x);		\
	__typeof__(y) _min2 = (y);		\
	(void) (&_min1 == &_min2);		\
	_min1 < _min2 ? _min1 : _min2; })
47 | |||
48 | static inline int | ||
49 | sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr, | ||
50 | pid_t pid, int cpu, int group_fd, | ||
51 | unsigned long flags) | ||
52 | { | ||
53 | return syscall(__NR_perf_counter_open, hw_event_uptr, pid, cpu, | ||
54 | group_fd, flags); | ||
55 | } | ||
56 | |||
57 | #define MAX_COUNTERS 64 | ||
58 | #define MAX_NR_CPUS 256 | ||
59 | |||
60 | #define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id)) | ||
61 | |||
62 | #endif | ||
diff --git a/Documentation/perf_counter/util/PERF-VERSION-GEN b/Documentation/perf_counter/util/PERF-VERSION-GEN new file mode 100755 index 000000000000..c561d1538c03 --- /dev/null +++ b/Documentation/perf_counter/util/PERF-VERSION-GEN | |||
@@ -0,0 +1,42 @@ | |||
#!/bin/sh
#
# Work out the perf version string and rewrite PERF-VERSION-FILE, but only
# when the value differs from what the file already records.

GVF=PERF-VERSION-FILE
DEF_VER=v0.0.1.PERF

LF='
'

# Sources, in order of preference: a "version" file (included in release
# tarballs), then git-describe, then the built-in default.
if test -f version
then
	VN=$(cat version) || VN="$DEF_VER"
elif test -d .git -o -f .git &&
	VN=$(git describe --abbrev=4 HEAD 2>/dev/null) &&
	case "$VN" in
	*$LF*) (exit 1) ;;
	v[0-9]*)
		git update-index -q --refresh
		test -z "$(git diff-index --name-only HEAD --)" ||
		VN="$VN-dirty" ;;
	esac
then
	VN=$(echo "$VN" | sed -e 's/-/./g');
else
	VN="$DEF_VER"
fi

# Drop the leading "v".
VN=$(expr "$VN" : v*'\(.*\)')

# Version currently recorded in the file, if it can be read.
VC=unset
if test -r "$GVF"
then
	VC=$(sed -e 's/^PERF_VERSION = //' <"$GVF")
fi

if test "$VN" != "$VC"
then
	echo >&2 "PERF_VERSION = $VN"
	echo "PERF_VERSION = $VN" >"$GVF"
fi
41 | |||
42 | |||
diff --git a/Documentation/perf_counter/util/abspath.c b/Documentation/perf_counter/util/abspath.c new file mode 100644 index 000000000000..649f34f83365 --- /dev/null +++ b/Documentation/perf_counter/util/abspath.c | |||
@@ -0,0 +1,117 @@ | |||
1 | #include "cache.h" | ||
2 | |||
/*
 * Report whether `path` names a directory.  stat() is used, so a symlink
 * that points at a directory counts as a directory.
 *
 * For that reason, do not use this for inspecting *tracked* content: when
 * path is a symlink to a directory, we do not want to say it is a
 * directory when dealing with tracked content in the working tree.
 *
 * Returns 1 when stat() succeeds and the result is a directory, 0 otherwise.
 */
int is_directory(const char *path)
{
	struct stat info;

	if (stat(path, &info) != 0)
		return 0;
	return S_ISDIR(info.st_mode) ? 1 : 0;
}
13 | |||
14 | /* We allow "recursive" symbolic links. Only within reason, though. */ | ||
15 | #define MAXDEPTH 5 | ||
16 | |||
17 | const char *make_absolute_path(const char *path) | ||
18 | { | ||
19 | static char bufs[2][PATH_MAX + 1], *buf = bufs[0], *next_buf = bufs[1]; | ||
20 | char cwd[1024] = ""; | ||
21 | int buf_index = 1, len; | ||
22 | |||
23 | int depth = MAXDEPTH; | ||
24 | char *last_elem = NULL; | ||
25 | struct stat st; | ||
26 | |||
27 | if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) | ||
28 | die ("Too long path: %.*s", 60, path); | ||
29 | |||
30 | while (depth--) { | ||
31 | if (!is_directory(buf)) { | ||
32 | char *last_slash = strrchr(buf, '/'); | ||
33 | if (last_slash) { | ||
34 | *last_slash = '\0'; | ||
35 | last_elem = xstrdup(last_slash + 1); | ||
36 | } else { | ||
37 | last_elem = xstrdup(buf); | ||
38 | *buf = '\0'; | ||
39 | } | ||
40 | } | ||
41 | |||
42 | if (*buf) { | ||
43 | if (!*cwd && !getcwd(cwd, sizeof(cwd))) | ||
44 | die ("Could not get current working directory"); | ||
45 | |||
46 | if (chdir(buf)) | ||
47 | die ("Could not switch to '%s'", buf); | ||
48 | } | ||
49 | if (!getcwd(buf, PATH_MAX)) | ||
50 | die ("Could not get current working directory"); | ||
51 | |||
52 | if (last_elem) { | ||
53 | int len = strlen(buf); | ||
54 | if (len + strlen(last_elem) + 2 > PATH_MAX) | ||
55 | die ("Too long path name: '%s/%s'", | ||
56 | buf, last_elem); | ||
57 | buf[len] = '/'; | ||
58 | strcpy(buf + len + 1, last_elem); | ||
59 | free(last_elem); | ||
60 | last_elem = NULL; | ||
61 | } | ||
62 | |||
63 | if (!lstat(buf, &st) && S_ISLNK(st.st_mode)) { | ||
64 | len = readlink(buf, next_buf, PATH_MAX); | ||
65 | if (len < 0) | ||
66 | die ("Invalid symlink: %s", buf); | ||
67 | if (PATH_MAX <= len) | ||
68 | die("symbolic link too long: %s", buf); | ||
69 | next_buf[len] = '\0'; | ||
70 | buf = next_buf; | ||
71 | buf_index = 1 - buf_index; | ||
72 | next_buf = bufs[buf_index]; | ||
73 | } else | ||
74 | break; | ||
75 | } | ||
76 | |||
77 | if (*cwd && chdir(cwd)) | ||
78 | die ("Could not change back to '%s'", cwd); | ||
79 | |||
80 | return buf; | ||
81 | } | ||
82 | |||
83 | static const char *get_pwd_cwd(void) | ||
84 | { | ||
85 | static char cwd[PATH_MAX + 1]; | ||
86 | char *pwd; | ||
87 | struct stat cwd_stat, pwd_stat; | ||
88 | if (getcwd(cwd, PATH_MAX) == NULL) | ||
89 | return NULL; | ||
90 | pwd = getenv("PWD"); | ||
91 | if (pwd && strcmp(pwd, cwd)) { | ||
92 | stat(cwd, &cwd_stat); | ||
93 | if (!stat(pwd, &pwd_stat) && | ||
94 | pwd_stat.st_dev == cwd_stat.st_dev && | ||
95 | pwd_stat.st_ino == cwd_stat.st_ino) { | ||
96 | strlcpy(cwd, pwd, PATH_MAX); | ||
97 | } | ||
98 | } | ||
99 | return cwd; | ||
100 | } | ||
101 | |||
102 | const char *make_nonrelative_path(const char *path) | ||
103 | { | ||
104 | static char buf[PATH_MAX + 1]; | ||
105 | |||
106 | if (is_absolute_path(path)) { | ||
107 | if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) | ||
108 | die("Too long path: %.*s", 60, path); | ||
109 | } else { | ||
110 | const char *cwd = get_pwd_cwd(); | ||
111 | if (!cwd) | ||
112 | die("Cannot determine the current working directory"); | ||
113 | if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX) | ||
114 | die("Too long path: %.*s", 60, path); | ||
115 | } | ||
116 | return buf; | ||
117 | } | ||
diff --git a/Documentation/perf_counter/util/alias.c b/Documentation/perf_counter/util/alias.c new file mode 100644 index 000000000000..9b3dd2b428df --- /dev/null +++ b/Documentation/perf_counter/util/alias.c | |||
@@ -0,0 +1,77 @@ | |||
1 | #include "cache.h" | ||
2 | |||
3 | static const char *alias_key; | ||
4 | static char *alias_val; | ||
5 | |||
6 | static int alias_lookup_cb(const char *k, const char *v, void *cb) | ||
7 | { | ||
8 | if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) { | ||
9 | if (!v) | ||
10 | return config_error_nonbool(k); | ||
11 | alias_val = strdup(v); | ||
12 | return 0; | ||
13 | } | ||
14 | return 0; | ||
15 | } | ||
16 | |||
17 | char *alias_lookup(const char *alias) | ||
18 | { | ||
19 | alias_key = alias; | ||
20 | alias_val = NULL; | ||
21 | perf_config(alias_lookup_cb, NULL); | ||
22 | return alias_val; | ||
23 | } | ||
24 | |||
/*
 * Split `cmdline` in place into whitespace-separated words, honouring
 * single quotes, double quotes and backslash escapes (a backslash is
 * literal inside single quotes).
 *
 * On success *argv is a malloc()ed array whose entries point into
 * `cmdline` (which is modified), and the number of entries is returned.
 * The caller frees *argv.
 *
 * On failure (trailing backslash, unclosed quote, or allocation failure)
 * *argv is freed and set to NULL and the result of error() is returned.
 */
int split_cmdline(char *cmdline, const char ***argv)
{
	int src, dst, count = 0, size = 16;
	char quoted = 0;

	*argv = malloc(sizeof(**argv) * size);
	if (!*argv)
		return error("out of memory");

	/* split alias_string */
	(*argv)[count++] = cmdline;
	for (src = dst = 0; cmdline[src];) {
		char c = cmdline[src];
		/* <ctype.h> classifiers need an unsigned char value. */
		if (!quoted && isspace((unsigned char)c)) {
			cmdline[dst++] = 0;
			while (cmdline[++src]
					&& isspace((unsigned char)cmdline[src]))
				; /* skip */
			if (count >= size) {
				const char **grown;

				size += 16;
				/* Keep the old array reachable if realloc fails. */
				grown = realloc(*argv, sizeof(**argv) * size);
				if (!grown) {
					free(*argv);
					*argv = NULL;
					return error("out of memory");
				}
				*argv = grown;
			}
			(*argv)[count++] = cmdline + dst;
		} else if (!quoted && (c == '\'' || c == '"')) {
			quoted = c;
			src++;
		} else if (c == quoted) {
			quoted = 0;
			src++;
		} else {
			if (c == '\\' && quoted != '\'') {
				src++;
				c = cmdline[src];
				if (!c) {
					free(*argv);
					*argv = NULL;
					return error("cmdline ends with \\");
				}
			}
			cmdline[dst++] = c;
			src++;
		}
	}

	cmdline[dst] = 0;

	if (quoted) {
		free(*argv);
		*argv = NULL;
		return error("unclosed quote");
	}

	return count;
}
77 | |||
diff --git a/Documentation/perf_counter/util/cache.h b/Documentation/perf_counter/util/cache.h new file mode 100644 index 000000000000..71080512fa86 --- /dev/null +++ b/Documentation/perf_counter/util/cache.h | |||
@@ -0,0 +1,117 @@ | |||
1 | #ifndef CACHE_H | ||
2 | #define CACHE_H | ||
3 | |||
4 | #include "util.h" | ||
5 | #include "strbuf.h" | ||
6 | |||
7 | #define PERF_DIR_ENVIRONMENT "PERF_DIR" | ||
8 | #define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE" | ||
9 | #define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" | ||
10 | #define DB_ENVIRONMENT "PERF_OBJECT_DIRECTORY" | ||
11 | #define INDEX_ENVIRONMENT "PERF_INDEX_FILE" | ||
12 | #define GRAFT_ENVIRONMENT "PERF_GRAFT_FILE" | ||
13 | #define TEMPLATE_DIR_ENVIRONMENT "PERF_TEMPLATE_DIR" | ||
14 | #define CONFIG_ENVIRONMENT "PERF_CONFIG" | ||
15 | #define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH" | ||
16 | #define CEILING_DIRECTORIES_ENVIRONMENT "PERF_CEILING_DIRECTORIES" | ||
17 | #define PERFATTRIBUTES_FILE ".perfattributes" | ||
18 | #define INFOATTRIBUTES_FILE "info/attributes" | ||
19 | #define ATTRIBUTE_MACRO_PREFIX "[attr]" | ||
20 | |||
21 | typedef int (*config_fn_t)(const char *, const char *, void *); | ||
22 | extern int perf_default_config(const char *, const char *, void *); | ||
23 | extern int perf_config_from_file(config_fn_t fn, const char *, void *); | ||
24 | extern int perf_config(config_fn_t fn, void *); | ||
25 | extern int perf_parse_ulong(const char *, unsigned long *); | ||
26 | extern int perf_config_int(const char *, const char *); | ||
27 | extern unsigned long perf_config_ulong(const char *, const char *); | ||
28 | extern int perf_config_bool_or_int(const char *, const char *, int *); | ||
29 | extern int perf_config_bool(const char *, const char *); | ||
30 | extern int perf_config_string(const char **, const char *, const char *); | ||
31 | extern int perf_config_set(const char *, const char *); | ||
32 | extern int perf_config_set_multivar(const char *, const char *, const char *, int); | ||
33 | extern int perf_config_rename_section(const char *, const char *); | ||
34 | extern const char *perf_etc_perfconfig(void); | ||
35 | extern int check_repository_format_version(const char *var, const char *value, void *cb); | ||
36 | extern int perf_config_system(void); | ||
37 | extern int perf_config_global(void); | ||
38 | extern int config_error_nonbool(const char *); | ||
39 | extern const char *config_exclusive_filename; | ||
40 | |||
41 | #define MAX_PERFNAME (1000) | ||
42 | extern char perf_default_email[MAX_PERFNAME]; | ||
43 | extern char perf_default_name[MAX_PERFNAME]; | ||
44 | extern int user_ident_explicitly_given; | ||
45 | |||
46 | extern const char *perf_log_output_encoding; | ||
47 | extern const char *perf_mailmap_file; | ||
48 | |||
49 | /* IO helper functions */ | ||
50 | extern void maybe_flush_or_die(FILE *, const char *); | ||
51 | extern int copy_fd(int ifd, int ofd); | ||
52 | extern int copy_file(const char *dst, const char *src, int mode); | ||
53 | extern ssize_t read_in_full(int fd, void *buf, size_t count); | ||
54 | extern ssize_t write_in_full(int fd, const void *buf, size_t count); | ||
55 | extern void write_or_die(int fd, const void *buf, size_t count); | ||
56 | extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg); | ||
57 | extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg); | ||
58 | extern void fsync_or_die(int fd, const char *); | ||
59 | |||
60 | /* pager.c */ | ||
61 | extern void setup_pager(void); | ||
62 | extern const char *pager_program; | ||
63 | extern int pager_in_use(void); | ||
64 | extern int pager_use_color; | ||
65 | |||
66 | extern const char *editor_program; | ||
67 | extern const char *excludes_file; | ||
68 | |||
69 | char *alias_lookup(const char *alias); | ||
70 | int split_cmdline(char *cmdline, const char ***argv); | ||
71 | |||
/*
 * Growth policy for resizable arrays: from a capacity of x entries,
 * the next capacity is (x + 16) * 3 / 2 entries.
 */
#define alloc_nr(x) (((x)+16)*3/2)

/*
 * Realloc the buffer pointed at by variable 'x' so that it can hold
 * at least 'nr' entries; the number of entries currently allocated
 * is 'alloc', using the standard growing factor alloc_nr() macro.
 * When even alloc_nr(alloc) is smaller than 'nr', 'alloc' becomes
 * exactly 'nr'.  Nothing happens when 'nr' already fits.
 *
 * Both 'x' and 'alloc' are assigned to, so they must be lvalues, and
 * every argument is evaluated more than once.
 *
 * DO NOT USE any expression with side-effect for 'x' or 'alloc'.
 */
#define ALLOC_GROW(x, nr, alloc) \
	do { \
		if ((nr) > alloc) { \
			if (alloc_nr(alloc) < (nr)) \
				alloc = (nr); \
			else \
				alloc = alloc_nr(alloc); \
			x = xrealloc((x), alloc * sizeof(*(x))); \
		} \
	} while(0)
91 | |||
92 | |||
/* Return 1 when `path` begins with '/', 0 otherwise (including for ""). */
static inline int is_absolute_path(const char *path)
{
	return *path == '/';
}
97 | |||
98 | const char *make_absolute_path(const char *path); | ||
99 | const char *make_nonrelative_path(const char *path); | ||
100 | const char *make_relative_path(const char *abs, const char *base); | ||
101 | int normalize_path_copy(char *dst, const char *src); | ||
102 | int longest_ancestor_length(const char *path, const char *prefix_list); | ||
103 | char *strip_path_suffix(const char *path, const char *suffix); | ||
104 | |||
105 | extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); | ||
106 | extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2))); | ||
107 | |||
108 | extern char *mksnpath(char *buf, size_t n, const char *fmt, ...) | ||
109 | __attribute__((format (printf, 3, 4))); | ||
110 | extern char *perf_snpath(char *buf, size_t n, const char *fmt, ...) | ||
111 | __attribute__((format (printf, 3, 4))); | ||
112 | extern char *perf_pathdup(const char *fmt, ...) | ||
113 | __attribute__((format (printf, 1, 2))); | ||
114 | |||
115 | extern size_t strlcpy(char *dest, const char *src, size_t size); | ||
116 | |||
117 | #endif /* CACHE_H */ | ||
diff --git a/Documentation/perf_counter/util/config.c b/Documentation/perf_counter/util/config.c new file mode 100644 index 000000000000..3dd13faa6a27 --- /dev/null +++ b/Documentation/perf_counter/util/config.c | |||
@@ -0,0 +1,873 @@ | |||
1 | /* | ||
2 | * GIT - The information manager from hell | ||
3 | * | ||
4 | * Copyright (C) Linus Torvalds, 2005 | ||
5 | * Copyright (C) Johannes Schindelin, 2005 | ||
6 | * | ||
7 | */ | ||
8 | #include "util.h" | ||
9 | #include "cache.h" | ||
10 | #include "exec_cmd.h" | ||
11 | |||
12 | #define MAXNAME (256) | ||
13 | |||
14 | static FILE *config_file; | ||
15 | static const char *config_file_name; | ||
16 | static int config_linenr; | ||
17 | static int config_file_eof; | ||
18 | |||
19 | const char *config_exclusive_filename = NULL; | ||
20 | |||
21 | static int get_next_char(void) | ||
22 | { | ||
23 | int c; | ||
24 | FILE *f; | ||
25 | |||
26 | c = '\n'; | ||
27 | if ((f = config_file) != NULL) { | ||
28 | c = fgetc(f); | ||
29 | if (c == '\r') { | ||
30 | /* DOS like systems */ | ||
31 | c = fgetc(f); | ||
32 | if (c != '\n') { | ||
33 | ungetc(c, f); | ||
34 | c = '\r'; | ||
35 | } | ||
36 | } | ||
37 | if (c == '\n') | ||
38 | config_linenr++; | ||
39 | if (c == EOF) { | ||
40 | config_file_eof = 1; | ||
41 | c = '\n'; | ||
42 | } | ||
43 | } | ||
44 | return c; | ||
45 | } | ||
46 | |||
/*
 * Read the value part of a "key = value" line, up to the end of line.
 *
 * Outside double quotes, runs of whitespace collapse to one space
 * (leading and trailing whitespace is dropped) and ';' or '#' starts a
 * comment.  Backslash escapes \t, \b, \n, \\ and \" are translated; a
 * backslash followed by a newline continues the value on the next line.
 *
 * Returns a pointer to a static buffer, or NULL on an unknown escape,
 * an unterminated quote, or a value that does not fit the buffer.
 */
static char *parse_value(void)
{
	static char value[1024];
	int in_quote = 0, in_comment = 0, pending_space = 0;
	int len = 0;

	for (;;) {
		int c = get_next_char();

		if (len >= sizeof(value) - 1)
			return NULL;

		if (c == '\n') {
			if (in_quote)
				return NULL;
			value[len] = 0;
			return value;
		}
		if (in_comment)
			continue;

		if (!in_quote) {
			if (isspace(c)) {
				pending_space = 1;
				continue;
			}
			if (c == ';' || c == '#') {
				in_comment = 1;
				continue;
			}
		}

		/* Emit at most one space for an interior whitespace run. */
		if (pending_space) {
			if (len)
				value[len++] = ' ';
			pending_space = 0;
		}

		if (c == '"') {
			in_quote = !in_quote;
			continue;
		}

		if (c == '\\') {
			c = get_next_char();
			if (c == '\n')
				continue;	/* line continuation */
			if (c == 't')
				c = '\t';
			else if (c == 'b')
				c = '\b';
			else if (c == 'n')
				c = '\n';
			else if (c != '\\' && c != '"')
				return NULL;	/* Reject unknown escape sequences */
			/* '\\' and '"' escape as themselves */
		}
		value[len++] = c;
	}
}
110 | |||
/* Return 1 when `c` is alphanumeric or '-', 0 otherwise. */
static inline int iskeychar(int c)
{
	if (isalnum(c))
		return 1;
	return c == '-';
}
115 | |||
116 | static int get_value(config_fn_t fn, void *data, char *name, unsigned int len) | ||
117 | { | ||
118 | int c; | ||
119 | char *value; | ||
120 | |||
121 | /* Get the full name */ | ||
122 | for (;;) { | ||
123 | c = get_next_char(); | ||
124 | if (config_file_eof) | ||
125 | break; | ||
126 | if (!iskeychar(c)) | ||
127 | break; | ||
128 | name[len++] = tolower(c); | ||
129 | if (len >= MAXNAME) | ||
130 | return -1; | ||
131 | } | ||
132 | name[len] = 0; | ||
133 | while (c == ' ' || c == '\t') | ||
134 | c = get_next_char(); | ||
135 | |||
136 | value = NULL; | ||
137 | if (c != '\n') { | ||
138 | if (c != '=') | ||
139 | return -1; | ||
140 | value = parse_value(); | ||
141 | if (!value) | ||
142 | return -1; | ||
143 | } | ||
144 | return fn(name, value, data); | ||
145 | } | ||
146 | |||
147 | static int get_extended_base_var(char *name, int baselen, int c) | ||
148 | { | ||
149 | do { | ||
150 | if (c == '\n') | ||
151 | return -1; | ||
152 | c = get_next_char(); | ||
153 | } while (isspace(c)); | ||
154 | |||
155 | /* We require the format to be '[base "extension"]' */ | ||
156 | if (c != '"') | ||
157 | return -1; | ||
158 | name[baselen++] = '.'; | ||
159 | |||
160 | for (;;) { | ||
161 | int c = get_next_char(); | ||
162 | if (c == '\n') | ||
163 | return -1; | ||
164 | if (c == '"') | ||
165 | break; | ||
166 | if (c == '\\') { | ||
167 | c = get_next_char(); | ||
168 | if (c == '\n') | ||
169 | return -1; | ||
170 | } | ||
171 | name[baselen++] = c; | ||
172 | if (baselen > MAXNAME / 2) | ||
173 | return -1; | ||
174 | } | ||
175 | |||
176 | /* Final ']' */ | ||
177 | if (get_next_char() != ']') | ||
178 | return -1; | ||
179 | return baselen; | ||
180 | } | ||
181 | |||
182 | static int get_base_var(char *name) | ||
183 | { | ||
184 | int baselen = 0; | ||
185 | |||
186 | for (;;) { | ||
187 | int c = get_next_char(); | ||
188 | if (config_file_eof) | ||
189 | return -1; | ||
190 | if (c == ']') | ||
191 | return baselen; | ||
192 | if (isspace(c)) | ||
193 | return get_extended_base_var(name, baselen, c); | ||
194 | if (!iskeychar(c) && c != '.') | ||
195 | return -1; | ||
196 | if (baselen > MAXNAME / 2) | ||
197 | return -1; | ||
198 | name[baselen++] = tolower(c); | ||
199 | } | ||
200 | } | ||
201 | |||
/*
 * Parse the currently open config file, invoking `fn` (through
 * get_value()) for every variable found.
 *
 * Returns 0 when end of file is reached.  Any syntax error, and any
 * negative return from get_value(), ends in die() with the current line
 * number and file name.
 */
static int perf_parse_file(config_fn_t fn, void *data)
{
	int comment = 0;
	/* Length of the "section." prefix currently stored in var. */
	int baselen = 0;
	static char var[MAXNAME];

	/* U+FEFF Byte Order Mark in UTF8 */
	static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
	/* Next BOM byte still expected; NULL once the BOM check is over. */
	const unsigned char *bomptr = utf8_bom;

	for (;;) {
		int c = get_next_char();
		if (bomptr && *bomptr) {
			/* We are at the file beginning; skip UTF8-encoded BOM
			 * if present. Sane editors won't put this in on their
			 * own, but e.g. Windows Notepad will do it happily. */
			if ((unsigned char) c == *bomptr) {
				bomptr++;
				continue;
			} else {
				/* Do not tolerate partial BOM. */
				if (bomptr != utf8_bom)
					break;
				/* No BOM at file beginning. Cool. */
				bomptr = NULL;
			}
		}
		if (c == '\n') {
			/* get_next_char() reports end of file as '\n'. */
			if (config_file_eof)
				return 0;
			comment = 0;
			continue;
		}
		if (comment || isspace(c))
			continue;
		if (c == '#' || c == ';') {
			comment = 1;
			continue;
		}
		if (c == '[') {
			/* New section: var becomes "<section>." */
			baselen = get_base_var(var);
			if (baselen <= 0)
				break;
			var[baselen++] = '.';
			var[baselen] = 0;
			continue;
		}
		/* A variable name must start with a letter. */
		if (!isalpha(c))
			break;
		var[baselen] = tolower(c);
		if (get_value(fn, data, var, baselen+1) < 0)
			break;
	}
	/* Every break above is a parse error. */
	die("bad config file line %d in %s", config_linenr, config_file_name);
}
257 | |||
/*
 * Apply an optional unit suffix to *val.
 *
 * `end` is the text that followed the number: "" leaves *val alone,
 * "k", "m" and "g" (case-insensitive) multiply it by 1024, 1024^2 and
 * 1024^3 respectively.
 *
 * Returns 1 on success, 0 (with *val untouched) for any other suffix.
 */
static int parse_unit_factor(const char *end, unsigned long *val)
{
	static const struct {
		const char *suffix;
		unsigned long multiplier;
	} units[] = {
		{ "k", 1024UL },
		{ "m", 1024UL * 1024 },
		{ "g", 1024UL * 1024 * 1024 },
	};
	size_t i;

	if (*end == '\0')
		return 1;

	for (i = 0; i < sizeof(units) / sizeof(units[0]); i++) {
		if (strcasecmp(end, units[i].suffix) == 0) {
			*val *= units[i].multiplier;
			return 1;
		}
	}
	return 0;
}
276 | |||
/*
 * Parse `value` as a long with an optional k/m/g suffix (strtol() with
 * base 0, so hex and octal prefixes are accepted).
 *
 * Returns 1 and stores the scaled result in *ret on success; returns 0
 * when `value` is NULL or empty or the suffix is not recognized.
 */
static int perf_parse_long(const char *value, long *ret)
{
	unsigned long factor = 1;
	char *end;
	long val;

	if (!value || !*value)
		return 0;

	val = strtol(value, &end, 0);
	if (!parse_unit_factor(end, &factor))
		return 0;

	*ret = val * factor;
	return 1;
}
290 | |||
/*
 * Parse `value` as an unsigned long with an optional k/m/g suffix
 * (strtoul() with base 0).
 *
 * Returns 1 and stores the scaled result in *ret on success; returns 0
 * when `value` is NULL or empty or the suffix is not recognized.
 */
int perf_parse_ulong(const char *value, unsigned long *ret)
{
	unsigned long val;
	char *end;

	if (!value || !*value)
		return 0;

	val = strtoul(value, &end, 0);
	if (!parse_unit_factor(end, &val))
		return 0;

	*ret = val;
	return 1;
}
303 | |||
304 | static void die_bad_config(const char *name) | ||
305 | { | ||
306 | if (config_file_name) | ||
307 | die("bad config value for '%s' in %s", name, config_file_name); | ||
308 | die("bad config value for '%s'", name); | ||
309 | } | ||
310 | |||
/*
 * Interpret `value` as an integer (optional k/m/g suffix) for config
 * variable `name`; an unparsable value goes to die_bad_config().
 * The parsed long is returned converted to int.
 */
int perf_config_int(const char *name, const char *value)
{
	long parsed = 0;

	if (perf_parse_long(value, &parsed))
		return parsed;

	die_bad_config(name);
	return parsed;
}
318 | |||
/*
 * Interpret `value` as an unsigned long (optional k/m/g suffix) for
 * config variable `name`; an unparsable value goes to die_bad_config().
 */
unsigned long perf_config_ulong(const char *name, const char *value)
{
	unsigned long parsed;

	if (perf_parse_ulong(value, &parsed))
		return parsed;

	die_bad_config(name);
	return parsed;
}
326 | |||
/*
 * Interpret `value` as either a boolean or an integer.
 *
 * A NULL value (key with no "= value") is true and an empty string is
 * false.  "true"/"yes"/"on" and "false"/"no"/"off" are matched
 * case-insensitively.  In all of those cases *is_bool is set to 1.
 * Anything else sets *is_bool to 0 and is parsed by perf_config_int().
 */
int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
{
	static const char *const true_words[] = { "true", "yes", "on" };
	static const char *const false_words[] = { "false", "no", "off" };
	size_t i;

	*is_bool = 1;
	if (value == NULL)
		return 1;
	if (*value == '\0')
		return 0;

	for (i = 0; i < sizeof(true_words) / sizeof(true_words[0]); i++)
		if (strcasecmp(value, true_words[i]) == 0)
			return 1;
	for (i = 0; i < sizeof(false_words) / sizeof(false_words[0]); i++)
		if (strcasecmp(value, false_words[i]) == 0)
			return 0;

	*is_bool = 0;
	return perf_config_int(name, value);
}
341 | |||
/*
 * Interpret `value` as a boolean: whatever perf_config_bool_or_int()
 * yields is collapsed to 0 or 1.
 */
int perf_config_bool(const char *name, const char *value)
{
	int was_bool;
	int raw = perf_config_bool_or_int(name, value, &was_bool);

	return raw != 0;
}
347 | |||
/*
 * Store a heap copy of `value` in *dest for config variable `var`.
 *
 * Returns 0 on success.  A missing value is reported through
 * config_error_nonbool(); an allocation failure is reported through
 * error() and leaves *dest untouched, instead of storing NULL and
 * claiming success.
 *
 * The previous *dest is not freed here; ownership of the new string
 * passes to the caller.
 */
int perf_config_string(const char **dest, const char *var, const char *value)
{
	char *copy;

	if (!value)
		return config_error_nonbool(var);

	copy = strdup(value);
	if (!copy)
		return error("out of memory while reading config value for '%s'", var);

	*dest = copy;
	return 0;
}
355 | |||
/*
 * Handler for "core." config variables.  None are recognized yet, so
 * every variable is accepted and ignored; always returns 0.
 */
static int perf_default_core_config(const char *var, const char *value)
{
	(void)var;
	(void)value;

	/* Add other config variables here and to Documentation/config.txt. */
	return 0;
}
361 | |||
/*
 * Default config callback: variables starting with "core." are passed to
 * perf_default_core_config(); everything else is ignored with return 0.
 * `dummy` is unused.
 */
int perf_default_config(const char *var, const char *value, void *dummy)
{
	if (prefixcmp(var, "core.") == 0)
		return perf_default_core_config(var, value);

	/* Add other config variables here and to Documentation/config.txt. */
	return 0;
}
370 | |||
371 | int perf_config_from_file(config_fn_t fn, const char *filename, void *data) | ||
372 | { | ||
373 | int ret; | ||
374 | FILE *f = fopen(filename, "r"); | ||
375 | |||
376 | ret = -1; | ||
377 | if (f) { | ||
378 | config_file = f; | ||
379 | config_file_name = filename; | ||
380 | config_linenr = 1; | ||
381 | config_file_eof = 0; | ||
382 | ret = perf_parse_file(fn, data); | ||
383 | fclose(f); | ||
384 | config_file_name = NULL; | ||
385 | } | ||
386 | return ret; | ||
387 | } | ||
388 | |||
389 | const char *perf_etc_perfconfig(void) | ||
390 | { | ||
391 | static const char *system_wide; | ||
392 | if (!system_wide) | ||
393 | system_wide = system_path(ETC_PERFCONFIG); | ||
394 | return system_wide; | ||
395 | } | ||
396 | |||
/*
 * Read environment variable `k` as a boolean via perf_config_bool();
 * returns `def` when the variable is not set.
 */
static int perf_env_bool(const char *k, int def)
{
	const char *v = getenv(k);

	if (v == NULL)
		return def;
	return perf_config_bool(k, v);
}
402 | |||
/*
 * Return non-zero unless the PERF_CONFIG_NOSYSTEM environment variable
 * is set to a true value.
 */
int perf_config_system(void)
{
	int disabled = perf_env_bool("PERF_CONFIG_NOSYSTEM", 0);

	return !disabled;
}
407 | |||
/*
 * Return non-zero unless the PERF_CONFIG_NOGLOBAL environment variable
 * is set to a true value.
 */
int perf_config_global(void)
{
	int disabled = perf_env_bool("PERF_CONFIG_NOGLOBAL", 0);

	return !disabled;
}
412 | |||
413 | int perf_config(config_fn_t fn, void *data) | ||
414 | { | ||
415 | int ret = 0, found = 0; | ||
416 | char *repo_config = NULL; | ||
417 | const char *home = NULL; | ||
418 | |||
419 | /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */ | ||
420 | if (config_exclusive_filename) | ||
421 | return perf_config_from_file(fn, config_exclusive_filename, data); | ||
422 | if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) { | ||
423 | ret += perf_config_from_file(fn, perf_etc_perfconfig(), | ||
424 | data); | ||
425 | found += 1; | ||
426 | } | ||
427 | |||
428 | home = getenv("HOME"); | ||
429 | if (perf_config_global() && home) { | ||
430 | char *user_config = strdup(mkpath("%s/.perfconfig", home)); | ||
431 | if (!access(user_config, R_OK)) { | ||
432 | ret += perf_config_from_file(fn, user_config, data); | ||
433 | found += 1; | ||
434 | } | ||
435 | free(user_config); | ||
436 | } | ||
437 | |||
438 | repo_config = perf_pathdup("config"); | ||
439 | if (!access(repo_config, R_OK)) { | ||
440 | ret += perf_config_from_file(fn, repo_config, data); | ||
441 | found += 1; | ||
442 | } | ||
443 | free(repo_config); | ||
444 | if (found == 0) | ||
445 | return -1; | ||
446 | return ret; | ||
447 | } | ||
448 | |||
449 | /* | ||
450 | * Find all the stuff for perf_config_set() below. | ||
451 | */ | ||
452 | |||
/* Capacity of store.offset[]. */
#define MAX_MATCHES 512

/*
 * Scratch state shared between perf_config_set_multivar() and the
 * store_*() helpers while a config file is scanned and rewritten.
 */
static struct {
	/* Length of the section part of key (everything before the last '.'). */
	int baselen;
	/* The key being set, lower-cased for matching. */
	char* key;
	/* XORed with the regex result in matches() to invert the test. */
	int do_not_match;
	/* Optional filter on existing values; NULL means any value matches. */
	regex_t* value_regex;
	/* Zero: store_aux() warns when a second match is found. */
	int multi_replace;
	/* File positions (from ftell()) recorded by store_aux(). */
	size_t offset[MAX_MATCHES];
	/* Progress of the scan as tracked by store_aux(). */
	enum { START, SECTION_SEEN, SECTION_END_SEEN, KEY_SEEN } state;
	/* Number of matches recorded in offset[] so far. */
	int seen;
} store;
465 | |||
466 | static int matches(const char* key, const char* value) | ||
467 | { | ||
468 | return !strcmp(key, store.key) && | ||
469 | (store.value_regex == NULL || | ||
470 | (store.do_not_match ^ | ||
471 | !regexec(store.value_regex, value, 0, NULL, 0))); | ||
472 | } | ||
473 | |||
/*
 * Config callback used while scanning the existing file: records in
 * store.offset[] the file positions (as reported by ftell() right after
 * the pair was parsed) that are relevant to the key being set, and
 * advances store.state.
 *
 * `cb` is unused.  Returns 0 normally, 1 when more than MAX_MATCHES
 * matching entries are found.
 */
static int store_aux(const char* key, const char* value, void *cb)
{
	const char *ep;
	size_t section_len;

	switch (store.state) {
	case KEY_SEEN:
		/* A match was already recorded; look for further ones. */
		if (matches(key, value)) {
			if (store.seen == 1 && store.multi_replace == 0) {
				warning("%s has multiple values", key);
			} else if (store.seen >= MAX_MATCHES) {
				error("too many matches for %s", key);
				return 1;
			}

			store.offset[store.seen] = ftell(config_file);
			store.seen++;
		}
		break;
	case SECTION_SEEN:
		/*
		 * What we are looking for is in store.key (both
		 * section and var), and its section part is baselen
		 * long.  We found key (again, both section and var).
		 * We would want to know if this key is in the same
		 * section as what we are looking for.  We already
		 * know we are in the same section as what should
		 * hold store.key.
		 */
		ep = strrchr(key, '.');
		section_len = ep - key;

		if ((section_len != store.baselen) ||
		    memcmp(key, store.key, section_len+1)) {
			store.state = SECTION_END_SEEN;
			break;
		}

		/*
		 * Do not increment matches: this is no match, but we
		 * just made sure we are in the desired section.
		 */
		store.offset[store.seen] = ftell(config_file);
		/* fallthru */
	case SECTION_END_SEEN:
	case START:
		if (matches(key, value)) {
			/* First match: record it and switch to KEY_SEEN. */
			store.offset[store.seen] = ftell(config_file);
			store.state = KEY_SEEN;
			store.seen++;
		} else {
			/* Same section, different key: remember the position. */
			if (strrchr(key, '.') - key == store.baselen &&
			      !strncmp(key, store.key, store.baselen)) {
					store.state = SECTION_SEEN;
					store.offset[store.seen] = ftell(config_file);
			}
		}
	}
	return 0;
}
534 | |||
535 | static int store_write_section(int fd, const char* key) | ||
536 | { | ||
537 | const char *dot; | ||
538 | int i, success; | ||
539 | struct strbuf sb = STRBUF_INIT; | ||
540 | |||
541 | dot = memchr(key, '.', store.baselen); | ||
542 | if (dot) { | ||
543 | strbuf_addf(&sb, "[%.*s \"", (int)(dot - key), key); | ||
544 | for (i = dot - key + 1; i < store.baselen; i++) { | ||
545 | if (key[i] == '"' || key[i] == '\\') | ||
546 | strbuf_addch(&sb, '\\'); | ||
547 | strbuf_addch(&sb, key[i]); | ||
548 | } | ||
549 | strbuf_addstr(&sb, "\"]\n"); | ||
550 | } else { | ||
551 | strbuf_addf(&sb, "[%.*s]\n", store.baselen, key); | ||
552 | } | ||
553 | |||
554 | success = write_in_full(fd, sb.buf, sb.len) == sb.len; | ||
555 | strbuf_release(&sb); | ||
556 | |||
557 | return success; | ||
558 | } | ||
559 | |||
560 | static int store_write_pair(int fd, const char* key, const char* value) | ||
561 | { | ||
562 | int i, success; | ||
563 | int length = strlen(key + store.baselen + 1); | ||
564 | const char *quote = ""; | ||
565 | struct strbuf sb = STRBUF_INIT; | ||
566 | |||
567 | /* | ||
568 | * Check to see if the value needs to be surrounded with a dq pair. | ||
569 | * Note that problematic characters are always backslash-quoted; this | ||
570 | * check is about not losing leading or trailing SP and strings that | ||
571 | * follow beginning-of-comment characters (i.e. ';' and '#') by the | ||
572 | * configuration parser. | ||
573 | */ | ||
574 | if (value[0] == ' ') | ||
575 | quote = "\""; | ||
576 | for (i = 0; value[i]; i++) | ||
577 | if (value[i] == ';' || value[i] == '#') | ||
578 | quote = "\""; | ||
579 | if (i && value[i - 1] == ' ') | ||
580 | quote = "\""; | ||
581 | |||
582 | strbuf_addf(&sb, "\t%.*s = %s", | ||
583 | length, key + store.baselen + 1, quote); | ||
584 | |||
585 | for (i = 0; value[i]; i++) | ||
586 | switch (value[i]) { | ||
587 | case '\n': | ||
588 | strbuf_addstr(&sb, "\\n"); | ||
589 | break; | ||
590 | case '\t': | ||
591 | strbuf_addstr(&sb, "\\t"); | ||
592 | break; | ||
593 | case '"': | ||
594 | case '\\': | ||
595 | strbuf_addch(&sb, '\\'); | ||
596 | default: | ||
597 | strbuf_addch(&sb, value[i]); | ||
598 | break; | ||
599 | } | ||
600 | strbuf_addf(&sb, "%s\n", quote); | ||
601 | |||
602 | success = write_in_full(fd, sb.buf, sb.len) == sb.len; | ||
603 | strbuf_release(&sb); | ||
604 | |||
605 | return success; | ||
606 | } | ||
607 | |||
/*
 * Scan backwards through `contents` from just before `offset_` to find
 * where the logical line ending there begins.
 *
 * While scanning, the positions of '=' and ']' are noted (each ends up
 * holding the leftmost occurrence seen).  A newline preceded by a
 * backslash is treated as a continuation and the scan carries on into
 * the previous physical line.
 *
 * If a ']' was found to the left of any '=', *found_bracket is set to 1
 * and the offset just past that ']' is returned; otherwise the offset
 * one past where the scan stopped is returned.  *found_bracket is not
 * written in the latter case.
 */
static ssize_t find_beginning_of_line(const char* contents, size_t size,
	size_t offset_, int* found_bracket)
{
	/* `size` serves as the "not seen" marker for both positions. */
	size_t equal_offset = size, bracket_offset = size;
	ssize_t offset;

contline:
	/* Walk back to the previous newline (or the start of the buffer). */
	for (offset = offset_-2; offset > 0
			&& contents[offset] != '\n'; offset--)
		switch (contents[offset]) {
			case '=': equal_offset = offset; break;
			case ']': bracket_offset = offset; break;
		}
	/* Backslash-newline: the logical line started earlier. */
	if (offset > 0 && contents[offset-1] == '\\') {
		offset_ = offset;
		goto contline;
	}
	if (bracket_offset < equal_offset) {
		*found_bracket = 1;
		offset = bracket_offset+1;
	} else
		offset++;

	return offset;
}
633 | |||
/*
 * Set config variable `key` to `value`: a thin wrapper around
 * perf_config_set_multivar() with no value regex and multi_replace off.
 * Returns whatever perf_config_set_multivar() returns.
 */
int perf_config_set(const char* key, const char* value)
{
	const char *no_value_regex = NULL;
	const int single_replace = 0;

	return perf_config_set_multivar(key, value, no_value_regex,
					single_replace);
}
638 | |||
639 | /* | ||
640 | * If value==NULL, unset in (remove from) config, | ||
641 | * if value_regex!=NULL, disregard key/value pairs where value does not match. | ||
642 | * if multi_replace==0, nothing, or only one matching key/value is replaced, | ||
643 | * else all matching key/values (regardless how many) are removed, | ||
644 | * before the new pair is written. | ||
645 | * | ||
646 | * Returns 0 on success. | ||
647 | * | ||
648 | * This function does this: | ||
649 | * | ||
650 | * - it locks the config file by creating ".perf/config.lock" | ||
651 | * | ||
652 | * - it then parses the config using store_aux() as validator to find | ||
653 | * the position on the key/value pair to replace. If it is to be unset, | ||
654 | * it must be found exactly once. | ||
655 | * | ||
656 | * - the config file is mmap()ed and the part before the match (if any) is | ||
657 | * written to the lock file, then the changed part and the rest. | ||
658 | * | ||
659 | * - the config file is removed and the lock file rename()d to it. | ||
660 | * | ||
661 | */ | ||
662 | int perf_config_set_multivar(const char* key, const char* value, | ||
663 | const char* value_regex, int multi_replace) | ||
664 | { | ||
665 | int i, dot; | ||
666 | int fd = -1, in_fd; | ||
667 | int ret = 0; | ||
668 | char* config_filename; | ||
669 | const char* last_dot = strrchr(key, '.'); | ||
670 | |||
671 | if (config_exclusive_filename) | ||
672 | config_filename = strdup(config_exclusive_filename); | ||
673 | else | ||
674 | config_filename = perf_pathdup("config"); | ||
675 | |||
676 | /* | ||
677 | * Since "key" actually contains the section name and the real | ||
678 | * key name separated by a dot, we have to know where the dot is. | ||
679 | */ | ||
680 | |||
681 | if (last_dot == NULL) { | ||
682 | error("key does not contain a section: %s", key); | ||
683 | ret = 2; | ||
684 | goto out_free; | ||
685 | } | ||
686 | store.baselen = last_dot - key; | ||
687 | |||
688 | store.multi_replace = multi_replace; | ||
689 | |||
690 | /* | ||
691 | * Validate the key and while at it, lower case it for matching. | ||
692 | */ | ||
693 | store.key = malloc(strlen(key) + 1); | ||
694 | dot = 0; | ||
695 | for (i = 0; key[i]; i++) { | ||
696 | unsigned char c = key[i]; | ||
697 | if (c == '.') | ||
698 | dot = 1; | ||
699 | /* Leave the extended basename untouched.. */ | ||
700 | if (!dot || i > store.baselen) { | ||
701 | if (!iskeychar(c) || (i == store.baselen+1 && !isalpha(c))) { | ||
702 | error("invalid key: %s", key); | ||
703 | free(store.key); | ||
704 | ret = 1; | ||
705 | goto out_free; | ||
706 | } | ||
707 | c = tolower(c); | ||
708 | } else if (c == '\n') { | ||
709 | error("invalid key (newline): %s", key); | ||
710 | free(store.key); | ||
711 | ret = 1; | ||
712 | goto out_free; | ||
713 | } | ||
714 | store.key[i] = c; | ||
715 | } | ||
716 | store.key[i] = 0; | ||
717 | |||
718 | /* | ||
719 | * If .perf/config does not exist yet, write a minimal version. | ||
720 | */ | ||
721 | in_fd = open(config_filename, O_RDONLY); | ||
722 | if ( in_fd < 0 ) { | ||
723 | free(store.key); | ||
724 | |||
725 | if ( ENOENT != errno ) { | ||
726 | error("opening %s: %s", config_filename, | ||
727 | strerror(errno)); | ||
728 | ret = 3; /* same as "invalid config file" */ | ||
729 | goto out_free; | ||
730 | } | ||
731 | /* if nothing to unset, error out */ | ||
732 | if (value == NULL) { | ||
733 | ret = 5; | ||
734 | goto out_free; | ||
735 | } | ||
736 | |||
737 | store.key = (char*)key; | ||
738 | if (!store_write_section(fd, key) || | ||
739 | !store_write_pair(fd, key, value)) | ||
740 | goto write_err_out; | ||
741 | } else { | ||
742 | struct stat st; | ||
743 | char* contents; | ||
744 | size_t contents_sz, copy_begin, copy_end; | ||
745 | int i, new_line = 0; | ||
746 | |||
747 | if (value_regex == NULL) | ||
748 | store.value_regex = NULL; | ||
749 | else { | ||
750 | if (value_regex[0] == '!') { | ||
751 | store.do_not_match = 1; | ||
752 | value_regex++; | ||
753 | } else | ||
754 | store.do_not_match = 0; | ||
755 | |||
756 | store.value_regex = (regex_t*)malloc(sizeof(regex_t)); | ||
757 | if (regcomp(store.value_regex, value_regex, | ||
758 | REG_EXTENDED)) { | ||
759 | error("invalid pattern: %s", value_regex); | ||
760 | free(store.value_regex); | ||
761 | ret = 6; | ||
762 | goto out_free; | ||
763 | } | ||
764 | } | ||
765 | |||
766 | store.offset[0] = 0; | ||
767 | store.state = START; | ||
768 | store.seen = 0; | ||
769 | |||
770 | /* | ||
771 | * After this, store.offset will contain the *end* offset | ||
772 | * of the last match, or remain at 0 if no match was found. | ||
773 | * As a side effect, we make sure to transform only a valid | ||
774 | * existing config file. | ||
775 | */ | ||
776 | if (perf_config_from_file(store_aux, config_filename, NULL)) { | ||
777 | error("invalid config file %s", config_filename); | ||
778 | free(store.key); | ||
779 | if (store.value_regex != NULL) { | ||
780 | regfree(store.value_regex); | ||
781 | free(store.value_regex); | ||
782 | } | ||
783 | ret = 3; | ||
784 | goto out_free; | ||
785 | } | ||
786 | |||
787 | free(store.key); | ||
788 | if (store.value_regex != NULL) { | ||
789 | regfree(store.value_regex); | ||
790 | free(store.value_regex); | ||
791 | } | ||
792 | |||
793 | /* if nothing to unset, or too many matches, error out */ | ||
794 | if ((store.seen == 0 && value == NULL) || | ||
795 | (store.seen > 1 && multi_replace == 0)) { | ||
796 | ret = 5; | ||
797 | goto out_free; | ||
798 | } | ||
799 | |||
800 | fstat(in_fd, &st); | ||
801 | contents_sz = xsize_t(st.st_size); | ||
802 | contents = mmap(NULL, contents_sz, PROT_READ, | ||
803 | MAP_PRIVATE, in_fd, 0); | ||
804 | close(in_fd); | ||
805 | |||
806 | if (store.seen == 0) | ||
807 | store.seen = 1; | ||
808 | |||
809 | for (i = 0, copy_begin = 0; i < store.seen; i++) { | ||
810 | if (store.offset[i] == 0) { | ||
811 | store.offset[i] = copy_end = contents_sz; | ||
812 | } else if (store.state != KEY_SEEN) { | ||
813 | copy_end = store.offset[i]; | ||
814 | } else | ||
815 | copy_end = find_beginning_of_line( | ||
816 | contents, contents_sz, | ||
817 | store.offset[i]-2, &new_line); | ||
818 | |||
819 | if (copy_end > 0 && contents[copy_end-1] != '\n') | ||
820 | new_line = 1; | ||
821 | |||
822 | /* write the first part of the config */ | ||
823 | if (copy_end > copy_begin) { | ||
824 | if (write_in_full(fd, contents + copy_begin, | ||
825 | copy_end - copy_begin) < | ||
826 | copy_end - copy_begin) | ||
827 | goto write_err_out; | ||
828 | if (new_line && | ||
829 | write_in_full(fd, "\n", 1) != 1) | ||
830 | goto write_err_out; | ||
831 | } | ||
832 | copy_begin = store.offset[i]; | ||
833 | } | ||
834 | |||
835 | /* write the pair (value == NULL means unset) */ | ||
836 | if (value != NULL) { | ||
837 | if (store.state == START) { | ||
838 | if (!store_write_section(fd, key)) | ||
839 | goto write_err_out; | ||
840 | } | ||
841 | if (!store_write_pair(fd, key, value)) | ||
842 | goto write_err_out; | ||
843 | } | ||
844 | |||
845 | /* write the rest of the config */ | ||
846 | if (copy_begin < contents_sz) | ||
847 | if (write_in_full(fd, contents + copy_begin, | ||
848 | contents_sz - copy_begin) < | ||
849 | contents_sz - copy_begin) | ||
850 | goto write_err_out; | ||
851 | |||
852 | munmap(contents, contents_sz); | ||
853 | } | ||
854 | |||
855 | ret = 0; | ||
856 | |||
857 | out_free: | ||
858 | free(config_filename); | ||
859 | return ret; | ||
860 | |||
861 | write_err_out: | ||
862 | goto out_free; | ||
863 | |||
864 | } | ||
865 | |||
866 | /* | ||
867 | * Call this to report error for your variable that should not | ||
868 | * get a boolean value (i.e. "[my] var" means "true"). | ||
869 | */ | ||
int config_error_nonbool(const char *var)
{
	/* error() prints the message; its return value is passed through */
	int rc = error("Missing value for '%s'", var);

	return rc;
}
diff --git a/Documentation/perf_counter/util/ctype.c b/Documentation/perf_counter/util/ctype.c new file mode 100644 index 000000000000..b90ec004f29c --- /dev/null +++ b/Documentation/perf_counter/util/ctype.c | |||
@@ -0,0 +1,26 @@ | |||
1 | /* | ||
2 | * Sane locale-independent, ASCII ctype. | ||
3 | * | ||
4 | * No surprises, and works with signed and unsigned chars. | ||
5 | */ | ||
6 | #include "cache.h" | ||
7 | |||
8 | enum { | ||
9 | S = GIT_SPACE, | ||
10 | A = GIT_ALPHA, | ||
11 | D = GIT_DIGIT, | ||
12 | G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ | ||
13 | R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */ | ||
14 | }; | ||
15 | |||
16 | unsigned char sane_ctype[256] = { | ||
17 | 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ | ||
18 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ | ||
19 | S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0, /* 32.. 47 */ | ||
20 | D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */ | ||
21 | 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ | ||
22 | A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0, /* 80.. 95 */ | ||
23 | 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ | ||
24 | A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0, /* 112..127 */ | ||
25 | /* Nothing in the 128.. range */ | ||
26 | }; | ||
diff --git a/Documentation/perf_counter/util/exec_cmd.c b/Documentation/perf_counter/util/exec_cmd.c new file mode 100644 index 000000000000..d39292263153 --- /dev/null +++ b/Documentation/perf_counter/util/exec_cmd.c | |||
@@ -0,0 +1,165 @@ | |||
1 | #include "cache.h" | ||
2 | #include "exec_cmd.h" | ||
3 | #include "quote.h" | ||
4 | #define MAX_ARGS 32 | ||
5 | |||
6 | extern char **environ; | ||
7 | static const char *argv_exec_path; | ||
8 | static const char *argv0_path; | ||
9 | |||
10 | const char *system_path(const char *path) | ||
11 | { | ||
12 | #ifdef RUNTIME_PREFIX | ||
13 | static const char *prefix; | ||
14 | #else | ||
15 | static const char *prefix = PREFIX; | ||
16 | #endif | ||
17 | struct strbuf d = STRBUF_INIT; | ||
18 | |||
19 | if (is_absolute_path(path)) | ||
20 | return path; | ||
21 | |||
22 | #ifdef RUNTIME_PREFIX | ||
23 | assert(argv0_path); | ||
24 | assert(is_absolute_path(argv0_path)); | ||
25 | |||
26 | if (!prefix && | ||
27 | !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) && | ||
28 | !(prefix = strip_path_suffix(argv0_path, BINDIR)) && | ||
29 | !(prefix = strip_path_suffix(argv0_path, "perf"))) { | ||
30 | prefix = PREFIX; | ||
31 | fprintf(stderr, "RUNTIME_PREFIX requested, " | ||
32 | "but prefix computation failed. " | ||
33 | "Using static fallback '%s'.\n", prefix); | ||
34 | } | ||
35 | #endif | ||
36 | |||
37 | strbuf_addf(&d, "%s/%s", prefix, path); | ||
38 | path = strbuf_detach(&d, NULL); | ||
39 | return path; | ||
40 | } | ||
41 | |||
42 | const char *perf_extract_argv0_path(const char *argv0) | ||
43 | { | ||
44 | const char *slash; | ||
45 | |||
46 | if (!argv0 || !*argv0) | ||
47 | return NULL; | ||
48 | slash = argv0 + strlen(argv0); | ||
49 | |||
50 | while (argv0 <= slash && !is_dir_sep(*slash)) | ||
51 | slash--; | ||
52 | |||
53 | if (slash >= argv0) { | ||
54 | argv0_path = strndup(argv0, slash - argv0); | ||
55 | return slash + 1; | ||
56 | } | ||
57 | |||
58 | return argv0; | ||
59 | } | ||
60 | |||
61 | void perf_set_argv_exec_path(const char *exec_path) | ||
62 | { | ||
63 | argv_exec_path = exec_path; | ||
64 | /* | ||
65 | * Propagate this setting to external programs. | ||
66 | */ | ||
67 | setenv(EXEC_PATH_ENVIRONMENT, exec_path, 1); | ||
68 | } | ||
69 | |||
70 | |||
71 | /* Returns the highest-priority, location to look for perf programs. */ | ||
72 | const char *perf_exec_path(void) | ||
73 | { | ||
74 | const char *env; | ||
75 | |||
76 | if (argv_exec_path) | ||
77 | return argv_exec_path; | ||
78 | |||
79 | env = getenv(EXEC_PATH_ENVIRONMENT); | ||
80 | if (env && *env) { | ||
81 | return env; | ||
82 | } | ||
83 | |||
84 | return system_path(PERF_EXEC_PATH); | ||
85 | } | ||
86 | |||
87 | static void add_path(struct strbuf *out, const char *path) | ||
88 | { | ||
89 | if (path && *path) { | ||
90 | if (is_absolute_path(path)) | ||
91 | strbuf_addstr(out, path); | ||
92 | else | ||
93 | strbuf_addstr(out, make_nonrelative_path(path)); | ||
94 | |||
95 | strbuf_addch(out, PATH_SEP); | ||
96 | } | ||
97 | } | ||
98 | |||
99 | void setup_path(void) | ||
100 | { | ||
101 | const char *old_path = getenv("PATH"); | ||
102 | struct strbuf new_path = STRBUF_INIT; | ||
103 | |||
104 | add_path(&new_path, perf_exec_path()); | ||
105 | add_path(&new_path, argv0_path); | ||
106 | |||
107 | if (old_path) | ||
108 | strbuf_addstr(&new_path, old_path); | ||
109 | else | ||
110 | strbuf_addstr(&new_path, "/usr/local/bin:/usr/bin:/bin"); | ||
111 | |||
112 | setenv("PATH", new_path.buf, 1); | ||
113 | |||
114 | strbuf_release(&new_path); | ||
115 | } | ||
116 | |||
/*
 * Build a new NULL-terminated argument vector consisting of "perf"
 * followed by every entry of argv.  Only the vector itself is
 * allocated; the strings are shared with argv.
 */
const char **prepare_perf_cmd(const char **argv)
{
	int count = 0, k;
	const char **nargv;

	while (argv[count])
		count++;

	/* one extra slot for "perf", one for the terminator */
	nargv = malloc(sizeof(*nargv) * (count + 2));

	nargv[0] = "perf";
	for (k = 0; k < count; k++)
		nargv[k + 1] = argv[k];
	nargv[count + 1] = NULL;

	return nargv;
}
132 | |||
/*
 * Run "perf" with the given NULL-terminated arguments, replacing the
 * current process.  Control only comes back here when execvp() fails,
 * in which case the temporary vector is released and -1 is returned.
 */
int execv_perf_cmd(const char **argv)
{
	const char **full_argv = prepare_perf_cmd(argv);

	execvp("perf", (char **)full_argv);

	/* still here: the exec did not happen */
	free(full_argv);
	return -1;
}
142 | |||
143 | |||
144 | int execl_perf_cmd(const char *cmd,...) | ||
145 | { | ||
146 | int argc; | ||
147 | const char *argv[MAX_ARGS + 1]; | ||
148 | const char *arg; | ||
149 | va_list param; | ||
150 | |||
151 | va_start(param, cmd); | ||
152 | argv[0] = cmd; | ||
153 | argc = 1; | ||
154 | while (argc < MAX_ARGS) { | ||
155 | arg = argv[argc++] = va_arg(param, char *); | ||
156 | if (!arg) | ||
157 | break; | ||
158 | } | ||
159 | va_end(param); | ||
160 | if (MAX_ARGS <= argc) | ||
161 | return error("too many args to run %s", cmd); | ||
162 | |||
163 | argv[argc] = NULL; | ||
164 | return execv_perf_cmd(argv); | ||
165 | } | ||
diff --git a/Documentation/perf_counter/util/exec_cmd.h b/Documentation/perf_counter/util/exec_cmd.h new file mode 100644 index 000000000000..effe25eb1545 --- /dev/null +++ b/Documentation/perf_counter/util/exec_cmd.h | |||
@@ -0,0 +1,13 @@ | |||
#ifndef PERF_EXEC_CMD_H
#define PERF_EXEC_CMD_H

/* Locating the directory that holds the perf programs. */
extern void perf_set_argv_exec_path(const char *exec_path);
extern const char *perf_extract_argv0_path(const char *path);
extern const char *perf_exec_path(void);
extern const char *system_path(const char *path);

/* Prepend the perf program directories to $PATH. */
extern void setup_path(void);

/* Re-executing "perf" with a new argument list. */
extern const char **prepare_perf_cmd(const char **argv);
extern int execv_perf_cmd(const char **argv); /* NULL terminated */
extern int execl_perf_cmd(const char *cmd, ...);

#endif /* PERF_EXEC_CMD_H */
diff --git a/Documentation/perf_counter/util/generate-cmdlist.sh b/Documentation/perf_counter/util/generate-cmdlist.sh new file mode 100755 index 000000000000..f06f6fd148f8 --- /dev/null +++ b/Documentation/perf_counter/util/generate-cmdlist.sh | |||
@@ -0,0 +1,24 @@ | |||
#!/bin/sh
#
# Emit a C fragment on stdout that defines the common_cmds[] table.
# Command names come from the lines of command-list.txt marked "common";
# the help text for each is taken from the NAME section of
# Documentation/perf-<cmd>.txt.

echo "/* Automatically generated by $0 */
struct cmdname_help
{
    char name[16];
    char help[80];
};

static struct cmdname_help common_cmds[] = {"

# One table entry per common command, in sorted order.
sed -n -e 's/^perf-\([^ ]*\)[ ].* common.*/\1/p' command-list.txt |
sort |
while read cmd
do
     sed -n '
     /^NAME/,/perf-'"$cmd"'/H
     ${
            x
            s/.*perf-'"$cmd"' - \(.*\)/  {"'"$cmd"'", "\1"},/
            p
     }' "Documentation/perf-$cmd.txt"
done
echo "};"
diff --git a/Documentation/perf_counter/util/help.c b/Documentation/perf_counter/util/help.c new file mode 100644 index 000000000000..edde541d238d --- /dev/null +++ b/Documentation/perf_counter/util/help.c | |||
@@ -0,0 +1,366 @@ | |||
1 | #include "cache.h" | ||
2 | #include "../builtin.h" | ||
3 | #include "exec_cmd.h" | ||
4 | #include "levenshtein.h" | ||
5 | #include "help.h" | ||
6 | |||
/*
 * Width of the terminal in columns.  A positive $COLUMNS wins (most
 * GUI terminals set it, although some don't export it); otherwise the
 * window size of file descriptor 1 is queried where TIOCGWINSZ is
 * available; the final fallback is 80.
 */
static int term_columns(void)
{
	const char *from_env = getenv("COLUMNS");

	if (from_env) {
		int width = atoi(from_env);

		if (width > 0)
			return width;
	}

#ifdef TIOCGWINSZ
	{
		struct winsize ws;

		if (ioctl(1, TIOCGWINSZ, &ws) == 0 && ws.ws_col)
			return ws.ws_col;
	}
#endif

	return 80;
}
28 | |||
29 | void add_cmdname(struct cmdnames *cmds, const char *name, int len) | ||
30 | { | ||
31 | struct cmdname *ent = malloc(sizeof(*ent) + len + 1); | ||
32 | |||
33 | ent->len = len; | ||
34 | memcpy(ent->name, name, len); | ||
35 | ent->name[len] = 0; | ||
36 | |||
37 | ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc); | ||
38 | cmds->names[cmds->cnt++] = ent; | ||
39 | } | ||
40 | |||
41 | static void clean_cmdnames(struct cmdnames *cmds) | ||
42 | { | ||
43 | int i; | ||
44 | for (i = 0; i < cmds->cnt; ++i) | ||
45 | free(cmds->names[i]); | ||
46 | free(cmds->names); | ||
47 | cmds->cnt = 0; | ||
48 | cmds->alloc = 0; | ||
49 | } | ||
50 | |||
51 | static int cmdname_compare(const void *a_, const void *b_) | ||
52 | { | ||
53 | struct cmdname *a = *(struct cmdname **)a_; | ||
54 | struct cmdname *b = *(struct cmdname **)b_; | ||
55 | return strcmp(a->name, b->name); | ||
56 | } | ||
57 | |||
58 | static void uniq(struct cmdnames *cmds) | ||
59 | { | ||
60 | int i, j; | ||
61 | |||
62 | if (!cmds->cnt) | ||
63 | return; | ||
64 | |||
65 | for (i = j = 1; i < cmds->cnt; i++) | ||
66 | if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) | ||
67 | cmds->names[j++] = cmds->names[i]; | ||
68 | |||
69 | cmds->cnt = j; | ||
70 | } | ||
71 | |||
72 | void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) | ||
73 | { | ||
74 | int ci, cj, ei; | ||
75 | int cmp; | ||
76 | |||
77 | ci = cj = ei = 0; | ||
78 | while (ci < cmds->cnt && ei < excludes->cnt) { | ||
79 | cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name); | ||
80 | if (cmp < 0) | ||
81 | cmds->names[cj++] = cmds->names[ci++]; | ||
82 | else if (cmp == 0) | ||
83 | ci++, ei++; | ||
84 | else if (cmp > 0) | ||
85 | ei++; | ||
86 | } | ||
87 | |||
88 | while (ci < cmds->cnt) | ||
89 | cmds->names[cj++] = cmds->names[ci++]; | ||
90 | |||
91 | cmds->cnt = cj; | ||
92 | } | ||
93 | |||
94 | static void pretty_print_string_list(struct cmdnames *cmds, int longest) | ||
95 | { | ||
96 | int cols = 1, rows; | ||
97 | int space = longest + 1; /* min 1 SP between words */ | ||
98 | int max_cols = term_columns() - 1; /* don't print *on* the edge */ | ||
99 | int i, j; | ||
100 | |||
101 | if (space < max_cols) | ||
102 | cols = max_cols / space; | ||
103 | rows = (cmds->cnt + cols - 1) / cols; | ||
104 | |||
105 | for (i = 0; i < rows; i++) { | ||
106 | printf(" "); | ||
107 | |||
108 | for (j = 0; j < cols; j++) { | ||
109 | int n = j * rows + i; | ||
110 | int size = space; | ||
111 | if (n >= cmds->cnt) | ||
112 | break; | ||
113 | if (j == cols-1 || n + rows >= cmds->cnt) | ||
114 | size = 1; | ||
115 | printf("%-*s", size, cmds->names[n]->name); | ||
116 | } | ||
117 | putchar('\n'); | ||
118 | } | ||
119 | } | ||
120 | |||
/*
 * Non-zero when "name" refers (after following symlinks) to a regular
 * file with the owner-execute bit set.  On MinGW the bit is derived
 * from the first two bytes of the file instead.
 */
static int is_executable(const char *name)
{
	struct stat st;

	/* stat, not lstat */
	if (stat(name, &st))
		return 0;
	if (!S_ISREG(st.st_mode))
		return 0;

#ifdef __MINGW32__
	{
		/* cannot trust the executable bit, peek into the file instead */
		char buf[3] = { 0 };
		int fd = open(name, O_RDONLY);

		st.st_mode &= ~S_IXUSR;
		if (fd >= 0) {
			int n = read(fd, buf, 2);

			/* DOS executables start with "MZ" */
			if (n == 2 &&
			    (!strcmp(buf, "#!") || !strcmp(buf, "MZ")))
				st.st_mode |= S_IXUSR;
			close(fd);
		}
	}
#endif
	return st.st_mode & S_IXUSR;
}
146 | |||
147 | static void list_commands_in_dir(struct cmdnames *cmds, | ||
148 | const char *path, | ||
149 | const char *prefix) | ||
150 | { | ||
151 | int prefix_len; | ||
152 | DIR *dir = opendir(path); | ||
153 | struct dirent *de; | ||
154 | struct strbuf buf = STRBUF_INIT; | ||
155 | int len; | ||
156 | |||
157 | if (!dir) | ||
158 | return; | ||
159 | if (!prefix) | ||
160 | prefix = "perf-"; | ||
161 | prefix_len = strlen(prefix); | ||
162 | |||
163 | strbuf_addf(&buf, "%s/", path); | ||
164 | len = buf.len; | ||
165 | |||
166 | while ((de = readdir(dir)) != NULL) { | ||
167 | int entlen; | ||
168 | |||
169 | if (prefixcmp(de->d_name, prefix)) | ||
170 | continue; | ||
171 | |||
172 | strbuf_setlen(&buf, len); | ||
173 | strbuf_addstr(&buf, de->d_name); | ||
174 | if (!is_executable(buf.buf)) | ||
175 | continue; | ||
176 | |||
177 | entlen = strlen(de->d_name) - prefix_len; | ||
178 | if (has_extension(de->d_name, ".exe")) | ||
179 | entlen -= 4; | ||
180 | |||
181 | add_cmdname(cmds, de->d_name + prefix_len, entlen); | ||
182 | } | ||
183 | closedir(dir); | ||
184 | strbuf_release(&buf); | ||
185 | } | ||
186 | |||
187 | void load_command_list(const char *prefix, | ||
188 | struct cmdnames *main_cmds, | ||
189 | struct cmdnames *other_cmds) | ||
190 | { | ||
191 | const char *env_path = getenv("PATH"); | ||
192 | const char *exec_path = perf_exec_path(); | ||
193 | |||
194 | if (exec_path) { | ||
195 | list_commands_in_dir(main_cmds, exec_path, prefix); | ||
196 | qsort(main_cmds->names, main_cmds->cnt, | ||
197 | sizeof(*main_cmds->names), cmdname_compare); | ||
198 | uniq(main_cmds); | ||
199 | } | ||
200 | |||
201 | if (env_path) { | ||
202 | char *paths, *path, *colon; | ||
203 | path = paths = strdup(env_path); | ||
204 | while (1) { | ||
205 | if ((colon = strchr(path, PATH_SEP))) | ||
206 | *colon = 0; | ||
207 | if (!exec_path || strcmp(path, exec_path)) | ||
208 | list_commands_in_dir(other_cmds, path, prefix); | ||
209 | |||
210 | if (!colon) | ||
211 | break; | ||
212 | path = colon + 1; | ||
213 | } | ||
214 | free(paths); | ||
215 | |||
216 | qsort(other_cmds->names, other_cmds->cnt, | ||
217 | sizeof(*other_cmds->names), cmdname_compare); | ||
218 | uniq(other_cmds); | ||
219 | } | ||
220 | exclude_cmds(other_cmds, main_cmds); | ||
221 | } | ||
222 | |||
223 | void list_commands(const char *title, struct cmdnames *main_cmds, | ||
224 | struct cmdnames *other_cmds) | ||
225 | { | ||
226 | int i, longest = 0; | ||
227 | |||
228 | for (i = 0; i < main_cmds->cnt; i++) | ||
229 | if (longest < main_cmds->names[i]->len) | ||
230 | longest = main_cmds->names[i]->len; | ||
231 | for (i = 0; i < other_cmds->cnt; i++) | ||
232 | if (longest < other_cmds->names[i]->len) | ||
233 | longest = other_cmds->names[i]->len; | ||
234 | |||
235 | if (main_cmds->cnt) { | ||
236 | const char *exec_path = perf_exec_path(); | ||
237 | printf("available %s in '%s'\n", title, exec_path); | ||
238 | printf("----------------"); | ||
239 | mput_char('-', strlen(title) + strlen(exec_path)); | ||
240 | putchar('\n'); | ||
241 | pretty_print_string_list(main_cmds, longest); | ||
242 | putchar('\n'); | ||
243 | } | ||
244 | |||
245 | if (other_cmds->cnt) { | ||
246 | printf("%s available from elsewhere on your $PATH\n", title); | ||
247 | printf("---------------------------------------"); | ||
248 | mput_char('-', strlen(title)); | ||
249 | putchar('\n'); | ||
250 | pretty_print_string_list(other_cmds, longest); | ||
251 | putchar('\n'); | ||
252 | } | ||
253 | } | ||
254 | |||
255 | int is_in_cmdlist(struct cmdnames *c, const char *s) | ||
256 | { | ||
257 | int i; | ||
258 | for (i = 0; i < c->cnt; i++) | ||
259 | if (!strcmp(s, c->names[i]->name)) | ||
260 | return 1; | ||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | static int autocorrect; | ||
265 | static struct cmdnames aliases; | ||
266 | |||
267 | static int perf_unknown_cmd_config(const char *var, const char *value, void *cb) | ||
268 | { | ||
269 | if (!strcmp(var, "help.autocorrect")) | ||
270 | autocorrect = perf_config_int(var,value); | ||
271 | /* Also use aliases for command lookup */ | ||
272 | if (!prefixcmp(var, "alias.")) | ||
273 | add_cmdname(&aliases, var + 6, strlen(var + 6)); | ||
274 | |||
275 | return perf_default_config(var, value, cb); | ||
276 | } | ||
277 | |||
278 | static int levenshtein_compare(const void *p1, const void *p2) | ||
279 | { | ||
280 | const struct cmdname *const *c1 = p1, *const *c2 = p2; | ||
281 | const char *s1 = (*c1)->name, *s2 = (*c2)->name; | ||
282 | int l1 = (*c1)->len; | ||
283 | int l2 = (*c2)->len; | ||
284 | return l1 != l2 ? l1 - l2 : strcmp(s1, s2); | ||
285 | } | ||
286 | |||
287 | static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) | ||
288 | { | ||
289 | int i; | ||
290 | ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc); | ||
291 | |||
292 | for (i = 0; i < old->cnt; i++) | ||
293 | cmds->names[cmds->cnt++] = old->names[i]; | ||
294 | free(old->names); | ||
295 | old->cnt = 0; | ||
296 | old->names = NULL; | ||
297 | } | ||
298 | |||
299 | const char *help_unknown_cmd(const char *cmd) | ||
300 | { | ||
301 | int i, n, best_similarity = 0; | ||
302 | struct cmdnames main_cmds, other_cmds; | ||
303 | |||
304 | memset(&main_cmds, 0, sizeof(main_cmds)); | ||
305 | memset(&other_cmds, 0, sizeof(main_cmds)); | ||
306 | memset(&aliases, 0, sizeof(aliases)); | ||
307 | |||
308 | perf_config(perf_unknown_cmd_config, NULL); | ||
309 | |||
310 | load_command_list("perf-", &main_cmds, &other_cmds); | ||
311 | |||
312 | add_cmd_list(&main_cmds, &aliases); | ||
313 | add_cmd_list(&main_cmds, &other_cmds); | ||
314 | qsort(main_cmds.names, main_cmds.cnt, | ||
315 | sizeof(main_cmds.names), cmdname_compare); | ||
316 | uniq(&main_cmds); | ||
317 | |||
318 | /* This reuses cmdname->len for similarity index */ | ||
319 | for (i = 0; i < main_cmds.cnt; ++i) | ||
320 | main_cmds.names[i]->len = | ||
321 | levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4); | ||
322 | |||
323 | qsort(main_cmds.names, main_cmds.cnt, | ||
324 | sizeof(*main_cmds.names), levenshtein_compare); | ||
325 | |||
326 | if (!main_cmds.cnt) | ||
327 | die ("Uh oh. Your system reports no Git commands at all."); | ||
328 | |||
329 | best_similarity = main_cmds.names[0]->len; | ||
330 | n = 1; | ||
331 | while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len) | ||
332 | ++n; | ||
333 | if (autocorrect && n == 1) { | ||
334 | const char *assumed = main_cmds.names[0]->name; | ||
335 | main_cmds.names[0] = NULL; | ||
336 | clean_cmdnames(&main_cmds); | ||
337 | fprintf(stderr, "WARNING: You called a Git program named '%s', " | ||
338 | "which does not exist.\n" | ||
339 | "Continuing under the assumption that you meant '%s'\n", | ||
340 | cmd, assumed); | ||
341 | if (autocorrect > 0) { | ||
342 | fprintf(stderr, "in %0.1f seconds automatically...\n", | ||
343 | (float)autocorrect/10.0); | ||
344 | poll(NULL, 0, autocorrect * 100); | ||
345 | } | ||
346 | return assumed; | ||
347 | } | ||
348 | |||
349 | fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd); | ||
350 | |||
351 | if (best_similarity < 6) { | ||
352 | fprintf(stderr, "\nDid you mean %s?\n", | ||
353 | n < 2 ? "this": "one of these"); | ||
354 | |||
355 | for (i = 0; i < n; i++) | ||
356 | fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); | ||
357 | } | ||
358 | |||
359 | exit(1); | ||
360 | } | ||
361 | |||
362 | int cmd_version(int argc, const char **argv, const char *prefix) | ||
363 | { | ||
364 | printf("perf version %s\n", perf_version_string); | ||
365 | return 0; | ||
366 | } | ||
diff --git a/Documentation/perf_counter/util/help.h b/Documentation/perf_counter/util/help.h new file mode 100644 index 000000000000..56bc15406ffc --- /dev/null +++ b/Documentation/perf_counter/util/help.h | |||
@@ -0,0 +1,29 @@ | |||
1 | #ifndef HELP_H | ||
2 | #define HELP_H | ||
3 | |||
4 | struct cmdnames { | ||
5 | int alloc; | ||
6 | int cnt; | ||
7 | struct cmdname { | ||
8 | size_t len; /* also used for similarity index in help.c */ | ||
9 | char name[FLEX_ARRAY]; | ||
10 | } **names; | ||
11 | }; | ||
12 | |||
/* Write the character "c" to stdout "num" times. */
static inline void mput_char(char c, unsigned int num)
{
	unsigned int left;

	for (left = num; left > 0; left--)
		putchar(c);
}
18 | |||
/* Collect commands from the exec path (main) and from $PATH (other). */
void load_command_list(const char *prefix,
		       struct cmdnames *main_cmds,
		       struct cmdnames *other_cmds);

/* Append a copy of name[0..len) to the list. */
void add_cmdname(struct cmdnames *cmds, const char *name, int len);

/* Here we require that excludes is a sorted list. */
void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes);

/* Non-zero when a command named s is in the list. */
int is_in_cmdlist(struct cmdnames *c, const char *s);

/* Print both lists in columns under headings built from title. */
void list_commands(const char *title,
		   struct cmdnames *main_cmds,
		   struct cmdnames *other_cmds);
28 | |||
29 | #endif /* HELP_H */ | ||
diff --git a/Documentation/perf_counter/util/levenshtein.c b/Documentation/perf_counter/util/levenshtein.c new file mode 100644 index 000000000000..e521d1516df6 --- /dev/null +++ b/Documentation/perf_counter/util/levenshtein.c | |||
@@ -0,0 +1,84 @@ | |||
1 | #include "cache.h" | ||
2 | #include "levenshtein.h" | ||
3 | |||
4 | /* | ||
5 | * This function implements the Damerau-Levenshtein algorithm to | ||
6 | * calculate a distance between strings. | ||
7 | * | ||
8 | * Basically, it says how many letters need to be swapped, substituted, | ||
9 | * deleted from, or added to string1, at least, to get string2. | ||
10 | * | ||
11 | * The idea is to build a distance matrix for the substrings of both | ||
12 | * strings. To avoid a large space complexity, only the last three rows | ||
13 | * are kept in memory (if swaps had the same or higher cost as one deletion | ||
14 | * plus one insertion, only two rows would be needed). | ||
15 | * | ||
16 | * At any stage, "i + 1" denotes the length of the current substring of | ||
17 | * string1 that the distance is calculated for. | ||
18 | * | ||
19 | * row2 holds the current row, row1 the previous row (i.e. for the substring | ||
20 | * of string1 of length "i"), and row0 the row before that. | ||
21 | * | ||
22 | * In other words, at the start of the big loop, row2[j + 1] contains the | ||
23 | * Damerau-Levenshtein distance between the substring of string1 of length | ||
24 | * "i" and the substring of string2 of length "j + 1". | ||
25 | * | ||
26 | * All the big loop does is determine the partial minimum-cost paths. | ||
27 | * | ||
28 | * It does so by calculating the costs of the path ending in characters | ||
29 | * i (in string1) and j (in string2), respectively, given that the last | ||
30 | * operation is a substitution, a swap, a deletion, or an insertion. | ||
31 | * | ||
32 | * This implementation allows the costs to be weighted: | ||
33 | * | ||
34 | * - w (as in "sWap") | ||
35 | * - s (as in "Substitution") | ||
36 | * - a (for insertion, AKA "Add") | ||
37 | * - d (as in "Deletion") | ||
38 | * | ||
39 | * Note that this algorithm calculates a distance _iff_ d == a. | ||
40 | */ | ||
/*
 * Weighted Damerau-Levenshtein cost of turning string1 into string2.
 * w, s, a and d are the costs of a swap of two adjacent characters, a
 * substitution, an insertion and a deletion.  Three rolling rows of
 * the cost matrix are kept: "cur" is being filled, "prev" is the row
 * before it and "prev2" the one before that (needed for swaps only).
 */
int levenshtein(const char *string1, const char *string2,
		int w, int s, int a, int d)
{
	int len1 = strlen(string1), len2 = strlen(string2);
	int *prev2 = malloc(sizeof(int) * (len2 + 1));
	int *prev = malloc(sizeof(int) * (len2 + 1));
	int *cur = malloc(sizeof(int) * (len2 + 1));
	int i, j, result;

	/* empty prefix of string1: only insertions */
	for (j = 0; j <= len2; j++)
		prev[j] = j * a;

	for (i = 0; i < len1; i++) {
		int *recycled;

		/* empty prefix of string2: only deletions */
		cur[0] = (i + 1) * d;

		for (j = 0; j < len2; j++) {
			/* substitution (free when the characters match) */
			int best = prev[j] + (string1[i] != string2[j] ? s : 0);

			/* swap of the last two characters */
			if (i > 0 && j > 0 &&
			    string1[i - 1] == string2[j] &&
			    string1[i] == string2[j - 1]) {
				int swapped = prev2[j - 1] + w;

				if (best > swapped)
					best = swapped;
			}
			/* deletion */
			if (best > prev[j + 1] + d)
				best = prev[j + 1] + d;
			/* insertion */
			if (best > cur[j] + a)
				best = cur[j] + a;

			cur[j + 1] = best;
		}

		/* rotate the rows; the oldest one is overwritten next */
		recycled = prev2;
		prev2 = prev;
		prev = cur;
		cur = recycled;
	}

	result = prev[len2];
	free(prev2);
	free(prev);
	free(cur);

	return result;
}
diff --git a/Documentation/perf_counter/util/levenshtein.h b/Documentation/perf_counter/util/levenshtein.h new file mode 100644 index 000000000000..0173abeef52c --- /dev/null +++ b/Documentation/perf_counter/util/levenshtein.h | |||
@@ -0,0 +1,8 @@ | |||
#ifndef LEVENSHTEIN_H
#define LEVENSHTEIN_H

/*
 * Weighted edit distance between string1 and string2.  The four penalties
 * are, in order, the cost of swapping two adjacent characters, of
 * substituting one character, of inserting one and of deleting one.
 */
int levenshtein(const char *string1, const char *string2,
		int swap_penalty, int substitution_penalty,
		int insertion_penalty, int deletion_penalty);

#endif
diff --git a/Documentation/perf_counter/util/parse-options.c b/Documentation/perf_counter/util/parse-options.c new file mode 100644 index 000000000000..28b34c1c29cf --- /dev/null +++ b/Documentation/perf_counter/util/parse-options.c | |||
@@ -0,0 +1,492 @@ | |||
1 | #include "util.h" | ||
2 | #include "parse-options.h" | ||
3 | #include "cache.h" | ||
4 | |||
5 | #define OPT_SHORT 1 | ||
6 | #define OPT_UNSET 2 | ||
7 | |||
8 | static int opterror(const struct option *opt, const char *reason, int flags) | ||
9 | { | ||
10 | if (flags & OPT_SHORT) | ||
11 | return error("switch `%c' %s", opt->short_name, reason); | ||
12 | if (flags & OPT_UNSET) | ||
13 | return error("option `no-%s' %s", opt->long_name, reason); | ||
14 | return error("option `%s' %s", opt->long_name, reason); | ||
15 | } | ||
16 | |||
17 | static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt, | ||
18 | int flags, const char **arg) | ||
19 | { | ||
20 | if (p->opt) { | ||
21 | *arg = p->opt; | ||
22 | p->opt = NULL; | ||
23 | } else if (p->argc == 1 && (opt->flags & PARSE_OPT_LASTARG_DEFAULT)) { | ||
24 | *arg = (const char *)opt->defval; | ||
25 | } else if (p->argc > 1) { | ||
26 | p->argc--; | ||
27 | *arg = *++p->argv; | ||
28 | } else | ||
29 | return opterror(opt, "requires a value", flags); | ||
30 | return 0; | ||
31 | } | ||
32 | |||
33 | static int get_value(struct parse_opt_ctx_t *p, | ||
34 | const struct option *opt, int flags) | ||
35 | { | ||
36 | const char *s, *arg; | ||
37 | const int unset = flags & OPT_UNSET; | ||
38 | |||
39 | if (unset && p->opt) | ||
40 | return opterror(opt, "takes no value", flags); | ||
41 | if (unset && (opt->flags & PARSE_OPT_NONEG)) | ||
42 | return opterror(opt, "isn't available", flags); | ||
43 | |||
44 | if (!(flags & OPT_SHORT) && p->opt) { | ||
45 | switch (opt->type) { | ||
46 | case OPTION_CALLBACK: | ||
47 | if (!(opt->flags & PARSE_OPT_NOARG)) | ||
48 | break; | ||
49 | /* FALLTHROUGH */ | ||
50 | case OPTION_BOOLEAN: | ||
51 | case OPTION_BIT: | ||
52 | case OPTION_SET_INT: | ||
53 | case OPTION_SET_PTR: | ||
54 | return opterror(opt, "takes no value", flags); | ||
55 | default: | ||
56 | break; | ||
57 | } | ||
58 | } | ||
59 | |||
60 | switch (opt->type) { | ||
61 | case OPTION_BIT: | ||
62 | if (unset) | ||
63 | *(int *)opt->value &= ~opt->defval; | ||
64 | else | ||
65 | *(int *)opt->value |= opt->defval; | ||
66 | return 0; | ||
67 | |||
68 | case OPTION_BOOLEAN: | ||
69 | *(int *)opt->value = unset ? 0 : *(int *)opt->value + 1; | ||
70 | return 0; | ||
71 | |||
72 | case OPTION_SET_INT: | ||
73 | *(int *)opt->value = unset ? 0 : opt->defval; | ||
74 | return 0; | ||
75 | |||
76 | case OPTION_SET_PTR: | ||
77 | *(void **)opt->value = unset ? NULL : (void *)opt->defval; | ||
78 | return 0; | ||
79 | |||
80 | case OPTION_STRING: | ||
81 | if (unset) | ||
82 | *(const char **)opt->value = NULL; | ||
83 | else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) | ||
84 | *(const char **)opt->value = (const char *)opt->defval; | ||
85 | else | ||
86 | return get_arg(p, opt, flags, (const char **)opt->value); | ||
87 | return 0; | ||
88 | |||
89 | case OPTION_CALLBACK: | ||
90 | if (unset) | ||
91 | return (*opt->callback)(opt, NULL, 1) ? (-1) : 0; | ||
92 | if (opt->flags & PARSE_OPT_NOARG) | ||
93 | return (*opt->callback)(opt, NULL, 0) ? (-1) : 0; | ||
94 | if (opt->flags & PARSE_OPT_OPTARG && !p->opt) | ||
95 | return (*opt->callback)(opt, NULL, 0) ? (-1) : 0; | ||
96 | if (get_arg(p, opt, flags, &arg)) | ||
97 | return -1; | ||
98 | return (*opt->callback)(opt, arg, 0) ? (-1) : 0; | ||
99 | |||
100 | case OPTION_INTEGER: | ||
101 | if (unset) { | ||
102 | *(int *)opt->value = 0; | ||
103 | return 0; | ||
104 | } | ||
105 | if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { | ||
106 | *(int *)opt->value = opt->defval; | ||
107 | return 0; | ||
108 | } | ||
109 | if (get_arg(p, opt, flags, &arg)) | ||
110 | return -1; | ||
111 | *(int *)opt->value = strtol(arg, (char **)&s, 10); | ||
112 | if (*s) | ||
113 | return opterror(opt, "expects a numerical value", flags); | ||
114 | return 0; | ||
115 | |||
116 | default: | ||
117 | die("should not happen, someone must be hit on the forehead"); | ||
118 | } | ||
119 | } | ||
120 | |||
121 | static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options) | ||
122 | { | ||
123 | for (; options->type != OPTION_END; options++) { | ||
124 | if (options->short_name == *p->opt) { | ||
125 | p->opt = p->opt[1] ? p->opt + 1 : NULL; | ||
126 | return get_value(p, options, OPT_SHORT); | ||
127 | } | ||
128 | } | ||
129 | return -2; | ||
130 | } | ||
131 | |||
132 | static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg, | ||
133 | const struct option *options) | ||
134 | { | ||
135 | const char *arg_end = strchr(arg, '='); | ||
136 | const struct option *abbrev_option = NULL, *ambiguous_option = NULL; | ||
137 | int abbrev_flags = 0, ambiguous_flags = 0; | ||
138 | |||
139 | if (!arg_end) | ||
140 | arg_end = arg + strlen(arg); | ||
141 | |||
142 | for (; options->type != OPTION_END; options++) { | ||
143 | const char *rest; | ||
144 | int flags = 0; | ||
145 | |||
146 | if (!options->long_name) | ||
147 | continue; | ||
148 | |||
149 | rest = skip_prefix(arg, options->long_name); | ||
150 | if (options->type == OPTION_ARGUMENT) { | ||
151 | if (!rest) | ||
152 | continue; | ||
153 | if (*rest == '=') | ||
154 | return opterror(options, "takes no value", flags); | ||
155 | if (*rest) | ||
156 | continue; | ||
157 | p->out[p->cpidx++] = arg - 2; | ||
158 | return 0; | ||
159 | } | ||
160 | if (!rest) { | ||
161 | /* abbreviated? */ | ||
162 | if (!strncmp(options->long_name, arg, arg_end - arg)) { | ||
163 | is_abbreviated: | ||
164 | if (abbrev_option) { | ||
165 | /* | ||
166 | * If this is abbreviated, it is | ||
167 | * ambiguous. So when there is no | ||
168 | * exact match later, we need to | ||
169 | * error out. | ||
170 | */ | ||
171 | ambiguous_option = abbrev_option; | ||
172 | ambiguous_flags = abbrev_flags; | ||
173 | } | ||
174 | if (!(flags & OPT_UNSET) && *arg_end) | ||
175 | p->opt = arg_end + 1; | ||
176 | abbrev_option = options; | ||
177 | abbrev_flags = flags; | ||
178 | continue; | ||
179 | } | ||
180 | /* negated and abbreviated very much? */ | ||
181 | if (!prefixcmp("no-", arg)) { | ||
182 | flags |= OPT_UNSET; | ||
183 | goto is_abbreviated; | ||
184 | } | ||
185 | /* negated? */ | ||
186 | if (strncmp(arg, "no-", 3)) | ||
187 | continue; | ||
188 | flags |= OPT_UNSET; | ||
189 | rest = skip_prefix(arg + 3, options->long_name); | ||
190 | /* abbreviated and negated? */ | ||
191 | if (!rest && !prefixcmp(options->long_name, arg + 3)) | ||
192 | goto is_abbreviated; | ||
193 | if (!rest) | ||
194 | continue; | ||
195 | } | ||
196 | if (*rest) { | ||
197 | if (*rest != '=') | ||
198 | continue; | ||
199 | p->opt = rest + 1; | ||
200 | } | ||
201 | return get_value(p, options, flags); | ||
202 | } | ||
203 | |||
204 | if (ambiguous_option) | ||
205 | return error("Ambiguous option: %s " | ||
206 | "(could be --%s%s or --%s%s)", | ||
207 | arg, | ||
208 | (ambiguous_flags & OPT_UNSET) ? "no-" : "", | ||
209 | ambiguous_option->long_name, | ||
210 | (abbrev_flags & OPT_UNSET) ? "no-" : "", | ||
211 | abbrev_option->long_name); | ||
212 | if (abbrev_option) | ||
213 | return get_value(p, abbrev_option, abbrev_flags); | ||
214 | return -2; | ||
215 | } | ||
216 | |||
217 | static void check_typos(const char *arg, const struct option *options) | ||
218 | { | ||
219 | if (strlen(arg) < 3) | ||
220 | return; | ||
221 | |||
222 | if (!prefixcmp(arg, "no-")) { | ||
223 | error ("did you mean `--%s` (with two dashes ?)", arg); | ||
224 | exit(129); | ||
225 | } | ||
226 | |||
227 | for (; options->type != OPTION_END; options++) { | ||
228 | if (!options->long_name) | ||
229 | continue; | ||
230 | if (!prefixcmp(options->long_name, arg)) { | ||
231 | error ("did you mean `--%s` (with two dashes ?)", arg); | ||
232 | exit(129); | ||
233 | } | ||
234 | } | ||
235 | } | ||
236 | |||
237 | void parse_options_start(struct parse_opt_ctx_t *ctx, | ||
238 | int argc, const char **argv, int flags) | ||
239 | { | ||
240 | memset(ctx, 0, sizeof(*ctx)); | ||
241 | ctx->argc = argc - 1; | ||
242 | ctx->argv = argv + 1; | ||
243 | ctx->out = argv; | ||
244 | ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0); | ||
245 | ctx->flags = flags; | ||
246 | if ((flags & PARSE_OPT_KEEP_UNKNOWN) && | ||
247 | (flags & PARSE_OPT_STOP_AT_NON_OPTION)) | ||
248 | die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together"); | ||
249 | } | ||
250 | |||
251 | static int usage_with_options_internal(const char * const *, | ||
252 | const struct option *, int); | ||
253 | |||
254 | int parse_options_step(struct parse_opt_ctx_t *ctx, | ||
255 | const struct option *options, | ||
256 | const char * const usagestr[]) | ||
257 | { | ||
258 | int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP); | ||
259 | |||
260 | /* we must reset ->opt, unknown short option leave it dangling */ | ||
261 | ctx->opt = NULL; | ||
262 | |||
263 | for (; ctx->argc; ctx->argc--, ctx->argv++) { | ||
264 | const char *arg = ctx->argv[0]; | ||
265 | |||
266 | if (*arg != '-' || !arg[1]) { | ||
267 | if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION) | ||
268 | break; | ||
269 | ctx->out[ctx->cpidx++] = ctx->argv[0]; | ||
270 | continue; | ||
271 | } | ||
272 | |||
273 | if (arg[1] != '-') { | ||
274 | ctx->opt = arg + 1; | ||
275 | if (internal_help && *ctx->opt == 'h') | ||
276 | return parse_options_usage(usagestr, options); | ||
277 | switch (parse_short_opt(ctx, options)) { | ||
278 | case -1: | ||
279 | return parse_options_usage(usagestr, options); | ||
280 | case -2: | ||
281 | goto unknown; | ||
282 | } | ||
283 | if (ctx->opt) | ||
284 | check_typos(arg + 1, options); | ||
285 | while (ctx->opt) { | ||
286 | if (internal_help && *ctx->opt == 'h') | ||
287 | return parse_options_usage(usagestr, options); | ||
288 | switch (parse_short_opt(ctx, options)) { | ||
289 | case -1: | ||
290 | return parse_options_usage(usagestr, options); | ||
291 | case -2: | ||
292 | /* fake a short option thing to hide the fact that we may have | ||
293 | * started to parse aggregated stuff | ||
294 | * | ||
295 | * This is leaky, too bad. | ||
296 | */ | ||
297 | ctx->argv[0] = strdup(ctx->opt - 1); | ||
298 | *(char *)ctx->argv[0] = '-'; | ||
299 | goto unknown; | ||
300 | } | ||
301 | } | ||
302 | continue; | ||
303 | } | ||
304 | |||
305 | if (!arg[2]) { /* "--" */ | ||
306 | if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) { | ||
307 | ctx->argc--; | ||
308 | ctx->argv++; | ||
309 | } | ||
310 | break; | ||
311 | } | ||
312 | |||
313 | if (internal_help && !strcmp(arg + 2, "help-all")) | ||
314 | return usage_with_options_internal(usagestr, options, 1); | ||
315 | if (internal_help && !strcmp(arg + 2, "help")) | ||
316 | return parse_options_usage(usagestr, options); | ||
317 | switch (parse_long_opt(ctx, arg + 2, options)) { | ||
318 | case -1: | ||
319 | return parse_options_usage(usagestr, options); | ||
320 | case -2: | ||
321 | goto unknown; | ||
322 | } | ||
323 | continue; | ||
324 | unknown: | ||
325 | if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN)) | ||
326 | return PARSE_OPT_UNKNOWN; | ||
327 | ctx->out[ctx->cpidx++] = ctx->argv[0]; | ||
328 | ctx->opt = NULL; | ||
329 | } | ||
330 | return PARSE_OPT_DONE; | ||
331 | } | ||
332 | |||
333 | int parse_options_end(struct parse_opt_ctx_t *ctx) | ||
334 | { | ||
335 | memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out)); | ||
336 | ctx->out[ctx->cpidx + ctx->argc] = NULL; | ||
337 | return ctx->cpidx + ctx->argc; | ||
338 | } | ||
339 | |||
340 | int parse_options(int argc, const char **argv, const struct option *options, | ||
341 | const char * const usagestr[], int flags) | ||
342 | { | ||
343 | struct parse_opt_ctx_t ctx; | ||
344 | |||
345 | parse_options_start(&ctx, argc, argv, flags); | ||
346 | switch (parse_options_step(&ctx, options, usagestr)) { | ||
347 | case PARSE_OPT_HELP: | ||
348 | exit(129); | ||
349 | case PARSE_OPT_DONE: | ||
350 | break; | ||
351 | default: /* PARSE_OPT_UNKNOWN */ | ||
352 | if (ctx.argv[0][1] == '-') { | ||
353 | error("unknown option `%s'", ctx.argv[0] + 2); | ||
354 | } else { | ||
355 | error("unknown switch `%c'", *ctx.opt); | ||
356 | } | ||
357 | usage_with_options(usagestr, options); | ||
358 | } | ||
359 | |||
360 | return parse_options_end(&ctx); | ||
361 | } | ||
362 | |||
363 | #define USAGE_OPTS_WIDTH 24 | ||
364 | #define USAGE_GAP 2 | ||
365 | |||
366 | int usage_with_options_internal(const char * const *usagestr, | ||
367 | const struct option *opts, int full) | ||
368 | { | ||
369 | if (!usagestr) | ||
370 | return PARSE_OPT_HELP; | ||
371 | |||
372 | fprintf(stderr, "usage: %s\n", *usagestr++); | ||
373 | while (*usagestr && **usagestr) | ||
374 | fprintf(stderr, " or: %s\n", *usagestr++); | ||
375 | while (*usagestr) { | ||
376 | fprintf(stderr, "%s%s\n", | ||
377 | **usagestr ? " " : "", | ||
378 | *usagestr); | ||
379 | usagestr++; | ||
380 | } | ||
381 | |||
382 | if (opts->type != OPTION_GROUP) | ||
383 | fputc('\n', stderr); | ||
384 | |||
385 | for (; opts->type != OPTION_END; opts++) { | ||
386 | size_t pos; | ||
387 | int pad; | ||
388 | |||
389 | if (opts->type == OPTION_GROUP) { | ||
390 | fputc('\n', stderr); | ||
391 | if (*opts->help) | ||
392 | fprintf(stderr, "%s\n", opts->help); | ||
393 | continue; | ||
394 | } | ||
395 | if (!full && (opts->flags & PARSE_OPT_HIDDEN)) | ||
396 | continue; | ||
397 | |||
398 | pos = fprintf(stderr, " "); | ||
399 | if (opts->short_name) | ||
400 | pos += fprintf(stderr, "-%c", opts->short_name); | ||
401 | if (opts->long_name && opts->short_name) | ||
402 | pos += fprintf(stderr, ", "); | ||
403 | if (opts->long_name) | ||
404 | pos += fprintf(stderr, "--%s", opts->long_name); | ||
405 | |||
406 | switch (opts->type) { | ||
407 | case OPTION_ARGUMENT: | ||
408 | break; | ||
409 | case OPTION_INTEGER: | ||
410 | if (opts->flags & PARSE_OPT_OPTARG) | ||
411 | if (opts->long_name) | ||
412 | pos += fprintf(stderr, "[=<n>]"); | ||
413 | else | ||
414 | pos += fprintf(stderr, "[<n>]"); | ||
415 | else | ||
416 | pos += fprintf(stderr, " <n>"); | ||
417 | break; | ||
418 | case OPTION_CALLBACK: | ||
419 | if (opts->flags & PARSE_OPT_NOARG) | ||
420 | break; | ||
421 | /* FALLTHROUGH */ | ||
422 | case OPTION_STRING: | ||
423 | if (opts->argh) { | ||
424 | if (opts->flags & PARSE_OPT_OPTARG) | ||
425 | if (opts->long_name) | ||
426 | pos += fprintf(stderr, "[=<%s>]", opts->argh); | ||
427 | else | ||
428 | pos += fprintf(stderr, "[<%s>]", opts->argh); | ||
429 | else | ||
430 | pos += fprintf(stderr, " <%s>", opts->argh); | ||
431 | } else { | ||
432 | if (opts->flags & PARSE_OPT_OPTARG) | ||
433 | if (opts->long_name) | ||
434 | pos += fprintf(stderr, "[=...]"); | ||
435 | else | ||
436 | pos += fprintf(stderr, "[...]"); | ||
437 | else | ||
438 | pos += fprintf(stderr, " ..."); | ||
439 | } | ||
440 | break; | ||
441 | default: /* OPTION_{BIT,BOOLEAN,SET_INT,SET_PTR} */ | ||
442 | break; | ||
443 | } | ||
444 | |||
445 | if (pos <= USAGE_OPTS_WIDTH) | ||
446 | pad = USAGE_OPTS_WIDTH - pos; | ||
447 | else { | ||
448 | fputc('\n', stderr); | ||
449 | pad = USAGE_OPTS_WIDTH; | ||
450 | } | ||
451 | fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help); | ||
452 | } | ||
453 | fputc('\n', stderr); | ||
454 | |||
455 | return PARSE_OPT_HELP; | ||
456 | } | ||
457 | |||
/* Print the short usage (hidden options omitted) and exit with 129. */
void usage_with_options(const char * const *usagestr,
			const struct option *opts)
{
	const int full = 0;

	usage_with_options_internal(usagestr, opts, full);
	exit(129);
}
464 | |||
/*
 * Print the short usage (hidden options omitted) without exiting.
 * Returns the value of usage_with_options_internal().
 */
int parse_options_usage(const char * const *usagestr,
			const struct option *opts)
{
	const int full = 0;

	return usage_with_options_internal(usagestr, opts, full);
}
470 | |||
471 | |||
472 | int parse_opt_verbosity_cb(const struct option *opt, const char *arg, | ||
473 | int unset) | ||
474 | { | ||
475 | int *target = opt->value; | ||
476 | |||
477 | if (unset) | ||
478 | /* --no-quiet, --no-verbose */ | ||
479 | *target = 0; | ||
480 | else if (opt->short_name == 'v') { | ||
481 | if (*target >= 0) | ||
482 | (*target)++; | ||
483 | else | ||
484 | *target = 1; | ||
485 | } else { | ||
486 | if (*target <= 0) | ||
487 | (*target)--; | ||
488 | else | ||
489 | *target = -1; | ||
490 | } | ||
491 | return 0; | ||
492 | } | ||
diff --git a/Documentation/perf_counter/util/parse-options.h b/Documentation/perf_counter/util/parse-options.h new file mode 100644 index 000000000000..a81c7faff68e --- /dev/null +++ b/Documentation/perf_counter/util/parse-options.h | |||
@@ -0,0 +1,172 @@ | |||
1 | #ifndef PARSE_OPTIONS_H | ||
2 | #define PARSE_OPTIONS_H | ||
3 | |||
/* Kind of an entry in an option table. */
enum parse_opt_type {
	/* special types */
	OPTION_END = 0,		/* terminates the table */
	OPTION_ARGUMENT,
	OPTION_GROUP,		/* header line in the usage output */

	/* options with no arguments */
	OPTION_BIT,
	OPTION_BOOLEAN,		/* _INCR would have been a better name */
	OPTION_SET_INT,
	OPTION_SET_PTR,

	/* options with arguments (usually) */
	OPTION_STRING,
	OPTION_INTEGER,
	OPTION_CALLBACK,
};
19 | |||
/* Parser-wide flags, passed as `flags` to parse_options{,_start}(). */
enum parse_opt_flags {
	PARSE_OPT_KEEP_DASHDASH      = 1 << 0,
	PARSE_OPT_STOP_AT_NON_OPTION = 1 << 1,
	PARSE_OPT_KEEP_ARGV0         = 1 << 2,
	PARSE_OPT_KEEP_UNKNOWN       = 1 << 3,
	PARSE_OPT_NO_INTERNAL_HELP   = 1 << 4,
};
27 | |||
/* Per-option flags, stored in the `flags` member of struct option. */
enum parse_opt_option_flags {
	PARSE_OPT_OPTARG          = 1 << 0,
	PARSE_OPT_NOARG           = 1 << 1,
	PARSE_OPT_NONEG           = 1 << 2,
	PARSE_OPT_HIDDEN          = 1 << 3,
	PARSE_OPT_LASTARG_DEFAULT = 1 << 4,
};
35 | |||
36 | struct option; | ||
37 | typedef int parse_opt_cb(const struct option *, const char *arg, int unset); | ||
38 | |||
39 | /* | ||
40 | * `type`:: | ||
41 | * holds the type of the option, you must have an OPTION_END last in your | ||
42 | * array. | ||
43 | * | ||
44 | * `short_name`:: | ||
45 | * the character to use as a short option name, '\0' if none. | ||
46 | * | ||
47 | * `long_name`:: | ||
48 | * the long option name, without the leading dashes, NULL if none. | ||
49 | * | ||
50 | * `value`:: | ||
51 | * stores pointers to the values to be filled. | ||
52 | * | ||
53 | * `argh`:: | ||
54 | * token to explain the kind of argument this option wants. Keep it | ||
55 | * homogenous across the repository. | ||
56 | * | ||
57 | * `help`:: | ||
58 | * the short help associated to what the option does. | ||
59 | * Must never be NULL (except for OPTION_END). | ||
60 | * OPTION_GROUP uses this pointer to store the group header. | ||
61 | * | ||
62 | * `flags`:: | ||
63 | * mask of parse_opt_option_flags. | ||
64 | * PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs) | ||
65 | * PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs | ||
66 | * PARSE_OPT_NONEG: says that this option cannot be negated | ||
67 | * PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in | ||
68 | * the long one. | ||
69 | * | ||
70 | * `callback`:: | ||
71 | * pointer to the callback to use for OPTION_CALLBACK. | ||
72 | * | ||
73 | * `defval`:: | ||
74 | * default value to fill (*->value) with for PARSE_OPT_OPTARG. | ||
75 | * OPTION_{BIT,SET_INT,SET_PTR} store the {mask,integer,pointer} to put in | ||
76 | * the value when met. | ||
77 | * CALLBACKS can use it like they want. | ||
78 | */ | ||
79 | struct option { | ||
80 | enum parse_opt_type type; | ||
81 | int short_name; | ||
82 | const char *long_name; | ||
83 | void *value; | ||
84 | const char *argh; | ||
85 | const char *help; | ||
86 | |||
87 | int flags; | ||
88 | parse_opt_cb *callback; | ||
89 | intptr_t defval; | ||
90 | }; | ||
91 | |||
/*
 * Positional initializers for struct option.  Argument letters:
 * s = short name, l = long name, v = value pointer, a = argh,
 * h = help text, b/i/p = bit mask / integer / pointer stored in defval,
 * f = callback.
 */
#define OPT_END()			{ OPTION_END }
#define OPT_ARGUMENT(l, h)		{ OPTION_ARGUMENT, 0, (l), NULL, NULL, (h) }
#define OPT_GROUP(h)			{ OPTION_GROUP, 0, NULL, NULL, NULL, (h) }
#define OPT_BIT(s, l, v, h, b)		{ OPTION_BIT, (s), (l), (v), NULL, (h), 0, NULL, (b) }
#define OPT_BOOLEAN(s, l, v, h)		{ OPTION_BOOLEAN, (s), (l), (v), NULL, (h) }
#define OPT_SET_INT(s, l, v, h, i)	{ OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) }
#define OPT_SET_PTR(s, l, v, h, p)	{ OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) }
#define OPT_INTEGER(s, l, v, h)		{ OPTION_INTEGER, (s), (l), (v), NULL, (h) }
#define OPT_STRING(s, l, v, a, h)	{ OPTION_STRING, (s), (l), (v), (a), (h) }
#define OPT_DATE(s, l, v, h) \
	{ OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \
	  parse_opt_approxidate_cb }
#define OPT_CALLBACK(s, l, v, a, h, f) \
	{ OPTION_CALLBACK, (s), (l), (v), (a), (h), 0, (f) }
106 | |||
107 | /* parse_options() will filter out the processed options and leave the | ||
108 | * non-option argments in argv[]. | ||
109 | * Returns the number of arguments left in argv[]. | ||
110 | */ | ||
111 | extern int parse_options(int argc, const char **argv, | ||
112 | const struct option *options, | ||
113 | const char * const usagestr[], int flags); | ||
114 | |||
115 | extern NORETURN void usage_with_options(const char * const *usagestr, | ||
116 | const struct option *options); | ||
117 | |||
/*----- incremental advanced APIs -----*/

/* Result codes of parse_options_step(). */
enum {
	PARSE_OPT_HELP = -1,
	PARSE_OPT_DONE,
	PARSE_OPT_UNKNOWN,
};

/*
 * Parser state.  The caller may consume argv/argc in the usual way.
 * All other fields are private to parse-options and must not be
 * modified.
 */
struct parse_opt_ctx_t {
	const char **argv;
	const char **out;
	int argc, cpidx;
	const char *opt;
	int flags;
};

extern int parse_options_usage(const char * const *usagestr,
			       const struct option *opts);

extern void parse_options_start(struct parse_opt_ctx_t *ctx,
				int argc, const char **argv, int flags);

extern int parse_options_step(struct parse_opt_ctx_t *ctx,
			      const struct option *options,
			      const char * const usagestr[]);

extern int parse_options_end(struct parse_opt_ctx_t *ctx);
150 | |||
151 | |||
/*----- some often used options -----*/

/* Stock OPTION_CALLBACK handlers. */
extern int parse_opt_abbrev_cb(const struct option *, const char *, int);
extern int parse_opt_approxidate_cb(const struct option *, const char *, int);
extern int parse_opt_verbosity_cb(const struct option *, const char *, int);

/* Ready-made table entries; `var` is the address of the target. */
#define OPT__VERBOSE(var)	OPT_BOOLEAN('v', "verbose", (var), "be verbose")
#define OPT__QUIET(var)		OPT_BOOLEAN('q', "quiet", (var), "be quiet")
/* Expands to TWO entries (-v and -q) that share one target. */
#define OPT__VERBOSITY(var) \
	{ OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \
	  PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \
	{ OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \
	  PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }
#define OPT__DRY_RUN(var)	OPT_BOOLEAN('n', "dry-run", (var), "dry run")
#define OPT__ABBREV(var) \
	{ OPTION_CALLBACK, 0, "abbrev", (var), "n", \
	  "use <n> digits to display SHA-1s", \
	  PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 }

extern const char *parse_options_fix_filename(const char *prefix, const char *file);
171 | |||
172 | #endif | ||
diff --git a/Documentation/perf_counter/util/path.c b/Documentation/perf_counter/util/path.c new file mode 100644 index 000000000000..a501a40dd2cb --- /dev/null +++ b/Documentation/perf_counter/util/path.c | |||
@@ -0,0 +1,353 @@ | |||
1 | /* | ||
2 | * I'm tired of doing "vsnprintf()" etc just to open a | ||
3 | * file, so here's a "return static buffer with printf" | ||
4 | * interface for paths. | ||
5 | * | ||
6 | * It's obviously not thread-safe. Sue me. But it's quite | ||
7 | * useful for doing things like | ||
8 | * | ||
9 | * f = open(mkpath("%s/%s.perf", base, name), O_RDONLY); | ||
10 | * | ||
11 | * which is what it's designed for. | ||
12 | */ | ||
13 | #include "cache.h" | ||
14 | |||
/* Path handed back by the builders below when a result does not fit. */
static char bad_path[] = "/bad-path/";
/*
 * Two hacks:
 */

/* Directory that perf paths are built under; currently always ".". */
static char *get_perf_dir(void)
{
	char *dir = ".";

	return dir;
}
24 | |||
/*
 * Copy `src` into the `size`-byte buffer `dest`, truncating if needed
 * and always NUL-terminating when size is non-zero.  Returns
 * strlen(src), so a return value >= size means truncation happened.
 */
size_t strlcpy(char *dest, const char *src, size_t size)
{
	const size_t src_len = strlen(src);
	size_t ncopy;

	if (size == 0)
		return src_len;

	ncopy = (src_len < size) ? src_len : size - 1;
	memcpy(dest, src, ncopy);
	dest[ncopy] = '\0';
	return src_len;
}
36 | |||
37 | |||
/*
 * Hand out one of four static PATH_MAX-sized buffers in rotation, so a
 * returned buffer is reused on the fourth following call.
 */
static char *get_pathname(void)
{
	static char pathname_array[4][PATH_MAX];
	static int index;

	index++;
	return pathname_array[index & 3];
}
44 | |||
/*
 * Skip a leading "./" (and any further slashes right after it) and
 * return a pointer into `path` at the first character that remains.
 * The string itself is not modified.
 */
static char *cleanup_path(char *path)
{
	/*
	 * Test the characters one at a time: the && stops at the
	 * terminating NUL, so a string shorter than two bytes is never
	 * read past its end.
	 */
	if (path[0] == '.' && path[1] == '/') {
		path += 2;
		while (*path == '/')
			path++;
	}
	return path;
}
55 | |||
56 | char *mksnpath(char *buf, size_t n, const char *fmt, ...) | ||
57 | { | ||
58 | va_list args; | ||
59 | unsigned len; | ||
60 | |||
61 | va_start(args, fmt); | ||
62 | len = vsnprintf(buf, n, fmt, args); | ||
63 | va_end(args); | ||
64 | if (len >= n) { | ||
65 | strlcpy(buf, bad_path, n); | ||
66 | return buf; | ||
67 | } | ||
68 | return cleanup_path(buf); | ||
69 | } | ||
70 | |||
71 | static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args) | ||
72 | { | ||
73 | const char *perf_dir = get_perf_dir(); | ||
74 | size_t len; | ||
75 | |||
76 | len = strlen(perf_dir); | ||
77 | if (n < len + 1) | ||
78 | goto bad; | ||
79 | memcpy(buf, perf_dir, len); | ||
80 | if (len && !is_dir_sep(perf_dir[len-1])) | ||
81 | buf[len++] = '/'; | ||
82 | len += vsnprintf(buf + len, n - len, fmt, args); | ||
83 | if (len >= n) | ||
84 | goto bad; | ||
85 | return cleanup_path(buf); | ||
86 | bad: | ||
87 | strlcpy(buf, bad_path, n); | ||
88 | return buf; | ||
89 | } | ||
90 | |||
/*
 * Variadic wrapper around perf_vsnpath().  Always returns `buf`
 * itself; the cleaned-up pointer from perf_vsnpath() is discarded.
 */
char *perf_snpath(char *buf, size_t n, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void)perf_vsnpath(buf, n, fmt, ap);
	va_end(ap);

	return buf;
}
99 | |||
100 | char *perf_pathdup(const char *fmt, ...) | ||
101 | { | ||
102 | char path[PATH_MAX]; | ||
103 | va_list args; | ||
104 | va_start(args, fmt); | ||
105 | (void)perf_vsnpath(path, sizeof(path), fmt, args); | ||
106 | va_end(args); | ||
107 | return xstrdup(path); | ||
108 | } | ||
109 | |||
110 | char *mkpath(const char *fmt, ...) | ||
111 | { | ||
112 | va_list args; | ||
113 | unsigned len; | ||
114 | char *pathname = get_pathname(); | ||
115 | |||
116 | va_start(args, fmt); | ||
117 | len = vsnprintf(pathname, PATH_MAX, fmt, args); | ||
118 | va_end(args); | ||
119 | if (len >= PATH_MAX) | ||
120 | return bad_path; | ||
121 | return cleanup_path(pathname); | ||
122 | } | ||
123 | |||
124 | char *perf_path(const char *fmt, ...) | ||
125 | { | ||
126 | const char *perf_dir = get_perf_dir(); | ||
127 | char *pathname = get_pathname(); | ||
128 | va_list args; | ||
129 | unsigned len; | ||
130 | |||
131 | len = strlen(perf_dir); | ||
132 | if (len > PATH_MAX-100) | ||
133 | return bad_path; | ||
134 | memcpy(pathname, perf_dir, len); | ||
135 | if (len && perf_dir[len-1] != '/') | ||
136 | pathname[len++] = '/'; | ||
137 | va_start(args, fmt); | ||
138 | len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args); | ||
139 | va_end(args); | ||
140 | if (len >= PATH_MAX) | ||
141 | return bad_path; | ||
142 | return cleanup_path(pathname); | ||
143 | } | ||
144 | |||
145 | |||
/*
 * perf_mkstemp() - create tmp file honoring TMPDIR variable
 *
 * Writes "<dir>/<template>" into the len-byte buffer `path`, where
 * <dir> is $TMPDIR or "/tmp" if that is unset, and passes it to
 * mkstemp().  Returns mkstemp()'s result, or -1 with errno set to
 * ENAMETOOLONG if the name does not fit.
 */
int perf_mkstemp(char *path, size_t len, const char *template)
{
	const char *dir = getenv("TMPDIR");
	size_t needed;

	if (!dir)
		dir = "/tmp";

	needed = snprintf(path, len, "%s/%s", dir, template);
	if (needed >= len) {
		errno = ENAMETOOLONG;
		return -1;
	}
	return mkstemp(path);
}
162 | |||
163 | |||
164 | const char *make_relative_path(const char *abs, const char *base) | ||
165 | { | ||
166 | static char buf[PATH_MAX + 1]; | ||
167 | int baselen; | ||
168 | if (!base) | ||
169 | return abs; | ||
170 | baselen = strlen(base); | ||
171 | if (prefixcmp(abs, base)) | ||
172 | return abs; | ||
173 | if (abs[baselen] == '/') | ||
174 | baselen++; | ||
175 | else if (base[baselen - 1] != '/') | ||
176 | return abs; | ||
177 | strcpy(buf, abs + baselen); | ||
178 | return buf; | ||
179 | } | ||
180 | |||
/*
 * It is okay if dst == src, but they should not overlap otherwise.
 *
 * Performs the following normalizations on src, storing the result in dst:
 * - Ensures that components are separated by '/' (Windows only)
 * - Squashes sequences of '/'.
 * - Removes "." components.
 * - Removes ".." components, and the components that precede them.
 * Returns failure (non-zero) if a ".." component appears as first path
 * component anytime during the normalization. Otherwise, returns success (0).
 *
 * Note that this function is purely textual. It does not follow symlinks,
 * verify the existence of the path, or make any system calls.
 */
int normalize_path_copy(char *dst, const char *src)
{
	char *out_start;	/* first output byte after any drive prefix */

	if (has_dos_drive_prefix(src)) {
		*dst++ = *src++;
		*dst++ = *src++;
	}
	out_start = dst;

	/* a leading run of separators collapses into a single '/' */
	if (is_dir_sep(*src)) {
		*dst++ = '/';
		while (is_dir_sep(*src))
			src++;
	}

	for (;;) {
		char c = *src;

		/*
		 * A component that begins with '.' may be special:
		 * (1) "."  then end   -- ignore and terminate.
		 * (2) "./"            -- ignore, eat slashes, continue.
		 * (3) ".." then end   -- strip one and terminate.
		 * (4) "../"           -- strip one, eat slashes, continue.
		 */
		if (c == '.') {
			if (!src[1]) {
				/* (1) */
				src++;
			} else if (is_dir_sep(src[1])) {
				/* (2) */
				src += 2;
				while (is_dir_sep(*src))
					src++;
				continue;
			} else if (src[1] == '.') {
				if (!src[2]) {
					/* (3) */
					src += 2;
					goto up_one;
				} else if (is_dir_sep(src[2])) {
					/* (4) */
					src += 3;
					while (is_dir_sep(*src))
						src++;
					goto up_one;
				}
			}
		}

		/* copy one component, up to the next separator or NUL */
		for (;;) {
			c = *src++;
			if (c == '\0' || is_dir_sep(c))
				break;
			*dst++ = c;
		}
		if (is_dir_sep(c)) {
			/* emit one '/' and swallow the rest of the run */
			*dst++ = '/';
			while (is_dir_sep(*src))
				src++;
		} else if (!c)
			break;
		continue;

	up_one:
		/*
		 * out_start..dst is the prefix written so far and dst[-1]
		 * is '/'; go up one level.
		 */
		dst--;	/* go to trailing '/' */
		if (dst <= out_start)
			return -1;
		/* Windows: dst[-1] cannot be backslash anymore */
		while (out_start < dst && dst[-1] != '/')
			dst--;
	}
	*dst = '\0';
	return 0;
}
274 | |||
275 | /* | ||
276 | * path = Canonical absolute path | ||
277 | * prefix_list = Colon-separated list of absolute paths | ||
278 | * | ||
279 | * Determines, for each path in prefix_list, whether the "prefix" really | ||
280 | * is an ancestor directory of path. Returns the length of the longest | ||
281 | * ancestor directory, excluding any trailing slashes, or -1 if no prefix | ||
282 | * is an ancestor. (Note that this means 0 is returned if prefix_list is | ||
283 | * "/".) "/foo" is not considered an ancestor of "/foobar". Directories | ||
284 | * are not considered to be their own ancestors. path must be in a | ||
285 | * canonical form: empty components, or "." or ".." components are not | ||
286 | * allowed. prefix_list may be null, which is like "". | ||
287 | */ | ||
288 | int longest_ancestor_length(const char *path, const char *prefix_list) | ||
289 | { | ||
290 | char buf[PATH_MAX+1]; | ||
291 | const char *ceil, *colon; | ||
292 | int len, max_len = -1; | ||
293 | |||
294 | if (prefix_list == NULL || !strcmp(path, "/")) | ||
295 | return -1; | ||
296 | |||
297 | for (colon = ceil = prefix_list; *colon; ceil = colon+1) { | ||
298 | for (colon = ceil; *colon && *colon != PATH_SEP; colon++); | ||
299 | len = colon - ceil; | ||
300 | if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil)) | ||
301 | continue; | ||
302 | strlcpy(buf, ceil, len+1); | ||
303 | if (normalize_path_copy(buf, buf) < 0) | ||
304 | continue; | ||
305 | len = strlen(buf); | ||
306 | if (len > 0 && buf[len-1] == '/') | ||
307 | buf[--len] = '\0'; | ||
308 | |||
309 | if (!strncmp(path, buf, len) && | ||
310 | path[len] == '/' && | ||
311 | len > max_len) { | ||
312 | max_len = len; | ||
313 | } | ||
314 | } | ||
315 | |||
316 | return max_len; | ||
317 | } | ||
318 | |||
/*
 * Return `len` reduced by the number of directory separators at the
 * end of path[0..len-1].  The string itself is not modified.
 */
static inline int chomp_trailing_dir_sep(const char *path, int len)
{
	for (; len && is_dir_sep(path[len - 1]); len--)
		; /* nothing */
	return len;
}
326 | |||
/*
 * If path ends with suffix, matched as whole path components, return a
 * copy (made with xstrndup()) of the part of path in front of the
 * suffix, without trailing directory separators.
 * Otherwise return NULL.
 */
char *strip_path_suffix(const char *path, const char *suffix)
{
	int path_len = strlen(path), suffix_len = strlen(suffix);

	/* walk both strings backwards in lock-step */
	while (suffix_len) {
		if (!path_len)
			return NULL;

		if (is_dir_sep(path[path_len - 1])) {
			/* a run of separators matches a run of separators */
			if (!is_dir_sep(suffix[suffix_len - 1]))
				return NULL;
			path_len = chomp_trailing_dir_sep(path, path_len);
			suffix_len = chomp_trailing_dir_sep(suffix, suffix_len);
		} else {
			path_len--;
			suffix_len--;
			if (path[path_len] != suffix[suffix_len])
				return NULL;
		}
	}

	/* the suffix must start on a component boundary of path */
	if (path_len && !is_dir_sep(path[path_len - 1]))
		return NULL;
	return xstrndup(path, chomp_trailing_dir_sep(path, path_len));
}
diff --git a/Documentation/perf_counter/util/quote.c b/Documentation/perf_counter/util/quote.c new file mode 100644 index 000000000000..7a49fcf69671 --- /dev/null +++ b/Documentation/perf_counter/util/quote.c | |||
@@ -0,0 +1,478 @@ | |||
1 | #include "cache.h" | ||
2 | #include "quote.h" | ||
3 | |||
4 | int quote_path_fully = 1; | ||
5 | |||
/*
 * Help to copy the thing properly quoted for the shell safety.
 * Any single quote is replaced with '\'', any exclamation point
 * is replaced with '\!', and the whole thing is enclosed in a
 * single quote pair.
 *
 * E.g.
 *  original     sq_quote     result
 *  name     ==> name      ==> 'name'
 *  a b      ==> a b       ==> 'a b'
 *  a'b      ==> a'\''b    ==> 'a'\''b'
 *  a!b      ==> a'\!'b    ==> 'a'\!'b'
 */

/* Nonzero for the two characters that are emitted as '\c' outside the quotes. */
static inline int need_bs_quote(char c)
{
	switch (c) {
	case '\'':
	case '!':
		return 1;
	default:
		return 0;
	}
}
21 | |||
22 | void sq_quote_buf(struct strbuf *dst, const char *src) | ||
23 | { | ||
24 | char *to_free = NULL; | ||
25 | |||
26 | if (dst->buf == src) | ||
27 | to_free = strbuf_detach(dst, NULL); | ||
28 | |||
29 | strbuf_addch(dst, '\''); | ||
30 | while (*src) { | ||
31 | size_t len = strcspn(src, "'!"); | ||
32 | strbuf_add(dst, src, len); | ||
33 | src += len; | ||
34 | while (need_bs_quote(*src)) { | ||
35 | strbuf_addstr(dst, "'\\"); | ||
36 | strbuf_addch(dst, *src++); | ||
37 | strbuf_addch(dst, '\''); | ||
38 | } | ||
39 | } | ||
40 | strbuf_addch(dst, '\''); | ||
41 | free(to_free); | ||
42 | } | ||
43 | |||
/* Write the shell-quoted form of src to stream, one character at a time. */
void sq_quote_print(FILE *stream, const char *src)
{
	const char *p;

	fputc('\'', stream);
	for (p = src; *p; p++) {
		if (!need_bs_quote(*p)) {
			fputc(*p, stream);
			continue;
		}
		/* close the quote, emit \c, reopen the quote */
		fputs("'\\", stream);
		fputc(*p, stream);
		fputc('\'', stream);
	}
	fputc('\'', stream);
}
60 | |||
61 | void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen) | ||
62 | { | ||
63 | int i; | ||
64 | |||
65 | /* Copy into destination buffer. */ | ||
66 | strbuf_grow(dst, 255); | ||
67 | for (i = 0; argv[i]; ++i) { | ||
68 | strbuf_addch(dst, ' '); | ||
69 | sq_quote_buf(dst, argv[i]); | ||
70 | if (maxlen && dst->len > maxlen) | ||
71 | die("Too many or long arguments"); | ||
72 | } | ||
73 | } | ||
74 | |||
/*
 * Undo sq_quote-style quoting of the first argument in arg, in place.
 *
 * arg must start with a single quote.  The dequoted text is written back
 * over arg (it is never longer than the quoted form) and NUL-terminated.
 *
 * Returns arg on success, NULL if the input is malformed.  On success:
 *  - if the quoted string ran to the end of the input, *next (when next
 *    is non-NULL) is set to NULL;
 *  - if whitespace follows the closing quote, *next is set to the first
 *    character after that whitespace.  This is only accepted when next
 *    is non-NULL; with next == NULL it is treated as malformed input.
 */
char *sq_dequote_step(char *arg, char **next)
{
	char *dst = arg;
	char *src = arg;
	char c;

	if (*src != '\'')
		return NULL;
	for (;;) {
		c = *++src;
		/* input ended while still inside the quotes */
		if (!c)
			return NULL;
		if (c != '\'') {
			*dst++ = c;
			continue;
		}
		/* We stepped out of sq */
		switch (*++src) {
		case '\0':
			*dst = 0;
			if (next)
				*next = NULL;
			return arg;
		case '\\':
			/* expect \' or \! followed by a reopening quote */
			c = *++src;
			if (need_bs_quote(c) && *++src == '\'') {
				*dst++ = c;
				continue;
			}
		/* Fallthrough */
		default:
			/*
			 * NOTE(review): isspace() is handed a plain char here;
			 * confirm the project's isspace tolerates negative values.
			 */
			if (!next || !isspace(*src))
				return NULL;
			/* skip the whole whitespace run between arguments */
			do {
				c = *++src;
			} while (isspace(c));
			*dst = 0;
			*next = src;
			return arg;
		}
	}
}
117 | |||
/*
 * Dequote a single argument in place.  Passing next == NULL to
 * sq_dequote_step() means anything after the closing quote other than
 * the end of the string is rejected (NULL is returned).
 */
char *sq_dequote(char *arg)
{
	return sq_dequote_step(arg, NULL);
}
122 | |||
/*
 * Dequote every argument in arg (in place) and append pointers to the
 * dequoted strings to the growable array *argv, tracked by *nr / *alloc.
 *
 * Returns 0 on success (including for an empty arg) and -1 as soon as
 * one argument fails to dequote; entries appended before the failure
 * stay in the array.
 */
int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc)
{
	char *rest;

	if (*arg == '\0')
		return 0;

	for (rest = arg; rest; ) {
		char *word = sq_dequote_step(rest, &rest);

		if (!word)
			return -1;
		ALLOC_GROW(*argv, *nr + 1, *alloc);
		(*argv)[*nr] = word;
		(*nr)++;
	}

	return 0;
}
139 | |||
/*
 * Per-byte quoting policy for C-style quoting, indexed by unsigned char.
 *
 * 1 means: quote as octal
 * 0 means: quote as octal if (quote_path_fully)
 * -1 means: never quote
 * c: quote as "\\c"
 *
 * Entries 0x80..0xff are not listed and are therefore zero-initialized,
 * i.e. they fall into the "0" class above.
 */
#define X8(x) x, x, x, x, x, x, x, x
#define X16(x) X8(x), X8(x)
static signed char const sq_lookup[256] = {
	/*           0    1    2    3    4    5    6    7 */
	/* 0x00 */   1,   1,   1,   1,   1,   1,   1, 'a',
	/* 0x08 */ 'b', 't', 'n', 'v', 'f', 'r',   1,   1,
	/* 0x10 */ X16(1),
	/* 0x20 */  -1,  -1, '"',  -1,  -1,  -1,  -1,  -1,
	/* 0x28 */ X16(-1), X16(-1), X16(-1),
	/* 0x58 */  -1,  -1,  -1,  -1,'\\',  -1,  -1,  -1,
	/* 0x60 */ X16(-1), X8(-1),
	/* 0x78 */  -1,  -1,  -1,  -1,  -1,  -1,  -1,   1,
	/* 0x80 */ /* set to 0 */
};
159 | |||
160 | static inline int sq_must_quote(char c) | ||
161 | { | ||
162 | return sq_lookup[(unsigned char)c] + quote_path_fully > 0; | ||
163 | } | ||
164 | |||
/*
 * Return the length of the longest prefix of s that needs no quoting.
 * A non-negative maxlen caps the scan at maxlen bytes; a negative maxlen
 * means "scan until a byte needs quoting".  The scan always stops at the
 * first \0 because that byte is marked as needing an escape.
 */
static size_t next_quote_pos(const char *s, ssize_t maxlen)
{
	size_t pos = 0;

	if (maxlen < 0) {
		while (!sq_must_quote(s[pos]))
			pos++;
		return pos;
	}

	while (pos < maxlen && !sq_must_quote(s[pos]))
		pos++;
	return pos;
}
178 | |||
179 | /* | ||
180 | * C-style name quoting. | ||
181 | * | ||
182 | * (1) if sb and fp are both NULL, inspect the input name and counts the | ||
183 | * number of bytes that are needed to hold c_style quoted version of name, | ||
184 | * counting the double quotes around it but not terminating NUL, and | ||
185 | * returns it. | ||
186 | * However, if name does not need c_style quoting, it returns 0. | ||
187 | * | ||
188 | * (2) if sb or fp are not NULL, it emits the c_style quoted version | ||
189 | * of name, enclosed with double quotes if asked and needed only. | ||
190 | * Return value is the same as in (1). | ||
191 | */ | ||
192 | static size_t quote_c_style_counted(const char *name, ssize_t maxlen, | ||
193 | struct strbuf *sb, FILE *fp, int no_dq) | ||
194 | { | ||
195 | #undef EMIT | ||
196 | #define EMIT(c) \ | ||
197 | do { \ | ||
198 | if (sb) strbuf_addch(sb, (c)); \ | ||
199 | if (fp) fputc((c), fp); \ | ||
200 | count++; \ | ||
201 | } while (0) | ||
202 | #define EMITBUF(s, l) \ | ||
203 | do { \ | ||
204 | if (sb) strbuf_add(sb, (s), (l)); \ | ||
205 | if (fp) fwrite((s), (l), 1, fp); \ | ||
206 | count += (l); \ | ||
207 | } while (0) | ||
208 | |||
209 | size_t len, count = 0; | ||
210 | const char *p = name; | ||
211 | |||
212 | for (;;) { | ||
213 | int ch; | ||
214 | |||
215 | len = next_quote_pos(p, maxlen); | ||
216 | if (len == maxlen || !p[len]) | ||
217 | break; | ||
218 | |||
219 | if (!no_dq && p == name) | ||
220 | EMIT('"'); | ||
221 | |||
222 | EMITBUF(p, len); | ||
223 | EMIT('\\'); | ||
224 | p += len; | ||
225 | ch = (unsigned char)*p++; | ||
226 | if (sq_lookup[ch] >= ' ') { | ||
227 | EMIT(sq_lookup[ch]); | ||
228 | } else { | ||
229 | EMIT(((ch >> 6) & 03) + '0'); | ||
230 | EMIT(((ch >> 3) & 07) + '0'); | ||
231 | EMIT(((ch >> 0) & 07) + '0'); | ||
232 | } | ||
233 | } | ||
234 | |||
235 | EMITBUF(p, len); | ||
236 | if (p == name) /* no ending quote needed */ | ||
237 | return 0; | ||
238 | |||
239 | if (!no_dq) | ||
240 | EMIT('"'); | ||
241 | return count; | ||
242 | } | ||
243 | |||
/*
 * C-style quote the NUL-terminated string name; a maxlen of -1 tells
 * quote_c_style_counted() to read up to the terminating NUL.
 */
size_t quote_c_style(const char *name, struct strbuf *sb, FILE *fp, int nodq)
{
	return quote_c_style_counted(name, -1, sb, fp, nodq);
}
248 | |||
/*
 * Append prefix immediately followed by path to sb as one unit.  If
 * either part needs C-style quoting, both are emitted in quoted form and
 * the pair is wrapped in a single set of double quotes (unless nodq is
 * set); otherwise both are appended verbatim.
 */
void quote_two_c_style(struct strbuf *sb, const char *prefix, const char *path, int nodq)
{
	/* dry runs: a nonzero count means quoting is required */
	int must_quote = quote_c_style(prefix, NULL, NULL, 0) ||
			 quote_c_style(path, NULL, NULL, 0);

	if (!must_quote) {
		strbuf_addstr(sb, prefix);
		strbuf_addstr(sb, path);
		return;
	}

	if (!nodq)
		strbuf_addch(sb, '"');
	quote_c_style(prefix, sb, NULL, 1);
	quote_c_style(path, sb, NULL, 1);
	if (!nodq)
		strbuf_addch(sb, '"');
}
264 | |||
/*
 * Write name to fp followed by the terminator byte.  With a NUL
 * terminator the name is written verbatim; with any other terminator it
 * goes through quote_c_style().
 */
void write_name_quoted(const char *name, FILE *fp, int terminator)
{
	if (!terminator)
		fputs(name, fp);
	else
		quote_c_style(name, NULL, fp, 0);
	fputc(terminator, fp);
}
274 | |||
/*
 * Write the first pfxlen bytes of pfx followed by name to fp, then the
 * terminator byte.  With a nonzero terminator, the pair is C-style quoted
 * inside one set of double quotes if either part needs it; with a NUL
 * terminator both parts are always written verbatim.
 */
extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
				 const char *name, FILE *fp, int terminator)
{
	int needquote = terminator &&
		(next_quote_pos(pfx, pfxlen) < pfxlen ||
		 name[next_quote_pos(name, -1)]);

	if (!needquote) {
		fwrite(pfx, pfxlen, 1, fp);
		fputs(name, fp);
	} else {
		fputc('"', fp);
		quote_c_style_counted(pfx, pfxlen, NULL, fp, 1);
		quote_c_style(name, NULL, fp, 1);
		fputc('"', fp);
	}
	fputc(terminator, fp);
}
295 | |||
/*
 * quote path as relative to the given prefix
 *
 * in/len:  the path to rewrite; a negative len means strlen(in).
 * out:     reset to empty, then filled with the result.
 * prefix:  may be NULL, in which case nothing is stripped or prepended.
 *
 * Leading components shared by prefix and in are dropped, and one "../"
 * is emitted for every '/' left in prefix.  The result is wrapped in
 * double quotes when the original in needs C-style quoting.  An empty
 * result is replaced by "./".  Returns out->buf.
 */
char *quote_path_relative(const char *in, int len,
			  struct strbuf *out, const char *prefix)
{
	int needquote;

	if (len < 0)
		len = strlen(in);

	/* "../" prefix itself does not need quoting, but "in" might. */
	needquote = next_quote_pos(in, len) < len;
	strbuf_setlen(out, 0);
	strbuf_grow(out, len);

	if (needquote)
		strbuf_addch(out, '"');
	if (prefix) {
		int off = 0;
		/*
		 * Walk the common part; each time a '/' is matched, commit
		 * the component by advancing prefix and in past it.
		 */
		while (prefix[off] && off < len && prefix[off] == in[off])
			if (prefix[off] == '/') {
				prefix += off + 1;
				in += off + 1;
				len -= off + 1;
				off = 0;
			} else
				off++;

		/* one "../" for every directory level left in prefix */
		for (; *prefix; prefix++)
			if (*prefix == '/')
				strbuf_addstr(out, "../");
	}

	quote_c_style_counted (in, len, out, NULL, 1);

	if (needquote)
		strbuf_addch(out, '"');
	if (!out->len)
		strbuf_addstr(out, "./");

	return out->buf;
}
337 | |||
/*
 * C-style name unquoting.
 *
 * Quoted should point at the opening double quote.
 *  + Returns 0 if it was able to unquote the string properly, and appends the
 *    result in the strbuf `sb'.
 *  + Returns -1 in case of error, and doesn't touch the strbuf. Though note
 *    that this function will allocate memory in the strbuf, so calling
 *    strbuf_release is mandatory whichever result unquote_c_style returns.
 *
 * Updates endp pointer to point at one past the ending double quote if given.
 */
int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp)
{
	/* oldlen lets the error path roll sb back to its length on entry */
	size_t oldlen = sb->len, len;
	int ch, ac;

	if (*quoted++ != '"')
		return -1;

	for (;;) {
		/* copy everything up to the next quote or backslash verbatim */
		len = strcspn(quoted, "\"\\");
		strbuf_add(sb, quoted, len);
		quoted += len;

		switch (*quoted++) {
		case '"':
			if (endp)
				*endp = quoted;
			return 0;
		case '\\':
			break;
		default:
			/* end of input before the closing quote */
			goto error;
		}

		switch ((ch = *quoted++)) {
		case 'a': ch = '\a'; break;
		case 'b': ch = '\b'; break;
		case 'f': ch = '\f'; break;
		case 'n': ch = '\n'; break;
		case 'r': ch = '\r'; break;
		case 't': ch = '\t'; break;
		case 'v': ch = '\v'; break;

		case '\\': case '"':
			break; /* verbatim */

		/* octal values with first digit over 4 overflow */
		case '0': case '1': case '2': case '3':
			/* exactly three octal digits are required */
			ac = ((ch - '0') << 6);
			if ((ch = *quoted++) < '0' || '7' < ch)
				goto error;
			ac |= ((ch - '0') << 3);
			if ((ch = *quoted++) < '0' || '7' < ch)
				goto error;
			ac |= (ch - '0');
			ch = ac;
			break;
		default:
			goto error;
		}
		strbuf_addch(sb, ch);
	}

  error:
	strbuf_setlen(sb, oldlen);
	return -1;
}
407 | |||
408 | /* quoting as a string literal for other languages */ | ||
409 | |||
/*
 * Write src to stream as a single-quoted Perl string literal: single
 * quotes and backslashes are preceded by a backslash, every other byte
 * is copied unchanged.
 */
void perl_quote_print(FILE *stream, const char *src)
{
	const char *p;

	fputc('\'', stream);
	for (p = src; *p != '\0'; p++) {
		switch (*p) {
		case '\'':
		case '\\':
			fputc('\\', stream);
			/* fallthrough */
		default:
			fputc(*p, stream);
			break;
		}
	}
	fputc('\'', stream);
}
424 | |||
/*
 * Write src to stream as a single-quoted Python string literal: a
 * newline becomes the two characters \n, single quotes and backslashes
 * are preceded by a backslash, every other byte is copied unchanged.
 */
void python_quote_print(FILE *stream, const char *src)
{
	const char *p;

	fputc('\'', stream);
	for (p = src; *p != '\0'; p++) {
		switch (*p) {
		case '\n':
			fputc('\\', stream);
			fputc('n', stream);
			break;
		case '\'':
		case '\\':
			fputc('\\', stream);
			/* fallthrough */
		default:
			fputc(*p, stream);
			break;
		}
	}
	fputc('\'', stream);
}
445 | |||
/*
 * Write src to stream as a double-quoted Tcl string: brackets, braces,
 * '$', backslash and '"' get a leading backslash; form feed, carriage
 * return, newline, tab and vertical tab are written as \f \r \n \t \v;
 * every other byte is copied unchanged.
 */
void tcl_quote_print(FILE *stream, const char *src)
{
	const char *p;

	fputc('"', stream);
	for (p = src; *p; p++) {
		const char *mnemonic = NULL;

		switch (*p) {
		case '\f': mnemonic = "\\f"; break;
		case '\r': mnemonic = "\\r"; break;
		case '\n': mnemonic = "\\n"; break;
		case '\t': mnemonic = "\\t"; break;
		case '\v': mnemonic = "\\v"; break;
		case '[': case ']':
		case '{': case '}':
		case '$': case '\\': case '"':
			/* special to Tcl: keep the byte, but escape it */
			fputc('\\', stream);
			break;
		default:
			break;
		}

		if (mnemonic)
			fputs(mnemonic, stream);
		else
			fputc(*p, stream);
	}
	fputc('"', stream);
}
diff --git a/Documentation/perf_counter/util/quote.h b/Documentation/perf_counter/util/quote.h new file mode 100644 index 000000000000..5dfad89816db --- /dev/null +++ b/Documentation/perf_counter/util/quote.h | |||
@@ -0,0 +1,68 @@ | |||
1 | #ifndef QUOTE_H | ||
2 | #define QUOTE_H | ||
3 | |||
4 | #include <stddef.h> | ||
5 | #include <stdio.h> | ||
6 | |||
7 | /* Help to copy the thing properly quoted for the shell safety. | ||
8 | * any single quote is replaced with '\'', any exclamation point | ||
9 | * is replaced with '\!', and the whole thing is enclosed in a | ||
10 | * single quote pair. | ||
11 | * | ||
12 | * For example, if you are passing the result to system() as an | ||
13 | * argument: | ||
14 | * | ||
15 | * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1)) | ||
16 | * | ||
17 | * would be appropriate. If the system() is going to call ssh to | ||
18 | * run the command on the other side: | ||
19 | * | ||
20 | * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1)); | ||
21 | * sprintf(rcmd, "ssh %s %s", sq_quote(host), sq_quote(cmd)); | ||
22 | * | ||
23 | * Note that the above examples leak memory! Remember to free result from | ||
24 | * sq_quote() in a real application. | ||
25 | * | ||
26 | * sq_quote_buf() appends the quoted form of src to an existing | ||
27 | * strbuf; it returns nothing, so the quoted length has to be | ||
28 | * derived from the strbuf itself. | ||
29 | */ | ||
30 | |||
31 | extern void sq_quote_print(FILE *stream, const char *src); | ||
32 | |||
33 | extern void sq_quote_buf(struct strbuf *, const char *src); | ||
34 | extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen); | ||
35 | |||
36 | /* This unwraps what sq_quote() produces in place, but returns | ||
37 | * NULL if the input does not look like what sq_quote would have | ||
38 | * produced. | ||
39 | */ | ||
40 | extern char *sq_dequote(char *); | ||
41 | |||
42 | /* | ||
43 | * Same as the above, but can be used to unwrap many arguments in the | ||
44 | * same string separated by space. "next" is changed to point to the | ||
45 | * next argument that should be passed as first parameter. When there | ||
46 | * is no more argument to be dequoted, "next" is updated to point to NULL. | ||
47 | */ | ||
48 | extern char *sq_dequote_step(char *arg, char **next); | ||
49 | extern int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc); | ||
50 | |||
51 | extern int unquote_c_style(struct strbuf *, const char *quoted, const char **endp); | ||
52 | extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq); | ||
53 | extern void quote_two_c_style(struct strbuf *, const char *, const char *, int); | ||
54 | |||
55 | extern void write_name_quoted(const char *name, FILE *, int terminator); | ||
56 | extern void write_name_quotedpfx(const char *pfx, size_t pfxlen, | ||
57 | const char *name, FILE *, int terminator); | ||
58 | |||
59 | /* quote path as relative to the given prefix */ | ||
60 | char *quote_path_relative(const char *in, int len, | ||
61 | struct strbuf *out, const char *prefix); | ||
62 | |||
63 | /* quoting as a string literal for other languages */ | ||
64 | extern void perl_quote_print(FILE *stream, const char *src); | ||
65 | extern void python_quote_print(FILE *stream, const char *src); | ||
66 | extern void tcl_quote_print(FILE *stream, const char *src); | ||
67 | |||
68 | #endif | ||
diff --git a/Documentation/perf_counter/util/run-command.c b/Documentation/perf_counter/util/run-command.c new file mode 100644 index 000000000000..b2f5e854f40a --- /dev/null +++ b/Documentation/perf_counter/util/run-command.c | |||
@@ -0,0 +1,395 @@ | |||
1 | #include "cache.h" | ||
2 | #include "run-command.h" | ||
3 | #include "exec_cmd.h" | ||
4 | |||
/* Close both descriptors of a pipe pair, fd[0] first. */
static inline void close_pair(int fd[2])
{
	int i;

	for (i = 0; i < 2; i++)
		close(fd[i]);
}
10 | |||
/*
 * Make descriptor `to` refer to /dev/null (opened read-write).
 *
 * Best effort: if /dev/null cannot be opened, `to` is left as it was.
 */
static inline void dup_devnull(int to)
{
	int fd = open("/dev/null", O_RDWR);

	if (fd < 0)
		return;
	/*
	 * If `to` was closed on entry, open() may hand back `to` itself.
	 * It already is what the caller asked for; closing it would undo
	 * the redirection.
	 */
	if (fd == to)
		return;
	dup2(fd, to);
	close(fd);
}
17 | |||
/*
 * Start the child process described by cmd without waiting for it.
 *
 * For each of stdin/stdout/stderr a pipe is created when the matching
 * field (cmd->in, cmd->out, cmd->err) is negative and the stream is not
 * disabled (no_stdin / no_stdout / no_stderr) or, for stdout, redirected
 * by stdout_to_stderr.  The parent's end of each new pipe is stored back
 * into cmd->in (write end), cmd->out and cmd->err (read ends).
 *
 * Returns 0 once the child has been started, with cmd->pid set.
 * Returns -ERR_RUN_COMMAND_PIPE if a pipe cannot be created, and
 * -ERR_RUN_COMMAND_EXEC (errno == ENOENT) or -ERR_RUN_COMMAND_FORK
 * (any other errno) if cmd->pid ends up negative.
 */
int start_command(struct child_process *cmd)
{
	int need_in, need_out, need_err;
	int fdin[2], fdout[2], fderr[2];

	/*
	 * In case of errors we must keep the promise to close FDs
	 * that have been passed in via ->in and ->out.
	 */

	need_in = !cmd->no_stdin && cmd->in < 0;
	if (need_in) {
		if (pipe(fdin) < 0) {
			if (cmd->out > 0)
				close(cmd->out);
			return -ERR_RUN_COMMAND_PIPE;
		}
		cmd->in = fdin[1];
	}

	need_out = !cmd->no_stdout
		&& !cmd->stdout_to_stderr
		&& cmd->out < 0;
	if (need_out) {
		if (pipe(fdout) < 0) {
			if (need_in)
				close_pair(fdin);
			else if (cmd->in)
				close(cmd->in);
			return -ERR_RUN_COMMAND_PIPE;
		}
		cmd->out = fdout[0];
	}

	need_err = !cmd->no_stderr && cmd->err < 0;
	if (need_err) {
		if (pipe(fderr) < 0) {
			if (need_in)
				close_pair(fdin);
			else if (cmd->in)
				close(cmd->in);
			if (need_out)
				close_pair(fdout);
			else if (cmd->out)
				close(cmd->out);
			return -ERR_RUN_COMMAND_PIPE;
		}
		cmd->err = fderr[0];
	}

#ifndef __MINGW32__
	fflush(NULL);
	cmd->pid = fork();
	if (!cmd->pid) {
		/* child: wire up fds 0, 2 and 1, then exec */
		if (cmd->no_stdin)
			dup_devnull(0);
		else if (need_in) {
			dup2(fdin[0], 0);
			close_pair(fdin);
		} else if (cmd->in) {
			dup2(cmd->in, 0);
			close(cmd->in);
		}

		if (cmd->no_stderr)
			dup_devnull(2);
		else if (need_err) {
			dup2(fderr[1], 2);
			close_pair(fderr);
		}

		/* stderr is set up first so that stdout_to_stderr copies the final fd 2 */
		if (cmd->no_stdout)
			dup_devnull(1);
		else if (cmd->stdout_to_stderr)
			dup2(2, 1);
		else if (need_out) {
			dup2(fdout[1], 1);
			close_pair(fdout);
		} else if (cmd->out > 1) {
			dup2(cmd->out, 1);
			close(cmd->out);
		}

		if (cmd->dir && chdir(cmd->dir))
			die("exec %s: cd to %s failed (%s)", cmd->argv[0],
			    cmd->dir, strerror(errno));
		if (cmd->env) {
			/* "VAR=VALUE" sets a variable, a bare "VAR" unsets it */
			for (; *cmd->env; cmd->env++) {
				if (strchr(*cmd->env, '='))
					putenv((char*)*cmd->env);
				else
					unsetenv(*cmd->env);
			}
		}
		if (cmd->preexec_cb)
			cmd->preexec_cb();
		if (cmd->perf_cmd) {
			execv_perf_cmd(cmd->argv);
		} else {
			execvp(cmd->argv[0], (char *const*) cmd->argv);
		}
		/* only reached when exec failed; wait_or_whine() maps 127 to ERR_RUN_COMMAND_EXEC */
		exit(127);
	}
#else
	int s0 = -1, s1 = -1, s2 = -1;	/* backups of stdin, stdout, stderr */
	const char **sargv = cmd->argv;
	char **env = environ;

	/*
	 * No fork here: the parent's own fds 0/1/2 are redirected for the
	 * duration of the spawn and restored from the backups afterwards.
	 */
	if (cmd->no_stdin) {
		s0 = dup(0);
		dup_devnull(0);
	} else if (need_in) {
		s0 = dup(0);
		dup2(fdin[0], 0);
	} else if (cmd->in) {
		s0 = dup(0);
		dup2(cmd->in, 0);
	}

	if (cmd->no_stderr) {
		s2 = dup(2);
		dup_devnull(2);
	} else if (need_err) {
		s2 = dup(2);
		dup2(fderr[1], 2);
	}

	if (cmd->no_stdout) {
		s1 = dup(1);
		dup_devnull(1);
	} else if (cmd->stdout_to_stderr) {
		s1 = dup(1);
		dup2(2, 1);
	} else if (need_out) {
		s1 = dup(1);
		dup2(fdout[1], 1);
	} else if (cmd->out > 1) {
		s1 = dup(1);
		dup2(cmd->out, 1);
	}

	if (cmd->dir)
		die("chdir in start_command() not implemented");
	if (cmd->env) {
		env = copy_environ();
		for (; *cmd->env; cmd->env++)
			env = env_setenv(env, *cmd->env);
	}

	if (cmd->perf_cmd) {
		cmd->argv = prepare_perf_cmd(cmd->argv);
	}

	cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env);

	if (cmd->env)
		free_environ(env);
	if (cmd->perf_cmd)
		free(cmd->argv);

	/* put back the caller's argv and the parent's original fds */
	cmd->argv = sargv;
	if (s0 >= 0)
		dup2(s0, 0), close(s0);
	if (s1 >= 0)
		dup2(s1, 1), close(s1);
	if (s2 >= 0)
		dup2(s2, 2), close(s2);
#endif

	if (cmd->pid < 0) {
		/* capture errno before the close() calls below can change it */
		int err = errno;
		if (need_in)
			close_pair(fdin);
		else if (cmd->in)
			close(cmd->in);
		if (need_out)
			close_pair(fdout);
		else if (cmd->out)
			close(cmd->out);
		if (need_err)
			close_pair(fderr);
		return err == ENOENT ?
			-ERR_RUN_COMMAND_EXEC :
			-ERR_RUN_COMMAND_FORK;
	}

	/* parent: drop the pipe ends (or caller-supplied fds) that now belong to the child */
	if (need_in)
		close(fdin[0]);
	else if (cmd->in)
		close(cmd->in);

	if (need_out)
		close(fdout[1]);
	else if (cmd->out)
		close(cmd->out);

	if (need_err)
		close(fderr[1]);

	return 0;
}
219 | |||
220 | static int wait_or_whine(pid_t pid) | ||
221 | { | ||
222 | for (;;) { | ||
223 | int status, code; | ||
224 | pid_t waiting = waitpid(pid, &status, 0); | ||
225 | |||
226 | if (waiting < 0) { | ||
227 | if (errno == EINTR) | ||
228 | continue; | ||
229 | error("waitpid failed (%s)", strerror(errno)); | ||
230 | return -ERR_RUN_COMMAND_WAITPID; | ||
231 | } | ||
232 | if (waiting != pid) | ||
233 | return -ERR_RUN_COMMAND_WAITPID_WRONG_PID; | ||
234 | if (WIFSIGNALED(status)) | ||
235 | return -ERR_RUN_COMMAND_WAITPID_SIGNAL; | ||
236 | |||
237 | if (!WIFEXITED(status)) | ||
238 | return -ERR_RUN_COMMAND_WAITPID_NOEXIT; | ||
239 | code = WEXITSTATUS(status); | ||
240 | switch (code) { | ||
241 | case 127: | ||
242 | return -ERR_RUN_COMMAND_EXEC; | ||
243 | case 0: | ||
244 | return 0; | ||
245 | default: | ||
246 | return -code; | ||
247 | } | ||
248 | } | ||
249 | } | ||
250 | |||
/*
 * Wait for the child recorded in cmd->pid and return the result of
 * wait_or_whine(): 0 for a clean exit, a negative value otherwise.
 */
int finish_command(struct child_process *cmd)
{
	return wait_or_whine(cmd->pid);
}
255 | |||
/*
 * Start cmd and wait for it to finish.  A failure to start is returned
 * as is; otherwise the result of finish_command() is returned.
 */
int run_command(struct child_process *cmd)
{
	int status = start_command(cmd);

	return status ? status : finish_command(cmd);
}
263 | |||
264 | static void prepare_run_command_v_opt(struct child_process *cmd, | ||
265 | const char **argv, | ||
266 | int opt) | ||
267 | { | ||
268 | memset(cmd, 0, sizeof(*cmd)); | ||
269 | cmd->argv = argv; | ||
270 | cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0; | ||
271 | cmd->perf_cmd = opt & RUN_PERF_CMD ? 1 : 0; | ||
272 | cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0; | ||
273 | } | ||
274 | |||
/*
 * Run argv to completion with the behaviour selected by the RUN_* bits
 * in opt; returns whatever run_command() returns.
 */
int run_command_v_opt(const char **argv, int opt)
{
	struct child_process cmd;
	prepare_run_command_v_opt(&cmd, argv, opt);
	return run_command(&cmd);
}
281 | |||
/*
 * Like run_command_v_opt(), but additionally sets the child's working
 * directory (dir) and environment changes (env); either may be NULL.
 */
int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env)
{
	struct child_process cmd;
	prepare_run_command_v_opt(&cmd, argv, opt);
	cmd.dir = dir;
	cmd.env = env;
	return run_command(&cmd);
}
290 | |||
291 | #ifdef __MINGW32__ | ||
/*
 * Thread entry point used by start_async() on MinGW: runs the async
 * procedure on the write end of the pipe and returns its result as the
 * thread's exit code.
 */
static __stdcall unsigned run_thread(void *data)
{
	struct async *async = data;
	return async->proc(async->fd_for_proc, async->data);
}
297 | #endif | ||
298 | |||
/*
 * Start async->proc running concurrently, feeding a pipe whose read end
 * is stored in async->out.
 *
 * Without __MINGW32__ the procedure runs in a forked child, which exits
 * with 0 if proc returned 0 and 1 otherwise.  With __MINGW32__ it runs
 * in a thread of the current process.
 *
 * Returns 0 on success.  If the child or thread cannot be created, both
 * pipe ends are closed and -1 is returned.  If the pipe itself cannot be
 * created, the value of error() is returned.
 */
int start_async(struct async *async)
{
	int pipe_out[2];

	if (pipe(pipe_out) < 0)
		return error("cannot create pipe: %s", strerror(errno));
	async->out = pipe_out[0];

#ifndef __MINGW32__
	/* Flush stdio before fork() to avoid cloning buffers */
	fflush(NULL);

	async->pid = fork();
	if (async->pid < 0) {
		error("fork (async) failed: %s", strerror(errno));
		close_pair(pipe_out);
		return -1;
	}
	if (!async->pid) {
		/* child: only writes, so drop the read end */
		close(pipe_out[0]);
		exit(!!async->proc(pipe_out[1], async->data));
	}
	/* parent: only reads, so drop the write end */
	close(pipe_out[1]);
#else
	async->fd_for_proc = pipe_out[1];
	async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL);
	if (!async->tid) {
		error("cannot create thread: %s", strerror(errno));
		close_pair(pipe_out);
		return -1;
	}
#endif
	return 0;
}
333 | |||
/*
 * Wait for the child or thread started by start_async().
 *
 * Without __MINGW32__: returns 0 if wait_or_whine() reports success;
 * any nonzero result from wait_or_whine() (which includes a nonzero exit
 * status of the child) is reported through error() and its value is
 * returned.
 * With __MINGW32__: returns the thread's exit code, or the value of
 * error() if waiting or fetching the exit code fails; the thread handle
 * is closed either way.
 */
int finish_async(struct async *async)
{
#ifndef __MINGW32__
	int ret = 0;

	if (wait_or_whine(async->pid))
		ret = error("waitpid (async) failed");
#else
	DWORD ret = 0;
	if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0)
		ret = error("waiting for thread failed: %lu", GetLastError());
	else if (!GetExitCodeThread(async->tid, &ret))
		ret = error("cannot get thread exit code: %lu", GetLastError());
	CloseHandle(async->tid);
#endif
	return ret;
}
351 | |||
352 | int run_hook(const char *index_file, const char *name, ...) | ||
353 | { | ||
354 | struct child_process hook; | ||
355 | const char **argv = NULL, *env[2]; | ||
356 | char index[PATH_MAX]; | ||
357 | va_list args; | ||
358 | int ret; | ||
359 | size_t i = 0, alloc = 0; | ||
360 | |||
361 | if (access(perf_path("hooks/%s", name), X_OK) < 0) | ||
362 | return 0; | ||
363 | |||
364 | va_start(args, name); | ||
365 | ALLOC_GROW(argv, i + 1, alloc); | ||
366 | argv[i++] = perf_path("hooks/%s", name); | ||
367 | while (argv[i-1]) { | ||
368 | ALLOC_GROW(argv, i + 1, alloc); | ||
369 | argv[i++] = va_arg(args, const char *); | ||
370 | } | ||
371 | va_end(args); | ||
372 | |||
373 | memset(&hook, 0, sizeof(hook)); | ||
374 | hook.argv = argv; | ||
375 | hook.no_stdin = 1; | ||
376 | hook.stdout_to_stderr = 1; | ||
377 | if (index_file) { | ||
378 | snprintf(index, sizeof(index), "PERF_INDEX_FILE=%s", index_file); | ||
379 | env[0] = index; | ||
380 | env[1] = NULL; | ||
381 | hook.env = env; | ||
382 | } | ||
383 | |||
384 | ret = start_command(&hook); | ||
385 | free(argv); | ||
386 | if (ret) { | ||
387 | warning("Could not spawn %s", argv[0]); | ||
388 | return ret; | ||
389 | } | ||
390 | ret = finish_command(&hook); | ||
391 | if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL) | ||
392 | warning("%s exited due to uncaught signal", argv[0]); | ||
393 | |||
394 | return ret; | ||
395 | } | ||
diff --git a/Documentation/perf_counter/util/run-command.h b/Documentation/perf_counter/util/run-command.h new file mode 100644 index 000000000000..328289f23669 --- /dev/null +++ b/Documentation/perf_counter/util/run-command.h | |||
@@ -0,0 +1,93 @@ | |||
1 | #ifndef RUN_COMMAND_H | ||
2 | #define RUN_COMMAND_H | ||
3 | |||
/*
 * Failure codes of the run-command API.  The functions return them
 * negated, e.g. -ERR_RUN_COMMAND_PIPE.
 */
enum {
	ERR_RUN_COMMAND_FORK = 10000,		/* child could not be created (errno other than ENOENT) */
	ERR_RUN_COMMAND_EXEC,			/* ENOENT when creating the child, or child exited with 127 */
	ERR_RUN_COMMAND_PIPE,			/* pipe() failed */
	ERR_RUN_COMMAND_WAITPID,		/* waitpid() failed */
	ERR_RUN_COMMAND_WAITPID_WRONG_PID,	/* waitpid() returned a different pid */
	ERR_RUN_COMMAND_WAITPID_SIGNAL,		/* child was terminated by a signal */
	ERR_RUN_COMMAND_WAITPID_NOEXIT,		/* child neither exited nor was signalled */
};
/* True when -(x) is at least ERR_RUN_COMMAND_FORK, i.e. x is one of the negated codes above. */
#define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK)
14 | |||
/* Description of a child process for start_command() and friends. */
struct child_process {
	const char **argv;	/* NULL-terminated; argv[0] is the program to run */
	pid_t pid;		/* set by start_command() */
	/*
	 * Using .in, .out, .err:
	 * - Specify 0 for no redirections (child inherits stdin, stdout,
	 *   stderr from parent).
	 * - Specify -1 to have a pipe allocated as follows:
	 *     .in: returns the writable pipe end; parent writes to it,
	 *          the readable pipe end becomes child's stdin
	 *     .out, .err: returns the readable pipe end; parent reads from
	 *          it, the writable pipe end becomes child's stdout/stderr
	 *   The caller of start_command() must close the returned FDs
	 *   after it has completed reading from/writing to it!
	 * - Specify > 0 to set a channel to a particular FD as follows:
	 *     .in: a readable FD, becomes child's stdin
	 *     .out: a writable FD, becomes child's stdout/stderr
	 *     .err > 0 not supported
	 *   The specified FD is closed by start_command(), even in case
	 *   of errors!
	 */
	int in;
	int out;
	int err;
	const char *dir;	/* if set, the child chdir()s here before exec */
	const char *const *env;	/* NULL-terminated: "VAR=VALUE" sets, bare "VAR" unsets */
	unsigned no_stdin:1;	/* child's stdin is /dev/null */
	unsigned no_stdout:1;	/* child's stdout is /dev/null */
	unsigned no_stderr:1;	/* child's stderr is /dev/null */
	unsigned perf_cmd:1; /* if this is to be perf sub-command */
	unsigned stdout_to_stderr:1;	/* child's stdout is a copy of its stderr */
	void (*preexec_cb)(void);	/* if set, called in the forked child just before exec */
};
48 | |||
49 | int start_command(struct child_process *); | ||
50 | int finish_command(struct child_process *); | ||
51 | int run_command(struct child_process *); | ||
52 | |||
53 | extern int run_hook(const char *index_file, const char *name, ...); | ||
54 | |||
55 | #define RUN_COMMAND_NO_STDIN 1 | ||
56 | #define RUN_PERF_CMD 2 /*If this is to be perf sub-command */ | ||
57 | #define RUN_COMMAND_STDOUT_TO_STDERR 4 | ||
58 | int run_command_v_opt(const char **argv, int opt); | ||
59 | |||
60 | /* | ||
61 | * env (the environment) is to be formatted like environ: "VAR=VALUE". | ||
62 | * To unset an environment variable use just "VAR". | ||
63 | */ | ||
64 | int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env); | ||
65 | |||
66 | /* | ||
67 | * The purpose of the following functions is to feed a pipe by running | ||
68 | * a function asynchronously and providing output that the caller reads. | ||
69 | * | ||
70 | * It is expected that no synchronization and mutual exclusion between | ||
71 | * the caller and the feed function is necessary so that the function | ||
72 | * can run in a thread without interfering with the caller. | ||
73 | */ | ||
/* State of one asynchronous feeder started with start_async(). */
struct async {
	/*
	 * proc writes to fd and closes it;
	 * returns 0 on success, non-zero on failure
	 */
	int (*proc)(int fd, void *data);
	void *data;	/* passed through to proc unchanged */
	int out;	/* caller reads from here and closes it */
#ifndef __MINGW32__
	pid_t pid;	/* forked child running proc; set by start_async() */
#else
	HANDLE tid;	/* thread running proc; set by start_async() */
	int fd_for_proc;	/* write end of the pipe, handed to proc by the thread */
#endif
};
89 | |||
90 | int start_async(struct async *async); | ||
91 | int finish_async(struct async *async); | ||
92 | |||
93 | #endif | ||
diff --git a/Documentation/perf_counter/util/strbuf.c b/Documentation/perf_counter/util/strbuf.c new file mode 100644 index 000000000000..eaba09306802 --- /dev/null +++ b/Documentation/perf_counter/util/strbuf.c | |||
@@ -0,0 +1,359 @@ | |||
1 | #include "cache.h" | ||
2 | |||
/*
 * Check whether str starts with prefix.
 *
 * Returns 0 when every byte of prefix matches the start of str.
 * Otherwise returns the difference (prefix byte - str byte), both taken
 * as unsigned char, at the first position where they differ.
 */
int prefixcmp(const char *str, const char *prefix)
{
	while (*prefix) {
		if (*str != *prefix)
			return (unsigned char)*prefix - (unsigned char)*str;
		str++;
		prefix++;
	}
	return 0;
}
11 | |||
12 | /* | ||
13 | * Used as the default ->buf value, so that people can always assume | ||
14 | * buf is non NULL and ->buf is NUL terminated even for a freshly | ||
15 | * initialized strbuf. | ||
16 | */ | ||
17 | char strbuf_slopbuf[1]; | ||
18 | |||
19 | void strbuf_init(struct strbuf *sb, size_t hint) | ||
20 | { | ||
21 | sb->alloc = sb->len = 0; | ||
22 | sb->buf = strbuf_slopbuf; | ||
23 | if (hint) | ||
24 | strbuf_grow(sb, hint); | ||
25 | } | ||
26 | |||
27 | void strbuf_release(struct strbuf *sb) | ||
28 | { | ||
29 | if (sb->alloc) { | ||
30 | free(sb->buf); | ||
31 | strbuf_init(sb, 0); | ||
32 | } | ||
33 | } | ||
34 | |||
35 | char *strbuf_detach(struct strbuf *sb, size_t *sz) | ||
36 | { | ||
37 | char *res = sb->alloc ? sb->buf : NULL; | ||
38 | if (sz) | ||
39 | *sz = sb->len; | ||
40 | strbuf_init(sb, 0); | ||
41 | return res; | ||
42 | } | ||
43 | |||
44 | void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc) | ||
45 | { | ||
46 | strbuf_release(sb); | ||
47 | sb->buf = buf; | ||
48 | sb->len = len; | ||
49 | sb->alloc = alloc; | ||
50 | strbuf_grow(sb, 0); | ||
51 | sb->buf[sb->len] = '\0'; | ||
52 | } | ||
53 | |||
54 | void strbuf_grow(struct strbuf *sb, size_t extra) | ||
55 | { | ||
56 | if (sb->len + extra + 1 <= sb->len) | ||
57 | die("you want to use way too much memory"); | ||
58 | if (!sb->alloc) | ||
59 | sb->buf = NULL; | ||
60 | ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc); | ||
61 | } | ||
62 | |||
63 | void strbuf_trim(struct strbuf *sb) | ||
64 | { | ||
65 | char *b = sb->buf; | ||
66 | while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1])) | ||
67 | sb->len--; | ||
68 | while (sb->len > 0 && isspace(*b)) { | ||
69 | b++; | ||
70 | sb->len--; | ||
71 | } | ||
72 | memmove(sb->buf, b, sb->len); | ||
73 | sb->buf[sb->len] = '\0'; | ||
74 | } | ||
75 | void strbuf_rtrim(struct strbuf *sb) | ||
76 | { | ||
77 | while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1])) | ||
78 | sb->len--; | ||
79 | sb->buf[sb->len] = '\0'; | ||
80 | } | ||
81 | |||
82 | void strbuf_ltrim(struct strbuf *sb) | ||
83 | { | ||
84 | char *b = sb->buf; | ||
85 | while (sb->len > 0 && isspace(*b)) { | ||
86 | b++; | ||
87 | sb->len--; | ||
88 | } | ||
89 | memmove(sb->buf, b, sb->len); | ||
90 | sb->buf[sb->len] = '\0'; | ||
91 | } | ||
92 | |||
93 | void strbuf_tolower(struct strbuf *sb) | ||
94 | { | ||
95 | int i; | ||
96 | for (i = 0; i < sb->len; i++) | ||
97 | sb->buf[i] = tolower(sb->buf[i]); | ||
98 | } | ||
99 | |||
100 | struct strbuf **strbuf_split(const struct strbuf *sb, int delim) | ||
101 | { | ||
102 | int alloc = 2, pos = 0; | ||
103 | char *n, *p; | ||
104 | struct strbuf **ret; | ||
105 | struct strbuf *t; | ||
106 | |||
107 | ret = calloc(alloc, sizeof(struct strbuf *)); | ||
108 | p = n = sb->buf; | ||
109 | while (n < sb->buf + sb->len) { | ||
110 | int len; | ||
111 | n = memchr(n, delim, sb->len - (n - sb->buf)); | ||
112 | if (pos + 1 >= alloc) { | ||
113 | alloc = alloc * 2; | ||
114 | ret = realloc(ret, sizeof(struct strbuf *) * alloc); | ||
115 | } | ||
116 | if (!n) | ||
117 | n = sb->buf + sb->len - 1; | ||
118 | len = n - p + 1; | ||
119 | t = malloc(sizeof(struct strbuf)); | ||
120 | strbuf_init(t, len); | ||
121 | strbuf_add(t, p, len); | ||
122 | ret[pos] = t; | ||
123 | ret[++pos] = NULL; | ||
124 | p = ++n; | ||
125 | } | ||
126 | return ret; | ||
127 | } | ||
128 | |||
/*
 * Free a NULL-terminated array of strbufs (as built by strbuf_split):
 * each strbuf's buffer, each strbuf itself, then the array.
 */
void strbuf_list_free(struct strbuf **sbs)
{
	struct strbuf **cur;

	for (cur = sbs; *cur; cur++) {
		strbuf_release(*cur);
		free(*cur);
	}
	free(sbs);
}
139 | |||
140 | int strbuf_cmp(const struct strbuf *a, const struct strbuf *b) | ||
141 | { | ||
142 | int len = a->len < b->len ? a->len: b->len; | ||
143 | int cmp = memcmp(a->buf, b->buf, len); | ||
144 | if (cmp) | ||
145 | return cmp; | ||
146 | return a->len < b->len ? -1: a->len != b->len; | ||
147 | } | ||
148 | |||
149 | void strbuf_splice(struct strbuf *sb, size_t pos, size_t len, | ||
150 | const void *data, size_t dlen) | ||
151 | { | ||
152 | if (pos + len < pos) | ||
153 | die("you want to use way too much memory"); | ||
154 | if (pos > sb->len) | ||
155 | die("`pos' is too far after the end of the buffer"); | ||
156 | if (pos + len > sb->len) | ||
157 | die("`pos + len' is too far after the end of the buffer"); | ||
158 | |||
159 | if (dlen >= len) | ||
160 | strbuf_grow(sb, dlen - len); | ||
161 | memmove(sb->buf + pos + dlen, | ||
162 | sb->buf + pos + len, | ||
163 | sb->len - pos - len); | ||
164 | memcpy(sb->buf + pos, data, dlen); | ||
165 | strbuf_setlen(sb, sb->len + dlen - len); | ||
166 | } | ||
167 | |||
/* Insert len bytes from data at offset pos; nothing is removed. */
void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len)
{
	/* a splice that replaces zero bytes is a pure insertion */
	strbuf_splice(sb, pos, 0, data, len);
}
172 | |||
/* Delete len bytes starting at offset pos. */
void strbuf_remove(struct strbuf *sb, size_t pos, size_t len)
{
	/* a splice with no replacement data is a pure removal */
	strbuf_splice(sb, pos, len, NULL, 0);
}
177 | |||
178 | void strbuf_add(struct strbuf *sb, const void *data, size_t len) | ||
179 | { | ||
180 | strbuf_grow(sb, len); | ||
181 | memcpy(sb->buf + sb->len, data, len); | ||
182 | strbuf_setlen(sb, sb->len + len); | ||
183 | } | ||
184 | |||
185 | void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len) | ||
186 | { | ||
187 | strbuf_grow(sb, len); | ||
188 | memcpy(sb->buf + sb->len, sb->buf + pos, len); | ||
189 | strbuf_setlen(sb, sb->len + len); | ||
190 | } | ||
191 | |||
192 | void strbuf_addf(struct strbuf *sb, const char *fmt, ...) | ||
193 | { | ||
194 | int len; | ||
195 | va_list ap; | ||
196 | |||
197 | if (!strbuf_avail(sb)) | ||
198 | strbuf_grow(sb, 64); | ||
199 | va_start(ap, fmt); | ||
200 | len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); | ||
201 | va_end(ap); | ||
202 | if (len < 0) | ||
203 | die("your vsnprintf is broken"); | ||
204 | if (len > strbuf_avail(sb)) { | ||
205 | strbuf_grow(sb, len); | ||
206 | va_start(ap, fmt); | ||
207 | len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); | ||
208 | va_end(ap); | ||
209 | if (len > strbuf_avail(sb)) { | ||
210 | die("this should not happen, your snprintf is broken"); | ||
211 | } | ||
212 | } | ||
213 | strbuf_setlen(sb, sb->len + len); | ||
214 | } | ||
215 | |||
216 | void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, | ||
217 | void *context) | ||
218 | { | ||
219 | for (;;) { | ||
220 | const char *percent; | ||
221 | size_t consumed; | ||
222 | |||
223 | percent = strchrnul(format, '%'); | ||
224 | strbuf_add(sb, format, percent - format); | ||
225 | if (!*percent) | ||
226 | break; | ||
227 | format = percent + 1; | ||
228 | |||
229 | consumed = fn(sb, format, context); | ||
230 | if (consumed) | ||
231 | format += consumed; | ||
232 | else | ||
233 | strbuf_addch(sb, '%'); | ||
234 | } | ||
235 | } | ||
236 | |||
237 | size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, | ||
238 | void *context) | ||
239 | { | ||
240 | struct strbuf_expand_dict_entry *e = context; | ||
241 | size_t len; | ||
242 | |||
243 | for (; e->placeholder && (len = strlen(e->placeholder)); e++) { | ||
244 | if (!strncmp(placeholder, e->placeholder, len)) { | ||
245 | if (e->value) | ||
246 | strbuf_addstr(sb, e->value); | ||
247 | return len; | ||
248 | } | ||
249 | } | ||
250 | return 0; | ||
251 | } | ||
252 | |||
253 | size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) | ||
254 | { | ||
255 | size_t res; | ||
256 | size_t oldalloc = sb->alloc; | ||
257 | |||
258 | strbuf_grow(sb, size); | ||
259 | res = fread(sb->buf + sb->len, 1, size, f); | ||
260 | if (res > 0) | ||
261 | strbuf_setlen(sb, sb->len + res); | ||
262 | else if (res < 0 && oldalloc == 0) | ||
263 | strbuf_release(sb); | ||
264 | return res; | ||
265 | } | ||
266 | |||
267 | ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint) | ||
268 | { | ||
269 | size_t oldlen = sb->len; | ||
270 | size_t oldalloc = sb->alloc; | ||
271 | |||
272 | strbuf_grow(sb, hint ? hint : 8192); | ||
273 | for (;;) { | ||
274 | ssize_t cnt; | ||
275 | |||
276 | cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1); | ||
277 | if (cnt < 0) { | ||
278 | if (oldalloc == 0) | ||
279 | strbuf_release(sb); | ||
280 | else | ||
281 | strbuf_setlen(sb, oldlen); | ||
282 | return -1; | ||
283 | } | ||
284 | if (!cnt) | ||
285 | break; | ||
286 | sb->len += cnt; | ||
287 | strbuf_grow(sb, 8192); | ||
288 | } | ||
289 | |||
290 | sb->buf[sb->len] = '\0'; | ||
291 | return sb->len - oldlen; | ||
292 | } | ||
293 | |||
294 | #define STRBUF_MAXLINK (2*PATH_MAX) | ||
295 | |||
296 | int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) | ||
297 | { | ||
298 | size_t oldalloc = sb->alloc; | ||
299 | |||
300 | if (hint < 32) | ||
301 | hint = 32; | ||
302 | |||
303 | while (hint < STRBUF_MAXLINK) { | ||
304 | int len; | ||
305 | |||
306 | strbuf_grow(sb, hint); | ||
307 | len = readlink(path, sb->buf, hint); | ||
308 | if (len < 0) { | ||
309 | if (errno != ERANGE) | ||
310 | break; | ||
311 | } else if (len < hint) { | ||
312 | strbuf_setlen(sb, len); | ||
313 | return 0; | ||
314 | } | ||
315 | |||
316 | /* .. the buffer was too small - try again */ | ||
317 | hint *= 2; | ||
318 | } | ||
319 | if (oldalloc == 0) | ||
320 | strbuf_release(sb); | ||
321 | return -1; | ||
322 | } | ||
323 | |||
324 | int strbuf_getline(struct strbuf *sb, FILE *fp, int term) | ||
325 | { | ||
326 | int ch; | ||
327 | |||
328 | strbuf_grow(sb, 0); | ||
329 | if (feof(fp)) | ||
330 | return EOF; | ||
331 | |||
332 | strbuf_reset(sb); | ||
333 | while ((ch = fgetc(fp)) != EOF) { | ||
334 | if (ch == term) | ||
335 | break; | ||
336 | strbuf_grow(sb, 1); | ||
337 | sb->buf[sb->len++] = ch; | ||
338 | } | ||
339 | if (ch == EOF && sb->len == 0) | ||
340 | return EOF; | ||
341 | |||
342 | sb->buf[sb->len] = '\0'; | ||
343 | return 0; | ||
344 | } | ||
345 | |||
/*
 * Append the whole content of the file at `path' to sb.
 * Returns the number of bytes read, or -1 if the file cannot be opened
 * or reading fails.
 */
int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint)
{
	int len;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return -1;
	len = strbuf_read(sb, fd, hint);
	close(fd);
	return len < 0 ? -1 : len;
}
diff --git a/Documentation/perf_counter/util/strbuf.h b/Documentation/perf_counter/util/strbuf.h new file mode 100644 index 000000000000..9ee908a3ec5d --- /dev/null +++ b/Documentation/perf_counter/util/strbuf.h | |||
@@ -0,0 +1,137 @@ | |||
1 | #ifndef STRBUF_H | ||
2 | #define STRBUF_H | ||
3 | |||
4 | /* | ||
5 | * Strbufs can be used in many ways: as a byte array, or to store arbitrarily | ||
6 | * long, overflow safe strings. | ||
7 | * | ||
8 | * Strbufs have some invariants that are very important to keep in mind: | ||
9 | * | ||
10 | * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to | ||
11 | * build complex strings/buffers whose final size isn't easily known. | ||
12 | * | ||
13 | * It is NOT legal to copy the ->buf pointer away. | ||
14 | * `strbuf_detach' is the operation that detaches a buffer from its shell | ||
15 | * while keeping the shell valid wrt its invariants. | ||
16 | * | ||
17 | * 2. the ->buf member is a byte array that has at least ->len + 1 bytes | ||
18 | * allocated. The extra byte is used to store a '\0', allowing the ->buf | ||
19 | * member to be a valid C-string. Every strbuf function ensures this | ||
20 | * invariant is preserved. | ||
21 | * | ||
22 | * Note that it is OK to "play" with the buffer directly if you work it | ||
23 | * that way: | ||
24 | * | ||
25 | * strbuf_grow(sb, SOME_SIZE); | ||
26 | * ... Here, the memory array starting at sb->buf, and of length | ||
27 | * ... strbuf_avail(sb) is all yours, and you are sure that | ||
28 | * ... strbuf_avail(sb) is at least SOME_SIZE. | ||
29 | * strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE); | ||
30 | * | ||
31 | * Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb). | ||
32 | * | ||
33 | * Doing so is safe, though if it has to be done in many places, adding the | ||
34 | * missing API to the strbuf module is the way to go. | ||
35 | * | ||
36 | * XXX: do _not_ assume that the area that is yours is of size ->alloc - 1 | ||
37 | * even if it's true in the current implementation. Alloc is somehow a | ||
38 | * "private" member that should not be messed with. | ||
39 | */ | ||
40 | |||
41 | #include <assert.h> | ||
42 | |||
43 | extern char strbuf_slopbuf[]; | ||
/* A growable, always NUL-terminated byte buffer. */
struct strbuf {
	size_t alloc;	/* bytes allocated for buf; 0 while buf is the shared slop buffer */
	size_t len;	/* bytes in use, not counting the terminating NUL */
	char *buf;	/* never NULL */
};
49 | |||
50 | #define STRBUF_INIT { 0, 0, strbuf_slopbuf } | ||
51 | |||
52 | /*----- strbuf life cycle -----*/ | ||
53 | extern void strbuf_init(struct strbuf *, size_t); | ||
54 | extern void strbuf_release(struct strbuf *); | ||
55 | extern char *strbuf_detach(struct strbuf *, size_t *); | ||
56 | extern void strbuf_attach(struct strbuf *, void *, size_t, size_t); | ||
57 | static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) { | ||
58 | struct strbuf tmp = *a; | ||
59 | *a = *b; | ||
60 | *b = tmp; | ||
61 | } | ||
62 | |||
63 | /*----- strbuf size related -----*/ | ||
64 | static inline size_t strbuf_avail(const struct strbuf *sb) { | ||
65 | return sb->alloc ? sb->alloc - sb->len - 1 : 0; | ||
66 | } | ||
67 | |||
68 | extern void strbuf_grow(struct strbuf *, size_t); | ||
69 | |||
70 | static inline void strbuf_setlen(struct strbuf *sb, size_t len) { | ||
71 | if (!sb->alloc) | ||
72 | strbuf_grow(sb, 0); | ||
73 | assert(len < sb->alloc); | ||
74 | sb->len = len; | ||
75 | sb->buf[len] = '\0'; | ||
76 | } | ||
77 | #define strbuf_reset(sb) strbuf_setlen(sb, 0) | ||
78 | |||
79 | /*----- content related -----*/ | ||
80 | extern void strbuf_trim(struct strbuf *); | ||
81 | extern void strbuf_rtrim(struct strbuf *); | ||
82 | extern void strbuf_ltrim(struct strbuf *); | ||
83 | extern int strbuf_cmp(const struct strbuf *, const struct strbuf *); | ||
84 | extern void strbuf_tolower(struct strbuf *); | ||
85 | |||
86 | extern struct strbuf **strbuf_split(const struct strbuf *, int delim); | ||
87 | extern void strbuf_list_free(struct strbuf **); | ||
88 | |||
89 | /*----- add data in your buffer -----*/ | ||
90 | static inline void strbuf_addch(struct strbuf *sb, int c) { | ||
91 | strbuf_grow(sb, 1); | ||
92 | sb->buf[sb->len++] = c; | ||
93 | sb->buf[sb->len] = '\0'; | ||
94 | } | ||
95 | |||
96 | extern void strbuf_insert(struct strbuf *, size_t pos, const void *, size_t); | ||
97 | extern void strbuf_remove(struct strbuf *, size_t pos, size_t len); | ||
98 | |||
99 | /* splice pos..pos+len with given data */ | ||
100 | extern void strbuf_splice(struct strbuf *, size_t pos, size_t len, | ||
101 | const void *, size_t); | ||
102 | |||
103 | extern void strbuf_add(struct strbuf *, const void *, size_t); | ||
/* Append the NUL-terminated string s (without its terminator) to sb. */
static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
	size_t n = strlen(s);

	strbuf_add(sb, s, n);
}
107 | static inline void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2) { | ||
108 | strbuf_add(sb, sb2->buf, sb2->len); | ||
109 | } | ||
110 | extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len); | ||
111 | |||
112 | typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context); | ||
113 | extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context); | ||
/* One placeholder/value pair for strbuf_expand_dict_cb(). */
struct strbuf_expand_dict_entry {
	const char *placeholder;	/* text expected right after '%' */
	const char *value;		/* replacement text; may be NULL */
};
118 | extern size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, void *context); | ||
119 | |||
120 | __attribute__((format(printf,2,3))) | ||
121 | extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); | ||
122 | |||
123 | extern size_t strbuf_fread(struct strbuf *, size_t, FILE *); | ||
124 | /* XXX: if read fails, any partial read is undone */ | ||
125 | extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint); | ||
126 | extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint); | ||
127 | extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint); | ||
128 | |||
129 | extern int strbuf_getline(struct strbuf *, FILE *, int); | ||
130 | |||
131 | extern void stripspace(struct strbuf *buf, int skip_comments); | ||
132 | extern int launch_editor(const char *path, struct strbuf *buffer, const char *const *env); | ||
133 | |||
134 | extern int strbuf_branchname(struct strbuf *sb, const char *name); | ||
135 | extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name); | ||
136 | |||
137 | #endif /* STRBUF_H */ | ||
diff --git a/Documentation/perf_counter/util/usage.c b/Documentation/perf_counter/util/usage.c new file mode 100644 index 000000000000..7a10421fe6b4 --- /dev/null +++ b/Documentation/perf_counter/util/usage.c | |||
@@ -0,0 +1,80 @@ | |||
1 | /* | ||
2 | * GIT - The information manager from hell | ||
3 | * | ||
4 | * Copyright (C) Linus Torvalds, 2005 | ||
5 | */ | ||
6 | #include "util.h" | ||
7 | |||
/*
 * Format err with params and print it to stderr as one line, preceded
 * by prefix. The formatted message is limited to a 1024-byte buffer.
 */
static void report(const char *prefix, const char *err, va_list params)
{
	char text[1024];

	vsnprintf(text, sizeof(text), err, params);
	fprintf(stderr, "%s%s\n", prefix, text);
}
14 | |||
15 | static NORETURN void usage_builtin(const char *err) | ||
16 | { | ||
17 | fprintf(stderr, "usage: %s\n", err); | ||
18 | exit(129); | ||
19 | } | ||
20 | |||
21 | static NORETURN void die_builtin(const char *err, va_list params) | ||
22 | { | ||
23 | report("fatal: ", err, params); | ||
24 | exit(128); | ||
25 | } | ||
26 | |||
/* Default error handler: report the message with an "error: " prefix. */
static void error_builtin(const char *err, va_list params)
{
	report("error: ", err, params);
}
31 | |||
/* Default warning handler: report the message with a "warning: " prefix. */
static void warn_builtin(const char *warn, va_list params)
{
	report("warning: ", warn, params);
}
36 | |||
37 | /* If we are in a dlopen()ed .so, writing to a global variable would segfault | ||
38 | * (ugh), so keep things static. */ | ||
39 | static void (*usage_routine)(const char *err) NORETURN = usage_builtin; | ||
40 | static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin; | ||
41 | static void (*error_routine)(const char *err, va_list params) = error_builtin; | ||
42 | static void (*warn_routine)(const char *err, va_list params) = warn_builtin; | ||
43 | |||
44 | void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN) | ||
45 | { | ||
46 | die_routine = routine; | ||
47 | } | ||
48 | |||
/* Report a usage error through the current usage handler. */
void usage(const char *err)
{
	usage_routine(err);
}
53 | |||
/*
 * Report a fatal, printf-style formatted error through the current die
 * handler (die_builtin unless replaced via set_die_routine()).
 */
void die(const char *err, ...)
{
	va_list args;

	va_start(args, err);
	die_routine(err, args);
	va_end(args);
}
62 | |||
/*
 * Report a printf-style formatted error through the current error
 * handler. Always returns -1, so callers can write
 * "return error(...)".
 */
int error(const char *err, ...)
{
	va_list args;

	va_start(args, err);
	error_routine(err, args);
	va_end(args);
	return -1;
}
72 | |||
/* Report a printf-style formatted warning through the current warning handler. */
void warning(const char *warn, ...)
{
	va_list args;

	va_start(args, warn);
	warn_routine(warn, args);
	va_end(args);
}
diff --git a/Documentation/perf_counter/util/util.h b/Documentation/perf_counter/util/util.h new file mode 100644 index 000000000000..36e40c38e093 --- /dev/null +++ b/Documentation/perf_counter/util/util.h | |||
@@ -0,0 +1,408 @@ | |||
1 | #ifndef GIT_COMPAT_UTIL_H | ||
2 | #define GIT_COMPAT_UTIL_H | ||
3 | |||
4 | #define _FILE_OFFSET_BITS 64 | ||
5 | |||
6 | #ifndef FLEX_ARRAY | ||
7 | /* | ||
8 | * See if our compiler is known to support flexible array members. | ||
9 | */ | ||
10 | #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) | ||
11 | # define FLEX_ARRAY /* empty */ | ||
12 | #elif defined(__GNUC__) | ||
13 | # if (__GNUC__ >= 3) | ||
14 | # define FLEX_ARRAY /* empty */ | ||
15 | # else | ||
16 | # define FLEX_ARRAY 0 /* older GNU extension */ | ||
17 | # endif | ||
18 | #endif | ||
19 | |||
20 | /* | ||
21 | * Otherwise, default to safer but a bit wasteful traditional style | ||
22 | */ | ||
23 | #ifndef FLEX_ARRAY | ||
24 | # define FLEX_ARRAY 1 | ||
25 | #endif | ||
26 | #endif | ||
27 | |||
28 | #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) | ||
29 | |||
30 | #ifdef __GNUC__ | ||
31 | #define TYPEOF(x) (__typeof__(x)) | ||
32 | #else | ||
33 | #define TYPEOF(x) | ||
34 | #endif | ||
35 | |||
36 | #define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (sizeof(x) * 8 - (bits)))) | ||
37 | #define HAS_MULTI_BITS(i) ((i) & ((i) - 1)) /* checks if an integer has more than 1 bit set */ | ||
38 | |||
39 | /* Approximation of the length of the decimal representation of this type. */ | ||
40 | #define decimal_length(x) ((int)(sizeof(x) * 2.56 + 0.5) + 1) | ||
41 | |||
42 | #if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__USLC__) && !defined(_M_UNIX) | ||
43 | #define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */ | ||
44 | #define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */ | ||
45 | #endif | ||
46 | #define _ALL_SOURCE 1 | ||
47 | #define _GNU_SOURCE 1 | ||
48 | #define _BSD_SOURCE 1 | ||
49 | |||
50 | #include <unistd.h> | ||
51 | #include <stdio.h> | ||
52 | #include <sys/stat.h> | ||
53 | #include <fcntl.h> | ||
54 | #include <stddef.h> | ||
55 | #include <stdlib.h> | ||
56 | #include <stdarg.h> | ||
57 | #include <string.h> | ||
58 | #include <errno.h> | ||
59 | #include <limits.h> | ||
60 | #include <sys/param.h> | ||
61 | #include <sys/types.h> | ||
62 | #include <dirent.h> | ||
63 | #include <sys/time.h> | ||
64 | #include <time.h> | ||
65 | #include <signal.h> | ||
66 | #include <fnmatch.h> | ||
67 | #include <assert.h> | ||
68 | #include <regex.h> | ||
69 | #include <utime.h> | ||
70 | #ifndef __MINGW32__ | ||
71 | #include <sys/wait.h> | ||
72 | #include <sys/poll.h> | ||
73 | #include <sys/socket.h> | ||
74 | #include <sys/ioctl.h> | ||
75 | #ifndef NO_SYS_SELECT_H | ||
76 | #include <sys/select.h> | ||
77 | #endif | ||
78 | #include <netinet/in.h> | ||
79 | #include <netinet/tcp.h> | ||
80 | #include <arpa/inet.h> | ||
81 | #include <netdb.h> | ||
82 | #include <pwd.h> | ||
83 | #include <inttypes.h> | ||
84 | #if defined(__CYGWIN__) | ||
85 | #undef _XOPEN_SOURCE | ||
86 | #include <grp.h> | ||
87 | #define _XOPEN_SOURCE 600 | ||
88 | #include "compat/cygwin.h" | ||
89 | #else | ||
90 | #undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */ | ||
91 | #include <grp.h> | ||
92 | #define _ALL_SOURCE 1 | ||
93 | #endif | ||
94 | #else /* __MINGW32__ */ | ||
95 | /* pull in Windows compatibility stuff */ | ||
96 | #include "compat/mingw.h" | ||
97 | #endif /* __MINGW32__ */ | ||
98 | |||
99 | #ifndef NO_ICONV | ||
100 | #include <iconv.h> | ||
101 | #endif | ||
102 | |||
103 | #ifndef NO_OPENSSL | ||
104 | #include <openssl/ssl.h> | ||
105 | #include <openssl/err.h> | ||
106 | #endif | ||
107 | |||
108 | /* On most systems <limits.h> would have given us this, but | ||
109 | * not on some systems (e.g. GNU/Hurd). | ||
110 | */ | ||
111 | #ifndef PATH_MAX | ||
112 | #define PATH_MAX 4096 | ||
113 | #endif | ||
114 | |||
115 | #ifndef PRIuMAX | ||
116 | #define PRIuMAX "llu" | ||
117 | #endif | ||
118 | |||
119 | #ifndef PRIu32 | ||
120 | #define PRIu32 "u" | ||
121 | #endif | ||
122 | |||
123 | #ifndef PRIx32 | ||
124 | #define PRIx32 "x" | ||
125 | #endif | ||
126 | |||
127 | #ifndef PATH_SEP | ||
128 | #define PATH_SEP ':' | ||
129 | #endif | ||
130 | |||
131 | #ifndef STRIP_EXTENSION | ||
132 | #define STRIP_EXTENSION "" | ||
133 | #endif | ||
134 | |||
135 | #ifndef has_dos_drive_prefix | ||
136 | #define has_dos_drive_prefix(path) 0 | ||
137 | #endif | ||
138 | |||
139 | #ifndef is_dir_sep | ||
140 | #define is_dir_sep(c) ((c) == '/') | ||
141 | #endif | ||
142 | |||
143 | #ifdef __GNUC__ | ||
144 | #define NORETURN __attribute__((__noreturn__)) | ||
145 | #else | ||
146 | #define NORETURN | ||
147 | #ifndef __attribute__ | ||
148 | #define __attribute__(x) | ||
149 | #endif | ||
150 | #endif | ||
151 | |||
152 | /* General helper functions */ | ||
153 | extern void usage(const char *err) NORETURN; | ||
154 | extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); | ||
155 | extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); | ||
156 | extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); | ||
157 | |||
158 | extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); | ||
159 | |||
160 | extern int prefixcmp(const char *str, const char *prefix); | ||
161 | extern time_t tm_to_time_t(const struct tm *tm); | ||
162 | |||
/*
 * If str starts with prefix, return a pointer to the first byte of str
 * after the prefix; otherwise return NULL.
 */
static inline const char *skip_prefix(const char *str, const char *prefix)
{
	size_t plen = strlen(prefix);

	if (strncmp(str, prefix, plen) != 0)
		return NULL;
	return str + plen;
}
168 | |||
169 | #if defined(NO_MMAP) || defined(USE_WIN32_MMAP) | ||
170 | |||
171 | #ifndef PROT_READ | ||
172 | #define PROT_READ 1 | ||
173 | #define PROT_WRITE 2 | ||
174 | #define MAP_PRIVATE 1 | ||
175 | #define MAP_FAILED ((void*)-1) | ||
176 | #endif | ||
177 | |||
178 | #define mmap git_mmap | ||
179 | #define munmap git_munmap | ||
180 | extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); | ||
181 | extern int git_munmap(void *start, size_t length); | ||
182 | |||
183 | #else /* NO_MMAP || USE_WIN32_MMAP */ | ||
184 | |||
185 | #include <sys/mman.h> | ||
186 | |||
187 | #endif /* NO_MMAP || USE_WIN32_MMAP */ | ||
188 | |||
189 | #ifdef NO_MMAP | ||
190 | |||
191 | /* This value must be multiple of (pagesize * 2) */ | ||
192 | #define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024) | ||
193 | |||
194 | #else /* NO_MMAP */ | ||
195 | |||
196 | /* This value must be multiple of (pagesize * 2) */ | ||
197 | #define DEFAULT_PACKED_GIT_WINDOW_SIZE \ | ||
198 | (sizeof(void*) >= 8 \ | ||
199 | ? 1 * 1024 * 1024 * 1024 \ | ||
200 | : 32 * 1024 * 1024) | ||
201 | |||
202 | #endif /* NO_MMAP */ | ||
203 | |||
204 | #ifdef NO_ST_BLOCKS_IN_STRUCT_STAT | ||
205 | #define on_disk_bytes(st) ((st).st_size) | ||
206 | #else | ||
207 | #define on_disk_bytes(st) ((st).st_blocks * 512) | ||
208 | #endif | ||
209 | |||
210 | #define DEFAULT_PACKED_GIT_LIMIT \ | ||
211 | ((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256)) | ||
212 | |||
213 | #ifdef NO_PREAD | ||
214 | #define pread git_pread | ||
215 | extern ssize_t git_pread(int fd, void *buf, size_t count, off_t offset); | ||
216 | #endif | ||
217 | /* | ||
218 | * Forward decl that will remind us if its twin in cache.h changes. | ||
219 | * This function is used in compat/pread.c. But we can't include | ||
220 | * cache.h there. | ||
221 | */ | ||
222 | extern ssize_t read_in_full(int fd, void *buf, size_t count); | ||
223 | |||
224 | #ifdef NO_SETENV | ||
225 | #define setenv gitsetenv | ||
226 | extern int gitsetenv(const char *, const char *, int); | ||
227 | #endif | ||
228 | |||
229 | #ifdef NO_MKDTEMP | ||
230 | #define mkdtemp gitmkdtemp | ||
231 | extern char *gitmkdtemp(char *); | ||
232 | #endif | ||
233 | |||
234 | #ifdef NO_UNSETENV | ||
235 | #define unsetenv gitunsetenv | ||
236 | extern void gitunsetenv(const char *); | ||
237 | #endif | ||
238 | |||
239 | #ifdef NO_STRCASESTR | ||
240 | #define strcasestr gitstrcasestr | ||
241 | extern char *gitstrcasestr(const char *haystack, const char *needle); | ||
242 | #endif | ||
243 | |||
244 | #ifdef NO_STRLCPY | ||
245 | #define strlcpy gitstrlcpy | ||
246 | extern size_t gitstrlcpy(char *, const char *, size_t); | ||
247 | #endif | ||
248 | |||
249 | #ifdef NO_STRTOUMAX | ||
250 | #define strtoumax gitstrtoumax | ||
251 | extern uintmax_t gitstrtoumax(const char *, char **, int); | ||
252 | #endif | ||
253 | |||
254 | #ifdef NO_HSTRERROR | ||
255 | #define hstrerror githstrerror | ||
256 | extern const char *githstrerror(int herror); | ||
257 | #endif | ||
258 | |||
259 | #ifdef NO_MEMMEM | ||
260 | #define memmem gitmemmem | ||
261 | void *gitmemmem(const void *haystack, size_t haystacklen, | ||
262 | const void *needle, size_t needlelen); | ||
263 | #endif | ||
264 | |||
265 | #ifdef FREAD_READS_DIRECTORIES | ||
266 | #ifdef fopen | ||
267 | #undef fopen | ||
268 | #endif | ||
269 | #define fopen(a,b) git_fopen(a,b) | ||
270 | extern FILE *git_fopen(const char*, const char*); | ||
271 | #endif | ||
272 | |||
273 | #ifdef SNPRINTF_RETURNS_BOGUS | ||
274 | #define snprintf git_snprintf | ||
275 | extern int git_snprintf(char *str, size_t maxsize, | ||
276 | const char *format, ...); | ||
277 | #define vsnprintf git_vsnprintf | ||
278 | extern int git_vsnprintf(char *str, size_t maxsize, | ||
279 | const char *format, va_list ap); | ||
280 | #endif | ||
281 | |||
282 | #ifdef __GLIBC_PREREQ | ||
283 | #if __GLIBC_PREREQ(2, 1) | ||
284 | #define HAVE_STRCHRNUL | ||
285 | #endif | ||
286 | #endif | ||
287 | |||
288 | #ifndef HAVE_STRCHRNUL | ||
289 | #define strchrnul gitstrchrnul | ||
/*
 * Fallback strchrnul(): return a pointer to the first occurrence of c
 * in s, or to the terminating NUL of s when c does not occur.
 */
static inline char *gitstrchrnul(const char *s, int c)
{
	for (; *s; s++)
		if (*s == c)
			break;
	return (char *)s;
}
296 | #endif | ||
297 | |||
298 | /* | ||
299 | * Wrappers: | ||
300 | */ | ||
301 | extern char *xstrdup(const char *str); | ||
302 | extern void *xmalloc(size_t size); | ||
303 | extern void *xmemdupz(const void *data, size_t len); | ||
304 | extern char *xstrndup(const char *str, size_t len); | ||
305 | extern void *xrealloc(void *ptr, size_t size); | ||
306 | extern void *xcalloc(size_t nmemb, size_t size); | ||
307 | extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); | ||
308 | extern ssize_t xread(int fd, void *buf, size_t len); | ||
309 | extern ssize_t xwrite(int fd, const void *buf, size_t len); | ||
310 | extern int xdup(int fd); | ||
311 | extern FILE *xfdopen(int fd, const char *mode); | ||
/* Convert an off_t to size_t with a plain cast; no range check is performed. */
static inline size_t xsize_t(off_t len)
{
	size_t converted = (size_t)len;

	return converted;
}
316 | |||
/*
 * Return non-zero when filename ends with ext and is strictly longer
 * than ext (a bare extension does not count as a match).
 */
static inline int has_extension(const char *filename, const char *ext)
{
	size_t flen = strlen(filename);
	size_t elen = strlen(ext);

	if (flen <= elen)
		return 0;
	return !memcmp(filename + (flen - elen), ext, elen);
}
323 | |||
324 | /* Sane ctype - no locale, and works with signed chars */ | ||
325 | #undef isascii | ||
326 | #undef isspace | ||
327 | #undef isdigit | ||
328 | #undef isalpha | ||
329 | #undef isalnum | ||
330 | #undef tolower | ||
331 | #undef toupper | ||
332 | extern unsigned char sane_ctype[256]; | ||
333 | #define GIT_SPACE 0x01 | ||
334 | #define GIT_DIGIT 0x02 | ||
335 | #define GIT_ALPHA 0x04 | ||
336 | #define GIT_GLOB_SPECIAL 0x08 | ||
337 | #define GIT_REGEX_SPECIAL 0x10 | ||
338 | #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) | ||
339 | #define isascii(x) (((x) & ~0x7f) == 0) | ||
340 | #define isspace(x) sane_istest(x,GIT_SPACE) | ||
341 | #define isdigit(x) sane_istest(x,GIT_DIGIT) | ||
342 | #define isalpha(x) sane_istest(x,GIT_ALPHA) | ||
343 | #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) | ||
344 | #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) | ||
345 | #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL) | ||
346 | #define tolower(x) sane_case((unsigned char)(x), 0x20) | ||
347 | #define toupper(x) sane_case((unsigned char)(x), 0) | ||
348 | |||
349 | static inline int sane_case(int x, int high) | ||
350 | { | ||
351 | if (sane_istest(x, GIT_ALPHA)) | ||
352 | x = (x & ~0x20) | high; | ||
353 | return x; | ||
354 | } | ||
355 | |||
/*
 * Parse all of s as an unsigned integer in the given base.
 *
 * Returns 0 and stores the value in *result on success. Returns -1,
 * leaving *result untouched, when strtoul() fails, no digits were
 * consumed, trailing characters remain, or the value does not fit in
 * an unsigned int.
 */
static inline int strtoul_ui(char const *s, int base, unsigned int *result)
{
	char *end;
	unsigned long value;

	errno = 0;
	value = strtoul(s, &end, base);
	if (errno)
		return -1;
	if (*end || end == s)
		return -1;
	if ((unsigned int) value != value)
		return -1;
	*result = value;
	return 0;
}
368 | |||
/*
 * Parse all of s as a signed integer in the given base.
 *
 * Returns 0 and stores the value in *result on success. Returns -1,
 * leaving *result untouched, when strtol() fails, no digits were
 * consumed, trailing characters remain, or the value does not fit in
 * an int.
 */
static inline int strtol_i(char const *s, int base, int *result)
{
	char *end;
	long value;

	errno = 0;
	value = strtol(s, &end, base);
	if (errno)
		return -1;
	if (*end || end == s)
		return -1;
	if ((int) value != value)
		return -1;
	*result = value;
	return 0;
}
381 | |||
382 | #ifdef INTERNAL_QSORT | ||
383 | void git_qsort(void *base, size_t nmemb, size_t size, | ||
384 | int(*compar)(const void *, const void *)); | ||
385 | #define qsort git_qsort | ||
386 | #endif | ||
387 | |||
388 | #ifndef DIR_HAS_BSD_GROUP_SEMANTICS | ||
389 | # define FORCE_DIR_SET_GID S_ISGID | ||
390 | #else | ||
391 | # define FORCE_DIR_SET_GID 0 | ||
392 | #endif | ||
393 | |||
394 | #ifdef NO_NSEC | ||
395 | #undef USE_NSEC | ||
396 | #define ST_CTIME_NSEC(st) 0 | ||
397 | #define ST_MTIME_NSEC(st) 0 | ||
398 | #else | ||
399 | #ifdef USE_ST_TIMESPEC | ||
400 | #define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec)) | ||
401 | #define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec)) | ||
402 | #else | ||
403 | #define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec)) | ||
404 | #define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec)) | ||
405 | #endif | ||
406 | #endif | ||
407 | |||
408 | #endif | ||
diff --git a/Documentation/perf_counter/util/wrapper.c b/Documentation/perf_counter/util/wrapper.c new file mode 100644 index 000000000000..6350d65f6d9e --- /dev/null +++ b/Documentation/perf_counter/util/wrapper.c | |||
@@ -0,0 +1,206 @@ | |||
1 | /* | ||
2 | * Various trivial helper wrappers around standard functions | ||
3 | */ | ||
4 | #include "cache.h" | ||
5 | |||
/*
 * Nothing to release here: this is an empty stand-in so that the
 * allocation wrappers below can stay close to their Git counterparts.
 */
static inline void release_pack_memory(size_t size, int flag)
{
	(void)size;
	(void)flag;
}
13 | |||
/*
 * strdup() that reports failure through die(): one retry is made after
 * calling release_pack_memory(), then die() is invoked.
 */
char *xstrdup(const char *str)
{
	char *copy = strdup(str);

	if (copy)
		return copy;

	/* first attempt failed: run the release hook and try once more */
	release_pack_memory(strlen(str) + 1, -1);
	copy = strdup(str);
	if (!copy)
		die("Out of memory, strdup failed");
	return copy;
}
25 | |||
/*
 * malloc() that reports failure through die().  A zero-byte request that
 * yields NULL is retried as a one-byte request.  If the allocation still
 * fails, release_pack_memory() is called and the whole attempt is
 * repeated once before die() is invoked.
 */
void *xmalloc(size_t size)
{
	void *mem = NULL;
	int pass;

	for (pass = 0; pass < 2; pass++) {
		if (pass)
			release_pack_memory(size, -1);
		mem = malloc(size);
		if (!mem && !size)
			mem = malloc(1);
		if (mem)
			break;
	}
	if (!mem)
		die("Out of memory, malloc failed");
#ifdef XMALLOC_POISON
	memset(mem, 0xA5, size);
#endif
	return mem;
}
44 | |||
/*
 * xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of
 * "data" to the allocated memory, zero terminates the allocated memory,
 * and returns a pointer to the allocated memory. If the allocation fails,
 * or if (len + 1) cannot be represented in a size_t, the program dies.
 */
void *xmemdupz(const void *data, size_t len)
{
	char *p;

	/*
	 * len + 1 wraps around to 0 for the largest size_t.  Without this
	 * check the allocation below would be tiny while memcpy() still
	 * copies "len" bytes into it.
	 */
	if (len + 1 < len)
		die("Data too large to fit into virtual memory space.");

	p = xmalloc(len + 1);
	memcpy(p, data, len);
	p[len] = '\0';
	return p;
}
58 | |||
/*
 * Duplicate at most "len" bytes of "str", stopping early at the first
 * NUL byte, and return the copy as a NUL-terminated string obtained
 * from xmemdupz().
 */
char *xstrndup(const char *str, size_t len)
{
	const char *nul = memchr(str, '\0', len);
	size_t n = len;

	if (nul)
		n = nul - str;
	return xmemdupz(str, n);
}
64 | |||
/*
 * realloc() that reports failure through die().
 *
 * A request for zero bytes releases "ptr" and returns a fresh one-byte
 * allocation, so the caller always gets a valid, freeable pointer.
 * If the allocation fails, release_pack_memory() is called and the
 * request is retried once before die() is invoked.
 */
void *xrealloc(void *ptr, size_t size)
{
	size_t want = size;
	void *ret;

	/*
	 * realloc(ptr, 0) is allowed to free "ptr" and return NULL.  Passing
	 * the same pointer to realloc() again afterwards would then touch
	 * freed memory, so handle the zero-size case explicitly: drop the
	 * old block ourselves and ask for a minimal new one.
	 */
	if (!size) {
		free(ptr);
		ptr = NULL;
		want = 1;
	}

	ret = realloc(ptr, want);
	if (!ret) {
		/* a failed realloc() with non-zero size leaves "ptr" intact */
		release_pack_memory(size, -1);
		ret = realloc(ptr, want);
		if (!ret)
			die("Out of memory, realloc failed");
	}
	return ret;
}
80 | |||
/*
 * calloc() that reports failure through die().  A request with a zero
 * count or zero element size that yields NULL is retried as calloc(1, 1).
 * If the allocation still fails, release_pack_memory() is called and the
 * whole attempt is repeated once before die() is invoked.
 */
void *xcalloc(size_t nmemb, size_t size)
{
	int empty = !nmemb || !size;
	void *mem = NULL;
	int pass;

	for (pass = 0; pass < 2; pass++) {
		if (pass)
			release_pack_memory(nmemb * size, -1);
		mem = calloc(nmemb, size);
		if (!mem && empty)
			mem = calloc(1, 1);
		if (mem)
			return mem;
	}
	die("Out of memory, calloc failed");
	return mem;
}
96 | |||
/*
 * mmap() that reports failure through die().  A failed zero-length
 * mapping returns NULL instead.  Otherwise release_pack_memory() is
 * called and the mapping is retried once before die() is invoked.
 */
void *xmmap(void *start, size_t length,
	    int prot, int flags, int fd, off_t offset)
{
	void *map = mmap(start, length, prot, flags, fd, offset);

	if (map != MAP_FAILED)
		return map;
	if (!length)
		return NULL;

	release_pack_memory(length, fd);
	map = mmap(start, length, prot, flags, fd, offset);
	if (map == MAP_FAILED)
		die("Out of memory? mmap failed: %s", strerror(errno));
	return map;
}
111 | |||
/*
 * xread() is the same as read(), except that a call failing with EAGAIN
 * or EINTR is transparently restarted.  Like read(), it may return fewer
 * than "len" bytes even when more data is available.
 */
ssize_t xread(int fd, void *buf, size_t len)
{
	ssize_t nr;

	do {
		nr = read(fd, buf, len);
	} while (nr < 0 && (errno == EAGAIN || errno == EINTR));

	return nr;
}
127 | |||
/*
 * xwrite() is the same as write(), except that a call failing with EAGAIN
 * or EINTR is transparently restarted.  Like write(), it may write fewer
 * than "len" bytes even when it succeeds.
 */
ssize_t xwrite(int fd, const void *buf, size_t len)
{
	ssize_t nr;

	do {
		nr = write(fd, buf, len);
	} while (nr < 0 && (errno == EAGAIN || errno == EINTR));

	return nr;
}
143 | |||
/*
 * Keep calling xread() until "count" bytes have been read, or xread()
 * returns 0 or a negative value.  Returns the number of bytes read so
 * far if that is non-zero; otherwise returns xread()'s last result.
 */
ssize_t read_in_full(int fd, void *buf, size_t count)
{
	char *cursor = buf;
	ssize_t done = 0;
	ssize_t got;

	for (; count > 0; count -= got, cursor += got, done += got) {
		got = xread(fd, cursor, count);
		if (got <= 0)
			return done ? done : got;
	}

	return done;
}
160 | |||
/*
 * Keep calling xwrite() until all "count" bytes have been written.
 * Returns the total written, or -1 on failure; an xwrite() that makes
 * no progress is reported as -1 with errno set to ENOSPC.
 */
ssize_t write_in_full(int fd, const void *buf, size_t count)
{
	const char *cursor = buf;
	ssize_t done = 0;

	while (count > 0) {
		ssize_t n = xwrite(fd, cursor, count);

		if (n <= 0) {
			/* zero bytes written: treat as "no space left" */
			if (!n)
				errno = ENOSPC;
			return -1;
		}
		cursor += n;
		done += n;
		count -= n;
	}

	return done;
}
181 | |||
/* dup() that calls die() with the errno text when duplication fails. */
int xdup(int fd)
{
	int copy = dup(fd);

	if (copy >= 0)
		return copy;

	die("dup failed: %s", strerror(errno));
	return copy;
}
189 | |||
/* fdopen() that calls die() with the errno text when no stream is returned. */
FILE *xfdopen(int fd, const char *mode)
{
	FILE *stream = fdopen(fd, mode);

	if (stream)
		return stream;

	die("Out of memory? fdopen failed: %s", strerror(errno));
	return stream;
}
197 | |||
/*
 * mkstemp() that calls die() with the errno text on failure.
 * Returns the descriptor produced by mkstemp() for "template".
 */
int xmkstemp(char *template)
{
	int fd = mkstemp(template);

	if (fd >= 0)
		return fd;

	die("Unable to create temporary file: %s", strerror(errno));
	return fd;
}
diff --git a/MAINTAINERS b/MAINTAINERS index 2b349ba4add4..8f4a8b601a15 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -4375,6 +4375,16 @@ S: Maintained | |||
4375 | F: include/linux/delayacct.h | 4375 | F: include/linux/delayacct.h |
4376 | F: kernel/delayacct.c | 4376 | F: kernel/delayacct.c |
4377 | 4377 | ||
4378 | PERFORMANCE COUNTER SUBSYSTEM | ||
4379 | P: Peter Zijlstra | ||
4380 | M: a.p.zijlstra@chello.nl | ||
4381 | P: Paul Mackerras | ||
4382 | M: paulus@samba.org | ||
4383 | P: Ingo Molnar | ||
4384 | M: mingo@elte.hu | ||
4385 | L: linux-kernel@vger.kernel.org | ||
4386 | S: Supported | ||
4387 | |||
4378 | PERSONALITY HANDLING | 4388 | PERSONALITY HANDLING |
4379 | P: Christoph Hellwig | 4389 | P: Christoph Hellwig |
4380 | M: hch@infradead.org | 4390 | M: hch@infradead.org |
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index b7e034b0a6dd..20a44d0c9fdd 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h | |||
@@ -131,5 +131,44 @@ static inline int irqs_disabled_flags(unsigned long flags) | |||
131 | */ | 131 | */ |
132 | struct irq_chip; | 132 | struct irq_chip; |
133 | 133 | ||
/*
 * Accessors for the one-byte perf_counter_pending flag in struct
 * paca_struct.  Each accessor is a single load or store of that byte,
 * addressed as an offset from register 13.
 * NOTE(review): r13 is presumably the per-CPU paca pointer -- confirm.
 */
134 | #ifdef CONFIG_PERF_COUNTERS | ||
/* Load the flag byte (lbz) and return it. */
135 | static inline unsigned long test_perf_counter_pending(void) | ||
136 | { | ||
137 | unsigned long x; | ||
138 | |||
139 | asm volatile("lbz %0,%1(13)" | ||
140 | : "=r" (x) | ||
141 | : "i" (offsetof(struct paca_struct, perf_counter_pending))); | ||
142 | return x; | ||
143 | } | ||
144 | |||
/* Store 1 into the flag byte (stb). */
145 | static inline void set_perf_counter_pending(void) | ||
146 | { | ||
147 | asm volatile("stb %0,%1(13)" : : | ||
148 | "r" (1), | ||
149 | "i" (offsetof(struct paca_struct, perf_counter_pending))); | ||
150 | } | ||
151 | |||
/* Store 0 into the flag byte (stb). */
152 | static inline void clear_perf_counter_pending(void) | ||
153 | { | ||
154 | asm volatile("stb %0,%1(13)" : : | ||
155 | "r" (0), | ||
156 | "i" (offsetof(struct paca_struct, perf_counter_pending))); | ||
157 | } | ||
158 | |||
159 | extern void perf_counter_do_pending(void); | ||
160 | |||
161 | #else | ||
162 | |||
/* Without CONFIG_PERF_COUNTERS the flag always reads 0 and the rest are no-ops. */
163 | static inline unsigned long test_perf_counter_pending(void) | ||
164 | { | ||
165 | return 0; | ||
166 | } | ||
167 | |||
168 | static inline void set_perf_counter_pending(void) {} | ||
169 | static inline void clear_perf_counter_pending(void) {} | ||
170 | static inline void perf_counter_do_pending(void) {} | ||
171 | #endif /* CONFIG_PERF_COUNTERS */ | ||
172 | |||
134 | #endif /* __KERNEL__ */ | 173 | #endif /* __KERNEL__ */ |
135 | #endif /* _ASM_POWERPC_HW_IRQ_H */ | 174 | #endif /* _ASM_POWERPC_HW_IRQ_H */ |
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 082b3aedf145..6ef055723019 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h | |||
@@ -99,6 +99,7 @@ struct paca_struct { | |||
99 | u8 soft_enabled; /* irq soft-enable flag */ | 99 | u8 soft_enabled; /* irq soft-enable flag */ |
100 | u8 hard_enabled; /* set if irqs are enabled in MSR */ | 100 | u8 hard_enabled; /* set if irqs are enabled in MSR */ |
101 | u8 io_sync; /* writel() needs spin_unlock sync */ | 101 | u8 io_sync; /* writel() needs spin_unlock sync */ |
102 | u8 perf_counter_pending; /* PM interrupt while soft-disabled */ | ||
102 | 103 | ||
103 | /* Stuff for accurate time accounting */ | 104 | /* Stuff for accurate time accounting */ |
104 | u64 user_time; /* accumulated usermode TB ticks */ | 105 | u64 user_time; /* accumulated usermode TB ticks */ |
diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h new file mode 100644 index 000000000000..1c60f0ca7920 --- /dev/null +++ b/arch/powerpc/include/asm/perf_counter.h | |||
@@ -0,0 +1,95 @@ | |||
1 | /* | ||
2 | * Performance counter support - PowerPC-specific definitions. | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/types.h> | ||
12 | |||
13 | #define MAX_HWCOUNTERS 8 | ||
14 | #define MAX_EVENT_ALTERNATIVES 8 | ||
15 | #define MAX_LIMITED_HWCOUNTERS 2 | ||
16 | |||
17 | /* | ||
18 | * This struct provides the constants and functions needed to | ||
19 | * describe the PMU on a particular POWER-family CPU. | ||
20 | */ | ||
21 | struct power_pmu { | ||
/* upper bound on the number of events checked at once */
22 | int n_counter; | ||
23 | int max_alternatives; | ||
/* add_fields / test_adder: see the "Add field" and "NAND field" notes below */
24 | u64 add_fields; | ||
25 | u64 test_adder; | ||
26 | int (*compute_mmcr)(u64 events[], int n_ev, | ||
27 | unsigned int hwc[], u64 mmcr[]); | ||
/* fills *mskp / *valp for an event; a non-zero return is treated as failure */
28 | int (*get_constraint)(u64 event, u64 *mskp, u64 *valp); | ||
/* fills alt[] and returns how many entries it wrote; may be NULL */
29 | int (*get_alternatives)(u64 event, unsigned int flags, | ||
30 | u64 alt[]); | ||
31 | void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); | ||
/* NOTE(review): presumably non-zero when the event can use a limited PMC -- confirm */
32 | int (*limited_pmc_event)(u64 event); | ||
/* PPMU_LIMITED_PMC5_6, PPMU_ALT_SIPR */
33 | u32 flags; | ||
34 | int n_generic; | ||
35 | int *generic_events; | ||
36 | }; | ||
37 | |||
38 | extern struct power_pmu *ppmu; | ||
39 | |||
40 | /* | ||
41 | * Values for power_pmu.flags | ||
42 | */ | ||
43 | #define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */ | ||
44 | #define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */ | ||
45 | |||
46 | /* | ||
47 | * Values for flags to get_alternatives() | ||
48 | */ | ||
49 | #define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */ | ||
50 | #define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ | ||
51 | #define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ | ||
52 | |||
53 | struct pt_regs; | ||
54 | extern unsigned long perf_misc_flags(struct pt_regs *regs); | ||
55 | #define perf_misc_flags(regs) perf_misc_flags(regs) | ||
56 | |||
57 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); | ||
58 | |||
59 | /* | ||
60 | * The power_pmu.get_constraint function returns a 64-bit value and | ||
61 | * a 64-bit mask that express the constraints between this event and | ||
62 | * other events. | ||
63 | * | ||
64 | * The value and mask are divided up into (non-overlapping) bitfields | ||
65 | * of three different types: | ||
66 | * | ||
67 | * Select field: this expresses the constraint that some set of bits | ||
68 | * in MMCR* needs to be set to a specific value for this event. For a | ||
69 | * select field, the mask contains 1s in every bit of the field, and | ||
70 | * the value contains a unique value for each possible setting of the | ||
71 | * MMCR* bits. The constraint checking code will ensure that two events | ||
72 | * that set the same field in their masks have the same value in their | ||
73 | * value dwords. | ||
74 | * | ||
75 | * Add field: this expresses the constraint that there can be at most | ||
76 | * N events in a particular class. A field of k bits can be used for | ||
77 | * N <= 2^(k-1) - 1. The mask has the most significant bit of the field | ||
78 | * set (and the other bits 0), and the value has only the least significant | ||
79 | * bit of the field set. In addition, the 'add_fields' and 'test_adder' | ||
80 | * in the struct power_pmu for this processor come into play. The | ||
81 | * add_fields value contains 1 in the LSB of the field, and the | ||
82 | * test_adder contains 2^(k-1) - 1 - N in the field. | ||
83 | * | ||
84 | * NAND field: this expresses the constraint that you may not have events | ||
85 | * in all of a set of classes. (For example, on PPC970, you can't select | ||
86 | * events from the FPU, ISU and IDU simultaneously, although any two are | ||
87 | * possible.) For N classes, the field is N+1 bits wide, and each class | ||
88 | * is assigned one bit from the least-significant N bits. The mask has | ||
89 | * only the most-significant bit set, and the value has only the bit | ||
90 | * for the event's class set. The test_adder has the least significant | ||
91 | * bit set in the field. | ||
92 | * | ||
93 | * If an event is not subject to the constraint expressed by a particular | ||
94 | * field, then it will have 0 in both the mask and value for that field. | ||
95 | */ | ||
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e8018d540e87..fb359b0a6937 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h | |||
@@ -492,11 +492,13 @@ | |||
492 | #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */ | 492 | #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */ |
493 | #define SPRN_MMCR1 798 | 493 | #define SPRN_MMCR1 798 |
494 | #define SPRN_MMCRA 0x312 | 494 | #define SPRN_MMCRA 0x312 |
495 | #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ | ||
495 | #define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */ | 496 | #define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */ |
496 | #define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */ | 497 | #define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */ |
497 | #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ | 498 | #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ |
498 | #define MMCRA_SLOT_SHIFT 24 | 499 | #define MMCRA_SLOT_SHIFT 24 |
499 | #define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */ | 500 | #define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */ |
501 | #define POWER6_MMCRA_SDSYNC 0x0000080000000000ULL /* SDAR/SIAR synced */ | ||
500 | #define POWER6_MMCRA_SIHV 0x0000040000000000ULL | 502 | #define POWER6_MMCRA_SIHV 0x0000040000000000ULL |
501 | #define POWER6_MMCRA_SIPR 0x0000020000000000ULL | 503 | #define POWER6_MMCRA_SIPR 0x0000020000000000ULL |
502 | #define POWER6_MMCRA_THRM 0x00000020UL | 504 | #define POWER6_MMCRA_THRM 0x00000020UL |
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index d98a30dfd41c..a0b92de51c7e 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h | |||
@@ -322,6 +322,6 @@ SYSCALL_SPU(epoll_create1) | |||
322 | SYSCALL_SPU(dup3) | 322 | SYSCALL_SPU(dup3) |
323 | SYSCALL_SPU(pipe2) | 323 | SYSCALL_SPU(pipe2) |
324 | SYSCALL(inotify_init1) | 324 | SYSCALL(inotify_init1) |
325 | SYSCALL(ni_syscall) | 325 | SYSCALL_SPU(perf_counter_open) |
326 | COMPAT_SYS_SPU(preadv) | 326 | COMPAT_SYS_SPU(preadv) |
327 | COMPAT_SYS_SPU(pwritev) | 327 | COMPAT_SYS_SPU(pwritev) |
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 3f06f8ec81c5..4badac2d11d1 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h | |||
@@ -341,6 +341,7 @@ | |||
341 | #define __NR_dup3 316 | 341 | #define __NR_dup3 316 |
342 | #define __NR_pipe2 317 | 342 | #define __NR_pipe2 317 |
343 | #define __NR_inotify_init1 318 | 343 | #define __NR_inotify_init1 318 |
344 | #define __NR_perf_counter_open 319 | ||
344 | #define __NR_preadv 320 | 345 | #define __NR_preadv 320 |
345 | #define __NR_pwritev 321 | 346 | #define __NR_pwritev 321 |
346 | 347 | ||
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 71901fbda4a5..9ba1bb731fcc 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile | |||
@@ -94,6 +94,8 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o | |||
94 | 94 | ||
95 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o | 95 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o |
96 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o | 96 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o |
97 | obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o power4-pmu.o ppc970-pmu.o \ | ||
98 | power5-pmu.o power5+-pmu.o power6-pmu.o | ||
97 | 99 | ||
98 | obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o | 100 | obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o |
99 | 101 | ||
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 1e40bc053946..e981d1ce1914 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -131,6 +131,7 @@ int main(void) | |||
131 | DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); | 131 | DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); |
132 | DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); | 132 | DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); |
133 | DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); | 133 | DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); |
134 | DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending)); | ||
134 | DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); | 135 | DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); |
135 | DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); | 136 | DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); |
136 | DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); | 137 | DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); |
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index abfc32330479..43e073477c34 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S | |||
@@ -526,6 +526,15 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) | |||
526 | 2: | 526 | 2: |
527 | TRACE_AND_RESTORE_IRQ(r5); | 527 | TRACE_AND_RESTORE_IRQ(r5); |
528 | 528 | ||
529 | #ifdef CONFIG_PERF_COUNTERS | ||
530 | /* check paca->perf_counter_pending if we're enabling ints */ | ||
531 | lbz r3,PACAPERFPEND(r13) | ||
532 | and. r3,r3,r5 | ||
533 | beq 27f | ||
534 | bl .perf_counter_do_pending | ||
535 | 27: | ||
536 | #endif /* CONFIG_PERF_COUNTERS */ | ||
537 | |||
529 | /* extract EE bit and use it to restore paca->hard_enabled */ | 538 | /* extract EE bit and use it to restore paca->hard_enabled */ |
530 | ld r3,_MSR(r1) | 539 | ld r3,_MSR(r1) |
531 | rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ | 540 | rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ |
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 8c1a4966867e..feff792ed0f9 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c | |||
@@ -135,6 +135,11 @@ notrace void raw_local_irq_restore(unsigned long en) | |||
135 | iseries_handle_interrupts(); | 135 | iseries_handle_interrupts(); |
136 | } | 136 | } |
137 | 137 | ||
138 | if (test_perf_counter_pending()) { | ||
139 | clear_perf_counter_pending(); | ||
140 | perf_counter_do_pending(); | ||
141 | } | ||
142 | |||
138 | /* | 143 | /* |
139 | * if (get_paca()->hard_enabled) return; | 144 | * if (get_paca()->hard_enabled) return; |
140 | * But again we need to take care that gcc gets hard_enabled directly | 145 | * But again we need to take care that gcc gets hard_enabled directly |
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c new file mode 100644 index 000000000000..6baae5a5c331 --- /dev/null +++ b/arch/powerpc/kernel/perf_counter.c | |||
@@ -0,0 +1,1165 @@ | |||
1 | /* | ||
2 | * Performance counter support - powerpc architecture code | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/perf_counter.h> | ||
14 | #include <linux/percpu.h> | ||
15 | #include <linux/hardirq.h> | ||
16 | #include <asm/reg.h> | ||
17 | #include <asm/pmc.h> | ||
18 | #include <asm/machdep.h> | ||
19 | #include <asm/firmware.h> | ||
20 | #include <asm/ptrace.h> | ||
21 | |||
/* PMU bookkeeping; one instance per CPU is defined by DEFINE_PER_CPU below. */
22 | struct cpu_hw_counters { | ||
23 | int n_counters; | ||
24 | int n_percpu; | ||
25 | int disabled; | ||
26 | int n_added; | ||
/* number of valid entries in limited_counter[] / limited_hwidx[] */
27 | int n_limited; | ||
28 | u8 pmcs_enabled; | ||
29 | struct perf_counter *counter[MAX_HWCOUNTERS]; | ||
30 | u64 events[MAX_HWCOUNTERS]; | ||
31 | unsigned int flags[MAX_HWCOUNTERS]; | ||
32 | u64 mmcr[3]; | ||
/* counters handled by freeze_limited_counters() / thaw_limited_counters() */
33 | struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; | ||
/* hw.idx value that thaw_limited_counters() restores for each of them */
34 | u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; | ||
35 | }; | ||
36 | DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); | ||
37 | |||
38 | struct power_pmu *ppmu; | ||
39 | |||
40 | /* | ||
41 | * Normally, to ignore kernel events we set the FCS (freeze counters | ||
42 | * in supervisor mode) bit in MMCR0, but if the kernel runs with the | ||
43 | * hypervisor bit set in the MSR, or if we are running on a processor | ||
44 | * where the hypervisor bit is forced to 1 (as on Apple G5 processors), | ||
45 | * then we need to use the FCHV bit to ignore kernel events. | ||
46 | */ | ||
47 | static unsigned int freeze_counters_kernel = MMCR0_FCS; | ||
48 | |||
49 | static void perf_counter_interrupt(struct pt_regs *regs); | ||
50 | |||
/* Debug dump hook; this implementation prints nothing. */
void perf_counter_print_debug(void)
{
	return;
}
54 | |||
55 | /* | ||
56 | * Read one performance monitor counter (PMC). | ||
57 | */ | ||
58 | static unsigned long read_pmc(int idx) | ||
59 | { | ||
60 | unsigned long val; | ||
61 | |||
62 | switch (idx) { | ||
63 | case 1: | ||
64 | val = mfspr(SPRN_PMC1); | ||
65 | break; | ||
66 | case 2: | ||
67 | val = mfspr(SPRN_PMC2); | ||
68 | break; | ||
69 | case 3: | ||
70 | val = mfspr(SPRN_PMC3); | ||
71 | break; | ||
72 | case 4: | ||
73 | val = mfspr(SPRN_PMC4); | ||
74 | break; | ||
75 | case 5: | ||
76 | val = mfspr(SPRN_PMC5); | ||
77 | break; | ||
78 | case 6: | ||
79 | val = mfspr(SPRN_PMC6); | ||
80 | break; | ||
81 | case 7: | ||
82 | val = mfspr(SPRN_PMC7); | ||
83 | break; | ||
84 | case 8: | ||
85 | val = mfspr(SPRN_PMC8); | ||
86 | break; | ||
87 | default: | ||
88 | printk(KERN_ERR "oops trying to read PMC%d\n", idx); | ||
89 | val = 0; | ||
90 | } | ||
91 | return val; | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * Write one PMC. | ||
96 | */ | ||
97 | static void write_pmc(int idx, unsigned long val) | ||
98 | { | ||
99 | switch (idx) { | ||
100 | case 1: | ||
101 | mtspr(SPRN_PMC1, val); | ||
102 | break; | ||
103 | case 2: | ||
104 | mtspr(SPRN_PMC2, val); | ||
105 | break; | ||
106 | case 3: | ||
107 | mtspr(SPRN_PMC3, val); | ||
108 | break; | ||
109 | case 4: | ||
110 | mtspr(SPRN_PMC4, val); | ||
111 | break; | ||
112 | case 5: | ||
113 | mtspr(SPRN_PMC5, val); | ||
114 | break; | ||
115 | case 6: | ||
116 | mtspr(SPRN_PMC6, val); | ||
117 | break; | ||
118 | case 7: | ||
119 | mtspr(SPRN_PMC7, val); | ||
120 | break; | ||
121 | case 8: | ||
122 | mtspr(SPRN_PMC8, val); | ||
123 | break; | ||
124 | default: | ||
125 | printk(KERN_ERR "oops trying to write PMC%d\n", idx); | ||
126 | } | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * Check if a set of events can all go on the PMU at once. | ||
131 | * If they can't, this will look at alternative codes for the events | ||
132 | * and see if any combination of alternative codes is feasible. | ||
133 | * The feasible set is returned in event[]. | ||
134 | */ | ||
/*
 * event[]  - the n_ev event codes; may be overwritten with alternatives.
 * cflags[] - per-event PPMU_LIMITED_PMC_* flags, passed on to
 *            get_alternatives().
 * Returns 0 if a feasible combination was found and -1 otherwise.
 */
135 | static int power_check_constraints(u64 event[], unsigned int cflags[], | ||
136 | int n_ev) | ||
137 | { | ||
138 | u64 mask, value, nv; | ||
139 | u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
140 | u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
141 | u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
142 | u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; | ||
143 | int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; | ||
144 | int i, j; | ||
145 | u64 addf = ppmu->add_fields; | ||
146 | u64 tadd = ppmu->test_adder; | ||
147 | |||
/* more events than ppmu->n_counter is rejected outright */
148 | if (n_ev > ppmu->n_counter) | ||
149 | return -1; | ||
150 | |||
151 | /* First see if the events will go on as-is */ | ||
152 | for (i = 0; i < n_ev; ++i) { | ||
/*
 * An event that must sit on a limited PMC but fails limited_pmc_event()
 * is replaced by the first alternative returned for it.
 */
153 | if ((cflags[i] & PPMU_LIMITED_PMC_REQD) | ||
154 | && !ppmu->limited_pmc_event(event[i])) { | ||
155 | ppmu->get_alternatives(event[i], cflags[i], | ||
156 | alternatives[i]); | ||
157 | event[i] = alternatives[i][0]; | ||
158 | } | ||
159 | if (ppmu->get_constraint(event[i], &amasks[i][0], | ||
160 | &avalues[i][0])) | ||
161 | return -1; | ||
162 | } | ||
/*
 * Fold the events into a running (value, mask) pair one at a time.
 * nv ORs the new event's value in and adds the bits in addf that both
 * sides already have set.  The event is rejected if adding tadd changes
 * any bit covered by the running mask or by the event's own mask.
 */
163 | value = mask = 0; | ||
164 | for (i = 0; i < n_ev; ++i) { | ||
165 | nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf); | ||
166 | if ((((nv + tadd) ^ value) & mask) != 0 || | ||
167 | (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0) | ||
168 | break; | ||
169 | value = nv; | ||
170 | mask |= amasks[i][0]; | ||
171 | } | ||
172 | if (i == n_ev) | ||
173 | return 0; /* all OK */ | ||
174 | |||
175 | /* doesn't work, gather alternatives... */ | ||
176 | if (!ppmu->get_alternatives) | ||
177 | return -1; | ||
/*
 * Slot 0 of amasks/avalues keeps what the first pass computed for
 * event[i]; only slots 1.. are filled in here.
 * NOTE(review): this relies on get_alternatives() returning event[i]
 * itself as alternatives[i][0] -- confirm against the PMU back-ends.
 */
178 | for (i = 0; i < n_ev; ++i) { | ||
179 | choice[i] = 0; | ||
180 | n_alt[i] = ppmu->get_alternatives(event[i], cflags[i], | ||
181 | alternatives[i]); | ||
182 | for (j = 1; j < n_alt[i]; ++j) | ||
183 | ppmu->get_constraint(alternatives[i][j], | ||
184 | &amasks[i][j], &avalues[i][j]); | ||
185 | } | ||
186 | |||
187 | /* enumerate all possibilities and see if any will work */ | ||
/*
 * Depth-first search with backtracking: i is the event being placed,
 * j the alternative last tried for it (-1 means "start from the first").
 * svalues[]/smasks[] hold the running value/mask as they were before
 * event i was placed, so they can be restored when backtracking.
 */
188 | i = 0; | ||
189 | j = -1; | ||
190 | value = mask = nv = 0; | ||
191 | while (i < n_ev) { | ||
192 | if (j >= 0) { | ||
193 | /* we're backtracking, restore context */ | ||
194 | value = svalues[i]; | ||
195 | mask = smasks[i]; | ||
196 | j = choice[i]; | ||
197 | } | ||
198 | /* | ||
199 | * See if any alternative k for event i, | ||
200 | * where k > j, will satisfy the constraints. | ||
201 | */ | ||
202 | while (++j < n_alt[i]) { | ||
203 | nv = (value | avalues[i][j]) + | ||
204 | (value & avalues[i][j] & addf); | ||
205 | if ((((nv + tadd) ^ value) & mask) == 0 && | ||
206 | (((nv + tadd) ^ avalues[i][j]) | ||
207 | & amasks[i][j]) == 0) | ||
208 | break; | ||
209 | } | ||
210 | if (j >= n_alt[i]) { | ||
211 | /* | ||
212 | * No feasible alternative, backtrack | ||
213 | * to event i-1 and continue enumerating its | ||
214 | * alternatives from where we got up to. | ||
215 | */ | ||
216 | if (--i < 0) | ||
217 | return -1; | ||
218 | } else { | ||
219 | /* | ||
220 | * Found a feasible alternative for event i, | ||
221 | * remember where we got up to with this event, | ||
222 | * go on to the next event, and start with | ||
223 | * the first alternative for it. | ||
224 | */ | ||
225 | choice[i] = j; | ||
226 | svalues[i] = value; | ||
227 | smasks[i] = mask; | ||
228 | value = nv; | ||
229 | mask |= amasks[i][j]; | ||
230 | ++i; | ||
231 | j = -1; | ||
232 | } | ||
233 | } | ||
234 | |||
235 | /* OK, we have a feasible combination, tell the caller the solution */ | ||
236 | for (i = 0; i < n_ev; ++i) | ||
237 | event[i] = alternatives[i][choice[i]]; | ||
238 | return 0; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * Check if newly-added counters have consistent settings for | ||
243 | * exclude_{user,kernel,hv} with each other and any previously | ||
244 | * added counters. | ||
245 | */ | ||
246 | static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[], | ||
247 | int n_prev, int n_new) | ||
248 | { | ||
249 | int eu = 0, ek = 0, eh = 0; | ||
250 | int i, n, first; | ||
251 | struct perf_counter *counter; | ||
252 | |||
253 | n = n_prev + n_new; | ||
254 | if (n <= 1) | ||
255 | return 0; | ||
256 | |||
257 | first = 1; | ||
258 | for (i = 0; i < n; ++i) { | ||
259 | if (cflags[i] & PPMU_LIMITED_PMC_OK) { | ||
260 | cflags[i] &= ~PPMU_LIMITED_PMC_REQD; | ||
261 | continue; | ||
262 | } | ||
263 | counter = ctrs[i]; | ||
264 | if (first) { | ||
265 | eu = counter->hw_event.exclude_user; | ||
266 | ek = counter->hw_event.exclude_kernel; | ||
267 | eh = counter->hw_event.exclude_hv; | ||
268 | first = 0; | ||
269 | } else if (counter->hw_event.exclude_user != eu || | ||
270 | counter->hw_event.exclude_kernel != ek || | ||
271 | counter->hw_event.exclude_hv != eh) { | ||
272 | return -EAGAIN; | ||
273 | } | ||
274 | } | ||
275 | |||
276 | if (eu || ek || eh) | ||
277 | for (i = 0; i < n; ++i) | ||
278 | if (cflags[i] & PPMU_LIMITED_PMC_OK) | ||
279 | cflags[i] |= PPMU_LIMITED_PMC_REQD; | ||
280 | |||
281 | return 0; | ||
282 | } | ||
283 | |||
284 | static void power_pmu_read(struct perf_counter *counter) | ||
285 | { | ||
286 | long val, delta, prev; | ||
287 | |||
288 | if (!counter->hw.idx) | ||
289 | return; | ||
290 | /* | ||
291 | * Performance monitor interrupts come even when interrupts | ||
292 | * are soft-disabled, as long as interrupts are hard-enabled. | ||
293 | * Therefore we treat them like NMIs. | ||
294 | */ | ||
295 | do { | ||
296 | prev = atomic64_read(&counter->hw.prev_count); | ||
297 | barrier(); | ||
298 | val = read_pmc(counter->hw.idx); | ||
299 | } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev); | ||
300 | |||
301 | /* The counters are only 32 bits wide */ | ||
302 | delta = (val - prev) & 0xfffffffful; | ||
303 | atomic64_add(delta, &counter->count); | ||
304 | atomic64_sub(delta, &counter->hw.period_left); | ||
305 | } | ||
306 | |||
307 | /* | ||
308 | * On some machines, PMC5 and PMC6 can't be written, don't respect | ||
309 | * the freeze conditions, and don't generate interrupts. This tells | ||
310 | * us if `counter' is using such a PMC. | ||
311 | */ | ||
312 | static int is_limited_pmc(int pmcnum) | ||
313 | { | ||
314 | return (ppmu->flags & PPMU_LIMITED_PMC5_6) | ||
315 | && (pmcnum == 5 || pmcnum == 6); | ||
316 | } | ||
317 | |||
318 | static void freeze_limited_counters(struct cpu_hw_counters *cpuhw, | ||
319 | unsigned long pmc5, unsigned long pmc6) | ||
320 | { | ||
321 | struct perf_counter *counter; | ||
322 | u64 val, prev, delta; | ||
323 | int i; | ||
324 | |||
325 | for (i = 0; i < cpuhw->n_limited; ++i) { | ||
326 | counter = cpuhw->limited_counter[i]; | ||
327 | if (!counter->hw.idx) | ||
328 | continue; | ||
329 | val = (counter->hw.idx == 5) ? pmc5 : pmc6; | ||
330 | prev = atomic64_read(&counter->hw.prev_count); | ||
331 | counter->hw.idx = 0; | ||
332 | delta = (val - prev) & 0xfffffffful; | ||
333 | atomic64_add(delta, &counter->count); | ||
334 | } | ||
335 | } | ||
336 | |||
337 | static void thaw_limited_counters(struct cpu_hw_counters *cpuhw, | ||
338 | unsigned long pmc5, unsigned long pmc6) | ||
339 | { | ||
340 | struct perf_counter *counter; | ||
341 | u64 val; | ||
342 | int i; | ||
343 | |||
344 | for (i = 0; i < cpuhw->n_limited; ++i) { | ||
345 | counter = cpuhw->limited_counter[i]; | ||
346 | counter->hw.idx = cpuhw->limited_hwidx[i]; | ||
347 | val = (counter->hw.idx == 5) ? pmc5 : pmc6; | ||
348 | atomic64_set(&counter->hw.prev_count, val); | ||
349 | perf_counter_update_userpage(counter); | ||
350 | } | ||
351 | } | ||
352 | |||
353 | /* | ||
354 | * Since limited counters don't respect the freeze conditions, we | ||
355 | * have to read them immediately after freezing or unfreezing the | ||
356 | * other counters. We try to keep the values from the limited | ||
357 | * counters as consistent as possible by keeping the delay (in | ||
358 | * cycles and instructions) between freezing/unfreezing and reading | ||
359 | * the limited counters as small and consistent as possible. | ||
360 | * Therefore, if any limited counters are in use, we read them | ||
361 | * both, and always in the same order, to minimize variability, | ||
362 | * and do it inside the same asm that writes MMCR0. | ||
363 | */ | ||
364 | static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0) | ||
365 | { | ||
366 | unsigned long pmc5, pmc6; | ||
367 | |||
368 | if (!cpuhw->n_limited) { | ||
369 | mtspr(SPRN_MMCR0, mmcr0); | ||
370 | return; | ||
371 | } | ||
372 | |||
373 | /* | ||
374 | * Write MMCR0, then read PMC5 and PMC6 immediately. | ||
375 | */ | ||
376 | asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5" | ||
377 | : "=&r" (pmc5), "=&r" (pmc6) | ||
378 | : "r" (mmcr0), "i" (SPRN_MMCR0), | ||
379 | "i" (SPRN_PMC5), "i" (SPRN_PMC6)); | ||
380 | |||
381 | if (mmcr0 & MMCR0_FC) | ||
382 | freeze_limited_counters(cpuhw, pmc5, pmc6); | ||
383 | else | ||
384 | thaw_limited_counters(cpuhw, pmc5, pmc6); | ||
385 | } | ||
386 | |||
387 | /* | ||
388 | * Disable all counters to prevent PMU interrupts and to allow | ||
389 | * counters to be added or removed. | ||
390 | */ | ||
391 | void hw_perf_disable(void) | ||
392 | { | ||
393 | struct cpu_hw_counters *cpuhw; | ||
394 | unsigned long ret; | ||
395 | unsigned long flags; | ||
396 | |||
397 | local_irq_save(flags); | ||
398 | cpuhw = &__get_cpu_var(cpu_hw_counters); | ||
399 | |||
400 | ret = cpuhw->disabled; | ||
401 | if (!ret) { | ||
402 | cpuhw->disabled = 1; | ||
403 | cpuhw->n_added = 0; | ||
404 | |||
405 | /* | ||
406 | * Check if we ever enabled the PMU on this cpu. | ||
407 | */ | ||
408 | if (!cpuhw->pmcs_enabled) { | ||
409 | if (ppc_md.enable_pmcs) | ||
410 | ppc_md.enable_pmcs(); | ||
411 | cpuhw->pmcs_enabled = 1; | ||
412 | } | ||
413 | |||
414 | /* | ||
415 | * Disable instruction sampling if it was enabled | ||
416 | */ | ||
417 | if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { | ||
418 | mtspr(SPRN_MMCRA, | ||
419 | cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); | ||
420 | mb(); | ||
421 | } | ||
422 | |||
423 | /* | ||
424 | * Set the 'freeze counters' bit. | ||
425 | * The barrier is to make sure the mtspr has been | ||
426 | * executed and the PMU has frozen the counters | ||
427 | * before we return. | ||
428 | */ | ||
429 | write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); | ||
430 | mb(); | ||
431 | } | ||
432 | local_irq_restore(flags); | ||
433 | } | ||
434 | |||
435 | /* | ||
436 | * Re-enable all counters if disable == 0. | ||
437 | * If we were previously disabled and counters were added, then | ||
438 | * put the new config on the PMU. | ||
439 | */ | ||
440 | void hw_perf_enable(void) | ||
441 | { | ||
442 | struct perf_counter *counter; | ||
443 | struct cpu_hw_counters *cpuhw; | ||
444 | unsigned long flags; | ||
445 | long i; | ||
446 | unsigned long val; | ||
447 | s64 left; | ||
448 | unsigned int hwc_index[MAX_HWCOUNTERS]; | ||
449 | int n_lim; | ||
450 | int idx; | ||
451 | |||
452 | local_irq_save(flags); | ||
453 | if (!cpuhw->disabled) { | ||
454 | local_irq_restore(flags); | ||
455 | return; | ||
456 | } | ||
457 | |||
458 | cpuhw = &__get_cpu_var(cpu_hw_counters); | ||
459 | cpuhw->disabled = 0; | ||
460 | |||
461 | /* | ||
462 | * If we didn't change anything, or only removed counters, | ||
463 | * no need to recalculate MMCR* settings and reset the PMCs. | ||
464 | * Just reenable the PMU with the current MMCR* settings | ||
465 | * (possibly updated for removal of counters). | ||
466 | */ | ||
467 | if (!cpuhw->n_added) { | ||
468 | mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); | ||
469 | mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); | ||
470 | if (cpuhw->n_counters == 0) | ||
471 | get_lppaca()->pmcregs_in_use = 0; | ||
472 | goto out_enable; | ||
473 | } | ||
474 | |||
475 | /* | ||
476 | * Compute MMCR* values for the new set of counters | ||
477 | */ | ||
478 | if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index, | ||
479 | cpuhw->mmcr)) { | ||
480 | /* shouldn't ever get here */ | ||
481 | printk(KERN_ERR "oops compute_mmcr failed\n"); | ||
482 | goto out; | ||
483 | } | ||
484 | |||
485 | /* | ||
486 | * Add in MMCR0 freeze bits corresponding to the | ||
487 | * hw_event.exclude_* bits for the first counter. | ||
488 | * We have already checked that all counters have the | ||
489 | * same values for these bits as the first counter. | ||
490 | */ | ||
491 | counter = cpuhw->counter[0]; | ||
492 | if (counter->hw_event.exclude_user) | ||
493 | cpuhw->mmcr[0] |= MMCR0_FCP; | ||
494 | if (counter->hw_event.exclude_kernel) | ||
495 | cpuhw->mmcr[0] |= freeze_counters_kernel; | ||
496 | if (counter->hw_event.exclude_hv) | ||
497 | cpuhw->mmcr[0] |= MMCR0_FCHV; | ||
498 | |||
499 | /* | ||
500 | * Write the new configuration to MMCR* with the freeze | ||
501 | * bit set and set the hardware counters to their initial values. | ||
502 | * Then unfreeze the counters. | ||
503 | */ | ||
504 | get_lppaca()->pmcregs_in_use = 1; | ||
505 | mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); | ||
506 | mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); | ||
507 | mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) | ||
508 | | MMCR0_FC); | ||
509 | |||
510 | /* | ||
511 | * Read off any pre-existing counters that need to move | ||
512 | * to another PMC. | ||
513 | */ | ||
514 | for (i = 0; i < cpuhw->n_counters; ++i) { | ||
515 | counter = cpuhw->counter[i]; | ||
516 | if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) { | ||
517 | power_pmu_read(counter); | ||
518 | write_pmc(counter->hw.idx, 0); | ||
519 | counter->hw.idx = 0; | ||
520 | } | ||
521 | } | ||
522 | |||
523 | /* | ||
524 | * Initialize the PMCs for all the new and moved counters. | ||
525 | */ | ||
526 | cpuhw->n_limited = n_lim = 0; | ||
527 | for (i = 0; i < cpuhw->n_counters; ++i) { | ||
528 | counter = cpuhw->counter[i]; | ||
529 | if (counter->hw.idx) | ||
530 | continue; | ||
531 | idx = hwc_index[i] + 1; | ||
532 | if (is_limited_pmc(idx)) { | ||
533 | cpuhw->limited_counter[n_lim] = counter; | ||
534 | cpuhw->limited_hwidx[n_lim] = idx; | ||
535 | ++n_lim; | ||
536 | continue; | ||
537 | } | ||
538 | val = 0; | ||
539 | if (counter->hw.irq_period) { | ||
540 | left = atomic64_read(&counter->hw.period_left); | ||
541 | if (left < 0x80000000L) | ||
542 | val = 0x80000000L - left; | ||
543 | } | ||
544 | atomic64_set(&counter->hw.prev_count, val); | ||
545 | counter->hw.idx = idx; | ||
546 | write_pmc(idx, val); | ||
547 | perf_counter_update_userpage(counter); | ||
548 | } | ||
549 | cpuhw->n_limited = n_lim; | ||
550 | cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; | ||
551 | |||
552 | out_enable: | ||
553 | mb(); | ||
554 | write_mmcr0(cpuhw, cpuhw->mmcr[0]); | ||
555 | |||
556 | /* | ||
557 | * Enable instruction sampling if necessary | ||
558 | */ | ||
559 | if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { | ||
560 | mb(); | ||
561 | mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); | ||
562 | } | ||
563 | |||
564 | out: | ||
565 | local_irq_restore(flags); | ||
566 | } | ||
567 | |||
568 | static int collect_events(struct perf_counter *group, int max_count, | ||
569 | struct perf_counter *ctrs[], u64 *events, | ||
570 | unsigned int *flags) | ||
571 | { | ||
572 | int n = 0; | ||
573 | struct perf_counter *counter; | ||
574 | |||
575 | if (!is_software_counter(group)) { | ||
576 | if (n >= max_count) | ||
577 | return -1; | ||
578 | ctrs[n] = group; | ||
579 | flags[n] = group->hw.counter_base; | ||
580 | events[n++] = group->hw.config; | ||
581 | } | ||
582 | list_for_each_entry(counter, &group->sibling_list, list_entry) { | ||
583 | if (!is_software_counter(counter) && | ||
584 | counter->state != PERF_COUNTER_STATE_OFF) { | ||
585 | if (n >= max_count) | ||
586 | return -1; | ||
587 | ctrs[n] = counter; | ||
588 | flags[n] = counter->hw.counter_base; | ||
589 | events[n++] = counter->hw.config; | ||
590 | } | ||
591 | } | ||
592 | return n; | ||
593 | } | ||
594 | |||
595 | static void counter_sched_in(struct perf_counter *counter, int cpu) | ||
596 | { | ||
597 | counter->state = PERF_COUNTER_STATE_ACTIVE; | ||
598 | counter->oncpu = cpu; | ||
599 | counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped; | ||
600 | if (is_software_counter(counter)) | ||
601 | counter->pmu->enable(counter); | ||
602 | } | ||
603 | |||
604 | /* | ||
605 | * Called to enable a whole group of counters. | ||
606 | * Returns 1 if the group was enabled, or -EAGAIN if it could not be. | ||
607 | * Assumes the caller has disabled interrupts and has | ||
608 | * frozen the PMU with hw_perf_disable. | ||
609 | */ | ||
610 | int hw_perf_group_sched_in(struct perf_counter *group_leader, | ||
611 | struct perf_cpu_context *cpuctx, | ||
612 | struct perf_counter_context *ctx, int cpu) | ||
613 | { | ||
614 | struct cpu_hw_counters *cpuhw; | ||
615 | long i, n, n0; | ||
616 | struct perf_counter *sub; | ||
617 | |||
618 | cpuhw = &__get_cpu_var(cpu_hw_counters); | ||
619 | n0 = cpuhw->n_counters; | ||
620 | n = collect_events(group_leader, ppmu->n_counter - n0, | ||
621 | &cpuhw->counter[n0], &cpuhw->events[n0], | ||
622 | &cpuhw->flags[n0]); | ||
623 | if (n < 0) | ||
624 | return -EAGAIN; | ||
625 | if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n)) | ||
626 | return -EAGAIN; | ||
627 | i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0); | ||
628 | if (i < 0) | ||
629 | return -EAGAIN; | ||
630 | cpuhw->n_counters = n0 + n; | ||
631 | cpuhw->n_added += n; | ||
632 | |||
633 | /* | ||
634 | * OK, this group can go on; update counter states etc., | ||
635 | * and enable any software counters | ||
636 | */ | ||
637 | for (i = n0; i < n0 + n; ++i) | ||
638 | cpuhw->counter[i]->hw.config = cpuhw->events[i]; | ||
639 | cpuctx->active_oncpu += n; | ||
640 | n = 1; | ||
641 | counter_sched_in(group_leader, cpu); | ||
642 | list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { | ||
643 | if (sub->state != PERF_COUNTER_STATE_OFF) { | ||
644 | counter_sched_in(sub, cpu); | ||
645 | ++n; | ||
646 | } | ||
647 | } | ||
648 | ctx->nr_active += n; | ||
649 | |||
650 | return 1; | ||
651 | } | ||
652 | |||
653 | /* | ||
654 | * Add a counter to the PMU. | ||
655 | * If all counters are not already frozen, then we disable and | ||
656 | * re-enable the PMU in order to get hw_perf_enable to do the | ||
657 | * actual work of reconfiguring the PMU. | ||
658 | */ | ||
659 | static int power_pmu_enable(struct perf_counter *counter) | ||
660 | { | ||
661 | struct cpu_hw_counters *cpuhw; | ||
662 | unsigned long flags; | ||
663 | int n0; | ||
664 | int ret = -EAGAIN; | ||
665 | |||
666 | local_irq_save(flags); | ||
667 | perf_disable(); | ||
668 | |||
669 | /* | ||
670 | * Add the counter to the list (if there is room) | ||
671 | * and check whether the total set is still feasible. | ||
672 | */ | ||
673 | cpuhw = &__get_cpu_var(cpu_hw_counters); | ||
674 | n0 = cpuhw->n_counters; | ||
675 | if (n0 >= ppmu->n_counter) | ||
676 | goto out; | ||
677 | cpuhw->counter[n0] = counter; | ||
678 | cpuhw->events[n0] = counter->hw.config; | ||
679 | cpuhw->flags[n0] = counter->hw.counter_base; | ||
680 | if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1)) | ||
681 | goto out; | ||
682 | if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1)) | ||
683 | goto out; | ||
684 | |||
685 | counter->hw.config = cpuhw->events[n0]; | ||
686 | ++cpuhw->n_counters; | ||
687 | ++cpuhw->n_added; | ||
688 | |||
689 | ret = 0; | ||
690 | out: | ||
691 | perf_enable(); | ||
692 | local_irq_restore(flags); | ||
693 | return ret; | ||
694 | } | ||
695 | |||
696 | /* | ||
697 | * Remove a counter from the PMU. | ||
698 | */ | ||
699 | static void power_pmu_disable(struct perf_counter *counter) | ||
700 | { | ||
701 | struct cpu_hw_counters *cpuhw; | ||
702 | long i; | ||
703 | unsigned long flags; | ||
704 | |||
705 | local_irq_save(flags); | ||
706 | perf_disable(); | ||
707 | |||
708 | power_pmu_read(counter); | ||
709 | |||
710 | cpuhw = &__get_cpu_var(cpu_hw_counters); | ||
711 | for (i = 0; i < cpuhw->n_counters; ++i) { | ||
712 | if (counter == cpuhw->counter[i]) { | ||
713 | while (++i < cpuhw->n_counters) | ||
714 | cpuhw->counter[i-1] = cpuhw->counter[i]; | ||
715 | --cpuhw->n_counters; | ||
716 | ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); | ||
717 | if (counter->hw.idx) { | ||
718 | write_pmc(counter->hw.idx, 0); | ||
719 | counter->hw.idx = 0; | ||
720 | } | ||
721 | perf_counter_update_userpage(counter); | ||
722 | break; | ||
723 | } | ||
724 | } | ||
725 | for (i = 0; i < cpuhw->n_limited; ++i) | ||
726 | if (counter == cpuhw->limited_counter[i]) | ||
727 | break; | ||
728 | if (i < cpuhw->n_limited) { | ||
729 | while (++i < cpuhw->n_limited) { | ||
730 | cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i]; | ||
731 | cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i]; | ||
732 | } | ||
733 | --cpuhw->n_limited; | ||
734 | } | ||
735 | if (cpuhw->n_counters == 0) { | ||
736 | /* disable exceptions if no counters are running */ | ||
737 | cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); | ||
738 | } | ||
739 | |||
740 | perf_enable(); | ||
741 | local_irq_restore(flags); | ||
742 | } | ||
743 | |||
744 | struct pmu power_pmu = { | ||
745 | .enable = power_pmu_enable, | ||
746 | .disable = power_pmu_disable, | ||
747 | .read = power_pmu_read, | ||
748 | }; | ||
749 | |||
750 | /* | ||
751 | * Return 1 if we might be able to put counter on a limited PMC, | ||
752 | * or 0 if not. | ||
753 | * A counter can only go on a limited PMC if it counts something | ||
754 | * that a limited PMC can count, doesn't require interrupts, and | ||
755 | * doesn't exclude any processor mode. | ||
756 | */ | ||
757 | static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev, | ||
758 | unsigned int flags) | ||
759 | { | ||
760 | int n; | ||
761 | u64 alt[MAX_EVENT_ALTERNATIVES]; | ||
762 | |||
763 | if (counter->hw_event.exclude_user | ||
764 | || counter->hw_event.exclude_kernel | ||
765 | || counter->hw_event.exclude_hv | ||
766 | || counter->hw_event.irq_period) | ||
767 | return 0; | ||
768 | |||
769 | if (ppmu->limited_pmc_event(ev)) | ||
770 | return 1; | ||
771 | |||
772 | /* | ||
773 | * The requested event isn't on a limited PMC already; | ||
774 | * see if any alternative code goes on a limited PMC. | ||
775 | */ | ||
776 | if (!ppmu->get_alternatives) | ||
777 | return 0; | ||
778 | |||
779 | flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; | ||
780 | n = ppmu->get_alternatives(ev, flags, alt); | ||
781 | |||
782 | return n > 0; | ||
783 | } | ||
784 | |||
785 | /* | ||
786 | * Find an alternative event that goes on a normal PMC, if possible, | ||
787 | * and return the event code, or 0 if there is no such alternative. | ||
788 | * (Note: event code 0 is "don't count" on all machines.) | ||
789 | */ | ||
790 | static u64 normal_pmc_alternative(u64 ev, unsigned long flags) | ||
791 | { | ||
792 | u64 alt[MAX_EVENT_ALTERNATIVES]; | ||
793 | int n; | ||
794 | |||
795 | flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); | ||
796 | n = ppmu->get_alternatives(ev, flags, alt); | ||
797 | if (!n) | ||
798 | return 0; | ||
799 | return alt[0]; | ||
800 | } | ||
801 | |||
802 | /* Number of perf_counters counting hardware events */ | ||
803 | static atomic_t num_counters; | ||
804 | /* Used to avoid races in calling reserve/release_pmc_hardware */ | ||
805 | static DEFINE_MUTEX(pmc_reserve_mutex); | ||
806 | |||
807 | /* | ||
808 | * Release the PMU if this is the last perf_counter. | ||
809 | */ | ||
810 | static void hw_perf_counter_destroy(struct perf_counter *counter) | ||
811 | { | ||
812 | if (!atomic_add_unless(&num_counters, -1, 1)) { | ||
813 | mutex_lock(&pmc_reserve_mutex); | ||
814 | if (atomic_dec_return(&num_counters) == 0) | ||
815 | release_pmc_hardware(); | ||
816 | mutex_unlock(&pmc_reserve_mutex); | ||
817 | } | ||
818 | } | ||
819 | |||
820 | const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | ||
821 | { | ||
822 | u64 ev; | ||
823 | unsigned long flags; | ||
824 | struct perf_counter *ctrs[MAX_HWCOUNTERS]; | ||
825 | u64 events[MAX_HWCOUNTERS]; | ||
826 | unsigned int cflags[MAX_HWCOUNTERS]; | ||
827 | int n; | ||
828 | int err; | ||
829 | |||
830 | if (!ppmu) | ||
831 | return ERR_PTR(-ENXIO); | ||
832 | if (!perf_event_raw(&counter->hw_event)) { | ||
833 | ev = perf_event_id(&counter->hw_event); | ||
834 | if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) | ||
835 | return ERR_PTR(-EOPNOTSUPP); | ||
836 | ev = ppmu->generic_events[ev]; | ||
837 | } else { | ||
838 | ev = perf_event_config(&counter->hw_event); | ||
839 | } | ||
840 | counter->hw.config_base = ev; | ||
841 | counter->hw.idx = 0; | ||
842 | |||
843 | /* | ||
844 | * If we are not running on a hypervisor, force the | ||
845 | * exclude_hv bit to 0 so that we don't care what | ||
846 | * the user set it to. | ||
847 | */ | ||
848 | if (!firmware_has_feature(FW_FEATURE_LPAR)) | ||
849 | counter->hw_event.exclude_hv = 0; | ||
850 | |||
851 | /* | ||
852 | * If this is a per-task counter, then we can use | ||
853 | * PM_RUN_* events interchangeably with their non RUN_* | ||
854 | * equivalents, e.g. PM_RUN_CYC instead of PM_CYC. | ||
855 | * XXX we should check if the task is an idle task. | ||
856 | */ | ||
857 | flags = 0; | ||
858 | if (counter->ctx->task) | ||
859 | flags |= PPMU_ONLY_COUNT_RUN; | ||
860 | |||
861 | /* | ||
862 | * If this machine has limited counters, check whether this | ||
863 | * event could go on a limited counter. | ||
864 | */ | ||
865 | if (ppmu->flags & PPMU_LIMITED_PMC5_6) { | ||
866 | if (can_go_on_limited_pmc(counter, ev, flags)) { | ||
867 | flags |= PPMU_LIMITED_PMC_OK; | ||
868 | } else if (ppmu->limited_pmc_event(ev)) { | ||
869 | /* | ||
870 | * The requested event is on a limited PMC, | ||
871 | * but we can't use a limited PMC; see if any | ||
872 | * alternative goes on a normal PMC. | ||
873 | */ | ||
874 | ev = normal_pmc_alternative(ev, flags); | ||
875 | if (!ev) | ||
876 | return ERR_PTR(-EINVAL); | ||
877 | } | ||
878 | } | ||
879 | |||
880 | /* | ||
881 | * If this is in a group, check if it can go on with all the | ||
882 | * other hardware counters in the group. We assume the counter | ||
883 | * hasn't been linked into its leader's sibling list at this point. | ||
884 | */ | ||
885 | n = 0; | ||
886 | if (counter->group_leader != counter) { | ||
887 | n = collect_events(counter->group_leader, ppmu->n_counter - 1, | ||
888 | ctrs, events, cflags); | ||
889 | if (n < 0) | ||
890 | return ERR_PTR(-EINVAL); | ||
891 | } | ||
892 | events[n] = ev; | ||
893 | ctrs[n] = counter; | ||
894 | cflags[n] = flags; | ||
895 | if (check_excludes(ctrs, cflags, n, 1)) | ||
896 | return ERR_PTR(-EINVAL); | ||
897 | if (power_check_constraints(events, cflags, n + 1)) | ||
898 | return ERR_PTR(-EINVAL); | ||
899 | |||
900 | counter->hw.config = events[n]; | ||
901 | counter->hw.counter_base = cflags[n]; | ||
902 | atomic64_set(&counter->hw.period_left, counter->hw.irq_period); | ||
903 | |||
904 | /* | ||
905 | * See if we need to reserve the PMU. | ||
906 | * If no counters are currently in use, then we have to take a | ||
907 | * mutex to ensure that we don't race with another task doing | ||
908 | * reserve_pmc_hardware or release_pmc_hardware. | ||
909 | */ | ||
910 | err = 0; | ||
911 | if (!atomic_inc_not_zero(&num_counters)) { | ||
912 | mutex_lock(&pmc_reserve_mutex); | ||
913 | if (atomic_read(&num_counters) == 0 && | ||
914 | reserve_pmc_hardware(perf_counter_interrupt)) | ||
915 | err = -EBUSY; | ||
916 | else | ||
917 | atomic_inc(&num_counters); | ||
918 | mutex_unlock(&pmc_reserve_mutex); | ||
919 | } | ||
920 | counter->destroy = hw_perf_counter_destroy; | ||
921 | |||
922 | if (err) | ||
923 | return ERR_PTR(err); | ||
924 | return &power_pmu; | ||
925 | } | ||
926 | |||
927 | /* | ||
928 | * A counter has overflowed; update its count and record | ||
929 | * things if requested. Note that interrupts are hard-disabled | ||
930 | * here so there is no possibility of being interrupted. | ||
931 | */ | ||
932 | static void record_and_restart(struct perf_counter *counter, long val, | ||
933 | struct pt_regs *regs, int nmi) | ||
934 | { | ||
935 | u64 period = counter->hw.irq_period; | ||
936 | s64 prev, delta, left; | ||
937 | int record = 0; | ||
938 | u64 addr, mmcra, sdsync; | ||
939 | |||
940 | /* we don't have to worry about interrupts here */ | ||
941 | prev = atomic64_read(&counter->hw.prev_count); | ||
942 | delta = (val - prev) & 0xfffffffful; | ||
943 | atomic64_add(delta, &counter->count); | ||
944 | |||
945 | /* | ||
946 | * See if the total period for this counter has expired, | ||
947 | * and update for the next period. | ||
948 | */ | ||
949 | val = 0; | ||
950 | left = atomic64_read(&counter->hw.period_left) - delta; | ||
951 | if (period) { | ||
952 | if (left <= 0) { | ||
953 | left += period; | ||
954 | if (left <= 0) | ||
955 | left = period; | ||
956 | record = 1; | ||
957 | } | ||
958 | if (left < 0x80000000L) | ||
959 | val = 0x80000000L - left; | ||
960 | } | ||
961 | write_pmc(counter->hw.idx, val); | ||
962 | atomic64_set(&counter->hw.prev_count, val); | ||
963 | atomic64_set(&counter->hw.period_left, left); | ||
964 | perf_counter_update_userpage(counter); | ||
965 | |||
966 | /* | ||
967 | * Finally record data if requested. | ||
968 | */ | ||
969 | if (record) { | ||
970 | addr = 0; | ||
971 | if (counter->hw_event.record_type & PERF_RECORD_ADDR) { | ||
972 | /* | ||
973 | * The user wants a data address recorded. | ||
974 | * If we're not doing instruction sampling, | ||
975 | * give them the SDAR (sampled data address). | ||
976 | * If we are doing instruction sampling, then only | ||
977 | * give them the SDAR if it corresponds to the | ||
978 | * instruction pointed to by SIAR; this is indicated | ||
979 | * by the [POWER6_]MMCRA_SDSYNC bit in MMCRA. | ||
980 | */ | ||
981 | mmcra = regs->dsisr; | ||
982 | sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? | ||
983 | POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; | ||
984 | if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) | ||
985 | addr = mfspr(SPRN_SDAR); | ||
986 | } | ||
987 | perf_counter_overflow(counter, nmi, regs, addr); | ||
988 | } | ||
989 | } | ||
990 | |||
991 | /* | ||
992 | * Called from generic code to get the misc flags (i.e. processor mode) | ||
993 | * for an event. | ||
994 | */ | ||
995 | unsigned long perf_misc_flags(struct pt_regs *regs) | ||
996 | { | ||
997 | unsigned long mmcra; | ||
998 | |||
999 | if (TRAP(regs) != 0xf00) { | ||
1000 | /* not a PMU interrupt */ | ||
1001 | return user_mode(regs) ? PERF_EVENT_MISC_USER : | ||
1002 | PERF_EVENT_MISC_KERNEL; | ||
1003 | } | ||
1004 | |||
1005 | mmcra = regs->dsisr; | ||
1006 | if (ppmu->flags & PPMU_ALT_SIPR) { | ||
1007 | if (mmcra & POWER6_MMCRA_SIHV) | ||
1008 | return PERF_EVENT_MISC_HYPERVISOR; | ||
1009 | return (mmcra & POWER6_MMCRA_SIPR) ? PERF_EVENT_MISC_USER : | ||
1010 | PERF_EVENT_MISC_KERNEL; | ||
1011 | } | ||
1012 | if (mmcra & MMCRA_SIHV) | ||
1013 | return PERF_EVENT_MISC_HYPERVISOR; | ||
1014 | return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER : | ||
1015 | PERF_EVENT_MISC_KERNEL; | ||
1016 | } | ||
1017 | |||
1018 | /* | ||
1019 | * Called from generic code to get the instruction pointer | ||
1020 | * for an event. | ||
1021 | */ | ||
1022 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | ||
1023 | { | ||
1024 | unsigned long mmcra; | ||
1025 | unsigned long ip; | ||
1026 | unsigned long slot; | ||
1027 | |||
1028 | if (TRAP(regs) != 0xf00) | ||
1029 | return regs->nip; /* not a PMU interrupt */ | ||
1030 | |||
1031 | ip = mfspr(SPRN_SIAR); | ||
1032 | mmcra = regs->dsisr; | ||
1033 | if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { | ||
1034 | slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; | ||
1035 | if (slot > 1) | ||
1036 | ip += 4 * (slot - 1); | ||
1037 | } | ||
1038 | return ip; | ||
1039 | } | ||
1040 | |||
1041 | /* | ||
1042 | * Performance monitor interrupt stuff | ||
1043 | */ | ||
1044 | static void perf_counter_interrupt(struct pt_regs *regs) | ||
1045 | { | ||
1046 | int i; | ||
1047 | struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); | ||
1048 | struct perf_counter *counter; | ||
1049 | long val; | ||
1050 | int found = 0; | ||
1051 | int nmi; | ||
1052 | |||
1053 | if (cpuhw->n_limited) | ||
1054 | freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), | ||
1055 | mfspr(SPRN_PMC6)); | ||
1056 | |||
1057 | /* | ||
1058 | * Overload regs->dsisr to store MMCRA so we only need to read it once. | ||
1059 | */ | ||
1060 | regs->dsisr = mfspr(SPRN_MMCRA); | ||
1061 | |||
1062 | /* | ||
1063 | * If interrupts were soft-disabled when this PMU interrupt | ||
1064 | * occurred, treat it as an NMI. | ||
1065 | */ | ||
1066 | nmi = !regs->softe; | ||
1067 | if (nmi) | ||
1068 | nmi_enter(); | ||
1069 | else | ||
1070 | irq_enter(); | ||
1071 | |||
1072 | for (i = 0; i < cpuhw->n_counters; ++i) { | ||
1073 | counter = cpuhw->counter[i]; | ||
1074 | if (is_limited_pmc(counter->hw.idx)) | ||
1075 | continue; | ||
1076 | val = read_pmc(counter->hw.idx); | ||
1077 | if ((int)val < 0) { | ||
1078 | /* counter has overflowed */ | ||
1079 | found = 1; | ||
1080 | record_and_restart(counter, val, regs, nmi); | ||
1081 | } | ||
1082 | } | ||
1083 | |||
1084 | /* | ||
1085 | * In case we didn't find and reset the counter that caused | ||
1086 | * the interrupt, scan all counters and reset any that are | ||
1087 | * negative, to avoid getting continual interrupts. | ||
1088 | * Any that we processed in the previous loop will not be negative. | ||
1089 | */ | ||
1090 | if (!found) { | ||
1091 | for (i = 0; i < ppmu->n_counter; ++i) { | ||
1092 | if (is_limited_pmc(i + 1)) | ||
1093 | continue; | ||
1094 | val = read_pmc(i + 1); | ||
1095 | if ((int)val < 0) | ||
1096 | write_pmc(i + 1, 0); | ||
1097 | } | ||
1098 | } | ||
1099 | |||
1100 | /* | ||
1101 | * Reset MMCR0 to its normal value. This will set PMXE and | ||
1102 | * clear FC (freeze counters) and PMAO (perf mon alert occurred) | ||
1103 | * and thus allow interrupts to occur again. | ||
1104 | * XXX might want to use MSR.PM to keep the counters frozen until | ||
1105 | * we get back out of this interrupt. | ||
1106 | */ | ||
1107 | write_mmcr0(cpuhw, cpuhw->mmcr[0]); | ||
1108 | |||
1109 | if (nmi) | ||
1110 | nmi_exit(); | ||
1111 | else | ||
1112 | irq_exit(); | ||
1113 | } | ||
1114 | |||
1115 | void hw_perf_counter_setup(int cpu) | ||
1116 | { | ||
1117 | struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu); | ||
1118 | |||
1119 | memset(cpuhw, 0, sizeof(*cpuhw)); | ||
1120 | cpuhw->mmcr[0] = MMCR0_FC; | ||
1121 | } | ||
1122 | |||
1123 | extern struct power_pmu power4_pmu; | ||
1124 | extern struct power_pmu ppc970_pmu; | ||
1125 | extern struct power_pmu power5_pmu; | ||
1126 | extern struct power_pmu power5p_pmu; | ||
1127 | extern struct power_pmu power6_pmu; | ||
1128 | |||
1129 | static int init_perf_counters(void) | ||
1130 | { | ||
1131 | unsigned long pvr; | ||
1132 | |||
1133 | /* XXX should get this from cputable */ | ||
1134 | pvr = mfspr(SPRN_PVR); | ||
1135 | switch (PVR_VER(pvr)) { | ||
1136 | case PV_POWER4: | ||
1137 | case PV_POWER4p: | ||
1138 | ppmu = &power4_pmu; | ||
1139 | break; | ||
1140 | case PV_970: | ||
1141 | case PV_970FX: | ||
1142 | case PV_970MP: | ||
1143 | ppmu = &ppc970_pmu; | ||
1144 | break; | ||
1145 | case PV_POWER5: | ||
1146 | ppmu = &power5_pmu; | ||
1147 | break; | ||
1148 | case PV_POWER5p: | ||
1149 | ppmu = &power5p_pmu; | ||
1150 | break; | ||
1151 | case 0x3e: | ||
1152 | ppmu = &power6_pmu; | ||
1153 | break; | ||
1154 | } | ||
1155 | |||
1156 | /* | ||
1157 | * Use FCHV to ignore kernel events if MSR.HV is set. | ||
1158 | */ | ||
1159 | if (mfmsr() & MSR_HV) | ||
1160 | freeze_counters_kernel = MMCR0_FCHV; | ||
1161 | |||
1162 | return 0; | ||
1163 | } | ||
1164 | |||
1165 | arch_initcall(init_perf_counters); | ||
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c new file mode 100644 index 000000000000..836fa118eb1e --- /dev/null +++ b/arch/powerpc/kernel/power4-pmu.c | |||
@@ -0,0 +1,557 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_counter.h> | ||
13 | #include <asm/reg.h> | ||
14 | |||
15 | /* | ||
16 | * Bits in event code for POWER4 | ||
17 | */ | ||
18 | #define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ | ||
19 | #define PM_PMC_MSK 0xf | ||
20 | #define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ | ||
21 | #define PM_UNIT_MSK 0xf | ||
22 | #define PM_LOWER_SH 6 | ||
23 | #define PM_LOWER_MSK 1 | ||
24 | #define PM_LOWER_MSKS 0x40 | ||
25 | #define PM_BYTE_SH 4 /* Byte number of event bus to use */ | ||
26 | #define PM_BYTE_MSK 3 | ||
27 | #define PM_PMCSEL_MSK 7 | ||
28 | |||
29 | /* | ||
30 | * Unit code values | ||
31 | */ | ||
32 | #define PM_FPU 1 | ||
33 | #define PM_ISU1 2 | ||
34 | #define PM_IFU 3 | ||
35 | #define PM_IDU0 4 | ||
36 | #define PM_ISU1_ALT 6 | ||
37 | #define PM_ISU2 7 | ||
38 | #define PM_IFU_ALT 8 | ||
39 | #define PM_LSU0 9 | ||
40 | #define PM_LSU1 0xc | ||
41 | #define PM_GPS 0xf | ||
42 | |||
43 | /* | ||
44 | * Bits in MMCR0 for POWER4 | ||
45 | */ | ||
46 | #define MMCR0_PMC1SEL_SH 8 | ||
47 | #define MMCR0_PMC2SEL_SH 1 | ||
48 | #define MMCR_PMCSEL_MSK 0x1f | ||
49 | |||
50 | /* | ||
51 | * Bits in MMCR1 for POWER4 | ||
52 | */ | ||
53 | #define MMCR1_TTM0SEL_SH 62 | ||
54 | #define MMCR1_TTC0SEL_SH 61 | ||
55 | #define MMCR1_TTM1SEL_SH 59 | ||
56 | #define MMCR1_TTC1SEL_SH 58 | ||
57 | #define MMCR1_TTM2SEL_SH 56 | ||
58 | #define MMCR1_TTC2SEL_SH 55 | ||
59 | #define MMCR1_TTM3SEL_SH 53 | ||
60 | #define MMCR1_TTC3SEL_SH 52 | ||
61 | #define MMCR1_TTMSEL_MSK 3 | ||
62 | #define MMCR1_TD_CP_DBG0SEL_SH 50 | ||
63 | #define MMCR1_TD_CP_DBG1SEL_SH 48 | ||
64 | #define MMCR1_TD_CP_DBG2SEL_SH 46 | ||
65 | #define MMCR1_TD_CP_DBG3SEL_SH 44 | ||
66 | #define MMCR1_DEBUG0SEL_SH 43 | ||
67 | #define MMCR1_DEBUG1SEL_SH 42 | ||
68 | #define MMCR1_DEBUG2SEL_SH 41 | ||
69 | #define MMCR1_DEBUG3SEL_SH 40 | ||
70 | #define MMCR1_PMC1_ADDER_SEL_SH 39 | ||
71 | #define MMCR1_PMC2_ADDER_SEL_SH 38 | ||
72 | #define MMCR1_PMC6_ADDER_SEL_SH 37 | ||
73 | #define MMCR1_PMC5_ADDER_SEL_SH 36 | ||
74 | #define MMCR1_PMC8_ADDER_SEL_SH 35 | ||
75 | #define MMCR1_PMC7_ADDER_SEL_SH 34 | ||
76 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
77 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
78 | #define MMCR1_PMC3SEL_SH 27 | ||
79 | #define MMCR1_PMC4SEL_SH 22 | ||
80 | #define MMCR1_PMC5SEL_SH 17 | ||
81 | #define MMCR1_PMC6SEL_SH 12 | ||
82 | #define MMCR1_PMC7SEL_SH 7 | ||
83 | #define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */ | ||
84 | |||
85 | static short mmcr1_adder_bits[8] = { | ||
86 | MMCR1_PMC1_ADDER_SEL_SH, | ||
87 | MMCR1_PMC2_ADDER_SEL_SH, | ||
88 | MMCR1_PMC3_ADDER_SEL_SH, | ||
89 | MMCR1_PMC4_ADDER_SEL_SH, | ||
90 | MMCR1_PMC5_ADDER_SEL_SH, | ||
91 | MMCR1_PMC6_ADDER_SEL_SH, | ||
92 | MMCR1_PMC7_ADDER_SEL_SH, | ||
93 | MMCR1_PMC8_ADDER_SEL_SH | ||
94 | }; | ||
95 | |||
96 | /* | ||
97 | * Bits in MMCRA | ||
98 | */ | ||
99 | #define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */ | ||
100 | |||
101 | /* | ||
102 | * Layout of constraint bits: | ||
103 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
104 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
105 | * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><> | ||
106 | * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 | ||
107 | * \SMPL ||\TTC3SEL | ||
108 | * |\TTC_IFU_SEL | ||
109 | * \TTM2SEL0 | ||
110 | * | ||
111 | * SMPL - SAMPLE_ENABLE constraint | ||
112 | * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000 | ||
113 | * | ||
114 | * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2 | ||
115 | * 55: UC1 error 0x0080_0000_0000_0000 | ||
116 | * 54: FPU events needed 0x0040_0000_0000_0000 | ||
117 | * 53: ISU1 events needed 0x0020_0000_0000_0000 | ||
118 | * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000 | ||
119 | * | ||
120 | * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0 | ||
121 | * 51: UC2 error 0x0008_0000_0000_0000 | ||
122 | * 50: FPU events needed 0x0004_0000_0000_0000 | ||
123 | * 49: IFU events needed 0x0002_0000_0000_0000 | ||
124 | * 48: LSU0 events needed 0x0001_0000_0000_0000 | ||
125 | * | ||
126 | * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1 | ||
127 | * 47: UC3 error 0x8000_0000_0000 | ||
128 | * 46: LSU0 events needed 0x4000_0000_0000 | ||
129 | * 45: IFU events needed 0x2000_0000_0000 | ||
130 | * 44: IDU0|ISU2 events needed 0x1000_0000_0000 | ||
131 | * 43: ISU1 events needed 0x0800_0000_0000 | ||
132 | * | ||
133 | * TTM2SEL0 | ||
134 | * 42: 0 = IDU0 events needed | ||
135 | * 1 = ISU2 events needed 0x0400_0000_0000 | ||
136 | * | ||
137 | * TTC_IFU_SEL | ||
138 | * 41: 0 = IFU.U events needed | ||
139 | * 1 = IFU.L events needed 0x0200_0000_0000 | ||
140 | * | ||
141 | * TTC3SEL | ||
142 | * 40: 0 = LSU1.U events needed | ||
143 | * 1 = LSU1.L events needed 0x0100_0000_0000 | ||
144 | * | ||
145 | * PS1 | ||
146 | * 39: PS1 error 0x0080_0000_0000 | ||
147 | * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 | ||
148 | * | ||
149 | * PS2 | ||
150 | * 35: PS2 error 0x0008_0000_0000 | ||
151 | * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 | ||
152 | * | ||
153 | * B0 | ||
154 | * 28-31: Byte 0 event source 0xf000_0000 | ||
155 | * 1 = FPU | ||
156 | * 2 = ISU1 | ||
157 | * 3 = IFU | ||
158 | * 4 = IDU0 | ||
159 | * 7 = ISU2 | ||
160 | * 9 = LSU0 | ||
161 | * c = LSU1 | ||
162 | * f = GPS | ||
163 | * | ||
164 | * B1, B2, B3 | ||
165 | * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources | ||
166 | * | ||
167 | * P8 | ||
168 | * 15: P8 error 0x8000 | ||
169 | * 14-15: Count of events needing PMC8 | ||
170 | * | ||
171 | * P1..P7 | ||
172 | * 0-13: Count of events needing PMC1..PMC7 | ||
173 | * | ||
174 | * Note: this doesn't allow events using IFU.U to be combined with events | ||
175 | * using IFU.L, though that is feasible (using TTM0 and TTM2). However | ||
176 | * there are no listed events for IFU.L (they are debug events not | ||
177 | * verified for performance monitoring) so this shouldn't cause a | ||
178 | * problem. | ||
179 | */ | ||
180 | |||
181 | static struct unitinfo { | ||
182 | u64 value, mask; | ||
183 | int unit; | ||
184 | int lowerbit; | ||
185 | } p4_unitinfo[16] = { | ||
186 | [PM_FPU] = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 }, | ||
187 | [PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, | ||
188 | [PM_ISU1_ALT] = | ||
189 | { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, | ||
190 | [PM_IFU] = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, | ||
191 | [PM_IFU_ALT] = | ||
192 | { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, | ||
193 | [PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 }, | ||
194 | [PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 }, | ||
195 | [PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 }, | ||
196 | [PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 }, | ||
197 | [PM_GPS] = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 } | ||
198 | }; | ||
199 | |||
200 | static unsigned char direct_marked_event[8] = { | ||
201 | (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */ | ||
202 | (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */ | ||
203 | (1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */ | ||
204 | (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */ | ||
205 | (1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */ | ||
206 | (1<<3) | (1<<4) | (1<<5), | ||
207 | /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */ | ||
208 | (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */ | ||
209 | (1<<4), /* PMC8: PM_MRK_LSU_FIN */ | ||
210 | }; | ||
211 | |||
212 | /* | ||
213 | * Returns 1 if event counts things relating to marked instructions | ||
214 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
215 | */ | ||
216 | static int p4_marked_instr_event(u64 event) | ||
217 | { | ||
218 | int pmc, psel, unit, byte, bit; | ||
219 | unsigned int mask; | ||
220 | |||
221 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
222 | psel = event & PM_PMCSEL_MSK; | ||
223 | if (pmc) { | ||
224 | if (direct_marked_event[pmc - 1] & (1 << psel)) | ||
225 | return 1; | ||
226 | if (psel == 0) /* add events */ | ||
227 | bit = (pmc <= 4)? pmc - 1: 8 - pmc; | ||
228 | else if (psel == 6) /* decode events */ | ||
229 | bit = 4; | ||
230 | else | ||
231 | return 0; | ||
232 | } else | ||
233 | bit = psel; | ||
234 | |||
235 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
236 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
237 | mask = 0; | ||
238 | switch (unit) { | ||
239 | case PM_LSU1: | ||
240 | if (event & PM_LOWER_MSKS) | ||
241 | mask = 1 << 28; /* byte 7 bit 4 */ | ||
242 | else | ||
243 | mask = 6 << 24; /* byte 3 bits 1 and 2 */ | ||
244 | break; | ||
245 | case PM_LSU0: | ||
246 | /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */ | ||
247 | mask = 0x083dff00; | ||
248 | } | ||
249 | return (mask >> (byte * 8 + bit)) & 1; | ||
250 | } | ||
251 | |||
252 | static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp) | ||
253 | { | ||
254 | int pmc, byte, unit, lower, sh; | ||
255 | u64 mask = 0, value = 0; | ||
256 | int grp = -1; | ||
257 | |||
258 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
259 | if (pmc) { | ||
260 | if (pmc > 8) | ||
261 | return -1; | ||
262 | sh = (pmc - 1) * 2; | ||
263 | mask |= 2 << sh; | ||
264 | value |= 1 << sh; | ||
265 | grp = ((pmc - 1) >> 1) & 1; | ||
266 | } | ||
267 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
268 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
269 | if (unit) { | ||
270 | lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK; | ||
271 | |||
272 | /* | ||
273 | * Bus events on bytes 0 and 2 can be counted | ||
274 | * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8. | ||
275 | */ | ||
276 | if (!pmc) | ||
277 | grp = byte & 1; | ||
278 | |||
279 | if (!p4_unitinfo[unit].unit) | ||
280 | return -1; | ||
281 | mask |= p4_unitinfo[unit].mask; | ||
282 | value |= p4_unitinfo[unit].value; | ||
283 | sh = p4_unitinfo[unit].lowerbit; | ||
284 | if (sh > 1) | ||
285 | value |= (u64)lower << sh; | ||
286 | else if (lower != sh) | ||
287 | return -1; | ||
288 | unit = p4_unitinfo[unit].unit; | ||
289 | |||
290 | /* Set byte lane select field */ | ||
291 | mask |= 0xfULL << (28 - 4 * byte); | ||
292 | value |= (u64)unit << (28 - 4 * byte); | ||
293 | } | ||
294 | if (grp == 0) { | ||
295 | /* increment PMC1/2/5/6 field */ | ||
296 | mask |= 0x8000000000ull; | ||
297 | value |= 0x1000000000ull; | ||
298 | } else { | ||
299 | /* increment PMC3/4/7/8 field */ | ||
300 | mask |= 0x800000000ull; | ||
301 | value |= 0x100000000ull; | ||
302 | } | ||
303 | |||
304 | /* Marked instruction events need sample_enable set */ | ||
305 | if (p4_marked_instr_event(event)) { | ||
306 | mask |= 1ull << 56; | ||
307 | value |= 1ull << 56; | ||
308 | } | ||
309 | |||
310 | /* PMCSEL=6 decode events on byte 2 need sample_enable clear */ | ||
311 | if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2) | ||
312 | mask |= 1ull << 56; | ||
313 | |||
314 | *maskp = mask; | ||
315 | *valp = value; | ||
316 | return 0; | ||
317 | } | ||
318 | |||
319 | static unsigned int ppc_inst_cmpl[] = { | ||
320 | 0x1001, 0x4001, 0x6001, 0x7001, 0x8001 | ||
321 | }; | ||
322 | |||
323 | static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
324 | { | ||
325 | int i, j, na; | ||
326 | |||
327 | alt[0] = event; | ||
328 | na = 1; | ||
329 | |||
330 | /* 2 possibilities for PM_GRP_DISP_REJECT */ | ||
331 | if (event == 0x8003 || event == 0x0224) { | ||
332 | alt[1] = event ^ (0x8003 ^ 0x0224); | ||
333 | return 2; | ||
334 | } | ||
335 | |||
336 | /* 2 possibilities for PM_ST_MISS_L1 */ | ||
337 | if (event == 0x0c13 || event == 0x0c23) { | ||
338 | alt[1] = event ^ (0x0c13 ^ 0x0c23); | ||
339 | return 2; | ||
340 | } | ||
341 | |||
342 | /* several possibilities for PM_INST_CMPL */ | ||
343 | for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) { | ||
344 | if (event == ppc_inst_cmpl[i]) { | ||
345 | for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j) | ||
346 | if (j != i) | ||
347 | alt[na++] = ppc_inst_cmpl[j]; | ||
348 | break; | ||
349 | } | ||
350 | } | ||
351 | |||
352 | return na; | ||
353 | } | ||
354 | |||
355 | static int p4_compute_mmcr(u64 event[], int n_ev, | ||
356 | unsigned int hwc[], u64 mmcr[]) | ||
357 | { | ||
358 | u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; | ||
359 | unsigned int pmc, unit, byte, psel, lower; | ||
360 | unsigned int ttm, grp; | ||
361 | unsigned int pmc_inuse = 0; | ||
362 | unsigned int pmc_grp_use[2]; | ||
363 | unsigned char busbyte[4]; | ||
364 | unsigned char unituse[16]; | ||
365 | unsigned int unitlower = 0; | ||
366 | int i; | ||
367 | |||
368 | if (n_ev > 8) | ||
369 | return -1; | ||
370 | |||
371 | /* First pass to count resource use */ | ||
372 | pmc_grp_use[0] = pmc_grp_use[1] = 0; | ||
373 | memset(busbyte, 0, sizeof(busbyte)); | ||
374 | memset(unituse, 0, sizeof(unituse)); | ||
375 | for (i = 0; i < n_ev; ++i) { | ||
376 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
377 | if (pmc) { | ||
378 | if (pmc_inuse & (1 << (pmc - 1))) | ||
379 | return -1; | ||
380 | pmc_inuse |= 1 << (pmc - 1); | ||
381 | /* count 1/2/5/6 vs 3/4/7/8 use */ | ||
382 | ++pmc_grp_use[((pmc - 1) >> 1) & 1]; | ||
383 | } | ||
384 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
385 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
386 | lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK; | ||
387 | if (unit) { | ||
388 | if (!pmc) | ||
389 | ++pmc_grp_use[byte & 1]; | ||
390 | if (unit == 6 || unit == 8) | ||
391 | /* map alt ISU1/IFU codes: 6->2, 8->3 */ | ||
392 | unit = (unit >> 1) - 1; | ||
393 | if (busbyte[byte] && busbyte[byte] != unit) | ||
394 | return -1; | ||
395 | busbyte[byte] = unit; | ||
396 | lower <<= unit; | ||
397 | if (unituse[unit] && lower != (unitlower & lower)) | ||
398 | return -1; | ||
399 | unituse[unit] = 1; | ||
400 | unitlower |= lower; | ||
401 | } | ||
402 | } | ||
403 | if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) | ||
404 | return -1; | ||
405 | |||
406 | /* | ||
407 | * Assign resources and set multiplexer selects. | ||
408 | * | ||
409 | * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,9 on TTM2. | ||
410 | * Each TTMx can only select one unit, but since | ||
411 | * units 2 and 6 are both ISU1, and 3 and 8 are both IFU, | ||
412 | * we have some choices. | ||
413 | */ | ||
414 | if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) { | ||
415 | unituse[6] = 1; /* Move 2 to 6 */ | ||
416 | unituse[2] = 0; | ||
417 | } | ||
418 | if (unituse[3] & (unituse[1] | unituse[2])) { | ||
419 | unituse[8] = 1; /* Move 3 to 8 */ | ||
420 | unituse[3] = 0; | ||
421 | unitlower = (unitlower & ~8) | ((unitlower & 8) << 5); | ||
422 | } | ||
423 | /* Check only one unit per TTMx */ | ||
424 | if (unituse[1] + unituse[2] + unituse[3] > 1 || | ||
425 | unituse[4] + unituse[6] + unituse[7] > 1 || | ||
426 | unituse[8] + unituse[9] > 1 || | ||
427 | (unituse[5] | unituse[10] | unituse[11] | | ||
428 | unituse[13] | unituse[14])) | ||
429 | return -1; | ||
430 | |||
431 | /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */ | ||
432 | mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH; | ||
433 | mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH; | ||
434 | mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH; | ||
435 | |||
436 | /* Set TTCxSEL fields. */ | ||
437 | if (unitlower & 0xe) | ||
438 | mmcr1 |= 1ull << MMCR1_TTC0SEL_SH; | ||
439 | if (unitlower & 0xf0) | ||
440 | mmcr1 |= 1ull << MMCR1_TTC1SEL_SH; | ||
441 | if (unitlower & 0xf00) | ||
442 | mmcr1 |= 1ull << MMCR1_TTC2SEL_SH; | ||
443 | if (unitlower & 0x7000) | ||
444 | mmcr1 |= 1ull << MMCR1_TTC3SEL_SH; | ||
445 | |||
446 | /* Set byte lane select fields. */ | ||
447 | for (byte = 0; byte < 4; ++byte) { | ||
448 | unit = busbyte[byte]; | ||
449 | if (!unit) | ||
450 | continue; | ||
451 | if (unit == 0xf) { | ||
452 | /* special case for GPS */ | ||
453 | mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte); | ||
454 | } else { | ||
455 | if (!unituse[unit]) | ||
456 | ttm = unit - 1; /* 2->1, 3->2 */ | ||
457 | else | ||
458 | ttm = unit >> 2; | ||
459 | mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte); | ||
460 | } | ||
461 | } | ||
462 | |||
463 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | ||
464 | for (i = 0; i < n_ev; ++i) { | ||
465 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
466 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
467 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
468 | psel = event[i] & PM_PMCSEL_MSK; | ||
469 | if (!pmc) { | ||
470 | /* Bus event or 00xxx direct event (off or cycles) */ | ||
471 | if (unit) | ||
472 | psel |= 0x10 | ((byte & 2) << 2); | ||
473 | for (pmc = 0; pmc < 8; ++pmc) { | ||
474 | if (pmc_inuse & (1 << pmc)) | ||
475 | continue; | ||
476 | grp = (pmc >> 1) & 1; | ||
477 | if (unit) { | ||
478 | if (grp == (byte & 1)) | ||
479 | break; | ||
480 | } else if (pmc_grp_use[grp] < 4) { | ||
481 | ++pmc_grp_use[grp]; | ||
482 | break; | ||
483 | } | ||
484 | } | ||
485 | pmc_inuse |= 1 << pmc; | ||
486 | } else { | ||
487 | /* Direct event */ | ||
488 | --pmc; | ||
489 | if (psel == 0 && (byte & 2)) | ||
490 | /* add events on higher-numbered bus */ | ||
491 | mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; | ||
492 | else if (psel == 6 && byte == 3) | ||
493 | /* seem to need to set sample_enable here */ | ||
494 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
495 | psel |= 8; | ||
496 | } | ||
497 | if (pmc <= 1) | ||
498 | mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc); | ||
499 | else | ||
500 | mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); | ||
501 | if (pmc == 7) /* PMC8 */ | ||
502 | mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH; | ||
503 | hwc[i] = pmc; | ||
504 | if (p4_marked_instr_event(event[i])) | ||
505 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
506 | } | ||
507 | |||
508 | if (pmc_inuse & 1) | ||
509 | mmcr0 |= MMCR0_PMC1CE; | ||
510 | if (pmc_inuse & 0xfe) | ||
511 | mmcr0 |= MMCR0_PMCjCE; | ||
512 | |||
513 | mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ | ||
514 | |||
515 | /* Return MMCRx values */ | ||
516 | mmcr[0] = mmcr0; | ||
517 | mmcr[1] = mmcr1; | ||
518 | mmcr[2] = mmcra; | ||
519 | return 0; | ||
520 | } | ||
521 | |||
522 | static void p4_disable_pmc(unsigned int pmc, u64 mmcr[]) | ||
523 | { | ||
524 | /* | ||
525 | * Setting the PMCxSEL field to 0 disables PMC x. | ||
526 | * (Note that pmc is 0-based here, not 1-based.) | ||
527 | */ | ||
528 | if (pmc <= 1) { | ||
529 | mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc)); | ||
530 | } else { | ||
531 | mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2))); | ||
532 | if (pmc == 7) | ||
533 | mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH); | ||
534 | } | ||
535 | } | ||
536 | |||
537 | static int p4_generic_events[] = { | ||
538 | [PERF_COUNT_CPU_CYCLES] = 7, | ||
539 | [PERF_COUNT_INSTRUCTIONS] = 0x1001, | ||
540 | [PERF_COUNT_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */ | ||
541 | [PERF_COUNT_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */ | ||
542 | [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */ | ||
543 | [PERF_COUNT_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */ | ||
544 | }; | ||
545 | |||
546 | struct power_pmu power4_pmu = { | ||
547 | .n_counter = 8, | ||
548 | .max_alternatives = 5, | ||
549 | .add_fields = 0x0000001100005555ull, | ||
550 | .test_adder = 0x0011083300000000ull, | ||
551 | .compute_mmcr = p4_compute_mmcr, | ||
552 | .get_constraint = p4_get_constraint, | ||
553 | .get_alternatives = p4_get_alternatives, | ||
554 | .disable_pmc = p4_disable_pmc, | ||
555 | .n_generic = ARRAY_SIZE(p4_generic_events), | ||
556 | .generic_events = p4_generic_events, | ||
557 | }; | ||
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c new file mode 100644 index 000000000000..c6cdfc165d6e --- /dev/null +++ b/arch/powerpc/kernel/power5+-pmu.c | |||
@@ -0,0 +1,630 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER5+/++ (not POWER5) processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_counter.h> | ||
13 | #include <asm/reg.h> | ||
14 | |||
15 | /* | ||
16 | * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3) | ||
17 | */ | ||
18 | #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ | ||
19 | #define PM_PMC_MSK 0xf | ||
20 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
21 | #define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ | ||
22 | #define PM_UNIT_MSK 0xf | ||
23 | #define PM_BYTE_SH 12 /* Byte number of event bus to use */ | ||
24 | #define PM_BYTE_MSK 7 | ||
25 | #define PM_GRS_SH 8 /* Storage subsystem mux select */ | ||
26 | #define PM_GRS_MSK 7 | ||
27 | #define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ | ||
28 | #define PM_PMCSEL_MSK 0x7f | ||
29 | |||
30 | /* Values in PM_UNIT field */ | ||
31 | #define PM_FPU 0 | ||
32 | #define PM_ISU0 1 | ||
33 | #define PM_IFU 2 | ||
34 | #define PM_ISU1 3 | ||
35 | #define PM_IDU 4 | ||
36 | #define PM_ISU0_ALT 6 | ||
37 | #define PM_GRS 7 | ||
38 | #define PM_LSU0 8 | ||
39 | #define PM_LSU1 0xc | ||
40 | #define PM_LASTUNIT 0xc | ||
41 | |||
42 | /* | ||
43 | * Bits in MMCR1 for POWER5+ | ||
44 | */ | ||
45 | #define MMCR1_TTM0SEL_SH 62 | ||
46 | #define MMCR1_TTM1SEL_SH 60 | ||
47 | #define MMCR1_TTM2SEL_SH 58 | ||
48 | #define MMCR1_TTM3SEL_SH 56 | ||
49 | #define MMCR1_TTMSEL_MSK 3 | ||
50 | #define MMCR1_TD_CP_DBG0SEL_SH 54 | ||
51 | #define MMCR1_TD_CP_DBG1SEL_SH 52 | ||
52 | #define MMCR1_TD_CP_DBG2SEL_SH 50 | ||
53 | #define MMCR1_TD_CP_DBG3SEL_SH 48 | ||
54 | #define MMCR1_GRS_L2SEL_SH 46 | ||
55 | #define MMCR1_GRS_L2SEL_MSK 3 | ||
56 | #define MMCR1_GRS_L3SEL_SH 44 | ||
57 | #define MMCR1_GRS_L3SEL_MSK 3 | ||
58 | #define MMCR1_GRS_MCSEL_SH 41 | ||
59 | #define MMCR1_GRS_MCSEL_MSK 7 | ||
60 | #define MMCR1_GRS_FABSEL_SH 39 | ||
61 | #define MMCR1_GRS_FABSEL_MSK 3 | ||
62 | #define MMCR1_PMC1_ADDER_SEL_SH 35 | ||
63 | #define MMCR1_PMC2_ADDER_SEL_SH 34 | ||
64 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
65 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
66 | #define MMCR1_PMC1SEL_SH 25 | ||
67 | #define MMCR1_PMC2SEL_SH 17 | ||
68 | #define MMCR1_PMC3SEL_SH 9 | ||
69 | #define MMCR1_PMC4SEL_SH 1 | ||
70 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
71 | #define MMCR1_PMCSEL_MSK 0x7f | ||
72 | |||
73 | /* | ||
74 | * Bits in MMCRA | ||
75 | */ | ||
76 | |||
77 | /* | ||
78 | * Layout of constraint bits: | ||
79 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
80 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
81 | * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><> | ||
82 | * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1 | ||
83 | * | ||
84 | * NC - number of counters | ||
85 | * 51: NC error 0x0008_0000_0000_0000 | ||
86 | * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 | ||
87 | * | ||
88 | * G0..G3 - GRS mux constraints | ||
89 | * 46-47: GRS_L2SEL value | ||
90 | * 44-45: GRS_L3SEL value | ||
91 | * 41-43: GRS_MCSEL value | ||
92 | * 39-40: GRS_FABSEL value | ||
93 | * Note that these match up with their bit positions in MMCR1 | ||
94 | * | ||
95 | * T0 - TTM0 constraint | ||
96 | * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000 | ||
97 | * | ||
98 | * T1 - TTM1 constraint | ||
99 | * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000 | ||
100 | * | ||
101 | * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS | ||
102 | * 33: UC error 0x02_0000_0000 | ||
103 | * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000 | ||
104 | * 31: ISU0 events needed 0x00_8000_0000 | ||
105 | * 30: IDU|GRS events needed 0x00_4000_0000 | ||
106 | * | ||
107 | * B0 | ||
108 | * 24-27: Byte 0 event source 0x0f00_0000 | ||
109 | * Encoding as for the event code | ||
110 | * | ||
111 | * B1, B2, B3 | ||
112 | * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources | ||
113 | * | ||
114 | * P6 | ||
115 | * 11: P6 error 0x800 | ||
116 | * 10-11: Count of events needing PMC6 | ||
117 | * | ||
118 | * P1..P5 | ||
119 | * 0-9: Count of events needing PMC1..PMC5 | ||
120 | */ | ||
121 | |||
122 | static const int grsel_shift[8] = { | ||
123 | MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, | ||
124 | MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, | ||
125 | MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH | ||
126 | }; | ||
127 | |||
128 | /* Masks and values for using events from the various units */ | ||
129 | static u64 unit_cons[PM_LASTUNIT+1][2] = { | ||
130 | [PM_FPU] = { 0x3200000000ull, 0x0100000000ull }, | ||
131 | [PM_ISU0] = { 0x0200000000ull, 0x0080000000ull }, | ||
132 | [PM_ISU1] = { 0x3200000000ull, 0x3100000000ull }, | ||
133 | [PM_IFU] = { 0x3200000000ull, 0x2100000000ull }, | ||
134 | [PM_IDU] = { 0x0e00000000ull, 0x0040000000ull }, | ||
135 | [PM_GRS] = { 0x0e00000000ull, 0x0c40000000ull }, | ||
136 | }; | ||
137 | |||
138 | static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp) | ||
139 | { | ||
140 | int pmc, byte, unit, sh; | ||
141 | int bit, fmask; | ||
142 | u64 mask = 0, value = 0; | ||
143 | |||
144 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
145 | if (pmc) { | ||
146 | if (pmc > 6) | ||
147 | return -1; | ||
148 | sh = (pmc - 1) * 2; | ||
149 | mask |= 2 << sh; | ||
150 | value |= 1 << sh; | ||
151 | if (pmc >= 5 && !(event == 0x500009 || event == 0x600005)) | ||
152 | return -1; | ||
153 | } | ||
154 | if (event & PM_BUSEVENT_MSK) { | ||
155 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
156 | if (unit > PM_LASTUNIT) | ||
157 | return -1; | ||
158 | if (unit == PM_ISU0_ALT) | ||
159 | unit = PM_ISU0; | ||
160 | mask |= unit_cons[unit][0]; | ||
161 | value |= unit_cons[unit][1]; | ||
162 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
163 | if (byte >= 4) { | ||
164 | if (unit != PM_LSU1) | ||
165 | return -1; | ||
166 | /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ | ||
167 | ++unit; | ||
168 | byte &= 3; | ||
169 | } | ||
170 | if (unit == PM_GRS) { | ||
171 | bit = event & 7; | ||
172 | fmask = (bit == 6)? 7: 3; | ||
173 | sh = grsel_shift[bit]; | ||
174 | mask |= (u64)fmask << sh; | ||
175 | value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; | ||
176 | } | ||
177 | /* Set byte lane select field */ | ||
178 | mask |= 0xfULL << (24 - 4 * byte); | ||
179 | value |= (u64)unit << (24 - 4 * byte); | ||
180 | } | ||
181 | if (pmc < 5) { | ||
182 | /* need a counter from PMC1-4 set */ | ||
183 | mask |= 0x8000000000000ull; | ||
184 | value |= 0x1000000000000ull; | ||
185 | } | ||
186 | *maskp = mask; | ||
187 | *valp = value; | ||
188 | return 0; | ||
189 | } | ||
190 | |||
191 | static int power5p_limited_pmc_event(u64 event) | ||
192 | { | ||
193 | int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
194 | |||
195 | return pmc == 5 || pmc == 6; | ||
196 | } | ||
197 | |||
198 | #define MAX_ALT 3 /* at most 3 alternatives for any event */ | ||
199 | |||
200 | static const unsigned int event_alternatives[][MAX_ALT] = { | ||
201 | { 0x100c0, 0x40001f }, /* PM_GCT_FULL_CYC */ | ||
202 | { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */ | ||
203 | { 0x230e2, 0x323087 }, /* PM_BR_PRED_CR */ | ||
204 | { 0x230e3, 0x223087, 0x3230a0 }, /* PM_BR_PRED_TA */ | ||
205 | { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ | ||
206 | { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */ | ||
207 | { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */ | ||
208 | { 0x100005, 0x600005 }, /* PM_RUN_CYC */ | ||
209 | { 0x100009, 0x200009 }, /* PM_INST_CMPL */ | ||
210 | { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */ | ||
211 | { 0x300009, 0x400009 }, /* PM_INST_DISP */ | ||
212 | }; | ||
213 | |||
214 | /* | ||
215 | * Scan the alternatives table for a match and return the | ||
216 | * index into the alternatives table if found, else -1. | ||
217 | */ | ||
218 | static int find_alternative(unsigned int event) | ||
219 | { | ||
220 | int i, j; | ||
221 | |||
222 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
223 | if (event < event_alternatives[i][0]) | ||
224 | break; | ||
225 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
226 | if (event == event_alternatives[i][j]) | ||
227 | return i; | ||
228 | } | ||
229 | return -1; | ||
230 | } | ||
231 | |||
232 | static const unsigned char bytedecode_alternatives[4][4] = { | ||
233 | /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 }, | ||
234 | /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e }, | ||
235 | /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 }, | ||
236 | /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e } | ||
237 | }; | ||
238 | |||
239 | /* | ||
240 | * Some direct events for decodes of event bus byte 3 have alternative | ||
241 | * PMCSEL values on other counters. This returns the alternative | ||
242 | * event code for those that do, or -1 otherwise. This also handles | ||
243 | * alternative PMCSEL values for add events. | ||
244 | */ | ||
245 | static int find_alternative_bdecode(unsigned int event) | ||
246 | { | ||
247 | int pmc, altpmc, pp, j; | ||
248 | |||
249 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
250 | if (pmc == 0 || pmc > 4) | ||
251 | return -1; | ||
252 | altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ | ||
253 | pp = event & PM_PMCSEL_MSK; | ||
254 | for (j = 0; j < 4; ++j) { | ||
255 | if (bytedecode_alternatives[pmc - 1][j] == pp) { | ||
256 | return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | | ||
257 | (altpmc << PM_PMC_SH) | | ||
258 | bytedecode_alternatives[altpmc - 1][j]; | ||
259 | } | ||
260 | } | ||
261 | |||
262 | /* new decode alternatives for power5+ */ | ||
263 | if (pmc == 1 && (pp == 0x0d || pp == 0x0e)) | ||
264 | return event + (2 << PM_PMC_SH) + (0x2e - 0x0d); | ||
265 | if (pmc == 3 && (pp == 0x2e || pp == 0x2f)) | ||
266 | return event - (2 << PM_PMC_SH) - (0x2e - 0x0d); | ||
267 | |||
268 | /* alternative add event encodings */ | ||
269 | if (pp == 0x10 || pp == 0x28) | ||
270 | return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) | | ||
271 | (altpmc << PM_PMC_SH); | ||
272 | |||
273 | return -1; | ||
274 | } | ||
275 | |||
276 | static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
277 | { | ||
278 | int i, j, nalt = 1; | ||
279 | int nlim; | ||
280 | u64 ae; | ||
281 | |||
282 | alt[0] = event; | ||
283 | nalt = 1; | ||
284 | nlim = power5p_limited_pmc_event(event); | ||
285 | i = find_alternative(event); | ||
286 | if (i >= 0) { | ||
287 | for (j = 0; j < MAX_ALT; ++j) { | ||
288 | ae = event_alternatives[i][j]; | ||
289 | if (ae && ae != event) | ||
290 | alt[nalt++] = ae; | ||
291 | nlim += power5p_limited_pmc_event(ae); | ||
292 | } | ||
293 | } else { | ||
294 | ae = find_alternative_bdecode(event); | ||
295 | if (ae > 0) | ||
296 | alt[nalt++] = ae; | ||
297 | } | ||
298 | |||
299 | if (flags & PPMU_ONLY_COUNT_RUN) { | ||
300 | /* | ||
301 | * We're only counting in RUN state, | ||
302 | * so PM_CYC is equivalent to PM_RUN_CYC | ||
303 | * and PM_INST_CMPL === PM_RUN_INST_CMPL. | ||
304 | * This doesn't include alternatives that don't provide | ||
305 | * any extra flexibility in assigning PMCs (e.g. | ||
306 | * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC). | ||
307 | * Note that even with these additional alternatives | ||
308 | * we never end up with more than 3 alternatives for any event. | ||
309 | */ | ||
310 | j = nalt; | ||
311 | for (i = 0; i < nalt; ++i) { | ||
312 | switch (alt[i]) { | ||
313 | case 0xf: /* PM_CYC */ | ||
314 | alt[j++] = 0x600005; /* PM_RUN_CYC */ | ||
315 | ++nlim; | ||
316 | break; | ||
317 | case 0x600005: /* PM_RUN_CYC */ | ||
318 | alt[j++] = 0xf; | ||
319 | break; | ||
320 | case 0x100009: /* PM_INST_CMPL */ | ||
321 | alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */ | ||
322 | ++nlim; | ||
323 | break; | ||
324 | case 0x500009: /* PM_RUN_INST_CMPL */ | ||
325 | alt[j++] = 0x100009; /* PM_INST_CMPL */ | ||
326 | alt[j++] = 0x200009; | ||
327 | break; | ||
328 | } | ||
329 | } | ||
330 | nalt = j; | ||
331 | } | ||
332 | |||
333 | if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) { | ||
334 | /* remove the limited PMC events */ | ||
335 | j = 0; | ||
336 | for (i = 0; i < nalt; ++i) { | ||
337 | if (!power5p_limited_pmc_event(alt[i])) { | ||
338 | alt[j] = alt[i]; | ||
339 | ++j; | ||
340 | } | ||
341 | } | ||
342 | nalt = j; | ||
343 | } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) { | ||
344 | /* remove all but the limited PMC events */ | ||
345 | j = 0; | ||
346 | for (i = 0; i < nalt; ++i) { | ||
347 | if (power5p_limited_pmc_event(alt[i])) { | ||
348 | alt[j] = alt[i]; | ||
349 | ++j; | ||
350 | } | ||
351 | } | ||
352 | nalt = j; | ||
353 | } | ||
354 | |||
355 | return nalt; | ||
356 | } | ||
357 | |||
/*
 * Map of which direct events on which PMCs are marked instruction events.
 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
 * Bit 0 is set if it is marked for all PMCs.
 * The 0x80 bit indicates a byte decode PMCSEL value.
 * PMCSEL values not listed below have no bits set.
 */
static const unsigned char direct_event_is_marked[0x28] = {
	[0x01] = 0x1f,	/* PM_IOPS_CMPL */
	[0x02] = 0x2,	/* PM_MRK_GRP_DISP */
	[0x03] = 0xe,	/* PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
	[0x05] = 0x1c,	/* PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
	[0x06] = 0x80,
	[0x07] = 0x80,
	[0x0b] = 0x18,	/* PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
	[0x0d] = 0x80,
	[0x0e] = 0x80,
	[0x11] = 0x14,	/* PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
	[0x13] = 0x10,	/* PM_MRK_GRP_CMPL */
	[0x14] = 0x1f,	/* PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
	[0x15] = 0x2,	/* PM_MRK_GRP_ISSUED */
	[0x16] = 0x80,
	[0x17] = 0x80,
	[0x1d] = 0x80,
	[0x1e] = 0x80,
	[0x20] = 0x80,
	[0x21] = 0x80,
	[0x22] = 0x80,
	[0x23] = 0x80,
	[0x24] = 0x80,
	[0x25] = 0x80,
	[0x26] = 0x80,
	[0x27] = 0x80,
};
400 | |||
401 | /* | ||
402 | * Returns 1 if event counts things relating to marked instructions | ||
403 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
404 | */ | ||
405 | static int power5p_marked_instr_event(u64 event) | ||
406 | { | ||
407 | int pmc, psel; | ||
408 | int bit, byte, unit; | ||
409 | u32 mask; | ||
410 | |||
411 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
412 | psel = event & PM_PMCSEL_MSK; | ||
413 | if (pmc >= 5) | ||
414 | return 0; | ||
415 | |||
416 | bit = -1; | ||
417 | if (psel < sizeof(direct_event_is_marked)) { | ||
418 | if (direct_event_is_marked[psel] & (1 << pmc)) | ||
419 | return 1; | ||
420 | if (direct_event_is_marked[psel] & 0x80) | ||
421 | bit = 4; | ||
422 | else if (psel == 0x08) | ||
423 | bit = pmc - 1; | ||
424 | else if (psel == 0x10) | ||
425 | bit = 4 - pmc; | ||
426 | else if (psel == 0x1b && (pmc == 1 || pmc == 3)) | ||
427 | bit = 4; | ||
428 | } else if ((psel & 0x48) == 0x40) { | ||
429 | bit = psel & 7; | ||
430 | } else if (psel == 0x28) { | ||
431 | bit = pmc - 1; | ||
432 | } else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) { | ||
433 | bit = 4; | ||
434 | } | ||
435 | |||
436 | if (!(event & PM_BUSEVENT_MSK) || bit == -1) | ||
437 | return 0; | ||
438 | |||
439 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
440 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
441 | if (unit == PM_LSU0) { | ||
442 | /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ | ||
443 | mask = 0x5dff00; | ||
444 | } else if (unit == PM_LSU1 && byte >= 4) { | ||
445 | byte -= 4; | ||
446 | /* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */ | ||
447 | mask = 0x5f11c000; | ||
448 | } else | ||
449 | return 0; | ||
450 | |||
451 | return (mask >> (byte * 8 + bit)) & 1; | ||
452 | } | ||
453 | |||
454 | static int power5p_compute_mmcr(u64 event[], int n_ev, | ||
455 | unsigned int hwc[], u64 mmcr[]) | ||
456 | { | ||
457 | u64 mmcr1 = 0; | ||
458 | u64 mmcra = 0; | ||
459 | unsigned int pmc, unit, byte, psel; | ||
460 | unsigned int ttm; | ||
461 | int i, isbus, bit, grsel; | ||
462 | unsigned int pmc_inuse = 0; | ||
463 | unsigned char busbyte[4]; | ||
464 | unsigned char unituse[16]; | ||
465 | int ttmuse; | ||
466 | |||
467 | if (n_ev > 6) | ||
468 | return -1; | ||
469 | |||
470 | /* First pass to count resource use */ | ||
471 | memset(busbyte, 0, sizeof(busbyte)); | ||
472 | memset(unituse, 0, sizeof(unituse)); | ||
473 | for (i = 0; i < n_ev; ++i) { | ||
474 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
475 | if (pmc) { | ||
476 | if (pmc > 6) | ||
477 | return -1; | ||
478 | if (pmc_inuse & (1 << (pmc - 1))) | ||
479 | return -1; | ||
480 | pmc_inuse |= 1 << (pmc - 1); | ||
481 | } | ||
482 | if (event[i] & PM_BUSEVENT_MSK) { | ||
483 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
484 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
485 | if (unit > PM_LASTUNIT) | ||
486 | return -1; | ||
487 | if (unit == PM_ISU0_ALT) | ||
488 | unit = PM_ISU0; | ||
489 | if (byte >= 4) { | ||
490 | if (unit != PM_LSU1) | ||
491 | return -1; | ||
492 | ++unit; | ||
493 | byte &= 3; | ||
494 | } | ||
495 | if (busbyte[byte] && busbyte[byte] != unit) | ||
496 | return -1; | ||
497 | busbyte[byte] = unit; | ||
498 | unituse[unit] = 1; | ||
499 | } | ||
500 | } | ||
501 | |||
502 | /* | ||
503 | * Assign resources and set multiplexer selects. | ||
504 | * | ||
505 | * PM_ISU0 can go either on TTM0 or TTM1, but that's the only | ||
506 | * choice we have to deal with. | ||
507 | */ | ||
508 | if (unituse[PM_ISU0] & | ||
509 | (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { | ||
510 | unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ | ||
511 | unituse[PM_ISU0] = 0; | ||
512 | } | ||
513 | /* Set TTM[01]SEL fields. */ | ||
514 | ttmuse = 0; | ||
515 | for (i = PM_FPU; i <= PM_ISU1; ++i) { | ||
516 | if (!unituse[i]) | ||
517 | continue; | ||
518 | if (ttmuse++) | ||
519 | return -1; | ||
520 | mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; | ||
521 | } | ||
522 | ttmuse = 0; | ||
523 | for (; i <= PM_GRS; ++i) { | ||
524 | if (!unituse[i]) | ||
525 | continue; | ||
526 | if (ttmuse++) | ||
527 | return -1; | ||
528 | mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; | ||
529 | } | ||
530 | if (ttmuse > 1) | ||
531 | return -1; | ||
532 | |||
533 | /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */ | ||
534 | for (byte = 0; byte < 4; ++byte) { | ||
535 | unit = busbyte[byte]; | ||
536 | if (!unit) | ||
537 | continue; | ||
538 | if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { | ||
539 | /* get ISU0 through TTM1 rather than TTM0 */ | ||
540 | unit = PM_ISU0_ALT; | ||
541 | } else if (unit == PM_LSU1 + 1) { | ||
542 | /* select lower word of LSU1 for this byte */ | ||
543 | mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); | ||
544 | } | ||
545 | ttm = unit >> 2; | ||
546 | mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | ||
547 | } | ||
548 | |||
549 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | ||
550 | for (i = 0; i < n_ev; ++i) { | ||
551 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
552 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
553 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
554 | psel = event[i] & PM_PMCSEL_MSK; | ||
555 | isbus = event[i] & PM_BUSEVENT_MSK; | ||
556 | if (!pmc) { | ||
557 | /* Bus event or any-PMC direct event */ | ||
558 | for (pmc = 0; pmc < 4; ++pmc) { | ||
559 | if (!(pmc_inuse & (1 << pmc))) | ||
560 | break; | ||
561 | } | ||
562 | if (pmc >= 4) | ||
563 | return -1; | ||
564 | pmc_inuse |= 1 << pmc; | ||
565 | } else if (pmc <= 4) { | ||
566 | /* Direct event */ | ||
567 | --pmc; | ||
568 | if (isbus && (byte & 2) && | ||
569 | (psel == 8 || psel == 0x10 || psel == 0x28)) | ||
570 | /* add events on higher-numbered bus */ | ||
571 | mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); | ||
572 | } else { | ||
573 | /* Instructions or run cycles on PMC5/6 */ | ||
574 | --pmc; | ||
575 | } | ||
576 | if (isbus && unit == PM_GRS) { | ||
577 | bit = psel & 7; | ||
578 | grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; | ||
579 | mmcr1 |= (u64)grsel << grsel_shift[bit]; | ||
580 | } | ||
581 | if (power5p_marked_instr_event(event[i])) | ||
582 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
583 | if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1)) | ||
584 | /* select alternate byte lane */ | ||
585 | psel |= 0x10; | ||
586 | if (pmc <= 3) | ||
587 | mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); | ||
588 | hwc[i] = pmc; | ||
589 | } | ||
590 | |||
591 | /* Return MMCRx values */ | ||
592 | mmcr[0] = 0; | ||
593 | if (pmc_inuse & 1) | ||
594 | mmcr[0] = MMCR0_PMC1CE; | ||
595 | if (pmc_inuse & 0x3e) | ||
596 | mmcr[0] |= MMCR0_PMCjCE; | ||
597 | mmcr[1] = mmcr1; | ||
598 | mmcr[2] = mmcra; | ||
599 | return 0; | ||
600 | } | ||
601 | |||
602 | static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[]) | ||
603 | { | ||
604 | if (pmc <= 3) | ||
605 | mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); | ||
606 | } | ||
607 | |||
608 | static int power5p_generic_events[] = { | ||
609 | [PERF_COUNT_CPU_CYCLES] = 0xf, | ||
610 | [PERF_COUNT_INSTRUCTIONS] = 0x100009, | ||
611 | [PERF_COUNT_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */ | ||
612 | [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ | ||
613 | [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ | ||
614 | [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ | ||
615 | }; | ||
616 | |||
617 | struct power_pmu power5p_pmu = { | ||
618 | .n_counter = 6, | ||
619 | .max_alternatives = MAX_ALT, | ||
620 | .add_fields = 0x7000000000055ull, | ||
621 | .test_adder = 0x3000040000000ull, | ||
622 | .compute_mmcr = power5p_compute_mmcr, | ||
623 | .get_constraint = power5p_get_constraint, | ||
624 | .get_alternatives = power5p_get_alternatives, | ||
625 | .disable_pmc = power5p_disable_pmc, | ||
626 | .n_generic = ARRAY_SIZE(power5p_generic_events), | ||
627 | .generic_events = power5p_generic_events, | ||
628 | .flags = PPMU_LIMITED_PMC5_6, | ||
629 | .limited_pmc_event = power5p_limited_pmc_event, | ||
630 | }; | ||
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c new file mode 100644 index 000000000000..d5344968ee9c --- /dev/null +++ b/arch/powerpc/kernel/power5-pmu.c | |||
@@ -0,0 +1,570 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER5 (not POWER5++) processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_counter.h> | ||
13 | #include <asm/reg.h> | ||
14 | |||
15 | /* | ||
16 | * Bits in event code for POWER5 (not POWER5++) | ||
17 | */ | ||
18 | #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ | ||
19 | #define PM_PMC_MSK 0xf | ||
20 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
21 | #define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ | ||
22 | #define PM_UNIT_MSK 0xf | ||
23 | #define PM_BYTE_SH 12 /* Byte number of event bus to use */ | ||
24 | #define PM_BYTE_MSK 7 | ||
25 | #define PM_GRS_SH 8 /* Storage subsystem mux select */ | ||
26 | #define PM_GRS_MSK 7 | ||
27 | #define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ | ||
28 | #define PM_PMCSEL_MSK 0x7f | ||
29 | |||
30 | /* Values in PM_UNIT field */ | ||
31 | #define PM_FPU 0 | ||
32 | #define PM_ISU0 1 | ||
33 | #define PM_IFU 2 | ||
34 | #define PM_ISU1 3 | ||
35 | #define PM_IDU 4 | ||
36 | #define PM_ISU0_ALT 6 | ||
37 | #define PM_GRS 7 | ||
38 | #define PM_LSU0 8 | ||
39 | #define PM_LSU1 0xc | ||
40 | #define PM_LASTUNIT 0xc | ||
41 | |||
42 | /* | ||
43 | * Bits in MMCR1 for POWER5 | ||
44 | */ | ||
45 | #define MMCR1_TTM0SEL_SH 62 | ||
46 | #define MMCR1_TTM1SEL_SH 60 | ||
47 | #define MMCR1_TTM2SEL_SH 58 | ||
48 | #define MMCR1_TTM3SEL_SH 56 | ||
49 | #define MMCR1_TTMSEL_MSK 3 | ||
50 | #define MMCR1_TD_CP_DBG0SEL_SH 54 | ||
51 | #define MMCR1_TD_CP_DBG1SEL_SH 52 | ||
52 | #define MMCR1_TD_CP_DBG2SEL_SH 50 | ||
53 | #define MMCR1_TD_CP_DBG3SEL_SH 48 | ||
54 | #define MMCR1_GRS_L2SEL_SH 46 | ||
55 | #define MMCR1_GRS_L2SEL_MSK 3 | ||
56 | #define MMCR1_GRS_L3SEL_SH 44 | ||
57 | #define MMCR1_GRS_L3SEL_MSK 3 | ||
58 | #define MMCR1_GRS_MCSEL_SH 41 | ||
59 | #define MMCR1_GRS_MCSEL_MSK 7 | ||
60 | #define MMCR1_GRS_FABSEL_SH 39 | ||
61 | #define MMCR1_GRS_FABSEL_MSK 3 | ||
62 | #define MMCR1_PMC1_ADDER_SEL_SH 35 | ||
63 | #define MMCR1_PMC2_ADDER_SEL_SH 34 | ||
64 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
65 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
66 | #define MMCR1_PMC1SEL_SH 25 | ||
67 | #define MMCR1_PMC2SEL_SH 17 | ||
68 | #define MMCR1_PMC3SEL_SH 9 | ||
69 | #define MMCR1_PMC4SEL_SH 1 | ||
70 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
71 | #define MMCR1_PMCSEL_MSK 0x7f | ||
72 | |||
73 | /* | ||
74 | * Bits in MMCRA | ||
75 | */ | ||
76 | |||
77 | /* | ||
78 | * Layout of constraint bits: | ||
79 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
80 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
81 | * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><> | ||
82 | * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1 | ||
83 | * | ||
84 | * T0 - TTM0 constraint | ||
85 | * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000 | ||
86 | * | ||
87 | * T1 - TTM1 constraint | ||
88 | * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000 | ||
89 | * | ||
90 | * NC - number of counters | ||
91 | * 51: NC error 0x0008_0000_0000_0000 | ||
92 | * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 | ||
93 | * | ||
94 | * G0..G3 - GRS mux constraints | ||
95 | * 46-47: GRS_L2SEL value | ||
96 | * 44-45: GRS_L3SEL value | ||
97 | * 41-43: GRS_MCSEL value | ||
98 | * 39-40: GRS_FABSEL value | ||
99 | * Note that these match up with their bit positions in MMCR1 | ||
100 | * | ||
101 | * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS | ||
102 | * 37: UC3 error 0x20_0000_0000 | ||
103 | * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000 | ||
104 | * 35: ISU0 events needed 0x08_0000_0000 | ||
105 | * 34: IDU|GRS events needed 0x04_0000_0000 | ||
106 | * | ||
107 | * PS1 | ||
108 | * 33: PS1 error 0x2_0000_0000 | ||
109 | * 31-32: count of events needing PMC1/2 0x1_8000_0000 | ||
110 | * | ||
111 | * PS2 | ||
112 | * 30: PS2 error 0x4000_0000 | ||
113 | * 28-29: count of events needing PMC3/4 0x3000_0000 | ||
114 | * | ||
115 | * B0 | ||
116 | * 24-27: Byte 0 event source 0x0f00_0000 | ||
117 | * Encoding as for the event code | ||
118 | * | ||
119 | * B1, B2, B3 | ||
120 | * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources | ||
121 | * | ||
122 | * P1..P6 | ||
123 | * 0-11: Count of events needing PMC1..PMC6 | ||
124 | */ | ||
125 | |||
126 | static const int grsel_shift[8] = { | ||
127 | MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, | ||
128 | MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, | ||
129 | MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH | ||
130 | }; | ||
131 | |||
132 | /* Masks and values for using events from the various units */ | ||
133 | static u64 unit_cons[PM_LASTUNIT+1][2] = { | ||
134 | [PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull }, | ||
135 | [PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull }, | ||
136 | [PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull }, | ||
137 | [PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull }, | ||
138 | [PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull }, | ||
139 | [PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull }, | ||
140 | }; | ||
141 | |||
142 | static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp) | ||
143 | { | ||
144 | int pmc, byte, unit, sh; | ||
145 | int bit, fmask; | ||
146 | u64 mask = 0, value = 0; | ||
147 | int grp = -1; | ||
148 | |||
149 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
150 | if (pmc) { | ||
151 | if (pmc > 6) | ||
152 | return -1; | ||
153 | sh = (pmc - 1) * 2; | ||
154 | mask |= 2 << sh; | ||
155 | value |= 1 << sh; | ||
156 | if (pmc <= 4) | ||
157 | grp = (pmc - 1) >> 1; | ||
158 | else if (event != 0x500009 && event != 0x600005) | ||
159 | return -1; | ||
160 | } | ||
161 | if (event & PM_BUSEVENT_MSK) { | ||
162 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
163 | if (unit > PM_LASTUNIT) | ||
164 | return -1; | ||
165 | if (unit == PM_ISU0_ALT) | ||
166 | unit = PM_ISU0; | ||
167 | mask |= unit_cons[unit][0]; | ||
168 | value |= unit_cons[unit][1]; | ||
169 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
170 | if (byte >= 4) { | ||
171 | if (unit != PM_LSU1) | ||
172 | return -1; | ||
173 | /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ | ||
174 | ++unit; | ||
175 | byte &= 3; | ||
176 | } | ||
177 | if (unit == PM_GRS) { | ||
178 | bit = event & 7; | ||
179 | fmask = (bit == 6)? 7: 3; | ||
180 | sh = grsel_shift[bit]; | ||
181 | mask |= (u64)fmask << sh; | ||
182 | value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; | ||
183 | } | ||
184 | /* | ||
185 | * Bus events on bytes 0 and 2 can be counted | ||
186 | * on PMC1/2; bytes 1 and 3 on PMC3/4. | ||
187 | */ | ||
188 | if (!pmc) | ||
189 | grp = byte & 1; | ||
190 | /* Set byte lane select field */ | ||
191 | mask |= 0xfULL << (24 - 4 * byte); | ||
192 | value |= (u64)unit << (24 - 4 * byte); | ||
193 | } | ||
194 | if (grp == 0) { | ||
195 | /* increment PMC1/2 field */ | ||
196 | mask |= 0x200000000ull; | ||
197 | value |= 0x080000000ull; | ||
198 | } else if (grp == 1) { | ||
199 | /* increment PMC3/4 field */ | ||
200 | mask |= 0x40000000ull; | ||
201 | value |= 0x10000000ull; | ||
202 | } | ||
203 | if (pmc < 5) { | ||
204 | /* need a counter from PMC1-4 set */ | ||
205 | mask |= 0x8000000000000ull; | ||
206 | value |= 0x1000000000000ull; | ||
207 | } | ||
208 | *maskp = mask; | ||
209 | *valp = value; | ||
210 | return 0; | ||
211 | } | ||
212 | |||
#define MAX_ALT	3	/* at most 3 alternatives for any event */

/*
 * Groups of event codes that count the same thing.  Unused slots are 0.
 * Rows are kept in ascending order of their first entry.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	{ 0x120e4,  0x400002, 0        },	/* PM_GRP_DISP_REJECT */
	{ 0x410c7,  0x441084, 0        },	/* PM_THRD_L2MISS_BOTH_CYC */
	{ 0x100005, 0x600005, 0        },	/* PM_RUN_CYC */
	{ 0x100009, 0x200009, 0x500009 },	/* PM_INST_CMPL */
	{ 0x300009, 0x400009, 0        },	/* PM_INST_DISP */
};
222 | |||
223 | /* | ||
224 | * Scan the alternatives table for a match and return the | ||
225 | * index into the alternatives table if found, else -1. | ||
226 | */ | ||
227 | static int find_alternative(u64 event) | ||
228 | { | ||
229 | int i, j; | ||
230 | |||
231 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
232 | if (event < event_alternatives[i][0]) | ||
233 | break; | ||
234 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
235 | if (event == event_alternatives[i][j]) | ||
236 | return i; | ||
237 | } | ||
238 | return -1; | ||
239 | } | ||
240 | |||
/*
 * Byte-decode PMCSEL values, one row per PMC (row 0 is PMC 1).
 * Entry j of one row is the equivalent of entry j of any other row.
 */
static const unsigned char bytedecode_alternatives[4][4] = {
	[0] = { 0x21, 0x23, 0x25, 0x27 },	/* PMC 1 */
	[1] = { 0x07, 0x17, 0x0e, 0x1e },	/* PMC 2 */
	[2] = { 0x20, 0x22, 0x24, 0x26 },	/* PMC 3 */
	[3] = { 0x07, 0x17, 0x0e, 0x1e },	/* PMC 4 */
};
247 | |||
248 | /* | ||
249 | * Some direct events for decodes of event bus byte 3 have alternative | ||
250 | * PMCSEL values on other counters. This returns the alternative | ||
251 | * event code for those that do, or -1 otherwise. | ||
252 | */ | ||
253 | static u64 find_alternative_bdecode(u64 event) | ||
254 | { | ||
255 | int pmc, altpmc, pp, j; | ||
256 | |||
257 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
258 | if (pmc == 0 || pmc > 4) | ||
259 | return -1; | ||
260 | altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ | ||
261 | pp = event & PM_PMCSEL_MSK; | ||
262 | for (j = 0; j < 4; ++j) { | ||
263 | if (bytedecode_alternatives[pmc - 1][j] == pp) { | ||
264 | return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | | ||
265 | (altpmc << PM_PMC_SH) | | ||
266 | bytedecode_alternatives[altpmc - 1][j]; | ||
267 | } | ||
268 | } | ||
269 | return -1; | ||
270 | } | ||
271 | |||
272 | static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
273 | { | ||
274 | int i, j, nalt = 1; | ||
275 | u64 ae; | ||
276 | |||
277 | alt[0] = event; | ||
278 | nalt = 1; | ||
279 | i = find_alternative(event); | ||
280 | if (i >= 0) { | ||
281 | for (j = 0; j < MAX_ALT; ++j) { | ||
282 | ae = event_alternatives[i][j]; | ||
283 | if (ae && ae != event) | ||
284 | alt[nalt++] = ae; | ||
285 | } | ||
286 | } else { | ||
287 | ae = find_alternative_bdecode(event); | ||
288 | if (ae > 0) | ||
289 | alt[nalt++] = ae; | ||
290 | } | ||
291 | return nalt; | ||
292 | } | ||
293 | |||
/*
 * Map of which direct events on which PMCs are marked instruction events.
 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
 * Bit 0 is set if it is marked for all PMCs.
 * The 0x80 bit indicates a byte decode PMCSEL value.
 * PMCSEL values not listed below have no bits set.
 */
static const unsigned char direct_event_is_marked[0x28] = {
	[0x01] = 0x1f,	/* PM_IOPS_CMPL */
	[0x02] = 0x2,	/* PM_MRK_GRP_DISP */
	[0x03] = 0xe,	/* PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
	[0x05] = 0x1c,	/* PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
	[0x06] = 0x80,
	[0x07] = 0x80,
	[0x0b] = 0x18,	/* PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
	[0x0d] = 0x80,
	[0x0e] = 0x80,
	[0x11] = 0x14,	/* PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
	[0x13] = 0x10,	/* PM_MRK_GRP_CMPL */
	[0x14] = 0x1f,	/* PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
	[0x15] = 0x2,	/* PM_MRK_GRP_ISSUED */
	[0x16] = 0x80,
	[0x17] = 0x80,
	[0x1d] = 0x80,
	[0x1e] = 0x80,
	[0x20] = 0x80,
	[0x21] = 0x80,
	[0x22] = 0x80,
	[0x23] = 0x80,
	[0x24] = 0x80,
	[0x25] = 0x80,
	[0x26] = 0x80,
	[0x27] = 0x80,
};
336 | |||
337 | /* | ||
338 | * Returns 1 if event counts things relating to marked instructions | ||
339 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
340 | */ | ||
341 | static int power5_marked_instr_event(u64 event) | ||
342 | { | ||
343 | int pmc, psel; | ||
344 | int bit, byte, unit; | ||
345 | u32 mask; | ||
346 | |||
347 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
348 | psel = event & PM_PMCSEL_MSK; | ||
349 | if (pmc >= 5) | ||
350 | return 0; | ||
351 | |||
352 | bit = -1; | ||
353 | if (psel < sizeof(direct_event_is_marked)) { | ||
354 | if (direct_event_is_marked[psel] & (1 << pmc)) | ||
355 | return 1; | ||
356 | if (direct_event_is_marked[psel] & 0x80) | ||
357 | bit = 4; | ||
358 | else if (psel == 0x08) | ||
359 | bit = pmc - 1; | ||
360 | else if (psel == 0x10) | ||
361 | bit = 4 - pmc; | ||
362 | else if (psel == 0x1b && (pmc == 1 || pmc == 3)) | ||
363 | bit = 4; | ||
364 | } else if ((psel & 0x58) == 0x40) | ||
365 | bit = psel & 7; | ||
366 | |||
367 | if (!(event & PM_BUSEVENT_MSK)) | ||
368 | return 0; | ||
369 | |||
370 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
371 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
372 | if (unit == PM_LSU0) { | ||
373 | /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ | ||
374 | mask = 0x5dff00; | ||
375 | } else if (unit == PM_LSU1 && byte >= 4) { | ||
376 | byte -= 4; | ||
377 | /* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */ | ||
378 | mask = 0x5f00c0aa; | ||
379 | } else | ||
380 | return 0; | ||
381 | |||
382 | return (mask >> (byte * 8 + bit)) & 1; | ||
383 | } | ||
384 | |||
385 | static int power5_compute_mmcr(u64 event[], int n_ev, | ||
386 | unsigned int hwc[], u64 mmcr[]) | ||
387 | { | ||
388 | u64 mmcr1 = 0; | ||
389 | u64 mmcra = 0; | ||
390 | unsigned int pmc, unit, byte, psel; | ||
391 | unsigned int ttm, grp; | ||
392 | int i, isbus, bit, grsel; | ||
393 | unsigned int pmc_inuse = 0; | ||
394 | unsigned int pmc_grp_use[2]; | ||
395 | unsigned char busbyte[4]; | ||
396 | unsigned char unituse[16]; | ||
397 | int ttmuse; | ||
398 | |||
399 | if (n_ev > 6) | ||
400 | return -1; | ||
401 | |||
402 | /* First pass to count resource use */ | ||
403 | pmc_grp_use[0] = pmc_grp_use[1] = 0; | ||
404 | memset(busbyte, 0, sizeof(busbyte)); | ||
405 | memset(unituse, 0, sizeof(unituse)); | ||
406 | for (i = 0; i < n_ev; ++i) { | ||
407 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
408 | if (pmc) { | ||
409 | if (pmc > 6) | ||
410 | return -1; | ||
411 | if (pmc_inuse & (1 << (pmc - 1))) | ||
412 | return -1; | ||
413 | pmc_inuse |= 1 << (pmc - 1); | ||
414 | /* count 1/2 vs 3/4 use */ | ||
415 | if (pmc <= 4) | ||
416 | ++pmc_grp_use[(pmc - 1) >> 1]; | ||
417 | } | ||
418 | if (event[i] & PM_BUSEVENT_MSK) { | ||
419 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
420 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
421 | if (unit > PM_LASTUNIT) | ||
422 | return -1; | ||
423 | if (unit == PM_ISU0_ALT) | ||
424 | unit = PM_ISU0; | ||
425 | if (byte >= 4) { | ||
426 | if (unit != PM_LSU1) | ||
427 | return -1; | ||
428 | ++unit; | ||
429 | byte &= 3; | ||
430 | } | ||
431 | if (!pmc) | ||
432 | ++pmc_grp_use[byte & 1]; | ||
433 | if (busbyte[byte] && busbyte[byte] != unit) | ||
434 | return -1; | ||
435 | busbyte[byte] = unit; | ||
436 | unituse[unit] = 1; | ||
437 | } | ||
438 | } | ||
439 | if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2) | ||
440 | return -1; | ||
441 | |||
442 | /* | ||
443 | * Assign resources and set multiplexer selects. | ||
444 | * | ||
445 | * PM_ISU0 can go either on TTM0 or TTM1, but that's the only | ||
446 | * choice we have to deal with. | ||
447 | */ | ||
448 | if (unituse[PM_ISU0] & | ||
449 | (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { | ||
450 | unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ | ||
451 | unituse[PM_ISU0] = 0; | ||
452 | } | ||
453 | /* Set TTM[01]SEL fields. */ | ||
454 | ttmuse = 0; | ||
455 | for (i = PM_FPU; i <= PM_ISU1; ++i) { | ||
456 | if (!unituse[i]) | ||
457 | continue; | ||
458 | if (ttmuse++) | ||
459 | return -1; | ||
460 | mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; | ||
461 | } | ||
462 | ttmuse = 0; | ||
463 | for (; i <= PM_GRS; ++i) { | ||
464 | if (!unituse[i]) | ||
465 | continue; | ||
466 | if (ttmuse++) | ||
467 | return -1; | ||
468 | mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; | ||
469 | } | ||
470 | if (ttmuse > 1) | ||
471 | return -1; | ||
472 | |||
473 | /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */ | ||
474 | for (byte = 0; byte < 4; ++byte) { | ||
475 | unit = busbyte[byte]; | ||
476 | if (!unit) | ||
477 | continue; | ||
478 | if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { | ||
479 | /* get ISU0 through TTM1 rather than TTM0 */ | ||
480 | unit = PM_ISU0_ALT; | ||
481 | } else if (unit == PM_LSU1 + 1) { | ||
482 | /* select lower word of LSU1 for this byte */ | ||
483 | mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); | ||
484 | } | ||
485 | ttm = unit >> 2; | ||
486 | mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | ||
487 | } | ||
488 | |||
489 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | ||
490 | for (i = 0; i < n_ev; ++i) { | ||
491 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
492 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
493 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
494 | psel = event[i] & PM_PMCSEL_MSK; | ||
495 | isbus = event[i] & PM_BUSEVENT_MSK; | ||
496 | if (!pmc) { | ||
497 | /* Bus event or any-PMC direct event */ | ||
498 | for (pmc = 0; pmc < 4; ++pmc) { | ||
499 | if (pmc_inuse & (1 << pmc)) | ||
500 | continue; | ||
501 | grp = (pmc >> 1) & 1; | ||
502 | if (isbus) { | ||
503 | if (grp == (byte & 1)) | ||
504 | break; | ||
505 | } else if (pmc_grp_use[grp] < 2) { | ||
506 | ++pmc_grp_use[grp]; | ||
507 | break; | ||
508 | } | ||
509 | } | ||
510 | pmc_inuse |= 1 << pmc; | ||
511 | } else if (pmc <= 4) { | ||
512 | /* Direct event */ | ||
513 | --pmc; | ||
514 | if ((psel == 8 || psel == 0x10) && isbus && (byte & 2)) | ||
515 | /* add events on higher-numbered bus */ | ||
516 | mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); | ||
517 | } else { | ||
518 | /* Instructions or run cycles on PMC5/6 */ | ||
519 | --pmc; | ||
520 | } | ||
521 | if (isbus && unit == PM_GRS) { | ||
522 | bit = psel & 7; | ||
523 | grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; | ||
524 | mmcr1 |= (u64)grsel << grsel_shift[bit]; | ||
525 | } | ||
526 | if (power5_marked_instr_event(event[i])) | ||
527 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
528 | if (pmc <= 3) | ||
529 | mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); | ||
530 | hwc[i] = pmc; | ||
531 | } | ||
532 | |||
533 | /* Return MMCRx values */ | ||
534 | mmcr[0] = 0; | ||
535 | if (pmc_inuse & 1) | ||
536 | mmcr[0] = MMCR0_PMC1CE; | ||
537 | if (pmc_inuse & 0x3e) | ||
538 | mmcr[0] |= MMCR0_PMCjCE; | ||
539 | mmcr[1] = mmcr1; | ||
540 | mmcr[2] = mmcra; | ||
541 | return 0; | ||
542 | } | ||
543 | |||
544 | static void power5_disable_pmc(unsigned int pmc, u64 mmcr[]) | ||
545 | { | ||
546 | if (pmc <= 3) | ||
547 | mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); | ||
548 | } | ||
549 | |||
550 | static int power5_generic_events[] = { | ||
551 | [PERF_COUNT_CPU_CYCLES] = 0xf, | ||
552 | [PERF_COUNT_INSTRUCTIONS] = 0x100009, | ||
553 | [PERF_COUNT_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */ | ||
554 | [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ | ||
555 | [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ | ||
556 | [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ | ||
557 | }; | ||
558 | |||
559 | struct power_pmu power5_pmu = { | ||
560 | .n_counter = 6, | ||
561 | .max_alternatives = MAX_ALT, | ||
562 | .add_fields = 0x7000090000555ull, | ||
563 | .test_adder = 0x3000490000000ull, | ||
564 | .compute_mmcr = power5_compute_mmcr, | ||
565 | .get_constraint = power5_get_constraint, | ||
566 | .get_alternatives = power5_get_alternatives, | ||
567 | .disable_pmc = power5_disable_pmc, | ||
568 | .n_generic = ARRAY_SIZE(power5_generic_events), | ||
569 | .generic_events = power5_generic_events, | ||
570 | }; | ||
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c new file mode 100644 index 000000000000..cd4fbe06c35d --- /dev/null +++ b/arch/powerpc/kernel/power6-pmu.c | |||
@@ -0,0 +1,490 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER6 processors. | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_counter.h> | ||
13 | #include <asm/reg.h> | ||
14 | |||
/*
 * Fields of a POWER6 event code.
 *
 * PM_BUSEVENT_MSK is the union of the unit (0xf << 16), byte (3 << 12)
 * and subunit (7 << 8) fields; a non-zero value there means the event
 * is routed over the event bus.
 */
#define PM_PMC_SH	20	/* PMC number (1-based) for direct events */
#define PM_PMC_MSK	0x7
#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)
#define PM_UNIT_SH	16	/* Unit event comes from (TTMxSEL encoding) */
#define PM_UNIT_MSK	0xf
#define PM_UNIT_MSKS	(PM_UNIT_MSK << PM_UNIT_SH)
#define PM_LLAV		0x8000	/* Load lookahead match value */
#define PM_LLA		0x4000	/* Load lookahead match enable */
#define PM_BYTE_SH	12	/* Byte of event bus to use */
#define PM_BYTE_MSK	3
#define PM_SUBUNIT_SH	8	/* Subunit event comes from (NEST_SEL enc.) */
#define PM_SUBUNIT_MSK	7
#define PM_SUBUNIT_MSKS	(PM_SUBUNIT_MSK << PM_SUBUNIT_SH)
#define PM_PMCSEL_MSK	0xff	/* PMCxSEL value */
#define PM_BUSEVENT_MSK	0xf3700

/*
 * Fields of MMCR1 on POWER6.
 *
 * The (n) arguments below are 0-based: TTMSEL fields are 4 bits wide
 * counting down from bit 60, PMCSEL fields are 8 bits wide counting
 * down from bit 24.
 */
#define MMCR1_TTM0SEL_SH	60
#define MMCR1_TTMSEL_SH(n)	(MMCR1_TTM0SEL_SH - (n) * 4)
#define MMCR1_TTMSEL_MSK	0xf
#define MMCR1_TTMSEL(m, n)	(((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK)
#define MMCR1_NESTSEL_SH	45
#define MMCR1_NESTSEL_MSK	0x7
#define MMCR1_NESTSEL(m)	(((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
#define MMCR1_PMC1_LLA		((u64)1 << 44)
#define MMCR1_PMC1_LLA_VALUE	((u64)1 << 39)
#define MMCR1_PMC1_ADDR_SEL	((u64)1 << 35)
#define MMCR1_PMC1SEL_SH	24
#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK	0xff
50 | |||
/*
 * Map of which direct events on which PMCs are marked instruction events.
 * Indexed by PMCSEL value >> 1; entries not listed below are zero
 * (no marked event for that PMCSEL value).
 *
 * Low nibble:  bitmap of the PMCs for which the entry applies
 *              (bit 0 = PMC1 ... bit 3 = PMC4).
 * High nibble: what sort of event it is:
 *	0 = direct marked event,
 *	1 = byte decode event,
 *	4 = add/and event (PMC1 -> bits 0 & 4),
 *	5 = add/and event (PMC1 -> bits 1 & 5),
 *	6 = add/and event (PMC1 -> bits 2 & 6),
 *	7 = add/and event (PMC1 -> bits 3 & 7).
 */
static unsigned char direct_event_is_marked[0x60 >> 1] = {
	/* PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
	[0x06 >> 1] = 0x07,
	/* PM_MRK_DFU_FIN */
	[0x08 >> 1] = 0x04,
	/* PM_MRK_IFU_FIN, PM_MRK_INST_FIN */
	[0x0a >> 1] = 0x06,
	/* PM_MRK_INST_DISP */
	[0x10 >> 1] = 0x02,
	/* PM_MRK_LSU_DERAT_MISS */
	[0x12 >> 1] = 0x08,
	/* PM_THRESH_TIMEO, PM_MRK_INST_FIN */
	[0x18 >> 1] = 0x0c,
	/* PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */
	[0x1a >> 1] = 0x0f,
	/* PM_MRK_INST_ISSUED */
	[0x1c >> 1] = 0x01,
	/* PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */
	[0x28 >> 1] = 0x15,

	/* add/and events */
	[0x30 >> 1] = 0x4f,
	[0x32 >> 1] = 0x7f,
	[0x34 >> 1] = 0x4f,
	[0x36 >> 1] = 0x5f,
	[0x38 >> 1] = 0x6f,
	[0x3a >> 1] = 0x4f,

	/* PM_MRK_INST_TIMEO */
	[0x3e >> 1] = 0x08,

	/* byte decode events */
	[0x40 >> 1] = 0x1f,
	[0x42 >> 1] = 0x1f,
	[0x44 >> 1] = 0x1f,
	[0x46 >> 1] = 0x1f,
	[0x48 >> 1] = 0x1f,
	[0x4a >> 1] = 0x1f,
	[0x4c >> 1] = 0x1f,
	[0x4e >> 1] = 0x1f,

	/* PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */
	[0x52 >> 1] = 0x05,
	/* PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */
	[0x54 >> 1] = 0x1c,
	/* PM_MRK_LD_MISS_L1 */
	[0x56 >> 1] = 0x02,
};
113 | |||
114 | /* | ||
115 | * Masks showing for each unit which bits are marked events. | ||
116 | * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0. | ||
117 | */ | ||
118 | static u32 marked_bus_events[16] = { | ||
119 | 0x01000000, /* direct events set 1: byte 3 bit 0 */ | ||
120 | 0x00010000, /* direct events set 2: byte 2 bit 0 */ | ||
121 | 0, 0, 0, 0, /* IDU, IFU, nest: nothing */ | ||
122 | 0x00000088, /* VMX set 1: byte 0 bits 3, 7 */ | ||
123 | 0x000000c0, /* VMX set 2: byte 0 bits 4-7 */ | ||
124 | 0x04010000, /* LSU set 1: byte 2 bit 0, byte 3 bit 2 */ | ||
125 | 0xff010000u, /* LSU set 2: byte 2 bit 0, all of byte 3 */ | ||
126 | 0, /* LSU set 3 */ | ||
127 | 0x00000010, /* VMX set 3: byte 0 bit 4 */ | ||
128 | 0, /* BFP set 1 */ | ||
129 | 0x00000022, /* BFP set 2: byte 0 bits 1, 5 */ | ||
130 | 0, 0 | ||
131 | }; | ||
132 | |||
133 | /* | ||
134 | * Returns 1 if event counts things relating to marked instructions | ||
135 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
136 | */ | ||
137 | static int power6_marked_instr_event(u64 event) | ||
138 | { | ||
139 | int pmc, psel, ptype; | ||
140 | int bit, byte, unit; | ||
141 | u32 mask; | ||
142 | |||
143 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
144 | psel = (event & PM_PMCSEL_MSK) >> 1; /* drop edge/level bit */ | ||
145 | if (pmc >= 5) | ||
146 | return 0; | ||
147 | |||
148 | bit = -1; | ||
149 | if (psel < sizeof(direct_event_is_marked)) { | ||
150 | ptype = direct_event_is_marked[psel]; | ||
151 | if (pmc == 0 || !(ptype & (1 << (pmc - 1)))) | ||
152 | return 0; | ||
153 | ptype >>= 4; | ||
154 | if (ptype == 0) | ||
155 | return 1; | ||
156 | if (ptype == 1) | ||
157 | bit = 0; | ||
158 | else | ||
159 | bit = ptype ^ (pmc - 1); | ||
160 | } else if ((psel & 0x48) == 0x40) | ||
161 | bit = psel & 7; | ||
162 | |||
163 | if (!(event & PM_BUSEVENT_MSK) || bit == -1) | ||
164 | return 0; | ||
165 | |||
166 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
167 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
168 | mask = marked_bus_events[unit]; | ||
169 | return (mask >> (byte * 8 + bit)) & 1; | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * Assign PMC numbers and compute MMCR1 value for a set of events | ||
174 | */ | ||
175 | static int p6_compute_mmcr(u64 event[], int n_ev, | ||
176 | unsigned int hwc[], u64 mmcr[]) | ||
177 | { | ||
178 | u64 mmcr1 = 0; | ||
179 | u64 mmcra = 0; | ||
180 | int i; | ||
181 | unsigned int pmc, ev, b, u, s, psel; | ||
182 | unsigned int ttmset = 0; | ||
183 | unsigned int pmc_inuse = 0; | ||
184 | |||
185 | if (n_ev > 6) | ||
186 | return -1; | ||
187 | for (i = 0; i < n_ev; ++i) { | ||
188 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
189 | if (pmc) { | ||
190 | if (pmc_inuse & (1 << (pmc - 1))) | ||
191 | return -1; /* collision! */ | ||
192 | pmc_inuse |= 1 << (pmc - 1); | ||
193 | } | ||
194 | } | ||
195 | for (i = 0; i < n_ev; ++i) { | ||
196 | ev = event[i]; | ||
197 | pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK; | ||
198 | if (pmc) { | ||
199 | --pmc; | ||
200 | } else { | ||
201 | /* can go on any PMC; find a free one */ | ||
202 | for (pmc = 0; pmc < 4; ++pmc) | ||
203 | if (!(pmc_inuse & (1 << pmc))) | ||
204 | break; | ||
205 | if (pmc >= 4) | ||
206 | return -1; | ||
207 | pmc_inuse |= 1 << pmc; | ||
208 | } | ||
209 | hwc[i] = pmc; | ||
210 | psel = ev & PM_PMCSEL_MSK; | ||
211 | if (ev & PM_BUSEVENT_MSK) { | ||
212 | /* this event uses the event bus */ | ||
213 | b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
214 | u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
215 | /* check for conflict on this byte of event bus */ | ||
216 | if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u) | ||
217 | return -1; | ||
218 | mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b); | ||
219 | ttmset |= 1 << b; | ||
220 | if (u == 5) { | ||
221 | /* Nest events have a further mux */ | ||
222 | s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; | ||
223 | if ((ttmset & 0x10) && | ||
224 | MMCR1_NESTSEL(mmcr1) != s) | ||
225 | return -1; | ||
226 | ttmset |= 0x10; | ||
227 | mmcr1 |= (u64)s << MMCR1_NESTSEL_SH; | ||
228 | } | ||
229 | if (0x30 <= psel && psel <= 0x3d) { | ||
230 | /* these need the PMCx_ADDR_SEL bits */ | ||
231 | if (b >= 2) | ||
232 | mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc; | ||
233 | } | ||
234 | /* bus select values are different for PMC3/4 */ | ||
235 | if (pmc >= 2 && (psel & 0x90) == 0x80) | ||
236 | psel ^= 0x20; | ||
237 | } | ||
238 | if (ev & PM_LLA) { | ||
239 | mmcr1 |= MMCR1_PMC1_LLA >> pmc; | ||
240 | if (ev & PM_LLAV) | ||
241 | mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc; | ||
242 | } | ||
243 | if (power6_marked_instr_event(event[i])) | ||
244 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
245 | if (pmc < 4) | ||
246 | mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); | ||
247 | } | ||
248 | mmcr[0] = 0; | ||
249 | if (pmc_inuse & 1) | ||
250 | mmcr[0] = MMCR0_PMC1CE; | ||
251 | if (pmc_inuse & 0xe) | ||
252 | mmcr[0] |= MMCR0_PMCjCE; | ||
253 | mmcr[1] = mmcr1; | ||
254 | mmcr[2] = mmcra; | ||
255 | return 0; | ||
256 | } | ||
257 | |||
258 | /* | ||
259 | * Layout of constraint bits: | ||
260 | * | ||
261 | * 0-1 add field: number of uses of PMC1 (max 1) | ||
262 | * 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6 | ||
263 | * 12-15 add field: number of uses of PMC1-4 (max 4) | ||
264 | * 16-19 select field: unit on byte 0 of event bus | ||
265 | * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 | ||
266 | * 32-34 select field: nest (subunit) event selector | ||
267 | */ | ||
268 | static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp) | ||
269 | { | ||
270 | int pmc, byte, sh, subunit; | ||
271 | u64 mask = 0, value = 0; | ||
272 | |||
273 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
274 | if (pmc) { | ||
275 | if (pmc > 4 && !(event == 0x500009 || event == 0x600005)) | ||
276 | return -1; | ||
277 | sh = (pmc - 1) * 2; | ||
278 | mask |= 2 << sh; | ||
279 | value |= 1 << sh; | ||
280 | } | ||
281 | if (event & PM_BUSEVENT_MSK) { | ||
282 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
283 | sh = byte * 4 + (16 - PM_UNIT_SH); | ||
284 | mask |= PM_UNIT_MSKS << sh; | ||
285 | value |= (u64)(event & PM_UNIT_MSKS) << sh; | ||
286 | if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { | ||
287 | subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; | ||
288 | mask |= (u64)PM_SUBUNIT_MSK << 32; | ||
289 | value |= (u64)subunit << 32; | ||
290 | } | ||
291 | } | ||
292 | if (pmc <= 4) { | ||
293 | mask |= 0x8000; /* add field for count of PMC1-4 uses */ | ||
294 | value |= 0x1000; | ||
295 | } | ||
296 | *maskp = mask; | ||
297 | *valp = value; | ||
298 | return 0; | ||
299 | } | ||
300 | |||
301 | static int p6_limited_pmc_event(u64 event) | ||
302 | { | ||
303 | int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
304 | |||
305 | return pmc == 5 || pmc == 6; | ||
306 | } | ||
307 | |||
#define MAX_ALT	4	/* at most 4 alternatives for any event */

/*
 * Groups of event codes that count the same thing.  Unused slots in a
 * row are zero.  The lookup in find_alternatives_list() stops early on
 * the first code larger than the one it is looking for, so rows must
 * stay sorted by their first entry and each row must stay in ascending
 * order.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	/* PM_PTEG_RELOAD_VALID */
	{ 0x0130e8, 0x2000f6, 0x3000fc },
	/* PM_LD_MISS_L1 */
	{ 0x080080, 0x10000d, 0x30000c, 0x4000f0 },
	/* PM_ST_MISS_L1 */
	{ 0x080088, 0x200054, 0x3000f0 },
	/* PM_RUN_CYC */
	{ 0x10000a, 0x2000f4, 0x600005 },
	/* PM_RUN_COUNT */
	{ 0x10000b, 0x2000f5 },
	/* PM_PURR */
	{ 0x10000e, 0x400010 },
	/* PM_FLUSH */
	{ 0x100010, 0x4000f8 },
	/* PM_MRK_INST_DISP */
	{ 0x10001a, 0x200010 },
	/* PM_TB_BIT_TRANS */
	{ 0x100026, 0x3000f8 },
	/* PM_ST_FIN */
	{ 0x100054, 0x2000f0 },
	/* PM_L1_ICACHE_MISS */
	{ 0x100056, 0x2000fc },
	/* PM_INST_IMC_MATCH_CMPL */
	{ 0x1000f0, 0x40000a },
	/* PM_GCT_EMPTY_CYC */
	{ 0x1000f8, 0x200008 },
	/* PM_LSU_DERAT_MISS_CYC */
	{ 0x1000fc, 0x400006 },
	/* PM_LSU_DERAT_MISS */
	{ 0x20000e, 0x400007 },
	/* PM_INST_DISP */
	{ 0x200012, 0x300012 },
	/* PM_INST_DISP */
	{ 0x2000f2, 0x3000f2 },
	/* PM_EXT_INT */
	{ 0x2000f8, 0x300010 },
	/* PM_DATA_FROM_L2MISS */
	{ 0x2000fe, 0x300056 },
	/* PM_MRK_FPU_FIN */
	{ 0x2d0030, 0x30001a },
	/* PM_MRK_INST_FIN */
	{ 0x30000a, 0x400018 },
	/* PM_L1_DCACHE_RELOAD_VALID */
	{ 0x3000f6, 0x40000e },
	/* PM_DATA_FROM_L3MISS */
	{ 0x3000fe, 0x400056 },
};
335 | |||
336 | /* | ||
337 | * This could be made more efficient with a binary search on | ||
338 | * a presorted list, if necessary | ||
339 | */ | ||
340 | static int find_alternatives_list(u64 event) | ||
341 | { | ||
342 | int i, j; | ||
343 | unsigned int alt; | ||
344 | |||
345 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
346 | if (event < event_alternatives[i][0]) | ||
347 | return -1; | ||
348 | for (j = 0; j < MAX_ALT; ++j) { | ||
349 | alt = event_alternatives[i][j]; | ||
350 | if (!alt || event < alt) | ||
351 | break; | ||
352 | if (event == alt) | ||
353 | return i; | ||
354 | } | ||
355 | } | ||
356 | return -1; | ||
357 | } | ||
358 | |||
359 | static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
360 | { | ||
361 | int i, j, nlim; | ||
362 | unsigned int psel, pmc; | ||
363 | unsigned int nalt = 1; | ||
364 | u64 aevent; | ||
365 | |||
366 | alt[0] = event; | ||
367 | nlim = p6_limited_pmc_event(event); | ||
368 | |||
369 | /* check the alternatives table */ | ||
370 | i = find_alternatives_list(event); | ||
371 | if (i >= 0) { | ||
372 | /* copy out alternatives from list */ | ||
373 | for (j = 0; j < MAX_ALT; ++j) { | ||
374 | aevent = event_alternatives[i][j]; | ||
375 | if (!aevent) | ||
376 | break; | ||
377 | if (aevent != event) | ||
378 | alt[nalt++] = aevent; | ||
379 | nlim += p6_limited_pmc_event(aevent); | ||
380 | } | ||
381 | |||
382 | } else { | ||
383 | /* Check for alternative ways of computing sum events */ | ||
384 | /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */ | ||
385 | psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */ | ||
386 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
387 | if (pmc && (psel == 0x32 || psel == 0x34)) | ||
388 | alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) | | ||
389 | ((5 - pmc) << PM_PMC_SH); | ||
390 | |||
391 | /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */ | ||
392 | if (pmc && (psel == 0x38 || psel == 0x3a)) | ||
393 | alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) | | ||
394 | ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH); | ||
395 | } | ||
396 | |||
397 | if (flags & PPMU_ONLY_COUNT_RUN) { | ||
398 | /* | ||
399 | * We're only counting in RUN state, | ||
400 | * so PM_CYC is equivalent to PM_RUN_CYC, | ||
401 | * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR. | ||
402 | * This doesn't include alternatives that don't provide | ||
403 | * any extra flexibility in assigning PMCs (e.g. | ||
404 | * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC). | ||
405 | * Note that even with these additional alternatives | ||
406 | * we never end up with more than 4 alternatives for any event. | ||
407 | */ | ||
408 | j = nalt; | ||
409 | for (i = 0; i < nalt; ++i) { | ||
410 | switch (alt[i]) { | ||
411 | case 0x1e: /* PM_CYC */ | ||
412 | alt[j++] = 0x600005; /* PM_RUN_CYC */ | ||
413 | ++nlim; | ||
414 | break; | ||
415 | case 0x10000a: /* PM_RUN_CYC */ | ||
416 | alt[j++] = 0x1e; /* PM_CYC */ | ||
417 | break; | ||
418 | case 2: /* PM_INST_CMPL */ | ||
419 | alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */ | ||
420 | ++nlim; | ||
421 | break; | ||
422 | case 0x500009: /* PM_RUN_INST_CMPL */ | ||
423 | alt[j++] = 2; /* PM_INST_CMPL */ | ||
424 | break; | ||
425 | case 0x10000e: /* PM_PURR */ | ||
426 | alt[j++] = 0x4000f4; /* PM_RUN_PURR */ | ||
427 | break; | ||
428 | case 0x4000f4: /* PM_RUN_PURR */ | ||
429 | alt[j++] = 0x10000e; /* PM_PURR */ | ||
430 | break; | ||
431 | } | ||
432 | } | ||
433 | nalt = j; | ||
434 | } | ||
435 | |||
436 | if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) { | ||
437 | /* remove the limited PMC events */ | ||
438 | j = 0; | ||
439 | for (i = 0; i < nalt; ++i) { | ||
440 | if (!p6_limited_pmc_event(alt[i])) { | ||
441 | alt[j] = alt[i]; | ||
442 | ++j; | ||
443 | } | ||
444 | } | ||
445 | nalt = j; | ||
446 | } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) { | ||
447 | /* remove all but the limited PMC events */ | ||
448 | j = 0; | ||
449 | for (i = 0; i < nalt; ++i) { | ||
450 | if (p6_limited_pmc_event(alt[i])) { | ||
451 | alt[j] = alt[i]; | ||
452 | ++j; | ||
453 | } | ||
454 | } | ||
455 | nalt = j; | ||
456 | } | ||
457 | |||
458 | return nalt; | ||
459 | } | ||
460 | |||
461 | static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) | ||
462 | { | ||
463 | /* Set PMCxSEL to 0 to disable PMCx */ | ||
464 | if (pmc <= 3) | ||
465 | mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); | ||
466 | } | ||
467 | |||
468 | static int power6_generic_events[] = { | ||
469 | [PERF_COUNT_CPU_CYCLES] = 0x1e, | ||
470 | [PERF_COUNT_INSTRUCTIONS] = 2, | ||
471 | [PERF_COUNT_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */ | ||
472 | [PERF_COUNT_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */ | ||
473 | [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */ | ||
474 | [PERF_COUNT_BRANCH_MISSES] = 0x400052, /* BR_MPRED */ | ||
475 | }; | ||
476 | |||
477 | struct power_pmu power6_pmu = { | ||
478 | .n_counter = 6, | ||
479 | .max_alternatives = MAX_ALT, | ||
480 | .add_fields = 0x1555, | ||
481 | .test_adder = 0x3000, | ||
482 | .compute_mmcr = p6_compute_mmcr, | ||
483 | .get_constraint = p6_get_constraint, | ||
484 | .get_alternatives = p6_get_alternatives, | ||
485 | .disable_pmc = p6_disable_pmc, | ||
486 | .n_generic = ARRAY_SIZE(power6_generic_events), | ||
487 | .generic_events = power6_generic_events, | ||
488 | .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR, | ||
489 | .limited_pmc_event = p6_limited_pmc_event, | ||
490 | }; | ||
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c new file mode 100644 index 000000000000..eed47c4523f1 --- /dev/null +++ b/arch/powerpc/kernel/ppc970-pmu.c | |||
@@ -0,0 +1,441 @@ | |||
1 | /* | ||
2 | * Performance counter support for PPC970-family processors. | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/string.h> | ||
12 | #include <linux/perf_counter.h> | ||
13 | #include <asm/reg.h> | ||
14 | |||
/*
 * Fields of a PPC970 event code
 */
#define PM_PMC_SH	12	/* PMC number (1-based) for direct events */
#define PM_PMC_MSK	0xf
#define PM_UNIT_SH	8	/* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK	0xf
#define PM_SPCSEL_SH	6
#define PM_SPCSEL_MSK	3
#define PM_BYTE_SH	4	/* Byte number of event bus to use */
#define PM_BYTE_MSK	3
#define PM_PMCSEL_MSK	0xf

/* Values of the PM_UNIT field */
#define PM_NONE		0
#define PM_FPU		1
#define PM_VPU		2
#define PM_ISU		3
#define PM_IFU		4
#define PM_IDU		5
#define PM_STS		6
#define PM_LSU0		7
#define PM_LSU1U	8
#define PM_LSU1L	9
#define PM_LASTUNIT	9	/* highest valid PM_UNIT value */

/*
 * Fields of MMCR0 on PPC970
 */
#define MMCR0_PMC1SEL_SH	8
#define MMCR0_PMC2SEL_SH	1
#define MMCR_PMCSEL_MSK		0x1f

/*
 * Fields of MMCR1 on PPC970
 */
#define MMCR1_TTM0SEL_SH		62
#define MMCR1_TTM1SEL_SH		59
#define MMCR1_TTM3SEL_SH		53
#define MMCR1_TTMSEL_MSK		3
#define MMCR1_TD_CP_DBG0SEL_SH		50
#define MMCR1_TD_CP_DBG1SEL_SH		48
#define MMCR1_TD_CP_DBG2SEL_SH		46
#define MMCR1_TD_CP_DBG3SEL_SH		44
/* note: the adder select bits are not in PMC order */
#define MMCR1_PMC1_ADDER_SEL_SH		39
#define MMCR1_PMC2_ADDER_SEL_SH		38
#define MMCR1_PMC6_ADDER_SEL_SH		37
#define MMCR1_PMC5_ADDER_SEL_SH		36
#define MMCR1_PMC8_ADDER_SEL_SH		35
#define MMCR1_PMC7_ADDER_SEL_SH		34
#define MMCR1_PMC3_ADDER_SEL_SH		33
#define MMCR1_PMC4_ADDER_SEL_SH		32
/* PMC3..PMC8 select fields, 5 bits apart */
#define MMCR1_PMC3SEL_SH		27
#define MMCR1_PMC4SEL_SH		22
#define MMCR1_PMC5SEL_SH		17
#define MMCR1_PMC6SEL_SH		12
#define MMCR1_PMC7SEL_SH		7
#define MMCR1_PMC8SEL_SH		2
73 | |||
74 | static short mmcr1_adder_bits[8] = { | ||
75 | MMCR1_PMC1_ADDER_SEL_SH, | ||
76 | MMCR1_PMC2_ADDER_SEL_SH, | ||
77 | MMCR1_PMC3_ADDER_SEL_SH, | ||
78 | MMCR1_PMC4_ADDER_SEL_SH, | ||
79 | MMCR1_PMC5_ADDER_SEL_SH, | ||
80 | MMCR1_PMC6_ADDER_SEL_SH, | ||
81 | MMCR1_PMC7_ADDER_SEL_SH, | ||
82 | MMCR1_PMC8_ADDER_SEL_SH | ||
83 | }; | ||
84 | |||
85 | /* | ||
86 | * Bits in MMCRA | ||
87 | */ | ||
88 | |||
89 | /* | ||
90 | * Layout of constraint bits: | ||
91 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
92 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
93 | *               <><><>[  >[  >[  ><  ><  ><  ><  ><><><><><><><><> | ||
94 | *               SPT0T1 UC PS1 PS2 B0  B1  B2  B3  P1P2P3P4P5P6P7P8 | ||
95 | * | ||
96 | * SP - SPCSEL constraint | ||
97 | * 48-49: SPCSEL value 0x3_0000_0000_0000 | ||
98 | * | ||
99 | * T0 - TTM0 constraint | ||
100 | * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000 | ||
101 | * | ||
102 | * T1 - TTM1 constraint | ||
103 | * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000 | ||
104 | * | ||
105 | * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS | ||
106 | * 43: UC3 error 0x0800_0000_0000 | ||
107 | * 42: FPU|IFU|VPU events needed 0x0400_0000_0000 | ||
108 | * 41: ISU events needed 0x0200_0000_0000 | ||
109 | * 40: IDU|STS events needed 0x0100_0000_0000 | ||
110 | * | ||
111 | * PS1 | ||
112 | * 39: PS1 error 0x0080_0000_0000 | ||
113 | * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 | ||
114 | * | ||
115 | * PS2 | ||
116 | * 35: PS2 error 0x0008_0000_0000 | ||
117 | * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 | ||
118 | * | ||
119 | * B0 | ||
120 | * 28-31: Byte 0 event source 0xf000_0000 | ||
121 | * Encoding as for the event code | ||
122 | * | ||
123 | * B1, B2, B3 | ||
124 | * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources | ||
125 | * | ||
126 | * P1 | ||
127 | * 15: P1 error 0x8000 | ||
128 | * 14-15: Count of events needing PMC1 | ||
129 | * | ||
130 | * P2..P8 | ||
131 | * 0-13: Count of events needing PMC2..PMC8 | ||
132 | */ | ||
133 | |||
/*
 * For each PMC (entry 0 is PMC1), a bitmap of the PMCSEL values that
 * select a direct marked-instruction event on that PMC: bit n set
 * means PMCSEL value n is a marked event.
 */
static unsigned char direct_marked_event[8] = {
	/* PMC1: PMCSEL 2, 3 -- PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
	[0] = 0x0c,
	/* PMC2: PMCSEL 3, 5 -- PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
	[1] = 0x28,
	/* PMC3: PMCSEL 3, 5 -- PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */
	[2] = 0x28,
	/* PMC4: PMCSEL 4, 5 -- PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
	[3] = 0x30,
	/* PMC5: PMCSEL 4, 5 -- PM_GRP_MRK, PM_MRK_GRP_TIMEO */
	[4] = 0x30,
	/* PMC6: PMCSEL 3, 4, 5 -- PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
	[5] = 0x38,
	/* PMC7: PMCSEL 4, 5 -- PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
	[6] = 0x30,
	/* PMC8: PMCSEL 4 -- PM_MRK_LSU_FIN */
	[7] = 0x10,
};
145 | |||
146 | /* | ||
147 | * Returns 1 if event counts things relating to marked instructions | ||
148 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
149 | */ | ||
150 | static int p970_marked_instr_event(u64 event) | ||
151 | { | ||
152 | int pmc, psel, unit, byte, bit; | ||
153 | unsigned int mask; | ||
154 | |||
155 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
156 | psel = event & PM_PMCSEL_MSK; | ||
157 | if (pmc) { | ||
158 | if (direct_marked_event[pmc - 1] & (1 << psel)) | ||
159 | return 1; | ||
160 | if (psel == 0) /* add events */ | ||
161 | bit = (pmc <= 4)? pmc - 1: 8 - pmc; | ||
162 | else if (psel == 7 || psel == 13) /* decode events */ | ||
163 | bit = 4; | ||
164 | else | ||
165 | return 0; | ||
166 | } else | ||
167 | bit = psel; | ||
168 | |||
169 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
170 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
171 | mask = 0; | ||
172 | switch (unit) { | ||
173 | case PM_VPU: | ||
174 | mask = 0x4c; /* byte 0 bits 2,3,6 */ | ||
175 | case PM_LSU0: | ||
176 | /* byte 2 bits 0,2,3,4,6; all of byte 1 */ | ||
177 | mask = 0x085dff00; | ||
178 | case PM_LSU1L: | ||
179 | mask = 0x50 << 24; /* byte 3 bits 4,6 */ | ||
180 | break; | ||
181 | } | ||
182 | return (mask >> (byte * 8 + bit)) & 1; | ||
183 | } | ||
184 | |||
185 | /* Masks and values for using events from the various units */ | ||
186 | static u64 unit_cons[PM_LASTUNIT+1][2] = { | ||
187 | [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull }, | ||
188 | [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull }, | ||
189 | [PM_ISU] = { 0x080000000000ull, 0x020000000000ull }, | ||
190 | [PM_IFU] = { 0xc80000000000ull, 0x840000000000ull }, | ||
191 | [PM_IDU] = { 0x380000000000ull, 0x010000000000ull }, | ||
192 | [PM_STS] = { 0x380000000000ull, 0x310000000000ull }, | ||
193 | }; | ||
194 | |||
195 | static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) | ||
196 | { | ||
197 | int pmc, byte, unit, sh, spcsel; | ||
198 | u64 mask = 0, value = 0; | ||
199 | int grp = -1; | ||
200 | |||
201 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
202 | if (pmc) { | ||
203 | if (pmc > 8) | ||
204 | return -1; | ||
205 | sh = (pmc - 1) * 2; | ||
206 | mask |= 2 << sh; | ||
207 | value |= 1 << sh; | ||
208 | grp = ((pmc - 1) >> 1) & 1; | ||
209 | } | ||
210 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
211 | if (unit) { | ||
212 | if (unit > PM_LASTUNIT) | ||
213 | return -1; | ||
214 | mask |= unit_cons[unit][0]; | ||
215 | value |= unit_cons[unit][1]; | ||
216 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
217 | /* | ||
218 | * Bus events on bytes 0 and 2 can be counted | ||
219 | * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8. | ||
220 | */ | ||
221 | if (!pmc) | ||
222 | grp = byte & 1; | ||
223 | /* Set byte lane select field */ | ||
224 | mask |= 0xfULL << (28 - 4 * byte); | ||
225 | value |= (u64)unit << (28 - 4 * byte); | ||
226 | } | ||
227 | if (grp == 0) { | ||
228 | /* increment PMC1/2/5/6 field */ | ||
229 | mask |= 0x8000000000ull; | ||
230 | value |= 0x1000000000ull; | ||
231 | } else if (grp == 1) { | ||
232 | /* increment PMC3/4/7/8 field */ | ||
233 | mask |= 0x800000000ull; | ||
234 | value |= 0x100000000ull; | ||
235 | } | ||
236 | spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; | ||
237 | if (spcsel) { | ||
238 | mask |= 3ull << 48; | ||
239 | value |= (u64)spcsel << 48; | ||
240 | } | ||
241 | *maskp = mask; | ||
242 | *valp = value; | ||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
247 | { | ||
248 | alt[0] = event; | ||
249 | |||
250 | /* 2 alternatives for LSU empty */ | ||
251 | if (event == 0x2002 || event == 0x3002) { | ||
252 | alt[1] = event ^ 0x1000; | ||
253 | return 2; | ||
254 | } | ||
255 | |||
256 | return 1; | ||
257 | } | ||
258 | |||
259 | static int p970_compute_mmcr(u64 event[], int n_ev, | ||
260 | unsigned int hwc[], u64 mmcr[]) | ||
261 | { | ||
262 | u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; | ||
263 | unsigned int pmc, unit, byte, psel; | ||
264 | unsigned int ttm, grp; | ||
265 | unsigned int pmc_inuse = 0; | ||
266 | unsigned int pmc_grp_use[2]; | ||
267 | unsigned char busbyte[4]; | ||
268 | unsigned char unituse[16]; | ||
269 | unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 }; | ||
270 | unsigned char ttmuse[2]; | ||
271 | unsigned char pmcsel[8]; | ||
272 | int i; | ||
273 | int spcsel; | ||
274 | |||
275 | if (n_ev > 8) | ||
276 | return -1; | ||
277 | |||
278 | /* First pass to count resource use */ | ||
279 | pmc_grp_use[0] = pmc_grp_use[1] = 0; | ||
280 | memset(busbyte, 0, sizeof(busbyte)); | ||
281 | memset(unituse, 0, sizeof(unituse)); | ||
282 | for (i = 0; i < n_ev; ++i) { | ||
283 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
284 | if (pmc) { | ||
285 | if (pmc_inuse & (1 << (pmc - 1))) | ||
286 | return -1; | ||
287 | pmc_inuse |= 1 << (pmc - 1); | ||
288 | /* count 1/2/5/6 vs 3/4/7/8 use */ | ||
289 | ++pmc_grp_use[((pmc - 1) >> 1) & 1]; | ||
290 | } | ||
291 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
292 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
293 | if (unit) { | ||
294 | if (unit > PM_LASTUNIT) | ||
295 | return -1; | ||
296 | if (!pmc) | ||
297 | ++pmc_grp_use[byte & 1]; | ||
298 | if (busbyte[byte] && busbyte[byte] != unit) | ||
299 | return -1; | ||
300 | busbyte[byte] = unit; | ||
301 | unituse[unit] = 1; | ||
302 | } | ||
303 | } | ||
304 | if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) | ||
305 | return -1; | ||
306 | |||
307 | /* | ||
308 | * Assign resources and set multiplexer selects. | ||
309 | * | ||
310 | * PM_ISU can go either on TTM0 or TTM1, but that's the only | ||
311 | * choice we have to deal with. | ||
312 | */ | ||
313 | if (unituse[PM_ISU] & | ||
314 | (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU])) | ||
315 | unitmap[PM_ISU] = 2 | 4; /* move ISU to TTM1 */ | ||
316 | /* Set TTM[01]SEL fields. */ | ||
317 | ttmuse[0] = ttmuse[1] = 0; | ||
318 | for (i = PM_FPU; i <= PM_STS; ++i) { | ||
319 | if (!unituse[i]) | ||
320 | continue; | ||
321 | ttm = unitmap[i]; | ||
322 | ++ttmuse[(ttm >> 2) & 1]; | ||
323 | mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH; | ||
324 | } | ||
325 | /* Check only one unit per TTMx */ | ||
326 | if (ttmuse[0] > 1 || ttmuse[1] > 1) | ||
327 | return -1; | ||
328 | |||
329 | /* Set byte lane select fields and TTM3SEL. */ | ||
330 | for (byte = 0; byte < 4; ++byte) { | ||
331 | unit = busbyte[byte]; | ||
332 | if (!unit) | ||
333 | continue; | ||
334 | if (unit <= PM_STS) | ||
335 | ttm = (unitmap[unit] >> 2) & 1; | ||
336 | else if (unit == PM_LSU0) | ||
337 | ttm = 2; | ||
338 | else { | ||
339 | ttm = 3; | ||
340 | if (unit == PM_LSU1L && byte >= 2) | ||
341 | mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); | ||
342 | } | ||
343 | mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | ||
344 | } | ||
345 | |||
346 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | ||
347 | memset(pmcsel, 0x8, sizeof(pmcsel)); /* 8 means don't count */ | ||
348 | for (i = 0; i < n_ev; ++i) { | ||
349 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
350 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
351 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
352 | psel = event[i] & PM_PMCSEL_MSK; | ||
353 | if (!pmc) { | ||
354 | /* Bus event or any-PMC direct event */ | ||
355 | if (unit) | ||
356 | psel |= 0x10 | ((byte & 2) << 2); | ||
357 | else | ||
358 | psel |= 8; | ||
359 | for (pmc = 0; pmc < 8; ++pmc) { | ||
360 | if (pmc_inuse & (1 << pmc)) | ||
361 | continue; | ||
362 | grp = (pmc >> 1) & 1; | ||
363 | if (unit) { | ||
364 | if (grp == (byte & 1)) | ||
365 | break; | ||
366 | } else if (pmc_grp_use[grp] < 4) { | ||
367 | ++pmc_grp_use[grp]; | ||
368 | break; | ||
369 | } | ||
370 | } | ||
371 | pmc_inuse |= 1 << pmc; | ||
372 | } else { | ||
373 | /* Direct event */ | ||
374 | --pmc; | ||
375 | if (psel == 0 && (byte & 2)) | ||
376 | /* add events on higher-numbered bus */ | ||
377 | mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; | ||
378 | } | ||
379 | pmcsel[pmc] = psel; | ||
380 | hwc[i] = pmc; | ||
381 | spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; | ||
382 | mmcr1 |= spcsel; | ||
383 | if (p970_marked_instr_event(event[i])) | ||
384 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
385 | } | ||
386 | for (pmc = 0; pmc < 2; ++pmc) | ||
387 | mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc); | ||
388 | for (; pmc < 8; ++pmc) | ||
389 | mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); | ||
390 | if (pmc_inuse & 1) | ||
391 | mmcr0 |= MMCR0_PMC1CE; | ||
392 | if (pmc_inuse & 0xfe) | ||
393 | mmcr0 |= MMCR0_PMCjCE; | ||
394 | |||
395 | mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ | ||
396 | |||
397 | /* Return MMCRx values */ | ||
398 | mmcr[0] = mmcr0; | ||
399 | mmcr[1] = mmcr1; | ||
400 | mmcr[2] = mmcra; | ||
401 | return 0; | ||
402 | } | ||
403 | |||
404 | static void p970_disable_pmc(unsigned int pmc, u64 mmcr[]) | ||
405 | { | ||
406 | int shift, i; | ||
407 | |||
408 | if (pmc <= 1) { | ||
409 | shift = MMCR0_PMC1SEL_SH - 7 * pmc; | ||
410 | i = 0; | ||
411 | } else { | ||
412 | shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2); | ||
413 | i = 1; | ||
414 | } | ||
415 | /* | ||
416 | * Setting the PMCxSEL field to 0x08 disables PMC x. | ||
417 | */ | ||
418 | mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift); | ||
419 | } | ||
420 | |||
421 | static int ppc970_generic_events[] = { | ||
422 | [PERF_COUNT_CPU_CYCLES] = 7, | ||
423 | [PERF_COUNT_INSTRUCTIONS] = 1, | ||
424 | [PERF_COUNT_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */ | ||
425 | [PERF_COUNT_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */ | ||
426 | [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */ | ||
427 | [PERF_COUNT_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */ | ||
428 | }; | ||
429 | |||
430 | struct power_pmu ppc970_pmu = { | ||
431 | .n_counter = 8, | ||
432 | .max_alternatives = 2, | ||
433 | .add_fields = 0x001100005555ull, | ||
434 | .test_adder = 0x013300000000ull, | ||
435 | .compute_mmcr = p970_compute_mmcr, | ||
436 | .get_constraint = p970_get_constraint, | ||
437 | .get_alternatives = p970_get_alternatives, | ||
438 | .disable_pmc = p970_disable_pmc, | ||
439 | .n_generic = ARRAY_SIZE(ppc970_generic_events), | ||
440 | .generic_events = ppc970_generic_events, | ||
441 | }; | ||
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 76993941cac9..ac0e112031b2 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/module.h> | 29 | #include <linux/module.h> |
30 | #include <linux/kprobes.h> | 30 | #include <linux/kprobes.h> |
31 | #include <linux/kdebug.h> | 31 | #include <linux/kdebug.h> |
32 | #include <linux/perf_counter.h> | ||
32 | 33 | ||
33 | #include <asm/firmware.h> | 34 | #include <asm/firmware.h> |
34 | #include <asm/page.h> | 35 | #include <asm/page.h> |
@@ -170,6 +171,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, | |||
170 | die("Weird page fault", regs, SIGSEGV); | 171 | die("Weird page fault", regs, SIGSEGV); |
171 | } | 172 | } |
172 | 173 | ||
174 | perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address); | ||
175 | |||
173 | /* When running in the kernel we expect faults to occur only to | 176 | /* When running in the kernel we expect faults to occur only to |
174 | * addresses in user space. All other faults represent errors in the | 177 | * addresses in user space. All other faults represent errors in the |
175 | * kernel and should generate an OOPS. Unfortunately, in the case of an | 178 | * kernel and should generate an OOPS. Unfortunately, in the case of an |
@@ -309,6 +312,8 @@ good_area: | |||
309 | } | 312 | } |
310 | if (ret & VM_FAULT_MAJOR) { | 313 | if (ret & VM_FAULT_MAJOR) { |
311 | current->maj_flt++; | 314 | current->maj_flt++; |
315 | perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, | ||
316 | regs, address); | ||
312 | #ifdef CONFIG_PPC_SMLPAR | 317 | #ifdef CONFIG_PPC_SMLPAR |
313 | if (firmware_has_feature(FW_FEATURE_CMO)) { | 318 | if (firmware_has_feature(FW_FEATURE_CMO)) { |
314 | preempt_disable(); | 319 | preempt_disable(); |
@@ -316,8 +321,11 @@ good_area: | |||
316 | preempt_enable(); | 321 | preempt_enable(); |
317 | } | 322 | } |
318 | #endif | 323 | #endif |
319 | } else | 324 | } else { |
320 | current->min_flt++; | 325 | current->min_flt++; |
326 | perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, | ||
327 | regs, address); | ||
328 | } | ||
321 | up_read(&mm->mmap_sem); | 329 | up_read(&mm->mmap_sem); |
322 | return 0; | 330 | return 0; |
323 | 331 | ||
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 9da795e49337..732ee93a8e98 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype | |||
@@ -1,6 +1,7 @@ | |||
1 | config PPC64 | 1 | config PPC64 |
2 | bool "64-bit kernel" | 2 | bool "64-bit kernel" |
3 | default n | 3 | default n |
4 | select HAVE_PERF_COUNTERS | ||
4 | help | 5 | help |
5 | This option selects whether a 32-bit or a 64-bit kernel | 6 | This option selects whether a 32-bit or a 64-bit kernel |
6 | will be built. | 7 | will be built. |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df9e885eee14..32ada97c964d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -727,6 +727,7 @@ config X86_UP_IOAPIC | |||
727 | config X86_LOCAL_APIC | 727 | config X86_LOCAL_APIC |
728 | def_bool y | 728 | def_bool y |
729 | depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC | 729 | depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC |
730 | select HAVE_PERF_COUNTERS if (!M386 && !M486) | ||
730 | 731 | ||
731 | config X86_IO_APIC | 732 | config X86_IO_APIC |
732 | def_bool y | 733 | def_bool y |
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a505202086e8..e590261ba059 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -825,9 +825,11 @@ ia32_sys_call_table: | |||
825 | .quad compat_sys_signalfd4 | 825 | .quad compat_sys_signalfd4 |
826 | .quad sys_eventfd2 | 826 | .quad sys_eventfd2 |
827 | .quad sys_epoll_create1 | 827 | .quad sys_epoll_create1 |
828 | .quad sys_dup3 /* 330 */ | 828 | .quad sys_dup3 /* 330 */ |
829 | .quad sys_pipe2 | 829 | .quad sys_pipe2 |
830 | .quad sys_inotify_init1 | 830 | .quad sys_inotify_init1 |
831 | .quad compat_sys_preadv | 831 | .quad compat_sys_preadv |
832 | .quad compat_sys_pwritev | 832 | .quad compat_sys_pwritev |
833 | .quad compat_sys_rt_tgsigqueueinfo /* 335 */ | ||
834 | .quad sys_perf_counter_open | ||
833 | ia32_syscall_end: | 835 | ia32_syscall_end: |
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index 85b46fba4229..aff9f1fcdcd7 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h | |||
@@ -247,5 +247,241 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) | |||
247 | #define smp_mb__before_atomic_inc() barrier() | 247 | #define smp_mb__before_atomic_inc() barrier() |
248 | #define smp_mb__after_atomic_inc() barrier() | 248 | #define smp_mb__after_atomic_inc() barrier() |
249 | 249 | ||
/* A 64-bit atomic type */

typedef struct {
	unsigned long long counter;
} atomic64_t;

/* Static initializer for an atomic64_t holding @val */
#define ATOMIC64_INIT(val)	{ (val) }
257 | |||
/**
 * __atomic64_read - plain read of an atomic64 variable
 * @ptr: pointer of type atomic64_t
 *
 * Expands to a plain access of @ptr->counter: no locked instruction is
 * used and no memory barrier is implied. atomic64_read() in this header
 * wraps this access in a cmpxchg loop to validate the value it returns.
 */
#define __atomic64_read(ptr)		((ptr)->counter)
266 | |||
267 | static inline unsigned long long | ||
268 | cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new) | ||
269 | { | ||
270 | asm volatile( | ||
271 | |||
272 | LOCK_PREFIX "cmpxchg8b (%[ptr])\n" | ||
273 | |||
274 | : "=A" (old) | ||
275 | |||
276 | : [ptr] "D" (ptr), | ||
277 | "A" (old), | ||
278 | "b" (ll_low(new)), | ||
279 | "c" (ll_high(new)) | ||
280 | |||
281 | : "memory"); | ||
282 | |||
283 | return old; | ||
284 | } | ||
285 | |||
286 | static inline unsigned long long | ||
287 | atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val, | ||
288 | unsigned long long new_val) | ||
289 | { | ||
290 | return cmpxchg8b(&ptr->counter, old_val, new_val); | ||
291 | } | ||
292 | |||
293 | /** | ||
294 | * atomic64_xchg - xchg atomic64 variable | ||
295 | * @ptr: pointer to type atomic64_t | ||
296 | * @new_val: value to assign | ||
297 | * @old_val: old value that was there | ||
298 | * | ||
299 | * Atomically xchgs the value of @ptr to @new_val and returns | ||
300 | * the old value. | ||
301 | */ | ||
302 | |||
303 | static inline unsigned long long | ||
304 | atomic64_xchg(atomic64_t *ptr, unsigned long long new_val) | ||
305 | { | ||
306 | unsigned long long old_val; | ||
307 | |||
308 | do { | ||
309 | old_val = atomic_read(ptr); | ||
310 | } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); | ||
311 | |||
312 | return old_val; | ||
313 | } | ||
314 | |||
315 | /** | ||
316 | * atomic64_set - set atomic64 variable | ||
317 | * @ptr: pointer to type atomic64_t | ||
318 | * @new_val: value to assign | ||
319 | * | ||
320 | * Atomically sets the value of @ptr to @new_val. | ||
321 | */ | ||
322 | static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val) | ||
323 | { | ||
324 | atomic64_xchg(ptr, new_val); | ||
325 | } | ||
326 | |||
327 | /** | ||
328 | * atomic64_read - read atomic64 variable | ||
329 | * @ptr: pointer to type atomic64_t | ||
330 | * | ||
331 | * Atomically reads the value of @ptr and returns it. | ||
332 | */ | ||
333 | static inline unsigned long long atomic64_read(atomic64_t *ptr) | ||
334 | { | ||
335 | unsigned long long curr_val; | ||
336 | |||
337 | do { | ||
338 | curr_val = __atomic64_read(ptr); | ||
339 | } while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val); | ||
340 | |||
341 | return curr_val; | ||
342 | } | ||
343 | |||
344 | /** | ||
345 | * atomic64_add_return - add and return | ||
346 | * @delta: integer value to add | ||
347 | * @ptr: pointer to type atomic64_t | ||
348 | * | ||
349 | * Atomically adds @delta to @ptr and returns @delta + *@ptr | ||
350 | */ | ||
351 | static inline unsigned long long | ||
352 | atomic64_add_return(unsigned long long delta, atomic64_t *ptr) | ||
353 | { | ||
354 | unsigned long long old_val, new_val; | ||
355 | |||
356 | do { | ||
357 | old_val = atomic_read(ptr); | ||
358 | new_val = old_val + delta; | ||
359 | |||
360 | } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); | ||
361 | |||
362 | return new_val; | ||
363 | } | ||
364 | |||
365 | static inline long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr) | ||
366 | { | ||
367 | return atomic64_add_return(-delta, ptr); | ||
368 | } | ||
369 | |||
370 | static inline long atomic64_inc_return(atomic64_t *ptr) | ||
371 | { | ||
372 | return atomic64_add_return(1, ptr); | ||
373 | } | ||
374 | |||
375 | static inline long atomic64_dec_return(atomic64_t *ptr) | ||
376 | { | ||
377 | return atomic64_sub_return(1, ptr); | ||
378 | } | ||
379 | |||
380 | /** | ||
381 | * atomic64_add - add integer to atomic64 variable | ||
382 | * @delta: integer value to add | ||
383 | * @ptr: pointer to type atomic64_t | ||
384 | * | ||
385 | * Atomically adds @delta to @ptr. | ||
386 | */ | ||
387 | static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr) | ||
388 | { | ||
389 | atomic64_add_return(delta, ptr); | ||
390 | } | ||
391 | |||
392 | /** | ||
393 | * atomic64_sub - subtract the atomic64 variable | ||
394 | * @delta: integer value to subtract | ||
395 | * @ptr: pointer to type atomic64_t | ||
396 | * | ||
397 | * Atomically subtracts @delta from @ptr. | ||
398 | */ | ||
399 | static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr) | ||
400 | { | ||
401 | atomic64_add(-delta, ptr); | ||
402 | } | ||
403 | |||
404 | /** | ||
405 | * atomic64_sub_and_test - subtract value from variable and test result | ||
406 | * @delta: integer value to subtract | ||
407 | * @ptr: pointer to type atomic64_t | ||
408 | * | ||
409 | * Atomically subtracts @delta from @ptr and returns | ||
410 | * true if the result is zero, or false for all | ||
411 | * other cases. | ||
412 | */ | ||
413 | static inline int | ||
414 | atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr) | ||
415 | { | ||
416 | unsigned long long old_val = atomic64_sub_return(delta, ptr); | ||
417 | |||
418 | return old_val == 0; | ||
419 | } | ||
420 | |||
421 | /** | ||
422 | * atomic64_inc - increment atomic64 variable | ||
423 | * @ptr: pointer to type atomic64_t | ||
424 | * | ||
425 | * Atomically increments @ptr by 1. | ||
426 | */ | ||
427 | static inline void atomic64_inc(atomic64_t *ptr) | ||
428 | { | ||
429 | atomic64_add(1, ptr); | ||
430 | } | ||
431 | |||
432 | /** | ||
433 | * atomic64_dec - decrement atomic64 variable | ||
434 | * @ptr: pointer to type atomic64_t | ||
435 | * | ||
436 | * Atomically decrements @ptr by 1. | ||
437 | */ | ||
438 | static inline void atomic64_dec(atomic64_t *ptr) | ||
439 | { | ||
440 | atomic64_sub(1, ptr); | ||
441 | } | ||
442 | |||
443 | /** | ||
444 | * atomic64_dec_and_test - decrement and test | ||
445 | * @ptr: pointer to type atomic64_t | ||
446 | * | ||
447 | * Atomically decrements @ptr by 1 and | ||
448 | * returns true if the result is 0, or false for all other | ||
449 | * cases. | ||
450 | */ | ||
451 | static inline int atomic64_dec_and_test(atomic64_t *ptr) | ||
452 | { | ||
453 | return atomic64_sub_and_test(1, ptr); | ||
454 | } | ||
455 | |||
456 | /** | ||
457 | * atomic64_inc_and_test - increment and test | ||
458 | * @ptr: pointer to type atomic64_t | ||
459 | * | ||
460 | * Atomically increments @ptr by 1 | ||
461 | * and returns true if the result is zero, or false for all | ||
462 | * other cases. | ||
463 | */ | ||
464 | static inline int atomic64_inc_and_test(atomic64_t *ptr) | ||
465 | { | ||
466 | return atomic64_sub_and_test(-1, ptr); | ||
467 | } | ||
468 | |||
469 | /** | ||
470 | * atomic64_add_negative - add and test if negative | ||
471 | * @delta: integer value to add | ||
472 | * @ptr: pointer to type atomic64_t | ||
473 | * | ||
474 | * Atomically adds @delta to @ptr and returns true | ||
475 | * if the result is negative, or false when | ||
476 | * result is greater than or equal to zero. | ||
477 | */ | ||
478 | static inline int | ||
479 | atomic64_add_negative(unsigned long long delta, atomic64_t *ptr) | ||
480 | { | ||
481 | long long old_val = atomic64_add_return(delta, ptr); | ||
482 | |||
483 | return old_val < 0; | ||
484 | } | ||
485 | |||
250 | #include <asm-generic/atomic.h> | 486 | #include <asm-generic/atomic.h> |
251 | #endif /* _ASM_X86_ATOMIC_32_H */ | 487 | #endif /* _ASM_X86_ATOMIC_32_H */ |
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index c2e6bedaf258..fe24d2802490 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
@@ -50,6 +50,7 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) | |||
50 | 50 | ||
51 | #ifdef CONFIG_PERF_COUNTERS | 51 | #ifdef CONFIG_PERF_COUNTERS |
52 | BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) | 52 | BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) |
53 | BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) | ||
53 | #endif | 54 | #endif |
54 | 55 | ||
55 | #ifdef CONFIG_X86_MCE_P4THERMAL | 56 | #ifdef CONFIG_X86_MCE_P4THERMAL |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 37555e52f980..9ebc5c255032 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
@@ -13,6 +13,8 @@ typedef struct { | |||
13 | unsigned int irq_spurious_count; | 13 | unsigned int irq_spurious_count; |
14 | #endif | 14 | #endif |
15 | unsigned int generic_irqs; /* arch dependent */ | 15 | unsigned int generic_irqs; /* arch dependent */ |
16 | unsigned int apic_perf_irqs; | ||
17 | unsigned int apic_pending_irqs; | ||
16 | #ifdef CONFIG_SMP | 18 | #ifdef CONFIG_SMP |
17 | unsigned int irq_resched_count; | 19 | unsigned int irq_resched_count; |
18 | unsigned int irq_call_count; | 20 | unsigned int irq_call_count; |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b762ea49bd70..7309c0ad6902 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -29,6 +29,9 @@ | |||
29 | extern void apic_timer_interrupt(void); | 29 | extern void apic_timer_interrupt(void); |
30 | extern void generic_interrupt(void); | 30 | extern void generic_interrupt(void); |
31 | extern void error_interrupt(void); | 31 | extern void error_interrupt(void); |
32 | extern void perf_counter_interrupt(void); | ||
33 | extern void perf_pending_interrupt(void); | ||
34 | |||
32 | extern void spurious_interrupt(void); | 35 | extern void spurious_interrupt(void); |
33 | extern void thermal_interrupt(void); | 36 | extern void thermal_interrupt(void); |
34 | extern void reschedule_interrupt(void); | 37 | extern void reschedule_interrupt(void); |
diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h deleted file mode 100644 index fa0fd068bc2e..000000000000 --- a/arch/x86/include/asm/intel_arch_perfmon.h +++ /dev/null | |||
@@ -1,31 +0,0 @@ | |||
1 | #ifndef _ASM_X86_INTEL_ARCH_PERFMON_H | ||
2 | #define _ASM_X86_INTEL_ARCH_PERFMON_H | ||
3 | |||
4 | #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 | ||
5 | #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 | ||
6 | |||
7 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 | ||
8 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 | ||
9 | |||
10 | #define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) | ||
11 | #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) | ||
12 | #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) | ||
13 | #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) | ||
14 | |||
15 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c) | ||
16 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | ||
17 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0) | ||
18 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ | ||
19 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) | ||
20 | |||
21 | union cpuid10_eax { | ||
22 | struct { | ||
23 | unsigned int version_id:8; | ||
24 | unsigned int num_counters:8; | ||
25 | unsigned int bit_width:8; | ||
26 | unsigned int mask_length:8; | ||
27 | } split; | ||
28 | unsigned int full; | ||
29 | }; | ||
30 | |||
31 | #endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */ | ||
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 3cbd79bbb47c..545bb811ccb5 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -117,6 +117,11 @@ | |||
117 | #define GENERIC_INTERRUPT_VECTOR 0xed | 117 | #define GENERIC_INTERRUPT_VECTOR 0xed |
118 | 118 | ||
119 | /* | 119 | /* |
120 | * Performance monitoring pending work vector: | ||
121 | */ | ||
122 | #define LOCAL_PENDING_VECTOR 0xec | ||
123 | |||
124 | /* | ||
120 | * First APIC vector available to drivers: (vectors 0x30-0xee) we | 125 | * First APIC vector available to drivers: (vectors 0x30-0xee) we |
121 | * start at 0x31(0x41) to spread out vectors evenly between priority | 126 | * start at 0x31(0x41) to spread out vectors evenly between priority |
122 | * levels. (0x80 is the syscall vector) | 127 | * levels. (0x80 is the syscall vector) |
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h new file mode 100644 index 000000000000..d08dd52cb8ff --- /dev/null +++ b/arch/x86/include/asm/perf_counter.h | |||
@@ -0,0 +1,100 @@ | |||
1 | #ifndef _ASM_X86_PERF_COUNTER_H | ||
2 | #define _ASM_X86_PERF_COUNTER_H | ||
3 | |||
4 | /* | ||
5 | * Performance counter hw details: | ||
6 | */ | ||
7 | |||
8 | #define X86_PMC_MAX_GENERIC 8 | ||
9 | #define X86_PMC_MAX_FIXED 3 | ||
10 | |||
11 | #define X86_PMC_IDX_GENERIC 0 | ||
12 | #define X86_PMC_IDX_FIXED 32 | ||
13 | #define X86_PMC_IDX_MAX 64 | ||
14 | |||
15 | #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 | ||
16 | #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 | ||
17 | |||
18 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 | ||
19 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 | ||
20 | |||
21 | #define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) | ||
22 | #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) | ||
23 | #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) | ||
24 | #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) | ||
25 | |||
26 | /* | ||
27 | * Includes eventsel and unit mask as well: | ||
28 | */ | ||
29 | #define ARCH_PERFMON_EVENT_MASK 0xffff | ||
30 | |||
31 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c | ||
32 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | ||
33 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 | ||
34 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ | ||
35 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) | ||
36 | |||
37 | #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 | ||
38 | |||
/*
 * Intel "Architectural Performance Monitoring" CPUID
 * detection/enumeration details.
 *
 * cpuid10_eax: the register value is available whole through .full or
 * as four 8-bit fields through .split.
 */
union cpuid10_eax {
	struct {
		unsigned int version_id:8;
		unsigned int num_counters:8;
		unsigned int bit_width:8;
		unsigned int mask_length:8;
	} split;
	unsigned int full;
};
52 | |||
/*
 * cpuid10_edx: the register value is available whole through .full; the
 * low 4 bits are exposed as .split.num_counters_fixed and the remaining
 * 28 bits are marked reserved.
 */
union cpuid10_edx {
	struct {
		unsigned int num_counters_fixed:4;
		unsigned int reserved:28;
	} split;
	unsigned int full;
};
60 | |||
61 | |||
62 | /* | ||
63 | * Fixed-purpose performance counters: | ||
64 | */ | ||
65 | |||
66 | /* | ||
67 | * All 3 fixed-mode PMCs are configured via this single MSR: | ||
68 | */ | ||
69 | #define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d | ||
70 | |||
71 | /* | ||
72 | * The counts are available in three separate MSRs: | ||
73 | */ | ||
74 | |||
75 | /* Instr_Retired.Any: */ | ||
76 | #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 | ||
77 | #define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) | ||
78 | |||
79 | /* CPU_CLK_Unhalted.Core: */ | ||
80 | #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a | ||
81 | #define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) | ||
82 | |||
83 | /* CPU_CLK_Unhalted.Ref: */ | ||
84 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b | ||
85 | #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) | ||
86 | |||
87 | extern void set_perf_counter_pending(void); | ||
88 | |||
89 | #define clear_perf_counter_pending() do { } while (0) | ||
90 | #define test_perf_counter_pending() (0) | ||
91 | |||
92 | #ifdef CONFIG_PERF_COUNTERS | ||
93 | extern void init_hw_perf_counters(void); | ||
94 | extern void perf_counters_lapic_init(int nmi); | ||
95 | #else | ||
96 | static inline void init_hw_perf_counters(void) { } | ||
97 | static inline void perf_counters_lapic_init(int nmi) { } | ||
98 | #endif | ||
99 | |||
100 | #endif /* _ASM_X86_PERF_COUNTER_H */ | ||
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 6e72d74cf8dc..732a30706153 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
@@ -340,6 +340,8 @@ | |||
340 | #define __NR_inotify_init1 332 | 340 | #define __NR_inotify_init1 332 |
341 | #define __NR_preadv 333 | 341 | #define __NR_preadv 333 |
342 | #define __NR_pwritev 334 | 342 | #define __NR_pwritev 334 |
343 | #define __NR_rt_tgsigqueueinfo 335 | ||
344 | #define __NR_perf_counter_open 336 | ||
343 | 345 | ||
344 | #ifdef __KERNEL__ | 346 | #ifdef __KERNEL__ |
345 | 347 | ||
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index f81829462325..900e1617e672 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
@@ -657,7 +657,10 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1) | |||
657 | __SYSCALL(__NR_preadv, sys_preadv) | 657 | __SYSCALL(__NR_preadv, sys_preadv) |
658 | #define __NR_pwritev 296 | 658 | #define __NR_pwritev 296 |
659 | __SYSCALL(__NR_pwritev, sys_pwritev) | 659 | __SYSCALL(__NR_pwritev, sys_pwritev) |
660 | 660 | #define __NR_rt_tgsigqueueinfo 297 | |
661 | __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) | ||
662 | #define __NR_perf_counter_open 298 | ||
663 | __SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) | ||
661 | 664 | ||
662 | #ifndef __NO_STUBS | 665 | #ifndef __NO_STUBS |
663 | #define __ARCH_WANT_OLD_READDIR | 666 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f2870920f246..e9021a908020 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/smp.h> | 34 | #include <linux/smp.h> |
35 | #include <linux/mm.h> | 35 | #include <linux/mm.h> |
36 | 36 | ||
37 | #include <asm/perf_counter.h> | ||
37 | #include <asm/pgalloc.h> | 38 | #include <asm/pgalloc.h> |
38 | #include <asm/atomic.h> | 39 | #include <asm/atomic.h> |
39 | #include <asm/mpspec.h> | 40 | #include <asm/mpspec.h> |
@@ -761,6 +762,8 @@ static void local_apic_timer_interrupt(void) | |||
761 | inc_irq_stat(apic_timer_irqs); | 762 | inc_irq_stat(apic_timer_irqs); |
762 | 763 | ||
763 | evt->event_handler(evt); | 764 | evt->event_handler(evt); |
765 | |||
766 | perf_counter_unthrottle(); | ||
764 | } | 767 | } |
765 | 768 | ||
766 | /* | 769 | /* |
@@ -1133,6 +1136,7 @@ void __cpuinit setup_local_APIC(void) | |||
1133 | apic_write(APIC_ESR, 0); | 1136 | apic_write(APIC_ESR, 0); |
1134 | } | 1137 | } |
1135 | #endif | 1138 | #endif |
1139 | perf_counters_lapic_init(0); | ||
1136 | 1140 | ||
1137 | preempt_disable(); | 1141 | preempt_disable(); |
1138 | 1142 | ||
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 4e242f9a06e4..3efcb2b96a15 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -1,5 +1,5 @@ | |||
1 | # | 1 | # |
2 | # Makefile for x86-compatible CPU details and quirks | 2 | # Makefile for x86-compatible CPU details, features and quirks |
3 | # | 3 | # |
4 | 4 | ||
5 | # Don't trace early stages of a secondary CPU boot | 5 | # Don't trace early stages of a secondary CPU boot |
@@ -23,11 +23,13 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o | |||
23 | obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o | 23 | obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o |
24 | obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o | 24 | obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o |
25 | 25 | ||
26 | obj-$(CONFIG_X86_MCE) += mcheck/ | 26 | obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o |
27 | obj-$(CONFIG_MTRR) += mtrr/ | ||
28 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | ||
29 | 27 | ||
30 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o | 28 | obj-$(CONFIG_X86_MCE) += mcheck/ |
29 | obj-$(CONFIG_MTRR) += mtrr/ | ||
30 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | ||
31 | |||
32 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o | ||
31 | 33 | ||
32 | quiet_cmd_mkcapflags = MKCAP $@ | 34 | quiet_cmd_mkcapflags = MKCAP $@ |
33 | cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ | 35 | cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c1caefc82e62..591012fb949f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/io.h> | 13 | #include <linux/io.h> |
14 | 14 | ||
15 | #include <asm/stackprotector.h> | 15 | #include <asm/stackprotector.h> |
16 | #include <asm/perf_counter.h> | ||
16 | #include <asm/mmu_context.h> | 17 | #include <asm/mmu_context.h> |
17 | #include <asm/hypervisor.h> | 18 | #include <asm/hypervisor.h> |
18 | #include <asm/processor.h> | 19 | #include <asm/processor.h> |
@@ -854,6 +855,7 @@ void __init identify_boot_cpu(void) | |||
854 | #else | 855 | #else |
855 | vgetcpu_set_mode(); | 856 | vgetcpu_set_mode(); |
856 | #endif | 857 | #endif |
858 | init_hw_perf_counters(); | ||
857 | } | 859 | } |
858 | 860 | ||
859 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | 861 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c new file mode 100644 index 000000000000..5bfd30ab3920 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -0,0 +1,1242 @@ | |||
1 | /* | ||
2 | * Performance counter x86 architecture code | ||
3 | * | ||
4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | ||
5 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar | ||
6 | * Copyright (C) 2009 Jaswinder Singh Rajput | ||
7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | ||
8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
9 | * | ||
10 | * For licencing details see kernel-base/COPYING | ||
11 | */ | ||
12 | |||
13 | #include <linux/perf_counter.h> | ||
14 | #include <linux/capability.h> | ||
15 | #include <linux/notifier.h> | ||
16 | #include <linux/hardirq.h> | ||
17 | #include <linux/kprobes.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/kdebug.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/uaccess.h> | ||
22 | |||
23 | #include <asm/apic.h> | ||
24 | #include <asm/stacktrace.h> | ||
25 | #include <asm/nmi.h> | ||
26 | |||
27 | static u64 perf_counter_mask __read_mostly; | ||
28 | |||
/*
 * Per-CPU bookkeeping for the hardware counter slots.
 */
struct cpu_hw_counters {
	/* counter installed in each slot; set in x86_pmu_enable(), cleared in x86_pmu_disable() */
	struct perf_counter	*counters[X86_PMC_IDX_MAX];
	/* one bit per slot that has been handed out by x86_pmu_enable() */
	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	/* one bit per slot the interrupt handlers should look at */
	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	/* incremented by the irq handlers, compared against PERFMON_MAX_INTERRUPTS, zeroed by perf_counter_unthrottle() */
	unsigned long		interrupts;
	/* software enable flag consulted by the AMD enable/disable paths */
	int			enabled;
};
36 | |||
/*
 * struct x86_pmu - generic x86 pmu
 *
 * Vendor description filled in by intel_pmu_init()/amd_pmu_init() and then
 * used through the single file-scope 'x86_pmu' instance.
 */
struct x86_pmu {
	const char	*name;		/* printed by init_hw_perf_counters() */
	int		version;	/* set from CPUID leaf 10 on Intel */
	/* overflow handler; second argument is non-zero when called from NMI */
	int		(*handle_irq)(struct pt_regs *, int);
	void		(*disable_all)(void);
	void		(*enable_all)(void);
	/* per-counter enable/disable; the int is the counter slot index */
	void		(*enable)(struct hw_perf_counter *, int);
	void		(*disable)(struct hw_perf_counter *, int);
	unsigned	eventsel;	/* MSR number of event select 0 */
	unsigned	perfctr;	/* MSR number of counter 0 */
	/* translate a generic PERF_COUNT_* id into a hardware config value */
	u64		(*event_map)(int);
	/* filter a user supplied raw config down to the permitted bits */
	u64		(*raw_event)(u64);
	int		max_events;	/* number of entries event_map() accepts */
	int		num_counters;
	int		num_counters_fixed;
	int		counter_bits;	/* counter width used for delta sign handling */
	u64		counter_mask;	/* applied to values written to the counter MSR */
	u64		max_period;	/* upper bound on the programmed period */
	u64		intel_ctrl;	/* value written to MSR_CORE_PERF_GLOBAL_CTRL on enable_all */
};
60 | |||
/* Active PMU description; handle_irq stays NULL until a vendor init succeeds. */
static struct x86_pmu x86_pmu __read_mostly;

/* Per-CPU slot state; starts out with the software enable flag set. */
static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
	.enabled = 1,
};
66 | |||
/*
 * Intel PerfMon v3. Used on Core2 and later.
 *
 * Indexed by the generic PERF_COUNT_* id; each value is OR'ed into
 * hwc->config by __hw_perf_counter_init() via x86_pmu.event_map().
 */
static const u64 intel_perfmon_event_map[] =
{
  [PERF_COUNT_CPU_CYCLES]		= 0x003c,
  [PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_CACHE_REFERENCES]		= 0x4f2e,
  [PERF_COUNT_CACHE_MISSES]		= 0x412e,
  [PERF_COUNT_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
  [PERF_COUNT_BUS_CYCLES]		= 0x013c,
};
80 | |||
81 | static u64 intel_pmu_event_map(int event) | ||
82 | { | ||
83 | return intel_perfmon_event_map[event]; | ||
84 | } | ||
85 | |||
86 | static u64 intel_pmu_raw_event(u64 event) | ||
87 | { | ||
88 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
89 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
90 | #define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL | ||
91 | |||
92 | #define CORE_EVNTSEL_MASK \ | ||
93 | (CORE_EVNTSEL_EVENT_MASK | \ | ||
94 | CORE_EVNTSEL_UNIT_MASK | \ | ||
95 | CORE_EVNTSEL_COUNTER_MASK) | ||
96 | |||
97 | return event & CORE_EVNTSEL_MASK; | ||
98 | } | ||
99 | |||
/*
 * AMD Performance Monitor K7 and later.
 *
 * Indexed by the generic PERF_COUNT_* id. There is no entry for
 * PERF_COUNT_BUS_CYCLES, unlike the Intel table.
 */
static const u64 amd_perfmon_event_map[] =
{
  [PERF_COUNT_CPU_CYCLES]		= 0x0076,
  [PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_CACHE_REFERENCES]		= 0x0080,
  [PERF_COUNT_CACHE_MISSES]		= 0x0081,
  [PERF_COUNT_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
};
112 | |||
113 | static u64 amd_pmu_event_map(int event) | ||
114 | { | ||
115 | return amd_perfmon_event_map[event]; | ||
116 | } | ||
117 | |||
118 | static u64 amd_pmu_raw_event(u64 event) | ||
119 | { | ||
120 | #define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL | ||
121 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | ||
122 | #define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL | ||
123 | |||
124 | #define K7_EVNTSEL_MASK \ | ||
125 | (K7_EVNTSEL_EVENT_MASK | \ | ||
126 | K7_EVNTSEL_UNIT_MASK | \ | ||
127 | K7_EVNTSEL_COUNTER_MASK) | ||
128 | |||
129 | return event & K7_EVNTSEL_MASK; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * Propagate counter elapsed time into the generic counter. | ||
134 | * Can only be executed on the CPU where the counter is active. | ||
135 | * Returns the delta events processed. | ||
136 | */ | ||
137 | static u64 | ||
138 | x86_perf_counter_update(struct perf_counter *counter, | ||
139 | struct hw_perf_counter *hwc, int idx) | ||
140 | { | ||
141 | int shift = 64 - x86_pmu.counter_bits; | ||
142 | u64 prev_raw_count, new_raw_count; | ||
143 | s64 delta; | ||
144 | |||
145 | /* | ||
146 | * Careful: an NMI might modify the previous counter value. | ||
147 | * | ||
148 | * Our tactic to handle this is to first atomically read and | ||
149 | * exchange a new raw count - then add that new-prev delta | ||
150 | * count to the generic counter atomically: | ||
151 | */ | ||
152 | again: | ||
153 | prev_raw_count = atomic64_read(&hwc->prev_count); | ||
154 | rdmsrl(hwc->counter_base + idx, new_raw_count); | ||
155 | |||
156 | if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, | ||
157 | new_raw_count) != prev_raw_count) | ||
158 | goto again; | ||
159 | |||
160 | /* | ||
161 | * Now we have the new raw value and have updated the prev | ||
162 | * timestamp already. We can now calculate the elapsed delta | ||
163 | * (counter-)time and add that to the generic counter. | ||
164 | * | ||
165 | * Careful, not all hw sign-extends above the physical width | ||
166 | * of the count. | ||
167 | */ | ||
168 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | ||
169 | delta >>= shift; | ||
170 | |||
171 | atomic64_add(delta, &counter->count); | ||
172 | atomic64_sub(delta, &hwc->period_left); | ||
173 | |||
174 | return new_raw_count; | ||
175 | } | ||
176 | |||
/* Number of counters holding a reference on the reserved PMC hardware. */
static atomic_t active_counters;
/* Serializes the 0 <-> 1 transitions of active_counters with reserve/release. */
static DEFINE_MUTEX(pmc_reserve_mutex);
179 | |||
180 | static bool reserve_pmc_hardware(void) | ||
181 | { | ||
182 | int i; | ||
183 | |||
184 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
185 | disable_lapic_nmi_watchdog(); | ||
186 | |||
187 | for (i = 0; i < x86_pmu.num_counters; i++) { | ||
188 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) | ||
189 | goto perfctr_fail; | ||
190 | } | ||
191 | |||
192 | for (i = 0; i < x86_pmu.num_counters; i++) { | ||
193 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) | ||
194 | goto eventsel_fail; | ||
195 | } | ||
196 | |||
197 | return true; | ||
198 | |||
199 | eventsel_fail: | ||
200 | for (i--; i >= 0; i--) | ||
201 | release_evntsel_nmi(x86_pmu.eventsel + i); | ||
202 | |||
203 | i = x86_pmu.num_counters; | ||
204 | |||
205 | perfctr_fail: | ||
206 | for (i--; i >= 0; i--) | ||
207 | release_perfctr_nmi(x86_pmu.perfctr + i); | ||
208 | |||
209 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
210 | enable_lapic_nmi_watchdog(); | ||
211 | |||
212 | return false; | ||
213 | } | ||
214 | |||
215 | static void release_pmc_hardware(void) | ||
216 | { | ||
217 | int i; | ||
218 | |||
219 | for (i = 0; i < x86_pmu.num_counters; i++) { | ||
220 | release_perfctr_nmi(x86_pmu.perfctr + i); | ||
221 | release_evntsel_nmi(x86_pmu.eventsel + i); | ||
222 | } | ||
223 | |||
224 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
225 | enable_lapic_nmi_watchdog(); | ||
226 | } | ||
227 | |||
228 | static void hw_perf_counter_destroy(struct perf_counter *counter) | ||
229 | { | ||
230 | if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { | ||
231 | release_pmc_hardware(); | ||
232 | mutex_unlock(&pmc_reserve_mutex); | ||
233 | } | ||
234 | } | ||
235 | |||
236 | static inline int x86_pmu_initialized(void) | ||
237 | { | ||
238 | return x86_pmu.handle_irq != NULL; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * Setup the hardware configuration for a given hw_event_type | ||
243 | */ | ||
244 | static int __hw_perf_counter_init(struct perf_counter *counter) | ||
245 | { | ||
246 | struct perf_counter_hw_event *hw_event = &counter->hw_event; | ||
247 | struct hw_perf_counter *hwc = &counter->hw; | ||
248 | int err; | ||
249 | |||
250 | if (!x86_pmu_initialized()) | ||
251 | return -ENODEV; | ||
252 | |||
253 | err = 0; | ||
254 | if (!atomic_inc_not_zero(&active_counters)) { | ||
255 | mutex_lock(&pmc_reserve_mutex); | ||
256 | if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware()) | ||
257 | err = -EBUSY; | ||
258 | else | ||
259 | atomic_inc(&active_counters); | ||
260 | mutex_unlock(&pmc_reserve_mutex); | ||
261 | } | ||
262 | if (err) | ||
263 | return err; | ||
264 | |||
265 | /* | ||
266 | * Generate PMC IRQs: | ||
267 | * (keep 'enabled' bit clear for now) | ||
268 | */ | ||
269 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | ||
270 | |||
271 | /* | ||
272 | * Count user and OS events unless requested not to. | ||
273 | */ | ||
274 | if (!hw_event->exclude_user) | ||
275 | hwc->config |= ARCH_PERFMON_EVENTSEL_USR; | ||
276 | if (!hw_event->exclude_kernel) | ||
277 | hwc->config |= ARCH_PERFMON_EVENTSEL_OS; | ||
278 | |||
279 | /* | ||
280 | * If privileged enough, allow NMI events: | ||
281 | */ | ||
282 | hwc->nmi = 0; | ||
283 | if (hw_event->nmi) { | ||
284 | if (sysctl_perf_counter_priv && !capable(CAP_SYS_ADMIN)) | ||
285 | return -EACCES; | ||
286 | hwc->nmi = 1; | ||
287 | } | ||
288 | |||
289 | if (!hwc->irq_period) | ||
290 | hwc->irq_period = x86_pmu.max_period; | ||
291 | |||
292 | atomic64_set(&hwc->period_left, | ||
293 | min(x86_pmu.max_period, hwc->irq_period)); | ||
294 | |||
295 | /* | ||
296 | * Raw event type provide the config in the event structure | ||
297 | */ | ||
298 | if (perf_event_raw(hw_event)) { | ||
299 | hwc->config |= x86_pmu.raw_event(perf_event_config(hw_event)); | ||
300 | } else { | ||
301 | if (perf_event_id(hw_event) >= x86_pmu.max_events) | ||
302 | return -EINVAL; | ||
303 | /* | ||
304 | * The generic map: | ||
305 | */ | ||
306 | hwc->config |= x86_pmu.event_map(perf_event_id(hw_event)); | ||
307 | } | ||
308 | |||
309 | counter->destroy = hw_perf_counter_destroy; | ||
310 | |||
311 | return 0; | ||
312 | } | ||
313 | |||
/* Stop all Intel counters at once by writing 0 to the global control MSR. */
static void intel_pmu_disable_all(void)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}
318 | |||
319 | static void amd_pmu_disable_all(void) | ||
320 | { | ||
321 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
322 | int idx; | ||
323 | |||
324 | if (!cpuc->enabled) | ||
325 | return; | ||
326 | |||
327 | cpuc->enabled = 0; | ||
328 | /* | ||
329 | * ensure we write the disable before we start disabling the | ||
330 | * counters proper, so that amd_pmu_enable_counter() does the | ||
331 | * right thing. | ||
332 | */ | ||
333 | barrier(); | ||
334 | |||
335 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
336 | u64 val; | ||
337 | |||
338 | if (!test_bit(idx, cpuc->active_mask)) | ||
339 | continue; | ||
340 | rdmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
341 | if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) | ||
342 | continue; | ||
343 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
344 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
345 | } | ||
346 | } | ||
347 | |||
348 | void hw_perf_disable(void) | ||
349 | { | ||
350 | if (!x86_pmu_initialized()) | ||
351 | return; | ||
352 | return x86_pmu.disable_all(); | ||
353 | } | ||
354 | |||
/* Write the global control value saved in x86_pmu.intel_ctrl back to the MSR. */
static void intel_pmu_enable_all(void)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
}
359 | |||
360 | static void amd_pmu_enable_all(void) | ||
361 | { | ||
362 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
363 | int idx; | ||
364 | |||
365 | if (cpuc->enabled) | ||
366 | return; | ||
367 | |||
368 | cpuc->enabled = 1; | ||
369 | barrier(); | ||
370 | |||
371 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
372 | u64 val; | ||
373 | |||
374 | if (!test_bit(idx, cpuc->active_mask)) | ||
375 | continue; | ||
376 | rdmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
377 | if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) | ||
378 | continue; | ||
379 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
380 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
381 | } | ||
382 | } | ||
383 | |||
384 | void hw_perf_enable(void) | ||
385 | { | ||
386 | if (!x86_pmu_initialized()) | ||
387 | return; | ||
388 | x86_pmu.enable_all(); | ||
389 | } | ||
390 | |||
391 | static inline u64 intel_pmu_get_status(void) | ||
392 | { | ||
393 | u64 status; | ||
394 | |||
395 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | ||
396 | |||
397 | return status; | ||
398 | } | ||
399 | |||
/*
 * Write 'ack' to MSR_CORE_PERF_GLOBAL_OVF_CTRL; intel_pmu_handle_irq()
 * passes the status bits it has just processed.
 */
static inline void intel_pmu_ack_status(u64 ack)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}
404 | |||
405 | static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | ||
406 | { | ||
407 | int err; | ||
408 | err = checking_wrmsrl(hwc->config_base + idx, | ||
409 | hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); | ||
410 | } | ||
411 | |||
412 | static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | ||
413 | { | ||
414 | int err; | ||
415 | err = checking_wrmsrl(hwc->config_base + idx, | ||
416 | hwc->config); | ||
417 | } | ||
418 | |||
419 | static inline void | ||
420 | intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) | ||
421 | { | ||
422 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
423 | u64 ctrl_val, mask; | ||
424 | int err; | ||
425 | |||
426 | mask = 0xfULL << (idx * 4); | ||
427 | |||
428 | rdmsrl(hwc->config_base, ctrl_val); | ||
429 | ctrl_val &= ~mask; | ||
430 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
431 | } | ||
432 | |||
433 | static inline void | ||
434 | intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | ||
435 | { | ||
436 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
437 | intel_pmu_disable_fixed(hwc, idx); | ||
438 | return; | ||
439 | } | ||
440 | |||
441 | x86_pmu_disable_counter(hwc, idx); | ||
442 | } | ||
443 | |||
/* AMD has no special cases here: forward to the generic per-counter disable. */
static inline void
amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
	x86_pmu_disable_counter(hwc, idx);
}
449 | |||
/* Last 'left' value programmed per slot; written in x86_perf_counter_set_period(), printed by perf_counter_print_debug(). */
static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
451 | |||
452 | /* | ||
453 | * Set the next IRQ period, based on the hwc->period_left value. | ||
454 | * To be called with the counter disabled in hw: | ||
455 | */ | ||
456 | static void | ||
457 | x86_perf_counter_set_period(struct perf_counter *counter, | ||
458 | struct hw_perf_counter *hwc, int idx) | ||
459 | { | ||
460 | s64 left = atomic64_read(&hwc->period_left); | ||
461 | s64 period = min(x86_pmu.max_period, hwc->irq_period); | ||
462 | int err; | ||
463 | |||
464 | /* | ||
465 | * If we are way outside a reasoable range then just skip forward: | ||
466 | */ | ||
467 | if (unlikely(left <= -period)) { | ||
468 | left = period; | ||
469 | atomic64_set(&hwc->period_left, left); | ||
470 | } | ||
471 | |||
472 | if (unlikely(left <= 0)) { | ||
473 | left += period; | ||
474 | atomic64_set(&hwc->period_left, left); | ||
475 | } | ||
476 | /* | ||
477 | * Quirk: certain CPUs dont like it if just 1 event is left: | ||
478 | */ | ||
479 | if (unlikely(left < 2)) | ||
480 | left = 2; | ||
481 | |||
482 | per_cpu(prev_left[idx], smp_processor_id()) = left; | ||
483 | |||
484 | /* | ||
485 | * The hw counter starts counting from this counter offset, | ||
486 | * mark it to be able to extra future deltas: | ||
487 | */ | ||
488 | atomic64_set(&hwc->prev_count, (u64)-left); | ||
489 | |||
490 | err = checking_wrmsrl(hwc->counter_base + idx, | ||
491 | (u64)(-left) & x86_pmu.counter_mask); | ||
492 | } | ||
493 | |||
494 | static inline void | ||
495 | intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) | ||
496 | { | ||
497 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
498 | u64 ctrl_val, bits, mask; | ||
499 | int err; | ||
500 | |||
501 | /* | ||
502 | * Enable IRQ generation (0x8), | ||
503 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) | ||
504 | * if requested: | ||
505 | */ | ||
506 | bits = 0x8ULL; | ||
507 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) | ||
508 | bits |= 0x2; | ||
509 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
510 | bits |= 0x1; | ||
511 | bits <<= (idx * 4); | ||
512 | mask = 0xfULL << (idx * 4); | ||
513 | |||
514 | rdmsrl(hwc->config_base, ctrl_val); | ||
515 | ctrl_val &= ~mask; | ||
516 | ctrl_val |= bits; | ||
517 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
518 | } | ||
519 | |||
520 | static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | ||
521 | { | ||
522 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
523 | intel_pmu_enable_fixed(hwc, idx); | ||
524 | return; | ||
525 | } | ||
526 | |||
527 | x86_pmu_enable_counter(hwc, idx); | ||
528 | } | ||
529 | |||
530 | static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | ||
531 | { | ||
532 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
533 | |||
534 | if (cpuc->enabled) | ||
535 | x86_pmu_enable_counter(hwc, idx); | ||
536 | else | ||
537 | x86_pmu_disable_counter(hwc, idx); | ||
538 | } | ||
539 | |||
540 | static int | ||
541 | fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) | ||
542 | { | ||
543 | unsigned int event; | ||
544 | |||
545 | if (!x86_pmu.num_counters_fixed) | ||
546 | return -1; | ||
547 | |||
548 | if (unlikely(hwc->nmi)) | ||
549 | return -1; | ||
550 | |||
551 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; | ||
552 | |||
553 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_INSTRUCTIONS))) | ||
554 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; | ||
555 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_CPU_CYCLES))) | ||
556 | return X86_PMC_IDX_FIXED_CPU_CYCLES; | ||
557 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_BUS_CYCLES))) | ||
558 | return X86_PMC_IDX_FIXED_BUS_CYCLES; | ||
559 | |||
560 | return -1; | ||
561 | } | ||
562 | |||
563 | /* | ||
564 | * Find a PMC slot for the freshly enabled / scheduled in counter: | ||
565 | */ | ||
566 | static int x86_pmu_enable(struct perf_counter *counter) | ||
567 | { | ||
568 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
569 | struct hw_perf_counter *hwc = &counter->hw; | ||
570 | int idx; | ||
571 | |||
572 | idx = fixed_mode_idx(counter, hwc); | ||
573 | if (idx >= 0) { | ||
574 | /* | ||
575 | * Try to get the fixed counter, if that is already taken | ||
576 | * then try to get a generic counter: | ||
577 | */ | ||
578 | if (test_and_set_bit(idx, cpuc->used_mask)) | ||
579 | goto try_generic; | ||
580 | |||
581 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | ||
582 | /* | ||
583 | * We set it so that counter_base + idx in wrmsr/rdmsr maps to | ||
584 | * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: | ||
585 | */ | ||
586 | hwc->counter_base = | ||
587 | MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; | ||
588 | hwc->idx = idx; | ||
589 | } else { | ||
590 | idx = hwc->idx; | ||
591 | /* Try to get the previous generic counter again */ | ||
592 | if (test_and_set_bit(idx, cpuc->used_mask)) { | ||
593 | try_generic: | ||
594 | idx = find_first_zero_bit(cpuc->used_mask, | ||
595 | x86_pmu.num_counters); | ||
596 | if (idx == x86_pmu.num_counters) | ||
597 | return -EAGAIN; | ||
598 | |||
599 | set_bit(idx, cpuc->used_mask); | ||
600 | hwc->idx = idx; | ||
601 | } | ||
602 | hwc->config_base = x86_pmu.eventsel; | ||
603 | hwc->counter_base = x86_pmu.perfctr; | ||
604 | } | ||
605 | |||
606 | perf_counters_lapic_init(hwc->nmi); | ||
607 | |||
608 | x86_pmu.disable(hwc, idx); | ||
609 | |||
610 | cpuc->counters[idx] = counter; | ||
611 | set_bit(idx, cpuc->active_mask); | ||
612 | |||
613 | x86_perf_counter_set_period(counter, hwc, idx); | ||
614 | x86_pmu.enable(hwc, idx); | ||
615 | |||
616 | return 0; | ||
617 | } | ||
618 | |||
619 | void perf_counter_print_debug(void) | ||
620 | { | ||
621 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; | ||
622 | struct cpu_hw_counters *cpuc; | ||
623 | unsigned long flags; | ||
624 | int cpu, idx; | ||
625 | |||
626 | if (!x86_pmu.num_counters) | ||
627 | return; | ||
628 | |||
629 | local_irq_save(flags); | ||
630 | |||
631 | cpu = smp_processor_id(); | ||
632 | cpuc = &per_cpu(cpu_hw_counters, cpu); | ||
633 | |||
634 | if (x86_pmu.version >= 2) { | ||
635 | rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); | ||
636 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | ||
637 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); | ||
638 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); | ||
639 | |||
640 | pr_info("\n"); | ||
641 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); | ||
642 | pr_info("CPU#%d: status: %016llx\n", cpu, status); | ||
643 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); | ||
644 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); | ||
645 | } | ||
646 | pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); | ||
647 | |||
648 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
649 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | ||
650 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); | ||
651 | |||
652 | prev_left = per_cpu(prev_left[idx], cpu); | ||
653 | |||
654 | pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", | ||
655 | cpu, idx, pmc_ctrl); | ||
656 | pr_info("CPU#%d: gen-PMC%d count: %016llx\n", | ||
657 | cpu, idx, pmc_count); | ||
658 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", | ||
659 | cpu, idx, prev_left); | ||
660 | } | ||
661 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { | ||
662 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); | ||
663 | |||
664 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", | ||
665 | cpu, idx, pmc_count); | ||
666 | } | ||
667 | local_irq_restore(flags); | ||
668 | } | ||
669 | |||
/*
 * Take a counter off its PMC slot on this CPU: stop it in hardware, fold
 * the remaining count into the generic counter and free the slot.
 */
static void x86_pmu_disable(struct perf_counter *counter)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	struct hw_perf_counter *hwc = &counter->hw;
	int idx = hwc->idx;

	/*
	 * Must be done before we disable, otherwise the nmi handler
	 * could reenable again:
	 */
	clear_bit(idx, cpuc->active_mask);
	x86_pmu.disable(hwc, idx);

	/*
	 * Make sure the cleared pointer becomes visible before we
	 * (potentially) free the counter:
	 */
	barrier();

	/*
	 * Drain the remaining delta count out of a counter
	 * that we are disabling:
	 */
	x86_perf_counter_update(counter, hwc, idx);
	/* only now release the slot for reuse by x86_pmu_enable() */
	cpuc->counters[idx] = NULL;
	clear_bit(idx, cpuc->used_mask);
}
697 | |||
698 | /* | ||
699 | * Save and restart an expired counter. Called by NMI contexts, | ||
700 | * so it has to be careful about preempting normal counter ops: | ||
701 | */ | ||
702 | static void intel_pmu_save_and_restart(struct perf_counter *counter) | ||
703 | { | ||
704 | struct hw_perf_counter *hwc = &counter->hw; | ||
705 | int idx = hwc->idx; | ||
706 | |||
707 | x86_perf_counter_update(counter, hwc, idx); | ||
708 | x86_perf_counter_set_period(counter, hwc, idx); | ||
709 | |||
710 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) | ||
711 | intel_pmu_enable_counter(hwc, idx); | ||
712 | } | ||
713 | |||
/*
 * Maximum interrupt frequency of 100KHz per CPU
 *
 * Expressed as a count per 1/HZ interval. The irq handlers compare
 * cpuc->interrupts against it and perf_counter_unthrottle() resets the
 * count (presumably once per timer tick - confirm against the caller).
 */
#define PERFMON_MAX_INTERRUPTS (100000/HZ)
718 | |||
719 | /* | ||
720 | * This handler is triggered by the local APIC, so the APIC IRQ handling | ||
721 | * rules apply: | ||
722 | */ | ||
723 | static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi) | ||
724 | { | ||
725 | struct cpu_hw_counters *cpuc; | ||
726 | struct cpu_hw_counters; | ||
727 | int bit, cpu, loops; | ||
728 | u64 ack, status; | ||
729 | |||
730 | cpu = smp_processor_id(); | ||
731 | cpuc = &per_cpu(cpu_hw_counters, cpu); | ||
732 | |||
733 | perf_disable(); | ||
734 | status = intel_pmu_get_status(); | ||
735 | if (!status) { | ||
736 | perf_enable(); | ||
737 | return 0; | ||
738 | } | ||
739 | |||
740 | loops = 0; | ||
741 | again: | ||
742 | if (++loops > 100) { | ||
743 | WARN_ONCE(1, "perfcounters: irq loop stuck!\n"); | ||
744 | return 1; | ||
745 | } | ||
746 | |||
747 | inc_irq_stat(apic_perf_irqs); | ||
748 | ack = status; | ||
749 | for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | ||
750 | struct perf_counter *counter = cpuc->counters[bit]; | ||
751 | |||
752 | clear_bit(bit, (unsigned long *) &status); | ||
753 | if (!test_bit(bit, cpuc->active_mask)) | ||
754 | continue; | ||
755 | |||
756 | intel_pmu_save_and_restart(counter); | ||
757 | if (perf_counter_overflow(counter, nmi, regs, 0)) | ||
758 | intel_pmu_disable_counter(&counter->hw, bit); | ||
759 | } | ||
760 | |||
761 | intel_pmu_ack_status(ack); | ||
762 | |||
763 | /* | ||
764 | * Repeat if there is more work to be done: | ||
765 | */ | ||
766 | status = intel_pmu_get_status(); | ||
767 | if (status) | ||
768 | goto again; | ||
769 | |||
770 | if (++cpuc->interrupts != PERFMON_MAX_INTERRUPTS) | ||
771 | perf_enable(); | ||
772 | |||
773 | return 1; | ||
774 | } | ||
775 | |||
776 | static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) | ||
777 | { | ||
778 | int cpu, idx, throttle = 0, handled = 0; | ||
779 | struct cpu_hw_counters *cpuc; | ||
780 | struct perf_counter *counter; | ||
781 | struct hw_perf_counter *hwc; | ||
782 | u64 val; | ||
783 | |||
784 | cpu = smp_processor_id(); | ||
785 | cpuc = &per_cpu(cpu_hw_counters, cpu); | ||
786 | |||
787 | if (++cpuc->interrupts == PERFMON_MAX_INTERRUPTS) { | ||
788 | throttle = 1; | ||
789 | __perf_disable(); | ||
790 | cpuc->enabled = 0; | ||
791 | barrier(); | ||
792 | } | ||
793 | |||
794 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
795 | int disable = 0; | ||
796 | |||
797 | if (!test_bit(idx, cpuc->active_mask)) | ||
798 | continue; | ||
799 | |||
800 | counter = cpuc->counters[idx]; | ||
801 | hwc = &counter->hw; | ||
802 | |||
803 | if (counter->hw_event.nmi != nmi) | ||
804 | goto next; | ||
805 | |||
806 | val = x86_perf_counter_update(counter, hwc, idx); | ||
807 | if (val & (1ULL << (x86_pmu.counter_bits - 1))) | ||
808 | goto next; | ||
809 | |||
810 | /* counter overflow */ | ||
811 | x86_perf_counter_set_period(counter, hwc, idx); | ||
812 | handled = 1; | ||
813 | inc_irq_stat(apic_perf_irqs); | ||
814 | disable = perf_counter_overflow(counter, nmi, regs, 0); | ||
815 | |||
816 | next: | ||
817 | if (disable || throttle) | ||
818 | amd_pmu_disable_counter(hwc, idx); | ||
819 | } | ||
820 | |||
821 | return handled; | ||
822 | } | ||
823 | |||
824 | void perf_counter_unthrottle(void) | ||
825 | { | ||
826 | struct cpu_hw_counters *cpuc; | ||
827 | |||
828 | if (!x86_pmu_initialized()) | ||
829 | return; | ||
830 | |||
831 | cpuc = &__get_cpu_var(cpu_hw_counters); | ||
832 | if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) { | ||
833 | /* | ||
834 | * Clear them before re-enabling irqs/NMIs again: | ||
835 | */ | ||
836 | cpuc->interrupts = 0; | ||
837 | perf_enable(); | ||
838 | } else { | ||
839 | cpuc->interrupts = 0; | ||
840 | } | ||
841 | } | ||
842 | |||
/*
 * Entry point for the (non-NMI) performance counter vector: rewrites the
 * LVTPC entry, acks the APIC and hands over to the vendor handler with
 * nmi == 0.
 */
void smp_perf_counter_interrupt(struct pt_regs *regs)
{
	irq_enter();
	apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
	ack_APIC_irq();
	x86_pmu.handle_irq(regs, 0);
	irq_exit();
}
851 | |||
/*
 * Entry point for the pending-work vector raised by
 * set_perf_counter_pending(): acks the APIC, accounts the interrupt and
 * runs perf_counter_do_pending().
 */
void smp_perf_pending_interrupt(struct pt_regs *regs)
{
	irq_enter();
	ack_APIC_irq();
	inc_irq_stat(apic_pending_irqs);
	perf_counter_do_pending();
	irq_exit();
}
860 | |||
/* Send LOCAL_PENDING_VECTOR to the current CPU as a self-IPI. */
void set_perf_counter_pending(void)
{
	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
}
865 | |||
866 | void perf_counters_lapic_init(int nmi) | ||
867 | { | ||
868 | u32 apic_val; | ||
869 | |||
870 | if (!x86_pmu_initialized()) | ||
871 | return; | ||
872 | |||
873 | /* | ||
874 | * Enable the performance counter vector in the APIC LVT: | ||
875 | */ | ||
876 | apic_val = apic_read(APIC_LVTERR); | ||
877 | |||
878 | apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED); | ||
879 | if (nmi) | ||
880 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
881 | else | ||
882 | apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR); | ||
883 | apic_write(APIC_LVTERR, apic_val); | ||
884 | } | ||
885 | |||
886 | static int __kprobes | ||
887 | perf_counter_nmi_handler(struct notifier_block *self, | ||
888 | unsigned long cmd, void *__args) | ||
889 | { | ||
890 | struct die_args *args = __args; | ||
891 | struct pt_regs *regs; | ||
892 | |||
893 | if (!atomic_read(&active_counters)) | ||
894 | return NOTIFY_DONE; | ||
895 | |||
896 | switch (cmd) { | ||
897 | case DIE_NMI: | ||
898 | case DIE_NMI_IPI: | ||
899 | break; | ||
900 | |||
901 | default: | ||
902 | return NOTIFY_DONE; | ||
903 | } | ||
904 | |||
905 | regs = args->regs; | ||
906 | |||
907 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
908 | /* | ||
909 | * Can't rely on the handled return value to say it was our NMI, two | ||
910 | * counters could trigger 'simultaneously' raising two back-to-back NMIs. | ||
911 | * | ||
912 | * If the first NMI handles both, the latter will be empty and daze | ||
913 | * the CPU. | ||
914 | */ | ||
915 | x86_pmu.handle_irq(regs, 1); | ||
916 | |||
917 | return NOTIFY_STOP; | ||
918 | } | ||
919 | |||
/* Notifier block wrapping perf_counter_nmi_handler(), priority 1. */
static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
	.notifier_call		= perf_counter_nmi_handler,
	.next			= NULL,
	.priority		= 1
};
925 | |||
/*
 * Intel template copied into x86_pmu by intel_pmu_init(); the version,
 * counter counts, width and mask are filled in there from CPUID.
 */
static struct x86_pmu intel_pmu = {
	.name			= "Intel",
	.handle_irq		= intel_pmu_handle_irq,
	.disable_all		= intel_pmu_disable_all,
	.enable_all		= intel_pmu_enable_all,
	.enable			= intel_pmu_enable_counter,
	.disable		= intel_pmu_disable_counter,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= intel_pmu_event_map,
	.raw_event		= intel_pmu_raw_event,
	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * the generic counter period:
	 */
	.max_period		= (1ULL << 31) - 1,
};
945 | |||
/*
 * AMD template copied into x86_pmu by amd_pmu_init(). Counter count and
 * width are fixed here; .version and .num_counters_fixed are left at zero.
 */
static struct x86_pmu amd_pmu = {
	.name			= "AMD",
	.handle_irq		= amd_pmu_handle_irq,
	.disable_all		= amd_pmu_disable_all,
	.enable_all		= amd_pmu_enable_all,
	.enable			= amd_pmu_enable_counter,
	.disable		= amd_pmu_disable_counter,
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
	.event_map		= amd_pmu_event_map,
	.raw_event		= amd_pmu_raw_event,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
	.num_counters		= 4,
	.counter_bits		= 48,
	.counter_mask		= (1ULL << 48) - 1,
	/* use highest bit to detect overflow */
	.max_period		= (1ULL << 47) - 1,
};
964 | |||
965 | static int intel_pmu_init(void) | ||
966 | { | ||
967 | union cpuid10_edx edx; | ||
968 | union cpuid10_eax eax; | ||
969 | unsigned int unused; | ||
970 | unsigned int ebx; | ||
971 | int version; | ||
972 | |||
973 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
974 | return -ENODEV; | ||
975 | |||
976 | /* | ||
977 | * Check whether the Architectural PerfMon supports | ||
978 | * Branch Misses Retired Event or not. | ||
979 | */ | ||
980 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | ||
981 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | ||
982 | return -ENODEV; | ||
983 | |||
984 | version = eax.split.version_id; | ||
985 | if (version < 2) | ||
986 | return -ENODEV; | ||
987 | |||
988 | x86_pmu = intel_pmu; | ||
989 | x86_pmu.version = version; | ||
990 | x86_pmu.num_counters = eax.split.num_counters; | ||
991 | |||
992 | /* | ||
993 | * Quirk: v2 perfmon does not report fixed-purpose counters, so | ||
994 | * assume at least 3 counters: | ||
995 | */ | ||
996 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); | ||
997 | |||
998 | x86_pmu.counter_bits = eax.split.bit_width; | ||
999 | x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1; | ||
1000 | |||
1001 | rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | ||
1002 | |||
1003 | return 0; | ||
1004 | } | ||
1005 | |||
1006 | static int amd_pmu_init(void) | ||
1007 | { | ||
1008 | x86_pmu = amd_pmu; | ||
1009 | return 0; | ||
1010 | } | ||
1011 | |||
1012 | void __init init_hw_perf_counters(void) | ||
1013 | { | ||
1014 | int err; | ||
1015 | |||
1016 | switch (boot_cpu_data.x86_vendor) { | ||
1017 | case X86_VENDOR_INTEL: | ||
1018 | err = intel_pmu_init(); | ||
1019 | break; | ||
1020 | case X86_VENDOR_AMD: | ||
1021 | err = amd_pmu_init(); | ||
1022 | break; | ||
1023 | default: | ||
1024 | return; | ||
1025 | } | ||
1026 | if (err != 0) | ||
1027 | return; | ||
1028 | |||
1029 | pr_info("%s Performance Monitoring support detected.\n", x86_pmu.name); | ||
1030 | pr_info("... version: %d\n", x86_pmu.version); | ||
1031 | pr_info("... bit width: %d\n", x86_pmu.counter_bits); | ||
1032 | |||
1033 | pr_info("... num counters: %d\n", x86_pmu.num_counters); | ||
1034 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | ||
1035 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; | ||
1036 | WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", | ||
1037 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); | ||
1038 | } | ||
1039 | perf_counter_mask = (1 << x86_pmu.num_counters) - 1; | ||
1040 | perf_max_counters = x86_pmu.num_counters; | ||
1041 | |||
1042 | pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask); | ||
1043 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); | ||
1044 | |||
1045 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { | ||
1046 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; | ||
1047 | WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", | ||
1048 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); | ||
1049 | } | ||
1050 | pr_info("... fixed counters: %d\n", x86_pmu.num_counters_fixed); | ||
1051 | |||
1052 | perf_counter_mask |= | ||
1053 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; | ||
1054 | |||
1055 | pr_info("... counter mask: %016Lx\n", perf_counter_mask); | ||
1056 | |||
1057 | perf_counters_lapic_init(0); | ||
1058 | register_die_notifier(&perf_counter_nmi_notifier); | ||
1059 | } | ||
1060 | |||
1061 | static inline void x86_pmu_read(struct perf_counter *counter) | ||
1062 | { | ||
1063 | x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); | ||
1064 | } | ||
1065 | |||
1066 | static const struct pmu pmu = { | ||
1067 | .enable = x86_pmu_enable, | ||
1068 | .disable = x86_pmu_disable, | ||
1069 | .read = x86_pmu_read, | ||
1070 | }; | ||
1071 | |||
1072 | const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | ||
1073 | { | ||
1074 | int err; | ||
1075 | |||
1076 | err = __hw_perf_counter_init(counter); | ||
1077 | if (err) | ||
1078 | return ERR_PTR(err); | ||
1079 | |||
1080 | return &pmu; | ||
1081 | } | ||
1082 | |||
1083 | /* | ||
1084 | * callchain support | ||
1085 | */ | ||
1086 | |||
1087 | static inline | ||
1088 | void callchain_store(struct perf_callchain_entry *entry, unsigned long ip) | ||
1089 | { | ||
1090 | if (entry->nr < MAX_STACK_DEPTH) | ||
1091 | entry->ip[entry->nr++] = ip; | ||
1092 | } | ||
1093 | |||
1094 | static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); | ||
1095 | static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); | ||
1096 | |||
1097 | |||
/* stacktrace_ops ->warning_symbol() callback: deliberately a no-op. */
static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	/* Warnings are ignored while collecting a callchain. */
}
1103 | |||
/* stacktrace_ops ->warning() callback: deliberately a no-op. */
static void backtrace_warning(void *data, char *msg)
{
	/* Warnings are ignored while collecting a callchain. */
}
1108 | |||
/*
 * stacktrace_ops ->stack() callback.  Always returns -1, whatever
 * stack @name refers to.
 */
static int backtrace_stack(void *data, char *name)
{
	/* Don't bother with IRQ stacks for now */
	return -1;
}
1114 | |||
/*
 * stacktrace_ops ->address() callback.  @data is the
 * perf_callchain_entry being filled; only addresses flagged as
 * reliable are recorded.
 */
static void backtrace_address(void *data, unsigned long addr, int reliable)
{
	struct perf_callchain_entry *chain = data;

	if (!reliable)
		return;

	callchain_store(chain, addr);
}
1122 | |||
1123 | static const struct stacktrace_ops backtrace_ops = { | ||
1124 | .warning = backtrace_warning, | ||
1125 | .warning_symbol = backtrace_warning_symbol, | ||
1126 | .stack = backtrace_stack, | ||
1127 | .address = backtrace_address, | ||
1128 | }; | ||
1129 | |||
1130 | static void | ||
1131 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
1132 | { | ||
1133 | unsigned long bp; | ||
1134 | char *stack; | ||
1135 | int nr = entry->nr; | ||
1136 | |||
1137 | callchain_store(entry, instruction_pointer(regs)); | ||
1138 | |||
1139 | stack = ((char *)regs + sizeof(struct pt_regs)); | ||
1140 | #ifdef CONFIG_FRAME_POINTER | ||
1141 | bp = frame_pointer(regs); | ||
1142 | #else | ||
1143 | bp = 0; | ||
1144 | #endif | ||
1145 | |||
1146 | dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry); | ||
1147 | |||
1148 | entry->kernel = entry->nr - nr; | ||
1149 | } | ||
1150 | |||
1151 | |||
/*
 * One frame record as read from the user stack.  copy_stack_frame()
 * fills it with a raw copy of sizeof(struct stack_frame) bytes taken
 * from the user frame pointer, so the order and size of the fields
 * must match the in-memory layout and must not be changed.
 * perf_callchain_user() records return_address and then continues the
 * walk at next_fp.
 */
1152 | struct stack_frame { | ||
1153 | const void __user *next_fp; | ||
1154 | unsigned long return_address; | ||
1155 | }; | ||
1156 | |||
1157 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | ||
1158 | { | ||
1159 | int ret; | ||
1160 | |||
1161 | if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) | ||
1162 | return 0; | ||
1163 | |||
1164 | ret = 1; | ||
1165 | pagefault_disable(); | ||
1166 | if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) | ||
1167 | ret = 0; | ||
1168 | pagefault_enable(); | ||
1169 | |||
1170 | return ret; | ||
1171 | } | ||
1172 | |||
1173 | static void | ||
1174 | perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
1175 | { | ||
1176 | struct stack_frame frame; | ||
1177 | const void __user *fp; | ||
1178 | int nr = entry->nr; | ||
1179 | |||
1180 | regs = (struct pt_regs *)current->thread.sp0 - 1; | ||
1181 | fp = (void __user *)regs->bp; | ||
1182 | |||
1183 | callchain_store(entry, regs->ip); | ||
1184 | |||
1185 | while (entry->nr < MAX_STACK_DEPTH) { | ||
1186 | frame.next_fp = NULL; | ||
1187 | frame.return_address = 0; | ||
1188 | |||
1189 | if (!copy_stack_frame(fp, &frame)) | ||
1190 | break; | ||
1191 | |||
1192 | if ((unsigned long)fp < user_stack_pointer(regs)) | ||
1193 | break; | ||
1194 | |||
1195 | callchain_store(entry, frame.return_address); | ||
1196 | fp = frame.next_fp; | ||
1197 | } | ||
1198 | |||
1199 | entry->user = entry->nr - nr; | ||
1200 | } | ||
1201 | |||
1202 | static void | ||
1203 | perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
1204 | { | ||
1205 | int is_user; | ||
1206 | |||
1207 | if (!regs) | ||
1208 | return; | ||
1209 | |||
1210 | is_user = user_mode(regs); | ||
1211 | |||
1212 | if (!current || current->pid == 0) | ||
1213 | return; | ||
1214 | |||
1215 | if (is_user && current->state != TASK_RUNNING) | ||
1216 | return; | ||
1217 | |||
1218 | if (!is_user) | ||
1219 | perf_callchain_kernel(regs, entry); | ||
1220 | |||
1221 | if (current->mm) | ||
1222 | perf_callchain_user(regs, entry); | ||
1223 | } | ||
1224 | |||
1225 | struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
1226 | { | ||
1227 | struct perf_callchain_entry *entry; | ||
1228 | |||
1229 | if (in_nmi()) | ||
1230 | entry = &__get_cpu_var(nmi_entry); | ||
1231 | else | ||
1232 | entry = &__get_cpu_var(irq_entry); | ||
1233 | |||
1234 | entry->nr = 0; | ||
1235 | entry->hv = 0; | ||
1236 | entry->kernel = 0; | ||
1237 | entry->user = 0; | ||
1238 | |||
1239 | perf_do_callchain(regs, entry); | ||
1240 | |||
1241 | return entry; | ||
1242 | } | ||
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index f6c70a164e32..d6f5b9fbde32 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -19,8 +19,8 @@ | |||
19 | #include <linux/nmi.h> | 19 | #include <linux/nmi.h> |
20 | #include <linux/kprobes.h> | 20 | #include <linux/kprobes.h> |
21 | 21 | ||
22 | #include <asm/genapic.h> | 22 | #include <asm/apic.h> |
23 | #include <asm/intel_arch_perfmon.h> | 23 | #include <asm/perf_counter.h> |
24 | 24 | ||
25 | struct nmi_watchdog_ctlblk { | 25 | struct nmi_watchdog_ctlblk { |
26 | unsigned int cccr_msr; | 26 | unsigned int cccr_msr; |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 38946c6e8433..891004619142 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1025,6 +1025,13 @@ apicinterrupt ERROR_APIC_VECTOR \ | |||
1025 | apicinterrupt SPURIOUS_APIC_VECTOR \ | 1025 | apicinterrupt SPURIOUS_APIC_VECTOR \ |
1026 | spurious_interrupt smp_spurious_interrupt | 1026 | spurious_interrupt smp_spurious_interrupt |
1027 | 1027 | ||
1028 | #ifdef CONFIG_PERF_COUNTERS | ||
1029 | apicinterrupt LOCAL_PERF_VECTOR \ | ||
1030 | perf_counter_interrupt smp_perf_counter_interrupt | ||
1031 | apicinterrupt LOCAL_PENDING_VECTOR \ | ||
1032 | perf_pending_interrupt smp_perf_pending_interrupt | ||
1033 | #endif | ||
1034 | |||
1028 | /* | 1035 | /* |
1029 | * Exception entry points. | 1036 | * Exception entry points. |
1030 | */ | 1037 | */ |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c3fe010d74c8..8279fb8df17f 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -63,6 +63,14 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
63 | for_each_online_cpu(j) | 63 | for_each_online_cpu(j) |
64 | seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); | 64 | seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); |
65 | seq_printf(p, " Spurious interrupts\n"); | 65 | seq_printf(p, " Spurious interrupts\n"); |
66 | seq_printf(p, "%*s: ", prec, "CNT"); | ||
67 | for_each_online_cpu(j) | ||
68 | seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); | ||
69 | seq_printf(p, " Performance counter interrupts\n"); | ||
70 | seq_printf(p, "%*s: ", prec, "PND"); | ||
71 | for_each_online_cpu(j) | ||
72 | seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); | ||
73 | seq_printf(p, " Performance pending work\n"); | ||
66 | #endif | 74 | #endif |
67 | if (generic_interrupt_extension) { | 75 | if (generic_interrupt_extension) { |
68 | seq_printf(p, "%*s: ", prec, "PLT"); | 76 | seq_printf(p, "%*s: ", prec, "PLT"); |
@@ -166,6 +174,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
166 | #ifdef CONFIG_X86_LOCAL_APIC | 174 | #ifdef CONFIG_X86_LOCAL_APIC |
167 | sum += irq_stats(cpu)->apic_timer_irqs; | 175 | sum += irq_stats(cpu)->apic_timer_irqs; |
168 | sum += irq_stats(cpu)->irq_spurious_count; | 176 | sum += irq_stats(cpu)->irq_spurious_count; |
177 | sum += irq_stats(cpu)->apic_perf_irqs; | ||
178 | sum += irq_stats(cpu)->apic_pending_irqs; | ||
169 | #endif | 179 | #endif |
170 | if (generic_interrupt_extension) | 180 | if (generic_interrupt_extension) |
171 | sum += irq_stats(cpu)->generic_irqs; | 181 | sum += irq_stats(cpu)->generic_irqs; |
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 368b0a8836f9..3190a6b961e6 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c | |||
@@ -118,28 +118,8 @@ int vector_used_by_percpu_irq(unsigned int vector) | |||
118 | return 0; | 118 | return 0; |
119 | } | 119 | } |
120 | 120 | ||
121 | /* Overridden in paravirt.c */ | 121 | static void __init smp_intr_init(void) |
122 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | ||
123 | |||
124 | void __init native_init_IRQ(void) | ||
125 | { | 122 | { |
126 | int i; | ||
127 | |||
128 | /* Execute any quirks before the call gates are initialised: */ | ||
129 | x86_quirk_pre_intr_init(); | ||
130 | |||
131 | /* | ||
132 | * Cover the whole vector space, no vector can escape | ||
133 | * us. (some of these will be overridden and become | ||
134 | * 'special' SMP interrupts) | ||
135 | */ | ||
136 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { | ||
137 | /* SYSCALL_VECTOR was reserved in trap_init. */ | ||
138 | if (i != SYSCALL_VECTOR) | ||
139 | set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); | ||
140 | } | ||
141 | |||
142 | |||
143 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) | 123 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) |
144 | /* | 124 | /* |
145 | * The reschedule interrupt is a CPU-to-CPU reschedule-helper | 125 | * The reschedule interrupt is a CPU-to-CPU reschedule-helper |
@@ -168,6 +148,11 @@ void __init native_init_IRQ(void) | |||
168 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | 148 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
169 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); | 149 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); |
170 | #endif | 150 | #endif |
151 | } | ||
152 | |||
153 | static void __init apic_intr_init(void) | ||
154 | { | ||
155 | smp_intr_init(); | ||
171 | 156 | ||
172 | #ifdef CONFIG_X86_LOCAL_APIC | 157 | #ifdef CONFIG_X86_LOCAL_APIC |
173 | /* self generated IPI for local APIC timer */ | 158 | /* self generated IPI for local APIC timer */ |
@@ -179,12 +164,41 @@ void __init native_init_IRQ(void) | |||
179 | /* IPI vectors for APIC spurious and error interrupts */ | 164 | /* IPI vectors for APIC spurious and error interrupts */ |
180 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 165 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
181 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | 166 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); |
182 | #endif | 167 | # ifdef CONFIG_PERF_COUNTERS |
168 | alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); | ||
169 | alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); | ||
170 | # endif | ||
183 | 171 | ||
184 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) | 172 | # ifdef CONFIG_X86_MCE_P4THERMAL |
185 | /* thermal monitor LVT interrupt */ | 173 | /* thermal monitor LVT interrupt */ |
186 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | 174 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); |
175 | # endif | ||
187 | #endif | 176 | #endif |
177 | } | ||
178 | |||
179 | /* Overridden in paravirt.c */ | ||
180 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | ||
181 | |||
182 | void __init native_init_IRQ(void) | ||
183 | { | ||
184 | int i; | ||
185 | |||
186 | /* Execute any quirks before the call gates are initialised: */ | ||
187 | x86_quirk_pre_intr_init(); | ||
188 | |||
189 | apic_intr_init(); | ||
190 | |||
191 | /* | ||
192 | * Cover the whole vector space, no vector can escape | ||
193 | * us. (some of these will be overridden and become | ||
194 | * 'special' SMP interrupts) | ||
195 | */ | ||
196 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { | ||
197 | int vector = FIRST_EXTERNAL_VECTOR + i; | ||
198 | /* SYSCALL_VECTOR was reserved in trap_init. */ | ||
199 | if (!test_bit(vector, used_vectors)) | ||
200 | set_intr_gate(vector, interrupt[i]); | ||
201 | } | ||
188 | 202 | ||
189 | if (!acpi_ioapic) | 203 | if (!acpi_ioapic) |
190 | setup_irq(2, &irq2); | 204 | setup_irq(2, &irq2); |
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 8cd10537fd46..53ceb26f80ff 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c | |||
@@ -152,6 +152,12 @@ static void __init apic_intr_init(void) | |||
152 | /* IPI vectors for APIC spurious and error interrupts */ | 152 | /* IPI vectors for APIC spurious and error interrupts */ |
153 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 153 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
154 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | 154 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); |
155 | |||
156 | /* Performance monitoring interrupt: */ | ||
157 | #ifdef CONFIG_PERF_COUNTERS | ||
158 | alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); | ||
159 | alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); | ||
160 | #endif | ||
155 | } | 161 | } |
156 | 162 | ||
157 | void __init native_init_IRQ(void) | 163 | void __init native_init_IRQ(void) |
@@ -159,6 +165,9 @@ void __init native_init_IRQ(void) | |||
159 | int i; | 165 | int i; |
160 | 166 | ||
161 | init_ISA_irqs(); | 167 | init_ISA_irqs(); |
168 | |||
169 | apic_intr_init(); | ||
170 | |||
162 | /* | 171 | /* |
163 | * Cover the whole vector space, no vector can escape | 172 | * Cover the whole vector space, no vector can escape |
164 | * us. (some of these will be overridden and become | 173 | * us. (some of these will be overridden and become |
@@ -166,12 +175,10 @@ void __init native_init_IRQ(void) | |||
166 | */ | 175 | */ |
167 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { | 176 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { |
168 | int vector = FIRST_EXTERNAL_VECTOR + i; | 177 | int vector = FIRST_EXTERNAL_VECTOR + i; |
169 | if (vector != IA32_SYSCALL_VECTOR) | 178 | if (!test_bit(vector, used_vectors)) |
170 | set_intr_gate(vector, interrupt[i]); | 179 | set_intr_gate(vector, interrupt[i]); |
171 | } | 180 | } |
172 | 181 | ||
173 | apic_intr_init(); | ||
174 | |||
175 | if (!acpi_ioapic) | 182 | if (!acpi_ioapic) |
176 | setup_irq(2, &irq2); | 183 | setup_irq(2, &irq2); |
177 | } | 184 | } |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 14425166b8e3..0a813b17b172 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -6,7 +6,6 @@ | |||
6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes | 6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes |
7 | * 2000-2002 x86-64 support by Andi Kleen | 7 | * 2000-2002 x86-64 support by Andi Kleen |
8 | */ | 8 | */ |
9 | |||
10 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
11 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
12 | #include <linux/smp.h> | 11 | #include <linux/smp.h> |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index ff5c8736b491..d51321ddafda 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -334,3 +334,5 @@ ENTRY(sys_call_table) | |||
334 | .long sys_inotify_init1 | 334 | .long sys_inotify_init1 |
335 | .long sys_preadv | 335 | .long sys_preadv |
336 | .long sys_pwritev | 336 | .long sys_pwritev |
337 | .long sys_rt_tgsigqueueinfo /* 335 */ | ||
338 | .long sys_perf_counter_open | ||
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a1d288327ff0..2cc162e09c4b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -945,8 +945,13 @@ void __init trap_init(void) | |||
945 | #endif | 945 | #endif |
946 | set_intr_gate(19, &simd_coprocessor_error); | 946 | set_intr_gate(19, &simd_coprocessor_error); |
947 | 947 | ||
948 | /* Reserve all the builtin and the syscall vector: */ | ||
949 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) | ||
950 | set_bit(i, used_vectors); | ||
951 | |||
948 | #ifdef CONFIG_IA32_EMULATION | 952 | #ifdef CONFIG_IA32_EMULATION |
949 | set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); | 953 | set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); |
954 | set_bit(IA32_SYSCALL_VECTOR, used_vectors); | ||
950 | #endif | 955 | #endif |
951 | 956 | ||
952 | #ifdef CONFIG_X86_32 | 957 | #ifdef CONFIG_X86_32 |
@@ -963,17 +968,9 @@ void __init trap_init(void) | |||
963 | } | 968 | } |
964 | 969 | ||
965 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); | 970 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); |
966 | #endif | ||
967 | |||
968 | /* Reserve all the builtin and the syscall vector: */ | ||
969 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) | ||
970 | set_bit(i, used_vectors); | ||
971 | |||
972 | #ifdef CONFIG_X86_64 | ||
973 | set_bit(IA32_SYSCALL_VECTOR, used_vectors); | ||
974 | #else | ||
975 | set_bit(SYSCALL_VECTOR, used_vectors); | 971 | set_bit(SYSCALL_VECTOR, used_vectors); |
976 | #endif | 972 | #endif |
973 | |||
977 | /* | 974 | /* |
978 | * Should be a barrier for any external CPU state: | 975 | * Should be a barrier for any external CPU state: |
979 | */ | 976 | */ |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a03b7279efa0..6f9df2babe48 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/tty.h> | 27 | #include <linux/tty.h> |
28 | #include <linux/smp.h> | 28 | #include <linux/smp.h> |
29 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
30 | #include <linux/perf_counter.h> | ||
30 | 31 | ||
31 | #include <asm-generic/sections.h> | 32 | #include <asm-generic/sections.h> |
32 | 33 | ||
@@ -1044,6 +1045,8 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1044 | if (unlikely(error_code & PF_RSVD)) | 1045 | if (unlikely(error_code & PF_RSVD)) |
1045 | pgtable_bad(regs, error_code, address); | 1046 | pgtable_bad(regs, error_code, address); |
1046 | 1047 | ||
1048 | perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address); | ||
1049 | |||
1047 | /* | 1050 | /* |
1048 | * If we're in an interrupt, have no user context or are running | 1051 | * If we're in an interrupt, have no user context or are running |
1049 | * in an atomic region then we must not take the fault: | 1052 | * in an atomic region then we must not take the fault: |
@@ -1137,10 +1140,15 @@ good_area: | |||
1137 | return; | 1140 | return; |
1138 | } | 1141 | } |
1139 | 1142 | ||
1140 | if (fault & VM_FAULT_MAJOR) | 1143 | if (fault & VM_FAULT_MAJOR) { |
1141 | tsk->maj_flt++; | 1144 | tsk->maj_flt++; |
1142 | else | 1145 | perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, |
1146 | regs, address); | ||
1147 | } else { | ||
1143 | tsk->min_flt++; | 1148 | tsk->min_flt++; |
1149 | perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, | ||
1150 | regs, address); | ||
1151 | } | ||
1144 | 1152 | ||
1145 | check_v8086_mode(regs, address, tsk); | 1153 | check_v8086_mode(regs, address, tsk); |
1146 | 1154 | ||
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 202864ad49a7..c638685136e1 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -40,8 +40,9 @@ static int profile_exceptions_notify(struct notifier_block *self, | |||
40 | 40 | ||
41 | switch (val) { | 41 | switch (val) { |
42 | case DIE_NMI: | 42 | case DIE_NMI: |
43 | if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu))) | 43 | case DIE_NMI_IPI: |
44 | ret = NOTIFY_STOP; | 44 | model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)); |
45 | ret = NOTIFY_STOP; | ||
45 | break; | 46 | break; |
46 | default: | 47 | default: |
47 | break; | 48 | break; |
@@ -134,7 +135,7 @@ static void nmi_cpu_setup(void *dummy) | |||
134 | static struct notifier_block profile_exceptions_nb = { | 135 | static struct notifier_block profile_exceptions_nb = { |
135 | .notifier_call = profile_exceptions_notify, | 136 | .notifier_call = profile_exceptions_notify, |
136 | .next = NULL, | 137 | .next = NULL, |
137 | .priority = 0 | 138 | .priority = 2 |
138 | }; | 139 | }; |
139 | 140 | ||
140 | static int nmi_setup(void) | 141 | static int nmi_setup(void) |
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 10131fbdaada..4da7230b3d17 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
@@ -18,7 +18,7 @@ | |||
18 | #include <asm/msr.h> | 18 | #include <asm/msr.h> |
19 | #include <asm/apic.h> | 19 | #include <asm/apic.h> |
20 | #include <asm/nmi.h> | 20 | #include <asm/nmi.h> |
21 | #include <asm/intel_arch_perfmon.h> | 21 | #include <asm/perf_counter.h> |
22 | 22 | ||
23 | #include "op_x86_model.h" | 23 | #include "op_x86_model.h" |
24 | #include "op_counter.h" | 24 | #include "op_counter.h" |
@@ -136,6 +136,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
136 | u64 val; | 136 | u64 val; |
137 | int i; | 137 | int i; |
138 | 138 | ||
139 | /* | ||
140 | * This can happen if perf counters are in use when | ||
141 | * we steal the die notifier NMI. | ||
142 | */ | ||
143 | if (unlikely(!reset_value)) | ||
144 | goto out; | ||
145 | |||
139 | for (i = 0 ; i < num_counters; ++i) { | 146 | for (i = 0 ; i < num_counters; ++i) { |
140 | if (!reset_value[i]) | 147 | if (!reset_value[i]) |
141 | continue; | 148 | continue; |
@@ -146,6 +153,7 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
146 | } | 153 | } |
147 | } | 154 | } |
148 | 155 | ||
156 | out: | ||
149 | /* Only P6 based Pentium M need to re-unmask the apic vector but it | 157 | /* Only P6 based Pentium M need to re-unmask the apic vector but it |
150 | * doesn't hurt other P6 variant */ | 158 | * doesn't hurt other P6 variant */ |
151 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | 159 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); |
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index d6a807f4077d..39a05b5fa9cb 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/kbd_kern.h> | 25 | #include <linux/kbd_kern.h> |
26 | #include <linux/proc_fs.h> | 26 | #include <linux/proc_fs.h> |
27 | #include <linux/quotaops.h> | 27 | #include <linux/quotaops.h> |
28 | #include <linux/perf_counter.h> | ||
28 | #include <linux/kernel.h> | 29 | #include <linux/kernel.h> |
29 | #include <linux/module.h> | 30 | #include <linux/module.h> |
30 | #include <linux/suspend.h> | 31 | #include <linux/suspend.h> |
@@ -243,6 +244,7 @@ static void sysrq_handle_showregs(int key, struct tty_struct *tty) | |||
243 | struct pt_regs *regs = get_irq_regs(); | 244 | struct pt_regs *regs = get_irq_regs(); |
244 | if (regs) | 245 | if (regs) |
245 | show_regs(regs); | 246 | show_regs(regs); |
247 | perf_counter_print_debug(); | ||
246 | } | 248 | } |
247 | static struct sysrq_key_op sysrq_showregs_op = { | 249 | static struct sysrq_key_op sysrq_showregs_op = { |
248 | .handler = sysrq_handle_showregs, | 250 | .handler = sysrq_handle_showregs, |
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/string.h> | 33 | #include <linux/string.h> |
34 | #include <linux/init.h> | 34 | #include <linux/init.h> |
35 | #include <linux/pagemap.h> | 35 | #include <linux/pagemap.h> |
36 | #include <linux/perf_counter.h> | ||
36 | #include <linux/highmem.h> | 37 | #include <linux/highmem.h> |
37 | #include <linux/spinlock.h> | 38 | #include <linux/spinlock.h> |
38 | #include <linux/key.h> | 39 | #include <linux/key.h> |
@@ -922,6 +923,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) | |||
922 | task_lock(tsk); | 923 | task_lock(tsk); |
923 | strlcpy(tsk->comm, buf, sizeof(tsk->comm)); | 924 | strlcpy(tsk->comm, buf, sizeof(tsk->comm)); |
924 | task_unlock(tsk); | 925 | task_unlock(tsk); |
926 | perf_counter_comm(tsk); | ||
925 | } | 927 | } |
926 | 928 | ||
927 | int flush_old_exec(struct linux_binprm * bprm) | 929 | int flush_old_exec(struct linux_binprm * bprm) |
@@ -990,6 +992,13 @@ int flush_old_exec(struct linux_binprm * bprm) | |||
990 | 992 | ||
991 | current->personality &= ~bprm->per_clear; | 993 | current->personality &= ~bprm->per_clear; |
992 | 994 | ||
995 | /* | ||
996 | * Flush performance counters when crossing a | ||
997 | * security domain: | ||
998 | */ | ||
999 | if (!get_dumpable(current->mm)) | ||
1000 | perf_counter_exit_task(current); | ||
1001 | |||
993 | /* An exec changes our domain. We are no longer part of the thread | 1002 | /* An exec changes our domain. We are no longer part of the thread |
994 | group */ | 1003 | group */ |
995 | 1004 | ||
diff --git a/include/linux/compat.h b/include/linux/compat.h index f2ded21f9a3c..af931ee43dd8 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h | |||
@@ -222,6 +222,8 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from); | |||
222 | int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from); | 222 | int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from); |
223 | int get_compat_sigevent(struct sigevent *event, | 223 | int get_compat_sigevent(struct sigevent *event, |
224 | const struct compat_sigevent __user *u_event); | 224 | const struct compat_sigevent __user *u_event); |
225 | long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, | ||
226 | struct compat_siginfo __user *uinfo); | ||
225 | 227 | ||
226 | static inline int compat_timeval_compare(struct compat_timeval *lhs, | 228 | static inline int compat_timeval_compare(struct compat_timeval *lhs, |
227 | struct compat_timeval *rhs) | 229 | struct compat_timeval *rhs) |
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d87247d2641f..503afaa0afa7 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -108,6 +108,18 @@ extern struct group_info init_groups; | |||
108 | 108 | ||
109 | extern struct cred init_cred; | 109 | extern struct cred init_cred; |
110 | 110 | ||
111 | #ifdef CONFIG_PERF_COUNTERS | ||
112 | # define INIT_PERF_COUNTERS(tsk) \ | ||
113 | .perf_counter_ctx.counter_list = \ | ||
114 | LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \ | ||
115 | .perf_counter_ctx.event_list = \ | ||
116 | LIST_HEAD_INIT(tsk.perf_counter_ctx.event_list), \ | ||
117 | .perf_counter_ctx.lock = \ | ||
118 | __SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock), | ||
119 | #else | ||
120 | # define INIT_PERF_COUNTERS(tsk) | ||
121 | #endif | ||
122 | |||
111 | /* | 123 | /* |
112 | * INIT_TASK is used to set up the first task table, touch at | 124 | * INIT_TASK is used to set up the first task table, touch at |
113 | * your own risk!. Base=0, limit=0x1fffff (=2MB) | 125 | * your own risk!. Base=0, limit=0x1fffff (=2MB) |
@@ -171,6 +183,7 @@ extern struct cred init_cred; | |||
171 | }, \ | 183 | }, \ |
172 | .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ | 184 | .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ |
173 | INIT_IDS \ | 185 | INIT_IDS \ |
186 | INIT_PERF_COUNTERS(tsk) \ | ||
174 | INIT_TRACE_IRQFLAGS \ | 187 | INIT_TRACE_IRQFLAGS \ |
175 | INIT_LOCKDEP \ | 188 | INIT_LOCKDEP \ |
176 | INIT_FTRACE_GRAPH \ | 189 | INIT_FTRACE_GRAPH \ |
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 0c8b89f28a95..a77c6007dc99 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h | |||
@@ -81,7 +81,12 @@ static inline unsigned int kstat_irqs(unsigned int irq) | |||
81 | return sum; | 81 | return sum; |
82 | } | 82 | } |
83 | 83 | ||
84 | |||
85 | /* | ||
86 | * Lock/unlock the current runqueue - to extract task statistics: | ||
87 | */ | ||
84 | extern unsigned long long task_delta_exec(struct task_struct *); | 88 | extern unsigned long long task_delta_exec(struct task_struct *); |
89 | |||
85 | extern void account_user_time(struct task_struct *, cputime_t, cputime_t); | 90 | extern void account_user_time(struct task_struct *, cputime_t, cputime_t); |
86 | extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); | 91 | extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); |
87 | extern void account_steal_time(cputime_t); | 92 | extern void account_steal_time(cputime_t); |
diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 3069ec7e0ab8..878cab4f5fcc 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h | |||
@@ -150,5 +150,6 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); | |||
150 | */ | 150 | */ |
151 | extern int mutex_trylock(struct mutex *lock); | 151 | extern int mutex_trylock(struct mutex *lock); |
152 | extern void mutex_unlock(struct mutex *lock); | 152 | extern void mutex_unlock(struct mutex *lock); |
153 | extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); | ||
153 | 154 | ||
154 | #endif | 155 | #endif |
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h new file mode 100644 index 000000000000..c8c1dfc22c93 --- /dev/null +++ b/include/linux/perf_counter.h | |||
@@ -0,0 +1,642 @@ | |||
1 | /* | ||
2 | * Performance counters: | ||
3 | * | ||
4 | * Copyright(C) 2008, Thomas Gleixner <tglx@linutronix.de> | ||
5 | * Copyright(C) 2008, Red Hat, Inc., Ingo Molnar | ||
6 | * | ||
7 | * Data type definitions, declarations, prototypes. | ||
8 | * | ||
9 | * Started by: Thomas Gleixner and Ingo Molnar | ||
10 | * | ||
11 | * For licencing details see kernel-base/COPYING | ||
12 | */ | ||
13 | #ifndef _LINUX_PERF_COUNTER_H | ||
14 | #define _LINUX_PERF_COUNTER_H | ||
15 | |||
16 | #include <linux/types.h> | ||
17 | #include <linux/ioctl.h> | ||
18 | #include <asm/byteorder.h> | ||
19 | |||
20 | /* | ||
21 | * User-space ABI bits: | ||
22 | */ | ||
23 | |||
24 | /* | ||
25 | * hw_event.type | ||
26 | */ | ||
27 | enum perf_event_types { | ||
28 | PERF_TYPE_HARDWARE = 0, | ||
29 | PERF_TYPE_SOFTWARE = 1, | ||
30 | PERF_TYPE_TRACEPOINT = 2, | ||
31 | |||
32 | /* | ||
33 | * Available TYPE space; PERF_TYPE_RAW is the maximum value. | ||
34 | */ | ||
35 | |||
36 | PERF_TYPE_RAW = 128, | ||
37 | }; | ||
38 | |||
39 | /* | ||
40 | * Generalized performance counter event types, used by the hw_event.event_id | ||
41 | * parameter of the sys_perf_counter_open() syscall: | ||
42 | */ | ||
43 | enum hw_event_ids { | ||
44 | /* | ||
45 | * Common hardware events, generalized by the kernel: | ||
46 | */ | ||
47 | PERF_COUNT_CPU_CYCLES = 0, | ||
48 | PERF_COUNT_INSTRUCTIONS = 1, | ||
49 | PERF_COUNT_CACHE_REFERENCES = 2, | ||
50 | PERF_COUNT_CACHE_MISSES = 3, | ||
51 | PERF_COUNT_BRANCH_INSTRUCTIONS = 4, | ||
52 | PERF_COUNT_BRANCH_MISSES = 5, | ||
53 | PERF_COUNT_BUS_CYCLES = 6, | ||
54 | |||
55 | PERF_HW_EVENTS_MAX = 7, | ||
56 | }; | ||
57 | |||
58 | /* | ||
59 | * Special "software" counters provided by the kernel, even if the hardware | ||
60 | * does not support performance counters. These counters measure various | ||
61 | * physical and sw events of the kernel (and allow the profiling of them as | ||
62 | * well): | ||
63 | */ | ||
64 | enum sw_event_ids { | ||
65 | PERF_COUNT_CPU_CLOCK = 0, | ||
66 | PERF_COUNT_TASK_CLOCK = 1, | ||
67 | PERF_COUNT_PAGE_FAULTS = 2, | ||
68 | PERF_COUNT_CONTEXT_SWITCHES = 3, | ||
69 | PERF_COUNT_CPU_MIGRATIONS = 4, | ||
70 | PERF_COUNT_PAGE_FAULTS_MIN = 5, | ||
71 | PERF_COUNT_PAGE_FAULTS_MAJ = 6, | ||
72 | |||
73 | PERF_SW_EVENTS_MAX = 7, | ||
74 | }; | ||
75 | |||
76 | #define __PERF_COUNTER_MASK(name) \ | ||
77 | (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ | ||
78 | PERF_COUNTER_##name##_SHIFT) | ||
79 | |||
80 | #define PERF_COUNTER_RAW_BITS 1 | ||
81 | #define PERF_COUNTER_RAW_SHIFT 63 | ||
82 | #define PERF_COUNTER_RAW_MASK __PERF_COUNTER_MASK(RAW) | ||
83 | |||
84 | #define PERF_COUNTER_CONFIG_BITS 63 | ||
85 | #define PERF_COUNTER_CONFIG_SHIFT 0 | ||
86 | #define PERF_COUNTER_CONFIG_MASK __PERF_COUNTER_MASK(CONFIG) | ||
87 | |||
88 | #define PERF_COUNTER_TYPE_BITS 7 | ||
89 | #define PERF_COUNTER_TYPE_SHIFT 56 | ||
90 | #define PERF_COUNTER_TYPE_MASK __PERF_COUNTER_MASK(TYPE) | ||
91 | |||
92 | #define PERF_COUNTER_EVENT_BITS 56 | ||
93 | #define PERF_COUNTER_EVENT_SHIFT 0 | ||
94 | #define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) | ||
95 | |||
96 | /* | ||
97 | * Bits that can be set in hw_event.record_type to request information | ||
98 | * in the overflow packets. | ||
99 | */ | ||
100 | enum perf_counter_record_format { | ||
101 | PERF_RECORD_IP = 1U << 0, | ||
102 | PERF_RECORD_TID = 1U << 1, | ||
103 | PERF_RECORD_TIME = 1U << 2, | ||
104 | PERF_RECORD_ADDR = 1U << 3, | ||
105 | PERF_RECORD_GROUP = 1U << 4, | ||
106 | PERF_RECORD_CALLCHAIN = 1U << 5, | ||
107 | PERF_RECORD_CONFIG = 1U << 6, | ||
108 | PERF_RECORD_CPU = 1U << 7, | ||
109 | }; | ||
110 | |||
111 | /* | ||
112 | * Bits that can be set in hw_event.read_format to request that | ||
113 | * reads on the counter should return the indicated quantities, | ||
114 | * in increasing order of bit value, after the counter value. | ||
115 | */ | ||
116 | enum perf_counter_read_format { | ||
117 | PERF_FORMAT_TOTAL_TIME_ENABLED = 1, | ||
118 | PERF_FORMAT_TOTAL_TIME_RUNNING = 2, | ||
119 | }; | ||
120 | |||
121 | /* | ||
122 | * Hardware event to monitor via a performance monitoring counter: | ||
123 | */ | ||
124 | struct perf_counter_hw_event { | ||
125 | /* | ||
126 | * The MSB of the config word signifies if the rest contains cpu | ||
127 | * specific (raw) counter configuration data, if unset, the next | ||
128 | * 7 bits are an event type and the rest of the bits are the event | ||
129 | * identifier. | ||
130 | */ | ||
131 | __u64 config; | ||
132 | |||
133 | union { | ||
134 | __u64 irq_period; | ||
135 | __u64 irq_freq; | ||
136 | }; | ||
137 | |||
138 | __u32 record_type; | ||
139 | __u32 read_format; | ||
140 | |||
141 | __u64 disabled : 1, /* off by default */ | ||
142 | nmi : 1, /* NMI sampling */ | ||
143 | inherit : 1, /* children inherit it */ | ||
144 | pinned : 1, /* must always be on PMU */ | ||
145 | exclusive : 1, /* only group on PMU */ | ||
146 | exclude_user : 1, /* don't count user */ | ||
147 | exclude_kernel : 1, /* ditto kernel */ | ||
148 | exclude_hv : 1, /* ditto hypervisor */ | ||
149 | exclude_idle : 1, /* don't count when idle */ | ||
150 | mmap : 1, /* include mmap data */ | ||
151 | munmap : 1, /* include munmap data */ | ||
152 | comm : 1, /* include comm data */ | ||
153 | freq : 1, /* use freq, not period */ | ||
154 | |||
155 | __reserved_1 : 51; | ||
156 | |||
157 | __u32 extra_config_len; | ||
158 | __u32 wakeup_events; /* wakeup every n events */ | ||
159 | |||
160 | __u64 __reserved_2; | ||
161 | __u64 __reserved_3; | ||
162 | }; | ||
163 | |||
164 | /* | ||
165 | * Ioctls that can be done on a perf counter fd (user-space ABI: sized with exported __u32): | ||
166 | */ | ||
167 | #define PERF_COUNTER_IOC_ENABLE _IOW('$', 0, __u32) | ||
168 | #define PERF_COUNTER_IOC_DISABLE _IOW('$', 1, __u32) | ||
169 | #define PERF_COUNTER_IOC_REFRESH _IOW('$', 2, __u32) | ||
170 | #define PERF_COUNTER_IOC_RESET _IOW('$', 3, __u32) | ||
171 | |||
172 | enum perf_counter_ioc_flags { | ||
173 | PERF_IOC_FLAG_GROUP = 1U << 0, | ||
174 | }; | ||
175 | |||
176 | /* | ||
177 | * Structure of the page that can be mapped via mmap | ||
178 | */ | ||
179 | struct perf_counter_mmap_page { | ||
180 | __u32 version; /* version number of this structure */ | ||
181 | __u32 compat_version; /* lowest version this is compat with */ | ||
182 | |||
183 | /* | ||
184 | * Bits needed to read the hw counters in user-space. | ||
185 | * | ||
186 | * u32 seq; | ||
187 | * s64 count; | ||
188 | * | ||
189 | * do { | ||
190 | * seq = pc->lock; | ||
191 | * | ||
192 | * barrier(); | ||
193 | * if (pc->index) { | ||
194 | * count = pmc_read(pc->index - 1); | ||
195 | * count += pc->offset; | ||
196 | * } else | ||
197 | * goto regular_read; | ||
198 | * | ||
199 | * barrier(); | ||
200 | * } while (pc->lock != seq); | ||
201 | * | ||
202 | * NOTE: for obvious reasons this only works on self-monitoring | ||
203 | * processes. | ||
204 | */ | ||
205 | __u32 lock; /* seqlock for synchronization */ | ||
206 | __u32 index; /* hardware counter identifier */ | ||
207 | __s64 offset; /* add to hardware counter value */ | ||
208 | |||
209 | /* | ||
210 | * Control data for the mmap() data buffer. | ||
211 | * | ||
212 | * User-space reading this value should issue an rmb(), on SMP capable | ||
213 | * platforms, after reading this value -- see perf_counter_wakeup(). | ||
214 | */ | ||
215 | __u32 data_head; /* head in the data section */ | ||
216 | }; | ||
217 | |||
218 | #define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) | ||
219 | #define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0) | ||
220 | #define PERF_EVENT_MISC_KERNEL (1 << 0) | ||
221 | #define PERF_EVENT_MISC_USER (2 << 0) | ||
222 | #define PERF_EVENT_MISC_HYPERVISOR (3 << 0) | ||
223 | #define PERF_EVENT_MISC_OVERFLOW (1 << 2) | ||
224 | |||
225 | struct perf_event_header { | ||
226 | __u32 type; | ||
227 | __u16 misc; | ||
228 | __u16 size; | ||
229 | }; | ||
230 | |||
231 | enum perf_event_type { | ||
232 | |||
233 | /* | ||
234 | * The MMAP events record the PROT_EXEC mappings so that we can | ||
235 | * correlate userspace IPs to code. They have the following structure: | ||
236 | * | ||
237 | * struct { | ||
238 | * struct perf_event_header header; | ||
239 | * | ||
240 | * u32 pid, tid; | ||
241 | * u64 addr; | ||
242 | * u64 len; | ||
243 | * u64 pgoff; | ||
244 | * char filename[]; | ||
245 | * }; | ||
246 | */ | ||
247 | PERF_EVENT_MMAP = 1, | ||
248 | PERF_EVENT_MUNMAP = 2, | ||
249 | |||
250 | /* | ||
251 | * struct { | ||
252 | * struct perf_event_header header; | ||
253 | * | ||
254 | * u32 pid, tid; | ||
255 | * char comm[]; | ||
256 | * }; | ||
257 | */ | ||
258 | PERF_EVENT_COMM = 3, | ||
259 | |||
260 | /* | ||
261 | * When header.misc & PERF_EVENT_MISC_OVERFLOW is set, the header.type | ||
262 | * field will be a combination of PERF_RECORD_* bits: | ||
263 | * | ||
264 | * struct { | ||
265 | * struct perf_event_header header; | ||
266 | * | ||
267 | * { u64 ip; } && PERF_RECORD_IP | ||
268 | * { u32 pid, tid; } && PERF_RECORD_TID | ||
269 | * { u64 time; } && PERF_RECORD_TIME | ||
270 | * { u64 addr; } && PERF_RECORD_ADDR | ||
271 | * { u64 config; } && PERF_RECORD_CONFIG | ||
272 | * { u32 cpu, res; } && PERF_RECORD_CPU | ||
273 | * | ||
274 | * { u64 nr; | ||
275 | * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP | ||
276 | * | ||
277 | * { u16 nr, | ||
278 | * hv, | ||
279 | * kernel, | ||
280 | * user; | ||
281 | * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN | ||
282 | * }; | ||
283 | */ | ||
284 | }; | ||
285 | |||
286 | #ifdef __KERNEL__ | ||
287 | /* | ||
288 | * Kernel-internal data types and definitions: | ||
289 | */ | ||
290 | |||
291 | #ifdef CONFIG_PERF_COUNTERS | ||
292 | # include <asm/perf_counter.h> | ||
293 | #endif | ||
294 | |||
295 | #include <linux/list.h> | ||
296 | #include <linux/mutex.h> | ||
297 | #include <linux/rculist.h> | ||
298 | #include <linux/rcupdate.h> | ||
299 | #include <linux/spinlock.h> | ||
300 | #include <linux/hrtimer.h> | ||
301 | #include <linux/fs.h> | ||
302 | #include <asm/atomic.h> | ||
303 | |||
304 | struct task_struct; | ||
305 | |||
306 | static inline u64 perf_event_raw(struct perf_counter_hw_event *hw_event) | ||
307 | { | ||
308 | return hw_event->config & PERF_COUNTER_RAW_MASK; | ||
309 | } | ||
310 | |||
311 | static inline u64 perf_event_config(struct perf_counter_hw_event *hw_event) | ||
312 | { | ||
313 | return hw_event->config & PERF_COUNTER_CONFIG_MASK; | ||
314 | } | ||
315 | |||
316 | static inline u64 perf_event_type(struct perf_counter_hw_event *hw_event) | ||
317 | { | ||
318 | return (hw_event->config & PERF_COUNTER_TYPE_MASK) >> | ||
319 | PERF_COUNTER_TYPE_SHIFT; | ||
320 | } | ||
321 | |||
322 | static inline u64 perf_event_id(struct perf_counter_hw_event *hw_event) | ||
323 | { | ||
324 | return hw_event->config & PERF_COUNTER_EVENT_MASK; | ||
325 | } | ||
326 | |||
327 | /** | ||
328 | * struct hw_perf_counter - performance counter hardware details: | ||
329 | */ | ||
330 | struct hw_perf_counter { | ||
331 | #ifdef CONFIG_PERF_COUNTERS | ||
332 | union { | ||
333 | struct { /* hardware */ | ||
334 | u64 config; | ||
335 | unsigned long config_base; | ||
336 | unsigned long counter_base; | ||
337 | int nmi; | ||
338 | int idx; | ||
339 | }; | ||
340 | union { /* software */ | ||
341 | atomic64_t count; | ||
342 | struct hrtimer hrtimer; | ||
343 | }; | ||
344 | }; | ||
345 | atomic64_t prev_count; | ||
346 | u64 irq_period; | ||
347 | atomic64_t period_left; | ||
348 | u64 interrupts; | ||
349 | #endif | ||
350 | }; | ||
351 | |||
352 | struct perf_counter; | ||
353 | |||
354 | /** | ||
355 | * struct pmu - generic performance monitoring unit | ||
356 | */ | ||
357 | struct pmu { | ||
358 | int (*enable) (struct perf_counter *counter); | ||
359 | void (*disable) (struct perf_counter *counter); | ||
360 | void (*read) (struct perf_counter *counter); | ||
361 | }; | ||
362 | |||
363 | /** | ||
364 | * enum perf_counter_active_state - the states of a counter | ||
365 | */ | ||
366 | enum perf_counter_active_state { | ||
367 | PERF_COUNTER_STATE_ERROR = -2, | ||
368 | PERF_COUNTER_STATE_OFF = -1, | ||
369 | PERF_COUNTER_STATE_INACTIVE = 0, | ||
370 | PERF_COUNTER_STATE_ACTIVE = 1, | ||
371 | }; | ||
372 | |||
373 | struct file; | ||
374 | |||
375 | struct perf_mmap_data { | ||
376 | struct rcu_head rcu_head; | ||
377 | int nr_pages; /* nr of data pages */ | ||
378 | int nr_locked; /* nr pages mlocked */ | ||
379 | |||
380 | atomic_t poll; /* POLL_ for wakeups */ | ||
381 | atomic_t head; /* write position */ | ||
382 | atomic_t events; /* event limit */ | ||
383 | |||
384 | atomic_t done_head; /* completed head */ | ||
385 | atomic_t lock; /* concurrent writes */ | ||
386 | |||
387 | atomic_t wakeup; /* needs a wakeup */ | ||
388 | |||
389 | struct perf_counter_mmap_page *user_page; | ||
390 | void *data_pages[0]; | ||
391 | }; | ||
392 | |||
393 | struct perf_pending_entry { | ||
394 | struct perf_pending_entry *next; | ||
395 | void (*func)(struct perf_pending_entry *); | ||
396 | }; | ||
397 | |||
398 | /** | ||
399 | * struct perf_counter - performance counter kernel representation: | ||
400 | */ | ||
401 | struct perf_counter { | ||
402 | #ifdef CONFIG_PERF_COUNTERS | ||
403 | struct list_head list_entry; | ||
404 | struct list_head event_entry; | ||
405 | struct list_head sibling_list; | ||
406 | int nr_siblings; | ||
407 | struct perf_counter *group_leader; | ||
408 | const struct pmu *pmu; | ||
409 | |||
410 | enum perf_counter_active_state state; | ||
411 | enum perf_counter_active_state prev_state; | ||
412 | atomic64_t count; | ||
413 | |||
414 | /* | ||
415 | * These are the total time in nanoseconds that the counter | ||
416 | * has been enabled (i.e. eligible to run, and the task has | ||
417 | * been scheduled in, if this is a per-task counter) | ||
418 | * and running (scheduled onto the CPU), respectively. | ||
419 | * | ||
420 | * They are computed from tstamp_enabled, tstamp_running and | ||
421 | * tstamp_stopped when the counter is in INACTIVE or ACTIVE state. | ||
422 | */ | ||
423 | u64 total_time_enabled; | ||
424 | u64 total_time_running; | ||
425 | |||
426 | /* | ||
427 | * These are timestamps used for computing total_time_enabled | ||
428 | * and total_time_running when the counter is in INACTIVE or | ||
429 | * ACTIVE state, measured in nanoseconds from an arbitrary point | ||
430 | * in time. | ||
431 | * tstamp_enabled: the notional time when the counter was enabled | ||
432 | * tstamp_running: the notional time when the counter was scheduled on | ||
433 | * tstamp_stopped: in INACTIVE state, the notional time when the | ||
434 | * counter was scheduled off. | ||
435 | */ | ||
436 | u64 tstamp_enabled; | ||
437 | u64 tstamp_running; | ||
438 | u64 tstamp_stopped; | ||
439 | |||
440 | struct perf_counter_hw_event hw_event; | ||
441 | struct hw_perf_counter hw; | ||
442 | |||
443 | struct perf_counter_context *ctx; | ||
444 | struct task_struct *task; | ||
445 | struct file *filp; | ||
446 | |||
447 | struct perf_counter *parent; | ||
448 | struct list_head child_list; | ||
449 | |||
450 | /* | ||
451 | * These accumulate total time (in nanoseconds) that children | ||
452 | * counters have been enabled and running, respectively. | ||
453 | */ | ||
454 | atomic64_t child_total_time_enabled; | ||
455 | atomic64_t child_total_time_running; | ||
456 | |||
457 | /* | ||
458 | * Protect attach/detach and child_list: | ||
459 | */ | ||
460 | struct mutex mutex; | ||
461 | |||
462 | int oncpu; | ||
463 | int cpu; | ||
464 | |||
465 | /* mmap bits */ | ||
466 | struct mutex mmap_mutex; | ||
467 | atomic_t mmap_count; | ||
468 | struct perf_mmap_data *data; | ||
469 | |||
470 | /* poll related */ | ||
471 | wait_queue_head_t waitq; | ||
472 | struct fasync_struct *fasync; | ||
473 | |||
474 | /* delayed work for NMIs and such */ | ||
475 | int pending_wakeup; | ||
476 | int pending_kill; | ||
477 | int pending_disable; | ||
478 | struct perf_pending_entry pending; | ||
479 | |||
480 | atomic_t event_limit; | ||
481 | |||
482 | void (*destroy)(struct perf_counter *); | ||
483 | struct rcu_head rcu_head; | ||
484 | #endif | ||
485 | }; | ||
486 | |||
487 | /** | ||
488 | * struct perf_counter_context - counter context structure | ||
489 | * | ||
490 | * Used as a container for task counters and CPU counters as well: | ||
491 | */ | ||
492 | struct perf_counter_context { | ||
493 | #ifdef CONFIG_PERF_COUNTERS | ||
494 | /* | ||
495 | * Protect the states of the counters in the list, | ||
496 | * nr_active, and the list: | ||
497 | */ | ||
498 | spinlock_t lock; | ||
499 | /* | ||
500 | * Protect the list of counters. Locking either mutex or lock | ||
501 | * is sufficient to ensure the list doesn't change; to change | ||
502 | * the list you need to lock both the mutex and the spinlock. | ||
503 | */ | ||
504 | struct mutex mutex; | ||
505 | |||
506 | struct list_head counter_list; | ||
507 | struct list_head event_list; | ||
508 | int nr_counters; | ||
509 | int nr_active; | ||
510 | int is_active; | ||
511 | struct task_struct *task; | ||
512 | |||
513 | /* | ||
514 | * Context clock, runs when context enabled. | ||
515 | */ | ||
516 | u64 time; | ||
517 | u64 timestamp; | ||
518 | #endif | ||
519 | }; | ||
520 | |||
521 | /** | ||
522 | * struct perf_cpu_context - per cpu counter context structure | ||
523 | */ | ||
524 | struct perf_cpu_context { | ||
525 | struct perf_counter_context ctx; | ||
526 | struct perf_counter_context *task_ctx; | ||
527 | int active_oncpu; | ||
528 | int max_pertask; | ||
529 | int exclusive; | ||
530 | |||
531 | /* | ||
532 | * Recursion avoidance: | ||
533 | * | ||
534 | * task, softirq, irq, nmi context | ||
535 | */ | ||
536 | int recursion[4]; | ||
537 | }; | ||
538 | |||
539 | #ifdef CONFIG_PERF_COUNTERS | ||
540 | |||
541 | /* | ||
542 | * Set by architecture code: | ||
543 | */ | ||
544 | extern int perf_max_counters; | ||
545 | |||
546 | extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter); | ||
547 | |||
548 | extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); | ||
549 | extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); | ||
550 | extern void perf_counter_task_tick(struct task_struct *task, int cpu); | ||
551 | extern void perf_counter_init_task(struct task_struct *child); | ||
552 | extern void perf_counter_exit_task(struct task_struct *child); | ||
553 | extern void perf_counter_do_pending(void); | ||
554 | extern void perf_counter_print_debug(void); | ||
555 | extern void perf_counter_unthrottle(void); | ||
556 | extern void __perf_disable(void); | ||
557 | extern bool __perf_enable(void); | ||
558 | extern void perf_disable(void); | ||
559 | extern void perf_enable(void); | ||
560 | extern int perf_counter_task_disable(void); | ||
561 | extern int perf_counter_task_enable(void); | ||
562 | extern int hw_perf_group_sched_in(struct perf_counter *group_leader, | ||
563 | struct perf_cpu_context *cpuctx, | ||
564 | struct perf_counter_context *ctx, int cpu); | ||
565 | extern void perf_counter_update_userpage(struct perf_counter *counter); | ||
566 | |||
567 | extern int perf_counter_overflow(struct perf_counter *counter, | ||
568 | int nmi, struct pt_regs *regs, u64 addr); | ||
569 | /* | ||
570 | * Return 1 for a software counter, 0 for a hardware counter | ||
571 | */ | ||
572 | static inline int is_software_counter(struct perf_counter *counter) | ||
573 | { | ||
574 | return !perf_event_raw(&counter->hw_event) && | ||
575 | perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE; | ||
576 | } | ||
577 | |||
578 | extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); | ||
579 | |||
580 | extern void perf_counter_mmap(unsigned long addr, unsigned long len, | ||
581 | unsigned long pgoff, struct file *file); | ||
582 | |||
583 | extern void perf_counter_munmap(unsigned long addr, unsigned long len, | ||
584 | unsigned long pgoff, struct file *file); | ||
585 | |||
586 | extern void perf_counter_comm(struct task_struct *tsk); | ||
587 | |||
588 | #define MAX_STACK_DEPTH 255 | ||
589 | |||
590 | struct perf_callchain_entry { | ||
591 | u16 nr, hv, kernel, user; | ||
592 | u64 ip[MAX_STACK_DEPTH]; | ||
593 | }; | ||
594 | |||
595 | extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); | ||
596 | |||
597 | extern int sysctl_perf_counter_priv; | ||
598 | extern int sysctl_perf_counter_mlock; | ||
599 | |||
600 | extern void perf_counter_init(void); | ||
601 | |||
602 | #ifndef perf_misc_flags | ||
603 | #define perf_misc_flags(regs) (user_mode(regs) ? PERF_EVENT_MISC_USER : \ | ||
604 | PERF_EVENT_MISC_KERNEL) | ||
605 | #define perf_instruction_pointer(regs) instruction_pointer(regs) | ||
606 | #endif | ||
607 | |||
608 | #else | ||
609 | static inline void | ||
610 | perf_counter_task_sched_in(struct task_struct *task, int cpu) { } | ||
611 | static inline void | ||
612 | perf_counter_task_sched_out(struct task_struct *task, int cpu) { } | ||
613 | static inline void | ||
614 | perf_counter_task_tick(struct task_struct *task, int cpu) { } | ||
615 | static inline void perf_counter_init_task(struct task_struct *child) { } | ||
616 | static inline void perf_counter_exit_task(struct task_struct *child) { } | ||
617 | static inline void perf_counter_do_pending(void) { } | ||
618 | static inline void perf_counter_print_debug(void) { } | ||
619 | static inline void perf_counter_unthrottle(void) { } | ||
620 | static inline void perf_disable(void) { } | ||
621 | static inline void perf_enable(void) { } | ||
622 | static inline int perf_counter_task_disable(void) { return -EINVAL; } | ||
623 | static inline int perf_counter_task_enable(void) { return -EINVAL; } | ||
624 | |||
625 | static inline void | ||
626 | perf_swcounter_event(u32 event, u64 nr, int nmi, | ||
627 | struct pt_regs *regs, u64 addr) { } | ||
628 | |||
629 | static inline void | ||
630 | perf_counter_mmap(unsigned long addr, unsigned long len, | ||
631 | unsigned long pgoff, struct file *file) { } | ||
632 | |||
633 | static inline void | ||
634 | perf_counter_munmap(unsigned long addr, unsigned long len, | ||
635 | unsigned long pgoff, struct file *file) { } | ||
636 | |||
637 | static inline void perf_counter_comm(struct task_struct *tsk) { } | ||
638 | static inline void perf_counter_init(void) { } | ||
639 | #endif | ||
640 | |||
641 | #endif /* __KERNEL__ */ | ||
642 | #endif /* _LINUX_PERF_COUNTER_H */ | ||
diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 48d887e3c6e7..b00df4c79c63 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h | |||
@@ -85,4 +85,7 @@ | |||
85 | #define PR_SET_TIMERSLACK 29 | 85 | #define PR_SET_TIMERSLACK 29 |
86 | #define PR_GET_TIMERSLACK 30 | 86 | #define PR_GET_TIMERSLACK 30 |
87 | 87 | ||
88 | #define PR_TASK_PERF_COUNTERS_DISABLE 31 | ||
89 | #define PR_TASK_PERF_COUNTERS_ENABLE 32 | ||
90 | |||
88 | #endif /* _LINUX_PRCTL_H */ | 91 | #endif /* _LINUX_PRCTL_H */ |
diff --git a/include/linux/sched.h b/include/linux/sched.h index b4c38bc8049c..ff59d1231519 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -71,6 +71,7 @@ struct sched_param { | |||
71 | #include <linux/path.h> | 71 | #include <linux/path.h> |
72 | #include <linux/compiler.h> | 72 | #include <linux/compiler.h> |
73 | #include <linux/completion.h> | 73 | #include <linux/completion.h> |
74 | #include <linux/perf_counter.h> | ||
74 | #include <linux/pid.h> | 75 | #include <linux/pid.h> |
75 | #include <linux/percpu.h> | 76 | #include <linux/percpu.h> |
76 | #include <linux/topology.h> | 77 | #include <linux/topology.h> |
@@ -137,6 +138,7 @@ extern unsigned long nr_running(void); | |||
137 | extern unsigned long nr_uninterruptible(void); | 138 | extern unsigned long nr_uninterruptible(void); |
138 | extern unsigned long nr_active(void); | 139 | extern unsigned long nr_active(void); |
139 | extern unsigned long nr_iowait(void); | 140 | extern unsigned long nr_iowait(void); |
141 | extern u64 cpu_nr_migrations(int cpu); | ||
140 | 142 | ||
141 | extern unsigned long get_parent_ip(unsigned long addr); | 143 | extern unsigned long get_parent_ip(unsigned long addr); |
142 | 144 | ||
@@ -672,6 +674,10 @@ struct user_struct { | |||
672 | struct work_struct work; | 674 | struct work_struct work; |
673 | #endif | 675 | #endif |
674 | #endif | 676 | #endif |
677 | |||
678 | #ifdef CONFIG_PERF_COUNTERS | ||
679 | atomic_long_t locked_vm; | ||
680 | #endif | ||
675 | }; | 681 | }; |
676 | 682 | ||
677 | extern int uids_sysfs_init(void); | 683 | extern int uids_sysfs_init(void); |
@@ -1052,9 +1058,10 @@ struct sched_entity { | |||
1052 | u64 last_wakeup; | 1058 | u64 last_wakeup; |
1053 | u64 avg_overlap; | 1059 | u64 avg_overlap; |
1054 | 1060 | ||
1061 | u64 nr_migrations; | ||
1062 | |||
1055 | u64 start_runtime; | 1063 | u64 start_runtime; |
1056 | u64 avg_wakeup; | 1064 | u64 avg_wakeup; |
1057 | u64 nr_migrations; | ||
1058 | 1065 | ||
1059 | #ifdef CONFIG_SCHEDSTATS | 1066 | #ifdef CONFIG_SCHEDSTATS |
1060 | u64 wait_start; | 1067 | u64 wait_start; |
@@ -1380,6 +1387,7 @@ struct task_struct { | |||
1380 | struct list_head pi_state_list; | 1387 | struct list_head pi_state_list; |
1381 | struct futex_pi_state *pi_state_cache; | 1388 | struct futex_pi_state *pi_state_cache; |
1382 | #endif | 1389 | #endif |
1390 | struct perf_counter_context perf_counter_ctx; | ||
1383 | #ifdef CONFIG_NUMA | 1391 | #ifdef CONFIG_NUMA |
1384 | struct mempolicy *mempolicy; | 1392 | struct mempolicy *mempolicy; |
1385 | short il_next; | 1393 | short il_next; |
@@ -2388,6 +2396,13 @@ static inline void inc_syscw(struct task_struct *tsk) | |||
2388 | #define TASK_SIZE_OF(tsk) TASK_SIZE | 2396 | #define TASK_SIZE_OF(tsk) TASK_SIZE |
2389 | #endif | 2397 | #endif |
2390 | 2398 | ||
2399 | /* | ||
2400 | * Call the function if the target task is executing on a CPU right now: | ||
2401 | */ | ||
2402 | extern void task_oncpu_function_call(struct task_struct *p, | ||
2403 | void (*func) (void *info), void *info); | ||
2404 | |||
2405 | |||
2391 | #ifdef CONFIG_MM_OWNER | 2406 | #ifdef CONFIG_MM_OWNER |
2392 | extern void mm_update_next_owner(struct mm_struct *mm); | 2407 | extern void mm_update_next_owner(struct mm_struct *mm); |
2393 | extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); | 2408 | extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); |
diff --git a/include/linux/signal.h b/include/linux/signal.h index 84f997f8aa53..c7552836bd95 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h | |||
@@ -235,6 +235,8 @@ static inline int valid_signal(unsigned long sig) | |||
235 | extern int next_signal(struct sigpending *pending, sigset_t *mask); | 235 | extern int next_signal(struct sigpending *pending, sigset_t *mask); |
236 | extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); | 236 | extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); |
237 | extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); | 237 | extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); |
238 | extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, | ||
239 | siginfo_t *info); | ||
238 | extern long do_sigpending(void __user *, unsigned long); | 240 | extern long do_sigpending(void __user *, unsigned long); |
239 | extern int sigprocmask(int, sigset_t *, sigset_t *); | 241 | extern int sigprocmask(int, sigset_t *, sigset_t *); |
240 | extern int show_unhandled_signals; | 242 | extern int show_unhandled_signals; |
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 30520844b8da..79faae950e2e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
@@ -55,6 +55,7 @@ struct compat_timeval; | |||
55 | struct robust_list_head; | 55 | struct robust_list_head; |
56 | struct getcpu_cache; | 56 | struct getcpu_cache; |
57 | struct old_linux_dirent; | 57 | struct old_linux_dirent; |
58 | struct perf_counter_hw_event; | ||
58 | 59 | ||
59 | #include <linux/types.h> | 60 | #include <linux/types.h> |
60 | #include <linux/aio_abi.h> | 61 | #include <linux/aio_abi.h> |
@@ -755,4 +756,8 @@ asmlinkage long sys_pipe(int __user *); | |||
755 | 756 | ||
756 | int kernel_execve(const char *filename, char *const argv[], char *const envp[]); | 757 | int kernel_execve(const char *filename, char *const argv[], char *const envp[]); |
757 | 758 | ||
759 | |||
760 | asmlinkage long sys_perf_counter_open( | ||
761 | const struct perf_counter_hw_event __user *hw_event_uptr, | ||
762 | pid_t pid, int cpu, int group_fd, unsigned long flags); | ||
758 | #endif | 763 | #endif |
diff --git a/init/Kconfig b/init/Kconfig index 7be4d3836745..8158f1f44694 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -933,6 +933,41 @@ config AIO | |||
933 | by some high performance threaded applications. Disabling | 933 | by some high performance threaded applications. Disabling |
934 | this option saves about 7k. | 934 | this option saves about 7k. |
935 | 935 | ||
936 | config HAVE_PERF_COUNTERS | ||
937 | bool | ||
938 | |||
939 | menu "Performance Counters" | ||
940 | |||
941 | config PERF_COUNTERS | ||
942 | bool "Kernel Performance Counters" | ||
943 | depends on HAVE_PERF_COUNTERS | ||
944 | default y | ||
945 | select ANON_INODES | ||
946 | help | ||
947 | Enable kernel support for performance counter hardware. | ||
948 | |||
949 | Performance counters are special hardware registers available | ||
950 | on most modern CPUs. These registers count the number of certain | ||
951 | types of hw events, such as instructions executed, cache misses | ||
952 | suffered, or branches mis-predicted - without slowing down the | ||
953 | kernel or applications. These registers can also trigger interrupts | ||
954 | when a threshold number of events have passed - and can thus be | ||
955 | used to profile the code that runs on that CPU. | ||
956 | |||
957 | The Linux Performance Counter subsystem provides an abstraction of | ||
958 | these hardware capabilities, available via a system call. It | ||
959 | provides per task and per CPU counters, and it provides event | ||
960 | capabilities on top of those. | ||
961 | |||
962 | Say Y if unsure. | ||
963 | |||
964 | config EVENT_PROFILE | ||
965 | bool "Tracepoint profile sources" | ||
966 | depends on PERF_COUNTERS && EVENT_TRACER | ||
967 | default y | ||
968 | |||
969 | endmenu | ||
970 | |||
936 | config VM_EVENT_COUNTERS | 971 | config VM_EVENT_COUNTERS |
937 | default y | 972 | default y |
938 | bool "Enable VM event counters for /proc/vmstat" if EMBEDDED | 973 | bool "Enable VM event counters for /proc/vmstat" if EMBEDDED |
diff --git a/kernel/Makefile b/kernel/Makefile index 42423665660a..e914ca992d70 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -95,6 +95,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/ | |||
95 | obj-$(CONFIG_TRACING) += trace/ | 95 | obj-$(CONFIG_TRACING) += trace/ |
96 | obj-$(CONFIG_SMP) += sched_cpupri.o | 96 | obj-$(CONFIG_SMP) += sched_cpupri.o |
97 | obj-$(CONFIG_SLOW_WORK) += slow-work.o | 97 | obj-$(CONFIG_SLOW_WORK) += slow-work.o |
98 | obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o | ||
98 | 99 | ||
99 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) | 100 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) |
100 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 101 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/compat.c b/kernel/compat.c index 42d56544460f..f6c204f07ea6 100644 --- a/kernel/compat.c +++ b/kernel/compat.c | |||
@@ -882,6 +882,17 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, | |||
882 | 882 | ||
883 | } | 883 | } |
884 | 884 | ||
885 | asmlinkage long | ||
886 | compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, | ||
887 | struct compat_siginfo __user *uinfo) | ||
888 | { | ||
889 | siginfo_t info; | ||
890 | |||
891 | if (copy_siginfo_from_user32(&info, uinfo)) | ||
892 | return -EFAULT; | ||
893 | return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); | ||
894 | } | ||
895 | |||
885 | #ifdef __ARCH_WANT_COMPAT_SYS_TIME | 896 | #ifdef __ARCH_WANT_COMPAT_SYS_TIME |
886 | 897 | ||
887 | /* compat_time_t is a 32 bit "long" and needs to get converted. */ | 898 | /* compat_time_t is a 32 bit "long" and needs to get converted. */ |
diff --git a/kernel/exit.c b/kernel/exit.c index abf9cf3b95c6..73affd35e76d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -158,6 +158,9 @@ static void delayed_put_task_struct(struct rcu_head *rhp) | |||
158 | { | 158 | { |
159 | struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); | 159 | struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); |
160 | 160 | ||
161 | #ifdef CONFIG_PERF_COUNTERS | ||
162 | WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list)); | ||
163 | #endif | ||
161 | trace_sched_process_free(tsk); | 164 | trace_sched_process_free(tsk); |
162 | put_task_struct(tsk); | 165 | put_task_struct(tsk); |
163 | } | 166 | } |
@@ -174,6 +177,13 @@ repeat: | |||
174 | atomic_dec(&__task_cred(p)->user->processes); | 177 | atomic_dec(&__task_cred(p)->user->processes); |
175 | 178 | ||
176 | proc_flush_task(p); | 179 | proc_flush_task(p); |
180 | |||
181 | /* | ||
182 | * Flush inherited counters to the parent - before the parent | ||
183 | * gets woken up by child-exit notifications. | ||
184 | */ | ||
185 | perf_counter_exit_task(p); | ||
186 | |||
177 | write_lock_irq(&tasklist_lock); | 187 | write_lock_irq(&tasklist_lock); |
178 | tracehook_finish_release_task(p); | 188 | tracehook_finish_release_task(p); |
179 | __exit_signal(p); | 189 | __exit_signal(p); |
@@ -981,10 +991,6 @@ NORET_TYPE void do_exit(long code) | |||
981 | tsk->mempolicy = NULL; | 991 | tsk->mempolicy = NULL; |
982 | #endif | 992 | #endif |
983 | #ifdef CONFIG_FUTEX | 993 | #ifdef CONFIG_FUTEX |
984 | /* | ||
985 | * This must happen late, after the PID is not | ||
986 | * hashed anymore: | ||
987 | */ | ||
988 | if (unlikely(!list_empty(&tsk->pi_state_list))) | 994 | if (unlikely(!list_empty(&tsk->pi_state_list))) |
989 | exit_pi_state_list(tsk); | 995 | exit_pi_state_list(tsk); |
990 | if (unlikely(current->pi_state_cache)) | 996 | if (unlikely(current->pi_state_cache)) |
@@ -1251,6 +1257,12 @@ static int wait_task_zombie(struct task_struct *p, int options, | |||
1251 | */ | 1257 | */ |
1252 | read_unlock(&tasklist_lock); | 1258 | read_unlock(&tasklist_lock); |
1253 | 1259 | ||
1260 | /* | ||
1261 | * Flush inherited counters to the parent - before the parent | ||
1262 | * gets woken up by child-exit notifications. | ||
1263 | */ | ||
1264 | perf_counter_exit_task(p); | ||
1265 | |||
1254 | retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; | 1266 | retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; |
1255 | status = (p->signal->flags & SIGNAL_GROUP_EXIT) | 1267 | status = (p->signal->flags & SIGNAL_GROUP_EXIT) |
1256 | ? p->signal->group_exit_code : p->exit_code; | 1268 | ? p->signal->group_exit_code : p->exit_code; |
diff --git a/kernel/fork.c b/kernel/fork.c index b9e2edd00726..d32fef4d38e5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -983,6 +983,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
983 | goto fork_out; | 983 | goto fork_out; |
984 | 984 | ||
985 | rt_mutex_init_task(p); | 985 | rt_mutex_init_task(p); |
986 | perf_counter_init_task(p); | ||
986 | 987 | ||
987 | #ifdef CONFIG_PROVE_LOCKING | 988 | #ifdef CONFIG_PROVE_LOCKING |
988 | DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); | 989 | DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); |
diff --git a/kernel/mutex.c b/kernel/mutex.c index 507cf2b5e9f1..f788a5ace24b 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -89,7 +89,7 @@ __mutex_lock_slowpath(atomic_t *lock_count); | |||
89 | * | 89 | * |
90 | * This function is similar to (but not equivalent to) down(). | 90 | * This function is similar to (but not equivalent to) down(). |
91 | */ | 91 | */ |
92 | void inline __sched mutex_lock(struct mutex *lock) | 92 | void __sched mutex_lock(struct mutex *lock) |
93 | { | 93 | { |
94 | might_sleep(); | 94 | might_sleep(); |
95 | /* | 95 | /* |
@@ -471,5 +471,28 @@ int __sched mutex_trylock(struct mutex *lock) | |||
471 | 471 | ||
472 | return ret; | 472 | return ret; |
473 | } | 473 | } |
474 | |||
475 | EXPORT_SYMBOL(mutex_trylock); | 474 | EXPORT_SYMBOL(mutex_trylock); |
475 | |||
476 | /** | ||
477 | * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 | ||
478 | * @cnt: the atomic which we are to dec | ||
479 | * @lock: the mutex to return holding if we dec to 0 | ||
480 | * | ||
481 | * return true and hold lock if we dec to 0, return false otherwise | ||
482 | */ | ||
483 | int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) | ||
484 | { | ||
485 | /* dec if we can't possibly hit 0 */ | ||
486 | if (atomic_add_unless(cnt, -1, 1)) | ||
487 | return 0; | ||
488 | /* we might hit 0, so take the lock */ | ||
489 | mutex_lock(lock); | ||
490 | if (!atomic_dec_and_test(cnt)) { | ||
491 | /* when we actually did the dec, we didn't hit 0 */ | ||
492 | mutex_unlock(lock); | ||
493 | return 0; | ||
494 | } | ||
495 | /* we hit 0, and we hold the lock */ | ||
496 | return 1; | ||
497 | } | ||
498 | EXPORT_SYMBOL(atomic_dec_and_mutex_lock); | ||
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c new file mode 100644 index 000000000000..59a926d04baf --- /dev/null +++ b/kernel/perf_counter.c | |||
@@ -0,0 +1,3526 @@ | |||
1 | /* | ||
2 | * Performance counter core code | ||
3 | * | ||
4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | ||
5 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar | ||
6 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
7 | * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | ||
8 | * | ||
9 | * For licensing details see kernel-base/COPYING | ||
10 | */ | ||
11 | |||
12 | #include <linux/fs.h> | ||
13 | #include <linux/mm.h> | ||
14 | #include <linux/cpu.h> | ||
15 | #include <linux/smp.h> | ||
16 | #include <linux/file.h> | ||
17 | #include <linux/poll.h> | ||
18 | #include <linux/sysfs.h> | ||
19 | #include <linux/ptrace.h> | ||
20 | #include <linux/percpu.h> | ||
21 | #include <linux/vmstat.h> | ||
22 | #include <linux/hardirq.h> | ||
23 | #include <linux/rculist.h> | ||
24 | #include <linux/uaccess.h> | ||
25 | #include <linux/syscalls.h> | ||
26 | #include <linux/anon_inodes.h> | ||
27 | #include <linux/kernel_stat.h> | ||
28 | #include <linux/perf_counter.h> | ||
29 | #include <linux/dcache.h> | ||
30 | |||
31 | #include <asm/irq_regs.h> | ||
32 | |||
33 | /* | ||
34 | * Each CPU has a list of per CPU counters: | ||
35 | */ | ||
36 | DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); | ||
37 | |||
38 | int perf_max_counters __read_mostly = 1; | ||
39 | static int perf_reserved_percpu __read_mostly; | ||
40 | static int perf_overcommit __read_mostly = 1; | ||
41 | |||
42 | static atomic_t nr_counters __read_mostly; | ||
43 | static atomic_t nr_mmap_tracking __read_mostly; | ||
44 | static atomic_t nr_munmap_tracking __read_mostly; | ||
45 | static atomic_t nr_comm_tracking __read_mostly; | ||
46 | |||
47 | int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */ | ||
48 | int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */ | ||
49 | |||
50 | /* | ||
51 | * Lock for (sysadmin-configurable) counter reservations: | ||
52 | */ | ||
53 | static DEFINE_SPINLOCK(perf_resource_lock); | ||
54 | |||
55 | /* | ||
56 | * Architecture provided APIs - weak aliases: | ||
57 | */ | ||
58 | extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | ||
59 | { | ||
60 | return NULL; | ||
61 | } | ||
62 | |||
63 | void __weak hw_perf_disable(void) { barrier(); } | ||
64 | void __weak hw_perf_enable(void) { barrier(); } | ||
65 | |||
66 | void __weak hw_perf_counter_setup(int cpu) { barrier(); } | ||
67 | int __weak hw_perf_group_sched_in(struct perf_counter *group_leader, | ||
68 | struct perf_cpu_context *cpuctx, | ||
69 | struct perf_counter_context *ctx, int cpu) | ||
70 | { | ||
71 | return 0; | ||
72 | } | ||
73 | |||
74 | void __weak perf_counter_print_debug(void) { } | ||
75 | |||
76 | static DEFINE_PER_CPU(int, disable_count); | ||
77 | |||
78 | void __perf_disable(void) | ||
79 | { | ||
80 | __get_cpu_var(disable_count)++; | ||
81 | } | ||
82 | |||
83 | bool __perf_enable(void) | ||
84 | { | ||
85 | return !--__get_cpu_var(disable_count); | ||
86 | } | ||
87 | |||
88 | void perf_disable(void) | ||
89 | { | ||
90 | __perf_disable(); | ||
91 | hw_perf_disable(); | ||
92 | } | ||
93 | |||
94 | void perf_enable(void) | ||
95 | { | ||
96 | if (__perf_enable()) | ||
97 | hw_perf_enable(); | ||
98 | } | ||
99 | |||
100 | static void | ||
101 | list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) | ||
102 | { | ||
103 | struct perf_counter *group_leader = counter->group_leader; | ||
104 | |||
105 | /* | ||
106 | * Depending on whether it is a standalone or sibling counter, | ||
107 | * add it straight to the context's counter list, or to the group | ||
108 | * leader's sibling list: | ||
109 | */ | ||
110 | if (group_leader == counter) | ||
111 | list_add_tail(&counter->list_entry, &ctx->counter_list); | ||
112 | else { | ||
113 | list_add_tail(&counter->list_entry, &group_leader->sibling_list); | ||
114 | group_leader->nr_siblings++; | ||
115 | } | ||
116 | |||
117 | list_add_rcu(&counter->event_entry, &ctx->event_list); | ||
118 | ctx->nr_counters++; | ||
119 | } | ||
120 | |||
121 | static void | ||
122 | list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) | ||
123 | { | ||
124 | struct perf_counter *sibling, *tmp; | ||
125 | |||
126 | ctx->nr_counters--; | ||
127 | |||
128 | list_del_init(&counter->list_entry); | ||
129 | list_del_rcu(&counter->event_entry); | ||
130 | |||
131 | if (counter->group_leader != counter) | ||
132 | counter->group_leader->nr_siblings--; | ||
133 | |||
134 | /* | ||
135 | * If this was a group counter with sibling counters then | ||
136 | * upgrade the siblings to singleton counters by adding them | ||
137 | * to the context list directly: | ||
138 | */ | ||
139 | list_for_each_entry_safe(sibling, tmp, | ||
140 | &counter->sibling_list, list_entry) { | ||
141 | |||
142 | list_move_tail(&sibling->list_entry, &ctx->counter_list); | ||
143 | sibling->group_leader = sibling; | ||
144 | } | ||
145 | } | ||
146 | |||
147 | static void | ||
148 | counter_sched_out(struct perf_counter *counter, | ||
149 | struct perf_cpu_context *cpuctx, | ||
150 | struct perf_counter_context *ctx) | ||
151 | { | ||
152 | if (counter->state != PERF_COUNTER_STATE_ACTIVE) | ||
153 | return; | ||
154 | |||
155 | counter->state = PERF_COUNTER_STATE_INACTIVE; | ||
156 | counter->tstamp_stopped = ctx->time; | ||
157 | counter->pmu->disable(counter); | ||
158 | counter->oncpu = -1; | ||
159 | |||
160 | if (!is_software_counter(counter)) | ||
161 | cpuctx->active_oncpu--; | ||
162 | ctx->nr_active--; | ||
163 | if (counter->hw_event.exclusive || !cpuctx->active_oncpu) | ||
164 | cpuctx->exclusive = 0; | ||
165 | } | ||
166 | |||
167 | static void | ||
168 | group_sched_out(struct perf_counter *group_counter, | ||
169 | struct perf_cpu_context *cpuctx, | ||
170 | struct perf_counter_context *ctx) | ||
171 | { | ||
172 | struct perf_counter *counter; | ||
173 | |||
174 | if (group_counter->state != PERF_COUNTER_STATE_ACTIVE) | ||
175 | return; | ||
176 | |||
177 | counter_sched_out(group_counter, cpuctx, ctx); | ||
178 | |||
179 | /* | ||
180 | * Schedule out siblings (if any): | ||
181 | */ | ||
182 | list_for_each_entry(counter, &group_counter->sibling_list, list_entry) | ||
183 | counter_sched_out(counter, cpuctx, ctx); | ||
184 | |||
185 | if (group_counter->hw_event.exclusive) | ||
186 | cpuctx->exclusive = 0; | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * Cross CPU call to remove a performance counter | ||
191 | * | ||
192 | * We disable the counter on the hardware level first. After that we | ||
193 | * remove it from the context list. | ||
194 | */ | ||
195 | static void __perf_counter_remove_from_context(void *info) | ||
196 | { | ||
197 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
198 | struct perf_counter *counter = info; | ||
199 | struct perf_counter_context *ctx = counter->ctx; | ||
200 | unsigned long flags; | ||
201 | |||
202 | /* | ||
203 | * If this is a task context, we need to check whether it is | ||
204 | * the current task context of this cpu. If not it has been | ||
205 | * scheduled out before the smp call arrived. | ||
206 | */ | ||
207 | if (ctx->task && cpuctx->task_ctx != ctx) | ||
208 | return; | ||
209 | |||
210 | spin_lock_irqsave(&ctx->lock, flags); | ||
211 | |||
212 | counter_sched_out(counter, cpuctx, ctx); | ||
213 | |||
214 | counter->task = NULL; | ||
215 | |||
216 | /* | ||
217 | * Protect the list operation against NMI by disabling the | ||
218 | * counters on a global level. NOP for non NMI based counters. | ||
219 | */ | ||
220 | perf_disable(); | ||
221 | list_del_counter(counter, ctx); | ||
222 | perf_enable(); | ||
223 | |||
224 | if (!ctx->task) { | ||
225 | /* | ||
226 | * Allow more per task counters with respect to the | ||
227 | * reservation: | ||
228 | */ | ||
229 | cpuctx->max_pertask = | ||
230 | min(perf_max_counters - ctx->nr_counters, | ||
231 | perf_max_counters - perf_reserved_percpu); | ||
232 | } | ||
233 | |||
234 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
235 | } | ||
236 | |||
237 | |||
238 | /* | ||
239 | * Remove the counter from a task's (or a CPU's) list of counters. | ||
240 | * | ||
241 | * Must be called with counter->mutex and ctx->mutex held. | ||
242 | * | ||
243 | * CPU counters are removed with a smp call. For task counters we only | ||
244 | * call when the task is on a CPU. | ||
245 | */ | ||
246 | static void perf_counter_remove_from_context(struct perf_counter *counter) | ||
247 | { | ||
248 | struct perf_counter_context *ctx = counter->ctx; | ||
249 | struct task_struct *task = ctx->task; | ||
250 | |||
251 | if (!task) { | ||
252 | /* | ||
253 | * Per cpu counters are removed via an smp call and | ||
254 | * the removal is always successful. | ||
255 | */ | ||
256 | smp_call_function_single(counter->cpu, | ||
257 | __perf_counter_remove_from_context, | ||
258 | counter, 1); | ||
259 | return; | ||
260 | } | ||
261 | |||
262 | retry: | ||
263 | task_oncpu_function_call(task, __perf_counter_remove_from_context, | ||
264 | counter); | ||
265 | |||
266 | spin_lock_irq(&ctx->lock); | ||
267 | /* | ||
268 | * If the context is active we need to retry the smp call. | ||
269 | */ | ||
270 | if (ctx->nr_active && !list_empty(&counter->list_entry)) { | ||
271 | spin_unlock_irq(&ctx->lock); | ||
272 | goto retry; | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * The lock prevents this context from being scheduled in, so we | ||
277 | * can remove the counter safely if the call above did not | ||
278 | * succeed. | ||
279 | */ | ||
280 | if (!list_empty(&counter->list_entry)) { | ||
281 | list_del_counter(counter, ctx); | ||
282 | counter->task = NULL; | ||
283 | } | ||
284 | spin_unlock_irq(&ctx->lock); | ||
285 | } | ||
286 | |||
287 | static inline u64 perf_clock(void) | ||
288 | { | ||
289 | return cpu_clock(smp_processor_id()); | ||
290 | } | ||
291 | |||
292 | /* | ||
293 | * Update the record of the current time in a context. | ||
294 | */ | ||
295 | static void update_context_time(struct perf_counter_context *ctx) | ||
296 | { | ||
297 | u64 now = perf_clock(); | ||
298 | |||
299 | ctx->time += now - ctx->timestamp; | ||
300 | ctx->timestamp = now; | ||
301 | } | ||
302 | |||
303 | /* | ||
304 | * Update the total_time_enabled and total_time_running fields for a counter. | ||
305 | */ | ||
306 | static void update_counter_times(struct perf_counter *counter) | ||
307 | { | ||
308 | struct perf_counter_context *ctx = counter->ctx; | ||
309 | u64 run_end; | ||
310 | |||
311 | if (counter->state < PERF_COUNTER_STATE_INACTIVE) | ||
312 | return; | ||
313 | |||
314 | counter->total_time_enabled = ctx->time - counter->tstamp_enabled; | ||
315 | |||
316 | if (counter->state == PERF_COUNTER_STATE_INACTIVE) | ||
317 | run_end = counter->tstamp_stopped; | ||
318 | else | ||
319 | run_end = ctx->time; | ||
320 | |||
321 | counter->total_time_running = run_end - counter->tstamp_running; | ||
322 | } | ||
323 | |||
324 | /* | ||
325 | * Update total_time_enabled and total_time_running for all counters in a group. | ||
326 | */ | ||
327 | static void update_group_times(struct perf_counter *leader) | ||
328 | { | ||
329 | struct perf_counter *counter; | ||
330 | |||
331 | update_counter_times(leader); | ||
332 | list_for_each_entry(counter, &leader->sibling_list, list_entry) | ||
333 | update_counter_times(counter); | ||
334 | } | ||
335 | |||
336 | /* | ||
337 | * Cross CPU call to disable a performance counter | ||
338 | */ | ||
339 | static void __perf_counter_disable(void *info) | ||
340 | { | ||
341 | struct perf_counter *counter = info; | ||
342 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
343 | struct perf_counter_context *ctx = counter->ctx; | ||
344 | unsigned long flags; | ||
345 | |||
346 | /* | ||
347 | * If this is a per-task counter, need to check whether this | ||
348 | * counter's task is the current task on this cpu. | ||
349 | */ | ||
350 | if (ctx->task && cpuctx->task_ctx != ctx) | ||
351 | return; | ||
352 | |||
353 | spin_lock_irqsave(&ctx->lock, flags); | ||
354 | |||
355 | /* | ||
356 | * If the counter is on, turn it off. | ||
357 | * If it is in error state, leave it in error state. | ||
358 | */ | ||
359 | if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { | ||
360 | update_context_time(ctx); | ||
361 | update_counter_times(counter); | ||
362 | if (counter == counter->group_leader) | ||
363 | group_sched_out(counter, cpuctx, ctx); | ||
364 | else | ||
365 | counter_sched_out(counter, cpuctx, ctx); | ||
366 | counter->state = PERF_COUNTER_STATE_OFF; | ||
367 | } | ||
368 | |||
369 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
370 | } | ||
371 | |||
372 | /* | ||
373 | * Disable a counter. | ||
374 | */ | ||
375 | static void perf_counter_disable(struct perf_counter *counter) | ||
376 | { | ||
377 | struct perf_counter_context *ctx = counter->ctx; | ||
378 | struct task_struct *task = ctx->task; | ||
379 | |||
380 | if (!task) { | ||
381 | /* | ||
382 | * Disable the counter on the cpu that it's on | ||
383 | */ | ||
384 | smp_call_function_single(counter->cpu, __perf_counter_disable, | ||
385 | counter, 1); | ||
386 | return; | ||
387 | } | ||
388 | |||
389 | retry: | ||
390 | task_oncpu_function_call(task, __perf_counter_disable, counter); | ||
391 | |||
392 | spin_lock_irq(&ctx->lock); | ||
393 | /* | ||
394 | * If the counter is still active, we need to retry the cross-call. | ||
395 | */ | ||
396 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) { | ||
397 | spin_unlock_irq(&ctx->lock); | ||
398 | goto retry; | ||
399 | } | ||
400 | |||
401 | /* | ||
402 | * Since we have the lock this context can't be scheduled | ||
403 | * in, so we can change the state safely. | ||
404 | */ | ||
405 | if (counter->state == PERF_COUNTER_STATE_INACTIVE) { | ||
406 | update_counter_times(counter); | ||
407 | counter->state = PERF_COUNTER_STATE_OFF; | ||
408 | } | ||
409 | |||
410 | spin_unlock_irq(&ctx->lock); | ||
411 | } | ||
412 | |||
413 | static int | ||
414 | counter_sched_in(struct perf_counter *counter, | ||
415 | struct perf_cpu_context *cpuctx, | ||
416 | struct perf_counter_context *ctx, | ||
417 | int cpu) | ||
418 | { | ||
419 | if (counter->state <= PERF_COUNTER_STATE_OFF) | ||
420 | return 0; | ||
421 | |||
422 | counter->state = PERF_COUNTER_STATE_ACTIVE; | ||
423 | counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ | ||
424 | /* | ||
425 | * The new state must be visible before we turn it on in the hardware: | ||
426 | */ | ||
427 | smp_wmb(); | ||
428 | |||
429 | if (counter->pmu->enable(counter)) { | ||
430 | counter->state = PERF_COUNTER_STATE_INACTIVE; | ||
431 | counter->oncpu = -1; | ||
432 | return -EAGAIN; | ||
433 | } | ||
434 | |||
435 | counter->tstamp_running += ctx->time - counter->tstamp_stopped; | ||
436 | |||
437 | if (!is_software_counter(counter)) | ||
438 | cpuctx->active_oncpu++; | ||
439 | ctx->nr_active++; | ||
440 | |||
441 | if (counter->hw_event.exclusive) | ||
442 | cpuctx->exclusive = 1; | ||
443 | |||
444 | return 0; | ||
445 | } | ||
446 | |||
447 | static int | ||
448 | group_sched_in(struct perf_counter *group_counter, | ||
449 | struct perf_cpu_context *cpuctx, | ||
450 | struct perf_counter_context *ctx, | ||
451 | int cpu) | ||
452 | { | ||
453 | struct perf_counter *counter, *partial_group; | ||
454 | int ret; | ||
455 | |||
456 | if (group_counter->state == PERF_COUNTER_STATE_OFF) | ||
457 | return 0; | ||
458 | |||
459 | ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu); | ||
460 | if (ret) | ||
461 | return ret < 0 ? ret : 0; | ||
462 | |||
463 | group_counter->prev_state = group_counter->state; | ||
464 | if (counter_sched_in(group_counter, cpuctx, ctx, cpu)) | ||
465 | return -EAGAIN; | ||
466 | |||
467 | /* | ||
468 | * Schedule in siblings as one group (if any): | ||
469 | */ | ||
470 | list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { | ||
471 | counter->prev_state = counter->state; | ||
472 | if (counter_sched_in(counter, cpuctx, ctx, cpu)) { | ||
473 | partial_group = counter; | ||
474 | goto group_error; | ||
475 | } | ||
476 | } | ||
477 | |||
478 | return 0; | ||
479 | |||
480 | group_error: | ||
481 | /* | ||
482 | * Groups can be scheduled in as one unit only, so undo any | ||
483 | * partial group before returning: | ||
484 | */ | ||
485 | list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { | ||
486 | if (counter == partial_group) | ||
487 | break; | ||
488 | counter_sched_out(counter, cpuctx, ctx); | ||
489 | } | ||
490 | counter_sched_out(group_counter, cpuctx, ctx); | ||
491 | |||
492 | return -EAGAIN; | ||
493 | } | ||
494 | |||
495 | /* | ||
496 | * Return 1 for a group consisting entirely of software counters, | ||
497 | * 0 if the group contains any hardware counters. | ||
498 | */ | ||
499 | static int is_software_only_group(struct perf_counter *leader) | ||
500 | { | ||
501 | struct perf_counter *counter; | ||
502 | |||
503 | if (!is_software_counter(leader)) | ||
504 | return 0; | ||
505 | |||
506 | list_for_each_entry(counter, &leader->sibling_list, list_entry) | ||
507 | if (!is_software_counter(counter)) | ||
508 | return 0; | ||
509 | |||
510 | return 1; | ||
511 | } | ||
512 | |||
513 | /* | ||
514 | * Work out whether we can put this counter group on the CPU now. | ||
515 | */ | ||
516 | static int group_can_go_on(struct perf_counter *counter, | ||
517 | struct perf_cpu_context *cpuctx, | ||
518 | int can_add_hw) | ||
519 | { | ||
520 | /* | ||
521 | * Groups consisting entirely of software counters can always go on. | ||
522 | */ | ||
523 | if (is_software_only_group(counter)) | ||
524 | return 1; | ||
525 | /* | ||
526 | * If an exclusive group is already on, no other hardware | ||
527 | * counters can go on. | ||
528 | */ | ||
529 | if (cpuctx->exclusive) | ||
530 | return 0; | ||
531 | /* | ||
532 | * If this group is exclusive and there are already | ||
533 | * counters on the CPU, it can't go on. | ||
534 | */ | ||
535 | if (counter->hw_event.exclusive && cpuctx->active_oncpu) | ||
536 | return 0; | ||
537 | /* | ||
538 | * Otherwise, try to add it if all previous groups were able | ||
539 | * to go on. | ||
540 | */ | ||
541 | return can_add_hw; | ||
542 | } | ||
543 | |||
544 | static void add_counter_to_ctx(struct perf_counter *counter, | ||
545 | struct perf_counter_context *ctx) | ||
546 | { | ||
547 | list_add_counter(counter, ctx); | ||
548 | counter->prev_state = PERF_COUNTER_STATE_OFF; | ||
549 | counter->tstamp_enabled = ctx->time; | ||
550 | counter->tstamp_running = ctx->time; | ||
551 | counter->tstamp_stopped = ctx->time; | ||
552 | } | ||
553 | |||
554 | /* | ||
555 | * Cross CPU call to install and enable a performance counter | ||
556 | */ | ||
557 | static void __perf_install_in_context(void *info) | ||
558 | { | ||
559 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
560 | struct perf_counter *counter = info; | ||
561 | struct perf_counter_context *ctx = counter->ctx; | ||
562 | struct perf_counter *leader = counter->group_leader; | ||
563 | int cpu = smp_processor_id(); | ||
564 | unsigned long flags; | ||
565 | int err; | ||
566 | |||
567 | /* | ||
568 | * If this is a task context, we need to check whether it is | ||
569 | * the current task context of this cpu. If not it has been | ||
570 | * scheduled out before the smp call arrived. | ||
571 | */ | ||
572 | if (ctx->task && cpuctx->task_ctx != ctx) | ||
573 | return; | ||
574 | |||
575 | spin_lock_irqsave(&ctx->lock, flags); | ||
576 | update_context_time(ctx); | ||
577 | |||
578 | /* | ||
579 | * Protect the list operation against NMI by disabling the | ||
580 | * counters on a global level. NOP for non NMI based counters. | ||
581 | */ | ||
582 | perf_disable(); | ||
583 | |||
584 | add_counter_to_ctx(counter, ctx); | ||
585 | |||
586 | /* | ||
587 | * Don't put the counter on if it is disabled or if | ||
588 | * it is in a group and the group isn't on. | ||
589 | */ | ||
590 | if (counter->state != PERF_COUNTER_STATE_INACTIVE || | ||
591 | (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)) | ||
592 | goto unlock; | ||
593 | |||
594 | /* | ||
595 | * An exclusive counter can't go on if there are already active | ||
596 | * hardware counters, and no hardware counter can go on if there | ||
597 | * is already an exclusive counter on. | ||
598 | */ | ||
599 | if (!group_can_go_on(counter, cpuctx, 1)) | ||
600 | err = -EEXIST; | ||
601 | else | ||
602 | err = counter_sched_in(counter, cpuctx, ctx, cpu); | ||
603 | |||
604 | if (err) { | ||
605 | /* | ||
606 | * This counter couldn't go on. If it is in a group | ||
607 | * then we have to pull the whole group off. | ||
608 | * If the counter group is pinned then put it in error state. | ||
609 | */ | ||
610 | if (leader != counter) | ||
611 | group_sched_out(leader, cpuctx, ctx); | ||
612 | if (leader->hw_event.pinned) { | ||
613 | update_group_times(leader); | ||
614 | leader->state = PERF_COUNTER_STATE_ERROR; | ||
615 | } | ||
616 | } | ||
617 | |||
618 | if (!err && !ctx->task && cpuctx->max_pertask) | ||
619 | cpuctx->max_pertask--; | ||
620 | |||
621 | unlock: | ||
622 | perf_enable(); | ||
623 | |||
624 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
625 | } | ||
626 | |||
627 | /* | ||
628 | * Attach a performance counter to a context | ||
629 | * | ||
630 | * First we add the counter to the list with the hardware enable bit | ||
631 | * in counter->hw_config cleared. | ||
632 | * | ||
633 | * If the counter is attached to a task which is on a CPU we use a smp | ||
634 | * call to enable it in the task context. The task might have been | ||
635 | * scheduled away, but we check this in the smp call again. | ||
636 | * | ||
637 | * Must be called with ctx->mutex held. | ||
638 | */ | ||
639 | static void | ||
640 | perf_install_in_context(struct perf_counter_context *ctx, | ||
641 | struct perf_counter *counter, | ||
642 | int cpu) | ||
643 | { | ||
644 | struct task_struct *task = ctx->task; | ||
645 | |||
646 | if (!task) { | ||
647 | /* | ||
648 | * Per cpu counters are installed via an smp call and | ||
649 | * the install is always successful. | ||
650 | */ | ||
651 | smp_call_function_single(cpu, __perf_install_in_context, | ||
652 | counter, 1); | ||
653 | return; | ||
654 | } | ||
655 | |||
656 | counter->task = task; | ||
657 | retry: | ||
658 | task_oncpu_function_call(task, __perf_install_in_context, | ||
659 | counter); | ||
660 | |||
661 | spin_lock_irq(&ctx->lock); | ||
662 | /* | ||
663 | * If the context is active and the counter has not been added yet, we need to retry the smp call. | ||
664 | */ | ||
665 | if (ctx->is_active && list_empty(&counter->list_entry)) { | ||
666 | spin_unlock_irq(&ctx->lock); | ||
667 | goto retry; | ||
668 | } | ||
669 | |||
670 | /* | ||
671 | * The lock prevents this context from being scheduled in, so we | ||
672 | * can add the counter safely if the call above did not | ||
673 | * succeed. | ||
674 | */ | ||
675 | if (list_empty(&counter->list_entry)) | ||
676 | add_counter_to_ctx(counter, ctx); | ||
677 | spin_unlock_irq(&ctx->lock); | ||
678 | } | ||
679 | |||
680 | /* | ||
681 | * Cross CPU call to enable a performance counter | ||
682 | */ | ||
683 | static void __perf_counter_enable(void *info) | ||
684 | { | ||
685 | struct perf_counter *counter = info; | ||
686 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
687 | struct perf_counter_context *ctx = counter->ctx; | ||
688 | struct perf_counter *leader = counter->group_leader; | ||
689 | unsigned long flags; | ||
690 | int err; | ||
691 | |||
692 | /* | ||
693 | * If this is a per-task counter, need to check whether this | ||
694 | * counter's task is the current task on this cpu. | ||
695 | */ | ||
696 | if (ctx->task && cpuctx->task_ctx != ctx) | ||
697 | return; | ||
698 | |||
699 | spin_lock_irqsave(&ctx->lock, flags); | ||
700 | update_context_time(ctx); | ||
701 | |||
702 | counter->prev_state = counter->state; | ||
703 | if (counter->state >= PERF_COUNTER_STATE_INACTIVE) | ||
704 | goto unlock; | ||
705 | counter->state = PERF_COUNTER_STATE_INACTIVE; | ||
706 | counter->tstamp_enabled = ctx->time - counter->total_time_enabled; | ||
707 | |||
708 | /* | ||
709 | * If the counter is in a group and isn't the group leader, | ||
710 | * then don't put it on unless the group is on. | ||
711 | */ | ||
712 | if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE) | ||
713 | goto unlock; | ||
714 | |||
715 | if (!group_can_go_on(counter, cpuctx, 1)) { | ||
716 | err = -EEXIST; | ||
717 | } else { | ||
718 | perf_disable(); | ||
719 | if (counter == leader) | ||
720 | err = group_sched_in(counter, cpuctx, ctx, | ||
721 | smp_processor_id()); | ||
722 | else | ||
723 | err = counter_sched_in(counter, cpuctx, ctx, | ||
724 | smp_processor_id()); | ||
725 | perf_enable(); | ||
726 | } | ||
727 | |||
728 | if (err) { | ||
729 | /* | ||
730 | * If this counter can't go on and it's part of a | ||
731 | * group, then the whole group has to come off. | ||
732 | */ | ||
733 | if (leader != counter) | ||
734 | group_sched_out(leader, cpuctx, ctx); | ||
735 | if (leader->hw_event.pinned) { | ||
736 | update_group_times(leader); | ||
737 | leader->state = PERF_COUNTER_STATE_ERROR; | ||
738 | } | ||
739 | } | ||
740 | |||
741 | unlock: | ||
742 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
743 | } | ||
744 | |||
745 | /* | ||
746 | * Enable a counter. | ||
747 | */ | ||
748 | static void perf_counter_enable(struct perf_counter *counter) | ||
749 | { | ||
750 | struct perf_counter_context *ctx = counter->ctx; | ||
751 | struct task_struct *task = ctx->task; | ||
752 | |||
753 | if (!task) { | ||
754 | /* | ||
755 | * Enable the counter on the cpu that it's on | ||
756 | */ | ||
757 | smp_call_function_single(counter->cpu, __perf_counter_enable, | ||
758 | counter, 1); | ||
759 | return; | ||
760 | } | ||
761 | |||
762 | spin_lock_irq(&ctx->lock); | ||
763 | if (counter->state >= PERF_COUNTER_STATE_INACTIVE) | ||
764 | goto out; | ||
765 | |||
766 | /* | ||
767 | * If the counter is in error state, clear that first. | ||
768 | * That way, if we see the counter in error state below, we | ||
769 | * know that it has gone back into error state, as distinct | ||
770 | * from the task having been scheduled away before the | ||
771 | * cross-call arrived. | ||
772 | */ | ||
773 | if (counter->state == PERF_COUNTER_STATE_ERROR) | ||
774 | counter->state = PERF_COUNTER_STATE_OFF; | ||
775 | |||
776 | retry: | ||
777 | spin_unlock_irq(&ctx->lock); | ||
778 | task_oncpu_function_call(task, __perf_counter_enable, counter); | ||
779 | |||
780 | spin_lock_irq(&ctx->lock); | ||
781 | |||
782 | /* | ||
783 | * If the context is active and the counter is still off, | ||
784 | * we need to retry the cross-call. | ||
785 | */ | ||
786 | if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF) | ||
787 | goto retry; | ||
788 | |||
789 | /* | ||
790 | * Since we have the lock this context can't be scheduled | ||
791 | * in, so we can change the state safely. | ||
792 | */ | ||
793 | if (counter->state == PERF_COUNTER_STATE_OFF) { | ||
794 | counter->state = PERF_COUNTER_STATE_INACTIVE; | ||
795 | counter->tstamp_enabled = | ||
796 | ctx->time - counter->total_time_enabled; | ||
797 | } | ||
798 | out: | ||
799 | spin_unlock_irq(&ctx->lock); | ||
800 | } | ||
801 | |||
802 | static int perf_counter_refresh(struct perf_counter *counter, int refresh) | ||
803 | { | ||
804 | /* | ||
805 | * not supported on inherited counters | ||
806 | */ | ||
807 | if (counter->hw_event.inherit) | ||
808 | return -EINVAL; | ||
809 | |||
810 | atomic_add(refresh, &counter->event_limit); | ||
811 | perf_counter_enable(counter); | ||
812 | |||
813 | return 0; | ||
814 | } | ||
815 | |||
816 | void __perf_counter_sched_out(struct perf_counter_context *ctx, | ||
817 | struct perf_cpu_context *cpuctx) | ||
818 | { | ||
819 | struct perf_counter *counter; | ||
820 | |||
821 | spin_lock(&ctx->lock); | ||
822 | ctx->is_active = 0; | ||
823 | if (likely(!ctx->nr_counters)) | ||
824 | goto out; | ||
825 | update_context_time(ctx); | ||
826 | |||
827 | perf_disable(); | ||
828 | if (ctx->nr_active) { | ||
829 | list_for_each_entry(counter, &ctx->counter_list, list_entry) | ||
830 | group_sched_out(counter, cpuctx, ctx); | ||
831 | } | ||
832 | perf_enable(); | ||
833 | out: | ||
834 | spin_unlock(&ctx->lock); | ||
835 | } | ||
836 | |||
837 | /* | ||
838 | * Called from scheduler to remove the counters of the current task, | ||
839 | * with interrupts disabled. | ||
840 | * | ||
841 | * We stop each counter and update the counter value in counter->count. | ||
842 | * | ||
843 | * This does not protect us against NMI, but disable() | ||
844 | * sets the disabled bit in the control field of counter _before_ | ||
845 | * accessing the counter control register. If a NMI hits, then it will | ||
846 | * not restart the counter. | ||
847 | */ | ||
848 | void perf_counter_task_sched_out(struct task_struct *task, int cpu) | ||
849 | { | ||
850 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
851 | struct perf_counter_context *ctx = &task->perf_counter_ctx; | ||
852 | struct pt_regs *regs; | ||
853 | |||
854 | if (likely(!cpuctx->task_ctx)) | ||
855 | return; | ||
856 | |||
857 | update_context_time(ctx); | ||
858 | |||
859 | regs = task_pt_regs(task); | ||
860 | perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0); | ||
861 | __perf_counter_sched_out(ctx, cpuctx); | ||
862 | |||
863 | cpuctx->task_ctx = NULL; | ||
864 | } | ||
865 | |||
866 | static void __perf_counter_task_sched_out(struct perf_counter_context *ctx) | ||
867 | { | ||
868 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
869 | |||
870 | __perf_counter_sched_out(ctx, cpuctx); | ||
871 | cpuctx->task_ctx = NULL; | ||
872 | } | ||
873 | |||
874 | static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx) | ||
875 | { | ||
876 | __perf_counter_sched_out(&cpuctx->ctx, cpuctx); | ||
877 | } | ||
878 | |||
879 | static void | ||
880 | __perf_counter_sched_in(struct perf_counter_context *ctx, | ||
881 | struct perf_cpu_context *cpuctx, int cpu) | ||
882 | { | ||
883 | struct perf_counter *counter; | ||
884 | int can_add_hw = 1; | ||
885 | |||
886 | spin_lock(&ctx->lock); | ||
887 | ctx->is_active = 1; | ||
888 | if (likely(!ctx->nr_counters)) | ||
889 | goto out; | ||
890 | |||
891 | ctx->timestamp = perf_clock(); | ||
892 | |||
893 | perf_disable(); | ||
894 | |||
895 | /* | ||
896 | * First go through the list and put on any pinned groups | ||
897 | * in order to give them the best chance of going on. | ||
898 | */ | ||
899 | list_for_each_entry(counter, &ctx->counter_list, list_entry) { | ||
900 | if (counter->state <= PERF_COUNTER_STATE_OFF || | ||
901 | !counter->hw_event.pinned) | ||
902 | continue; | ||
903 | if (counter->cpu != -1 && counter->cpu != cpu) | ||
904 | continue; | ||
905 | |||
906 | if (group_can_go_on(counter, cpuctx, 1)) | ||
907 | group_sched_in(counter, cpuctx, ctx, cpu); | ||
908 | |||
909 | /* | ||
910 | * If this pinned group hasn't been scheduled, | ||
911 | * put it in error state. | ||
912 | */ | ||
913 | if (counter->state == PERF_COUNTER_STATE_INACTIVE) { | ||
914 | update_group_times(counter); | ||
915 | counter->state = PERF_COUNTER_STATE_ERROR; | ||
916 | } | ||
917 | } | ||
918 | |||
919 | list_for_each_entry(counter, &ctx->counter_list, list_entry) { | ||
920 | /* | ||
921 | * Ignore counters in OFF or ERROR state, and | ||
922 | * ignore pinned counters since we did them already. | ||
923 | */ | ||
924 | if (counter->state <= PERF_COUNTER_STATE_OFF || | ||
925 | counter->hw_event.pinned) | ||
926 | continue; | ||
927 | |||
928 | /* | ||
929 | * Listen to the 'cpu' scheduling filter constraint | ||
930 | * of counters: | ||
931 | */ | ||
932 | if (counter->cpu != -1 && counter->cpu != cpu) | ||
933 | continue; | ||
934 | |||
935 | if (group_can_go_on(counter, cpuctx, can_add_hw)) { | ||
936 | if (group_sched_in(counter, cpuctx, ctx, cpu)) | ||
937 | can_add_hw = 0; | ||
938 | } | ||
939 | } | ||
940 | perf_enable(); | ||
941 | out: | ||
942 | spin_unlock(&ctx->lock); | ||
943 | } | ||
944 | |||
945 | /* | ||
946 | * Called from scheduler to add the counters of the current task | ||
947 | * with interrupts disabled. | ||
948 | * | ||
949 | * We restore the counter value and then enable it. | ||
950 | * | ||
951 | * This does not protect us against NMI, but enable() | ||
952 | * sets the enabled bit in the control field of counter _before_ | ||
953 | * accessing the counter control register. If a NMI hits, then it will | ||
954 | * keep the counter running. | ||
955 | */ | ||
956 | void perf_counter_task_sched_in(struct task_struct *task, int cpu) | ||
957 | { | ||
958 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
959 | struct perf_counter_context *ctx = &task->perf_counter_ctx; | ||
960 | |||
961 | __perf_counter_sched_in(ctx, cpuctx, cpu); | ||
962 | cpuctx->task_ctx = ctx; | ||
963 | } | ||
964 | |||
965 | static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) | ||
966 | { | ||
967 | struct perf_counter_context *ctx = &cpuctx->ctx; | ||
968 | |||
969 | __perf_counter_sched_in(ctx, cpuctx, cpu); | ||
970 | } | ||
971 | |||
972 | int perf_counter_task_disable(void) | ||
973 | { | ||
974 | struct task_struct *curr = current; | ||
975 | struct perf_counter_context *ctx = &curr->perf_counter_ctx; | ||
976 | struct perf_counter *counter; | ||
977 | unsigned long flags; | ||
978 | |||
979 | if (likely(!ctx->nr_counters)) | ||
980 | return 0; | ||
981 | |||
982 | local_irq_save(flags); | ||
983 | |||
984 | __perf_counter_task_sched_out(ctx); | ||
985 | |||
986 | spin_lock(&ctx->lock); | ||
987 | |||
988 | /* | ||
989 | * Disable all the counters: | ||
990 | */ | ||
991 | perf_disable(); | ||
992 | |||
993 | list_for_each_entry(counter, &ctx->counter_list, list_entry) { | ||
994 | if (counter->state != PERF_COUNTER_STATE_ERROR) { | ||
995 | update_group_times(counter); | ||
996 | counter->state = PERF_COUNTER_STATE_OFF; | ||
997 | } | ||
998 | } | ||
999 | |||
1000 | perf_enable(); | ||
1001 | |||
1002 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
1003 | |||
1004 | return 0; | ||
1005 | } | ||
1006 | |||
1007 | int perf_counter_task_enable(void) | ||
1008 | { | ||
1009 | struct task_struct *curr = current; | ||
1010 | struct perf_counter_context *ctx = &curr->perf_counter_ctx; | ||
1011 | struct perf_counter *counter; | ||
1012 | unsigned long flags; | ||
1013 | int cpu; | ||
1014 | |||
1015 | if (likely(!ctx->nr_counters)) | ||
1016 | return 0; | ||
1017 | |||
1018 | local_irq_save(flags); | ||
1019 | cpu = smp_processor_id(); | ||
1020 | |||
1021 | __perf_counter_task_sched_out(ctx); | ||
1022 | |||
1023 | spin_lock(&ctx->lock); | ||
1024 | |||
1025 | /* | ||
1026 | * Disable all the counters: | ||
1027 | */ | ||
1028 | perf_disable(); | ||
1029 | |||
1030 | list_for_each_entry(counter, &ctx->counter_list, list_entry) { | ||
1031 | if (counter->state > PERF_COUNTER_STATE_OFF) | ||
1032 | continue; | ||
1033 | counter->state = PERF_COUNTER_STATE_INACTIVE; | ||
1034 | counter->tstamp_enabled = | ||
1035 | ctx->time - counter->total_time_enabled; | ||
1036 | counter->hw_event.disabled = 0; | ||
1037 | } | ||
1038 | perf_enable(); | ||
1039 | |||
1040 | spin_unlock(&ctx->lock); | ||
1041 | |||
1042 | perf_counter_task_sched_in(curr, cpu); | ||
1043 | |||
1044 | local_irq_restore(flags); | ||
1045 | |||
1046 | return 0; | ||
1047 | } | ||
1048 | |||
1049 | void perf_adjust_freq(struct perf_counter_context *ctx) | ||
1050 | { | ||
1051 | struct perf_counter *counter; | ||
1052 | u64 irq_period; | ||
1053 | u64 events, period; | ||
1054 | s64 delta; | ||
1055 | |||
1056 | spin_lock(&ctx->lock); | ||
1057 | list_for_each_entry(counter, &ctx->counter_list, list_entry) { | ||
1058 | if (counter->state != PERF_COUNTER_STATE_ACTIVE) | ||
1059 | continue; | ||
1060 | |||
1061 | if (!counter->hw_event.freq || !counter->hw_event.irq_freq) | ||
1062 | continue; | ||
1063 | |||
1064 | events = HZ * counter->hw.interrupts * counter->hw.irq_period; | ||
1065 | period = div64_u64(events, counter->hw_event.irq_freq); | ||
1066 | |||
1067 | delta = (s64)(1 + period - counter->hw.irq_period); | ||
1068 | delta >>= 1; | ||
1069 | |||
1070 | irq_period = counter->hw.irq_period + delta; | ||
1071 | |||
1072 | if (!irq_period) | ||
1073 | irq_period = 1; | ||
1074 | |||
1075 | counter->hw.irq_period = irq_period; | ||
1076 | counter->hw.interrupts = 0; | ||
1077 | } | ||
1078 | spin_unlock(&ctx->lock); | ||
1079 | } | ||
1080 | |||
1081 | /* | ||
1082 | * Round-robin a context's counters: | ||
1083 | */ | ||
1084 | static void rotate_ctx(struct perf_counter_context *ctx) | ||
1085 | { | ||
1086 | struct perf_counter *counter; | ||
1087 | |||
1088 | if (!ctx->nr_counters) | ||
1089 | return; | ||
1090 | |||
1091 | spin_lock(&ctx->lock); | ||
1092 | /* | ||
1093 | * Rotate the first entry last (works just fine for group counters too): | ||
1094 | */ | ||
1095 | perf_disable(); | ||
1096 | list_for_each_entry(counter, &ctx->counter_list, list_entry) { | ||
1097 | list_move_tail(&counter->list_entry, &ctx->counter_list); | ||
1098 | break; | ||
1099 | } | ||
1100 | perf_enable(); | ||
1101 | |||
1102 | spin_unlock(&ctx->lock); | ||
1103 | } | ||
1104 | |||
1105 | void perf_counter_task_tick(struct task_struct *curr, int cpu) | ||
1106 | { | ||
1107 | struct perf_cpu_context *cpuctx; | ||
1108 | struct perf_counter_context *ctx; | ||
1109 | |||
1110 | if (!atomic_read(&nr_counters)) | ||
1111 | return; | ||
1112 | |||
1113 | cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
1114 | ctx = &curr->perf_counter_ctx; | ||
1115 | |||
1116 | perf_adjust_freq(&cpuctx->ctx); | ||
1117 | perf_adjust_freq(ctx); | ||
1118 | |||
1119 | perf_counter_cpu_sched_out(cpuctx); | ||
1120 | __perf_counter_task_sched_out(ctx); | ||
1121 | |||
1122 | rotate_ctx(&cpuctx->ctx); | ||
1123 | rotate_ctx(ctx); | ||
1124 | |||
1125 | perf_counter_cpu_sched_in(cpuctx, cpu); | ||
1126 | perf_counter_task_sched_in(curr, cpu); | ||
1127 | } | ||
1128 | |||
1129 | /* | ||
1130 | * Cross CPU call to read the hardware counter | ||
1131 | */ | ||
1132 | static void __read(void *info) | ||
1133 | { | ||
1134 | struct perf_counter *counter = info; | ||
1135 | struct perf_counter_context *ctx = counter->ctx; | ||
1136 | unsigned long flags; | ||
1137 | |||
1138 | local_irq_save(flags); | ||
1139 | if (ctx->is_active) | ||
1140 | update_context_time(ctx); | ||
1141 | counter->pmu->read(counter); | ||
1142 | update_counter_times(counter); | ||
1143 | local_irq_restore(flags); | ||
1144 | } | ||
1145 | |||
1146 | static u64 perf_counter_read(struct perf_counter *counter) | ||
1147 | { | ||
1148 | /* | ||
1149 | * If counter is enabled and currently active on a CPU, update the | ||
1150 | * value in the counter structure: | ||
1151 | */ | ||
1152 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) { | ||
1153 | smp_call_function_single(counter->oncpu, | ||
1154 | __read, counter, 1); | ||
1155 | } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { | ||
1156 | update_counter_times(counter); | ||
1157 | } | ||
1158 | |||
1159 | return atomic64_read(&counter->count); | ||
1160 | } | ||
1161 | |||
1162 | static void put_context(struct perf_counter_context *ctx) | ||
1163 | { | ||
1164 | if (ctx->task) | ||
1165 | put_task_struct(ctx->task); | ||
1166 | } | ||
1167 | |||
1168 | static struct perf_counter_context *find_get_context(pid_t pid, int cpu) | ||
1169 | { | ||
1170 | struct perf_cpu_context *cpuctx; | ||
1171 | struct perf_counter_context *ctx; | ||
1172 | struct task_struct *task; | ||
1173 | |||
1174 | /* | ||
1175 | * If cpu is not a wildcard then this is a percpu counter: | ||
1176 | */ | ||
1177 | if (cpu != -1) { | ||
1178 | /* Must be root to operate on a CPU counter: */ | ||
1179 | if (sysctl_perf_counter_priv && !capable(CAP_SYS_ADMIN)) | ||
1180 | return ERR_PTR(-EACCES); | ||
1181 | |||
1182 | if (cpu < 0 || cpu > num_possible_cpus()) | ||
1183 | return ERR_PTR(-EINVAL); | ||
1184 | |||
1185 | /* | ||
1186 | * We could be clever and allow to attach a counter to an | ||
1187 | * offline CPU and activate it when the CPU comes up, but | ||
1188 | * that's for later. | ||
1189 | */ | ||
1190 | if (!cpu_isset(cpu, cpu_online_map)) | ||
1191 | return ERR_PTR(-ENODEV); | ||
1192 | |||
1193 | cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
1194 | ctx = &cpuctx->ctx; | ||
1195 | |||
1196 | return ctx; | ||
1197 | } | ||
1198 | |||
1199 | rcu_read_lock(); | ||
1200 | if (!pid) | ||
1201 | task = current; | ||
1202 | else | ||
1203 | task = find_task_by_vpid(pid); | ||
1204 | if (task) | ||
1205 | get_task_struct(task); | ||
1206 | rcu_read_unlock(); | ||
1207 | |||
1208 | if (!task) | ||
1209 | return ERR_PTR(-ESRCH); | ||
1210 | |||
1211 | ctx = &task->perf_counter_ctx; | ||
1212 | ctx->task = task; | ||
1213 | |||
1214 | /* Reuse ptrace permission checks for now. */ | ||
1215 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) { | ||
1216 | put_context(ctx); | ||
1217 | return ERR_PTR(-EACCES); | ||
1218 | } | ||
1219 | |||
1220 | return ctx; | ||
1221 | } | ||
1222 | |||
1223 | static void free_counter_rcu(struct rcu_head *head) | ||
1224 | { | ||
1225 | struct perf_counter *counter; | ||
1226 | |||
1227 | counter = container_of(head, struct perf_counter, rcu_head); | ||
1228 | kfree(counter); | ||
1229 | } | ||
1230 | |||
1231 | static void perf_pending_sync(struct perf_counter *counter); | ||
1232 | |||
1233 | static void free_counter(struct perf_counter *counter) | ||
1234 | { | ||
1235 | perf_pending_sync(counter); | ||
1236 | |||
1237 | atomic_dec(&nr_counters); | ||
1238 | if (counter->hw_event.mmap) | ||
1239 | atomic_dec(&nr_mmap_tracking); | ||
1240 | if (counter->hw_event.munmap) | ||
1241 | atomic_dec(&nr_munmap_tracking); | ||
1242 | if (counter->hw_event.comm) | ||
1243 | atomic_dec(&nr_comm_tracking); | ||
1244 | |||
1245 | if (counter->destroy) | ||
1246 | counter->destroy(counter); | ||
1247 | |||
1248 | call_rcu(&counter->rcu_head, free_counter_rcu); | ||
1249 | } | ||
1250 | |||
1251 | /* | ||
1252 | * Called when the last reference to the file is gone. | ||
1253 | */ | ||
1254 | static int perf_release(struct inode *inode, struct file *file) | ||
1255 | { | ||
1256 | struct perf_counter *counter = file->private_data; | ||
1257 | struct perf_counter_context *ctx = counter->ctx; | ||
1258 | |||
1259 | file->private_data = NULL; | ||
1260 | |||
1261 | mutex_lock(&ctx->mutex); | ||
1262 | mutex_lock(&counter->mutex); | ||
1263 | |||
1264 | perf_counter_remove_from_context(counter); | ||
1265 | |||
1266 | mutex_unlock(&counter->mutex); | ||
1267 | mutex_unlock(&ctx->mutex); | ||
1268 | |||
1269 | free_counter(counter); | ||
1270 | put_context(ctx); | ||
1271 | |||
1272 | return 0; | ||
1273 | } | ||
1274 | |||
1275 | /* | ||
1276 | * Read the performance counter - simple non blocking version for now | ||
1277 | */ | ||
1278 | static ssize_t | ||
1279 | perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) | ||
1280 | { | ||
1281 | u64 values[3]; | ||
1282 | int n; | ||
1283 | |||
1284 | /* | ||
1285 | * Return end-of-file for a read on a counter that is in | ||
1286 | * error state (i.e. because it was pinned but it couldn't be | ||
1287 | * scheduled on to the CPU at some point). | ||
1288 | */ | ||
1289 | if (counter->state == PERF_COUNTER_STATE_ERROR) | ||
1290 | return 0; | ||
1291 | |||
1292 | mutex_lock(&counter->mutex); | ||
1293 | values[0] = perf_counter_read(counter); | ||
1294 | n = 1; | ||
1295 | if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | ||
1296 | values[n++] = counter->total_time_enabled + | ||
1297 | atomic64_read(&counter->child_total_time_enabled); | ||
1298 | if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | ||
1299 | values[n++] = counter->total_time_running + | ||
1300 | atomic64_read(&counter->child_total_time_running); | ||
1301 | mutex_unlock(&counter->mutex); | ||
1302 | |||
1303 | if (count < n * sizeof(u64)) | ||
1304 | return -EINVAL; | ||
1305 | count = n * sizeof(u64); | ||
1306 | |||
1307 | if (copy_to_user(buf, values, count)) | ||
1308 | return -EFAULT; | ||
1309 | |||
1310 | return count; | ||
1311 | } | ||
1312 | |||
1313 | static ssize_t | ||
1314 | perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) | ||
1315 | { | ||
1316 | struct perf_counter *counter = file->private_data; | ||
1317 | |||
1318 | return perf_read_hw(counter, buf, count); | ||
1319 | } | ||
1320 | |||
1321 | static unsigned int perf_poll(struct file *file, poll_table *wait) | ||
1322 | { | ||
1323 | struct perf_counter *counter = file->private_data; | ||
1324 | struct perf_mmap_data *data; | ||
1325 | unsigned int events = POLL_HUP; | ||
1326 | |||
1327 | rcu_read_lock(); | ||
1328 | data = rcu_dereference(counter->data); | ||
1329 | if (data) | ||
1330 | events = atomic_xchg(&data->poll, 0); | ||
1331 | rcu_read_unlock(); | ||
1332 | |||
1333 | poll_wait(file, &counter->waitq, wait); | ||
1334 | |||
1335 | return events; | ||
1336 | } | ||
1337 | |||
1338 | static void perf_counter_reset(struct perf_counter *counter) | ||
1339 | { | ||
1340 | (void)perf_counter_read(counter); | ||
1341 | atomic64_set(&counter->count, 0); | ||
1342 | perf_counter_update_userpage(counter); | ||
1343 | } | ||
1344 | |||
1345 | static void perf_counter_for_each_sibling(struct perf_counter *counter, | ||
1346 | void (*func)(struct perf_counter *)) | ||
1347 | { | ||
1348 | struct perf_counter_context *ctx = counter->ctx; | ||
1349 | struct perf_counter *sibling; | ||
1350 | |||
1351 | spin_lock_irq(&ctx->lock); | ||
1352 | counter = counter->group_leader; | ||
1353 | |||
1354 | func(counter); | ||
1355 | list_for_each_entry(sibling, &counter->sibling_list, list_entry) | ||
1356 | func(sibling); | ||
1357 | spin_unlock_irq(&ctx->lock); | ||
1358 | } | ||
1359 | |||
1360 | static void perf_counter_for_each_child(struct perf_counter *counter, | ||
1361 | void (*func)(struct perf_counter *)) | ||
1362 | { | ||
1363 | struct perf_counter *child; | ||
1364 | |||
1365 | mutex_lock(&counter->mutex); | ||
1366 | func(counter); | ||
1367 | list_for_each_entry(child, &counter->child_list, child_list) | ||
1368 | func(child); | ||
1369 | mutex_unlock(&counter->mutex); | ||
1370 | } | ||
1371 | |||
1372 | static void perf_counter_for_each(struct perf_counter *counter, | ||
1373 | void (*func)(struct perf_counter *)) | ||
1374 | { | ||
1375 | struct perf_counter *child; | ||
1376 | |||
1377 | mutex_lock(&counter->mutex); | ||
1378 | perf_counter_for_each_sibling(counter, func); | ||
1379 | list_for_each_entry(child, &counter->child_list, child_list) | ||
1380 | perf_counter_for_each_sibling(child, func); | ||
1381 | mutex_unlock(&counter->mutex); | ||
1382 | } | ||
1383 | |||
1384 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
1385 | { | ||
1386 | struct perf_counter *counter = file->private_data; | ||
1387 | void (*func)(struct perf_counter *); | ||
1388 | u32 flags = arg; | ||
1389 | |||
1390 | switch (cmd) { | ||
1391 | case PERF_COUNTER_IOC_ENABLE: | ||
1392 | func = perf_counter_enable; | ||
1393 | break; | ||
1394 | case PERF_COUNTER_IOC_DISABLE: | ||
1395 | func = perf_counter_disable; | ||
1396 | break; | ||
1397 | case PERF_COUNTER_IOC_RESET: | ||
1398 | func = perf_counter_reset; | ||
1399 | break; | ||
1400 | |||
1401 | case PERF_COUNTER_IOC_REFRESH: | ||
1402 | return perf_counter_refresh(counter, arg); | ||
1403 | default: | ||
1404 | return -ENOTTY; | ||
1405 | } | ||
1406 | |||
1407 | if (flags & PERF_IOC_FLAG_GROUP) | ||
1408 | perf_counter_for_each(counter, func); | ||
1409 | else | ||
1410 | perf_counter_for_each_child(counter, func); | ||
1411 | |||
1412 | return 0; | ||
1413 | } | ||
1414 | |||
1415 | /* | ||
1416 | * Callers need to ensure there can be no nesting of this function, otherwise | ||
1417 | * the seqlock logic goes bad. We can not serialize this because the arch | ||
1418 | * code calls this from NMI context. | ||
1419 | */ | ||
1420 | void perf_counter_update_userpage(struct perf_counter *counter) | ||
1421 | { | ||
1422 | struct perf_mmap_data *data; | ||
1423 | struct perf_counter_mmap_page *userpg; | ||
1424 | |||
1425 | rcu_read_lock(); | ||
1426 | data = rcu_dereference(counter->data); | ||
1427 | if (!data) | ||
1428 | goto unlock; | ||
1429 | |||
1430 | userpg = data->user_page; | ||
1431 | |||
1432 | /* | ||
1433 | * Disable preemption so as to not let the corresponding user-space | ||
1434 | * spin too long if we get preempted. | ||
1435 | */ | ||
1436 | preempt_disable(); | ||
1437 | ++userpg->lock; | ||
1438 | barrier(); | ||
1439 | userpg->index = counter->hw.idx; | ||
1440 | userpg->offset = atomic64_read(&counter->count); | ||
1441 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) | ||
1442 | userpg->offset -= atomic64_read(&counter->hw.prev_count); | ||
1443 | |||
1444 | barrier(); | ||
1445 | ++userpg->lock; | ||
1446 | preempt_enable(); | ||
1447 | unlock: | ||
1448 | rcu_read_unlock(); | ||
1449 | } | ||
1450 | |||
1451 | static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
1452 | { | ||
1453 | struct perf_counter *counter = vma->vm_file->private_data; | ||
1454 | struct perf_mmap_data *data; | ||
1455 | int ret = VM_FAULT_SIGBUS; | ||
1456 | |||
1457 | rcu_read_lock(); | ||
1458 | data = rcu_dereference(counter->data); | ||
1459 | if (!data) | ||
1460 | goto unlock; | ||
1461 | |||
1462 | if (vmf->pgoff == 0) { | ||
1463 | vmf->page = virt_to_page(data->user_page); | ||
1464 | } else { | ||
1465 | int nr = vmf->pgoff - 1; | ||
1466 | |||
1467 | if ((unsigned)nr > data->nr_pages) | ||
1468 | goto unlock; | ||
1469 | |||
1470 | vmf->page = virt_to_page(data->data_pages[nr]); | ||
1471 | } | ||
1472 | get_page(vmf->page); | ||
1473 | ret = 0; | ||
1474 | unlock: | ||
1475 | rcu_read_unlock(); | ||
1476 | |||
1477 | return ret; | ||
1478 | } | ||
1479 | |||
1480 | static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages) | ||
1481 | { | ||
1482 | struct perf_mmap_data *data; | ||
1483 | unsigned long size; | ||
1484 | int i; | ||
1485 | |||
1486 | WARN_ON(atomic_read(&counter->mmap_count)); | ||
1487 | |||
1488 | size = sizeof(struct perf_mmap_data); | ||
1489 | size += nr_pages * sizeof(void *); | ||
1490 | |||
1491 | data = kzalloc(size, GFP_KERNEL); | ||
1492 | if (!data) | ||
1493 | goto fail; | ||
1494 | |||
1495 | data->user_page = (void *)get_zeroed_page(GFP_KERNEL); | ||
1496 | if (!data->user_page) | ||
1497 | goto fail_user_page; | ||
1498 | |||
1499 | for (i = 0; i < nr_pages; i++) { | ||
1500 | data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL); | ||
1501 | if (!data->data_pages[i]) | ||
1502 | goto fail_data_pages; | ||
1503 | } | ||
1504 | |||
1505 | data->nr_pages = nr_pages; | ||
1506 | atomic_set(&data->lock, -1); | ||
1507 | |||
1508 | rcu_assign_pointer(counter->data, data); | ||
1509 | |||
1510 | return 0; | ||
1511 | |||
1512 | fail_data_pages: | ||
1513 | for (i--; i >= 0; i--) | ||
1514 | free_page((unsigned long)data->data_pages[i]); | ||
1515 | |||
1516 | free_page((unsigned long)data->user_page); | ||
1517 | |||
1518 | fail_user_page: | ||
1519 | kfree(data); | ||
1520 | |||
1521 | fail: | ||
1522 | return -ENOMEM; | ||
1523 | } | ||
1524 | |||
1525 | static void __perf_mmap_data_free(struct rcu_head *rcu_head) | ||
1526 | { | ||
1527 | struct perf_mmap_data *data = container_of(rcu_head, | ||
1528 | struct perf_mmap_data, rcu_head); | ||
1529 | int i; | ||
1530 | |||
1531 | free_page((unsigned long)data->user_page); | ||
1532 | for (i = 0; i < data->nr_pages; i++) | ||
1533 | free_page((unsigned long)data->data_pages[i]); | ||
1534 | kfree(data); | ||
1535 | } | ||
1536 | |||
1537 | static void perf_mmap_data_free(struct perf_counter *counter) | ||
1538 | { | ||
1539 | struct perf_mmap_data *data = counter->data; | ||
1540 | |||
1541 | WARN_ON(atomic_read(&counter->mmap_count)); | ||
1542 | |||
1543 | rcu_assign_pointer(counter->data, NULL); | ||
1544 | call_rcu(&data->rcu_head, __perf_mmap_data_free); | ||
1545 | } | ||
1546 | |||
1547 | static void perf_mmap_open(struct vm_area_struct *vma) | ||
1548 | { | ||
1549 | struct perf_counter *counter = vma->vm_file->private_data; | ||
1550 | |||
1551 | atomic_inc(&counter->mmap_count); | ||
1552 | } | ||
1553 | |||
1554 | static void perf_mmap_close(struct vm_area_struct *vma) | ||
1555 | { | ||
1556 | struct perf_counter *counter = vma->vm_file->private_data; | ||
1557 | |||
1558 | if (atomic_dec_and_mutex_lock(&counter->mmap_count, | ||
1559 | &counter->mmap_mutex)) { | ||
1560 | struct user_struct *user = current_user(); | ||
1561 | |||
1562 | atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm); | ||
1563 | vma->vm_mm->locked_vm -= counter->data->nr_locked; | ||
1564 | perf_mmap_data_free(counter); | ||
1565 | mutex_unlock(&counter->mmap_mutex); | ||
1566 | } | ||
1567 | } | ||
1568 | |||
1569 | static struct vm_operations_struct perf_mmap_vmops = { | ||
1570 | .open = perf_mmap_open, | ||
1571 | .close = perf_mmap_close, | ||
1572 | .fault = perf_mmap_fault, | ||
1573 | }; | ||
1574 | |||
1575 | static int perf_mmap(struct file *file, struct vm_area_struct *vma) | ||
1576 | { | ||
1577 | struct perf_counter *counter = file->private_data; | ||
1578 | struct user_struct *user = current_user(); | ||
1579 | unsigned long vma_size; | ||
1580 | unsigned long nr_pages; | ||
1581 | unsigned long user_locked, user_lock_limit; | ||
1582 | unsigned long locked, lock_limit; | ||
1583 | long user_extra, extra; | ||
1584 | int ret = 0; | ||
1585 | |||
1586 | if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE)) | ||
1587 | return -EINVAL; | ||
1588 | |||
1589 | vma_size = vma->vm_end - vma->vm_start; | ||
1590 | nr_pages = (vma_size / PAGE_SIZE) - 1; | ||
1591 | |||
1592 | /* | ||
1593 | * If we have data pages ensure they're a power-of-two number, so we | ||
1594 | * can do bitmasks instead of modulo. | ||
1595 | */ | ||
1596 | if (nr_pages != 0 && !is_power_of_2(nr_pages)) | ||
1597 | return -EINVAL; | ||
1598 | |||
1599 | if (vma_size != PAGE_SIZE * (1 + nr_pages)) | ||
1600 | return -EINVAL; | ||
1601 | |||
1602 | if (vma->vm_pgoff != 0) | ||
1603 | return -EINVAL; | ||
1604 | |||
1605 | mutex_lock(&counter->mmap_mutex); | ||
1606 | if (atomic_inc_not_zero(&counter->mmap_count)) { | ||
1607 | if (nr_pages != counter->data->nr_pages) | ||
1608 | ret = -EINVAL; | ||
1609 | goto unlock; | ||
1610 | } | ||
1611 | |||
1612 | user_extra = nr_pages + 1; | ||
1613 | user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10); | ||
1614 | user_locked = atomic_long_read(&user->locked_vm) + user_extra; | ||
1615 | |||
1616 | extra = 0; | ||
1617 | if (user_locked > user_lock_limit) | ||
1618 | extra = user_locked - user_lock_limit; | ||
1619 | |||
1620 | lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; | ||
1621 | lock_limit >>= PAGE_SHIFT; | ||
1622 | locked = vma->vm_mm->locked_vm + extra; | ||
1623 | |||
1624 | if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { | ||
1625 | ret = -EPERM; | ||
1626 | goto unlock; | ||
1627 | } | ||
1628 | |||
1629 | WARN_ON(counter->data); | ||
1630 | ret = perf_mmap_data_alloc(counter, nr_pages); | ||
1631 | if (ret) | ||
1632 | goto unlock; | ||
1633 | |||
1634 | atomic_set(&counter->mmap_count, 1); | ||
1635 | atomic_long_add(user_extra, &user->locked_vm); | ||
1636 | vma->vm_mm->locked_vm += extra; | ||
1637 | counter->data->nr_locked = extra; | ||
1638 | unlock: | ||
1639 | mutex_unlock(&counter->mmap_mutex); | ||
1640 | |||
1641 | vma->vm_flags &= ~VM_MAYWRITE; | ||
1642 | vma->vm_flags |= VM_RESERVED; | ||
1643 | vma->vm_ops = &perf_mmap_vmops; | ||
1644 | |||
1645 | return ret; | ||
1646 | } | ||
1647 | |||
1648 | static int perf_fasync(int fd, struct file *filp, int on) | ||
1649 | { | ||
1650 | struct perf_counter *counter = filp->private_data; | ||
1651 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
1652 | int retval; | ||
1653 | |||
1654 | mutex_lock(&inode->i_mutex); | ||
1655 | retval = fasync_helper(fd, filp, on, &counter->fasync); | ||
1656 | mutex_unlock(&inode->i_mutex); | ||
1657 | |||
1658 | if (retval < 0) | ||
1659 | return retval; | ||
1660 | |||
1661 | return 0; | ||
1662 | } | ||
1663 | |||
1664 | static const struct file_operations perf_fops = { | ||
1665 | .release = perf_release, | ||
1666 | .read = perf_read, | ||
1667 | .poll = perf_poll, | ||
1668 | .unlocked_ioctl = perf_ioctl, | ||
1669 | .compat_ioctl = perf_ioctl, | ||
1670 | .mmap = perf_mmap, | ||
1671 | .fasync = perf_fasync, | ||
1672 | }; | ||
1673 | |||
1674 | /* | ||
1675 | * Perf counter wakeup | ||
1676 | * | ||
1677 | * If there's data, ensure we set the poll() state and publish everything | ||
1678 | * to user-space before waking everybody up. | ||
1679 | */ | ||
1680 | |||
1681 | void perf_counter_wakeup(struct perf_counter *counter) | ||
1682 | { | ||
1683 | wake_up_all(&counter->waitq); | ||
1684 | |||
1685 | if (counter->pending_kill) { | ||
1686 | kill_fasync(&counter->fasync, SIGIO, counter->pending_kill); | ||
1687 | counter->pending_kill = 0; | ||
1688 | } | ||
1689 | } | ||
1690 | |||
1691 | /* | ||
1692 | * Pending wakeups | ||
1693 | * | ||
1694 | * Handle the case where we need to wakeup up from NMI (or rq->lock) context. | ||
1695 | * | ||
1696 | * The NMI bit means we cannot possibly take locks. Therefore, maintain a | ||
1697 | * single linked list and use cmpxchg() to add entries lockless. | ||
1698 | */ | ||
1699 | |||
1700 | static void perf_pending_counter(struct perf_pending_entry *entry) | ||
1701 | { | ||
1702 | struct perf_counter *counter = container_of(entry, | ||
1703 | struct perf_counter, pending); | ||
1704 | |||
1705 | if (counter->pending_disable) { | ||
1706 | counter->pending_disable = 0; | ||
1707 | perf_counter_disable(counter); | ||
1708 | } | ||
1709 | |||
1710 | if (counter->pending_wakeup) { | ||
1711 | counter->pending_wakeup = 0; | ||
1712 | perf_counter_wakeup(counter); | ||
1713 | } | ||
1714 | } | ||
1715 | |||
/*
 * End-of-list marker for the pending list.  It is deliberately not NULL:
 * a NULL ->next means "entry is not queued" (see perf_pending_queue()).
 */
#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)

/* Per-CPU head of the pending list; initially empty (PENDING_TAIL). */
static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
	PENDING_TAIL,
};
1721 | |||
/*
 * Queue @entry on the current CPU's pending list so that @func gets called
 * on it from __perf_pending_run().  Only cmpxchg() is used, no locks.
 *
 * An entry that is already queued (->next != NULL) is left untouched, so
 * in that case @func is not recorded.
 */
static void perf_pending_queue(struct perf_pending_entry *entry,
			       void (*func)(struct perf_pending_entry *))
{
	struct perf_pending_entry **head;

	/* Claim the entry (NULL -> PENDING_TAIL); bail out if already claimed. */
	if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
		return;

	entry->func = func;

	head = &get_cpu_var(perf_pending_head);

	/* Push onto the list head; retry if the head changed underneath us. */
	do {
		entry->next = *head;
	} while (cmpxchg(head, entry->next, entry) != entry->next);

	/* NOTE(review): presumably arranges for perf_counter_do_pending() to run - confirm. */
	set_perf_counter_pending();

	put_cpu_var(perf_pending_head);
}
1742 | |||
/*
 * Detach this CPU's whole pending list in one xchg() and run the callback
 * of every entry on it.
 *
 * Returns the number of entries processed.
 */
static int __perf_pending_run(void)
{
	struct perf_pending_entry *list;
	int nr = 0;

	/* Take the entire list and leave an empty one behind. */
	list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
	while (list != PENDING_TAIL) {
		void (*func)(struct perf_pending_entry *);
		struct perf_pending_entry *entry = list;

		list = list->next;

		/* Read ->func before ->next = NULL marks the entry as re-queueable. */
		func = entry->func;
		entry->next = NULL;
		/*
		 * Ensure we observe the unqueue before we issue the wakeup,
		 * so that we won't be waiting forever.
		 *	-- see perf_not_pending().
		 */
		smp_wmb();

		func(entry);
		nr++;
	}

	return nr;
}
1770 | |||
/*
 * Wait condition for perf_pending_sync(): returns non-zero once @counter's
 * pending entry is no longer queued (->next == NULL).
 */
static inline int perf_not_pending(struct perf_counter *counter)
{
	/*
	 * If we flush on whatever cpu we run, there is a chance we don't
	 * need to wait.
	 */
	get_cpu();
	__perf_pending_run();
	put_cpu();

	/*
	 * Ensure we see the proper queue state before going to sleep
	 * so that we do not miss the wakeup. -- pairs with the smp_wmb()
	 * in __perf_pending_run()
	 */
	smp_rmb();
	return counter->pending.next == NULL;
}
1788 | |||
/*
 * Sleep on the counter's wait queue until its pending entry has been
 * processed, i.e. until perf_not_pending() returns true.
 */
static void perf_pending_sync(struct perf_counter *counter)
{
	wait_event(counter->waitq, perf_not_pending(counter));
}
1793 | |||
/*
 * Non-static entry point that runs the current CPU's pending list.
 * The number of entries processed is discarded.
 */
void perf_counter_do_pending(void)
{
	__perf_pending_run();
}
1798 | |||
1799 | /* | ||
1800 | * Callchain support -- arch specific | ||
1801 | */ | ||
1802 | |||
/*
 * Default (weak) implementation: no callchain available.  Returning NULL
 * makes perf_counter_output() omit the callchain from the sample.
 */
__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{
	return NULL;
}
1807 | |||
1808 | /* | ||
1809 | * Output | ||
1810 | */ | ||
1811 | |||
/*
 * State of one in-progress record, set up by perf_output_begin() and
 * consumed by perf_output_copy() / perf_output_end().
 */
struct perf_output_handle {
	/* counter whose buffer is written (the parent for inherited counters) */
	struct perf_counter	*counter;
	/* that counter's mmap data, obtained under rcu_read_lock() */
	struct perf_mmap_data	*data;
	/* current write position; advanced by perf_output_copy() */
	unsigned int		offset;
	/* end of the region reserved for this record */
	unsigned int		head;
	/* non-zero: wakeups are deferred through the pending list */
	int			nmi;
	/* non-zero: the record counts towards hw_event.wakeup_events */
	int			overflow;
	/* set by perf_output_lock() when this handle acquired data->lock */
	int			locked;
	/* interrupt state saved by local_irq_save() in perf_output_lock() */
	unsigned long		flags;
};
1822 | |||
1823 | static void perf_output_wakeup(struct perf_output_handle *handle) | ||
1824 | { | ||
1825 | atomic_set(&handle->data->poll, POLL_IN); | ||
1826 | |||
1827 | if (handle->nmi) { | ||
1828 | handle->counter->pending_wakeup = 1; | ||
1829 | perf_pending_queue(&handle->counter->pending, | ||
1830 | perf_pending_counter); | ||
1831 | } else | ||
1832 | perf_counter_wakeup(handle->counter); | ||
1833 | } | ||
1834 | |||
/*
 * Curious locking construct.
 *
 * We need to ensure a later event doesn't publish a head when a former
 * event isn't done writing. However since we need to deal with NMIs we
 * cannot fully serialize things.
 *
 * What we do is serialize between CPUs so we only have to deal with NMI
 * nesting on a single CPU.
 *
 * We only publish the head (and generate a wakeup) when the outer-most
 * event completes.
 *
 * data->lock holds the id of the owning CPU, or -1 when free.  On return
 * interrupts are disabled (state saved in handle->flags) and
 * handle->locked tells perf_output_unlock() whether this call took the lock.
 */
static void perf_output_lock(struct perf_output_handle *handle)
{
	struct perf_mmap_data *data = handle->data;
	int cpu;

	handle->locked = 0;

	local_irq_save(handle->flags);
	cpu = smp_processor_id();

	/* NMI nested inside a writer on this CPU: the lock is already ours. */
	if (in_nmi() && atomic_read(&data->lock) == cpu)
		return;

	/* Spin until the lock word goes from -1 (free) to our CPU id. */
	while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
		cpu_relax();

	handle->locked = 1;
}
1866 | |||
/*
 * Counterpart of perf_output_lock().
 *
 * Every caller records the current head in data->done_head.  Only the
 * handle that actually took the lock (handle->locked) publishes it to
 * user_page->data_head, releases the lock and issues a wakeup if one was
 * requested through data->wakeup.  Interrupts are restored in all cases.
 */
static void perf_output_unlock(struct perf_output_handle *handle)
{
	struct perf_mmap_data *data = handle->data;
	int head, cpu;

	data->done_head = data->head;

	/* Nested (NMI) writer: leave publishing to the outer-most writer. */
	if (!handle->locked)
		goto out;

again:
	/*
	 * The xchg implies a full barrier that ensures all writes are done
	 * before we publish the new head, matched by a rmb() in userspace when
	 * reading this position.
	 */
	while ((head = atomic_xchg(&data->done_head, 0)))
		data->user_page->data_head = head;

	/*
	 * NMI can happen here, which means we can miss a done_head update.
	 */

	cpu = atomic_xchg(&data->lock, -1);
	WARN_ON_ONCE(cpu != smp_processor_id());

	/*
	 * Therefore we have to validate we did not indeed do so.
	 */
	if (unlikely(atomic_read(&data->done_head))) {
		/*
		 * Since we had it locked, we can lock it again.
		 */
		while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
			cpu_relax();

		goto again;
	}

	/* Consume a pending wakeup request, if any. */
	if (atomic_xchg(&data->wakeup, 0))
		perf_output_wakeup(handle);
out:
	local_irq_restore(handle->flags);
}
1911 | |||
1912 | static int perf_output_begin(struct perf_output_handle *handle, | ||
1913 | struct perf_counter *counter, unsigned int size, | ||
1914 | int nmi, int overflow) | ||
1915 | { | ||
1916 | struct perf_mmap_data *data; | ||
1917 | unsigned int offset, head; | ||
1918 | |||
1919 | /* | ||
1920 | * For inherited counters we send all the output towards the parent. | ||
1921 | */ | ||
1922 | if (counter->parent) | ||
1923 | counter = counter->parent; | ||
1924 | |||
1925 | rcu_read_lock(); | ||
1926 | data = rcu_dereference(counter->data); | ||
1927 | if (!data) | ||
1928 | goto out; | ||
1929 | |||
1930 | handle->data = data; | ||
1931 | handle->counter = counter; | ||
1932 | handle->nmi = nmi; | ||
1933 | handle->overflow = overflow; | ||
1934 | |||
1935 | if (!data->nr_pages) | ||
1936 | goto fail; | ||
1937 | |||
1938 | perf_output_lock(handle); | ||
1939 | |||
1940 | do { | ||
1941 | offset = head = atomic_read(&data->head); | ||
1942 | head += size; | ||
1943 | } while (atomic_cmpxchg(&data->head, offset, head) != offset); | ||
1944 | |||
1945 | handle->offset = offset; | ||
1946 | handle->head = head; | ||
1947 | |||
1948 | if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT)) | ||
1949 | atomic_set(&data->wakeup, 1); | ||
1950 | |||
1951 | return 0; | ||
1952 | |||
1953 | fail: | ||
1954 | perf_output_wakeup(handle); | ||
1955 | out: | ||
1956 | rcu_read_unlock(); | ||
1957 | |||
1958 | return -ENOSPC; | ||
1959 | } | ||
1960 | |||
1961 | static void perf_output_copy(struct perf_output_handle *handle, | ||
1962 | void *buf, unsigned int len) | ||
1963 | { | ||
1964 | unsigned int pages_mask; | ||
1965 | unsigned int offset; | ||
1966 | unsigned int size; | ||
1967 | void **pages; | ||
1968 | |||
1969 | offset = handle->offset; | ||
1970 | pages_mask = handle->data->nr_pages - 1; | ||
1971 | pages = handle->data->data_pages; | ||
1972 | |||
1973 | do { | ||
1974 | unsigned int page_offset; | ||
1975 | int nr; | ||
1976 | |||
1977 | nr = (offset >> PAGE_SHIFT) & pages_mask; | ||
1978 | page_offset = offset & (PAGE_SIZE - 1); | ||
1979 | size = min_t(unsigned int, PAGE_SIZE - page_offset, len); | ||
1980 | |||
1981 | memcpy(pages[nr] + page_offset, buf, size); | ||
1982 | |||
1983 | len -= size; | ||
1984 | buf += size; | ||
1985 | offset += size; | ||
1986 | } while (len); | ||
1987 | |||
1988 | handle->offset = offset; | ||
1989 | |||
1990 | /* | ||
1991 | * Check we didn't copy past our reservation window, taking the | ||
1992 | * possible unsigned int wrap into account. | ||
1993 | */ | ||
1994 | WARN_ON_ONCE(((int)(handle->head - handle->offset)) < 0); | ||
1995 | } | ||
1996 | |||
/*
 * Copy the object @x (all sizeof(x) bytes of it) into the record.
 * @x must be an lvalue because its address is taken.
 */
#define perf_output_put(handle, x) \
	perf_output_copy((handle), &(x), sizeof(x))
1999 | |||
2000 | static void perf_output_end(struct perf_output_handle *handle) | ||
2001 | { | ||
2002 | struct perf_counter *counter = handle->counter; | ||
2003 | struct perf_mmap_data *data = handle->data; | ||
2004 | |||
2005 | int wakeup_events = counter->hw_event.wakeup_events; | ||
2006 | |||
2007 | if (handle->overflow && wakeup_events) { | ||
2008 | int events = atomic_inc_return(&data->events); | ||
2009 | if (events >= wakeup_events) { | ||
2010 | atomic_sub(wakeup_events, &data->events); | ||
2011 | atomic_set(&data->wakeup, 1); | ||
2012 | } | ||
2013 | } | ||
2014 | |||
2015 | perf_output_unlock(handle); | ||
2016 | rcu_read_unlock(); | ||
2017 | } | ||
2018 | |||
2019 | static void perf_counter_output(struct perf_counter *counter, | ||
2020 | int nmi, struct pt_regs *regs, u64 addr) | ||
2021 | { | ||
2022 | int ret; | ||
2023 | u64 record_type = counter->hw_event.record_type; | ||
2024 | struct perf_output_handle handle; | ||
2025 | struct perf_event_header header; | ||
2026 | u64 ip; | ||
2027 | struct { | ||
2028 | u32 pid, tid; | ||
2029 | } tid_entry; | ||
2030 | struct { | ||
2031 | u64 event; | ||
2032 | u64 counter; | ||
2033 | } group_entry; | ||
2034 | struct perf_callchain_entry *callchain = NULL; | ||
2035 | int callchain_size = 0; | ||
2036 | u64 time; | ||
2037 | struct { | ||
2038 | u32 cpu, reserved; | ||
2039 | } cpu_entry; | ||
2040 | |||
2041 | header.type = 0; | ||
2042 | header.size = sizeof(header); | ||
2043 | |||
2044 | header.misc = PERF_EVENT_MISC_OVERFLOW; | ||
2045 | header.misc |= perf_misc_flags(regs); | ||
2046 | |||
2047 | if (record_type & PERF_RECORD_IP) { | ||
2048 | ip = perf_instruction_pointer(regs); | ||
2049 | header.type |= PERF_RECORD_IP; | ||
2050 | header.size += sizeof(ip); | ||
2051 | } | ||
2052 | |||
2053 | if (record_type & PERF_RECORD_TID) { | ||
2054 | /* namespace issues */ | ||
2055 | tid_entry.pid = current->group_leader->pid; | ||
2056 | tid_entry.tid = current->pid; | ||
2057 | |||
2058 | header.type |= PERF_RECORD_TID; | ||
2059 | header.size += sizeof(tid_entry); | ||
2060 | } | ||
2061 | |||
2062 | if (record_type & PERF_RECORD_TIME) { | ||
2063 | /* | ||
2064 | * Maybe do better on x86 and provide cpu_clock_nmi() | ||
2065 | */ | ||
2066 | time = sched_clock(); | ||
2067 | |||
2068 | header.type |= PERF_RECORD_TIME; | ||
2069 | header.size += sizeof(u64); | ||
2070 | } | ||
2071 | |||
2072 | if (record_type & PERF_RECORD_ADDR) { | ||
2073 | header.type |= PERF_RECORD_ADDR; | ||
2074 | header.size += sizeof(u64); | ||
2075 | } | ||
2076 | |||
2077 | if (record_type & PERF_RECORD_CONFIG) { | ||
2078 | header.type |= PERF_RECORD_CONFIG; | ||
2079 | header.size += sizeof(u64); | ||
2080 | } | ||
2081 | |||
2082 | if (record_type & PERF_RECORD_CPU) { | ||
2083 | header.type |= PERF_RECORD_CPU; | ||
2084 | header.size += sizeof(cpu_entry); | ||
2085 | |||
2086 | cpu_entry.cpu = raw_smp_processor_id(); | ||
2087 | } | ||
2088 | |||
2089 | if (record_type & PERF_RECORD_GROUP) { | ||
2090 | header.type |= PERF_RECORD_GROUP; | ||
2091 | header.size += sizeof(u64) + | ||
2092 | counter->nr_siblings * sizeof(group_entry); | ||
2093 | } | ||
2094 | |||
2095 | if (record_type & PERF_RECORD_CALLCHAIN) { | ||
2096 | callchain = perf_callchain(regs); | ||
2097 | |||
2098 | if (callchain) { | ||
2099 | callchain_size = (1 + callchain->nr) * sizeof(u64); | ||
2100 | |||
2101 | header.type |= PERF_RECORD_CALLCHAIN; | ||
2102 | header.size += callchain_size; | ||
2103 | } | ||
2104 | } | ||
2105 | |||
2106 | ret = perf_output_begin(&handle, counter, header.size, nmi, 1); | ||
2107 | if (ret) | ||
2108 | return; | ||
2109 | |||
2110 | perf_output_put(&handle, header); | ||
2111 | |||
2112 | if (record_type & PERF_RECORD_IP) | ||
2113 | perf_output_put(&handle, ip); | ||
2114 | |||
2115 | if (record_type & PERF_RECORD_TID) | ||
2116 | perf_output_put(&handle, tid_entry); | ||
2117 | |||
2118 | if (record_type & PERF_RECORD_TIME) | ||
2119 | perf_output_put(&handle, time); | ||
2120 | |||
2121 | if (record_type & PERF_RECORD_ADDR) | ||
2122 | perf_output_put(&handle, addr); | ||
2123 | |||
2124 | if (record_type & PERF_RECORD_CONFIG) | ||
2125 | perf_output_put(&handle, counter->hw_event.config); | ||
2126 | |||
2127 | if (record_type & PERF_RECORD_CPU) | ||
2128 | perf_output_put(&handle, cpu_entry); | ||
2129 | |||
2130 | /* | ||
2131 | * XXX PERF_RECORD_GROUP vs inherited counters seems difficult. | ||
2132 | */ | ||
2133 | if (record_type & PERF_RECORD_GROUP) { | ||
2134 | struct perf_counter *leader, *sub; | ||
2135 | u64 nr = counter->nr_siblings; | ||
2136 | |||
2137 | perf_output_put(&handle, nr); | ||
2138 | |||
2139 | leader = counter->group_leader; | ||
2140 | list_for_each_entry(sub, &leader->sibling_list, list_entry) { | ||
2141 | if (sub != counter) | ||
2142 | sub->pmu->read(sub); | ||
2143 | |||
2144 | group_entry.event = sub->hw_event.config; | ||
2145 | group_entry.counter = atomic64_read(&sub->count); | ||
2146 | |||
2147 | perf_output_put(&handle, group_entry); | ||
2148 | } | ||
2149 | } | ||
2150 | |||
2151 | if (callchain) | ||
2152 | perf_output_copy(&handle, callchain, callchain_size); | ||
2153 | |||
2154 | perf_output_end(&handle); | ||
2155 | } | ||
2156 | |||
2157 | /* | ||
2158 | * comm tracking | ||
2159 | */ | ||
2160 | |||
/*
 * A comm event being delivered.  Only @event and then @comm_size bytes of
 * @comm are written to the output buffer; the other members are
 * bookkeeping for the delivery code.
 */
struct perf_comm_event {
	/* task whose comm is reported */
	struct task_struct	*task;
	/* string to emit; set by perf_counter_comm_event() */
	char			*comm;
	/* length of the string including NUL, rounded up to a u64 multiple */
	int			comm_size;

	/* fixed-size part of the record, copied out verbatim */
	struct {
		struct perf_event_header	header;

		u32				pid;
		u32				tid;
	} event;
};
2173 | |||
2174 | static void perf_counter_comm_output(struct perf_counter *counter, | ||
2175 | struct perf_comm_event *comm_event) | ||
2176 | { | ||
2177 | struct perf_output_handle handle; | ||
2178 | int size = comm_event->event.header.size; | ||
2179 | int ret = perf_output_begin(&handle, counter, size, 0, 0); | ||
2180 | |||
2181 | if (ret) | ||
2182 | return; | ||
2183 | |||
2184 | perf_output_put(&handle, comm_event->event); | ||
2185 | perf_output_copy(&handle, comm_event->comm, | ||
2186 | comm_event->comm_size); | ||
2187 | perf_output_end(&handle); | ||
2188 | } | ||
2189 | |||
2190 | static int perf_counter_comm_match(struct perf_counter *counter, | ||
2191 | struct perf_comm_event *comm_event) | ||
2192 | { | ||
2193 | if (counter->hw_event.comm && | ||
2194 | comm_event->event.header.type == PERF_EVENT_COMM) | ||
2195 | return 1; | ||
2196 | |||
2197 | return 0; | ||
2198 | } | ||
2199 | |||
2200 | static void perf_counter_comm_ctx(struct perf_counter_context *ctx, | ||
2201 | struct perf_comm_event *comm_event) | ||
2202 | { | ||
2203 | struct perf_counter *counter; | ||
2204 | |||
2205 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
2206 | return; | ||
2207 | |||
2208 | rcu_read_lock(); | ||
2209 | list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { | ||
2210 | if (perf_counter_comm_match(counter, comm_event)) | ||
2211 | perf_counter_comm_output(counter, comm_event); | ||
2212 | } | ||
2213 | rcu_read_unlock(); | ||
2214 | } | ||
2215 | |||
2216 | static void perf_counter_comm_event(struct perf_comm_event *comm_event) | ||
2217 | { | ||
2218 | struct perf_cpu_context *cpuctx; | ||
2219 | unsigned int size; | ||
2220 | char *comm = comm_event->task->comm; | ||
2221 | |||
2222 | size = ALIGN(strlen(comm)+1, sizeof(u64)); | ||
2223 | |||
2224 | comm_event->comm = comm; | ||
2225 | comm_event->comm_size = size; | ||
2226 | |||
2227 | comm_event->event.header.size = sizeof(comm_event->event) + size; | ||
2228 | |||
2229 | cpuctx = &get_cpu_var(perf_cpu_context); | ||
2230 | perf_counter_comm_ctx(&cpuctx->ctx, comm_event); | ||
2231 | put_cpu_var(perf_cpu_context); | ||
2232 | |||
2233 | perf_counter_comm_ctx(¤t->perf_counter_ctx, comm_event); | ||
2234 | } | ||
2235 | |||
2236 | void perf_counter_comm(struct task_struct *task) | ||
2237 | { | ||
2238 | struct perf_comm_event comm_event; | ||
2239 | |||
2240 | if (!atomic_read(&nr_comm_tracking)) | ||
2241 | return; | ||
2242 | |||
2243 | comm_event = (struct perf_comm_event){ | ||
2244 | .task = task, | ||
2245 | .event = { | ||
2246 | .header = { .type = PERF_EVENT_COMM, }, | ||
2247 | .pid = task->group_leader->pid, | ||
2248 | .tid = task->pid, | ||
2249 | }, | ||
2250 | }; | ||
2251 | |||
2252 | perf_counter_comm_event(&comm_event); | ||
2253 | } | ||
2254 | |||
2255 | /* | ||
2256 | * mmap tracking | ||
2257 | */ | ||
2258 | |||
/*
 * An mmap/munmap event being delivered.  Only @event and then @file_size
 * bytes of @file_name are written to the output buffer.
 */
struct perf_mmap_event {
	/* backing file of the mapping, or NULL */
	struct file	*file;
	/* name to emit; set by perf_counter_mmap_event() */
	char		*file_name;
	/* length of the name including NUL, rounded up to a u64 multiple */
	int		file_size;

	/* fixed-size part of the record, copied out verbatim */
	struct {
		struct perf_event_header	header;

		u32				pid;
		u32				tid;
		u64				start;
		u64				len;
		u64				pgoff;
	} event;
};
2274 | |||
2275 | static void perf_counter_mmap_output(struct perf_counter *counter, | ||
2276 | struct perf_mmap_event *mmap_event) | ||
2277 | { | ||
2278 | struct perf_output_handle handle; | ||
2279 | int size = mmap_event->event.header.size; | ||
2280 | int ret = perf_output_begin(&handle, counter, size, 0, 0); | ||
2281 | |||
2282 | if (ret) | ||
2283 | return; | ||
2284 | |||
2285 | perf_output_put(&handle, mmap_event->event); | ||
2286 | perf_output_copy(&handle, mmap_event->file_name, | ||
2287 | mmap_event->file_size); | ||
2288 | perf_output_end(&handle); | ||
2289 | } | ||
2290 | |||
2291 | static int perf_counter_mmap_match(struct perf_counter *counter, | ||
2292 | struct perf_mmap_event *mmap_event) | ||
2293 | { | ||
2294 | if (counter->hw_event.mmap && | ||
2295 | mmap_event->event.header.type == PERF_EVENT_MMAP) | ||
2296 | return 1; | ||
2297 | |||
2298 | if (counter->hw_event.munmap && | ||
2299 | mmap_event->event.header.type == PERF_EVENT_MUNMAP) | ||
2300 | return 1; | ||
2301 | |||
2302 | return 0; | ||
2303 | } | ||
2304 | |||
2305 | static void perf_counter_mmap_ctx(struct perf_counter_context *ctx, | ||
2306 | struct perf_mmap_event *mmap_event) | ||
2307 | { | ||
2308 | struct perf_counter *counter; | ||
2309 | |||
2310 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
2311 | return; | ||
2312 | |||
2313 | rcu_read_lock(); | ||
2314 | list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { | ||
2315 | if (perf_counter_mmap_match(counter, mmap_event)) | ||
2316 | perf_counter_mmap_output(counter, mmap_event); | ||
2317 | } | ||
2318 | rcu_read_unlock(); | ||
2319 | } | ||
2320 | |||
2321 | static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event) | ||
2322 | { | ||
2323 | struct perf_cpu_context *cpuctx; | ||
2324 | struct file *file = mmap_event->file; | ||
2325 | unsigned int size; | ||
2326 | char tmp[16]; | ||
2327 | char *buf = NULL; | ||
2328 | char *name; | ||
2329 | |||
2330 | if (file) { | ||
2331 | buf = kzalloc(PATH_MAX, GFP_KERNEL); | ||
2332 | if (!buf) { | ||
2333 | name = strncpy(tmp, "//enomem", sizeof(tmp)); | ||
2334 | goto got_name; | ||
2335 | } | ||
2336 | name = d_path(&file->f_path, buf, PATH_MAX); | ||
2337 | if (IS_ERR(name)) { | ||
2338 | name = strncpy(tmp, "//toolong", sizeof(tmp)); | ||
2339 | goto got_name; | ||
2340 | } | ||
2341 | } else { | ||
2342 | name = strncpy(tmp, "//anon", sizeof(tmp)); | ||
2343 | goto got_name; | ||
2344 | } | ||
2345 | |||
2346 | got_name: | ||
2347 | size = ALIGN(strlen(name)+1, sizeof(u64)); | ||
2348 | |||
2349 | mmap_event->file_name = name; | ||
2350 | mmap_event->file_size = size; | ||
2351 | |||
2352 | mmap_event->event.header.size = sizeof(mmap_event->event) + size; | ||
2353 | |||
2354 | cpuctx = &get_cpu_var(perf_cpu_context); | ||
2355 | perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event); | ||
2356 | put_cpu_var(perf_cpu_context); | ||
2357 | |||
2358 | perf_counter_mmap_ctx(¤t->perf_counter_ctx, mmap_event); | ||
2359 | |||
2360 | kfree(buf); | ||
2361 | } | ||
2362 | |||
2363 | void perf_counter_mmap(unsigned long addr, unsigned long len, | ||
2364 | unsigned long pgoff, struct file *file) | ||
2365 | { | ||
2366 | struct perf_mmap_event mmap_event; | ||
2367 | |||
2368 | if (!atomic_read(&nr_mmap_tracking)) | ||
2369 | return; | ||
2370 | |||
2371 | mmap_event = (struct perf_mmap_event){ | ||
2372 | .file = file, | ||
2373 | .event = { | ||
2374 | .header = { .type = PERF_EVENT_MMAP, }, | ||
2375 | .pid = current->group_leader->pid, | ||
2376 | .tid = current->pid, | ||
2377 | .start = addr, | ||
2378 | .len = len, | ||
2379 | .pgoff = pgoff, | ||
2380 | }, | ||
2381 | }; | ||
2382 | |||
2383 | perf_counter_mmap_event(&mmap_event); | ||
2384 | } | ||
2385 | |||
2386 | void perf_counter_munmap(unsigned long addr, unsigned long len, | ||
2387 | unsigned long pgoff, struct file *file) | ||
2388 | { | ||
2389 | struct perf_mmap_event mmap_event; | ||
2390 | |||
2391 | if (!atomic_read(&nr_munmap_tracking)) | ||
2392 | return; | ||
2393 | |||
2394 | mmap_event = (struct perf_mmap_event){ | ||
2395 | .file = file, | ||
2396 | .event = { | ||
2397 | .header = { .type = PERF_EVENT_MUNMAP, }, | ||
2398 | .pid = current->group_leader->pid, | ||
2399 | .tid = current->pid, | ||
2400 | .start = addr, | ||
2401 | .len = len, | ||
2402 | .pgoff = pgoff, | ||
2403 | }, | ||
2404 | }; | ||
2405 | |||
2406 | perf_counter_mmap_event(&mmap_event); | ||
2407 | } | ||
2408 | |||
2409 | /* | ||
2410 | * Generic counter overflow handling. | ||
2411 | */ | ||
2412 | |||
2413 | int perf_counter_overflow(struct perf_counter *counter, | ||
2414 | int nmi, struct pt_regs *regs, u64 addr) | ||
2415 | { | ||
2416 | int events = atomic_read(&counter->event_limit); | ||
2417 | int ret = 0; | ||
2418 | |||
2419 | counter->hw.interrupts++; | ||
2420 | |||
2421 | /* | ||
2422 | * XXX event_limit might not quite work as expected on inherited | ||
2423 | * counters | ||
2424 | */ | ||
2425 | |||
2426 | counter->pending_kill = POLL_IN; | ||
2427 | if (events && atomic_dec_and_test(&counter->event_limit)) { | ||
2428 | ret = 1; | ||
2429 | counter->pending_kill = POLL_HUP; | ||
2430 | if (nmi) { | ||
2431 | counter->pending_disable = 1; | ||
2432 | perf_pending_queue(&counter->pending, | ||
2433 | perf_pending_counter); | ||
2434 | } else | ||
2435 | perf_counter_disable(counter); | ||
2436 | } | ||
2437 | |||
2438 | perf_counter_output(counter, nmi, regs, addr); | ||
2439 | return ret; | ||
2440 | } | ||
2441 | |||
2442 | /* | ||
2443 | * Generic software counter infrastructure | ||
2444 | */ | ||
2445 | |||
2446 | static void perf_swcounter_update(struct perf_counter *counter) | ||
2447 | { | ||
2448 | struct hw_perf_counter *hwc = &counter->hw; | ||
2449 | u64 prev, now; | ||
2450 | s64 delta; | ||
2451 | |||
2452 | again: | ||
2453 | prev = atomic64_read(&hwc->prev_count); | ||
2454 | now = atomic64_read(&hwc->count); | ||
2455 | if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev) | ||
2456 | goto again; | ||
2457 | |||
2458 | delta = now - prev; | ||
2459 | |||
2460 | atomic64_add(delta, &counter->count); | ||
2461 | atomic64_sub(delta, &hwc->period_left); | ||
2462 | } | ||
2463 | |||
2464 | static void perf_swcounter_set_period(struct perf_counter *counter) | ||
2465 | { | ||
2466 | struct hw_perf_counter *hwc = &counter->hw; | ||
2467 | s64 left = atomic64_read(&hwc->period_left); | ||
2468 | s64 period = hwc->irq_period; | ||
2469 | |||
2470 | if (unlikely(left <= -period)) { | ||
2471 | left = period; | ||
2472 | atomic64_set(&hwc->period_left, left); | ||
2473 | } | ||
2474 | |||
2475 | if (unlikely(left <= 0)) { | ||
2476 | left += period; | ||
2477 | atomic64_add(period, &hwc->period_left); | ||
2478 | } | ||
2479 | |||
2480 | atomic64_set(&hwc->prev_count, -left); | ||
2481 | atomic64_set(&hwc->count, -left); | ||
2482 | } | ||
2483 | |||
2484 | static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) | ||
2485 | { | ||
2486 | enum hrtimer_restart ret = HRTIMER_RESTART; | ||
2487 | struct perf_counter *counter; | ||
2488 | struct pt_regs *regs; | ||
2489 | u64 period; | ||
2490 | |||
2491 | counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); | ||
2492 | counter->pmu->read(counter); | ||
2493 | |||
2494 | regs = get_irq_regs(); | ||
2495 | /* | ||
2496 | * In case we exclude kernel IPs or are somehow not in interrupt | ||
2497 | * context, provide the next best thing, the user IP. | ||
2498 | */ | ||
2499 | if ((counter->hw_event.exclude_kernel || !regs) && | ||
2500 | !counter->hw_event.exclude_user) | ||
2501 | regs = task_pt_regs(current); | ||
2502 | |||
2503 | if (regs) { | ||
2504 | if (perf_counter_overflow(counter, 0, regs, 0)) | ||
2505 | ret = HRTIMER_NORESTART; | ||
2506 | } | ||
2507 | |||
2508 | period = max_t(u64, 10000, counter->hw.irq_period); | ||
2509 | hrtimer_forward_now(hrtimer, ns_to_ktime(period)); | ||
2510 | |||
2511 | return ret; | ||
2512 | } | ||
2513 | |||
2514 | static void perf_swcounter_overflow(struct perf_counter *counter, | ||
2515 | int nmi, struct pt_regs *regs, u64 addr) | ||
2516 | { | ||
2517 | perf_swcounter_update(counter); | ||
2518 | perf_swcounter_set_period(counter); | ||
2519 | if (perf_counter_overflow(counter, nmi, regs, addr)) | ||
2520 | /* soft-disable the counter */ | ||
2521 | ; | ||
2522 | |||
2523 | } | ||
2524 | |||
2525 | static int perf_swcounter_match(struct perf_counter *counter, | ||
2526 | enum perf_event_types type, | ||
2527 | u32 event, struct pt_regs *regs) | ||
2528 | { | ||
2529 | if (counter->state != PERF_COUNTER_STATE_ACTIVE) | ||
2530 | return 0; | ||
2531 | |||
2532 | if (perf_event_raw(&counter->hw_event)) | ||
2533 | return 0; | ||
2534 | |||
2535 | if (perf_event_type(&counter->hw_event) != type) | ||
2536 | return 0; | ||
2537 | |||
2538 | if (perf_event_id(&counter->hw_event) != event) | ||
2539 | return 0; | ||
2540 | |||
2541 | if (counter->hw_event.exclude_user && user_mode(regs)) | ||
2542 | return 0; | ||
2543 | |||
2544 | if (counter->hw_event.exclude_kernel && !user_mode(regs)) | ||
2545 | return 0; | ||
2546 | |||
2547 | return 1; | ||
2548 | } | ||
2549 | |||
2550 | static void perf_swcounter_add(struct perf_counter *counter, u64 nr, | ||
2551 | int nmi, struct pt_regs *regs, u64 addr) | ||
2552 | { | ||
2553 | int neg = atomic64_add_negative(nr, &counter->hw.count); | ||
2554 | if (counter->hw.irq_period && !neg) | ||
2555 | perf_swcounter_overflow(counter, nmi, regs, addr); | ||
2556 | } | ||
2557 | |||
2558 | static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, | ||
2559 | enum perf_event_types type, u32 event, | ||
2560 | u64 nr, int nmi, struct pt_regs *regs, | ||
2561 | u64 addr) | ||
2562 | { | ||
2563 | struct perf_counter *counter; | ||
2564 | |||
2565 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
2566 | return; | ||
2567 | |||
2568 | rcu_read_lock(); | ||
2569 | list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { | ||
2570 | if (perf_swcounter_match(counter, type, event, regs)) | ||
2571 | perf_swcounter_add(counter, nr, nmi, regs, addr); | ||
2572 | } | ||
2573 | rcu_read_unlock(); | ||
2574 | } | ||
2575 | |||
2576 | static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) | ||
2577 | { | ||
2578 | if (in_nmi()) | ||
2579 | return &cpuctx->recursion[3]; | ||
2580 | |||
2581 | if (in_irq()) | ||
2582 | return &cpuctx->recursion[2]; | ||
2583 | |||
2584 | if (in_softirq()) | ||
2585 | return &cpuctx->recursion[1]; | ||
2586 | |||
2587 | return &cpuctx->recursion[0]; | ||
2588 | } | ||
2589 | |||
2590 | static void __perf_swcounter_event(enum perf_event_types type, u32 event, | ||
2591 | u64 nr, int nmi, struct pt_regs *regs, | ||
2592 | u64 addr) | ||
2593 | { | ||
2594 | struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); | ||
2595 | int *recursion = perf_swcounter_recursion_context(cpuctx); | ||
2596 | |||
2597 | if (*recursion) | ||
2598 | goto out; | ||
2599 | |||
2600 | (*recursion)++; | ||
2601 | barrier(); | ||
2602 | |||
2603 | perf_swcounter_ctx_event(&cpuctx->ctx, type, event, | ||
2604 | nr, nmi, regs, addr); | ||
2605 | if (cpuctx->task_ctx) { | ||
2606 | perf_swcounter_ctx_event(cpuctx->task_ctx, type, event, | ||
2607 | nr, nmi, regs, addr); | ||
2608 | } | ||
2609 | |||
2610 | barrier(); | ||
2611 | (*recursion)--; | ||
2612 | |||
2613 | out: | ||
2614 | put_cpu_var(perf_cpu_context); | ||
2615 | } | ||
2616 | |||
/*
 * Report @nr occurrences of software event @event; a thin wrapper that
 * fixes the event type to PERF_TYPE_SOFTWARE.
 */
void
perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
{
	__perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr);
}
2622 | |||
/* pmu ->read for generic software counters: fold pending events into ->count. */
static void perf_swcounter_read(struct perf_counter *counter)
{
	perf_swcounter_update(counter);
}
2627 | |||
/* pmu ->enable for generic software counters: arm the period; always returns 0. */
static int perf_swcounter_enable(struct perf_counter *counter)
{
	perf_swcounter_set_period(counter);
	return 0;
}
2633 | |||
/* pmu ->disable for generic software counters: fold pending events into ->count. */
static void perf_swcounter_disable(struct perf_counter *counter)
{
	perf_swcounter_update(counter);
}
2638 | |||
/* pmu operations built from the generic software counter helpers above. */
static const struct pmu perf_ops_generic = {
	.enable		= perf_swcounter_enable,
	.disable	= perf_swcounter_disable,
	.read		= perf_swcounter_read,
};
2644 | |||
2645 | /* | ||
2646 | * Software counter: cpu wall time clock | ||
2647 | */ | ||
2648 | |||
2649 | static void cpu_clock_perf_counter_update(struct perf_counter *counter) | ||
2650 | { | ||
2651 | int cpu = raw_smp_processor_id(); | ||
2652 | s64 prev; | ||
2653 | u64 now; | ||
2654 | |||
2655 | now = cpu_clock(cpu); | ||
2656 | prev = atomic64_read(&counter->hw.prev_count); | ||
2657 | atomic64_set(&counter->hw.prev_count, now); | ||
2658 | atomic64_add(now - prev, &counter->count); | ||
2659 | } | ||
2660 | |||
2661 | static int cpu_clock_perf_counter_enable(struct perf_counter *counter) | ||
2662 | { | ||
2663 | struct hw_perf_counter *hwc = &counter->hw; | ||
2664 | int cpu = raw_smp_processor_id(); | ||
2665 | |||
2666 | atomic64_set(&hwc->prev_count, cpu_clock(cpu)); | ||
2667 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
2668 | hwc->hrtimer.function = perf_swcounter_hrtimer; | ||
2669 | if (hwc->irq_period) { | ||
2670 | u64 period = max_t(u64, 10000, hwc->irq_period); | ||
2671 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
2672 | ns_to_ktime(period), 0, | ||
2673 | HRTIMER_MODE_REL, 0); | ||
2674 | } | ||
2675 | |||
2676 | return 0; | ||
2677 | } | ||
2678 | |||
/*
 * pmu ->disable for the cpu clock counter: stop the sampling timer, then
 * account the time elapsed up to now.
 */
static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
{
	hrtimer_cancel(&counter->hw.hrtimer);
	cpu_clock_perf_counter_update(counter);
}
2684 | |||
/* pmu ->read for the cpu clock counter: account the time elapsed up to now. */
static void cpu_clock_perf_counter_read(struct perf_counter *counter)
{
	cpu_clock_perf_counter_update(counter);
}
2689 | |||
2690 | static const struct pmu perf_ops_cpu_clock = { | ||
2691 | .enable = cpu_clock_perf_counter_enable, | ||
2692 | .disable = cpu_clock_perf_counter_disable, | ||
2693 | .read = cpu_clock_perf_counter_read, | ||
2694 | }; | ||
2695 | |||
2696 | /* | ||
2697 | * Software counter: task time clock | ||
2698 | */ | ||
2699 | |||
2700 | static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now) | ||
2701 | { | ||
2702 | u64 prev; | ||
2703 | s64 delta; | ||
2704 | |||
2705 | prev = atomic64_xchg(&counter->hw.prev_count, now); | ||
2706 | delta = now - prev; | ||
2707 | atomic64_add(delta, &counter->count); | ||
2708 | } | ||
2709 | |||
2710 | static int task_clock_perf_counter_enable(struct perf_counter *counter) | ||
2711 | { | ||
2712 | struct hw_perf_counter *hwc = &counter->hw; | ||
2713 | u64 now; | ||
2714 | |||
2715 | now = counter->ctx->time; | ||
2716 | |||
2717 | atomic64_set(&hwc->prev_count, now); | ||
2718 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
2719 | hwc->hrtimer.function = perf_swcounter_hrtimer; | ||
2720 | if (hwc->irq_period) { | ||
2721 | u64 period = max_t(u64, 10000, hwc->irq_period); | ||
2722 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
2723 | ns_to_ktime(period), 0, | ||
2724 | HRTIMER_MODE_REL, 0); | ||
2725 | } | ||
2726 | |||
2727 | return 0; | ||
2728 | } | ||
2729 | |||
2730 | static void task_clock_perf_counter_disable(struct perf_counter *counter) | ||
2731 | { | ||
2732 | hrtimer_cancel(&counter->hw.hrtimer); | ||
2733 | task_clock_perf_counter_update(counter, counter->ctx->time); | ||
2734 | |||
2735 | } | ||
2736 | |||
2737 | static void task_clock_perf_counter_read(struct perf_counter *counter) | ||
2738 | { | ||
2739 | u64 time; | ||
2740 | |||
2741 | if (!in_nmi()) { | ||
2742 | update_context_time(counter->ctx); | ||
2743 | time = counter->ctx->time; | ||
2744 | } else { | ||
2745 | u64 now = perf_clock(); | ||
2746 | u64 delta = now - counter->ctx->timestamp; | ||
2747 | time = counter->ctx->time + delta; | ||
2748 | } | ||
2749 | |||
2750 | task_clock_perf_counter_update(counter, time); | ||
2751 | } | ||
2752 | |||
2753 | static const struct pmu perf_ops_task_clock = { | ||
2754 | .enable = task_clock_perf_counter_enable, | ||
2755 | .disable = task_clock_perf_counter_disable, | ||
2756 | .read = task_clock_perf_counter_read, | ||
2757 | }; | ||
2758 | |||
2759 | /* | ||
2760 | * Software counter: cpu migrations | ||
2761 | */ | ||
2762 | |||
2763 | static inline u64 get_cpu_migrations(struct perf_counter *counter) | ||
2764 | { | ||
2765 | struct task_struct *curr = counter->ctx->task; | ||
2766 | |||
2767 | if (curr) | ||
2768 | return curr->se.nr_migrations; | ||
2769 | return cpu_nr_migrations(smp_processor_id()); | ||
2770 | } | ||
2771 | |||
2772 | static void cpu_migrations_perf_counter_update(struct perf_counter *counter) | ||
2773 | { | ||
2774 | u64 prev, now; | ||
2775 | s64 delta; | ||
2776 | |||
2777 | prev = atomic64_read(&counter->hw.prev_count); | ||
2778 | now = get_cpu_migrations(counter); | ||
2779 | |||
2780 | atomic64_set(&counter->hw.prev_count, now); | ||
2781 | |||
2782 | delta = now - prev; | ||
2783 | |||
2784 | atomic64_add(delta, &counter->count); | ||
2785 | } | ||
2786 | |||
/* ->read() hook: reading a cpu-migrations counter refreshes its count */
static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
{
	cpu_migrations_perf_counter_update(counter);
}
2791 | |||
2792 | static int cpu_migrations_perf_counter_enable(struct perf_counter *counter) | ||
2793 | { | ||
2794 | if (counter->prev_state <= PERF_COUNTER_STATE_OFF) | ||
2795 | atomic64_set(&counter->hw.prev_count, | ||
2796 | get_cpu_migrations(counter)); | ||
2797 | return 0; | ||
2798 | } | ||
2799 | |||
/* ->disable() hook: account the migrations seen so far before stopping */
static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
{
	cpu_migrations_perf_counter_update(counter);
}
2804 | |||
2805 | static const struct pmu perf_ops_cpu_migrations = { | ||
2806 | .enable = cpu_migrations_perf_counter_enable, | ||
2807 | .disable = cpu_migrations_perf_counter_disable, | ||
2808 | .read = cpu_migrations_perf_counter_read, | ||
2809 | }; | ||
2810 | |||
2811 | #ifdef CONFIG_EVENT_PROFILE | ||
2812 | void perf_tpcounter_event(int event_id) | ||
2813 | { | ||
2814 | struct pt_regs *regs = get_irq_regs(); | ||
2815 | |||
2816 | if (!regs) | ||
2817 | regs = task_pt_regs(current); | ||
2818 | |||
2819 | __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0); | ||
2820 | } | ||
2821 | EXPORT_SYMBOL_GPL(perf_tpcounter_event); | ||
2822 | |||
2823 | extern int ftrace_profile_enable(int); | ||
2824 | extern void ftrace_profile_disable(int); | ||
2825 | |||
2826 | static void tp_perf_counter_destroy(struct perf_counter *counter) | ||
2827 | { | ||
2828 | ftrace_profile_disable(perf_event_id(&counter->hw_event)); | ||
2829 | } | ||
2830 | |||
2831 | static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) | ||
2832 | { | ||
2833 | int event_id = perf_event_id(&counter->hw_event); | ||
2834 | int ret; | ||
2835 | |||
2836 | ret = ftrace_profile_enable(event_id); | ||
2837 | if (ret) | ||
2838 | return NULL; | ||
2839 | |||
2840 | counter->destroy = tp_perf_counter_destroy; | ||
2841 | counter->hw.irq_period = counter->hw_event.irq_period; | ||
2842 | |||
2843 | return &perf_ops_generic; | ||
2844 | } | ||
2845 | #else | ||
2846 | static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) | ||
2847 | { | ||
2848 | return NULL; | ||
2849 | } | ||
2850 | #endif | ||
2851 | |||
2852 | static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) | ||
2853 | { | ||
2854 | const struct pmu *pmu = NULL; | ||
2855 | |||
2856 | /* | ||
2857 | * Software counters (currently) can't in general distinguish | ||
2858 | * between user, kernel and hypervisor events. | ||
2859 | * However, context switches and cpu migrations are considered | ||
2860 | * to be kernel events, and page faults are never hypervisor | ||
2861 | * events. | ||
2862 | */ | ||
2863 | switch (perf_event_id(&counter->hw_event)) { | ||
2864 | case PERF_COUNT_CPU_CLOCK: | ||
2865 | pmu = &perf_ops_cpu_clock; | ||
2866 | |||
2867 | break; | ||
2868 | case PERF_COUNT_TASK_CLOCK: | ||
2869 | /* | ||
2870 | * If the user instantiates this as a per-cpu counter, | ||
2871 | * use the cpu_clock counter instead. | ||
2872 | */ | ||
2873 | if (counter->ctx->task) | ||
2874 | pmu = &perf_ops_task_clock; | ||
2875 | else | ||
2876 | pmu = &perf_ops_cpu_clock; | ||
2877 | |||
2878 | break; | ||
2879 | case PERF_COUNT_PAGE_FAULTS: | ||
2880 | case PERF_COUNT_PAGE_FAULTS_MIN: | ||
2881 | case PERF_COUNT_PAGE_FAULTS_MAJ: | ||
2882 | case PERF_COUNT_CONTEXT_SWITCHES: | ||
2883 | pmu = &perf_ops_generic; | ||
2884 | break; | ||
2885 | case PERF_COUNT_CPU_MIGRATIONS: | ||
2886 | if (!counter->hw_event.exclude_kernel) | ||
2887 | pmu = &perf_ops_cpu_migrations; | ||
2888 | break; | ||
2889 | } | ||
2890 | |||
2891 | return pmu; | ||
2892 | } | ||
2893 | |||
2894 | /* | ||
2895 | * Allocate and initialize a counter structure | ||
2896 | */ | ||
2897 | static struct perf_counter * | ||
2898 | perf_counter_alloc(struct perf_counter_hw_event *hw_event, | ||
2899 | int cpu, | ||
2900 | struct perf_counter_context *ctx, | ||
2901 | struct perf_counter *group_leader, | ||
2902 | gfp_t gfpflags) | ||
2903 | { | ||
2904 | const struct pmu *pmu; | ||
2905 | struct perf_counter *counter; | ||
2906 | struct hw_perf_counter *hwc; | ||
2907 | long err; | ||
2908 | |||
2909 | counter = kzalloc(sizeof(*counter), gfpflags); | ||
2910 | if (!counter) | ||
2911 | return ERR_PTR(-ENOMEM); | ||
2912 | |||
2913 | /* | ||
2914 | * Single counters are their own group leaders, with an | ||
2915 | * empty sibling list: | ||
2916 | */ | ||
2917 | if (!group_leader) | ||
2918 | group_leader = counter; | ||
2919 | |||
2920 | mutex_init(&counter->mutex); | ||
2921 | INIT_LIST_HEAD(&counter->list_entry); | ||
2922 | INIT_LIST_HEAD(&counter->event_entry); | ||
2923 | INIT_LIST_HEAD(&counter->sibling_list); | ||
2924 | init_waitqueue_head(&counter->waitq); | ||
2925 | |||
2926 | mutex_init(&counter->mmap_mutex); | ||
2927 | |||
2928 | INIT_LIST_HEAD(&counter->child_list); | ||
2929 | |||
2930 | counter->cpu = cpu; | ||
2931 | counter->hw_event = *hw_event; | ||
2932 | counter->group_leader = group_leader; | ||
2933 | counter->pmu = NULL; | ||
2934 | counter->ctx = ctx; | ||
2935 | |||
2936 | counter->state = PERF_COUNTER_STATE_INACTIVE; | ||
2937 | if (hw_event->disabled) | ||
2938 | counter->state = PERF_COUNTER_STATE_OFF; | ||
2939 | |||
2940 | pmu = NULL; | ||
2941 | |||
2942 | hwc = &counter->hw; | ||
2943 | if (hw_event->freq && hw_event->irq_freq) | ||
2944 | hwc->irq_period = div64_u64(TICK_NSEC, hw_event->irq_freq); | ||
2945 | else | ||
2946 | hwc->irq_period = hw_event->irq_period; | ||
2947 | |||
2948 | /* | ||
2949 | * we currently do not support PERF_RECORD_GROUP on inherited counters | ||
2950 | */ | ||
2951 | if (hw_event->inherit && (hw_event->record_type & PERF_RECORD_GROUP)) | ||
2952 | goto done; | ||
2953 | |||
2954 | if (perf_event_raw(hw_event)) { | ||
2955 | pmu = hw_perf_counter_init(counter); | ||
2956 | goto done; | ||
2957 | } | ||
2958 | |||
2959 | switch (perf_event_type(hw_event)) { | ||
2960 | case PERF_TYPE_HARDWARE: | ||
2961 | pmu = hw_perf_counter_init(counter); | ||
2962 | break; | ||
2963 | |||
2964 | case PERF_TYPE_SOFTWARE: | ||
2965 | pmu = sw_perf_counter_init(counter); | ||
2966 | break; | ||
2967 | |||
2968 | case PERF_TYPE_TRACEPOINT: | ||
2969 | pmu = tp_perf_counter_init(counter); | ||
2970 | break; | ||
2971 | } | ||
2972 | done: | ||
2973 | err = 0; | ||
2974 | if (!pmu) | ||
2975 | err = -EINVAL; | ||
2976 | else if (IS_ERR(pmu)) | ||
2977 | err = PTR_ERR(pmu); | ||
2978 | |||
2979 | if (err) { | ||
2980 | kfree(counter); | ||
2981 | return ERR_PTR(err); | ||
2982 | } | ||
2983 | |||
2984 | counter->pmu = pmu; | ||
2985 | |||
2986 | atomic_inc(&nr_counters); | ||
2987 | if (counter->hw_event.mmap) | ||
2988 | atomic_inc(&nr_mmap_tracking); | ||
2989 | if (counter->hw_event.munmap) | ||
2990 | atomic_inc(&nr_munmap_tracking); | ||
2991 | if (counter->hw_event.comm) | ||
2992 | atomic_inc(&nr_comm_tracking); | ||
2993 | |||
2994 | return counter; | ||
2995 | } | ||
2996 | |||
2997 | /** | ||
2998 | * sys_perf_counter_open - open a performance counter, associate it to a task/cpu | ||
2999 | * | ||
3000 | * @hw_event_uptr: event type attributes for monitoring/sampling | ||
3001 | * @pid: target pid | ||
3002 | * @cpu: target cpu | ||
3003 | * @group_fd: group leader counter fd | ||
3004 | */ | ||
3005 | SYSCALL_DEFINE5(perf_counter_open, | ||
3006 | const struct perf_counter_hw_event __user *, hw_event_uptr, | ||
3007 | pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) | ||
3008 | { | ||
3009 | struct perf_counter *counter, *group_leader; | ||
3010 | struct perf_counter_hw_event hw_event; | ||
3011 | struct perf_counter_context *ctx; | ||
3012 | struct file *counter_file = NULL; | ||
3013 | struct file *group_file = NULL; | ||
3014 | int fput_needed = 0; | ||
3015 | int fput_needed2 = 0; | ||
3016 | int ret; | ||
3017 | |||
3018 | /* for future expandability... */ | ||
3019 | if (flags) | ||
3020 | return -EINVAL; | ||
3021 | |||
3022 | if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0) | ||
3023 | return -EFAULT; | ||
3024 | |||
3025 | /* | ||
3026 | * Get the target context (task or percpu): | ||
3027 | */ | ||
3028 | ctx = find_get_context(pid, cpu); | ||
3029 | if (IS_ERR(ctx)) | ||
3030 | return PTR_ERR(ctx); | ||
3031 | |||
3032 | /* | ||
3033 | * Look up the group leader (we will attach this counter to it): | ||
3034 | */ | ||
3035 | group_leader = NULL; | ||
3036 | if (group_fd != -1) { | ||
3037 | ret = -EINVAL; | ||
3038 | group_file = fget_light(group_fd, &fput_needed); | ||
3039 | if (!group_file) | ||
3040 | goto err_put_context; | ||
3041 | if (group_file->f_op != &perf_fops) | ||
3042 | goto err_put_context; | ||
3043 | |||
3044 | group_leader = group_file->private_data; | ||
3045 | /* | ||
3046 | * Do not allow a recursive hierarchy (this new sibling | ||
3047 | * becoming part of another group-sibling): | ||
3048 | */ | ||
3049 | if (group_leader->group_leader != group_leader) | ||
3050 | goto err_put_context; | ||
3051 | /* | ||
3052 | * Do not allow to attach to a group in a different | ||
3053 | * task or CPU context: | ||
3054 | */ | ||
3055 | if (group_leader->ctx != ctx) | ||
3056 | goto err_put_context; | ||
3057 | /* | ||
3058 | * Only a group leader can be exclusive or pinned | ||
3059 | */ | ||
3060 | if (hw_event.exclusive || hw_event.pinned) | ||
3061 | goto err_put_context; | ||
3062 | } | ||
3063 | |||
3064 | counter = perf_counter_alloc(&hw_event, cpu, ctx, group_leader, | ||
3065 | GFP_KERNEL); | ||
3066 | ret = PTR_ERR(counter); | ||
3067 | if (IS_ERR(counter)) | ||
3068 | goto err_put_context; | ||
3069 | |||
3070 | ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); | ||
3071 | if (ret < 0) | ||
3072 | goto err_free_put_context; | ||
3073 | |||
3074 | counter_file = fget_light(ret, &fput_needed2); | ||
3075 | if (!counter_file) | ||
3076 | goto err_free_put_context; | ||
3077 | |||
3078 | counter->filp = counter_file; | ||
3079 | mutex_lock(&ctx->mutex); | ||
3080 | perf_install_in_context(ctx, counter, cpu); | ||
3081 | mutex_unlock(&ctx->mutex); | ||
3082 | |||
3083 | fput_light(counter_file, fput_needed2); | ||
3084 | |||
3085 | out_fput: | ||
3086 | fput_light(group_file, fput_needed); | ||
3087 | |||
3088 | return ret; | ||
3089 | |||
3090 | err_free_put_context: | ||
3091 | kfree(counter); | ||
3092 | |||
3093 | err_put_context: | ||
3094 | put_context(ctx); | ||
3095 | |||
3096 | goto out_fput; | ||
3097 | } | ||
3098 | |||
3099 | /* | ||
3100 | * Initialize the perf_counter context in a task_struct: | ||
3101 | */ | ||
3102 | static void | ||
3103 | __perf_counter_init_context(struct perf_counter_context *ctx, | ||
3104 | struct task_struct *task) | ||
3105 | { | ||
3106 | memset(ctx, 0, sizeof(*ctx)); | ||
3107 | spin_lock_init(&ctx->lock); | ||
3108 | mutex_init(&ctx->mutex); | ||
3109 | INIT_LIST_HEAD(&ctx->counter_list); | ||
3110 | INIT_LIST_HEAD(&ctx->event_list); | ||
3111 | ctx->task = task; | ||
3112 | } | ||
3113 | |||
3114 | /* | ||
3115 | * inherit a counter from parent task to child task: | ||
3116 | */ | ||
3117 | static struct perf_counter * | ||
3118 | inherit_counter(struct perf_counter *parent_counter, | ||
3119 | struct task_struct *parent, | ||
3120 | struct perf_counter_context *parent_ctx, | ||
3121 | struct task_struct *child, | ||
3122 | struct perf_counter *group_leader, | ||
3123 | struct perf_counter_context *child_ctx) | ||
3124 | { | ||
3125 | struct perf_counter *child_counter; | ||
3126 | |||
3127 | /* | ||
3128 | * Instead of creating recursive hierarchies of counters, | ||
3129 | * we link inherited counters back to the original parent, | ||
3130 | * which has a filp for sure, which we use as the reference | ||
3131 | * count: | ||
3132 | */ | ||
3133 | if (parent_counter->parent) | ||
3134 | parent_counter = parent_counter->parent; | ||
3135 | |||
3136 | child_counter = perf_counter_alloc(&parent_counter->hw_event, | ||
3137 | parent_counter->cpu, child_ctx, | ||
3138 | group_leader, GFP_KERNEL); | ||
3139 | if (IS_ERR(child_counter)) | ||
3140 | return child_counter; | ||
3141 | |||
3142 | /* | ||
3143 | * Link it up in the child's context: | ||
3144 | */ | ||
3145 | child_counter->task = child; | ||
3146 | add_counter_to_ctx(child_counter, child_ctx); | ||
3147 | |||
3148 | child_counter->parent = parent_counter; | ||
3149 | /* | ||
3150 | * inherit into child's child as well: | ||
3151 | */ | ||
3152 | child_counter->hw_event.inherit = 1; | ||
3153 | |||
3154 | /* | ||
3155 | * Get a reference to the parent filp - we will fput it | ||
3156 | * when the child counter exits. This is safe to do because | ||
3157 | * we are in the parent and we know that the filp still | ||
3158 | * exists and has a nonzero count: | ||
3159 | */ | ||
3160 | atomic_long_inc(&parent_counter->filp->f_count); | ||
3161 | |||
3162 | /* | ||
3163 | * Link this into the parent counter's child list | ||
3164 | */ | ||
3165 | mutex_lock(&parent_counter->mutex); | ||
3166 | list_add_tail(&child_counter->child_list, &parent_counter->child_list); | ||
3167 | |||
3168 | /* | ||
3169 | * Make the child state follow the state of the parent counter, | ||
3170 | * not its hw_event.disabled bit. We hold the parent's mutex, | ||
3171 | * so we won't race with perf_counter_{en,dis}able_family. | ||
3172 | */ | ||
3173 | if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE) | ||
3174 | child_counter->state = PERF_COUNTER_STATE_INACTIVE; | ||
3175 | else | ||
3176 | child_counter->state = PERF_COUNTER_STATE_OFF; | ||
3177 | |||
3178 | mutex_unlock(&parent_counter->mutex); | ||
3179 | |||
3180 | return child_counter; | ||
3181 | } | ||
3182 | |||
3183 | static int inherit_group(struct perf_counter *parent_counter, | ||
3184 | struct task_struct *parent, | ||
3185 | struct perf_counter_context *parent_ctx, | ||
3186 | struct task_struct *child, | ||
3187 | struct perf_counter_context *child_ctx) | ||
3188 | { | ||
3189 | struct perf_counter *leader; | ||
3190 | struct perf_counter *sub; | ||
3191 | struct perf_counter *child_ctr; | ||
3192 | |||
3193 | leader = inherit_counter(parent_counter, parent, parent_ctx, | ||
3194 | child, NULL, child_ctx); | ||
3195 | if (IS_ERR(leader)) | ||
3196 | return PTR_ERR(leader); | ||
3197 | list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) { | ||
3198 | child_ctr = inherit_counter(sub, parent, parent_ctx, | ||
3199 | child, leader, child_ctx); | ||
3200 | if (IS_ERR(child_ctr)) | ||
3201 | return PTR_ERR(child_ctr); | ||
3202 | } | ||
3203 | return 0; | ||
3204 | } | ||
3205 | |||
3206 | static void sync_child_counter(struct perf_counter *child_counter, | ||
3207 | struct perf_counter *parent_counter) | ||
3208 | { | ||
3209 | u64 child_val; | ||
3210 | |||
3211 | child_val = atomic64_read(&child_counter->count); | ||
3212 | |||
3213 | /* | ||
3214 | * Add back the child's count to the parent's count: | ||
3215 | */ | ||
3216 | atomic64_add(child_val, &parent_counter->count); | ||
3217 | atomic64_add(child_counter->total_time_enabled, | ||
3218 | &parent_counter->child_total_time_enabled); | ||
3219 | atomic64_add(child_counter->total_time_running, | ||
3220 | &parent_counter->child_total_time_running); | ||
3221 | |||
3222 | /* | ||
3223 | * Remove this counter from the parent's list | ||
3224 | */ | ||
3225 | mutex_lock(&parent_counter->mutex); | ||
3226 | list_del_init(&child_counter->child_list); | ||
3227 | mutex_unlock(&parent_counter->mutex); | ||
3228 | |||
3229 | /* | ||
3230 | * Release the parent counter, if this was the last | ||
3231 | * reference to it. | ||
3232 | */ | ||
3233 | fput(parent_counter->filp); | ||
3234 | } | ||
3235 | |||
3236 | static void | ||
3237 | __perf_counter_exit_task(struct task_struct *child, | ||
3238 | struct perf_counter *child_counter, | ||
3239 | struct perf_counter_context *child_ctx) | ||
3240 | { | ||
3241 | struct perf_counter *parent_counter; | ||
3242 | |||
3243 | /* | ||
3244 | * If we do not self-reap then we have to wait for the | ||
3245 | * child task to unschedule (it will happen for sure), | ||
3246 | * so that its counter is at its final count. (This | ||
3247 | * condition triggers rarely - child tasks usually get | ||
3248 | * off their CPU before the parent has a chance to | ||
3249 | * get this far into the reaping action) | ||
3250 | */ | ||
3251 | if (child != current) { | ||
3252 | wait_task_inactive(child, 0); | ||
3253 | update_counter_times(child_counter); | ||
3254 | list_del_counter(child_counter, child_ctx); | ||
3255 | } else { | ||
3256 | struct perf_cpu_context *cpuctx; | ||
3257 | unsigned long flags; | ||
3258 | |||
3259 | /* | ||
3260 | * Disable and unlink this counter. | ||
3261 | * | ||
3262 | * Be careful about zapping the list - IRQ/NMI context | ||
3263 | * could still be processing it: | ||
3264 | */ | ||
3265 | local_irq_save(flags); | ||
3266 | perf_disable(); | ||
3267 | |||
3268 | cpuctx = &__get_cpu_var(perf_cpu_context); | ||
3269 | |||
3270 | group_sched_out(child_counter, cpuctx, child_ctx); | ||
3271 | update_counter_times(child_counter); | ||
3272 | |||
3273 | list_del_counter(child_counter, child_ctx); | ||
3274 | |||
3275 | perf_enable(); | ||
3276 | local_irq_restore(flags); | ||
3277 | } | ||
3278 | |||
3279 | parent_counter = child_counter->parent; | ||
3280 | /* | ||
3281 | * It can happen that parent exits first, and has counters | ||
3282 | * that are still around due to the child reference. These | ||
3283 | * counters need to be zapped - but otherwise linger. | ||
3284 | */ | ||
3285 | if (parent_counter) { | ||
3286 | sync_child_counter(child_counter, parent_counter); | ||
3287 | free_counter(child_counter); | ||
3288 | } | ||
3289 | } | ||
3290 | |||
3291 | /* | ||
3292 | * When a child task exits, feed back counter values to parent counters. | ||
3293 | * | ||
3294 | * Note: we may be running in child context, but the PID is not hashed | ||
3295 | * anymore so new counters will not be added. | ||
3296 | */ | ||
3297 | void perf_counter_exit_task(struct task_struct *child) | ||
3298 | { | ||
3299 | struct perf_counter *child_counter, *tmp; | ||
3300 | struct perf_counter_context *child_ctx; | ||
3301 | |||
3302 | child_ctx = &child->perf_counter_ctx; | ||
3303 | |||
3304 | if (likely(!child_ctx->nr_counters)) | ||
3305 | return; | ||
3306 | |||
3307 | again: | ||
3308 | list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, | ||
3309 | list_entry) | ||
3310 | __perf_counter_exit_task(child, child_counter, child_ctx); | ||
3311 | |||
3312 | /* | ||
3313 | * If the last counter was a group counter, it will have appended all | ||
3314 | * its siblings to the list, but we obtained 'tmp' before that which | ||
3315 | * will still point to the list head terminating the iteration. | ||
3316 | */ | ||
3317 | if (!list_empty(&child_ctx->counter_list)) | ||
3318 | goto again; | ||
3319 | } | ||
3320 | |||
3321 | /* | ||
3322 | * Initialize the perf_counter context in task_struct | ||
3323 | */ | ||
3324 | void perf_counter_init_task(struct task_struct *child) | ||
3325 | { | ||
3326 | struct perf_counter_context *child_ctx, *parent_ctx; | ||
3327 | struct perf_counter *counter; | ||
3328 | struct task_struct *parent = current; | ||
3329 | |||
3330 | child_ctx = &child->perf_counter_ctx; | ||
3331 | parent_ctx = &parent->perf_counter_ctx; | ||
3332 | |||
3333 | __perf_counter_init_context(child_ctx, child); | ||
3334 | |||
3335 | /* | ||
3336 | * This is executed from the parent task context, so inherit | ||
3337 | * counters that have been marked for cloning: | ||
3338 | */ | ||
3339 | |||
3340 | if (likely(!parent_ctx->nr_counters)) | ||
3341 | return; | ||
3342 | |||
3343 | /* | ||
3344 | * Lock the parent list. No need to lock the child - not PID | ||
3345 | * hashed yet and not running, so nobody can access it. | ||
3346 | */ | ||
3347 | mutex_lock(&parent_ctx->mutex); | ||
3348 | |||
3349 | /* | ||
3350 | * We dont have to disable NMIs - we are only looking at | ||
3351 | * the list, not manipulating it: | ||
3352 | */ | ||
3353 | list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) { | ||
3354 | if (!counter->hw_event.inherit) | ||
3355 | continue; | ||
3356 | |||
3357 | if (inherit_group(counter, parent, | ||
3358 | parent_ctx, child, child_ctx)) | ||
3359 | break; | ||
3360 | } | ||
3361 | |||
3362 | mutex_unlock(&parent_ctx->mutex); | ||
3363 | } | ||
3364 | |||
3365 | static void __cpuinit perf_counter_init_cpu(int cpu) | ||
3366 | { | ||
3367 | struct perf_cpu_context *cpuctx; | ||
3368 | |||
3369 | cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
3370 | __perf_counter_init_context(&cpuctx->ctx, NULL); | ||
3371 | |||
3372 | spin_lock(&perf_resource_lock); | ||
3373 | cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu; | ||
3374 | spin_unlock(&perf_resource_lock); | ||
3375 | |||
3376 | hw_perf_counter_setup(cpu); | ||
3377 | } | ||
3378 | |||
3379 | #ifdef CONFIG_HOTPLUG_CPU | ||
3380 | static void __perf_counter_exit_cpu(void *info) | ||
3381 | { | ||
3382 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
3383 | struct perf_counter_context *ctx = &cpuctx->ctx; | ||
3384 | struct perf_counter *counter, *tmp; | ||
3385 | |||
3386 | list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) | ||
3387 | __perf_counter_remove_from_context(counter); | ||
3388 | } | ||
3389 | static void perf_counter_exit_cpu(int cpu) | ||
3390 | { | ||
3391 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
3392 | struct perf_counter_context *ctx = &cpuctx->ctx; | ||
3393 | |||
3394 | mutex_lock(&ctx->mutex); | ||
3395 | smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1); | ||
3396 | mutex_unlock(&ctx->mutex); | ||
3397 | } | ||
3398 | #else | ||
/* Without CONFIG_HOTPLUG_CPU there is nothing to tear down. */
static inline void perf_counter_exit_cpu(int cpu)
{
}
3400 | #endif | ||
3401 | |||
3402 | static int __cpuinit | ||
3403 | perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | ||
3404 | { | ||
3405 | unsigned int cpu = (long)hcpu; | ||
3406 | |||
3407 | switch (action) { | ||
3408 | |||
3409 | case CPU_UP_PREPARE: | ||
3410 | case CPU_UP_PREPARE_FROZEN: | ||
3411 | perf_counter_init_cpu(cpu); | ||
3412 | break; | ||
3413 | |||
3414 | case CPU_DOWN_PREPARE: | ||
3415 | case CPU_DOWN_PREPARE_FROZEN: | ||
3416 | perf_counter_exit_cpu(cpu); | ||
3417 | break; | ||
3418 | |||
3419 | default: | ||
3420 | break; | ||
3421 | } | ||
3422 | |||
3423 | return NOTIFY_OK; | ||
3424 | } | ||
3425 | |||
3426 | static struct notifier_block __cpuinitdata perf_cpu_nb = { | ||
3427 | .notifier_call = perf_cpu_notify, | ||
3428 | }; | ||
3429 | |||
3430 | void __init perf_counter_init(void) | ||
3431 | { | ||
3432 | perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, | ||
3433 | (void *)(long)smp_processor_id()); | ||
3434 | register_cpu_notifier(&perf_cpu_nb); | ||
3435 | } | ||
3436 | |||
3437 | static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf) | ||
3438 | { | ||
3439 | return sprintf(buf, "%d\n", perf_reserved_percpu); | ||
3440 | } | ||
3441 | |||
3442 | static ssize_t | ||
3443 | perf_set_reserve_percpu(struct sysdev_class *class, | ||
3444 | const char *buf, | ||
3445 | size_t count) | ||
3446 | { | ||
3447 | struct perf_cpu_context *cpuctx; | ||
3448 | unsigned long val; | ||
3449 | int err, cpu, mpt; | ||
3450 | |||
3451 | err = strict_strtoul(buf, 10, &val); | ||
3452 | if (err) | ||
3453 | return err; | ||
3454 | if (val > perf_max_counters) | ||
3455 | return -EINVAL; | ||
3456 | |||
3457 | spin_lock(&perf_resource_lock); | ||
3458 | perf_reserved_percpu = val; | ||
3459 | for_each_online_cpu(cpu) { | ||
3460 | cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
3461 | spin_lock_irq(&cpuctx->ctx.lock); | ||
3462 | mpt = min(perf_max_counters - cpuctx->ctx.nr_counters, | ||
3463 | perf_max_counters - perf_reserved_percpu); | ||
3464 | cpuctx->max_pertask = mpt; | ||
3465 | spin_unlock_irq(&cpuctx->ctx.lock); | ||
3466 | } | ||
3467 | spin_unlock(&perf_resource_lock); | ||
3468 | |||
3469 | return count; | ||
3470 | } | ||
3471 | |||
3472 | static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf) | ||
3473 | { | ||
3474 | return sprintf(buf, "%d\n", perf_overcommit); | ||
3475 | } | ||
3476 | |||
3477 | static ssize_t | ||
3478 | perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count) | ||
3479 | { | ||
3480 | unsigned long val; | ||
3481 | int err; | ||
3482 | |||
3483 | err = strict_strtoul(buf, 10, &val); | ||
3484 | if (err) | ||
3485 | return err; | ||
3486 | if (val > 1) | ||
3487 | return -EINVAL; | ||
3488 | |||
3489 | spin_lock(&perf_resource_lock); | ||
3490 | perf_overcommit = val; | ||
3491 | spin_unlock(&perf_resource_lock); | ||
3492 | |||
3493 | return count; | ||
3494 | } | ||
3495 | |||
3496 | static SYSDEV_CLASS_ATTR( | ||
3497 | reserve_percpu, | ||
3498 | 0644, | ||
3499 | perf_show_reserve_percpu, | ||
3500 | perf_set_reserve_percpu | ||
3501 | ); | ||
3502 | |||
3503 | static SYSDEV_CLASS_ATTR( | ||
3504 | overcommit, | ||
3505 | 0644, | ||
3506 | perf_show_overcommit, | ||
3507 | perf_set_overcommit | ||
3508 | ); | ||
3509 | |||
3510 | static struct attribute *perfclass_attrs[] = { | ||
3511 | &attr_reserve_percpu.attr, | ||
3512 | &attr_overcommit.attr, | ||
3513 | NULL | ||
3514 | }; | ||
3515 | |||
3516 | static struct attribute_group perfclass_attr_group = { | ||
3517 | .attrs = perfclass_attrs, | ||
3518 | .name = "perf_counters", | ||
3519 | }; | ||
3520 | |||
3521 | static int __init perf_counter_sysfs_init(void) | ||
3522 | { | ||
3523 | return sysfs_create_group(&cpu_sysdev_class.kset.kobj, | ||
3524 | &perfclass_attr_group); | ||
3525 | } | ||
3526 | device_initcall(perf_counter_sysfs_init); | ||
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 69d9cb921ffa..013882e83497 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
@@ -864,9 +864,9 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock, | |||
864 | EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); | 864 | EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); |
865 | 865 | ||
866 | /** | 866 | /** |
867 | * rt_mutex_lock_interruptible_ktime - lock a rt_mutex interruptible | 867 | * rt_mutex_timed_lock - lock a rt_mutex interruptible |
868 | * the timeout structure is provided | 868 | * the timeout structure is provided |
869 | * by the caller | 869 | * by the caller |
870 | * | 870 | * |
871 | * @lock: the rt_mutex to be locked | 871 | * @lock: the rt_mutex to be locked |
872 | * @timeout: timeout structure or NULL (no timeout) | 872 | * @timeout: timeout structure or NULL (no timeout) |
@@ -913,7 +913,7 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock) | |||
913 | } | 913 | } |
914 | EXPORT_SYMBOL_GPL(rt_mutex_unlock); | 914 | EXPORT_SYMBOL_GPL(rt_mutex_unlock); |
915 | 915 | ||
916 | /*** | 916 | /** |
917 | * rt_mutex_destroy - mark a mutex unusable | 917 | * rt_mutex_destroy - mark a mutex unusable |
918 | * @lock: the mutex to be destroyed | 918 | * @lock: the mutex to be destroyed |
919 | * | 919 | * |
diff --git a/kernel/sched.c b/kernel/sched.c index 26efa475bdc1..419a39d0988f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/completion.h> | 39 | #include <linux/completion.h> |
40 | #include <linux/kernel_stat.h> | 40 | #include <linux/kernel_stat.h> |
41 | #include <linux/debug_locks.h> | 41 | #include <linux/debug_locks.h> |
42 | #include <linux/perf_counter.h> | ||
42 | #include <linux/security.h> | 43 | #include <linux/security.h> |
43 | #include <linux/notifier.h> | 44 | #include <linux/notifier.h> |
44 | #include <linux/profile.h> | 45 | #include <linux/profile.h> |
@@ -584,6 +585,7 @@ struct rq { | |||
584 | struct load_weight load; | 585 | struct load_weight load; |
585 | unsigned long nr_load_updates; | 586 | unsigned long nr_load_updates; |
586 | u64 nr_switches; | 587 | u64 nr_switches; |
588 | u64 nr_migrations_in; | ||
587 | 589 | ||
588 | struct cfs_rq cfs; | 590 | struct cfs_rq cfs; |
589 | struct rt_rq rt; | 591 | struct rt_rq rt; |
@@ -692,7 +694,7 @@ static inline int cpu_of(struct rq *rq) | |||
692 | #define task_rq(p) cpu_rq(task_cpu(p)) | 694 | #define task_rq(p) cpu_rq(task_cpu(p)) |
693 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) | 695 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) |
694 | 696 | ||
695 | static inline void update_rq_clock(struct rq *rq) | 697 | inline void update_rq_clock(struct rq *rq) |
696 | { | 698 | { |
697 | rq->clock = sched_clock_cpu(cpu_of(rq)); | 699 | rq->clock = sched_clock_cpu(cpu_of(rq)); |
698 | } | 700 | } |
@@ -1967,12 +1969,15 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
1967 | p->se.sleep_start -= clock_offset; | 1969 | p->se.sleep_start -= clock_offset; |
1968 | if (p->se.block_start) | 1970 | if (p->se.block_start) |
1969 | p->se.block_start -= clock_offset; | 1971 | p->se.block_start -= clock_offset; |
1972 | #endif | ||
1970 | if (old_cpu != new_cpu) { | 1973 | if (old_cpu != new_cpu) { |
1971 | schedstat_inc(p, se.nr_migrations); | 1974 | p->se.nr_migrations++; |
1975 | new_rq->nr_migrations_in++; | ||
1976 | #ifdef CONFIG_SCHEDSTATS | ||
1972 | if (task_hot(p, old_rq->clock, NULL)) | 1977 | if (task_hot(p, old_rq->clock, NULL)) |
1973 | schedstat_inc(p, se.nr_forced2_migrations); | 1978 | schedstat_inc(p, se.nr_forced2_migrations); |
1974 | } | ||
1975 | #endif | 1979 | #endif |
1980 | } | ||
1976 | p->se.vruntime -= old_cfsrq->min_vruntime - | 1981 | p->se.vruntime -= old_cfsrq->min_vruntime - |
1977 | new_cfsrq->min_vruntime; | 1982 | new_cfsrq->min_vruntime; |
1978 | 1983 | ||
@@ -2324,6 +2329,27 @@ static int sched_balance_self(int cpu, int flag) | |||
2324 | 2329 | ||
2325 | #endif /* CONFIG_SMP */ | 2330 | #endif /* CONFIG_SMP */ |
2326 | 2331 | ||
2332 | /** | ||
2333 | * task_oncpu_function_call - call a function on the cpu on which a task runs | ||
2334 | * @p: the task to evaluate | ||
2335 | * @func: the function to be called | ||
2336 | * @info: the function call argument | ||
2337 | * | ||
2338 | * Calls @func on the CPU where @p is currently running; if @p is not running, | ||
2339 | * nothing is called. If that is the current CPU, @func is called directly. | ||
2340 | */ | ||
2341 | void task_oncpu_function_call(struct task_struct *p, | ||
2342 | void (*func) (void *info), void *info) | ||
2343 | { | ||
2344 | int cpu; | ||
2345 | |||
2346 | preempt_disable(); | ||
2347 | cpu = task_cpu(p); | ||
2348 | if (task_curr(p)) | ||
2349 | smp_call_function_single(cpu, func, info, 1); | ||
2350 | preempt_enable(); | ||
2351 | } | ||
2352 | |||
2327 | /*** | 2353 | /*** |
2328 | * try_to_wake_up - wake up a thread | 2354 | * try_to_wake_up - wake up a thread |
2329 | * @p: the to-be-woken-up thread | 2355 | * @p: the to-be-woken-up thread |
@@ -2480,6 +2506,7 @@ static void __sched_fork(struct task_struct *p) | |||
2480 | p->se.exec_start = 0; | 2506 | p->se.exec_start = 0; |
2481 | p->se.sum_exec_runtime = 0; | 2507 | p->se.sum_exec_runtime = 0; |
2482 | p->se.prev_sum_exec_runtime = 0; | 2508 | p->se.prev_sum_exec_runtime = 0; |
2509 | p->se.nr_migrations = 0; | ||
2483 | p->se.last_wakeup = 0; | 2510 | p->se.last_wakeup = 0; |
2484 | p->se.avg_overlap = 0; | 2511 | p->se.avg_overlap = 0; |
2485 | p->se.start_runtime = 0; | 2512 | p->se.start_runtime = 0; |
@@ -2710,6 +2737,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
2710 | */ | 2737 | */ |
2711 | prev_state = prev->state; | 2738 | prev_state = prev->state; |
2712 | finish_arch_switch(prev); | 2739 | finish_arch_switch(prev); |
2740 | perf_counter_task_sched_in(current, cpu_of(rq)); | ||
2713 | finish_lock_switch(rq, prev); | 2741 | finish_lock_switch(rq, prev); |
2714 | #ifdef CONFIG_SMP | 2742 | #ifdef CONFIG_SMP |
2715 | if (post_schedule) | 2743 | if (post_schedule) |
@@ -2872,6 +2900,15 @@ unsigned long nr_active(void) | |||
2872 | } | 2900 | } |
2873 | 2901 | ||
2874 | /* | 2902 | /* |
2903 | * Externally visible per-cpu scheduler statistics: | ||
2904 | * cpu_nr_migrations(cpu) - number of migrations into that cpu | ||
2905 | */ | ||
2906 | u64 cpu_nr_migrations(int cpu) | ||
2907 | { | ||
2908 | return cpu_rq(cpu)->nr_migrations_in; | ||
2909 | } | ||
2910 | |||
2911 | /* | ||
2875 | * Update rq->cpu_load[] statistics. This function is usually called every | 2912 | * Update rq->cpu_load[] statistics. This function is usually called every |
2876 | * scheduler tick (TICK_NSEC). | 2913 | * scheduler tick (TICK_NSEC). |
2877 | */ | 2914 | */ |
@@ -4838,6 +4875,7 @@ void scheduler_tick(void) | |||
4838 | update_rq_clock(rq); | 4875 | update_rq_clock(rq); |
4839 | update_cpu_load(rq); | 4876 | update_cpu_load(rq); |
4840 | curr->sched_class->task_tick(rq, curr, 0); | 4877 | curr->sched_class->task_tick(rq, curr, 0); |
4878 | perf_counter_task_tick(curr, cpu); | ||
4841 | spin_unlock(&rq->lock); | 4879 | spin_unlock(&rq->lock); |
4842 | 4880 | ||
4843 | #ifdef CONFIG_SMP | 4881 | #ifdef CONFIG_SMP |
@@ -5053,6 +5091,7 @@ need_resched_nonpreemptible: | |||
5053 | 5091 | ||
5054 | if (likely(prev != next)) { | 5092 | if (likely(prev != next)) { |
5055 | sched_info_switch(prev, next); | 5093 | sched_info_switch(prev, next); |
5094 | perf_counter_task_sched_out(prev, cpu); | ||
5056 | 5095 | ||
5057 | rq->nr_switches++; | 5096 | rq->nr_switches++; |
5058 | rq->curr = next; | 5097 | rq->curr = next; |
@@ -8958,7 +8997,7 @@ void __init sched_init(void) | |||
8958 | * 1024) and two child groups A0 and A1 (of weight 1024 each), | 8997 | * 1024) and two child groups A0 and A1 (of weight 1024 each), |
8959 | * then A0's share of the cpu resource is: | 8998 | * then A0's share of the cpu resource is: |
8960 | * | 8999 | * |
8961 | * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33% | 9000 | * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33% |
8962 | * | 9001 | * |
8963 | * We achieve this by letting init_task_group's tasks sit | 9002 | * We achieve this by letting init_task_group's tasks sit |
8964 | * directly in rq->cfs (i.e init_task_group->se[] = NULL). | 9003 | * directly in rq->cfs (i.e init_task_group->se[] = NULL). |
@@ -9059,6 +9098,8 @@ void __init sched_init(void) | |||
9059 | alloc_bootmem_cpumask_var(&cpu_isolated_map); | 9098 | alloc_bootmem_cpumask_var(&cpu_isolated_map); |
9060 | #endif /* SMP */ | 9099 | #endif /* SMP */ |
9061 | 9100 | ||
9101 | perf_counter_init(); | ||
9102 | |||
9062 | scheduler_running = 1; | 9103 | scheduler_running = 1; |
9063 | } | 9104 | } |
9064 | 9105 | ||
diff --git a/kernel/signal.c b/kernel/signal.c index d8034737db4c..f79b3b9f8375 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -2278,24 +2278,17 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) | |||
2278 | return kill_something_info(sig, &info, pid); | 2278 | return kill_something_info(sig, &info, pid); |
2279 | } | 2279 | } |
2280 | 2280 | ||
2281 | static int do_tkill(pid_t tgid, pid_t pid, int sig) | 2281 | static int |
2282 | do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) | ||
2282 | { | 2283 | { |
2283 | int error; | ||
2284 | struct siginfo info; | ||
2285 | struct task_struct *p; | 2284 | struct task_struct *p; |
2286 | unsigned long flags; | 2285 | unsigned long flags; |
2287 | 2286 | int error = -ESRCH; | |
2288 | error = -ESRCH; | ||
2289 | info.si_signo = sig; | ||
2290 | info.si_errno = 0; | ||
2291 | info.si_code = SI_TKILL; | ||
2292 | info.si_pid = task_tgid_vnr(current); | ||
2293 | info.si_uid = current_uid(); | ||
2294 | 2287 | ||
2295 | rcu_read_lock(); | 2288 | rcu_read_lock(); |
2296 | p = find_task_by_vpid(pid); | 2289 | p = find_task_by_vpid(pid); |
2297 | if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) { | 2290 | if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) { |
2298 | error = check_kill_permission(sig, &info, p); | 2291 | error = check_kill_permission(sig, info, p); |
2299 | /* | 2292 | /* |
2300 | * The null signal is a permissions and process existence | 2293 | * The null signal is a permissions and process existence |
2301 | * probe. No signal is actually delivered. | 2294 | * probe. No signal is actually delivered. |
@@ -2305,7 +2298,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) | |||
2305 | * signal is private anyway. | 2298 | * signal is private anyway. |
2306 | */ | 2299 | */ |
2307 | if (!error && sig && lock_task_sighand(p, &flags)) { | 2300 | if (!error && sig && lock_task_sighand(p, &flags)) { |
2308 | error = specific_send_sig_info(sig, &info, p); | 2301 | error = specific_send_sig_info(sig, info, p); |
2309 | unlock_task_sighand(p, &flags); | 2302 | unlock_task_sighand(p, &flags); |
2310 | } | 2303 | } |
2311 | } | 2304 | } |
@@ -2314,6 +2307,19 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) | |||
2314 | return error; | 2307 | return error; |
2315 | } | 2308 | } |
2316 | 2309 | ||
2310 | static int do_tkill(pid_t tgid, pid_t pid, int sig) | ||
2311 | { | ||
2312 | struct siginfo info; | ||
2313 | |||
2314 | info.si_signo = sig; | ||
2315 | info.si_errno = 0; | ||
2316 | info.si_code = SI_TKILL; | ||
2317 | info.si_pid = task_tgid_vnr(current); | ||
2318 | info.si_uid = current_uid(); | ||
2319 | |||
2320 | return do_send_specific(tgid, pid, sig, &info); | ||
2321 | } | ||
2322 | |||
2317 | /** | 2323 | /** |
2318 | * sys_tgkill - send signal to one specific thread | 2324 | * sys_tgkill - send signal to one specific thread |
2319 | * @tgid: the thread group ID of the thread | 2325 | * @tgid: the thread group ID of the thread |
@@ -2363,6 +2369,32 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, | |||
2363 | return kill_proc_info(sig, &info, pid); | 2369 | return kill_proc_info(sig, &info, pid); |
2364 | } | 2370 | } |
2365 | 2371 | ||
2372 | long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) | ||
2373 | { | ||
2374 | /* Only valid for a single task: both pid and tgid must be positive */ | ||
2375 | if (pid <= 0 || tgid <= 0) | ||
2376 | return -EINVAL; | ||
2377 | |||
2378 | /* Not even root can pretend to send signals from the kernel. | ||
2379 | Nor can they impersonate a kill(), which adds source info. */ | ||
2380 | if (info->si_code >= 0) | ||
2381 | return -EPERM; | ||
2382 | info->si_signo = sig; | ||
2383 | |||
2384 | return do_send_specific(tgid, pid, sig, info); | ||
2385 | } | ||
2386 | |||
2387 | SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig, | ||
2388 | siginfo_t __user *, uinfo) | ||
2389 | { | ||
2390 | siginfo_t info; | ||
2391 | |||
2392 | if (copy_from_user(&info, uinfo, sizeof(siginfo_t))) | ||
2393 | return -EFAULT; | ||
2394 | |||
2395 | return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); | ||
2396 | } | ||
2397 | |||
2366 | int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) | 2398 | int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) |
2367 | { | 2399 | { |
2368 | struct task_struct *t = current; | 2400 | struct task_struct *t = current; |
diff --git a/kernel/sys.c b/kernel/sys.c index e7998cf31498..438d99a38c87 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/prctl.h> | 14 | #include <linux/prctl.h> |
15 | #include <linux/highuid.h> | 15 | #include <linux/highuid.h> |
16 | #include <linux/fs.h> | 16 | #include <linux/fs.h> |
17 | #include <linux/perf_counter.h> | ||
17 | #include <linux/resource.h> | 18 | #include <linux/resource.h> |
18 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
19 | #include <linux/kexec.h> | 20 | #include <linux/kexec.h> |
@@ -1793,6 +1794,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
1793 | case PR_SET_TSC: | 1794 | case PR_SET_TSC: |
1794 | error = SET_TSC_CTL(arg2); | 1795 | error = SET_TSC_CTL(arg2); |
1795 | break; | 1796 | break; |
1797 | case PR_TASK_PERF_COUNTERS_DISABLE: | ||
1798 | error = perf_counter_task_disable(); | ||
1799 | break; | ||
1800 | case PR_TASK_PERF_COUNTERS_ENABLE: | ||
1801 | error = perf_counter_task_enable(); | ||
1802 | break; | ||
1796 | case PR_GET_TIMERSLACK: | 1803 | case PR_GET_TIMERSLACK: |
1797 | error = current->timer_slack_ns; | 1804 | error = current->timer_slack_ns; |
1798 | break; | 1805 | break; |
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 27dad2967387..68320f6b07b5 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -175,3 +175,6 @@ cond_syscall(compat_sys_timerfd_settime); | |||
175 | cond_syscall(compat_sys_timerfd_gettime); | 175 | cond_syscall(compat_sys_timerfd_gettime); |
176 | cond_syscall(sys_eventfd); | 176 | cond_syscall(sys_eventfd); |
177 | cond_syscall(sys_eventfd2); | 177 | cond_syscall(sys_eventfd2); |
178 | |||
179 | /* performance counters: */ | ||
180 | cond_syscall(sys_perf_counter_open); | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b2970d56fb76..3cb1849f5989 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/reboot.h> | 49 | #include <linux/reboot.h> |
50 | #include <linux/ftrace.h> | 50 | #include <linux/ftrace.h> |
51 | #include <linux/slow-work.h> | 51 | #include <linux/slow-work.h> |
52 | #include <linux/perf_counter.h> | ||
52 | 53 | ||
53 | #include <asm/uaccess.h> | 54 | #include <asm/uaccess.h> |
54 | #include <asm/processor.h> | 55 | #include <asm/processor.h> |
@@ -912,6 +913,24 @@ static struct ctl_table kern_table[] = { | |||
912 | .child = slow_work_sysctls, | 913 | .child = slow_work_sysctls, |
913 | }, | 914 | }, |
914 | #endif | 915 | #endif |
916 | #ifdef CONFIG_PERF_COUNTERS | ||
917 | { | ||
918 | .ctl_name = CTL_UNNUMBERED, | ||
919 | .procname = "perf_counter_privileged", | ||
920 | .data = &sysctl_perf_counter_priv, | ||
921 | .maxlen = sizeof(sysctl_perf_counter_priv), | ||
922 | .mode = 0644, | ||
923 | .proc_handler = &proc_dointvec, | ||
924 | }, | ||
925 | { | ||
926 | .ctl_name = CTL_UNNUMBERED, | ||
927 | .procname = "perf_counter_mlock_kb", | ||
928 | .data = &sysctl_perf_counter_mlock, | ||
929 | .maxlen = sizeof(sysctl_perf_counter_mlock), | ||
930 | .mode = 0644, | ||
931 | .proc_handler = &proc_dointvec, | ||
932 | }, | ||
933 | #endif | ||
915 | /* | 934 | /* |
916 | * NOTE: do not add new entries to this table unless you have read | 935 | * NOTE: do not add new entries to this table unless you have read |
917 | * Documentation/sysctl/ctl_unnumbered.txt | 936 | * Documentation/sysctl/ctl_unnumbered.txt |
diff --git a/kernel/timer.c b/kernel/timer.c index cffffad01c31..fed53be44fd9 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/delay.h> | 37 | #include <linux/delay.h> |
38 | #include <linux/tick.h> | 38 | #include <linux/tick.h> |
39 | #include <linux/kallsyms.h> | 39 | #include <linux/kallsyms.h> |
40 | #include <linux/perf_counter.h> | ||
40 | 41 | ||
41 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
42 | #include <asm/unistd.h> | 43 | #include <asm/unistd.h> |
@@ -1170,6 +1171,8 @@ static void run_timer_softirq(struct softirq_action *h) | |||
1170 | { | 1171 | { |
1171 | struct tvec_base *base = __get_cpu_var(tvec_bases); | 1172 | struct tvec_base *base = __get_cpu_var(tvec_bases); |
1172 | 1173 | ||
1174 | perf_counter_do_pending(); | ||
1175 | |||
1173 | hrtimer_run_pending(); | 1176 | hrtimer_run_pending(); |
1174 | 1177 | ||
1175 | if (time_after_eq(jiffies, base->timer_jiffies)) | 1178 | if (time_after_eq(jiffies, base->timer_jiffies)) |
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/mempolicy.h> | 28 | #include <linux/mempolicy.h> |
29 | #include <linux/rmap.h> | 29 | #include <linux/rmap.h> |
30 | #include <linux/mmu_notifier.h> | 30 | #include <linux/mmu_notifier.h> |
31 | #include <linux/perf_counter.h> | ||
31 | 32 | ||
32 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
33 | #include <asm/cacheflush.h> | 34 | #include <asm/cacheflush.h> |
@@ -1219,6 +1220,9 @@ munmap_back: | |||
1219 | if (correct_wcount) | 1220 | if (correct_wcount) |
1220 | atomic_inc(&inode->i_writecount); | 1221 | atomic_inc(&inode->i_writecount); |
1221 | out: | 1222 | out: |
1223 | if (vm_flags & VM_EXEC) | ||
1224 | perf_counter_mmap(addr, len, pgoff, file); | ||
1225 | |||
1222 | mm->total_vm += len >> PAGE_SHIFT; | 1226 | mm->total_vm += len >> PAGE_SHIFT; |
1223 | vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); | 1227 | vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); |
1224 | if (vm_flags & VM_LOCKED) { | 1228 | if (vm_flags & VM_LOCKED) { |
@@ -1752,6 +1756,12 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma) | |||
1752 | do { | 1756 | do { |
1753 | long nrpages = vma_pages(vma); | 1757 | long nrpages = vma_pages(vma); |
1754 | 1758 | ||
1759 | if (vma->vm_flags & VM_EXEC) { | ||
1760 | perf_counter_munmap(vma->vm_start, | ||
1761 | nrpages << PAGE_SHIFT, | ||
1762 | vma->vm_pgoff, vma->vm_file); | ||
1763 | } | ||
1764 | |||
1755 | mm->total_vm -= nrpages; | 1765 | mm->total_vm -= nrpages; |
1756 | vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); | 1766 | vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); |
1757 | vma = remove_vma(vma); | 1767 | vma = remove_vma(vma); |