diff options
110 files changed, 19741 insertions, 93 deletions
diff --git a/Documentation/perf_counter/.gitignore b/Documentation/perf_counter/.gitignore new file mode 100644 index 000000000000..41c0b20a76ce --- /dev/null +++ b/Documentation/perf_counter/.gitignore | |||
@@ -0,0 +1,179 @@ | |||
1 | GIT-BUILD-OPTIONS | ||
2 | GIT-CFLAGS | ||
3 | GIT-GUI-VARS | ||
4 | GIT-VERSION-FILE | ||
5 | git | ||
6 | git-add | ||
7 | git-add--interactive | ||
8 | git-am | ||
9 | git-annotate | ||
10 | git-apply | ||
11 | git-archimport | ||
12 | git-archive | ||
13 | git-bisect | ||
14 | git-bisect--helper | ||
15 | git-blame | ||
16 | git-branch | ||
17 | git-bundle | ||
18 | git-cat-file | ||
19 | git-check-attr | ||
20 | git-check-ref-format | ||
21 | git-checkout | ||
22 | git-checkout-index | ||
23 | git-cherry | ||
24 | git-cherry-pick | ||
25 | git-clean | ||
26 | git-clone | ||
27 | git-commit | ||
28 | git-commit-tree | ||
29 | git-config | ||
30 | git-count-objects | ||
31 | git-cvsexportcommit | ||
32 | git-cvsimport | ||
33 | git-cvsserver | ||
34 | git-daemon | ||
35 | git-diff | ||
36 | git-diff-files | ||
37 | git-diff-index | ||
38 | git-diff-tree | ||
39 | git-difftool | ||
40 | git-difftool--helper | ||
41 | git-describe | ||
42 | git-fast-export | ||
43 | git-fast-import | ||
44 | git-fetch | ||
45 | git-fetch--tool | ||
46 | git-fetch-pack | ||
47 | git-filter-branch | ||
48 | git-fmt-merge-msg | ||
49 | git-for-each-ref | ||
50 | git-format-patch | ||
51 | git-fsck | ||
52 | git-fsck-objects | ||
53 | git-gc | ||
54 | git-get-tar-commit-id | ||
55 | git-grep | ||
56 | git-hash-object | ||
57 | git-help | ||
58 | git-http-fetch | ||
59 | git-http-push | ||
60 | git-imap-send | ||
61 | git-index-pack | ||
62 | git-init | ||
63 | git-init-db | ||
64 | git-instaweb | ||
65 | git-log | ||
66 | git-lost-found | ||
67 | git-ls-files | ||
68 | git-ls-remote | ||
69 | git-ls-tree | ||
70 | git-mailinfo | ||
71 | git-mailsplit | ||
72 | git-merge | ||
73 | git-merge-base | ||
74 | git-merge-index | ||
75 | git-merge-file | ||
76 | git-merge-tree | ||
77 | git-merge-octopus | ||
78 | git-merge-one-file | ||
79 | git-merge-ours | ||
80 | git-merge-recursive | ||
81 | git-merge-resolve | ||
82 | git-merge-subtree | ||
83 | git-mergetool | ||
84 | git-mergetool--lib | ||
85 | git-mktag | ||
86 | git-mktree | ||
87 | git-name-rev | ||
88 | git-mv | ||
89 | git-pack-redundant | ||
90 | git-pack-objects | ||
91 | git-pack-refs | ||
92 | git-parse-remote | ||
93 | git-patch-id | ||
94 | git-peek-remote | ||
95 | git-prune | ||
96 | git-prune-packed | ||
97 | git-pull | ||
98 | git-push | ||
99 | git-quiltimport | ||
100 | git-read-tree | ||
101 | git-rebase | ||
102 | git-rebase--interactive | ||
103 | git-receive-pack | ||
104 | git-reflog | ||
105 | git-relink | ||
106 | git-remote | ||
107 | git-repack | ||
108 | git-repo-config | ||
109 | git-request-pull | ||
110 | git-rerere | ||
111 | git-reset | ||
112 | git-rev-list | ||
113 | git-rev-parse | ||
114 | git-revert | ||
115 | git-rm | ||
116 | git-send-email | ||
117 | git-send-pack | ||
118 | git-sh-setup | ||
119 | git-shell | ||
120 | git-shortlog | ||
121 | git-show | ||
122 | git-show-branch | ||
123 | git-show-index | ||
124 | git-show-ref | ||
125 | git-stage | ||
126 | git-stash | ||
127 | git-status | ||
128 | git-stripspace | ||
129 | git-submodule | ||
130 | git-svn | ||
131 | git-symbolic-ref | ||
132 | git-tag | ||
133 | git-tar-tree | ||
134 | git-unpack-file | ||
135 | git-unpack-objects | ||
136 | git-update-index | ||
137 | git-update-ref | ||
138 | git-update-server-info | ||
139 | git-upload-archive | ||
140 | git-upload-pack | ||
141 | git-var | ||
142 | git-verify-pack | ||
143 | git-verify-tag | ||
144 | git-web--browse | ||
145 | git-whatchanged | ||
146 | git-write-tree | ||
147 | git-core-*/?* | ||
148 | gitk-wish | ||
149 | gitweb/gitweb.cgi | ||
150 | test-chmtime | ||
151 | test-ctype | ||
152 | test-date | ||
153 | test-delta | ||
154 | test-dump-cache-tree | ||
155 | test-genrandom | ||
156 | test-match-trees | ||
157 | test-parse-options | ||
158 | test-path-utils | ||
159 | test-sha1 | ||
160 | test-sigchain | ||
161 | common-cmds.h | ||
162 | *.tar.gz | ||
163 | *.dsc | ||
164 | *.deb | ||
165 | git.spec | ||
166 | *.exe | ||
167 | *.[aos] | ||
168 | *.py[co] | ||
169 | config.mak | ||
170 | autom4te.cache | ||
171 | config.cache | ||
172 | config.log | ||
173 | config.status | ||
174 | config.mak.autogen | ||
175 | config.mak.append | ||
176 | configure | ||
177 | tags | ||
178 | TAGS | ||
179 | cscope* | ||
diff --git a/Documentation/perf_counter/Documentation/perf-help.txt b/Documentation/perf_counter/Documentation/perf-help.txt new file mode 100644 index 000000000000..f85fed5a7edb --- /dev/null +++ b/Documentation/perf_counter/Documentation/perf-help.txt | |||
@@ -0,0 +1,38 @@ | |||
1 | perf-help(1) | ||
2 | =========== | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-help - display help information about perf | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | 'perf help' [-a|--all] [COMMAND] | ||
11 | |||
12 | DESCRIPTION | ||
13 | ----------- | ||
14 | |||
15 | With no options and no COMMAND given, the synopsis of the 'perf' | ||
16 | command and a list of the most commonly used perf commands are printed | ||
17 | on the standard output. | ||
18 | |||
19 | If the option '--all' or '-a' is given, then all available commands are | ||
20 | printed on the standard output. | ||
21 | |||
22 | If a perf command is named, a manual page for that command is brought | ||
23 | up. The 'man' program is used by default for this purpose, but this | ||
24 | can be overridden by other options or configuration variables. | ||
25 | |||
26 | Note that `perf --help ...` is identical to `perf help ...` because the | ||
27 | former is internally converted into the latter. | ||
28 | |||
29 | OPTIONS | ||
30 | ------- | ||
31 | -a:: | ||
32 | --all:: | ||
33 | Prints all the available commands on the standard output. This | ||
34 | option supersedes any other option. | ||
35 | |||
36 | PERF | ||
37 | ---- | ||
38 | Part of the linkperf:perf[1] suite | ||
diff --git a/Documentation/perf_counter/Documentation/perf-record.txt b/Documentation/perf_counter/Documentation/perf-record.txt new file mode 100644 index 000000000000..d07700e35eb2 --- /dev/null +++ b/Documentation/perf_counter/Documentation/perf-record.txt | |||
@@ -0,0 +1,63 @@ | |||
1 | perf-record(1) | ||
2 | ========== | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-record - Run a command and record its profile into output.perf | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | This command runs a command and gathers a performance counter profile | ||
16 | from it, into output.perf - without displaying anything. | ||
17 | |||
18 | This file can then be inspected later on, using 'perf report'. | ||
19 | |||
20 | |||
21 | OPTIONS | ||
22 | ------- | ||
23 | <command>...:: | ||
24 | Any command you can specify in a shell. | ||
25 | |||
26 | -e:: | ||
27 | --event=:: | ||
28 | 0:0: cpu-cycles | ||
29 | 0:0: cycles | ||
30 | 0:1: instructions | ||
31 | 0:2: cache-references | ||
32 | 0:3: cache-misses | ||
33 | 0:4: branch-instructions | ||
34 | 0:4: branches | ||
35 | 0:5: branch-misses | ||
36 | 0:6: bus-cycles | ||
37 | 1:0: cpu-clock | ||
38 | 1:1: task-clock | ||
39 | 1:2: page-faults | ||
40 | 1:2: faults | ||
41 | 1:5: minor-faults | ||
42 | 1:6: major-faults | ||
43 | 1:3: context-switches | ||
44 | 1:3: cs | ||
45 | 1:4: cpu-migrations | ||
46 | 1:4: migrations | ||
47 | rNNN: raw PMU events (eventsel+umask) | ||
48 | |||
49 | -a:: | ||
50 | system-wide collection | ||
51 | |||
52 | -l:: | ||
53 | scale counter values | ||
54 | |||
55 | Configuration | ||
56 | ------------- | ||
57 | |||
58 | EXAMPLES | ||
59 | -------- | ||
60 | |||
61 | SEE ALSO | ||
62 | -------- | ||
63 | linkperf:git-stat[1] | ||
diff --git a/Documentation/perf_counter/Documentation/perf-stat.txt b/Documentation/perf_counter/Documentation/perf-stat.txt new file mode 100644 index 000000000000..7fcab271e570 --- /dev/null +++ b/Documentation/perf_counter/Documentation/perf-stat.txt | |||
@@ -0,0 +1,76 @@ | |||
1 | perf-stat(1) | ||
2 | ========== | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-stat - Run a command and gather performance counter statistics | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | This command runs a command and gathers performance counter statistics | ||
16 | from it. | ||
17 | |||
18 | |||
19 | OPTIONS | ||
20 | ------- | ||
21 | <command>...:: | ||
22 | Any command you can specify in a shell. | ||
23 | |||
24 | -e:: | ||
25 | --event=:: | ||
26 | 0:0: cpu-cycles | ||
27 | 0:0: cycles | ||
28 | 0:1: instructions | ||
29 | 0:2: cache-references | ||
30 | 0:3: cache-misses | ||
31 | 0:4: branch-instructions | ||
32 | 0:4: branches | ||
33 | 0:5: branch-misses | ||
34 | 0:6: bus-cycles | ||
35 | 1:0: cpu-clock | ||
36 | 1:1: task-clock | ||
37 | 1:2: page-faults | ||
38 | 1:2: faults | ||
39 | 1:5: minor-faults | ||
40 | 1:6: major-faults | ||
41 | 1:3: context-switches | ||
42 | 1:3: cs | ||
43 | 1:4: cpu-migrations | ||
44 | 1:4: migrations | ||
45 | rNNN: raw PMU events (eventsel+umask) | ||
46 | |||
47 | -a:: | ||
48 | system-wide collection | ||
49 | |||
50 | -l:: | ||
51 | scale counter values | ||
52 | |||
53 | Configuration | ||
54 | ------------- | ||
55 | |||
56 | EXAMPLES | ||
57 | -------- | ||
58 | |||
59 | $ perf stat sleep 1 | ||
60 | |||
61 | Performance counter stats for 'sleep': | ||
62 | |||
63 | 0.678356 task clock ticks (msecs) | ||
64 | 7 context switches (events) | ||
65 | 4 CPU migrations (events) | ||
66 | 232 pagefaults (events) | ||
67 | 1810403 CPU cycles (events) | ||
68 | 946759 instructions (events) | ||
69 | 18952 cache references (events) | ||
70 | 4885 cache misses (events) | ||
71 | |||
72 | Wall-clock time elapsed: 1001.252894 msecs | ||
73 | |||
74 | SEE ALSO | ||
75 | -------- | ||
76 | linkperf:git-tops[1] | ||
diff --git a/Documentation/perf_counter/Documentation/perf-top.txt b/Documentation/perf_counter/Documentation/perf-top.txt new file mode 100644 index 000000000000..057333b72534 --- /dev/null +++ b/Documentation/perf_counter/Documentation/perf-top.txt | |||
@@ -0,0 +1,61 @@ | |||
1 | perf-top(1) | ||
2 | ========== | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-top - Run a command and profile it | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | This command runs a command and gathers a performance counter profile | ||
16 | from it. | ||
17 | |||
18 | |||
19 | OPTIONS | ||
20 | ------- | ||
21 | <command>...:: | ||
22 | Any command you can specify in a shell. | ||
23 | |||
24 | -e:: | ||
25 | --event=:: | ||
26 | 0:0: cpu-cycles | ||
27 | 0:0: cycles | ||
28 | 0:1: instructions | ||
29 | 0:2: cache-references | ||
30 | 0:3: cache-misses | ||
31 | 0:4: branch-instructions | ||
32 | 0:4: branches | ||
33 | 0:5: branch-misses | ||
34 | 0:6: bus-cycles | ||
35 | 1:0: cpu-clock | ||
36 | 1:1: task-clock | ||
37 | 1:2: page-faults | ||
38 | 1:2: faults | ||
39 | 1:5: minor-faults | ||
40 | 1:6: major-faults | ||
41 | 1:3: context-switches | ||
42 | 1:3: cs | ||
43 | 1:4: cpu-migrations | ||
44 | 1:4: migrations | ||
45 | rNNN: raw PMU events (eventsel+umask) | ||
46 | |||
47 | -a:: | ||
48 | system-wide collection | ||
49 | |||
50 | -l:: | ||
51 | scale counter values | ||
52 | |||
53 | Configuration | ||
54 | ------------- | ||
55 | |||
56 | EXAMPLES | ||
57 | -------- | ||
58 | |||
59 | SEE ALSO | ||
60 | -------- | ||
61 | linkperf:git-stat[1] | ||
diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile new file mode 100644 index 000000000000..543ccf28ac4a --- /dev/null +++ b/Documentation/perf_counter/Makefile | |||
@@ -0,0 +1,844 @@ | |||
1 | # The default target of this Makefile is... | ||
2 | all:: | ||
3 | |||
4 | # Define V=1 to have a more verbose compile. | ||
5 | # | ||
6 | # Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf() | ||
7 | # or vsnprintf() return -1 instead of number of characters which would | ||
8 | # have been written to the final string if enough space had been available. | ||
9 | # | ||
10 | # Define FREAD_READS_DIRECTORIES if your are on a system which succeeds | ||
11 | # when attempting to read from an fopen'ed directory. | ||
12 | # | ||
13 | # Define NO_OPENSSL environment variable if you do not have OpenSSL. | ||
14 | # This also implies MOZILLA_SHA1. | ||
15 | # | ||
16 | # Define CURLDIR=/foo/bar if your curl header and library files are in | ||
17 | # /foo/bar/include and /foo/bar/lib directories. | ||
18 | # | ||
19 | # Define EXPATDIR=/foo/bar if your expat header and library files are in | ||
20 | # /foo/bar/include and /foo/bar/lib directories. | ||
21 | # | ||
22 | # Define NO_D_INO_IN_DIRENT if you don't have d_ino in your struct dirent. | ||
23 | # | ||
24 | # Define NO_D_TYPE_IN_DIRENT if your platform defines DT_UNKNOWN but lacks | ||
25 | # d_type in struct dirent (latest Cygwin -- will be fixed soonish). | ||
26 | # | ||
27 | # Define NO_C99_FORMAT if your formatted IO functions (printf/scanf et.al.) | ||
28 | # do not support the 'size specifiers' introduced by C99, namely ll, hh, | ||
29 | # j, z, t. (representing long long int, char, intmax_t, size_t, ptrdiff_t). | ||
30 | # some C compilers supported these specifiers prior to C99 as an extension. | ||
31 | # | ||
32 | # Define NO_STRCASESTR if you don't have strcasestr. | ||
33 | # | ||
34 | # Define NO_MEMMEM if you don't have memmem. | ||
35 | # | ||
36 | # Define NO_STRTOUMAX if you don't have strtoumax in the C library. | ||
37 | # If your compiler also does not support long long or does not have | ||
38 | # strtoull, define NO_STRTOULL. | ||
39 | # | ||
40 | # Define NO_SETENV if you don't have setenv in the C library. | ||
41 | # | ||
42 | # Define NO_UNSETENV if you don't have unsetenv in the C library. | ||
43 | # | ||
44 | # Define NO_MKDTEMP if you don't have mkdtemp in the C library. | ||
45 | # | ||
46 | # Define NO_SYS_SELECT_H if you don't have sys/select.h. | ||
47 | # | ||
48 | # Define NO_SYMLINK_HEAD if you never want .perf/HEAD to be a symbolic link. | ||
49 | # Enable it on Windows. By default, symrefs are still used. | ||
50 | # | ||
51 | # Define NO_SVN_TESTS if you want to skip time-consuming SVN interoperability | ||
52 | # tests. These tests take up a significant amount of the total test time | ||
53 | # but are not needed unless you plan to talk to SVN repos. | ||
54 | # | ||
55 | # Define NO_FINK if you are building on Darwin/Mac OS X, have Fink | ||
56 | # installed in /sw, but don't want PERF to link against any libraries | ||
57 | # installed there. If defined you may specify your own (or Fink's) | ||
58 | # include directories and library directories by defining CFLAGS | ||
59 | # and LDFLAGS appropriately. | ||
60 | # | ||
61 | # Define NO_DARWIN_PORTS if you are building on Darwin/Mac OS X, | ||
62 | # have DarwinPorts installed in /opt/local, but don't want PERF to | ||
63 | # link against any libraries installed there. If defined you may | ||
64 | # specify your own (or DarwinPort's) include directories and | ||
65 | # library directories by defining CFLAGS and LDFLAGS appropriately. | ||
66 | # | ||
67 | # Define PPC_SHA1 environment variable when running make to make use of | ||
68 | # a bundled SHA1 routine optimized for PowerPC. | ||
69 | # | ||
70 | # Define ARM_SHA1 environment variable when running make to make use of | ||
71 | # a bundled SHA1 routine optimized for ARM. | ||
72 | # | ||
73 | # Define MOZILLA_SHA1 environment variable when running make to make use of | ||
74 | # a bundled SHA1 routine coming from Mozilla. It is GPL'd and should be fast | ||
75 | # on non-x86 architectures (e.g. PowerPC), while the OpenSSL version (default | ||
76 | # choice) has very fast version optimized for i586. | ||
77 | # | ||
78 | # Define NEEDS_SSL_WITH_CRYPTO if you need -lcrypto with -lssl (Darwin). | ||
79 | # | ||
80 | # Define NEEDS_LIBICONV if linking with libc is not enough (Darwin). | ||
81 | # | ||
82 | # Define NEEDS_SOCKET if linking with libc is not enough (SunOS, | ||
83 | # Patrick Mauritz). | ||
84 | # | ||
85 | # Define NO_MMAP if you want to avoid mmap. | ||
86 | # | ||
87 | # Define NO_PTHREADS if you do not have or do not want to use Pthreads. | ||
88 | # | ||
89 | # Define NO_PREAD if you have a problem with pread() system call (e.g. | ||
90 | # cygwin.dll before v1.5.22). | ||
91 | # | ||
92 | # Define NO_FAST_WORKING_DIRECTORY if accessing objects in pack files is | ||
93 | # generally faster on your platform than accessing the working directory. | ||
94 | # | ||
95 | # Define NO_TRUSTABLE_FILEMODE if your filesystem may claim to support | ||
96 | # the executable mode bit, but doesn't really do so. | ||
97 | # | ||
98 | # Define NO_IPV6 if you lack IPv6 support and getaddrinfo(). | ||
99 | # | ||
100 | # Define NO_SOCKADDR_STORAGE if your platform does not have struct | ||
101 | # sockaddr_storage. | ||
102 | # | ||
103 | # Define NO_ICONV if your libc does not properly support iconv. | ||
104 | # | ||
105 | # Define OLD_ICONV if your library has an old iconv(), where the second | ||
106 | # (input buffer pointer) parameter is declared with type (const char **). | ||
107 | # | ||
108 | # Define NO_DEFLATE_BOUND if your zlib does not have deflateBound. | ||
109 | # | ||
110 | # Define NO_R_TO_GCC_LINKER if your gcc does not like "-R/path/lib" | ||
111 | # that tells runtime paths to dynamic libraries; | ||
112 | # "-Wl,-rpath=/path/lib" is used instead. | ||
113 | # | ||
114 | # Define USE_NSEC below if you want perf to care about sub-second file mtimes | ||
115 | # and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and | ||
116 | # it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely | ||
117 | # randomly break unless your underlying filesystem supports those sub-second | ||
118 | # times (my ext3 doesn't). | ||
119 | # | ||
120 | # Define USE_ST_TIMESPEC if your "struct stat" uses "st_ctimespec" instead of | ||
121 | # "st_ctim" | ||
122 | # | ||
123 | # Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec" | ||
124 | # available. This automatically turns USE_NSEC off. | ||
125 | # | ||
126 | # Define USE_STDEV below if you want perf to care about the underlying device | ||
127 | # change being considered an inode change from the update-index perspective. | ||
128 | # | ||
129 | # Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks | ||
130 | # field that counts the on-disk footprint in 512-byte blocks. | ||
131 | # | ||
132 | # Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8 | ||
133 | # | ||
134 | # Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72. | ||
135 | # | ||
136 | # Define NO_PERL_MAKEMAKER if you cannot use Makefiles generated by perl's | ||
137 | # MakeMaker (e.g. using ActiveState under Cygwin). | ||
138 | # | ||
139 | # Define NO_PERL if you do not want Perl scripts or libraries at all. | ||
140 | # | ||
141 | # Define INTERNAL_QSORT to use Git's implementation of qsort(), which | ||
142 | # is a simplified version of the merge sort used in glibc. This is | ||
143 | # recommended if Git triggers O(n^2) behavior in your platform's qsort(). | ||
144 | # | ||
145 | # Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call | ||
146 | # your external grep (e.g., if your system lacks grep, if its grep is | ||
147 | # broken, or spawning external process is slower than built-in grep perf has). | ||
148 | |||
149 | PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE | ||
150 | @$(SHELL_PATH) util/PERF-VERSION-GEN | ||
151 | -include PERF-VERSION-FILE | ||
152 | |||
153 | uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') | ||
154 | uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not') | ||
155 | uname_O := $(shell sh -c 'uname -o 2>/dev/null || echo not') | ||
156 | uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not') | ||
157 | uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') | ||
158 | uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') | ||
159 | |||
160 | # CFLAGS and LDFLAGS are for the users to override from the command line. | ||
161 | |||
162 | CFLAGS = -g -O2 -Wall | ||
163 | LDFLAGS = -lpthread -lrt | ||
164 | ALL_CFLAGS = $(CFLAGS) | ||
165 | ALL_LDFLAGS = $(LDFLAGS) | ||
166 | STRIP ?= strip | ||
167 | |||
168 | # Among the variables below, these: | ||
169 | # perfexecdir | ||
170 | # template_dir | ||
171 | # mandir | ||
172 | # infodir | ||
173 | # htmldir | ||
174 | # ETC_PERFCONFIG (but not sysconfdir) | ||
175 | # can be specified as a relative path some/where/else; | ||
176 | # this is interpreted as relative to $(prefix) and "perf" at | ||
177 | # runtime figures out where they are based on the path to the executable. | ||
178 | # This can help installing the suite in a relocatable way. | ||
179 | |||
180 | prefix = $(HOME) | ||
181 | bindir_relative = bin | ||
182 | bindir = $(prefix)/$(bindir_relative) | ||
183 | mandir = share/man | ||
184 | infodir = share/info | ||
185 | perfexecdir = libexec/perf-core | ||
186 | sharedir = $(prefix)/share | ||
187 | template_dir = share/perf-core/templates | ||
188 | htmldir = share/doc/perf-doc | ||
189 | ifeq ($(prefix),/usr) | ||
190 | sysconfdir = /etc | ||
191 | ETC_PERFCONFIG = $(sysconfdir)/perfconfig | ||
192 | else | ||
193 | sysconfdir = $(prefix)/etc | ||
194 | ETC_PERFCONFIG = etc/perfconfig | ||
195 | endif | ||
196 | lib = lib | ||
197 | # DESTDIR= | ||
198 | |||
199 | export prefix bindir sharedir sysconfdir | ||
200 | |||
201 | CC = gcc | ||
202 | AR = ar | ||
203 | RM = rm -f | ||
204 | TAR = tar | ||
205 | FIND = find | ||
206 | INSTALL = install | ||
207 | RPMBUILD = rpmbuild | ||
208 | PTHREAD_LIBS = -lpthread | ||
209 | |||
210 | # sparse is architecture-neutral, which means that we need to tell it | ||
211 | # explicitly what architecture to check for. Fix this up for yours.. | ||
212 | SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ | ||
213 | |||
214 | |||
215 | |||
216 | ### --- END CONFIGURATION SECTION --- | ||
217 | |||
218 | # Those must not be GNU-specific; they are shared with perl/ which may | ||
219 | # be built by a different compiler. (Note that this is an artifact now | ||
220 | # but it still might be nice to keep that distinction.) | ||
221 | BASIC_CFLAGS = | ||
222 | BASIC_LDFLAGS = | ||
223 | |||
224 | # Guard against environment variables | ||
225 | BUILTIN_OBJS = | ||
226 | BUILT_INS = | ||
227 | COMPAT_CFLAGS = | ||
228 | COMPAT_OBJS = | ||
229 | LIB_H = | ||
230 | LIB_OBJS = | ||
231 | PROGRAMS = | ||
232 | SCRIPT_PERL = | ||
233 | SCRIPT_SH = | ||
234 | TEST_PROGRAMS = | ||
235 | |||
236 | # | ||
237 | # No scripts right now: | ||
238 | # | ||
239 | |||
240 | # SCRIPT_SH += perf-am.sh | ||
241 | |||
242 | # | ||
243 | # No Perl scripts right now: | ||
244 | # | ||
245 | |||
246 | # SCRIPT_PERL += perf-add--interactive.perl | ||
247 | |||
248 | SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \ | ||
249 | $(patsubst %.perl,%,$(SCRIPT_PERL)) | ||
250 | |||
251 | # Empty... | ||
252 | EXTRA_PROGRAMS = | ||
253 | |||
254 | # ... and all the rest that could be moved out of bindir to perfexecdir | ||
255 | PROGRAMS += $(EXTRA_PROGRAMS) | ||
256 | |||
257 | # | ||
258 | # Single 'perf' binary right now: | ||
259 | # | ||
260 | PROGRAMS += perf | ||
261 | |||
262 | # List built-in command $C whose implementation cmd_$C() is not in | ||
263 | # builtin-$C.o but is linked in as part of some other command. | ||
264 | BUILT_INS += $(patsubst builtin-%.o,perf-%$X,$(BUILTIN_OBJS)) | ||
265 | |||
266 | # | ||
267 | # None right now: | ||
268 | # | ||
269 | # BUILT_INS += perf-init $X | ||
270 | |||
271 | # what 'all' will build and 'install' will install, in perfexecdir | ||
272 | ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) | ||
273 | |||
274 | # what 'all' will build but not install in perfexecdir | ||
275 | OTHER_PROGRAMS = perf$X | ||
276 | |||
277 | # Set paths to tools early so that they can be used for version tests. | ||
278 | ifndef SHELL_PATH | ||
279 | SHELL_PATH = /bin/sh | ||
280 | endif | ||
281 | ifndef PERL_PATH | ||
282 | PERL_PATH = /usr/bin/perl | ||
283 | endif | ||
284 | |||
285 | export PERL_PATH | ||
286 | |||
287 | LIB_FILE=libperf.a | ||
288 | |||
289 | LIB_H += ../../include/linux/perf_counter.h | ||
290 | LIB_H += util/levenshtein.h | ||
291 | LIB_H += util/parse-options.h | ||
292 | LIB_H += util/quote.h | ||
293 | LIB_H += util/util.h | ||
294 | LIB_H += util/help.h | ||
295 | LIB_H += util/strbuf.h | ||
296 | LIB_H += util/run-command.h | ||
297 | |||
298 | LIB_OBJS += util/abspath.o | ||
299 | LIB_OBJS += util/alias.o | ||
300 | LIB_OBJS += util/config.o | ||
301 | LIB_OBJS += util/ctype.o | ||
302 | LIB_OBJS += util/exec_cmd.o | ||
303 | LIB_OBJS += util/help.o | ||
304 | LIB_OBJS += util/levenshtein.o | ||
305 | LIB_OBJS += util/parse-options.o | ||
306 | LIB_OBJS += util/path.o | ||
307 | LIB_OBJS += util/run-command.o | ||
308 | LIB_OBJS += util/quote.o | ||
309 | LIB_OBJS += util/strbuf.o | ||
310 | LIB_OBJS += util/usage.o | ||
311 | LIB_OBJS += util/wrapper.o | ||
312 | |||
313 | BUILTIN_OBJS += builtin-help.o | ||
314 | BUILTIN_OBJS += builtin-record.o | ||
315 | BUILTIN_OBJS += builtin-stat.o | ||
316 | BUILTIN_OBJS += builtin-top.o | ||
317 | |||
318 | PERFLIBS = $(LIB_FILE) | ||
319 | EXTLIBS = | ||
320 | |||
321 | # | ||
322 | # Platform specific tweaks | ||
323 | # | ||
324 | |||
325 | # We choose to avoid "if .. else if .. else .. endif endif" | ||
326 | # because maintaining the nesting to match is a pain. If | ||
327 | # we had "elif" things would have been much nicer... | ||
328 | |||
329 | -include config.mak.autogen | ||
330 | -include config.mak | ||
331 | |||
332 | ifeq ($(uname_S),Darwin) | ||
333 | ifndef NO_FINK | ||
334 | ifeq ($(shell test -d /sw/lib && echo y),y) | ||
335 | BASIC_CFLAGS += -I/sw/include | ||
336 | BASIC_LDFLAGS += -L/sw/lib | ||
337 | endif | ||
338 | endif | ||
339 | ifndef NO_DARWIN_PORTS | ||
340 | ifeq ($(shell test -d /opt/local/lib && echo y),y) | ||
341 | BASIC_CFLAGS += -I/opt/local/include | ||
342 | BASIC_LDFLAGS += -L/opt/local/lib | ||
343 | endif | ||
344 | endif | ||
345 | PTHREAD_LIBS = | ||
346 | endif | ||
347 | |||
348 | ifndef CC_LD_DYNPATH | ||
349 | ifdef NO_R_TO_GCC_LINKER | ||
350 | # Some gcc does not accept and pass -R to the linker to specify | ||
351 | # the runtime dynamic library path. | ||
352 | CC_LD_DYNPATH = -Wl,-rpath, | ||
353 | else | ||
354 | CC_LD_DYNPATH = -R | ||
355 | endif | ||
356 | endif | ||
357 | |||
358 | ifdef ZLIB_PATH | ||
359 | BASIC_CFLAGS += -I$(ZLIB_PATH)/include | ||
360 | EXTLIBS += -L$(ZLIB_PATH)/$(lib) $(CC_LD_DYNPATH)$(ZLIB_PATH)/$(lib) | ||
361 | endif | ||
362 | EXTLIBS += -lz | ||
363 | |||
364 | ifdef NEEDS_SOCKET | ||
365 | EXTLIBS += -lsocket | ||
366 | endif | ||
367 | ifdef NEEDS_NSL | ||
368 | EXTLIBS += -lnsl | ||
369 | endif | ||
370 | ifdef NO_D_TYPE_IN_DIRENT | ||
371 | BASIC_CFLAGS += -DNO_D_TYPE_IN_DIRENT | ||
372 | endif | ||
373 | ifdef NO_D_INO_IN_DIRENT | ||
374 | BASIC_CFLAGS += -DNO_D_INO_IN_DIRENT | ||
375 | endif | ||
376 | ifdef NO_ST_BLOCKS_IN_STRUCT_STAT | ||
377 | BASIC_CFLAGS += -DNO_ST_BLOCKS_IN_STRUCT_STAT | ||
378 | endif | ||
379 | ifdef USE_NSEC | ||
380 | BASIC_CFLAGS += -DUSE_NSEC | ||
381 | endif | ||
382 | ifdef USE_ST_TIMESPEC | ||
383 | BASIC_CFLAGS += -DUSE_ST_TIMESPEC | ||
384 | endif | ||
385 | ifdef NO_NSEC | ||
386 | BASIC_CFLAGS += -DNO_NSEC | ||
387 | endif | ||
388 | ifdef NO_C99_FORMAT | ||
389 | BASIC_CFLAGS += -DNO_C99_FORMAT | ||
390 | endif | ||
391 | ifdef SNPRINTF_RETURNS_BOGUS | ||
392 | COMPAT_CFLAGS += -DSNPRINTF_RETURNS_BOGUS | ||
393 | COMPAT_OBJS += compat/snprintf.o | ||
394 | endif | ||
395 | ifdef FREAD_READS_DIRECTORIES | ||
396 | COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES | ||
397 | COMPAT_OBJS += compat/fopen.o | ||
398 | endif | ||
399 | ifdef NO_SYMLINK_HEAD | ||
400 | BASIC_CFLAGS += -DNO_SYMLINK_HEAD | ||
401 | endif | ||
402 | ifdef NO_STRCASESTR | ||
403 | COMPAT_CFLAGS += -DNO_STRCASESTR | ||
404 | COMPAT_OBJS += compat/strcasestr.o | ||
405 | endif | ||
406 | ifdef NO_STRTOUMAX | ||
407 | COMPAT_CFLAGS += -DNO_STRTOUMAX | ||
408 | COMPAT_OBJS += compat/strtoumax.o | ||
409 | endif | ||
410 | ifdef NO_STRTOULL | ||
411 | COMPAT_CFLAGS += -DNO_STRTOULL | ||
412 | endif | ||
413 | ifdef NO_SETENV | ||
414 | COMPAT_CFLAGS += -DNO_SETENV | ||
415 | COMPAT_OBJS += compat/setenv.o | ||
416 | endif | ||
417 | ifdef NO_MKDTEMP | ||
418 | COMPAT_CFLAGS += -DNO_MKDTEMP | ||
419 | COMPAT_OBJS += compat/mkdtemp.o | ||
420 | endif | ||
421 | ifdef NO_UNSETENV | ||
422 | COMPAT_CFLAGS += -DNO_UNSETENV | ||
423 | COMPAT_OBJS += compat/unsetenv.o | ||
424 | endif | ||
425 | ifdef NO_SYS_SELECT_H | ||
426 | BASIC_CFLAGS += -DNO_SYS_SELECT_H | ||
427 | endif | ||
428 | ifdef NO_MMAP | ||
429 | COMPAT_CFLAGS += -DNO_MMAP | ||
430 | COMPAT_OBJS += compat/mmap.o | ||
431 | else | ||
432 | ifdef USE_WIN32_MMAP | ||
433 | COMPAT_CFLAGS += -DUSE_WIN32_MMAP | ||
434 | COMPAT_OBJS += compat/win32mmap.o | ||
435 | endif | ||
436 | endif | ||
437 | ifdef NO_PREAD | ||
438 | COMPAT_CFLAGS += -DNO_PREAD | ||
439 | COMPAT_OBJS += compat/pread.o | ||
440 | endif | ||
441 | ifdef NO_FAST_WORKING_DIRECTORY | ||
442 | BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY | ||
443 | endif | ||
444 | ifdef NO_TRUSTABLE_FILEMODE | ||
445 | BASIC_CFLAGS += -DNO_TRUSTABLE_FILEMODE | ||
446 | endif | ||
447 | ifdef NO_IPV6 | ||
448 | BASIC_CFLAGS += -DNO_IPV6 | ||
449 | endif | ||
# --- Optional features / portability knobs -------------------------------
# Each NO_*/OLD_* variable either defines a preprocessor symbol for the C
# code or pulls in a compat/ replacement object.
ifdef NO_UINTMAX_T
BASIC_CFLAGS += -Duintmax_t=uint32_t
endif
ifdef NO_SOCKADDR_STORAGE
ifdef NO_IPV6
BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in
else
BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in6
endif
endif
ifdef NO_INET_NTOP
LIB_OBJS += compat/inet_ntop.o
endif
ifdef NO_INET_PTON
LIB_OBJS += compat/inet_pton.o
endif

ifdef NO_ICONV
BASIC_CFLAGS += -DNO_ICONV
endif

ifdef OLD_ICONV
BASIC_CFLAGS += -DOLD_ICONV
endif

ifdef NO_DEFLATE_BOUND
BASIC_CFLAGS += -DNO_DEFLATE_BOUND
endif

# SHA-1 implementation selection: PPC or ARM assembler versions, the
# Mozilla C fallback, or (default) OpenSSL's.
ifdef PPC_SHA1
SHA1_HEADER = "ppc/sha1.h"
LIB_OBJS += ppc/sha1.o ppc/sha1ppc.o
else
ifdef ARM_SHA1
SHA1_HEADER = "arm/sha1.h"
LIB_OBJS += arm/sha1.o arm/sha1_arm.o
else
ifdef MOZILLA_SHA1
SHA1_HEADER = "mozilla-sha1/sha1.h"
LIB_OBJS += mozilla-sha1/sha1.o
else
SHA1_HEADER = <openssl/sha.h>
EXTLIBS += $(LIB_4_CRYPTO)
endif
endif
endif
ifdef NO_PERL_MAKEMAKER
export NO_PERL_MAKEMAKER
endif
ifdef NO_HSTRERROR
COMPAT_CFLAGS += -DNO_HSTRERROR
COMPAT_OBJS += compat/hstrerror.o
endif
ifdef NO_MEMMEM
COMPAT_CFLAGS += -DNO_MEMMEM
COMPAT_OBJS += compat/memmem.o
endif
ifdef INTERNAL_QSORT
COMPAT_CFLAGS += -DINTERNAL_QSORT
COMPAT_OBJS += compat/qsort.o
endif
ifdef RUNTIME_PREFIX
COMPAT_CFLAGS += -DRUNTIME_PREFIX
endif

ifdef DIR_HAS_BSD_GROUP_SEMANTICS
COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS
endif
ifdef NO_EXTERNAL_GREP
BASIC_CFLAGS += -DNO_EXTERNAL_GREP
endif

# No perl interpreter was found: disable the perl-dependent parts.
ifeq ($(PERL_PATH),)
NO_PERL=NoThanks
endif
525 | |||
# Helpers for recursing into subdirectories; the trailing comment on the
# first line preserves the space between -C and the directory name.
QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir
QUIET_SUBDIR1 =

# Under "make -w" make prints directory changes itself; otherwise do it here.
ifneq ($(findstring $(MAKEFLAGS),w),w)
PRINT_DIR = --no-print-directory
else # "make -w"
NO_SUBDIR = :
endif

# Quiet, kbuild-style output ("CC foo.o") unless V is set or "make -s".
ifneq ($(findstring $(MAKEFLAGS),s),s)
ifndef V
	QUIET_CC = @echo ' ' CC $@;
	QUIET_AR = @echo ' ' AR $@;
	QUIET_LINK = @echo ' ' LINK $@;
	QUIET_BUILT_IN = @echo ' ' BUILTIN $@;
	QUIET_GEN = @echo ' ' GEN $@;
	QUIET_SUBDIR0 = +@subdir=
	QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \
	$(MAKE) $(PRINT_DIR) -C $$subdir
	export V
	export QUIET_GEN
	export QUIET_BUILT_IN
endif
endif

ifdef ASCIIDOC8
export ASCIIDOC8
endif
554 | |||
# Shell quote (do not use $(call) to accommodate ancient setups);
# the *_SQ variants are safe to embed inside single-quoted shell strings.

SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER))
ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))

DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
bindir_SQ = $(subst ','\'',$(bindir))
bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
mandir_SQ = $(subst ','\'',$(mandir))
infodir_SQ = $(subst ','\'',$(infodir))
perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
template_dir_SQ = $(subst ','\'',$(template_dir))
htmldir_SQ = $(subst ','\'',$(htmldir))
prefix_SQ = $(subst ','\'',$(prefix))

SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))

LIBS = $(PERFLIBS) $(EXTLIBS)

# Fold the feature/compat results computed above into the final lists.
BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
	$(COMPAT_CFLAGS)
LIB_OBJS += $(COMPAT_OBJS)

ALL_CFLAGS += $(BASIC_CFLAGS)
ALL_LDFLAGS += $(BASIC_LDFLAGS)

export TAR INSTALL DESTDIR SHELL_PATH
583 | |||
584 | |||
### Build rules

SHELL = $(SHELL_PATH)

# Default target: sanity-check the shell, then build everything.
all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) PERF-BUILD-OPTIONS
ifneq (,$X)
	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';)
endif

all::

# Fails (with a hint in its own name) when $(SHELL_PATH) is too old to
# run the POSIX "$(:)" construct.
please_set_SHELL_PATH_to_a_more_modern_shell:
	@$$(:)

shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell

strip: $(PROGRAMS) perf$X
	$(STRIP) $(STRIP_OPTS) $(PROGRAMS) perf$X

# perf.o gets the version string and HTML doc path baked in.
perf.o: perf.c common-cmds.h PERF-CFLAGS
	$(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \
		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
		$(ALL_CFLAGS) -c $(filter %.c,$^)

perf$X: perf.o $(BUILTIN_OBJS) $(PERFLIBS)
	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ perf.o \
		$(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS)

# builtin-help needs the doc install paths for "perf help --web/--info".
builtin-help.o: builtin-help.c common-cmds.h PERF-CFLAGS
	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \
		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
		'-DPERF_MAN_PATH="$(mandir_SQ)"' \
		'-DPERF_INFO_PATH="$(infodir_SQ)"' $<

# Built-in commands are hard links (fallback: symlink, then copy) of perf.
$(BUILT_INS): perf$X
	$(QUIET_BUILT_IN)$(RM) $@ && \
	ln perf$X $@ 2>/dev/null || \
	ln -s perf$X $@ 2>/dev/null || \
	cp perf$X $@
624 | |||
common-cmds.h: util/generate-cmdlist.sh command-list.txt

common-cmds.h: $(wildcard Documentation/perf-*.txt)
	$(QUIET_GEN)util/generate-cmdlist.sh > $@+ && mv $@+ $@

# Instantiate shell scripts: fix the shebang and substitute build-time
# values, writing to a "$@+" temporary and renaming for atomicity.
$(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
	$(QUIET_GEN)$(RM) $@ $@+ && \
	sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
	-e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \
	-e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \
	-e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
	-e 's/@@NO_CURL@@/$(NO_CURL)/g' \
	$@.sh >$@+ && \
	chmod +x $@+ && \
	mv $@+ $@

configure: configure.ac
	$(QUIET_GEN)$(RM) $@ $<+ && \
	sed -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
	$< > $<+ && \
	autoconf -o $@ $<+ && \
	$(RM) $<+

# These can record PERF_VERSION
perf.o perf.spec \
	$(patsubst %.sh,%,$(SCRIPT_SH)) \
	$(patsubst %.perl,%,$(SCRIPT_PERL)) \
	: PERF-VERSION-FILE

# Generic compile rules; everything depends on PERF-CFLAGS so objects
# rebuild when the flags change.
%.o: %.c PERF-CFLAGS
	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $<
%.s: %.c PERF-CFLAGS
	$(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $<
%.o: %.S
	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $<

# exec_cmd.o has the runtime directory layout baked in.
util/exec_cmd.o: util/exec_cmd.c PERF-CFLAGS
	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \
		'-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
		'-DBINDIR="$(bindir_relative_SQ)"' \
		'-DPREFIX="$(prefix_SQ)"' \
		$<

builtin-init-db.o: builtin-init-db.c PERF-CFLAGS
	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $<

util/config.o: util/config.c PERF-CFLAGS
	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<

perf-%$X: %.o $(PERFLIBS)
	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)

# Coarse-grained header dependencies: every object depends on all of LIB_H.
$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
$(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
builtin-revert.o wt-status.o: wt-status.h

$(LIB_FILE): $(LIB_OBJS)
	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)

# Source cross-reference helpers.
TAGS:
	$(RM) TAGS
	$(FIND) . -name '*.[hcS]' -print | xargs etags -a

tags:
	$(RM) tags
	$(FIND) . -name '*.[hcS]' -print | xargs ctags -a

cscope:
	$(RM) cscope*
	$(FIND) . -name '*.[hcS]' -print | xargs cscope -b

### Detect prefix changes
# PERF-CFLAGS is rewritten only when the tracked flags/paths actually
# change, so objects rebuild exactly when the configuration does.
TRACK_CFLAGS = $(subst ','\'',$(ALL_CFLAGS)):\
	$(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ)

PERF-CFLAGS: .FORCE-PERF-CFLAGS
	@FLAGS='$(TRACK_CFLAGS)'; \
	if test x"$$FLAGS" != x"`cat PERF-CFLAGS 2>/dev/null`" ; then \
		echo 1>&2 " * new build flags or prefix"; \
		echo "$$FLAGS" >PERF-CFLAGS; \
	fi
706 | |||
# We need to apply sq twice, once to protect from the shell
# that runs PERF-BUILD-OPTIONS, and then again to protect it
# and the first level quoting from the shell that runs "echo".
PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS
	@echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@
	@echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@
	@echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@
	@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@

### Testing rules

#
# None right now:
#
# TEST_PROGRAMS += test-something$X

all:: $(TEST_PROGRAMS)

# GNU make supports exporting all variables by "export" without parameters.
# However, the environment gets quite big, and some programs have problems
# with that.

export NO_SVN_TESTS

# Run sparse over every source file; hints at "make test" when sparse
# is not installed.
check: common-cmds.h
	if sparse; \
	then \
		for i in *.c */*.c; \
		do \
			sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
		done; \
	else \
		echo 2>&1 "Did you mean 'make test'?"; \
		exit 1; \
	fi

remove-dashes:
	./fixup-builtins $(BUILT_INS) $(PROGRAMS) $(SCRIPTS)
745 | |||
### Installation rules

# An absolute template_dir is used as-is; a relative one hangs off $(prefix).
ifneq ($(filter /%,$(firstword $(template_dir))),)
template_instdir = $(template_dir)
else
template_instdir = $(prefix)/$(template_dir)
endif
export template_instdir

ifneq ($(filter /%,$(firstword $(perfexecdir))),)
perfexec_instdir = $(perfexecdir)
else
perfexec_instdir = $(prefix)/$(perfexecdir)
endif
perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
export perfexec_instdir

install: all
	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
	$(INSTALL) $(ALL_PROGRAMS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
ifneq (,$X)
	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';)
endif

### Maintainer's dist rules

perf.spec: perf.spec.in
	sed -e 's/@@VERSION@@/$(PERF_VERSION)/g' < $< > $@+
	mv $@+ $@

# Build a release tarball from HEAD plus the generated spec/configure.
PERF_TARNAME=perf-$(PERF_VERSION)
dist: perf.spec perf-archive$(X) configure
	./perf-archive --format=tar \
		--prefix=$(PERF_TARNAME)/ HEAD^{tree} > $(PERF_TARNAME).tar
	@mkdir -p $(PERF_TARNAME)
	@cp perf.spec configure $(PERF_TARNAME)
	@echo $(PERF_VERSION) > $(PERF_TARNAME)/version
	$(TAR) rf $(PERF_TARNAME).tar \
		$(PERF_TARNAME)/perf.spec \
		$(PERF_TARNAME)/configure \
		$(PERF_TARNAME)/version
	@$(RM) -r $(PERF_TARNAME)
	gzip -f -9 $(PERF_TARNAME).tar

rpm: dist
	$(RPMBUILD) -ta $(PERF_TARNAME).tar.gz
793 | |||
### Cleaning rules

distclean: clean
	$(RM) configure

clean:
	$(RM) *.o */*.o $(LIB_FILE)
	$(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X
	$(RM) $(TEST_PROGRAMS)
	$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope*
	$(RM) -r autom4te.cache
	$(RM) config.log config.mak.autogen config.mak.append config.status config.cache
	$(RM) -r $(PERF_TARNAME) .doc-tmp-dir
	$(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz
	$(RM) $(htmldocs).tar.gz $(manpages).tar.gz
	$(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-BUILD-OPTIONS

.PHONY: all install clean strip
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
.PHONY: .FORCE-PERF-BUILD-OPTIONS

### Make sure built-ins do not have dups and listed in perf.c
#
check-builtins::
	./check-builtins.sh

### Test suite coverage testing
#
.PHONY: coverage coverage-clean coverage-build coverage-report

coverage:
	$(MAKE) coverage-build
	$(MAKE) coverage-report

coverage-clean:
	rm -f *.gcda *.gcno

# NOTE(review): the coverage link flags reuse $(CFLAGS); gcov needs -lgcov
# at link time, hence the separate LDFLAGS.
COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs
COVERAGE_LDFLAGS = $(CFLAGS) -O0 -lgcov

coverage-build: coverage-clean
	$(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all
	$(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" \
		-j1 test

coverage-report:
	gcov -b *.c */*.c
	grep '^function.*called 0 ' *.c.gcov */*.c.gcov \
		| sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \
		| tee coverage-untested-functions
diff --git a/Documentation/perf_counter/PERF-BUILD-OPTIONS b/Documentation/perf_counter/PERF-BUILD-OPTIONS new file mode 100644 index 000000000000..46d8d6ceb2f4 --- /dev/null +++ b/Documentation/perf_counter/PERF-BUILD-OPTIONS | |||
@@ -0,0 +1,4 @@ | |||
1 | SHELL_PATH='/bin/sh' | ||
2 | TAR='tar' | ||
3 | NO_CURL='' | ||
4 | NO_PERL='' | ||
diff --git a/Documentation/perf_counter/PERF-CFLAGS b/Documentation/perf_counter/PERF-CFLAGS new file mode 100644 index 000000000000..f24906ca688d --- /dev/null +++ b/Documentation/perf_counter/PERF-CFLAGS | |||
@@ -0,0 +1 @@ | |||
-g -O2 -Wall -DSHA1_HEADER='<openssl/sha.h>' : /home/mingo/bin:libexec/perf-core:share/perf-core/templates:/home/mingo | |||
diff --git a/Documentation/perf_counter/PERF-VERSION-FILE b/Documentation/perf_counter/PERF-VERSION-FILE new file mode 100644 index 000000000000..328e244c0c81 --- /dev/null +++ b/Documentation/perf_counter/PERF-VERSION-FILE | |||
@@ -0,0 +1 @@ | |||
PERF_VERSION = 0.0.1.PERF | |||
diff --git a/Documentation/perf_counter/builtin-help.c b/Documentation/perf_counter/builtin-help.c new file mode 100644 index 000000000000..6616de0ef053 --- /dev/null +++ b/Documentation/perf_counter/builtin-help.c | |||
@@ -0,0 +1,461 @@ | |||
1 | /* | ||
2 | * builtin-help.c | ||
3 | * | ||
4 | * Builtin help command | ||
5 | */ | ||
6 | #include "util/cache.h" | ||
7 | #include "builtin.h" | ||
8 | #include "util/exec_cmd.h" | ||
9 | #include "common-cmds.h" | ||
10 | #include "util/parse-options.h" | ||
11 | #include "util/run-command.h" | ||
12 | #include "util/help.h" | ||
13 | |||
/* Ordered list of man viewers configured via "man.viewer". */
static struct man_viewer_list {
	struct man_viewer_list *next;
	char name[FLEX_ARRAY];	/* viewer name, stored inline after the node */
} *man_viewer_list;

/* Per-viewer extra info from "man.<tool>.path" / "man.<tool>.cmd". */
static struct man_viewer_info_list {
	struct man_viewer_info_list *next;
	const char *info;	/* path or command string */
	char name[FLEX_ARRAY];
} *man_viewer_info_list;

/* Output formats supported by "perf help". */
enum help_format {
	HELP_FORMAT_MAN,
	HELP_FORMAT_INFO,
	HELP_FORMAT_WEB,
};

/* --all flag: list every command instead of showing one page. */
static int show_all = 0;
/* Selected output format; overridable by config and command line. */
static enum help_format help_format = HELP_FORMAT_MAN;
/* Command-line options for "perf help". */
static struct option builtin_help_options[] = {
	OPT_BOOLEAN('a', "all", &show_all, "print all available commands"),
	OPT_SET_INT('m', "man", &help_format, "show man page", HELP_FORMAT_MAN),
	OPT_SET_INT('w', "web", &help_format, "show manual in web browser",
			HELP_FORMAT_WEB),
	OPT_SET_INT('i', "info", &help_format, "show info page",
			HELP_FORMAT_INFO),
	OPT_END(),
};

static const char * const builtin_help_usage[] = {
	"perf help [--all] [--man|--web|--info] [command]",
	NULL
};
47 | |||
48 | static enum help_format parse_help_format(const char *format) | ||
49 | { | ||
50 | if (!strcmp(format, "man")) | ||
51 | return HELP_FORMAT_MAN; | ||
52 | if (!strcmp(format, "info")) | ||
53 | return HELP_FORMAT_INFO; | ||
54 | if (!strcmp(format, "web") || !strcmp(format, "html")) | ||
55 | return HELP_FORMAT_WEB; | ||
56 | die("unrecognized help format '%s'", format); | ||
57 | } | ||
58 | |||
59 | static const char *get_man_viewer_info(const char *name) | ||
60 | { | ||
61 | struct man_viewer_info_list *viewer; | ||
62 | |||
63 | for (viewer = man_viewer_info_list; viewer; viewer = viewer->next) | ||
64 | { | ||
65 | if (!strcasecmp(name, viewer->name)) | ||
66 | return viewer->info; | ||
67 | } | ||
68 | return NULL; | ||
69 | } | ||
70 | |||
/*
 * Verify that emacsclient is new enough (>= version 22) to support the
 * "-e" eval interface used by exec_woman_emacs().  Returns 0 when OK,
 * -1 on any failure (spawn error, unparsable output, too old).
 */
static int check_emacsclient_version(void)
{
	struct strbuf buffer = STRBUF_INIT;
	struct child_process ec_process;
	const char *argv_ec[] = { "emacsclient", "--version", NULL };
	int version;

	/* emacsclient prints its version number on stderr */
	memset(&ec_process, 0, sizeof(ec_process));
	ec_process.argv = argv_ec;
	ec_process.err = -1;
	ec_process.stdout_to_stderr = 1;
	if (start_command(&ec_process)) {
		fprintf(stderr, "Failed to start emacsclient.\n");
		return -1;
	}
	/* 20 bytes is enough to capture "emacsclient <version>" */
	strbuf_read(&buffer, ec_process.err, 20);
	close(ec_process.err);

	/*
	 * Don't bother checking return value, because "emacsclient --version"
	 * seems to always exits with code 1.
	 */
	finish_command(&ec_process);

	if (prefixcmp(buffer.buf, "emacsclient")) {
		fprintf(stderr, "Failed to parse emacsclient version.\n");
		strbuf_release(&buffer);
		return -1;
	}

	/* Strip the program name; atoi() then reads the leading number. */
	strbuf_remove(&buffer, 0, strlen("emacsclient"));
	version = atoi(buffer.buf);

	if (version < 22) {
		fprintf(stderr,
			"emacsclient version '%d' too old (< 22).\n",
			version);
		strbuf_release(&buffer);
		return -1;
	}

	strbuf_release(&buffer);
	return 0;
}
116 | |||
117 | static void exec_woman_emacs(const char* path, const char *page) | ||
118 | { | ||
119 | if (!check_emacsclient_version()) { | ||
120 | /* This works only with emacsclient version >= 22. */ | ||
121 | struct strbuf man_page = STRBUF_INIT; | ||
122 | |||
123 | if (!path) | ||
124 | path = "emacsclient"; | ||
125 | strbuf_addf(&man_page, "(woman \"%s\")", page); | ||
126 | execlp(path, "emacsclient", "-e", man_page.buf, NULL); | ||
127 | warning("failed to exec '%s': %s", path, strerror(errno)); | ||
128 | } | ||
129 | } | ||
130 | |||
/*
 * Display the man page in konqueror's "man:" viewer.  Only attempted
 * when $DISPLAY is set; launched through kfmclient so that an already
 * running konqueror instance gets reused.
 */
static void exec_man_konqueror(const char* path, const char *page)
{
	const char *display = getenv("DISPLAY");
	if (display && *display) {
		struct strbuf man_page = STRBUF_INIT;
		const char *filename = "kfmclient";

		/* It's simpler to launch konqueror using kfmclient. */
		if (path) {
			const char *file = strrchr(path, '/');
			if (file && !strcmp(file + 1, "konqueror")) {
				char *new = strdup(path);
				/* NOTE(review): strdup() result unchecked — crashes on OOM */
				char *dest = strrchr(new, '/');

				/* strlen("konqueror") == strlen("kfmclient") */
				strcpy(dest + 1, "kfmclient");
				path = new;
			}
			if (file)
				filename = file;
		} else
			path = "kfmclient";
		strbuf_addf(&man_page, "man:%s(1)", page);
		execlp(path, filename, "newTab", man_page.buf, NULL);
		warning("failed to exec '%s': %s", path, strerror(errno));
	}
}
158 | |||
/* Show the page with plain man(1), or a configured path to it. */
static void exec_man_man(const char* path, const char *page)
{
	const char *man_bin = path ? path : "man";

	execlp(man_bin, "man", page, NULL);
	warning("failed to exec '%s': %s", man_bin, strerror(errno));
}
166 | |||
167 | static void exec_man_cmd(const char *cmd, const char *page) | ||
168 | { | ||
169 | struct strbuf shell_cmd = STRBUF_INIT; | ||
170 | strbuf_addf(&shell_cmd, "%s %s", cmd, page); | ||
171 | execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL); | ||
172 | warning("failed to exec '%s': %s", cmd, strerror(errno)); | ||
173 | } | ||
174 | |||
175 | static void add_man_viewer(const char *name) | ||
176 | { | ||
177 | struct man_viewer_list **p = &man_viewer_list; | ||
178 | size_t len = strlen(name); | ||
179 | |||
180 | while (*p) | ||
181 | p = &((*p)->next); | ||
182 | *p = calloc(1, (sizeof(**p) + len + 1)); | ||
183 | strncpy((*p)->name, name, len); | ||
184 | } | ||
185 | |||
/*
 * Return non-zero when the first "len" characters of "name" match one
 * of the builtin viewers (case-insensitive prefix match).
 */
static int supported_man_viewer(const char *name, size_t len)
{
	static const char *known[] = { "man", "woman", "konqueror" };
	unsigned int i;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (!strncasecmp(known[i], name, len))
			return 1;
	}
	return 0;
}
192 | |||
193 | static void do_add_man_viewer_info(const char *name, | ||
194 | size_t len, | ||
195 | const char *value) | ||
196 | { | ||
197 | struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1); | ||
198 | |||
199 | strncpy(new->name, name, len); | ||
200 | new->info = strdup(value); | ||
201 | new->next = man_viewer_info_list; | ||
202 | man_viewer_info_list = new; | ||
203 | } | ||
204 | |||
/*
 * Handle "man.<tool>.path": only meaningful for the builtin viewers;
 * warn and ignore it otherwise.  Always returns 0.
 */
static int add_man_viewer_path(const char *name,
			       size_t len,
			       const char *value)
{
	if (!supported_man_viewer(name, len)) {
		warning("'%s': path for unsupported man viewer.\n"
			"Please consider using 'man.<tool>.cmd' instead.",
			name);
		return 0;
	}
	do_add_man_viewer_info(name, len, value);
	return 0;
}
218 | |||
/*
 * Handle "man.<tool>.cmd": only meaningful for viewers that are NOT
 * builtin; warn and ignore it otherwise.  Always returns 0.
 */
static int add_man_viewer_cmd(const char *name,
			      size_t len,
			      const char *value)
{
	if (supported_man_viewer(name, len)) {
		warning("'%s': cmd for supported man viewer.\n"
			"Please consider using 'man.<tool>.path' instead.",
			name);
		return 0;
	}
	do_add_man_viewer_info(name, len, value);
	return 0;
}
232 | |||
/*
 * Dispatch a "man.<tool>.<subkey>" config entry.  "var" is the full
 * key; a NULL value is rejected for the path/cmd subkeys.
 */
static int add_man_viewer_info(const char *var, const char *value)
{
	const char *name = var + 4;	/* skip the "man." prefix */
	const char *subkey = strrchr(name, '.');

	if (!subkey)
		return error("Config with no key for man viewer: %s", name);

	if (!strcmp(subkey, ".path"))
		return value ? add_man_viewer_path(name, subkey - name, value)
			     : config_error_nonbool(var);
	if (!strcmp(subkey, ".cmd"))
		return value ? add_man_viewer_cmd(name, subkey - name, value)
			     : config_error_nonbool(var);

	warning("'%s': unsupported man viewer sub key.", subkey);
	return 0;
}
255 | |||
256 | static int perf_help_config(const char *var, const char *value, void *cb) | ||
257 | { | ||
258 | if (!strcmp(var, "help.format")) { | ||
259 | if (!value) | ||
260 | return config_error_nonbool(var); | ||
261 | help_format = parse_help_format(value); | ||
262 | return 0; | ||
263 | } | ||
264 | if (!strcmp(var, "man.viewer")) { | ||
265 | if (!value) | ||
266 | return config_error_nonbool(var); | ||
267 | add_man_viewer(value); | ||
268 | return 0; | ||
269 | } | ||
270 | if (!prefixcmp(var, "man.")) | ||
271 | return add_man_viewer_info(var, value); | ||
272 | |||
273 | return perf_default_config(var, value, cb); | ||
274 | } | ||
275 | |||
276 | static struct cmdnames main_cmds, other_cmds; | ||
277 | |||
278 | void list_common_cmds_help(void) | ||
279 | { | ||
280 | int i, longest = 0; | ||
281 | |||
282 | for (i = 0; i < ARRAY_SIZE(common_cmds); i++) { | ||
283 | if (longest < strlen(common_cmds[i].name)) | ||
284 | longest = strlen(common_cmds[i].name); | ||
285 | } | ||
286 | |||
287 | puts("The most commonly used perf commands are:"); | ||
288 | for (i = 0; i < ARRAY_SIZE(common_cmds); i++) { | ||
289 | printf(" %s ", common_cmds[i].name); | ||
290 | mput_char(' ', longest - strlen(common_cmds[i].name)); | ||
291 | puts(common_cmds[i].help); | ||
292 | } | ||
293 | } | ||
294 | |||
295 | static int is_perf_command(const char *s) | ||
296 | { | ||
297 | return is_in_cmdlist(&main_cmds, s) || | ||
298 | is_in_cmdlist(&other_cmds, s); | ||
299 | } | ||
300 | |||
/*
 * Return a freshly allocated string "<prefix><cmd>"; the caller owns
 * (and in practice leaks, since we exec) the result.
 *
 * NOTE(review): malloc() result is not checked, matching the
 * surrounding code; an OOM here would crash — consider a checked
 * allocator.
 */
static const char *prepend(const char *prefix, const char *cmd)
{
	size_t pre_len = strlen(prefix);
	size_t cmd_len = strlen(cmd);
	char *buf = malloc(pre_len + cmd_len + 1);

	memcpy(buf, prefix, pre_len);
	memcpy(buf + pre_len, cmd, cmd_len + 1);	/* include the NUL */
	return buf;
}
310 | |||
/*
 * Translate a command name to its manual page name: NULL maps to
 * "perf", names already starting with "perf" are kept, known
 * subcommands get a "perf-" prefix, anything else (guides etc.) a
 * bare "perf" prefix.
 */
static const char *cmd_to_page(const char *perf_cmd)
{
	if (!perf_cmd)
		return "perf";
	if (!prefixcmp(perf_cmd, "perf"))
		return perf_cmd;
	return prepend(is_perf_command(perf_cmd) ? "perf-" : "perf", perf_cmd);
}
322 | |||
323 | static void setup_man_path(void) | ||
324 | { | ||
325 | struct strbuf new_path = STRBUF_INIT; | ||
326 | const char *old_path = getenv("MANPATH"); | ||
327 | |||
328 | /* We should always put ':' after our path. If there is no | ||
329 | * old_path, the ':' at the end will let 'man' to try | ||
330 | * system-wide paths after ours to find the manual page. If | ||
331 | * there is old_path, we need ':' as delimiter. */ | ||
332 | strbuf_addstr(&new_path, system_path(PERF_MAN_PATH)); | ||
333 | strbuf_addch(&new_path, ':'); | ||
334 | if (old_path) | ||
335 | strbuf_addstr(&new_path, old_path); | ||
336 | |||
337 | setenv("MANPATH", new_path.buf, 1); | ||
338 | |||
339 | strbuf_release(&new_path); | ||
340 | } | ||
341 | |||
/*
 * Dispatch to the exec_* handler for the named viewer; unknown names
 * with configured info are treated as shell commands.  Each handler
 * execs on success, so returning here means the viewer failed.
 */
static void exec_viewer(const char *name, const char *page)
{
	const char *info = get_man_viewer_info(name);

	if (!strcasecmp(name, "man")) {
		exec_man_man(info, page);
	} else if (!strcasecmp(name, "woman")) {
		exec_woman_emacs(info, page);
	} else if (!strcasecmp(name, "konqueror")) {
		exec_man_konqueror(info, page);
	} else if (info) {
		exec_man_cmd(info, page);
	} else {
		warning("'%s': unknown man viewer.", name);
	}
}
357 | |||
358 | static void show_man_page(const char *perf_cmd) | ||
359 | { | ||
360 | struct man_viewer_list *viewer; | ||
361 | const char *page = cmd_to_page(perf_cmd); | ||
362 | const char *fallback = getenv("PERF_MAN_VIEWER"); | ||
363 | |||
364 | setup_man_path(); | ||
365 | for (viewer = man_viewer_list; viewer; viewer = viewer->next) | ||
366 | { | ||
367 | exec_viewer(viewer->name, page); /* will return when unable */ | ||
368 | } | ||
369 | if (fallback) | ||
370 | exec_viewer(fallback, page); | ||
371 | exec_viewer("man", page); | ||
372 | die("no man viewer handled the request"); | ||
373 | } | ||
374 | |||
375 | static void show_info_page(const char *perf_cmd) | ||
376 | { | ||
377 | const char *page = cmd_to_page(perf_cmd); | ||
378 | setenv("INFOPATH", system_path(PERF_INFO_PATH), 1); | ||
379 | execlp("info", "info", "perfman", page, NULL); | ||
380 | } | ||
381 | |||
382 | static void get_html_page_path(struct strbuf *page_path, const char *page) | ||
383 | { | ||
384 | struct stat st; | ||
385 | const char *html_path = system_path(PERF_HTML_PATH); | ||
386 | |||
387 | /* Check that we have a perf documentation directory. */ | ||
388 | if (stat(mkpath("%s/perf.html", html_path), &st) | ||
389 | || !S_ISREG(st.st_mode)) | ||
390 | die("'%s': not a documentation directory.", html_path); | ||
391 | |||
392 | strbuf_init(page_path, 0); | ||
393 | strbuf_addf(page_path, "%s/%s.html", html_path, page); | ||
394 | } | ||
395 | |||
/*
 * If open_html is not defined in a platform-specific way (see for
 * example compat/mingw.h), we use the script web--browse to display
 * HTML.
 */
#ifndef open_html
/* Default HTML opener: hand the path to the "web--browse" helper script. */
void open_html(const char *path)
{
	execl_perf_cmd("web--browse", "-c", "help.browser", path, NULL);
}
#endif
407 | |||
408 | static void show_html_page(const char *perf_cmd) | ||
409 | { | ||
410 | const char *page = cmd_to_page(perf_cmd); | ||
411 | struct strbuf page_path; /* it leaks but we exec bellow */ | ||
412 | |||
413 | get_html_page_path(&page_path, page); | ||
414 | |||
415 | open_html(page_path.buf); | ||
416 | } | ||
417 | |||
/*
 * Entry point for "perf help": with --all, list every command; with no
 * argument, print the common-commands summary; otherwise display the
 * named command's documentation in the chosen format (man/info/web).
 * The "prefix" argument is unused here but part of the builtin ABI.
 */
int cmd_help(int argc, const char **argv, const char *prefix)
{
	const char *alias;
	load_command_list("perf-", &main_cmds, &other_cmds);

	/* may change help_format and register man viewers */
	perf_config(perf_help_config, NULL);

	argc = parse_options(argc, argv, builtin_help_options,
			builtin_help_usage, 0);

	if (show_all) {
		printf("usage: %s\n\n", perf_usage_string);
		list_commands("perf commands", &main_cmds, &other_cmds);
		printf("%s\n", perf_more_info_string);
		return 0;
	}

	if (!argv[0]) {
		printf("usage: %s\n\n", perf_usage_string);
		list_common_cmds_help();
		printf("\n%s\n", perf_more_info_string);
		return 0;
	}

	/* An alias that is not itself a command is reported, not shown. */
	alias = alias_lookup(argv[0]);
	if (alias && !is_perf_command(argv[0])) {
		printf("`perf %s' is aliased to `%s'\n", argv[0], alias);
		return 0;
	}

	switch (help_format) {
	case HELP_FORMAT_MAN:
		show_man_page(argv[0]);
		break;
	case HELP_FORMAT_INFO:
		show_info_page(argv[0]);
		break;
	case HELP_FORMAT_WEB:
		show_html_page(argv[0]);
		break;
	}

	return 0;
}
diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c new file mode 100644 index 000000000000..4a50abf843ee --- /dev/null +++ b/Documentation/perf_counter/builtin-record.c | |||
@@ -0,0 +1,506 @@ | |||
1 | |||
2 | |||
3 | #define _GNU_SOURCE | ||
4 | #include <sys/types.h> | ||
5 | #include <sys/stat.h> | ||
6 | #include <sys/time.h> | ||
7 | #include <unistd.h> | ||
8 | #include <stdint.h> | ||
9 | #include <stdlib.h> | ||
10 | #include <string.h> | ||
11 | #include <limits.h> | ||
12 | #include <getopt.h> | ||
13 | #include <assert.h> | ||
14 | #include <fcntl.h> | ||
15 | #include <stdio.h> | ||
16 | #include <errno.h> | ||
17 | #include <ctype.h> | ||
18 | #include <time.h> | ||
19 | #include <sched.h> | ||
20 | #include <pthread.h> | ||
21 | |||
22 | #include <sys/syscall.h> | ||
23 | #include <sys/ioctl.h> | ||
24 | #include <sys/poll.h> | ||
25 | #include <sys/prctl.h> | ||
26 | #include <sys/wait.h> | ||
27 | #include <sys/uio.h> | ||
28 | #include <sys/mman.h> | ||
29 | |||
30 | #include <linux/unistd.h> | ||
31 | #include <linux/types.h> | ||
32 | |||
33 | #include "../../include/linux/perf_counter.h" | ||
34 | |||
35 | |||
/*
 * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
 * counters in the current task.
 */
#define PR_TASK_PERF_COUNTERS_DISABLE	31
#define PR_TASK_PERF_COUNTERS_ENABLE	32

/* Number of elements in a statically-sized array: */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/* Current CLOCK_MONOTONIC reading, in nanoseconds: */
#define rdclock()					\
({							\
	struct timespec ts;				\
							\
	clock_gettime(CLOCK_MONOTONIC, &ts);		\
	ts.tv_sec * 1000000000ULL + ts.tv_nsec;		\
})

/*
 * Pick up some kernel type conventions:
 * (these annotations mean nothing in userspace, so define them away)
 */
#define __user
#define asmlinkage

/*
 * Per-architecture syscall number for perf_counter_open plus the
 * read memory barrier / busy-wait primitives used on the mmap ring:
 */
#ifdef __x86_64__
#define __NR_perf_counter_open	295
#define rmb()		asm volatile("lfence" ::: "memory")
#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
#endif

#ifdef __i386__
#define __NR_perf_counter_open	333
#define rmb()		asm volatile("lfence" ::: "memory")
#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
#endif

#ifdef __powerpc__
#define __NR_perf_counter_open	319
#define rmb()		asm volatile ("sync" ::: "memory")
#define cpu_relax()	asm volatile ("" ::: "memory");
#endif

#define unlikely(x)	__builtin_expect(!!(x), 0)
#define min(x, y) ({				\
	typeof(x) _min1 = (x);			\
	typeof(y) _min2 = (y);			\
	(void) (&_min1 == &_min2);		\
	_min1 < _min2 ? _min1 : _min2; })

/* Raw wrapper for the perf_counter_open() system call: */
extern asmlinkage int sys_perf_counter_open(
	struct perf_counter_hw_event *hw_event_uptr	__user,
	pid_t		pid,
	int		cpu,
	int		group_fd,
	unsigned long	flags);

#define MAX_COUNTERS	64
#define MAX_NR_CPUS	256

/* Combine a counter type and event id into a single config word: */
#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
95 | |||
/* Number of counters requested via -e (0 until process_options runs): */
static int		nr_counters	= 0;
/* Event config (type+id) for each requested counter: */
static __u64		event_id[MAX_COUNTERS] = { };
/* Default sampling period, used for events without an explicit -c: */
static int		default_interval = 100000;
/* Sampling period per counter: */
static int		event_count[MAX_COUNTERS];
/* One counter fd per CPU per event: */
static int		fd[MAX_NR_CPUS][MAX_COUNTERS];
static int		nr_cpus		= 0;
static unsigned int	page_size;
/* Size of each counter's mmap data area, in pages (-m): */
static unsigned int	mmap_pages	= 16;
/* Output file descriptor and name (-o): */
static int		output;
static char		*output_name	= "output.perf";
/* When set, all counters on a CPU share one group leader: */
static int		group		= 0;
/* SCHED_FIFO priority for the reader loop (-r), 0 = off: */
static unsigned int	realtime_prio	= 0;

/* NOTE(review): not referenced anywhere in this file's visible
 * code — looks like a leftover; confirm before removing. */
const unsigned int default_count[] = {
	1000000,
	1000000,
	  10000,
	  10000,
	1000000,
	  10000,
};
117 | |||
/*
 * One symbolic name mapped to one event config; several names may
 * map to the same event (e.g. "cycles" and "cpu-cycles").
 */
struct event_symbol {
	__u64 event;
	char *symbol;
};

/* Lookup table used by match_event_symbols() (prefix matched): */
static struct event_symbol event_symbols[] = {
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },

	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
};
145 | |||
146 | /* | ||
147 | * Each event can have multiple symbolic names. | ||
148 | * Symbolic names are (almost) exactly matched. | ||
149 | */ | ||
150 | static __u64 match_event_symbols(char *str) | ||
151 | { | ||
152 | __u64 config, id; | ||
153 | int type; | ||
154 | unsigned int i; | ||
155 | |||
156 | if (sscanf(str, "r%llx", &config) == 1) | ||
157 | return config | PERF_COUNTER_RAW_MASK; | ||
158 | |||
159 | if (sscanf(str, "%d:%llu", &type, &id) == 2) | ||
160 | return EID(type, id); | ||
161 | |||
162 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | ||
163 | if (!strncmp(str, event_symbols[i].symbol, | ||
164 | strlen(event_symbols[i].symbol))) | ||
165 | return event_symbols[i].event; | ||
166 | } | ||
167 | |||
168 | return ~0ULL; | ||
169 | } | ||
170 | |||
171 | static int parse_events(char *str) | ||
172 | { | ||
173 | __u64 config; | ||
174 | |||
175 | again: | ||
176 | if (nr_counters == MAX_COUNTERS) | ||
177 | return -1; | ||
178 | |||
179 | config = match_event_symbols(str); | ||
180 | if (config == ~0ULL) | ||
181 | return -1; | ||
182 | |||
183 | event_id[nr_counters] = config; | ||
184 | nr_counters++; | ||
185 | |||
186 | str = strstr(str, ","); | ||
187 | if (str) { | ||
188 | str++; | ||
189 | goto again; | ||
190 | } | ||
191 | |||
192 | return 0; | ||
193 | } | ||
194 | |||
/* Extract a named bit-field from an event config word: */
#define __PERF_COUNTER_FIELD(config, name) \
	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
202 | |||
203 | static void display_events_help(void) | ||
204 | { | ||
205 | unsigned int i; | ||
206 | __u64 e; | ||
207 | |||
208 | printf( | ||
209 | " -e EVENT --event=EVENT # symbolic-name abbreviations"); | ||
210 | |||
211 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | ||
212 | int type, id; | ||
213 | |||
214 | e = event_symbols[i].event; | ||
215 | type = PERF_COUNTER_TYPE(e); | ||
216 | id = PERF_COUNTER_ID(e); | ||
217 | |||
218 | printf("\n %d:%d: %-20s", | ||
219 | type, id, event_symbols[i].symbol); | ||
220 | } | ||
221 | |||
222 | printf("\n" | ||
223 | " rNNN: raw PMU events (eventsel+umask)\n\n"); | ||
224 | } | ||
225 | |||
/* Print perf-record usage (including the event list) and exit. */
static void display_help(void)
{
	printf(
	"Usage: perf-record [<options>]\n"
	"perf-record Options (up to %d event types can be specified at once):\n\n",
		MAX_COUNTERS);

	display_events_help();

	printf(
	" -c CNT --count=CNT # event period to sample\n"
	" -m pages --mmap_pages=<pages> # number of mmap data pages\n"
	" -o file --output=<file> # output file\n"
	" -r prio --realtime=<prio> # use RT prio\n"
	);

	/* Help is terminal: this function never returns. */
	exit(0);
}
244 | |||
/*
 * Parse the perf-record command line into the file-scope
 * configuration variables; any error prints usage and exits via
 * display_help().
 */
static void process_options(int argc, char *argv[])
{
	int error = 0, counter;

	for (;;) {
		int option_index = 0;
		/** Options for getopt */
		static struct option long_options[] = {
			{"count", required_argument, NULL, 'c'},
			{"event", required_argument, NULL, 'e'},
			{"mmap_pages", required_argument, NULL, 'm'},
			{"output", required_argument, NULL, 'o'},
			{"realtime", required_argument, NULL, 'r'},
			{NULL, 0, NULL, 0 }
		};
		int c = getopt_long(argc, argv, "+:c:e:m:o:r:",
				long_options, &option_index);
		if (c == -1)
			break;

		switch (c) {
		case 'c': default_interval = atoi(optarg); break;
		case 'e': error = parse_events(optarg); break;
		case 'm': mmap_pages = atoi(optarg); break;
		case 'o': output_name = strdup(optarg); break;
		case 'r': realtime_prio = atoi(optarg); break;
		default: error = 1; break;
		}
	}
	if (error)
		display_help();

	/* No -e given: fall back to one counter with config 0. */
	if (!nr_counters) {
		nr_counters = 1;
		event_id[0] = 0;
	}

	/* Fill in the default sampling period for events without -c. */
	for (counter = 0; counter < nr_counters; counter++) {
		if (event_count[counter])
			continue;

		event_count[counter] = default_interval;
	}
}
289 | |||
/*
 * Per-counter mmap state: 'base' points at the mapping (control
 * page followed by the data ring), 'mask' is the data area size
 * minus one (data size is a power of two), and 'prev' is the byte
 * position we last read up to.
 */
struct mmap_data {
	int counter;
	void *base;
	unsigned int mask;
	unsigned int prev;
};
296 | |||
297 | static unsigned int mmap_read_head(struct mmap_data *md) | ||
298 | { | ||
299 | struct perf_counter_mmap_page *pc = md->base; | ||
300 | int head; | ||
301 | |||
302 | head = pc->data_head; | ||
303 | rmb(); | ||
304 | |||
305 | return head; | ||
306 | } | ||
307 | |||
/* Count of read passes that found new data (compared against by the
 * cmd_record() poll loop), and timestamps of the last two reads used
 * to warn when we fall behind the kernel's writer: */
static long events;
static struct timeval last_read, this_read;
310 | |||
/*
 * Drain one counter's mmap ring buffer into the output file.
 *
 * The kernel advances data_head as it writes samples; we remember
 * our last read position in md->prev and write out everything in
 * between, handling wrap-around at the buffer boundary.
 */
static void mmap_read(struct mmap_data *md)
{
	unsigned int head = mmap_read_head(md);
	unsigned int old = md->prev;
	/* Sample data starts one page after the control page: */
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int diff;

	gettimeofday(&this_read, NULL);

	/*
	 * If we're further behind than half the buffer, there's a chance
	 * the writer will bite our tail and screw up the events under us.
	 *
	 * If we somehow ended up ahead of the head, we got messed up.
	 *
	 * In either case, truncate and restart at head.
	 */
	diff = head - old;
	if (diff > md->mask / 2 || diff < 0) {
		struct timeval iv;
		unsigned long msecs;

		timersub(&this_read, &last_read, &iv);
		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;

		fprintf(stderr, "WARNING: failed to keep up with mmap data."
				" Last read %lu msecs ago.\n", msecs);

		/*
		 * head points to a known good entry, start there.
		 */
		old = head;
	}

	last_read = this_read;

	if (old != head)
		events++;

	size = head - old;

	/* Wrapped: first write the tail chunk up to the buffer end. */
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;
		while (size) {
			int ret = write(output, buf, size);
			if (ret < 0) {
				perror("failed to write");
				exit(-1);
			}
			/* Partial writes: advance and retry the rest. */
			size -= ret;
			buf += ret;
		}
	}

	/* Write the (remaining) contiguous chunk. */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;
	while (size) {
		int ret = write(output, buf, size);
		if (ret < 0) {
			perror("failed to write");
			exit(-1);
		}
		size -= ret;
		buf += ret;
	}

	md->prev = old;
}
384 | |||
/* Set by the SIGCHLD handler once the forked workload exits: */
static volatile int done = 0;

/* SIGCHLD handler: tell the cmd_record() read loop to stop. */
static void sigchld_handler(int sig)
{
	if (sig == SIGCHLD)
		done = 1;
}
392 | |||
/*
 * Entry point for 'perf record': open sampling counters on every
 * online CPU, mmap their ring buffers, fork+exec the workload
 * command, and stream all sampled events to the output file until
 * the child exits (SIGCHLD sets 'done').
 */
int cmd_record(int argc, const char **argv)
{
	struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
	struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
	struct perf_counter_hw_event hw_event;
	int i, counter, group_fd, nr_poll = 0;
	pid_t pid;
	int ret; /* NOTE(review): assigned from poll() but never checked */

	page_size = sysconf(_SC_PAGE_SIZE);

	process_options(argc, argv);

	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
	assert(nr_cpus <= MAX_NR_CPUS);
	assert(nr_cpus >= 0);

	output = open(output_name, O_CREAT|O_RDWR, S_IRWXU);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	/* Everything after the options is the workload command line. */
	argc -= optind;
	argv += optind;

	for (i = 0; i < nr_cpus; i++) {
		group_fd = -1;
		for (counter = 0; counter < nr_counters; counter++) {

			memset(&hw_event, 0, sizeof(hw_event));
			hw_event.config		= event_id[counter];
			hw_event.irq_period	= event_count[counter];
			hw_event.record_type	= PERF_RECORD_IP | PERF_RECORD_TID;
			hw_event.nmi		= 1;
			hw_event.mmap		= 1;
			hw_event.comm		= 1;

			/* One counter per CPU; pid -1 = count all tasks on it. */
			fd[i][counter] = sys_perf_counter_open(&hw_event, -1, i, group_fd, 0);
			if (fd[i][counter] < 0) {
				int err = errno;
				/* NOTE(review): "kerneltop" looks like a leftover
				 * tool name in this message — should say perf-record */
				printf("kerneltop error: syscall returned with %d (%s)\n",
					fd[i][counter], strerror(err));
				if (err == EPERM)
					printf("Are you root?\n");
				exit(-1);
			}
			assert(fd[i][counter] >= 0);
			fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);

			/*
			 * First counter acts as the group leader:
			 */
			if (group && group_fd == -1)
				group_fd = fd[i][counter];

			event_array[nr_poll].fd = fd[i][counter];
			event_array[nr_poll].events = POLLIN;
			nr_poll++;

			/* Map the control page plus mmap_pages of data. */
			mmap_array[i][counter].counter = counter;
			mmap_array[i][counter].prev = 0;
			mmap_array[i][counter].mask = mmap_pages*page_size - 1;
			mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
					PROT_READ, MAP_SHARED, fd[i][counter], 0);
			if (mmap_array[i][counter].base == MAP_FAILED) {
				printf("kerneltop error: failed to mmap with %d (%s)\n",
						errno, strerror(errno));
				exit(-1);
			}
		}
	}

	signal(SIGCHLD, sigchld_handler);

	pid = fork();
	if (pid < 0)
		perror("failed to fork");

	/* Child: exec the workload. */
	if (!pid) {
		if (execvp(argv[0], argv)) {
			perror(argv[0]);
			exit(-1);
		}
	}

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			printf("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	/*
	 * TODO: store the current /proc/$/maps information somewhere
	 */

	/* Parent: drain all ring buffers; only poll (100ms timeout)
	 * when a full pass found no new data. */
	while (!done) {
		int hits = events;

		for (i = 0; i < nr_cpus; i++) {
			for (counter = 0; counter < nr_counters; counter++)
				mmap_read(&mmap_array[i][counter]);
		}

		if (hits == events)
			ret = poll(event_array, nr_poll, 100);
	}

	return 0;
}
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c new file mode 100644 index 000000000000..112b94ed3298 --- /dev/null +++ b/Documentation/perf_counter/builtin-stat.c | |||
@@ -0,0 +1,591 @@ | |||
1 | /* | ||
2 | * kerneltop.c: show top kernel functions - performance counters showcase | ||
3 | |||
4 | Build with: | ||
5 | |||
6 | cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt | ||
7 | |||
8 | Sample output: | ||
9 | |||
10 | ------------------------------------------------------------------------------ | ||
11 | KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2) | ||
12 | ------------------------------------------------------------------------------ | ||
13 | |||
14 | weight RIP kernel function | ||
15 | ______ ________________ _______________ | ||
16 | |||
17 | 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev | ||
18 | 33.00 - ffffffff804cb740 : sock_alloc_send_skb | ||
19 | 31.26 - ffffffff804ce808 : skb_push | ||
20 | 22.43 - ffffffff80510004 : tcp_established_options | ||
21 | 19.00 - ffffffff8027d250 : find_get_page | ||
22 | 15.76 - ffffffff804e4fc9 : eth_type_trans | ||
23 | 15.20 - ffffffff804d8baa : dst_release | ||
24 | 14.86 - ffffffff804cf5d8 : skb_release_head_state | ||
25 | 14.00 - ffffffff802217d5 : read_hpet | ||
26 | 12.00 - ffffffff804ffb7f : __ip_local_out | ||
27 | 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish | ||
28 | 8.54 - ffffffff805001a3 : ip_queue_xmit | ||
29 | */ | ||
30 | |||
31 | /* | ||
32 | * perfstat: /usr/bin/time -alike performance counter statistics utility | ||
33 | |||
34 | It summarizes the counter events of all tasks (and child tasks), | ||
35 | covering all CPUs that the command (or workload) executes on. | ||
36 | It only counts the per-task events of the workload started, | ||
37 | independent of how many other tasks run on those CPUs. | ||
38 | |||
39 | Sample output: | ||
40 | |||
41 | $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null | ||
42 | |||
43 | Performance counter stats for 'ls': | ||
44 | |||
45 | 163516953 instructions | ||
46 | 2295 cache-misses | ||
47 | 2855182 branch-misses | ||
48 | */ | ||
49 | |||
50 | /* | ||
51 | * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> | ||
52 | * | ||
53 | * Improvements and fixes by: | ||
54 | * | ||
55 | * Arjan van de Ven <arjan@linux.intel.com> | ||
56 | * Yanmin Zhang <yanmin.zhang@intel.com> | ||
57 | * Wu Fengguang <fengguang.wu@intel.com> | ||
58 | * Mike Galbraith <efault@gmx.de> | ||
59 | * Paul Mackerras <paulus@samba.org> | ||
60 | * | ||
61 | * Released under the GPL v2. (and only v2, not any later version) | ||
62 | */ | ||
63 | |||
64 | #include "util/util.h" | ||
65 | |||
66 | #include <getopt.h> | ||
67 | #include <assert.h> | ||
68 | #include <fcntl.h> | ||
69 | #include <stdio.h> | ||
70 | #include <errno.h> | ||
71 | #include <time.h> | ||
72 | #include <sched.h> | ||
73 | #include <pthread.h> | ||
74 | |||
75 | #include <sys/syscall.h> | ||
76 | #include <sys/ioctl.h> | ||
77 | #include <sys/poll.h> | ||
78 | #include <sys/prctl.h> | ||
79 | #include <sys/wait.h> | ||
80 | #include <sys/uio.h> | ||
81 | #include <sys/mman.h> | ||
82 | |||
83 | #include <linux/unistd.h> | ||
84 | #include <linux/types.h> | ||
85 | |||
86 | #include "../../include/linux/perf_counter.h" | ||
87 | |||
88 | |||
/*
 * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
 * counters in the current task.
 */
#define PR_TASK_PERF_COUNTERS_DISABLE	31
#define PR_TASK_PERF_COUNTERS_ENABLE	32

/* Current CLOCK_MONOTONIC reading, in nanoseconds: */
#define rdclock()					\
({							\
	struct timespec ts;				\
							\
	clock_gettime(CLOCK_MONOTONIC, &ts);		\
	ts.tv_sec * 1000000000ULL + ts.tv_nsec;		\
})

/*
 * Pick up some kernel type conventions:
 * (these annotations mean nothing in userspace, so define them away)
 */
#define __user
#define asmlinkage

/*
 * Per-architecture syscall number for perf_counter_open plus memory
 * barrier / busy-wait primitives:
 */
#ifdef __x86_64__
#define __NR_perf_counter_open	295
#define rmb()		asm volatile("lfence" ::: "memory")
#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
#endif

#ifdef __i386__
#define __NR_perf_counter_open	333
#define rmb()		asm volatile("lfence" ::: "memory")
#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
#endif

#ifdef __powerpc__
#define __NR_perf_counter_open	319
#define rmb()		asm volatile ("sync" ::: "memory")
#define cpu_relax()	asm volatile ("" ::: "memory");
#endif

#define unlikely(x)	__builtin_expect(!!(x), 0)
#define min(x, y) ({				\
	typeof(x) _min1 = (x);			\
	typeof(y) _min2 = (y);			\
	(void) (&_min1 == &_min2);		\
	_min1 < _min2 ? _min1 : _min2; })

/* Raw wrapper for the perf_counter_open() system call: */
extern asmlinkage int sys_perf_counter_open(
	struct perf_counter_hw_event *hw_event_uptr	__user,
	pid_t		pid,
	int		cpu,
	int		group_fd,
	unsigned long	flags);

#define MAX_COUNTERS	64
#define MAX_NR_CPUS	256

/* Combine a counter type and event id into a single config word: */
#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
146 | |||
/* -a: count system-wide across all CPUs instead of just the task: */
static int		system_wide	= 0;

static int		nr_counters	= 0;
/* Pre-populated default counter set; nr_counters starts at 0, so
 * these only take effect if something bumps the count — presumably
 * the (not fully visible) option parsing; TODO confirm: */
static __u64		event_id[MAX_COUNTERS] = {
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),

	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
	EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
};
static int		default_interval = 100000;
static int		event_count[MAX_COUNTERS];
/* One fd per CPU per counter (only row 0 used in per-task mode): */
static int		fd[MAX_NR_CPUS][MAX_COUNTERS];

/* NOTE(review): tid, profile_cpu, nmi, group, zero and
 * default_count are not referenced in the visible code of this
 * file — possibly copied over from kerneltop; confirm. */
static int		tid		= -1;
static int		profile_cpu	= -1;
static int		nr_cpus		= 0;
static int		nmi		= 1;
static int		group		= 0;
static unsigned int	page_size;

static int		zero;

/* -l: scale counts by time-enabled / time-running: */
static int		scale;

static const unsigned int default_count[] = {
	1000000,
	1000000,
	  10000,
	  10000,
	1000000,
	  10000,
};
184 | |||
/* Printable names for PERF_TYPE_HARDWARE events; event_name()
 * indexes this array directly by event id: */
static char *hw_event_names[] = {
	"CPU cycles",
	"instructions",
	"cache references",
	"cache misses",
	"branches",
	"branch misses",
	"bus cycles",
};

/* Printable names for PERF_TYPE_SOFTWARE events, indexed by id: */
static char *sw_event_names[] = {
	"cpu clock ticks",
	"task clock ticks",
	"pagefaults",
	"context switches",
	"CPU migrations",
	"minor faults",
	"major faults",
};
204 | |||
/*
 * One symbolic name mapped to one event config; several names may
 * map to the same event (e.g. "cycles" and "cpu-cycles").
 */
struct event_symbol {
	__u64 event;
	char *symbol;
};

/* Lookup table used by match_event_symbols() (prefix matched): */
static struct event_symbol event_symbols[] = {
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },

	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
};

/* Extract a named bit-field from an event config word: */
#define __PERF_COUNTER_FIELD(config, name) \
	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
240 | |||
241 | static void display_events_help(void) | ||
242 | { | ||
243 | unsigned int i; | ||
244 | __u64 e; | ||
245 | |||
246 | printf( | ||
247 | " -e EVENT --event=EVENT # symbolic-name abbreviations"); | ||
248 | |||
249 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | ||
250 | int type, id; | ||
251 | |||
252 | e = event_symbols[i].event; | ||
253 | type = PERF_COUNTER_TYPE(e); | ||
254 | id = PERF_COUNTER_ID(e); | ||
255 | |||
256 | printf("\n %d:%d: %-20s", | ||
257 | type, id, event_symbols[i].symbol); | ||
258 | } | ||
259 | |||
260 | printf("\n" | ||
261 | " rNNN: raw PMU events (eventsel+umask)\n\n"); | ||
262 | } | ||
263 | |||
/* Print perfstat usage (including the event list) and exit. */
static void display_help(void)
{
	printf(
	"Usage: perfstat [<events...>] <cmd...>\n\n"
	"PerfStat Options (up to %d event types can be specified):\n\n",
		MAX_COUNTERS);

	display_events_help();

	printf(
	" -l # scale counter values\n"
	" -a # system-wide collection\n");
	/* Help is terminal: this function never returns. */
	exit(0);
}
278 | |||
279 | static char *event_name(int ctr) | ||
280 | { | ||
281 | __u64 config = event_id[ctr]; | ||
282 | int type = PERF_COUNTER_TYPE(config); | ||
283 | int id = PERF_COUNTER_ID(config); | ||
284 | static char buf[32]; | ||
285 | |||
286 | if (PERF_COUNTER_RAW(config)) { | ||
287 | sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config)); | ||
288 | return buf; | ||
289 | } | ||
290 | |||
291 | switch (type) { | ||
292 | case PERF_TYPE_HARDWARE: | ||
293 | if (id < PERF_HW_EVENTS_MAX) | ||
294 | return hw_event_names[id]; | ||
295 | return "unknown-hardware"; | ||
296 | |||
297 | case PERF_TYPE_SOFTWARE: | ||
298 | if (id < PERF_SW_EVENTS_MAX) | ||
299 | return sw_event_names[id]; | ||
300 | return "unknown-software"; | ||
301 | |||
302 | default: | ||
303 | break; | ||
304 | } | ||
305 | |||
306 | return "unknown"; | ||
307 | } | ||
308 | |||
309 | /* | ||
310 | * Each event can have multiple symbolic names. | ||
311 | * Symbolic names are (almost) exactly matched. | ||
312 | */ | ||
313 | static __u64 match_event_symbols(char *str) | ||
314 | { | ||
315 | __u64 config, id; | ||
316 | int type; | ||
317 | unsigned int i; | ||
318 | |||
319 | if (sscanf(str, "r%llx", &config) == 1) | ||
320 | return config | PERF_COUNTER_RAW_MASK; | ||
321 | |||
322 | if (sscanf(str, "%d:%llu", &type, &id) == 2) | ||
323 | return EID(type, id); | ||
324 | |||
325 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | ||
326 | if (!strncmp(str, event_symbols[i].symbol, | ||
327 | strlen(event_symbols[i].symbol))) | ||
328 | return event_symbols[i].event; | ||
329 | } | ||
330 | |||
331 | return ~0ULL; | ||
332 | } | ||
333 | |||
334 | static int parse_events(char *str) | ||
335 | { | ||
336 | __u64 config; | ||
337 | |||
338 | again: | ||
339 | if (nr_counters == MAX_COUNTERS) | ||
340 | return -1; | ||
341 | |||
342 | config = match_event_symbols(str); | ||
343 | if (config == ~0ULL) | ||
344 | return -1; | ||
345 | |||
346 | event_id[nr_counters] = config; | ||
347 | nr_counters++; | ||
348 | |||
349 | str = strstr(str, ","); | ||
350 | if (str) { | ||
351 | str++; | ||
352 | goto again; | ||
353 | } | ||
354 | |||
355 | return 0; | ||
356 | } | ||
357 | |||
358 | |||
/*
 * perfstat
 */

/* NOTE(review): large BSS array not referenced in the visible code
 * of this file — presumably kept around to generate page faults for
 * testing; confirm before removing. */
char fault_here[1000000];
364 | |||
/*
 * Open counter 'counter': either one fd per CPU across the whole
 * system (-a), or a single per-task counter that is inherited by
 * the workload we are about to exec.  Exits on any syscall failure.
 */
static void create_perfstat_counter(int counter)
{
	struct perf_counter_hw_event hw_event;

	memset(&hw_event, 0, sizeof(hw_event));
	hw_event.config		= event_id[counter];
	hw_event.record_type	= 0;
	hw_event.nmi		= 0;
	/* With -l, ask the kernel for enabled/running times as well. */
	if (scale)
		hw_event.read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
					  PERF_FORMAT_TOTAL_TIME_RUNNING;

	if (system_wide) {
		int cpu;
		for (cpu = 0; cpu < nr_cpus; cpu ++) {
			/* pid -1, cpu N: count everything on that CPU. */
			fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
			if (fd[cpu][counter] < 0) {
				printf("perfstat error: syscall returned with %d (%s)\n",
					fd[cpu][counter], strerror(errno));
				exit(-1);
			}
		}
	} else {
		/* Start disabled; do_perfstat() enables via prctl()
		 * right before exec'ing the workload. */
		hw_event.inherit	= 1;
		hw_event.disabled	= 1;

		fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
		if (fd[0][counter] < 0) {
			printf("perfstat error: syscall returned with %d (%s)\n",
				fd[0][counter], strerror(errno));
			exit(-1);
		}
	}
}
399 | |||
/*
 * Run the workload given in argv under the configured counters and
 * print a /usr/bin/time-style summary of all counts to stderr.
 */
int do_perfstat(int argc, char *argv[])
{
	unsigned long long t0, t1;
	int counter;
	ssize_t res;
	int status;
	int pid;

	/* Per-task mode reads everything through fd[0][*]. */
	if (!system_wide)
		nr_cpus = 1;

	for (counter = 0; counter < nr_counters; counter++)
		create_perfstat_counter(counter);

	/* Everything after the options is the command to measure. */
	argc -= optind;
	argv += optind;

	if (!argc)
		display_help();

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
	prctl(PR_TASK_PERF_COUNTERS_ENABLE);

	if ((pid = fork()) < 0)
		perror("failed to fork");
	if (!pid) {
		if (execvp(argv[0], argv)) {
			perror(argv[0]);
			exit(-1);
		}
	}
	/* Reap the child (and any other children) before reading. */
	while (wait(&status) >= 0)
		;
	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
	t1 = rdclock();

	fflush(stdout);

	fprintf(stderr, "\n");
	fprintf(stderr, " Performance counter stats for \'%s\':\n",
		argv[0]);
	fprintf(stderr, "\n");

	for (counter = 0; counter < nr_counters; counter++) {
		int cpu, nv;
		__u64 count[3], single_count[3];
		int scaled;

		/*
		 * count[0] = event count; with -l the kernel also
		 * returns count[1] = time enabled and
		 * count[2] = time running (PERF_FORMAT_TOTAL_TIME_*).
		 */
		count[0] = count[1] = count[2] = 0;
		nv = scale ? 3 : 1;
		for (cpu = 0; cpu < nr_cpus; cpu ++) {
			res = read(fd[cpu][counter],
					single_count, nv * sizeof(__u64));
			assert(res == nv * sizeof(__u64));

			count[0] += single_count[0];
			if (scale) {
				count[1] += single_count[1];
				count[2] += single_count[2];
			}
		}

		scaled = 0;
		if (scale) {
			if (count[2] == 0) {
				/* Counter never got to run at all. */
				fprintf(stderr, " %14s %-20s\n",
					"<not counted>", event_name(counter));
				continue;
			}
			if (count[2] < count[1]) {
				/* Extrapolate count to the full enabled time. */
				scaled = 1;
				count[0] = (unsigned long long)
					((double)count[0] * count[1] / count[2] + 0.5);
			}
		}

		/* Clock counters tick in nanoseconds; print as msecs. */
		if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
		    event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {

			double msecs = (double)count[0] / 1000000;

			fprintf(stderr, " %14.6f %-20s (msecs)",
				msecs, event_name(counter));
		} else {
			fprintf(stderr, " %14Ld %-20s (events)",
				count[0], event_name(counter));
		}
		if (scaled)
			fprintf(stderr, " (scaled from %.2f%%)",
				(double) count[2] / count[1] * 100);
		fprintf(stderr, "\n");
	}
	fprintf(stderr, "\n");
	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
			(double)(t1-t0)/1e6);
	fprintf(stderr, "\n");

	return 0;
}
502 | |||
/*
 * Parse the 'stat' command line with getopt_long() and fill in the
 * option globals. On any parse error the usage text is printed and
 * the process exits (via display_help()).
 */
static void process_options(int argc, char **argv)
{
	int error = 0, counter;

	for (;;) {
		int option_index = 0;
		/** Options for getopt */
		static struct option long_options[] = {
			{"count", required_argument, NULL, 'c'},
			{"cpu", required_argument, NULL, 'C'},
			{"delay", required_argument, NULL, 'd'},
			{"dump_symtab", no_argument, NULL, 'D'},
			{"event", required_argument, NULL, 'e'},
			{"filter", required_argument, NULL, 'f'},
			{"group", required_argument, NULL, 'g'},
			{"help", no_argument, NULL, 'h'},
			{"nmi", required_argument, NULL, 'n'},
			{"munmap_info", no_argument, NULL, 'U'},
			{"pid", required_argument, NULL, 'p'},
			{"realtime", required_argument, NULL, 'r'},
			{"scale", no_argument, NULL, 'l'},
			{"symbol", required_argument, NULL, 's'},
			{"stat", no_argument, NULL, 'S'},
			{"vmlinux", required_argument, NULL, 'x'},
			{"zero", no_argument, NULL, 'z'},
			{NULL, 0, NULL, 0 }
		};
		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
				    long_options, &option_index);
		if (c == -1)
			break;

		switch (c) {
		case 'a': system_wide = 1; break;
		case 'c': default_interval = atoi(optarg); break;
		case 'C':
			/* CPU and PID are mutually exclusive */
			if (tid != -1) {
				printf("WARNING: CPU switch overriding PID\n");
				sleep(1);
				tid = -1;
			}
			profile_cpu = atoi(optarg); break;

		case 'e': error = parse_events(optarg); break;

		case 'g': group = atoi(optarg); break;
		case 'h': display_help(); break;
		case 'l': scale = 1; break;
		case 'n': nmi = atoi(optarg); break;
		case 'p':
			/* CPU and PID are mutually exclusive */
			if (profile_cpu != -1) {
				printf("WARNING: PID switch overriding CPU\n");
				sleep(1);
				profile_cpu = -1;
			}
			tid = atoi(optarg); break;
		case 'z': zero = 1; break;
		/*
		 * NOTE(review): several letters in the optstring (d, D,
		 * f, m, r, s, S, x, M, U) have no case here and hit the
		 * error path — presumably they only apply to the 'top'
		 * variant of this tool; confirm.
		 */
		default: error = 1; break;
		}
	}
	if (error)
		display_help();

	/* no -e given: fall back to the 8 pre-seeded default events */
	if (!nr_counters) {
		nr_counters = 8;
	}

	/* counters without an explicit period get the default interval */
	for (counter = 0; counter < nr_counters; counter++) {
		if (event_count[counter])
			continue;

		event_count[counter] = default_interval;
	}
}
579 | |||
580 | int cmd_stat(int argc, char **argv, const char *prefix) | ||
581 | { | ||
582 | page_size = sysconf(_SC_PAGE_SIZE); | ||
583 | |||
584 | process_options(argc, argv); | ||
585 | |||
586 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | ||
587 | assert(nr_cpus <= MAX_NR_CPUS); | ||
588 | assert(nr_cpus >= 0); | ||
589 | |||
590 | return do_perfstat(argc, argv); | ||
591 | } | ||
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c new file mode 100644 index 000000000000..6a276d2b2bbc --- /dev/null +++ b/Documentation/perf_counter/builtin-top.c | |||
@@ -0,0 +1,1203 @@ | |||
1 | /* | ||
2 | * kerneltop.c: show top kernel functions - performance counters showcase | ||
3 | |||
4 | Build with: | ||
5 | |||
6 | cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt | ||
7 | |||
8 | Sample output: | ||
9 | |||
10 | ------------------------------------------------------------------------------ | ||
11 | KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2) | ||
12 | ------------------------------------------------------------------------------ | ||
13 | |||
14 | weight RIP kernel function | ||
15 | ______ ________________ _______________ | ||
16 | |||
17 | 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev | ||
18 | 33.00 - ffffffff804cb740 : sock_alloc_send_skb | ||
19 | 31.26 - ffffffff804ce808 : skb_push | ||
20 | 22.43 - ffffffff80510004 : tcp_established_options | ||
21 | 19.00 - ffffffff8027d250 : find_get_page | ||
22 | 15.76 - ffffffff804e4fc9 : eth_type_trans | ||
23 | 15.20 - ffffffff804d8baa : dst_release | ||
24 | 14.86 - ffffffff804cf5d8 : skb_release_head_state | ||
25 | 14.00 - ffffffff802217d5 : read_hpet | ||
26 | 12.00 - ffffffff804ffb7f : __ip_local_out | ||
27 | 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish | ||
28 | 8.54 - ffffffff805001a3 : ip_queue_xmit | ||
29 | */ | ||
30 | |||
31 | /* | ||
32 | * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> | ||
33 | * | ||
34 | * Improvements and fixes by: | ||
35 | * | ||
36 | * Arjan van de Ven <arjan@linux.intel.com> | ||
37 | * Yanmin Zhang <yanmin.zhang@intel.com> | ||
38 | * Wu Fengguang <fengguang.wu@intel.com> | ||
39 | * Mike Galbraith <efault@gmx.de> | ||
40 | * Paul Mackerras <paulus@samba.org> | ||
41 | * | ||
42 | * Released under the GPL v2. (and only v2, not any later version) | ||
43 | */ | ||
44 | |||
45 | #include "util/util.h" | ||
46 | |||
47 | #include <getopt.h> | ||
48 | #include <assert.h> | ||
49 | #include <fcntl.h> | ||
50 | #include <stdio.h> | ||
51 | #include <errno.h> | ||
52 | #include <time.h> | ||
53 | #include <sched.h> | ||
54 | #include <pthread.h> | ||
55 | |||
56 | #include <sys/syscall.h> | ||
57 | #include <sys/ioctl.h> | ||
58 | #include <sys/poll.h> | ||
59 | #include <sys/prctl.h> | ||
60 | #include <sys/wait.h> | ||
61 | #include <sys/uio.h> | ||
62 | #include <sys/mman.h> | ||
63 | |||
64 | #include <linux/unistd.h> | ||
65 | #include <linux/types.h> | ||
66 | |||
67 | #include "../../include/linux/perf_counter.h" | ||
68 | |||
69 | |||
70 | /* | ||
71 | * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all | ||
72 | * counters in the current task. | ||
73 | */ | ||
74 | #define PR_TASK_PERF_COUNTERS_DISABLE 31 | ||
75 | #define PR_TASK_PERF_COUNTERS_ENABLE 32 | ||
76 | |||
77 | #define rdclock() \ | ||
78 | ({ \ | ||
79 | struct timespec ts; \ | ||
80 | \ | ||
81 | clock_gettime(CLOCK_MONOTONIC, &ts); \ | ||
82 | ts.tv_sec * 1000000000ULL + ts.tv_nsec; \ | ||
83 | }) | ||
84 | |||
85 | /* | ||
86 | * Pick up some kernel type conventions: | ||
87 | */ | ||
88 | #define __user | ||
89 | #define asmlinkage | ||
90 | |||
91 | #ifdef __x86_64__ | ||
92 | #define __NR_perf_counter_open 295 | ||
93 | #define rmb() asm volatile("lfence" ::: "memory") | ||
94 | #define cpu_relax() asm volatile("rep; nop" ::: "memory"); | ||
95 | #endif | ||
96 | |||
97 | #ifdef __i386__ | ||
98 | #define __NR_perf_counter_open 333 | ||
99 | #define rmb() asm volatile("lfence" ::: "memory") | ||
100 | #define cpu_relax() asm volatile("rep; nop" ::: "memory"); | ||
101 | #endif | ||
102 | |||
103 | #ifdef __powerpc__ | ||
104 | #define __NR_perf_counter_open 319 | ||
105 | #define rmb() asm volatile ("sync" ::: "memory") | ||
106 | #define cpu_relax() asm volatile ("" ::: "memory"); | ||
107 | #endif | ||
108 | |||
109 | #define unlikely(x) __builtin_expect(!!(x), 0) | ||
110 | #define min(x, y) ({ \ | ||
111 | typeof(x) _min1 = (x); \ | ||
112 | typeof(y) _min2 = (y); \ | ||
113 | (void) (&_min1 == &_min2); \ | ||
114 | _min1 < _min2 ? _min1 : _min2; }) | ||
115 | |||
116 | asmlinkage int sys_perf_counter_open( | ||
117 | struct perf_counter_hw_event *hw_event_uptr __user, | ||
118 | pid_t pid, | ||
119 | int cpu, | ||
120 | int group_fd, | ||
121 | unsigned long flags) | ||
122 | { | ||
123 | return syscall( | ||
124 | __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); | ||
125 | } | ||
126 | |||
#define MAX_COUNTERS 64
#define MAX_NR_CPUS 256

/* pack a (type, id) pair into a single event config word: */
#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))

/* -a: profile the whole machine instead of a single task */
static int system_wide = 0;

/* number of -e events; 0 means "use the 8 defaults below" */
static int nr_counters = 0;
static __u64 event_id[MAX_COUNTERS] = {
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),

	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
	EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
};
/* -c: default sampling period for counters without one */
static int default_interval = 100000;
static int event_count[MAX_COUNTERS];
/* one counter fd per (cpu, event) pair */
static int fd[MAX_NR_CPUS][MAX_COUNTERS];

/* -f: only display symbols with at least this many events */
static __u64 count_filter = 100;

static int tid = -1;		/* -p: profile this task only */
static int profile_cpu = -1;	/* -C: profile this CPU only */
static int nr_cpus = 0;
static int nmi = 1;		/* -n: use NMI sampling */
static unsigned int realtime_prio = 0;	/* -r: SCHED_FIFO priority */
static int group = 0;		/* -g: put counters into one group */
static unsigned int page_size;
static unsigned int mmap_pages = 16;	/* -m: mmap data pages */
static int use_mmap = 0;	/* -M: print mmap info stream */
static int use_munmap = 0;	/* -U: print munmap info stream */

static char *vmlinux;		/* -x: vmlinux path for -s annotation */

static char *sym_filter;	/* -s: symbol to annotate one-shot */
static unsigned long filter_start;	/* address range of sym_filter */
static unsigned long filter_end;

static int delay_secs = 2;	/* -d: display refresh interval */
static int zero;		/* -z: reset counts after each display */
static int dump_symtab;		/* -D: dump symbol table on startup */

static int scale;		/* -l: scale multiplexed counters */
174 | |||
/*
 * One line of objdump -dS output, with a per-line sample count.
 */
struct source_line {
	uint64_t EIP;			/* address parsed off the line, if any */
	unsigned long count;		/* samples that hit this line */
	char *line;			/* the text, newline stripped */
	struct source_line *next;
};

/* singly-linked list of all parsed lines; appended through lines_tail */
static struct source_line *lines;
static struct source_line **lines_tail;

/*
 * NOTE(review): presumably per-default-event sample periods, but this
 * array has 6 entries while event_id seeds 8 defaults — confirm use.
 */
static const unsigned int default_count[] = {
	1000000,
	1000000,
	10000,
	10000,
	1000000,
	10000,
};
193 | |||
/* printable names for PERF_TYPE_HARDWARE events, indexed by event id */
static char *hw_event_names[] = {
	"CPU cycles",
	"instructions",
	"cache references",
	"cache misses",
	"branches",
	"branch misses",
	"bus cycles",
};

/* printable names for PERF_TYPE_SOFTWARE events, indexed by event id */
static char *sw_event_names[] = {
	"cpu clock ticks",
	"task clock ticks",
	"pagefaults",
	"context switches",
	"CPU migrations",
	"minor faults",
	"major faults",
};

/* maps a symbolic -e name to its packed event config */
struct event_symbol {
	__u64 event;
	char *symbol;
};

/*
 * Accepted -e names; several events have more than one alias.
 * Matching is by prefix, in table order (see match_event_symbols()).
 */
static struct event_symbol event_symbols[] = {
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },

	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
};
241 | |||
242 | #define __PERF_COUNTER_FIELD(config, name) \ | ||
243 | ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT) | ||
244 | |||
245 | #define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW) | ||
246 | #define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG) | ||
247 | #define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE) | ||
248 | #define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT) | ||
249 | |||
250 | static void display_events_help(void) | ||
251 | { | ||
252 | unsigned int i; | ||
253 | __u64 e; | ||
254 | |||
255 | printf( | ||
256 | " -e EVENT --event=EVENT # symbolic-name abbreviations"); | ||
257 | |||
258 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | ||
259 | int type, id; | ||
260 | |||
261 | e = event_symbols[i].event; | ||
262 | type = PERF_COUNTER_TYPE(e); | ||
263 | id = PERF_COUNTER_ID(e); | ||
264 | |||
265 | printf("\n %d:%d: %-20s", | ||
266 | type, id, event_symbols[i].symbol); | ||
267 | } | ||
268 | |||
269 | printf("\n" | ||
270 | " rNNN: raw PMU events (eventsel+umask)\n\n"); | ||
271 | } | ||
272 | |||
/*
 * Print the full usage text (shared between the top and stat modes of
 * this tool) and terminate. Never returns.
 */
static void display_help(void)
{
	printf(
	"Usage: kerneltop [<options>]\n"
	" Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
	"KernelTop Options (up to %d event types can be specified at once):\n\n",
		 MAX_COUNTERS);

	display_events_help();

	printf(
	" -c CNT --count=CNT # event period to sample\n\n"
	" -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n"
	" -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n"
	" -l # show scale factor for RR events\n"
	" -d delay --delay=<seconds> # sampling/display delay [default: 2]\n"
	" -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n"
	" -r prio --realtime=<prio> # event acquisition runs with SCHED_FIFO policy\n"
	" -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n"
	" -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n"
	" -z --zero # zero counts after display\n"
	" -D --dump_symtab # dump symbol table to stderr on startup\n"
	" -m pages --mmap_pages=<pages> # number of mmap data pages\n"
	" -M --mmap_info # print mmap info stream\n"
	" -U --munmap_info # print munmap info stream\n"
	);

	exit(0);
}
302 | |||
303 | static char *event_name(int ctr) | ||
304 | { | ||
305 | __u64 config = event_id[ctr]; | ||
306 | int type = PERF_COUNTER_TYPE(config); | ||
307 | int id = PERF_COUNTER_ID(config); | ||
308 | static char buf[32]; | ||
309 | |||
310 | if (PERF_COUNTER_RAW(config)) { | ||
311 | sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config)); | ||
312 | return buf; | ||
313 | } | ||
314 | |||
315 | switch (type) { | ||
316 | case PERF_TYPE_HARDWARE: | ||
317 | if (id < PERF_HW_EVENTS_MAX) | ||
318 | return hw_event_names[id]; | ||
319 | return "unknown-hardware"; | ||
320 | |||
321 | case PERF_TYPE_SOFTWARE: | ||
322 | if (id < PERF_SW_EVENTS_MAX) | ||
323 | return sw_event_names[id]; | ||
324 | return "unknown-software"; | ||
325 | |||
326 | default: | ||
327 | break; | ||
328 | } | ||
329 | |||
330 | return "unknown"; | ||
331 | } | ||
332 | |||
333 | /* | ||
334 | * Each event can have multiple symbolic names. | ||
335 | * Symbolic names are (almost) exactly matched. | ||
336 | */ | ||
337 | static __u64 match_event_symbols(char *str) | ||
338 | { | ||
339 | __u64 config, id; | ||
340 | int type; | ||
341 | unsigned int i; | ||
342 | |||
343 | if (sscanf(str, "r%llx", &config) == 1) | ||
344 | return config | PERF_COUNTER_RAW_MASK; | ||
345 | |||
346 | if (sscanf(str, "%d:%llu", &type, &id) == 2) | ||
347 | return EID(type, id); | ||
348 | |||
349 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | ||
350 | if (!strncmp(str, event_symbols[i].symbol, | ||
351 | strlen(event_symbols[i].symbol))) | ||
352 | return event_symbols[i].event; | ||
353 | } | ||
354 | |||
355 | return ~0ULL; | ||
356 | } | ||
357 | |||
358 | static int parse_events(char *str) | ||
359 | { | ||
360 | __u64 config; | ||
361 | |||
362 | again: | ||
363 | if (nr_counters == MAX_COUNTERS) | ||
364 | return -1; | ||
365 | |||
366 | config = match_event_symbols(str); | ||
367 | if (config == ~0ULL) | ||
368 | return -1; | ||
369 | |||
370 | event_id[nr_counters] = config; | ||
371 | nr_counters++; | ||
372 | |||
373 | str = strstr(str, ","); | ||
374 | if (str) { | ||
375 | str++; | ||
376 | goto again; | ||
377 | } | ||
378 | |||
379 | return 0; | ||
380 | } | ||
381 | |||
382 | /* | ||
383 | * Symbols | ||
384 | */ | ||
385 | |||
386 | static uint64_t min_ip; | ||
387 | static uint64_t max_ip = -1ll; | ||
388 | |||
389 | struct sym_entry { | ||
390 | unsigned long long addr; | ||
391 | char *sym; | ||
392 | unsigned long count[MAX_COUNTERS]; | ||
393 | int skip; | ||
394 | struct source_line *source; | ||
395 | }; | ||
396 | |||
397 | #define MAX_SYMS 100000 | ||
398 | |||
399 | static int sym_table_count; | ||
400 | |||
401 | struct sym_entry *sym_filter_entry; | ||
402 | |||
403 | static struct sym_entry sym_table[MAX_SYMS]; | ||
404 | |||
405 | static void show_details(struct sym_entry *sym); | ||
406 | |||
407 | /* | ||
408 | * Ordering weight: count-1 * count-2 * ... / count-n | ||
409 | */ | ||
410 | static double sym_weight(const struct sym_entry *sym) | ||
411 | { | ||
412 | double weight; | ||
413 | int counter; | ||
414 | |||
415 | weight = sym->count[0]; | ||
416 | |||
417 | for (counter = 1; counter < nr_counters-1; counter++) | ||
418 | weight *= sym->count[counter]; | ||
419 | |||
420 | weight /= (sym->count[counter] + 1); | ||
421 | |||
422 | return weight; | ||
423 | } | ||
424 | |||
/*
 * qsort() comparator: order symbols by descending weight.
 *
 * Bug fix: the original returned the bare result of '<' (only 0 or 1),
 * which is not a valid three-way comparison — qsort() requires a
 * negative/zero/positive result consistent with the ordering, and an
 * inconsistent comparator yields an unspecified sort. Return -1/0/+1.
 */
static int compare(const void *__sym1, const void *__sym2)
{
	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
	double w1 = sym_weight(sym1), w2 = sym_weight(sym2);

	if (w1 > w2)
		return -1;
	if (w1 < w2)
		return 1;
	return 0;
}
431 | |||
432 | static long events; | ||
433 | static long userspace_events; | ||
434 | static const char CONSOLE_CLEAR[] = "[H[2J"; | ||
435 | |||
436 | static struct sym_entry tmp[MAX_SYMS]; | ||
437 | |||
/*
 * Redraw the display: sort a snapshot of the symbol table by weight,
 * print the header statistics and the hottest kernel functions, apply
 * decay (or -z zeroing) to the live counts, show -s annotation if
 * requested, and exit on any keypress. Called every delay_secs from
 * display_thread().
 */
static void print_sym_table(void)
{
	int i, printed;
	int counter;
	float events_per_sec = events/delay_secs;
	float kevents_per_sec = (events-userspace_events)/delay_secs;
	float sum_kevents = 0.0;

	events = userspace_events = 0;
	/* work on a snapshot so the sampling side can keep updating */
	memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
	qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);

	for (i = 0; i < sym_table_count && tmp[i].count[0]; i++)
		sum_kevents += tmp[i].count[0];

	write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));

	printf(
"------------------------------------------------------------------------------\n");
	printf( " KernelTop:%8.0f irqs/sec kernel:%4.1f%% [%s, ",
		events_per_sec,
		100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
		nmi ? "NMI" : "IRQ");

	if (nr_counters == 1)
		printf("%d ", event_count[0]);

	for (counter = 0; counter < nr_counters; counter++) {
		if (counter)
			printf("/");

		printf("%s", event_name(counter));
	}

	printf( "], ");

	if (tid != -1)
		printf(" (tid: %d", tid);
	else
		printf(" (all");

	if (profile_cpu != -1)
		printf(", cpu: %d)\n", profile_cpu);
	else {
		if (tid != -1)
			printf(")\n");
		else
			printf(", %d CPUs)\n", nr_cpus);
	}

	printf("------------------------------------------------------------------------------\n\n");

	if (nr_counters == 1)
		printf(" events pcnt");
	else
		printf(" weight events pcnt");

	printf(" RIP kernel function\n"
	       " ______ ______ _____ ________________ _______________\n\n"
	);

	for (i = 0, printed = 0; i < sym_table_count; i++) {
		float pcnt;
		int count;

		/* print at most 19 symbols that pass the count filter */
		if (printed <= 18 && tmp[i].count[0] >= count_filter) {
			pcnt = 100.0 - (100.0*((sum_kevents-tmp[i].count[0])/sum_kevents));

			if (nr_counters == 1)
				printf("%19.2f - %4.1f%% - %016llx : %s\n",
					sym_weight(tmp + i),
					pcnt, tmp[i].addr, tmp[i].sym);
			else
				printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n",
					sym_weight(tmp + i),
					tmp[i].count[0],
					pcnt, tmp[i].addr, tmp[i].sym);
			printed++;
		}
		/*
		 * Add decay to the counts:
		 */
		for (count = 0; count < nr_counters; count++)
			sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8;
	}

	if (sym_filter_entry)
		show_details(sym_filter_entry);

	/* any keypress on stdin terminates the tool */
	{
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };

		if (poll(&stdin_poll, 1, 0) == 1) {
			printf("key pressed - exiting.\n");
			exit(0);
		}
	}
}
536 | |||
537 | static void *display_thread(void *arg) | ||
538 | { | ||
539 | printf("KernelTop refresh period: %d seconds\n", delay_secs); | ||
540 | |||
541 | while (!sleep(delay_secs)) | ||
542 | print_sym_table(); | ||
543 | |||
544 | return NULL; | ||
545 | } | ||
546 | |||
547 | static int read_symbol(FILE *in, struct sym_entry *s) | ||
548 | { | ||
549 | static int filter_match = 0; | ||
550 | char *sym, stype; | ||
551 | char str[500]; | ||
552 | int rc, pos; | ||
553 | |||
554 | rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str); | ||
555 | if (rc == EOF) | ||
556 | return -1; | ||
557 | |||
558 | assert(rc == 3); | ||
559 | |||
560 | /* skip until end of line: */ | ||
561 | pos = strlen(str); | ||
562 | do { | ||
563 | rc = fgetc(in); | ||
564 | if (rc == '\n' || rc == EOF || pos >= 499) | ||
565 | break; | ||
566 | str[pos] = rc; | ||
567 | pos++; | ||
568 | } while (1); | ||
569 | str[pos] = 0; | ||
570 | |||
571 | sym = str; | ||
572 | |||
573 | /* Filter out known duplicates and non-text symbols. */ | ||
574 | if (!strcmp(sym, "_text")) | ||
575 | return 1; | ||
576 | if (!min_ip && !strcmp(sym, "_stext")) | ||
577 | return 1; | ||
578 | if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext")) | ||
579 | return 1; | ||
580 | if (stype != 'T' && stype != 't') | ||
581 | return 1; | ||
582 | if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14)) | ||
583 | return 1; | ||
584 | if (strstr(sym, "_text_start") || strstr(sym, "_text_end")) | ||
585 | return 1; | ||
586 | |||
587 | s->sym = malloc(strlen(str)); | ||
588 | assert(s->sym); | ||
589 | |||
590 | strcpy((char *)s->sym, str); | ||
591 | s->skip = 0; | ||
592 | |||
593 | /* Tag events to be skipped. */ | ||
594 | if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym)) | ||
595 | s->skip = 1; | ||
596 | else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym)) | ||
597 | s->skip = 1; | ||
598 | else if (!strcmp("mwait_idle", s->sym)) | ||
599 | s->skip = 1; | ||
600 | |||
601 | if (filter_match == 1) { | ||
602 | filter_end = s->addr; | ||
603 | filter_match = -1; | ||
604 | if (filter_end - filter_start > 10000) { | ||
605 | printf("hm, too large filter symbol <%s> - skipping.\n", | ||
606 | sym_filter); | ||
607 | printf("symbol filter start: %016lx\n", filter_start); | ||
608 | printf(" end: %016lx\n", filter_end); | ||
609 | filter_end = filter_start = 0; | ||
610 | sym_filter = NULL; | ||
611 | sleep(1); | ||
612 | } | ||
613 | } | ||
614 | if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) { | ||
615 | filter_match = 1; | ||
616 | filter_start = s->addr; | ||
617 | } | ||
618 | |||
619 | return 0; | ||
620 | } | ||
621 | |||
622 | static int compare_addr(const void *__sym1, const void *__sym2) | ||
623 | { | ||
624 | const struct sym_entry *sym1 = __sym1, *sym2 = __sym2; | ||
625 | |||
626 | return sym1->addr > sym2->addr; | ||
627 | } | ||
628 | |||
629 | static void sort_symbol_table(void) | ||
630 | { | ||
631 | int i, dups; | ||
632 | |||
633 | do { | ||
634 | qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr); | ||
635 | for (i = 0, dups = 0; i < sym_table_count; i++) { | ||
636 | if (sym_table[i].addr == sym_table[i+1].addr) { | ||
637 | sym_table[i+1].addr = -1ll; | ||
638 | dups++; | ||
639 | } | ||
640 | } | ||
641 | sym_table_count -= dups; | ||
642 | } while(dups); | ||
643 | } | ||
644 | |||
645 | static void parse_symbols(void) | ||
646 | { | ||
647 | struct sym_entry *last; | ||
648 | |||
649 | FILE *kallsyms = fopen("/proc/kallsyms", "r"); | ||
650 | |||
651 | if (!kallsyms) { | ||
652 | printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n"); | ||
653 | exit(-1); | ||
654 | } | ||
655 | |||
656 | while (!feof(kallsyms)) { | ||
657 | if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) { | ||
658 | sym_table_count++; | ||
659 | assert(sym_table_count <= MAX_SYMS); | ||
660 | } | ||
661 | } | ||
662 | |||
663 | sort_symbol_table(); | ||
664 | min_ip = sym_table[0].addr; | ||
665 | max_ip = sym_table[sym_table_count-1].addr; | ||
666 | last = sym_table + sym_table_count++; | ||
667 | |||
668 | last->addr = -1ll; | ||
669 | last->sym = "<end>"; | ||
670 | |||
671 | if (filter_end) { | ||
672 | int count; | ||
673 | for (count=0; count < sym_table_count; count ++) { | ||
674 | if (!strcmp(sym_table[count].sym, sym_filter)) { | ||
675 | sym_filter_entry = &sym_table[count]; | ||
676 | break; | ||
677 | } | ||
678 | } | ||
679 | } | ||
680 | if (dump_symtab) { | ||
681 | int i; | ||
682 | |||
683 | for (i = 0; i < sym_table_count; i++) | ||
684 | fprintf(stderr, "%llx %s\n", | ||
685 | sym_table[i].addr, sym_table[i].sym); | ||
686 | } | ||
687 | } | ||
688 | |||
689 | /* | ||
690 | * Source lines | ||
691 | */ | ||
692 | |||
693 | static void parse_vmlinux(char *filename) | ||
694 | { | ||
695 | FILE *file; | ||
696 | char command[PATH_MAX*2]; | ||
697 | if (!filename) | ||
698 | return; | ||
699 | |||
700 | sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename); | ||
701 | |||
702 | file = popen(command, "r"); | ||
703 | if (!file) | ||
704 | return; | ||
705 | |||
706 | lines_tail = &lines; | ||
707 | while (!feof(file)) { | ||
708 | struct source_line *src; | ||
709 | size_t dummy = 0; | ||
710 | char *c; | ||
711 | |||
712 | src = malloc(sizeof(struct source_line)); | ||
713 | assert(src != NULL); | ||
714 | memset(src, 0, sizeof(struct source_line)); | ||
715 | |||
716 | if (getline(&src->line, &dummy, file) < 0) | ||
717 | break; | ||
718 | if (!src->line) | ||
719 | break; | ||
720 | |||
721 | c = strchr(src->line, '\n'); | ||
722 | if (c) | ||
723 | *c = 0; | ||
724 | |||
725 | src->next = NULL; | ||
726 | *lines_tail = src; | ||
727 | lines_tail = &src->next; | ||
728 | |||
729 | if (strlen(src->line)>8 && src->line[8] == ':') | ||
730 | src->EIP = strtoull(src->line, NULL, 16); | ||
731 | if (strlen(src->line)>8 && src->line[16] == ':') | ||
732 | src->EIP = strtoull(src->line, NULL, 16); | ||
733 | } | ||
734 | pclose(file); | ||
735 | } | ||
736 | |||
737 | static void record_precise_ip(uint64_t ip) | ||
738 | { | ||
739 | struct source_line *line; | ||
740 | |||
741 | for (line = lines; line; line = line->next) { | ||
742 | if (line->EIP == ip) | ||
743 | line->count++; | ||
744 | if (line->EIP > ip) | ||
745 | break; | ||
746 | } | ||
747 | } | ||
748 | |||
749 | static void lookup_sym_in_vmlinux(struct sym_entry *sym) | ||
750 | { | ||
751 | struct source_line *line; | ||
752 | char pattern[PATH_MAX]; | ||
753 | sprintf(pattern, "<%s>:", sym->sym); | ||
754 | |||
755 | for (line = lines; line; line = line->next) { | ||
756 | if (strstr(line->line, pattern)) { | ||
757 | sym->source = line; | ||
758 | break; | ||
759 | } | ||
760 | } | ||
761 | } | ||
762 | |||
763 | static void show_lines(struct source_line *line_queue, int line_queue_count) | ||
764 | { | ||
765 | int i; | ||
766 | struct source_line *line; | ||
767 | |||
768 | line = line_queue; | ||
769 | for (i = 0; i < line_queue_count; i++) { | ||
770 | printf("%8li\t%s\n", line->count, line->line); | ||
771 | line = line->next; | ||
772 | } | ||
773 | } | ||
774 | |||
/* lines of leading context kept around a hot source line */
#define TRACE_COUNT 3

/*
 * Print the annotated source/disassembly for the -s filter symbol:
 * every line whose hit count passes count_filter is flushed together
 * with up to TRACE_COUNT queued context lines. Counts are reset as
 * they are walked; output stops at the next symbol marker or after
 * ~300 lines.
 */
static void show_details(struct sym_entry *sym)
{
	struct source_line *line;
	struct source_line *line_queue = NULL;
	int displayed = 0;
	int line_queue_count = 0;

	if (!sym->source)
		lookup_sym_in_vmlinux(sym);
	if (!sym->source)
		return;

	printf("Showing details for %s\n", sym->sym);

	line = sym->source;
	while (line) {
		/* a later "<sym>:" marker means the next symbol started */
		if (displayed && strstr(line->line, ">:"))
			break;

		if (!line_queue_count)
			line_queue = line;
		line_queue_count ++;

		if (line->count >= count_filter) {
			/* hot line: flush it plus its queued context */
			show_lines(line_queue, line_queue_count);
			line_queue_count = 0;
			line_queue = NULL;
		} else if (line_queue_count > TRACE_COUNT) {
			/* keep the context window at TRACE_COUNT lines */
			line_queue = line_queue->next;
			line_queue_count --;
		}

		line->count = 0;
		displayed++;
		if (displayed > 300)
			break;
		line = line->next;
	}
}
816 | |||
817 | /* | ||
818 | * Binary search in the histogram table and record the hit: | ||
819 | */ | ||
820 | static void record_ip(uint64_t ip, int counter) | ||
821 | { | ||
822 | int left_idx, middle_idx, right_idx, idx; | ||
823 | unsigned long left, middle, right; | ||
824 | |||
825 | record_precise_ip(ip); | ||
826 | |||
827 | left_idx = 0; | ||
828 | right_idx = sym_table_count-1; | ||
829 | assert(ip <= max_ip && ip >= min_ip); | ||
830 | |||
831 | while (left_idx + 1 < right_idx) { | ||
832 | middle_idx = (left_idx + right_idx) / 2; | ||
833 | |||
834 | left = sym_table[ left_idx].addr; | ||
835 | middle = sym_table[middle_idx].addr; | ||
836 | right = sym_table[ right_idx].addr; | ||
837 | |||
838 | if (!(left <= middle && middle <= right)) { | ||
839 | printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right); | ||
840 | printf("%d %d %d\n", left_idx, middle_idx, right_idx); | ||
841 | } | ||
842 | assert(left <= middle && middle <= right); | ||
843 | if (!(left <= ip && ip <= right)) { | ||
844 | printf(" left: %016lx\n", left); | ||
845 | printf(" ip: %016lx\n", (unsigned long)ip); | ||
846 | printf("right: %016lx\n", right); | ||
847 | } | ||
848 | assert(left <= ip && ip <= right); | ||
849 | /* | ||
850 | * [ left .... target .... middle .... right ] | ||
851 | * => right := middle | ||
852 | */ | ||
853 | if (ip < middle) { | ||
854 | right_idx = middle_idx; | ||
855 | continue; | ||
856 | } | ||
857 | /* | ||
858 | * [ left .... middle ... target ... right ] | ||
859 | * => left := middle | ||
860 | */ | ||
861 | left_idx = middle_idx; | ||
862 | } | ||
863 | |||
864 | idx = left_idx; | ||
865 | |||
866 | if (!sym_table[idx].skip) | ||
867 | sym_table[idx].count[counter]++; | ||
868 | else events--; | ||
869 | } | ||
870 | |||
871 | static void process_event(uint64_t ip, int counter) | ||
872 | { | ||
873 | events++; | ||
874 | |||
875 | if (ip < min_ip || ip > max_ip) { | ||
876 | userspace_events++; | ||
877 | return; | ||
878 | } | ||
879 | |||
880 | record_ip(ip, counter); | ||
881 | } | ||
882 | |||
/*
 * Parse the command line and fill in the global configuration:
 * which events to count, sampling intervals, the target pid/cpu,
 * mmap sizes and the various display options.  Any parse error
 * ends up in display_help().
 */
static void process_options(int argc, char **argv)
{
	int error = 0, counter;

	for (;;) {
		int option_index = 0;
		/** Options for getopt */
		static struct option long_options[] = {
			{"count",	required_argument,	NULL, 'c'},
			{"cpu",		required_argument,	NULL, 'C'},
			{"delay",	required_argument,	NULL, 'd'},
			{"dump_symtab",	no_argument,		NULL, 'D'},
			{"event",	required_argument,	NULL, 'e'},
			{"filter",	required_argument,	NULL, 'f'},
			{"group",	required_argument,	NULL, 'g'},
			{"help",	no_argument,		NULL, 'h'},
			{"nmi",		required_argument,	NULL, 'n'},
			{"mmap_info",	no_argument,		NULL, 'M'},
			{"mmap_pages",	required_argument,	NULL, 'm'},
			{"munmap_info",	no_argument,		NULL, 'U'},
			{"pid",		required_argument,	NULL, 'p'},
			{"realtime",	required_argument,	NULL, 'r'},
			{"scale",	no_argument,		NULL, 'l'},
			{"symbol",	required_argument,	NULL, 's'},
			{"stat",	no_argument,		NULL, 'S'},
			{"vmlinux",	required_argument,	NULL, 'x'},
			{"zero",	no_argument,		NULL, 'z'},
			{NULL,		0,			NULL,  0 }
		};
		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
				    long_options, &option_index);
		if (c == -1)
			break;

		/*
		 * NOTE(review): 'S'/--stat is accepted by getopt above but has
		 * no case below, so it currently falls through to 'default'
		 * and triggers display_help() — confirm this is intended.
		 */
		switch (c) {
		case 'a': system_wide = 1; break;
		case 'c': default_interval = atoi(optarg); break;
		case 'C':
			/* CPU and PID are mutually exclusive */
			if (tid != -1) {
				printf("WARNING: CPU switch overriding PID\n");
				sleep(1);
				tid = -1;
			}
			profile_cpu = atoi(optarg); break;
		case 'd': delay_secs = atoi(optarg); break;
		case 'D': dump_symtab = 1; break;

		case 'e': error = parse_events(optarg); break;

		case 'f': count_filter = atoi(optarg); break;
		case 'g': group = atoi(optarg); break;
		case 'h': display_help(); break;
		case 'l': scale = 1; break;
		case 'n': nmi = atoi(optarg); break;
		case 'p':
			/* CPU and PID are mutually exclusive */
			if (profile_cpu != -1) {
				printf("WARNING: PID switch overriding CPU\n");
				sleep(1);
				profile_cpu = -1;
			}
			tid = atoi(optarg); break;
		case 'r': realtime_prio = atoi(optarg); break;
		case 's': sym_filter = strdup(optarg); break;
		case 'x': vmlinux = strdup(optarg); break;
		case 'z': zero = 1; break;
		case 'm': mmap_pages = atoi(optarg); break;
		case 'M': use_mmap = 1; break;
		case 'U': use_munmap = 1; break;
		default: error = 1; break;
		}
	}
	if (error)
		display_help();

	/* No events specified: default to a single counter of event id 0. */
	if (!nr_counters) {
		nr_counters = 1;
		event_id[0] = 0;
	}

	/* Counters without an explicit count get the default interval: */
	for (counter = 0; counter < nr_counters; counter++) {
		if (event_count[counter])
			continue;

		event_count[counter] = default_interval;
	}
}
971 | |||
/*
 * Per-counter state of one mmap()ed event buffer:
 */
struct mmap_data {
	int counter;		/* counter index this buffer belongs to */
	void *base;		/* mmap base: control page followed by data pages */
	unsigned int mask;	/* data-area size minus 1 — assumes power-of-two size */
	unsigned int prev;	/* position we have consumed up to so far */
};
978 | |||
979 | static unsigned int mmap_read_head(struct mmap_data *md) | ||
980 | { | ||
981 | struct perf_counter_mmap_page *pc = md->base; | ||
982 | int head; | ||
983 | |||
984 | head = pc->data_head; | ||
985 | rmb(); | ||
986 | |||
987 | return head; | ||
988 | } | ||
989 | |||
/* Timestamps of the current and previous mmap_read() pass, for the
 * "failed to keep up" diagnostic below: */
struct timeval last_read, this_read;

/*
 * Drain all new events from one mmap ring buffer: advance from md->prev
 * to the kernel's current head, dispatching overflow (sample) events to
 * process_event() and printing mmap/munmap records.
 */
static void mmap_read(struct mmap_data *md)
{
	unsigned int head = mmap_read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;	/* data area starts after the control page */
	int diff;

	gettimeofday(&this_read, NULL);

	/*
	 * If we're further behind than half the buffer, there's a chance
	 * the writer will bite our tail and screw up the events under us.
	 *
	 * If we somehow ended up ahead of the head, we got messed up.
	 *
	 * In either case, truncate and restart at head.
	 */
	diff = head - old;
	/* NOTE(review): 'diff' is promoted to unsigned for the first compare
	 * (md->mask is unsigned), so a negative diff also trips it — the
	 * explicit 'diff < 0' is then redundant but harmless; confirm. */
	if (diff > md->mask / 2 || diff < 0) {
		struct timeval iv;
		unsigned long msecs;

		timersub(&this_read, &last_read, &iv);
		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;

		fprintf(stderr, "WARNING: failed to keep up with mmap data."
				" Last read %lu msecs ago.\n", msecs);

		/*
		 * head points to a known good entry, start there.
		 */
		old = head;
	}

	last_read = this_read;

	for (; old != head;) {
		/* Layout of an overflow (sample) record with IP + TID: */
		struct ip_event {
			struct perf_event_header header;
			__u64 ip;
			__u32 pid, tid;
		};
		/* Layout of an mmap/munmap record: */
		struct mmap_event {
			struct perf_event_header header;
			__u32 pid, tid;
			__u64 start;
			__u64 len;
			__u64 pgoff;
			char filename[PATH_MAX];
		};

		typedef union event_union {
			struct perf_event_header header;
			struct ip_event ip;
			struct mmap_event mmap;
		} event_t;

		event_t *event = (event_t *)&data[old & md->mask];

		event_t event_copy;

		size_t size = event->header.size;

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((old & md->mask) + size != ((old + size) & md->mask)) {
			unsigned int offset = old;
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = &event_copy;

			/* Reassemble the wrapped record into event_copy piecewise: */
			do {
				cpy = min(md->mask + 1 - (offset & md->mask), len);
				memcpy(dst, &data[offset & md->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = &event_copy;
		}

		old += size;

		if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
			/* Sample: feed the IP into the histogram. */
			if (event->header.type & PERF_RECORD_IP)
				process_event(event->ip.ip, md->counter);
		} else {
			switch (event->header.type) {
			case PERF_EVENT_MMAP:
			case PERF_EVENT_MUNMAP:
				printf("%s: %Lu %Lu %Lu %s\n",
						event->header.type == PERF_EVENT_MMAP
						? "mmap" : "munmap",
						event->mmap.start,
						event->mmap.len,
						event->mmap.pgoff,
						event->mmap.filename);
				break;
			}
		}
	}

	md->prev = old;	/* remember how far we consumed */
}
1098 | |||
/*
 * Entry point of 'perf top': open one sampling counter per (cpu,
 * counter) pair, mmap their event buffers, spawn the display thread,
 * then loop forever draining the buffers (polling when idle).
 *
 * Returns 0 — though the event loop below never terminates on its own.
 */
int cmd_top(int argc, char **argv, const char *prefix)
{
	struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
	struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
	struct perf_counter_hw_event hw_event;
	pthread_t thread;
	int i, counter, group_fd, nr_poll = 0;
	unsigned int cpu;
	int ret;

	page_size = sysconf(_SC_PAGE_SIZE);

	process_options(argc, argv);

	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
	assert(nr_cpus <= MAX_NR_CPUS);
	assert(nr_cpus >= 0);

	/* When tied to one task or one CPU, a single counter set suffices: */
	if (tid != -1 || profile_cpu != -1)
		nr_cpus = 1;

	parse_symbols();
	/* Annotated-source mode needs the vmlinux image as well: */
	if (vmlinux && sym_filter_entry)
		parse_vmlinux(vmlinux);

	for (i = 0; i < nr_cpus; i++) {
		group_fd = -1;
		for (counter = 0; counter < nr_counters; counter++) {

			/* System-wide mode: counter i is bound to CPU i. */
			cpu = profile_cpu;
			if (tid == -1 && profile_cpu == -1)
				cpu = i;

			memset(&hw_event, 0, sizeof(hw_event));
			hw_event.config		= event_id[counter];
			hw_event.irq_period	= event_count[counter];
			hw_event.record_type	= PERF_RECORD_IP | PERF_RECORD_TID;
			hw_event.nmi		= nmi;
			hw_event.mmap		= use_mmap;
			hw_event.munmap		= use_munmap;

			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
			if (fd[i][counter] < 0) {
				int err = errno;
				printf("kerneltop error: syscall returned with %d (%s)\n",
					fd[i][counter], strerror(err));
				if (err == EPERM)
					printf("Are you root?\n");
				exit(-1);
			}
			assert(fd[i][counter] >= 0);
			fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);

			/*
			 * First counter acts as the group leader:
			 */
			if (group && group_fd == -1)
				group_fd = fd[i][counter];

			event_array[nr_poll].fd = fd[i][counter];
			event_array[nr_poll].events = POLLIN;
			nr_poll++;

			/* Map the control page plus mmap_pages of event data: */
			mmap_array[i][counter].counter = counter;
			mmap_array[i][counter].prev = 0;
			mmap_array[i][counter].mask = mmap_pages*page_size - 1;
			mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
					PROT_READ, MAP_SHARED, fd[i][counter], 0);
			if (mmap_array[i][counter].base == MAP_FAILED) {
				printf("kerneltop error: failed to mmap with %d (%s)\n",
					errno, strerror(errno));
				exit(-1);
			}
		}
	}

	if (pthread_create(&thread, NULL, display_thread, NULL)) {
		printf("Could not create display thread.\n");
		exit(-1);
	}

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			printf("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	while (1) {
		int hits = events;

		for (i = 0; i < nr_cpus; i++) {
			for (counter = 0; counter < nr_counters; counter++)
				mmap_read(&mmap_array[i][counter]);
		}

		/* Nothing arrived in this pass: block (up to 100ms) for new data. */
		if (hits == events)
			ret = poll(event_array, nr_poll, 100);
	}

	return 0;
}
diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h new file mode 100644 index 000000000000..d32318aed8cf --- /dev/null +++ b/Documentation/perf_counter/builtin.h | |||
@@ -0,0 +1,22 @@ | |||
#ifndef BUILTIN_H
#define BUILTIN_H

#include "util/util.h"
#include "util/strbuf.h"

/* Version/usage strings shared by the top-level 'perf' driver: */
extern const char perf_version_string[];
extern const char perf_usage_string[];
extern const char perf_more_info_string[];

/* Generic helpers used by the command dispatcher: */
extern void list_common_cmds_help(void);
extern const char *help_unknown_cmd(const char *cmd);
extern void prune_packed_objects(int);
extern int read_line_with_nul(char *buf, int size, FILE *file);
extern int check_pager_config(const char *cmd);

/*
 * Built-in subcommand entry points.
 * NOTE(review): these are declared with 'const char **argv' here, but
 * cmd_top() is defined taking 'char **argv' — confirm the definitions
 * match these prototypes.
 */
extern int cmd_help(int argc, const char **argv, const char *prefix);
extern int cmd_record(int argc, const char **argv, const char *prefix);
extern int cmd_stat(int argc, const char **argv, const char *prefix);
extern int cmd_top(int argc, const char **argv, const char *prefix);
extern int cmd_version(int argc, const char **argv, const char *prefix);
#endif
diff --git a/Documentation/perf_counter/command-list.txt b/Documentation/perf_counter/command-list.txt new file mode 100644 index 000000000000..d15210aa0cae --- /dev/null +++ b/Documentation/perf_counter/command-list.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | # List of known perf commands. | ||
2 | # command name category [deprecated] [common] | ||
3 | perf-record mainporcelain common | ||
4 | perf-stat mainporcelain common | ||
5 | perf-top mainporcelain common | ||
6 | |||
diff --git a/Documentation/perf_counter/design.txt b/Documentation/perf_counter/design.txt new file mode 100644 index 000000000000..aaf105c02fba --- /dev/null +++ b/Documentation/perf_counter/design.txt | |||
@@ -0,0 +1,283 @@ | |||
1 | |||
2 | Performance Counters for Linux | ||
3 | ------------------------------ | ||
4 | |||
5 | Performance counters are special hardware registers available on most modern | ||
6 | CPUs. These registers count the number of certain types of hw events: such | ||
as instructions executed, cache misses suffered, or branches mis-predicted -
8 | without slowing down the kernel or applications. These registers can also | ||
9 | trigger interrupts when a threshold number of events have passed - and can | ||
10 | thus be used to profile the code that runs on that CPU. | ||
11 | |||
12 | The Linux Performance Counter subsystem provides an abstraction of these | ||
13 | hardware capabilities. It provides per task and per CPU counters, counter | ||
14 | groups, and it provides event capabilities on top of those. It | ||
15 | provides "virtual" 64-bit counters, regardless of the width of the | ||
16 | underlying hardware counters. | ||
17 | |||
18 | Performance counters are accessed via special file descriptors. | ||
19 | There's one file descriptor per virtual counter used. | ||
20 | |||
21 | The special file descriptor is opened via the perf_counter_open() | ||
22 | system call: | ||
23 | |||
24 | int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr, | ||
25 | pid_t pid, int cpu, int group_fd, | ||
26 | unsigned long flags); | ||
27 | |||
28 | The syscall returns the new fd. The fd can be used via the normal | ||
29 | VFS system calls: read() can be used to read the counter, fcntl() | ||
30 | can be used to set the blocking mode, etc. | ||
31 | |||
32 | Multiple counters can be kept open at a time, and the counters | ||
33 | can be poll()ed. | ||
34 | |||
35 | When creating a new counter fd, 'perf_counter_hw_event' is: | ||
36 | |||
37 | /* | ||
38 | * Event to monitor via a performance monitoring counter: | ||
39 | */ | ||
40 | struct perf_counter_hw_event { | ||
41 | __u64 event_config; | ||
42 | |||
43 | __u64 irq_period; | ||
44 | __u64 record_type; | ||
45 | __u64 read_format; | ||
46 | |||
47 | __u64 disabled : 1, /* off by default */ | ||
48 | nmi : 1, /* NMI sampling */ | ||
49 | inherit : 1, /* children inherit it */ | ||
50 | pinned : 1, /* must always be on PMU */ | ||
51 | exclusive : 1, /* only group on PMU */ | ||
52 | exclude_user : 1, /* don't count user */ | ||
53 | exclude_kernel : 1, /* ditto kernel */ | ||
54 | exclude_hv : 1, /* ditto hypervisor */ | ||
55 | exclude_idle : 1, /* don't count when idle */ | ||
56 | |||
57 | __reserved_1 : 55; | ||
58 | |||
59 | __u32 extra_config_len; | ||
60 | |||
61 | __u32 __reserved_4; | ||
62 | __u64 __reserved_2; | ||
63 | __u64 __reserved_3; | ||
64 | }; | ||
65 | |||
66 | The 'event_config' field specifies what the counter should count. It | ||
67 | is divided into 3 bit-fields: | ||
68 | |||
69 | raw_type: 1 bit (most significant bit) 0x8000_0000_0000_0000 | ||
70 | type: 7 bits (next most significant) 0x7f00_0000_0000_0000 | ||
    event_id: 56 bits (least significant)   0x00ff_ffff_ffff_ffff
72 | |||
73 | If 'raw_type' is 1, then the counter will count a hardware event | ||
74 | specified by the remaining 63 bits of event_config. The encoding is | ||
75 | machine-specific. | ||
76 | |||
77 | If 'raw_type' is 0, then the 'type' field says what kind of counter | ||
78 | this is, with the following encoding: | ||
79 | |||
80 | enum perf_event_types { | ||
81 | PERF_TYPE_HARDWARE = 0, | ||
82 | PERF_TYPE_SOFTWARE = 1, | ||
83 | PERF_TYPE_TRACEPOINT = 2, | ||
84 | }; | ||
85 | |||
86 | A counter of PERF_TYPE_HARDWARE will count the hardware event | ||
87 | specified by 'event_id': | ||
88 | |||
89 | /* | ||
90 | * Generalized performance counter event types, used by the hw_event.event_id | ||
91 | * parameter of the sys_perf_counter_open() syscall: | ||
92 | */ | ||
93 | enum hw_event_ids { | ||
94 | /* | ||
95 | * Common hardware events, generalized by the kernel: | ||
96 | */ | ||
97 | PERF_COUNT_CPU_CYCLES = 0, | ||
98 | PERF_COUNT_INSTRUCTIONS = 1, | ||
99 | PERF_COUNT_CACHE_REFERENCES = 2, | ||
100 | PERF_COUNT_CACHE_MISSES = 3, | ||
101 | PERF_COUNT_BRANCH_INSTRUCTIONS = 4, | ||
102 | PERF_COUNT_BRANCH_MISSES = 5, | ||
103 | PERF_COUNT_BUS_CYCLES = 6, | ||
104 | }; | ||
105 | |||
106 | These are standardized types of events that work relatively uniformly | ||
107 | on all CPUs that implement Performance Counters support under Linux, | ||
108 | although there may be variations (e.g., different CPUs might count | ||
109 | cache references and misses at different levels of the cache hierarchy). | ||
110 | If a CPU is not able to count the selected event, then the system call | ||
111 | will return -EINVAL. | ||
112 | |||
113 | More hw_event_types are supported as well, but they are CPU-specific | ||
114 | and accessed as raw events. For example, to count "External bus | ||
115 | cycles while bus lock signal asserted" events on Intel Core CPUs, pass | ||
116 | in a 0x4064 event_id value and set hw_event.raw_type to 1. | ||
117 | |||
118 | A counter of type PERF_TYPE_SOFTWARE will count one of the available | ||
119 | software events, selected by 'event_id': | ||
120 | |||
121 | /* | ||
122 | * Special "software" counters provided by the kernel, even if the hardware | ||
123 | * does not support performance counters. These counters measure various | ||
124 | * physical and sw events of the kernel (and allow the profiling of them as | ||
125 | * well): | ||
126 | */ | ||
127 | enum sw_event_ids { | ||
128 | PERF_COUNT_CPU_CLOCK = 0, | ||
129 | PERF_COUNT_TASK_CLOCK = 1, | ||
130 | PERF_COUNT_PAGE_FAULTS = 2, | ||
131 | PERF_COUNT_CONTEXT_SWITCHES = 3, | ||
132 | PERF_COUNT_CPU_MIGRATIONS = 4, | ||
133 | PERF_COUNT_PAGE_FAULTS_MIN = 5, | ||
134 | PERF_COUNT_PAGE_FAULTS_MAJ = 6, | ||
135 | }; | ||
136 | |||
137 | Counters come in two flavours: counting counters and sampling | ||
138 | counters. A "counting" counter is one that is used for counting the | ||
139 | number of events that occur, and is characterised by having | ||
140 | irq_period = 0 and record_type = PERF_RECORD_SIMPLE. A read() on a | ||
141 | counting counter simply returns the current value of the counter as | ||
142 | an 8-byte number. | ||
143 | |||
144 | A "sampling" counter is one that is set up to generate an interrupt | ||
145 | every N events, where N is given by 'irq_period'. A sampling counter | ||
146 | has irq_period > 0 and record_type != PERF_RECORD_SIMPLE. The | ||
147 | record_type controls what data is recorded on each interrupt, and the | ||
148 | available values are currently: | ||
149 | |||
150 | /* | ||
151 | * IRQ-notification data record type: | ||
152 | */ | ||
153 | enum perf_counter_record_type { | ||
154 | PERF_RECORD_SIMPLE = 0, | ||
155 | PERF_RECORD_IRQ = 1, | ||
156 | PERF_RECORD_GROUP = 2, | ||
157 | }; | ||
158 | |||
159 | A record_type value of PERF_RECORD_IRQ will record the instruction | ||
160 | pointer (IP) at which the interrupt occurred. A record_type value of | ||
161 | PERF_RECORD_GROUP will record the event_config and counter value of | ||
162 | all of the other counters in the group, and should only be used on a | ||
163 | group leader (see below). Currently these two values are mutually | ||
164 | exclusive, but record_type will become a bit-mask in future and | ||
165 | support other values. | ||
166 | |||
167 | A sampling counter has an event queue, into which an event is placed | ||
168 | on each interrupt. A read() on a sampling counter will read the next | ||
169 | event from the event queue. If the queue is empty, the read() will | ||
170 | either block or return an EAGAIN error, depending on whether the fd | ||
171 | has been set to non-blocking mode or not. | ||
172 | |||
173 | The 'disabled' bit specifies whether the counter starts out disabled | ||
174 | or enabled. If it is initially disabled, it can be enabled by ioctl | ||
175 | or prctl (see below). | ||
176 | |||
177 | The 'nmi' bit specifies, for hardware events, whether the counter | ||
178 | should be set up to request non-maskable interrupts (NMIs) or normal | ||
179 | interrupts. This bit is ignored if the user doesn't have | ||
180 | CAP_SYS_ADMIN privilege (i.e. is not root) or if the CPU doesn't | ||
181 | generate NMIs from hardware counters. | ||
182 | |||
183 | The 'inherit' bit, if set, specifies that this counter should count | ||
184 | events on descendant tasks as well as the task specified. This only | ||
applies to new descendants, not to any existing descendants at the
time the counter is created (nor to any new descendants of existing
descendants).
188 | |||
189 | The 'pinned' bit, if set, specifies that the counter should always be | ||
190 | on the CPU if at all possible. It only applies to hardware counters | ||
191 | and only to group leaders. If a pinned counter cannot be put onto the | ||
192 | CPU (e.g. because there are not enough hardware counters or because of | ||
193 | a conflict with some other event), then the counter goes into an | ||
194 | 'error' state, where reads return end-of-file (i.e. read() returns 0) | ||
195 | until the counter is subsequently enabled or disabled. | ||
196 | |||
197 | The 'exclusive' bit, if set, specifies that when this counter's group | ||
198 | is on the CPU, it should be the only group using the CPU's counters. | ||
199 | In future, this will allow sophisticated monitoring programs to supply | ||
200 | extra configuration information via 'extra_config_len' to exploit | ||
201 | advanced features of the CPU's Performance Monitor Unit (PMU) that are | ||
202 | not otherwise accessible and that might disrupt other hardware | ||
203 | counters. | ||
204 | |||
205 | The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a | ||
206 | way to request that counting of events be restricted to times when the | ||
207 | CPU is in user, kernel and/or hypervisor mode. | ||
208 | |||
209 | |||
210 | The 'pid' parameter to the perf_counter_open() system call allows the | ||
211 | counter to be specific to a task: | ||
212 | |||
213 | pid == 0: if the pid parameter is zero, the counter is attached to the | ||
214 | current task. | ||
215 | |||
216 | pid > 0: the counter is attached to a specific task (if the current task | ||
217 | has sufficient privilege to do so) | ||
218 | |||
219 | pid < 0: all tasks are counted (per cpu counters) | ||
220 | |||
221 | The 'cpu' parameter allows a counter to be made specific to a CPU: | ||
222 | |||
223 | cpu >= 0: the counter is restricted to a specific CPU | ||
224 | cpu == -1: the counter counts on all CPUs | ||
225 | |||
226 | (Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.) | ||
227 | |||
228 | A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts | ||
229 | events of that task and 'follows' that task to whatever CPU the task | ||
gets scheduled to.  Per task counters can be created by any user, for
231 | their own tasks. | ||
232 | |||
233 | A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts | ||
234 | all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege. | ||
235 | |||
236 | The 'flags' parameter is currently unused and must be zero. | ||
237 | |||
238 | The 'group_fd' parameter allows counter "groups" to be set up. A | ||
239 | counter group has one counter which is the group "leader". The leader | ||
240 | is created first, with group_fd = -1 in the perf_counter_open call | ||
241 | that creates it. The rest of the group members are created | ||
242 | subsequently, with group_fd giving the fd of the group leader. | ||
243 | (A single counter on its own is created with group_fd = -1 and is | ||
244 | considered to be a group with only 1 member.) | ||
245 | |||
246 | A counter group is scheduled onto the CPU as a unit, that is, it will | ||
247 | only be put onto the CPU if all of the counters in the group can be | ||
248 | put onto the CPU. This means that the values of the member counters | ||
249 | can be meaningfully compared, added, divided (to get ratios), etc., | ||
250 | with each other, since they have counted events for the same set of | ||
251 | executed instructions. | ||
252 | |||
253 | Counters can be enabled and disabled in two ways: via ioctl and via | ||
254 | prctl. When a counter is disabled, it doesn't count or generate | ||
255 | events but does continue to exist and maintain its count value. | ||
256 | |||
257 | An individual counter or counter group can be enabled with | ||
258 | |||
259 | ioctl(fd, PERF_COUNTER_IOC_ENABLE); | ||
260 | |||
261 | or disabled with | ||
262 | |||
263 | ioctl(fd, PERF_COUNTER_IOC_DISABLE); | ||
264 | |||
265 | Enabling or disabling the leader of a group enables or disables the | ||
266 | whole group; that is, while the group leader is disabled, none of the | ||
267 | counters in the group will count. Enabling or disabling a member of a | ||
group other than the leader only affects that counter - disabling a
269 | non-leader stops that counter from counting but doesn't affect any | ||
270 | other counter. | ||
271 | |||
272 | A process can enable or disable all the counter groups that are | ||
273 | attached to it, using prctl: | ||
274 | |||
275 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | ||
276 | |||
277 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | ||
278 | |||
279 | This applies to all counters on the current process, whether created | ||
280 | by this process or by another, and doesn't affect any counters that | ||
281 | this process has created on other processes. It only enables or | ||
282 | disables the group leaders, not any other members in the groups. | ||
283 | |||
diff --git a/Documentation/perf_counter/perf-report.cc b/Documentation/perf_counter/perf-report.cc new file mode 100644 index 000000000000..1727317352bf --- /dev/null +++ b/Documentation/perf_counter/perf-report.cc | |||
@@ -0,0 +1,479 @@ | |||
1 | #define _GNU_SOURCE | ||
2 | #include <sys/types.h> | ||
3 | #include <sys/stat.h> | ||
4 | #include <sys/time.h> | ||
5 | #include <unistd.h> | ||
6 | #include <stdint.h> | ||
7 | #include <stdlib.h> | ||
8 | #include <string.h> | ||
9 | #include <limits.h> | ||
10 | #include <fcntl.h> | ||
11 | #include <stdio.h> | ||
12 | #include <errno.h> | ||
13 | #include <ctype.h> | ||
14 | #include <time.h> | ||
15 | #include <getopt.h> | ||
16 | |||
17 | #include <sys/ioctl.h> | ||
18 | #include <sys/poll.h> | ||
19 | #include <sys/prctl.h> | ||
20 | #include <sys/wait.h> | ||
21 | #include <sys/mman.h> | ||
22 | #include <sys/types.h> | ||
23 | #include <sys/stat.h> | ||
24 | |||
25 | #include <linux/unistd.h> | ||
26 | #include <linux/types.h> | ||
27 | |||
28 | #include "../../include/linux/perf_counter.h" | ||
29 | |||
30 | #include <set> | ||
31 | #include <map> | ||
32 | #include <string> | ||
33 | |||
34 | |||
35 | static char const *input_name = "output.perf"; | ||
36 | static int input; | ||
37 | |||
38 | static unsigned long page_size; | ||
39 | static unsigned long mmap_window = 32; | ||
40 | |||
/*
 * On-disk sample record: instruction pointer plus the pid/tid it was
 * captured from.
 */
struct ip_event {
	struct perf_event_header header;
	__u64 ip;
	__u32 pid, tid;
};
/* Records an mmap() of an executable region, for mapping ips to DSOs. */
struct mmap_event {
	struct perf_event_header header;
	__u32 pid, tid;
	__u64 start;
	__u64 len;
	__u64 pgoff;
	char filename[PATH_MAX];
};
/* Records a task's command name, for pid -> name resolution. */
struct comm_event {
	struct perf_event_header header;
	__u32 pid,tid;
	char comm[16];
};

/* A raw event is one of the above; header.type/header.misc say which. */
typedef union event_union {
	struct perf_event_header header;
	struct ip_event ip;
	struct mmap_event mmap;
	struct comm_event comm;
} event_t;
66 | |||
/*
 * An ELF section: the [start, end) address range plus the offset used
 * to translate symbol addresses into section-relative ones.  Ordered
 * by 'end' so sections_t::upper_bound(section(addr)) yields the first
 * section whose range can contain addr.
 */
struct section {
	uint64_t start;
	uint64_t end;

	uint64_t offset;

	std::string name;

	section() { };

	/* Probe constructor for set lookups: only 'end' participates in <. */
	section(uint64_t stab) : end(stab) { };

	section(uint64_t start, uint64_t size, uint64_t offset, std::string name) :
		start(start), end(start + size), offset(offset), name(name)
	{ };

	bool operator < (const struct section &s) const {
		return end < s.end;
	};
};
87 | |||
88 | typedef std::set<struct section> sections_t; | ||
89 | |||
/*
 * One symbol covering [start, end).  Ordered by 'start' so
 * symbols_t::upper_bound(symbol(ip)) - 1 is the last symbol starting
 * at or below ip.
 */
struct symbol {
	uint64_t start;
	uint64_t end;

	std::string name;

	symbol() { };

	/* Probe constructor for set lookups: only 'start' participates in <. */
	symbol(uint64_t ip) : start(ip) { }

	symbol(uint64_t start, uint64_t len, std::string name) :
		start(start), end(start + len), name(name)
	{ };

	bool operator < (const struct symbol &s) const {
		return start < s.start;
	};
};
108 | |||
109 | typedef std::set<struct symbol> symbols_t; | ||
110 | |||
/* Everything we know about one DSO: its sections and its symbols. */
struct dso {
	sections_t sections;
	symbols_t syms;
};
115 | |||
116 | static std::map<std::string, struct dso> dsos; | ||
117 | |||
118 | static void load_dso_sections(std::string dso_name) | ||
119 | { | ||
120 | struct dso &dso = dsos[dso_name]; | ||
121 | |||
122 | std::string cmd = "readelf -DSW " + dso_name; | ||
123 | |||
124 | FILE *file = popen(cmd.c_str(), "r"); | ||
125 | if (!file) { | ||
126 | perror("failed to open pipe"); | ||
127 | exit(-1); | ||
128 | } | ||
129 | |||
130 | char *line = NULL; | ||
131 | size_t n = 0; | ||
132 | |||
133 | while (!feof(file)) { | ||
134 | uint64_t addr, off, size; | ||
135 | char name[32]; | ||
136 | |||
137 | if (getline(&line, &n, file) < 0) | ||
138 | break; | ||
139 | if (!line) | ||
140 | break; | ||
141 | |||
142 | if (sscanf(line, " [%*2d] %16s %*14s %Lx %Lx %Lx", | ||
143 | name, &addr, &off, &size) == 4) { | ||
144 | |||
145 | dso.sections.insert(section(addr, size, addr - off, name)); | ||
146 | } | ||
147 | #if 0 | ||
148 | /* | ||
149 | * for reading readelf symbols (-s), however these don't seem | ||
150 | * to include nearly everything, so use nm for that. | ||
151 | */ | ||
152 | if (sscanf(line, " %*4d %*3d: %Lx %5Lu %*7s %*6s %*7s %3d %s", | ||
153 | &start, &size, §ion, sym) == 4) { | ||
154 | |||
155 | start -= dso.section_offsets[section]; | ||
156 | |||
157 | dso.syms.insert(symbol(start, size, std::string(sym))); | ||
158 | } | ||
159 | #endif | ||
160 | } | ||
161 | pclose(file); | ||
162 | } | ||
163 | |||
164 | static void load_dso_symbols(std::string dso_name, std::string args) | ||
165 | { | ||
166 | struct dso &dso = dsos[dso_name]; | ||
167 | |||
168 | std::string cmd = "nm -nSC " + args + " " + dso_name; | ||
169 | |||
170 | FILE *file = popen(cmd.c_str(), "r"); | ||
171 | if (!file) { | ||
172 | perror("failed to open pipe"); | ||
173 | exit(-1); | ||
174 | } | ||
175 | |||
176 | char *line = NULL; | ||
177 | size_t n = 0; | ||
178 | |||
179 | while (!feof(file)) { | ||
180 | uint64_t start, size; | ||
181 | char c; | ||
182 | char sym[1024]; | ||
183 | |||
184 | if (getline(&line, &n, file) < 0) | ||
185 | break; | ||
186 | if (!line) | ||
187 | break; | ||
188 | |||
189 | |||
190 | if (sscanf(line, "%Lx %Lx %c %s", &start, &size, &c, sym) == 4) { | ||
191 | sections_t::const_iterator si = | ||
192 | dso.sections.upper_bound(section(start)); | ||
193 | if (si == dso.sections.end()) { | ||
194 | printf("symbol in unknown section: %s\n", sym); | ||
195 | continue; | ||
196 | } | ||
197 | |||
198 | start -= si->offset; | ||
199 | |||
200 | dso.syms.insert(symbol(start, size, sym)); | ||
201 | } | ||
202 | } | ||
203 | pclose(file); | ||
204 | } | ||
205 | |||
/* Load everything needed to symbolize one DSO: sections, then symbols. */
static void load_dso(std::string dso_name)
{
	load_dso_sections(dso_name);
	load_dso_symbols(dso_name, "-D"); /* dynamic symbols */
	load_dso_symbols(dso_name, ""); /* regular ones */
}
212 | |||
213 | void load_kallsyms(void) | ||
214 | { | ||
215 | struct dso &dso = dsos["[kernel]"]; | ||
216 | |||
217 | FILE *file = fopen("/proc/kallsyms", "r"); | ||
218 | if (!file) { | ||
219 | perror("failed to open kallsyms"); | ||
220 | exit(-1); | ||
221 | } | ||
222 | |||
223 | char *line; | ||
224 | size_t n; | ||
225 | |||
226 | while (!feof(file)) { | ||
227 | uint64_t start; | ||
228 | char c; | ||
229 | char sym[1024]; | ||
230 | |||
231 | if (getline(&line, &n, file) < 0) | ||
232 | break; | ||
233 | if (!line) | ||
234 | break; | ||
235 | |||
236 | if (sscanf(line, "%Lx %c %s", &start, &c, sym) == 3) | ||
237 | dso.syms.insert(symbol(start, 0x1000000, std::string(sym))); | ||
238 | } | ||
239 | fclose(file); | ||
240 | } | ||
241 | |||
/*
 * One mmap'ed executable region of a process, ordered by 'end' so
 * maps_t::upper_bound(map(ip)) finds the first mapping that can
 * contain ip.
 */
struct map {
	uint64_t start;
	uint64_t end;
	uint64_t pgoff;

	std::string dso;

	map() { };

	/* Probe constructor for set lookups: only 'end' participates in <. */
	map(uint64_t ip) : end(ip) { }

	map(mmap_event *mmap) {
		start = mmap->start;
		end = mmap->start + mmap->len;
		pgoff = mmap->pgoff;

		dso = std::string(mmap->filename);

		/* Lazily load section/symbol info the first time a DSO is seen. */
		if (dsos.find(dso) == dsos.end())
			load_dso(dso);
	};

	bool operator < (const struct map &m) const {
		return end < m.end;
	};
};
268 | |||
269 | typedef std::set<struct map> maps_t; | ||
270 | |||
271 | static std::map<int, maps_t> maps; | ||
272 | |||
273 | static std::map<int, std::string> comms; | ||
274 | |||
275 | static std::map<std::string, int> hist; | ||
276 | static std::multimap<int, std::string> rev_hist; | ||
277 | |||
278 | static std::string resolve_comm(int pid) | ||
279 | { | ||
280 | std::string comm; | ||
281 | |||
282 | std::map<int, std::string>::const_iterator ci = comms.find(pid); | ||
283 | if (ci != comms.end()) { | ||
284 | comm = ci->second; | ||
285 | } else { | ||
286 | char pid_str[30]; | ||
287 | |||
288 | sprintf(pid_str, ":%d", pid); | ||
289 | comm = pid_str; | ||
290 | } | ||
291 | |||
292 | return comm; | ||
293 | } | ||
294 | |||
/*
 * Resolve a userspace ip for 'pid' to "dso: symbol".  Returns
 * "<unknown>" when no mapping covers ip, and "dso: <unknown>" when a
 * mapping exists but no symbol matches.
 */
static std::string resolve_user_symbol(int pid, uint64_t ip)
{
	std::string sym = "<unknown>";

	maps_t &m = maps[pid];
	maps_t::const_iterator mi = m.upper_bound(map(ip));
	if (mi == m.end())
		return sym;

	/*
	 * NOTE(review): this computes ip - start - pgoff; translating a
	 * virtual address to a file-relative one is normally
	 * ip - start + pgoff.  Harmless while pgoff is 0 for text
	 * mappings -- confirm against how section offsets are derived in
	 * load_dso_sections().
	 */
	ip -= mi->start + mi->pgoff;

	symbols_t &s = dsos[mi->dso].syms;
	symbols_t::const_iterator si = s.upper_bound(symbol(ip));

	sym = mi->dso + ": <unknown>";

	/* si is the first symbol starting above ip; step back to the candidate. */
	if (si == s.begin())
		return sym;
	si--;

	if (si->start <= ip && ip < si->end)
		sym = mi->dso + ": " + si->name;
#if 0
	else if (si->start <= ip)
		sym = mi->dso + ": ?" + si->name;
#endif

	return sym;
}
324 | |||
325 | static std::string resolve_kernel_symbol(uint64_t ip) | ||
326 | { | ||
327 | std::string sym = "<unknown>"; | ||
328 | |||
329 | symbols_t &s = dsos["[kernel]"].syms; | ||
330 | symbols_t::const_iterator si = s.upper_bound(symbol(ip)); | ||
331 | |||
332 | if (si == s.begin()) | ||
333 | return sym; | ||
334 | si--; | ||
335 | |||
336 | if (si->start <= ip && ip < si->end) | ||
337 | sym = si->name; | ||
338 | |||
339 | return sym; | ||
340 | } | ||
341 | |||
/* Print usage information and exit successfully. */
static void display_help(void)
{
	printf(
	"Usage: perf-report [<options>]\n"
	" -i file --input=<file> # input file\n"
	);

	exit(0);
}
351 | |||
352 | static void process_options(int argc, char *argv[]) | ||
353 | { | ||
354 | int error = 0; | ||
355 | |||
356 | for (;;) { | ||
357 | int option_index = 0; | ||
358 | /** Options for getopt */ | ||
359 | static struct option long_options[] = { | ||
360 | {"input", required_argument, NULL, 'i'}, | ||
361 | {NULL, 0, NULL, 0 } | ||
362 | }; | ||
363 | int c = getopt_long(argc, argv, "+:i:", | ||
364 | long_options, &option_index); | ||
365 | if (c == -1) | ||
366 | break; | ||
367 | |||
368 | switch (c) { | ||
369 | case 'i': input_name = strdup(optarg); break; | ||
370 | default: error = 1; break; | ||
371 | } | ||
372 | } | ||
373 | |||
374 | if (error) | ||
375 | display_help(); | ||
376 | } | ||
377 | |||
/*
 * Read the recorded event stream via a sliding mmap window, build a
 * histogram of "comm level symbol" strings from sample events, then
 * print it sorted by hit count as percentages.
 */
int main(int argc, char *argv[])
{
	unsigned long offset = 0;	/* file offset of the current window */
	unsigned long head = 0;		/* read position within the window */
	struct stat stat;
	char *buf;
	event_t *event;
	int ret;
	unsigned long total = 0;	/* number of sample events seen */

	page_size = getpagesize();

	process_options(argc, argv);

	input = open(input_name, O_RDONLY);
	if (input < 0) {
		perror("failed to open file");
		exit(-1);
	}

	ret = fstat(input, &stat);
	if (ret < 0) {
		perror("failed to stat file");
		exit(-1);
	}

	load_kallsyms();

remap:
	/* Map the next window of the input file (offset is page-aligned). */
	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
			   MAP_SHARED, input, offset);
	if (buf == MAP_FAILED) {
		perror("failed to mmap file");
		exit(-1);
	}

more:
	event = (event_t *)(buf + head);

	/*
	 * If the next event would run past the window, slide the window
	 * forward by a whole number of pages and remap.
	 * NOTE(review): a corrupt record with header.size == 0 would loop
	 * here forever -- the input file is trusted.
	 */
	if (head + event->header.size >= page_size * mmap_window) {
		unsigned long shift = page_size * (head / page_size);

		munmap(buf, page_size * mmap_window);
		offset += shift;
		head -= shift;
		goto remap;
	}
	head += event->header.size;

	/* Sample (overflow) events feed the histogram ... */
	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
		std::string comm, sym, level;
		char output[1024];

		if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
			level = " [k] ";
			sym = resolve_kernel_symbol(event->ip.ip);
		} else if (event->header.misc & PERF_EVENT_MISC_USER) {
			level = " [.] ";
			sym = resolve_user_symbol(event->ip.pid, event->ip.ip);
		} else {
			level = " [H] ";	/* hypervisor */
		}
		comm = resolve_comm(event->ip.pid);

		snprintf(output, sizeof(output), "%16s %s %s",
				comm.c_str(), level.c_str(), sym.c_str());
		hist[output]++;

		total++;

	/* ... while mmap/comm events update the resolution tables. */
	} else switch (event->header.type) {
	case PERF_EVENT_MMAP:
		maps[event->mmap.pid].insert(map(&event->mmap));
		break;

	case PERF_EVENT_COMM:
		comms[event->comm.pid] = std::string(event->comm.comm);
		break;
	}

	if (offset + head < stat.st_size)
		goto more;

	close(input);

	/* Invert the histogram so it can be printed sorted by count. */
	std::map<std::string, int>::iterator hi = hist.begin();

	while (hi != hist.end()) {
		rev_hist.insert(std::pair<int, std::string>(hi->second, hi->first));
		hist.erase(hi++);
	}

	std::multimap<int, std::string>::const_iterator ri = rev_hist.begin();

	while (ri != rev_hist.end()) {
		printf(" %5.2f %s\n", (100.0 * ri->first)/total, ri->second.c_str());
		ri++;
	}

	return 0;
}
479 | |||
diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c new file mode 100644 index 000000000000..594d270be390 --- /dev/null +++ b/Documentation/perf_counter/perf.c | |||
@@ -0,0 +1,414 @@ | |||
1 | #include "builtin.h" | ||
2 | #include "util/exec_cmd.h" | ||
3 | #include "util/cache.h" | ||
4 | #include "util/quote.h" | ||
5 | #include "util/run-command.h" | ||
6 | |||
7 | const char perf_usage_string[] = | ||
8 | "perf [--version] [--help] COMMAND [ARGS]"; | ||
9 | |||
10 | const char perf_more_info_string[] = | ||
11 | "See 'perf help COMMAND' for more information on a specific command."; | ||
12 | |||
13 | static int use_pager = -1; | ||
14 | struct pager_config { | ||
15 | const char *cmd; | ||
16 | int val; | ||
17 | }; | ||
18 | |||
19 | static int pager_command_config(const char *var, const char *value, void *data) | ||
20 | { | ||
21 | struct pager_config *c = data; | ||
22 | if (!prefixcmp(var, "pager.") && !strcmp(var + 6, c->cmd)) | ||
23 | c->val = perf_config_bool(var, value); | ||
24 | return 0; | ||
25 | } | ||
26 | |||
27 | /* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */ | ||
28 | int check_pager_config(const char *cmd) | ||
29 | { | ||
30 | struct pager_config c; | ||
31 | c.cmd = cmd; | ||
32 | c.val = -1; | ||
33 | perf_config(pager_command_config, &c); | ||
34 | return c.val; | ||
35 | } | ||
36 | |||
37 | static void commit_pager_choice(void) { | ||
38 | switch (use_pager) { | ||
39 | case 0: | ||
40 | setenv("PERF_PAGER", "cat", 1); | ||
41 | break; | ||
42 | case 1: | ||
43 | /* setup_pager(); */ | ||
44 | break; | ||
45 | default: | ||
46 | break; | ||
47 | } | ||
48 | } | ||
49 | |||
50 | static int handle_options(const char*** argv, int* argc, int* envchanged) | ||
51 | { | ||
52 | int handled = 0; | ||
53 | |||
54 | while (*argc > 0) { | ||
55 | const char *cmd = (*argv)[0]; | ||
56 | if (cmd[0] != '-') | ||
57 | break; | ||
58 | |||
59 | /* | ||
60 | * For legacy reasons, the "version" and "help" | ||
61 | * commands can be written with "--" prepended | ||
62 | * to make them look like flags. | ||
63 | */ | ||
64 | if (!strcmp(cmd, "--help") || !strcmp(cmd, "--version")) | ||
65 | break; | ||
66 | |||
67 | /* | ||
68 | * Check remaining flags. | ||
69 | */ | ||
70 | if (!prefixcmp(cmd, "--exec-path")) { | ||
71 | cmd += 11; | ||
72 | if (*cmd == '=') | ||
73 | perf_set_argv_exec_path(cmd + 1); | ||
74 | else { | ||
75 | puts(perf_exec_path()); | ||
76 | exit(0); | ||
77 | } | ||
78 | } else if (!strcmp(cmd, "--html-path")) { | ||
79 | puts(system_path(PERF_HTML_PATH)); | ||
80 | exit(0); | ||
81 | } else if (!strcmp(cmd, "-p") || !strcmp(cmd, "--paginate")) { | ||
82 | use_pager = 1; | ||
83 | } else if (!strcmp(cmd, "--no-pager")) { | ||
84 | use_pager = 0; | ||
85 | if (envchanged) | ||
86 | *envchanged = 1; | ||
87 | } else if (!strcmp(cmd, "--perf-dir")) { | ||
88 | if (*argc < 2) { | ||
89 | fprintf(stderr, "No directory given for --perf-dir.\n" ); | ||
90 | usage(perf_usage_string); | ||
91 | } | ||
92 | setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1); | ||
93 | if (envchanged) | ||
94 | *envchanged = 1; | ||
95 | (*argv)++; | ||
96 | (*argc)--; | ||
97 | handled++; | ||
98 | } else if (!prefixcmp(cmd, "--perf-dir=")) { | ||
99 | setenv(PERF_DIR_ENVIRONMENT, cmd + 10, 1); | ||
100 | if (envchanged) | ||
101 | *envchanged = 1; | ||
102 | } else if (!strcmp(cmd, "--work-tree")) { | ||
103 | if (*argc < 2) { | ||
104 | fprintf(stderr, "No directory given for --work-tree.\n" ); | ||
105 | usage(perf_usage_string); | ||
106 | } | ||
107 | setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1); | ||
108 | if (envchanged) | ||
109 | *envchanged = 1; | ||
110 | (*argv)++; | ||
111 | (*argc)--; | ||
112 | } else if (!prefixcmp(cmd, "--work-tree=")) { | ||
113 | setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + 12, 1); | ||
114 | if (envchanged) | ||
115 | *envchanged = 1; | ||
116 | } else { | ||
117 | fprintf(stderr, "Unknown option: %s\n", cmd); | ||
118 | usage(perf_usage_string); | ||
119 | } | ||
120 | |||
121 | (*argv)++; | ||
122 | (*argc)--; | ||
123 | handled++; | ||
124 | } | ||
125 | return handled; | ||
126 | } | ||
127 | |||
/*
 * Expand a config-defined alias for argv[0], rewriting *argv/*argcp in
 * place.  Returns 1 if an alias was expanded, 0 otherwise.  An alias
 * whose value starts with '!' is run through the shell and normally
 * does not return.
 */
static int handle_alias(int *argcp, const char ***argv)
{
	int envchanged = 0, ret = 0, saved_errno = errno;
	int count, option_count;
	const char** new_argv;
	const char *alias_command;
	char *alias_string;

	alias_command = (*argv)[0];
	alias_string = alias_lookup(alias_command);
	if (alias_string) {
		if (alias_string[0] == '!') {
			/* Shell alias: append remaining args, shell-quoted. */
			if (*argcp > 1) {
				struct strbuf buf;

				strbuf_init(&buf, PATH_MAX);
				strbuf_addstr(&buf, alias_string);
				sq_quote_argv(&buf, (*argv) + 1, PATH_MAX);
				free(alias_string);
				alias_string = buf.buf;
			}
			ret = system(alias_string + 1);
			if (ret >= 0 && WIFEXITED(ret) &&
			    WEXITSTATUS(ret) != 127)
				exit(WEXITSTATUS(ret));
			die("Failed to run '%s' when expanding alias '%s'",
			    alias_string + 1, alias_command);
		}
		count = split_cmdline(alias_string, &new_argv);
		if (count < 0)
			die("Bad alias.%s string", alias_command);
		/* Let the alias itself carry global options. */
		option_count = handle_options(&new_argv, &count, &envchanged);
		if (envchanged)
			die("alias '%s' changes environment variables\n"
			    "You can use '!perf' in the alias to do this.",
			    alias_command);
		/* Shift back over the slots handle_options() consumed. */
		memmove(new_argv - option_count, new_argv,
			count * sizeof(char *));
		new_argv -= option_count;

		if (count < 1)
			die("empty alias for %s", alias_command);

		if (!strcmp(alias_command, new_argv[0]))
			die("recursive alias: %s", alias_command);

		/* Splice the expansion in front of the original arguments. */
		new_argv = realloc(new_argv, sizeof(char*) *
				   (count + *argcp + 1));
		/* insert after command name */
		memcpy(new_argv + count, *argv + 1, sizeof(char*) * *argcp);
		new_argv[count+*argcp] = NULL;

		*argv = new_argv;
		*argcp += count - 1;

		ret = 1;
	}

	errno = saved_errno;

	return ret;
}
190 | |||
const char perf_version_string[] = PERF_VERSION;

/* Per-command option flags for run_builtin(). */
#define RUN_SETUP (1<<0)
#define USE_PAGER (1<<1)
/*
 * require working tree to be present -- anything uses this needs
 * RUN_SETUP for reading from the configuration file.
 */
#define NEED_WORK_TREE (1<<2)

/* One builtin subcommand: its name, entry point, and the flags above. */
struct cmd_struct {
	const char *cmd;
	int (*fn)(int, const char **, const char *);
	int option;
};
206 | |||
207 | static int run_builtin(struct cmd_struct *p, int argc, const char **argv) | ||
208 | { | ||
209 | int status; | ||
210 | struct stat st; | ||
211 | const char *prefix; | ||
212 | |||
213 | prefix = NULL; | ||
214 | if (p->option & RUN_SETUP) | ||
215 | prefix = NULL; /* setup_perf_directory(); */ | ||
216 | |||
217 | if (use_pager == -1 && p->option & RUN_SETUP) | ||
218 | use_pager = check_pager_config(p->cmd); | ||
219 | if (use_pager == -1 && p->option & USE_PAGER) | ||
220 | use_pager = 1; | ||
221 | commit_pager_choice(); | ||
222 | |||
223 | if (p->option & NEED_WORK_TREE) | ||
224 | /* setup_work_tree() */; | ||
225 | |||
226 | status = p->fn(argc, argv, prefix); | ||
227 | if (status) | ||
228 | return status & 0xff; | ||
229 | |||
230 | /* Somebody closed stdout? */ | ||
231 | if (fstat(fileno(stdout), &st)) | ||
232 | return 0; | ||
233 | /* Ignore write errors for pipes and sockets.. */ | ||
234 | if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode)) | ||
235 | return 0; | ||
236 | |||
237 | /* Check for ENOSPC and EIO errors.. */ | ||
238 | if (fflush(stdout)) | ||
239 | die("write failure on standard output: %s", strerror(errno)); | ||
240 | if (ferror(stdout)) | ||
241 | die("unknown write failure on standard output"); | ||
242 | if (fclose(stdout)) | ||
243 | die("close failed on standard output: %s", strerror(errno)); | ||
244 | return 0; | ||
245 | } | ||
246 | |||
247 | static void handle_internal_command(int argc, const char **argv) | ||
248 | { | ||
249 | const char *cmd = argv[0]; | ||
250 | static struct cmd_struct commands[] = { | ||
251 | { "help", cmd_help, 0 }, | ||
252 | { "record", cmd_record, 0 }, | ||
253 | { "stat", cmd_stat, 0 }, | ||
254 | { "top", cmd_top, 0 }, | ||
255 | { "version", cmd_version, 0 }, | ||
256 | }; | ||
257 | int i; | ||
258 | static const char ext[] = STRIP_EXTENSION; | ||
259 | |||
260 | if (sizeof(ext) > 1) { | ||
261 | i = strlen(argv[0]) - strlen(ext); | ||
262 | if (i > 0 && !strcmp(argv[0] + i, ext)) { | ||
263 | char *argv0 = strdup(argv[0]); | ||
264 | argv[0] = cmd = argv0; | ||
265 | argv0[i] = '\0'; | ||
266 | } | ||
267 | } | ||
268 | |||
269 | /* Turn "perf cmd --help" into "perf help cmd" */ | ||
270 | if (argc > 1 && !strcmp(argv[1], "--help")) { | ||
271 | argv[1] = argv[0]; | ||
272 | argv[0] = cmd = "help"; | ||
273 | } | ||
274 | |||
275 | for (i = 0; i < ARRAY_SIZE(commands); i++) { | ||
276 | struct cmd_struct *p = commands+i; | ||
277 | if (strcmp(p->cmd, cmd)) | ||
278 | continue; | ||
279 | exit(run_builtin(p, argc, argv)); | ||
280 | } | ||
281 | } | ||
282 | |||
/*
 * Try to run "perf-<argv[0]>" as an external command.  Returns only if
 * the command was not found (leaving errno = ENOENT); any other
 * failure dies or exits with the child's status.
 */
static void execv_dashed_external(const char **argv)
{
	struct strbuf cmd = STRBUF_INIT;
	const char *tmp;
	int status;

	strbuf_addf(&cmd, "perf-%s", argv[0]);

	/*
	 * argv[0] must be the perf command, but the argv array
	 * belongs to the caller, and may be reused in
	 * subsequent loop iterations. Save argv[0] and
	 * restore it on error.
	 */
	tmp = argv[0];
	argv[0] = cmd.buf;

	/*
	 * if we fail because the command is not found, it is
	 * OK to return. Otherwise, we just pass along the status code.
	 */
	status = run_command_v_opt(argv, 0);
	if (status != -ERR_RUN_COMMAND_EXEC) {
		if (IS_RUN_COMMAND_ERR(status))
			die("unable to run '%s'", argv[0]);
		exit(-status);
	}
	errno = ENOENT; /* as if we called execvp */

	argv[0] = tmp;

	strbuf_release(&cmd);
}
316 | |||
/*
 * Try to run the command in *argv: first as a builtin, then as an
 * external "perf-<cmd>", then via alias expansion (at most once).
 * Returns whether an alias was expanded; reaching the return at all
 * means the command could not be run.
 */
static int run_argv(int *argcp, const char ***argv)
{
	int done_alias = 0;

	for (;;) {
		/* See if it's an internal command */
		handle_internal_command(*argcp, *argv);

		/* .. then try the external ones */
		execv_dashed_external(*argv);

		/*
		 * It could be an alias -- this works around the insanity
		 * of overriding "perf log" with "perf show" by having
		 * alias.log = show
		 */
		if (done_alias)
			break;
		if (!handle_alias(argcp, argv))
			break;
		done_alias = 1;
	}

	return done_alias;
}
339 | |||
340 | |||
341 | int main(int argc, const char **argv) | ||
342 | { | ||
343 | const char *cmd; | ||
344 | |||
345 | cmd = perf_extract_argv0_path(argv[0]); | ||
346 | if (!cmd) | ||
347 | cmd = "perf-help"; | ||
348 | |||
349 | /* | ||
350 | * "perf-xxxx" is the same as "perf xxxx", but we obviously: | ||
351 | * | ||
352 | * - cannot take flags in between the "perf" and the "xxxx". | ||
353 | * - cannot execute it externally (since it would just do | ||
354 | * the same thing over again) | ||
355 | * | ||
356 | * So we just directly call the internal command handler, and | ||
357 | * die if that one cannot handle it. | ||
358 | */ | ||
359 | if (!prefixcmp(cmd, "perf-")) { | ||
360 | cmd += 4; | ||
361 | argv[0] = cmd; | ||
362 | handle_internal_command(argc, argv); | ||
363 | die("cannot handle %s internally", cmd); | ||
364 | } | ||
365 | |||
366 | /* Look for flags.. */ | ||
367 | argv++; | ||
368 | argc--; | ||
369 | handle_options(&argv, &argc, NULL); | ||
370 | commit_pager_choice(); | ||
371 | if (argc > 0) { | ||
372 | if (!prefixcmp(argv[0], "--")) | ||
373 | argv[0] += 2; | ||
374 | } else { | ||
375 | /* The user didn't specify a command; give them help */ | ||
376 | printf("usage: %s\n\n", perf_usage_string); | ||
377 | list_common_cmds_help(); | ||
378 | printf("\n%s\n", perf_more_info_string); | ||
379 | exit(1); | ||
380 | } | ||
381 | cmd = argv[0]; | ||
382 | |||
383 | /* | ||
384 | * We use PATH to find perf commands, but we prepend some higher | ||
385 | * precidence paths: the "--exec-path" option, the PERF_EXEC_PATH | ||
386 | * environment, and the $(perfexecdir) from the Makefile at build | ||
387 | * time. | ||
388 | */ | ||
389 | setup_path(); | ||
390 | |||
391 | while (1) { | ||
392 | static int done_help = 0; | ||
393 | static int was_alias = 0; | ||
394 | was_alias = run_argv(&argc, &argv); | ||
395 | if (errno != ENOENT) | ||
396 | break; | ||
397 | if (was_alias) { | ||
398 | fprintf(stderr, "Expansion of alias '%s' failed; " | ||
399 | "'%s' is not a perf-command\n", | ||
400 | cmd, argv[0]); | ||
401 | exit(1); | ||
402 | } | ||
403 | if (!done_help) { | ||
404 | cmd = argv[0] = help_unknown_cmd(cmd); | ||
405 | done_help = 1; | ||
406 | } else | ||
407 | break; | ||
408 | } | ||
409 | |||
410 | fprintf(stderr, "Failed to run command '%s': %s\n", | ||
411 | cmd, strerror(errno)); | ||
412 | |||
413 | return 1; | ||
414 | } | ||
diff --git a/Documentation/perf_counter/util/PERF-VERSION-GEN b/Documentation/perf_counter/util/PERF-VERSION-GEN new file mode 100755 index 000000000000..c561d1538c03 --- /dev/null +++ b/Documentation/perf_counter/util/PERF-VERSION-GEN | |||
@@ -0,0 +1,42 @@ | |||
#!/bin/sh

# Generate PERF-VERSION-FILE containing "PERF_VERSION = <version>".
# The version comes from, in order: a 'version' file (release
# tarballs), git-describe, or the hard-coded default below.

GVF=PERF-VERSION-FILE
DEF_VER=v0.0.1.PERF

LF='
'

# First see if there is a version file (included in release tarballs),
# then try git-describe, then default.
if test -f version
then
	VN=$(cat version) || VN="$DEF_VER"
elif test -d .git -o -f .git &&
	VN=$(git describe --abbrev=4 HEAD 2>/dev/null) &&
	case "$VN" in
	*$LF*) (exit 1) ;;
	v[0-9]*)
		# Append "-dirty" when the work tree has local changes.
		git update-index -q --refresh
		test -z "$(git diff-index --name-only HEAD --)" ||
		VN="$VN-dirty" ;;
	esac
then
	# Normalize git-describe's dashes to dots.
	VN=$(echo "$VN" | sed -e 's/-/./g');
else
	VN="$DEF_VER"
fi

# Strip the leading "v".
VN=$(expr "$VN" : v*'\(.*\)')

if test -r $GVF
then
	VC=$(sed -e 's/^PERF_VERSION = //' <$GVF)
else
	VC=unset
fi
# Rewrite the file only when the version actually changed, so make
# does not see a spurious timestamp bump.
test "$VN" = "$VC" || {
	echo >&2 "PERF_VERSION = $VN"
	echo "PERF_VERSION = $VN" >$GVF
}

diff --git a/Documentation/perf_counter/util/abspath.c b/Documentation/perf_counter/util/abspath.c new file mode 100644 index 000000000000..649f34f83365 --- /dev/null +++ b/Documentation/perf_counter/util/abspath.c | |||
@@ -0,0 +1,117 @@ | |||
1 | #include "cache.h" | ||
2 | |||
3 | /* | ||
4 | * Do not use this for inspecting *tracked* content. When path is a | ||
5 | * symlink to a directory, we do not want to say it is a directory when | ||
6 | * dealing with tracked content in the working tree. | ||
7 | */ | ||
/* Returns 1 when 'path' resolves to a directory, 0 otherwise. */
int is_directory(const char *path)
{
	struct stat sb;

	if (stat(path, &sb) != 0)
		return 0;

	return S_ISDIR(sb.st_mode) != 0;
}
13 | |||
14 | /* We allow "recursive" symbolic links. Only within reason, though. */ | ||
15 | #define MAXDEPTH 5 | ||
16 | |||
/*
 * Canonicalize 'path' by chdir()ing into it and reading back getcwd(),
 * chasing up to MAXDEPTH trailing symlinks.  Returns a pointer into a
 * static buffer (overwritten by the next call; do not free).  Restores
 * the original working directory before returning.
 */
const char *make_absolute_path(const char *path)
{
	static char bufs[2][PATH_MAX + 1], *buf = bufs[0], *next_buf = bufs[1];
	char cwd[1024] = "";
	int buf_index = 1, len;

	int depth = MAXDEPTH;
	char *last_elem = NULL;
	struct stat st;

	if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
		die ("Too long path: %.*s", 60, path);

	while (depth--) {
		/* If buf is not a directory, split off its last component. */
		if (!is_directory(buf)) {
			char *last_slash = strrchr(buf, '/');
			if (last_slash) {
				*last_slash = '\0';
				last_elem = xstrdup(last_slash + 1);
			} else {
				last_elem = xstrdup(buf);
				*buf = '\0';
			}
		}

		if (*buf) {
			/* Remember where we started, to chdir back later. */
			if (!*cwd && !getcwd(cwd, sizeof(cwd)))
				die ("Could not get current working directory");

			if (chdir(buf))
				die ("Could not switch to '%s'", buf);
		}
		if (!getcwd(buf, PATH_MAX))
			die ("Could not get current working directory");

		/* Re-append the component split off above. */
		if (last_elem) {
			int len = strlen(buf);
			if (len + strlen(last_elem) + 2 > PATH_MAX)
				die ("Too long path name: '%s/%s'",
				     buf, last_elem);
			buf[len] = '/';
			strcpy(buf + len + 1, last_elem);
			free(last_elem);
			last_elem = NULL;
		}

		/* If the result is a symlink, chase it (ping-pong buffers). */
		if (!lstat(buf, &st) && S_ISLNK(st.st_mode)) {
			len = readlink(buf, next_buf, PATH_MAX);
			if (len < 0)
				die ("Invalid symlink: %s", buf);
			if (PATH_MAX <= len)
				die("symbolic link too long: %s", buf);
			next_buf[len] = '\0';
			buf = next_buf;
			buf_index = 1 - buf_index;
			next_buf = bufs[buf_index];
		} else
			break;
	}

	if (*cwd && chdir(cwd))
		die ("Could not change back to '%s'", cwd);

	return buf;
}
82 | |||
83 | static const char *get_pwd_cwd(void) | ||
84 | { | ||
85 | static char cwd[PATH_MAX + 1]; | ||
86 | char *pwd; | ||
87 | struct stat cwd_stat, pwd_stat; | ||
88 | if (getcwd(cwd, PATH_MAX) == NULL) | ||
89 | return NULL; | ||
90 | pwd = getenv("PWD"); | ||
91 | if (pwd && strcmp(pwd, cwd)) { | ||
92 | stat(cwd, &cwd_stat); | ||
93 | if (!stat(pwd, &pwd_stat) && | ||
94 | pwd_stat.st_dev == cwd_stat.st_dev && | ||
95 | pwd_stat.st_ino == cwd_stat.st_ino) { | ||
96 | strlcpy(cwd, pwd, PATH_MAX); | ||
97 | } | ||
98 | } | ||
99 | return cwd; | ||
100 | } | ||
101 | |||
/*
 * Return 'path' unchanged if it is already absolute, otherwise prepend
 * the current working directory.  The result lives in a static buffer
 * overwritten by the next call; dies on overflow or if the cwd cannot
 * be determined.
 */
const char *make_nonrelative_path(const char *path)
{
	static char buf[PATH_MAX + 1];

	if (is_absolute_path(path)) {
		if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
			die("Too long path: %.*s", 60, path);
	} else {
		const char *cwd = get_pwd_cwd();
		if (!cwd)
			die("Cannot determine the current working directory");
		if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
			die("Too long path: %.*s", 60, path);
	}
	return buf;
}
diff --git a/Documentation/perf_counter/util/alias.c b/Documentation/perf_counter/util/alias.c new file mode 100644 index 000000000000..9b3dd2b428df --- /dev/null +++ b/Documentation/perf_counter/util/alias.c | |||
@@ -0,0 +1,77 @@ | |||
1 | #include "cache.h" | ||
2 | |||
3 | static const char *alias_key; | ||
4 | static char *alias_val; | ||
5 | |||
6 | static int alias_lookup_cb(const char *k, const char *v, void *cb) | ||
7 | { | ||
8 | if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) { | ||
9 | if (!v) | ||
10 | return config_error_nonbool(k); | ||
11 | alias_val = strdup(v); | ||
12 | return 0; | ||
13 | } | ||
14 | return 0; | ||
15 | } | ||
16 | |||
17 | char *alias_lookup(const char *alias) | ||
18 | { | ||
19 | alias_key = alias; | ||
20 | alias_val = NULL; | ||
21 | perf_config(alias_lookup_cb, NULL); | ||
22 | return alias_val; | ||
23 | } | ||
24 | |||
int split_cmdline(char *cmdline, const char ***argv)
{
	/*
	 * Split 'cmdline' in place into a freshly allocated argv-style
	 * vector, honoring single/double quotes and backslash escapes
	 * (backslash is literal inside single quotes).  Returns the number
	 * of words, or a negative error() on bad input or OOM; on error
	 * *argv is freed and set to NULL.  Caller frees *argv (the words
	 * point into 'cmdline').
	 */
	int src, dst, count = 0, size = 16;
	char quoted = 0;

	*argv = malloc(sizeof(char *) * size);
	if (!*argv)		/* fix: malloc() result was unchecked */
		return error("out of memory");

	/* split alias_string */
	(*argv)[count++] = cmdline;
	for (src = dst = 0; cmdline[src];) {
		char c = cmdline[src];
		if (!quoted && isspace(c)) {
			/* end of word: terminate it and skip the blanks */
			cmdline[dst++] = 0;
			while (cmdline[++src]
					&& isspace(cmdline[src]))
				; /* skip */
			if (count >= size) {
				/* fix: don't overwrite *argv before checking
				 * realloc(), which would leak/crash on OOM */
				const char **bigger;
				size += 16;
				bigger = realloc(*argv, sizeof(char *) * size);
				if (!bigger) {
					free(*argv);
					*argv = NULL;
					return error("out of memory");
				}
				*argv = bigger;
			}
			(*argv)[count++] = cmdline + dst;
		} else if (!quoted && (c == '\'' || c == '"')) {
			quoted = c;	/* enter quoted mode; quote char is eaten */
			src++;
		} else if (c == quoted) {
			quoted = 0;	/* matching close quote */
			src++;
		} else {
			if (c == '\\' && quoted != '\'') {
				src++;
				c = cmdline[src];
				if (!c) {
					free(*argv);
					*argv = NULL;
					return error("cmdline ends with \\");
				}
			}
			cmdline[dst++] = c;
			src++;
		}
	}

	cmdline[dst] = 0;

	if (quoted) {
		free(*argv);
		*argv = NULL;
		return error("unclosed quote");
	}

	return count;
}
77 | |||
diff --git a/Documentation/perf_counter/util/cache.h b/Documentation/perf_counter/util/cache.h new file mode 100644 index 000000000000..71080512fa86 --- /dev/null +++ b/Documentation/perf_counter/util/cache.h | |||
@@ -0,0 +1,117 @@ | |||
1 | #ifndef CACHE_H | ||
2 | #define CACHE_H | ||
3 | |||
4 | #include "util.h" | ||
5 | #include "strbuf.h" | ||
6 | |||
7 | #define PERF_DIR_ENVIRONMENT "PERF_DIR" | ||
8 | #define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE" | ||
9 | #define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" | ||
10 | #define DB_ENVIRONMENT "PERF_OBJECT_DIRECTORY" | ||
11 | #define INDEX_ENVIRONMENT "PERF_INDEX_FILE" | ||
12 | #define GRAFT_ENVIRONMENT "PERF_GRAFT_FILE" | ||
13 | #define TEMPLATE_DIR_ENVIRONMENT "PERF_TEMPLATE_DIR" | ||
14 | #define CONFIG_ENVIRONMENT "PERF_CONFIG" | ||
15 | #define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH" | ||
16 | #define CEILING_DIRECTORIES_ENVIRONMENT "PERF_CEILING_DIRECTORIES" | ||
17 | #define PERFATTRIBUTES_FILE ".perfattributes" | ||
18 | #define INFOATTRIBUTES_FILE "info/attributes" | ||
19 | #define ATTRIBUTE_MACRO_PREFIX "[attr]" | ||
20 | |||
21 | typedef int (*config_fn_t)(const char *, const char *, void *); | ||
22 | extern int perf_default_config(const char *, const char *, void *); | ||
23 | extern int perf_config_from_file(config_fn_t fn, const char *, void *); | ||
24 | extern int perf_config(config_fn_t fn, void *); | ||
25 | extern int perf_parse_ulong(const char *, unsigned long *); | ||
26 | extern int perf_config_int(const char *, const char *); | ||
27 | extern unsigned long perf_config_ulong(const char *, const char *); | ||
28 | extern int perf_config_bool_or_int(const char *, const char *, int *); | ||
29 | extern int perf_config_bool(const char *, const char *); | ||
30 | extern int perf_config_string(const char **, const char *, const char *); | ||
31 | extern int perf_config_set(const char *, const char *); | ||
32 | extern int perf_config_set_multivar(const char *, const char *, const char *, int); | ||
33 | extern int perf_config_rename_section(const char *, const char *); | ||
34 | extern const char *perf_etc_perfconfig(void); | ||
35 | extern int check_repository_format_version(const char *var, const char *value, void *cb); | ||
36 | extern int perf_config_system(void); | ||
37 | extern int perf_config_global(void); | ||
38 | extern int config_error_nonbool(const char *); | ||
39 | extern const char *config_exclusive_filename; | ||
40 | |||
41 | #define MAX_PERFNAME (1000) | ||
42 | extern char perf_default_email[MAX_PERFNAME]; | ||
43 | extern char perf_default_name[MAX_PERFNAME]; | ||
44 | extern int user_ident_explicitly_given; | ||
45 | |||
46 | extern const char *perf_log_output_encoding; | ||
47 | extern const char *perf_mailmap_file; | ||
48 | |||
49 | /* IO helper functions */ | ||
50 | extern void maybe_flush_or_die(FILE *, const char *); | ||
51 | extern int copy_fd(int ifd, int ofd); | ||
52 | extern int copy_file(const char *dst, const char *src, int mode); | ||
53 | extern ssize_t read_in_full(int fd, void *buf, size_t count); | ||
54 | extern ssize_t write_in_full(int fd, const void *buf, size_t count); | ||
55 | extern void write_or_die(int fd, const void *buf, size_t count); | ||
56 | extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg); | ||
57 | extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg); | ||
58 | extern void fsync_or_die(int fd, const char *); | ||
59 | |||
60 | /* pager.c */ | ||
61 | extern void setup_pager(void); | ||
62 | extern const char *pager_program; | ||
63 | extern int pager_in_use(void); | ||
64 | extern int pager_use_color; | ||
65 | |||
66 | extern const char *editor_program; | ||
67 | extern const char *excludes_file; | ||
68 | |||
69 | char *alias_lookup(const char *alias); | ||
70 | int split_cmdline(char *cmdline, const char ***argv); | ||
71 | |||
72 | #define alloc_nr(x) (((x)+16)*3/2) | ||
73 | |||
/*
 * Realloc the buffer pointed at by variable 'x' so that it can hold
 * at least 'nr' entries; the number of entries currently allocated
 * is 'alloc', using the standard growing factor alloc_nr() macro
 * (roughly 1.5x plus a small constant, to amortize reallocations).
 *
 * DO NOT USE any expression with side-effect for 'x' or 'alloc'.
 */
#define ALLOC_GROW(x, nr, alloc) \
	do { \
		if ((nr) > alloc) { \
			if (alloc_nr(alloc) < (nr)) \
				alloc = (nr); /* jump straight to the request */ \
			else \
				alloc = alloc_nr(alloc); /* geometric growth */ \
			x = xrealloc((x), alloc * sizeof(*(x))); \
		} \
	} while(0)
91 | |||
92 | |||
static inline int is_absolute_path(const char *path)
{
	/* A path is absolute iff it starts at the filesystem root. */
	return *path == '/';
}
97 | |||
98 | const char *make_absolute_path(const char *path); | ||
99 | const char *make_nonrelative_path(const char *path); | ||
100 | const char *make_relative_path(const char *abs, const char *base); | ||
101 | int normalize_path_copy(char *dst, const char *src); | ||
102 | int longest_ancestor_length(const char *path, const char *prefix_list); | ||
103 | char *strip_path_suffix(const char *path, const char *suffix); | ||
104 | |||
105 | extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); | ||
106 | extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2))); | ||
107 | |||
108 | extern char *mksnpath(char *buf, size_t n, const char *fmt, ...) | ||
109 | __attribute__((format (printf, 3, 4))); | ||
110 | extern char *perf_snpath(char *buf, size_t n, const char *fmt, ...) | ||
111 | __attribute__((format (printf, 3, 4))); | ||
112 | extern char *perf_pathdup(const char *fmt, ...) | ||
113 | __attribute__((format (printf, 1, 2))); | ||
114 | |||
115 | extern size_t strlcpy(char *dest, const char *src, size_t size); | ||
116 | |||
117 | #endif /* CACHE_H */ | ||
diff --git a/Documentation/perf_counter/util/config.c b/Documentation/perf_counter/util/config.c new file mode 100644 index 000000000000..3dd13faa6a27 --- /dev/null +++ b/Documentation/perf_counter/util/config.c | |||
@@ -0,0 +1,873 @@ | |||
1 | /* | ||
2 | * GIT - The information manager from hell | ||
3 | * | ||
4 | * Copyright (C) Linus Torvalds, 2005 | ||
5 | * Copyright (C) Johannes Schindelin, 2005 | ||
6 | * | ||
7 | */ | ||
8 | #include "util.h" | ||
9 | #include "cache.h" | ||
10 | #include "exec_cmd.h" | ||
11 | |||
12 | #define MAXNAME (256) | ||
13 | |||
14 | static FILE *config_file; | ||
15 | static const char *config_file_name; | ||
16 | static int config_linenr; | ||
17 | static int config_file_eof; | ||
18 | |||
19 | const char *config_exclusive_filename = NULL; | ||
20 | |||
21 | static int get_next_char(void) | ||
22 | { | ||
23 | int c; | ||
24 | FILE *f; | ||
25 | |||
26 | c = '\n'; | ||
27 | if ((f = config_file) != NULL) { | ||
28 | c = fgetc(f); | ||
29 | if (c == '\r') { | ||
30 | /* DOS like systems */ | ||
31 | c = fgetc(f); | ||
32 | if (c != '\n') { | ||
33 | ungetc(c, f); | ||
34 | c = '\r'; | ||
35 | } | ||
36 | } | ||
37 | if (c == '\n') | ||
38 | config_linenr++; | ||
39 | if (c == EOF) { | ||
40 | config_file_eof = 1; | ||
41 | c = '\n'; | ||
42 | } | ||
43 | } | ||
44 | return c; | ||
45 | } | ||
46 | |||
static char *parse_value(void)
{
	/*
	 * Read the rest of a "key = value" line into a static buffer.
	 * Supports double-quoted spans (whitespace preserved), backslash
	 * escapes (\t \b \n \\ \"), "\<newline>" line continuation, and
	 * ';'/'#' comments outside quotes.  Runs of unquoted whitespace
	 * collapse to a single space; leading/trailing blanks are dropped.
	 * Returns the NUL-terminated value (static storage, overwritten by
	 * the next call) or NULL on syntax error / overlong value.
	 */
	static char value[1024];
	int quote = 0, comment = 0, len = 0, space = 0;

	for (;;) {
		int c = get_next_char();
		if (len >= sizeof(value) - 1)
			return NULL;	/* value does not fit the buffer */
		if (c == '\n') {
			if (quote)
				return NULL;	/* unterminated quote */
			value[len] = 0;
			return value;
		}
		if (comment)
			continue;
		if (isspace(c) && !quote) {
			space = 1;	/* emit one ' ' only if more text follows */
			continue;
		}
		if (!quote) {
			if (c == ';' || c == '#') {
				comment = 1;	/* rest of line is a comment */
				continue;
			}
		}
		if (space) {
			if (len)	/* no leading space before the first token */
				value[len++] = ' ';
			space = 0;
		}
		if (c == '\\') {
			c = get_next_char();
			switch (c) {
			case '\n':
				continue;	/* line continuation */
			case 't':
				c = '\t';
				break;
			case 'b':
				c = '\b';
				break;
			case 'n':
				c = '\n';
				break;
			/* Some characters escape as themselves */
			case '\\': case '"':
				break;
			/* Reject unknown escape sequences */
			default:
				return NULL;
			}
			value[len++] = c;
			continue;
		}
		if (c == '"') {
			quote = 1-quote;	/* toggle quoted mode */
			continue;
		}
		value[len++] = c;
	}
}
110 | |||
static inline int iskeychar(int c)
{
	/* Config variable names are made of alphanumerics and dashes. */
	return c == '-' || isalnum(c);
}
115 | |||
static int get_value(config_fn_t fn, void *data, char *name, unsigned int len)
{
	/*
	 * Finish parsing a variable: 'name' already holds the "section."
	 * prefix and the first letter; continue at name[len].  A line with
	 * no '=' passes value == NULL to 'fn' (boolean "true").  Returns
	 * fn's result, or -1 on syntax error or an overlong name.
	 */
	int c;
	char *value;

	/* Get the full name */
	for (;;) {
		c = get_next_char();
		if (config_file_eof)
			break;
		if (!iskeychar(c))
			break;
		name[len++] = tolower(c);	/* names are case-insensitive */
		if (len >= MAXNAME)
			return -1;
	}
	name[len] = 0;
	/* skip horizontal whitespace before a possible '=' */
	while (c == ' ' || c == '\t')
		c = get_next_char();

	value = NULL;
	if (c != '\n') {
		if (c != '=')
			return -1;
		value = parse_value();
		if (!value)
			return -1;
	}
	return fn(name, value, data);
}
146 | |||
static int get_extended_base_var(char *name, int baselen, int c)
{
	/*
	 * Parse the '[section "subsection"]' header form; 'c' is the
	 * whitespace seen after the section word.  Appends ".subsection"
	 * (quotes stripped, backslash-escaped characters taken literally,
	 * case preserved) to 'name'.  Returns the new length, or -1 on
	 * malformed input.
	 */
	do {
		if (c == '\n')
			return -1;	/* header must fit on one line */
		c = get_next_char();
	} while (isspace(c));

	/* We require the format to be '[base "extension"]' */
	if (c != '"')
		return -1;
	name[baselen++] = '.';

	for (;;) {
		int c = get_next_char();	/* NOTE(review): shadows the parameter 'c' */
		if (c == '\n')
			return -1;
		if (c == '"')
			break;
		if (c == '\\') {
			c = get_next_char();	/* escaped char is literal */
			if (c == '\n')
				return -1;
		}
		name[baselen++] = c;
		if (baselen > MAXNAME / 2)
			return -1;	/* leave room for the variable part */
	}

	/* Final ']' */
	if (get_next_char() != ']')
		return -1;
	return baselen;
}
181 | |||
static int get_base_var(char *name)
{
	/*
	 * Parse a section header after its opening '['.  Fills 'name' with
	 * the lower-cased section name (delegating to
	 * get_extended_base_var() for the '[sec "sub"]' form) and returns
	 * its length, or -1 on EOF / bad character / overlong name.
	 */
	int baselen = 0;

	for (;;) {
		int c = get_next_char();
		if (config_file_eof)
			return -1;
		if (c == ']')
			return baselen;
		if (isspace(c))
			return get_extended_base_var(name, baselen, c);
		if (!iskeychar(c) && c != '.')
			return -1;
		if (baselen > MAXNAME / 2)
			return -1;
		name[baselen++] = tolower(c);
	}
}
201 | |||
static int perf_parse_file(config_fn_t fn, void *data)
{
	/*
	 * Drive the parser over the already-opened config_file, calling
	 * 'fn' for each variable found.  Returns 0 at clean EOF; any
	 * syntax error (or negative return from 'fn') breaks out of the
	 * loop and die()s with the offending line number.
	 */
	int comment = 0;
	int baselen = 0;
	static char var[MAXNAME];

	/* U+FEFF Byte Order Mark in UTF8 */
	static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
	const unsigned char *bomptr = utf8_bom;

	for (;;) {
		int c = get_next_char();
		if (bomptr && *bomptr) {
			/* We are at the file beginning; skip UTF8-encoded BOM
			 * if present. Sane editors won't put this in on their
			 * own, but e.g. Windows Notepad will do it happily. */
			if ((unsigned char) c == *bomptr) {
				bomptr++;
				continue;
			} else {
				/* Do not tolerate partial BOM. */
				if (bomptr != utf8_bom)
					break;
				/* No BOM at file beginning. Cool. */
				bomptr = NULL;
			}
		}
		if (c == '\n') {
			if (config_file_eof)
				return 0;	/* clean end of file */
			comment = 0;
			continue;
		}
		if (comment || isspace(c))
			continue;
		if (c == '#' || c == ';') {
			comment = 1;	/* ignore until end of line */
			continue;
		}
		if (c == '[') {
			/* new "[section]"; remember the prefix for variables */
			baselen = get_base_var(var);
			if (baselen <= 0)
				break;
			var[baselen++] = '.';
			var[baselen] = 0;
			continue;
		}
		if (!isalpha(c))
			break;	/* variables must start with a letter */
		var[baselen] = tolower(c);
		if (get_value(fn, data, var, baselen+1) < 0)
			break;
	}
	die("bad config file line %d in %s", config_linenr, config_file_name);
}
257 | |||
static int parse_unit_factor(const char *end, unsigned long *val)
{
	/*
	 * Scale *val by the case-insensitive unit suffix at 'end':
	 * "k" = 2^10, "m" = 2^20, "g" = 2^30.  An empty suffix leaves
	 * *val untouched.  Returns 1 on success, 0 on an unknown suffix.
	 */
	static const struct {
		const char *name;
		unsigned long factor;
	} units[] = {
		{ "k", 1024UL },
		{ "m", 1024UL * 1024 },
		{ "g", 1024UL * 1024 * 1024 },
	};
	unsigned int i;

	if (!*end)
		return 1;
	for (i = 0; i < sizeof(units) / sizeof(units[0]); i++) {
		if (!strcasecmp(end, units[i].name)) {
			*val *= units[i].factor;
			return 1;
		}
	}
	return 0;
}
276 | |||
static int perf_parse_long(const char *value, long *ret)
{
	/*
	 * Parse a signed integer with an optional k/m/g suffix into *ret.
	 * Returns 1 on success, 0 for NULL/empty input or a bad suffix
	 * (*ret is untouched on failure).
	 */
	char *end;
	long val;
	unsigned long factor = 1;

	if (!value || !*value)
		return 0;
	val = strtol(value, &end, 0);
	if (!parse_unit_factor(end, &factor))
		return 0;
	*ret = val * factor;
	return 1;
}
290 | |||
int perf_parse_ulong(const char *value, unsigned long *ret)
{
	/*
	 * Parse an unsigned integer with an optional k/m/g suffix into
	 * *ret.  Returns 1 on success, 0 for NULL/empty input or a bad
	 * suffix (*ret is untouched on failure).
	 */
	char *end;
	unsigned long val;

	if (!value || !*value)
		return 0;
	val = strtoul(value, &end, 0);
	if (!parse_unit_factor(end, &val))
		return 0;
	*ret = val;
	return 1;
}
303 | |||
304 | static void die_bad_config(const char *name) | ||
305 | { | ||
306 | if (config_file_name) | ||
307 | die("bad config value for '%s' in %s", name, config_file_name); | ||
308 | die("bad config value for '%s'", name); | ||
309 | } | ||
310 | |||
int perf_config_int(const char *name, const char *value)
{
	/*
	 * Parse 'value' (with optional k/m/g suffix) as an int; dies via
	 * die_bad_config() on bad input.  The long result is truncated to
	 * int on return, as in the original.
	 */
	long parsed = 0;

	if (!perf_parse_long(value, &parsed))
		die_bad_config(name);
	return parsed;
}
318 | |||
unsigned long perf_config_ulong(const char *name, const char *value)
{
	/*
	 * Parse 'value' (with optional k/m/g suffix) as an unsigned long;
	 * dies via die_bad_config() on bad input.
	 *
	 * Fix: initialize 'ret' (it was uninitialized; perf_parse_ulong
	 * only writes it on success) for defensiveness and consistency
	 * with perf_config_int().
	 */
	unsigned long ret = 0;
	if (!perf_parse_ulong(value, &ret))
		die_bad_config(name);
	return ret;
}
326 | |||
int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
{
	/*
	 * Interpret 'value' as a boolean when possible, setting *is_bool.
	 * A NULL value ("[sec] var" with no '=') is true; an empty value
	 * is false; otherwise the usual true/yes/on and false/no/off words
	 * are accepted case-insensitively.  Anything else is parsed as an
	 * integer (with *is_bool cleared).
	 */
	*is_bool = 1;
	if (!value)
		return 1;
	if (!*value)
		return 0;
	if (!strcasecmp(value, "true") ||
	    !strcasecmp(value, "yes") ||
	    !strcasecmp(value, "on"))
		return 1;
	if (!strcasecmp(value, "false") ||
	    !strcasecmp(value, "no") ||
	    !strcasecmp(value, "off"))
		return 0;
	/* Not a boolean word: fall back to integer parsing. */
	*is_bool = 0;
	return perf_config_int(name, value);
}
341 | |||
int perf_config_bool(const char *name, const char *value)
{
	/* Boolean-or-int lookup normalized to exactly 0 or 1. */
	int is_bool;
	int v = perf_config_bool_or_int(name, value, &is_bool);

	return v != 0;
}
347 | |||
int perf_config_string(const char **dest, const char *var, const char *value)
{
	/*
	 * Store a strdup()ed copy of 'value' in *dest (caller owns it).
	 * A missing value is an error for string variables.
	 */
	if (value) {
		*dest = strdup(value);
		return 0;
	}
	return config_error_nonbool(var);
}
355 | |||
static int perf_default_core_config(const char *var, const char *value)
{
	/*
	 * No "core.*" variables are recognized yet; accept and ignore.
	 * Add other config variables here and to Documentation/config.txt.
	 */
	(void)var;
	(void)value;
	return 0;
}
361 | |||
int perf_default_config(const char *var, const char *value, void *dummy)
{
	/*
	 * Default perf_config() callback: dispatch "core.*" variables to
	 * the core handler; everything else is silently accepted.
	 * Add other config variables here and to Documentation/config.txt.
	 */
	if (prefixcmp(var, "core."))
		return 0;
	return perf_default_core_config(var, value);
}
370 | |||
371 | int perf_config_from_file(config_fn_t fn, const char *filename, void *data) | ||
372 | { | ||
373 | int ret; | ||
374 | FILE *f = fopen(filename, "r"); | ||
375 | |||
376 | ret = -1; | ||
377 | if (f) { | ||
378 | config_file = f; | ||
379 | config_file_name = filename; | ||
380 | config_linenr = 1; | ||
381 | config_file_eof = 0; | ||
382 | ret = perf_parse_file(fn, data); | ||
383 | fclose(f); | ||
384 | config_file_name = NULL; | ||
385 | } | ||
386 | return ret; | ||
387 | } | ||
388 | |||
389 | const char *perf_etc_perfconfig(void) | ||
390 | { | ||
391 | static const char *system_wide; | ||
392 | if (!system_wide) | ||
393 | system_wide = system_path(ETC_PERFCONFIG); | ||
394 | return system_wide; | ||
395 | } | ||
396 | |||
static int perf_env_bool(const char *k, int def)
{
	/* Boolean environment variable 'k', or 'def' when unset. */
	const char *v = getenv(k);

	if (!v)
		return def;
	return perf_config_bool(k, v);
}
402 | |||
int perf_config_system(void)
{
	/* System config is read unless PERF_CONFIG_NOSYSTEM is truthy. */
	return perf_env_bool("PERF_CONFIG_NOSYSTEM", 0) ? 0 : 1;
}
407 | |||
int perf_config_global(void)
{
	/* User (~/.perfconfig) config is read unless PERF_CONFIG_NOGLOBAL is truthy. */
	return perf_env_bool("PERF_CONFIG_NOGLOBAL", 0) ? 0 : 1;
}
412 | |||
int perf_config(config_fn_t fn, void *data)
{
	/*
	 * Run 'fn' over every config source, lowest priority first:
	 * the system file, then ~/.perfconfig, then the repository's
	 * "config".  Returns -1 when no file was readable, otherwise the
	 * sum of the per-file parse results.
	 */
	int ret = 0, found = 0;
	char *repo_config = NULL;
	const char *home = NULL;

	/* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
	if (config_exclusive_filename)
		return perf_config_from_file(fn, config_exclusive_filename, data);
	if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
		ret += perf_config_from_file(fn, perf_etc_perfconfig(),
					    data);
		found += 1;
	}

	home = getenv("HOME");
	if (perf_config_global() && home) {
		/* NOTE(review): strdup() result is not checked for NULL */
		char *user_config = strdup(mkpath("%s/.perfconfig", home));
		if (!access(user_config, R_OK)) {
			ret += perf_config_from_file(fn, user_config, data);
			found += 1;
		}
		free(user_config);
	}

	repo_config = perf_pathdup("config");
	if (!access(repo_config, R_OK)) {
		ret += perf_config_from_file(fn, repo_config, data);
		found += 1;
	}
	free(repo_config);
	if (found == 0)
		return -1;
	return ret;
}
448 | |||
449 | /* | ||
450 | * Find all the stuff for perf_config_set() below. | ||
451 | */ | ||
452 | |||
453 | #define MAX_MATCHES 512 | ||
454 | |||
455 | static struct { | ||
456 | int baselen; | ||
457 | char* key; | ||
458 | int do_not_match; | ||
459 | regex_t* value_regex; | ||
460 | int multi_replace; | ||
461 | size_t offset[MAX_MATCHES]; | ||
462 | enum { START, SECTION_SEEN, SECTION_END_SEEN, KEY_SEEN } state; | ||
463 | int seen; | ||
464 | } store; | ||
465 | |||
466 | static int matches(const char* key, const char* value) | ||
467 | { | ||
468 | return !strcmp(key, store.key) && | ||
469 | (store.value_regex == NULL || | ||
470 | (store.do_not_match ^ | ||
471 | !regexec(store.value_regex, value, 0, NULL, 0))); | ||
472 | } | ||
473 | |||
static int store_aux(const char* key, const char* value, void *cb)
{
	/*
	 * Parser callback used by perf_config_set_multivar() to locate the
	 * file offsets of the key/value pair(s) to replace.  Advances the
	 * state machine in 'store' and records ftell() positions in
	 * store.offset[].  Returns 1 to abort on too many matches.
	 */
	const char *ep;
	size_t section_len;

	switch (store.state) {
	case KEY_SEEN:
		if (matches(key, value)) {
			if (store.seen == 1 && store.multi_replace == 0) {
				warning("%s has multiple values", key);
			} else if (store.seen >= MAX_MATCHES) {
				error("too many matches for %s", key);
				return 1;
			}

			store.offset[store.seen] = ftell(config_file);
			store.seen++;
		}
		break;
	case SECTION_SEEN:
		/*
		 * What we are looking for is in store.key (both
		 * section and var), and its section part is baselen
		 * long. We found key (again, both section and var).
		 * We would want to know if this key is in the same
		 * section as what we are looking for. We already
		 * know we are in the same section as what should
		 * hold store.key.
		 */
		ep = strrchr(key, '.');
		section_len = ep - key;

		if ((section_len != store.baselen) ||
		    memcmp(key, store.key, section_len+1)) {
			store.state = SECTION_END_SEEN;
			break;
		}

		/*
		 * Do not increment matches: this is no match, but we
		 * just made sure we are in the desired section.
		 */
		store.offset[store.seen] = ftell(config_file);
		/* fallthru */
	case SECTION_END_SEEN:
	case START:
		if (matches(key, value)) {
			store.offset[store.seen] = ftell(config_file);
			store.state = KEY_SEEN;
			store.seen++;
		} else {
			/* same section as the target key: remember where it ends */
			if (strrchr(key, '.') - key == store.baselen &&
			    !strncmp(key, store.key, store.baselen)) {
				store.state = SECTION_SEEN;
				store.offset[store.seen] = ftell(config_file);
			}
		}
	}
	return 0;
}
534 | |||
static int store_write_section(int fd, const char* key)
{
	/*
	 * Write the "[section]" (or '[section "subsection"]', with '"' and
	 * '\' escaped) header for 'key' to fd.  The section part is the
	 * first store.baselen bytes of 'key'.  Returns nonzero on success.
	 */
	const char *dot;
	int i, success;
	struct strbuf sb = STRBUF_INIT;

	dot = memchr(key, '.', store.baselen);
	if (dot) {
		/* extended form: quote the subsection after the dot */
		strbuf_addf(&sb, "[%.*s \"", (int)(dot - key), key);
		for (i = dot - key + 1; i < store.baselen; i++) {
			if (key[i] == '"' || key[i] == '\\')
				strbuf_addch(&sb, '\\');
			strbuf_addch(&sb, key[i]);
		}
		strbuf_addstr(&sb, "\"]\n");
	} else {
		strbuf_addf(&sb, "[%.*s]\n", store.baselen, key);
	}

	success = write_in_full(fd, sb.buf, sb.len) == sb.len;
	strbuf_release(&sb);

	return success;
}
559 | |||
static int store_write_pair(int fd, const char* key, const char* value)
{
	/*
	 * Write one "\tvar = value\n" line to fd, backslash-escaping
	 * newline, tab, '"' and '\\' in the value, and double-quoting the
	 * whole value when needed (see below).  The variable name is the
	 * part of 'key' after the section prefix.  Returns nonzero on
	 * success.
	 */
	int i, success;
	int length = strlen(key + store.baselen + 1);
	const char *quote = "";
	struct strbuf sb = STRBUF_INIT;

	/*
	 * Check to see if the value needs to be surrounded with a dq pair.
	 * Note that problematic characters are always backslash-quoted; this
	 * check is about not losing leading or trailing SP and strings that
	 * follow beginning-of-comment characters (i.e. ';' and '#') by the
	 * configuration parser.
	 */
	if (value[0] == ' ')
		quote = "\"";
	for (i = 0; value[i]; i++)
		if (value[i] == ';' || value[i] == '#')
			quote = "\"";
	if (i && value[i - 1] == ' ')
		quote = "\"";

	strbuf_addf(&sb, "\t%.*s = %s",
		    length, key + store.baselen + 1, quote);

	for (i = 0; value[i]; i++)
		switch (value[i]) {
		case '\n':
			strbuf_addstr(&sb, "\\n");
			break;
		case '\t':
			strbuf_addstr(&sb, "\\t");
			break;
		case '"':
		case '\\':
			strbuf_addch(&sb, '\\');
			/* intentional fallthrough: emit the character itself too */
		default:
			strbuf_addch(&sb, value[i]);
			break;
		}
	strbuf_addf(&sb, "%s\n", quote);

	success = write_in_full(fd, sb.buf, sb.len) == sb.len;
	strbuf_release(&sb);

	return success;
}
607 | |||
static ssize_t find_beginning_of_line(const char* contents, size_t size,
	size_t offset_, int* found_bracket)
{
	/*
	 * Walk backwards from offset_ to the start of the logical line it
	 * belongs to, following "\<newline>" continuations.  Sets
	 * *found_bracket when a section header ']' precedes any '=' on the
	 * line, and returns the offset just past it (or just past the
	 * preceding newline).
	 */
	size_t equal_offset = size, bracket_offset = size;
	ssize_t offset;

contline:
	for (offset = offset_-2; offset > 0
			&& contents[offset] != '\n'; offset--)
		switch (contents[offset]) {
		case '=': equal_offset = offset; break;
		case ']': bracket_offset = offset; break;
		}
	if (offset > 0 && contents[offset-1] == '\\') {
		/* previous physical line continues onto this one */
		offset_ = offset;
		goto contline;
	}
	if (bracket_offset < equal_offset) {
		*found_bracket = 1;
		offset = bracket_offset+1;
	} else
		offset++;

	return offset;
}
633 | |||
634 | int perf_config_set(const char* key, const char* value) | ||
635 | { | ||
636 | return perf_config_set_multivar(key, value, NULL, 0); | ||
637 | } | ||
638 | |||
639 | /* | ||
640 | * If value==NULL, unset in (remove from) config, | ||
641 | * if value_regex!=NULL, disregard key/value pairs where value does not match. | ||
642 | * if multi_replace==0, nothing, or only one matching key/value is replaced, | ||
643 | * else all matching key/values (regardless how many) are removed, | ||
644 | * before the new pair is written. | ||
645 | * | ||
646 | * Returns 0 on success. | ||
647 | * | ||
648 | * This function does this: | ||
649 | * | ||
650 | * - it locks the config file by creating ".perf/config.lock" | ||
651 | * | ||
652 | * - it then parses the config using store_aux() as validator to find | ||
653 | * the position on the key/value pair to replace. If it is to be unset, | ||
654 | * it must be found exactly once. | ||
655 | * | ||
656 | * - the config file is mmap()ed and the part before the match (if any) is | ||
657 | * written to the lock file, then the changed part and the rest. | ||
658 | * | ||
659 | * - the config file is removed and the lock file rename()d to it. | ||
660 | * | ||
661 | */ | ||
int perf_config_set_multivar(const char* key, const char* value,
	const char* value_regex, int multi_replace)
{
	int i, dot;
	/*
	 * NOTE(review): 'fd' is initialized to -1 and never opened anywhere
	 * in this function (the lock-file handling of the upstream git
	 * version appears to have been stripped in this port), so every
	 * store_write_*()/write_in_full() call below writes to an invalid
	 * descriptor and fails.  TODO: restore the output-file handling.
	 */
	int fd = -1, in_fd;
	int ret = 0;
	char* config_filename;
	const char* last_dot = strrchr(key, '.');

	if (config_exclusive_filename)
		config_filename = strdup(config_exclusive_filename);
	else
		config_filename = perf_pathdup("config");

	/*
	 * Since "key" actually contains the section name and the real
	 * key name separated by a dot, we have to know where the dot is.
	 */

	if (last_dot == NULL) {
		error("key does not contain a section: %s", key);
		ret = 2;
		goto out_free;
	}
	store.baselen = last_dot - key;

	store.multi_replace = multi_replace;

	/*
	 * Validate the key and while at it, lower case it for matching.
	 */
	store.key = malloc(strlen(key) + 1);
	dot = 0;
	for (i = 0; key[i]; i++) {
		unsigned char c = key[i];
		if (c == '.')
			dot = 1;
		/* Leave the extended basename untouched.. */
		if (!dot || i > store.baselen) {
			if (!iskeychar(c) || (i == store.baselen+1 && !isalpha(c))) {
				error("invalid key: %s", key);
				free(store.key);
				ret = 1;
				goto out_free;
			}
			c = tolower(c);
		} else if (c == '\n') {
			error("invalid key (newline): %s", key);
			free(store.key);
			ret = 1;
			goto out_free;
		}
		store.key[i] = c;
	}
	store.key[i] = 0;

	/*
	 * If .perf/config does not exist yet, write a minimal version.
	 */
	in_fd = open(config_filename, O_RDONLY);
	if ( in_fd < 0 ) {
		free(store.key);

		if ( ENOENT != errno ) {
			error("opening %s: %s", config_filename,
			      strerror(errno));
			ret = 3; /* same as "invalid config file" */
			goto out_free;
		}
		/* if nothing to unset, error out */
		if (value == NULL) {
			ret = 5;
			goto out_free;
		}

		store.key = (char*)key;
		if (!store_write_section(fd, key) ||
		    !store_write_pair(fd, key, value))
			goto write_err_out;
	} else {
		struct stat st;
		char* contents;
		size_t contents_sz, copy_begin, copy_end;
		int i, new_line = 0;	/* NOTE(review): this 'i' shadows the outer one */

		if (value_regex == NULL)
			store.value_regex = NULL;
		else {
			/* a leading '!' negates the value regex */
			if (value_regex[0] == '!') {
				store.do_not_match = 1;
				value_regex++;
			} else
				store.do_not_match = 0;

			store.value_regex = (regex_t*)malloc(sizeof(regex_t));
			if (regcomp(store.value_regex, value_regex,
					REG_EXTENDED)) {
				error("invalid pattern: %s", value_regex);
				free(store.value_regex);
				ret = 6;
				goto out_free;
			}
		}

		store.offset[0] = 0;
		store.state = START;
		store.seen = 0;

		/*
		 * After this, store.offset will contain the *end* offset
		 * of the last match, or remain at 0 if no match was found.
		 * As a side effect, we make sure to transform only a valid
		 * existing config file.
		 */
		if (perf_config_from_file(store_aux, config_filename, NULL)) {
			error("invalid config file %s", config_filename);
			free(store.key);
			if (store.value_regex != NULL) {
				regfree(store.value_regex);
				free(store.value_regex);
			}
			ret = 3;
			goto out_free;
		}

		free(store.key);
		if (store.value_regex != NULL) {
			regfree(store.value_regex);
			free(store.value_regex);
		}

		/* if nothing to unset, or too many matches, error out */
		if ((store.seen == 0 && value == NULL) ||
		    (store.seen > 1 && multi_replace == 0)) {
			ret = 5;
			goto out_free;
		}

		/* NOTE(review): fstat() and mmap() results are unchecked;
		 * mmap() may return MAP_FAILED. */
		fstat(in_fd, &st);
		contents_sz = xsize_t(st.st_size);
		contents = mmap(NULL, contents_sz, PROT_READ,
			MAP_PRIVATE, in_fd, 0);
		close(in_fd);

		if (store.seen == 0)
			store.seen = 1;

		for (i = 0, copy_begin = 0; i < store.seen; i++) {
			if (store.offset[i] == 0) {
				store.offset[i] = copy_end = contents_sz;
			} else if (store.state != KEY_SEEN) {
				copy_end = store.offset[i];
			} else
				copy_end = find_beginning_of_line(
					contents, contents_sz,
					store.offset[i]-2, &new_line);

			if (copy_end > 0 && contents[copy_end-1] != '\n')
				new_line = 1;

			/* write the first part of the config */
			if (copy_end > copy_begin) {
				if (write_in_full(fd, contents + copy_begin,
						  copy_end - copy_begin) <
				    copy_end - copy_begin)
					goto write_err_out;
				if (new_line &&
				    write_in_full(fd, "\n", 1) != 1)
					goto write_err_out;
			}
			copy_begin = store.offset[i];
		}

		/* write the pair (value == NULL means unset) */
		if (value != NULL) {
			if (store.state == START) {
				if (!store_write_section(fd, key))
					goto write_err_out;
			}
			if (!store_write_pair(fd, key, value))
				goto write_err_out;
		}

		/* write the rest of the config */
		if (copy_begin < contents_sz)
			if (write_in_full(fd, contents + copy_begin,
					  contents_sz - copy_begin) <
			    contents_sz - copy_begin)
				goto write_err_out;

		munmap(contents, contents_sz);
	}

	ret = 0;

out_free:
	free(config_filename);
	return ret;

write_err_out:
	/*
	 * NOTE(review): 'ret' is not set here, so a write failure returns
	 * whatever ret last held (usually 0 == success); upstream git sets
	 * an error code at this point before jumping to cleanup.
	 */
	goto out_free;

}
865 | |||
/*
 * Call this to report error for your variable that should not
 * get a boolean value (i.e. "[my] var" means "true").
 * Returns error()'s result (a negative value) so config callbacks can
 * propagate it directly as their return value.
 */
int config_error_nonbool(const char *var)
{
	return error("Missing value for '%s'", var);
}
diff --git a/Documentation/perf_counter/util/ctype.c b/Documentation/perf_counter/util/ctype.c new file mode 100644 index 000000000000..b90ec004f29c --- /dev/null +++ b/Documentation/perf_counter/util/ctype.c | |||
@@ -0,0 +1,26 @@ | |||
/*
 * Sane locale-independent, ASCII ctype.
 *
 * No surprises, and works with signed and unsigned chars.
 */
#include "cache.h"

/*
 * One-letter aliases for the GIT_* classification bits so the
 * 256-entry table below stays readable.
 */
enum {
	S = GIT_SPACE,
	A = GIT_ALPHA,
	D = GIT_DIGIT,
	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | * */
};

/*
 * Classification bitmask for each possible unsigned char value.
 * Entries 128..255 are implicitly zero-initialized (no class bits).
 */
unsigned char sane_ctype[256] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0,		/*   0.. 15 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,		/*  16.. 31 */
	S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0,		/*  32.. 47 */
	D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G,		/*  48.. 63 */
	0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
	A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0,		/*  80.. 95 */
	0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
	A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0,		/* 112..127 */
	/* Nothing in the 128.. range */
};
diff --git a/Documentation/perf_counter/util/exec_cmd.c b/Documentation/perf_counter/util/exec_cmd.c new file mode 100644 index 000000000000..d39292263153 --- /dev/null +++ b/Documentation/perf_counter/util/exec_cmd.c | |||
@@ -0,0 +1,165 @@ | |||
1 | #include "cache.h" | ||
2 | #include "exec_cmd.h" | ||
3 | #include "quote.h" | ||
4 | #define MAX_ARGS 32 | ||
5 | |||
6 | extern char **environ; | ||
7 | static const char *argv_exec_path; | ||
8 | static const char *argv0_path; | ||
9 | |||
/*
 * Resolve "path" against the installation prefix.
 *
 * An absolute "path" is returned as-is.  Otherwise the result is
 * "<prefix>/<path>" in a newly allocated string (detached from the
 * strbuf; it is not freed here — callers keep the result).
 *
 * With RUNTIME_PREFIX, the prefix is derived once from argv0_path by
 * stripping a known installation suffix; otherwise the compile-time
 * PREFIX is used.
 */
const char *system_path(const char *path)
{
#ifdef RUNTIME_PREFIX
	static const char *prefix;
#else
	static const char *prefix = PREFIX;
#endif
	struct strbuf d = STRBUF_INIT;

	if (is_absolute_path(path))
		return path;

#ifdef RUNTIME_PREFIX
	assert(argv0_path);
	assert(is_absolute_path(argv0_path));

	/* Computed once; on failure fall back to the static PREFIX. */
	if (!prefix &&
	    !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) &&
	    !(prefix = strip_path_suffix(argv0_path, BINDIR)) &&
	    !(prefix = strip_path_suffix(argv0_path, "perf"))) {
		prefix = PREFIX;
		fprintf(stderr, "RUNTIME_PREFIX requested, "
				"but prefix computation failed. "
				"Using static fallback '%s'.\n", prefix);
	}
#endif

	strbuf_addf(&d, "%s/%s", prefix, path);
	path = strbuf_detach(&d, NULL);
	return path;
}
41 | |||
42 | const char *perf_extract_argv0_path(const char *argv0) | ||
43 | { | ||
44 | const char *slash; | ||
45 | |||
46 | if (!argv0 || !*argv0) | ||
47 | return NULL; | ||
48 | slash = argv0 + strlen(argv0); | ||
49 | |||
50 | while (argv0 <= slash && !is_dir_sep(*slash)) | ||
51 | slash--; | ||
52 | |||
53 | if (slash >= argv0) { | ||
54 | argv0_path = strndup(argv0, slash - argv0); | ||
55 | return slash + 1; | ||
56 | } | ||
57 | |||
58 | return argv0; | ||
59 | } | ||
60 | |||
61 | void perf_set_argv_exec_path(const char *exec_path) | ||
62 | { | ||
63 | argv_exec_path = exec_path; | ||
64 | /* | ||
65 | * Propagate this setting to external programs. | ||
66 | */ | ||
67 | setenv(EXEC_PATH_ENVIRONMENT, exec_path, 1); | ||
68 | } | ||
69 | |||
70 | |||
71 | /* Returns the highest-priority, location to look for perf programs. */ | ||
72 | const char *perf_exec_path(void) | ||
73 | { | ||
74 | const char *env; | ||
75 | |||
76 | if (argv_exec_path) | ||
77 | return argv_exec_path; | ||
78 | |||
79 | env = getenv(EXEC_PATH_ENVIRONMENT); | ||
80 | if (env && *env) { | ||
81 | return env; | ||
82 | } | ||
83 | |||
84 | return system_path(PERF_EXEC_PATH); | ||
85 | } | ||
86 | |||
87 | static void add_path(struct strbuf *out, const char *path) | ||
88 | { | ||
89 | if (path && *path) { | ||
90 | if (is_absolute_path(path)) | ||
91 | strbuf_addstr(out, path); | ||
92 | else | ||
93 | strbuf_addstr(out, make_nonrelative_path(path)); | ||
94 | |||
95 | strbuf_addch(out, PATH_SEP); | ||
96 | } | ||
97 | } | ||
98 | |||
99 | void setup_path(void) | ||
100 | { | ||
101 | const char *old_path = getenv("PATH"); | ||
102 | struct strbuf new_path = STRBUF_INIT; | ||
103 | |||
104 | add_path(&new_path, perf_exec_path()); | ||
105 | add_path(&new_path, argv0_path); | ||
106 | |||
107 | if (old_path) | ||
108 | strbuf_addstr(&new_path, old_path); | ||
109 | else | ||
110 | strbuf_addstr(&new_path, "/usr/local/bin:/usr/bin:/bin"); | ||
111 | |||
112 | setenv("PATH", new_path.buf, 1); | ||
113 | |||
114 | strbuf_release(&new_path); | ||
115 | } | ||
116 | |||
/*
 * Build a new NULL-terminated argument vector with "perf" prepended to
 * the caller's NULL-terminated argv.  The returned array is malloc'd;
 * the caller owns (and frees) it.  The strings themselves are shared
 * with the input, not copied.
 */
const char **prepare_perf_cmd(const char **argv)
{
	int nr;
	const char **nargv;

	/* count the caller's arguments */
	nr = 0;
	while (argv[nr])
		nr++;

	/* one extra slot for "perf" in front, one for the trailing NULL */
	nargv = malloc(sizeof(*nargv) * (nr + 2));

	nargv[0] = "perf";
	for (nr = 0; argv[nr]; nr++)
		nargv[nr + 1] = argv[nr];
	nargv[nr + 1] = NULL;

	return nargv;
}
132 | |||
/*
 * Replace the current process with "perf" (looked up via $PATH) run on
 * the given NULL-terminated argv.  On success this never returns; a
 * return value of -1 means execvp() itself failed.
 */
int execv_perf_cmd(const char **argv)
{
	const char **nargv;

	nargv = prepare_perf_cmd(argv);
	execvp("perf", (char **)nargv);

	/* still here: the exec failed */
	free(nargv);
	return -1;
}
142 | |||
143 | |||
144 | int execl_perf_cmd(const char *cmd,...) | ||
145 | { | ||
146 | int argc; | ||
147 | const char *argv[MAX_ARGS + 1]; | ||
148 | const char *arg; | ||
149 | va_list param; | ||
150 | |||
151 | va_start(param, cmd); | ||
152 | argv[0] = cmd; | ||
153 | argc = 1; | ||
154 | while (argc < MAX_ARGS) { | ||
155 | arg = argv[argc++] = va_arg(param, char *); | ||
156 | if (!arg) | ||
157 | break; | ||
158 | } | ||
159 | va_end(param); | ||
160 | if (MAX_ARGS <= argc) | ||
161 | return error("too many args to run %s", cmd); | ||
162 | |||
163 | argv[argc] = NULL; | ||
164 | return execv_perf_cmd(argv); | ||
165 | } | ||
diff --git a/Documentation/perf_counter/util/exec_cmd.h b/Documentation/perf_counter/util/exec_cmd.h new file mode 100644 index 000000000000..effe25eb1545 --- /dev/null +++ b/Documentation/perf_counter/util/exec_cmd.h | |||
@@ -0,0 +1,13 @@ | |||
#ifndef PERF_EXEC_CMD_H
#define PERF_EXEC_CMD_H

/* Record the exec path explicitly (also exported to child processes). */
extern void perf_set_argv_exec_path(const char *exec_path);
/* Remember argv[0]'s directory; returns its basename portion. */
extern const char *perf_extract_argv0_path(const char *path);
/* Highest-priority directory to look for perf sub-commands in. */
extern const char *perf_exec_path(void);
/* Prepend the exec path and argv0 path to $PATH. */
extern void setup_path(void);
/* Build a NULL-terminated argv with "perf" prepended; caller frees. */
extern const char **prepare_perf_cmd(const char **argv);
extern int execv_perf_cmd(const char **argv); /* NULL terminated */
extern int execl_perf_cmd(const char *cmd, ...);
/* Resolve a relative path against the installation prefix. */
extern const char *system_path(const char *path);

#endif /* PERF_EXEC_CMD_H */
diff --git a/Documentation/perf_counter/util/generate-cmdlist.sh b/Documentation/perf_counter/util/generate-cmdlist.sh new file mode 100755 index 000000000000..f06f6fd148f8 --- /dev/null +++ b/Documentation/perf_counter/util/generate-cmdlist.sh | |||
@@ -0,0 +1,24 @@ | |||
#!/bin/sh
#
# Emit on stdout a C array "common_cmds[]" mapping each perf command
# marked "common" in command-list.txt to its one-line description,
# scraped from the per-command documentation pages.

echo "/* Automatically generated by $0 */
struct cmdname_help
{
    char name[16];
    char help[80];
};

static struct cmdname_help common_cmds[] = {"

# Pick the name of every command whose command-list.txt entry is
# flagged "common", sorted alphabetically.
sed -n -e 's/^perf-\([^ ]*\)[ ].* common.*/\1/p' command-list.txt |
sort |
while read cmd
do
     # Extract the one-line summary ("perf-foo - does bar") from the
     # NAME section of Documentation/perf-$cmd.txt and emit it as an
     # initializer for the array above.
     sed -n '
     /^NAME/,/perf-'"$cmd"'/H
     ${
            x
            s/.*perf-'"$cmd"' - \(.*\)/  {"'"$cmd"'", "\1"},/
	    p
     }' "Documentation/perf-$cmd.txt"
done
echo "};"
diff --git a/Documentation/perf_counter/util/help.c b/Documentation/perf_counter/util/help.c new file mode 100644 index 000000000000..edde541d238d --- /dev/null +++ b/Documentation/perf_counter/util/help.c | |||
@@ -0,0 +1,366 @@ | |||
1 | #include "cache.h" | ||
2 | #include "../builtin.h" | ||
3 | #include "exec_cmd.h" | ||
4 | #include "levenshtein.h" | ||
5 | #include "help.h" | ||
6 | |||
/* most GUI terminals set COLUMNS (although some don't export it) */
static int term_columns(void)
{
	const char *env = getenv("COLUMNS");

	if (env) {
		int width = atoi(env);

		if (width > 0)
			return width;
	}

#ifdef TIOCGWINSZ
	{
		struct winsize ws;

		/* fall back to asking the tty on stdout */
		if (ioctl(1, TIOCGWINSZ, &ws) == 0 && ws.ws_col)
			return ws.ws_col;
	}
#endif

	return 80;	/* last-resort default */
}
28 | |||
29 | void add_cmdname(struct cmdnames *cmds, const char *name, int len) | ||
30 | { | ||
31 | struct cmdname *ent = malloc(sizeof(*ent) + len + 1); | ||
32 | |||
33 | ent->len = len; | ||
34 | memcpy(ent->name, name, len); | ||
35 | ent->name[len] = 0; | ||
36 | |||
37 | ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc); | ||
38 | cmds->names[cmds->cnt++] = ent; | ||
39 | } | ||
40 | |||
41 | static void clean_cmdnames(struct cmdnames *cmds) | ||
42 | { | ||
43 | int i; | ||
44 | for (i = 0; i < cmds->cnt; ++i) | ||
45 | free(cmds->names[i]); | ||
46 | free(cmds->names); | ||
47 | cmds->cnt = 0; | ||
48 | cmds->alloc = 0; | ||
49 | } | ||
50 | |||
51 | static int cmdname_compare(const void *a_, const void *b_) | ||
52 | { | ||
53 | struct cmdname *a = *(struct cmdname **)a_; | ||
54 | struct cmdname *b = *(struct cmdname **)b_; | ||
55 | return strcmp(a->name, b->name); | ||
56 | } | ||
57 | |||
58 | static void uniq(struct cmdnames *cmds) | ||
59 | { | ||
60 | int i, j; | ||
61 | |||
62 | if (!cmds->cnt) | ||
63 | return; | ||
64 | |||
65 | for (i = j = 1; i < cmds->cnt; i++) | ||
66 | if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) | ||
67 | cmds->names[j++] = cmds->names[i]; | ||
68 | |||
69 | cmds->cnt = j; | ||
70 | } | ||
71 | |||
72 | void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) | ||
73 | { | ||
74 | int ci, cj, ei; | ||
75 | int cmp; | ||
76 | |||
77 | ci = cj = ei = 0; | ||
78 | while (ci < cmds->cnt && ei < excludes->cnt) { | ||
79 | cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name); | ||
80 | if (cmp < 0) | ||
81 | cmds->names[cj++] = cmds->names[ci++]; | ||
82 | else if (cmp == 0) | ||
83 | ci++, ei++; | ||
84 | else if (cmp > 0) | ||
85 | ei++; | ||
86 | } | ||
87 | |||
88 | while (ci < cmds->cnt) | ||
89 | cmds->names[cj++] = cmds->names[ci++]; | ||
90 | |||
91 | cmds->cnt = cj; | ||
92 | } | ||
93 | |||
/*
 * Print the command names in a multi-column, column-major layout
 * (entries run down each column), sized so that each column is
 * "longest + 1" wide and the table fits in the terminal width.
 */
static void pretty_print_string_list(struct cmdnames *cmds, int longest)
{
	int cols = 1, rows;
	int space = longest + 1; /* min 1 SP between words */
	int max_cols = term_columns() - 1; /* don't print *on* the edge */
	int i, j;

	if (space < max_cols)
		cols = max_cols / space;
	/* rows needed to fit cnt entries into cols columns */
	rows = (cmds->cnt + cols - 1) / cols;

	for (i = 0; i < rows; i++) {
		printf(" ");

		for (j = 0; j < cols; j++) {
			/* column-major index of the entry for this cell */
			int n = j * rows + i;
			int size = space;
			if (n >= cmds->cnt)
				break;
			/* last entry on the line needs no padding */
			if (j == cols-1 || n + rows >= cmds->cnt)
				size = 1;
			printf("%-*s", size, cmds->names[n]->name);
		}
		putchar('\n');
	}
}
120 | |||
/*
 * Return non-zero iff "name" is a regular file with the owner-execute
 * bit set.  On MinGW, where the mode bits cannot be trusted, the file
 * content is inspected instead: "#!" (script) or "MZ" (DOS executable)
 * at the start counts as executable.
 */
static int is_executable(const char *name)
{
	struct stat st;

	if (stat(name, &st) || /* stat, not lstat */
	    !S_ISREG(st.st_mode))
		return 0;

#ifdef __MINGW32__
	/* cannot trust the executable bit, peek into the file instead */
	char buf[3] = { 0 };
	int n;
	int fd = open(name, O_RDONLY);
	st.st_mode &= ~S_IXUSR;
	if (fd >= 0) {
		n = read(fd, buf, 2);
		if (n == 2)
			/* DOS executables start with "MZ" */
			if (!strcmp(buf, "#!") || !strcmp(buf, "MZ"))
				st.st_mode |= S_IXUSR;
		close(fd);
	}
#endif
	return st.st_mode & S_IXUSR;
}
146 | |||
/*
 * Append to "cmds" the name of every executable in "path" whose
 * filename starts with "prefix" (default "perf-"); the prefix and any
 * ".exe" suffix are stripped from the stored name.  A missing or
 * unreadable directory is silently ignored.
 */
static void list_commands_in_dir(struct cmdnames *cmds,
					const char *path,
					const char *prefix)
{
	int prefix_len;
	DIR *dir = opendir(path);
	struct dirent *de;
	struct strbuf buf = STRBUF_INIT;
	int len;

	if (!dir)
		return;
	if (!prefix)
		prefix = "perf-";
	prefix_len = strlen(prefix);

	/* buf holds "path/"; each entry name is appended after "len" */
	strbuf_addf(&buf, "%s/", path);
	len = buf.len;

	while ((de = readdir(dir)) != NULL) {
		int entlen;

		if (prefixcmp(de->d_name, prefix))
			continue;

		/* rebuild "path/<entry>" and check that it is executable */
		strbuf_setlen(&buf, len);
		strbuf_addstr(&buf, de->d_name);
		if (!is_executable(buf.buf))
			continue;

		entlen = strlen(de->d_name) - prefix_len;
		if (has_extension(de->d_name, ".exe"))
			entlen -= 4;

		add_cmdname(cmds, de->d_name + prefix_len, entlen);
	}
	closedir(dir);
	strbuf_release(&buf);
}
186 | |||
/*
 * Collect available perf commands: "main_cmds" gets everything
 * matching "prefix" in the perf exec path, "other_cmds" everything
 * matching it elsewhere on $PATH.  Both lists are sorted and
 * de-duplicated, and names already in main_cmds are dropped from
 * other_cmds.
 */
void load_command_list(const char *prefix,
		struct cmdnames *main_cmds,
		struct cmdnames *other_cmds)
{
	const char *env_path = getenv("PATH");
	const char *exec_path = perf_exec_path();

	if (exec_path) {
		list_commands_in_dir(main_cmds, exec_path, prefix);
		qsort(main_cmds->names, main_cmds->cnt,
		      sizeof(*main_cmds->names), cmdname_compare);
		uniq(main_cmds);
	}

	if (env_path) {
		char *paths, *path, *colon;
		/* work on a private copy: the walk writes NULs over ':' */
		path = paths = strdup(env_path);
		while (1) {
			if ((colon = strchr(path, PATH_SEP)))
				*colon = 0;
			/* skip the exec path: already in main_cmds */
			if (!exec_path || strcmp(path, exec_path))
				list_commands_in_dir(other_cmds, path, prefix);

			if (!colon)
				break;
			path = colon + 1;
		}
		free(paths);

		qsort(other_cmds->names, other_cmds->cnt,
		      sizeof(*other_cmds->names), cmdname_compare);
		uniq(other_cmds);
	}
	/* requires main_cmds to be sorted, which it is at this point */
	exclude_cmds(other_cmds, main_cmds);
}
222 | |||
/*
 * Print both command lists under dashed headings.  "longest" is the
 * widest name across BOTH lists so the two tables line up; the dash
 * runs are sized to match the literal heading text plus the
 * interpolated strings.
 */
void list_commands(const char *title, struct cmdnames *main_cmds,
		   struct cmdnames *other_cmds)
{
	int i, longest = 0;

	for (i = 0; i < main_cmds->cnt; i++)
		if (longest < main_cmds->names[i]->len)
			longest = main_cmds->names[i]->len;
	for (i = 0; i < other_cmds->cnt; i++)
		if (longest < other_cmds->names[i]->len)
			longest = other_cmds->names[i]->len;

	if (main_cmds->cnt) {
		const char *exec_path = perf_exec_path();
		printf("available %s in '%s'\n", title, exec_path);
		printf("----------------");
		/* extend the underline to cover title and path */
		mput_char('-', strlen(title) + strlen(exec_path));
		putchar('\n');
		pretty_print_string_list(main_cmds, longest);
		putchar('\n');
	}

	if (other_cmds->cnt) {
		printf("%s available from elsewhere on your $PATH\n", title);
		printf("---------------------------------------");
		mput_char('-', strlen(title));
		putchar('\n');
		pretty_print_string_list(other_cmds, longest);
		putchar('\n');
	}
}
254 | |||
255 | int is_in_cmdlist(struct cmdnames *c, const char *s) | ||
256 | { | ||
257 | int i; | ||
258 | for (i = 0; i < c->cnt; i++) | ||
259 | if (!strcmp(s, c->names[i]->name)) | ||
260 | return 1; | ||
261 | return 0; | ||
262 | } | ||
263 | |||
/* help.autocorrect setting (0 = off, >0 = delay in deciseconds). */
static int autocorrect;
/* alias.* names collected during config parsing, used for lookup. */
static struct cmdnames aliases;

/*
 * Config callback: record help.autocorrect and every alias.* name,
 * then fall through to the default config handling.
 */
static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
{
	if (!strcmp(var, "help.autocorrect"))
		autocorrect = perf_config_int(var,value);
	/* Also use aliases for command lookup */
	if (!prefixcmp(var, "alias."))
		add_cmdname(&aliases, var + 6, strlen(var + 6));

	return perf_default_config(var, value, cb);
}
277 | |||
278 | static int levenshtein_compare(const void *p1, const void *p2) | ||
279 | { | ||
280 | const struct cmdname *const *c1 = p1, *const *c2 = p2; | ||
281 | const char *s1 = (*c1)->name, *s2 = (*c2)->name; | ||
282 | int l1 = (*c1)->len; | ||
283 | int l2 = (*c2)->len; | ||
284 | return l1 != l2 ? l1 - l2 : strcmp(s1, s2); | ||
285 | } | ||
286 | |||
287 | static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) | ||
288 | { | ||
289 | int i; | ||
290 | ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc); | ||
291 | |||
292 | for (i = 0; i < old->cnt; i++) | ||
293 | cmds->names[cmds->cnt++] = old->names[i]; | ||
294 | free(old->names); | ||
295 | old->cnt = 0; | ||
296 | old->names = NULL; | ||
297 | } | ||
298 | |||
299 | const char *help_unknown_cmd(const char *cmd) | ||
300 | { | ||
301 | int i, n, best_similarity = 0; | ||
302 | struct cmdnames main_cmds, other_cmds; | ||
303 | |||
304 | memset(&main_cmds, 0, sizeof(main_cmds)); | ||
305 | memset(&other_cmds, 0, sizeof(main_cmds)); | ||
306 | memset(&aliases, 0, sizeof(aliases)); | ||
307 | |||
308 | perf_config(perf_unknown_cmd_config, NULL); | ||
309 | |||
310 | load_command_list("perf-", &main_cmds, &other_cmds); | ||
311 | |||
312 | add_cmd_list(&main_cmds, &aliases); | ||
313 | add_cmd_list(&main_cmds, &other_cmds); | ||
314 | qsort(main_cmds.names, main_cmds.cnt, | ||
315 | sizeof(main_cmds.names), cmdname_compare); | ||
316 | uniq(&main_cmds); | ||
317 | |||
318 | /* This reuses cmdname->len for similarity index */ | ||
319 | for (i = 0; i < main_cmds.cnt; ++i) | ||
320 | main_cmds.names[i]->len = | ||
321 | levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4); | ||
322 | |||
323 | qsort(main_cmds.names, main_cmds.cnt, | ||
324 | sizeof(*main_cmds.names), levenshtein_compare); | ||
325 | |||
326 | if (!main_cmds.cnt) | ||
327 | die ("Uh oh. Your system reports no Git commands at all."); | ||
328 | |||
329 | best_similarity = main_cmds.names[0]->len; | ||
330 | n = 1; | ||
331 | while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len) | ||
332 | ++n; | ||
333 | if (autocorrect && n == 1) { | ||
334 | const char *assumed = main_cmds.names[0]->name; | ||
335 | main_cmds.names[0] = NULL; | ||
336 | clean_cmdnames(&main_cmds); | ||
337 | fprintf(stderr, "WARNING: You called a Git program named '%s', " | ||
338 | "which does not exist.\n" | ||
339 | "Continuing under the assumption that you meant '%s'\n", | ||
340 | cmd, assumed); | ||
341 | if (autocorrect > 0) { | ||
342 | fprintf(stderr, "in %0.1f seconds automatically...\n", | ||
343 | (float)autocorrect/10.0); | ||
344 | poll(NULL, 0, autocorrect * 100); | ||
345 | } | ||
346 | return assumed; | ||
347 | } | ||
348 | |||
349 | fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd); | ||
350 | |||
351 | if (best_similarity < 6) { | ||
352 | fprintf(stderr, "\nDid you mean %s?\n", | ||
353 | n < 2 ? "this": "one of these"); | ||
354 | |||
355 | for (i = 0; i < n; i++) | ||
356 | fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); | ||
357 | } | ||
358 | |||
359 | exit(1); | ||
360 | } | ||
361 | |||
362 | int cmd_version(int argc, const char **argv, const char *prefix) | ||
363 | { | ||
364 | printf("perf version %s\n", perf_version_string); | ||
365 | return 0; | ||
366 | } | ||
diff --git a/Documentation/perf_counter/util/help.h b/Documentation/perf_counter/util/help.h new file mode 100644 index 000000000000..56bc15406ffc --- /dev/null +++ b/Documentation/perf_counter/util/help.h | |||
@@ -0,0 +1,29 @@ | |||
#ifndef HELP_H
#define HELP_H

/* A growable list of command names. */
struct cmdnames {
	int alloc;	/* allocated capacity of names[] */
	int cnt;	/* entries currently in use */
	struct cmdname {
		size_t len; /* also used for similarity index in help.c */
		char name[FLEX_ARRAY];	/* NUL-terminated, stored inline */
	} **names;
};

/* Write character 'c' to stdout 'num' times. */
static inline void mput_char(char c, unsigned int num)
{
	while(num--)
		putchar(c);
}

void load_command_list(const char *prefix,
		struct cmdnames *main_cmds,
		struct cmdnames *other_cmds);
void add_cmdname(struct cmdnames *cmds, const char *name, int len);
/* Here we require that excludes is a sorted list. */
void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes);
int is_in_cmdlist(struct cmdnames *c, const char *s);
void list_commands(const char *title, struct cmdnames *main_cmds,
		   struct cmdnames *other_cmds);

#endif /* HELP_H */
diff --git a/Documentation/perf_counter/util/levenshtein.c b/Documentation/perf_counter/util/levenshtein.c new file mode 100644 index 000000000000..e521d1516df6 --- /dev/null +++ b/Documentation/perf_counter/util/levenshtein.c | |||
@@ -0,0 +1,84 @@ | |||
1 | #include "cache.h" | ||
2 | #include "levenshtein.h" | ||
3 | |||
/*
 * Damerau-Levenshtein distance between two strings, with separately
 * weighted edit operations:
 *
 *   w - transposition ("sWap") of two adjacent characters
 *   s - Substitution of one character
 *   a - insertion ("Add") of one character
 *   d - Deletion of one character
 *
 * The result is the minimum total cost of operations that transform
 * string1 into string2.  The dynamic-programming matrix is computed
 * row by row; only three rows are kept alive at a time, because the
 * transposition case needs to look two rows back (with w >= a + d,
 * two rows would suffice).
 *
 * Note that this is a true distance (symmetric) only when d == a.
 */
int levenshtein(const char *string1, const char *string2,
	int w, int s, int a, int d)
{
	int len1 = strlen(string1), len2 = strlen(string2);
	int *two_ago = malloc(sizeof(int) * (len2 + 1));
	int *one_ago = malloc(sizeof(int) * (len2 + 1));
	int *current = malloc(sizeof(int) * (len2 + 1));
	int i, j, result;

	/* distance from the empty prefix of string1: j insertions */
	for (j = 0; j <= len2; j++)
		one_ago[j] = j * a;

	for (i = 0; i < len1; i++) {
		int *spare;

		/* empty prefix of string2: delete i + 1 characters */
		current[0] = (i + 1) * d;
		for (j = 0; j < len2; j++) {
			int best;

			/* substitution (free when the characters match) */
			best = one_ago[j] + s * (string1[i] != string2[j]);
			/* transposition of two adjacent characters */
			if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
					string1[i] == string2[j - 1] &&
					best > two_ago[j - 1] + w)
				best = two_ago[j - 1] + w;
			/* deletion from string1 */
			if (best > one_ago[j + 1] + d)
				best = one_ago[j + 1] + d;
			/* insertion into string1 */
			if (best > current[j] + a)
				best = current[j] + a;
			current[j + 1] = best;
		}

		/* rotate the row buffers for the next iteration */
		spare = two_ago;
		two_ago = one_ago;
		one_ago = current;
		current = spare;
	}

	result = one_ago[len2];
	free(two_ago);
	free(one_ago);
	free(current);

	return result;
}
diff --git a/Documentation/perf_counter/util/levenshtein.h b/Documentation/perf_counter/util/levenshtein.h new file mode 100644 index 000000000000..0173abeef52c --- /dev/null +++ b/Documentation/perf_counter/util/levenshtein.h | |||
@@ -0,0 +1,8 @@ | |||
#ifndef LEVENSHTEIN_H
#define LEVENSHTEIN_H

/*
 * Weighted Damerau-Levenshtein distance between two strings; each edit
 * operation carries its own cost.  See levenshtein.c for details.
 * (Parameter name fixed: "substition" -> "substitution".)
 */
int levenshtein(const char *string1, const char *string2,
	int swap_penalty, int substitution_penalty,
	int insertion_penalty, int deletion_penalty);

#endif
diff --git a/Documentation/perf_counter/util/parse-options.c b/Documentation/perf_counter/util/parse-options.c new file mode 100644 index 000000000000..28b34c1c29cf --- /dev/null +++ b/Documentation/perf_counter/util/parse-options.c | |||
@@ -0,0 +1,492 @@ | |||
1 | #include "util.h" | ||
2 | #include "parse-options.h" | ||
3 | #include "cache.h" | ||
4 | |||
5 | #define OPT_SHORT 1 | ||
6 | #define OPT_UNSET 2 | ||
7 | |||
8 | static int opterror(const struct option *opt, const char *reason, int flags) | ||
9 | { | ||
10 | if (flags & OPT_SHORT) | ||
11 | return error("switch `%c' %s", opt->short_name, reason); | ||
12 | if (flags & OPT_UNSET) | ||
13 | return error("option `no-%s' %s", opt->long_name, reason); | ||
14 | return error("option `%s' %s", opt->long_name, reason); | ||
15 | } | ||
16 | |||
17 | static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt, | ||
18 | int flags, const char **arg) | ||
19 | { | ||
20 | if (p->opt) { | ||
21 | *arg = p->opt; | ||
22 | p->opt = NULL; | ||
23 | } else if (p->argc == 1 && (opt->flags & PARSE_OPT_LASTARG_DEFAULT)) { | ||
24 | *arg = (const char *)opt->defval; | ||
25 | } else if (p->argc > 1) { | ||
26 | p->argc--; | ||
27 | *arg = *++p->argv; | ||
28 | } else | ||
29 | return opterror(opt, "requires a value", flags); | ||
30 | return 0; | ||
31 | } | ||
32 | |||
/*
 * Apply a recognized option to its destination according to its type.
 * "flags" carries OPT_SHORT/OPT_UNSET; p->opt (when non-NULL) is a
 * value attached directly to the option text.  Returns 0 on success,
 * negative on error.
 */
static int get_value(struct parse_opt_ctx_t *p,
		     const struct option *opt, int flags)
{
	const char *s, *arg;
	const int unset = flags & OPT_UNSET;

	/* a negated option ("--no-foo") can never carry a value */
	if (unset && p->opt)
		return opterror(opt, "takes no value", flags);
	if (unset && (opt->flags & PARSE_OPT_NONEG))
		return opterror(opt, "isn't available", flags);

	/* long options of value-less types reject "--opt=value" */
	if (!(flags & OPT_SHORT) && p->opt) {
		switch (opt->type) {
		case OPTION_CALLBACK:
			if (!(opt->flags & PARSE_OPT_NOARG))
				break;
			/* FALLTHROUGH */
		case OPTION_BOOLEAN:
		case OPTION_BIT:
		case OPTION_SET_INT:
		case OPTION_SET_PTR:
			return opterror(opt, "takes no value", flags);
		default:
			break;
		}
	}

	switch (opt->type) {
	case OPTION_BIT:
		/* set or clear defval's bits in the target int */
		if (unset)
			*(int *)opt->value &= ~opt->defval;
		else
			*(int *)opt->value |= opt->defval;
		return 0;

	case OPTION_BOOLEAN:
		/* counting flag: each occurrence increments, "no-" resets */
		*(int *)opt->value = unset ? 0 : *(int *)opt->value + 1;
		return 0;

	case OPTION_SET_INT:
		*(int *)opt->value = unset ? 0 : opt->defval;
		return 0;

	case OPTION_SET_PTR:
		*(void **)opt->value = unset ? NULL : (void *)opt->defval;
		return 0;

	case OPTION_STRING:
		if (unset)
			*(const char **)opt->value = NULL;
		else if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
			/* optional argument omitted: use the default */
			*(const char **)opt->value = (const char *)opt->defval;
		else
			return get_arg(p, opt, flags, (const char **)opt->value);
		return 0;

	case OPTION_CALLBACK:
		/* the callback's third argument is the "unset" flag */
		if (unset)
			return (*opt->callback)(opt, NULL, 1) ? (-1) : 0;
		if (opt->flags & PARSE_OPT_NOARG)
			return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
		if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
			return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
		if (get_arg(p, opt, flags, &arg))
			return -1;
		return (*opt->callback)(opt, arg, 0) ? (-1) : 0;

	case OPTION_INTEGER:
		if (unset) {
			*(int *)opt->value = 0;
			return 0;
		}
		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
			*(int *)opt->value = opt->defval;
			return 0;
		}
		if (get_arg(p, opt, flags, &arg))
			return -1;
		/* reject trailing garbage after the number */
		*(int *)opt->value = strtol(arg, (char **)&s, 10);
		if (*s)
			return opterror(opt, "expects a numerical value", flags);
		return 0;

	default:
		/* die() is expected not to return */
		die("should not happen, someone must be hit on the forehead");
	}
}
120 | |||
121 | static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options) | ||
122 | { | ||
123 | for (; options->type != OPTION_END; options++) { | ||
124 | if (options->short_name == *p->opt) { | ||
125 | p->opt = p->opt[1] ? p->opt + 1 : NULL; | ||
126 | return get_value(p, options, OPT_SHORT); | ||
127 | } | ||
128 | } | ||
129 | return -2; | ||
130 | } | ||
131 | |||
/*
 * Match "arg" (the text after "--") against the long names in the
 * option table, handling "=value" suffixes, "no-" negation, and
 * unambiguous abbreviations.  Returns get_value()'s result, 0 for an
 * OPTION_ARGUMENT pass-through, or -2 when nothing matches.
 */
static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg,
			  const struct option *options)
{
	const char *arg_end = strchr(arg, '=');
	const struct option *abbrev_option = NULL, *ambiguous_option = NULL;
	int abbrev_flags = 0, ambiguous_flags = 0;

	if (!arg_end)
		arg_end = arg + strlen(arg);

	for (; options->type != OPTION_END; options++) {
		const char *rest;
		int flags = 0;

		if (!options->long_name)
			continue;

		rest = skip_prefix(arg, options->long_name);
		if (options->type == OPTION_ARGUMENT) {
			if (!rest)
				continue;
			if (*rest == '=')
				return opterror(options, "takes no value", flags);
			if (*rest)
				continue;
			/* pass "--name" through to the caller verbatim */
			p->out[p->cpidx++] = arg - 2;
			return 0;
		}
		if (!rest) {
			/* abbreviated? */
			if (!strncmp(options->long_name, arg, arg_end - arg)) {
is_abbreviated:
				if (abbrev_option) {
					/*
					 * If this is abbreviated, it is
					 * ambiguous. So when there is no
					 * exact match later, we need to
					 * error out.
					 */
					ambiguous_option = abbrev_option;
					ambiguous_flags = abbrev_flags;
				}
				if (!(flags & OPT_UNSET) && *arg_end)
					p->opt = arg_end + 1;
				abbrev_option = options;
				abbrev_flags = flags;
				/* keep looking for an exact match */
				continue;
			}
			/* negated and abbreviated very much? */
			if (!prefixcmp("no-", arg)) {
				flags |= OPT_UNSET;
				goto is_abbreviated;
			}
			/* negated? */
			if (strncmp(arg, "no-", 3))
				continue;
			flags |= OPT_UNSET;
			rest = skip_prefix(arg + 3, options->long_name);
			/* abbreviated and negated? */
			if (!rest && !prefixcmp(options->long_name, arg + 3))
				goto is_abbreviated;
			if (!rest)
				continue;
		}
		if (*rest) {
			/* exact name must be followed by '=' or nothing */
			if (*rest != '=')
				continue;
			p->opt = rest + 1;
		}
		return get_value(p, options, flags);
	}

	if (ambiguous_option)
		return error("Ambiguous option: %s "
			"(could be --%s%s or --%s%s)",
			arg,
			(ambiguous_flags & OPT_UNSET) ?  "no-" : "",
			ambiguous_option->long_name,
			(abbrev_flags & OPT_UNSET) ?  "no-" : "",
			abbrev_option->long_name);
	if (abbrev_option)
		return get_value(p, abbrev_option, abbrev_flags);
	return -2;
}
216 | |||
217 | static void check_typos(const char *arg, const struct option *options) | ||
218 | { | ||
219 | if (strlen(arg) < 3) | ||
220 | return; | ||
221 | |||
222 | if (!prefixcmp(arg, "no-")) { | ||
223 | error ("did you mean `--%s` (with two dashes ?)", arg); | ||
224 | exit(129); | ||
225 | } | ||
226 | |||
227 | for (; options->type != OPTION_END; options++) { | ||
228 | if (!options->long_name) | ||
229 | continue; | ||
230 | if (!prefixcmp(options->long_name, arg)) { | ||
231 | error ("did you mean `--%s` (with two dashes ?)", arg); | ||
232 | exit(129); | ||
233 | } | ||
234 | } | ||
235 | } | ||
236 | |||
237 | void parse_options_start(struct parse_opt_ctx_t *ctx, | ||
238 | int argc, const char **argv, int flags) | ||
239 | { | ||
240 | memset(ctx, 0, sizeof(*ctx)); | ||
241 | ctx->argc = argc - 1; | ||
242 | ctx->argv = argv + 1; | ||
243 | ctx->out = argv; | ||
244 | ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0); | ||
245 | ctx->flags = flags; | ||
246 | if ((flags & PARSE_OPT_KEEP_UNKNOWN) && | ||
247 | (flags & PARSE_OPT_STOP_AT_NON_OPTION)) | ||
248 | die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together"); | ||
249 | } | ||
250 | |||
251 | static int usage_with_options_internal(const char * const *, | ||
252 | const struct option *, int); | ||
253 | |||
/*
 * Consume arguments from @ctx until done, or until an unknown option /
 * non-option stops us.  Returns PARSE_OPT_DONE, PARSE_OPT_UNKNOWN, or
 * PARSE_OPT_HELP (propagated from parse_options_usage()).
 */
int parse_options_step(struct parse_opt_ctx_t *ctx,
		       const struct option *options,
		       const char * const usagestr[])
{
	int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP);

	/* we must reset ->opt, unknown short option leave it dangling */
	ctx->opt = NULL;

	for (; ctx->argc; ctx->argc--, ctx->argv++) {
		const char *arg = ctx->argv[0];

		/* non-option (or a lone "-"): copy through, or stop here */
		if (*arg != '-' || !arg[1]) {
			if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION)
				break;
			ctx->out[ctx->cpidx++] = ctx->argv[0];
			continue;
		}

		/* short option(s), possibly aggregated like "-abc" */
		if (arg[1] != '-') {
			ctx->opt = arg + 1;
			if (internal_help && *ctx->opt == 'h')
				return parse_options_usage(usagestr, options);
			switch (parse_short_opt(ctx, options)) {
			case -1:
				return parse_options_usage(usagestr, options);
			case -2:
				goto unknown;
			}
			/* ctx->opt still set means more aggregated letters */
			if (ctx->opt)
				check_typos(arg + 1, options);
			while (ctx->opt) {
				if (internal_help && *ctx->opt == 'h')
					return parse_options_usage(usagestr, options);
				switch (parse_short_opt(ctx, options)) {
				case -1:
					return parse_options_usage(usagestr, options);
				case -2:
					/* fake a short option thing to hide the fact that we may have
					 * started to parse aggregated stuff
					 *
					 * This is leaky, too bad.
					 */
					ctx->argv[0] = strdup(ctx->opt - 1);
					*(char *)ctx->argv[0] = '-';
					goto unknown;
				}
			}
			continue;
		}

		if (!arg[2]) { /* "--" */
			if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) {
				ctx->argc--;
				ctx->argv++;
			}
			break;
		}

		/* long option */
		if (internal_help && !strcmp(arg + 2, "help-all"))
			return usage_with_options_internal(usagestr, options, 1);
		if (internal_help && !strcmp(arg + 2, "help"))
			return parse_options_usage(usagestr, options);
		switch (parse_long_opt(ctx, arg + 2, options)) {
		case -1:
			return parse_options_usage(usagestr, options);
		case -2:
			goto unknown;
		}
		continue;
unknown:
		if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN))
			return PARSE_OPT_UNKNOWN;
		/* KEEP_UNKNOWN: pass the unrecognized argument through */
		ctx->out[ctx->cpidx++] = ctx->argv[0];
		ctx->opt = NULL;
	}
	return PARSE_OPT_DONE;
}
332 | |||
333 | int parse_options_end(struct parse_opt_ctx_t *ctx) | ||
334 | { | ||
335 | memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out)); | ||
336 | ctx->out[ctx->cpidx + ctx->argc] = NULL; | ||
337 | return ctx->cpidx + ctx->argc; | ||
338 | } | ||
339 | |||
340 | int parse_options(int argc, const char **argv, const struct option *options, | ||
341 | const char * const usagestr[], int flags) | ||
342 | { | ||
343 | struct parse_opt_ctx_t ctx; | ||
344 | |||
345 | parse_options_start(&ctx, argc, argv, flags); | ||
346 | switch (parse_options_step(&ctx, options, usagestr)) { | ||
347 | case PARSE_OPT_HELP: | ||
348 | exit(129); | ||
349 | case PARSE_OPT_DONE: | ||
350 | break; | ||
351 | default: /* PARSE_OPT_UNKNOWN */ | ||
352 | if (ctx.argv[0][1] == '-') { | ||
353 | error("unknown option `%s'", ctx.argv[0] + 2); | ||
354 | } else { | ||
355 | error("unknown switch `%c'", *ctx.opt); | ||
356 | } | ||
357 | usage_with_options(usagestr, options); | ||
358 | } | ||
359 | |||
360 | return parse_options_end(&ctx); | ||
361 | } | ||
362 | |||
363 | #define USAGE_OPTS_WIDTH 24 | ||
364 | #define USAGE_GAP 2 | ||
365 | |||
366 | int usage_with_options_internal(const char * const *usagestr, | ||
367 | const struct option *opts, int full) | ||
368 | { | ||
369 | if (!usagestr) | ||
370 | return PARSE_OPT_HELP; | ||
371 | |||
372 | fprintf(stderr, "usage: %s\n", *usagestr++); | ||
373 | while (*usagestr && **usagestr) | ||
374 | fprintf(stderr, " or: %s\n", *usagestr++); | ||
375 | while (*usagestr) { | ||
376 | fprintf(stderr, "%s%s\n", | ||
377 | **usagestr ? " " : "", | ||
378 | *usagestr); | ||
379 | usagestr++; | ||
380 | } | ||
381 | |||
382 | if (opts->type != OPTION_GROUP) | ||
383 | fputc('\n', stderr); | ||
384 | |||
385 | for (; opts->type != OPTION_END; opts++) { | ||
386 | size_t pos; | ||
387 | int pad; | ||
388 | |||
389 | if (opts->type == OPTION_GROUP) { | ||
390 | fputc('\n', stderr); | ||
391 | if (*opts->help) | ||
392 | fprintf(stderr, "%s\n", opts->help); | ||
393 | continue; | ||
394 | } | ||
395 | if (!full && (opts->flags & PARSE_OPT_HIDDEN)) | ||
396 | continue; | ||
397 | |||
398 | pos = fprintf(stderr, " "); | ||
399 | if (opts->short_name) | ||
400 | pos += fprintf(stderr, "-%c", opts->short_name); | ||
401 | if (opts->long_name && opts->short_name) | ||
402 | pos += fprintf(stderr, ", "); | ||
403 | if (opts->long_name) | ||
404 | pos += fprintf(stderr, "--%s", opts->long_name); | ||
405 | |||
406 | switch (opts->type) { | ||
407 | case OPTION_ARGUMENT: | ||
408 | break; | ||
409 | case OPTION_INTEGER: | ||
410 | if (opts->flags & PARSE_OPT_OPTARG) | ||
411 | if (opts->long_name) | ||
412 | pos += fprintf(stderr, "[=<n>]"); | ||
413 | else | ||
414 | pos += fprintf(stderr, "[<n>]"); | ||
415 | else | ||
416 | pos += fprintf(stderr, " <n>"); | ||
417 | break; | ||
418 | case OPTION_CALLBACK: | ||
419 | if (opts->flags & PARSE_OPT_NOARG) | ||
420 | break; | ||
421 | /* FALLTHROUGH */ | ||
422 | case OPTION_STRING: | ||
423 | if (opts->argh) { | ||
424 | if (opts->flags & PARSE_OPT_OPTARG) | ||
425 | if (opts->long_name) | ||
426 | pos += fprintf(stderr, "[=<%s>]", opts->argh); | ||
427 | else | ||
428 | pos += fprintf(stderr, "[<%s>]", opts->argh); | ||
429 | else | ||
430 | pos += fprintf(stderr, " <%s>", opts->argh); | ||
431 | } else { | ||
432 | if (opts->flags & PARSE_OPT_OPTARG) | ||
433 | if (opts->long_name) | ||
434 | pos += fprintf(stderr, "[=...]"); | ||
435 | else | ||
436 | pos += fprintf(stderr, "[...]"); | ||
437 | else | ||
438 | pos += fprintf(stderr, " ..."); | ||
439 | } | ||
440 | break; | ||
441 | default: /* OPTION_{BIT,BOOLEAN,SET_INT,SET_PTR} */ | ||
442 | break; | ||
443 | } | ||
444 | |||
445 | if (pos <= USAGE_OPTS_WIDTH) | ||
446 | pad = USAGE_OPTS_WIDTH - pos; | ||
447 | else { | ||
448 | fputc('\n', stderr); | ||
449 | pad = USAGE_OPTS_WIDTH; | ||
450 | } | ||
451 | fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help); | ||
452 | } | ||
453 | fputc('\n', stderr); | ||
454 | |||
455 | return PARSE_OPT_HELP; | ||
456 | } | ||
457 | |||
/*
 * Print usage/help to stderr and terminate with the conventional
 * usage-error exit code (129).
 */
void usage_with_options(const char * const *usagestr,
			const struct option *opts)
{
	usage_with_options_internal(usagestr, opts, 0);
	exit(129);
}
464 | |||
/*
 * Show the (non-full) usage without exiting; used by the incremental
 * parsing API to report PARSE_OPT_HELP to its caller.
 */
int parse_options_usage(const char * const *usagestr,
			const struct option *opts)
{
	return usage_with_options_internal(usagestr, opts, 0);
}
470 | |||
471 | |||
472 | int parse_opt_verbosity_cb(const struct option *opt, const char *arg, | ||
473 | int unset) | ||
474 | { | ||
475 | int *target = opt->value; | ||
476 | |||
477 | if (unset) | ||
478 | /* --no-quiet, --no-verbose */ | ||
479 | *target = 0; | ||
480 | else if (opt->short_name == 'v') { | ||
481 | if (*target >= 0) | ||
482 | (*target)++; | ||
483 | else | ||
484 | *target = 1; | ||
485 | } else { | ||
486 | if (*target <= 0) | ||
487 | (*target)--; | ||
488 | else | ||
489 | *target = -1; | ||
490 | } | ||
491 | return 0; | ||
492 | } | ||
diff --git a/Documentation/perf_counter/util/parse-options.h b/Documentation/perf_counter/util/parse-options.h new file mode 100644 index 000000000000..a81c7faff68e --- /dev/null +++ b/Documentation/perf_counter/util/parse-options.h | |||
@@ -0,0 +1,172 @@ | |||
1 | #ifndef PARSE_OPTIONS_H | ||
2 | #define PARSE_OPTIONS_H | ||
3 | |||
/* Behaviors understood by the parser (struct option::type). */
enum parse_opt_type {
	/* special types */
	OPTION_END,		/* terminates an option array */
	OPTION_ARGUMENT,	/* copied through verbatim into the output argv */
	OPTION_GROUP,		/* usage-output section header; never parsed */
	/* options with no arguments */
	OPTION_BIT,
	OPTION_BOOLEAN, /* _INCR would have been a better name */
	OPTION_SET_INT,
	OPTION_SET_PTR,
	/* options with arguments (usually) */
	OPTION_STRING,
	OPTION_INTEGER,
	OPTION_CALLBACK,
};
19 | |||
/* Flags for parse_options() / parse_options_start(). */
enum parse_opt_flags {
	PARSE_OPT_KEEP_DASHDASH = 1,	/* leave "--" in the resulting argv */
	PARSE_OPT_STOP_AT_NON_OPTION = 2, /* stop at the first non-option */
	PARSE_OPT_KEEP_ARGV0 = 4,	/* keep argv[0] in the resulting argv */
	PARSE_OPT_KEEP_UNKNOWN = 8,	/* pass unrecognized options through */
	PARSE_OPT_NO_INTERNAL_HELP = 16, /* do not handle -h/--help/--help-all */
};
27 | |||
/* Per-option flags (struct option::flags); see the comment below. */
enum parse_opt_option_flags {
	PARSE_OPT_OPTARG = 1,	/* the option's argument is optional */
	PARSE_OPT_NOARG = 2,	/* the option takes no argument */
	PARSE_OPT_NONEG = 4,	/* the option cannot be negated with no- */
	PARSE_OPT_HIDDEN = 8,	/* only shown by --help-all */
	PARSE_OPT_LASTARG_DEFAULT = 16,
};
35 | |||
36 | struct option; | ||
37 | typedef int parse_opt_cb(const struct option *, const char *arg, int unset); | ||
38 | |||
39 | /* | ||
40 | * `type`:: | ||
41 | * holds the type of the option, you must have an OPTION_END last in your | ||
42 | * array. | ||
43 | * | ||
44 | * `short_name`:: | ||
45 | * the character to use as a short option name, '\0' if none. | ||
46 | * | ||
47 | * `long_name`:: | ||
48 | * the long option name, without the leading dashes, NULL if none. | ||
49 | * | ||
50 | * `value`:: | ||
51 | * stores pointers to the values to be filled. | ||
52 | * | ||
53 | * `argh`:: | ||
54 | * token to explain the kind of argument this option wants. Keep it | ||
 * homogeneous across the repository.
56 | * | ||
57 | * `help`:: | ||
58 | * the short help associated to what the option does. | ||
59 | * Must never be NULL (except for OPTION_END). | ||
60 | * OPTION_GROUP uses this pointer to store the group header. | ||
61 | * | ||
62 | * `flags`:: | ||
63 | * mask of parse_opt_option_flags. | ||
 * PARSE_OPT_OPTARG: says that the argument is optional (not for BOOLEANs)
65 | * PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs | ||
66 | * PARSE_OPT_NONEG: says that this option cannot be negated | ||
67 | * PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in | ||
68 | * the long one. | ||
69 | * | ||
70 | * `callback`:: | ||
71 | * pointer to the callback to use for OPTION_CALLBACK. | ||
72 | * | ||
73 | * `defval`:: | ||
74 | * default value to fill (*->value) with for PARSE_OPT_OPTARG. | ||
75 | * OPTION_{BIT,SET_INT,SET_PTR} store the {mask,integer,pointer} to put in | ||
76 | * the value when met. | ||
77 | * CALLBACKS can use it like they want. | ||
78 | */ | ||
/* A single command-line option; each field is documented in the
 * comment above. */
struct option {
	enum parse_opt_type type;
	int short_name;		/* single-letter form, 0 when absent */
	const char *long_name;	/* "--" form without dashes, NULL when absent */
	void *value;		/* destination the parser writes into */
	const char *argh;	/* argument placeholder shown in usage */
	const char *help;

	int flags;		/* mask of enum parse_opt_option_flags */
	parse_opt_cb *callback;	/* used by OPTION_CALLBACK only */
	intptr_t defval;	/* default/immediate value; type-dependent */
};
91 | |||
92 | #define OPT_END() { OPTION_END } | ||
93 | #define OPT_ARGUMENT(l, h) { OPTION_ARGUMENT, 0, (l), NULL, NULL, (h) } | ||
94 | #define OPT_GROUP(h) { OPTION_GROUP, 0, NULL, NULL, NULL, (h) } | ||
95 | #define OPT_BIT(s, l, v, h, b) { OPTION_BIT, (s), (l), (v), NULL, (h), 0, NULL, (b) } | ||
96 | #define OPT_BOOLEAN(s, l, v, h) { OPTION_BOOLEAN, (s), (l), (v), NULL, (h) } | ||
97 | #define OPT_SET_INT(s, l, v, h, i) { OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) } | ||
98 | #define OPT_SET_PTR(s, l, v, h, p) { OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) } | ||
99 | #define OPT_INTEGER(s, l, v, h) { OPTION_INTEGER, (s), (l), (v), NULL, (h) } | ||
100 | #define OPT_STRING(s, l, v, a, h) { OPTION_STRING, (s), (l), (v), (a), (h) } | ||
101 | #define OPT_DATE(s, l, v, h) \ | ||
102 | { OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \ | ||
103 | parse_opt_approxidate_cb } | ||
104 | #define OPT_CALLBACK(s, l, v, a, h, f) \ | ||
105 | { OPTION_CALLBACK, (s), (l), (v), (a), (h), 0, (f) } | ||
106 | |||
107 | /* parse_options() will filter out the processed options and leave the | ||
 * non-option arguments in argv[].
109 | * Returns the number of arguments left in argv[]. | ||
110 | */ | ||
111 | extern int parse_options(int argc, const char **argv, | ||
112 | const struct option *options, | ||
113 | const char * const usagestr[], int flags); | ||
114 | |||
115 | extern NORETURN void usage_with_options(const char * const *usagestr, | ||
116 | const struct option *options); | ||
117 | |||
/*----- incremental advanced APIs -----*/
119 | |||
/* Return values of parse_options_step(). */
enum {
	PARSE_OPT_HELP = -1,
	PARSE_OPT_DONE,
	PARSE_OPT_UNKNOWN,
};
125 | |||
126 | /* | ||
127 | * It's okay for the caller to consume argv/argc in the usual way. | ||
128 | * Other fields of that structure are private to parse-options and should not | ||
129 | * be modified in any way. | ||
130 | */ | ||
struct parse_opt_ctx_t {
	const char **argv;	/* next input argument to examine */
	const char **out;	/* output array (reuses the original argv) */
	int argc, cpidx;	/* remaining inputs / entries copied to out */
	const char *opt;	/* position inside a short-option aggregate,
				 * or an option's attached value */
	int flags;
};
138 | |||
139 | extern int parse_options_usage(const char * const *usagestr, | ||
140 | const struct option *opts); | ||
141 | |||
142 | extern void parse_options_start(struct parse_opt_ctx_t *ctx, | ||
143 | int argc, const char **argv, int flags); | ||
144 | |||
145 | extern int parse_options_step(struct parse_opt_ctx_t *ctx, | ||
146 | const struct option *options, | ||
147 | const char * const usagestr[]); | ||
148 | |||
149 | extern int parse_options_end(struct parse_opt_ctx_t *ctx); | ||
150 | |||
151 | |||
152 | /*----- some often used options -----*/ | ||
153 | extern int parse_opt_abbrev_cb(const struct option *, const char *, int); | ||
154 | extern int parse_opt_approxidate_cb(const struct option *, const char *, int); | ||
155 | extern int parse_opt_verbosity_cb(const struct option *, const char *, int); | ||
156 | |||
157 | #define OPT__VERBOSE(var) OPT_BOOLEAN('v', "verbose", (var), "be verbose") | ||
158 | #define OPT__QUIET(var) OPT_BOOLEAN('q', "quiet", (var), "be quiet") | ||
159 | #define OPT__VERBOSITY(var) \ | ||
160 | { OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \ | ||
161 | PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \ | ||
162 | { OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \ | ||
163 | PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 } | ||
164 | #define OPT__DRY_RUN(var) OPT_BOOLEAN('n', "dry-run", (var), "dry run") | ||
165 | #define OPT__ABBREV(var) \ | ||
166 | { OPTION_CALLBACK, 0, "abbrev", (var), "n", \ | ||
167 | "use <n> digits to display SHA-1s", \ | ||
168 | PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 } | ||
169 | |||
170 | extern const char *parse_options_fix_filename(const char *prefix, const char *file); | ||
171 | |||
172 | #endif | ||
diff --git a/Documentation/perf_counter/util/path.c b/Documentation/perf_counter/util/path.c new file mode 100644 index 000000000000..a501a40dd2cb --- /dev/null +++ b/Documentation/perf_counter/util/path.c | |||
@@ -0,0 +1,353 @@ | |||
1 | /* | ||
2 | * I'm tired of doing "vsnprintf()" etc just to open a | ||
3 | * file, so here's a "return static buffer with printf" | ||
4 | * interface for paths. | ||
5 | * | ||
6 | * It's obviously not thread-safe. Sue me. But it's quite | ||
7 | * useful for doing things like | ||
8 | * | ||
9 | * f = open(mkpath("%s/%s.perf", base, name), O_RDONLY); | ||
10 | * | ||
11 | * which is what it's designed for. | ||
12 | */ | ||
13 | #include "cache.h" | ||
14 | |||
15 | static char bad_path[] = "/bad-path/"; | ||
16 | /* | ||
17 | * Two hacks: | ||
18 | */ | ||
19 | |||
/* Hack (see above): perf has no repository discovery; the "perf
 * directory" is always the current directory. */
static char *get_perf_dir(void)
{
	return ".";
}
24 | |||
/*
 * BSD strlcpy(): bounded copy that always NUL-terminates (when
 * size > 0) and returns strlen(src), so callers can detect
 * truncation by comparing the result against size.
 */
size_t strlcpy(char *dest, const char *src, size_t size)
{
	size_t src_len = strlen(src);

	if (size != 0) {
		size_t copy = src_len;

		if (copy > size - 1)
			copy = size - 1;
		memcpy(dest, src, copy);
		dest[copy] = '\0';
	}
	return src_len;
}
36 | |||
37 | |||
38 | static char *get_pathname(void) | ||
39 | { | ||
40 | static char pathname_array[4][PATH_MAX]; | ||
41 | static int index; | ||
42 | return pathname_array[3 & ++index]; | ||
43 | } | ||
44 | |||
/*
 * Strip a leading "./" (plus any run of slashes after it) from @path.
 * Returns a pointer into the original buffer; nothing is copied.
 */
static char *cleanup_path(char *path)
{
	if (path[0] == '.' && path[1] == '/') {
		path += 2;
		while (*path == '/')
			path++;
	}
	return path;
}
55 | |||
56 | char *mksnpath(char *buf, size_t n, const char *fmt, ...) | ||
57 | { | ||
58 | va_list args; | ||
59 | unsigned len; | ||
60 | |||
61 | va_start(args, fmt); | ||
62 | len = vsnprintf(buf, n, fmt, args); | ||
63 | va_end(args); | ||
64 | if (len >= n) { | ||
65 | strlcpy(buf, bad_path, n); | ||
66 | return buf; | ||
67 | } | ||
68 | return cleanup_path(buf); | ||
69 | } | ||
70 | |||
/*
 * Format "<perf_dir>/<fmt...>" into @buf (at most @n bytes).  On any
 * overflow the buffer is filled with bad_path instead, so the caller
 * never sees a truncated path.
 */
static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args)
{
	const char *perf_dir = get_perf_dir();
	size_t len;

	len = strlen(perf_dir);
	if (n < len + 1)	/* no room for perf_dir plus NUL */
		goto bad;
	memcpy(buf, perf_dir, len);
	if (len && !is_dir_sep(perf_dir[len-1]))
		buf[len++] = '/';
	len += vsnprintf(buf + len, n - len, fmt, args);
	if (len >= n)		/* formatted part was truncated */
		goto bad;
	return cleanup_path(buf);
bad:
	strlcpy(buf, bad_path, n);
	return buf;
}
90 | |||
91 | char *perf_snpath(char *buf, size_t n, const char *fmt, ...) | ||
92 | { | ||
93 | va_list args; | ||
94 | va_start(args, fmt); | ||
95 | (void)perf_vsnpath(buf, n, fmt, args); | ||
96 | va_end(args); | ||
97 | return buf; | ||
98 | } | ||
99 | |||
100 | char *perf_pathdup(const char *fmt, ...) | ||
101 | { | ||
102 | char path[PATH_MAX]; | ||
103 | va_list args; | ||
104 | va_start(args, fmt); | ||
105 | (void)perf_vsnpath(path, sizeof(path), fmt, args); | ||
106 | va_end(args); | ||
107 | return xstrdup(path); | ||
108 | } | ||
109 | |||
110 | char *mkpath(const char *fmt, ...) | ||
111 | { | ||
112 | va_list args; | ||
113 | unsigned len; | ||
114 | char *pathname = get_pathname(); | ||
115 | |||
116 | va_start(args, fmt); | ||
117 | len = vsnprintf(pathname, PATH_MAX, fmt, args); | ||
118 | va_end(args); | ||
119 | if (len >= PATH_MAX) | ||
120 | return bad_path; | ||
121 | return cleanup_path(pathname); | ||
122 | } | ||
123 | |||
/*
 * Like mkpath(), but prefixes the formatted path with the perf
 * directory.  Returns one of the rotating static buffers from
 * get_pathname(), or bad_path on overflow.
 */
char *perf_path(const char *fmt, ...)
{
	const char *perf_dir = get_perf_dir();
	char *pathname = get_pathname();
	va_list args;
	unsigned len;

	len = strlen(perf_dir);
	if (len > PATH_MAX-100)	/* keep headroom for the formatted part */
		return bad_path;
	memcpy(pathname, perf_dir, len);
	if (len && perf_dir[len-1] != '/')
		pathname[len++] = '/';
	va_start(args, fmt);
	len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args);
	va_end(args);
	if (len >= PATH_MAX)	/* truncated */
		return bad_path;
	return cleanup_path(pathname);
}
144 | |||
145 | |||
/*
 * perf_mkstemp() - create a temporary file from @template, honoring
 * the TMPDIR environment variable (falling back to /tmp).  Returns the
 * open file descriptor, or -1 with errno set (ENAMETOOLONG when the
 * composed path does not fit in @len bytes).
 */
int perf_mkstemp(char *path, size_t len, const char *template)
{
	const char *tmpdir = getenv("TMPDIR");
	int n;

	if (!tmpdir)
		tmpdir = "/tmp";
	n = snprintf(path, len, "%s/%s", tmpdir, template);
	if (n < 0 || (size_t)n >= len) {
		errno = ENAMETOOLONG;
		return -1;
	}
	return mkstemp(path);
}
162 | |||
163 | |||
164 | const char *make_relative_path(const char *abs, const char *base) | ||
165 | { | ||
166 | static char buf[PATH_MAX + 1]; | ||
167 | int baselen; | ||
168 | if (!base) | ||
169 | return abs; | ||
170 | baselen = strlen(base); | ||
171 | if (prefixcmp(abs, base)) | ||
172 | return abs; | ||
173 | if (abs[baselen] == '/') | ||
174 | baselen++; | ||
175 | else if (base[baselen - 1] != '/') | ||
176 | return abs; | ||
177 | strcpy(buf, abs + baselen); | ||
178 | return buf; | ||
179 | } | ||
180 | |||
181 | /* | ||
182 | * It is okay if dst == src, but they should not overlap otherwise. | ||
183 | * | ||
184 | * Performs the following normalizations on src, storing the result in dst: | ||
185 | * - Ensures that components are separated by '/' (Windows only) | ||
186 | * - Squashes sequences of '/'. | ||
187 | * - Removes "." components. | ||
188 | * - Removes ".." components, and the components the precede them. | ||
189 | * Returns failure (non-zero) if a ".." component appears as first path | ||
190 | * component anytime during the normalization. Otherwise, returns success (0). | ||
191 | * | ||
192 | * Note that this function is purely textual. It does not follow symlinks, | ||
193 | * verify the existence of the path, or make any system calls. | ||
194 | */ | ||
int normalize_path_copy(char *dst, const char *src)
{
	char *dst0;

	/* keep a Windows drive prefix ("C:") verbatim */
	if (has_dos_drive_prefix(src)) {
		*dst++ = *src++;
		*dst++ = *src++;
	}
	dst0 = dst;

	/* absolute path: emit exactly one leading '/' */
	if (is_dir_sep(*src)) {
		*dst++ = '/';
		while (is_dir_sep(*src))
			src++;
	}

	for (;;) {
		char c = *src;

		/*
		 * A path component that begins with . could be
		 * special:
		 * (1) "." and ends -- ignore and terminate.
		 * (2) "./" -- ignore them, eat slash and continue.
		 * (3) ".." and ends -- strip one and terminate.
		 * (4) "../" -- strip one, eat slash and continue.
		 */
		if (c == '.') {
			if (!src[1]) {
				/* (1) */
				src++;
			} else if (is_dir_sep(src[1])) {
				/* (2) */
				src += 2;
				while (is_dir_sep(*src))
					src++;
				continue;
			} else if (src[1] == '.') {
				if (!src[2]) {
					/* (3) */
					src += 2;
					goto up_one;
				} else if (is_dir_sep(src[2])) {
					/* (4) */
					src += 3;
					while (is_dir_sep(*src))
						src++;
					goto up_one;
				}
			}
		}

		/* copy up to the next '/', and eat all '/' */
		while ((c = *src++) != '\0' && !is_dir_sep(c))
			*dst++ = c;
		if (is_dir_sep(c)) {
			*dst++ = '/';
			while (is_dir_sep(c))
				c = *src++;
			src--;
		} else if (!c)
			break;
		continue;

	up_one:
		/*
		 * dst0..dst is prefix portion, and dst[-1] is '/';
		 * go up one level.
		 */
		dst--;	/* go to trailing '/' */
		if (dst <= dst0)
			return -1;	/* ".." would escape the root */
		/* Windows: dst[-1] cannot be backslash anymore */
		while (dst0 < dst && dst[-1] != '/')
			dst--;
	}
	*dst = '\0';
	return 0;
}
274 | |||
275 | /* | ||
276 | * path = Canonical absolute path | ||
277 | * prefix_list = Colon-separated list of absolute paths | ||
278 | * | ||
279 | * Determines, for each path in prefix_list, whether the "prefix" really | ||
280 | * is an ancestor directory of path. Returns the length of the longest | ||
281 | * ancestor directory, excluding any trailing slashes, or -1 if no prefix | ||
282 | * is an ancestor. (Note that this means 0 is returned if prefix_list is | ||
283 | * "/".) "/foo" is not considered an ancestor of "/foobar". Directories | ||
284 | * are not considered to be their own ancestors. path must be in a | ||
285 | * canonical form: empty components, or "." or ".." components are not | ||
286 | * allowed. prefix_list may be null, which is like "". | ||
287 | */ | ||
int longest_ancestor_length(const char *path, const char *prefix_list)
{
	char buf[PATH_MAX+1];
	const char *ceil, *colon;
	int len, max_len = -1;

	if (prefix_list == NULL || !strcmp(path, "/"))
		return -1;

	/* walk each PATH_SEP-separated entry of prefix_list */
	for (colon = ceil = prefix_list; *colon; ceil = colon+1) {
		for (colon = ceil; *colon && *colon != PATH_SEP; colon++);
		len = colon - ceil;
		/* skip empty, overlong and relative entries */
		if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil))
			continue;
		strlcpy(buf, ceil, len+1);
		if (normalize_path_copy(buf, buf) < 0)
			continue;
		len = strlen(buf);
		/* drop a trailing slash so "/foo/" behaves like "/foo" */
		if (len > 0 && buf[len-1] == '/')
			buf[--len] = '\0';

		/* ancestor only counts at a component boundary of path */
		if (!strncmp(path, buf, len) &&
		    path[len] == '/' &&
		    len > max_len) {
			max_len = len;
		}
	}

	return max_len;
}
318 | |||
/* Return @len with any run of trailing directory separators removed. */
static inline int chomp_trailing_dir_sep(const char *path, int len)
{
	int n = len;

	while (n && is_dir_sep(path[n - 1]))
		n--;
	return n;
}
326 | |||
327 | /* | ||
328 | * If path ends with suffix (complete path components), returns the | ||
329 | * part before suffix (sans trailing directory separators). | ||
330 | * Otherwise returns NULL. | ||
331 | */ | ||
332 | char *strip_path_suffix(const char *path, const char *suffix) | ||
333 | { | ||
334 | int path_len = strlen(path), suffix_len = strlen(suffix); | ||
335 | |||
336 | while (suffix_len) { | ||
337 | if (!path_len) | ||
338 | return NULL; | ||
339 | |||
340 | if (is_dir_sep(path[path_len - 1])) { | ||
341 | if (!is_dir_sep(suffix[suffix_len - 1])) | ||
342 | return NULL; | ||
343 | path_len = chomp_trailing_dir_sep(path, path_len); | ||
344 | suffix_len = chomp_trailing_dir_sep(suffix, suffix_len); | ||
345 | } | ||
346 | else if (path[--path_len] != suffix[--suffix_len]) | ||
347 | return NULL; | ||
348 | } | ||
349 | |||
350 | if (path_len && !is_dir_sep(path[path_len - 1])) | ||
351 | return NULL; | ||
352 | return xstrndup(path, chomp_trailing_dir_sep(path, path_len)); | ||
353 | } | ||
diff --git a/Documentation/perf_counter/util/quote.c b/Documentation/perf_counter/util/quote.c new file mode 100644 index 000000000000..7a49fcf69671 --- /dev/null +++ b/Documentation/perf_counter/util/quote.c | |||
@@ -0,0 +1,478 @@ | |||
1 | #include "cache.h" | ||
2 | #include "quote.h" | ||
3 | |||
4 | int quote_path_fully = 1; | ||
5 | |||
6 | /* Help to copy the thing properly quoted for the shell safety. | ||
7 | * any single quote is replaced with '\'', any exclamation point | ||
 * is replaced with '\!', and the whole thing is enclosed in a
 * single quote pair.
 *
10 | * E.g. | ||
11 | * original sq_quote result | ||
12 | * name ==> name ==> 'name' | ||
13 | * a b ==> a b ==> 'a b' | ||
14 | * a'b ==> a'\''b ==> 'a'\''b' | ||
15 | * a!b ==> a'\!'b ==> 'a'\!'b' | ||
16 | */ | ||
/* Non-zero when C must be backslash-quoted inside a shell sq string. */
static inline int need_bs_quote(char c)
{
	return c == '\'' || c == '!';
}
21 | |||
/*
 * Append the shell-quoted (sq_quote) form of "src" to "dst".
 * Handles the aliasing case where src points into dst->buf by
 * detaching the old buffer first and freeing it at the end.
 */
void sq_quote_buf(struct strbuf *dst, const char *src)
{
	char *to_free = NULL;

	/* src may alias dst; keep the old buffer alive until we are done */
	if (dst->buf == src)
		to_free = strbuf_detach(dst, NULL);

	strbuf_addch(dst, '\'');
	while (*src) {
		/* copy the longest run free of shell metacharacters */
		size_t len = strcspn(src, "'!");
		strbuf_add(dst, src, len);
		src += len;
		/* emit '\X' (quote, backslash, X, quote) per metacharacter */
		while (need_bs_quote(*src)) {
			strbuf_addstr(dst, "'\\");
			strbuf_addch(dst, *src++);
			strbuf_addch(dst, '\'');
		}
	}
	strbuf_addch(dst, '\'');
	free(to_free);
}
43 | |||
/*
 * Print "src" to "stream" quoted for the shell: wrapped in single
 * quotes, with each embedded quote or exclamation point emitted as
 * the '\X' escape sequence (same output as sq_quote_buf).
 */
void sq_quote_print(FILE *stream, const char *src)
{
	char ch;

	fputc('\'', stream);
	for (ch = *src; ch; ch = *++src) {
		if (ch == '\'' || ch == '!') {
			fputs("'\\", stream);
			fputc(ch, stream);
			fputc('\'', stream);
		} else {
			fputc(ch, stream);
		}
	}
	fputc('\'', stream);
}
60 | |||
/*
 * Append every argv[] element to "dst", each preceded by a space and
 * shell-quoted via sq_quote_buf().  Dies if maxlen is non-zero and
 * the accumulated result grows beyond it.
 */
void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen)
{
	int i;

	/* Copy into destination buffer. */
	strbuf_grow(dst, 255);
	for (i = 0; argv[i]; ++i) {
		strbuf_addch(dst, ' ');
		sq_quote_buf(dst, argv[i]);
		if (maxlen && dst->len > maxlen)
			die("Too many or long arguments");
	}
}
74 | |||
/*
 * Undo sq_quote() in place.  "arg" must start with a single quote.
 * On success the dequoted text is written over "arg" and "arg" is
 * returned; malformed input yields NULL.  When "next" is non-NULL,
 * parsing may stop at whitespace after the closing quote and *next is
 * set to the following argument (or NULL at end of string).
 */
char *sq_dequote_step(char *arg, char **next)
{
	char *dst = arg;
	char *src = arg;
	char c;

	if (*src != '\'')
		return NULL;
	for (;;) {
		c = *++src;
		if (!c)
			return NULL;	/* unterminated quote */
		if (c != '\'') {
			*dst++ = c;
			continue;
		}
		/* We stepped out of sq */
		switch (*++src) {
		case '\0':
			*dst = 0;
			if (next)
				*next = NULL;
			return arg;
		case '\\':
			/* expect the '\'' or '\!' escape that sq_quote emits */
			c = *++src;
			if (need_bs_quote(c) && *++src == '\'') {
				*dst++ = c;
				continue;
			}
			/* Fallthrough */
		default:
			if (!next || !isspace(*src))
				return NULL;
			/* skip separating whitespace before the next word */
			do {
				c = *++src;
			} while (isspace(c));
			*dst = 0;
			*next = src;
			return arg;
		}
	}
}
117 | |||
/* Unwrap what sq_quote() produced, in place; NULL on malformed input. */
char *sq_dequote(char *arg)
{
	return sq_dequote_step(arg, NULL);
}
122 | |||
/*
 * Dequote a whitespace-separated list of sq-quoted words in place,
 * appending a pointer to each word to *argv (grown with ALLOC_GROW;
 * *nr and *alloc track its fill and capacity).  Returns 0 on success,
 * -1 on malformed input.
 */
int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc)
{
	char *next = arg;

	if (!*arg)
		return 0;	/* empty string: nothing to add */
	do {
		char *dequoted = sq_dequote_step(next, &next);
		if (!dequoted)
			return -1;
		ALLOC_GROW(*argv, *nr + 1, *alloc);
		(*argv)[(*nr)++] = dequoted;
	} while (next);

	return 0;
}
139 | |||
/* 1 means: quote as octal
 * 0 means: quote as octal if (quote_path_fully)
 * -1 means: never quote
 * c: quote as "\\c"
 */
#define X8(x) x, x, x, x, x, x, x, x
#define X16(x) X8(x), X8(x)
/* Per-byte quoting class, indexed by unsigned char.  Entries for
 * 0x80..0xff are left out of the initializer and therefore 0. */
static signed char const sq_lookup[256] = {
	/*           0    1    2    3    4    5    6    7 */
	/* 0x00 */   1,   1,   1,   1,   1,   1,   1, 'a',
	/* 0x08 */ 'b', 't', 'n', 'v', 'f', 'r',   1,   1,
	/* 0x10 */ X16(1),
	/* 0x20 */  -1,  -1, '"',  -1,  -1,  -1,  -1,  -1,
	/* 0x28 */ X16(-1), X16(-1), X16(-1),
	/* 0x58 */  -1,  -1,  -1,  -1,'\\',  -1,  -1,  -1,
	/* 0x60 */ X16(-1), X8(-1),
	/* 0x78 */  -1,  -1,  -1,  -1,  -1,  -1,  -1,   1,
	/* 0x80 */ /* set to 0 */
};
159 | |||
/* Non-zero if C must be escaped: always for table class 1, and for
 * class 0 only when quote_path_fully is set; never for class -1. */
static inline int sq_must_quote(char c)
{
	return sq_lookup[(unsigned char)c] + quote_path_fully > 0;
}
164 | |||
165 | /* returns the longest prefix not needing a quote up to maxlen if positive. | ||
166 | This stops at the first \0 because it's marked as a character needing an | ||
167 | escape */ | ||
static size_t next_quote_pos(const char *s, ssize_t maxlen)
{
	size_t len;
	if (maxlen < 0) {
		/* NUL-terminated scan: '\0' is flagged in sq_lookup, so this stops */
		for (len = 0; !sq_must_quote(s[len]); len++);
	} else {
		/* counted scan: maxlen >= 0 here, so the size_t compare is safe */
		for (len = 0; len < maxlen && !sq_must_quote(s[len]); len++);
	}
	return len;
}
178 | |||
179 | /* | ||
180 | * C-style name quoting. | ||
181 | * | ||
182 | * (1) if sb and fp are both NULL, inspect the input name and counts the | ||
183 | * number of bytes that are needed to hold c_style quoted version of name, | ||
184 | * counting the double quotes around it but not terminating NUL, and | ||
185 | * returns it. | ||
186 | * However, if name does not need c_style quoting, it returns 0. | ||
187 | * | ||
188 | * (2) if sb or fp are not NULL, it emits the c_style quoted version | ||
189 | * of name, enclosed with double quotes if asked and needed only. | ||
190 | * Return value is the same as in (1). | ||
191 | */ | ||
static size_t quote_c_style_counted(const char *name, ssize_t maxlen,
                                  struct strbuf *sb, FILE *fp, int no_dq)
{
#undef EMIT
/* Emit one byte to whichever sinks (strbuf and/or FILE) were supplied,
 * keeping a running count of bytes produced. */
#define EMIT(c) \
	do { \
		if (sb) strbuf_addch(sb, (c)); \
		if (fp) fputc((c), fp); \
		count++; \
	} while (0)
#define EMITBUF(s, l) \
	do { \
		if (sb) strbuf_add(sb, (s), (l)); \
		if (fp) fwrite((s), (l), 1, fp); \
		count += (l); \
	} while (0)

	size_t len, count = 0;
	const char *p = name;

	for (;;) {
		int ch;

		/* copy the longest run that needs no quoting */
		len = next_quote_pos(p, maxlen);
		if (len == maxlen || !p[len])
			break;

		/* opening double quote is emitted lazily, on the first escape */
		if (!no_dq && p == name)
			EMIT('"');

		EMITBUF(p, len);
		EMIT('\\');
		p += len;
		ch = (unsigned char)*p++;
		if (sq_lookup[ch] >= ' ') {
			/* single-character escape, e.g. \n or \" */
			EMIT(sq_lookup[ch]);
		} else {
			/* three-digit octal escape */
			EMIT(((ch >> 6) & 03) + '0');
			EMIT(((ch >> 3) & 07) + '0');
			EMIT(((ch >> 0) & 07) + '0');
		}
	}

	/* flush the final unquoted run */
	EMITBUF(p, len);
	if (p == name)   /* no ending quote needed */
		return 0;

	if (!no_dq)
		EMIT('"');
	return count;
}
243 | |||
/* Convenience wrapper for a NUL-terminated name; see the comment above
 * quote_c_style_counted() for the sb/fp/return-value conventions. */
size_t quote_c_style(const char *name, struct strbuf *sb, FILE *fp, int nodq)
{
	return quote_c_style_counted(name, -1, sb, fp, nodq);
}
248 | |||
/*
 * Append prefix followed by path to "sb"; if either part needs C-style
 * quoting, the concatenation is quoted as one string (wrapped in double
 * quotes unless nodq is set).
 */
void quote_two_c_style(struct strbuf *sb, const char *prefix, const char *path, int nodq)
{
	int need_quote = quote_c_style(prefix, NULL, NULL, 0) ||
			 quote_c_style(path, NULL, NULL, 0);

	if (!need_quote) {
		strbuf_addstr(sb, prefix);
		strbuf_addstr(sb, path);
		return;
	}
	if (!nodq)
		strbuf_addch(sb, '"');
	quote_c_style(prefix, sb, NULL, 1);
	quote_c_style(path, sb, NULL, 1);
	if (!nodq)
		strbuf_addch(sb, '"');
}
264 | |||
/*
 * Write "name" to "fp" followed by the terminator byte; when the
 * terminator is non-NUL the name is C-style quoted, otherwise it is
 * written verbatim.
 */
void write_name_quoted(const char *name, FILE *fp, int terminator)
{
	if (!terminator)
		fputs(name, fp);
	else
		quote_c_style(name, NULL, fp, 0);
	fputc(terminator, fp);
}
274 | |||
/*
 * Write "pfx" (pfxlen bytes, need not be NUL-terminated) followed by
 * "name" to "fp", C-style quoting the whole thing if any part needs it
 * and the terminator is non-NUL, then write the terminator byte.
 *
 * (The "extern" storage-class specifier is dropped: it is meaningless
 * on a function definition and this one is declared in quote.h.)
 */
void write_name_quotedpfx(const char *pfx, size_t pfxlen,
			  const char *name, FILE *fp, int terminator)
{
	int needquote = 0;

	if (terminator) {
		/* quote if either the counted prefix or the name contains
		 * a byte that needs escaping */
		needquote = next_quote_pos(pfx, pfxlen) < pfxlen
			|| name[next_quote_pos(name, -1)];
	}
	if (needquote) {
		fputc('"', fp);
		quote_c_style_counted(pfx, pfxlen, NULL, fp, 1);
		quote_c_style(name, NULL, fp, 1);
		fputc('"', fp);
	} else {
		fwrite(pfx, pfxlen, 1, fp);
		fputs(name, fp);
	}
	fputc(terminator, fp);
}
295 | |||
296 | /* quote path as relative to the given prefix */ | ||
297 | char *quote_path_relative(const char *in, int len, | ||
298 | struct strbuf *out, const char *prefix) | ||
299 | { | ||
300 | int needquote; | ||
301 | |||
302 | if (len < 0) | ||
303 | len = strlen(in); | ||
304 | |||
305 | /* "../" prefix itself does not need quoting, but "in" might. */ | ||
306 | needquote = next_quote_pos(in, len) < len; | ||
307 | strbuf_setlen(out, 0); | ||
308 | strbuf_grow(out, len); | ||
309 | |||
310 | if (needquote) | ||
311 | strbuf_addch(out, '"'); | ||
312 | if (prefix) { | ||
313 | int off = 0; | ||
314 | while (prefix[off] && off < len && prefix[off] == in[off]) | ||
315 | if (prefix[off] == '/') { | ||
316 | prefix += off + 1; | ||
317 | in += off + 1; | ||
318 | len -= off + 1; | ||
319 | off = 0; | ||
320 | } else | ||
321 | off++; | ||
322 | |||
323 | for (; *prefix; prefix++) | ||
324 | if (*prefix == '/') | ||
325 | strbuf_addstr(out, "../"); | ||
326 | } | ||
327 | |||
328 | quote_c_style_counted (in, len, out, NULL, 1); | ||
329 | |||
330 | if (needquote) | ||
331 | strbuf_addch(out, '"'); | ||
332 | if (!out->len) | ||
333 | strbuf_addstr(out, "./"); | ||
334 | |||
335 | return out->buf; | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * C-style name unquoting. | ||
340 | * | ||
341 | * Quoted should point at the opening double quote. | ||
342 | * + Returns 0 if it was able to unquote the string properly, and appends the | ||
343 | * result in the strbuf `sb'. | ||
344 | * + Returns -1 in case of error, and doesn't touch the strbuf. Though note | ||
345 | * that this function will allocate memory in the strbuf, so calling | ||
346 | * strbuf_release is mandatory whichever result unquote_c_style returns. | ||
347 | * | ||
348 | * Updates endp pointer to point at one past the ending double quote if given. | ||
349 | */ | ||
int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp)
{
	size_t oldlen = sb->len, len;
	int ch, ac;

	if (*quoted++ != '"')
		return -1;

	for (;;) {
		/* copy the literal run up to the next escape or quote */
		len = strcspn(quoted, "\"\\");
		strbuf_add(sb, quoted, len);
		quoted += len;

		switch (*quoted++) {
		case '"':
			if (endp)
				*endp = quoted;
			return 0;	/* reached the closing quote */
		case '\\':
			break;		/* decode the escape below */
		default:
			goto error;	/* hit NUL before the closing quote */
		}

		/* decode one backslash escape */
		switch ((ch = *quoted++)) {
		case 'a': ch = '\a'; break;
		case 'b': ch = '\b'; break;
		case 'f': ch = '\f'; break;
		case 'n': ch = '\n'; break;
		case 'r': ch = '\r'; break;
		case 't': ch = '\t'; break;
		case 'v': ch = '\v'; break;

		case '\\': case '"':
			break; /* verbatim */

		/* octal values with first digit over 4 overflow */
		case '0': case '1': case '2': case '3':
			ac = ((ch - '0') << 6);
			if ((ch = *quoted++) < '0' || '7' < ch)
				goto error;
			ac |= ((ch - '0') << 3);
			if ((ch = *quoted++) < '0' || '7' < ch)
				goto error;
			ac |= (ch - '0');
			ch = ac;
			break;
		default:
			goto error;
		}
		strbuf_addch(sb, ch);
	}

  error:
	/* roll back any partial output before reporting failure */
	strbuf_setlen(sb, oldlen);
	return -1;
}
407 | |||
408 | /* quoting as a string literal for other languages */ | ||
409 | |||
/*
 * Print "src" as a Perl single-quoted string literal: wrapped in
 * single quotes, with embedded quotes and backslashes escaped by a
 * backslash.
 */
void perl_quote_print(FILE *stream, const char *src)
{
	char ch;

	fputc('\'', stream);
	while ((ch = *src++) != '\0') {
		if (ch == '\'' || ch == '\\')
			fputc('\\', stream);
		fputc(ch, stream);
	}
	fputc('\'', stream);
}
424 | |||
/*
 * Print "src" as a Python single-quoted string literal: newlines
 * become "\n"; embedded quotes and backslashes are backslash-escaped.
 */
void python_quote_print(FILE *stream, const char *src)
{
	char ch;

	fputc('\'', stream);
	while ((ch = *src++) != '\0') {
		if (ch == '\n') {
			fputs("\\n", stream);
		} else {
			if (ch == '\'' || ch == '\\')
				fputc('\\', stream);
			fputc(ch, stream);
		}
	}
	fputc('\'', stream);
}
445 | |||
/*
 * Print "src" as a Tcl double-quoted string literal.  Whitespace
 * controls get their mnemonic escapes; the Tcl metacharacters
 * [ ] { } $ \ " are backslash-escaped; everything else is verbatim.
 */
void tcl_quote_print(FILE *stream, const char *src)
{
	char ch;

	fputc('"', stream);
	while ((ch = *src++) != '\0') {
		switch (ch) {
		case '\f':
			fputs("\\f", stream);
			break;
		case '\r':
			fputs("\\r", stream);
			break;
		case '\n':
			fputs("\\n", stream);
			break;
		case '\t':
			fputs("\\t", stream);
			break;
		case '\v':
			fputs("\\v", stream);
			break;
		case '[': case ']':
		case '{': case '}':
		case '$': case '\\': case '"':
			fputc('\\', stream);
			/* fallthrough: emit the character itself */
		default:
			fputc(ch, stream);
			break;
		}
	}
	fputc('"', stream);
}
diff --git a/Documentation/perf_counter/util/quote.h b/Documentation/perf_counter/util/quote.h new file mode 100644 index 000000000000..5dfad89816db --- /dev/null +++ b/Documentation/perf_counter/util/quote.h | |||
@@ -0,0 +1,68 @@ | |||
1 | #ifndef QUOTE_H | ||
2 | #define QUOTE_H | ||
3 | |||
4 | #include <stddef.h> | ||
5 | #include <stdio.h> | ||
6 | |||
7 | /* Help to copy the thing properly quoted for the shell safety. | ||
8 | * any single quote is replaced with '\'', any exclamation point | ||
9 | * is replaced with '\!', and the whole thing is enclosed in a | ||
10 | * single quote pair. | ||
11 | * | ||
12 | * For example, if you are passing the result to system() as an | ||
13 | * argument: | ||
14 | * | ||
15 | * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1)) | ||
16 | * | ||
17 | * would be appropriate. If the system() is going to call ssh to | ||
18 | * run the command on the other side: | ||
19 | * | ||
20 | * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1)); | ||
 * sprintf(rcmd, "ssh %s %s", sq_quote(host), sq_quote(cmd));
22 | * | ||
23 | * Note that the above examples leak memory! Remember to free result from | ||
24 | * sq_quote() in a real application. | ||
25 | * | ||
 * sq_quote_buf() appends the quoted form of its input to the given
 * strbuf, growing it as needed.
29 | */ | ||
30 | |||
31 | extern void sq_quote_print(FILE *stream, const char *src); | ||
32 | |||
33 | extern void sq_quote_buf(struct strbuf *, const char *src); | ||
34 | extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen); | ||
35 | |||
36 | /* This unwraps what sq_quote() produces in place, but returns | ||
37 | * NULL if the input does not look like what sq_quote would have | ||
38 | * produced. | ||
39 | */ | ||
40 | extern char *sq_dequote(char *); | ||
41 | |||
42 | /* | ||
43 | * Same as the above, but can be used to unwrap many arguments in the | ||
44 | * same string separated by space. "next" is changed to point to the | ||
45 | * next argument that should be passed as first parameter. When there | ||
46 | * is no more argument to be dequoted, "next" is updated to point to NULL. | ||
47 | */ | ||
48 | extern char *sq_dequote_step(char *arg, char **next); | ||
49 | extern int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc); | ||
50 | |||
51 | extern int unquote_c_style(struct strbuf *, const char *quoted, const char **endp); | ||
52 | extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq); | ||
53 | extern void quote_two_c_style(struct strbuf *, const char *, const char *, int); | ||
54 | |||
55 | extern void write_name_quoted(const char *name, FILE *, int terminator); | ||
56 | extern void write_name_quotedpfx(const char *pfx, size_t pfxlen, | ||
57 | const char *name, FILE *, int terminator); | ||
58 | |||
59 | /* quote path as relative to the given prefix */ | ||
60 | char *quote_path_relative(const char *in, int len, | ||
61 | struct strbuf *out, const char *prefix); | ||
62 | |||
63 | /* quoting as a string literal for other languages */ | ||
64 | extern void perl_quote_print(FILE *stream, const char *src); | ||
65 | extern void python_quote_print(FILE *stream, const char *src); | ||
66 | extern void tcl_quote_print(FILE *stream, const char *src); | ||
67 | |||
68 | #endif | ||
diff --git a/Documentation/perf_counter/util/run-command.c b/Documentation/perf_counter/util/run-command.c new file mode 100644 index 000000000000..b2f5e854f40a --- /dev/null +++ b/Documentation/perf_counter/util/run-command.c | |||
@@ -0,0 +1,395 @@ | |||
1 | #include "cache.h" | ||
2 | #include "run-command.h" | ||
3 | #include "exec_cmd.h" | ||
4 | |||
/* Close both descriptors of a pipe()-style fd pair. */
static inline void close_pair(int fd[2])
{
	int i;

	for (i = 0; i < 2; i++)
		close(fd[i]);
}
10 | |||
/*
 * Redirect descriptor "to" to /dev/null.
 *
 * Fixes over the original: the open() result is checked (dup2(-1, to)
 * used to fail silently with EBADF), and close(fd) is skipped when
 * open() happened to return "to" itself — the old code would close the
 * freshly redirected descriptor in that case.
 */
static inline void dup_devnull(int to)
{
	int fd = open("/dev/null", O_RDWR);

	if (fd < 0)
		return;	/* nothing to redirect to; leave "to" untouched */
	dup2(fd, to);
	if (fd != to)
		close(fd);
}
17 | |||
/*
 * Spawn cmd->argv according to the redirection fields of *cmd (see the
 * comment block in run-command.h for the ->in/->out/->err protocol).
 * Returns 0 on success or a negative ERR_RUN_COMMAND_* code; on every
 * error path any FDs passed in via ->in/->out are closed as promised.
 */
int start_command(struct child_process *cmd)
{
	int need_in, need_out, need_err;
	int fdin[2], fdout[2], fderr[2];

	/*
	 * In case of errors we must keep the promise to close FDs
	 * that have been passed in via ->in and ->out.
	 */

	/* ->in < 0 requests a pipe; its write end is handed back in cmd->in */
	need_in = !cmd->no_stdin && cmd->in < 0;
	if (need_in) {
		if (pipe(fdin) < 0) {
			if (cmd->out > 0)
				close(cmd->out);
			return -ERR_RUN_COMMAND_PIPE;
		}
		cmd->in = fdin[1];
	}

	/* ->out < 0 requests a pipe; its read end is handed back in cmd->out */
	need_out = !cmd->no_stdout
		&& !cmd->stdout_to_stderr
		&& cmd->out < 0;
	if (need_out) {
		if (pipe(fdout) < 0) {
			if (need_in)
				close_pair(fdin);
			else if (cmd->in)
				close(cmd->in);
			return -ERR_RUN_COMMAND_PIPE;
		}
		cmd->out = fdout[0];
	}

	need_err = !cmd->no_stderr && cmd->err < 0;
	if (need_err) {
		if (pipe(fderr) < 0) {
			if (need_in)
				close_pair(fdin);
			else if (cmd->in)
				close(cmd->in);
			if (need_out)
				close_pair(fdout);
			else if (cmd->out)
				close(cmd->out);
			return -ERR_RUN_COMMAND_PIPE;
		}
		cmd->err = fderr[0];
	}

#ifndef __MINGW32__
	/* flush stdio so buffered output is not duplicated into the child */
	fflush(NULL);
	cmd->pid = fork();
	if (!cmd->pid) {
		/* child: wire up stdin/stdout/stderr, then chdir/env/exec */
		if (cmd->no_stdin)
			dup_devnull(0);
		else if (need_in) {
			dup2(fdin[0], 0);
			close_pair(fdin);
		} else if (cmd->in) {
			dup2(cmd->in, 0);
			close(cmd->in);
		}

		if (cmd->no_stderr)
			dup_devnull(2);
		else if (need_err) {
			dup2(fderr[1], 2);
			close_pair(fderr);
		}

		/* stdout last, so stdout_to_stderr sees the final stderr */
		if (cmd->no_stdout)
			dup_devnull(1);
		else if (cmd->stdout_to_stderr)
			dup2(2, 1);
		else if (need_out) {
			dup2(fdout[1], 1);
			close_pair(fdout);
		} else if (cmd->out > 1) {
			dup2(cmd->out, 1);
			close(cmd->out);
		}

		if (cmd->dir && chdir(cmd->dir))
			die("exec %s: cd to %s failed (%s)", cmd->argv[0],
			    cmd->dir, strerror(errno));
		if (cmd->env) {
			/* "VAR=VALUE" sets, plain "VAR" unsets */
			for (; *cmd->env; cmd->env++) {
				if (strchr(*cmd->env, '='))
					putenv((char*)*cmd->env);
				else
					unsetenv(*cmd->env);
			}
		}
		if (cmd->preexec_cb)
			cmd->preexec_cb();
		if (cmd->perf_cmd) {
			execv_perf_cmd(cmd->argv);
		} else {
			execvp(cmd->argv[0], (char *const*) cmd->argv);
		}
		/* exec failed: 127 is the conventional "command not found" */
		exit(127);
	}
#else
	/* Windows: no fork(); temporarily rewire our own std FDs, spawn,
	 * then restore them from the saved duplicates below. */
	int s0 = -1, s1 = -1, s2 = -1;	/* backups of stdin, stdout, stderr */
	const char **sargv = cmd->argv;
	char **env = environ;

	if (cmd->no_stdin) {
		s0 = dup(0);
		dup_devnull(0);
	} else if (need_in) {
		s0 = dup(0);
		dup2(fdin[0], 0);
	} else if (cmd->in) {
		s0 = dup(0);
		dup2(cmd->in, 0);
	}

	if (cmd->no_stderr) {
		s2 = dup(2);
		dup_devnull(2);
	} else if (need_err) {
		s2 = dup(2);
		dup2(fderr[1], 2);
	}

	if (cmd->no_stdout) {
		s1 = dup(1);
		dup_devnull(1);
	} else if (cmd->stdout_to_stderr) {
		s1 = dup(1);
		dup2(2, 1);
	} else if (need_out) {
		s1 = dup(1);
		dup2(fdout[1], 1);
	} else if (cmd->out > 1) {
		s1 = dup(1);
		dup2(cmd->out, 1);
	}

	if (cmd->dir)
		die("chdir in start_command() not implemented");
	if (cmd->env) {
		env = copy_environ();
		for (; *cmd->env; cmd->env++)
			env = env_setenv(env, *cmd->env);
	}

	if (cmd->perf_cmd) {
		cmd->argv = prepare_perf_cmd(cmd->argv);
	}

	cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env);

	if (cmd->env)
		free_environ(env);
	if (cmd->perf_cmd)
		free(cmd->argv);

	cmd->argv = sargv;
	/* restore our own std FDs from the backups taken above */
	if (s0 >= 0)
		dup2(s0, 0), close(s0);
	if (s1 >= 0)
		dup2(s1, 1), close(s1);
	if (s2 >= 0)
		dup2(s2, 2), close(s2);
#endif

	if (cmd->pid < 0) {
		/* spawn failed: honor the close-on-error promise */
		int err = errno;
		if (need_in)
			close_pair(fdin);
		else if (cmd->in)
			close(cmd->in);
		if (need_out)
			close_pair(fdout);
		else if (cmd->out)
			close(cmd->out);
		if (need_err)
			close_pair(fderr);
		return err == ENOENT ?
			-ERR_RUN_COMMAND_EXEC :
			-ERR_RUN_COMMAND_FORK;
	}

	/* parent: close the child's ends of any pipes we created */
	if (need_in)
		close(fdin[0]);
	else if (cmd->in)
		close(cmd->in);

	if (need_out)
		close(fdout[1]);
	else if (cmd->out)
		close(cmd->out);

	if (need_err)
		close(fderr[1]);

	return 0;
}
219 | |||
/*
 * Wait for "pid" to terminate, retrying on EINTR.  Returns 0 for a
 * clean exit, a negative ERR_RUN_COMMAND_* code for wait/signal
 * problems, or the negated exit status otherwise (127 is mapped to
 * exec failure by convention).
 */
static int wait_or_whine(pid_t pid)
{
	for (;;) {
		int status, code;
		pid_t waiting = waitpid(pid, &status, 0);

		if (waiting < 0) {
			if (errno == EINTR)
				continue;	/* interrupted by a signal: retry */
			error("waitpid failed (%s)", strerror(errno));
			return -ERR_RUN_COMMAND_WAITPID;
		}
		if (waiting != pid)
			return -ERR_RUN_COMMAND_WAITPID_WRONG_PID;
		if (WIFSIGNALED(status))
			return -ERR_RUN_COMMAND_WAITPID_SIGNAL;

		if (!WIFEXITED(status))
			return -ERR_RUN_COMMAND_WAITPID_NOEXIT;
		code = WEXITSTATUS(status);
		switch (code) {
		case 127:
			/* the child exits 127 when exec itself failed */
			return -ERR_RUN_COMMAND_EXEC;
		case 0:
			return 0;
		default:
			return -code;
		}
	}
}
250 | |||
/* Reap a child started with start_command(); see wait_or_whine(). */
int finish_command(struct child_process *cmd)
{
	return wait_or_whine(cmd->pid);
}
255 | |||
/* Start the child and wait for it; first failure wins. */
int run_command(struct child_process *cmd)
{
	int ret = start_command(cmd);

	return ret ? ret : finish_command(cmd);
}
263 | |||
264 | static void prepare_run_command_v_opt(struct child_process *cmd, | ||
265 | const char **argv, | ||
266 | int opt) | ||
267 | { | ||
268 | memset(cmd, 0, sizeof(*cmd)); | ||
269 | cmd->argv = argv; | ||
270 | cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0; | ||
271 | cmd->perf_cmd = opt & RUN_PERF_CMD ? 1 : 0; | ||
272 | cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0; | ||
273 | } | ||
274 | |||
/* Run argv as a child process with the RUN_* option bits applied. */
int run_command_v_opt(const char **argv, int opt)
{
	struct child_process cmd;
	prepare_run_command_v_opt(&cmd, argv, opt);
	return run_command(&cmd);
}
281 | |||
/*
 * Like run_command_v_opt(), additionally running in directory "dir"
 * (if non-NULL) with environment overrides "env" ("VAR=VALUE" sets,
 * plain "VAR" unsets).
 */
int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env)
{
	struct child_process cmd;
	prepare_run_command_v_opt(&cmd, argv, opt);
	cmd.dir = dir;
	cmd.env = env;
	return run_command(&cmd);
}
290 | |||
#ifdef __MINGW32__
/* Windows thread trampoline: invoke the async callback with its pipe fd. */
static __stdcall unsigned run_thread(void *data)
{
	struct async *async = data;
	return async->proc(async->fd_for_proc, async->data);
}
#endif
298 | |||
/*
 * Run async->proc asynchronously (fork on POSIX, thread on Windows),
 * feeding it the write end of a freshly created pipe; async->out
 * receives the read end for the caller.  Returns 0 on success, -1 on
 * failure.
 */
int start_async(struct async *async)
{
	int pipe_out[2];

	if (pipe(pipe_out) < 0)
		return error("cannot create pipe: %s", strerror(errno));
	async->out = pipe_out[0];

#ifndef __MINGW32__
	/* Flush stdio before fork() to avoid cloning buffers */
	fflush(NULL);

	async->pid = fork();
	if (async->pid < 0) {
		error("fork (async) failed: %s", strerror(errno));
		close_pair(pipe_out);
		return -1;
	}
	if (!async->pid) {
		/* child: keep only the write end; exit 0/1 per proc's result */
		close(pipe_out[0]);
		exit(!!async->proc(pipe_out[1], async->data));
	}
	close(pipe_out[1]);
#else
	async->fd_for_proc = pipe_out[1];
	async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL);
	if (!async->tid) {
		error("cannot create thread: %s", strerror(errno));
		close_pair(pipe_out);
		return -1;
	}
#endif
	return 0;
}
333 | |||
/* Reap the child/thread started by start_async(); 0 on success. */
int finish_async(struct async *async)
{
#ifndef __MINGW32__
	int ret = 0;

	if (wait_or_whine(async->pid))
		ret = error("waitpid (async) failed");
#else
	DWORD ret = 0;
	if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0)
		ret = error("waiting for thread failed: %lu", GetLastError());
	else if (!GetExitCodeThread(async->tid, &ret))
		ret = error("cannot get thread exit code: %lu", GetLastError());
	CloseHandle(async->tid);
#endif
	return ret;
}
351 | |||
352 | int run_hook(const char *index_file, const char *name, ...) | ||
353 | { | ||
354 | struct child_process hook; | ||
355 | const char **argv = NULL, *env[2]; | ||
356 | char index[PATH_MAX]; | ||
357 | va_list args; | ||
358 | int ret; | ||
359 | size_t i = 0, alloc = 0; | ||
360 | |||
361 | if (access(perf_path("hooks/%s", name), X_OK) < 0) | ||
362 | return 0; | ||
363 | |||
364 | va_start(args, name); | ||
365 | ALLOC_GROW(argv, i + 1, alloc); | ||
366 | argv[i++] = perf_path("hooks/%s", name); | ||
367 | while (argv[i-1]) { | ||
368 | ALLOC_GROW(argv, i + 1, alloc); | ||
369 | argv[i++] = va_arg(args, const char *); | ||
370 | } | ||
371 | va_end(args); | ||
372 | |||
373 | memset(&hook, 0, sizeof(hook)); | ||
374 | hook.argv = argv; | ||
375 | hook.no_stdin = 1; | ||
376 | hook.stdout_to_stderr = 1; | ||
377 | if (index_file) { | ||
378 | snprintf(index, sizeof(index), "PERF_INDEX_FILE=%s", index_file); | ||
379 | env[0] = index; | ||
380 | env[1] = NULL; | ||
381 | hook.env = env; | ||
382 | } | ||
383 | |||
384 | ret = start_command(&hook); | ||
385 | free(argv); | ||
386 | if (ret) { | ||
387 | warning("Could not spawn %s", argv[0]); | ||
388 | return ret; | ||
389 | } | ||
390 | ret = finish_command(&hook); | ||
391 | if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL) | ||
392 | warning("%s exited due to uncaught signal", argv[0]); | ||
393 | |||
394 | return ret; | ||
395 | } | ||
diff --git a/Documentation/perf_counter/util/run-command.h b/Documentation/perf_counter/util/run-command.h new file mode 100644 index 000000000000..328289f23669 --- /dev/null +++ b/Documentation/perf_counter/util/run-command.h | |||
@@ -0,0 +1,93 @@ | |||
1 | #ifndef RUN_COMMAND_H | ||
2 | #define RUN_COMMAND_H | ||
3 | |||
4 | enum { | ||
5 | ERR_RUN_COMMAND_FORK = 10000, | ||
6 | ERR_RUN_COMMAND_EXEC, | ||
7 | ERR_RUN_COMMAND_PIPE, | ||
8 | ERR_RUN_COMMAND_WAITPID, | ||
9 | ERR_RUN_COMMAND_WAITPID_WRONG_PID, | ||
10 | ERR_RUN_COMMAND_WAITPID_SIGNAL, | ||
11 | ERR_RUN_COMMAND_WAITPID_NOEXIT, | ||
12 | }; | ||
13 | #define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK) | ||
14 | |||
15 | struct child_process { | ||
16 | const char **argv; | ||
17 | pid_t pid; | ||
18 | /* | ||
19 | * Using .in, .out, .err: | ||
20 | * - Specify 0 for no redirections (child inherits stdin, stdout, | ||
21 | * stderr from parent). | ||
22 | * - Specify -1 to have a pipe allocated as follows: | ||
23 | * .in: returns the writable pipe end; parent writes to it, | ||
24 | * the readable pipe end becomes child's stdin | ||
25 | * .out, .err: returns the readable pipe end; parent reads from | ||
26 | * it, the writable pipe end becomes child's stdout/stderr | ||
27 | * The caller of start_command() must close the returned FDs | ||
28 | * after it has completed reading from/writing to it! | ||
29 | * - Specify > 0 to set a channel to a particular FD as follows: | ||
30 | * .in: a readable FD, becomes child's stdin | ||
31 | * .out: a writable FD, becomes child's stdout/stderr | ||
32 | * .err > 0 not supported | ||
33 | * The specified FD is closed by start_command(), even in case | ||
34 | * of errors! | ||
35 | */ | ||
36 | int in; | ||
37 | int out; | ||
38 | int err; | ||
39 | const char *dir; | ||
40 | const char *const *env; | ||
41 | unsigned no_stdin:1; | ||
42 | unsigned no_stdout:1; | ||
43 | unsigned no_stderr:1; | ||
44 | unsigned perf_cmd:1; /* if this is to be perf sub-command */ | ||
45 | unsigned stdout_to_stderr:1; | ||
46 | void (*preexec_cb)(void); | ||
47 | }; | ||
48 | |||
49 | int start_command(struct child_process *); | ||
50 | int finish_command(struct child_process *); | ||
51 | int run_command(struct child_process *); | ||
52 | |||
53 | extern int run_hook(const char *index_file, const char *name, ...); | ||
54 | |||
55 | #define RUN_COMMAND_NO_STDIN 1 | ||
#define RUN_PERF_CMD 2 /* if this is to be a perf sub-command */
57 | #define RUN_COMMAND_STDOUT_TO_STDERR 4 | ||
58 | int run_command_v_opt(const char **argv, int opt); | ||
59 | |||
60 | /* | ||
61 | * env (the environment) is to be formatted like environ: "VAR=VALUE". | ||
62 | * To unset an environment variable use just "VAR". | ||
63 | */ | ||
64 | int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env); | ||
65 | |||
66 | /* | ||
67 | * The purpose of the following functions is to feed a pipe by running | ||
68 | * a function asynchronously and providing output that the caller reads. | ||
69 | * | ||
70 | * It is expected that no synchronization and mutual exclusion between | ||
71 | * the caller and the feed function is necessary so that the function | ||
72 | * can run in a thread without interfering with the caller. | ||
73 | */ | ||
74 | struct async { | ||
75 | /* | ||
76 | * proc writes to fd and closes it; | ||
77 | * returns 0 on success, non-zero on failure | ||
78 | */ | ||
79 | int (*proc)(int fd, void *data); | ||
80 | void *data; | ||
81 | int out; /* caller reads from here and closes it */ | ||
82 | #ifndef __MINGW32__ | ||
83 | pid_t pid; | ||
84 | #else | ||
85 | HANDLE tid; | ||
86 | int fd_for_proc; | ||
87 | #endif | ||
88 | }; | ||
89 | |||
90 | int start_async(struct async *async); | ||
91 | int finish_async(struct async *async); | ||
92 | |||
93 | #endif | ||
diff --git a/Documentation/perf_counter/util/strbuf.c b/Documentation/perf_counter/util/strbuf.c new file mode 100644 index 000000000000..eaba09306802 --- /dev/null +++ b/Documentation/perf_counter/util/strbuf.c | |||
@@ -0,0 +1,359 @@ | |||
1 | #include "cache.h" | ||
2 | |||
3 | int prefixcmp(const char *str, const char *prefix) | ||
4 | { | ||
5 | for (; ; str++, prefix++) | ||
6 | if (!*prefix) | ||
7 | return 0; | ||
8 | else if (*str != *prefix) | ||
9 | return (unsigned char)*prefix - (unsigned char)*str; | ||
10 | } | ||
11 | |||
12 | /* | ||
13 | * Used as the default ->buf value, so that people can always assume | ||
14 | * buf is non NULL and ->buf is NUL terminated even for a freshly | ||
15 | * initialized strbuf. | ||
16 | */ | ||
17 | char strbuf_slopbuf[1]; | ||
18 | |||
19 | void strbuf_init(struct strbuf *sb, size_t hint) | ||
20 | { | ||
21 | sb->alloc = sb->len = 0; | ||
22 | sb->buf = strbuf_slopbuf; | ||
23 | if (hint) | ||
24 | strbuf_grow(sb, hint); | ||
25 | } | ||
26 | |||
27 | void strbuf_release(struct strbuf *sb) | ||
28 | { | ||
29 | if (sb->alloc) { | ||
30 | free(sb->buf); | ||
31 | strbuf_init(sb, 0); | ||
32 | } | ||
33 | } | ||
34 | |||
35 | char *strbuf_detach(struct strbuf *sb, size_t *sz) | ||
36 | { | ||
37 | char *res = sb->alloc ? sb->buf : NULL; | ||
38 | if (sz) | ||
39 | *sz = sb->len; | ||
40 | strbuf_init(sb, 0); | ||
41 | return res; | ||
42 | } | ||
43 | |||
44 | void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc) | ||
45 | { | ||
46 | strbuf_release(sb); | ||
47 | sb->buf = buf; | ||
48 | sb->len = len; | ||
49 | sb->alloc = alloc; | ||
50 | strbuf_grow(sb, 0); | ||
51 | sb->buf[sb->len] = '\0'; | ||
52 | } | ||
53 | |||
54 | void strbuf_grow(struct strbuf *sb, size_t extra) | ||
55 | { | ||
56 | if (sb->len + extra + 1 <= sb->len) | ||
57 | die("you want to use way too much memory"); | ||
58 | if (!sb->alloc) | ||
59 | sb->buf = NULL; | ||
60 | ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc); | ||
61 | } | ||
62 | |||
63 | void strbuf_trim(struct strbuf *sb) | ||
64 | { | ||
65 | char *b = sb->buf; | ||
66 | while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1])) | ||
67 | sb->len--; | ||
68 | while (sb->len > 0 && isspace(*b)) { | ||
69 | b++; | ||
70 | sb->len--; | ||
71 | } | ||
72 | memmove(sb->buf, b, sb->len); | ||
73 | sb->buf[sb->len] = '\0'; | ||
74 | } | ||
75 | void strbuf_rtrim(struct strbuf *sb) | ||
76 | { | ||
77 | while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1])) | ||
78 | sb->len--; | ||
79 | sb->buf[sb->len] = '\0'; | ||
80 | } | ||
81 | |||
82 | void strbuf_ltrim(struct strbuf *sb) | ||
83 | { | ||
84 | char *b = sb->buf; | ||
85 | while (sb->len > 0 && isspace(*b)) { | ||
86 | b++; | ||
87 | sb->len--; | ||
88 | } | ||
89 | memmove(sb->buf, b, sb->len); | ||
90 | sb->buf[sb->len] = '\0'; | ||
91 | } | ||
92 | |||
93 | void strbuf_tolower(struct strbuf *sb) | ||
94 | { | ||
95 | int i; | ||
96 | for (i = 0; i < sb->len; i++) | ||
97 | sb->buf[i] = tolower(sb->buf[i]); | ||
98 | } | ||
99 | |||
100 | struct strbuf **strbuf_split(const struct strbuf *sb, int delim) | ||
101 | { | ||
102 | int alloc = 2, pos = 0; | ||
103 | char *n, *p; | ||
104 | struct strbuf **ret; | ||
105 | struct strbuf *t; | ||
106 | |||
107 | ret = calloc(alloc, sizeof(struct strbuf *)); | ||
108 | p = n = sb->buf; | ||
109 | while (n < sb->buf + sb->len) { | ||
110 | int len; | ||
111 | n = memchr(n, delim, sb->len - (n - sb->buf)); | ||
112 | if (pos + 1 >= alloc) { | ||
113 | alloc = alloc * 2; | ||
114 | ret = realloc(ret, sizeof(struct strbuf *) * alloc); | ||
115 | } | ||
116 | if (!n) | ||
117 | n = sb->buf + sb->len - 1; | ||
118 | len = n - p + 1; | ||
119 | t = malloc(sizeof(struct strbuf)); | ||
120 | strbuf_init(t, len); | ||
121 | strbuf_add(t, p, len); | ||
122 | ret[pos] = t; | ||
123 | ret[++pos] = NULL; | ||
124 | p = ++n; | ||
125 | } | ||
126 | return ret; | ||
127 | } | ||
128 | |||
129 | void strbuf_list_free(struct strbuf **sbs) | ||
130 | { | ||
131 | struct strbuf **s = sbs; | ||
132 | |||
133 | while (*s) { | ||
134 | strbuf_release(*s); | ||
135 | free(*s++); | ||
136 | } | ||
137 | free(sbs); | ||
138 | } | ||
139 | |||
140 | int strbuf_cmp(const struct strbuf *a, const struct strbuf *b) | ||
141 | { | ||
142 | int len = a->len < b->len ? a->len: b->len; | ||
143 | int cmp = memcmp(a->buf, b->buf, len); | ||
144 | if (cmp) | ||
145 | return cmp; | ||
146 | return a->len < b->len ? -1: a->len != b->len; | ||
147 | } | ||
148 | |||
149 | void strbuf_splice(struct strbuf *sb, size_t pos, size_t len, | ||
150 | const void *data, size_t dlen) | ||
151 | { | ||
152 | if (pos + len < pos) | ||
153 | die("you want to use way too much memory"); | ||
154 | if (pos > sb->len) | ||
155 | die("`pos' is too far after the end of the buffer"); | ||
156 | if (pos + len > sb->len) | ||
157 | die("`pos + len' is too far after the end of the buffer"); | ||
158 | |||
159 | if (dlen >= len) | ||
160 | strbuf_grow(sb, dlen - len); | ||
161 | memmove(sb->buf + pos + dlen, | ||
162 | sb->buf + pos + len, | ||
163 | sb->len - pos - len); | ||
164 | memcpy(sb->buf + pos, data, dlen); | ||
165 | strbuf_setlen(sb, sb->len + dlen - len); | ||
166 | } | ||
167 | |||
168 | void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len) | ||
169 | { | ||
170 | strbuf_splice(sb, pos, 0, data, len); | ||
171 | } | ||
172 | |||
173 | void strbuf_remove(struct strbuf *sb, size_t pos, size_t len) | ||
174 | { | ||
175 | strbuf_splice(sb, pos, len, NULL, 0); | ||
176 | } | ||
177 | |||
178 | void strbuf_add(struct strbuf *sb, const void *data, size_t len) | ||
179 | { | ||
180 | strbuf_grow(sb, len); | ||
181 | memcpy(sb->buf + sb->len, data, len); | ||
182 | strbuf_setlen(sb, sb->len + len); | ||
183 | } | ||
184 | |||
185 | void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len) | ||
186 | { | ||
187 | strbuf_grow(sb, len); | ||
188 | memcpy(sb->buf + sb->len, sb->buf + pos, len); | ||
189 | strbuf_setlen(sb, sb->len + len); | ||
190 | } | ||
191 | |||
192 | void strbuf_addf(struct strbuf *sb, const char *fmt, ...) | ||
193 | { | ||
194 | int len; | ||
195 | va_list ap; | ||
196 | |||
197 | if (!strbuf_avail(sb)) | ||
198 | strbuf_grow(sb, 64); | ||
199 | va_start(ap, fmt); | ||
200 | len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); | ||
201 | va_end(ap); | ||
202 | if (len < 0) | ||
203 | die("your vsnprintf is broken"); | ||
204 | if (len > strbuf_avail(sb)) { | ||
205 | strbuf_grow(sb, len); | ||
206 | va_start(ap, fmt); | ||
207 | len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); | ||
208 | va_end(ap); | ||
209 | if (len > strbuf_avail(sb)) { | ||
210 | die("this should not happen, your snprintf is broken"); | ||
211 | } | ||
212 | } | ||
213 | strbuf_setlen(sb, sb->len + len); | ||
214 | } | ||
215 | |||
216 | void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, | ||
217 | void *context) | ||
218 | { | ||
219 | for (;;) { | ||
220 | const char *percent; | ||
221 | size_t consumed; | ||
222 | |||
223 | percent = strchrnul(format, '%'); | ||
224 | strbuf_add(sb, format, percent - format); | ||
225 | if (!*percent) | ||
226 | break; | ||
227 | format = percent + 1; | ||
228 | |||
229 | consumed = fn(sb, format, context); | ||
230 | if (consumed) | ||
231 | format += consumed; | ||
232 | else | ||
233 | strbuf_addch(sb, '%'); | ||
234 | } | ||
235 | } | ||
236 | |||
237 | size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, | ||
238 | void *context) | ||
239 | { | ||
240 | struct strbuf_expand_dict_entry *e = context; | ||
241 | size_t len; | ||
242 | |||
243 | for (; e->placeholder && (len = strlen(e->placeholder)); e++) { | ||
244 | if (!strncmp(placeholder, e->placeholder, len)) { | ||
245 | if (e->value) | ||
246 | strbuf_addstr(sb, e->value); | ||
247 | return len; | ||
248 | } | ||
249 | } | ||
250 | return 0; | ||
251 | } | ||
252 | |||
253 | size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) | ||
254 | { | ||
255 | size_t res; | ||
256 | size_t oldalloc = sb->alloc; | ||
257 | |||
258 | strbuf_grow(sb, size); | ||
259 | res = fread(sb->buf + sb->len, 1, size, f); | ||
260 | if (res > 0) | ||
261 | strbuf_setlen(sb, sb->len + res); | ||
262 | else if (res < 0 && oldalloc == 0) | ||
263 | strbuf_release(sb); | ||
264 | return res; | ||
265 | } | ||
266 | |||
267 | ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint) | ||
268 | { | ||
269 | size_t oldlen = sb->len; | ||
270 | size_t oldalloc = sb->alloc; | ||
271 | |||
272 | strbuf_grow(sb, hint ? hint : 8192); | ||
273 | for (;;) { | ||
274 | ssize_t cnt; | ||
275 | |||
276 | cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1); | ||
277 | if (cnt < 0) { | ||
278 | if (oldalloc == 0) | ||
279 | strbuf_release(sb); | ||
280 | else | ||
281 | strbuf_setlen(sb, oldlen); | ||
282 | return -1; | ||
283 | } | ||
284 | if (!cnt) | ||
285 | break; | ||
286 | sb->len += cnt; | ||
287 | strbuf_grow(sb, 8192); | ||
288 | } | ||
289 | |||
290 | sb->buf[sb->len] = '\0'; | ||
291 | return sb->len - oldlen; | ||
292 | } | ||
293 | |||
294 | #define STRBUF_MAXLINK (2*PATH_MAX) | ||
295 | |||
296 | int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) | ||
297 | { | ||
298 | size_t oldalloc = sb->alloc; | ||
299 | |||
300 | if (hint < 32) | ||
301 | hint = 32; | ||
302 | |||
303 | while (hint < STRBUF_MAXLINK) { | ||
304 | int len; | ||
305 | |||
306 | strbuf_grow(sb, hint); | ||
307 | len = readlink(path, sb->buf, hint); | ||
308 | if (len < 0) { | ||
309 | if (errno != ERANGE) | ||
310 | break; | ||
311 | } else if (len < hint) { | ||
312 | strbuf_setlen(sb, len); | ||
313 | return 0; | ||
314 | } | ||
315 | |||
316 | /* .. the buffer was too small - try again */ | ||
317 | hint *= 2; | ||
318 | } | ||
319 | if (oldalloc == 0) | ||
320 | strbuf_release(sb); | ||
321 | return -1; | ||
322 | } | ||
323 | |||
324 | int strbuf_getline(struct strbuf *sb, FILE *fp, int term) | ||
325 | { | ||
326 | int ch; | ||
327 | |||
328 | strbuf_grow(sb, 0); | ||
329 | if (feof(fp)) | ||
330 | return EOF; | ||
331 | |||
332 | strbuf_reset(sb); | ||
333 | while ((ch = fgetc(fp)) != EOF) { | ||
334 | if (ch == term) | ||
335 | break; | ||
336 | strbuf_grow(sb, 1); | ||
337 | sb->buf[sb->len++] = ch; | ||
338 | } | ||
339 | if (ch == EOF && sb->len == 0) | ||
340 | return EOF; | ||
341 | |||
342 | sb->buf[sb->len] = '\0'; | ||
343 | return 0; | ||
344 | } | ||
345 | |||
346 | int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint) | ||
347 | { | ||
348 | int fd, len; | ||
349 | |||
350 | fd = open(path, O_RDONLY); | ||
351 | if (fd < 0) | ||
352 | return -1; | ||
353 | len = strbuf_read(sb, fd, hint); | ||
354 | close(fd); | ||
355 | if (len < 0) | ||
356 | return -1; | ||
357 | |||
358 | return len; | ||
359 | } | ||
diff --git a/Documentation/perf_counter/util/strbuf.h b/Documentation/perf_counter/util/strbuf.h new file mode 100644 index 000000000000..9ee908a3ec5d --- /dev/null +++ b/Documentation/perf_counter/util/strbuf.h | |||
@@ -0,0 +1,137 @@ | |||
1 | #ifndef STRBUF_H | ||
2 | #define STRBUF_H | ||
3 | |||
4 | /* | ||
5 | * Strbufs can be used in many ways: as a byte array, or to store arbitrarily | ||
6 | * long, overflow-safe strings. | ||
7 | * | ||
8 | * Strbufs have some invariants that are very important to keep in mind: | ||
9 | * | ||
10 | * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to | ||
11 | * build complex strings/buffers whose final size isn't easily known. | ||
12 | * | ||
13 | * It is NOT legal to copy the ->buf pointer away. | ||
14 | * `strbuf_detach' is the operation that detaches a buffer from its shell | ||
15 | * while keeping the shell valid wrt its invariants. | ||
16 | * | ||
17 | * 2. the ->buf member is a byte array that has at least ->len + 1 bytes | ||
18 | * allocated. The extra byte is used to store a '\0', allowing the ->buf | ||
19 | * member to be a valid C-string. Every strbuf function ensures this | ||
20 | * invariant is preserved. | ||
21 | * | ||
22 | * Note that it is OK to "play" with the buffer directly if you work it | ||
23 | * that way: | ||
24 | * | ||
25 | * strbuf_grow(sb, SOME_SIZE); | ||
26 | * ... Here, the memory array starting at sb->buf, and of length | ||
27 | * ... strbuf_avail(sb) is all yours, and you are sure that | ||
28 | * ... strbuf_avail(sb) is at least SOME_SIZE. | ||
29 | * strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE); | ||
30 | * | ||
31 | * Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb). | ||
32 | * | ||
33 | * Doing so is safe, though if it has to be done in many places, adding the | ||
34 | * missing API to the strbuf module is the way to go. | ||
35 | * | ||
36 | * XXX: do _not_ assume that the area that is yours is of size ->alloc - 1 | ||
37 | * even if it's true in the current implementation. Alloc is somehow a | ||
38 | * "private" member that should not be messed with. | ||
39 | */ | ||
40 | |||
41 | #include <assert.h> | ||
42 | |||
43 | extern char strbuf_slopbuf[]; | ||
44 | struct strbuf { | ||
45 | size_t alloc; | ||
46 | size_t len; | ||
47 | char *buf; | ||
48 | }; | ||
49 | |||
50 | #define STRBUF_INIT { 0, 0, strbuf_slopbuf } | ||
51 | |||
52 | /*----- strbuf life cycle -----*/ | ||
53 | extern void strbuf_init(struct strbuf *, size_t); | ||
54 | extern void strbuf_release(struct strbuf *); | ||
55 | extern char *strbuf_detach(struct strbuf *, size_t *); | ||
56 | extern void strbuf_attach(struct strbuf *, void *, size_t, size_t); | ||
57 | static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) { | ||
58 | struct strbuf tmp = *a; | ||
59 | *a = *b; | ||
60 | *b = tmp; | ||
61 | } | ||
62 | |||
63 | /*----- strbuf size related -----*/ | ||
64 | static inline size_t strbuf_avail(const struct strbuf *sb) { | ||
65 | return sb->alloc ? sb->alloc - sb->len - 1 : 0; | ||
66 | } | ||
67 | |||
68 | extern void strbuf_grow(struct strbuf *, size_t); | ||
69 | |||
70 | static inline void strbuf_setlen(struct strbuf *sb, size_t len) { | ||
71 | if (!sb->alloc) | ||
72 | strbuf_grow(sb, 0); | ||
73 | assert(len < sb->alloc); | ||
74 | sb->len = len; | ||
75 | sb->buf[len] = '\0'; | ||
76 | } | ||
77 | #define strbuf_reset(sb) strbuf_setlen(sb, 0) | ||
78 | |||
79 | /*----- content related -----*/ | ||
80 | extern void strbuf_trim(struct strbuf *); | ||
81 | extern void strbuf_rtrim(struct strbuf *); | ||
82 | extern void strbuf_ltrim(struct strbuf *); | ||
83 | extern int strbuf_cmp(const struct strbuf *, const struct strbuf *); | ||
84 | extern void strbuf_tolower(struct strbuf *); | ||
85 | |||
86 | extern struct strbuf **strbuf_split(const struct strbuf *, int delim); | ||
87 | extern void strbuf_list_free(struct strbuf **); | ||
88 | |||
89 | /*----- add data in your buffer -----*/ | ||
90 | static inline void strbuf_addch(struct strbuf *sb, int c) { | ||
91 | strbuf_grow(sb, 1); | ||
92 | sb->buf[sb->len++] = c; | ||
93 | sb->buf[sb->len] = '\0'; | ||
94 | } | ||
95 | |||
96 | extern void strbuf_insert(struct strbuf *, size_t pos, const void *, size_t); | ||
97 | extern void strbuf_remove(struct strbuf *, size_t pos, size_t len); | ||
98 | |||
99 | /* splice pos..pos+len with given data */ | ||
100 | extern void strbuf_splice(struct strbuf *, size_t pos, size_t len, | ||
101 | const void *, size_t); | ||
102 | |||
103 | extern void strbuf_add(struct strbuf *, const void *, size_t); | ||
104 | static inline void strbuf_addstr(struct strbuf *sb, const char *s) { | ||
105 | strbuf_add(sb, s, strlen(s)); | ||
106 | } | ||
107 | static inline void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2) { | ||
108 | strbuf_add(sb, sb2->buf, sb2->len); | ||
109 | } | ||
110 | extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len); | ||
111 | |||
112 | typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context); | ||
113 | extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context); | ||
114 | struct strbuf_expand_dict_entry { | ||
115 | const char *placeholder; | ||
116 | const char *value; | ||
117 | }; | ||
118 | extern size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, void *context); | ||
119 | |||
120 | __attribute__((format(printf,2,3))) | ||
121 | extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); | ||
122 | |||
123 | extern size_t strbuf_fread(struct strbuf *, size_t, FILE *); | ||
124 | /* XXX: if read fails, any partial read is undone */ | ||
125 | extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint); | ||
126 | extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint); | ||
127 | extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint); | ||
128 | |||
129 | extern int strbuf_getline(struct strbuf *, FILE *, int); | ||
130 | |||
131 | extern void stripspace(struct strbuf *buf, int skip_comments); | ||
132 | extern int launch_editor(const char *path, struct strbuf *buffer, const char *const *env); | ||
133 | |||
134 | extern int strbuf_branchname(struct strbuf *sb, const char *name); | ||
135 | extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name); | ||
136 | |||
137 | #endif /* STRBUF_H */ | ||
diff --git a/Documentation/perf_counter/util/usage.c b/Documentation/perf_counter/util/usage.c new file mode 100644 index 000000000000..7a10421fe6b4 --- /dev/null +++ b/Documentation/perf_counter/util/usage.c | |||
@@ -0,0 +1,80 @@ | |||
1 | /* | ||
2 | * GIT - The information manager from hell | ||
3 | * | ||
4 | * Copyright (C) Linus Torvalds, 2005 | ||
5 | */ | ||
6 | #include "util.h" | ||
7 | |||
8 | static void report(const char *prefix, const char *err, va_list params) | ||
9 | { | ||
10 | char msg[1024]; | ||
11 | vsnprintf(msg, sizeof(msg), err, params); | ||
12 | fprintf(stderr, "%s%s\n", prefix, msg); | ||
13 | } | ||
14 | |||
15 | static NORETURN void usage_builtin(const char *err) | ||
16 | { | ||
17 | fprintf(stderr, "usage: %s\n", err); | ||
18 | exit(129); | ||
19 | } | ||
20 | |||
21 | static NORETURN void die_builtin(const char *err, va_list params) | ||
22 | { | ||
23 | report("fatal: ", err, params); | ||
24 | exit(128); | ||
25 | } | ||
26 | |||
27 | static void error_builtin(const char *err, va_list params) | ||
28 | { | ||
29 | report("error: ", err, params); | ||
30 | } | ||
31 | |||
32 | static void warn_builtin(const char *warn, va_list params) | ||
33 | { | ||
34 | report("warning: ", warn, params); | ||
35 | } | ||
36 | |||
37 | /* If we are in a dlopen()ed .so, a write to a global variable would segfault | ||
38 | * (ugh), so keep things static. */ | ||
39 | static void (*usage_routine)(const char *err) NORETURN = usage_builtin; | ||
40 | static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin; | ||
41 | static void (*error_routine)(const char *err, va_list params) = error_builtin; | ||
42 | static void (*warn_routine)(const char *err, va_list params) = warn_builtin; | ||
43 | |||
44 | void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN) | ||
45 | { | ||
46 | die_routine = routine; | ||
47 | } | ||
48 | |||
49 | void usage(const char *err) | ||
50 | { | ||
51 | usage_routine(err); | ||
52 | } | ||
53 | |||
54 | void die(const char *err, ...) | ||
55 | { | ||
56 | va_list params; | ||
57 | |||
58 | va_start(params, err); | ||
59 | die_routine(err, params); | ||
60 | va_end(params); | ||
61 | } | ||
62 | |||
63 | int error(const char *err, ...) | ||
64 | { | ||
65 | va_list params; | ||
66 | |||
67 | va_start(params, err); | ||
68 | error_routine(err, params); | ||
69 | va_end(params); | ||
70 | return -1; | ||
71 | } | ||
72 | |||
73 | void warning(const char *warn, ...) | ||
74 | { | ||
75 | va_list params; | ||
76 | |||
77 | va_start(params, warn); | ||
78 | warn_routine(warn, params); | ||
79 | va_end(params); | ||
80 | } | ||
diff --git a/Documentation/perf_counter/util/util.h b/Documentation/perf_counter/util/util.h new file mode 100644 index 000000000000..36e40c38e093 --- /dev/null +++ b/Documentation/perf_counter/util/util.h | |||
@@ -0,0 +1,408 @@ | |||
1 | #ifndef GIT_COMPAT_UTIL_H | ||
2 | #define GIT_COMPAT_UTIL_H | ||
3 | |||
4 | #define _FILE_OFFSET_BITS 64 | ||
5 | |||
6 | #ifndef FLEX_ARRAY | ||
7 | /* | ||
8 | * See if our compiler is known to support flexible array members. | ||
9 | */ | ||
10 | #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) | ||
11 | # define FLEX_ARRAY /* empty */ | ||
12 | #elif defined(__GNUC__) | ||
13 | # if (__GNUC__ >= 3) | ||
14 | # define FLEX_ARRAY /* empty */ | ||
15 | # else | ||
16 | # define FLEX_ARRAY 0 /* older GNU extension */ | ||
17 | # endif | ||
18 | #endif | ||
19 | |||
20 | /* | ||
21 | * Otherwise, default to safer but a bit wasteful traditional style | ||
22 | */ | ||
23 | #ifndef FLEX_ARRAY | ||
24 | # define FLEX_ARRAY 1 | ||
25 | #endif | ||
26 | #endif | ||
27 | |||
28 | #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) | ||
29 | |||
30 | #ifdef __GNUC__ | ||
31 | #define TYPEOF(x) (__typeof__(x)) | ||
32 | #else | ||
33 | #define TYPEOF(x) | ||
34 | #endif | ||
35 | |||
36 | #define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (sizeof(x) * 8 - (bits)))) | ||
37 | #define HAS_MULTI_BITS(i) ((i) & ((i) - 1)) /* checks if an integer has more than 1 bit set */ | ||
38 | |||
39 | /* Approximation of the length of the decimal representation of this type. */ | ||
40 | #define decimal_length(x) ((int)(sizeof(x) * 2.56 + 0.5) + 1) | ||
41 | |||
42 | #if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__USLC__) && !defined(_M_UNIX) | ||
43 | #define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */ | ||
44 | #define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */ | ||
45 | #endif | ||
46 | #define _ALL_SOURCE 1 | ||
47 | #define _GNU_SOURCE 1 | ||
48 | #define _BSD_SOURCE 1 | ||
49 | |||
50 | #include <unistd.h> | ||
51 | #include <stdio.h> | ||
52 | #include <sys/stat.h> | ||
53 | #include <fcntl.h> | ||
54 | #include <stddef.h> | ||
55 | #include <stdlib.h> | ||
56 | #include <stdarg.h> | ||
57 | #include <string.h> | ||
58 | #include <errno.h> | ||
59 | #include <limits.h> | ||
60 | #include <sys/param.h> | ||
61 | #include <sys/types.h> | ||
62 | #include <dirent.h> | ||
63 | #include <sys/time.h> | ||
64 | #include <time.h> | ||
65 | #include <signal.h> | ||
66 | #include <fnmatch.h> | ||
67 | #include <assert.h> | ||
68 | #include <regex.h> | ||
69 | #include <utime.h> | ||
70 | #ifndef __MINGW32__ | ||
71 | #include <sys/wait.h> | ||
72 | #include <sys/poll.h> | ||
73 | #include <sys/socket.h> | ||
74 | #include <sys/ioctl.h> | ||
75 | #ifndef NO_SYS_SELECT_H | ||
76 | #include <sys/select.h> | ||
77 | #endif | ||
78 | #include <netinet/in.h> | ||
79 | #include <netinet/tcp.h> | ||
80 | #include <arpa/inet.h> | ||
81 | #include <netdb.h> | ||
82 | #include <pwd.h> | ||
83 | #include <inttypes.h> | ||
84 | #if defined(__CYGWIN__) | ||
85 | #undef _XOPEN_SOURCE | ||
86 | #include <grp.h> | ||
87 | #define _XOPEN_SOURCE 600 | ||
88 | #include "compat/cygwin.h" | ||
89 | #else | ||
90 | #undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */ | ||
91 | #include <grp.h> | ||
92 | #define _ALL_SOURCE 1 | ||
93 | #endif | ||
94 | #else /* __MINGW32__ */ | ||
95 | /* pull in Windows compatibility stuff */ | ||
96 | #include "compat/mingw.h" | ||
97 | #endif /* __MINGW32__ */ | ||
98 | |||
99 | #ifndef NO_ICONV | ||
100 | #include <iconv.h> | ||
101 | #endif | ||
102 | |||
103 | #ifndef NO_OPENSSL | ||
104 | #include <openssl/ssl.h> | ||
105 | #include <openssl/err.h> | ||
106 | #endif | ||
107 | |||
108 | /* On most systems <limits.h> would have given us this, but | ||
109 | * not on some systems (e.g. GNU/Hurd). | ||
110 | */ | ||
111 | #ifndef PATH_MAX | ||
112 | #define PATH_MAX 4096 | ||
113 | #endif | ||
114 | |||
115 | #ifndef PRIuMAX | ||
116 | #define PRIuMAX "llu" | ||
117 | #endif | ||
118 | |||
119 | #ifndef PRIu32 | ||
120 | #define PRIu32 "u" | ||
121 | #endif | ||
122 | |||
123 | #ifndef PRIx32 | ||
124 | #define PRIx32 "x" | ||
125 | #endif | ||
126 | |||
127 | #ifndef PATH_SEP | ||
128 | #define PATH_SEP ':' | ||
129 | #endif | ||
130 | |||
131 | #ifndef STRIP_EXTENSION | ||
132 | #define STRIP_EXTENSION "" | ||
133 | #endif | ||
134 | |||
135 | #ifndef has_dos_drive_prefix | ||
136 | #define has_dos_drive_prefix(path) 0 | ||
137 | #endif | ||
138 | |||
139 | #ifndef is_dir_sep | ||
140 | #define is_dir_sep(c) ((c) == '/') | ||
141 | #endif | ||
142 | |||
143 | #ifdef __GNUC__ | ||
144 | #define NORETURN __attribute__((__noreturn__)) | ||
145 | #else | ||
146 | #define NORETURN | ||
147 | #ifndef __attribute__ | ||
148 | #define __attribute__(x) | ||
149 | #endif | ||
150 | #endif | ||
151 | |||
152 | /* General helper functions */ | ||
153 | extern void usage(const char *err) NORETURN; | ||
154 | extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); | ||
155 | extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); | ||
156 | extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); | ||
157 | |||
158 | extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); | ||
159 | |||
160 | extern int prefixcmp(const char *str, const char *prefix); | ||
161 | extern time_t tm_to_time_t(const struct tm *tm); | ||
162 | |||
163 | static inline const char *skip_prefix(const char *str, const char *prefix) | ||
164 | { | ||
165 | size_t len = strlen(prefix); | ||
166 | return strncmp(str, prefix, len) ? NULL : str + len; | ||
167 | } | ||
168 | |||
169 | #if defined(NO_MMAP) || defined(USE_WIN32_MMAP) | ||
170 | |||
171 | #ifndef PROT_READ | ||
172 | #define PROT_READ 1 | ||
173 | #define PROT_WRITE 2 | ||
174 | #define MAP_PRIVATE 1 | ||
175 | #define MAP_FAILED ((void*)-1) | ||
176 | #endif | ||
177 | |||
178 | #define mmap git_mmap | ||
179 | #define munmap git_munmap | ||
180 | extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); | ||
181 | extern int git_munmap(void *start, size_t length); | ||
182 | |||
183 | #else /* NO_MMAP || USE_WIN32_MMAP */ | ||
184 | |||
185 | #include <sys/mman.h> | ||
186 | |||
187 | #endif /* NO_MMAP || USE_WIN32_MMAP */ | ||
188 | |||
189 | #ifdef NO_MMAP | ||
190 | |||
191 | /* This value must be multiple of (pagesize * 2) */ | ||
192 | #define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024) | ||
193 | |||
194 | #else /* NO_MMAP */ | ||
195 | |||
196 | /* This value must be multiple of (pagesize * 2) */ | ||
197 | #define DEFAULT_PACKED_GIT_WINDOW_SIZE \ | ||
198 | (sizeof(void*) >= 8 \ | ||
199 | ? 1 * 1024 * 1024 * 1024 \ | ||
200 | : 32 * 1024 * 1024) | ||
201 | |||
202 | #endif /* NO_MMAP */ | ||
203 | |||
204 | #ifdef NO_ST_BLOCKS_IN_STRUCT_STAT | ||
205 | #define on_disk_bytes(st) ((st).st_size) | ||
206 | #else | ||
207 | #define on_disk_bytes(st) ((st).st_blocks * 512) | ||
208 | #endif | ||
209 | |||
210 | #define DEFAULT_PACKED_GIT_LIMIT \ | ||
211 | ((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256)) | ||
212 | |||
213 | #ifdef NO_PREAD | ||
214 | #define pread git_pread | ||
215 | extern ssize_t git_pread(int fd, void *buf, size_t count, off_t offset); | ||
216 | #endif | ||
217 | /* | ||
218 | * Forward decl that will remind us if its twin in cache.h changes. | ||
219 | * This function is used in compat/pread.c. But we can't include | ||
220 | * cache.h there. | ||
221 | */ | ||
222 | extern ssize_t read_in_full(int fd, void *buf, size_t count); | ||
223 | |||
224 | #ifdef NO_SETENV | ||
225 | #define setenv gitsetenv | ||
226 | extern int gitsetenv(const char *, const char *, int); | ||
227 | #endif | ||
228 | |||
229 | #ifdef NO_MKDTEMP | ||
230 | #define mkdtemp gitmkdtemp | ||
231 | extern char *gitmkdtemp(char *); | ||
232 | #endif | ||
233 | |||
234 | #ifdef NO_UNSETENV | ||
235 | #define unsetenv gitunsetenv | ||
236 | extern void gitunsetenv(const char *); | ||
237 | #endif | ||
238 | |||
239 | #ifdef NO_STRCASESTR | ||
240 | #define strcasestr gitstrcasestr | ||
241 | extern char *gitstrcasestr(const char *haystack, const char *needle); | ||
242 | #endif | ||
243 | |||
244 | #ifdef NO_STRLCPY | ||
245 | #define strlcpy gitstrlcpy | ||
246 | extern size_t gitstrlcpy(char *, const char *, size_t); | ||
247 | #endif | ||
248 | |||
249 | #ifdef NO_STRTOUMAX | ||
250 | #define strtoumax gitstrtoumax | ||
251 | extern uintmax_t gitstrtoumax(const char *, char **, int); | ||
252 | #endif | ||
253 | |||
254 | #ifdef NO_HSTRERROR | ||
255 | #define hstrerror githstrerror | ||
256 | extern const char *githstrerror(int herror); | ||
257 | #endif | ||
258 | |||
259 | #ifdef NO_MEMMEM | ||
260 | #define memmem gitmemmem | ||
261 | void *gitmemmem(const void *haystack, size_t haystacklen, | ||
262 | const void *needle, size_t needlelen); | ||
263 | #endif | ||
264 | |||
265 | #ifdef FREAD_READS_DIRECTORIES | ||
266 | #ifdef fopen | ||
267 | #undef fopen | ||
268 | #endif | ||
269 | #define fopen(a,b) git_fopen(a,b) | ||
270 | extern FILE *git_fopen(const char*, const char*); | ||
271 | #endif | ||
272 | |||
273 | #ifdef SNPRINTF_RETURNS_BOGUS | ||
274 | #define snprintf git_snprintf | ||
275 | extern int git_snprintf(char *str, size_t maxsize, | ||
276 | const char *format, ...); | ||
277 | #define vsnprintf git_vsnprintf | ||
278 | extern int git_vsnprintf(char *str, size_t maxsize, | ||
279 | const char *format, va_list ap); | ||
280 | #endif | ||
281 | |||
282 | #ifdef __GLIBC_PREREQ | ||
283 | #if __GLIBC_PREREQ(2, 1) | ||
284 | #define HAVE_STRCHRNUL | ||
285 | #endif | ||
286 | #endif | ||
287 | |||
288 | #ifndef HAVE_STRCHRNUL | ||
289 | #define strchrnul gitstrchrnul | ||
290 | static inline char *gitstrchrnul(const char *s, int c) | ||
291 | { | ||
292 | while (*s && *s != c) | ||
293 | s++; | ||
294 | return (char *)s; | ||
295 | } | ||
296 | #endif | ||
297 | |||
298 | /* | ||
299 | * Wrappers: | ||
300 | */ | ||
301 | extern char *xstrdup(const char *str); | ||
302 | extern void *xmalloc(size_t size); | ||
303 | extern void *xmemdupz(const void *data, size_t len); | ||
304 | extern char *xstrndup(const char *str, size_t len); | ||
305 | extern void *xrealloc(void *ptr, size_t size); | ||
306 | extern void *xcalloc(size_t nmemb, size_t size); | ||
307 | extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); | ||
308 | extern ssize_t xread(int fd, void *buf, size_t len); | ||
309 | extern ssize_t xwrite(int fd, const void *buf, size_t len); | ||
310 | extern int xdup(int fd); | ||
311 | extern FILE *xfdopen(int fd, const char *mode); | ||
312 | static inline size_t xsize_t(off_t len) | ||
313 | { | ||
314 | return (size_t)len; | ||
315 | } | ||
316 | |||
317 | static inline int has_extension(const char *filename, const char *ext) | ||
318 | { | ||
319 | size_t len = strlen(filename); | ||
320 | size_t extlen = strlen(ext); | ||
321 | return len > extlen && !memcmp(filename + len - extlen, ext, extlen); | ||
322 | } | ||
323 | |||
324 | /* Sane ctype - no locale, and works with signed chars */ | ||
325 | #undef isascii | ||
326 | #undef isspace | ||
327 | #undef isdigit | ||
328 | #undef isalpha | ||
329 | #undef isalnum | ||
330 | #undef tolower | ||
331 | #undef toupper | ||
332 | extern unsigned char sane_ctype[256]; | ||
333 | #define GIT_SPACE 0x01 | ||
334 | #define GIT_DIGIT 0x02 | ||
335 | #define GIT_ALPHA 0x04 | ||
336 | #define GIT_GLOB_SPECIAL 0x08 | ||
337 | #define GIT_REGEX_SPECIAL 0x10 | ||
338 | #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) | ||
339 | #define isascii(x) (((x) & ~0x7f) == 0) | ||
340 | #define isspace(x) sane_istest(x,GIT_SPACE) | ||
341 | #define isdigit(x) sane_istest(x,GIT_DIGIT) | ||
342 | #define isalpha(x) sane_istest(x,GIT_ALPHA) | ||
343 | #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) | ||
344 | #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) | ||
345 | #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL) | ||
346 | #define tolower(x) sane_case((unsigned char)(x), 0x20) | ||
347 | #define toupper(x) sane_case((unsigned char)(x), 0) | ||
348 | |||
349 | static inline int sane_case(int x, int high) | ||
350 | { | ||
351 | if (sane_istest(x, GIT_ALPHA)) | ||
352 | x = (x & ~0x20) | high; | ||
353 | return x; | ||
354 | } | ||
355 | |||
356 | static inline int strtoul_ui(char const *s, int base, unsigned int *result) | ||
357 | { | ||
358 | unsigned long ul; | ||
359 | char *p; | ||
360 | |||
361 | errno = 0; | ||
362 | ul = strtoul(s, &p, base); | ||
363 | if (errno || *p || p == s || (unsigned int) ul != ul) | ||
364 | return -1; | ||
365 | *result = ul; | ||
366 | return 0; | ||
367 | } | ||
368 | |||
369 | static inline int strtol_i(char const *s, int base, int *result) | ||
370 | { | ||
371 | long ul; | ||
372 | char *p; | ||
373 | |||
374 | errno = 0; | ||
375 | ul = strtol(s, &p, base); | ||
376 | if (errno || *p || p == s || (int) ul != ul) | ||
377 | return -1; | ||
378 | *result = ul; | ||
379 | return 0; | ||
380 | } | ||
381 | |||
382 | #ifdef INTERNAL_QSORT | ||
383 | void git_qsort(void *base, size_t nmemb, size_t size, | ||
384 | int(*compar)(const void *, const void *)); | ||
385 | #define qsort git_qsort | ||
386 | #endif | ||
387 | |||
388 | #ifndef DIR_HAS_BSD_GROUP_SEMANTICS | ||
389 | # define FORCE_DIR_SET_GID S_ISGID | ||
390 | #else | ||
391 | # define FORCE_DIR_SET_GID 0 | ||
392 | #endif | ||
393 | |||
394 | #ifdef NO_NSEC | ||
395 | #undef USE_NSEC | ||
396 | #define ST_CTIME_NSEC(st) 0 | ||
397 | #define ST_MTIME_NSEC(st) 0 | ||
398 | #else | ||
399 | #ifdef USE_ST_TIMESPEC | ||
400 | #define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec)) | ||
401 | #define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec)) | ||
402 | #else | ||
403 | #define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec)) | ||
404 | #define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec)) | ||
405 | #endif | ||
406 | #endif | ||
407 | |||
408 | #endif | ||
diff --git a/Documentation/perf_counter/util/wrapper.c b/Documentation/perf_counter/util/wrapper.c new file mode 100644 index 000000000000..6350d65f6d9e --- /dev/null +++ b/Documentation/perf_counter/util/wrapper.c | |||
@@ -0,0 +1,206 @@ | |||
1 | /* | ||
2 | * Various trivial helper wrappers around standard functions | ||
3 | */ | ||
4 | #include "cache.h" | ||
5 | |||
/*
 * There's no pack memory to release - but stay close to the Git
 * version so wrap this away:
 */
static inline void release_pack_memory(size_t size, int flag)
{
	/* intentionally empty: perf has no pack memory to reclaim */
}
13 | |||
/*
 * Duplicate a NUL-terminated string; never returns NULL.
 * On a first failure, release pack memory and retry once before dying.
 */
char *xstrdup(const char *str)
{
	char *dup = strdup(str);

	if (dup)
		return dup;
	release_pack_memory(strlen(str) + 1, -1);
	dup = strdup(str);
	if (!dup)
		die("Out of memory, strdup failed");
	return dup;
}
25 | |||
/*
 * malloc() wrapper that never returns NULL: a zero-size request falls
 * back to a one-byte allocation, and on failure pack memory is
 * released and the allocation retried once before dying.
 * With XMALLOC_POISON the fresh block is filled with 0xA5 to catch
 * reads of uninitialized memory.
 */
void *xmalloc(size_t size)
{
	void *mem = malloc(size);

	if (!mem && !size)
		mem = malloc(1);
	if (!mem) {
		release_pack_memory(size, -1);
		mem = malloc(size);
		if (!mem && !size)
			mem = malloc(1);
		if (!mem)
			die("Out of memory, malloc failed");
	}
#ifdef XMALLOC_POISON
	memset(mem, 0xA5, size);
#endif
	return mem;
}
44 | |||
/*
 * xmemdupz() allocates (len + 1) bytes of memory, copies "len" bytes
 * of "data" into it and NUL-terminates the result, returning a pointer
 * to the allocated memory.  Dies (via xmalloc) if allocation fails.
 */
void *xmemdupz(const void *data, size_t len)
{
	char *copy = xmalloc(len + 1);

	memcpy(copy, data, len);
	copy[len] = '\0';
	return copy;
}
58 | |||
/*
 * Duplicate at most "len" bytes of "str", stopping early at an
 * embedded NUL.  The result is always NUL-terminated.  Dies on OOM.
 */
char *xstrndup(const char *str, size_t len)
{
	const char *nul = memchr(str, '\0', len);

	if (nul)
		len = nul - str;
	return xmemdupz(str, len);
}
64 | |||
/*
 * realloc() wrapper that never returns NULL: zero-size requests fall
 * back to a one-byte allocation, and on failure pack memory is
 * released and the reallocation retried once before dying.
 */
void *xrealloc(void *ptr, size_t size)
{
	void *mem = realloc(ptr, size);

	if (!mem && !size)
		mem = realloc(ptr, 1);
	if (!mem) {
		release_pack_memory(size, -1);
		mem = realloc(ptr, size);
		if (!mem && !size)
			mem = realloc(ptr, 1);
		if (!mem)
			die("Out of memory, realloc failed");
	}
	return mem;
}
80 | |||
/*
 * calloc() wrapper that never returns NULL: a zero-sized request falls
 * back to a single zeroed byte, and on failure pack memory is released
 * and the allocation retried once before dying.
 */
void *xcalloc(size_t nmemb, size_t size)
{
	void *mem = calloc(nmemb, size);

	if (!mem && (!nmemb || !size))
		mem = calloc(1, 1);
	if (!mem) {
		release_pack_memory(nmemb * size, -1);
		mem = calloc(nmemb, size);
		if (!mem && (!nmemb || !size))
			mem = calloc(1, 1);
		if (!mem)
			die("Out of memory, calloc failed");
	}
	return mem;
}
96 | |||
/*
 * mmap() wrapper: a zero-length request maps to NULL; on MAP_FAILED,
 * release pack memory and retry once before dying.
 */
void *xmmap(void *start, size_t length,
	int prot, int flags, int fd, off_t offset)
{
	void *map = mmap(start, length, prot, flags, fd, offset);

	if (map != MAP_FAILED)
		return map;
	if (!length)
		return NULL;
	release_pack_memory(length, fd);
	map = mmap(start, length, prot, flags, fd, offset);
	if (map == MAP_FAILED)
		die("Out of memory? mmap failed: %s", strerror(errno));
	return map;
}
111 | |||
/*
 * xread() is the same as read(), but it automatically restarts read()
 * operations after a recoverable error (EAGAIN or EINTR).  xread()
 * DOES NOT GUARANTEE that "len" bytes are read even if the data is
 * available.
 */
ssize_t xread(int fd, void *buf, size_t len)
{
	for (;;) {
		ssize_t n = read(fd, buf, len);

		if (n >= 0 || (errno != EAGAIN && errno != EINTR))
			return n;
	}
}
127 | |||
/*
 * xwrite() is the same as write(), but it automatically restarts
 * write() operations after a recoverable error (EAGAIN or EINTR).
 * xwrite() DOES NOT GUARANTEE that "len" bytes are written even if the
 * operation is successful.
 */
ssize_t xwrite(int fd, const void *buf, size_t len)
{
	for (;;) {
		ssize_t n = write(fd, buf, len);

		if (n >= 0 || (errno != EAGAIN && errno != EINTR))
			return n;
	}
}
143 | |||
/*
 * Read exactly "count" bytes unless EOF or an error intervenes.
 * Returns the number of bytes actually read; if nothing was read
 * before EOF/error, returns xread()'s result (0 or -1) directly.
 */
ssize_t read_in_full(int fd, void *buf, size_t count)
{
	char *cursor = buf;
	ssize_t done = 0;

	while (count > 0) {
		ssize_t n = xread(fd, cursor, count);

		if (n <= 0)
			return done ? done : n;
		cursor += n;
		done += n;
		count -= n;
	}

	return done;
}
160 | |||
/*
 * Write exactly "count" bytes or fail.  Returns count on success;
 * returns -1 on error, setting errno to ENOSPC if the descriptor
 * accepted zero bytes.
 */
ssize_t write_in_full(int fd, const void *buf, size_t count)
{
	const char *cursor = buf;
	ssize_t done = 0;

	while (count > 0) {
		ssize_t n = xwrite(fd, cursor, count);

		if (n < 0)
			return -1;
		if (n == 0) {
			errno = ENOSPC;
			return -1;
		}
		cursor += n;
		done += n;
		count -= n;
	}

	return done;
}
181 | |||
/*
 * dup() that dies instead of returning an error.
 */
int xdup(int fd)
{
	int nfd = dup(fd);

	if (nfd < 0)
		die("dup failed: %s", strerror(errno));
	return nfd;
}
189 | |||
/*
 * fdopen() that dies instead of returning NULL.
 */
FILE *xfdopen(int fd, const char *mode)
{
	FILE *f = fdopen(fd, mode);

	if (!f)
		die("Out of memory? fdopen failed: %s", strerror(errno));
	return f;
}
197 | |||
/*
 * mkstemp() that dies instead of returning an error.  As usual,
 * "template" is modified in place to hold the generated name.
 */
int xmkstemp(char *template)
{
	int fd = mkstemp(template);

	if (fd < 0)
		die("Unable to create temporary file: %s", strerror(errno));
	return fd;
}
diff --git a/MAINTAINERS b/MAINTAINERS index c547f4a2bb62..5114b5341df4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -4375,6 +4375,16 @@ S: Maintained | |||
4375 | F: include/linux/delayacct.h | 4375 | F: include/linux/delayacct.h |
4376 | F: kernel/delayacct.c | 4376 | F: kernel/delayacct.c |
4377 | 4377 | ||
4378 | PERFORMANCE COUNTER SUBSYSTEM | ||
4379 | P: Peter Zijlstra | ||
4380 | M: a.p.zijlstra@chello.nl | ||
4381 | P: Paul Mackerras | ||
4382 | M: paulus@samba.org | ||
4383 | P: Ingo Molnar | ||
4384 | M: mingo@elte.hu | ||
4385 | L: linux-kernel@vger.kernel.org | ||
4386 | S: Supported | ||
4387 | |||
4378 | PERSONALITY HANDLING | 4388 | PERSONALITY HANDLING |
4379 | P: Christoph Hellwig | 4389 | P: Christoph Hellwig |
4380 | M: hch@infradead.org | 4390 | M: hch@infradead.org |
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index b7e034b0a6dd..20a44d0c9fdd 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h | |||
@@ -131,5 +131,44 @@ static inline int irqs_disabled_flags(unsigned long flags) | |||
131 | */ | 131 | */ |
132 | struct irq_chip; | 132 | struct irq_chip; |
133 | 133 | ||
134 | #ifdef CONFIG_PERF_COUNTERS | ||
/*
 * Read this cpu's paca->perf_counter_pending flag.
 *
 * On 64-bit powerpc r13 holds the current cpu's paca pointer, so a
 * single lbz from a constant offset off r13 reads the flag without
 * any explicit preemption handling.
 */
static inline unsigned long test_perf_counter_pending(void)
{
	unsigned long x;

	asm volatile("lbz %0,%1(13)"
		: "=r" (x)
		: "i" (offsetof(struct paca_struct, perf_counter_pending)));
	return x;
}
144 | |||
/* Set paca->perf_counter_pending to 1 with a byte store off r13. */
static inline void set_perf_counter_pending(void)
{
	asm volatile("stb %0,%1(13)" : :
		"r" (1),
		"i" (offsetof(struct paca_struct, perf_counter_pending)));
}
151 | |||
/* Clear paca->perf_counter_pending with a byte store off r13. */
static inline void clear_perf_counter_pending(void)
{
	asm volatile("stb %0,%1(13)" : :
		"r" (0),
		"i" (offsetof(struct paca_struct, perf_counter_pending)));
}
158 | |||
159 | extern void perf_counter_do_pending(void); | ||
160 | |||
161 | #else | ||
162 | |||
/* !CONFIG_PERF_COUNTERS: no pending flag exists, so these are no-ops. */
static inline unsigned long test_perf_counter_pending(void)
{
	return 0;
}

static inline void set_perf_counter_pending(void) {}
static inline void clear_perf_counter_pending(void) {}
static inline void perf_counter_do_pending(void) {}
171 | #endif /* CONFIG_PERF_COUNTERS */ | ||
172 | |||
134 | #endif /* __KERNEL__ */ | 173 | #endif /* __KERNEL__ */ |
135 | #endif /* _ASM_POWERPC_HW_IRQ_H */ | 174 | #endif /* _ASM_POWERPC_HW_IRQ_H */ |
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 082b3aedf145..6ef055723019 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h | |||
@@ -99,6 +99,7 @@ struct paca_struct { | |||
99 | u8 soft_enabled; /* irq soft-enable flag */ | 99 | u8 soft_enabled; /* irq soft-enable flag */ |
100 | u8 hard_enabled; /* set if irqs are enabled in MSR */ | 100 | u8 hard_enabled; /* set if irqs are enabled in MSR */ |
101 | u8 io_sync; /* writel() needs spin_unlock sync */ | 101 | u8 io_sync; /* writel() needs spin_unlock sync */ |
102 | u8 perf_counter_pending; /* PM interrupt while soft-disabled */ | ||
102 | 103 | ||
103 | /* Stuff for accurate time accounting */ | 104 | /* Stuff for accurate time accounting */ |
104 | u64 user_time; /* accumulated usermode TB ticks */ | 105 | u64 user_time; /* accumulated usermode TB ticks */ |
diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h new file mode 100644 index 000000000000..9d7ff6d7fb56 --- /dev/null +++ b/arch/powerpc/include/asm/perf_counter.h | |||
@@ -0,0 +1,72 @@ | |||
1 | /* | ||
2 | * Performance counter support - PowerPC-specific definitions. | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/types.h> | ||
12 | |||
13 | #define MAX_HWCOUNTERS 8 | ||
14 | #define MAX_EVENT_ALTERNATIVES 8 | ||
15 | |||
/*
 * This struct provides the constants and functions needed to
 * describe the PMU on a particular POWER-family CPU.
 */
struct power_pmu {
	int n_counter;		/* number of hardware PMCs on this CPU */
	int max_alternatives;	/* max alternative codes per event */
	u64 add_fields;		/* see the constraint commentary below */
	u64 test_adder;		/* see the constraint commentary below */
	int (*compute_mmcr)(unsigned int events[], int n_ev,
			unsigned int hwc[], u64 mmcr[]);
	int (*get_constraint)(unsigned int event, u64 *mskp, u64 *valp);
	int (*get_alternatives)(unsigned int event, unsigned int alt[]);
	void (*disable_pmc)(unsigned int pmc, u64 mmcr[]);
	int n_generic;		/* number of entries in generic_events */
	int *generic_events;	/* map of generic event ids to raw codes */
};
33 | |||
34 | extern struct power_pmu *ppmu; | ||
35 | |||
36 | /* | ||
37 | * The power_pmu.get_constraint function returns a 64-bit value and | ||
38 | * a 64-bit mask that express the constraints between this event and | ||
39 | * other events. | ||
40 | * | ||
41 | * The value and mask are divided up into (non-overlapping) bitfields | ||
42 | * of three different types: | ||
43 | * | ||
44 | * Select field: this expresses the constraint that some set of bits | ||
45 | * in MMCR* needs to be set to a specific value for this event. For a | ||
46 | * select field, the mask contains 1s in every bit of the field, and | ||
47 | * the value contains a unique value for each possible setting of the | ||
48 | * MMCR* bits. The constraint checking code will ensure that two events | ||
49 | * that set the same field in their masks have the same value in their | ||
50 | * value dwords. | ||
51 | * | ||
52 | * Add field: this expresses the constraint that there can be at most | ||
53 | * N events in a particular class. A field of k bits can be used for | ||
54 | * N <= 2^(k-1) - 1. The mask has the most significant bit of the field | ||
55 | * set (and the other bits 0), and the value has only the least significant | ||
56 | * bit of the field set. In addition, the 'add_fields' and 'test_adder' | ||
57 | * in the struct power_pmu for this processor come into play. The | ||
58 | * add_fields value contains 1 in the LSB of the field, and the | ||
59 | * test_adder contains 2^(k-1) - 1 - N in the field. | ||
60 | * | ||
61 | * NAND field: this expresses the constraint that you may not have events | ||
62 | * in all of a set of classes. (For example, on PPC970, you can't select | ||
63 | * events from the FPU, ISU and IDU simultaneously, although any two are | ||
64 | * possible.) For N classes, the field is N+1 bits wide, and each class | ||
65 | * is assigned one bit from the least-significant N bits. The mask has | ||
66 | * only the most-significant bit set, and the value has only the bit | ||
67 | * for the event's class set. The test_adder has the least significant | ||
68 | * bit set in the field. | ||
69 | * | ||
70 | * If an event is not subject to the constraint expressed by a particular | ||
71 | * field, then it will have 0 in both the mask and value for that field. | ||
72 | */ | ||
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index d98a30dfd41c..a0b92de51c7e 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h | |||
@@ -322,6 +322,6 @@ SYSCALL_SPU(epoll_create1) | |||
322 | SYSCALL_SPU(dup3) | 322 | SYSCALL_SPU(dup3) |
323 | SYSCALL_SPU(pipe2) | 323 | SYSCALL_SPU(pipe2) |
324 | SYSCALL(inotify_init1) | 324 | SYSCALL(inotify_init1) |
325 | SYSCALL(ni_syscall) | 325 | SYSCALL_SPU(perf_counter_open) |
326 | COMPAT_SYS_SPU(preadv) | 326 | COMPAT_SYS_SPU(preadv) |
327 | COMPAT_SYS_SPU(pwritev) | 327 | COMPAT_SYS_SPU(pwritev) |
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 3f06f8ec81c5..4badac2d11d1 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h | |||
@@ -341,6 +341,7 @@ | |||
341 | #define __NR_dup3 316 | 341 | #define __NR_dup3 316 |
342 | #define __NR_pipe2 317 | 342 | #define __NR_pipe2 317 |
343 | #define __NR_inotify_init1 318 | 343 | #define __NR_inotify_init1 318 |
344 | #define __NR_perf_counter_open 319 | ||
344 | #define __NR_preadv 320 | 345 | #define __NR_preadv 320 |
345 | #define __NR_pwritev 321 | 346 | #define __NR_pwritev 321 |
346 | 347 | ||
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 71901fbda4a5..9ba1bb731fcc 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile | |||
@@ -94,6 +94,8 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o | |||
94 | 94 | ||
95 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o | 95 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o |
96 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o | 96 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o |
97 | obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o power4-pmu.o ppc970-pmu.o \ | ||
98 | power5-pmu.o power5+-pmu.o power6-pmu.o | ||
97 | 99 | ||
98 | obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o | 100 | obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o |
99 | 101 | ||
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 1e40bc053946..e981d1ce1914 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -131,6 +131,7 @@ int main(void) | |||
131 | DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); | 131 | DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); |
132 | DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); | 132 | DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); |
133 | DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); | 133 | DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); |
134 | DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending)); | ||
134 | DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); | 135 | DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); |
135 | DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); | 136 | DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); |
136 | DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); | 137 | DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); |
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index abfc32330479..43e073477c34 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S | |||
@@ -526,6 +526,15 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) | |||
526 | 2: | 526 | 2: |
527 | TRACE_AND_RESTORE_IRQ(r5); | 527 | TRACE_AND_RESTORE_IRQ(r5); |
528 | 528 | ||
529 | #ifdef CONFIG_PERF_COUNTERS | ||
530 | /* check paca->perf_counter_pending if we're enabling ints */ | ||
531 | lbz r3,PACAPERFPEND(r13) | ||
532 | and. r3,r3,r5 | ||
533 | beq 27f | ||
534 | bl .perf_counter_do_pending | ||
535 | 27: | ||
536 | #endif /* CONFIG_PERF_COUNTERS */ | ||
537 | |||
529 | /* extract EE bit and use it to restore paca->hard_enabled */ | 538 | /* extract EE bit and use it to restore paca->hard_enabled */ |
530 | ld r3,_MSR(r1) | 539 | ld r3,_MSR(r1) |
531 | rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ | 540 | rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ |
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 8c1a4966867e..feff792ed0f9 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c | |||
@@ -135,6 +135,11 @@ notrace void raw_local_irq_restore(unsigned long en) | |||
135 | iseries_handle_interrupts(); | 135 | iseries_handle_interrupts(); |
136 | } | 136 | } |
137 | 137 | ||
138 | if (test_perf_counter_pending()) { | ||
139 | clear_perf_counter_pending(); | ||
140 | perf_counter_do_pending(); | ||
141 | } | ||
142 | |||
138 | /* | 143 | /* |
139 | * if (get_paca()->hard_enabled) return; | 144 | * if (get_paca()->hard_enabled) return; |
140 | * But again we need to take care that gcc gets hard_enabled directly | 145 | * But again we need to take care that gcc gets hard_enabled directly |
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c new file mode 100644 index 000000000000..bd76d0fa2c35 --- /dev/null +++ b/arch/powerpc/kernel/perf_counter.c | |||
@@ -0,0 +1,866 @@ | |||
1 | /* | ||
2 | * Performance counter support - powerpc architecture code | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/perf_counter.h> | ||
14 | #include <linux/percpu.h> | ||
15 | #include <linux/hardirq.h> | ||
16 | #include <asm/reg.h> | ||
17 | #include <asm/pmc.h> | ||
18 | #include <asm/machdep.h> | ||
19 | #include <asm/firmware.h> | ||
20 | |||
21 | struct cpu_hw_counters { | ||
22 | int n_counters; | ||
23 | int n_percpu; | ||
24 | int disabled; | ||
25 | int n_added; | ||
26 | struct perf_counter *counter[MAX_HWCOUNTERS]; | ||
27 | unsigned int events[MAX_HWCOUNTERS]; | ||
28 | u64 mmcr[3]; | ||
29 | u8 pmcs_enabled; | ||
30 | }; | ||
31 | DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); | ||
32 | |||
33 | struct power_pmu *ppmu; | ||
34 | |||
35 | /* | ||
36 | * Normally, to ignore kernel events we set the FCS (freeze counters | ||
37 | * in supervisor mode) bit in MMCR0, but if the kernel runs with the | ||
38 | * hypervisor bit set in the MSR, or if we are running on a processor | ||
39 | * where the hypervisor bit is forced to 1 (as on Apple G5 processors), | ||
40 | * then we need to use the FCHV bit to ignore kernel events. | ||
41 | */ | ||
42 | static unsigned int freeze_counters_kernel = MMCR0_FCS; | ||
43 | |||
44 | static void perf_counter_interrupt(struct pt_regs *regs); | ||
45 | |||
/* No PMU register debug dump implemented for powerpc yet. */
void perf_counter_print_debug(void)
{
}
49 | |||
/*
 * Read one performance monitor counter (PMC).
 *
 * mfspr() requires a compile-time-constant SPR number, hence the
 * switch on the 1-based counter index.  An out-of-range index logs an
 * error and reads as 0.
 */
static unsigned long read_pmc(int idx)
{
	unsigned long val;

	switch (idx) {
	case 1:
		val = mfspr(SPRN_PMC1);
		break;
	case 2:
		val = mfspr(SPRN_PMC2);
		break;
	case 3:
		val = mfspr(SPRN_PMC3);
		break;
	case 4:
		val = mfspr(SPRN_PMC4);
		break;
	case 5:
		val = mfspr(SPRN_PMC5);
		break;
	case 6:
		val = mfspr(SPRN_PMC6);
		break;
	case 7:
		val = mfspr(SPRN_PMC7);
		break;
	case 8:
		val = mfspr(SPRN_PMC8);
		break;
	default:
		printk(KERN_ERR "oops trying to read PMC%d\n", idx);
		val = 0;
	}
	return val;
}
88 | |||
/*
 * Write one PMC.
 *
 * mtspr() requires a compile-time-constant SPR number, hence the
 * switch on the 1-based counter index.  An out-of-range index logs an
 * error and the write is dropped.
 */
static void write_pmc(int idx, unsigned long val)
{
	switch (idx) {
	case 1:
		mtspr(SPRN_PMC1, val);
		break;
	case 2:
		mtspr(SPRN_PMC2, val);
		break;
	case 3:
		mtspr(SPRN_PMC3, val);
		break;
	case 4:
		mtspr(SPRN_PMC4, val);
		break;
	case 5:
		mtspr(SPRN_PMC5, val);
		break;
	case 6:
		mtspr(SPRN_PMC6, val);
		break;
	case 7:
		mtspr(SPRN_PMC7, val);
		break;
	case 8:
		mtspr(SPRN_PMC8, val);
		break;
	default:
		printk(KERN_ERR "oops trying to write PMC%d\n", idx);
	}
}
123 | |||
/*
 * Check if a set of events can all go on the PMU at once.
 * If they can't, this will look at alternative codes for the events
 * and see if any combination of alternative codes is feasible.
 * The feasible set is returned in event[].
 *
 * Returns 0 on success (with event[] possibly rewritten to the chosen
 * alternatives) or -1 if no feasible combination exists.  The mask /
 * value / add-field arithmetic follows the constraint-field encoding
 * documented in asm/perf_counter.h.
 */
static int power_check_constraints(unsigned int event[], int n_ev)
{
	u64 mask, value, nv;
	unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
	u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
	u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
	u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
	int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
	int i, j;
	u64 addf = ppmu->add_fields;
	u64 tadd = ppmu->test_adder;

	if (n_ev > ppmu->n_counter)
		return -1;

	/* First see if the events will go on as-is */
	for (i = 0; i < n_ev; ++i) {
		alternatives[i][0] = event[i];
		if (ppmu->get_constraint(event[i], &amasks[i][0],
					 &avalues[i][0]))
			return -1;
		choice[i] = 0;
	}
	value = mask = 0;
	for (i = 0; i < n_ev; ++i) {
		/* fold event i's constraint value into the running total */
		nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf);
		if ((((nv + tadd) ^ value) & mask) != 0 ||
		    (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0)
			break;
		value = nv;
		mask |= amasks[i][0];
	}
	if (i == n_ev)
		return 0;	/* all OK */

	/* doesn't work, gather alternatives... */
	if (!ppmu->get_alternatives)
		return -1;
	for (i = 0; i < n_ev; ++i) {
		n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]);
		for (j = 1; j < n_alt[i]; ++j)
			ppmu->get_constraint(alternatives[i][j],
					     &amasks[i][j], &avalues[i][j]);
	}

	/* enumerate all possibilities and see if any will work */
	i = 0;
	j = -1;
	value = mask = nv = 0;
	while (i < n_ev) {
		if (j >= 0) {
			/* we're backtracking, restore context */
			value = svalues[i];
			mask = smasks[i];
			j = choice[i];
		}
		/*
		 * See if any alternative k for event i,
		 * where k > j, will satisfy the constraints.
		 */
		while (++j < n_alt[i]) {
			nv = (value | avalues[i][j]) +
				(value & avalues[i][j] & addf);
			if ((((nv + tadd) ^ value) & mask) == 0 &&
			    (((nv + tadd) ^ avalues[i][j])
			     & amasks[i][j]) == 0)
				break;
		}
		if (j >= n_alt[i]) {
			/*
			 * No feasible alternative, backtrack
			 * to event i-1 and continue enumerating its
			 * alternatives from where we got up to.
			 */
			if (--i < 0)
				return -1;
		} else {
			/*
			 * Found a feasible alternative for event i,
			 * remember where we got up to with this event,
			 * go on to the next event, and start with
			 * the first alternative for it.
			 */
			choice[i] = j;
			svalues[i] = value;
			smasks[i] = mask;
			value = nv;
			mask |= amasks[i][j];
			++i;
			j = -1;
		}
	}

	/* OK, we have a feasible combination, tell the caller the solution */
	for (i = 0; i < n_ev; ++i)
		event[i] = alternatives[i][choice[i]];
	return 0;
}
228 | |||
229 | /* | ||
230 | * Check if newly-added counters have consistent settings for | ||
231 | * exclude_{user,kernel,hv} with each other and any previously | ||
232 | * added counters. | ||
233 | */ | ||
234 | static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new) | ||
235 | { | ||
236 | int eu, ek, eh; | ||
237 | int i, n; | ||
238 | struct perf_counter *counter; | ||
239 | |||
240 | n = n_prev + n_new; | ||
241 | if (n <= 1) | ||
242 | return 0; | ||
243 | |||
244 | eu = ctrs[0]->hw_event.exclude_user; | ||
245 | ek = ctrs[0]->hw_event.exclude_kernel; | ||
246 | eh = ctrs[0]->hw_event.exclude_hv; | ||
247 | if (n_prev == 0) | ||
248 | n_prev = 1; | ||
249 | for (i = n_prev; i < n; ++i) { | ||
250 | counter = ctrs[i]; | ||
251 | if (counter->hw_event.exclude_user != eu || | ||
252 | counter->hw_event.exclude_kernel != ek || | ||
253 | counter->hw_event.exclude_hv != eh) | ||
254 | return -EAGAIN; | ||
255 | } | ||
256 | return 0; | ||
257 | } | ||
258 | |||
/*
 * Fold the current hardware PMC value into counter->count and
 * counter->hw.period_left.  A counter with hw.idx == 0 is not
 * currently on a PMC and is left untouched.
 */
static void power_perf_read(struct perf_counter *counter)
{
	long val, delta, prev;

	if (!counter->hw.idx)
		return;
	/*
	 * Performance monitor interrupts come even when interrupts
	 * are soft-disabled, as long as interrupts are hard-enabled.
	 * Therefore we treat them like NMIs.
	 */
	/* The cmpxchg retry keeps prev_count and the PMC read coherent
	 * even if a PMU interrupt updates prev_count under us. */
	do {
		prev = atomic64_read(&counter->hw.prev_count);
		barrier();
		val = read_pmc(counter->hw.idx);
	} while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev);

	/* The counters are only 32 bits wide */
	delta = (val - prev) & 0xfffffffful;
	atomic64_add(delta, &counter->count);
	atomic64_sub(delta, &counter->hw.period_left);
}
281 | |||
/*
 * Disable all counters to prevent PMU interrupts and to allow
 * counters to be added or removed.
 *
 * Returns the previous value of cpuhw->disabled (0 when this call
 * actually performed the disable); the caller hands that value back to
 * hw_perf_restore() so nested disable/restore pairs balance.
 */
u64 hw_perf_save_disable(void)
{
	struct cpu_hw_counters *cpuhw;
	unsigned long ret;
	unsigned long flags;

	local_irq_save(flags);
	cpuhw = &__get_cpu_var(cpu_hw_counters);

	ret = cpuhw->disabled;
	if (!ret) {
		cpuhw->disabled = 1;
		cpuhw->n_added = 0;

		/*
		 * Check if we ever enabled the PMU on this cpu.
		 */
		if (!cpuhw->pmcs_enabled) {
			if (ppc_md.enable_pmcs)
				ppc_md.enable_pmcs();
			cpuhw->pmcs_enabled = 1;
		}

		/*
		 * Disable instruction sampling if it was enabled
		 */
		if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
			mtspr(SPRN_MMCRA,
			      cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
			mb();
		}

		/*
		 * Set the 'freeze counters' bit.
		 * The barrier is to make sure the mtspr has been
		 * executed and the PMU has frozen the counters
		 * before we return.
		 */
		mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
		mb();
	}
	local_irq_restore(flags);
	return ret;
}
330 | |||
/*
 * Re-enable all counters if disable == 0.
 * If we were previously disabled and counters were added, then
 * put the new config on the PMU.
 *
 * "disable" is the value returned by the matching
 * hw_perf_save_disable(); a non-zero value means an outer
 * disable/restore pair is still in effect, so do nothing.
 */
void hw_perf_restore(u64 disable)
{
	struct perf_counter *counter;
	struct cpu_hw_counters *cpuhw;
	unsigned long flags;
	long i;
	unsigned long val;
	s64 left;
	unsigned int hwc_index[MAX_HWCOUNTERS];

	if (disable)
		return;
	local_irq_save(flags);
	cpuhw = &__get_cpu_var(cpu_hw_counters);
	cpuhw->disabled = 0;

	/*
	 * If we didn't change anything, or only removed counters,
	 * no need to recalculate MMCR* settings and reset the PMCs.
	 * Just reenable the PMU with the current MMCR* settings
	 * (possibly updated for removal of counters).
	 */
	if (!cpuhw->n_added) {
		mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
		mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
		if (cpuhw->n_counters == 0)
			get_lppaca()->pmcregs_in_use = 0;
		goto out_enable;
	}

	/*
	 * Compute MMCR* values for the new set of counters
	 */
	if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index,
			       cpuhw->mmcr)) {
		/* shouldn't ever get here */
		printk(KERN_ERR "oops compute_mmcr failed\n");
		goto out;
	}

	/*
	 * Add in MMCR0 freeze bits corresponding to the
	 * hw_event.exclude_* bits for the first counter.
	 * We have already checked that all counters have the
	 * same values for these bits as the first counter.
	 */
	counter = cpuhw->counter[0];
	if (counter->hw_event.exclude_user)
		cpuhw->mmcr[0] |= MMCR0_FCP;
	if (counter->hw_event.exclude_kernel)
		cpuhw->mmcr[0] |= freeze_counters_kernel;
	if (counter->hw_event.exclude_hv)
		cpuhw->mmcr[0] |= MMCR0_FCHV;

	/*
	 * Write the new configuration to MMCR* with the freeze
	 * bit set and set the hardware counters to their initial values.
	 * Then unfreeze the counters.
	 */
	get_lppaca()->pmcregs_in_use = 1;
	mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
				| MMCR0_FC);

	/*
	 * Read off any pre-existing counters that need to move
	 * to another PMC.
	 */
	for (i = 0; i < cpuhw->n_counters; ++i) {
		counter = cpuhw->counter[i];
		if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
			power_perf_read(counter);
			write_pmc(counter->hw.idx, 0);
			counter->hw.idx = 0;
		}
	}

	/*
	 * Initialize the PMCs for all the new and moved counters.
	 */
	for (i = 0; i < cpuhw->n_counters; ++i) {
		counter = cpuhw->counter[i];
		if (counter->hw.idx)
			continue;
		val = 0;
		if (counter->hw_event.irq_period) {
			/* start the PMC so it overflows after period_left
			 * more events (counters interrupt on overflow) */
			left = atomic64_read(&counter->hw.period_left);
			if (left < 0x80000000L)
				val = 0x80000000L - left;
		}
		atomic64_set(&counter->hw.prev_count, val);
		counter->hw.idx = hwc_index[i] + 1;
		write_pmc(counter->hw.idx, val);
		perf_counter_update_userpage(counter);
	}
	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;

out_enable:
	mb();
	mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);

	/*
	 * Enable instruction sampling if necessary
	 */
	if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
		mb();
		mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
	}

 out:
	local_irq_restore(flags);
}
449 | |||
450 | static int collect_events(struct perf_counter *group, int max_count, | ||
451 | struct perf_counter *ctrs[], unsigned int *events) | ||
452 | { | ||
453 | int n = 0; | ||
454 | struct perf_counter *counter; | ||
455 | |||
456 | if (!is_software_counter(group)) { | ||
457 | if (n >= max_count) | ||
458 | return -1; | ||
459 | ctrs[n] = group; | ||
460 | events[n++] = group->hw.config; | ||
461 | } | ||
462 | list_for_each_entry(counter, &group->sibling_list, list_entry) { | ||
463 | if (!is_software_counter(counter) && | ||
464 | counter->state != PERF_COUNTER_STATE_OFF) { | ||
465 | if (n >= max_count) | ||
466 | return -1; | ||
467 | ctrs[n] = counter; | ||
468 | events[n++] = counter->hw.config; | ||
469 | } | ||
470 | } | ||
471 | return n; | ||
472 | } | ||
473 | |||
474 | static void counter_sched_in(struct perf_counter *counter, int cpu) | ||
475 | { | ||
476 | counter->state = PERF_COUNTER_STATE_ACTIVE; | ||
477 | counter->oncpu = cpu; | ||
478 | counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped; | ||
479 | if (is_software_counter(counter)) | ||
480 | counter->hw_ops->enable(counter); | ||
481 | } | ||
482 | |||
/*
 * Called to enable a whole group of counters.
 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
 * Assumes the caller has disabled interrupts and has
 * frozen the PMU with hw_perf_save_disable.
 */
int hw_perf_group_sched_in(struct perf_counter *group_leader,
	       struct perf_cpu_context *cpuctx,
	       struct perf_counter_context *ctx, int cpu)
{
	struct cpu_hw_counters *cpuhw;
	long i, n, n0;
	struct perf_counter *sub;

	/* Append the group's hardware counters after those already queued. */
	cpuhw = &__get_cpu_var(cpu_hw_counters);
	n0 = cpuhw->n_counters;
	n = collect_events(group_leader, ppmu->n_counter - n0,
			   &cpuhw->counter[n0], &cpuhw->events[n0]);
	if (n < 0)
		return -EAGAIN;
	/* The combined set must satisfy exclude_* and scheduling constraints. */
	if (check_excludes(cpuhw->counter, n0, n))
		return -EAGAIN;
	if (power_check_constraints(cpuhw->events, n + n0))
		return -EAGAIN;
	cpuhw->n_counters = n0 + n;
	cpuhw->n_added += n;

	/*
	 * OK, this group can go on; update counter states etc.,
	 * and enable any software counters
	 */
	for (i = n0; i < n0 + n; ++i)
		cpuhw->counter[i]->hw.config = cpuhw->events[i];
	/*
	 * NOTE(review): active_oncpu is bumped by the collected count n,
	 * while nr_active below uses the number actually scheduled in;
	 * these differ only if a sibling is in the OFF state (which
	 * collect_events also skips) - confirm this is intentional.
	 */
	cpuctx->active_oncpu += n;
	n = 1;
	counter_sched_in(group_leader, cpu);
	list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
		if (sub->state != PERF_COUNTER_STATE_OFF) {
			counter_sched_in(sub, cpu);
			++n;
		}
	}
	ctx->nr_active += n;

	return 1;
}
529 | |||
530 | /* | ||
531 | * Add a counter to the PMU. | ||
532 | * If all counters are not already frozen, then we disable and | ||
533 | * re-enable the PMU in order to get hw_perf_restore to do the | ||
534 | * actual work of reconfiguring the PMU. | ||
535 | */ | ||
536 | static int power_perf_enable(struct perf_counter *counter) | ||
537 | { | ||
538 | struct cpu_hw_counters *cpuhw; | ||
539 | unsigned long flags; | ||
540 | u64 pmudis; | ||
541 | int n0; | ||
542 | int ret = -EAGAIN; | ||
543 | |||
544 | local_irq_save(flags); | ||
545 | pmudis = hw_perf_save_disable(); | ||
546 | |||
547 | /* | ||
548 | * Add the counter to the list (if there is room) | ||
549 | * and check whether the total set is still feasible. | ||
550 | */ | ||
551 | cpuhw = &__get_cpu_var(cpu_hw_counters); | ||
552 | n0 = cpuhw->n_counters; | ||
553 | if (n0 >= ppmu->n_counter) | ||
554 | goto out; | ||
555 | cpuhw->counter[n0] = counter; | ||
556 | cpuhw->events[n0] = counter->hw.config; | ||
557 | if (check_excludes(cpuhw->counter, n0, 1)) | ||
558 | goto out; | ||
559 | if (power_check_constraints(cpuhw->events, n0 + 1)) | ||
560 | goto out; | ||
561 | |||
562 | counter->hw.config = cpuhw->events[n0]; | ||
563 | ++cpuhw->n_counters; | ||
564 | ++cpuhw->n_added; | ||
565 | |||
566 | ret = 0; | ||
567 | out: | ||
568 | hw_perf_restore(pmudis); | ||
569 | local_irq_restore(flags); | ||
570 | return ret; | ||
571 | } | ||
572 | |||
573 | /* | ||
574 | * Remove a counter from the PMU. | ||
575 | */ | ||
576 | static void power_perf_disable(struct perf_counter *counter) | ||
577 | { | ||
578 | struct cpu_hw_counters *cpuhw; | ||
579 | long i; | ||
580 | u64 pmudis; | ||
581 | unsigned long flags; | ||
582 | |||
583 | local_irq_save(flags); | ||
584 | pmudis = hw_perf_save_disable(); | ||
585 | |||
586 | power_perf_read(counter); | ||
587 | |||
588 | cpuhw = &__get_cpu_var(cpu_hw_counters); | ||
589 | for (i = 0; i < cpuhw->n_counters; ++i) { | ||
590 | if (counter == cpuhw->counter[i]) { | ||
591 | while (++i < cpuhw->n_counters) | ||
592 | cpuhw->counter[i-1] = cpuhw->counter[i]; | ||
593 | --cpuhw->n_counters; | ||
594 | ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); | ||
595 | write_pmc(counter->hw.idx, 0); | ||
596 | counter->hw.idx = 0; | ||
597 | perf_counter_update_userpage(counter); | ||
598 | break; | ||
599 | } | ||
600 | } | ||
601 | if (cpuhw->n_counters == 0) { | ||
602 | /* disable exceptions if no counters are running */ | ||
603 | cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); | ||
604 | } | ||
605 | |||
606 | hw_perf_restore(pmudis); | ||
607 | local_irq_restore(flags); | ||
608 | } | ||
609 | |||
/* Operations vector returned to the core by hw_perf_counter_init. */
struct hw_perf_counter_ops power_perf_ops = {
	.enable = power_perf_enable,
	.disable = power_perf_disable,
	.read = power_perf_read
};

/* Number of perf_counters counting hardware events */
static atomic_t num_counters;
/* Used to avoid races in calling reserve/release_pmc_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);
620 | |||
/*
 * Release the PMU if this is the last perf_counter.
 */
static void hw_perf_counter_destroy(struct perf_counter *counter)
{
	/*
	 * Lock-free fast path: atomic_add_unless(&v, -1, 1) decrements
	 * unless the count is exactly 1 and returns whether it did so.
	 * So we only take the mutex when this might be the last counter,
	 * and re-check under the lock before releasing the hardware.
	 */
	if (!atomic_add_unless(&num_counters, -1, 1)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_dec_return(&num_counters) == 0)
			release_pmc_hardware();
		mutex_unlock(&pmc_reserve_mutex);
	}
}
633 | |||
/*
 * Set up a new hardware counter: translate the generic or raw event
 * code, validate it (alone or against the rest of its group), and
 * reserve the PMC hardware if this is the first counter.
 * Returns &power_perf_ops on success, or an ERR_PTR on failure.
 */
const struct hw_perf_counter_ops *
hw_perf_counter_init(struct perf_counter *counter)
{
	unsigned long ev;
	struct perf_counter *ctrs[MAX_HWCOUNTERS];
	unsigned int events[MAX_HWCOUNTERS];
	int n;
	int err;

	if (!ppmu)
		return ERR_PTR(-ENXIO);
	/* irq_period is used as a signed quantity below; reject negatives. */
	if ((s64)counter->hw_event.irq_period < 0)
		return ERR_PTR(-EINVAL);
	if (!perf_event_raw(&counter->hw_event)) {
		/* Generic event: map through this PMU's translation table. */
		ev = perf_event_id(&counter->hw_event);
		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
			return ERR_PTR(-EOPNOTSUPP);
		ev = ppmu->generic_events[ev];
	} else {
		/* Raw event: use the user-supplied code directly. */
		ev = perf_event_config(&counter->hw_event);
	}
	counter->hw.config_base = ev;
	counter->hw.idx = 0;

	/*
	 * If we are not running on a hypervisor, force the
	 * exclude_hv bit to 0 so that we don't care what
	 * the user set it to.
	 */
	if (!firmware_has_feature(FW_FEATURE_LPAR))
		counter->hw_event.exclude_hv = 0;

	/*
	 * If this is in a group, check if it can go on with all the
	 * other hardware counters in the group. We assume the counter
	 * hasn't been linked into its leader's sibling list at this point.
	 */
	n = 0;
	if (counter->group_leader != counter) {
		n = collect_events(counter->group_leader, ppmu->n_counter - 1,
				   ctrs, events);
		if (n < 0)
			return ERR_PTR(-EINVAL);
	}
	events[n] = ev;
	ctrs[n] = counter;
	if (check_excludes(ctrs, n, 1))
		return ERR_PTR(-EINVAL);
	if (power_check_constraints(events, n + 1))
		return ERR_PTR(-EINVAL);

	/* The constraint check may have substituted an alternative code. */
	counter->hw.config = events[n];
	atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);

	/*
	 * See if we need to reserve the PMU.
	 * If no counters are currently in use, then we have to take a
	 * mutex to ensure that we don't race with another task doing
	 * reserve_pmc_hardware or release_pmc_hardware.
	 */
	err = 0;
	if (!atomic_inc_not_zero(&num_counters)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&num_counters) == 0 &&
		    reserve_pmc_hardware(perf_counter_interrupt))
			err = -EBUSY;
		else
			atomic_inc(&num_counters);
		mutex_unlock(&pmc_reserve_mutex);
	}
	/*
	 * NOTE(review): destroy is installed even on the -EBUSY path,
	 * where num_counters was NOT incremented; if the core calls
	 * ->destroy after a failed init this would underflow the
	 * count - confirm against the perf_counter core's contract.
	 */
	counter->destroy = hw_perf_counter_destroy;

	if (err)
		return ERR_PTR(err);
	return &power_perf_ops;
}
710 | |||
/*
 * A counter has overflowed; update its count and record
 * things if requested. Note that interrupts are hard-disabled
 * here so there is no possibility of being interrupted.
 */
static void record_and_restart(struct perf_counter *counter, long val,
			       struct pt_regs *regs, int nmi)
{
	s64 prev, delta, left;
	int record = 0;

	/* we don't have to worry about interrupts here */
	prev = atomic64_read(&counter->hw.prev_count);
	/* PMCs are 32 bits wide: compute the delta modulo 2^32. */
	delta = (val - prev) & 0xfffffffful;
	atomic64_add(delta, &counter->count);

	/*
	 * See if the total period for this counter has expired,
	 * and update for the next period.
	 */
	val = 0;
	left = atomic64_read(&counter->hw.period_left) - delta;
	if (counter->hw_event.irq_period) {
		if (left <= 0) {
			/* Period expired: rearm for the next one. */
			left += counter->hw_event.irq_period;
			if (left <= 0)
				left = counter->hw_event.irq_period;
			record = 1;
		}
		/*
		 * Start the PMC at 0x80000000 - left so it goes negative
		 * (raising a PMU interrupt) after 'left' more events,
		 * provided 'left' fits in 31 bits.
		 */
		if (left < 0x80000000L)
			val = 0x80000000L - left;
	}
	write_pmc(counter->hw.idx, val);
	atomic64_set(&counter->hw.prev_count, val);
	atomic64_set(&counter->hw.period_left, left);
	perf_counter_update_userpage(counter);

	/*
	 * Finally record data if requested.
	 */
	if (record)
		perf_counter_overflow(counter, nmi, regs, 0);
}
754 | |||
/*
 * Performance monitor interrupt handler: find the counter(s) that
 * overflowed (PMC value went negative), update and restart them,
 * then unfreeze the PMU.
 */
static void perf_counter_interrupt(struct pt_regs *regs)
{
	int i;
	struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
	struct perf_counter *counter;
	long val;
	int found = 0;
	int nmi;

	/*
	 * If interrupts were soft-disabled when this PMU interrupt
	 * occurred, treat it as an NMI.
	 */
	nmi = !regs->softe;
	if (nmi)
		nmi_enter();
	else
		irq_enter();

	for (i = 0; i < cpuhw->n_counters; ++i) {
		counter = cpuhw->counter[i];
		/* A negative 32-bit PMC value means it has overflowed. */
		val = read_pmc(counter->hw.idx);
		if ((int)val < 0) {
			/* counter has overflowed */
			found = 1;
			record_and_restart(counter, val, regs, nmi);
		}
	}

	/*
	 * In case we didn't find and reset the counter that caused
	 * the interrupt, scan all counters and reset any that are
	 * negative, to avoid getting continual interrupts.
	 * Any that we processed in the previous loop will not be negative.
	 */
	if (!found) {
		for (i = 0; i < ppmu->n_counter; ++i) {
			/* PMC numbers are 1-based. */
			val = read_pmc(i + 1);
			if ((int)val < 0)
				write_pmc(i + 1, 0);
		}
	}

	/*
	 * Reset MMCR0 to its normal value. This will set PMXE and
	 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
	 * and thus allow interrupts to occur again.
	 * XXX might want to use MSR.PM to keep the counters frozen until
	 * we get back out of this interrupt.
	 */
	mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);

	if (nmi)
		nmi_exit();
	else
		irq_exit();
}
815 | |||
/*
 * Per-CPU initialization: start with no counters queued and the
 * PMU frozen (MMCR0_FC set).
 */
void hw_perf_counter_setup(int cpu)
{
	struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);

	memset(cpuhw, 0, sizeof(*cpuhw));
	cpuhw->mmcr[0] = MMCR0_FC;
}
823 | |||
824 | extern struct power_pmu power4_pmu; | ||
825 | extern struct power_pmu ppc970_pmu; | ||
826 | extern struct power_pmu power5_pmu; | ||
827 | extern struct power_pmu power5p_pmu; | ||
828 | extern struct power_pmu power6_pmu; | ||
829 | |||
830 | static int init_perf_counters(void) | ||
831 | { | ||
832 | unsigned long pvr; | ||
833 | |||
834 | /* XXX should get this from cputable */ | ||
835 | pvr = mfspr(SPRN_PVR); | ||
836 | switch (PVR_VER(pvr)) { | ||
837 | case PV_POWER4: | ||
838 | case PV_POWER4p: | ||
839 | ppmu = &power4_pmu; | ||
840 | break; | ||
841 | case PV_970: | ||
842 | case PV_970FX: | ||
843 | case PV_970MP: | ||
844 | ppmu = &ppc970_pmu; | ||
845 | break; | ||
846 | case PV_POWER5: | ||
847 | ppmu = &power5_pmu; | ||
848 | break; | ||
849 | case PV_POWER5p: | ||
850 | ppmu = &power5p_pmu; | ||
851 | break; | ||
852 | case 0x3e: | ||
853 | ppmu = &power6_pmu; | ||
854 | break; | ||
855 | } | ||
856 | |||
857 | /* | ||
858 | * Use FCHV to ignore kernel events if MSR.HV is set. | ||
859 | */ | ||
860 | if (mfmsr() & MSR_HV) | ||
861 | freeze_counters_kernel = MMCR0_FCHV; | ||
862 | |||
863 | return 0; | ||
864 | } | ||
865 | |||
866 | arch_initcall(init_perf_counters); | ||
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c new file mode 100644 index 000000000000..1407b19ab619 --- /dev/null +++ b/arch/powerpc/kernel/power4-pmu.c | |||
@@ -0,0 +1,557 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_counter.h> | ||
13 | #include <asm/reg.h> | ||
14 | |||
15 | /* | ||
16 | * Bits in event code for POWER4 | ||
17 | */ | ||
18 | #define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ | ||
19 | #define PM_PMC_MSK 0xf | ||
20 | #define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ | ||
21 | #define PM_UNIT_MSK 0xf | ||
22 | #define PM_LOWER_SH 6 | ||
23 | #define PM_LOWER_MSK 1 | ||
24 | #define PM_LOWER_MSKS 0x40 | ||
25 | #define PM_BYTE_SH 4 /* Byte number of event bus to use */ | ||
26 | #define PM_BYTE_MSK 3 | ||
27 | #define PM_PMCSEL_MSK 7 | ||
28 | |||
29 | /* | ||
30 | * Unit code values | ||
31 | */ | ||
32 | #define PM_FPU 1 | ||
33 | #define PM_ISU1 2 | ||
34 | #define PM_IFU 3 | ||
35 | #define PM_IDU0 4 | ||
36 | #define PM_ISU1_ALT 6 | ||
37 | #define PM_ISU2 7 | ||
38 | #define PM_IFU_ALT 8 | ||
39 | #define PM_LSU0 9 | ||
40 | #define PM_LSU1 0xc | ||
41 | #define PM_GPS 0xf | ||
42 | |||
43 | /* | ||
44 | * Bits in MMCR0 for POWER4 | ||
45 | */ | ||
46 | #define MMCR0_PMC1SEL_SH 8 | ||
47 | #define MMCR0_PMC2SEL_SH 1 | ||
48 | #define MMCR_PMCSEL_MSK 0x1f | ||
49 | |||
50 | /* | ||
51 | * Bits in MMCR1 for POWER4 | ||
52 | */ | ||
53 | #define MMCR1_TTM0SEL_SH 62 | ||
54 | #define MMCR1_TTC0SEL_SH 61 | ||
55 | #define MMCR1_TTM1SEL_SH 59 | ||
56 | #define MMCR1_TTC1SEL_SH 58 | ||
57 | #define MMCR1_TTM2SEL_SH 56 | ||
58 | #define MMCR1_TTC2SEL_SH 55 | ||
59 | #define MMCR1_TTM3SEL_SH 53 | ||
60 | #define MMCR1_TTC3SEL_SH 52 | ||
61 | #define MMCR1_TTMSEL_MSK 3 | ||
62 | #define MMCR1_TD_CP_DBG0SEL_SH 50 | ||
63 | #define MMCR1_TD_CP_DBG1SEL_SH 48 | ||
64 | #define MMCR1_TD_CP_DBG2SEL_SH 46 | ||
65 | #define MMCR1_TD_CP_DBG3SEL_SH 44 | ||
66 | #define MMCR1_DEBUG0SEL_SH 43 | ||
67 | #define MMCR1_DEBUG1SEL_SH 42 | ||
68 | #define MMCR1_DEBUG2SEL_SH 41 | ||
69 | #define MMCR1_DEBUG3SEL_SH 40 | ||
70 | #define MMCR1_PMC1_ADDER_SEL_SH 39 | ||
71 | #define MMCR1_PMC2_ADDER_SEL_SH 38 | ||
72 | #define MMCR1_PMC6_ADDER_SEL_SH 37 | ||
73 | #define MMCR1_PMC5_ADDER_SEL_SH 36 | ||
74 | #define MMCR1_PMC8_ADDER_SEL_SH 35 | ||
75 | #define MMCR1_PMC7_ADDER_SEL_SH 34 | ||
76 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
77 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
78 | #define MMCR1_PMC3SEL_SH 27 | ||
79 | #define MMCR1_PMC4SEL_SH 22 | ||
80 | #define MMCR1_PMC5SEL_SH 17 | ||
81 | #define MMCR1_PMC6SEL_SH 12 | ||
82 | #define MMCR1_PMC7SEL_SH 7 | ||
83 | #define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */ | ||
84 | |||
/* MMCR1 PMCx_ADDER_SEL bit positions, indexed by PMC number - 1. */
static short mmcr1_adder_bits[8] = {
	MMCR1_PMC1_ADDER_SEL_SH,
	MMCR1_PMC2_ADDER_SEL_SH,
	MMCR1_PMC3_ADDER_SEL_SH,
	MMCR1_PMC4_ADDER_SEL_SH,
	MMCR1_PMC5_ADDER_SEL_SH,
	MMCR1_PMC6_ADDER_SEL_SH,
	MMCR1_PMC7_ADDER_SEL_SH,
	MMCR1_PMC8_ADDER_SEL_SH
};
95 | |||
96 | /* | ||
97 | * Bits in MMCRA | ||
98 | */ | ||
99 | #define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */ | ||
100 | |||
101 | /* | ||
102 | * Layout of constraint bits: | ||
103 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
104 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
105 | * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><> | ||
106 | * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 | ||
107 | * \SMPL ||\TTC3SEL | ||
108 | * |\TTC_IFU_SEL | ||
109 | * \TTM2SEL0 | ||
110 | * | ||
111 | * SMPL - SAMPLE_ENABLE constraint | ||
112 | * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000 | ||
113 | * | ||
114 | * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2 | ||
115 | * 55: UC1 error 0x0080_0000_0000_0000 | ||
116 | * 54: FPU events needed 0x0040_0000_0000_0000 | ||
117 | * 53: ISU1 events needed 0x0020_0000_0000_0000 | ||
118 | * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000 | ||
119 | * | ||
120 | * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0 | ||
121 | * 51: UC2 error 0x0008_0000_0000_0000 | ||
122 | * 50: FPU events needed 0x0004_0000_0000_0000 | ||
123 | * 49: IFU events needed 0x0002_0000_0000_0000 | ||
124 | * 48: LSU0 events needed 0x0001_0000_0000_0000 | ||
125 | * | ||
126 | * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1 | ||
127 | * 47: UC3 error 0x8000_0000_0000 | ||
128 | * 46: LSU0 events needed 0x4000_0000_0000 | ||
129 | * 45: IFU events needed 0x2000_0000_0000 | ||
130 | * 44: IDU0|ISU2 events needed 0x1000_0000_0000 | ||
131 | * 43: ISU1 events needed 0x0800_0000_0000 | ||
132 | * | ||
133 | * TTM2SEL0 | ||
134 | * 42: 0 = IDU0 events needed | ||
135 | * 1 = ISU2 events needed 0x0400_0000_0000 | ||
136 | * | ||
137 | * TTC_IFU_SEL | ||
138 | * 41: 0 = IFU.U events needed | ||
139 | * 1 = IFU.L events needed 0x0200_0000_0000 | ||
140 | * | ||
141 | * TTC3SEL | ||
142 | * 40: 0 = LSU1.U events needed | ||
143 | * 1 = LSU1.L events needed 0x0100_0000_0000 | ||
144 | * | ||
145 | * PS1 | ||
146 | * 39: PS1 error 0x0080_0000_0000 | ||
147 | * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 | ||
148 | * | ||
149 | * PS2 | ||
150 | * 35: PS2 error 0x0008_0000_0000 | ||
151 | * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 | ||
152 | * | ||
153 | * B0 | ||
154 | * 28-31: Byte 0 event source 0xf000_0000 | ||
155 | * 1 = FPU | ||
156 | * 2 = ISU1 | ||
157 | * 3 = IFU | ||
158 | * 4 = IDU0 | ||
159 | * 7 = ISU2 | ||
160 | * 9 = LSU0 | ||
161 | * c = LSU1 | ||
162 | * f = GPS | ||
163 | * | ||
164 | * B1, B2, B3 | ||
165 | * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources | ||
166 | * | ||
167 | * P8 | ||
168 | * 15: P8 error 0x8000 | ||
169 | * 14-15: Count of events needing PMC8 | ||
170 | * | ||
171 | * P1..P7 | ||
172 | * 0-13: Count of events needing PMC1..PMC7 | ||
173 | * | ||
174 | * Note: this doesn't allow events using IFU.U to be combined with events | ||
175 | * using IFU.L, though that is feasible (using TTM0 and TTM2). However | ||
176 | * there are no listed events for IFU.L (they are debug events not | ||
177 | * verified for performance monitoring) so this shouldn't cause a | ||
178 | * problem. | ||
179 | */ | ||
180 | |||
/*
 * Per-unit constraint info, indexed by unit code.  'value'/'mask'
 * are this unit's contribution to the constraint word laid out in
 * the comment above; 'unit' is the canonical unit code (the _ALT
 * entries map back to it).  'lowerbit' > 1 is the constraint bit
 * recording the upper/lower selection; a value of 0 or 1 instead
 * gives the required fixed 'lower' value (see p4_get_constraint).
 */
static struct unitinfo {
	u64 value, mask;
	int unit;
	int lowerbit;
} p4_unitinfo[16] = {
	[PM_FPU] = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 },
	[PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
	[PM_ISU1_ALT] =
		{ 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
	[PM_IFU] = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
	[PM_IFU_ALT] =
		{ 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
	[PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 },
	[PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 },
	[PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 },
	[PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 },
	[PM_GPS] = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 }
};
199 | |||
/*
 * Bitmap, per PMC, of direct-event PMCSEL values that count
 * marked instructions (used by p4_marked_instr_event).
 */
static unsigned char direct_marked_event[8] = {
	(1<<2) | (1<<3),	/* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
	(1<<3) | (1<<5),	/* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
	(1<<3),			/* PMC3: PM_MRK_ST_CMPL_INT */
	(1<<4) | (1<<5),	/* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
	(1<<4) | (1<<5),	/* PMC5: PM_MRK_GRP_TIMEO */
	(1<<3) | (1<<4) | (1<<5),
		/* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
	(1<<4) | (1<<5),	/* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
	(1<<4),			/* PMC8: PM_MRK_LSU_FIN */
};
211 | |||
212 | /* | ||
213 | * Returns 1 if event counts things relating to marked instructions | ||
214 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
215 | */ | ||
216 | static int p4_marked_instr_event(unsigned int event) | ||
217 | { | ||
218 | int pmc, psel, unit, byte, bit; | ||
219 | unsigned int mask; | ||
220 | |||
221 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
222 | psel = event & PM_PMCSEL_MSK; | ||
223 | if (pmc) { | ||
224 | if (direct_marked_event[pmc - 1] & (1 << psel)) | ||
225 | return 1; | ||
226 | if (psel == 0) /* add events */ | ||
227 | bit = (pmc <= 4)? pmc - 1: 8 - pmc; | ||
228 | else if (psel == 6) /* decode events */ | ||
229 | bit = 4; | ||
230 | else | ||
231 | return 0; | ||
232 | } else | ||
233 | bit = psel; | ||
234 | |||
235 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
236 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
237 | mask = 0; | ||
238 | switch (unit) { | ||
239 | case PM_LSU1: | ||
240 | if (event & PM_LOWER_MSKS) | ||
241 | mask = 1 << 28; /* byte 7 bit 4 */ | ||
242 | else | ||
243 | mask = 6 << 24; /* byte 3 bits 1 and 2 */ | ||
244 | break; | ||
245 | case PM_LSU0: | ||
246 | /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */ | ||
247 | mask = 0x083dff00; | ||
248 | } | ||
249 | return (mask >> (byte * 8 + bit)) & 1; | ||
250 | } | ||
251 | |||
/*
 * Compute the constraint mask/value pair for one event code, using
 * the bit layout described in the large comment above.  Returns 0
 * and fills in *maskp/*valp on success, or -1 if the event can
 * never be scheduled (bad PMC number, unusable unit, or an
 * upper/lower selection the unit doesn't support).
 */
static int p4_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
{
	int pmc, byte, unit, lower, sh;
	u64 mask = 0, value = 0;
	int grp = -1;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	if (pmc) {
		if (pmc > 8)
			return -1;
		/*
		 * NOTE(review): value sets bit 2*(pmc-1), mask checks the
		 * bit above it - this appears to flag a carry when two
		 * events claim the same PMC; confirm against
		 * power_check_constraints.
		 */
		sh = (pmc - 1) * 2;
		mask |= 2 << sh;
		value |= 1 << sh;
		/* PMCs 1/2/5/6 are group 0; PMCs 3/4/7/8 are group 1. */
		grp = ((pmc - 1) >> 1) & 1;
	}
	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
	if (unit) {
		lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;

		/*
		 * Bus events on bytes 0 and 2 can be counted
		 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
		 */
		if (!pmc)
			grp = byte & 1;

		if (!p4_unitinfo[unit].unit)
			return -1;
		mask |= p4_unitinfo[unit].mask;
		value |= p4_unitinfo[unit].value;
		sh = p4_unitinfo[unit].lowerbit;
		/*
		 * lowerbit > 1 is the constraint bit position recording
		 * the upper/lower selection; otherwise 'lower' must equal
		 * the fixed value (0 or 1) stored in lowerbit.
		 */
		if (sh > 1)
			value |= (u64)lower << sh;
		else if (lower != sh)
			return -1;
		unit = p4_unitinfo[unit].unit;

		/* Set byte lane select field */
		mask |= 0xfULL << (28 - 4 * byte);
		value |= (u64)unit << (28 - 4 * byte);
	}
	if (grp == 0) {
		/* increment PMC1/2/5/6 field */
		mask |= 0x8000000000ull;
		value |= 0x1000000000ull;
	} else {
		/* increment PMC3/4/7/8 field */
		mask |= 0x800000000ull;
		value |= 0x100000000ull;
	}

	/* Marked instruction events need sample_enable set */
	if (p4_marked_instr_event(event)) {
		mask |= 1ull << 56;
		value |= 1ull << 56;
	}

	/* PMCSEL=6 decode events on byte 2 need sample_enable clear */
	if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
		mask |= 1ull << 56;

	*maskp = mask;
	*valp = value;
	return 0;
}
318 | |||
/* Interchangeable event codes for PM_INST_CMPL (see p4_get_alternatives). */
static unsigned int ppc_inst_cmpl[] = {
	0x1001, 0x4001, 0x6001, 0x7001, 0x8001
};
322 | |||
323 | static int p4_get_alternatives(unsigned int event, unsigned int alt[]) | ||
324 | { | ||
325 | int i, j, na; | ||
326 | |||
327 | alt[0] = event; | ||
328 | na = 1; | ||
329 | |||
330 | /* 2 possibilities for PM_GRP_DISP_REJECT */ | ||
331 | if (event == 0x8003 || event == 0x0224) { | ||
332 | alt[1] = event ^ (0x8003 ^ 0x0224); | ||
333 | return 2; | ||
334 | } | ||
335 | |||
336 | /* 2 possibilities for PM_ST_MISS_L1 */ | ||
337 | if (event == 0x0c13 || event == 0x0c23) { | ||
338 | alt[1] = event ^ (0x0c13 ^ 0x0c23); | ||
339 | return 2; | ||
340 | } | ||
341 | |||
342 | /* several possibilities for PM_INST_CMPL */ | ||
343 | for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) { | ||
344 | if (event == ppc_inst_cmpl[i]) { | ||
345 | for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j) | ||
346 | if (j != i) | ||
347 | alt[na++] = ppc_inst_cmpl[j]; | ||
348 | break; | ||
349 | } | ||
350 | } | ||
351 | |||
352 | return na; | ||
353 | } | ||
354 | |||
355 | static int p4_compute_mmcr(unsigned int event[], int n_ev, | ||
356 | unsigned int hwc[], u64 mmcr[]) | ||
357 | { | ||
358 | u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; | ||
359 | unsigned int pmc, unit, byte, psel, lower; | ||
360 | unsigned int ttm, grp; | ||
361 | unsigned int pmc_inuse = 0; | ||
362 | unsigned int pmc_grp_use[2]; | ||
363 | unsigned char busbyte[4]; | ||
364 | unsigned char unituse[16]; | ||
365 | unsigned int unitlower = 0; | ||
366 | int i; | ||
367 | |||
368 | if (n_ev > 8) | ||
369 | return -1; | ||
370 | |||
371 | /* First pass to count resource use */ | ||
372 | pmc_grp_use[0] = pmc_grp_use[1] = 0; | ||
373 | memset(busbyte, 0, sizeof(busbyte)); | ||
374 | memset(unituse, 0, sizeof(unituse)); | ||
375 | for (i = 0; i < n_ev; ++i) { | ||
376 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
377 | if (pmc) { | ||
378 | if (pmc_inuse & (1 << (pmc - 1))) | ||
379 | return -1; | ||
380 | pmc_inuse |= 1 << (pmc - 1); | ||
381 | /* count 1/2/5/6 vs 3/4/7/8 use */ | ||
382 | ++pmc_grp_use[((pmc - 1) >> 1) & 1]; | ||
383 | } | ||
384 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
385 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
386 | lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK; | ||
387 | if (unit) { | ||
388 | if (!pmc) | ||
389 | ++pmc_grp_use[byte & 1]; | ||
390 | if (unit == 6 || unit == 8) | ||
391 | /* map alt ISU1/IFU codes: 6->2, 8->3 */ | ||
392 | unit = (unit >> 1) - 1; | ||
393 | if (busbyte[byte] && busbyte[byte] != unit) | ||
394 | return -1; | ||
395 | busbyte[byte] = unit; | ||
396 | lower <<= unit; | ||
397 | if (unituse[unit] && lower != (unitlower & lower)) | ||
398 | return -1; | ||
399 | unituse[unit] = 1; | ||
400 | unitlower |= lower; | ||
401 | } | ||
402 | } | ||
403 | if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) | ||
404 | return -1; | ||
405 | |||
406 | /* | ||
407 | * Assign resources and set multiplexer selects. | ||
408 | * | ||
409 | * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2. | ||
410 | * Each TTMx can only select one unit, but since | ||
411 | * units 2 and 6 are both ISU1, and 3 and 8 are both IFU, | ||
412 | * we have some choices. | ||
413 | */ | ||
414 | if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) { | ||
415 | unituse[6] = 1; /* Move 2 to 6 */ | ||
416 | unituse[2] = 0; | ||
417 | } | ||
418 | if (unituse[3] & (unituse[1] | unituse[2])) { | ||
419 | unituse[8] = 1; /* Move 3 to 8 */ | ||
420 | unituse[3] = 0; | ||
421 | unitlower = (unitlower & ~8) | ((unitlower & 8) << 5); | ||
422 | } | ||
423 | /* Check only one unit per TTMx */ | ||
424 | if (unituse[1] + unituse[2] + unituse[3] > 1 || | ||
425 | unituse[4] + unituse[6] + unituse[7] > 1 || | ||
426 | unituse[8] + unituse[9] > 1 || | ||
427 | (unituse[5] | unituse[10] | unituse[11] | | ||
428 | unituse[13] | unituse[14])) | ||
429 | return -1; | ||
430 | |||
431 | /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */ | ||
432 | mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH; | ||
433 | mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH; | ||
434 | mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH; | ||
435 | |||
436 | /* Set TTCxSEL fields. */ | ||
437 | if (unitlower & 0xe) | ||
438 | mmcr1 |= 1ull << MMCR1_TTC0SEL_SH; | ||
439 | if (unitlower & 0xf0) | ||
440 | mmcr1 |= 1ull << MMCR1_TTC1SEL_SH; | ||
441 | if (unitlower & 0xf00) | ||
442 | mmcr1 |= 1ull << MMCR1_TTC2SEL_SH; | ||
443 | if (unitlower & 0x7000) | ||
444 | mmcr1 |= 1ull << MMCR1_TTC3SEL_SH; | ||
445 | |||
446 | /* Set byte lane select fields. */ | ||
447 | for (byte = 0; byte < 4; ++byte) { | ||
448 | unit = busbyte[byte]; | ||
449 | if (!unit) | ||
450 | continue; | ||
451 | if (unit == 0xf) { | ||
452 | /* special case for GPS */ | ||
453 | mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte); | ||
454 | } else { | ||
455 | if (!unituse[unit]) | ||
456 | ttm = unit - 1; /* 2->1, 3->2 */ | ||
457 | else | ||
458 | ttm = unit >> 2; | ||
459 | mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte); | ||
460 | } | ||
461 | } | ||
462 | |||
463 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | ||
464 | for (i = 0; i < n_ev; ++i) { | ||
465 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
466 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
467 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
468 | psel = event[i] & PM_PMCSEL_MSK; | ||
469 | if (!pmc) { | ||
470 | /* Bus event or 00xxx direct event (off or cycles) */ | ||
471 | if (unit) | ||
472 | psel |= 0x10 | ((byte & 2) << 2); | ||
473 | for (pmc = 0; pmc < 8; ++pmc) { | ||
474 | if (pmc_inuse & (1 << pmc)) | ||
475 | continue; | ||
476 | grp = (pmc >> 1) & 1; | ||
477 | if (unit) { | ||
478 | if (grp == (byte & 1)) | ||
479 | break; | ||
480 | } else if (pmc_grp_use[grp] < 4) { | ||
481 | ++pmc_grp_use[grp]; | ||
482 | break; | ||
483 | } | ||
484 | } | ||
485 | pmc_inuse |= 1 << pmc; | ||
486 | } else { | ||
487 | /* Direct event */ | ||
488 | --pmc; | ||
489 | if (psel == 0 && (byte & 2)) | ||
490 | /* add events on higher-numbered bus */ | ||
491 | mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; | ||
492 | else if (psel == 6 && byte == 3) | ||
493 | /* seem to need to set sample_enable here */ | ||
494 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
495 | psel |= 8; | ||
496 | } | ||
497 | if (pmc <= 1) | ||
498 | mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc); | ||
499 | else | ||
500 | mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); | ||
501 | if (pmc == 7) /* PMC8 */ | ||
502 | mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH; | ||
503 | hwc[i] = pmc; | ||
504 | if (p4_marked_instr_event(event[i])) | ||
505 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
506 | } | ||
507 | |||
508 | if (pmc_inuse & 1) | ||
509 | mmcr0 |= MMCR0_PMC1CE; | ||
510 | if (pmc_inuse & 0xfe) | ||
511 | mmcr0 |= MMCR0_PMCjCE; | ||
512 | |||
513 | mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ | ||
514 | |||
515 | /* Return MMCRx values */ | ||
516 | mmcr[0] = mmcr0; | ||
517 | mmcr[1] = mmcr1; | ||
518 | mmcr[2] = mmcra; | ||
519 | return 0; | ||
520 | } | ||
521 | |||
522 | static void p4_disable_pmc(unsigned int pmc, u64 mmcr[]) | ||
523 | { | ||
524 | /* | ||
525 | * Setting the PMCxSEL field to 0 disables PMC x. | ||
526 | * (Note that pmc is 0-based here, not 1-based.) | ||
527 | */ | ||
528 | if (pmc <= 1) { | ||
529 | mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc)); | ||
530 | } else { | ||
531 | mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2))); | ||
532 | if (pmc == 7) | ||
533 | mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH); | ||
534 | } | ||
535 | } | ||
536 | |||
/*
 * Raw POWER4 event codes for the architecture-independent generic
 * events, indexed by PERF_COUNT_* value.
 */
static int p4_generic_events[] = {
	[PERF_COUNT_CPU_CYCLES] = 7,
	[PERF_COUNT_INSTRUCTIONS] = 0x1001,
	[PERF_COUNT_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */
	[PERF_COUNT_CACHE_MISSES] = 0x3c10,	/* PM_LD_MISS_L1 */
	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x330,	/* PM_BR_ISSUED */
	[PERF_COUNT_BRANCH_MISSES] = 0x331,	/* PM_BR_MPRED_CR */
};
545 | |||
/*
 * POWER4 PMU description: 8 counters, constraint-solver parameters
 * (add_fields/test_adder), and the per-CPU callbacks used by the
 * core powerpc perf_counter code.
 */
struct power_pmu power4_pmu = {
	.n_counter = 8,
	.max_alternatives = 5,
	.add_fields = 0x0000001100005555ull,
	.test_adder = 0x0011083300000000ull,
	.compute_mmcr = p4_compute_mmcr,
	.get_constraint = p4_get_constraint,
	.get_alternatives = p4_get_alternatives,
	.disable_pmc = p4_disable_pmc,
	.n_generic = ARRAY_SIZE(p4_generic_events),
	.generic_events = p4_generic_events,
};
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c new file mode 100644 index 000000000000..1222c8ea3c26 --- /dev/null +++ b/arch/powerpc/kernel/power5+-pmu.c | |||
@@ -0,0 +1,551 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER5+/++ (not POWER5) processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_counter.h> | ||
13 | #include <asm/reg.h> | ||
14 | |||
15 | /* | ||
16 | * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3) | ||
17 | */ | ||
18 | #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ | ||
19 | #define PM_PMC_MSK 0xf | ||
20 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
21 | #define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ | ||
22 | #define PM_UNIT_MSK 0xf | ||
23 | #define PM_BYTE_SH 12 /* Byte number of event bus to use */ | ||
24 | #define PM_BYTE_MSK 7 | ||
25 | #define PM_GRS_SH 8 /* Storage subsystem mux select */ | ||
26 | #define PM_GRS_MSK 7 | ||
27 | #define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ | ||
28 | #define PM_PMCSEL_MSK 0x7f | ||
29 | |||
30 | /* Values in PM_UNIT field */ | ||
31 | #define PM_FPU 0 | ||
32 | #define PM_ISU0 1 | ||
33 | #define PM_IFU 2 | ||
34 | #define PM_ISU1 3 | ||
35 | #define PM_IDU 4 | ||
36 | #define PM_ISU0_ALT 6 | ||
37 | #define PM_GRS 7 | ||
38 | #define PM_LSU0 8 | ||
39 | #define PM_LSU1 0xc | ||
40 | #define PM_LASTUNIT 0xc | ||
41 | |||
42 | /* | ||
43 | * Bits in MMCR1 for POWER5+ | ||
44 | */ | ||
45 | #define MMCR1_TTM0SEL_SH 62 | ||
46 | #define MMCR1_TTM1SEL_SH 60 | ||
47 | #define MMCR1_TTM2SEL_SH 58 | ||
48 | #define MMCR1_TTM3SEL_SH 56 | ||
49 | #define MMCR1_TTMSEL_MSK 3 | ||
50 | #define MMCR1_TD_CP_DBG0SEL_SH 54 | ||
51 | #define MMCR1_TD_CP_DBG1SEL_SH 52 | ||
52 | #define MMCR1_TD_CP_DBG2SEL_SH 50 | ||
53 | #define MMCR1_TD_CP_DBG3SEL_SH 48 | ||
54 | #define MMCR1_GRS_L2SEL_SH 46 | ||
55 | #define MMCR1_GRS_L2SEL_MSK 3 | ||
56 | #define MMCR1_GRS_L3SEL_SH 44 | ||
57 | #define MMCR1_GRS_L3SEL_MSK 3 | ||
58 | #define MMCR1_GRS_MCSEL_SH 41 | ||
59 | #define MMCR1_GRS_MCSEL_MSK 7 | ||
60 | #define MMCR1_GRS_FABSEL_SH 39 | ||
61 | #define MMCR1_GRS_FABSEL_MSK 3 | ||
62 | #define MMCR1_PMC1_ADDER_SEL_SH 35 | ||
63 | #define MMCR1_PMC2_ADDER_SEL_SH 34 | ||
64 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
65 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
66 | #define MMCR1_PMC1SEL_SH 25 | ||
67 | #define MMCR1_PMC2SEL_SH 17 | ||
68 | #define MMCR1_PMC3SEL_SH 9 | ||
69 | #define MMCR1_PMC4SEL_SH 1 | ||
70 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
71 | #define MMCR1_PMCSEL_MSK 0x7f | ||
72 | |||
73 | /* | ||
74 | * Bits in MMCRA | ||
75 | */ | ||
76 | |||
77 | /* | ||
78 | * Layout of constraint bits: | ||
79 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
80 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
81 | * [ ><><>< ><> <><>[ > < >< >< >< ><><><><> | ||
82 | * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P4P3P2P1 | ||
83 | * | ||
84 | * NC - number of counters | ||
85 | * 51: NC error 0x0008_0000_0000_0000 | ||
86 | * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 | ||
87 | * | ||
88 | * G0..G3 - GRS mux constraints | ||
89 | * 46-47: GRS_L2SEL value | ||
90 | * 44-45: GRS_L3SEL value | ||
91 | * 41-43: GRS_MCSEL value | ||
92 | * 39-40: GRS_FABSEL value | ||
93 | * Note that these match up with their bit positions in MMCR1 | ||
94 | * | ||
95 | * T0 - TTM0 constraint | ||
96 | * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000 | ||
97 | * | ||
98 | * T1 - TTM1 constraint | ||
99 | * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000 | ||
100 | * | ||
101 | * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS | ||
102 | * 33: UC3 error 0x02_0000_0000 | ||
103 | * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000 | ||
104 | * 31: ISU0 events needed 0x00_8000_0000 | ||
105 | * 30: IDU|GRS events needed 0x00_4000_0000 | ||
106 | * | ||
107 | * B0 | ||
108 | * 20-23: Byte 0 event source 0x00f0_0000 | ||
109 | * Encoding as for the event code | ||
110 | * | ||
111 | * B1, B2, B3 | ||
112 | * 16-19, 12-15, 8-11: Byte 1, 2, 3 event sources | ||
113 | * | ||
114 | * P4 | ||
115 | * 7: P4 error 0x80 | ||
116 | * 6-7: Count of events needing PMC4 | ||
117 | * | ||
118 | * P1..P3 | ||
119 | * 0-6: Count of events needing PMC1..PMC3 | ||
120 | */ | ||
121 | |||
/*
 * MMCR1 shift for each GRS mux select field, indexed by the low
 * 3 bits of the PMCSEL value: 0-2 -> L2SEL, 3-5 -> L3SEL,
 * 6 -> MCSEL, 7 -> FABSEL.
 */
static const int grsel_shift[8] = {
	MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
	MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
	MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
};
127 | |||
/*
 * Masks and values for using events from the various units.
 * Indexed by unit number; [0] is the constraint mask (which TTM/UC
 * bits this unit contends for), [1] the required value.  The bit
 * positions follow the constraint layout described above.
 */
static u64 unit_cons[PM_LASTUNIT+1][2] = {
	[PM_FPU] = {   0x3200000000ull,  0x0100000000ull },
	[PM_ISU0] = {  0x0200000000ull,  0x0080000000ull },
	[PM_ISU1] = {  0x3200000000ull,  0x3100000000ull },
	[PM_IFU] = {   0x3200000000ull,  0x2100000000ull },
	[PM_IDU] = {   0x0e00000000ull,  0x0040000000ull },
	[PM_GRS] = {   0x0e00000000ull,  0x0c40000000ull },
};
137 | |||
/*
 * Compute the constraint (mask, value) pair for @event, using the
 * bit layout described in the big comment above.  Returns 0 and
 * fills *maskp/*valp on success, or -1 if the event code is invalid
 * (bad PMC number, bad unit, or a high byte on a non-LSU1 unit).
 */
static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
{
	int pmc, byte, unit, sh;
	int bit, fmask;
	u64 mask = 0, value = 0;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	if (pmc) {
		if (pmc > 4)
			return -1;
		/* 2 bits per PMC: value 1 when used, bit 2 flags conflict */
		sh = (pmc - 1) * 2;
		mask |= 2 << sh;
		value |= 1 << sh;
	}
	if (event & PM_BUSEVENT_MSK) {
		unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
		if (unit > PM_LASTUNIT)
			return -1;
		if (unit == PM_ISU0_ALT)
			unit = PM_ISU0;	/* alternative code for the same unit */
		mask |= unit_cons[unit][0];
		value |= unit_cons[unit][1];
		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
		if (byte >= 4) {
			if (unit != PM_LSU1)
				return -1;
			/* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
			++unit;
			byte &= 3;
		}
		if (unit == PM_GRS) {
			/* pin the GRS mux select field this event needs */
			bit = event & 7;
			fmask = (bit == 6)? 7: 3;
			sh = grsel_shift[bit];
			mask |= (u64)fmask << sh;
			value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
		}
		/* Set byte lane select field */
		mask |= 0xfULL << (20 - 4 * byte);
		value |= (u64)unit << (20 - 4 * byte);
	}
	/* NC field: one more event needing a PMC (bits 48-51) */
	mask |= 0x8000000000000ull;
	value |= 0x1000000000000ull;
	*maskp = mask;
	*valp = value;
	return 0;
}
185 | |||
#define MAX_ALT 3	/* at most 3 alternatives for any event */

/*
 * Table of event codes that count the same thing on different PMCs.
 * Rows are sorted by ascending first entry; find_alternative() relies
 * on that ordering to stop the scan early.  Unused slots are 0.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	{ 0x100c0,  0x40001f },			/* PM_GCT_FULL_CYC */
	{ 0x120e4,  0x400002 },			/* PM_GRP_DISP_REJECT */
	{ 0x230e2,  0x323087 },			/* PM_BR_PRED_CR */
	{ 0x230e3,  0x223087, 0x3230a0 },	/* PM_BR_PRED_TA */
	{ 0x410c7,  0x441084 },			/* PM_THRD_L2MISS_BOTH_CYC */
	{ 0x800c4,  0xc20e0 },			/* PM_DTLB_MISS */
	{ 0xc50c6,  0xc60e0 },			/* PM_MRK_DTLB_MISS */
	{ 0x100009, 0x200009 },			/* PM_INST_CMPL */
	{ 0x200015, 0x300015 },			/* PM_LSU_LMQ_SRQ_EMPTY_CYC */
	{ 0x300009, 0x400009 },			/* PM_INST_DISP */
};
200 | |||
201 | /* | ||
202 | * Scan the alternatives table for a match and return the | ||
203 | * index into the alternatives table if found, else -1. | ||
204 | */ | ||
205 | static int find_alternative(unsigned int event) | ||
206 | { | ||
207 | int i, j; | ||
208 | |||
209 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
210 | if (event < event_alternatives[i][0]) | ||
211 | break; | ||
212 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
213 | if (event == event_alternatives[i][j]) | ||
214 | return i; | ||
215 | } | ||
216 | return -1; | ||
217 | } | ||
218 | |||
/*
 * PMCSEL values for byte-decode events on each PMC.  Column j on one
 * PMC selects the same decode as column j on the others, so an event
 * can be moved between PMCs by swapping row (see
 * find_alternative_bdecode below).
 */
static const unsigned char bytedecode_alternatives[4][4] = {
	/* PMC 1 */	{ 0x21, 0x23, 0x25, 0x27 },
	/* PMC 2 */	{ 0x07, 0x17, 0x0e, 0x1e },
	/* PMC 3 */	{ 0x20, 0x22, 0x24, 0x26 },
	/* PMC 4 */	{ 0x07, 0x17, 0x0e, 0x1e }
};
225 | |||
/*
 * Some direct events for decodes of event bus byte 3 have alternative
 * PMCSEL values on other counters.  This returns the alternative
 * event code for those that do, or -1 otherwise.  This also handles
 * alternative PMCSEL values for add events.
 */
static int find_alternative_bdecode(unsigned int event)
{
	int pmc, altpmc, pp, j;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	if (pmc == 0 || pmc > 4)
		return -1;
	altpmc = 5 - pmc;	/* 1 <-> 4, 2 <-> 3 */
	pp = event & PM_PMCSEL_MSK;
	/* byte-decode events: same column on the paired PMC's row */
	for (j = 0; j < 4; ++j) {
		if (bytedecode_alternatives[pmc - 1][j] == pp) {
			return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
				(altpmc << PM_PMC_SH) |
				bytedecode_alternatives[altpmc - 1][j];
		}
	}

	/* new decode alternatives for power5+ */
	if (pmc == 1 && (pp == 0x0d || pp == 0x0e))
		/* PMC1 psel 0x0d/0x0e <-> PMC3 psel 0x2e/0x2f */
		return event + (2 << PM_PMC_SH) + (0x2e - 0x0d);
	if (pmc == 3 && (pp == 0x2e || pp == 0x2f))
		return event - (2 << PM_PMC_SH) - (0x2e - 0x0d);

	/* alternative add event encodings: psel 0x10 <-> 0x28 on altpmc */
	if (pp == 0x10 || pp == 0x28)
		return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) |
			(altpmc << PM_PMC_SH);

	return -1;
}
262 | |||
263 | static int power5p_get_alternatives(unsigned int event, unsigned int alt[]) | ||
264 | { | ||
265 | int i, j, ae, nalt = 1; | ||
266 | |||
267 | alt[0] = event; | ||
268 | nalt = 1; | ||
269 | i = find_alternative(event); | ||
270 | if (i >= 0) { | ||
271 | for (j = 0; j < MAX_ALT; ++j) { | ||
272 | ae = event_alternatives[i][j]; | ||
273 | if (ae && ae != event) | ||
274 | alt[nalt++] = ae; | ||
275 | } | ||
276 | } else { | ||
277 | ae = find_alternative_bdecode(event); | ||
278 | if (ae > 0) | ||
279 | alt[nalt++] = ae; | ||
280 | } | ||
281 | return nalt; | ||
282 | } | ||
283 | |||
/*
 * Map of which direct events on which PMCs are marked instruction events.
 * Indexed by PMCSEL value; bit i (LE) set if PMC i is a marked event
 * for that PMCSEL.  Bit 0 is set if it is marked for all PMCs.
 * The 0x80 bit indicates a byte decode PMCSEL value, for which
 * power5p_marked_instr_event() consults the per-unit byte masks.
 */
static unsigned char direct_event_is_marked[0x28] = {
	0,	/* 00 */
	0x1f,	/* 01 PM_IOPS_CMPL */
	0x2,	/* 02 PM_MRK_GRP_DISP */
	0xe,	/* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
	0,	/* 04 */
	0x1c,	/* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
	0x80,	/* 06 */
	0x80,	/* 07 */
	0, 0, 0,/* 08 - 0a */
	0x18,	/* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
	0,	/* 0c */
	0x80,	/* 0d */
	0x80,	/* 0e */
	0,	/* 0f */
	0,	/* 10 */
	0x14,	/* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
	0,	/* 12 */
	0x10,	/* 13 PM_MRK_GRP_CMPL */
	0x1f,	/* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
	0x2,	/* 15 PM_MRK_GRP_ISSUED */
	0x80,	/* 16 */
	0x80,	/* 17 */
	0, 0, 0, 0, 0,	/* 18 - 1c */
	0x80,	/* 1d */
	0x80,	/* 1e */
	0,	/* 1f */
	0x80,	/* 20 */
	0x80,	/* 21 */
	0x80,	/* 22 */
	0x80,	/* 23 */
	0x80,	/* 24 */
	0x80,	/* 25 */
	0x80,	/* 26 */
	0x80,	/* 27 */
};
326 | |||
/*
 * Returns 1 if event counts things relating to marked instructions
 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
 */
static int power5p_marked_instr_event(unsigned int event)
{
	int pmc, psel;
	int bit, byte, unit;
	u32 mask;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	psel = event & PM_PMCSEL_MSK;
	if (pmc >= 5)
		return 0;	/* PMC5/6 never count marked events */

	/*
	 * First decide from the PMCSEL value alone, or compute which
	 * event-bus bit (within a byte) to test below.
	 */
	bit = -1;
	if (psel < sizeof(direct_event_is_marked)) {
		if (direct_event_is_marked[psel] & (1 << pmc))
			return 1;
		if (direct_event_is_marked[psel] & 0x80)
			bit = 4;
		else if (psel == 0x08)
			bit = pmc - 1;
		else if (psel == 0x10)
			bit = 4 - pmc;
		else if (psel == 0x1b && (pmc == 1 || pmc == 3))
			bit = 4;
	} else if ((psel & 0x48) == 0x40) {
		/* direct decode of an event-bus bit */
		bit = psel & 7;
	} else if (psel == 0x28) {
		bit = pmc - 1;
	} else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) {
		bit = 4;
	}

	if (!(event & PM_BUSEVENT_MSK) || bit == -1)
		return 0;

	/*
	 * Bus event: look up (byte, bit) in the per-unit mask of
	 * which bus bits carry marked-instruction events.
	 */
	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	if (unit == PM_LSU0) {
		/* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
		mask = 0x5dff00;
	} else if (unit == PM_LSU1 && byte >= 4) {
		byte -= 4;
		/* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */
		mask = 0x5f11c000;
	} else
		return 0;

	return (mask >> (byte * 8 + bit)) & 1;
}
379 | |||
/*
 * Compute MMCR0/1/A settings for a set of up to 4 events.
 * event[]/n_ev describe the events; on success, hwc[i] is set to the
 * (0-based) PMC assigned to event i, mmcr[0..2] receive the MMCR0,
 * MMCR1 and MMCRA values, and 0 is returned.  Returns -1 if the
 * events cannot all be scheduled together.
 */
static int power5p_compute_mmcr(unsigned int event[], int n_ev,
				unsigned int hwc[], u64 mmcr[])
{
	u64 mmcr1 = 0;
	u64 mmcra = 0;
	unsigned int pmc, unit, byte, psel;
	unsigned int ttm;
	int i, isbus, bit, grsel;
	unsigned int pmc_inuse = 0;	/* bitmap of claimed PMCs (0-based) */
	unsigned char busbyte[4];	/* unit feeding each event-bus byte */
	unsigned char unituse[16];	/* which units are needed */
	int ttmuse;

	if (n_ev > 4)
		return -1;

	/* First pass to count resource use */
	memset(busbyte, 0, sizeof(busbyte));
	memset(unituse, 0, sizeof(unituse));
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			if (pmc > 4)
				return -1;
			/* two events can't demand the same PMC */
			if (pmc_inuse & (1 << (pmc - 1)))
				return -1;
			pmc_inuse |= 1 << (pmc - 1);
		}
		if (event[i] & PM_BUSEVENT_MSK) {
			unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
			byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
			if (unit > PM_LASTUNIT)
				return -1;
			if (unit == PM_ISU0_ALT)
				unit = PM_ISU0;
			if (byte >= 4) {
				/* only LSU1 has bytes 4-7; fold to LSU1+1 */
				if (unit != PM_LSU1)
					return -1;
				++unit;
				byte &= 3;
			}
			/* each bus byte can carry only one unit's events */
			if (busbyte[byte] && busbyte[byte] != unit)
				return -1;
			busbyte[byte] = unit;
			unituse[unit] = 1;
		}
	}

	/*
	 * Assign resources and set multiplexer selects.
	 *
	 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
	 * choice we have to deal with.
	 */
	if (unituse[PM_ISU0] &
	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
		unituse[PM_ISU0_ALT] = 1;	/* move ISU to TTM1 */
		unituse[PM_ISU0] = 0;
	}
	/* Set TTM[01]SEL fields. */
	ttmuse = 0;
	for (i = PM_FPU; i <= PM_ISU1; ++i) {
		if (!unituse[i])
			continue;
		if (ttmuse++)
			return -1;	/* more than one unit wants TTM0 */
		mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
	}
	ttmuse = 0;
	for (; i <= PM_GRS; ++i) {
		if (!unituse[i])
			continue;
		if (ttmuse++)
			return -1;	/* more than one unit wants TTM1 */
		mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
	}
	if (ttmuse > 1)
		return -1;

	/* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
	for (byte = 0; byte < 4; ++byte) {
		unit = busbyte[byte];
		if (!unit)
			continue;
		if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
			/* get ISU0 through TTM1 rather than TTM0 */
			unit = PM_ISU0_ALT;
		} else if (unit == PM_LSU1 + 1) {
			/* select lower word of LSU1 for this byte */
			mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
		}
		ttm = unit >> 2;
		mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
	}

	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		psel = event[i] & PM_PMCSEL_MSK;
		isbus = event[i] & PM_BUSEVENT_MSK;
		if (!pmc) {
			/* Bus event or any-PMC direct event */
			for (pmc = 0; pmc < 4; ++pmc) {
				if (!(pmc_inuse & (1 << pmc)))
					break;
			}
			if (pmc >= 4)
				return -1;
			pmc_inuse |= 1 << pmc;
		} else {
			/* Direct event */
			--pmc;
			if (isbus && (byte & 2) &&
			    (psel == 8 || psel == 0x10 || psel == 0x28))
				/* add events on higher-numbered bus */
				mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
		}
		if (isbus && unit == PM_GRS) {
			/* program the GRS mux this event decoded through */
			bit = psel & 7;
			grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
			mmcr1 |= (u64)grsel << grsel_shift[bit];
		}
		if (power5p_marked_instr_event(event[i]))
			mmcra |= MMCRA_SAMPLE_ENABLE;
		if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1))
			/* select alternate byte lane */
			psel |= 0x10;
		if (pmc <= 3)
			mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
		hwc[i] = pmc;
	}

	/* Return MMCRx values */
	mmcr[0] = 0;
	if (pmc_inuse & 1)
		mmcr[0] = MMCR0_PMC1CE;
	if (pmc_inuse & 0x3e)
		mmcr[0] |= MMCR0_PMCjCE;
	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	return 0;
}
524 | |||
525 | static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[]) | ||
526 | { | ||
527 | if (pmc <= 3) | ||
528 | mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); | ||
529 | } | ||
530 | |||
/*
 * Raw POWER5+ event codes for the architecture-independent generic
 * events, indexed by PERF_COUNT_* value.
 */
static int power5p_generic_events[] = {
	[PERF_COUNT_CPU_CYCLES] = 0xf,
	[PERF_COUNT_INSTRUCTIONS] = 0x100009,
	[PERF_COUNT_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */
	[PERF_COUNT_CACHE_MISSES] = 0x3c1088,	/* LD_MISS_L1 */
	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
	[PERF_COUNT_BRANCH_MISSES] = 0x230e5,	/* BR_MPRED_CR */
};
539 | |||
/*
 * POWER5+/++ PMU description: 4 programmable counters,
 * constraint-solver parameters (add_fields/test_adder), and the
 * callbacks used by the core powerpc perf_counter code.
 */
struct power_pmu power5p_pmu = {
	.n_counter = 4,
	.max_alternatives = MAX_ALT,
	.add_fields = 0x7000000000055ull,
	.test_adder = 0x3000040000000ull,
	.compute_mmcr = power5p_compute_mmcr,
	.get_constraint = power5p_get_constraint,
	.get_alternatives = power5p_get_alternatives,
	.disable_pmc = power5p_disable_pmc,
	.n_generic = ARRAY_SIZE(power5p_generic_events),
	.generic_events = power5p_generic_events,
};
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c new file mode 100644 index 000000000000..116c4bb1809e --- /dev/null +++ b/arch/powerpc/kernel/power5-pmu.c | |||
@@ -0,0 +1,569 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER5 (not POWER5++) processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_counter.h> | ||
13 | #include <asm/reg.h> | ||
14 | |||
15 | /* | ||
16 | * Bits in event code for POWER5 (not POWER5++) | ||
17 | */ | ||
18 | #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ | ||
19 | #define PM_PMC_MSK 0xf | ||
20 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
21 | #define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ | ||
22 | #define PM_UNIT_MSK 0xf | ||
23 | #define PM_BYTE_SH 12 /* Byte number of event bus to use */ | ||
24 | #define PM_BYTE_MSK 7 | ||
25 | #define PM_GRS_SH 8 /* Storage subsystem mux select */ | ||
26 | #define PM_GRS_MSK 7 | ||
27 | #define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ | ||
28 | #define PM_PMCSEL_MSK 0x7f | ||
29 | |||
30 | /* Values in PM_UNIT field */ | ||
31 | #define PM_FPU 0 | ||
32 | #define PM_ISU0 1 | ||
33 | #define PM_IFU 2 | ||
34 | #define PM_ISU1 3 | ||
35 | #define PM_IDU 4 | ||
36 | #define PM_ISU0_ALT 6 | ||
37 | #define PM_GRS 7 | ||
38 | #define PM_LSU0 8 | ||
39 | #define PM_LSU1 0xc | ||
40 | #define PM_LASTUNIT 0xc | ||
41 | |||
42 | /* | ||
43 | * Bits in MMCR1 for POWER5 | ||
44 | */ | ||
45 | #define MMCR1_TTM0SEL_SH 62 | ||
46 | #define MMCR1_TTM1SEL_SH 60 | ||
47 | #define MMCR1_TTM2SEL_SH 58 | ||
48 | #define MMCR1_TTM3SEL_SH 56 | ||
49 | #define MMCR1_TTMSEL_MSK 3 | ||
50 | #define MMCR1_TD_CP_DBG0SEL_SH 54 | ||
51 | #define MMCR1_TD_CP_DBG1SEL_SH 52 | ||
52 | #define MMCR1_TD_CP_DBG2SEL_SH 50 | ||
53 | #define MMCR1_TD_CP_DBG3SEL_SH 48 | ||
54 | #define MMCR1_GRS_L2SEL_SH 46 | ||
55 | #define MMCR1_GRS_L2SEL_MSK 3 | ||
56 | #define MMCR1_GRS_L3SEL_SH 44 | ||
57 | #define MMCR1_GRS_L3SEL_MSK 3 | ||
58 | #define MMCR1_GRS_MCSEL_SH 41 | ||
59 | #define MMCR1_GRS_MCSEL_MSK 7 | ||
60 | #define MMCR1_GRS_FABSEL_SH 39 | ||
61 | #define MMCR1_GRS_FABSEL_MSK 3 | ||
62 | #define MMCR1_PMC1_ADDER_SEL_SH 35 | ||
63 | #define MMCR1_PMC2_ADDER_SEL_SH 34 | ||
64 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
65 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
66 | #define MMCR1_PMC1SEL_SH 25 | ||
67 | #define MMCR1_PMC2SEL_SH 17 | ||
68 | #define MMCR1_PMC3SEL_SH 9 | ||
69 | #define MMCR1_PMC4SEL_SH 1 | ||
70 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
71 | #define MMCR1_PMCSEL_MSK 0x7f | ||
72 | |||
73 | /* | ||
74 | * Bits in MMCRA | ||
75 | */ | ||
76 | |||
77 | /* | ||
78 | * Layout of constraint bits: | ||
79 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
80 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
81 | * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><> | ||
82 | * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1 | ||
83 | * | ||
84 | * T0 - TTM0 constraint | ||
85 | * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000 | ||
86 | * | ||
87 | * T1 - TTM1 constraint | ||
88 | * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000 | ||
89 | * | ||
90 | * NC - number of counters | ||
91 | * 51: NC error 0x0008_0000_0000_0000 | ||
92 | * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 | ||
93 | * | ||
94 | * G0..G3 - GRS mux constraints | ||
95 | * 46-47: GRS_L2SEL value | ||
96 | * 44-45: GRS_L3SEL value | ||
97 | * 41-43: GRS_MCSEL value | ||
98 | * 39-40: GRS_FABSEL value | ||
99 | * Note that these match up with their bit positions in MMCR1 | ||
100 | * | ||
101 | * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS | ||
102 | * 37: UC3 error 0x20_0000_0000 | ||
103 | * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000 | ||
104 | * 35: ISU0 events needed 0x08_0000_0000 | ||
105 | * 34: IDU|GRS events needed 0x04_0000_0000 | ||
106 | * | ||
107 | * PS1 | ||
108 | * 33: PS1 error 0x2_0000_0000 | ||
109 | * 31-32: count of events needing PMC1/2 0x1_8000_0000 | ||
110 | * | ||
111 | * PS2 | ||
112 | * 30: PS2 error 0x4000_0000 | ||
113 | * 28-29: count of events needing PMC3/4 0x3000_0000 | ||
114 | * | ||
115 | * B0 | ||
116 | * 24-27: Byte 0 event source 0x0f00_0000 | ||
117 | * Encoding as for the event code | ||
118 | * | ||
119 | * B1, B2, B3 | ||
120 | * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources | ||
121 | * | ||
122 | * P1..P6 | ||
123 | * 0-11: Count of events needing PMC1..PMC6 | ||
124 | */ | ||
125 | |||
/*
 * MMCR1 shift for each GRS mux select field, indexed by the low
 * 3 bits of the PMCSEL value: 0-2 -> L2SEL, 3-5 -> L3SEL,
 * 6 -> MCSEL, 7 -> FABSEL.
 */
static const int grsel_shift[8] = {
	MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
	MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
	MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
};
131 | |||
/*
 * Masks and values for using events from the various units.
 * Indexed by unit number; [0] is the constraint mask (which TTM/UC
 * bits this unit contends for), [1] the required value.  The bit
 * positions follow the constraint layout described above.
 */
static u64 unit_cons[PM_LASTUNIT+1][2] = {
	[PM_FPU] = {   0xc0002000000000ull, 0x00001000000000ull },
	[PM_ISU0] = {  0x00002000000000ull, 0x00000800000000ull },
	[PM_ISU1] = {  0xc0002000000000ull, 0xc0001000000000ull },
	[PM_IFU] = {   0xc0002000000000ull, 0x80001000000000ull },
	[PM_IDU] = {   0x30002000000000ull, 0x00000400000000ull },
	[PM_GRS] = {   0x30002000000000ull, 0x30000400000000ull },
};
141 | |||
/*
 * Compute the constraint (mask, value) pair for @event, using the
 * bit layout described in the big comment above.  Returns 0 and
 * fills *maskp/*valp on success, or -1 if the event code is invalid.
 */
static int power5_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
{
	int pmc, byte, unit, sh;
	int bit, fmask;
	u64 mask = 0, value = 0;
	int grp = -1;	/* -1 = no PMC-pair preference yet */

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	if (pmc) {
		if (pmc > 6)
			return -1;
		/* 2 bits per PMC: value 1 when used, bit 2 flags conflict */
		sh = (pmc - 1) * 2;
		mask |= 2 << sh;
		value |= 1 << sh;
		if (pmc <= 4)
			grp = (pmc - 1) >> 1;
		else if (event != 0x500009 && event != 0x600005)
			/* only PM_INST_CMPL/PM_RUN_CYC may use PMC5/6 */
			return -1;
	}
	if (event & PM_BUSEVENT_MSK) {
		unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
		if (unit > PM_LASTUNIT)
			return -1;
		if (unit == PM_ISU0_ALT)
			unit = PM_ISU0;	/* alternative code for the same unit */
		mask |= unit_cons[unit][0];
		value |= unit_cons[unit][1];
		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
		if (byte >= 4) {
			if (unit != PM_LSU1)
				return -1;
			/* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
			++unit;
			byte &= 3;
		}
		if (unit == PM_GRS) {
			/* pin the GRS mux select field this event needs */
			bit = event & 7;
			fmask = (bit == 6)? 7: 3;
			sh = grsel_shift[bit];
			mask |= (u64)fmask << sh;
			value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
		}
		/*
		 * Bus events on bytes 0 and 2 can be counted
		 * on PMC1/2; bytes 1 and 3 on PMC3/4.
		 */
		if (!pmc)
			grp = byte & 1;
		/* Set byte lane select field */
		mask |= 0xfULL << (24 - 4 * byte);
		value |= (u64)unit << (24 - 4 * byte);
	}
	if (grp == 0) {
		/* increment PMC1/2 field */
		mask |= 0x200000000ull;
		value |= 0x080000000ull;
	} else if (grp == 1) {
		/* increment PMC3/4 field */
		mask |= 0x40000000ull;
		value |= 0x10000000ull;
	}
	if (pmc < 5) {
		/* need a counter from PMC1-4 set */
		mask |= 0x8000000000000ull;
		value |= 0x1000000000000ull;
	}
	*maskp = mask;
	*valp = value;
	return 0;
}
212 | |||
#define MAX_ALT 3	/* at most 3 alternatives for any event */

/*
 * Groups of event codes that count the same thing on different
 * PMCs.  Rows are sorted ascending by their first entry (relied on
 * by find_alternative()); unused trailing slots are zero.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	{ 0x120e4, 0x400002 },			/* PM_GRP_DISP_REJECT */
	{ 0x410c7, 0x441084 },			/* PM_THRD_L2MISS_BOTH_CYC */
	{ 0x100005, 0x600005 },			/* PM_RUN_CYC */
	{ 0x100009, 0x200009, 0x500009 },	/* PM_INST_CMPL */
	{ 0x300009, 0x400009 },			/* PM_INST_DISP */
};
222 | |||
223 | /* | ||
224 | * Scan the alternatives table for a match and return the | ||
225 | * index into the alternatives table if found, else -1. | ||
226 | */ | ||
227 | static int find_alternative(unsigned int event) | ||
228 | { | ||
229 | int i, j; | ||
230 | |||
231 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
232 | if (event < event_alternatives[i][0]) | ||
233 | break; | ||
234 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
235 | if (event == event_alternatives[i][j]) | ||
236 | return i; | ||
237 | } | ||
238 | return -1; | ||
239 | } | ||
240 | |||
/*
 * Equivalent byte-decode PMCSEL values across PMC pairs; row i holds
 * the four byte-decode selectors for PMC i+1.  Used by
 * find_alternative_bdecode() to swap an event between PMC N and 5-N.
 */
static const unsigned char bytedecode_alternatives[4][4] = {
	/* PMC 1 */	{ 0x21, 0x23, 0x25, 0x27 },
	/* PMC 2 */	{ 0x07, 0x17, 0x0e, 0x1e },
	/* PMC 3 */	{ 0x20, 0x22, 0x24, 0x26 },
	/* PMC 4 */	{ 0x07, 0x17, 0x0e, 0x1e }
};
247 | |||
248 | /* | ||
249 | * Some direct events for decodes of event bus byte 3 have alternative | ||
250 | * PMCSEL values on other counters. This returns the alternative | ||
251 | * event code for those that do, or -1 otherwise. | ||
252 | */ | ||
253 | static int find_alternative_bdecode(unsigned int event) | ||
254 | { | ||
255 | int pmc, altpmc, pp, j; | ||
256 | |||
257 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
258 | if (pmc == 0 || pmc > 4) | ||
259 | return -1; | ||
260 | altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ | ||
261 | pp = event & PM_PMCSEL_MSK; | ||
262 | for (j = 0; j < 4; ++j) { | ||
263 | if (bytedecode_alternatives[pmc - 1][j] == pp) { | ||
264 | return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | | ||
265 | (altpmc << PM_PMC_SH) | | ||
266 | bytedecode_alternatives[altpmc - 1][j]; | ||
267 | } | ||
268 | } | ||
269 | return -1; | ||
270 | } | ||
271 | |||
272 | static int power5_get_alternatives(unsigned int event, unsigned int alt[]) | ||
273 | { | ||
274 | int i, j, ae, nalt = 1; | ||
275 | |||
276 | alt[0] = event; | ||
277 | nalt = 1; | ||
278 | i = find_alternative(event); | ||
279 | if (i >= 0) { | ||
280 | for (j = 0; j < MAX_ALT; ++j) { | ||
281 | ae = event_alternatives[i][j]; | ||
282 | if (ae && ae != event) | ||
283 | alt[nalt++] = ae; | ||
284 | } | ||
285 | } else { | ||
286 | ae = find_alternative_bdecode(event); | ||
287 | if (ae > 0) | ||
288 | alt[nalt++] = ae; | ||
289 | } | ||
290 | return nalt; | ||
291 | } | ||
292 | |||
/*
 * Map of which direct events on which PMCs are marked instruction events.
 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
 * Bit 0 is set if it is marked for all PMCs.
 * The 0x80 bit indicates a byte decode PMCSEL value.
 * Consumed by power5_marked_instr_event().
 */
static unsigned char direct_event_is_marked[0x28] = {
	0,	/* 00 */
	0x1f,	/* 01 PM_IOPS_CMPL */
	0x2,	/* 02 PM_MRK_GRP_DISP */
	0xe,	/* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
	0,	/* 04 */
	0x1c,	/* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
	0x80,	/* 06 */
	0x80,	/* 07 */
	0, 0, 0,/* 08 - 0a */
	0x18,	/* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
	0,	/* 0c */
	0x80,	/* 0d */
	0x80,	/* 0e */
	0,	/* 0f */
	0,	/* 10 */
	0x14,	/* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
	0,	/* 12 */
	0x10,	/* 13 PM_MRK_GRP_CMPL */
	0x1f,	/* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
	0x2,	/* 15 PM_MRK_GRP_ISSUED */
	0x80,	/* 16 */
	0x80,	/* 17 */
	0, 0, 0, 0, 0,
	0x80,	/* 1d */
	0x80,	/* 1e */
	0,	/* 1f */
	0x80,	/* 20 */
	0x80,	/* 21 */
	0x80,	/* 22 */
	0x80,	/* 23 */
	0x80,	/* 24 */
	0x80,	/* 25 */
	0x80,	/* 26 */
	0x80,	/* 27 */
};
335 | |||
336 | /* | ||
337 | * Returns 1 if event counts things relating to marked instructions | ||
338 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
339 | */ | ||
340 | static int power5_marked_instr_event(unsigned int event) | ||
341 | { | ||
342 | int pmc, psel; | ||
343 | int bit, byte, unit; | ||
344 | u32 mask; | ||
345 | |||
346 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
347 | psel = event & PM_PMCSEL_MSK; | ||
348 | if (pmc >= 5) | ||
349 | return 0; | ||
350 | |||
351 | bit = -1; | ||
352 | if (psel < sizeof(direct_event_is_marked)) { | ||
353 | if (direct_event_is_marked[psel] & (1 << pmc)) | ||
354 | return 1; | ||
355 | if (direct_event_is_marked[psel] & 0x80) | ||
356 | bit = 4; | ||
357 | else if (psel == 0x08) | ||
358 | bit = pmc - 1; | ||
359 | else if (psel == 0x10) | ||
360 | bit = 4 - pmc; | ||
361 | else if (psel == 0x1b && (pmc == 1 || pmc == 3)) | ||
362 | bit = 4; | ||
363 | } else if ((psel & 0x58) == 0x40) | ||
364 | bit = psel & 7; | ||
365 | |||
366 | if (!(event & PM_BUSEVENT_MSK)) | ||
367 | return 0; | ||
368 | |||
369 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
370 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
371 | if (unit == PM_LSU0) { | ||
372 | /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ | ||
373 | mask = 0x5dff00; | ||
374 | } else if (unit == PM_LSU1 && byte >= 4) { | ||
375 | byte -= 4; | ||
376 | /* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */ | ||
377 | mask = 0x5f00c0aa; | ||
378 | } else | ||
379 | return 0; | ||
380 | |||
381 | return (mask >> (byte * 8 + bit)) & 1; | ||
382 | } | ||
383 | |||
/*
 * Assign PMCs and build the MMCR0/MMCR1/MMCRA register images for a
 * set of up to 6 events.  On success, hwc[i] receives the 0-based PMC
 * assigned to event[i] and mmcr[0..2] receive the register values;
 * returns -1 if the events cannot all be scheduled together.
 */
static int power5_compute_mmcr(unsigned int event[], int n_ev,
			       unsigned int hwc[], u64 mmcr[])
{
	u64 mmcr1 = 0;
	u64 mmcra = 0;
	unsigned int pmc, unit, byte, psel;
	unsigned int ttm, grp;
	int i, isbus, bit, grsel;
	unsigned int pmc_inuse = 0;
	unsigned int pmc_grp_use[2];	/* demand on PMC1/2 vs PMC3/4 */
	unsigned char busbyte[4];	/* unit claimed by each event-bus byte */
	unsigned char unituse[16];	/* which units are needed */
	int ttmuse;

	if (n_ev > 6)
		return -1;

	/* First pass to count resource use */
	pmc_grp_use[0] = pmc_grp_use[1] = 0;
	memset(busbyte, 0, sizeof(busbyte));
	memset(unituse, 0, sizeof(unituse));
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			if (pmc > 6)
				return -1;
			/* each PMC may be claimed by at most one event */
			if (pmc_inuse & (1 << (pmc - 1)))
				return -1;
			pmc_inuse |= 1 << (pmc - 1);
			/* count 1/2 vs 3/4 use */
			if (pmc <= 4)
				++pmc_grp_use[(pmc - 1) >> 1];
		}
		if (event[i] & PM_BUSEVENT_MSK) {
			unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
			byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
			if (unit > PM_LASTUNIT)
				return -1;
			if (unit == PM_ISU0_ALT)
				unit = PM_ISU0;
			if (byte >= 4) {
				/* only LSU1 has bytes 4-7; fold onto unit LSU1+1 */
				if (unit != PM_LSU1)
					return -1;
				++unit;
				byte &= 3;
			}
			/* bus events on even bytes need PMC1/2, odd bytes PMC3/4 */
			if (!pmc)
				++pmc_grp_use[byte & 1];
			/* each bus byte can carry only one unit's data */
			if (busbyte[byte] && busbyte[byte] != unit)
				return -1;
			busbyte[byte] = unit;
			unituse[unit] = 1;
		}
	}
	if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
		return -1;

	/*
	 * Assign resources and set multiplexer selects.
	 *
	 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
	 * choice we have to deal with.
	 */
	if (unituse[PM_ISU0] &
	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
		unituse[PM_ISU0_ALT] = 1;	/* move ISU to TTM1 */
		unituse[PM_ISU0] = 0;
	}
	/* Set TTM[01]SEL fields. */
	ttmuse = 0;
	for (i = PM_FPU; i <= PM_ISU1; ++i) {
		if (!unituse[i])
			continue;
		if (ttmuse++)
			return -1;	/* more than one unit wants TTM0 */
		mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
	}
	ttmuse = 0;
	for (; i <= PM_GRS; ++i) {
		if (!unituse[i])
			continue;
		if (ttmuse++)
			return -1;	/* more than one unit wants TTM1 */
		mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
	}
	if (ttmuse > 1)
		return -1;

	/* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
	for (byte = 0; byte < 4; ++byte) {
		unit = busbyte[byte];
		if (!unit)
			continue;
		if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
			/* get ISU0 through TTM1 rather than TTM0 */
			unit = PM_ISU0_ALT;
		} else if (unit == PM_LSU1 + 1) {
			/* select lower word of LSU1 for this byte */
			mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
		}
		ttm = unit >> 2;
		mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
	}

	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		psel = event[i] & PM_PMCSEL_MSK;
		isbus = event[i] & PM_BUSEVENT_MSK;
		if (!pmc) {
			/* Bus event or any-PMC direct event */
			for (pmc = 0; pmc < 4; ++pmc) {
				if (pmc_inuse & (1 << pmc))
					continue;
				grp = (pmc >> 1) & 1;
				if (isbus) {
					/* bus events must match their byte's group */
					if (grp == (byte & 1))
						break;
				} else if (pmc_grp_use[grp] < 2) {
					++pmc_grp_use[grp];
					break;
				}
			}
			pmc_inuse |= 1 << pmc;
		} else if (pmc <= 4) {
			/* Direct event */
			--pmc;
			if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
				/* add events on higher-numbered bus */
				mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
		} else {
			/* Instructions or run cycles on PMC5/6 */
			--pmc;
		}
		if (isbus && unit == PM_GRS) {
			/* program the GRS_*SEL mux for this event's subunit */
			bit = psel & 7;
			grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
			mmcr1 |= (u64)grsel << grsel_shift[bit];
		}
		if (power5_marked_instr_event(event[i]))
			mmcra |= MMCRA_SAMPLE_ENABLE;
		/* PMC5/6 (pmc 4/5 here) have no PMCSEL field */
		if (pmc <= 3)
			mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
		hwc[i] = pmc;
	}

	/* Return MMCRx values; pmc_inuse bit 0 == PMC1, bits 1-5 == PMC2-6 */
	mmcr[0] = 0;
	if (pmc_inuse & 1)
		mmcr[0] = MMCR0_PMC1CE;
	if (pmc_inuse & 0x3e)
		mmcr[0] |= MMCR0_PMCjCE;
	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	return 0;
}
542 | |||
543 | static void power5_disable_pmc(unsigned int pmc, u64 mmcr[]) | ||
544 | { | ||
545 | if (pmc <= 3) | ||
546 | mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); | ||
547 | } | ||
548 | |||
/*
 * Map of the generic PERF_COUNT_* event ids to raw POWER5 event codes.
 */
static int power5_generic_events[] = {
	[PERF_COUNT_CPU_CYCLES] = 0xf,
	[PERF_COUNT_INSTRUCTIONS] = 0x100009,
	[PERF_COUNT_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */
	[PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
	[PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
};
557 | |||
/*
 * POWER5 PMU description consumed by the core powerpc perf_counter
 * code.  add_fields and test_adder are constants used by the core
 * constraint arithmetic; their encoding is not visible in this file.
 */
struct power_pmu power5_pmu = {
	.n_counter = 6,		/* PMC1-6 */
	.max_alternatives = MAX_ALT,
	.add_fields = 0x7000090000555ull,
	.test_adder = 0x3000490000000ull,
	.compute_mmcr = power5_compute_mmcr,
	.get_constraint = power5_get_constraint,
	.get_alternatives = power5_get_alternatives,
	.disable_pmc = power5_disable_pmc,
	.n_generic = ARRAY_SIZE(power5_generic_events),
	.generic_events = power5_generic_events,
};
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c new file mode 100644 index 000000000000..fce1fc290a1d --- /dev/null +++ b/arch/powerpc/kernel/power6-pmu.c | |||
@@ -0,0 +1,407 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER6 processors. | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_counter.h> | ||
13 | #include <asm/reg.h> | ||
14 | |||
/*
 * Bits in event code for POWER6
 */
#define PM_PMC_SH	20	/* PMC number (1-based) for direct events */
#define PM_PMC_MSK	0x7
#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)
#define PM_UNIT_SH	16	/* Unit event comes (TTMxSEL encoding) */
#define PM_UNIT_MSK	0xf
#define PM_UNIT_MSKS	(PM_UNIT_MSK << PM_UNIT_SH)
#define PM_LLAV		0x8000	/* Load lookahead match value */
#define PM_LLA		0x4000	/* Load lookahead match enable */
#define PM_BYTE_SH	12	/* Byte of event bus to use */
#define PM_BYTE_MSK	3
#define PM_SUBUNIT_SH	8	/* Subunit event comes from (NEST_SEL enc.) */
#define PM_SUBUNIT_MSK	7
#define PM_SUBUNIT_MSKS	(PM_SUBUNIT_MSK << PM_SUBUNIT_SH)
#define PM_PMCSEL_MSK	0xff	/* PMCxSEL value */
#define PM_BUSEVENT_MSK	0xf3700	/* non-zero => event uses the event bus */

/*
 * Bits in MMCR1 for POWER6
 */
#define MMCR1_TTM0SEL_SH	60
#define MMCR1_TTMSEL_SH(n)	(MMCR1_TTM0SEL_SH - (n) * 4)
#define MMCR1_TTMSEL_MSK	0xf
#define MMCR1_TTMSEL(m, n)	(((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK)
#define MMCR1_NESTSEL_SH	45
#define MMCR1_NESTSEL_MSK	0x7
#define MMCR1_NESTSEL(m)	(((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
#define MMCR1_PMC1_LLA		((u64)1 << 44)
#define MMCR1_PMC1_LLA_VALUE	((u64)1 << 39)
#define MMCR1_PMC1_ADDR_SEL	((u64)1 << 35)
#define MMCR1_PMC1SEL_SH	24
/* PMCSEL fields are 8 bits wide; PMC1's is at bits 24-31, PMC4's at 0-7.
 * NB: the shift goes negative for n > 3 -- callers must bound-check n. */
#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK	0xff
50 | |||
/*
 * Map of which direct events on which PMCs are marked instruction events.
 * Indexed by PMCSEL value >> 1.
 * Bottom 4 bits are a map of which PMCs are interesting,
 * top 4 bits say what sort of event:
 *	0 = direct marked event,
 *	1 = byte decode event,
 *	4 = add/and event (PMC1 -> bits 0 & 4),
 *	5 = add/and event (PMC1 -> bits 1 & 5),
 *	6 = add/and event (PMC1 -> bits 2 & 6),
 *	7 = add/and event (PMC1 -> bits 3 & 7).
 * Consumed by power6_marked_instr_event().
 */
static unsigned char direct_event_is_marked[0x60 >> 1] = {
	0,	/* 00 */
	0,	/* 02 */
	0,	/* 04 */
	0x07,	/* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
	0x04,	/* 08 PM_MRK_DFU_FIN */
	0x06,	/* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */
	0,	/* 0c */
	0,	/* 0e */
	0x02,	/* 10 PM_MRK_INST_DISP */
	0x08,	/* 12 PM_MRK_LSU_DERAT_MISS */
	0,	/* 14 */
	0,	/* 16 */
	0x0c,	/* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */
	0x0f,	/* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */
	0x01,	/* 1c PM_MRK_INST_ISSUED */
	0,	/* 1e */
	0,	/* 20 */
	0,	/* 22 */
	0,	/* 24 */
	0,	/* 26 */
	0x15,	/* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */
	0,	/* 2a */
	0,	/* 2c */
	0,	/* 2e */
	0x4f,	/* 30 */
	0x7f,	/* 32 */
	0x4f,	/* 34 */
	0x5f,	/* 36 */
	0x6f,	/* 38 */
	0x4f,	/* 3a */
	0,	/* 3c */
	0x08,	/* 3e PM_MRK_INST_TIMEO */
	0x1f,	/* 40 */
	0x1f,	/* 42 */
	0x1f,	/* 44 */
	0x1f,	/* 46 */
	0x1f,	/* 48 */
	0x1f,	/* 4a */
	0x1f,	/* 4c */
	0x1f,	/* 4e */
	0,	/* 50 */
	0x05,	/* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */
	0x1c,	/* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */
	0x02,	/* 56 PM_MRK_LD_MISS_L1 */
	0,	/* 58 */
	0,	/* 5a */
	0,	/* 5c */
	0,	/* 5e */
};
113 | |||
/*
 * Masks showing for each unit which bits are marked events.
 * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0.
 * Indexed by the PM_UNIT field of the event code; used by
 * power6_marked_instr_event().
 */
static u32 marked_bus_events[16] = {
	0x01000000,	/* direct events set 1: byte 3 bit 0 */
	0x00010000,	/* direct events set 2: byte 2 bit 0 */
	0, 0, 0, 0,	/* IDU, IFU, nest: nothing */
	0x00000088,	/* VMX set 1: byte 0 bits 3, 7 */
	0x000000c0,	/* VMX set 2: byte 0 bits 4-7 */
	0x04010000,	/* LSU set 1: byte 2 bit 0, byte 3 bit 2 */
	0xff010000u,	/* LSU set 2: byte 2 bit 0, all of byte 3 */
	0,		/* LSU set 3 */
	0x00000010,	/* VMX set 3: byte 0 bit 4 */
	0,		/* BFP set 1 */
	0x00000022,	/* BFP set 2: byte 0 bits 1, 5 */
	0, 0
};
132 | |||
/*
 * Returns 1 if event counts things relating to marked instructions
 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
 */
static int power6_marked_instr_event(unsigned int event)
{
	int pmc, psel, ptype;
	int bit, byte, unit;
	u32 mask;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	psel = (event & PM_PMCSEL_MSK) >> 1;	/* drop edge/level bit */
	if (pmc >= 5)
		return 0;

	/* bit is the event-bus bit to test, or -1 if none was identified */
	bit = -1;
	if (psel < sizeof(direct_event_is_marked)) {
		ptype = direct_event_is_marked[psel];
		/* low nibble: map of which PMCs this psel is marked on */
		if (pmc == 0 || !(ptype & (1 << (pmc - 1))))
			return 0;
		ptype >>= 4;	/* high nibble: category (see table comment) */
		if (ptype == 0)
			return 1;	/* direct marked event */
		if (ptype == 1)
			bit = 0;	/* byte decode event */
		else
			/* add/and event: category 4-7 maps PMC to a bus bit */
			bit = ptype ^ (pmc - 1);
	} else if ((psel & 0x48) == 0x40)
		bit = psel & 7;

	if (!(event & PM_BUSEVENT_MSK) || bit == -1)
		return 0;

	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	mask = marked_bus_events[unit];
	return (mask >> (byte * 8 + bit)) & 1;
}
171 | |||
/*
 * Assign PMC numbers and compute MMCR1 value for a set of events.
 * On success, hwc[i] receives the 0-based PMC for event[i] and
 * mmcr[0..2] the MMCR0/MMCR1/MMCRA images; returns -1 if the
 * events cannot all be scheduled together.
 */
static int p6_compute_mmcr(unsigned int event[], int n_ev,
			   unsigned int hwc[], u64 mmcr[])
{
	u64 mmcr1 = 0;
	u64 mmcra = 0;
	int i;
	unsigned int pmc, ev, b, u, s, psel;
	unsigned int ttmset = 0;	/* bits 0-3: TTMSEL(byte) set; bit 4: NESTSEL set */
	unsigned int pmc_inuse = 0;

	if (n_ev > 4)
		return -1;
	/* First pass: claim all explicitly-requested PMCs */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			if (pmc_inuse & (1 << (pmc - 1)))
				return -1;	/* collision! */
			pmc_inuse |= 1 << (pmc - 1);
		}
	}
	/* Second pass: place "any PMC" events and program MMCR1 */
	for (i = 0; i < n_ev; ++i) {
		ev = event[i];
		pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			--pmc;		/* convert to 0-based */
		} else {
			/* can go on any PMC; find a free one */
			for (pmc = 0; pmc < 4; ++pmc)
				if (!(pmc_inuse & (1 << pmc)))
					break;
			pmc_inuse |= 1 << pmc;
		}
		hwc[i] = pmc;
		psel = ev & PM_PMCSEL_MSK;
		if (ev & PM_BUSEVENT_MSK) {
			/* this event uses the event bus */
			b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;
			u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;
			/* check for conflict on this byte of event bus */
			if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
				return -1;
			mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b);
			ttmset |= 1 << b;
			if (u == 5) {
				/* Nest events have a further mux */
				s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
				if ((ttmset & 0x10) &&
				    MMCR1_NESTSEL(mmcr1) != s)
					return -1;
				ttmset |= 0x10;
				mmcr1 |= (u64)s << MMCR1_NESTSEL_SH;
			}
			if (0x30 <= psel && psel <= 0x3d) {
				/* these need the PMCx_ADDR_SEL bits */
				if (b >= 2)
					mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc;
			}
			/* bus select values are different for PMC3/4 */
			if (pmc >= 2 && (psel & 0x90) == 0x80)
				psel ^= 0x20;
		}
		if (ev & PM_LLA) {
			/* load lookahead match enable (and optional value) */
			mmcr1 |= MMCR1_PMC1_LLA >> pmc;
			if (ev & PM_LLAV)
				mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc;
		}
		if (power6_marked_instr_event(event[i]))
			mmcra |= MMCRA_SAMPLE_ENABLE;
		mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
	}
	/* Return MMCRx images; pmc_inuse bit 0 == PMC1, bits 1-3 == PMC2-4 */
	mmcr[0] = 0;
	if (pmc_inuse & 1)
		mmcr[0] = MMCR0_PMC1CE;
	if (pmc_inuse & 0xe)
		mmcr[0] |= MMCR0_PMCjCE;
	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	return 0;
}
254 | |||
255 | /* | ||
256 | * Layout of constraint bits: | ||
257 | * | ||
258 | * 0-1 add field: number of uses of PMC1 (max 1) | ||
259 | * 2-3, 4-5, 6-7: ditto for PMC2, 3, 4 | ||
260 | * 8-10 select field: nest (subunit) event selector | ||
261 | * 16-19 select field: unit on byte 0 of event bus | ||
262 | * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 | ||
263 | */ | ||
264 | static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp) | ||
265 | { | ||
266 | int pmc, byte, sh; | ||
267 | unsigned int mask = 0, value = 0; | ||
268 | |||
269 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
270 | if (pmc) { | ||
271 | if (pmc > 4) | ||
272 | return -1; | ||
273 | sh = (pmc - 1) * 2; | ||
274 | mask |= 2 << sh; | ||
275 | value |= 1 << sh; | ||
276 | } | ||
277 | if (event & PM_BUSEVENT_MSK) { | ||
278 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
279 | sh = byte * 4; | ||
280 | mask |= PM_UNIT_MSKS << sh; | ||
281 | value |= (event & PM_UNIT_MSKS) << sh; | ||
282 | if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { | ||
283 | mask |= PM_SUBUNIT_MSKS; | ||
284 | value |= event & PM_SUBUNIT_MSKS; | ||
285 | } | ||
286 | } | ||
287 | *maskp = mask; | ||
288 | *valp = value; | ||
289 | return 0; | ||
290 | } | ||
291 | |||
#define MAX_ALT	4	/* at most 4 alternatives for any event */

/*
 * Groups of event codes that count the same thing on different PMCs.
 * Rows are sorted ascending by their first entry and entries within a
 * row are ascending (relied on by find_alternatives_list()); unused
 * trailing slots are zero.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	{ 0x0130e8, 0x2000f6, 0x3000fc },	/* PM_PTEG_RELOAD_VALID */
	{ 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
	{ 0x080088, 0x200054, 0x3000f0 },	/* PM_ST_MISS_L1 */
	{ 0x10000a, 0x2000f4 },			/* PM_RUN_CYC */
	{ 0x10000b, 0x2000f5 },			/* PM_RUN_COUNT */
	{ 0x10000e, 0x400010 },			/* PM_PURR */
	{ 0x100010, 0x4000f8 },			/* PM_FLUSH */
	{ 0x10001a, 0x200010 },			/* PM_MRK_INST_DISP */
	{ 0x100026, 0x3000f8 },			/* PM_TB_BIT_TRANS */
	{ 0x100054, 0x2000f0 },			/* PM_ST_FIN */
	{ 0x100056, 0x2000fc },			/* PM_L1_ICACHE_MISS */
	{ 0x1000f0, 0x40000a },			/* PM_INST_IMC_MATCH_CMPL */
	{ 0x1000f8, 0x200008 },			/* PM_GCT_EMPTY_CYC */
	{ 0x1000fc, 0x400006 },			/* PM_LSU_DERAT_MISS_CYC */
	{ 0x20000e, 0x400007 },			/* PM_LSU_DERAT_MISS */
	{ 0x200012, 0x300012 },			/* PM_INST_DISP */
	{ 0x2000f2, 0x3000f2 },			/* PM_INST_DISP */
	{ 0x2000f8, 0x300010 },			/* PM_EXT_INT */
	{ 0x2000fe, 0x300056 },			/* PM_DATA_FROM_L2MISS */
	{ 0x2d0030, 0x30001a },			/* PM_MRK_FPU_FIN */
	{ 0x30000a, 0x400018 },			/* PM_MRK_INST_FIN */
	{ 0x3000f6, 0x40000e },			/* PM_L1_DCACHE_RELOAD_VALID */
	{ 0x3000fe, 0x400056 },			/* PM_DATA_FROM_L3MISS */
};
319 | |||
320 | /* | ||
321 | * This could be made more efficient with a binary search on | ||
322 | * a presorted list, if necessary | ||
323 | */ | ||
324 | static int find_alternatives_list(unsigned int event) | ||
325 | { | ||
326 | int i, j; | ||
327 | unsigned int alt; | ||
328 | |||
329 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
330 | if (event < event_alternatives[i][0]) | ||
331 | return -1; | ||
332 | for (j = 0; j < MAX_ALT; ++j) { | ||
333 | alt = event_alternatives[i][j]; | ||
334 | if (!alt || event < alt) | ||
335 | break; | ||
336 | if (event == alt) | ||
337 | return i; | ||
338 | } | ||
339 | } | ||
340 | return -1; | ||
341 | } | ||
342 | |||
343 | static int p6_get_alternatives(unsigned int event, unsigned int alt[]) | ||
344 | { | ||
345 | int i, j; | ||
346 | unsigned int aevent, psel, pmc; | ||
347 | unsigned int nalt = 1; | ||
348 | |||
349 | alt[0] = event; | ||
350 | |||
351 | /* check the alternatives table */ | ||
352 | i = find_alternatives_list(event); | ||
353 | if (i >= 0) { | ||
354 | /* copy out alternatives from list */ | ||
355 | for (j = 0; j < MAX_ALT; ++j) { | ||
356 | aevent = event_alternatives[i][j]; | ||
357 | if (!aevent) | ||
358 | break; | ||
359 | if (aevent != event) | ||
360 | alt[nalt++] = aevent; | ||
361 | } | ||
362 | |||
363 | } else { | ||
364 | /* Check for alternative ways of computing sum events */ | ||
365 | /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */ | ||
366 | psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */ | ||
367 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
368 | if (pmc && (psel == 0x32 || psel == 0x34)) | ||
369 | alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) | | ||
370 | ((5 - pmc) << PM_PMC_SH); | ||
371 | |||
372 | /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */ | ||
373 | if (pmc && (psel == 0x38 || psel == 0x3a)) | ||
374 | alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) | | ||
375 | ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH); | ||
376 | } | ||
377 | |||
378 | return nalt; | ||
379 | } | ||
380 | |||
381 | static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) | ||
382 | { | ||
383 | /* Set PMCxSEL to 0 to disable PMCx */ | ||
384 | mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); | ||
385 | } | ||
386 | |||
/*
 * Map of the generic PERF_COUNT_* event ids to raw POWER6 event codes.
 */
static int power6_generic_events[] = {
	[PERF_COUNT_CPU_CYCLES] = 0x1e,
	[PERF_COUNT_INSTRUCTIONS] = 2,
	[PERF_COUNT_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */
	[PERF_COUNT_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */
	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */
	[PERF_COUNT_BRANCH_MISSES] = 0x400052, /* BR_MPRED */
};
395 | |||
/*
 * POWER6 PMU description consumed by the core powerpc perf_counter
 * code.  add_fields and test_adder are constants used by the core
 * constraint arithmetic; their encoding is not visible in this file.
 */
struct power_pmu power6_pmu = {
	.n_counter = 4,		/* PMC1-4 (plus fixed-function PMC5/6 unused here) */
	.max_alternatives = MAX_ALT,
	.add_fields = 0x55,
	.test_adder = 0,
	.compute_mmcr = p6_compute_mmcr,
	.get_constraint = p6_get_constraint,
	.get_alternatives = p6_get_alternatives,
	.disable_pmc = p6_disable_pmc,
	.n_generic = ARRAY_SIZE(power6_generic_events),
	.generic_events = power6_generic_events,
};
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c new file mode 100644 index 000000000000..aed8ccd7c077 --- /dev/null +++ b/arch/powerpc/kernel/ppc970-pmu.c | |||
@@ -0,0 +1,441 @@ | |||
1 | /* | ||
2 | * Performance counter support for PPC970-family processors. | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/string.h> | ||
12 | #include <linux/perf_counter.h> | ||
13 | #include <asm/reg.h> | ||
14 | |||
15 | /* | ||
16 | * Bits in event code for PPC970 | ||
17 | */ | ||
18 | #define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ | ||
19 | #define PM_PMC_MSK 0xf | ||
20 | #define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ | ||
21 | #define PM_UNIT_MSK 0xf | ||
22 | #define PM_SPCSEL_SH 6 | ||
23 | #define PM_SPCSEL_MSK 3 | ||
24 | #define PM_BYTE_SH 4 /* Byte number of event bus to use */ | ||
25 | #define PM_BYTE_MSK 3 | ||
26 | #define PM_PMCSEL_MSK 0xf | ||
27 | |||
28 | /* Values in PM_UNIT field */ | ||
29 | #define PM_NONE 0 | ||
30 | #define PM_FPU 1 | ||
31 | #define PM_VPU 2 | ||
32 | #define PM_ISU 3 | ||
33 | #define PM_IFU 4 | ||
34 | #define PM_IDU 5 | ||
35 | #define PM_STS 6 | ||
36 | #define PM_LSU0 7 | ||
37 | #define PM_LSU1U 8 | ||
38 | #define PM_LSU1L 9 | ||
39 | #define PM_LASTUNIT 9 | ||
40 | |||
41 | /* | ||
42 | * Bits in MMCR0 for PPC970 | ||
43 | */ | ||
44 | #define MMCR0_PMC1SEL_SH 8 | ||
45 | #define MMCR0_PMC2SEL_SH 1 | ||
46 | #define MMCR_PMCSEL_MSK 0x1f | ||
47 | |||
48 | /* | ||
49 | * Bits in MMCR1 for PPC970 | ||
50 | */ | ||
51 | #define MMCR1_TTM0SEL_SH 62 | ||
52 | #define MMCR1_TTM1SEL_SH 59 | ||
53 | #define MMCR1_TTM3SEL_SH 53 | ||
54 | #define MMCR1_TTMSEL_MSK 3 | ||
55 | #define MMCR1_TD_CP_DBG0SEL_SH 50 | ||
56 | #define MMCR1_TD_CP_DBG1SEL_SH 48 | ||
57 | #define MMCR1_TD_CP_DBG2SEL_SH 46 | ||
58 | #define MMCR1_TD_CP_DBG3SEL_SH 44 | ||
59 | #define MMCR1_PMC1_ADDER_SEL_SH 39 | ||
60 | #define MMCR1_PMC2_ADDER_SEL_SH 38 | ||
61 | #define MMCR1_PMC6_ADDER_SEL_SH 37 | ||
62 | #define MMCR1_PMC5_ADDER_SEL_SH 36 | ||
63 | #define MMCR1_PMC8_ADDER_SEL_SH 35 | ||
64 | #define MMCR1_PMC7_ADDER_SEL_SH 34 | ||
65 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
66 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
67 | #define MMCR1_PMC3SEL_SH 27 | ||
68 | #define MMCR1_PMC4SEL_SH 22 | ||
69 | #define MMCR1_PMC5SEL_SH 17 | ||
70 | #define MMCR1_PMC6SEL_SH 12 | ||
71 | #define MMCR1_PMC7SEL_SH 7 | ||
72 | #define MMCR1_PMC8SEL_SH 2 | ||
73 | |||
/*
 * MMCR1 bit positions of the PMCx_ADDER_SEL fields, indexed by
 * 0-based PMC number (note the MMCR1 fields themselves are not in
 * simple ascending order; see the #defines above).
 */
static short mmcr1_adder_bits[8] = {
	MMCR1_PMC1_ADDER_SEL_SH,
	MMCR1_PMC2_ADDER_SEL_SH,
	MMCR1_PMC3_ADDER_SEL_SH,
	MMCR1_PMC4_ADDER_SEL_SH,
	MMCR1_PMC5_ADDER_SEL_SH,
	MMCR1_PMC6_ADDER_SEL_SH,
	MMCR1_PMC7_ADDER_SEL_SH,
	MMCR1_PMC8_ADDER_SEL_SH
};
84 | |||
85 | /* | ||
86 | * Bits in MMCRA | ||
87 | */ | ||
88 | |||
89 | /* | ||
90 | * Layout of constraint bits: | ||
91 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
92 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
93 | * <><><>[ >[ >[ >< >< >< >< ><><><><><><><><> | ||
94 | * SPT0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 | ||
95 | * | ||
96 | * SP - SPCSEL constraint | ||
97 | * 48-49: SPCSEL value 0x3_0000_0000_0000 | ||
98 | * | ||
99 | * T0 - TTM0 constraint | ||
100 | * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000 | ||
101 | * | ||
102 | * T1 - TTM1 constraint | ||
103 | * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000 | ||
104 | * | ||
105 | * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS | ||
106 | * 43: UC3 error 0x0800_0000_0000 | ||
107 | * 42: FPU|IFU|VPU events needed 0x0400_0000_0000 | ||
108 | * 41: ISU events needed 0x0200_0000_0000 | ||
109 | * 40: IDU|STS events needed 0x0100_0000_0000 | ||
110 | * | ||
111 | * PS1 | ||
112 | * 39: PS1 error 0x0080_0000_0000 | ||
113 | * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 | ||
114 | * | ||
115 | * PS2 | ||
116 | * 35: PS2 error 0x0008_0000_0000 | ||
117 | * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 | ||
118 | * | ||
119 | * B0 | ||
120 | * 28-31: Byte 0 event source 0xf000_0000 | ||
121 | * Encoding as for the event code | ||
122 | * | ||
123 | * B1, B2, B3 | ||
124 | * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources | ||
125 | * | ||
126 | * P1 | ||
127 | * 15: P1 error 0x8000 | ||
128 | * 14-15: Count of events needing PMC1 | ||
129 | * | ||
130 | * P2..P8 | ||
131 | * 0-13: Count of events needing PMC2..PMC8 | ||
132 | */ | ||
133 | |||
/*
 * Per-PMC bitmaps of direct (PMC-specific) event selects that count
 * marked-instruction events: bit i set in direct_marked_event[pmc-1]
 * means PMCSEL value i on that counter is a marked event.  The
 * comments list the event names for the bits that are set.
 */
static unsigned char direct_marked_event[8] = {
	(1<<2) | (1<<3),	/* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
	(1<<3) | (1<<5),	/* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
	(1<<3) | (1<<5),	/* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */
	(1<<4) | (1<<5),	/* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
	(1<<4) | (1<<5),	/* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */
	(1<<3) | (1<<4) | (1<<5),
		/* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
	(1<<4) | (1<<5),	/* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
	(1<<4)			/* PMC8: PM_MRK_LSU_FIN */
};
145 | |||
146 | /* | ||
147 | * Returns 1 if event counts things relating to marked instructions | ||
148 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
149 | */ | ||
150 | static int p970_marked_instr_event(unsigned int event) | ||
151 | { | ||
152 | int pmc, psel, unit, byte, bit; | ||
153 | unsigned int mask; | ||
154 | |||
155 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
156 | psel = event & PM_PMCSEL_MSK; | ||
157 | if (pmc) { | ||
158 | if (direct_marked_event[pmc - 1] & (1 << psel)) | ||
159 | return 1; | ||
160 | if (psel == 0) /* add events */ | ||
161 | bit = (pmc <= 4)? pmc - 1: 8 - pmc; | ||
162 | else if (psel == 7 || psel == 13) /* decode events */ | ||
163 | bit = 4; | ||
164 | else | ||
165 | return 0; | ||
166 | } else | ||
167 | bit = psel; | ||
168 | |||
169 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
170 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
171 | mask = 0; | ||
172 | switch (unit) { | ||
173 | case PM_VPU: | ||
174 | mask = 0x4c; /* byte 0 bits 2,3,6 */ | ||
175 | case PM_LSU0: | ||
176 | /* byte 2 bits 0,2,3,4,6; all of byte 1 */ | ||
177 | mask = 0x085dff00; | ||
178 | case PM_LSU1L: | ||
179 | mask = 0x50 << 24; /* byte 3 bits 4,6 */ | ||
180 | break; | ||
181 | } | ||
182 | return (mask >> (byte * 8 + bit)) & 1; | ||
183 | } | ||
184 | |||
/*
 * Masks and values for using events from the various units.
 * unit_cons[unit][0] is the constraint mask and unit_cons[unit][1]
 * the corresponding value: together they claim the TTM0/TTM1 select
 * fields and the "unit needed" (UC) bits described in the layout
 * comment above, so incompatible unit mixes conflict.
 */
static u64 unit_cons[PM_LASTUNIT+1][2] = {
	[PM_FPU] = { 0xc80000000000ull, 0x040000000000ull },
	[PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull },
	[PM_ISU] = { 0x080000000000ull, 0x020000000000ull },
	[PM_IFU] = { 0xc80000000000ull, 0x840000000000ull },
	[PM_IDU] = { 0x380000000000ull, 0x010000000000ull },
	[PM_STS] = { 0x380000000000ull, 0x310000000000ull },
};
194 | |||
/*
 * Compute the constraint mask/value pair for one event, following
 * the bit layout documented above: PMC usage counts, unit (TTM)
 * routing, byte lane selects and SPCSEL.  Returns 0 on success and
 * fills *maskp/*valp, or -1 if the event code is out of range.
 */
static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
{
	int pmc, byte, unit, sh, spcsel;
	u64 mask = 0, value = 0;
	int grp = -1;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	if (pmc) {
		if (pmc > 8)
			return -1;
		/*
		 * Two constraint bits per PMC: a second event claiming
		 * the same PMC would carry into the masked bit and so
		 * fail the constraint check.
		 */
		sh = (pmc - 1) * 2;
		mask |= 2 << sh;
		value |= 1 << sh;
		grp = ((pmc - 1) >> 1) & 1;	/* 0: PMC1/2/5/6, 1: PMC3/4/7/8 */
	}
	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	if (unit) {
		if (unit > PM_LASTUNIT)
			return -1;
		mask |= unit_cons[unit][0];
		value |= unit_cons[unit][1];
		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
		/*
		 * Bus events on bytes 0 and 2 can be counted
		 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
		 */
		if (!pmc)
			grp = byte & 1;
		/* Set byte lane select field */
		mask |= 0xfULL << (28 - 4 * byte);
		value |= (u64)unit << (28 - 4 * byte);
	}
	if (grp == 0) {
		/* increment PMC1/2/5/6 field */
		mask |= 0x8000000000ull;
		value |= 0x1000000000ull;
	} else if (grp == 1) {
		/* increment PMC3/4/7/8 field */
		mask |= 0x800000000ull;
		value |= 0x100000000ull;
	}
	spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
	if (spcsel) {
		/* claim the SPCSEL field (constraint bits 48-49) */
		mask |= 3ull << 48;
		value |= (u64)spcsel << 48;
	}
	*maskp = mask;
	*valp = value;
	return 0;
}
245 | |||
/*
 * Fill alt[] with the event codes that count the same thing as the
 * given event.  alt[0] is always the event itself; the LSU empty
 * event has a second, equivalent encoding.  Returns the number of
 * alternatives stored in alt[].
 */
static int p970_get_alternatives(unsigned int event, unsigned int alt[])
{
	int nalt = 1;

	alt[0] = event;

	switch (event) {
	case 0x2002:		/* LSU empty, 2 encodings */
	case 0x3002:
		alt[nalt++] = event ^ 0x1000;
		break;
	}

	return nalt;
}
258 | |||
/*
 * Assign the n_ev events to hardware counters and compute the
 * control-register values needed to count them.  On success,
 * hwc[i] receives the 0-based PMC chosen for event[i] and
 * mmcr[0..2] receive MMCR0, MMCR1 and MMCRA; returns -1 if the
 * event set cannot be scheduled together.
 */
static int p970_compute_mmcr(unsigned int event[], int n_ev,
			     unsigned int hwc[], u64 mmcr[])
{
	u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
	unsigned int pmc, unit, byte, psel;
	unsigned int ttm, grp;
	unsigned int pmc_inuse = 0;
	unsigned int pmc_grp_use[2];
	unsigned char busbyte[4];
	unsigned char unituse[16];
	/*
	 * Per-unit TTM routing, indexed by PM_* unit number: bit 2
	 * selects TTM1 (vs TTM0), and the low bits hold the TTMxSEL
	 * value pre-shifted by 3 so that one shift by
	 * MMCR1_TTM1SEL_SH places TTM0 values in the TTM0SEL field
	 * and TTM1 values in the TTM1SEL field below it.
	 */
	unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
	unsigned char ttmuse[2];
	unsigned char pmcsel[8];
	int i;
	int spcsel;

	if (n_ev > 8)
		return -1;

	/* First pass to count resource use */
	pmc_grp_use[0] = pmc_grp_use[1] = 0;
	memset(busbyte, 0, sizeof(busbyte));
	memset(unituse, 0, sizeof(unituse));
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			if (pmc_inuse & (1 << (pmc - 1)))
				return -1;	/* same PMC wanted twice */
			pmc_inuse |= 1 << (pmc - 1);
			/* count 1/2/5/6 vs 3/4/7/8 use */
			++pmc_grp_use[((pmc - 1) >> 1) & 1];
		}
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		if (unit) {
			if (unit > PM_LASTUNIT)
				return -1;
			if (!pmc)
				++pmc_grp_use[byte & 1];
			/* each event-bus byte can carry only one unit */
			if (busbyte[byte] && busbyte[byte] != unit)
				return -1;
			busbyte[byte] = unit;
			unituse[unit] = 1;
		}
	}
	if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
		return -1;

	/*
	 * Assign resources and set multiplexer selects.
	 *
	 * PM_ISU can go either on TTM0 or TTM1, but that's the only
	 * choice we have to deal with.
	 */
	if (unituse[PM_ISU] &
	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
		unitmap[PM_ISU] = 2 | 4;		/* move ISU to TTM1 */
	/* Set TTM[01]SEL fields. */
	ttmuse[0] = ttmuse[1] = 0;
	for (i = PM_FPU; i <= PM_STS; ++i) {
		if (!unituse[i])
			continue;
		ttm = unitmap[i];
		++ttmuse[(ttm >> 2) & 1];
		/* works for both fields; see unitmap comment above */
		mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH;
	}
	/* Check only one unit per TTMx */
	if (ttmuse[0] > 1 || ttmuse[1] > 1)
		return -1;

	/* Set byte lane select fields and TTM3SEL. */
	for (byte = 0; byte < 4; ++byte) {
		unit = busbyte[byte];
		if (!unit)
			continue;
		if (unit <= PM_STS)
			ttm = (unitmap[unit] >> 2) & 1;
		else if (unit == PM_LSU0)
			ttm = 2;
		else {
			ttm = 3;
			if (unit == PM_LSU1L && byte >= 2)
				mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
		}
		mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
	}

	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
	memset(pmcsel, 0x8, sizeof(pmcsel));	/* 8 means don't count */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		psel = event[i] & PM_PMCSEL_MSK;
		if (!pmc) {
			/* Bus event or any-PMC direct event */
			if (unit)
				psel |= 0x10 | ((byte & 2) << 2);
			else
				psel |= 8;
			/* find a free PMC in the right half for this byte */
			for (pmc = 0; pmc < 8; ++pmc) {
				if (pmc_inuse & (1 << pmc))
					continue;
				grp = (pmc >> 1) & 1;
				if (unit) {
					if (grp == (byte & 1))
						break;
				} else if (pmc_grp_use[grp] < 4) {
					++pmc_grp_use[grp];
					break;
				}
			}
			pmc_inuse |= 1 << pmc;
		} else {
			/* Direct event */
			--pmc;
			if (psel == 0 && (byte & 2))
				/* add events on higher-numbered bus */
				mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
		}
		pmcsel[pmc] = psel;
		hwc[i] = pmc;
		spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
		mmcr1 |= spcsel;
		if (p970_marked_instr_event(event[i]))
			mmcra |= MMCRA_SAMPLE_ENABLE;
	}
	/* PMC1/2 selects live in MMCR0, PMC3-8 in MMCR1 */
	for (pmc = 0; pmc < 2; ++pmc)
		mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
	for (; pmc < 8; ++pmc)
		mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
	if (pmc_inuse & 1)
		mmcr0 |= MMCR0_PMC1CE;
	if (pmc_inuse & 0xfe)
		mmcr0 |= MMCR0_PMCjCE;

	mmcra |= 0x2000;	/* mark only one IOP per PPC instruction */

	/* Return MMCRx values */
	mmcr[0] = mmcr0;
	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	return 0;
}
403 | |||
404 | static void p970_disable_pmc(unsigned int pmc, u64 mmcr[]) | ||
405 | { | ||
406 | int shift, i; | ||
407 | |||
408 | if (pmc <= 1) { | ||
409 | shift = MMCR0_PMC1SEL_SH - 7 * pmc; | ||
410 | i = 0; | ||
411 | } else { | ||
412 | shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2); | ||
413 | i = 1; | ||
414 | } | ||
415 | /* | ||
416 | * Setting the PMCxSEL field to 0x08 disables PMC x. | ||
417 | */ | ||
418 | mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift); | ||
419 | } | ||
420 | |||
/*
 * Map of generic perf_counter hardware event types to PPC970 event
 * codes; the comments give the native event names.
 */
static int ppc970_generic_events[] = {
	[PERF_COUNT_CPU_CYCLES] = 7,
	[PERF_COUNT_INSTRUCTIONS] = 1,
	[PERF_COUNT_CACHE_REFERENCES] = 0x8810,		/* PM_LD_REF_L1 */
	[PERF_COUNT_CACHE_MISSES] = 0x3810,		/* PM_LD_MISS_L1 */
	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x431,	/* PM_BR_ISSUED */
	[PERF_COUNT_BRANCH_MISSES] = 0x327,		/* PM_GRP_BR_MPRED */
};
429 | |||
/*
 * PPC970-family PMU description handed to the core powerpc
 * perf_counter code: 8 programmable counters plus the callbacks
 * defined above for constraints, MMCR computation and disabling.
 */
struct power_pmu ppc970_pmu = {
	.n_counter = 8,
	.max_alternatives = 2,
	.add_fields = 0x001100005555ull,
	.test_adder = 0x013300000000ull,
	.compute_mmcr = p970_compute_mmcr,
	.get_constraint = p970_get_constraint,
	.get_alternatives = p970_get_alternatives,
	.disable_pmc = p970_disable_pmc,
	.n_generic = ARRAY_SIZE(ppc970_generic_events),
	.generic_events = ppc970_generic_events,
};
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 76993941cac9..ac0e112031b2 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/module.h> | 29 | #include <linux/module.h> |
30 | #include <linux/kprobes.h> | 30 | #include <linux/kprobes.h> |
31 | #include <linux/kdebug.h> | 31 | #include <linux/kdebug.h> |
32 | #include <linux/perf_counter.h> | ||
32 | 33 | ||
33 | #include <asm/firmware.h> | 34 | #include <asm/firmware.h> |
34 | #include <asm/page.h> | 35 | #include <asm/page.h> |
@@ -170,6 +171,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, | |||
170 | die("Weird page fault", regs, SIGSEGV); | 171 | die("Weird page fault", regs, SIGSEGV); |
171 | } | 172 | } |
172 | 173 | ||
174 | perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address); | ||
175 | |||
173 | /* When running in the kernel we expect faults to occur only to | 176 | /* When running in the kernel we expect faults to occur only to |
174 | * addresses in user space. All other faults represent errors in the | 177 | * addresses in user space. All other faults represent errors in the |
175 | * kernel and should generate an OOPS. Unfortunately, in the case of an | 178 | * kernel and should generate an OOPS. Unfortunately, in the case of an |
@@ -309,6 +312,8 @@ good_area: | |||
309 | } | 312 | } |
310 | if (ret & VM_FAULT_MAJOR) { | 313 | if (ret & VM_FAULT_MAJOR) { |
311 | current->maj_flt++; | 314 | current->maj_flt++; |
315 | perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, | ||
316 | regs, address); | ||
312 | #ifdef CONFIG_PPC_SMLPAR | 317 | #ifdef CONFIG_PPC_SMLPAR |
313 | if (firmware_has_feature(FW_FEATURE_CMO)) { | 318 | if (firmware_has_feature(FW_FEATURE_CMO)) { |
314 | preempt_disable(); | 319 | preempt_disable(); |
@@ -316,8 +321,11 @@ good_area: | |||
316 | preempt_enable(); | 321 | preempt_enable(); |
317 | } | 322 | } |
318 | #endif | 323 | #endif |
319 | } else | 324 | } else { |
320 | current->min_flt++; | 325 | current->min_flt++; |
326 | perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, | ||
327 | regs, address); | ||
328 | } | ||
321 | up_read(&mm->mmap_sem); | 329 | up_read(&mm->mmap_sem); |
322 | return 0; | 330 | return 0; |
323 | 331 | ||
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 9da795e49337..732ee93a8e98 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype | |||
@@ -1,6 +1,7 @@ | |||
1 | config PPC64 | 1 | config PPC64 |
2 | bool "64-bit kernel" | 2 | bool "64-bit kernel" |
3 | default n | 3 | default n |
4 | select HAVE_PERF_COUNTERS | ||
4 | help | 5 | help |
5 | This option selects whether a 32-bit or a 64-bit kernel | 6 | This option selects whether a 32-bit or a 64-bit kernel |
6 | will be built. | 7 | will be built. |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df9e885eee14..32ada97c964d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -727,6 +727,7 @@ config X86_UP_IOAPIC | |||
727 | config X86_LOCAL_APIC | 727 | config X86_LOCAL_APIC |
728 | def_bool y | 728 | def_bool y |
729 | depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC | 729 | depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC |
730 | select HAVE_PERF_COUNTERS if (!M386 && !M486) | ||
730 | 731 | ||
731 | config X86_IO_APIC | 732 | config X86_IO_APIC |
732 | def_bool y | 733 | def_bool y |
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a505202086e8..19c61ef6ab57 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -825,9 +825,10 @@ ia32_sys_call_table: | |||
825 | .quad compat_sys_signalfd4 | 825 | .quad compat_sys_signalfd4 |
826 | .quad sys_eventfd2 | 826 | .quad sys_eventfd2 |
827 | .quad sys_epoll_create1 | 827 | .quad sys_epoll_create1 |
828 | .quad sys_dup3 /* 330 */ | 828 | .quad sys_dup3 /* 330 */ |
829 | .quad sys_pipe2 | 829 | .quad sys_pipe2 |
830 | .quad sys_inotify_init1 | 830 | .quad sys_inotify_init1 |
831 | .quad compat_sys_preadv | 831 | .quad compat_sys_preadv |
832 | .quad compat_sys_pwritev | 832 | .quad compat_sys_pwritev |
833 | .quad sys_perf_counter_open | ||
833 | ia32_syscall_end: | 834 | ia32_syscall_end: |
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index 85b46fba4229..aff9f1fcdcd7 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h | |||
@@ -247,5 +247,241 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) | |||
247 | #define smp_mb__before_atomic_inc() barrier() | 247 | #define smp_mb__before_atomic_inc() barrier() |
248 | #define smp_mb__after_atomic_inc() barrier() | 248 | #define smp_mb__after_atomic_inc() barrier() |
249 | 249 | ||
250 | /* An 64bit atomic type */ | ||
251 | |||
252 | typedef struct { | ||
253 | unsigned long long counter; | ||
254 | } atomic64_t; | ||
255 | |||
256 | #define ATOMIC64_INIT(val) { (val) } | ||
257 | |||
258 | /** | ||
259 | * atomic64_read - read atomic64 variable | ||
260 | * @v: pointer of type atomic64_t | ||
261 | * | ||
262 | * Atomically reads the value of @v. | ||
263 | * Doesn't imply a read memory barrier. | ||
264 | */ | ||
265 | #define __atomic64_read(ptr) ((ptr)->counter) | ||
266 | |||
/*
 * 64-bit compare-and-exchange for 32-bit x86 using the cmpxchg8b
 * instruction.  If *ptr == old, *ptr is set to new.  Returns the
 * value *ptr held before the operation, so the exchange happened
 * iff the return value equals @old.
 *
 * NOTE(review): @old appears both as the "=A" output and a separate
 * "A" input operand (rather than a single "+A"), and @ptr is forced
 * into a register with "D" instead of using a memory operand -
 * worth confirming against the GCC extended-asm documentation.
 */
static inline unsigned long long
cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new)
{
	asm volatile(

		LOCK_PREFIX "cmpxchg8b (%[ptr])\n"

		     : "=A" (old)

		     : [ptr] "D" (ptr),
		       "A" (old),
		       "b" (ll_low(new)),
		       "c" (ll_high(new))

		     : "memory");

	return old;
}
285 | |||
/**
 * atomic64_cmpxchg - compare and exchange an atomic64 variable
 * @ptr: pointer to type atomic64_t
 * @old_val: value expected to be in *@ptr
 * @new_val: value to store if *@ptr == @old_val
 *
 * Returns the value *@ptr held before the operation; the store
 * took place iff that return value equals @old_val.
 */
static inline unsigned long long
atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
		 unsigned long long new_val)
{
	return cmpxchg8b(&ptr->counter, old_val, new_val);
}
292 | |||
293 | /** | ||
294 | * atomic64_xchg - xchg atomic64 variable | ||
295 | * @ptr: pointer to type atomic64_t | ||
296 | * @new_val: value to assign | ||
297 | * @old_val: old value that was there | ||
298 | * | ||
299 | * Atomically xchgs the value of @ptr to @new_val and returns | ||
300 | * the old value. | ||
301 | */ | ||
302 | |||
303 | static inline unsigned long long | ||
304 | atomic64_xchg(atomic64_t *ptr, unsigned long long new_val) | ||
305 | { | ||
306 | unsigned long long old_val; | ||
307 | |||
308 | do { | ||
309 | old_val = atomic_read(ptr); | ||
310 | } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); | ||
311 | |||
312 | return old_val; | ||
313 | } | ||
314 | |||
/**
 * atomic64_set - set atomic64 variable
 * @ptr: pointer to type atomic64_t
 * @new_val: value to assign
 *
 * Atomically sets the value of @ptr to @new_val.
 *
 * Routes the store through atomic64_xchg() (discarding the old
 * value) so the 64-bit update goes through the cmpxchg8b loop and
 * is a single atomic store.
 */
static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val)
{
	atomic64_xchg(ptr, new_val);
}
326 | |||
/**
 * atomic64_read - read atomic64 variable
 * @ptr: pointer to type atomic64_t
 *
 * Atomically reads the value of @ptr and returns it.
 *
 * A cmpxchg with identical old and new values is used to sample the
 * whole 64-bit quantity atomically: the loop retries until the raw
 * __atomic64_read() snapshot and the value seen by cmpxchg8b agree.
 */
static inline unsigned long long atomic64_read(atomic64_t *ptr)
{
	unsigned long long curr_val;

	do {
		curr_val = __atomic64_read(ptr);
	} while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val);

	return curr_val;
}
343 | |||
344 | /** | ||
345 | * atomic64_add_return - add and return | ||
346 | * @delta: integer value to add | ||
347 | * @ptr: pointer to type atomic64_t | ||
348 | * | ||
349 | * Atomically adds @delta to @ptr and returns @delta + *@ptr | ||
350 | */ | ||
351 | static inline unsigned long long | ||
352 | atomic64_add_return(unsigned long long delta, atomic64_t *ptr) | ||
353 | { | ||
354 | unsigned long long old_val, new_val; | ||
355 | |||
356 | do { | ||
357 | old_val = atomic_read(ptr); | ||
358 | new_val = old_val + delta; | ||
359 | |||
360 | } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); | ||
361 | |||
362 | return new_val; | ||
363 | } | ||
364 | |||
/*
 * NOTE(review): these wrappers return 'long', which is 32 bits on
 * x86-32, so the full 64-bit result of atomic64_add_return() is
 * truncated - confirm no caller relies on the upper half.
 */

/* Subtract @delta and return the (truncated) new value. */
static inline long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr)
{
	return atomic64_add_return(-delta, ptr);
}

/* Increment by 1 and return the (truncated) new value. */
static inline long atomic64_inc_return(atomic64_t *ptr)
{
	return atomic64_add_return(1, ptr);
}

/* Decrement by 1 and return the (truncated) new value. */
static inline long atomic64_dec_return(atomic64_t *ptr)
{
	return atomic64_sub_return(1, ptr);
}
379 | |||
/**
 * atomic64_add - add integer to atomic64 variable
 * @delta: integer value to add
 * @ptr: pointer to type atomic64_t
 *
 * Atomically adds @delta to @ptr.
 *
 * Goes through the cmpxchg8b retry loop in atomic64_add_return();
 * the returned sum is simply discarded.
 */
static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr)
{
	atomic64_add_return(delta, ptr);
}
391 | |||
/**
 * atomic64_sub - subtract the atomic64 variable
 * @delta: integer value to subtract
 * @ptr: pointer to type atomic64_t
 *
 * Atomically subtracts @delta from @ptr.
 *
 * Adding -@delta is well-defined here: unsigned negation wraps
 * modulo 2^64, so the addition performs the subtraction exactly.
 */
static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr)
{
	atomic64_add(-delta, ptr);
}
403 | |||
404 | /** | ||
405 | * atomic64_sub_and_test - subtract value from variable and test result | ||
406 | * @delta: integer value to subtract | ||
407 | * @ptr: pointer to type atomic64_t | ||
408 | * | ||
409 | * Atomically subtracts @delta from @ptr and returns | ||
410 | * true if the result is zero, or false for all | ||
411 | * other cases. | ||
412 | */ | ||
413 | static inline int | ||
414 | atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr) | ||
415 | { | ||
416 | unsigned long long old_val = atomic64_sub_return(delta, ptr); | ||
417 | |||
418 | return old_val == 0; | ||
419 | } | ||
420 | |||
/**
 * atomic64_inc - increment atomic64 variable
 * @ptr: pointer to type atomic64_t
 *
 * Atomically increments @ptr by 1.
 */
static inline void atomic64_inc(atomic64_t *ptr)
{
	/* thin wrapper over the cmpxchg8b-based add */
	atomic64_add(1, ptr);
}

/**
 * atomic64_dec - decrement atomic64 variable
 * @ptr: pointer to type atomic64_t
 *
 * Atomically decrements @ptr by 1.
 */
static inline void atomic64_dec(atomic64_t *ptr)
{
	/* thin wrapper over the cmpxchg8b-based subtract */
	atomic64_sub(1, ptr);
}
442 | |||
/**
 * atomic64_dec_and_test - decrement and test
 * @ptr: pointer to type atomic64_t
 *
 * Atomically decrements @ptr by 1 and
 * returns true if the result is 0, or false for all other
 * cases.
 */
static inline int atomic64_dec_and_test(atomic64_t *ptr)
{
	return atomic64_sub_and_test(1, ptr);
}

/**
 * atomic64_inc_and_test - increment and test
 * @ptr: pointer to type atomic64_t
 *
 * Atomically increments @ptr by 1
 * and returns true if the result is zero, or false for all
 * other cases.
 */
static inline int atomic64_inc_and_test(atomic64_t *ptr)
{
	/*
	 * Subtracting (unsigned long long)-1 wraps modulo 2^64,
	 * which is the same as adding 1.
	 */
	return atomic64_sub_and_test(-1, ptr);
}
468 | |||
/**
 * atomic64_add_negative - add and test if negative
 * @delta: integer value to add
 * @ptr: pointer to type atomic64_t
 *
 * Atomically adds @delta to @ptr and returns true
 * if the result is negative, or false when
 * result is greater than or equal to zero.
 */
static inline int
atomic64_add_negative(unsigned long long delta, atomic64_t *ptr)
{
	/*
	 * The unsigned sum is stored in a signed variable purely for
	 * the sign test (despite the name, this holds the NEW value).
	 * NOTE(review): converting values above LLONG_MAX to signed
	 * is implementation-defined - fine on gcc/x86, but confirm.
	 */
	long long old_val = atomic64_add_return(delta, ptr);

	return old_val < 0;
}
485 | |||
250 | #include <asm-generic/atomic.h> | 486 | #include <asm-generic/atomic.h> |
251 | #endif /* _ASM_X86_ATOMIC_32_H */ | 487 | #endif /* _ASM_X86_ATOMIC_32_H */ |
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index c2e6bedaf258..fe24d2802490 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
@@ -50,6 +50,7 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) | |||
50 | 50 | ||
51 | #ifdef CONFIG_PERF_COUNTERS | 51 | #ifdef CONFIG_PERF_COUNTERS |
52 | BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) | 52 | BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) |
53 | BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) | ||
53 | #endif | 54 | #endif |
54 | 55 | ||
55 | #ifdef CONFIG_X86_MCE_P4THERMAL | 56 | #ifdef CONFIG_X86_MCE_P4THERMAL |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 37555e52f980..9ebc5c255032 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
@@ -13,6 +13,8 @@ typedef struct { | |||
13 | unsigned int irq_spurious_count; | 13 | unsigned int irq_spurious_count; |
14 | #endif | 14 | #endif |
15 | unsigned int generic_irqs; /* arch dependent */ | 15 | unsigned int generic_irqs; /* arch dependent */ |
16 | unsigned int apic_perf_irqs; | ||
17 | unsigned int apic_pending_irqs; | ||
16 | #ifdef CONFIG_SMP | 18 | #ifdef CONFIG_SMP |
17 | unsigned int irq_resched_count; | 19 | unsigned int irq_resched_count; |
18 | unsigned int irq_call_count; | 20 | unsigned int irq_call_count; |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b762ea49bd70..7309c0ad6902 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -29,6 +29,9 @@ | |||
29 | extern void apic_timer_interrupt(void); | 29 | extern void apic_timer_interrupt(void); |
30 | extern void generic_interrupt(void); | 30 | extern void generic_interrupt(void); |
31 | extern void error_interrupt(void); | 31 | extern void error_interrupt(void); |
32 | extern void perf_counter_interrupt(void); | ||
33 | extern void perf_pending_interrupt(void); | ||
34 | |||
32 | extern void spurious_interrupt(void); | 35 | extern void spurious_interrupt(void); |
33 | extern void thermal_interrupt(void); | 36 | extern void thermal_interrupt(void); |
34 | extern void reschedule_interrupt(void); | 37 | extern void reschedule_interrupt(void); |
diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h deleted file mode 100644 index fa0fd068bc2e..000000000000 --- a/arch/x86/include/asm/intel_arch_perfmon.h +++ /dev/null | |||
@@ -1,31 +0,0 @@ | |||
1 | #ifndef _ASM_X86_INTEL_ARCH_PERFMON_H | ||
2 | #define _ASM_X86_INTEL_ARCH_PERFMON_H | ||
3 | |||
4 | #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 | ||
5 | #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 | ||
6 | |||
7 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 | ||
8 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 | ||
9 | |||
10 | #define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) | ||
11 | #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) | ||
12 | #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) | ||
13 | #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) | ||
14 | |||
15 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c) | ||
16 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | ||
17 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0) | ||
18 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ | ||
19 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) | ||
20 | |||
21 | union cpuid10_eax { | ||
22 | struct { | ||
23 | unsigned int version_id:8; | ||
24 | unsigned int num_counters:8; | ||
25 | unsigned int bit_width:8; | ||
26 | unsigned int mask_length:8; | ||
27 | } split; | ||
28 | unsigned int full; | ||
29 | }; | ||
30 | |||
31 | #endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */ | ||
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 3cbd79bbb47c..545bb811ccb5 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -117,6 +117,11 @@ | |||
117 | #define GENERIC_INTERRUPT_VECTOR 0xed | 117 | #define GENERIC_INTERRUPT_VECTOR 0xed |
118 | 118 | ||
119 | /* | 119 | /* |
120 | * Performance monitoring pending work vector: | ||
121 | */ | ||
122 | #define LOCAL_PENDING_VECTOR 0xec | ||
123 | |||
124 | /* | ||
120 | * First APIC vector available to drivers: (vectors 0x30-0xee) we | 125 | * First APIC vector available to drivers: (vectors 0x30-0xee) we |
121 | * start at 0x31(0x41) to spread out vectors evenly between priority | 126 | * start at 0x31(0x41) to spread out vectors evenly between priority |
122 | * levels. (0x80 is the syscall vector) | 127 | * levels. (0x80 is the syscall vector) |
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h new file mode 100644 index 000000000000..d08dd52cb8ff --- /dev/null +++ b/arch/x86/include/asm/perf_counter.h | |||
@@ -0,0 +1,100 @@ | |||
1 | #ifndef _ASM_X86_PERF_COUNTER_H | ||
2 | #define _ASM_X86_PERF_COUNTER_H | ||
3 | |||
4 | /* | ||
5 | * Performance counter hw details: | ||
6 | */ | ||
7 | |||
8 | #define X86_PMC_MAX_GENERIC 8 | ||
9 | #define X86_PMC_MAX_FIXED 3 | ||
10 | |||
11 | #define X86_PMC_IDX_GENERIC 0 | ||
12 | #define X86_PMC_IDX_FIXED 32 | ||
13 | #define X86_PMC_IDX_MAX 64 | ||
14 | |||
15 | #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 | ||
16 | #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 | ||
17 | |||
18 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 | ||
19 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 | ||
20 | |||
21 | #define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) | ||
22 | #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) | ||
23 | #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) | ||
24 | #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) | ||
25 | |||
26 | /* | ||
27 | * Includes eventsel and unit mask as well: | ||
28 | */ | ||
29 | #define ARCH_PERFMON_EVENT_MASK 0xffff | ||
30 | |||
31 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c | ||
32 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | ||
33 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 | ||
34 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ | ||
35 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) | ||
36 | |||
37 | #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 | ||
38 | |||
39 | /* | ||
40 | * Intel "Architectural Performance Monitoring" CPUID | ||
41 | * detection/enumeration details: | ||
42 | */ | ||
43 | union cpuid10_eax { | ||
44 | struct { | ||
45 | unsigned int version_id:8; | ||
46 | unsigned int num_counters:8; | ||
47 | unsigned int bit_width:8; | ||
48 | unsigned int mask_length:8; | ||
49 | } split; | ||
50 | unsigned int full; | ||
51 | }; | ||
52 | |||
53 | union cpuid10_edx { | ||
54 | struct { | ||
55 | unsigned int num_counters_fixed:4; | ||
56 | unsigned int reserved:28; | ||
57 | } split; | ||
58 | unsigned int full; | ||
59 | }; | ||
60 | |||
61 | |||
62 | /* | ||
63 | * Fixed-purpose performance counters: | ||
64 | */ | ||
65 | |||
66 | /* | ||
67 | * All 3 fixed-mode PMCs are configured via this single MSR: | ||
68 | */ | ||
69 | #define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d | ||
70 | |||
71 | /* | ||
72 | * The counts are available in three separate MSRs: | ||
73 | */ | ||
74 | |||
75 | /* Instr_Retired.Any: */ | ||
76 | #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 | ||
77 | #define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) | ||
78 | |||
79 | /* CPU_CLK_Unhalted.Core: */ | ||
80 | #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a | ||
81 | #define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) | ||
82 | |||
83 | /* CPU_CLK_Unhalted.Ref: */ | ||
84 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b | ||
85 | #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) | ||
86 | |||
87 | extern void set_perf_counter_pending(void); | ||
88 | |||
89 | #define clear_perf_counter_pending() do { } while (0) | ||
90 | #define test_perf_counter_pending() (0) | ||
91 | |||
92 | #ifdef CONFIG_PERF_COUNTERS | ||
93 | extern void init_hw_perf_counters(void); | ||
94 | extern void perf_counters_lapic_init(int nmi); | ||
95 | #else | ||
96 | static inline void init_hw_perf_counters(void) { } | ||
97 | static inline void perf_counters_lapic_init(int nmi) { } | ||
98 | #endif | ||
99 | |||
100 | #endif /* _ASM_X86_PERF_COUNTER_H */ | ||
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 6e72d74cf8dc..0b4d8c2b157d 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
@@ -340,6 +340,7 @@ | |||
340 | #define __NR_inotify_init1 332 | 340 | #define __NR_inotify_init1 332 |
341 | #define __NR_preadv 333 | 341 | #define __NR_preadv 333 |
342 | #define __NR_pwritev 334 | 342 | #define __NR_pwritev 334 |
343 | #define __NR_perf_counter_open 333 | ||
343 | 344 | ||
344 | #ifdef __KERNEL__ | 345 | #ifdef __KERNEL__ |
345 | 346 | ||
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index f81829462325..d9aad876ad76 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
@@ -657,7 +657,8 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1) | |||
657 | __SYSCALL(__NR_preadv, sys_preadv) | 657 | __SYSCALL(__NR_preadv, sys_preadv) |
658 | #define __NR_pwritev 296 | 658 | #define __NR_pwritev 296 |
659 | __SYSCALL(__NR_pwritev, sys_pwritev) | 659 | __SYSCALL(__NR_pwritev, sys_pwritev) |
660 | 660 | #define __NR_perf_counter_open 295 | |
661 | __SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) | ||
661 | 662 | ||
662 | #ifndef __NO_STUBS | 663 | #ifndef __NO_STUBS |
663 | #define __ARCH_WANT_OLD_READDIR | 664 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f2870920f246..e9021a908020 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/smp.h> | 34 | #include <linux/smp.h> |
35 | #include <linux/mm.h> | 35 | #include <linux/mm.h> |
36 | 36 | ||
37 | #include <asm/perf_counter.h> | ||
37 | #include <asm/pgalloc.h> | 38 | #include <asm/pgalloc.h> |
38 | #include <asm/atomic.h> | 39 | #include <asm/atomic.h> |
39 | #include <asm/mpspec.h> | 40 | #include <asm/mpspec.h> |
@@ -761,6 +762,8 @@ static void local_apic_timer_interrupt(void) | |||
761 | inc_irq_stat(apic_timer_irqs); | 762 | inc_irq_stat(apic_timer_irqs); |
762 | 763 | ||
763 | evt->event_handler(evt); | 764 | evt->event_handler(evt); |
765 | |||
766 | perf_counter_unthrottle(); | ||
764 | } | 767 | } |
765 | 768 | ||
766 | /* | 769 | /* |
@@ -1133,6 +1136,7 @@ void __cpuinit setup_local_APIC(void) | |||
1133 | apic_write(APIC_ESR, 0); | 1136 | apic_write(APIC_ESR, 0); |
1134 | } | 1137 | } |
1135 | #endif | 1138 | #endif |
1139 | perf_counters_lapic_init(0); | ||
1136 | 1140 | ||
1137 | preempt_disable(); | 1141 | preempt_disable(); |
1138 | 1142 | ||
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 4e242f9a06e4..3efcb2b96a15 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -1,5 +1,5 @@ | |||
1 | # | 1 | # |
2 | # Makefile for x86-compatible CPU details and quirks | 2 | # Makefile for x86-compatible CPU details, features and quirks |
3 | # | 3 | # |
4 | 4 | ||
5 | # Don't trace early stages of a secondary CPU boot | 5 | # Don't trace early stages of a secondary CPU boot |
@@ -23,11 +23,13 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o | |||
23 | obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o | 23 | obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o |
24 | obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o | 24 | obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o |
25 | 25 | ||
26 | obj-$(CONFIG_X86_MCE) += mcheck/ | 26 | obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o |
27 | obj-$(CONFIG_MTRR) += mtrr/ | ||
28 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | ||
29 | 27 | ||
30 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o | 28 | obj-$(CONFIG_X86_MCE) += mcheck/ |
29 | obj-$(CONFIG_MTRR) += mtrr/ | ||
30 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | ||
31 | |||
32 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o | ||
31 | 33 | ||
32 | quiet_cmd_mkcapflags = MKCAP $@ | 34 | quiet_cmd_mkcapflags = MKCAP $@ |
33 | cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ | 35 | cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7e4a459daa64..fd69c514ca2a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -420,6 +420,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
420 | if (c->x86 >= 6) | 420 | if (c->x86 >= 6) |
421 | set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); | 421 | set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); |
422 | 422 | ||
423 | /* Enable Performance counter for K7 and later */ | ||
424 | if (c->x86 > 6 && c->x86 <= 0x11) | ||
425 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | ||
426 | |||
423 | if (!c->x86_model_id[0]) { | 427 | if (!c->x86_model_id[0]) { |
424 | switch (c->x86) { | 428 | switch (c->x86) { |
425 | case 0xf: | 429 | case 0xf: |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c4f667896c28..a86769efe0df 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/io.h> | 13 | #include <linux/io.h> |
14 | 14 | ||
15 | #include <asm/stackprotector.h> | 15 | #include <asm/stackprotector.h> |
16 | #include <asm/perf_counter.h> | ||
16 | #include <asm/mmu_context.h> | 17 | #include <asm/mmu_context.h> |
17 | #include <asm/hypervisor.h> | 18 | #include <asm/hypervisor.h> |
18 | #include <asm/processor.h> | 19 | #include <asm/processor.h> |
@@ -854,6 +855,7 @@ void __init identify_boot_cpu(void) | |||
854 | #else | 855 | #else |
855 | vgetcpu_set_mode(); | 856 | vgetcpu_set_mode(); |
856 | #endif | 857 | #endif |
858 | init_hw_perf_counters(); | ||
857 | } | 859 | } |
858 | 860 | ||
859 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | 861 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c new file mode 100644 index 000000000000..0fcbaab83f9b --- /dev/null +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -0,0 +1,1213 @@ | |||
1 | /* | ||
2 | * Performance counter x86 architecture code | ||
3 | * | ||
4 | * Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de> | ||
5 | * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar | ||
6 | * Copyright(C) 2009 Jaswinder Singh Rajput | ||
7 | * | ||
8 | * For licencing details see kernel-base/COPYING | ||
9 | */ | ||
10 | |||
11 | #include <linux/perf_counter.h> | ||
12 | #include <linux/capability.h> | ||
13 | #include <linux/notifier.h> | ||
14 | #include <linux/hardirq.h> | ||
15 | #include <linux/kprobes.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/kdebug.h> | ||
18 | #include <linux/sched.h> | ||
19 | #include <linux/uaccess.h> | ||
20 | |||
21 | #include <asm/apic.h> | ||
22 | #include <asm/stacktrace.h> | ||
23 | #include <asm/nmi.h> | ||
24 | |||
25 | static bool perf_counters_initialized __read_mostly; | ||
26 | |||
27 | /* | ||
28 | * Number of (generic) HW counters: | ||
29 | */ | ||
30 | static int nr_counters_generic __read_mostly; | ||
31 | static u64 perf_counter_mask __read_mostly; | ||
32 | static u64 counter_value_mask __read_mostly; | ||
33 | static int counter_value_bits __read_mostly; | ||
34 | |||
35 | static int nr_counters_fixed __read_mostly; | ||
36 | |||
37 | struct cpu_hw_counters { | ||
38 | struct perf_counter *counters[X86_PMC_IDX_MAX]; | ||
39 | unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
40 | unsigned long interrupts; | ||
41 | u64 throttle_ctrl; | ||
42 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
43 | int enabled; | ||
44 | }; | ||
45 | |||
46 | /* | ||
47 | * struct pmc_x86_ops - performance counter x86 ops | ||
48 | */ | ||
49 | struct pmc_x86_ops { | ||
50 | u64 (*save_disable_all)(void); | ||
51 | void (*restore_all)(u64); | ||
52 | u64 (*get_status)(u64); | ||
53 | void (*ack_status)(u64); | ||
54 | void (*enable)(int, u64); | ||
55 | void (*disable)(int, u64); | ||
56 | unsigned eventsel; | ||
57 | unsigned perfctr; | ||
58 | u64 (*event_map)(int); | ||
59 | u64 (*raw_event)(u64); | ||
60 | int max_events; | ||
61 | }; | ||
62 | |||
63 | static struct pmc_x86_ops *pmc_ops __read_mostly; | ||
64 | |||
65 | static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { | ||
66 | .enabled = 1, | ||
67 | }; | ||
68 | |||
69 | static __read_mostly int intel_perfmon_version; | ||
70 | |||
71 | /* | ||
72 | * Intel PerfMon v3. Used on Core2 and later. | ||
73 | */ | ||
74 | static const u64 intel_perfmon_event_map[] = | ||
75 | { | ||
76 | [PERF_COUNT_CPU_CYCLES] = 0x003c, | ||
77 | [PERF_COUNT_INSTRUCTIONS] = 0x00c0, | ||
78 | [PERF_COUNT_CACHE_REFERENCES] = 0x4f2e, | ||
79 | [PERF_COUNT_CACHE_MISSES] = 0x412e, | ||
80 | [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
81 | [PERF_COUNT_BRANCH_MISSES] = 0x00c5, | ||
82 | [PERF_COUNT_BUS_CYCLES] = 0x013c, | ||
83 | }; | ||
84 | |||
85 | static u64 pmc_intel_event_map(int event) | ||
86 | { | ||
87 | return intel_perfmon_event_map[event]; | ||
88 | } | ||
89 | |||
90 | static u64 pmc_intel_raw_event(u64 event) | ||
91 | { | ||
92 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
93 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
94 | #define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL | ||
95 | |||
96 | #define CORE_EVNTSEL_MASK \ | ||
97 | (CORE_EVNTSEL_EVENT_MASK | \ | ||
98 | CORE_EVNTSEL_UNIT_MASK | \ | ||
99 | CORE_EVNTSEL_COUNTER_MASK) | ||
100 | |||
101 | return event & CORE_EVNTSEL_MASK; | ||
102 | } | ||
103 | |||
104 | /* | ||
105 | * AMD Performance Monitor K7 and later. | ||
106 | */ | ||
107 | static const u64 amd_perfmon_event_map[] = | ||
108 | { | ||
109 | [PERF_COUNT_CPU_CYCLES] = 0x0076, | ||
110 | [PERF_COUNT_INSTRUCTIONS] = 0x00c0, | ||
111 | [PERF_COUNT_CACHE_REFERENCES] = 0x0080, | ||
112 | [PERF_COUNT_CACHE_MISSES] = 0x0081, | ||
113 | [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
114 | [PERF_COUNT_BRANCH_MISSES] = 0x00c5, | ||
115 | }; | ||
116 | |||
117 | static u64 pmc_amd_event_map(int event) | ||
118 | { | ||
119 | return amd_perfmon_event_map[event]; | ||
120 | } | ||
121 | |||
122 | static u64 pmc_amd_raw_event(u64 event) | ||
123 | { | ||
124 | #define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL | ||
125 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | ||
126 | #define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL | ||
127 | |||
128 | #define K7_EVNTSEL_MASK \ | ||
129 | (K7_EVNTSEL_EVENT_MASK | \ | ||
130 | K7_EVNTSEL_UNIT_MASK | \ | ||
131 | K7_EVNTSEL_COUNTER_MASK) | ||
132 | |||
133 | return event & K7_EVNTSEL_MASK; | ||
134 | } | ||
135 | |||
136 | /* | ||
137 | * Propagate counter elapsed time into the generic counter. | ||
138 | * Can only be executed on the CPU where the counter is active. | ||
139 | * Returns the delta events processed. | ||
140 | */ | ||
141 | static void | ||
142 | x86_perf_counter_update(struct perf_counter *counter, | ||
143 | struct hw_perf_counter *hwc, int idx) | ||
144 | { | ||
145 | u64 prev_raw_count, new_raw_count, delta; | ||
146 | |||
147 | /* | ||
148 | * Careful: an NMI might modify the previous counter value. | ||
149 | * | ||
150 | * Our tactic to handle this is to first atomically read and | ||
151 | * exchange a new raw count - then add that new-prev delta | ||
152 | * count to the generic counter atomically: | ||
153 | */ | ||
154 | again: | ||
155 | prev_raw_count = atomic64_read(&hwc->prev_count); | ||
156 | rdmsrl(hwc->counter_base + idx, new_raw_count); | ||
157 | |||
158 | if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, | ||
159 | new_raw_count) != prev_raw_count) | ||
160 | goto again; | ||
161 | |||
162 | /* | ||
163 | * Now we have the new raw value and have updated the prev | ||
164 | * timestamp already. We can now calculate the elapsed delta | ||
165 | * (counter-)time and add that to the generic counter. | ||
166 | * | ||
167 | * Careful, not all hw sign-extends above the physical width | ||
168 | * of the count, so we do that by clipping the delta to 32 bits: | ||
169 | */ | ||
170 | delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count); | ||
171 | |||
172 | atomic64_add(delta, &counter->count); | ||
173 | atomic64_sub(delta, &hwc->period_left); | ||
174 | } | ||
175 | |||
176 | static atomic_t num_counters; | ||
177 | static DEFINE_MUTEX(pmc_reserve_mutex); | ||
178 | |||
179 | static bool reserve_pmc_hardware(void) | ||
180 | { | ||
181 | int i; | ||
182 | |||
183 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
184 | disable_lapic_nmi_watchdog(); | ||
185 | |||
186 | for (i = 0; i < nr_counters_generic; i++) { | ||
187 | if (!reserve_perfctr_nmi(pmc_ops->perfctr + i)) | ||
188 | goto perfctr_fail; | ||
189 | } | ||
190 | |||
191 | for (i = 0; i < nr_counters_generic; i++) { | ||
192 | if (!reserve_evntsel_nmi(pmc_ops->eventsel + i)) | ||
193 | goto eventsel_fail; | ||
194 | } | ||
195 | |||
196 | return true; | ||
197 | |||
198 | eventsel_fail: | ||
199 | for (i--; i >= 0; i--) | ||
200 | release_evntsel_nmi(pmc_ops->eventsel + i); | ||
201 | |||
202 | i = nr_counters_generic; | ||
203 | |||
204 | perfctr_fail: | ||
205 | for (i--; i >= 0; i--) | ||
206 | release_perfctr_nmi(pmc_ops->perfctr + i); | ||
207 | |||
208 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
209 | enable_lapic_nmi_watchdog(); | ||
210 | |||
211 | return false; | ||
212 | } | ||
213 | |||
214 | static void release_pmc_hardware(void) | ||
215 | { | ||
216 | int i; | ||
217 | |||
218 | for (i = 0; i < nr_counters_generic; i++) { | ||
219 | release_perfctr_nmi(pmc_ops->perfctr + i); | ||
220 | release_evntsel_nmi(pmc_ops->eventsel + i); | ||
221 | } | ||
222 | |||
223 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
224 | enable_lapic_nmi_watchdog(); | ||
225 | } | ||
226 | |||
227 | static void hw_perf_counter_destroy(struct perf_counter *counter) | ||
228 | { | ||
229 | if (atomic_dec_and_mutex_lock(&num_counters, &pmc_reserve_mutex)) { | ||
230 | release_pmc_hardware(); | ||
231 | mutex_unlock(&pmc_reserve_mutex); | ||
232 | } | ||
233 | } | ||
234 | |||
235 | /* | ||
236 | * Setup the hardware configuration for a given hw_event_type | ||
237 | */ | ||
238 | static int __hw_perf_counter_init(struct perf_counter *counter) | ||
239 | { | ||
240 | struct perf_counter_hw_event *hw_event = &counter->hw_event; | ||
241 | struct hw_perf_counter *hwc = &counter->hw; | ||
242 | int err; | ||
243 | |||
244 | if (unlikely(!perf_counters_initialized)) | ||
245 | return -EINVAL; | ||
246 | |||
247 | err = 0; | ||
248 | if (atomic_inc_not_zero(&num_counters)) { | ||
249 | mutex_lock(&pmc_reserve_mutex); | ||
250 | if (atomic_read(&num_counters) == 0 && !reserve_pmc_hardware()) | ||
251 | err = -EBUSY; | ||
252 | else | ||
253 | atomic_inc(&num_counters); | ||
254 | mutex_unlock(&pmc_reserve_mutex); | ||
255 | } | ||
256 | if (err) | ||
257 | return err; | ||
258 | |||
259 | /* | ||
260 | * Generate PMC IRQs: | ||
261 | * (keep 'enabled' bit clear for now) | ||
262 | */ | ||
263 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | ||
264 | |||
265 | /* | ||
266 | * Count user and OS events unless requested not to. | ||
267 | */ | ||
268 | if (!hw_event->exclude_user) | ||
269 | hwc->config |= ARCH_PERFMON_EVENTSEL_USR; | ||
270 | if (!hw_event->exclude_kernel) | ||
271 | hwc->config |= ARCH_PERFMON_EVENTSEL_OS; | ||
272 | |||
273 | /* | ||
274 | * If privileged enough, allow NMI events: | ||
275 | */ | ||
276 | hwc->nmi = 0; | ||
277 | if (capable(CAP_SYS_ADMIN) && hw_event->nmi) | ||
278 | hwc->nmi = 1; | ||
279 | |||
280 | hwc->irq_period = hw_event->irq_period; | ||
281 | /* | ||
282 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
283 | * so we install an artificial 1<<31 period regardless of | ||
284 | * the generic counter period: | ||
285 | */ | ||
286 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | ||
287 | if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF) | ||
288 | hwc->irq_period = 0x7FFFFFFF; | ||
289 | |||
290 | atomic64_set(&hwc->period_left, hwc->irq_period); | ||
291 | |||
292 | /* | ||
293 | * Raw event type provide the config in the event structure | ||
294 | */ | ||
295 | if (perf_event_raw(hw_event)) { | ||
296 | hwc->config |= pmc_ops->raw_event(perf_event_config(hw_event)); | ||
297 | } else { | ||
298 | if (perf_event_id(hw_event) >= pmc_ops->max_events) | ||
299 | return -EINVAL; | ||
300 | /* | ||
301 | * The generic map: | ||
302 | */ | ||
303 | hwc->config |= pmc_ops->event_map(perf_event_id(hw_event)); | ||
304 | } | ||
305 | |||
306 | counter->destroy = hw_perf_counter_destroy; | ||
307 | |||
308 | return 0; | ||
309 | } | ||
310 | |||
311 | static u64 pmc_intel_save_disable_all(void) | ||
312 | { | ||
313 | u64 ctrl; | ||
314 | |||
315 | rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); | ||
316 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | ||
317 | |||
318 | return ctrl; | ||
319 | } | ||
320 | |||
321 | static u64 pmc_amd_save_disable_all(void) | ||
322 | { | ||
323 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
324 | int enabled, idx; | ||
325 | |||
326 | enabled = cpuc->enabled; | ||
327 | cpuc->enabled = 0; | ||
328 | /* | ||
329 | * ensure we write the disable before we start disabling the | ||
330 | * counters proper, so that pcm_amd_enable() does the right thing. | ||
331 | */ | ||
332 | barrier(); | ||
333 | |||
334 | for (idx = 0; idx < nr_counters_generic; idx++) { | ||
335 | u64 val; | ||
336 | |||
337 | rdmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
338 | if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) { | ||
339 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
340 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
341 | } | ||
342 | } | ||
343 | |||
344 | return enabled; | ||
345 | } | ||
346 | |||
347 | u64 hw_perf_save_disable(void) | ||
348 | { | ||
349 | if (unlikely(!perf_counters_initialized)) | ||
350 | return 0; | ||
351 | |||
352 | return pmc_ops->save_disable_all(); | ||
353 | } | ||
354 | /* | ||
355 | * Exported because of ACPI idle | ||
356 | */ | ||
357 | EXPORT_SYMBOL_GPL(hw_perf_save_disable); | ||
358 | |||
359 | static void pmc_intel_restore_all(u64 ctrl) | ||
360 | { | ||
361 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); | ||
362 | } | ||
363 | |||
364 | static void pmc_amd_restore_all(u64 ctrl) | ||
365 | { | ||
366 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
367 | int idx; | ||
368 | |||
369 | cpuc->enabled = ctrl; | ||
370 | barrier(); | ||
371 | if (!ctrl) | ||
372 | return; | ||
373 | |||
374 | for (idx = 0; idx < nr_counters_generic; idx++) { | ||
375 | if (test_bit(idx, cpuc->active_mask)) { | ||
376 | u64 val; | ||
377 | |||
378 | rdmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
379 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
380 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
381 | } | ||
382 | } | ||
383 | } | ||
384 | |||
385 | void hw_perf_restore(u64 ctrl) | ||
386 | { | ||
387 | if (unlikely(!perf_counters_initialized)) | ||
388 | return; | ||
389 | |||
390 | pmc_ops->restore_all(ctrl); | ||
391 | } | ||
392 | /* | ||
393 | * Exported because of ACPI idle | ||
394 | */ | ||
395 | EXPORT_SYMBOL_GPL(hw_perf_restore); | ||
396 | |||
397 | static u64 pmc_intel_get_status(u64 mask) | ||
398 | { | ||
399 | u64 status; | ||
400 | |||
401 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | ||
402 | |||
403 | return status; | ||
404 | } | ||
405 | |||
406 | static u64 pmc_amd_get_status(u64 mask) | ||
407 | { | ||
408 | u64 status = 0; | ||
409 | int idx; | ||
410 | |||
411 | for (idx = 0; idx < nr_counters_generic; idx++) { | ||
412 | s64 val; | ||
413 | |||
414 | if (!(mask & (1 << idx))) | ||
415 | continue; | ||
416 | |||
417 | rdmsrl(MSR_K7_PERFCTR0 + idx, val); | ||
418 | val <<= (64 - counter_value_bits); | ||
419 | if (val >= 0) | ||
420 | status |= (1 << idx); | ||
421 | } | ||
422 | |||
423 | return status; | ||
424 | } | ||
425 | |||
426 | static u64 hw_perf_get_status(u64 mask) | ||
427 | { | ||
428 | if (unlikely(!perf_counters_initialized)) | ||
429 | return 0; | ||
430 | |||
431 | return pmc_ops->get_status(mask); | ||
432 | } | ||
433 | |||
434 | static void pmc_intel_ack_status(u64 ack) | ||
435 | { | ||
436 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | ||
437 | } | ||
438 | |||
439 | static void pmc_amd_ack_status(u64 ack) | ||
440 | { | ||
441 | } | ||
442 | |||
443 | static void hw_perf_ack_status(u64 ack) | ||
444 | { | ||
445 | if (unlikely(!perf_counters_initialized)) | ||
446 | return; | ||
447 | |||
448 | pmc_ops->ack_status(ack); | ||
449 | } | ||
450 | |||
451 | static void pmc_intel_enable(int idx, u64 config) | ||
452 | { | ||
453 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, | ||
454 | config | ARCH_PERFMON_EVENTSEL0_ENABLE); | ||
455 | } | ||
456 | |||
457 | static void pmc_amd_enable(int idx, u64 config) | ||
458 | { | ||
459 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
460 | |||
461 | set_bit(idx, cpuc->active_mask); | ||
462 | if (cpuc->enabled) | ||
463 | config |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
464 | |||
465 | wrmsrl(MSR_K7_EVNTSEL0 + idx, config); | ||
466 | } | ||
467 | |||
468 | static void hw_perf_enable(int idx, u64 config) | ||
469 | { | ||
470 | if (unlikely(!perf_counters_initialized)) | ||
471 | return; | ||
472 | |||
473 | pmc_ops->enable(idx, config); | ||
474 | } | ||
475 | |||
476 | static void pmc_intel_disable(int idx, u64 config) | ||
477 | { | ||
478 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config); | ||
479 | } | ||
480 | |||
481 | static void pmc_amd_disable(int idx, u64 config) | ||
482 | { | ||
483 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
484 | |||
485 | clear_bit(idx, cpuc->active_mask); | ||
486 | wrmsrl(MSR_K7_EVNTSEL0 + idx, config); | ||
487 | |||
488 | } | ||
489 | |||
490 | static void hw_perf_disable(int idx, u64 config) | ||
491 | { | ||
492 | if (unlikely(!perf_counters_initialized)) | ||
493 | return; | ||
494 | |||
495 | pmc_ops->disable(idx, config); | ||
496 | } | ||
497 | |||
498 | static inline void | ||
499 | __pmc_fixed_disable(struct perf_counter *counter, | ||
500 | struct hw_perf_counter *hwc, unsigned int __idx) | ||
501 | { | ||
502 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
503 | u64 ctrl_val, mask; | ||
504 | int err; | ||
505 | |||
506 | mask = 0xfULL << (idx * 4); | ||
507 | |||
508 | rdmsrl(hwc->config_base, ctrl_val); | ||
509 | ctrl_val &= ~mask; | ||
510 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
511 | } | ||
512 | |||
513 | static inline void | ||
514 | __pmc_generic_disable(struct perf_counter *counter, | ||
515 | struct hw_perf_counter *hwc, unsigned int idx) | ||
516 | { | ||
517 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) | ||
518 | __pmc_fixed_disable(counter, hwc, idx); | ||
519 | else | ||
520 | hw_perf_disable(idx, hwc->config); | ||
521 | } | ||
522 | |||
523 | static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); | ||
524 | |||
525 | /* | ||
526 | * Set the next IRQ period, based on the hwc->period_left value. | ||
527 | * To be called with the counter disabled in hw: | ||
528 | */ | ||
529 | static void | ||
530 | __hw_perf_counter_set_period(struct perf_counter *counter, | ||
531 | struct hw_perf_counter *hwc, int idx) | ||
532 | { | ||
533 | s64 left = atomic64_read(&hwc->period_left); | ||
534 | s64 period = hwc->irq_period; | ||
535 | int err; | ||
536 | |||
537 | /* | ||
538 | * If we are way outside a reasoable range then just skip forward: | ||
539 | */ | ||
540 | if (unlikely(left <= -period)) { | ||
541 | left = period; | ||
542 | atomic64_set(&hwc->period_left, left); | ||
543 | } | ||
544 | |||
545 | if (unlikely(left <= 0)) { | ||
546 | left += period; | ||
547 | atomic64_set(&hwc->period_left, left); | ||
548 | } | ||
549 | |||
550 | per_cpu(prev_left[idx], smp_processor_id()) = left; | ||
551 | |||
552 | /* | ||
553 | * The hw counter starts counting from this counter offset, | ||
554 | * mark it to be able to extra future deltas: | ||
555 | */ | ||
556 | atomic64_set(&hwc->prev_count, (u64)-left); | ||
557 | |||
558 | err = checking_wrmsrl(hwc->counter_base + idx, | ||
559 | (u64)(-left) & counter_value_mask); | ||
560 | } | ||
561 | |||
562 | static inline void | ||
563 | __pmc_fixed_enable(struct perf_counter *counter, | ||
564 | struct hw_perf_counter *hwc, unsigned int __idx) | ||
565 | { | ||
566 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
567 | u64 ctrl_val, bits, mask; | ||
568 | int err; | ||
569 | |||
570 | /* | ||
571 | * Enable IRQ generation (0x8), | ||
572 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) | ||
573 | * if requested: | ||
574 | */ | ||
575 | bits = 0x8ULL; | ||
576 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) | ||
577 | bits |= 0x2; | ||
578 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
579 | bits |= 0x1; | ||
580 | bits <<= (idx * 4); | ||
581 | mask = 0xfULL << (idx * 4); | ||
582 | |||
583 | rdmsrl(hwc->config_base, ctrl_val); | ||
584 | ctrl_val &= ~mask; | ||
585 | ctrl_val |= bits; | ||
586 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
587 | } | ||
588 | |||
589 | static void | ||
590 | __pmc_generic_enable(struct perf_counter *counter, | ||
591 | struct hw_perf_counter *hwc, int idx) | ||
592 | { | ||
593 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) | ||
594 | __pmc_fixed_enable(counter, hwc, idx); | ||
595 | else | ||
596 | hw_perf_enable(idx, hwc->config); | ||
597 | } | ||
598 | |||
599 | static int | ||
600 | fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) | ||
601 | { | ||
602 | unsigned int event; | ||
603 | |||
604 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) | ||
605 | return -1; | ||
606 | |||
607 | if (unlikely(hwc->nmi)) | ||
608 | return -1; | ||
609 | |||
610 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; | ||
611 | |||
612 | if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS))) | ||
613 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; | ||
614 | if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES))) | ||
615 | return X86_PMC_IDX_FIXED_CPU_CYCLES; | ||
616 | if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES))) | ||
617 | return X86_PMC_IDX_FIXED_BUS_CYCLES; | ||
618 | |||
619 | return -1; | ||
620 | } | ||
621 | |||
/*
 * Find a PMC slot for the freshly enabled / scheduled in counter:
 *
 * Returns 0 on success, -EAGAIN when all hardware counters (fixed and
 * generic) on this CPU are already in use.
 */
static int pmc_generic_enable(struct perf_counter *counter)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	struct hw_perf_counter *hwc = &counter->hw;
	int idx;

	/* Does this event map onto one of the Intel fixed-function counters? */
	idx = fixed_mode_idx(counter, hwc);
	if (idx >= 0) {
		/*
		 * Try to get the fixed counter, if that is already taken
		 * then try to get a generic counter:
		 */
		if (test_and_set_bit(idx, cpuc->used))
			goto try_generic;

		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
		/*
		 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
		 */
		hwc->counter_base =
			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
		hwc->idx = idx;
	} else {
		idx = hwc->idx;
		/* Try to get the previous generic counter again */
		if (test_and_set_bit(idx, cpuc->used)) {
try_generic:
			/* Previous slot taken: find any free generic counter. */
			idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
			if (idx == nr_counters_generic)
				return -EAGAIN;

			set_bit(idx, cpuc->used);
			hwc->idx = idx;
		}
		hwc->config_base = pmc_ops->eventsel;
		hwc->counter_base = pmc_ops->perfctr;
	}

	perf_counters_lapic_init(hwc->nmi);

	/* Make sure the counter is quiesced before we reprogram it. */
	__pmc_generic_disable(counter, hwc, idx);

	cpuc->counters[idx] = counter;
	/*
	 * Make it visible before enabling the hw:
	 */
	smp_wmb();

	__hw_perf_counter_set_period(counter, hwc, idx);
	__pmc_generic_enable(counter, hwc, idx);

	return 0;
}
679 | |||
/*
 * Dump the PMU state of the current CPU: the global control/status MSRs
 * (Intel perfmon v2+ only) plus every generic and fixed counter.
 * Debug aid only; runs with local interrupts disabled so the per-cpu
 * state cannot change underneath us.
 */
void perf_counter_print_debug(void)
{
	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
	struct cpu_hw_counters *cpuc;
	int cpu, idx;

	if (!nr_counters_generic)
		return;

	local_irq_disable();

	cpu = smp_processor_id();
	cpuc = &per_cpu(cpu_hw_counters, cpu);

	/* The global control MSRs only exist from arch perfmon v2 onwards: */
	if (intel_perfmon_version >= 2) {
		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);

		pr_info("\n");
		pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
		pr_info("CPU#%d: status: %016llx\n", cpu, status);
		pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
		pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
	}
	pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);

	for (idx = 0; idx < nr_counters_generic; idx++) {
		rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl);
		rdmsrl(pmc_ops->perfctr + idx, pmc_count);

		/* Software copy of the period remaining in this counter: */
		prev_left = per_cpu(prev_left[idx], cpu);

		pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
			cpu, idx, pmc_ctrl);
		pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
		pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
			cpu, idx, prev_left);
	}
	for (idx = 0; idx < nr_counters_fixed; idx++) {
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
	}
	local_irq_enable();
}
729 | |||
/*
 * Disable a counter and release its hardware slot, folding any remaining
 * hardware delta into the counter's software count.
 */
static void pmc_generic_disable(struct perf_counter *counter)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	struct hw_perf_counter *hwc = &counter->hw;
	unsigned int idx = hwc->idx;

	/* Stop the hardware counter before tearing down our bookkeeping: */
	__pmc_generic_disable(counter, hwc, idx);

	clear_bit(idx, cpuc->used);
	cpuc->counters[idx] = NULL;
	/*
	 * Make sure the cleared pointer becomes visible before we
	 * (potentially) free the counter:
	 */
	smp_wmb();

	/*
	 * Drain the remaining delta count out of a counter
	 * that we are disabling:
	 */
	x86_perf_counter_update(counter, hwc, idx);
}
752 | |||
/*
 * Save and restart an expired counter. Called by NMI contexts,
 * so it has to be careful about preempting normal counter ops:
 */
static void perf_save_and_restart(struct perf_counter *counter)
{
	struct hw_perf_counter *hwc = &counter->hw;
	int idx = hwc->idx;

	/* Fold the expired hardware value into the software count ... */
	x86_perf_counter_update(counter, hwc, idx);
	/* ... and arm the counter for the next sampling period: */
	__hw_perf_counter_set_period(counter, hwc, idx);

	/* Only re-enable the hardware if the counter is still active: */
	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
		__pmc_generic_enable(counter, hwc, idx);
}
768 | |||
/*
 * Maximum interrupt frequency of 100KHz per CPU: expressed as the
 * maximum number of PMU interrupts allowed per timer tick.
 */
#define PERFMON_MAX_INTERRUPTS (100000/HZ)
773 | |||
/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 *
 * @nmi: non-zero when invoked from NMI context rather than a normal
 *       APIC interrupt.
 *
 * Returns 1 if at least one counter overflow was handled, 0 otherwise
 * (used by the NMI notifier to decide whether the NMI was ours).
 */
static int __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
{
	int bit, cpu = smp_processor_id();
	u64 ack, status;
	struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
	int ret = 0;

	/* Globally disable the PMU while we process overflows: */
	cpuc->throttle_ctrl = hw_perf_save_disable();

	status = hw_perf_get_status(cpuc->throttle_ctrl);
	if (!status)
		goto out;

	ret = 1;
again:
	inc_irq_stat(apic_perf_irqs);
	ack = status;
	/* Each set status bit is an overflowed counter index: */
	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
		struct perf_counter *counter = cpuc->counters[bit];

		clear_bit(bit, (unsigned long *) &status);
		if (!counter)
			continue;

		perf_save_and_restart(counter);
		/* The generic layer may ask us to stop this counter: */
		if (perf_counter_overflow(counter, nmi, regs, 0))
			__pmc_generic_disable(counter, &counter->hw, bit);
	}

	hw_perf_ack_status(ack);

	/*
	 * Repeat if there is more work to be done:
	 */
	status = hw_perf_get_status(cpuc->throttle_ctrl);
	if (status)
		goto again;
out:
	/*
	 * Restore - do not reenable when global enable is off or throttled:
	 */
	if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
		hw_perf_restore(cpuc->throttle_ctrl);

	return ret;
}
824 | |||
/*
 * Called once per timer tick: re-enables a PMU that was throttled by
 * __smp_perf_counter_interrupt() for exceeding PERFMON_MAX_INTERRUPTS,
 * and resets the per-tick interrupt budget.
 */
void perf_counter_unthrottle(void)
{
	struct cpu_hw_counters *cpuc;

	/*
	 * NOTE(review): this bails on CPUs without X86_FEATURE_ARCH_PERFMON;
	 * verify this flag is also set on the AMD path handled elsewhere in
	 * this file, otherwise AMD would never be unthrottled.
	 */
	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
		return;

	if (unlikely(!perf_counters_initialized))
		return;

	cpuc = &__get_cpu_var(cpu_hw_counters);
	if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
		if (printk_ratelimit())
			printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
		hw_perf_restore(cpuc->throttle_ctrl);
	}
	/* New tick, new interrupt budget: */
	cpuc->interrupts = 0;
}
843 | |||
/*
 * Entry point for the (non-NMI) performance counter APIC interrupt.
 * Re-arms the LVT entry, acks the APIC and runs the common handler.
 */
void smp_perf_counter_interrupt(struct pt_regs *regs)
{
	irq_enter();
	apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
	ack_APIC_irq();
	__smp_perf_counter_interrupt(regs, 0);
	irq_exit();
}
852 | |||
/*
 * Entry point for the self-IPI raised by set_perf_counter_pending():
 * runs deferred perf counter work in normal interrupt context.
 */
void smp_perf_pending_interrupt(struct pt_regs *regs)
{
	irq_enter();
	ack_APIC_irq();
	inc_irq_stat(apic_pending_irqs);
	perf_counter_do_pending();
	irq_exit();
}
861 | |||
/*
 * Schedule deferred perf counter work on this CPU by sending ourselves
 * an IPI on the pending-work vector (safe to call from NMI context).
 */
void set_perf_counter_pending(void)
{
	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
}
866 | |||
/*
 * Program the local APIC to deliver PMU overflow interrupts either as
 * NMIs (@nmi != 0) or on the regular performance vector.
 */
void perf_counters_lapic_init(int nmi)
{
	u32 apic_val;

	if (!perf_counters_initialized)
		return;
	/*
	 * Enable the performance counter vector in the APIC LVT:
	 */
	apic_val = apic_read(APIC_LVTERR);

	/* Mask LVTERR while we reprogram LVTPC, then restore it: */
	apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
	if (nmi)
		apic_write(APIC_LVTPC, APIC_DM_NMI);
	else
		apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
	apic_write(APIC_LVTERR, apic_val);
}
885 | |||
886 | static int __kprobes | ||
887 | perf_counter_nmi_handler(struct notifier_block *self, | ||
888 | unsigned long cmd, void *__args) | ||
889 | { | ||
890 | struct die_args *args = __args; | ||
891 | struct pt_regs *regs; | ||
892 | int ret; | ||
893 | |||
894 | switch (cmd) { | ||
895 | case DIE_NMI: | ||
896 | case DIE_NMI_IPI: | ||
897 | break; | ||
898 | |||
899 | default: | ||
900 | return NOTIFY_DONE; | ||
901 | } | ||
902 | |||
903 | regs = args->regs; | ||
904 | |||
905 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
906 | ret = __smp_perf_counter_interrupt(regs, 1); | ||
907 | |||
908 | return ret ? NOTIFY_STOP : NOTIFY_OK; | ||
909 | } | ||
910 | |||
/* Registered in init_hw_perf_counters(); priority 1 so we run before
 * lower-priority NMI users and can claim PMU NMIs first. */
static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
	.notifier_call = perf_counter_nmi_handler,
	.next = NULL,
	.priority = 1
};
916 | |||
/* Intel architectural-perfmon backend for the vendor-abstraction layer: */
static struct pmc_x86_ops pmc_intel_ops = {
	.save_disable_all = pmc_intel_save_disable_all,
	.restore_all = pmc_intel_restore_all,
	.get_status = pmc_intel_get_status,
	.ack_status = pmc_intel_ack_status,
	.enable = pmc_intel_enable,
	.disable = pmc_intel_disable,
	.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
	.event_map = pmc_intel_event_map,
	.raw_event = pmc_intel_raw_event,
	.max_events = ARRAY_SIZE(intel_perfmon_event_map),
};
930 | |||
/* AMD K7-family backend for the vendor-abstraction layer: */
static struct pmc_x86_ops pmc_amd_ops = {
	.save_disable_all = pmc_amd_save_disable_all,
	.restore_all = pmc_amd_restore_all,
	.get_status = pmc_amd_get_status,
	.ack_status = pmc_amd_ack_status,
	.enable = pmc_amd_enable,
	.disable = pmc_amd_disable,
	.eventsel = MSR_K7_EVNTSEL0,
	.perfctr = MSR_K7_PERFCTR0,
	.event_map = pmc_amd_event_map,
	.raw_event = pmc_amd_raw_event,
	.max_events = ARRAY_SIZE(amd_perfmon_event_map),
};
944 | |||
/*
 * Probe Intel architectural perfmon via CPUID leaf 0xA and fill in the
 * global counter geometry. Returns &pmc_intel_ops on success, NULL when
 * the CPU lacks perfmon v2 or the required events.
 */
static struct pmc_x86_ops *pmc_intel_init(void)
{
	union cpuid10_edx edx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int ebx;

	/*
	 * Check whether the Architectural PerfMon supports
	 * Branch Misses Retired Event or not.
	 */
	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
		return NULL;

	/* This driver needs the v2 global control/status MSRs: */
	intel_perfmon_version = eax.split.version_id;
	if (intel_perfmon_version < 2)
		return NULL;

	pr_info("Intel Performance Monitoring support detected.\n");
	pr_info("... version: %d\n", intel_perfmon_version);
	pr_info("... bit width: %d\n", eax.split.bit_width);
	pr_info("... mask length: %d\n", eax.split.mask_length);

	nr_counters_generic = eax.split.num_counters;
	nr_counters_fixed = edx.split.num_counters_fixed;
	counter_value_mask = (1ULL << eax.split.bit_width) - 1;

	return &pmc_intel_ops;
}
975 | |||
/*
 * AMD has no counter-enumeration CPUID leaf here: hard-code the K7-family
 * geometry (4 generic counters, 48 bits wide, no fixed counters).
 */
static struct pmc_x86_ops *pmc_amd_init(void)
{
	nr_counters_generic = 4;
	nr_counters_fixed = 0;
	counter_value_mask = 0x0000FFFFFFFFFFFFULL;
	counter_value_bits = 48;

	pr_info("AMD Performance Monitoring support detected.\n");

	return &pmc_amd_ops;
}
987 | |||
988 | void __init init_hw_perf_counters(void) | ||
989 | { | ||
990 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
991 | return; | ||
992 | |||
993 | switch (boot_cpu_data.x86_vendor) { | ||
994 | case X86_VENDOR_INTEL: | ||
995 | pmc_ops = pmc_intel_init(); | ||
996 | break; | ||
997 | case X86_VENDOR_AMD: | ||
998 | pmc_ops = pmc_amd_init(); | ||
999 | break; | ||
1000 | } | ||
1001 | if (!pmc_ops) | ||
1002 | return; | ||
1003 | |||
1004 | pr_info("... num counters: %d\n", nr_counters_generic); | ||
1005 | if (nr_counters_generic > X86_PMC_MAX_GENERIC) { | ||
1006 | nr_counters_generic = X86_PMC_MAX_GENERIC; | ||
1007 | WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", | ||
1008 | nr_counters_generic, X86_PMC_MAX_GENERIC); | ||
1009 | } | ||
1010 | perf_counter_mask = (1 << nr_counters_generic) - 1; | ||
1011 | perf_max_counters = nr_counters_generic; | ||
1012 | |||
1013 | pr_info("... value mask: %016Lx\n", counter_value_mask); | ||
1014 | |||
1015 | if (nr_counters_fixed > X86_PMC_MAX_FIXED) { | ||
1016 | nr_counters_fixed = X86_PMC_MAX_FIXED; | ||
1017 | WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", | ||
1018 | nr_counters_fixed, X86_PMC_MAX_FIXED); | ||
1019 | } | ||
1020 | pr_info("... fixed counters: %d\n", nr_counters_fixed); | ||
1021 | |||
1022 | perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED; | ||
1023 | |||
1024 | pr_info("... counter mask: %016Lx\n", perf_counter_mask); | ||
1025 | perf_counters_initialized = true; | ||
1026 | |||
1027 | perf_counters_lapic_init(0); | ||
1028 | register_die_notifier(&perf_counter_nmi_notifier); | ||
1029 | } | ||
1030 | |||
1031 | static void pmc_generic_read(struct perf_counter *counter) | ||
1032 | { | ||
1033 | x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); | ||
1034 | } | ||
1035 | |||
/* Arch-level counter ops handed to the generic perf counter core: */
static const struct hw_perf_counter_ops x86_perf_counter_ops = {
	.enable = pmc_generic_enable,
	.disable = pmc_generic_disable,
	.read = pmc_generic_read,
};
1041 | |||
1042 | const struct hw_perf_counter_ops * | ||
1043 | hw_perf_counter_init(struct perf_counter *counter) | ||
1044 | { | ||
1045 | int err; | ||
1046 | |||
1047 | err = __hw_perf_counter_init(counter); | ||
1048 | if (err) | ||
1049 | return ERR_PTR(err); | ||
1050 | |||
1051 | return &x86_perf_counter_ops; | ||
1052 | } | ||
1053 | |||
1054 | /* | ||
1055 | * callchain support | ||
1056 | */ | ||
1057 | |||
1058 | static inline | ||
1059 | void callchain_store(struct perf_callchain_entry *entry, unsigned long ip) | ||
1060 | { | ||
1061 | if (entry->nr < MAX_STACK_DEPTH) | ||
1062 | entry->ip[entry->nr++] = ip; | ||
1063 | } | ||
1064 | |||
/* Per-cpu callchain scratch buffers, one per context so an NMI hitting
 * during IRQ-context unwinding does not corrupt the IRQ buffer: */
static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
1067 | |||
1068 | |||
/* stacktrace callback: suppress per-symbol warnings during unwinding. */
static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	/* Ignore warnings */
}
1074 | |||
/* stacktrace callback: suppress generic warnings during unwinding. */
static void backtrace_warning(void *data, char *msg)
{
	/* Ignore warnings */
}
1079 | |||
/* stacktrace callback: -1 tells dump_trace() not to descend into
 * other (IRQ/exception) stacks. */
static int backtrace_stack(void *data, char *name)
{
	/* Don't bother with IRQ stacks for now */
	return -1;
}
1085 | |||
/* stacktrace callback: record each reliable return address into the
 * perf_callchain_entry passed via @data. */
static void backtrace_address(void *data, unsigned long addr, int reliable)
{
	struct perf_callchain_entry *entry = data;

	if (reliable)
		callchain_store(entry, addr);
}
1093 | |||
/* Callback table handed to dump_trace() for kernel-stack unwinding: */
static const struct stacktrace_ops backtrace_ops = {
	.warning = backtrace_warning,
	.warning_symbol = backtrace_warning_symbol,
	.stack = backtrace_stack,
	.address = backtrace_address,
};
1100 | |||
/*
 * Capture the kernel-mode portion of the callchain: the interrupted IP
 * followed by a frame walk via dump_trace(). Records how many entries
 * were added in entry->kernel.
 */
static void
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	unsigned long bp;
	char *stack;
	int nr = entry->nr;

	callchain_store(entry, instruction_pointer(regs));

	/* The stack to scan starts right above the saved registers: */
	stack = ((char *)regs + sizeof(struct pt_regs));
#ifdef CONFIG_FRAME_POINTER
	bp = frame_pointer(regs);
#else
	/* No frame pointer available: let dump_trace() scan the stack. */
	bp = 0;
#endif

	dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);

	entry->kernel = entry->nr - nr;
}
1121 | |||
1122 | |||
/* Layout of a user-space stack frame as laid down by compilers that
 * maintain a frame pointer: saved %bp followed by the return address. */
struct stack_frame {
	const void __user *next_fp;
	unsigned long return_address;
};
1127 | |||
/*
 * Safely copy one stack frame from user space.
 * Returns 1 on success, 0 on a bad pointer or fault. Faults are
 * suppressed (pagefault_disable) since we may run from NMI context.
 */
static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
{
	int ret;

	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
		return 0;

	ret = 1;
	pagefault_disable();
	if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
		ret = 0;
	pagefault_enable();

	return ret;
}
1143 | |||
/*
 * Capture the user-mode portion of the callchain by walking the user
 * frame-pointer chain. Records the number of entries added in
 * entry->user.
 */
static void
perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	struct stack_frame frame;
	const void __user *fp;
	int nr = entry->nr;

	/*
	 * Ignore the incoming regs (which may be kernel-mode) and fetch the
	 * user-mode registers saved at kernel entry, just below thread.sp0.
	 */
	regs = (struct pt_regs *)current->thread.sp0 - 1;
	fp = (void __user *)regs->bp;

	callchain_store(entry, regs->ip);

	while (entry->nr < MAX_STACK_DEPTH) {
		frame.next_fp = NULL;
		frame.return_address = 0;

		if (!copy_stack_frame(fp, &frame))
			break;

		/* Frame pointers must move up the stack; bail otherwise: */
		if ((unsigned long)fp < user_stack_pointer(regs))
			break;

		callchain_store(entry, frame.return_address);
		fp = frame.next_fp;
	}

	entry->user = entry->nr - nr;
}
1172 | |||
/*
 * Fill @entry with the kernel and/or user callchain for the context
 * described by @regs. Skips the idle task, and skips user unwinding
 * for tasks that are not currently runnable or have no mm (kernel
 * threads).
 */
static void
perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	int is_user;

	if (!regs)
		return;

	is_user = user_mode(regs);

	/* pid 0 is the idle task - nothing useful to unwind there: */
	if (!current || current->pid == 0)
		return;

	if (is_user && current->state != TASK_RUNNING)
		return;

	if (!is_user)
		perf_callchain_kernel(regs, entry);

	/* Only tasks with a user address space have a user stack: */
	if (current->mm)
		perf_callchain_user(regs, entry);
}
1195 | |||
/*
 * Public entry point: capture the current callchain into a per-cpu
 * buffer (chosen by context so NMIs do not trample IRQ-context use)
 * and return it. The returned buffer is only valid until this context
 * is left.
 */
struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{
	struct perf_callchain_entry *entry;

	if (in_nmi())
		entry = &__get_cpu_var(nmi_entry);
	else
		entry = &__get_cpu_var(irq_entry);

	/* Reset all section counters before refilling: */
	entry->nr = 0;
	entry->hv = 0;
	entry->kernel = 0;
	entry->user = 0;

	perf_do_callchain(regs, entry);

	return entry;
}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index f6c70a164e32..d6f5b9fbde32 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -19,8 +19,8 @@ | |||
19 | #include <linux/nmi.h> | 19 | #include <linux/nmi.h> |
20 | #include <linux/kprobes.h> | 20 | #include <linux/kprobes.h> |
21 | 21 | ||
22 | #include <asm/genapic.h> | 22 | #include <asm/apic.h> |
23 | #include <asm/intel_arch_perfmon.h> | 23 | #include <asm/perf_counter.h> |
24 | 24 | ||
25 | struct nmi_watchdog_ctlblk { | 25 | struct nmi_watchdog_ctlblk { |
26 | unsigned int cccr_msr; | 26 | unsigned int cccr_msr; |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 38946c6e8433..891004619142 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1025,6 +1025,13 @@ apicinterrupt ERROR_APIC_VECTOR \ | |||
1025 | apicinterrupt SPURIOUS_APIC_VECTOR \ | 1025 | apicinterrupt SPURIOUS_APIC_VECTOR \ |
1026 | spurious_interrupt smp_spurious_interrupt | 1026 | spurious_interrupt smp_spurious_interrupt |
1027 | 1027 | ||
1028 | #ifdef CONFIG_PERF_COUNTERS | ||
1029 | apicinterrupt LOCAL_PERF_VECTOR \ | ||
1030 | perf_counter_interrupt smp_perf_counter_interrupt | ||
1031 | apicinterrupt LOCAL_PENDING_VECTOR \ | ||
1032 | perf_pending_interrupt smp_perf_pending_interrupt | ||
1033 | #endif | ||
1034 | |||
1028 | /* | 1035 | /* |
1029 | * Exception entry points. | 1036 | * Exception entry points. |
1030 | */ | 1037 | */ |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c3fe010d74c8..8279fb8df17f 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -63,6 +63,14 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
63 | for_each_online_cpu(j) | 63 | for_each_online_cpu(j) |
64 | seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); | 64 | seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); |
65 | seq_printf(p, " Spurious interrupts\n"); | 65 | seq_printf(p, " Spurious interrupts\n"); |
66 | seq_printf(p, "%*s: ", prec, "CNT"); | ||
67 | for_each_online_cpu(j) | ||
68 | seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); | ||
69 | seq_printf(p, " Performance counter interrupts\n"); | ||
70 | seq_printf(p, "%*s: ", prec, "PND"); | ||
71 | for_each_online_cpu(j) | ||
72 | seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); | ||
73 | seq_printf(p, " Performance pending work\n"); | ||
66 | #endif | 74 | #endif |
67 | if (generic_interrupt_extension) { | 75 | if (generic_interrupt_extension) { |
68 | seq_printf(p, "%*s: ", prec, "PLT"); | 76 | seq_printf(p, "%*s: ", prec, "PLT"); |
@@ -166,6 +174,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
166 | #ifdef CONFIG_X86_LOCAL_APIC | 174 | #ifdef CONFIG_X86_LOCAL_APIC |
167 | sum += irq_stats(cpu)->apic_timer_irqs; | 175 | sum += irq_stats(cpu)->apic_timer_irqs; |
168 | sum += irq_stats(cpu)->irq_spurious_count; | 176 | sum += irq_stats(cpu)->irq_spurious_count; |
177 | sum += irq_stats(cpu)->apic_perf_irqs; | ||
178 | sum += irq_stats(cpu)->apic_pending_irqs; | ||
169 | #endif | 179 | #endif |
170 | if (generic_interrupt_extension) | 180 | if (generic_interrupt_extension) |
171 | sum += irq_stats(cpu)->generic_irqs; | 181 | sum += irq_stats(cpu)->generic_irqs; |
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 368b0a8836f9..3190a6b961e6 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c | |||
@@ -118,28 +118,8 @@ int vector_used_by_percpu_irq(unsigned int vector) | |||
118 | return 0; | 118 | return 0; |
119 | } | 119 | } |
120 | 120 | ||
121 | /* Overridden in paravirt.c */ | 121 | static void __init smp_intr_init(void) |
122 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | ||
123 | |||
124 | void __init native_init_IRQ(void) | ||
125 | { | 122 | { |
126 | int i; | ||
127 | |||
128 | /* Execute any quirks before the call gates are initialised: */ | ||
129 | x86_quirk_pre_intr_init(); | ||
130 | |||
131 | /* | ||
132 | * Cover the whole vector space, no vector can escape | ||
133 | * us. (some of these will be overridden and become | ||
134 | * 'special' SMP interrupts) | ||
135 | */ | ||
136 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { | ||
137 | /* SYSCALL_VECTOR was reserved in trap_init. */ | ||
138 | if (i != SYSCALL_VECTOR) | ||
139 | set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); | ||
140 | } | ||
141 | |||
142 | |||
143 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) | 123 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) |
144 | /* | 124 | /* |
145 | * The reschedule interrupt is a CPU-to-CPU reschedule-helper | 125 | * The reschedule interrupt is a CPU-to-CPU reschedule-helper |
@@ -168,6 +148,11 @@ void __init native_init_IRQ(void) | |||
168 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | 148 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
169 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); | 149 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); |
170 | #endif | 150 | #endif |
151 | } | ||
152 | |||
153 | static void __init apic_intr_init(void) | ||
154 | { | ||
155 | smp_intr_init(); | ||
171 | 156 | ||
172 | #ifdef CONFIG_X86_LOCAL_APIC | 157 | #ifdef CONFIG_X86_LOCAL_APIC |
173 | /* self generated IPI for local APIC timer */ | 158 | /* self generated IPI for local APIC timer */ |
@@ -179,12 +164,41 @@ void __init native_init_IRQ(void) | |||
179 | /* IPI vectors for APIC spurious and error interrupts */ | 164 | /* IPI vectors for APIC spurious and error interrupts */ |
180 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 165 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
181 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | 166 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); |
182 | #endif | 167 | # ifdef CONFIG_PERF_COUNTERS |
168 | alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); | ||
169 | alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); | ||
170 | # endif | ||
183 | 171 | ||
184 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) | 172 | # ifdef CONFIG_X86_MCE_P4THERMAL |
185 | /* thermal monitor LVT interrupt */ | 173 | /* thermal monitor LVT interrupt */ |
186 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | 174 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); |
175 | # endif | ||
187 | #endif | 176 | #endif |
177 | } | ||
178 | |||
179 | /* Overridden in paravirt.c */ | ||
180 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | ||
181 | |||
182 | void __init native_init_IRQ(void) | ||
183 | { | ||
184 | int i; | ||
185 | |||
186 | /* Execute any quirks before the call gates are initialised: */ | ||
187 | x86_quirk_pre_intr_init(); | ||
188 | |||
189 | apic_intr_init(); | ||
190 | |||
191 | /* | ||
192 | * Cover the whole vector space, no vector can escape | ||
193 | * us. (some of these will be overridden and become | ||
194 | * 'special' SMP interrupts) | ||
195 | */ | ||
196 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { | ||
197 | int vector = FIRST_EXTERNAL_VECTOR + i; | ||
198 | /* SYSCALL_VECTOR was reserved in trap_init. */ | ||
199 | if (!test_bit(vector, used_vectors)) | ||
200 | set_intr_gate(vector, interrupt[i]); | ||
201 | } | ||
188 | 202 | ||
189 | if (!acpi_ioapic) | 203 | if (!acpi_ioapic) |
190 | setup_irq(2, &irq2); | 204 | setup_irq(2, &irq2); |
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 8cd10537fd46..53ceb26f80ff 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c | |||
@@ -152,6 +152,12 @@ static void __init apic_intr_init(void) | |||
152 | /* IPI vectors for APIC spurious and error interrupts */ | 152 | /* IPI vectors for APIC spurious and error interrupts */ |
153 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 153 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
154 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | 154 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); |
155 | |||
156 | /* Performance monitoring interrupt: */ | ||
157 | #ifdef CONFIG_PERF_COUNTERS | ||
158 | alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); | ||
159 | alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); | ||
160 | #endif | ||
155 | } | 161 | } |
156 | 162 | ||
157 | void __init native_init_IRQ(void) | 163 | void __init native_init_IRQ(void) |
@@ -159,6 +165,9 @@ void __init native_init_IRQ(void) | |||
159 | int i; | 165 | int i; |
160 | 166 | ||
161 | init_ISA_irqs(); | 167 | init_ISA_irqs(); |
168 | |||
169 | apic_intr_init(); | ||
170 | |||
162 | /* | 171 | /* |
163 | * Cover the whole vector space, no vector can escape | 172 | * Cover the whole vector space, no vector can escape |
164 | * us. (some of these will be overridden and become | 173 | * us. (some of these will be overridden and become |
@@ -166,12 +175,10 @@ void __init native_init_IRQ(void) | |||
166 | */ | 175 | */ |
167 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { | 176 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { |
168 | int vector = FIRST_EXTERNAL_VECTOR + i; | 177 | int vector = FIRST_EXTERNAL_VECTOR + i; |
169 | if (vector != IA32_SYSCALL_VECTOR) | 178 | if (!test_bit(vector, used_vectors)) |
170 | set_intr_gate(vector, interrupt[i]); | 179 | set_intr_gate(vector, interrupt[i]); |
171 | } | 180 | } |
172 | 181 | ||
173 | apic_intr_init(); | ||
174 | |||
175 | if (!acpi_ioapic) | 182 | if (!acpi_ioapic) |
176 | setup_irq(2, &irq2); | 183 | setup_irq(2, &irq2); |
177 | } | 184 | } |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 14425166b8e3..0a813b17b172 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -6,7 +6,6 @@ | |||
6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes | 6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes |
7 | * 2000-2002 x86-64 support by Andi Kleen | 7 | * 2000-2002 x86-64 support by Andi Kleen |
8 | */ | 8 | */ |
9 | |||
10 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
11 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
12 | #include <linux/smp.h> | 11 | #include <linux/smp.h> |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index ff5c8736b491..c3ebbb901379 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -332,5 +332,6 @@ ENTRY(sys_call_table) | |||
332 | .long sys_dup3 /* 330 */ | 332 | .long sys_dup3 /* 330 */ |
333 | .long sys_pipe2 | 333 | .long sys_pipe2 |
334 | .long sys_inotify_init1 | 334 | .long sys_inotify_init1 |
335 | .long sys_perf_counter_open | ||
335 | .long sys_preadv | 336 | .long sys_preadv |
336 | .long sys_pwritev | 337 | .long sys_pwritev |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a1d288327ff0..2cc162e09c4b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -945,8 +945,13 @@ void __init trap_init(void) | |||
945 | #endif | 945 | #endif |
946 | set_intr_gate(19, &simd_coprocessor_error); | 946 | set_intr_gate(19, &simd_coprocessor_error); |
947 | 947 | ||
948 | /* Reserve all the builtin and the syscall vector: */ | ||
949 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) | ||
950 | set_bit(i, used_vectors); | ||
951 | |||
948 | #ifdef CONFIG_IA32_EMULATION | 952 | #ifdef CONFIG_IA32_EMULATION |
949 | set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); | 953 | set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); |
954 | set_bit(IA32_SYSCALL_VECTOR, used_vectors); | ||
950 | #endif | 955 | #endif |
951 | 956 | ||
952 | #ifdef CONFIG_X86_32 | 957 | #ifdef CONFIG_X86_32 |
@@ -963,17 +968,9 @@ void __init trap_init(void) | |||
963 | } | 968 | } |
964 | 969 | ||
965 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); | 970 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); |
966 | #endif | ||
967 | |||
968 | /* Reserve all the builtin and the syscall vector: */ | ||
969 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) | ||
970 | set_bit(i, used_vectors); | ||
971 | |||
972 | #ifdef CONFIG_X86_64 | ||
973 | set_bit(IA32_SYSCALL_VECTOR, used_vectors); | ||
974 | #else | ||
975 | set_bit(SYSCALL_VECTOR, used_vectors); | 971 | set_bit(SYSCALL_VECTOR, used_vectors); |
976 | #endif | 972 | #endif |
973 | |||
977 | /* | 974 | /* |
978 | * Should be a barrier for any external CPU state: | 975 | * Should be a barrier for any external CPU state: |
979 | */ | 976 | */ |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a03b7279efa0..6f9df2babe48 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/tty.h> | 27 | #include <linux/tty.h> |
28 | #include <linux/smp.h> | 28 | #include <linux/smp.h> |
29 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
30 | #include <linux/perf_counter.h> | ||
30 | 31 | ||
31 | #include <asm-generic/sections.h> | 32 | #include <asm-generic/sections.h> |
32 | 33 | ||
@@ -1044,6 +1045,8 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1044 | if (unlikely(error_code & PF_RSVD)) | 1045 | if (unlikely(error_code & PF_RSVD)) |
1045 | pgtable_bad(regs, error_code, address); | 1046 | pgtable_bad(regs, error_code, address); |
1046 | 1047 | ||
1048 | perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address); | ||
1049 | |||
1047 | /* | 1050 | /* |
1048 | * If we're in an interrupt, have no user context or are running | 1051 | * If we're in an interrupt, have no user context or are running |
1049 | * in an atomic region then we must not take the fault: | 1052 | * in an atomic region then we must not take the fault: |
@@ -1137,10 +1140,15 @@ good_area: | |||
1137 | return; | 1140 | return; |
1138 | } | 1141 | } |
1139 | 1142 | ||
1140 | if (fault & VM_FAULT_MAJOR) | 1143 | if (fault & VM_FAULT_MAJOR) { |
1141 | tsk->maj_flt++; | 1144 | tsk->maj_flt++; |
1142 | else | 1145 | perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, |
1146 | regs, address); | ||
1147 | } else { | ||
1143 | tsk->min_flt++; | 1148 | tsk->min_flt++; |
1149 | perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, | ||
1150 | regs, address); | ||
1151 | } | ||
1144 | 1152 | ||
1145 | check_v8086_mode(regs, address, tsk); | 1153 | check_v8086_mode(regs, address, tsk); |
1146 | 1154 | ||
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 202864ad49a7..c638685136e1 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -40,8 +40,9 @@ static int profile_exceptions_notify(struct notifier_block *self, | |||
40 | 40 | ||
41 | switch (val) { | 41 | switch (val) { |
42 | case DIE_NMI: | 42 | case DIE_NMI: |
43 | if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu))) | 43 | case DIE_NMI_IPI: |
44 | ret = NOTIFY_STOP; | 44 | model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)); |
45 | ret = NOTIFY_STOP; | ||
45 | break; | 46 | break; |
46 | default: | 47 | default: |
47 | break; | 48 | break; |
@@ -134,7 +135,7 @@ static void nmi_cpu_setup(void *dummy) | |||
134 | static struct notifier_block profile_exceptions_nb = { | 135 | static struct notifier_block profile_exceptions_nb = { |
135 | .notifier_call = profile_exceptions_notify, | 136 | .notifier_call = profile_exceptions_notify, |
136 | .next = NULL, | 137 | .next = NULL, |
137 | .priority = 0 | 138 | .priority = 2 |
138 | }; | 139 | }; |
139 | 140 | ||
140 | static int nmi_setup(void) | 141 | static int nmi_setup(void) |
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 10131fbdaada..4da7230b3d17 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
@@ -18,7 +18,7 @@ | |||
18 | #include <asm/msr.h> | 18 | #include <asm/msr.h> |
19 | #include <asm/apic.h> | 19 | #include <asm/apic.h> |
20 | #include <asm/nmi.h> | 20 | #include <asm/nmi.h> |
21 | #include <asm/intel_arch_perfmon.h> | 21 | #include <asm/perf_counter.h> |
22 | 22 | ||
23 | #include "op_x86_model.h" | 23 | #include "op_x86_model.h" |
24 | #include "op_counter.h" | 24 | #include "op_counter.h" |
@@ -136,6 +136,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
136 | u64 val; | 136 | u64 val; |
137 | int i; | 137 | int i; |
138 | 138 | ||
139 | /* | ||
140 | * This can happen if perf counters are in use when | ||
141 | * we steal the die notifier NMI. | ||
142 | */ | ||
143 | if (unlikely(!reset_value)) | ||
144 | goto out; | ||
145 | |||
139 | for (i = 0 ; i < num_counters; ++i) { | 146 | for (i = 0 ; i < num_counters; ++i) { |
140 | if (!reset_value[i]) | 147 | if (!reset_value[i]) |
141 | continue; | 148 | continue; |
@@ -146,6 +153,7 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
146 | } | 153 | } |
147 | } | 154 | } |
148 | 155 | ||
156 | out: | ||
149 | /* Only P6 based Pentium M need to re-unmask the apic vector but it | 157 | /* Only P6 based Pentium M need to re-unmask the apic vector but it |
150 | * doesn't hurt other P6 variant */ | 158 | * doesn't hurt other P6 variant */ |
151 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | 159 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); |
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index f7ca8c55956b..d2830f39d46b 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c | |||
@@ -763,8 +763,11 @@ static int acpi_idle_bm_check(void) | |||
763 | */ | 763 | */ |
764 | static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) | 764 | static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) |
765 | { | 765 | { |
766 | u64 perf_flags; | ||
767 | |||
766 | /* Don't trace irqs off for idle */ | 768 | /* Don't trace irqs off for idle */ |
767 | stop_critical_timings(); | 769 | stop_critical_timings(); |
770 | perf_flags = hw_perf_save_disable(); | ||
768 | if (cx->entry_method == ACPI_CSTATE_FFH) { | 771 | if (cx->entry_method == ACPI_CSTATE_FFH) { |
769 | /* Call into architectural FFH based C-state */ | 772 | /* Call into architectural FFH based C-state */ |
770 | acpi_processor_ffh_cstate_enter(cx); | 773 | acpi_processor_ffh_cstate_enter(cx); |
@@ -779,6 +782,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) | |||
779 | gets asserted in time to freeze execution properly. */ | 782 | gets asserted in time to freeze execution properly. */ |
780 | unused = inl(acpi_gbl_FADT.xpm_timer_block.address); | 783 | unused = inl(acpi_gbl_FADT.xpm_timer_block.address); |
781 | } | 784 | } |
785 | hw_perf_restore(perf_flags); | ||
782 | start_critical_timings(); | 786 | start_critical_timings(); |
783 | } | 787 | } |
784 | 788 | ||
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index b0a6a3e51924..aed2b2936ecf 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/kbd_kern.h> | 25 | #include <linux/kbd_kern.h> |
26 | #include <linux/proc_fs.h> | 26 | #include <linux/proc_fs.h> |
27 | #include <linux/quotaops.h> | 27 | #include <linux/quotaops.h> |
28 | #include <linux/perf_counter.h> | ||
28 | #include <linux/kernel.h> | 29 | #include <linux/kernel.h> |
29 | #include <linux/module.h> | 30 | #include <linux/module.h> |
30 | #include <linux/suspend.h> | 31 | #include <linux/suspend.h> |
@@ -243,6 +244,7 @@ static void sysrq_handle_showregs(int key, struct tty_struct *tty) | |||
243 | struct pt_regs *regs = get_irq_regs(); | 244 | struct pt_regs *regs = get_irq_regs(); |
244 | if (regs) | 245 | if (regs) |
245 | show_regs(regs); | 246 | show_regs(regs); |
247 | perf_counter_print_debug(); | ||
246 | } | 248 | } |
247 | static struct sysrq_key_op sysrq_showregs_op = { | 249 | static struct sysrq_key_op sysrq_showregs_op = { |
248 | .handler = sysrq_handle_showregs, | 250 | .handler = sysrq_handle_showregs, |
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/string.h> | 33 | #include <linux/string.h> |
34 | #include <linux/init.h> | 34 | #include <linux/init.h> |
35 | #include <linux/pagemap.h> | 35 | #include <linux/pagemap.h> |
36 | #include <linux/perf_counter.h> | ||
36 | #include <linux/highmem.h> | 37 | #include <linux/highmem.h> |
37 | #include <linux/spinlock.h> | 38 | #include <linux/spinlock.h> |
38 | #include <linux/key.h> | 39 | #include <linux/key.h> |
@@ -950,6 +951,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) | |||
950 | task_lock(tsk); | 951 | task_lock(tsk); |
951 | strlcpy(tsk->comm, buf, sizeof(tsk->comm)); | 952 | strlcpy(tsk->comm, buf, sizeof(tsk->comm)); |
952 | task_unlock(tsk); | 953 | task_unlock(tsk); |
954 | perf_counter_comm(tsk); | ||
953 | } | 955 | } |
954 | 956 | ||
955 | int flush_old_exec(struct linux_binprm * bprm) | 957 | int flush_old_exec(struct linux_binprm * bprm) |
@@ -1018,6 +1020,13 @@ int flush_old_exec(struct linux_binprm * bprm) | |||
1018 | 1020 | ||
1019 | current->personality &= ~bprm->per_clear; | 1021 | current->personality &= ~bprm->per_clear; |
1020 | 1022 | ||
1023 | /* | ||
1024 | * Flush performance counters when crossing a | ||
1025 | * security domain: | ||
1026 | */ | ||
1027 | if (!get_dumpable(current->mm)) | ||
1028 | perf_counter_exit_task(current); | ||
1029 | |||
1021 | /* An exec changes our domain. We are no longer part of the thread | 1030 | /* An exec changes our domain. We are no longer part of the thread |
1022 | group */ | 1031 | group */ |
1023 | 1032 | ||
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d87247d2641f..503afaa0afa7 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -108,6 +108,18 @@ extern struct group_info init_groups; | |||
108 | 108 | ||
109 | extern struct cred init_cred; | 109 | extern struct cred init_cred; |
110 | 110 | ||
111 | #ifdef CONFIG_PERF_COUNTERS | ||
112 | # define INIT_PERF_COUNTERS(tsk) \ | ||
113 | .perf_counter_ctx.counter_list = \ | ||
114 | LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \ | ||
115 | .perf_counter_ctx.event_list = \ | ||
116 | LIST_HEAD_INIT(tsk.perf_counter_ctx.event_list), \ | ||
117 | .perf_counter_ctx.lock = \ | ||
118 | __SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock), | ||
119 | #else | ||
120 | # define INIT_PERF_COUNTERS(tsk) | ||
121 | #endif | ||
122 | |||
111 | /* | 123 | /* |
112 | * INIT_TASK is used to set up the first task table, touch at | 124 | * INIT_TASK is used to set up the first task table, touch at |
113 | * your own risk!. Base=0, limit=0x1fffff (=2MB) | 125 | * your own risk!. Base=0, limit=0x1fffff (=2MB) |
@@ -171,6 +183,7 @@ extern struct cred init_cred; | |||
171 | }, \ | 183 | }, \ |
172 | .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ | 184 | .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ |
173 | INIT_IDS \ | 185 | INIT_IDS \ |
186 | INIT_PERF_COUNTERS(tsk) \ | ||
174 | INIT_TRACE_IRQFLAGS \ | 187 | INIT_TRACE_IRQFLAGS \ |
175 | INIT_LOCKDEP \ | 188 | INIT_LOCKDEP \ |
176 | INIT_FTRACE_GRAPH \ | 189 | INIT_FTRACE_GRAPH \ |
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 0c8b89f28a95..a77c6007dc99 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h | |||
@@ -81,7 +81,12 @@ static inline unsigned int kstat_irqs(unsigned int irq) | |||
81 | return sum; | 81 | return sum; |
82 | } | 82 | } |
83 | 83 | ||
84 | |||
85 | /* | ||
86 | * Lock/unlock the current runqueue - to extract task statistics: | ||
87 | */ | ||
84 | extern unsigned long long task_delta_exec(struct task_struct *); | 88 | extern unsigned long long task_delta_exec(struct task_struct *); |
89 | |||
85 | extern void account_user_time(struct task_struct *, cputime_t, cputime_t); | 90 | extern void account_user_time(struct task_struct *, cputime_t, cputime_t); |
86 | extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); | 91 | extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); |
87 | extern void account_steal_time(cputime_t); | 92 | extern void account_steal_time(cputime_t); |
diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 3069ec7e0ab8..93054fc3635c 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h | |||
@@ -151,4 +151,27 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); | |||
151 | extern int mutex_trylock(struct mutex *lock); | 151 | extern int mutex_trylock(struct mutex *lock); |
152 | extern void mutex_unlock(struct mutex *lock); | 152 | extern void mutex_unlock(struct mutex *lock); |
153 | 153 | ||
154 | /** | ||
155 | * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 | ||
156 | * @cnt: the atomic which we are to dec | ||
157 | * @lock: the mutex to return holding if we dec to 0 | ||
158 | * | ||
159 | * return true and hold lock if we dec to 0, return false otherwise | ||
160 | */ | ||
161 | static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) | ||
162 | { | ||
163 | /* dec if we can't possibly hit 0 */ | ||
164 | if (atomic_add_unless(cnt, -1, 1)) | ||
165 | return 0; | ||
166 | /* we might hit 0, so take the lock */ | ||
167 | mutex_lock(lock); | ||
168 | if (!atomic_dec_and_test(cnt)) { | ||
169 | /* when we actually did the dec, we didn't hit 0 */ | ||
170 | mutex_unlock(lock); | ||
171 | return 0; | ||
172 | } | ||
173 | /* we hit 0, and we hold the lock */ | ||
174 | return 1; | ||
175 | } | ||
176 | |||
154 | #endif | 177 | #endif |
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h new file mode 100644 index 000000000000..981432885301 --- /dev/null +++ b/include/linux/perf_counter.h | |||
@@ -0,0 +1,606 @@ | |||
1 | /* | ||
2 | * Performance counters: | ||
3 | * | ||
4 | * Copyright(C) 2008, Thomas Gleixner <tglx@linutronix.de> | ||
5 | * Copyright(C) 2008, Red Hat, Inc., Ingo Molnar | ||
6 | * | ||
7 | * Data type definitions, declarations, prototypes. | ||
8 | * | ||
9 | * Started by: Thomas Gleixner and Ingo Molnar | ||
10 | * | ||
11 | * For licencing details see kernel-base/COPYING | ||
12 | */ | ||
13 | #ifndef _LINUX_PERF_COUNTER_H | ||
14 | #define _LINUX_PERF_COUNTER_H | ||
15 | |||
16 | #include <linux/types.h> | ||
17 | #include <linux/ioctl.h> | ||
18 | #include <asm/byteorder.h> | ||
19 | |||
20 | /* | ||
21 | * User-space ABI bits: | ||
22 | */ | ||
23 | |||
24 | /* | ||
25 | * hw_event.type | ||
26 | */ | ||
27 | enum perf_event_types { | ||
28 | PERF_TYPE_HARDWARE = 0, | ||
29 | PERF_TYPE_SOFTWARE = 1, | ||
30 | PERF_TYPE_TRACEPOINT = 2, | ||
31 | |||
32 | /* | ||
33 | * available TYPE space, raw is the max value. | ||
34 | */ | ||
35 | |||
36 | PERF_TYPE_RAW = 128, | ||
37 | }; | ||
38 | |||
39 | /* | ||
40 | * Generalized performance counter event types, used by the hw_event.event_id | ||
41 | * parameter of the sys_perf_counter_open() syscall: | ||
42 | */ | ||
43 | enum hw_event_ids { | ||
44 | /* | ||
45 | * Common hardware events, generalized by the kernel: | ||
46 | */ | ||
47 | PERF_COUNT_CPU_CYCLES = 0, | ||
48 | PERF_COUNT_INSTRUCTIONS = 1, | ||
49 | PERF_COUNT_CACHE_REFERENCES = 2, | ||
50 | PERF_COUNT_CACHE_MISSES = 3, | ||
51 | PERF_COUNT_BRANCH_INSTRUCTIONS = 4, | ||
52 | PERF_COUNT_BRANCH_MISSES = 5, | ||
53 | PERF_COUNT_BUS_CYCLES = 6, | ||
54 | |||
55 | PERF_HW_EVENTS_MAX = 7, | ||
56 | }; | ||
57 | |||
58 | /* | ||
59 | * Special "software" counters provided by the kernel, even if the hardware | ||
60 | * does not support performance counters. These counters measure various | ||
61 | * physical and sw events of the kernel (and allow the profiling of them as | ||
62 | * well): | ||
63 | */ | ||
64 | enum sw_event_ids { | ||
65 | PERF_COUNT_CPU_CLOCK = 0, | ||
66 | PERF_COUNT_TASK_CLOCK = 1, | ||
67 | PERF_COUNT_PAGE_FAULTS = 2, | ||
68 | PERF_COUNT_CONTEXT_SWITCHES = 3, | ||
69 | PERF_COUNT_CPU_MIGRATIONS = 4, | ||
70 | PERF_COUNT_PAGE_FAULTS_MIN = 5, | ||
71 | PERF_COUNT_PAGE_FAULTS_MAJ = 6, | ||
72 | |||
73 | PERF_SW_EVENTS_MAX = 7, | ||
74 | }; | ||
75 | |||
76 | #define __PERF_COUNTER_MASK(name) \ | ||
77 | (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ | ||
78 | PERF_COUNTER_##name##_SHIFT) | ||
79 | |||
80 | #define PERF_COUNTER_RAW_BITS 1 | ||
81 | #define PERF_COUNTER_RAW_SHIFT 63 | ||
82 | #define PERF_COUNTER_RAW_MASK __PERF_COUNTER_MASK(RAW) | ||
83 | |||
84 | #define PERF_COUNTER_CONFIG_BITS 63 | ||
85 | #define PERF_COUNTER_CONFIG_SHIFT 0 | ||
86 | #define PERF_COUNTER_CONFIG_MASK __PERF_COUNTER_MASK(CONFIG) | ||
87 | |||
88 | #define PERF_COUNTER_TYPE_BITS 7 | ||
89 | #define PERF_COUNTER_TYPE_SHIFT 56 | ||
90 | #define PERF_COUNTER_TYPE_MASK __PERF_COUNTER_MASK(TYPE) | ||
91 | |||
92 | #define PERF_COUNTER_EVENT_BITS 56 | ||
93 | #define PERF_COUNTER_EVENT_SHIFT 0 | ||
94 | #define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) | ||
95 | |||
96 | /* | ||
97 | * Bits that can be set in hw_event.record_type to request information | ||
98 | * in the overflow packets. | ||
99 | */ | ||
100 | enum perf_counter_record_format { | ||
101 | PERF_RECORD_IP = 1U << 0, | ||
102 | PERF_RECORD_TID = 1U << 1, | ||
103 | PERF_RECORD_TIME = 1U << 2, | ||
104 | PERF_RECORD_ADDR = 1U << 3, | ||
105 | PERF_RECORD_GROUP = 1U << 4, | ||
106 | PERF_RECORD_CALLCHAIN = 1U << 5, | ||
107 | }; | ||
108 | |||
109 | /* | ||
110 | * Bits that can be set in hw_event.read_format to request that | ||
111 | * reads on the counter should return the indicated quantities, | ||
112 | * in increasing order of bit value, after the counter value. | ||
113 | */ | ||
114 | enum perf_counter_read_format { | ||
115 | PERF_FORMAT_TOTAL_TIME_ENABLED = 1, | ||
116 | PERF_FORMAT_TOTAL_TIME_RUNNING = 2, | ||
117 | }; | ||
118 | |||
119 | /* | ||
120 | * Hardware event to monitor via a performance monitoring counter: | ||
121 | */ | ||
122 | struct perf_counter_hw_event { | ||
123 | /* | ||
124 | * The MSB of the config word signifies if the rest contains cpu | ||
125 | * specific (raw) counter configuration data, if unset, the next | ||
126 | * 7 bits are an event type and the rest of the bits are the event | ||
127 | * identifier. | ||
128 | */ | ||
129 | __u64 config; | ||
130 | |||
131 | __u64 irq_period; | ||
132 | __u32 record_type; | ||
133 | __u32 read_format; | ||
134 | |||
135 | __u64 disabled : 1, /* off by default */ | ||
136 | nmi : 1, /* NMI sampling */ | ||
137 | inherit : 1, /* children inherit it */ | ||
138 | pinned : 1, /* must always be on PMU */ | ||
139 | exclusive : 1, /* only group on PMU */ | ||
140 | exclude_user : 1, /* don't count user */ | ||
141 | exclude_kernel : 1, /* ditto kernel */ | ||
142 | exclude_hv : 1, /* ditto hypervisor */ | ||
143 | exclude_idle : 1, /* don't count when idle */ | ||
144 | mmap : 1, /* include mmap data */ | ||
145 | munmap : 1, /* include munmap data */ | ||
146 | comm : 1, /* include comm data */ | ||
147 | |||
148 | __reserved_1 : 52; | ||
149 | |||
150 | __u32 extra_config_len; | ||
151 | __u32 wakeup_events; /* wakeup every n events */ | ||
152 | |||
153 | __u64 __reserved_2; | ||
154 | __u64 __reserved_3; | ||
155 | }; | ||
156 | |||
157 | /* | ||
158 | * Ioctls that can be done on a perf counter fd: | ||
159 | */ | ||
160 | #define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) | ||
161 | #define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) | ||
162 | #define PERF_COUNTER_IOC_REFRESH _IOW('$', 2, u32) | ||
163 | |||
164 | /* | ||
165 | * Structure of the page that can be mapped via mmap | ||
166 | */ | ||
167 | struct perf_counter_mmap_page { | ||
168 | __u32 version; /* version number of this structure */ | ||
169 | __u32 compat_version; /* lowest version this is compat with */ | ||
170 | |||
171 | /* | ||
172 | * Bits needed to read the hw counters in user-space. | ||
173 | * | ||
174 | * u32 seq; | ||
175 | * s64 count; | ||
176 | * | ||
177 | * do { | ||
178 | * seq = pc->lock; | ||
179 | * | ||
180 | * barrier() | ||
181 | * if (pc->index) { | ||
182 | * count = pmc_read(pc->index - 1); | ||
183 | * count += pc->offset; | ||
184 | * } else | ||
185 | * goto regular_read; | ||
186 | * | ||
187 | * barrier(); | ||
188 | * } while (pc->lock != seq); | ||
189 | * | ||
190 | * NOTE: for obvious reason this only works on self-monitoring | ||
191 | * processes. | ||
192 | */ | ||
193 | __u32 lock; /* seqlock for synchronization */ | ||
194 | __u32 index; /* hardware counter identifier */ | ||
195 | __s64 offset; /* add to hardware counter value */ | ||
196 | |||
197 | /* | ||
198 | * Control data for the mmap() data buffer. | ||
199 | * | ||
200 | * User-space reading this value should issue an rmb(), on SMP capable | ||
201 | * platforms, after reading this value -- see perf_counter_wakeup(). | ||
202 | */ | ||
203 | __u32 data_head; /* head in the data section */ | ||
204 | }; | ||
205 | |||
206 | #define PERF_EVENT_MISC_KERNEL (1 << 0) | ||
207 | #define PERF_EVENT_MISC_USER (1 << 1) | ||
208 | #define PERF_EVENT_MISC_OVERFLOW (1 << 2) | ||
209 | |||
210 | struct perf_event_header { | ||
211 | __u32 type; | ||
212 | __u16 misc; | ||
213 | __u16 size; | ||
214 | }; | ||
215 | |||
216 | enum perf_event_type { | ||
217 | |||
218 | /* | ||
219 | * The MMAP events record the PROT_EXEC mappings so that we can | ||
220 | * correlate userspace IPs to code. They have the following structure: | ||
221 | * | ||
222 | * struct { | ||
223 | * struct perf_event_header header; | ||
224 | * | ||
225 | * u32 pid, tid; | ||
226 | * u64 addr; | ||
227 | * u64 len; | ||
228 | * u64 pgoff; | ||
229 | * char filename[]; | ||
230 | * }; | ||
231 | */ | ||
232 | PERF_EVENT_MMAP = 1, | ||
233 | PERF_EVENT_MUNMAP = 2, | ||
234 | |||
235 | /* | ||
236 | * struct { | ||
237 | * struct perf_event_header header; | ||
238 | * | ||
239 | * u32 pid, tid; | ||
240 | * char comm[]; | ||
241 | * }; | ||
242 | */ | ||
243 | PERF_EVENT_COMM = 3, | ||
244 | |||
245 | /* | ||
246 | * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field | ||
247 | * will be PERF_RECORD_* | ||
248 | * | ||
249 | * struct { | ||
250 | * struct perf_event_header header; | ||
251 | * | ||
252 | * { u64 ip; } && PERF_RECORD_IP | ||
253 | * { u32 pid, tid; } && PERF_RECORD_TID | ||
254 | * { u64 time; } && PERF_RECORD_TIME | ||
255 | * { u64 addr; } && PERF_RECORD_ADDR | ||
256 | * | ||
257 | * { u64 nr; | ||
258 | * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP | ||
259 | * | ||
260 | * { u16 nr, | ||
261 | * hv, | ||
262 | * kernel, | ||
263 | * user; | ||
264 | * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN | ||
265 | * }; | ||
266 | */ | ||
267 | }; | ||
268 | |||
269 | #ifdef __KERNEL__ | ||
270 | /* | ||
271 | * Kernel-internal data types and definitions: | ||
272 | */ | ||
273 | |||
274 | #ifdef CONFIG_PERF_COUNTERS | ||
275 | # include <asm/perf_counter.h> | ||
276 | #endif | ||
277 | |||
278 | #include <linux/list.h> | ||
279 | #include <linux/mutex.h> | ||
280 | #include <linux/rculist.h> | ||
281 | #include <linux/rcupdate.h> | ||
282 | #include <linux/spinlock.h> | ||
283 | #include <linux/hrtimer.h> | ||
284 | #include <linux/fs.h> | ||
285 | #include <asm/atomic.h> | ||
286 | |||
287 | struct task_struct; | ||
288 | |||
289 | static inline u64 perf_event_raw(struct perf_counter_hw_event *hw_event) | ||
290 | { | ||
291 | return hw_event->config & PERF_COUNTER_RAW_MASK; | ||
292 | } | ||
293 | |||
294 | static inline u64 perf_event_config(struct perf_counter_hw_event *hw_event) | ||
295 | { | ||
296 | return hw_event->config & PERF_COUNTER_CONFIG_MASK; | ||
297 | } | ||
298 | |||
299 | static inline u64 perf_event_type(struct perf_counter_hw_event *hw_event) | ||
300 | { | ||
301 | return (hw_event->config & PERF_COUNTER_TYPE_MASK) >> | ||
302 | PERF_COUNTER_TYPE_SHIFT; | ||
303 | } | ||
304 | |||
305 | static inline u64 perf_event_id(struct perf_counter_hw_event *hw_event) | ||
306 | { | ||
307 | return hw_event->config & PERF_COUNTER_EVENT_MASK; | ||
308 | } | ||
309 | |||
310 | /** | ||
311 | * struct hw_perf_counter - performance counter hardware details: | ||
312 | */ | ||
313 | struct hw_perf_counter { | ||
314 | #ifdef CONFIG_PERF_COUNTERS | ||
315 | union { | ||
316 | struct { /* hardware */ | ||
317 | u64 config; | ||
318 | unsigned long config_base; | ||
319 | unsigned long counter_base; | ||
320 | int nmi; | ||
321 | unsigned int idx; | ||
322 | }; | ||
323 | union { /* software */ | ||
324 | atomic64_t count; | ||
325 | struct hrtimer hrtimer; | ||
326 | }; | ||
327 | }; | ||
328 | atomic64_t prev_count; | ||
329 | u64 irq_period; | ||
330 | atomic64_t period_left; | ||
331 | #endif | ||
332 | }; | ||
333 | |||
334 | struct perf_counter; | ||
335 | |||
336 | /** | ||
337 | * struct hw_perf_counter_ops - performance counter hw ops | ||
338 | */ | ||
339 | struct hw_perf_counter_ops { | ||
340 | int (*enable) (struct perf_counter *counter); | ||
341 | void (*disable) (struct perf_counter *counter); | ||
342 | void (*read) (struct perf_counter *counter); | ||
343 | }; | ||
344 | |||
345 | /** | ||
346 | * enum perf_counter_active_state - the states of a counter | ||
347 | */ | ||
348 | enum perf_counter_active_state { | ||
349 | PERF_COUNTER_STATE_ERROR = -2, | ||
350 | PERF_COUNTER_STATE_OFF = -1, | ||
351 | PERF_COUNTER_STATE_INACTIVE = 0, | ||
352 | PERF_COUNTER_STATE_ACTIVE = 1, | ||
353 | }; | ||
354 | |||
355 | struct file; | ||
356 | |||
357 | struct perf_mmap_data { | ||
358 | struct rcu_head rcu_head; | ||
359 | int nr_pages; /* nr of data pages */ | ||
360 | |||
361 | atomic_t wakeup; /* POLL_ for wakeups */ | ||
362 | atomic_t head; /* write position */ | ||
363 | atomic_t events; /* event limit */ | ||
364 | |||
365 | struct perf_counter_mmap_page *user_page; | ||
366 | void *data_pages[0]; | ||
367 | }; | ||
368 | |||
369 | struct perf_pending_entry { | ||
370 | struct perf_pending_entry *next; | ||
371 | void (*func)(struct perf_pending_entry *); | ||
372 | }; | ||
373 | |||
374 | /** | ||
375 | * struct perf_counter - performance counter kernel representation: | ||
376 | */ | ||
377 | struct perf_counter { | ||
378 | #ifdef CONFIG_PERF_COUNTERS | ||
379 | struct list_head list_entry; | ||
380 | struct list_head event_entry; | ||
381 | struct list_head sibling_list; | ||
382 | int nr_siblings; | ||
383 | struct perf_counter *group_leader; | ||
384 | const struct hw_perf_counter_ops *hw_ops; | ||
385 | |||
386 | enum perf_counter_active_state state; | ||
387 | enum perf_counter_active_state prev_state; | ||
388 | atomic64_t count; | ||
389 | |||
390 | /* | ||
391 | * These are the total time in nanoseconds that the counter | ||
392 | * has been enabled (i.e. eligible to run, and the task has | ||
393 | * been scheduled in, if this is a per-task counter) | ||
394 | * and running (scheduled onto the CPU), respectively. | ||
395 | * | ||
396 | * They are computed from tstamp_enabled, tstamp_running and | ||
397 | * tstamp_stopped when the counter is in INACTIVE or ACTIVE state. | ||
398 | */ | ||
399 | u64 total_time_enabled; | ||
400 | u64 total_time_running; | ||
401 | |||
402 | /* | ||
403 | * These are timestamps used for computing total_time_enabled | ||
404 | * and total_time_running when the counter is in INACTIVE or | ||
405 | * ACTIVE state, measured in nanoseconds from an arbitrary point | ||
406 | * in time. | ||
407 | * tstamp_enabled: the notional time when the counter was enabled | ||
408 | * tstamp_running: the notional time when the counter was scheduled on | ||
409 | * tstamp_stopped: in INACTIVE state, the notional time when the | ||
410 | * counter was scheduled off. | ||
411 | */ | ||
412 | u64 tstamp_enabled; | ||
413 | u64 tstamp_running; | ||
414 | u64 tstamp_stopped; | ||
415 | |||
416 | struct perf_counter_hw_event hw_event; | ||
417 | struct hw_perf_counter hw; | ||
418 | |||
419 | struct perf_counter_context *ctx; | ||
420 | struct task_struct *task; | ||
421 | struct file *filp; | ||
422 | |||
423 | struct perf_counter *parent; | ||
424 | struct list_head child_list; | ||
425 | |||
426 | /* | ||
427 | * These accumulate total time (in nanoseconds) that children | ||
428 | * counters have been enabled and running, respectively. | ||
429 | */ | ||
430 | atomic64_t child_total_time_enabled; | ||
431 | atomic64_t child_total_time_running; | ||
432 | |||
433 | /* | ||
434 | * Protect attach/detach and child_list: | ||
435 | */ | ||
436 | struct mutex mutex; | ||
437 | |||
438 | int oncpu; | ||
439 | int cpu; | ||
440 | |||
441 | /* mmap bits */ | ||
442 | struct mutex mmap_mutex; | ||
443 | atomic_t mmap_count; | ||
444 | struct perf_mmap_data *data; | ||
445 | |||
446 | /* poll related */ | ||
447 | wait_queue_head_t waitq; | ||
448 | struct fasync_struct *fasync; | ||
449 | |||
450 | /* delayed work for NMIs and such */ | ||
451 | int pending_wakeup; | ||
452 | int pending_kill; | ||
453 | int pending_disable; | ||
454 | struct perf_pending_entry pending; | ||
455 | |||
456 | atomic_t event_limit; | ||
457 | |||
458 | void (*destroy)(struct perf_counter *); | ||
459 | struct rcu_head rcu_head; | ||
460 | #endif | ||
461 | }; | ||
462 | |||
463 | /** | ||
464 | * struct perf_counter_context - counter context structure | ||
465 | * | ||
466 | * Used as a container for task counters and CPU counters as well: | ||
467 | */ | ||
468 | struct perf_counter_context { | ||
469 | #ifdef CONFIG_PERF_COUNTERS | ||
470 | /* | ||
471 | * Protect the states of the counters in the list, | ||
472 | * nr_active, and the list: | ||
473 | */ | ||
474 | spinlock_t lock; | ||
475 | /* | ||
476 | * Protect the list of counters. Locking either mutex or lock | ||
477 | * is sufficient to ensure the list doesn't change; to change | ||
478 | * the list you need to lock both the mutex and the spinlock. | ||
479 | */ | ||
480 | struct mutex mutex; | ||
481 | |||
482 | struct list_head counter_list; | ||
483 | struct list_head event_list; | ||
484 | int nr_counters; | ||
485 | int nr_active; | ||
486 | int is_active; | ||
487 | struct task_struct *task; | ||
488 | |||
489 | /* | ||
490 | * Context clock, runs when context enabled. | ||
491 | */ | ||
492 | u64 time; | ||
493 | u64 timestamp; | ||
494 | #endif | ||
495 | }; | ||
496 | |||
497 | /** | ||
498 | * struct perf_counter_cpu_context - per cpu counter context structure | ||
499 | */ | ||
500 | struct perf_cpu_context { | ||
501 | struct perf_counter_context ctx; | ||
502 | struct perf_counter_context *task_ctx; | ||
503 | int active_oncpu; | ||
504 | int max_pertask; | ||
505 | int exclusive; | ||
506 | |||
507 | /* | ||
508 | * Recursion avoidance: | ||
509 | * | ||
510 | * task, softirq, irq, nmi context | ||
511 | */ | ||
512 | int recursion[4]; | ||
513 | }; | ||
514 | |||
515 | /* | ||
516 | * Set by architecture code: | ||
517 | */ | ||
518 | extern int perf_max_counters; | ||
519 | |||
520 | #ifdef CONFIG_PERF_COUNTERS | ||
521 | extern const struct hw_perf_counter_ops * | ||
522 | hw_perf_counter_init(struct perf_counter *counter); | ||
523 | |||
524 | extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); | ||
525 | extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); | ||
526 | extern void perf_counter_task_tick(struct task_struct *task, int cpu); | ||
527 | extern void perf_counter_init_task(struct task_struct *child); | ||
528 | extern void perf_counter_exit_task(struct task_struct *child); | ||
529 | extern void perf_counter_do_pending(void); | ||
530 | extern void perf_counter_print_debug(void); | ||
531 | extern void perf_counter_unthrottle(void); | ||
532 | extern u64 hw_perf_save_disable(void); | ||
533 | extern void hw_perf_restore(u64 ctrl); | ||
534 | extern int perf_counter_task_disable(void); | ||
535 | extern int perf_counter_task_enable(void); | ||
536 | extern int hw_perf_group_sched_in(struct perf_counter *group_leader, | ||
537 | struct perf_cpu_context *cpuctx, | ||
538 | struct perf_counter_context *ctx, int cpu); | ||
539 | extern void perf_counter_update_userpage(struct perf_counter *counter); | ||
540 | |||
541 | extern int perf_counter_overflow(struct perf_counter *counter, | ||
542 | int nmi, struct pt_regs *regs, u64 addr); | ||
543 | /* | ||
544 | * Return 1 for a software counter, 0 for a hardware counter | ||
545 | */ | ||
546 | static inline int is_software_counter(struct perf_counter *counter) | ||
547 | { | ||
548 | return !perf_event_raw(&counter->hw_event) && | ||
549 | perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE; | ||
550 | } | ||
551 | |||
552 | extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); | ||
553 | |||
554 | extern void perf_counter_mmap(unsigned long addr, unsigned long len, | ||
555 | unsigned long pgoff, struct file *file); | ||
556 | |||
557 | extern void perf_counter_munmap(unsigned long addr, unsigned long len, | ||
558 | unsigned long pgoff, struct file *file); | ||
559 | |||
560 | extern void perf_counter_comm(struct task_struct *tsk); | ||
561 | |||
562 | #define MAX_STACK_DEPTH 255 | ||
563 | |||
564 | struct perf_callchain_entry { | ||
565 | u16 nr, hv, kernel, user; | ||
566 | u64 ip[MAX_STACK_DEPTH]; | ||
567 | }; | ||
568 | |||
569 | extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); | ||
570 | |||
571 | extern int sysctl_perf_counter_priv; | ||
572 | |||
573 | #else | ||
574 | static inline void | ||
575 | perf_counter_task_sched_in(struct task_struct *task, int cpu) { } | ||
576 | static inline void | ||
577 | perf_counter_task_sched_out(struct task_struct *task, int cpu) { } | ||
578 | static inline void | ||
579 | perf_counter_task_tick(struct task_struct *task, int cpu) { } | ||
580 | static inline void perf_counter_init_task(struct task_struct *child) { } | ||
581 | static inline void perf_counter_exit_task(struct task_struct *child) { } | ||
582 | static inline void perf_counter_do_pending(void) { } | ||
583 | static inline void perf_counter_print_debug(void) { } | ||
584 | static inline void perf_counter_unthrottle(void) { } | ||
585 | static inline void hw_perf_restore(u64 ctrl) { } | ||
586 | static inline u64 hw_perf_save_disable(void) { return 0; } | ||
587 | static inline int perf_counter_task_disable(void) { return -EINVAL; } | ||
588 | static inline int perf_counter_task_enable(void) { return -EINVAL; } | ||
589 | |||
590 | static inline void | ||
591 | perf_swcounter_event(u32 event, u64 nr, int nmi, | ||
592 | struct pt_regs *regs, u64 addr) { } | ||
593 | |||
594 | static inline void | ||
595 | perf_counter_mmap(unsigned long addr, unsigned long len, | ||
596 | unsigned long pgoff, struct file *file) { } | ||
597 | |||
598 | static inline void | ||
599 | perf_counter_munmap(unsigned long addr, unsigned long len, | ||
600 | unsigned long pgoff, struct file *file) { } | ||
601 | |||
602 | static inline void perf_counter_comm(struct task_struct *tsk) { } | ||
603 | #endif | ||
604 | |||
605 | #endif /* __KERNEL__ */ | ||
606 | #endif /* _LINUX_PERF_COUNTER_H */ | ||
diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 48d887e3c6e7..b00df4c79c63 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h | |||
@@ -85,4 +85,7 @@ | |||
85 | #define PR_SET_TIMERSLACK 29 | 85 | #define PR_SET_TIMERSLACK 29 |
86 | #define PR_GET_TIMERSLACK 30 | 86 | #define PR_GET_TIMERSLACK 30 |
87 | 87 | ||
88 | #define PR_TASK_PERF_COUNTERS_DISABLE 31 | ||
89 | #define PR_TASK_PERF_COUNTERS_ENABLE 32 | ||
90 | |||
88 | #endif /* _LINUX_PRCTL_H */ | 91 | #endif /* _LINUX_PRCTL_H */ |
diff --git a/include/linux/sched.h b/include/linux/sched.h index b4c38bc8049c..d1857580a132 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -71,6 +71,7 @@ struct sched_param { | |||
71 | #include <linux/path.h> | 71 | #include <linux/path.h> |
72 | #include <linux/compiler.h> | 72 | #include <linux/compiler.h> |
73 | #include <linux/completion.h> | 73 | #include <linux/completion.h> |
74 | #include <linux/perf_counter.h> | ||
74 | #include <linux/pid.h> | 75 | #include <linux/pid.h> |
75 | #include <linux/percpu.h> | 76 | #include <linux/percpu.h> |
76 | #include <linux/topology.h> | 77 | #include <linux/topology.h> |
@@ -137,6 +138,7 @@ extern unsigned long nr_running(void); | |||
137 | extern unsigned long nr_uninterruptible(void); | 138 | extern unsigned long nr_uninterruptible(void); |
138 | extern unsigned long nr_active(void); | 139 | extern unsigned long nr_active(void); |
139 | extern unsigned long nr_iowait(void); | 140 | extern unsigned long nr_iowait(void); |
141 | extern u64 cpu_nr_migrations(int cpu); | ||
140 | 142 | ||
141 | extern unsigned long get_parent_ip(unsigned long addr); | 143 | extern unsigned long get_parent_ip(unsigned long addr); |
142 | 144 | ||
@@ -1052,9 +1054,10 @@ struct sched_entity { | |||
1052 | u64 last_wakeup; | 1054 | u64 last_wakeup; |
1053 | u64 avg_overlap; | 1055 | u64 avg_overlap; |
1054 | 1056 | ||
1057 | u64 nr_migrations; | ||
1058 | |||
1055 | u64 start_runtime; | 1059 | u64 start_runtime; |
1056 | u64 avg_wakeup; | 1060 | u64 avg_wakeup; |
1057 | u64 nr_migrations; | ||
1058 | 1061 | ||
1059 | #ifdef CONFIG_SCHEDSTATS | 1062 | #ifdef CONFIG_SCHEDSTATS |
1060 | u64 wait_start; | 1063 | u64 wait_start; |
@@ -1380,6 +1383,7 @@ struct task_struct { | |||
1380 | struct list_head pi_state_list; | 1383 | struct list_head pi_state_list; |
1381 | struct futex_pi_state *pi_state_cache; | 1384 | struct futex_pi_state *pi_state_cache; |
1382 | #endif | 1385 | #endif |
1386 | struct perf_counter_context perf_counter_ctx; | ||
1383 | #ifdef CONFIG_NUMA | 1387 | #ifdef CONFIG_NUMA |
1384 | struct mempolicy *mempolicy; | 1388 | struct mempolicy *mempolicy; |
1385 | short il_next; | 1389 | short il_next; |
@@ -2388,6 +2392,13 @@ static inline void inc_syscw(struct task_struct *tsk) | |||
2388 | #define TASK_SIZE_OF(tsk) TASK_SIZE | 2392 | #define TASK_SIZE_OF(tsk) TASK_SIZE |
2389 | #endif | 2393 | #endif |
2390 | 2394 | ||
2395 | /* | ||
2396 | * Call the function if the target task is executing on a CPU right now: | ||
2397 | */ | ||
2398 | extern void task_oncpu_function_call(struct task_struct *p, | ||
2399 | void (*func) (void *info), void *info); | ||
2400 | |||
2401 | |||
2391 | #ifdef CONFIG_MM_OWNER | 2402 | #ifdef CONFIG_MM_OWNER |
2392 | extern void mm_update_next_owner(struct mm_struct *mm); | 2403 | extern void mm_update_next_owner(struct mm_struct *mm); |
2393 | extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); | 2404 | extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); |
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 40617c1d8976..677d159fe5f4 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
@@ -55,6 +55,7 @@ struct compat_timeval; | |||
55 | struct robust_list_head; | 55 | struct robust_list_head; |
56 | struct getcpu_cache; | 56 | struct getcpu_cache; |
57 | struct old_linux_dirent; | 57 | struct old_linux_dirent; |
58 | struct perf_counter_hw_event; | ||
58 | 59 | ||
59 | #include <linux/types.h> | 60 | #include <linux/types.h> |
60 | #include <linux/aio_abi.h> | 61 | #include <linux/aio_abi.h> |
@@ -754,4 +755,8 @@ asmlinkage long sys_pipe(int __user *); | |||
754 | 755 | ||
755 | int kernel_execve(const char *filename, char *const argv[], char *const envp[]); | 756 | int kernel_execve(const char *filename, char *const argv[], char *const envp[]); |
756 | 757 | ||
758 | |||
759 | asmlinkage long sys_perf_counter_open( | ||
760 | const struct perf_counter_hw_event __user *hw_event_uptr, | ||
761 | pid_t pid, int cpu, int group_fd, unsigned long flags); | ||
757 | #endif | 762 | #endif |
diff --git a/init/Kconfig b/init/Kconfig index 7be4d3836745..8158f1f44694 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -933,6 +933,41 @@ config AIO | |||
933 | by some high performance threaded applications. Disabling | 933 | by some high performance threaded applications. Disabling |
934 | this option saves about 7k. | 934 | this option saves about 7k. |
935 | 935 | ||
936 | config HAVE_PERF_COUNTERS | ||
937 | bool | ||
938 | |||
939 | menu "Performance Counters" | ||
940 | |||
941 | config PERF_COUNTERS | ||
942 | bool "Kernel Performance Counters" | ||
943 | depends on HAVE_PERF_COUNTERS | ||
944 | default y | ||
945 | select ANON_INODES | ||
946 | help | ||
947 | Enable kernel support for performance counter hardware. | ||
948 | |||
949 | Performance counters are special hardware registers available | ||
950 | on most modern CPUs. These registers count the number of certain | ||
951 | types of hw events: such as instructions executed, cachemisses | ||
952 | suffered, or branches mis-predicted - without slowing down the | ||
953 | kernel or applications. These registers can also trigger interrupts | ||
954 | when a threshold number of events have passed - and can thus be | ||
955 | used to profile the code that runs on that CPU. | ||
956 | |||
957 | The Linux Performance Counter subsystem provides an abstraction of | ||
958 | these hardware capabilities, available via a system call. It | ||
959 | provides per task and per CPU counters, and it provides event | ||
960 | capabilities on top of those. | ||
961 | |||
962 | Say Y if unsure. | ||
963 | |||
964 | config EVENT_PROFILE | ||
965 | bool "Tracepoint profile sources" | ||
966 | depends on PERF_COUNTERS && EVENT_TRACER | ||
967 | default y | ||
968 | |||
969 | endmenu | ||
970 | |||
936 | config VM_EVENT_COUNTERS | 971 | config VM_EVENT_COUNTERS |
937 | default y | 972 | default y |
938 | bool "Enable VM event counters for /proc/vmstat" if EMBEDDED | 973 | bool "Enable VM event counters for /proc/vmstat" if EMBEDDED |
diff --git a/kernel/Makefile b/kernel/Makefile index 42423665660a..e914ca992d70 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -95,6 +95,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/ | |||
95 | obj-$(CONFIG_TRACING) += trace/ | 95 | obj-$(CONFIG_TRACING) += trace/ |
96 | obj-$(CONFIG_SMP) += sched_cpupri.o | 96 | obj-$(CONFIG_SMP) += sched_cpupri.o |
97 | obj-$(CONFIG_SLOW_WORK) += slow-work.o | 97 | obj-$(CONFIG_SLOW_WORK) += slow-work.o |
98 | obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o | ||
98 | 99 | ||
99 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) | 100 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) |
100 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 101 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/exit.c b/kernel/exit.c index abf9cf3b95c6..4741376c8dec 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -158,6 +158,9 @@ static void delayed_put_task_struct(struct rcu_head *rhp) | |||
158 | { | 158 | { |
159 | struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); | 159 | struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); |
160 | 160 | ||
161 | #ifdef CONFIG_PERF_COUNTERS | ||
162 | WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list)); | ||
163 | #endif | ||
161 | trace_sched_process_free(tsk); | 164 | trace_sched_process_free(tsk); |
162 | put_task_struct(tsk); | 165 | put_task_struct(tsk); |
163 | } | 166 | } |
@@ -981,10 +984,6 @@ NORET_TYPE void do_exit(long code) | |||
981 | tsk->mempolicy = NULL; | 984 | tsk->mempolicy = NULL; |
982 | #endif | 985 | #endif |
983 | #ifdef CONFIG_FUTEX | 986 | #ifdef CONFIG_FUTEX |
984 | /* | ||
985 | * This must happen late, after the PID is not | ||
986 | * hashed anymore: | ||
987 | */ | ||
988 | if (unlikely(!list_empty(&tsk->pi_state_list))) | 987 | if (unlikely(!list_empty(&tsk->pi_state_list))) |
989 | exit_pi_state_list(tsk); | 988 | exit_pi_state_list(tsk); |
990 | if (unlikely(current->pi_state_cache)) | 989 | if (unlikely(current->pi_state_cache)) |
@@ -1251,6 +1250,12 @@ static int wait_task_zombie(struct task_struct *p, int options, | |||
1251 | */ | 1250 | */ |
1252 | read_unlock(&tasklist_lock); | 1251 | read_unlock(&tasklist_lock); |
1253 | 1252 | ||
1253 | /* | ||
1254 | * Flush inherited counters to the parent - before the parent | ||
1255 | * gets woken up by child-exit notifications. | ||
1256 | */ | ||
1257 | perf_counter_exit_task(p); | ||
1258 | |||
1254 | retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; | 1259 | retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; |
1255 | status = (p->signal->flags & SIGNAL_GROUP_EXIT) | 1260 | status = (p->signal->flags & SIGNAL_GROUP_EXIT) |
1256 | ? p->signal->group_exit_code : p->exit_code; | 1261 | ? p->signal->group_exit_code : p->exit_code; |
diff --git a/kernel/fork.c b/kernel/fork.c index b9e2edd00726..d32fef4d38e5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -983,6 +983,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
983 | goto fork_out; | 983 | goto fork_out; |
984 | 984 | ||
985 | rt_mutex_init_task(p); | 985 | rt_mutex_init_task(p); |
986 | perf_counter_init_task(p); | ||
986 | 987 | ||
987 | #ifdef CONFIG_PROVE_LOCKING | 988 | #ifdef CONFIG_PROVE_LOCKING |
988 | DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); | 989 | DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); |
diff --git a/kernel/mutex.c b/kernel/mutex.c index 507cf2b5e9f1..f415e80a9119 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -89,7 +89,7 @@ __mutex_lock_slowpath(atomic_t *lock_count); | |||
89 | * | 89 | * |
90 | * This function is similar to (but not equivalent to) down(). | 90 | * This function is similar to (but not equivalent to) down(). |
91 | */ | 91 | */ |
92 | void inline __sched mutex_lock(struct mutex *lock) | 92 | void __sched mutex_lock(struct mutex *lock) |
93 | { | 93 | { |
94 | might_sleep(); | 94 | might_sleep(); |
95 | /* | 95 | /* |
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c new file mode 100644 index 000000000000..09396098dd0d --- /dev/null +++ b/kernel/perf_counter.c | |||
@@ -0,0 +1,3302 @@ | |||
1 | /* | ||
2 | * Performance counter core code | ||
3 | * | ||
4 | * Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de> | ||
5 | * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar | ||
6 | * | ||
7 | * | ||
8 | * For licensing details see kernel-base/COPYING | ||
9 | */ | ||
10 | |||
11 | #include <linux/fs.h> | ||
12 | #include <linux/mm.h> | ||
13 | #include <linux/cpu.h> | ||
14 | #include <linux/smp.h> | ||
15 | #include <linux/file.h> | ||
16 | #include <linux/poll.h> | ||
17 | #include <linux/sysfs.h> | ||
18 | #include <linux/ptrace.h> | ||
19 | #include <linux/percpu.h> | ||
20 | #include <linux/vmstat.h> | ||
21 | #include <linux/hardirq.h> | ||
22 | #include <linux/rculist.h> | ||
23 | #include <linux/uaccess.h> | ||
24 | #include <linux/syscalls.h> | ||
25 | #include <linux/anon_inodes.h> | ||
26 | #include <linux/kernel_stat.h> | ||
27 | #include <linux/perf_counter.h> | ||
28 | #include <linux/dcache.h> | ||
29 | |||
30 | #include <asm/irq_regs.h> | ||
31 | |||
32 | /* | ||
33 | * Each CPU has a list of per CPU counters: | ||
34 | */ | ||
35 | DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); | ||
36 | |||
37 | int perf_max_counters __read_mostly = 1; | ||
38 | static int perf_reserved_percpu __read_mostly; | ||
39 | static int perf_overcommit __read_mostly = 1; | ||
40 | |||
41 | static atomic_t nr_mmap_tracking __read_mostly; | ||
42 | static atomic_t nr_munmap_tracking __read_mostly; | ||
43 | static atomic_t nr_comm_tracking __read_mostly; | ||
44 | |||
45 | int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */ | ||
46 | |||
47 | /* | ||
48 | * Mutex for (sysadmin-configurable) counter reservations: | ||
49 | */ | ||
50 | static DEFINE_MUTEX(perf_resource_mutex); | ||
51 | |||
52 | /* | ||
53 | * Architecture provided APIs - weak aliases: | ||
54 | */ | ||
55 | extern __weak const struct hw_perf_counter_ops * | ||
56 | hw_perf_counter_init(struct perf_counter *counter) | ||
57 | { | ||
58 | return NULL; | ||
59 | } | ||
60 | |||
61 | u64 __weak hw_perf_save_disable(void) { return 0; } | ||
62 | void __weak hw_perf_restore(u64 ctrl) { barrier(); } | ||
63 | void __weak hw_perf_counter_setup(int cpu) { barrier(); } | ||
64 | int __weak hw_perf_group_sched_in(struct perf_counter *group_leader, | ||
65 | struct perf_cpu_context *cpuctx, | ||
66 | struct perf_counter_context *ctx, int cpu) | ||
67 | { | ||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | void __weak perf_counter_print_debug(void) { } | ||
72 | |||
73 | static void | ||
74 | list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) | ||
75 | { | ||
76 | struct perf_counter *group_leader = counter->group_leader; | ||
77 | |||
78 | /* | ||
79 | * Depending on whether it is a standalone or sibling counter, | ||
80 | * add it straight to the context's counter list, or to the group | ||
81 | * leader's sibling list: | ||
82 | */ | ||
83 | if (counter->group_leader == counter) | ||
84 | list_add_tail(&counter->list_entry, &ctx->counter_list); | ||
85 | else { | ||
86 | list_add_tail(&counter->list_entry, &group_leader->sibling_list); | ||
87 | group_leader->nr_siblings++; | ||
88 | } | ||
89 | |||
90 | list_add_rcu(&counter->event_entry, &ctx->event_list); | ||
91 | } | ||
92 | |||
93 | static void | ||
94 | list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) | ||
95 | { | ||
96 | struct perf_counter *sibling, *tmp; | ||
97 | |||
98 | list_del_init(&counter->list_entry); | ||
99 | list_del_rcu(&counter->event_entry); | ||
100 | |||
101 | if (counter->group_leader != counter) | ||
102 | counter->group_leader->nr_siblings--; | ||
103 | |||
104 | /* | ||
105 | * If this was a group counter with sibling counters then | ||
106 | * upgrade the siblings to singleton counters by adding them | ||
107 | * to the context list directly: | ||
108 | */ | ||
109 | list_for_each_entry_safe(sibling, tmp, | ||
110 | &counter->sibling_list, list_entry) { | ||
111 | |||
112 | list_move_tail(&sibling->list_entry, &ctx->counter_list); | ||
113 | sibling->group_leader = sibling; | ||
114 | } | ||
115 | } | ||
116 | |||
117 | static void | ||
118 | counter_sched_out(struct perf_counter *counter, | ||
119 | struct perf_cpu_context *cpuctx, | ||
120 | struct perf_counter_context *ctx) | ||
121 | { | ||
122 | if (counter->state != PERF_COUNTER_STATE_ACTIVE) | ||
123 | return; | ||
124 | |||
125 | counter->state = PERF_COUNTER_STATE_INACTIVE; | ||
126 | counter->tstamp_stopped = ctx->time; | ||
127 | counter->hw_ops->disable(counter); | ||
128 | counter->oncpu = -1; | ||
129 | |||
130 | if (!is_software_counter(counter)) | ||
131 | cpuctx->active_oncpu--; | ||
132 | ctx->nr_active--; | ||
133 | if (counter->hw_event.exclusive || !cpuctx->active_oncpu) | ||
134 | cpuctx->exclusive = 0; | ||
135 | } | ||
136 | |||
137 | static void | ||
138 | group_sched_out(struct perf_counter *group_counter, | ||
139 | struct perf_cpu_context *cpuctx, | ||
140 | struct perf_counter_context *ctx) | ||
141 | { | ||
142 | struct perf_counter *counter; | ||
143 | |||
144 | if (group_counter->state != PERF_COUNTER_STATE_ACTIVE) | ||
145 | return; | ||
146 | |||
147 | counter_sched_out(group_counter, cpuctx, ctx); | ||
148 | |||
149 | /* | ||
150 | * Schedule out siblings (if any): | ||
151 | */ | ||
152 | list_for_each_entry(counter, &group_counter->sibling_list, list_entry) | ||
153 | counter_sched_out(counter, cpuctx, ctx); | ||
154 | |||
155 | if (group_counter->hw_event.exclusive) | ||
156 | cpuctx->exclusive = 0; | ||
157 | } | ||
158 | |||
159 | /* | ||
160 | * Cross CPU call to remove a performance counter | ||
161 | * | ||
162 | * We disable the counter on the hardware level first. After that we | ||
163 | * remove it from the context list. | ||
164 | */ | ||
165 | static void __perf_counter_remove_from_context(void *info) | ||
166 | { | ||
167 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
168 | struct perf_counter *counter = info; | ||
169 | struct perf_counter_context *ctx = counter->ctx; | ||
170 | unsigned long flags; | ||
171 | u64 perf_flags; | ||
172 | |||
173 | /* | ||
174 | * If this is a task context, we need to check whether it is | ||
175 | * the current task context of this cpu. If not it has been | ||
176 | * scheduled out before the smp call arrived. | ||
177 | */ | ||
178 | if (ctx->task && cpuctx->task_ctx != ctx) | ||
179 | return; | ||
180 | |||
181 | spin_lock_irqsave(&ctx->lock, flags); | ||
182 | |||
183 | counter_sched_out(counter, cpuctx, ctx); | ||
184 | |||
185 | counter->task = NULL; | ||
186 | ctx->nr_counters--; | ||
187 | |||
188 | /* | ||
189 | * Protect the list operation against NMI by disabling the | ||
190 | * counters on a global level. NOP for non NMI based counters. | ||
191 | */ | ||
192 | perf_flags = hw_perf_save_disable(); | ||
193 | list_del_counter(counter, ctx); | ||
194 | hw_perf_restore(perf_flags); | ||
195 | |||
196 | if (!ctx->task) { | ||
197 | /* | ||
198 | * Allow more per task counters with respect to the | ||
199 | * reservation: | ||
200 | */ | ||
201 | cpuctx->max_pertask = | ||
202 | min(perf_max_counters - ctx->nr_counters, | ||
203 | perf_max_counters - perf_reserved_percpu); | ||
204 | } | ||
205 | |||
206 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
207 | } | ||
208 | |||
209 | |||
210 | /* | ||
211 | * Remove the counter from a task's (or a CPU's) list of counters. | ||
212 | * | ||
213 | * Must be called with counter->mutex and ctx->mutex held. | ||
214 | * | ||
215 | * CPU counters are removed with a smp call. For task counters we only | ||
216 | * call when the task is on a CPU. | ||
217 | */ | ||
218 | static void perf_counter_remove_from_context(struct perf_counter *counter) | ||
219 | { | ||
220 | struct perf_counter_context *ctx = counter->ctx; | ||
221 | struct task_struct *task = ctx->task; | ||
222 | |||
223 | if (!task) { | ||
224 | /* | ||
225 | * Per cpu counters are removed via an smp call and | ||
226 | * the removal is always sucessful. | ||
227 | */ | ||
228 | smp_call_function_single(counter->cpu, | ||
229 | __perf_counter_remove_from_context, | ||
230 | counter, 1); | ||
231 | return; | ||
232 | } | ||
233 | |||
234 | retry: | ||
235 | task_oncpu_function_call(task, __perf_counter_remove_from_context, | ||
236 | counter); | ||
237 | |||
238 | spin_lock_irq(&ctx->lock); | ||
239 | /* | ||
240 | * If the context is active we need to retry the smp call. | ||
241 | */ | ||
242 | if (ctx->nr_active && !list_empty(&counter->list_entry)) { | ||
243 | spin_unlock_irq(&ctx->lock); | ||
244 | goto retry; | ||
245 | } | ||
246 | |||
247 | /* | ||
248 | * The lock prevents that this context is scheduled in so we | ||
249 | * can remove the counter safely, if the call above did not | ||
250 | * succeed. | ||
251 | */ | ||
252 | if (!list_empty(&counter->list_entry)) { | ||
253 | ctx->nr_counters--; | ||
254 | list_del_counter(counter, ctx); | ||
255 | counter->task = NULL; | ||
256 | } | ||
257 | spin_unlock_irq(&ctx->lock); | ||
258 | } | ||
259 | |||
260 | static inline u64 perf_clock(void) | ||
261 | { | ||
262 | return cpu_clock(smp_processor_id()); | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * Update the record of the current time in a context. | ||
267 | */ | ||
268 | static void update_context_time(struct perf_counter_context *ctx) | ||
269 | { | ||
270 | u64 now = perf_clock(); | ||
271 | |||
272 | ctx->time += now - ctx->timestamp; | ||
273 | ctx->timestamp = now; | ||
274 | } | ||
275 | |||
276 | /* | ||
277 | * Update the total_time_enabled and total_time_running fields for a counter. | ||
278 | */ | ||
279 | static void update_counter_times(struct perf_counter *counter) | ||
280 | { | ||
281 | struct perf_counter_context *ctx = counter->ctx; | ||
282 | u64 run_end; | ||
283 | |||
284 | if (counter->state < PERF_COUNTER_STATE_INACTIVE) | ||
285 | return; | ||
286 | |||
287 | counter->total_time_enabled = ctx->time - counter->tstamp_enabled; | ||
288 | |||
289 | if (counter->state == PERF_COUNTER_STATE_INACTIVE) | ||
290 | run_end = counter->tstamp_stopped; | ||
291 | else | ||
292 | run_end = ctx->time; | ||
293 | |||
294 | counter->total_time_running = run_end - counter->tstamp_running; | ||
295 | } | ||
296 | |||
297 | /* | ||
298 | * Update total_time_enabled and total_time_running for all counters in a group. | ||
299 | */ | ||
300 | static void update_group_times(struct perf_counter *leader) | ||
301 | { | ||
302 | struct perf_counter *counter; | ||
303 | |||
304 | update_counter_times(leader); | ||
305 | list_for_each_entry(counter, &leader->sibling_list, list_entry) | ||
306 | update_counter_times(counter); | ||
307 | } | ||
308 | |||
309 | /* | ||
310 | * Cross CPU call to disable a performance counter | ||
311 | */ | ||
312 | static void __perf_counter_disable(void *info) | ||
313 | { | ||
314 | struct perf_counter *counter = info; | ||
315 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
316 | struct perf_counter_context *ctx = counter->ctx; | ||
317 | unsigned long flags; | ||
318 | |||
319 | /* | ||
320 | * If this is a per-task counter, need to check whether this | ||
321 | * counter's task is the current task on this cpu. | ||
322 | */ | ||
323 | if (ctx->task && cpuctx->task_ctx != ctx) | ||
324 | return; | ||
325 | |||
326 | spin_lock_irqsave(&ctx->lock, flags); | ||
327 | |||
328 | /* | ||
329 | * If the counter is on, turn it off. | ||
330 | * If it is in error state, leave it in error state. | ||
331 | */ | ||
332 | if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { | ||
333 | update_context_time(ctx); | ||
334 | update_counter_times(counter); | ||
335 | if (counter == counter->group_leader) | ||
336 | group_sched_out(counter, cpuctx, ctx); | ||
337 | else | ||
338 | counter_sched_out(counter, cpuctx, ctx); | ||
339 | counter->state = PERF_COUNTER_STATE_OFF; | ||
340 | } | ||
341 | |||
342 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
343 | } | ||
344 | |||
345 | /* | ||
346 | * Disable a counter. | ||
347 | */ | ||
348 | static void perf_counter_disable(struct perf_counter *counter) | ||
349 | { | ||
350 | struct perf_counter_context *ctx = counter->ctx; | ||
351 | struct task_struct *task = ctx->task; | ||
352 | |||
353 | if (!task) { | ||
354 | /* | ||
355 | * Disable the counter on the cpu that it's on | ||
356 | */ | ||
357 | smp_call_function_single(counter->cpu, __perf_counter_disable, | ||
358 | counter, 1); | ||
359 | return; | ||
360 | } | ||
361 | |||
362 | retry: | ||
363 | task_oncpu_function_call(task, __perf_counter_disable, counter); | ||
364 | |||
365 | spin_lock_irq(&ctx->lock); | ||
366 | /* | ||
367 | * If the counter is still active, we need to retry the cross-call. | ||
368 | */ | ||
369 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) { | ||
370 | spin_unlock_irq(&ctx->lock); | ||
371 | goto retry; | ||
372 | } | ||
373 | |||
374 | /* | ||
375 | * Since we have the lock this context can't be scheduled | ||
376 | * in, so we can change the state safely. | ||
377 | */ | ||
378 | if (counter->state == PERF_COUNTER_STATE_INACTIVE) { | ||
379 | update_counter_times(counter); | ||
380 | counter->state = PERF_COUNTER_STATE_OFF; | ||
381 | } | ||
382 | |||
383 | spin_unlock_irq(&ctx->lock); | ||
384 | } | ||
385 | |||
386 | /* | ||
387 | * Disable a counter and all its children. | ||
388 | */ | ||
389 | static void perf_counter_disable_family(struct perf_counter *counter) | ||
390 | { | ||
391 | struct perf_counter *child; | ||
392 | |||
393 | perf_counter_disable(counter); | ||
394 | |||
395 | /* | ||
396 | * Lock the mutex to protect the list of children | ||
397 | */ | ||
398 | mutex_lock(&counter->mutex); | ||
399 | list_for_each_entry(child, &counter->child_list, child_list) | ||
400 | perf_counter_disable(child); | ||
401 | mutex_unlock(&counter->mutex); | ||
402 | } | ||
403 | |||
404 | static int | ||
405 | counter_sched_in(struct perf_counter *counter, | ||
406 | struct perf_cpu_context *cpuctx, | ||
407 | struct perf_counter_context *ctx, | ||
408 | int cpu) | ||
409 | { | ||
410 | if (counter->state <= PERF_COUNTER_STATE_OFF) | ||
411 | return 0; | ||
412 | |||
413 | counter->state = PERF_COUNTER_STATE_ACTIVE; | ||
414 | counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ | ||
415 | /* | ||
416 | * The new state must be visible before we turn it on in the hardware: | ||
417 | */ | ||
418 | smp_wmb(); | ||
419 | |||
420 | if (counter->hw_ops->enable(counter)) { | ||
421 | counter->state = PERF_COUNTER_STATE_INACTIVE; | ||
422 | counter->oncpu = -1; | ||
423 | return -EAGAIN; | ||
424 | } | ||
425 | |||
426 | counter->tstamp_running += ctx->time - counter->tstamp_stopped; | ||
427 | |||
428 | if (!is_software_counter(counter)) | ||
429 | cpuctx->active_oncpu++; | ||
430 | ctx->nr_active++; | ||
431 | |||
432 | if (counter->hw_event.exclusive) | ||
433 | cpuctx->exclusive = 1; | ||
434 | |||
435 | return 0; | ||
436 | } | ||
437 | |||
438 | /* | ||
439 | * Return 1 for a group consisting entirely of software counters, | ||
440 | * 0 if the group contains any hardware counters. | ||
441 | */ | ||
442 | static int is_software_only_group(struct perf_counter *leader) | ||
443 | { | ||
444 | struct perf_counter *counter; | ||
445 | |||
446 | if (!is_software_counter(leader)) | ||
447 | return 0; | ||
448 | |||
449 | list_for_each_entry(counter, &leader->sibling_list, list_entry) | ||
450 | if (!is_software_counter(counter)) | ||
451 | return 0; | ||
452 | |||
453 | return 1; | ||
454 | } | ||
455 | |||
456 | /* | ||
457 | * Work out whether we can put this counter group on the CPU now. | ||
458 | */ | ||
459 | static int group_can_go_on(struct perf_counter *counter, | ||
460 | struct perf_cpu_context *cpuctx, | ||
461 | int can_add_hw) | ||
462 | { | ||
463 | /* | ||
464 | * Groups consisting entirely of software counters can always go on. | ||
465 | */ | ||
466 | if (is_software_only_group(counter)) | ||
467 | return 1; | ||
468 | /* | ||
469 | * If an exclusive group is already on, no other hardware | ||
470 | * counters can go on. | ||
471 | */ | ||
472 | if (cpuctx->exclusive) | ||
473 | return 0; | ||
474 | /* | ||
475 | * If this group is exclusive and there are already | ||
476 | * counters on the CPU, it can't go on. | ||
477 | */ | ||
478 | if (counter->hw_event.exclusive && cpuctx->active_oncpu) | ||
479 | return 0; | ||
480 | /* | ||
481 | * Otherwise, try to add it if all previous groups were able | ||
482 | * to go on. | ||
483 | */ | ||
484 | return can_add_hw; | ||
485 | } | ||
486 | |||
487 | static void add_counter_to_ctx(struct perf_counter *counter, | ||
488 | struct perf_counter_context *ctx) | ||
489 | { | ||
490 | list_add_counter(counter, ctx); | ||
491 | ctx->nr_counters++; | ||
492 | counter->prev_state = PERF_COUNTER_STATE_OFF; | ||
493 | counter->tstamp_enabled = ctx->time; | ||
494 | counter->tstamp_running = ctx->time; | ||
495 | counter->tstamp_stopped = ctx->time; | ||
496 | } | ||
497 | |||
498 | /* | ||
499 | * Cross CPU call to install and enable a performance counter | ||
500 | */ | ||
501 | static void __perf_install_in_context(void *info) | ||
502 | { | ||
503 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
504 | struct perf_counter *counter = info; | ||
505 | struct perf_counter_context *ctx = counter->ctx; | ||
506 | struct perf_counter *leader = counter->group_leader; | ||
507 | int cpu = smp_processor_id(); | ||
508 | unsigned long flags; | ||
509 | u64 perf_flags; | ||
510 | int err; | ||
511 | |||
512 | /* | ||
513 | * If this is a task context, we need to check whether it is | ||
514 | * the current task context of this cpu. If not it has been | ||
515 | * scheduled out before the smp call arrived. | ||
516 | */ | ||
517 | if (ctx->task && cpuctx->task_ctx != ctx) | ||
518 | return; | ||
519 | |||
520 | spin_lock_irqsave(&ctx->lock, flags); | ||
521 | update_context_time(ctx); | ||
522 | |||
523 | /* | ||
524 | * Protect the list operation against NMI by disabling the | ||
525 | * counters on a global level. NOP for non NMI based counters. | ||
526 | */ | ||
527 | perf_flags = hw_perf_save_disable(); | ||
528 | |||
529 | add_counter_to_ctx(counter, ctx); | ||
530 | |||
531 | /* | ||
532 | * Don't put the counter on if it is disabled or if | ||
533 | * it is in a group and the group isn't on. | ||
534 | */ | ||
535 | if (counter->state != PERF_COUNTER_STATE_INACTIVE || | ||
536 | (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)) | ||
537 | goto unlock; | ||
538 | |||
539 | /* | ||
540 | * An exclusive counter can't go on if there are already active | ||
541 | * hardware counters, and no hardware counter can go on if there | ||
542 | * is already an exclusive counter on. | ||
543 | */ | ||
544 | if (!group_can_go_on(counter, cpuctx, 1)) | ||
545 | err = -EEXIST; | ||
546 | else | ||
547 | err = counter_sched_in(counter, cpuctx, ctx, cpu); | ||
548 | |||
549 | if (err) { | ||
550 | /* | ||
551 | * This counter couldn't go on. If it is in a group | ||
552 | * then we have to pull the whole group off. | ||
553 | * If the counter group is pinned then put it in error state. | ||
554 | */ | ||
555 | if (leader != counter) | ||
556 | group_sched_out(leader, cpuctx, ctx); | ||
557 | if (leader->hw_event.pinned) { | ||
558 | update_group_times(leader); | ||
559 | leader->state = PERF_COUNTER_STATE_ERROR; | ||
560 | } | ||
561 | } | ||
562 | |||
563 | if (!err && !ctx->task && cpuctx->max_pertask) | ||
564 | cpuctx->max_pertask--; | ||
565 | |||
566 | unlock: | ||
567 | hw_perf_restore(perf_flags); | ||
568 | |||
569 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
570 | } | ||
571 | |||
572 | /* | ||
573 | * Attach a performance counter to a context | ||
574 | * | ||
575 | * First we add the counter to the list with the hardware enable bit | ||
576 | * in counter->hw_config cleared. | ||
577 | * | ||
578 | * If the counter is attached to a task which is on a CPU we use a smp | ||
579 | * call to enable it in the task context. The task might have been | ||
580 | * scheduled away, but we check this in the smp call again. | ||
581 | * | ||
582 | * Must be called with ctx->mutex held. | ||
583 | */ | ||
static void
perf_install_in_context(struct perf_counter_context *ctx,
			struct perf_counter *counter,
			int cpu)
{
	struct task_struct *task = ctx->task;

	if (!task) {
		/*
		 * Per cpu counters are installed via an smp call and
		 * the install is always successful.
		 */
		smp_call_function_single(cpu, __perf_install_in_context,
					 counter, 1);
		return;
	}

	counter->task = task;
retry:
	task_oncpu_function_call(task, __perf_install_in_context,
				 counter);

	spin_lock_irq(&ctx->lock);
	/*
	 * If the context is active but the counter did not make it onto
	 * the list, the cross-call raced with a context switch and
	 * we need to retry the smp call.
	 */
	if (ctx->is_active && list_empty(&counter->list_entry)) {
		spin_unlock_irq(&ctx->lock);
		goto retry;
	}

	/*
	 * The lock prevents that this context is scheduled in so we
	 * can add the counter safely, if the call above did not
	 * succeed.
	 */
	if (list_empty(&counter->list_entry))
		add_counter_to_ctx(counter, ctx);
	spin_unlock_irq(&ctx->lock);
}
624 | |||
625 | /* | ||
626 | * Cross CPU call to enable a performance counter | ||
627 | */ | ||
static void __perf_counter_enable(void *info)
{
	struct perf_counter *counter = info;
	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
	struct perf_counter_context *ctx = counter->ctx;
	struct perf_counter *leader = counter->group_leader;
	unsigned long flags;
	int err;

	/*
	 * If this is a per-task counter, need to check whether this
	 * counter's task is the current task on this cpu.
	 */
	if (ctx->task && cpuctx->task_ctx != ctx)
		return;

	spin_lock_irqsave(&ctx->lock, flags);
	update_context_time(ctx);

	counter->prev_state = counter->state;
	/* Already INACTIVE or ACTIVE: nothing to enable. */
	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
		goto unlock;
	counter->state = PERF_COUNTER_STATE_INACTIVE;
	counter->tstamp_enabled = ctx->time - counter->total_time_enabled;

	/*
	 * If the counter is in a group and isn't the group leader,
	 * then don't put it on unless the group is on.
	 */
	if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)
		goto unlock;

	if (!group_can_go_on(counter, cpuctx, 1))
		err = -EEXIST;
	else
		err = counter_sched_in(counter, cpuctx, ctx,
				       smp_processor_id());

	if (err) {
		/*
		 * If this counter can't go on and it's part of a
		 * group, then the whole group has to come off.
		 */
		if (leader != counter)
			group_sched_out(leader, cpuctx, ctx);
		/* A pinned leader that cannot run goes into error state. */
		if (leader->hw_event.pinned) {
			update_group_times(leader);
			leader->state = PERF_COUNTER_STATE_ERROR;
		}
	}

 unlock:
	spin_unlock_irqrestore(&ctx->lock, flags);
}
682 | |||
683 | /* | ||
684 | * Enable a counter. | ||
685 | */ | ||
static void perf_counter_enable(struct perf_counter *counter)
{
	struct perf_counter_context *ctx = counter->ctx;
	struct task_struct *task = ctx->task;

	if (!task) {
		/*
		 * Enable the counter on the cpu that it's on
		 */
		smp_call_function_single(counter->cpu, __perf_counter_enable,
					 counter, 1);
		return;
	}

	spin_lock_irq(&ctx->lock);
	/* Already INACTIVE or ACTIVE: nothing to do. */
	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
		goto out;

	/*
	 * If the counter is in error state, clear that first.
	 * That way, if we see the counter in error state below, we
	 * know that it has gone back into error state, as distinct
	 * from the task having been scheduled away before the
	 * cross-call arrived.
	 */
	if (counter->state == PERF_COUNTER_STATE_ERROR)
		counter->state = PERF_COUNTER_STATE_OFF;

 retry:
	/* Drop the lock across the cross-call; retake it to recheck. */
	spin_unlock_irq(&ctx->lock);
	task_oncpu_function_call(task, __perf_counter_enable, counter);

	spin_lock_irq(&ctx->lock);

	/*
	 * If the context is active and the counter is still off,
	 * we need to retry the cross-call.
	 */
	if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF)
		goto retry;

	/*
	 * Since we have the lock this context can't be scheduled
	 * in, so we can change the state safely.
	 */
	if (counter->state == PERF_COUNTER_STATE_OFF) {
		counter->state = PERF_COUNTER_STATE_INACTIVE;
		counter->tstamp_enabled =
			ctx->time - counter->total_time_enabled;
	}
 out:
	spin_unlock_irq(&ctx->lock);
}
739 | |||
/*
 * Raise the counter's event limit by 'refresh' and (re)enable it.
 * NOTE(review): presumably event_limit bounds the number of events
 * delivered before the counter auto-disables — confirm against the
 * overflow/disable path.
 */
static void perf_counter_refresh(struct perf_counter *counter, int refresh)
{
	atomic_add(refresh, &counter->event_limit);
	perf_counter_enable(counter);
}
745 | |||
746 | /* | ||
747 | * Enable a counter and all its children. | ||
748 | */ | ||
749 | static void perf_counter_enable_family(struct perf_counter *counter) | ||
750 | { | ||
751 | struct perf_counter *child; | ||
752 | |||
753 | perf_counter_enable(counter); | ||
754 | |||
755 | /* | ||
756 | * Lock the mutex to protect the list of children | ||
757 | */ | ||
758 | mutex_lock(&counter->mutex); | ||
759 | list_for_each_entry(child, &counter->child_list, child_list) | ||
760 | perf_counter_enable(child); | ||
761 | mutex_unlock(&counter->mutex); | ||
762 | } | ||
763 | |||
/*
 * Deschedule every counter group of a context, with the PMU globally
 * disabled across the operation.  Caller must have interrupts disabled.
 */
void __perf_counter_sched_out(struct perf_counter_context *ctx,
			      struct perf_cpu_context *cpuctx)
{
	struct perf_counter *counter;
	u64 flags;	/* hw_perf state cookie, not irq flags */

	spin_lock(&ctx->lock);
	ctx->is_active = 0;
	if (likely(!ctx->nr_counters))
		goto out;
	update_context_time(ctx);

	flags = hw_perf_save_disable();
	if (ctx->nr_active) {
		list_for_each_entry(counter, &ctx->counter_list, list_entry)
			group_sched_out(counter, cpuctx, ctx);
	}
	hw_perf_restore(flags);
 out:
	spin_unlock(&ctx->lock);
}
785 | |||
786 | /* | ||
787 | * Called from scheduler to remove the counters of the current task, | ||
788 | * with interrupts disabled. | ||
789 | * | ||
790 | * We stop each counter and update the counter value in counter->count. | ||
791 | * | ||
792 | * This does not protect us against NMI, but disable() | ||
793 | * sets the disabled bit in the control field of counter _before_ | ||
794 | * accessing the counter control register. If a NMI hits, then it will | ||
795 | * not restart the counter. | ||
796 | */ | ||
void perf_counter_task_sched_out(struct task_struct *task, int cpu)
{
	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
	struct perf_counter_context *ctx = &task->perf_counter_ctx;
	struct pt_regs *regs;

	/* No task context resident on this cpu: nothing to switch out. */
	if (likely(!cpuctx->task_ctx))
		return;

	update_context_time(ctx);

	/* Account the context switch itself as a software event. */
	regs = task_pt_regs(task);
	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
	__perf_counter_sched_out(ctx, cpuctx);

	cpuctx->task_ctx = NULL;
}
814 | |||
815 | static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx) | ||
816 | { | ||
817 | __perf_counter_sched_out(&cpuctx->ctx, cpuctx); | ||
818 | } | ||
819 | |||
/*
 * Schedule a whole counter group (leader plus siblings) onto the PMU.
 * Groups go on as a unit: if any member fails, the partial group is
 * undone and -EAGAIN is returned.
 */
static int
group_sched_in(struct perf_counter *group_counter,
	       struct perf_cpu_context *cpuctx,
	       struct perf_counter_context *ctx,
	       int cpu)
{
	struct perf_counter *counter, *partial_group;
	int ret;

	if (group_counter->state == PERF_COUNTER_STATE_OFF)
		return 0;

	/*
	 * Non-zero means the arch code dealt with the whole group itself:
	 * positive = scheduled, negative = error.
	 */
	ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu);
	if (ret)
		return ret < 0 ? ret : 0;

	group_counter->prev_state = group_counter->state;
	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
		return -EAGAIN;

	/*
	 * Schedule in siblings as one group (if any):
	 */
	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
		counter->prev_state = counter->state;
		if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
			partial_group = counter;
			goto group_error;
		}
	}

	return 0;

group_error:
	/*
	 * Groups can be scheduled in as one unit only, so undo any
	 * partial group before returning:
	 */
	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
		if (counter == partial_group)
			break;
		counter_sched_out(counter, cpuctx, ctx);
	}
	counter_sched_out(group_counter, cpuctx, ctx);

	return -EAGAIN;
}
867 | |||
/*
 * Schedule a context's counters onto the PMU.  Pinned groups are placed
 * first (and error-ed out if they cannot run); flexible groups follow,
 * stopping new hardware additions after the first failure.
 */
static void
__perf_counter_sched_in(struct perf_counter_context *ctx,
			struct perf_cpu_context *cpuctx, int cpu)
{
	struct perf_counter *counter;
	u64 flags;	/* hw_perf state cookie, not irq flags */
	int can_add_hw = 1;

	spin_lock(&ctx->lock);
	ctx->is_active = 1;
	if (likely(!ctx->nr_counters))
		goto out;

	ctx->timestamp = perf_clock();

	flags = hw_perf_save_disable();

	/*
	 * First go through the list and put on any pinned groups
	 * in order to give them the best chance of going on.
	 */
	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
		if (counter->state <= PERF_COUNTER_STATE_OFF ||
		    !counter->hw_event.pinned)
			continue;
		if (counter->cpu != -1 && counter->cpu != cpu)
			continue;

		if (group_can_go_on(counter, cpuctx, 1))
			group_sched_in(counter, cpuctx, ctx, cpu);

		/*
		 * If this pinned group hasn't been scheduled,
		 * put it in error state.
		 */
		if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
			update_group_times(counter);
			counter->state = PERF_COUNTER_STATE_ERROR;
		}
	}

	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
		/*
		 * Ignore counters in OFF or ERROR state, and
		 * ignore pinned counters since we did them already.
		 */
		if (counter->state <= PERF_COUNTER_STATE_OFF ||
		    counter->hw_event.pinned)
			continue;

		/*
		 * Listen to the 'cpu' scheduling filter constraint
		 * of counters:
		 */
		if (counter->cpu != -1 && counter->cpu != cpu)
			continue;

		/* Once one group fails, stop trying to add hardware groups. */
		if (group_can_go_on(counter, cpuctx, can_add_hw)) {
			if (group_sched_in(counter, cpuctx, ctx, cpu))
				can_add_hw = 0;
		}
	}
	hw_perf_restore(flags);
 out:
	spin_unlock(&ctx->lock);
}
934 | |||
935 | /* | ||
936 | * Called from scheduler to add the counters of the current task | ||
937 | * with interrupts disabled. | ||
938 | * | ||
939 | * We restore the counter value and then enable it. | ||
940 | * | ||
941 | * This does not protect us against NMI, but enable() | ||
942 | * sets the enabled bit in the control field of counter _before_ | ||
943 | * accessing the counter control register. If a NMI hits, then it will | ||
944 | * keep the counter running. | ||
945 | */ | ||
946 | void perf_counter_task_sched_in(struct task_struct *task, int cpu) | ||
947 | { | ||
948 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
949 | struct perf_counter_context *ctx = &task->perf_counter_ctx; | ||
950 | |||
951 | __perf_counter_sched_in(ctx, cpuctx, cpu); | ||
952 | cpuctx->task_ctx = ctx; | ||
953 | } | ||
954 | |||
955 | static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) | ||
956 | { | ||
957 | struct perf_counter_context *ctx = &cpuctx->ctx; | ||
958 | |||
959 | __perf_counter_sched_in(ctx, cpuctx, cpu); | ||
960 | } | ||
961 | |||
/*
 * Turn off all of the current task's counters.  Counters already in
 * error state are left as they are.  Always returns 0.
 */
int perf_counter_task_disable(void)
{
	struct task_struct *curr = current;
	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
	struct perf_counter *counter;
	unsigned long flags;
	u64 perf_flags;
	int cpu;

	if (likely(!ctx->nr_counters))
		return 0;

	local_irq_save(flags);
	cpu = smp_processor_id();

	perf_counter_task_sched_out(curr, cpu);

	spin_lock(&ctx->lock);

	/*
	 * Disable all the counters:
	 */
	perf_flags = hw_perf_save_disable();

	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
		if (counter->state != PERF_COUNTER_STATE_ERROR) {
			update_group_times(counter);
			counter->state = PERF_COUNTER_STATE_OFF;
		}
	}

	hw_perf_restore(perf_flags);

	/* Pairs with the local_irq_save() above. */
	spin_unlock_irqrestore(&ctx->lock, flags);

	return 0;
}
999 | |||
/*
 * Turn on all of the current task's disabled counters and reschedule
 * them onto the PMU.  Always returns 0.
 */
int perf_counter_task_enable(void)
{
	struct task_struct *curr = current;
	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
	struct perf_counter *counter;
	unsigned long flags;
	u64 perf_flags;
	int cpu;

	if (likely(!ctx->nr_counters))
		return 0;

	local_irq_save(flags);
	cpu = smp_processor_id();

	perf_counter_task_sched_out(curr, cpu);

	spin_lock(&ctx->lock);

	/*
	 * Enable all the counters:
	 */
	perf_flags = hw_perf_save_disable();

	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
		/* Skip counters that are already enabled. */
		if (counter->state > PERF_COUNTER_STATE_OFF)
			continue;
		counter->state = PERF_COUNTER_STATE_INACTIVE;
		counter->tstamp_enabled =
			ctx->time - counter->total_time_enabled;
		counter->hw_event.disabled = 0;
	}
	hw_perf_restore(perf_flags);

	spin_unlock(&ctx->lock);

	perf_counter_task_sched_in(curr, cpu);

	local_irq_restore(flags);

	return 0;
}
1042 | |||
1043 | /* | ||
1044 | * Round-robin a context's counters: | ||
1045 | */ | ||
static void rotate_ctx(struct perf_counter_context *ctx)
{
	struct perf_counter *counter;
	u64 perf_flags;

	if (!ctx->nr_counters)
		return;

	spin_lock(&ctx->lock);
	/*
	 * Rotate the first entry last (works just fine for group counters too):
	 * the loop body runs at most once - it moves the first list entry
	 * to the tail and breaks out immediately.
	 */
	perf_flags = hw_perf_save_disable();
	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
		list_move_tail(&counter->list_entry, &ctx->counter_list);
		break;
	}
	hw_perf_restore(perf_flags);

	spin_unlock(&ctx->lock);
}
1067 | |||
/*
 * Scheduler tick: deschedule, round-robin and reschedule the current
 * task's counters so contending groups all get PMU time.
 */
void perf_counter_task_tick(struct task_struct *curr, int cpu)
{
	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
	/* Per-cpu context rotation is compile-time disabled for now. */
	const int rotate_percpu = 0;

	if (rotate_percpu)
		perf_counter_cpu_sched_out(cpuctx);
	perf_counter_task_sched_out(curr, cpu);

	if (rotate_percpu)
		rotate_ctx(&cpuctx->ctx);
	rotate_ctx(ctx);

	if (rotate_percpu)
		perf_counter_cpu_sched_in(cpuctx, cpu);
	perf_counter_task_sched_in(curr, cpu);
}
1086 | |||
1087 | /* | ||
1088 | * Cross CPU call to read the hardware counter | ||
1089 | */ | ||
1090 | static void __read(void *info) | ||
1091 | { | ||
1092 | struct perf_counter *counter = info; | ||
1093 | struct perf_counter_context *ctx = counter->ctx; | ||
1094 | unsigned long flags; | ||
1095 | |||
1096 | local_irq_save(flags); | ||
1097 | if (ctx->is_active) | ||
1098 | update_context_time(ctx); | ||
1099 | counter->hw_ops->read(counter); | ||
1100 | update_counter_times(counter); | ||
1101 | local_irq_restore(flags); | ||
1102 | } | ||
1103 | |||
/*
 * Return the up-to-date value of a counter, refreshing counter->count
 * from the hardware (via a cross-call) if the counter is running.
 */
static u64 perf_counter_read(struct perf_counter *counter)
{
	/*
	 * If counter is enabled and currently active on a CPU, update the
	 * value in the counter structure:
	 */
	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
		smp_call_function_single(counter->oncpu,
					 __read, counter, 1);
	} else if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
		/* Not running: only the time accounting needs a refresh. */
		update_counter_times(counter);
	}

	return atomic64_read(&counter->count);
}
1119 | |||
1120 | static void put_context(struct perf_counter_context *ctx) | ||
1121 | { | ||
1122 | if (ctx->task) | ||
1123 | put_task_struct(ctx->task); | ||
1124 | } | ||
1125 | |||
/*
 * Resolve the target context for a new counter: the per-cpu context when
 * cpu != -1, otherwise the context of the task identified by pid
 * (pid == 0 meaning current).  Takes a task reference in the per-task
 * case; release it with put_context().  Returns ERR_PTR() on failure.
 */
static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
{
	struct perf_cpu_context *cpuctx;
	struct perf_counter_context *ctx;
	struct task_struct *task;

	/*
	 * If cpu is not a wildcard then this is a percpu counter:
	 */
	if (cpu != -1) {
		/* Must be root to operate on a CPU counter: */
		if (sysctl_perf_counter_priv && !capable(CAP_SYS_ADMIN))
			return ERR_PTR(-EACCES);

		/*
		 * NOTE(review): cpu == num_possible_cpus() passes this
		 * check, and on sparse maps valid ids can exceed the
		 * count; consider bounding by nr_cpu_ids instead -
		 * confirm before relying on the cpu_isset() below to
		 * stay within the cpumask.
		 */
		if (cpu < 0 || cpu > num_possible_cpus())
			return ERR_PTR(-EINVAL);

		/*
		 * We could be clever and allow to attach a counter to an
		 * offline CPU and activate it when the CPU comes up, but
		 * that's for later.
		 */
		if (!cpu_isset(cpu, cpu_online_map))
			return ERR_PTR(-ENODEV);

		cpuctx = &per_cpu(perf_cpu_context, cpu);
		ctx = &cpuctx->ctx;

		return ctx;
	}

	rcu_read_lock();
	if (!pid)
		task = current;
	else
		task = find_task_by_vpid(pid);
	if (task)
		get_task_struct(task);
	rcu_read_unlock();

	if (!task)
		return ERR_PTR(-ESRCH);

	ctx = &task->perf_counter_ctx;
	ctx->task = task;

	/* Reuse ptrace permission checks for now. */
	if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
		put_context(ctx);
		return ERR_PTR(-EACCES);
	}

	return ctx;
}
1180 | |||
1181 | static void free_counter_rcu(struct rcu_head *head) | ||
1182 | { | ||
1183 | struct perf_counter *counter; | ||
1184 | |||
1185 | counter = container_of(head, struct perf_counter, rcu_head); | ||
1186 | kfree(counter); | ||
1187 | } | ||
1188 | |||
1189 | static void perf_pending_sync(struct perf_counter *counter); | ||
1190 | |||
/*
 * Tear down a counter: flush pending work, drop the global tracking
 * refcounts it held, run its destroy hook and free it after an RCU
 * grace period.
 */
static void free_counter(struct perf_counter *counter)
{
	perf_pending_sync(counter);

	if (counter->hw_event.mmap)
		atomic_dec(&nr_mmap_tracking);
	if (counter->hw_event.munmap)
		atomic_dec(&nr_munmap_tracking);
	if (counter->hw_event.comm)
		atomic_dec(&nr_comm_tracking);

	if (counter->destroy)
		counter->destroy(counter);

	call_rcu(&counter->rcu_head, free_counter_rcu);
}
1207 | |||
1208 | /* | ||
1209 | * Called when the last reference to the file is gone. | ||
1210 | */ | ||
static int perf_release(struct inode *inode, struct file *file)
{
	struct perf_counter *counter = file->private_data;
	struct perf_counter_context *ctx = counter->ctx;

	file->private_data = NULL;

	/* Lock order: ctx->mutex outside counter->mutex. */
	mutex_lock(&ctx->mutex);
	mutex_lock(&counter->mutex);

	perf_counter_remove_from_context(counter);

	mutex_unlock(&counter->mutex);
	mutex_unlock(&ctx->mutex);

	free_counter(counter);
	/* Drops the task reference pinned at context lookup time. */
	put_context(ctx);

	return 0;
}
1231 | |||
1232 | /* | ||
1233 | * Read the performance counter - simple non blocking version for now | ||
1234 | */ | ||
static ssize_t
perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
{
	/* Room for the value plus the two optional time fields. */
	u64 values[3];
	int n;

	/*
	 * Return end-of-file for a read on a counter that is in
	 * error state (i.e. because it was pinned but it couldn't be
	 * scheduled on to the CPU at some point).
	 */
	if (counter->state == PERF_COUNTER_STATE_ERROR)
		return 0;

	mutex_lock(&counter->mutex);
	values[0] = perf_counter_read(counter);
	n = 1;
	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		values[n++] = counter->total_time_enabled +
			atomic64_read(&counter->child_total_time_enabled);
	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		values[n++] = counter->total_time_running +
			atomic64_read(&counter->child_total_time_running);
	mutex_unlock(&counter->mutex);

	/* The caller's buffer must hold the whole record. */
	if (count < n * sizeof(u64))
		return -EINVAL;
	count = n * sizeof(u64);

	if (copy_to_user(buf, values, count))
		return -EFAULT;

	return count;
}
1269 | |||
1270 | static ssize_t | ||
1271 | perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) | ||
1272 | { | ||
1273 | struct perf_counter *counter = file->private_data; | ||
1274 | |||
1275 | return perf_read_hw(counter, buf, count); | ||
1276 | } | ||
1277 | |||
1278 | static unsigned int perf_poll(struct file *file, poll_table *wait) | ||
1279 | { | ||
1280 | struct perf_counter *counter = file->private_data; | ||
1281 | struct perf_mmap_data *data; | ||
1282 | unsigned int events; | ||
1283 | |||
1284 | rcu_read_lock(); | ||
1285 | data = rcu_dereference(counter->data); | ||
1286 | if (data) | ||
1287 | events = atomic_xchg(&data->wakeup, 0); | ||
1288 | else | ||
1289 | events = POLL_HUP; | ||
1290 | rcu_read_unlock(); | ||
1291 | |||
1292 | poll_wait(file, &counter->waitq, wait); | ||
1293 | |||
1294 | return events; | ||
1295 | } | ||
1296 | |||
1297 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
1298 | { | ||
1299 | struct perf_counter *counter = file->private_data; | ||
1300 | int err = 0; | ||
1301 | |||
1302 | switch (cmd) { | ||
1303 | case PERF_COUNTER_IOC_ENABLE: | ||
1304 | perf_counter_enable_family(counter); | ||
1305 | break; | ||
1306 | case PERF_COUNTER_IOC_DISABLE: | ||
1307 | perf_counter_disable_family(counter); | ||
1308 | break; | ||
1309 | case PERF_COUNTER_IOC_REFRESH: | ||
1310 | perf_counter_refresh(counter, arg); | ||
1311 | break; | ||
1312 | default: | ||
1313 | err = -ENOTTY; | ||
1314 | } | ||
1315 | return err; | ||
1316 | } | ||
1317 | |||
1318 | /* | ||
1319 | * Callers need to ensure there can be no nesting of this function, otherwise | ||
1320 | * the seqlock logic goes bad. We can not serialize this because the arch | ||
1321 | * code calls this from NMI context. | ||
1322 | */ | ||
void perf_counter_update_userpage(struct perf_counter *counter)
{
	struct perf_mmap_data *data;
	struct perf_counter_mmap_page *userpg;

	rcu_read_lock();
	data = rcu_dereference(counter->data);
	if (!data)
		goto unlock;

	userpg = data->user_page;

	/*
	 * Disable preemption so as to not let the corresponding user-space
	 * spin too long if we get preempted.
	 */
	preempt_disable();
	/* Seqlock-style: lock is odd while the fields below are updated. */
	++userpg->lock;
	barrier();
	userpg->index = counter->hw.idx;
	userpg->offset = atomic64_read(&counter->count);
	/* While active, user-space adds the raw PMC delta to offset. */
	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
		userpg->offset -= atomic64_read(&counter->hw.prev_count);

	barrier();
	++userpg->lock;	/* back to even: update complete */
	preempt_enable();
unlock:
	rcu_read_unlock();
}
1353 | |||
1354 | static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
1355 | { | ||
1356 | struct perf_counter *counter = vma->vm_file->private_data; | ||
1357 | struct perf_mmap_data *data; | ||
1358 | int ret = VM_FAULT_SIGBUS; | ||
1359 | |||
1360 | rcu_read_lock(); | ||
1361 | data = rcu_dereference(counter->data); | ||
1362 | if (!data) | ||
1363 | goto unlock; | ||
1364 | |||
1365 | if (vmf->pgoff == 0) { | ||
1366 | vmf->page = virt_to_page(data->user_page); | ||
1367 | } else { | ||
1368 | int nr = vmf->pgoff - 1; | ||
1369 | |||
1370 | if ((unsigned)nr > data->nr_pages) | ||
1371 | goto unlock; | ||
1372 | |||
1373 | vmf->page = virt_to_page(data->data_pages[nr]); | ||
1374 | } | ||
1375 | get_page(vmf->page); | ||
1376 | ret = 0; | ||
1377 | unlock: | ||
1378 | rcu_read_unlock(); | ||
1379 | |||
1380 | return ret; | ||
1381 | } | ||
1382 | |||
/*
 * Allocate the mmap ring-buffer: the descriptor (with a trailing page
 * pointer array), one zeroed control page and nr_pages zeroed data
 * pages, then publish it via RCU.  Returns 0 or -ENOMEM, unwinding any
 * partial allocation on failure.
 */
static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
{
	struct perf_mmap_data *data;
	unsigned long size;
	int i;

	WARN_ON(atomic_read(&counter->mmap_count));

	size = sizeof(struct perf_mmap_data);
	size += nr_pages * sizeof(void *);

	data = kzalloc(size, GFP_KERNEL);
	if (!data)
		goto fail;

	data->user_page = (void *)get_zeroed_page(GFP_KERNEL);
	if (!data->user_page)
		goto fail_user_page;

	for (i = 0; i < nr_pages; i++) {
		data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL);
		if (!data->data_pages[i])
			goto fail_data_pages;
	}

	data->nr_pages = nr_pages;

	rcu_assign_pointer(counter->data, data);

	return 0;

fail_data_pages:
	/* Free only the data pages allocated so far. */
	for (i--; i >= 0; i--)
		free_page((unsigned long)data->data_pages[i]);

	free_page((unsigned long)data->user_page);

fail_user_page:
	kfree(data);

fail:
	return -ENOMEM;
}
1426 | |||
1427 | static void __perf_mmap_data_free(struct rcu_head *rcu_head) | ||
1428 | { | ||
1429 | struct perf_mmap_data *data = container_of(rcu_head, | ||
1430 | struct perf_mmap_data, rcu_head); | ||
1431 | int i; | ||
1432 | |||
1433 | free_page((unsigned long)data->user_page); | ||
1434 | for (i = 0; i < data->nr_pages; i++) | ||
1435 | free_page((unsigned long)data->data_pages[i]); | ||
1436 | kfree(data); | ||
1437 | } | ||
1438 | |||
/*
 * Unpublish the ring-buffer and schedule its actual destruction for
 * after the RCU grace period, so lockless readers can drain.
 */
static void perf_mmap_data_free(struct perf_counter *counter)
{
	struct perf_mmap_data *data = counter->data;

	WARN_ON(atomic_read(&counter->mmap_count));

	rcu_assign_pointer(counter->data, NULL);
	call_rcu(&data->rcu_head, __perf_mmap_data_free);
}
1448 | |||
/* A new vma references the buffer (fork/split): take an mmap ref. */
static void perf_mmap_open(struct vm_area_struct *vma)
{
	struct perf_counter *counter = vma->vm_file->private_data;

	atomic_inc(&counter->mmap_count);
}
1455 | |||
/*
 * A vma goes away; on the last reference, un-account the locked pages
 * and free the ring-buffer, all under mmap_mutex.
 */
static void perf_mmap_close(struct vm_area_struct *vma)
{
	struct perf_counter *counter = vma->vm_file->private_data;

	if (atomic_dec_and_mutex_lock(&counter->mmap_count,
				      &counter->mmap_mutex)) {
		/* +1 for the control page accounted at mmap time. */
		vma->vm_mm->locked_vm -= counter->data->nr_pages + 1;
		perf_mmap_data_free(counter);
		mutex_unlock(&counter->mmap_mutex);
	}
}
1467 | |||
/* vma operations for the counter ring-buffer mapping. */
static struct vm_operations_struct perf_mmap_vmops = {
	.open  = perf_mmap_open,
	.close = perf_mmap_close,
	.fault = perf_mmap_fault,
};
1473 | |||
/*
 * mmap() the counter: maps one control page plus a power-of-two number
 * of data pages, read-only and shared, starting at offset 0.  The pages
 * are charged against RLIMIT_MEMLOCK unless the caller has CAP_IPC_LOCK.
 */
static int perf_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct perf_counter *counter = file->private_data;
	unsigned long vma_size;
	unsigned long nr_pages;
	unsigned long locked, lock_limit;
	int ret = 0;

	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
		return -EINVAL;

	vma_size = vma->vm_end - vma->vm_start;
	/* First page is the control page; the rest are data pages. */
	nr_pages = (vma_size / PAGE_SIZE) - 1;

	/*
	 * If we have data pages ensure they're a power-of-two number, so we
	 * can do bitmasks instead of modulo.
	 */
	if (nr_pages != 0 && !is_power_of_2(nr_pages))
		return -EINVAL;

	if (vma_size != PAGE_SIZE * (1 + nr_pages))
		return -EINVAL;

	if (vma->vm_pgoff != 0)
		return -EINVAL;

	mutex_lock(&counter->mmap_mutex);
	/* Already mapped: only allow re-mapping at the same size. */
	if (atomic_inc_not_zero(&counter->mmap_count)) {
		if (nr_pages != counter->data->nr_pages)
			ret = -EINVAL;
		goto unlock;
	}

	locked = vma->vm_mm->locked_vm;
	locked += nr_pages + 1;

	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
	lock_limit >>= PAGE_SHIFT;

	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
		ret = -EPERM;
		goto unlock;
	}

	WARN_ON(counter->data);
	ret = perf_mmap_data_alloc(counter, nr_pages);
	if (ret)
		goto unlock;

	atomic_set(&counter->mmap_count, 1);
	vma->vm_mm->locked_vm += nr_pages + 1;
unlock:
	mutex_unlock(&counter->mmap_mutex);

	vma->vm_flags &= ~VM_MAYWRITE;
	vma->vm_flags |= VM_RESERVED;
	vma->vm_ops = &perf_mmap_vmops;

	return ret;
}
1535 | |||
1536 | static int perf_fasync(int fd, struct file *filp, int on) | ||
1537 | { | ||
1538 | struct perf_counter *counter = filp->private_data; | ||
1539 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
1540 | int retval; | ||
1541 | |||
1542 | mutex_lock(&inode->i_mutex); | ||
1543 | retval = fasync_helper(fd, filp, on, &counter->fasync); | ||
1544 | mutex_unlock(&inode->i_mutex); | ||
1545 | |||
1546 | if (retval < 0) | ||
1547 | return retval; | ||
1548 | |||
1549 | return 0; | ||
1550 | } | ||
1551 | |||
/* File operations backing a counter fd. */
static const struct file_operations perf_fops = {
	.release		= perf_release,
	.read			= perf_read,
	.poll			= perf_poll,
	.unlocked_ioctl	= perf_ioctl,
	.compat_ioctl		= perf_ioctl,
	.mmap			= perf_mmap,
	.fasync		= perf_fasync,
};
1561 | |||
1562 | /* | ||
1563 | * Perf counter wakeup | ||
1564 | * | ||
1565 | * If there's data, ensure we set the poll() state and publish everything | ||
1566 | * to user-space before waking everybody up. | ||
1567 | */ | ||
1568 | |||
void perf_counter_wakeup(struct perf_counter *counter)
{
	struct perf_mmap_data *data;

	rcu_read_lock();
	data = rcu_dereference(counter->data);
	if (data) {
		/* Make the next poll() report readable data. */
		atomic_set(&data->wakeup, POLL_IN);
		/*
		 * Ensure all data writes are issued before updating the
		 * user-space data head information. The matching rmb()
		 * will be in userspace after reading this value.
		 */
		smp_wmb();
		data->user_page->data_head = atomic_read(&data->head);
	}
	rcu_read_unlock();

	wake_up_all(&counter->waitq);

	/* Deliver SIGIO to fasync subscribers if requested. */
	if (counter->pending_kill) {
		kill_fasync(&counter->fasync, SIGIO, counter->pending_kill);
		counter->pending_kill = 0;
	}
}
1594 | |||
1595 | /* | ||
1596 | * Pending wakeups | ||
1597 | * | ||
1598 | * Handle the case where we need to wakeup up from NMI (or rq->lock) context. | ||
1599 | * | ||
1600 | * The NMI bit means we cannot possibly take locks. Therefore, maintain a | ||
1601 | * single linked list and use cmpxchg() to add entries lockless. | ||
1602 | */ | ||
1603 | |||
1604 | static void perf_pending_counter(struct perf_pending_entry *entry) | ||
1605 | { | ||
1606 | struct perf_counter *counter = container_of(entry, | ||
1607 | struct perf_counter, pending); | ||
1608 | |||
1609 | if (counter->pending_disable) { | ||
1610 | counter->pending_disable = 0; | ||
1611 | perf_counter_disable(counter); | ||
1612 | } | ||
1613 | |||
1614 | if (counter->pending_wakeup) { | ||
1615 | counter->pending_wakeup = 0; | ||
1616 | perf_counter_wakeup(counter); | ||
1617 | } | ||
1618 | } | ||
1619 | |||
/* list terminator; also distinguishes a queued entry from a free one */
#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)

static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
	PENDING_TAIL,
};

/*
 * Push @entry onto this cpu's lockless pending list (NMI-safe) and
 * raise the perf-pending (self-)interrupt to process it.
 */
static void perf_pending_queue(struct perf_pending_entry *entry,
			       void (*func)(struct perf_pending_entry *))
{
	struct perf_pending_entry **head;

	/* entry->next is NULL only when the entry is not queued; claim it */
	if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
		return;

	entry->func = func;

	head = &get_cpu_var(perf_pending_head);

	/* lockless push onto the per-cpu list */
	do {
		entry->next = *head;
	} while (cmpxchg(head, entry->next, entry) != entry->next);

	set_perf_counter_pending();

	put_cpu_var(perf_pending_head);
}
1646 | |||
/*
 * Atomically grab this cpu's entire pending list and run each entry's
 * callback. Returns the number of entries processed.
 */
static int __perf_pending_run(void)
{
	struct perf_pending_entry *list;
	int nr = 0;

	list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
	while (list != PENDING_TAIL) {
		void (*func)(struct perf_pending_entry *);
		struct perf_pending_entry *entry = list;

		list = list->next;

		func = entry->func;
		/* mark the entry free again so it can be re-queued */
		entry->next = NULL;
		/*
		 * Ensure we observe the unqueue before we issue the wakeup,
		 * so that we won't be waiting forever.
		 * -- see perf_not_pending().
		 */
		smp_wmb();

		func(entry);
		nr++;
	}

	return nr;
}
1674 | |||
/*
 * Returns true once @counter's pending entry is no longer queued;
 * used as the wait_event() condition in perf_pending_sync().
 */
static inline int perf_not_pending(struct perf_counter *counter)
{
	/*
	 * If we flush on whatever cpu we run, there is a chance we don't
	 * need to wait.
	 */
	get_cpu();
	__perf_pending_run();
	put_cpu();

	/*
	 * Ensure we see the proper queue state before going to sleep
	 * so that we do not miss the wakeup. -- see perf_pending_handle()
	 */
	smp_rmb();
	return counter->pending.next == NULL;
}
1692 | |||
/* Block until the counter's pending entry has been processed. */
static void perf_pending_sync(struct perf_counter *counter)
{
	wait_event(counter->waitq, perf_not_pending(counter));
}

/* Entry point for the perf-pending (self-)interrupt. */
void perf_counter_do_pending(void)
{
	__perf_pending_run();
}
1702 | |||
1703 | /* | ||
1704 | * Callchain support -- arch specific | ||
1705 | */ | ||
1706 | |||
1707 | __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
1708 | { | ||
1709 | return NULL; | ||
1710 | } | ||
1711 | |||
1712 | /* | ||
1713 | * Output | ||
1714 | */ | ||
1715 | |||
1716 | struct perf_output_handle { | ||
1717 | struct perf_counter *counter; | ||
1718 | struct perf_mmap_data *data; | ||
1719 | unsigned int offset; | ||
1720 | unsigned int head; | ||
1721 | int wakeup; | ||
1722 | int nmi; | ||
1723 | int overflow; | ||
1724 | }; | ||
1725 | |||
1726 | static inline void __perf_output_wakeup(struct perf_output_handle *handle) | ||
1727 | { | ||
1728 | if (handle->nmi) { | ||
1729 | handle->counter->pending_wakeup = 1; | ||
1730 | perf_pending_queue(&handle->counter->pending, | ||
1731 | perf_pending_counter); | ||
1732 | } else | ||
1733 | perf_counter_wakeup(handle->counter); | ||
1734 | } | ||
1735 | |||
/*
 * Reserve @size bytes in @counter's ring buffer and set up @handle to
 * write them; must be paired with perf_output_end(), which drops the
 * RCU read lock taken here. Returns 0 on success, -ENOSPC when no
 * buffer space exists (signalling a wakeup so user-space can notice).
 */
static int perf_output_begin(struct perf_output_handle *handle,
			     struct perf_counter *counter, unsigned int size,
			     int nmi, int overflow)
{
	struct perf_mmap_data *data;
	unsigned int offset, head;

	rcu_read_lock();
	data = rcu_dereference(counter->data);
	if (!data)
		goto out;

	handle->counter = counter;
	handle->nmi = nmi;
	handle->overflow = overflow;

	if (!data->nr_pages)
		goto fail;

	/* lockless reservation: advance data->head by @size */
	do {
		offset = head = atomic_read(&data->head);
		head += size;
	} while (atomic_cmpxchg(&data->head, offset, head) != offset);

	handle->data = data;
	handle->offset = offset;
	handle->head = head;
	/* wake readers whenever the write crosses a page boundary */
	handle->wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);

	return 0;

fail:
	__perf_output_wakeup(handle);
out:
	rcu_read_unlock();

	return -ENOSPC;
}
1774 | |||
1775 | static void perf_output_copy(struct perf_output_handle *handle, | ||
1776 | void *buf, unsigned int len) | ||
1777 | { | ||
1778 | unsigned int pages_mask; | ||
1779 | unsigned int offset; | ||
1780 | unsigned int size; | ||
1781 | void **pages; | ||
1782 | |||
1783 | offset = handle->offset; | ||
1784 | pages_mask = handle->data->nr_pages - 1; | ||
1785 | pages = handle->data->data_pages; | ||
1786 | |||
1787 | do { | ||
1788 | unsigned int page_offset; | ||
1789 | int nr; | ||
1790 | |||
1791 | nr = (offset >> PAGE_SHIFT) & pages_mask; | ||
1792 | page_offset = offset & (PAGE_SIZE - 1); | ||
1793 | size = min_t(unsigned int, PAGE_SIZE - page_offset, len); | ||
1794 | |||
1795 | memcpy(pages[nr] + page_offset, buf, size); | ||
1796 | |||
1797 | len -= size; | ||
1798 | buf += size; | ||
1799 | offset += size; | ||
1800 | } while (len); | ||
1801 | |||
1802 | handle->offset = offset; | ||
1803 | |||
1804 | WARN_ON_ONCE(handle->offset > handle->head); | ||
1805 | } | ||
1806 | |||
/* typed convenience wrapper: emit one object into the output buffer */
#define perf_output_put(handle, x) \
	perf_output_copy((handle), &(x), sizeof(x))
1809 | |||
/*
 * Finish a write started with perf_output_begin(): possibly wake up
 * readers (honouring hw_event.wakeup_events batching for overflow
 * records) and drop the RCU read lock taken at output_begin().
 */
static void perf_output_end(struct perf_output_handle *handle)
{
	int wakeup_events = handle->counter->hw_event.wakeup_events;

	if (handle->overflow && wakeup_events) {
		/* only wake every wakeup_events overflow records */
		int events = atomic_inc_return(&handle->data->events);
		if (events >= wakeup_events) {
			atomic_sub(wakeup_events, &handle->data->events);
			__perf_output_wakeup(handle);
		}
	} else if (handle->wakeup)
		__perf_output_wakeup(handle);
	rcu_read_unlock();
}
1824 | |||
/*
 * Emit one counter overflow record containing the fields selected by
 * hw_event.record_type (IP, TID, time, addr, group readings,
 * callchain). Two passes: first size the record and build the header,
 * then reserve the space and emit the fields in the same order.
 */
static void perf_counter_output(struct perf_counter *counter,
				int nmi, struct pt_regs *regs, u64 addr)
{
	int ret;
	u64 record_type = counter->hw_event.record_type;
	struct perf_output_handle handle;
	struct perf_event_header header;
	u64 ip;
	struct {
		u32 pid, tid;
	} tid_entry;
	struct {
		u64 event;
		u64 counter;
	} group_entry;
	struct perf_callchain_entry *callchain = NULL;
	int callchain_size = 0;
	u64 time;

	header.type = 0;
	header.size = sizeof(header);

	header.misc = PERF_EVENT_MISC_OVERFLOW;
	header.misc |= user_mode(regs) ?
		PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL;

	/* pass 1: accumulate header.type bits and the total record size */
	if (record_type & PERF_RECORD_IP) {
		ip = instruction_pointer(regs);
		header.type |= PERF_RECORD_IP;
		header.size += sizeof(ip);
	}

	if (record_type & PERF_RECORD_TID) {
		/* namespace issues */
		tid_entry.pid = current->group_leader->pid;
		tid_entry.tid = current->pid;

		header.type |= PERF_RECORD_TID;
		header.size += sizeof(tid_entry);
	}

	if (record_type & PERF_RECORD_TIME) {
		/*
		 * Maybe do better on x86 and provide cpu_clock_nmi()
		 */
		time = sched_clock();

		header.type |= PERF_RECORD_TIME;
		header.size += sizeof(u64);
	}

	if (record_type & PERF_RECORD_ADDR) {
		header.type |= PERF_RECORD_ADDR;
		header.size += sizeof(u64);
	}

	if (record_type & PERF_RECORD_GROUP) {
		header.type |= PERF_RECORD_GROUP;
		header.size += sizeof(u64) +
			counter->nr_siblings * sizeof(group_entry);
	}

	if (record_type & PERF_RECORD_CALLCHAIN) {
		callchain = perf_callchain(regs);

		if (callchain) {
			/* +1 for the leading nr field of the entry */
			callchain_size = (1 + callchain->nr) * sizeof(u64);

			header.type |= PERF_RECORD_CALLCHAIN;
			header.size += callchain_size;
		}
	}

	ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
	if (ret)
		return;

	/* pass 2: emit fields in exactly the order they were sized above */
	perf_output_put(&handle, header);

	if (record_type & PERF_RECORD_IP)
		perf_output_put(&handle, ip);

	if (record_type & PERF_RECORD_TID)
		perf_output_put(&handle, tid_entry);

	if (record_type & PERF_RECORD_TIME)
		perf_output_put(&handle, time);

	if (record_type & PERF_RECORD_ADDR)
		perf_output_put(&handle, addr);

	if (record_type & PERF_RECORD_GROUP) {
		struct perf_counter *leader, *sub;
		u64 nr = counter->nr_siblings;

		perf_output_put(&handle, nr);

		leader = counter->group_leader;
		list_for_each_entry(sub, &leader->sibling_list, list_entry) {
			/* refresh each sibling's value before reading it */
			if (sub != counter)
				sub->hw_ops->read(sub);

			group_entry.event = sub->hw_event.config;
			group_entry.counter = atomic64_read(&sub->count);

			perf_output_put(&handle, group_entry);
		}
	}

	if (callchain)
		perf_output_copy(&handle, callchain, callchain_size);

	perf_output_end(&handle);
}
1939 | |||
1940 | /* | ||
1941 | * comm tracking | ||
1942 | */ | ||
1943 | |||
1944 | struct perf_comm_event { | ||
1945 | struct task_struct *task; | ||
1946 | char *comm; | ||
1947 | int comm_size; | ||
1948 | |||
1949 | struct { | ||
1950 | struct perf_event_header header; | ||
1951 | |||
1952 | u32 pid; | ||
1953 | u32 tid; | ||
1954 | } event; | ||
1955 | }; | ||
1956 | |||
/*
 * Emit one PERF_EVENT_COMM record (header + padded comm string) into
 * @counter's buffer.
 */
static void perf_counter_comm_output(struct perf_counter *counter,
				     struct perf_comm_event *comm_event)
{
	struct perf_output_handle handle;
	int size = comm_event->event.header.size;
	int ret = perf_output_begin(&handle, counter, size, 0, 0);

	if (ret)
		return;

	perf_output_put(&handle, comm_event->event);
	perf_output_copy(&handle, comm_event->comm,
				comm_event->comm_size);
	perf_output_end(&handle);
}
1972 | |||
1973 | static int perf_counter_comm_match(struct perf_counter *counter, | ||
1974 | struct perf_comm_event *comm_event) | ||
1975 | { | ||
1976 | if (counter->hw_event.comm && | ||
1977 | comm_event->event.header.type == PERF_EVENT_COMM) | ||
1978 | return 1; | ||
1979 | |||
1980 | return 0; | ||
1981 | } | ||
1982 | |||
/* Deliver the comm event to every interested counter on @ctx. */
static void perf_counter_comm_ctx(struct perf_counter_context *ctx,
				  struct perf_comm_event *comm_event)
{
	struct perf_counter *counter;

	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
		if (perf_counter_comm_match(counter, comm_event))
			perf_counter_comm_output(counter, comm_event);
	}
	rcu_read_unlock();
}
1998 | |||
1999 | static void perf_counter_comm_event(struct perf_comm_event *comm_event) | ||
2000 | { | ||
2001 | struct perf_cpu_context *cpuctx; | ||
2002 | unsigned int size; | ||
2003 | char *comm = comm_event->task->comm; | ||
2004 | |||
2005 | size = ALIGN(strlen(comm)+1, sizeof(u64)); | ||
2006 | |||
2007 | comm_event->comm = comm; | ||
2008 | comm_event->comm_size = size; | ||
2009 | |||
2010 | comm_event->event.header.size = sizeof(comm_event->event) + size; | ||
2011 | |||
2012 | cpuctx = &get_cpu_var(perf_cpu_context); | ||
2013 | perf_counter_comm_ctx(&cpuctx->ctx, comm_event); | ||
2014 | put_cpu_var(perf_cpu_context); | ||
2015 | |||
2016 | perf_counter_comm_ctx(¤t->perf_counter_ctx, comm_event); | ||
2017 | } | ||
2018 | |||
/*
 * Called when a task changes its comm (e.g. exec): build and emit a
 * PERF_EVENT_COMM record, unless nobody is tracking comms.
 */
void perf_counter_comm(struct task_struct *task)
{
	struct perf_comm_event comm_event;

	if (!atomic_read(&nr_comm_tracking))
		return;

	comm_event = (struct perf_comm_event){
		.task	= task,
		.event  = {
			.header = { .type = PERF_EVENT_COMM, },
			.pid	= task->group_leader->pid,
			.tid	= task->pid,
		},
	};

	perf_counter_comm_event(&comm_event);
}
2037 | |||
2038 | /* | ||
2039 | * mmap tracking | ||
2040 | */ | ||
2041 | |||
2042 | struct perf_mmap_event { | ||
2043 | struct file *file; | ||
2044 | char *file_name; | ||
2045 | int file_size; | ||
2046 | |||
2047 | struct { | ||
2048 | struct perf_event_header header; | ||
2049 | |||
2050 | u32 pid; | ||
2051 | u32 tid; | ||
2052 | u64 start; | ||
2053 | u64 len; | ||
2054 | u64 pgoff; | ||
2055 | } event; | ||
2056 | }; | ||
2057 | |||
2058 | static void perf_counter_mmap_output(struct perf_counter *counter, | ||
2059 | struct perf_mmap_event *mmap_event) | ||
2060 | { | ||
2061 | struct perf_output_handle handle; | ||
2062 | int size = mmap_event->event.header.size; | ||
2063 | int ret = perf_output_begin(&handle, counter, size, 0, 0); | ||
2064 | |||
2065 | if (ret) | ||
2066 | return; | ||
2067 | |||
2068 | perf_output_put(&handle, mmap_event->event); | ||
2069 | perf_output_copy(&handle, mmap_event->file_name, | ||
2070 | mmap_event->file_size); | ||
2071 | perf_output_end(&handle); | ||
2072 | } | ||
2073 | |||
2074 | static int perf_counter_mmap_match(struct perf_counter *counter, | ||
2075 | struct perf_mmap_event *mmap_event) | ||
2076 | { | ||
2077 | if (counter->hw_event.mmap && | ||
2078 | mmap_event->event.header.type == PERF_EVENT_MMAP) | ||
2079 | return 1; | ||
2080 | |||
2081 | if (counter->hw_event.munmap && | ||
2082 | mmap_event->event.header.type == PERF_EVENT_MUNMAP) | ||
2083 | return 1; | ||
2084 | |||
2085 | return 0; | ||
2086 | } | ||
2087 | |||
/* Deliver the mmap/munmap event to every interested counter on @ctx. */
static void perf_counter_mmap_ctx(struct perf_counter_context *ctx,
				  struct perf_mmap_event *mmap_event)
{
	struct perf_counter *counter;

	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
		if (perf_counter_mmap_match(counter, mmap_event))
			perf_counter_mmap_output(counter, mmap_event);
	}
	rcu_read_unlock();
}
2103 | |||
2104 | static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event) | ||
2105 | { | ||
2106 | struct perf_cpu_context *cpuctx; | ||
2107 | struct file *file = mmap_event->file; | ||
2108 | unsigned int size; | ||
2109 | char tmp[16]; | ||
2110 | char *buf = NULL; | ||
2111 | char *name; | ||
2112 | |||
2113 | if (file) { | ||
2114 | buf = kzalloc(PATH_MAX, GFP_KERNEL); | ||
2115 | if (!buf) { | ||
2116 | name = strncpy(tmp, "//enomem", sizeof(tmp)); | ||
2117 | goto got_name; | ||
2118 | } | ||
2119 | name = d_path(&file->f_path, buf, PATH_MAX); | ||
2120 | if (IS_ERR(name)) { | ||
2121 | name = strncpy(tmp, "//toolong", sizeof(tmp)); | ||
2122 | goto got_name; | ||
2123 | } | ||
2124 | } else { | ||
2125 | name = strncpy(tmp, "//anon", sizeof(tmp)); | ||
2126 | goto got_name; | ||
2127 | } | ||
2128 | |||
2129 | got_name: | ||
2130 | size = ALIGN(strlen(name)+1, sizeof(u64)); | ||
2131 | |||
2132 | mmap_event->file_name = name; | ||
2133 | mmap_event->file_size = size; | ||
2134 | |||
2135 | mmap_event->event.header.size = sizeof(mmap_event->event) + size; | ||
2136 | |||
2137 | cpuctx = &get_cpu_var(perf_cpu_context); | ||
2138 | perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event); | ||
2139 | put_cpu_var(perf_cpu_context); | ||
2140 | |||
2141 | perf_counter_mmap_ctx(¤t->perf_counter_ctx, mmap_event); | ||
2142 | |||
2143 | kfree(buf); | ||
2144 | } | ||
2145 | |||
/*
 * Called on mmap(): emit a PERF_EVENT_MMAP record describing the new
 * mapping, unless nobody is tracking mmaps.
 */
void perf_counter_mmap(unsigned long addr, unsigned long len,
		       unsigned long pgoff, struct file *file)
{
	struct perf_mmap_event mmap_event;

	if (!atomic_read(&nr_mmap_tracking))
		return;

	mmap_event = (struct perf_mmap_event){
		.file   = file,
		.event  = {
			.header = { .type = PERF_EVENT_MMAP, },
			.pid	= current->group_leader->pid,
			.tid	= current->pid,
			.start  = addr,
			.len    = len,
			.pgoff  = pgoff,
		},
	};

	perf_counter_mmap_event(&mmap_event);
}
2168 | |||
/*
 * Called on munmap(): emit a PERF_EVENT_MUNMAP record describing the
 * removed mapping, unless nobody is tracking munmaps.
 */
void perf_counter_munmap(unsigned long addr, unsigned long len,
			 unsigned long pgoff, struct file *file)
{
	struct perf_mmap_event mmap_event;

	if (!atomic_read(&nr_munmap_tracking))
		return;

	mmap_event = (struct perf_mmap_event){
		.file   = file,
		.event  = {
			.header = { .type = PERF_EVENT_MUNMAP, },
			.pid	= current->group_leader->pid,
			.tid	= current->pid,
			.start  = addr,
			.len    = len,
			.pgoff  = pgoff,
		},
	};

	perf_counter_mmap_event(&mmap_event);
}
2191 | |||
2192 | /* | ||
2193 | * Generic counter overflow handling. | ||
2194 | */ | ||
2195 | |||
2196 | int perf_counter_overflow(struct perf_counter *counter, | ||
2197 | int nmi, struct pt_regs *regs, u64 addr) | ||
2198 | { | ||
2199 | int events = atomic_read(&counter->event_limit); | ||
2200 | int ret = 0; | ||
2201 | |||
2202 | counter->pending_kill = POLL_IN; | ||
2203 | if (events && atomic_dec_and_test(&counter->event_limit)) { | ||
2204 | ret = 1; | ||
2205 | counter->pending_kill = POLL_HUP; | ||
2206 | if (nmi) { | ||
2207 | counter->pending_disable = 1; | ||
2208 | perf_pending_queue(&counter->pending, | ||
2209 | perf_pending_counter); | ||
2210 | } else | ||
2211 | perf_counter_disable(counter); | ||
2212 | } | ||
2213 | |||
2214 | perf_counter_output(counter, nmi, regs, addr); | ||
2215 | return ret; | ||
2216 | } | ||
2217 | |||
2218 | /* | ||
2219 | * Generic software counter infrastructure | ||
2220 | */ | ||
2221 | |||
2222 | static void perf_swcounter_update(struct perf_counter *counter) | ||
2223 | { | ||
2224 | struct hw_perf_counter *hwc = &counter->hw; | ||
2225 | u64 prev, now; | ||
2226 | s64 delta; | ||
2227 | |||
2228 | again: | ||
2229 | prev = atomic64_read(&hwc->prev_count); | ||
2230 | now = atomic64_read(&hwc->count); | ||
2231 | if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev) | ||
2232 | goto again; | ||
2233 | |||
2234 | delta = now - prev; | ||
2235 | |||
2236 | atomic64_add(delta, &counter->count); | ||
2237 | atomic64_sub(delta, &hwc->period_left); | ||
2238 | } | ||
2239 | |||
/*
 * Re-arm the counter for the next sample period: bias the hw count
 * negative so it crosses zero after 'left' more events.
 */
static void perf_swcounter_set_period(struct perf_counter *counter)
{
	struct hw_perf_counter *hwc = &counter->hw;
	s64 left = atomic64_read(&hwc->period_left);
	s64 period = hwc->irq_period;

	/* way past the period boundary: restart a full period */
	if (unlikely(left <= -period)) {
		left = period;
		atomic64_set(&hwc->period_left, left);
	}

	/* overshot the period: fold the overrun into the next one */
	if (unlikely(left <= 0)) {
		left += period;
		atomic64_add(period, &hwc->period_left);
	}

	atomic64_set(&hwc->prev_count, -left);
	atomic64_set(&hwc->count, -left);
}
2259 | |||
/*
 * hrtimer callback used by the clock-based software counters: read
 * the counter, report an overflow sample and re-arm the timer.
 */
static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
{
	enum hrtimer_restart ret = HRTIMER_RESTART;
	struct perf_counter *counter;
	struct pt_regs *regs;

	counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
	counter->hw_ops->read(counter);

	regs = get_irq_regs();
	/*
	 * In case we exclude kernel IPs or are somehow not in interrupt
	 * context, provide the next best thing, the user IP.
	 */
	if ((counter->hw_event.exclude_kernel || !regs) &&
			!counter->hw_event.exclude_user)
		regs = task_pt_regs(current);

	if (regs) {
		/* stop the timer once the event limit has been reached */
		if (perf_counter_overflow(counter, 0, regs, 0))
			ret = HRTIMER_NORESTART;
	}

	hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period));

	return ret;
}
2287 | |||
/*
 * A sample period elapsed: sync the counter, start a new period and
 * emit the overflow record.
 */
static void perf_swcounter_overflow(struct perf_counter *counter,
				    int nmi, struct pt_regs *regs, u64 addr)
{
	perf_swcounter_update(counter);
	perf_swcounter_set_period(counter);
	/* TODO: honour the limit-reached return value (empty branch) */
	if (perf_counter_overflow(counter, nmi, regs, addr))
		/* soft-disable the counter */
		;

}
2298 | |||
2299 | static int perf_swcounter_match(struct perf_counter *counter, | ||
2300 | enum perf_event_types type, | ||
2301 | u32 event, struct pt_regs *regs) | ||
2302 | { | ||
2303 | if (counter->state != PERF_COUNTER_STATE_ACTIVE) | ||
2304 | return 0; | ||
2305 | |||
2306 | if (perf_event_raw(&counter->hw_event)) | ||
2307 | return 0; | ||
2308 | |||
2309 | if (perf_event_type(&counter->hw_event) != type) | ||
2310 | return 0; | ||
2311 | |||
2312 | if (perf_event_id(&counter->hw_event) != event) | ||
2313 | return 0; | ||
2314 | |||
2315 | if (counter->hw_event.exclude_user && user_mode(regs)) | ||
2316 | return 0; | ||
2317 | |||
2318 | if (counter->hw_event.exclude_kernel && !user_mode(regs)) | ||
2319 | return 0; | ||
2320 | |||
2321 | return 1; | ||
2322 | } | ||
2323 | |||
2324 | static void perf_swcounter_add(struct perf_counter *counter, u64 nr, | ||
2325 | int nmi, struct pt_regs *regs, u64 addr) | ||
2326 | { | ||
2327 | int neg = atomic64_add_negative(nr, &counter->hw.count); | ||
2328 | if (counter->hw.irq_period && !neg) | ||
2329 | perf_swcounter_overflow(counter, nmi, regs, addr); | ||
2330 | } | ||
2331 | |||
/* Feed the event into every matching counter on @ctx's event list. */
static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
				     enum perf_event_types type, u32 event,
				     u64 nr, int nmi, struct pt_regs *regs,
				     u64 addr)
{
	struct perf_counter *counter;

	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
		if (perf_swcounter_match(counter, type, event, regs))
			perf_swcounter_add(counter, nr, nmi, regs, addr);
	}
	rcu_read_unlock();
}
2349 | |||
2350 | static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) | ||
2351 | { | ||
2352 | if (in_nmi()) | ||
2353 | return &cpuctx->recursion[3]; | ||
2354 | |||
2355 | if (in_irq()) | ||
2356 | return &cpuctx->recursion[2]; | ||
2357 | |||
2358 | if (in_softirq()) | ||
2359 | return &cpuctx->recursion[1]; | ||
2360 | |||
2361 | return &cpuctx->recursion[0]; | ||
2362 | } | ||
2363 | |||
/*
 * Deliver a software event to the cpu context and, if present, the
 * task context. The per-context-level recursion flag stops an event
 * raised from within event processing from recursing at that level.
 */
static void __perf_swcounter_event(enum perf_event_types type, u32 event,
				   u64 nr, int nmi, struct pt_regs *regs,
				   u64 addr)
{
	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
	int *recursion = perf_swcounter_recursion_context(cpuctx);

	if (*recursion)
		goto out;

	(*recursion)++;
	barrier();

	perf_swcounter_ctx_event(&cpuctx->ctx, type, event,
				 nr, nmi, regs, addr);
	if (cpuctx->task_ctx) {
		perf_swcounter_ctx_event(cpuctx->task_ctx, type, event,
					 nr, nmi, regs, addr);
	}

	barrier();
	(*recursion)--;

out:
	put_cpu_var(perf_cpu_context);
}
2390 | |||
/* Public entry point for raising a software counter event. */
void
perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
{
	__perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr);
}
2396 | |||
/* hw_ops for generic (period-driven) software counters */

static void perf_swcounter_read(struct perf_counter *counter)
{
	perf_swcounter_update(counter);
}

static int perf_swcounter_enable(struct perf_counter *counter)
{
	perf_swcounter_set_period(counter);
	return 0;
}

static void perf_swcounter_disable(struct perf_counter *counter)
{
	perf_swcounter_update(counter);
}

static const struct hw_perf_counter_ops perf_ops_generic = {
	.enable		= perf_swcounter_enable,
	.disable	= perf_swcounter_disable,
	.read		= perf_swcounter_read,
};
2418 | |||
2419 | /* | ||
2420 | * Software counter: cpu wall time clock | ||
2421 | */ | ||
2422 | |||
2423 | static void cpu_clock_perf_counter_update(struct perf_counter *counter) | ||
2424 | { | ||
2425 | int cpu = raw_smp_processor_id(); | ||
2426 | s64 prev; | ||
2427 | u64 now; | ||
2428 | |||
2429 | now = cpu_clock(cpu); | ||
2430 | prev = atomic64_read(&counter->hw.prev_count); | ||
2431 | atomic64_set(&counter->hw.prev_count, now); | ||
2432 | atomic64_add(now - prev, &counter->count); | ||
2433 | } | ||
2434 | |||
/*
 * Start the wall-clock counter: snapshot the current cpu clock and,
 * when sampling, start the period hrtimer.
 */
static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
{
	struct hw_perf_counter *hwc = &counter->hw;
	int cpu = raw_smp_processor_id();

	atomic64_set(&hwc->prev_count, cpu_clock(cpu));
	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hwc->hrtimer.function = perf_swcounter_hrtimer;
	if (hwc->irq_period) {
		__hrtimer_start_range_ns(&hwc->hrtimer,
				ns_to_ktime(hwc->irq_period), 0,
				HRTIMER_MODE_REL, 0);
	}

	return 0;
}
2451 | |||
/* Stop the sampling timer and fold in the final clock delta. */
static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
{
	hrtimer_cancel(&counter->hw.hrtimer);
	cpu_clock_perf_counter_update(counter);
}

static void cpu_clock_perf_counter_read(struct perf_counter *counter)
{
	cpu_clock_perf_counter_update(counter);
}

static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
	.enable		= cpu_clock_perf_counter_enable,
	.disable	= cpu_clock_perf_counter_disable,
	.read		= cpu_clock_perf_counter_read,
};
2468 | |||
2469 | /* | ||
2470 | * Software counter: task time clock | ||
2471 | */ | ||
2472 | |||
2473 | static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now) | ||
2474 | { | ||
2475 | u64 prev; | ||
2476 | s64 delta; | ||
2477 | |||
2478 | prev = atomic64_xchg(&counter->hw.prev_count, now); | ||
2479 | delta = now - prev; | ||
2480 | atomic64_add(delta, &counter->count); | ||
2481 | } | ||
2482 | |||
/*
 * Start the task-clock counter from the context's current task time;
 * when sampling, start the period hrtimer.
 */
static int task_clock_perf_counter_enable(struct perf_counter *counter)
{
	struct hw_perf_counter *hwc = &counter->hw;
	u64 now;

	now = counter->ctx->time;

	atomic64_set(&hwc->prev_count, now);
	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hwc->hrtimer.function = perf_swcounter_hrtimer;
	if (hwc->irq_period) {
		__hrtimer_start_range_ns(&hwc->hrtimer,
				ns_to_ktime(hwc->irq_period), 0,
				HRTIMER_MODE_REL, 0);
	}

	return 0;
}
2501 | |||
/* Stop the sampling timer and fold in the final task-time delta. */
static void task_clock_perf_counter_disable(struct perf_counter *counter)
{
	hrtimer_cancel(&counter->hw.hrtimer);
	task_clock_perf_counter_update(counter, counter->ctx->time);

}

/*
 * From NMI context the context time cannot be updated under its lock,
 * so extrapolate it from the last recorded timestamp instead.
 */
static void task_clock_perf_counter_read(struct perf_counter *counter)
{
	u64 time;

	if (!in_nmi()) {
		update_context_time(counter->ctx);
		time = counter->ctx->time;
	} else {
		u64 now = perf_clock();
		u64 delta = now - counter->ctx->timestamp;
		time = counter->ctx->time + delta;
	}

	task_clock_perf_counter_update(counter, time);
}

static const struct hw_perf_counter_ops perf_ops_task_clock = {
	.enable		= task_clock_perf_counter_enable,
	.disable	= task_clock_perf_counter_disable,
	.read		= task_clock_perf_counter_read,
};
2530 | |||
2531 | /* | ||
2532 | * Software counter: cpu migrations | ||
2533 | */ | ||
2534 | |||
2535 | static inline u64 get_cpu_migrations(struct perf_counter *counter) | ||
2536 | { | ||
2537 | struct task_struct *curr = counter->ctx->task; | ||
2538 | |||
2539 | if (curr) | ||
2540 | return curr->se.nr_migrations; | ||
2541 | return cpu_nr_migrations(smp_processor_id()); | ||
2542 | } | ||
2543 | |||
2544 | static void cpu_migrations_perf_counter_update(struct perf_counter *counter) | ||
2545 | { | ||
2546 | u64 prev, now; | ||
2547 | s64 delta; | ||
2548 | |||
2549 | prev = atomic64_read(&counter->hw.prev_count); | ||
2550 | now = get_cpu_migrations(counter); | ||
2551 | |||
2552 | atomic64_set(&counter->hw.prev_count, now); | ||
2553 | |||
2554 | delta = now - prev; | ||
2555 | |||
2556 | atomic64_add(delta, &counter->count); | ||
2557 | } | ||
2558 | |||
/* ->read() hook: just fold in migrations since the last update. */
static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
{
	cpu_migrations_perf_counter_update(counter);
}
2563 | |||
/*
 * ->enable() hook: re-baseline the migration snapshot, but only when
 * the counter was fully OFF before this enable; an INACTIVE counter's
 * previous baseline still covers the gap.
 * NOTE(review): relies on counter->prev_state being set by the caller
 * before ->enable() runs — confirm against the scheduling-in path.
 */
static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
{
	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
		atomic64_set(&counter->hw.prev_count,
			     get_cpu_migrations(counter));
	return 0;
}
2571 | |||
/* ->disable() hook: account migrations that happened while enabled. */
static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
{
	cpu_migrations_perf_counter_update(counter);
}
2576 | |||
/* hw_perf_counter_ops for the cpu-migrations software counter. */
static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
	.enable = cpu_migrations_perf_counter_enable,
	.disable = cpu_migrations_perf_counter_disable,
	.read = cpu_migrations_perf_counter_read,
};
2582 | |||
#ifdef CONFIG_EVENT_PROFILE
/*
 * Tracepoint software counters: ftrace calls in here (with the event
 * id) for every firing of a profiled tracepoint.
 */
void perf_tpcounter_event(int event_id)
{
	struct pt_regs *regs = get_irq_regs();

	/* Not in an interrupt: fall back to the task's saved registers. */
	if (!regs)
		regs = task_pt_regs(current);

	__perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0);
}
EXPORT_SYMBOL_GPL(perf_tpcounter_event);

extern int ftrace_profile_enable(int);
extern void ftrace_profile_disable(int);

/* Undo the ftrace_profile_enable() done by tp_perf_counter_init(). */
static void tp_perf_counter_destroy(struct perf_counter *counter)
{
	ftrace_profile_disable(perf_event_id(&counter->hw_event));
}

/*
 * Set up a tracepoint counter: turn on ftrace profiling for the event
 * and route the counter through the generic software-counter ops.
 * Returns NULL when the event cannot be enabled.
 */
static const struct hw_perf_counter_ops *
tp_perf_counter_init(struct perf_counter *counter)
{
	int event_id = perf_event_id(&counter->hw_event);
	int ret;

	ret = ftrace_profile_enable(event_id);
	if (ret)
		return NULL;

	counter->destroy = tp_perf_counter_destroy;
	counter->hw.irq_period = counter->hw_event.irq_period;

	return &perf_ops_generic;
}
#else
/* !CONFIG_EVENT_PROFILE: tracepoint counters are unavailable. */
static const struct hw_perf_counter_ops *
tp_perf_counter_init(struct perf_counter *counter)
{
	return NULL;
}
#endif
2625 | |||
2626 | static const struct hw_perf_counter_ops * | ||
2627 | sw_perf_counter_init(struct perf_counter *counter) | ||
2628 | { | ||
2629 | struct perf_counter_hw_event *hw_event = &counter->hw_event; | ||
2630 | const struct hw_perf_counter_ops *hw_ops = NULL; | ||
2631 | struct hw_perf_counter *hwc = &counter->hw; | ||
2632 | |||
2633 | /* | ||
2634 | * Software counters (currently) can't in general distinguish | ||
2635 | * between user, kernel and hypervisor events. | ||
2636 | * However, context switches and cpu migrations are considered | ||
2637 | * to be kernel events, and page faults are never hypervisor | ||
2638 | * events. | ||
2639 | */ | ||
2640 | switch (perf_event_id(&counter->hw_event)) { | ||
2641 | case PERF_COUNT_CPU_CLOCK: | ||
2642 | hw_ops = &perf_ops_cpu_clock; | ||
2643 | |||
2644 | if (hw_event->irq_period && hw_event->irq_period < 10000) | ||
2645 | hw_event->irq_period = 10000; | ||
2646 | break; | ||
2647 | case PERF_COUNT_TASK_CLOCK: | ||
2648 | /* | ||
2649 | * If the user instantiates this as a per-cpu counter, | ||
2650 | * use the cpu_clock counter instead. | ||
2651 | */ | ||
2652 | if (counter->ctx->task) | ||
2653 | hw_ops = &perf_ops_task_clock; | ||
2654 | else | ||
2655 | hw_ops = &perf_ops_cpu_clock; | ||
2656 | |||
2657 | if (hw_event->irq_period && hw_event->irq_period < 10000) | ||
2658 | hw_event->irq_period = 10000; | ||
2659 | break; | ||
2660 | case PERF_COUNT_PAGE_FAULTS: | ||
2661 | case PERF_COUNT_PAGE_FAULTS_MIN: | ||
2662 | case PERF_COUNT_PAGE_FAULTS_MAJ: | ||
2663 | case PERF_COUNT_CONTEXT_SWITCHES: | ||
2664 | hw_ops = &perf_ops_generic; | ||
2665 | break; | ||
2666 | case PERF_COUNT_CPU_MIGRATIONS: | ||
2667 | if (!counter->hw_event.exclude_kernel) | ||
2668 | hw_ops = &perf_ops_cpu_migrations; | ||
2669 | break; | ||
2670 | } | ||
2671 | |||
2672 | if (hw_ops) | ||
2673 | hwc->irq_period = hw_event->irq_period; | ||
2674 | |||
2675 | return hw_ops; | ||
2676 | } | ||
2677 | |||
/*
 * Allocate and initialize a counter structure.
 *
 * @hw_event:     event attributes (copied by value into the counter)
 * @cpu:          cpu the counter is bound to
 * @ctx:          the task or per-cpu context the counter will live in
 * @group_leader: group to join, or NULL to self-lead
 * @gfpflags:     allocation flags
 *
 * Returns the new counter or an ERR_PTR(); never NULL.
 */
static struct perf_counter *
perf_counter_alloc(struct perf_counter_hw_event *hw_event,
		   int cpu,
		   struct perf_counter_context *ctx,
		   struct perf_counter *group_leader,
		   gfp_t gfpflags)
{
	const struct hw_perf_counter_ops *hw_ops;
	struct perf_counter *counter;
	long err;

	counter = kzalloc(sizeof(*counter), gfpflags);
	if (!counter)
		return ERR_PTR(-ENOMEM);

	/*
	 * Single counters are their own group leaders, with an
	 * empty sibling list:
	 */
	if (!group_leader)
		group_leader = counter;

	mutex_init(&counter->mutex);
	INIT_LIST_HEAD(&counter->list_entry);
	INIT_LIST_HEAD(&counter->event_entry);
	INIT_LIST_HEAD(&counter->sibling_list);
	init_waitqueue_head(&counter->waitq);

	mutex_init(&counter->mmap_mutex);

	INIT_LIST_HEAD(&counter->child_list);

	counter->cpu = cpu;
	counter->hw_event = *hw_event;
	counter->group_leader = group_leader;
	counter->hw_ops = NULL;
	counter->ctx = ctx;

	/* Counters start INACTIVE unless userspace asked for them disabled. */
	counter->state = PERF_COUNTER_STATE_INACTIVE;
	if (hw_event->disabled)
		counter->state = PERF_COUNTER_STATE_OFF;

	hw_ops = NULL;

	/* Raw events always go straight to the hardware PMU driver. */
	if (perf_event_raw(hw_event)) {
		hw_ops = hw_perf_counter_init(counter);
		goto done;
	}

	switch (perf_event_type(hw_event)) {
	case PERF_TYPE_HARDWARE:
		hw_ops = hw_perf_counter_init(counter);
		break;

	case PERF_TYPE_SOFTWARE:
		hw_ops = sw_perf_counter_init(counter);
		break;

	case PERF_TYPE_TRACEPOINT:
		hw_ops = tp_perf_counter_init(counter);
		break;
	}
done:
	/*
	 * The init functions return NULL for "no such counter" and may
	 * return an ERR_PTR() for a specific failure; map both to an
	 * error return.
	 */
	err = 0;
	if (!hw_ops)
		err = -EINVAL;
	else if (IS_ERR(hw_ops))
		err = PTR_ERR(hw_ops);

	if (err) {
		kfree(counter);
		return ERR_PTR(err);
	}

	counter->hw_ops = hw_ops;

	/* Global bookkeeping: how many counters want these event records. */
	if (counter->hw_event.mmap)
		atomic_inc(&nr_mmap_tracking);
	if (counter->hw_event.munmap)
		atomic_inc(&nr_munmap_tracking);
	if (counter->hw_event.comm)
		atomic_inc(&nr_comm_tracking);

	return counter;
}
2766 | |||
2767 | /** | ||
2768 | * sys_perf_counter_open - open a performance counter, associate it to a task/cpu | ||
2769 | * | ||
2770 | * @hw_event_uptr: event type attributes for monitoring/sampling | ||
2771 | * @pid: target pid | ||
2772 | * @cpu: target cpu | ||
2773 | * @group_fd: group leader counter fd | ||
2774 | */ | ||
2775 | SYSCALL_DEFINE5(perf_counter_open, | ||
2776 | const struct perf_counter_hw_event __user *, hw_event_uptr, | ||
2777 | pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) | ||
2778 | { | ||
2779 | struct perf_counter *counter, *group_leader; | ||
2780 | struct perf_counter_hw_event hw_event; | ||
2781 | struct perf_counter_context *ctx; | ||
2782 | struct file *counter_file = NULL; | ||
2783 | struct file *group_file = NULL; | ||
2784 | int fput_needed = 0; | ||
2785 | int fput_needed2 = 0; | ||
2786 | int ret; | ||
2787 | |||
2788 | /* for future expandability... */ | ||
2789 | if (flags) | ||
2790 | return -EINVAL; | ||
2791 | |||
2792 | if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0) | ||
2793 | return -EFAULT; | ||
2794 | |||
2795 | /* | ||
2796 | * Get the target context (task or percpu): | ||
2797 | */ | ||
2798 | ctx = find_get_context(pid, cpu); | ||
2799 | if (IS_ERR(ctx)) | ||
2800 | return PTR_ERR(ctx); | ||
2801 | |||
2802 | /* | ||
2803 | * Look up the group leader (we will attach this counter to it): | ||
2804 | */ | ||
2805 | group_leader = NULL; | ||
2806 | if (group_fd != -1) { | ||
2807 | ret = -EINVAL; | ||
2808 | group_file = fget_light(group_fd, &fput_needed); | ||
2809 | if (!group_file) | ||
2810 | goto err_put_context; | ||
2811 | if (group_file->f_op != &perf_fops) | ||
2812 | goto err_put_context; | ||
2813 | |||
2814 | group_leader = group_file->private_data; | ||
2815 | /* | ||
2816 | * Do not allow a recursive hierarchy (this new sibling | ||
2817 | * becoming part of another group-sibling): | ||
2818 | */ | ||
2819 | if (group_leader->group_leader != group_leader) | ||
2820 | goto err_put_context; | ||
2821 | /* | ||
2822 | * Do not allow to attach to a group in a different | ||
2823 | * task or CPU context: | ||
2824 | */ | ||
2825 | if (group_leader->ctx != ctx) | ||
2826 | goto err_put_context; | ||
2827 | /* | ||
2828 | * Only a group leader can be exclusive or pinned | ||
2829 | */ | ||
2830 | if (hw_event.exclusive || hw_event.pinned) | ||
2831 | goto err_put_context; | ||
2832 | } | ||
2833 | |||
2834 | counter = perf_counter_alloc(&hw_event, cpu, ctx, group_leader, | ||
2835 | GFP_KERNEL); | ||
2836 | ret = PTR_ERR(counter); | ||
2837 | if (IS_ERR(counter)) | ||
2838 | goto err_put_context; | ||
2839 | |||
2840 | ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); | ||
2841 | if (ret < 0) | ||
2842 | goto err_free_put_context; | ||
2843 | |||
2844 | counter_file = fget_light(ret, &fput_needed2); | ||
2845 | if (!counter_file) | ||
2846 | goto err_free_put_context; | ||
2847 | |||
2848 | counter->filp = counter_file; | ||
2849 | mutex_lock(&ctx->mutex); | ||
2850 | perf_install_in_context(ctx, counter, cpu); | ||
2851 | mutex_unlock(&ctx->mutex); | ||
2852 | |||
2853 | fput_light(counter_file, fput_needed2); | ||
2854 | |||
2855 | out_fput: | ||
2856 | fput_light(group_file, fput_needed); | ||
2857 | |||
2858 | return ret; | ||
2859 | |||
2860 | err_free_put_context: | ||
2861 | kfree(counter); | ||
2862 | |||
2863 | err_put_context: | ||
2864 | put_context(ctx); | ||
2865 | |||
2866 | goto out_fput; | ||
2867 | } | ||
2868 | |||
2869 | /* | ||
2870 | * Initialize the perf_counter context in a task_struct: | ||
2871 | */ | ||
2872 | static void | ||
2873 | __perf_counter_init_context(struct perf_counter_context *ctx, | ||
2874 | struct task_struct *task) | ||
2875 | { | ||
2876 | memset(ctx, 0, sizeof(*ctx)); | ||
2877 | spin_lock_init(&ctx->lock); | ||
2878 | mutex_init(&ctx->mutex); | ||
2879 | INIT_LIST_HEAD(&ctx->counter_list); | ||
2880 | INIT_LIST_HEAD(&ctx->event_list); | ||
2881 | ctx->task = task; | ||
2882 | } | ||
2883 | |||
/*
 * inherit a counter from parent task to child task:
 *
 * Creates a clone of @parent_counter in @child_ctx, linked back to the
 * original top-level parent. Returns the child counter or an ERR_PTR()
 * from perf_counter_alloc().
 */
static struct perf_counter *
inherit_counter(struct perf_counter *parent_counter,
	      struct task_struct *parent,
	      struct perf_counter_context *parent_ctx,
	      struct task_struct *child,
	      struct perf_counter *group_leader,
	      struct perf_counter_context *child_ctx)
{
	struct perf_counter *child_counter;

	/*
	 * Instead of creating recursive hierarchies of counters,
	 * we link inherited counters back to the original parent,
	 * which has a filp for sure, which we use as the reference
	 * count:
	 */
	if (parent_counter->parent)
		parent_counter = parent_counter->parent;

	child_counter = perf_counter_alloc(&parent_counter->hw_event,
					   parent_counter->cpu, child_ctx,
					   group_leader, GFP_KERNEL);
	if (IS_ERR(child_counter))
		return child_counter;

	/*
	 * Link it up in the child's context:
	 */
	child_counter->task = child;
	add_counter_to_ctx(child_counter, child_ctx);

	child_counter->parent = parent_counter;
	/*
	 * inherit into child's child as well:
	 */
	child_counter->hw_event.inherit = 1;

	/*
	 * Get a reference to the parent filp - we will fput it
	 * when the child counter exits. This is safe to do because
	 * we are in the parent and we know that the filp still
	 * exists and has a nonzero count:
	 */
	atomic_long_inc(&parent_counter->filp->f_count);

	/*
	 * Link this into the parent counter's child list
	 */
	mutex_lock(&parent_counter->mutex);
	list_add_tail(&child_counter->child_list, &parent_counter->child_list);

	/*
	 * Make the child state follow the state of the parent counter,
	 * not its hw_event.disabled bit. We hold the parent's mutex,
	 * so we won't race with perf_counter_{en,dis}able_family.
	 */
	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
	else
		child_counter->state = PERF_COUNTER_STATE_OFF;

	mutex_unlock(&parent_counter->mutex);

	return child_counter;
}
2952 | |||
2953 | static int inherit_group(struct perf_counter *parent_counter, | ||
2954 | struct task_struct *parent, | ||
2955 | struct perf_counter_context *parent_ctx, | ||
2956 | struct task_struct *child, | ||
2957 | struct perf_counter_context *child_ctx) | ||
2958 | { | ||
2959 | struct perf_counter *leader; | ||
2960 | struct perf_counter *sub; | ||
2961 | struct perf_counter *child_ctr; | ||
2962 | |||
2963 | leader = inherit_counter(parent_counter, parent, parent_ctx, | ||
2964 | child, NULL, child_ctx); | ||
2965 | if (IS_ERR(leader)) | ||
2966 | return PTR_ERR(leader); | ||
2967 | list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) { | ||
2968 | child_ctr = inherit_counter(sub, parent, parent_ctx, | ||
2969 | child, leader, child_ctx); | ||
2970 | if (IS_ERR(child_ctr)) | ||
2971 | return PTR_ERR(child_ctr); | ||
2972 | } | ||
2973 | return 0; | ||
2974 | } | ||
2975 | |||
2976 | static void sync_child_counter(struct perf_counter *child_counter, | ||
2977 | struct perf_counter *parent_counter) | ||
2978 | { | ||
2979 | u64 parent_val, child_val; | ||
2980 | |||
2981 | parent_val = atomic64_read(&parent_counter->count); | ||
2982 | child_val = atomic64_read(&child_counter->count); | ||
2983 | |||
2984 | /* | ||
2985 | * Add back the child's count to the parent's count: | ||
2986 | */ | ||
2987 | atomic64_add(child_val, &parent_counter->count); | ||
2988 | atomic64_add(child_counter->total_time_enabled, | ||
2989 | &parent_counter->child_total_time_enabled); | ||
2990 | atomic64_add(child_counter->total_time_running, | ||
2991 | &parent_counter->child_total_time_running); | ||
2992 | |||
2993 | /* | ||
2994 | * Remove this counter from the parent's list | ||
2995 | */ | ||
2996 | mutex_lock(&parent_counter->mutex); | ||
2997 | list_del_init(&child_counter->child_list); | ||
2998 | mutex_unlock(&parent_counter->mutex); | ||
2999 | |||
3000 | /* | ||
3001 | * Release the parent counter, if this was the last | ||
3002 | * reference to it. | ||
3003 | */ | ||
3004 | fput(parent_counter->filp); | ||
3005 | } | ||
3006 | |||
/*
 * Detach one counter (and its siblings) from an exiting child task and
 * feed the values back to the parent counters, freeing the clones.
 */
static void
__perf_counter_exit_task(struct task_struct *child,
			 struct perf_counter *child_counter,
			 struct perf_counter_context *child_ctx)
{
	struct perf_counter *parent_counter;
	struct perf_counter *sub, *tmp;

	/*
	 * If we do not self-reap then we have to wait for the
	 * child task to unschedule (it will happen for sure),
	 * so that its counter is at its final count. (This
	 * condition triggers rarely - child tasks usually get
	 * off their CPU before the parent has a chance to
	 * get this far into the reaping action)
	 */
	if (child != current) {
		wait_task_inactive(child, 0);
		list_del_init(&child_counter->list_entry);
		update_counter_times(child_counter);
	} else {
		struct perf_cpu_context *cpuctx;
		unsigned long flags;
		u64 perf_flags;

		/*
		 * Disable and unlink this counter.
		 *
		 * Be careful about zapping the list - IRQ/NMI context
		 * could still be processing it:
		 */
		local_irq_save(flags);
		perf_flags = hw_perf_save_disable();

		cpuctx = &__get_cpu_var(perf_cpu_context);

		/* Schedule the whole group out before unlinking. */
		group_sched_out(child_counter, cpuctx, child_ctx);
		update_counter_times(child_counter);

		list_del_init(&child_counter->list_entry);

		child_ctx->nr_counters--;

		hw_perf_restore(perf_flags);
		local_irq_restore(flags);
	}

	parent_counter = child_counter->parent;
	/*
	 * It can happen that parent exits first, and has counters
	 * that are still around due to the child reference. These
	 * counters need to be zapped - but otherwise linger.
	 */
	if (parent_counter) {
		sync_child_counter(child_counter, parent_counter);
		/* _safe: free_counter() releases each sibling as we go. */
		list_for_each_entry_safe(sub, tmp, &child_counter->sibling_list,
					 list_entry) {
			if (sub->parent) {
				sync_child_counter(sub, sub->parent);
				free_counter(sub);
			}
		}
		free_counter(child_counter);
	}
}
3072 | |||
/*
 * When a child task exits, feed back counter values to parent counters.
 *
 * Note: we may be running in child context, but the PID is not hashed
 * anymore so new counters will not be added.
 */
void perf_counter_exit_task(struct task_struct *child)
{
	struct perf_counter *child_counter, *tmp;
	struct perf_counter_context *child_ctx;

	child_ctx = &child->perf_counter_ctx;

	/* Fast path: most tasks never had any counters attached. */
	if (likely(!child_ctx->nr_counters))
		return;

	/* _safe: __perf_counter_exit_task() unlinks entries as it goes. */
	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
				 list_entry)
		__perf_counter_exit_task(child, child_counter, child_ctx);
}
3093 | |||
/*
 * Initialize the perf_counter context in task_struct
 *
 * Called at fork time from the parent's context; clones every counter
 * in the parent's context that was marked with hw_event.inherit.
 */
void perf_counter_init_task(struct task_struct *child)
{
	struct perf_counter_context *child_ctx, *parent_ctx;
	struct perf_counter *counter;
	struct task_struct *parent = current;

	child_ctx = &child->perf_counter_ctx;
	parent_ctx = &parent->perf_counter_ctx;

	__perf_counter_init_context(child_ctx, child);

	/*
	 * This is executed from the parent task context, so inherit
	 * counters that have been marked for cloning:
	 */

	if (likely(!parent_ctx->nr_counters))
		return;

	/*
	 * Lock the parent list. No need to lock the child - not PID
	 * hashed yet and not running, so nobody can access it.
	 */
	mutex_lock(&parent_ctx->mutex);

	/*
	 * We dont have to disable NMIs - we are only looking at
	 * the list, not manipulating it:
	 */
	list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) {
		if (!counter->hw_event.inherit)
			continue;

		/* Stop at the first group that fails to inherit. */
		if (inherit_group(counter, parent,
				  parent_ctx, child, child_ctx))
			break;
	}

	mutex_unlock(&parent_ctx->mutex);
}
3137 | |||
3138 | static void __cpuinit perf_counter_init_cpu(int cpu) | ||
3139 | { | ||
3140 | struct perf_cpu_context *cpuctx; | ||
3141 | |||
3142 | cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
3143 | __perf_counter_init_context(&cpuctx->ctx, NULL); | ||
3144 | |||
3145 | mutex_lock(&perf_resource_mutex); | ||
3146 | cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu; | ||
3147 | mutex_unlock(&perf_resource_mutex); | ||
3148 | |||
3149 | hw_perf_counter_setup(cpu); | ||
3150 | } | ||
3151 | |||
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Runs on the departing CPU via smp_call_function_single(): detach
 * every counter from that CPU's context.
 */
static void __perf_counter_exit_cpu(void *info)
{
	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
	struct perf_counter_context *ctx = &cpuctx->ctx;
	struct perf_counter *counter, *tmp;

	/* _safe: __perf_counter_remove_from_context() unlinks entries. */
	list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry)
		__perf_counter_remove_from_context(counter);
}
/* Tear down the per-cpu context before the CPU goes offline. */
static void perf_counter_exit_cpu(int cpu)
{
	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
	struct perf_counter_context *ctx = &cpuctx->ctx;

	/* ctx->mutex keeps new counters from being installed meanwhile. */
	mutex_lock(&ctx->mutex);
	smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1);
	mutex_unlock(&ctx->mutex);
}
#else
static inline void perf_counter_exit_cpu(int cpu) { }
#endif
3174 | |||
3175 | static int __cpuinit | ||
3176 | perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | ||
3177 | { | ||
3178 | unsigned int cpu = (long)hcpu; | ||
3179 | |||
3180 | switch (action) { | ||
3181 | |||
3182 | case CPU_UP_PREPARE: | ||
3183 | case CPU_UP_PREPARE_FROZEN: | ||
3184 | perf_counter_init_cpu(cpu); | ||
3185 | break; | ||
3186 | |||
3187 | case CPU_DOWN_PREPARE: | ||
3188 | case CPU_DOWN_PREPARE_FROZEN: | ||
3189 | perf_counter_exit_cpu(cpu); | ||
3190 | break; | ||
3191 | |||
3192 | default: | ||
3193 | break; | ||
3194 | } | ||
3195 | |||
3196 | return NOTIFY_OK; | ||
3197 | } | ||
3198 | |||
/* Hotplug notifier wiring perf_cpu_notify() into CPU up/down events. */
static struct notifier_block __cpuinitdata perf_cpu_nb = {
	.notifier_call		= perf_cpu_notify,
};
3202 | |||
/*
 * Boot-time init: set up the boot CPU's context by hand (the notifier
 * was not registered yet when it came up), then register the hotplug
 * notifier for all later CPUs.
 */
static int __init perf_counter_init(void)
{
	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
			(void *)(long)smp_processor_id());
	register_cpu_notifier(&perf_cpu_nb);

	return 0;
}
early_initcall(perf_counter_init);
3212 | |||
/* sysfs read: current number of counters reserved for per-cpu use. */
static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf)
{
	return sprintf(buf, "%d\n", perf_reserved_percpu);
}
3217 | |||
/*
 * sysfs write: reserve @val hardware counters per CPU for per-cpu
 * counters and shrink every online CPU's per-task budget accordingly.
 * Returns @count on success, -EINVAL for out-of-range input.
 */
static ssize_t
perf_set_reserve_percpu(struct sysdev_class *class,
			const char *buf,
			size_t count)
{
	struct perf_cpu_context *cpuctx;
	unsigned long val;
	int err, cpu, mpt;

	err = strict_strtoul(buf, 10, &val);
	if (err)
		return err;
	if (val > perf_max_counters)
		return -EINVAL;

	mutex_lock(&perf_resource_mutex);
	perf_reserved_percpu = val;
	for_each_online_cpu(cpu) {
		cpuctx = &per_cpu(perf_cpu_context, cpu);
		spin_lock_irq(&cpuctx->ctx.lock);
		/* Per-task budget: bounded by both in-use and reserved counters. */
		mpt = min(perf_max_counters - cpuctx->ctx.nr_counters,
			  perf_max_counters - perf_reserved_percpu);
		cpuctx->max_pertask = mpt;
		spin_unlock_irq(&cpuctx->ctx.lock);
	}
	mutex_unlock(&perf_resource_mutex);

	return count;
}
3247 | |||
/* sysfs read: current overcommit flag (0 or 1). */
static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf)
{
	return sprintf(buf, "%d\n", perf_overcommit);
}
3252 | |||
3253 | static ssize_t | ||
3254 | perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count) | ||
3255 | { | ||
3256 | unsigned long val; | ||
3257 | int err; | ||
3258 | |||
3259 | err = strict_strtoul(buf, 10, &val); | ||
3260 | if (err) | ||
3261 | return err; | ||
3262 | if (val > 1) | ||
3263 | return -EINVAL; | ||
3264 | |||
3265 | mutex_lock(&perf_resource_mutex); | ||
3266 | perf_overcommit = val; | ||
3267 | mutex_unlock(&perf_resource_mutex); | ||
3268 | |||
3269 | return count; | ||
3270 | } | ||
3271 | |||
/* /sys/.../perf_counters/reserve_percpu — per-cpu counter reservation. */
static SYSDEV_CLASS_ATTR(
				reserve_percpu,
				0644,
				perf_show_reserve_percpu,
				perf_set_reserve_percpu
			);

/* /sys/.../perf_counters/overcommit — allow overcommitting counters. */
static SYSDEV_CLASS_ATTR(
				overcommit,
				0644,
				perf_show_overcommit,
				perf_set_overcommit
			);
3285 | |||
/* Attributes exported under the "perf_counters" sysfs group. */
static struct attribute *perfclass_attrs[] = {
	&attr_reserve_percpu.attr,
	&attr_overcommit.attr,
	NULL
};

static struct attribute_group perfclass_attr_group = {
	.attrs			= perfclass_attrs,
	.name			= "perf_counters",
};
3296 | |||
/* Register the perf_counters attribute group under the cpu sysdev class. */
static int __init perf_counter_sysfs_init(void)
{
	return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
				  &perfclass_attr_group);
}
device_initcall(perf_counter_sysfs_init);
diff --git a/kernel/sched.c b/kernel/sched.c index b902e587a3a0..2f600e30dcf0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -584,6 +584,7 @@ struct rq { | |||
584 | struct load_weight load; | 584 | struct load_weight load; |
585 | unsigned long nr_load_updates; | 585 | unsigned long nr_load_updates; |
586 | u64 nr_switches; | 586 | u64 nr_switches; |
587 | u64 nr_migrations_in; | ||
587 | 588 | ||
588 | struct cfs_rq cfs; | 589 | struct cfs_rq cfs; |
589 | struct rt_rq rt; | 590 | struct rt_rq rt; |
@@ -692,7 +693,7 @@ static inline int cpu_of(struct rq *rq) | |||
692 | #define task_rq(p) cpu_rq(task_cpu(p)) | 693 | #define task_rq(p) cpu_rq(task_cpu(p)) |
693 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) | 694 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) |
694 | 695 | ||
695 | static inline void update_rq_clock(struct rq *rq) | 696 | inline void update_rq_clock(struct rq *rq) |
696 | { | 697 | { |
697 | rq->clock = sched_clock_cpu(cpu_of(rq)); | 698 | rq->clock = sched_clock_cpu(cpu_of(rq)); |
698 | } | 699 | } |
@@ -1967,12 +1968,15 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
1967 | p->se.sleep_start -= clock_offset; | 1968 | p->se.sleep_start -= clock_offset; |
1968 | if (p->se.block_start) | 1969 | if (p->se.block_start) |
1969 | p->se.block_start -= clock_offset; | 1970 | p->se.block_start -= clock_offset; |
1971 | #endif | ||
1970 | if (old_cpu != new_cpu) { | 1972 | if (old_cpu != new_cpu) { |
1971 | schedstat_inc(p, se.nr_migrations); | 1973 | p->se.nr_migrations++; |
1974 | new_rq->nr_migrations_in++; | ||
1975 | #ifdef CONFIG_SCHEDSTATS | ||
1972 | if (task_hot(p, old_rq->clock, NULL)) | 1976 | if (task_hot(p, old_rq->clock, NULL)) |
1973 | schedstat_inc(p, se.nr_forced2_migrations); | 1977 | schedstat_inc(p, se.nr_forced2_migrations); |
1974 | } | ||
1975 | #endif | 1978 | #endif |
1979 | } | ||
1976 | p->se.vruntime -= old_cfsrq->min_vruntime - | 1980 | p->se.vruntime -= old_cfsrq->min_vruntime - |
1977 | new_cfsrq->min_vruntime; | 1981 | new_cfsrq->min_vruntime; |
1978 | 1982 | ||
@@ -2324,6 +2328,27 @@ static int sched_balance_self(int cpu, int flag) | |||
2324 | 2328 | ||
2325 | #endif /* CONFIG_SMP */ | 2329 | #endif /* CONFIG_SMP */ |
2326 | 2330 | ||
2331 | /** | ||
2332 | * task_oncpu_function_call - call a function on the cpu on which a task runs | ||
2333 | * @p: the task to evaluate | ||
2334 | * @func: the function to be called | ||
2335 | * @info: the function call argument | ||
2336 | * | ||
2337 | * Calls the function @func when the task is currently running. This might | ||
2338 | * be on the current CPU, which just calls the function directly | ||
2339 | */ | ||
2340 | void task_oncpu_function_call(struct task_struct *p, | ||
2341 | void (*func) (void *info), void *info) | ||
2342 | { | ||
2343 | int cpu; | ||
2344 | |||
2345 | preempt_disable(); | ||
2346 | cpu = task_cpu(p); | ||
2347 | if (task_curr(p)) | ||
2348 | smp_call_function_single(cpu, func, info, 1); | ||
2349 | preempt_enable(); | ||
2350 | } | ||
2351 | |||
2327 | /*** | 2352 | /*** |
2328 | * try_to_wake_up - wake up a thread | 2353 | * try_to_wake_up - wake up a thread |
2329 | * @p: the to-be-woken-up thread | 2354 | * @p: the to-be-woken-up thread |
@@ -2480,6 +2505,7 @@ static void __sched_fork(struct task_struct *p) | |||
2480 | p->se.exec_start = 0; | 2505 | p->se.exec_start = 0; |
2481 | p->se.sum_exec_runtime = 0; | 2506 | p->se.sum_exec_runtime = 0; |
2482 | p->se.prev_sum_exec_runtime = 0; | 2507 | p->se.prev_sum_exec_runtime = 0; |
2508 | p->se.nr_migrations = 0; | ||
2483 | p->se.last_wakeup = 0; | 2509 | p->se.last_wakeup = 0; |
2484 | p->se.avg_overlap = 0; | 2510 | p->se.avg_overlap = 0; |
2485 | p->se.start_runtime = 0; | 2511 | p->se.start_runtime = 0; |
@@ -2710,6 +2736,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
2710 | */ | 2736 | */ |
2711 | prev_state = prev->state; | 2737 | prev_state = prev->state; |
2712 | finish_arch_switch(prev); | 2738 | finish_arch_switch(prev); |
2739 | perf_counter_task_sched_in(current, cpu_of(rq)); | ||
2713 | finish_lock_switch(rq, prev); | 2740 | finish_lock_switch(rq, prev); |
2714 | #ifdef CONFIG_SMP | 2741 | #ifdef CONFIG_SMP |
2715 | if (post_schedule) | 2742 | if (post_schedule) |
@@ -2872,6 +2899,15 @@ unsigned long nr_active(void) | |||
2872 | } | 2899 | } |
2873 | 2900 | ||
2874 | /* | 2901 | /* |
2902 | * Externally visible per-cpu scheduler statistics: | ||
2903 | * cpu_nr_migrations(cpu) - number of migrations into that cpu | ||
2904 | */ | ||
2905 | u64 cpu_nr_migrations(int cpu) | ||
2906 | { | ||
2907 | return cpu_rq(cpu)->nr_migrations_in; | ||
2908 | } | ||
2909 | |||
2910 | /* | ||
2875 | * Update rq->cpu_load[] statistics. This function is usually called every | 2911 | * Update rq->cpu_load[] statistics. This function is usually called every |
2876 | * scheduler tick (TICK_NSEC). | 2912 | * scheduler tick (TICK_NSEC). |
2877 | */ | 2913 | */ |
@@ -4838,6 +4874,7 @@ void scheduler_tick(void) | |||
4838 | update_rq_clock(rq); | 4874 | update_rq_clock(rq); |
4839 | update_cpu_load(rq); | 4875 | update_cpu_load(rq); |
4840 | curr->sched_class->task_tick(rq, curr, 0); | 4876 | curr->sched_class->task_tick(rq, curr, 0); |
4877 | perf_counter_task_tick(curr, cpu); | ||
4841 | spin_unlock(&rq->lock); | 4878 | spin_unlock(&rq->lock); |
4842 | 4879 | ||
4843 | #ifdef CONFIG_SMP | 4880 | #ifdef CONFIG_SMP |
@@ -5053,6 +5090,7 @@ need_resched_nonpreemptible: | |||
5053 | 5090 | ||
5054 | if (likely(prev != next)) { | 5091 | if (likely(prev != next)) { |
5055 | sched_info_switch(prev, next); | 5092 | sched_info_switch(prev, next); |
5093 | perf_counter_task_sched_out(prev, cpu); | ||
5056 | 5094 | ||
5057 | rq->nr_switches++; | 5095 | rq->nr_switches++; |
5058 | rq->curr = next; | 5096 | rq->curr = next; |
diff --git a/kernel/sys.c b/kernel/sys.c index e7998cf31498..438d99a38c87 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/prctl.h> | 14 | #include <linux/prctl.h> |
15 | #include <linux/highuid.h> | 15 | #include <linux/highuid.h> |
16 | #include <linux/fs.h> | 16 | #include <linux/fs.h> |
17 | #include <linux/perf_counter.h> | ||
17 | #include <linux/resource.h> | 18 | #include <linux/resource.h> |
18 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
19 | #include <linux/kexec.h> | 20 | #include <linux/kexec.h> |
@@ -1793,6 +1794,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
1793 | case PR_SET_TSC: | 1794 | case PR_SET_TSC: |
1794 | error = SET_TSC_CTL(arg2); | 1795 | error = SET_TSC_CTL(arg2); |
1795 | break; | 1796 | break; |
1797 | case PR_TASK_PERF_COUNTERS_DISABLE: | ||
1798 | error = perf_counter_task_disable(); | ||
1799 | break; | ||
1800 | case PR_TASK_PERF_COUNTERS_ENABLE: | ||
1801 | error = perf_counter_task_enable(); | ||
1802 | break; | ||
1796 | case PR_GET_TIMERSLACK: | 1803 | case PR_GET_TIMERSLACK: |
1797 | error = current->timer_slack_ns; | 1804 | error = current->timer_slack_ns; |
1798 | break; | 1805 | break; |
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 27dad2967387..68320f6b07b5 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -175,3 +175,6 @@ cond_syscall(compat_sys_timerfd_settime); | |||
175 | cond_syscall(compat_sys_timerfd_gettime); | 175 | cond_syscall(compat_sys_timerfd_gettime); |
176 | cond_syscall(sys_eventfd); | 176 | cond_syscall(sys_eventfd); |
177 | cond_syscall(sys_eventfd2); | 177 | cond_syscall(sys_eventfd2); |
178 | |||
179 | /* performance counters: */ | ||
180 | cond_syscall(sys_perf_counter_open); | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e3d2c7dd59b9..8203d70928d5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/reboot.h> | 49 | #include <linux/reboot.h> |
50 | #include <linux/ftrace.h> | 50 | #include <linux/ftrace.h> |
51 | #include <linux/slow-work.h> | 51 | #include <linux/slow-work.h> |
52 | #include <linux/perf_counter.h> | ||
52 | 53 | ||
53 | #include <asm/uaccess.h> | 54 | #include <asm/uaccess.h> |
54 | #include <asm/processor.h> | 55 | #include <asm/processor.h> |
@@ -910,6 +911,16 @@ static struct ctl_table kern_table[] = { | |||
910 | .child = slow_work_sysctls, | 911 | .child = slow_work_sysctls, |
911 | }, | 912 | }, |
912 | #endif | 913 | #endif |
914 | #ifdef CONFIG_PERF_COUNTERS | ||
915 | { | ||
916 | .ctl_name = CTL_UNNUMBERED, | ||
917 | .procname = "perf_counter_privileged", | ||
918 | .data = &sysctl_perf_counter_priv, | ||
919 | .maxlen = sizeof(sysctl_perf_counter_priv), | ||
920 | .mode = 0644, | ||
921 | .proc_handler = &proc_dointvec, | ||
922 | }, | ||
923 | #endif | ||
913 | /* | 924 | /* |
914 | * NOTE: do not add new entries to this table unless you have read | 925 | * NOTE: do not add new entries to this table unless you have read |
915 | * Documentation/sysctl/ctl_unnumbered.txt | 926 | * Documentation/sysctl/ctl_unnumbered.txt |
diff --git a/kernel/timer.c b/kernel/timer.c index cffffad01c31..fed53be44fd9 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/delay.h> | 37 | #include <linux/delay.h> |
38 | #include <linux/tick.h> | 38 | #include <linux/tick.h> |
39 | #include <linux/kallsyms.h> | 39 | #include <linux/kallsyms.h> |
40 | #include <linux/perf_counter.h> | ||
40 | 41 | ||
41 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
42 | #include <asm/unistd.h> | 43 | #include <asm/unistd.h> |
@@ -1170,6 +1171,8 @@ static void run_timer_softirq(struct softirq_action *h) | |||
1170 | { | 1171 | { |
1171 | struct tvec_base *base = __get_cpu_var(tvec_bases); | 1172 | struct tvec_base *base = __get_cpu_var(tvec_bases); |
1172 | 1173 | ||
1174 | perf_counter_do_pending(); | ||
1175 | |||
1173 | hrtimer_run_pending(); | 1176 | hrtimer_run_pending(); |
1174 | 1177 | ||
1175 | if (time_after_eq(jiffies, base->timer_jiffies)) | 1178 | if (time_after_eq(jiffies, base->timer_jiffies)) |
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/mempolicy.h> | 28 | #include <linux/mempolicy.h> |
29 | #include <linux/rmap.h> | 29 | #include <linux/rmap.h> |
30 | #include <linux/mmu_notifier.h> | 30 | #include <linux/mmu_notifier.h> |
31 | #include <linux/perf_counter.h> | ||
31 | 32 | ||
32 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
33 | #include <asm/cacheflush.h> | 34 | #include <asm/cacheflush.h> |
@@ -1223,6 +1224,9 @@ munmap_back: | |||
1223 | if (correct_wcount) | 1224 | if (correct_wcount) |
1224 | atomic_inc(&inode->i_writecount); | 1225 | atomic_inc(&inode->i_writecount); |
1225 | out: | 1226 | out: |
1227 | if (vm_flags & VM_EXEC) | ||
1228 | perf_counter_mmap(addr, len, pgoff, file); | ||
1229 | |||
1226 | mm->total_vm += len >> PAGE_SHIFT; | 1230 | mm->total_vm += len >> PAGE_SHIFT; |
1227 | vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); | 1231 | vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); |
1228 | if (vm_flags & VM_LOCKED) { | 1232 | if (vm_flags & VM_LOCKED) { |
@@ -1756,6 +1760,12 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma) | |||
1756 | do { | 1760 | do { |
1757 | long nrpages = vma_pages(vma); | 1761 | long nrpages = vma_pages(vma); |
1758 | 1762 | ||
1763 | if (vma->vm_flags & VM_EXEC) { | ||
1764 | perf_counter_munmap(vma->vm_start, | ||
1765 | nrpages << PAGE_SHIFT, | ||
1766 | vma->vm_pgoff, vma->vm_file); | ||
1767 | } | ||
1768 | |||
1759 | mm->total_vm -= nrpages; | 1769 | mm->total_vm -= nrpages; |
1760 | vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); | 1770 | vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); |
1761 | vma = remove_vma(vma); | 1771 | vma = remove_vma(vma); |