-rw-r--r--  Documentation/ABI/testing/sysfs-kernel-slab | 109
-rw-r--r--  Documentation/lockstat.txt | 12
-rw-r--r--  MAINTAINERS | 13
-rw-r--r--  arch/arm/mach-msm/Kconfig | 30
-rw-r--r--  arch/arm/mach-msm/Makefile | 1
-rw-r--r--  arch/arm/mach-msm/board-dream.c | 93
-rw-r--r--  arch/arm/mach-msm/board-dream.h | 5
-rw-r--r--  arch/arm/mach-msm/include/mach/debug-macro.S | 24
-rw-r--r--  arch/arm/mach-msm/include/mach/mmc.h | 26
-rw-r--r--  arch/arm/mach-msm/include/mach/msm_iomap.h | 12
-rw-r--r--  arch/arm/mach-msm/include/mach/uncompress.h | 7
-rw-r--r--  arch/arm/mach-msm/io.c | 3
-rw-r--r--  arch/x86/Kconfig.debug | 4
-rw-r--r--  arch/x86/include/asm/amd_iommu_proto.h | 4
-rw-r--r--  arch/x86/include/asm/system.h | 1
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 46
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c | 9
-rw-r--r--  arch/x86/kernel/apic/apic_noop.c | 2
-rw-r--r--  arch/x86/kernel/apic/es7000_32.c | 12
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 5
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 31
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 33
-rw-r--r--  arch/x86/kernel/entry_64.S | 6
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c | 5
-rw-r--r--  arch/x86/kernel/microcode_amd.c | 40
-rw-r--r--  arch/x86/kernel/microcode_core.c | 26
-rw-r--r--  arch/x86/kernel/microcode_intel.c | 47
-rw-r--r--  arch/x86/kernel/process.c | 21
-rw-r--r--  arch/x86/kernel/process_32.c | 14
-rw-r--r--  arch/x86/kernel/process_64.c | 16
-rw-r--r--  arch/x86/kernel/ptrace.c | 68
-rw-r--r--  arch/x86/kernel/reboot.c | 8
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 13
-rw-r--r--  arch/x86/kvm/i8254.c | 12
-rw-r--r--  arch/x86/lib/Makefile | 4
-rw-r--r--  arch/x86/mm/kmmio.c | 42
-rw-r--r--  arch/x86/mm/mmio-mod.c | 71
-rw-r--r--  arch/x86/tools/test_get_len.c | 2
-rw-r--r--  drivers/char/lp.c | 115
-rw-r--r--  drivers/mmc/host/msm_sdcc.c | 5
-rw-r--r--  drivers/usb/core/devio.c | 110
-rw-r--r--  drivers/video/Kconfig | 2
-rw-r--r--  fs/compat_ioctl.c | 767
-rw-r--r--  include/asm-generic/gpio.h | 3
-rw-r--r--  include/asm-generic/unistd.h | 10
-rw-r--r--  include/linux/cpumask.h | 2
-rw-r--r--  include/linux/ftrace_event.h | 1
-rw-r--r--  include/linux/hrtimer.h | 56
-rw-r--r--  include/linux/hw_breakpoint.h | 40
-rw-r--r--  include/linux/perf_event.h | 41
-rw-r--r--  include/linux/sched.h | 23
-rw-r--r--  include/linux/trace_seq.h | 7
-rw-r--r--  include/linux/usbdevice_fs.h | 26
-rw-r--r--  include/trace/events/timer.h | 8
-rw-r--r--  kernel/cpu.c | 18
-rw-r--r--  kernel/cpuset.c | 18
-rw-r--r--  kernel/futex.c | 10
-rw-r--r--  kernel/hrtimer.c | 121
-rw-r--r--  kernel/hw_breakpoint.c | 146
-rw-r--r--  kernel/lockdep.c | 16
-rw-r--r--  kernel/perf_event.c | 75
-rw-r--r--  kernel/sched.c | 218
-rw-r--r--  kernel/sched_debug.c | 13
-rw-r--r--  kernel/sched_fair.c | 155
-rw-r--r--  kernel/sched_features.h | 5
-rw-r--r--  kernel/sched_idletask.c | 2
-rw-r--r--  kernel/sched_rt.c | 2
-rw-r--r--  kernel/sysctl.c | 30
-rw-r--r--  kernel/time/timer_list.c | 5
-rw-r--r--  kernel/trace/trace.c | 57
-rw-r--r--  kernel/trace/trace.h | 2
-rw-r--r--  kernel/trace/trace_functions_graph.c | 165
-rw-r--r--  kernel/trace/trace_kprobe.c | 41
-rw-r--r--  kernel/trace/trace_ksym.c | 5
-rw-r--r--  kernel/trace/trace_output.c | 75
-rw-r--r--  lib/checksum.c | 14
-rw-r--r--  mm/slab.c | 118
-rw-r--r--  mm/slub.c | 20
-rw-r--r--  samples/hw_breakpoint/data_breakpoint.c | 7
-rw-r--r--  tools/perf/Documentation/perf-kmem.txt | 13
-rw-r--r--  tools/perf/Documentation/perf-probe.txt | 21
-rw-r--r--  tools/perf/builtin-buildid-list.c | 2
-rw-r--r--  tools/perf/builtin-kmem.c | 123
-rw-r--r--  tools/perf/builtin-probe.c | 80
-rw-r--r--  tools/perf/builtin-report.c | 39
-rw-r--r--  tools/perf/builtin-sched.c | 212
-rw-r--r--  tools/perf/builtin-timechart.c | 54
-rw-r--r--  tools/perf/builtin-trace.c | 48
-rw-r--r--  tools/perf/util/data_map.c | 4
-rw-r--r--  tools/perf/util/data_map.h | 2
-rw-r--r--  tools/perf/util/event.c | 67
-rw-r--r--  tools/perf/util/event.h | 17
-rw-r--r--  tools/perf/util/header.c | 9
-rw-r--r--  tools/perf/util/parse-events.c | 17
-rw-r--r--  tools/perf/util/parse-options.c | 3
-rw-r--r--  tools/perf/util/probe-event.c | 133
-rw-r--r--  tools/perf/util/probe-event.h | 1
-rw-r--r--  tools/perf/util/probe-finder.c | 2
-rw-r--r--  tools/perf/util/symbol.c | 5
-rw-r--r--  tools/perf/util/trace-event-parse.c | 4
-rw-r--r--  tools/perf/util/trace-event-perl.c | 67
-rw-r--r--  tools/perf/util/trace-event-perl.h | 4
-rw-r--r--  tools/perf/util/trace-event-read.c | 3
103 files changed, 2532 insertions, 1829 deletions
diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
index 6dcf75e594fb..8b093f8222d3 100644
--- a/Documentation/ABI/testing/sysfs-kernel-slab
+++ b/Documentation/ABI/testing/sysfs-kernel-slab
@@ -45,8 +45,9 @@ KernelVersion: 2.6.25
45Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 45Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
46 Christoph Lameter <cl@linux-foundation.org> 46 Christoph Lameter <cl@linux-foundation.org>
47Description: 47Description:
48 The alloc_fastpath file is read-only and specifies how many 48 The alloc_fastpath file shows how many objects have been
49 objects have been allocated using the fast path. 49 allocated using the fast path. It can be written to clear the
50 current count.
50 Available when CONFIG_SLUB_STATS is enabled. 51 Available when CONFIG_SLUB_STATS is enabled.
51 52
52What: /sys/kernel/slab/cache/alloc_from_partial 53What: /sys/kernel/slab/cache/alloc_from_partial
@@ -55,9 +56,10 @@ KernelVersion: 2.6.25
55Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 56Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
56 Christoph Lameter <cl@linux-foundation.org> 57 Christoph Lameter <cl@linux-foundation.org>
57Description: 58Description:
58 The alloc_from_partial file is read-only and specifies how 59 The alloc_from_partial file shows how many times a cpu slab has
59 many times a cpu slab has been full and it has been refilled 60 been full and it has been refilled by using a slab from the list
60 by using a slab from the list of partially used slabs. 61 of partially used slabs. It can be written to clear the current
62 count.
61 Available when CONFIG_SLUB_STATS is enabled. 63 Available when CONFIG_SLUB_STATS is enabled.
62 64
63What: /sys/kernel/slab/cache/alloc_refill 65What: /sys/kernel/slab/cache/alloc_refill
@@ -66,9 +68,9 @@ KernelVersion: 2.6.25
66Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 68Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
67 Christoph Lameter <cl@linux-foundation.org> 69 Christoph Lameter <cl@linux-foundation.org>
68Description: 70Description:
69 The alloc_refill file is read-only and specifies how many 71 The alloc_refill file shows how many times the per-cpu freelist
70 times the per-cpu freelist was empty but there were objects 72 was empty but there were objects available as the result of
71 available as the result of remote cpu frees. 73 remote cpu frees. It can be written to clear the current count.
72 Available when CONFIG_SLUB_STATS is enabled. 74 Available when CONFIG_SLUB_STATS is enabled.
73 75
74What: /sys/kernel/slab/cache/alloc_slab 76What: /sys/kernel/slab/cache/alloc_slab
@@ -77,8 +79,9 @@ KernelVersion: 2.6.25
77Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 79Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
78 Christoph Lameter <cl@linux-foundation.org> 80 Christoph Lameter <cl@linux-foundation.org>
79Description: 81Description:
 80 The alloc_slab file is read-only and specifies how many times 82 The alloc_slab file shows how many times a new slab had to
81 a new slab had to be allocated from the page allocator. 83 be allocated from the page allocator. It can be written to
84 clear the current count.
82 Available when CONFIG_SLUB_STATS is enabled. 85 Available when CONFIG_SLUB_STATS is enabled.
83 86
84What: /sys/kernel/slab/cache/alloc_slowpath 87What: /sys/kernel/slab/cache/alloc_slowpath
@@ -87,9 +90,10 @@ KernelVersion: 2.6.25
87Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 90Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
88 Christoph Lameter <cl@linux-foundation.org> 91 Christoph Lameter <cl@linux-foundation.org>
89Description: 92Description:
90 The alloc_slowpath file is read-only and specifies how many 93 The alloc_slowpath file shows how many objects have been
91 objects have been allocated using the slow path because of a 94 allocated using the slow path because of a refill or
92 refill or allocation from a partial or new slab. 95 allocation from a partial or new slab. It can be written to
96 clear the current count.
93 Available when CONFIG_SLUB_STATS is enabled. 97 Available when CONFIG_SLUB_STATS is enabled.
94 98
95What: /sys/kernel/slab/cache/cache_dma 99What: /sys/kernel/slab/cache/cache_dma
@@ -117,10 +121,11 @@ KernelVersion: 2.6.31
117Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 121Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
118 Christoph Lameter <cl@linux-foundation.org> 122 Christoph Lameter <cl@linux-foundation.org>
119Description: 123Description:
120 The file cpuslab_flush is read-only and specifies how many 124 The file cpuslab_flush shows how many times a cache's cpu slabs
121 times a cache's cpu slabs have been flushed as the result of 125 have been flushed as the result of destroying or shrinking a
122 destroying or shrinking a cache, a cpu going offline, or as 126 cache, a cpu going offline, or as the result of forcing an
123 the result of forcing an allocation from a certain node. 127 allocation from a certain node. It can be written to clear the
128 current count.
124 Available when CONFIG_SLUB_STATS is enabled. 129 Available when CONFIG_SLUB_STATS is enabled.
125 130
126What: /sys/kernel/slab/cache/ctor 131What: /sys/kernel/slab/cache/ctor
@@ -139,8 +144,8 @@ KernelVersion: 2.6.25
139Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 144Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
140 Christoph Lameter <cl@linux-foundation.org> 145 Christoph Lameter <cl@linux-foundation.org>
141Description: 146Description:
142 The file deactivate_empty is read-only and specifies how many 147 The deactivate_empty file shows how many times an empty cpu slab
143 times an empty cpu slab was deactivated. 148 was deactivated. It can be written to clear the current count.
144 Available when CONFIG_SLUB_STATS is enabled. 149 Available when CONFIG_SLUB_STATS is enabled.
145 150
146What: /sys/kernel/slab/cache/deactivate_full 151What: /sys/kernel/slab/cache/deactivate_full
@@ -149,8 +154,8 @@ KernelVersion: 2.6.25
149Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 154Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
150 Christoph Lameter <cl@linux-foundation.org> 155 Christoph Lameter <cl@linux-foundation.org>
151Description: 156Description:
152 The file deactivate_full is read-only and specifies how many 157 The deactivate_full file shows how many times a full cpu slab
153 times a full cpu slab was deactivated. 158 was deactivated. It can be written to clear the current count.
154 Available when CONFIG_SLUB_STATS is enabled. 159 Available when CONFIG_SLUB_STATS is enabled.
155 160
156What: /sys/kernel/slab/cache/deactivate_remote_frees 161What: /sys/kernel/slab/cache/deactivate_remote_frees
@@ -159,9 +164,9 @@ KernelVersion: 2.6.25
159Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 164Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
160 Christoph Lameter <cl@linux-foundation.org> 165 Christoph Lameter <cl@linux-foundation.org>
161Description: 166Description:
162 The file deactivate_remote_frees is read-only and specifies how 167 The deactivate_remote_frees file shows how many times a cpu slab
163 many times a cpu slab has been deactivated and contained free 168 has been deactivated and contained free objects that were freed
164 objects that were freed remotely. 169 remotely. It can be written to clear the current count.
165 Available when CONFIG_SLUB_STATS is enabled. 170 Available when CONFIG_SLUB_STATS is enabled.
166 171
167What: /sys/kernel/slab/cache/deactivate_to_head 172What: /sys/kernel/slab/cache/deactivate_to_head
@@ -170,9 +175,9 @@ KernelVersion: 2.6.25
170Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 175Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
171 Christoph Lameter <cl@linux-foundation.org> 176 Christoph Lameter <cl@linux-foundation.org>
172Description: 177Description:
173 The file deactivate_to_head is read-only and specifies how 178 The deactivate_to_head file shows how many times a partial cpu
174 many times a partial cpu slab was deactivated and added to the 179 slab was deactivated and added to the head of its node's partial
175 head of its node's partial list. 180 list. It can be written to clear the current count.
176 Available when CONFIG_SLUB_STATS is enabled. 181 Available when CONFIG_SLUB_STATS is enabled.
177 182
178What: /sys/kernel/slab/cache/deactivate_to_tail 183What: /sys/kernel/slab/cache/deactivate_to_tail
@@ -181,9 +186,9 @@ KernelVersion: 2.6.25
181Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 186Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
182 Christoph Lameter <cl@linux-foundation.org> 187 Christoph Lameter <cl@linux-foundation.org>
183Description: 188Description:
184 The file deactivate_to_tail is read-only and specifies how 189 The deactivate_to_tail file shows how many times a partial cpu
185 many times a partial cpu slab was deactivated and added to the 190 slab was deactivated and added to the tail of its node's partial
186 tail of its node's partial list. 191 list. It can be written to clear the current count.
187 Available when CONFIG_SLUB_STATS is enabled. 192 Available when CONFIG_SLUB_STATS is enabled.
188 193
189What: /sys/kernel/slab/cache/destroy_by_rcu 194What: /sys/kernel/slab/cache/destroy_by_rcu
@@ -201,9 +206,9 @@ KernelVersion: 2.6.25
201Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 206Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
202 Christoph Lameter <cl@linux-foundation.org> 207 Christoph Lameter <cl@linux-foundation.org>
203Description: 208Description:
204 The file free_add_partial is read-only and specifies how many 209 The free_add_partial file shows how many times an object has
 205 times an object has been freed in a full slab so that it had to 210 been freed in a full slab so that it had to be added to its node's
206 added to its node's partial list. 211 partial list. It can be written to clear the current count.
207 Available when CONFIG_SLUB_STATS is enabled. 212 Available when CONFIG_SLUB_STATS is enabled.
208 213
209What: /sys/kernel/slab/cache/free_calls 214What: /sys/kernel/slab/cache/free_calls
@@ -222,9 +227,9 @@ KernelVersion: 2.6.25
222Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 227Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
223 Christoph Lameter <cl@linux-foundation.org> 228 Christoph Lameter <cl@linux-foundation.org>
224Description: 229Description:
225 The free_fastpath file is read-only and specifies how many 230 The free_fastpath file shows how many objects have been freed
226 objects have been freed using the fast path because it was an 231 using the fast path because it was an object from the cpu slab.
227 object from the cpu slab. 232 It can be written to clear the current count.
228 Available when CONFIG_SLUB_STATS is enabled. 233 Available when CONFIG_SLUB_STATS is enabled.
229 234
230What: /sys/kernel/slab/cache/free_frozen 235What: /sys/kernel/slab/cache/free_frozen
@@ -233,9 +238,9 @@ KernelVersion: 2.6.25
233Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 238Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
234 Christoph Lameter <cl@linux-foundation.org> 239 Christoph Lameter <cl@linux-foundation.org>
235Description: 240Description:
236 The free_frozen file is read-only and specifies how many 241 The free_frozen file shows how many objects have been freed to
237 objects have been freed to a frozen slab (i.e. a remote cpu 242 a frozen slab (i.e. a remote cpu slab). It can be written to
238 slab). 243 clear the current count.
239 Available when CONFIG_SLUB_STATS is enabled. 244 Available when CONFIG_SLUB_STATS is enabled.
240 245
241What: /sys/kernel/slab/cache/free_remove_partial 246What: /sys/kernel/slab/cache/free_remove_partial
@@ -244,9 +249,10 @@ KernelVersion: 2.6.25
244Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 249Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
245 Christoph Lameter <cl@linux-foundation.org> 250 Christoph Lameter <cl@linux-foundation.org>
246Description: 251Description:
247 The file free_remove_partial is read-only and specifies how 252 The free_remove_partial file shows how many times an object has
248 many times an object has been freed to a now-empty slab so 253 been freed to a now-empty slab so that it had to be removed from
249 that it had to be removed from its node's partial list. 254 its node's partial list. It can be written to clear the current
255 count.
250 Available when CONFIG_SLUB_STATS is enabled. 256 Available when CONFIG_SLUB_STATS is enabled.
251 257
252What: /sys/kernel/slab/cache/free_slab 258What: /sys/kernel/slab/cache/free_slab
@@ -255,8 +261,9 @@ KernelVersion: 2.6.25
255Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 261Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
256 Christoph Lameter <cl@linux-foundation.org> 262 Christoph Lameter <cl@linux-foundation.org>
257Description: 263Description:
258 The free_slab file is read-only and specifies how many times an 264 The free_slab file shows how many times an empty slab has been
259 empty slab has been freed back to the page allocator. 265 freed back to the page allocator. It can be written to clear
266 the current count.
260 Available when CONFIG_SLUB_STATS is enabled. 267 Available when CONFIG_SLUB_STATS is enabled.
261 268
262What: /sys/kernel/slab/cache/free_slowpath 269What: /sys/kernel/slab/cache/free_slowpath
@@ -265,9 +272,9 @@ KernelVersion: 2.6.25
265Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 272Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
266 Christoph Lameter <cl@linux-foundation.org> 273 Christoph Lameter <cl@linux-foundation.org>
267Description: 274Description:
268 The free_slowpath file is read-only and specifies how many 275 The free_slowpath file shows how many objects have been freed
269 objects have been freed using the slow path (i.e. to a full or 276 using the slow path (i.e. to a full or partial slab). It can
270 partial slab). 277 be written to clear the current count.
271 Available when CONFIG_SLUB_STATS is enabled. 278 Available when CONFIG_SLUB_STATS is enabled.
272 279
273What: /sys/kernel/slab/cache/hwcache_align 280What: /sys/kernel/slab/cache/hwcache_align
@@ -346,10 +353,10 @@ KernelVersion: 2.6.26
346Contact: Pekka Enberg <penberg@cs.helsinki.fi>, 353Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
347 Christoph Lameter <cl@linux-foundation.org> 354 Christoph Lameter <cl@linux-foundation.org>
348Description: 355Description:
349 The file order_fallback is read-only and specifies how many 356 The order_fallback file shows how many times an allocation of a
350 times an allocation of a new slab has not been possible at the 357 new slab has not been possible at the cache's order and instead
351 cache's order and instead fallen back to its minimum possible 358 fallen back to its minimum possible order. It can be written to
352 order. 359 clear the current count.
353 Available when CONFIG_SLUB_STATS is enabled. 360 Available when CONFIG_SLUB_STATS is enabled.
354 361
355What: /sys/kernel/slab/cache/partial 362What: /sys/kernel/slab/cache/partial
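
The descriptions above cover the CONFIG_SLUB_STATS counters that this series makes writable as well as readable. As a rough illustration of the interface (not part of the patch), a userspace sketch that reads one counter and then clears it might look like the following; the cache name "kmalloc-64" and the use of "0" as the clearing value are assumptions for the example.

#include <errno.h>
#include <stdio.h>
#include <string.h>

#define STAT_FILE "/sys/kernel/slab/kmalloc-64/alloc_fastpath"

int main(void)
{
	unsigned long count;
	FILE *f;

	/* Read the current fast path allocation count (first field). */
	f = fopen(STAT_FILE, "r");
	if (!f) {
		fprintf(stderr, "open %s: %s\n", STAT_FILE, strerror(errno));
		return 1;
	}
	if (fscanf(f, "%lu", &count) == 1)
		printf("alloc_fastpath = %lu\n", count);
	fclose(f);

	/* Writing "0" is assumed to clear the counter, per the text above. */
	f = fopen(STAT_FILE, "w");
	if (!f) {
		fprintf(stderr, "open %s for write: %s\n", STAT_FILE, strerror(errno));
		return 1;
	}
	fputs("0", f);
	fclose(f);
	return 0;
}
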
diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt
index 9cb9138f7a79..65f4c795015d 100644
--- a/Documentation/lockstat.txt
+++ b/Documentation/lockstat.txt
@@ -62,8 +62,20 @@ applicable).
62It also tracks 4 contention points per class. A contention point is a call site 62It also tracks 4 contention points per class. A contention point is a call site
63that had to wait on lock acquisition. 63that had to wait on lock acquisition.
64 64
65 - CONFIGURATION
66
 67Lock statistics are enabled via CONFIG_LOCK_STAT.
68
65 - USAGE 69 - USAGE
66 70
71Enable collection of statistics:
72
73# echo 1 >/proc/sys/kernel/lock_stat
74
75Disable collection of statistics:
76
77# echo 0 >/proc/sys/kernel/lock_stat
78
67Look at the current lock statistics: 79Look at the current lock statistics:
68 80
69( line numbers not part of actual output, done for clarity in the explanation 81( line numbers not part of actual output, done for clarity in the explanation
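
The usage text added above drives collection through /proc/sys/kernel/lock_stat. A minimal sketch of wrapping a measured region with that switch from a test program is shown below; run_workload() is a placeholder, and snapshotting /proc/lock_stat at the end is just one obvious way to capture the result, so neither comes from the patch itself.

#include <stdio.h>
#include <stdlib.h>

/* Toggle the lock statistics switch described in the usage section above. */
static void lock_stat_set(int enable)
{
	FILE *f = fopen("/proc/sys/kernel/lock_stat", "w");

	if (!f) {
		perror("/proc/sys/kernel/lock_stat");
		exit(1);
	}
	fprintf(f, "%d\n", enable);
	fclose(f);
}

static void run_workload(void)
{
	/* Placeholder: exercise the locks of interest here. */
}

int main(void)
{
	lock_stat_set(1);		/* start collecting */
	run_workload();
	lock_stat_set(0);		/* stop collecting */

	/* The accumulated statistics can now be read from /proc/lock_stat. */
	return system("cat /proc/lock_stat");
}
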
diff --git a/MAINTAINERS b/MAINTAINERS
index 93a074330782..d58fa703ec16 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -801,6 +801,19 @@ L: openmoko-kernel@lists.openmoko.org (subscribers-only)
801W: http://wiki.openmoko.org/wiki/Neo_FreeRunner 801W: http://wiki.openmoko.org/wiki/Neo_FreeRunner
802S: Supported 802S: Supported
803 803
804ARM/QUALCOMM MSM MACHINE SUPPORT
805M: David Brown <davidb@codeaurora.org>
806M: Daniel Walker <dwalker@codeaurora.org>
807M: Bryan Huntsman <bryanh@codeaurora.org>
808F: arch/arm/mach-msm/
809F: drivers/video/msm/
810F: drivers/mmc/host/msm_sdcc.c
811F: drivers/mmc/host/msm_sdcc.h
812F: drivers/serial/msm_serial.h
813F: drivers/serial/msm_serial.c
814T: git git://codeaurora.org/quic/kernel/dwalker/linux-msm.git
815S: Maintained
816
804ARM/TOSA MACHINE SUPPORT 817ARM/TOSA MACHINE SUPPORT
805M: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> 818M: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
806M: Dirk Opfer <dirk@opfer-online.de> 819M: Dirk Opfer <dirk@opfer-online.de>
diff --git a/arch/arm/mach-msm/Kconfig b/arch/arm/mach-msm/Kconfig
index d140abca690a..f780086befd7 100644
--- a/arch/arm/mach-msm/Kconfig
+++ b/arch/arm/mach-msm/Kconfig
@@ -3,6 +3,30 @@ if ARCH_MSM
3comment "MSM Board Type" 3comment "MSM Board Type"
4 depends on ARCH_MSM 4 depends on ARCH_MSM
5 5
6config MSM_DEBUG_UART
7 int
8 default 1 if MSM_DEBUG_UART1
9 default 2 if MSM_DEBUG_UART2
10 default 3 if MSM_DEBUG_UART3
11
12choice
13 prompt "Debug UART"
14
15 default MSM_DEBUG_UART_NONE
16
17 config MSM_DEBUG_UART_NONE
18 bool "None"
19
20 config MSM_DEBUG_UART1
21 bool "UART1"
22
23 config MSM_DEBUG_UART2
24 bool "UART2"
25
26 config MSM_DEBUG_UART3
27 bool "UART3"
28endchoice
29
6config MACH_HALIBUT 30config MACH_HALIBUT
7 depends on ARCH_MSM 31 depends on ARCH_MSM
8 default y 32 default y
@@ -10,4 +34,10 @@ config MACH_HALIBUT
10 help 34 help
11 Support for the Qualcomm SURF7201A eval board. 35 Support for the Qualcomm SURF7201A eval board.
12 36
37config MACH_TROUT
38 default y
39 bool "HTC Dream (aka trout)"
40 help
41 Support for the HTC Dream, T-Mobile G1, Android ADP1 devices.
42
13endif 43endif
diff --git a/arch/arm/mach-msm/Makefile b/arch/arm/mach-msm/Makefile
index 1aa47001aa3b..91e6f5c95dc1 100644
--- a/arch/arm/mach-msm/Makefile
+++ b/arch/arm/mach-msm/Makefile
@@ -6,3 +6,4 @@ obj-y += clock.o clock-7x01a.o
6 6
7obj-$(CONFIG_MACH_HALIBUT) += board-halibut.o 7obj-$(CONFIG_MACH_HALIBUT) += board-halibut.o
8 8
9obj-$(CONFIG_MACH_TROUT) += board-dream.o
diff --git a/arch/arm/mach-msm/board-dream.c b/arch/arm/mach-msm/board-dream.c
new file mode 100644
index 000000000000..21afa8513168
--- /dev/null
+++ b/arch/arm/mach-msm/board-dream.c
@@ -0,0 +1,93 @@
1/* linux/arch/arm/mach-msm/board-dream.c
2 *
3 * Copyright (C) 2009 Google, Inc.
4 * Author: Brian Swetland <swetland@google.com>
5 *
6 * This software is licensed under the terms of the GNU General Public
7 * License version 2, as published by the Free Software Foundation, and
8 * may be copied, distributed, and modified under those terms.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 */
16
17#include <linux/kernel.h>
18#include <linux/init.h>
19#include <linux/platform_device.h>
20
21#include <asm/mach-types.h>
22#include <asm/mach/arch.h>
23#include <asm/mach/map.h>
24#include <asm/setup.h>
25
26#include <mach/board.h>
27#include <mach/hardware.h>
28#include <mach/msm_iomap.h>
29
30#include "devices.h"
31#include "board-dream.h"
32
33static struct platform_device *devices[] __initdata = {
34 &msm_device_uart3,
35 &msm_device_smd,
36 &msm_device_nand,
37 &msm_device_hsusb,
38 &msm_device_i2c,
39};
40
41extern struct sys_timer msm_timer;
42
43static void __init trout_init_irq(void)
44{
45 msm_init_irq();
46}
47
48static void __init trout_fixup(struct machine_desc *desc, struct tag *tags,
49 char **cmdline, struct meminfo *mi)
50{
51 mi->nr_banks = 1;
52 mi->bank[0].start = PHYS_OFFSET;
53 mi->bank[0].node = PHYS_TO_NID(PHYS_OFFSET);
54 mi->bank[0].size = (101*1024*1024);
55}
56
57static void __init trout_init(void)
58{
59 platform_add_devices(devices, ARRAY_SIZE(devices));
60}
61
62static struct map_desc trout_io_desc[] __initdata = {
63 {
64 .virtual = TROUT_CPLD_BASE,
65 .pfn = __phys_to_pfn(TROUT_CPLD_START),
66 .length = TROUT_CPLD_SIZE,
67 .type = MT_DEVICE_NONSHARED
68 }
69};
70
71static void __init trout_map_io(void)
72{
73 msm_map_common_io();
74 iotable_init(trout_io_desc, ARRAY_SIZE(trout_io_desc));
75
76#ifdef CONFIG_MSM_DEBUG_UART3
77 /* route UART3 to the "H2W" extended usb connector */
78 writeb(0x80, TROUT_CPLD_BASE + 0x00);
79#endif
80
81 msm_clock_init();
82}
83
84MACHINE_START(TROUT, "HTC Dream")
85 .phys_io = MSM_DEBUG_UART_PHYS,
86 .io_pg_offst = ((MSM_DEBUG_UART_BASE) >> 18) & 0xfffc,
87 .boot_params = 0x10000100,
88 .fixup = trout_fixup,
89 .map_io = trout_map_io,
90 .init_irq = trout_init_irq,
91 .init_machine = trout_init,
92 .timer = &msm_timer,
93MACHINE_END
diff --git a/arch/arm/mach-msm/board-dream.h b/arch/arm/mach-msm/board-dream.h
new file mode 100644
index 000000000000..4f345a5a0a61
--- /dev/null
+++ b/arch/arm/mach-msm/board-dream.h
@@ -0,0 +1,5 @@
1
2#define TROUT_CPLD_BASE 0xE8100000
3#define TROUT_CPLD_START 0x98000000
4#define TROUT_CPLD_SIZE SZ_4K
5
diff --git a/arch/arm/mach-msm/include/mach/debug-macro.S b/arch/arm/mach-msm/include/mach/debug-macro.S
index 1db3c97dbc49..d48747ebcd3d 100644
--- a/arch/arm/mach-msm/include/mach/debug-macro.S
+++ b/arch/arm/mach-msm/include/mach/debug-macro.S
@@ -14,15 +14,18 @@
14 * 14 *
15 */ 15 */
16 16
17
18
17#include <mach/hardware.h> 19#include <mach/hardware.h>
18#include <mach/msm_iomap.h> 20#include <mach/msm_iomap.h>
19 21
22#ifdef CONFIG_MSM_DEBUG_UART
20 .macro addruart,rx 23 .macro addruart,rx
21 @ see if the MMU is enabled and select appropriate base address 24 @ see if the MMU is enabled and select appropriate base address
22 mrc p15, 0, \rx, c1, c0 25 mrc p15, 0, \rx, c1, c0
23 tst \rx, #1 26 tst \rx, #1
24 ldreq \rx, =MSM_UART1_PHYS 27 ldreq \rx, =MSM_DEBUG_UART_PHYS
25 movne \rx, #0 28 ldrne \rx, =MSM_DEBUG_UART_BASE
26 .endm 29 .endm
27 30
28 .macro senduart,rd,rx 31 .macro senduart,rd,rx
@@ -32,13 +35,20 @@
32 35
33 .macro waituart,rd,rx 36 .macro waituart,rd,rx
34 @ wait for TX_READY 37 @ wait for TX_READY
35 teq \rx, #0 381001: ldr \rd, [\rx, #0x08]
36 bne 2f
371: ldr \rd, [\rx, #0x08]
38 tst \rd, #0x04 39 tst \rd, #0x04
39 beq 1b 40 beq 1001b
402: 41 .endm
42#else
43 .macro addruart,rx
44 .endm
45
46 .macro senduart,rd,rx
47 .endm
48
49 .macro waituart,rd,rx
41 .endm 50 .endm
51#endif
42 52
43 .macro busyuart,rd,rx 53 .macro busyuart,rd,rx
44 .endm 54 .endm
diff --git a/arch/arm/mach-msm/include/mach/mmc.h b/arch/arm/mach-msm/include/mach/mmc.h
new file mode 100644
index 000000000000..0ecf25426284
--- /dev/null
+++ b/arch/arm/mach-msm/include/mach/mmc.h
@@ -0,0 +1,26 @@
1/*
2 * arch/arm/include/asm/mach/mmc.h
3 */
4#ifndef ASMARM_MACH_MMC_H
5#define ASMARM_MACH_MMC_H
6
7#include <linux/mmc/host.h>
8#include <linux/mmc/card.h>
9#include <linux/mmc/sdio_func.h>
10
11struct embedded_sdio_data {
12 struct sdio_cis cis;
13 struct sdio_cccr cccr;
14 struct sdio_embedded_func *funcs;
15 int num_funcs;
16};
17
18struct mmc_platform_data {
19 unsigned int ocr_mask; /* available voltages */
20 u32 (*translate_vdd)(struct device *, unsigned int);
21 unsigned int (*status)(struct device *);
22 struct embedded_sdio_data *embedded_sdio;
23 int (*register_status_notify)(void (*callback)(int card_present, void *dev_id), void *dev_id);
24};
25
26#endif
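
The new mach/mmc.h above only declares the platform data contract between a board file and the msm_sdcc host driver. A hedged sketch of how a board might fill it in follows; the card-detect GPIO, the OCR mask, and the way the structure is attached to an SDCC platform device are illustrative assumptions, not taken from this patch.

#include <linux/mmc/host.h>
#include <linux/platform_device.h>
#include <linux/gpio.h>
#include <mach/mmc.h>

#define EXAMPLE_SD_DETECT_GPIO	38	/* hypothetical card-detect line */

/* Non-zero return means a card is present. */
static unsigned int example_sdcc_status(struct device *dev)
{
	return !gpio_get_value(EXAMPLE_SD_DETECT_GPIO);
}

static struct mmc_platform_data example_sdcc_data = {
	.ocr_mask	= MMC_VDD_28_29 | MMC_VDD_29_30,
	.status		= example_sdcc_status,
	/* translate_vdd, embedded_sdio and register_status_notify are optional. */
};

/*
 * A board file would hand this to the SDCC platform device before
 * registering it, e.g. (device name assumed for illustration):
 *
 *	msm_device_sdc1.dev.platform_data = &example_sdcc_data;
 */
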
diff --git a/arch/arm/mach-msm/include/mach/msm_iomap.h b/arch/arm/mach-msm/include/mach/msm_iomap.h
index 2f7b4c8620d9..9dae1a98c77a 100644
--- a/arch/arm/mach-msm/include/mach/msm_iomap.h
+++ b/arch/arm/mach-msm/include/mach/msm_iomap.h
@@ -84,6 +84,18 @@
84#define MSM_UART3_PHYS 0xA9C00000 84#define MSM_UART3_PHYS 0xA9C00000
85#define MSM_UART3_SIZE SZ_4K 85#define MSM_UART3_SIZE SZ_4K
86 86
87#ifdef CONFIG_MSM_DEBUG_UART
88#define MSM_DEBUG_UART_BASE 0xE1000000
89#if CONFIG_MSM_DEBUG_UART == 1
90#define MSM_DEBUG_UART_PHYS MSM_UART1_PHYS
91#elif CONFIG_MSM_DEBUG_UART == 2
92#define MSM_DEBUG_UART_PHYS MSM_UART2_PHYS
93#elif CONFIG_MSM_DEBUG_UART == 3
94#define MSM_DEBUG_UART_PHYS MSM_UART3_PHYS
95#endif
96#define MSM_DEBUG_UART_SIZE SZ_4K
97#endif
98
87#define MSM_SDC1_PHYS 0xA0400000 99#define MSM_SDC1_PHYS 0xA0400000
88#define MSM_SDC1_SIZE SZ_4K 100#define MSM_SDC1_SIZE SZ_4K
89 101
diff --git a/arch/arm/mach-msm/include/mach/uncompress.h b/arch/arm/mach-msm/include/mach/uncompress.h
index 026e8955ace9..d94292c29d8e 100644
--- a/arch/arm/mach-msm/include/mach/uncompress.h
+++ b/arch/arm/mach-msm/include/mach/uncompress.h
@@ -16,9 +16,16 @@
16#ifndef __ASM_ARCH_MSM_UNCOMPRESS_H 16#ifndef __ASM_ARCH_MSM_UNCOMPRESS_H
17 17
18#include "hardware.h" 18#include "hardware.h"
19#include "linux/io.h"
20#include "mach/msm_iomap.h"
19 21
20static void putc(int c) 22static void putc(int c)
21{ 23{
24#if defined(MSM_DEBUG_UART_PHYS)
25 unsigned base = MSM_DEBUG_UART_PHYS;
26 while (!(readl(base + 0x08) & 0x04)) ;
27 writel(c, base + 0x0c);
28#endif
22} 29}
23 30
24static inline void flush(void) 31static inline void flush(void)
diff --git a/arch/arm/mach-msm/io.c b/arch/arm/mach-msm/io.c
index 6e7692ff6f2c..1c5e7dac086f 100644
--- a/arch/arm/mach-msm/io.c
+++ b/arch/arm/mach-msm/io.c
@@ -42,6 +42,9 @@ static struct map_desc msm_io_desc[] __initdata = {
42 MSM_DEVICE(GPIO1), 42 MSM_DEVICE(GPIO1),
43 MSM_DEVICE(GPIO2), 43 MSM_DEVICE(GPIO2),
44 MSM_DEVICE(CLK_CTL), 44 MSM_DEVICE(CLK_CTL),
45#ifdef CONFIG_MSM_DEBUG_UART
46 MSM_DEVICE(DEBUG_UART),
47#endif
45 { 48 {
46 .virtual = (unsigned long) MSM_SHARED_RAM_BASE, 49 .virtual = (unsigned long) MSM_SHARED_RAM_BASE,
47 .pfn = __phys_to_pfn(MSM_SHARED_RAM_PHYS), 50 .pfn = __phys_to_pfn(MSM_SHARED_RAM_PHYS),
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 731318e5ac1d..bc01e3ebfeb2 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -187,8 +187,8 @@ config HAVE_MMIOTRACE_SUPPORT
187 def_bool y 187 def_bool y
188 188
189config X86_DECODER_SELFTEST 189config X86_DECODER_SELFTEST
190 bool "x86 instruction decoder selftest" 190 bool "x86 instruction decoder selftest"
191 depends on DEBUG_KERNEL 191 depends on DEBUG_KERNEL && KPROBES
192 ---help--- 192 ---help---
193 Perform x86 instruction decoder selftests at build time. 193 Perform x86 instruction decoder selftests at build time.
194 This option is useful for checking the sanity of x86 instruction 194 This option is useful for checking the sanity of x86 instruction
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index 84786fb9a23b..4d817f9e6e77 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -28,7 +28,9 @@ extern void amd_iommu_flush_all_domains(void);
28extern void amd_iommu_flush_all_devices(void); 28extern void amd_iommu_flush_all_devices(void);
29extern void amd_iommu_apply_erratum_63(u16 devid); 29extern void amd_iommu_apply_erratum_63(u16 devid);
30extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); 30extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
31 31extern int amd_iommu_init_devices(void);
32extern void amd_iommu_uninit_devices(void);
33extern void amd_iommu_init_notifier(void);
32#ifndef CONFIG_AMD_IOMMU_STATS 34#ifndef CONFIG_AMD_IOMMU_STATS
33 35
34static inline void amd_iommu_stats_init(void) { } 36static inline void amd_iommu_stats_init(void) { }
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 022a84386de8..ecb544e65382 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -23,6 +23,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
23struct tss_struct; 23struct tss_struct;
24void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 24void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
25 struct tss_struct *tss); 25 struct tss_struct *tss);
26extern void show_regs_common(void);
26 27
27#ifdef CONFIG_X86_32 28#ifdef CONFIG_X86_32
28 29
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 1c0fb4d4ad55..b990b5cc9541 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -166,6 +166,43 @@ static void iommu_uninit_device(struct device *dev)
166{ 166{
167 kfree(dev->archdata.iommu); 167 kfree(dev->archdata.iommu);
168} 168}
169
170void __init amd_iommu_uninit_devices(void)
171{
172 struct pci_dev *pdev = NULL;
173
174 for_each_pci_dev(pdev) {
175
176 if (!check_device(&pdev->dev))
177 continue;
178
179 iommu_uninit_device(&pdev->dev);
180 }
181}
182
183int __init amd_iommu_init_devices(void)
184{
185 struct pci_dev *pdev = NULL;
186 int ret = 0;
187
188 for_each_pci_dev(pdev) {
189
190 if (!check_device(&pdev->dev))
191 continue;
192
193 ret = iommu_init_device(&pdev->dev);
194 if (ret)
195 goto out_free;
196 }
197
198 return 0;
199
200out_free:
201
202 amd_iommu_uninit_devices();
203
204 return ret;
205}
169#ifdef CONFIG_AMD_IOMMU_STATS 206#ifdef CONFIG_AMD_IOMMU_STATS
170 207
171/* 208/*
@@ -1587,6 +1624,11 @@ static struct notifier_block device_nb = {
1587 .notifier_call = device_change_notifier, 1624 .notifier_call = device_change_notifier,
1588}; 1625};
1589 1626
1627void amd_iommu_init_notifier(void)
1628{
1629 bus_register_notifier(&pci_bus_type, &device_nb);
1630}
1631
1590/***************************************************************************** 1632/*****************************************************************************
1591 * 1633 *
1592 * The next functions belong to the dma_ops mapping/unmapping code. 1634 * The next functions belong to the dma_ops mapping/unmapping code.
@@ -2145,8 +2187,6 @@ static void prealloc_protection_domains(void)
2145 if (!check_device(&dev->dev)) 2187 if (!check_device(&dev->dev))
2146 continue; 2188 continue;
2147 2189
2148 iommu_init_device(&dev->dev);
2149
2150 /* Is there already any domain for it? */ 2190 /* Is there already any domain for it? */
2151 if (domain_for_device(&dev->dev)) 2191 if (domain_for_device(&dev->dev))
2152 continue; 2192 continue;
@@ -2215,8 +2255,6 @@ int __init amd_iommu_init_dma_ops(void)
2215 2255
2216 register_iommu(&amd_iommu_ops); 2256 register_iommu(&amd_iommu_ops);
2217 2257
2218 bus_register_notifier(&pci_bus_type, &device_nb);
2219
2220 amd_iommu_stats_init(); 2258 amd_iommu_stats_init();
2221 2259
2222 return 0; 2260 return 0;
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 9c4a6f747552..1dca9c34eaeb 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1274,6 +1274,10 @@ static int __init amd_iommu_init(void)
1274 if (ret) 1274 if (ret)
1275 goto free; 1275 goto free;
1276 1276
1277 ret = amd_iommu_init_devices();
1278 if (ret)
1279 goto free;
1280
1277 if (iommu_pass_through) 1281 if (iommu_pass_through)
1278 ret = amd_iommu_init_passthrough(); 1282 ret = amd_iommu_init_passthrough();
1279 else 1283 else
@@ -1281,6 +1285,8 @@ static int __init amd_iommu_init(void)
1281 if (ret) 1285 if (ret)
1282 goto free; 1286 goto free;
1283 1287
1288 amd_iommu_init_notifier();
1289
1284 enable_iommus(); 1290 enable_iommus();
1285 1291
1286 if (iommu_pass_through) 1292 if (iommu_pass_through)
@@ -1296,6 +1302,9 @@ out:
1296 return ret; 1302 return ret;
1297 1303
1298free: 1304free:
1305
1306 amd_iommu_uninit_devices();
1307
1299 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1308 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
1300 get_order(MAX_DOMAIN_ID/8)); 1309 get_order(MAX_DOMAIN_ID/8));
1301 1310
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index d9acc3bee0f4..e31b9ffe25f5 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -127,7 +127,7 @@ static u32 noop_apic_read(u32 reg)
127 127
128static void noop_apic_write(u32 reg, u32 v) 128static void noop_apic_write(u32 reg, u32 v)
129{ 129{
130 WARN_ON_ONCE((cpu_has_apic || !disable_apic)); 130 WARN_ON_ONCE(cpu_has_apic && !disable_apic);
131} 131}
132 132
133struct apic apic_noop = { 133struct apic apic_noop = {
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index e85f8fb7f8e7..dd2b5f264643 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -27,6 +27,9 @@
27 * 27 *
28 * http://www.unisys.com 28 * http://www.unisys.com
29 */ 29 */
30
31#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
32
30#include <linux/notifier.h> 33#include <linux/notifier.h>
31#include <linux/spinlock.h> 34#include <linux/spinlock.h>
32#include <linux/cpumask.h> 35#include <linux/cpumask.h>
@@ -223,9 +226,9 @@ static int parse_unisys_oem(char *oemptr)
223 mip_addr = val; 226 mip_addr = val;
224 mip = (struct mip_reg *)val; 227 mip = (struct mip_reg *)val;
225 mip_reg = __va(mip); 228 mip_reg = __va(mip);
226 pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", 229 pr_debug("host_reg = 0x%lx\n",
227 (unsigned long)host_reg); 230 (unsigned long)host_reg);
228 pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", 231 pr_debug("mip_reg = 0x%lx\n",
229 (unsigned long)mip_reg); 232 (unsigned long)mip_reg);
230 success++; 233 success++;
231 break; 234 break;
@@ -401,7 +404,7 @@ static void es7000_enable_apic_mode(void)
401 if (!es7000_plat) 404 if (!es7000_plat)
402 return; 405 return;
403 406
404 printk(KERN_INFO "ES7000: Enabling APIC mode.\n"); 407 pr_info("Enabling APIC mode.\n");
405 memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); 408 memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
406 es7000_mip_reg.off_0x00 = MIP_SW_APIC; 409 es7000_mip_reg.off_0x00 = MIP_SW_APIC;
407 es7000_mip_reg.off_0x38 = MIP_VALID; 410 es7000_mip_reg.off_0x38 = MIP_VALID;
@@ -514,8 +517,7 @@ static void es7000_setup_apic_routing(void)
514{ 517{
515 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); 518 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
516 519
517 printk(KERN_INFO 520 pr_info("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
518 "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
519 (apic_version[apic] == 0x14) ? 521 (apic_version[apic] == 0x14) ?
520 "Physical Cluster" : "Logical Cluster", 522 "Physical Cluster" : "Logical Cluster",
521 nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); 523 nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d7ebf25d10ed..a8aacd4b513c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1388,13 +1388,14 @@ static void __mcheck_cpu_init_timer(void)
1388 struct timer_list *t = &__get_cpu_var(mce_timer); 1388 struct timer_list *t = &__get_cpu_var(mce_timer);
1389 int *n = &__get_cpu_var(mce_next_interval); 1389 int *n = &__get_cpu_var(mce_next_interval);
1390 1390
1391 setup_timer(t, mce_start_timer, smp_processor_id());
1392
1391 if (mce_ignore_ce) 1393 if (mce_ignore_ce)
1392 return; 1394 return;
1393 1395
1394 *n = check_interval * HZ; 1396 *n = check_interval * HZ;
1395 if (!*n) 1397 if (!*n)
1396 return; 1398 return;
1397 setup_timer(t, mce_start_timer, smp_processor_id());
1398 t->expires = round_jiffies(jiffies + *n); 1399 t->expires = round_jiffies(jiffies + *n);
1399 add_timer_on(t, smp_processor_id()); 1400 add_timer_on(t, smp_processor_id());
1400} 1401}
@@ -1928,7 +1929,7 @@ error2:
1928 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); 1929 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr);
1929error: 1930error:
1930 while (--i >= 0) 1931 while (--i >= 0)
1931 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); 1932 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
1932 1933
1933 sysdev_unregister(&per_cpu(mce_dev, cpu)); 1934 sysdev_unregister(&per_cpu(mce_dev, cpu));
1934 1935
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ab1a8a89b984..45506d5dd8df 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1632,6 +1632,7 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
1632 1632
1633 data.period = event->hw.last_period; 1633 data.period = event->hw.last_period;
1634 data.addr = 0; 1634 data.addr = 0;
1635 data.raw = NULL;
1635 regs.ip = 0; 1636 regs.ip = 0;
1636 1637
1637 /* 1638 /*
@@ -1749,6 +1750,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
1749 u64 val; 1750 u64 val;
1750 1751
1751 data.addr = 0; 1752 data.addr = 0;
1753 data.raw = NULL;
1752 1754
1753 cpuc = &__get_cpu_var(cpu_hw_events); 1755 cpuc = &__get_cpu_var(cpu_hw_events);
1754 1756
@@ -1794,6 +1796,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
1794 u64 ack, status; 1796 u64 ack, status;
1795 1797
1796 data.addr = 0; 1798 data.addr = 0;
1799 data.raw = NULL;
1797 1800
1798 cpuc = &__get_cpu_var(cpu_hw_events); 1801 cpuc = &__get_cpu_var(cpu_hw_events);
1799 1802
@@ -1857,6 +1860,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
1857 u64 val; 1860 u64 val;
1858 1861
1859 data.addr = 0; 1862 data.addr = 0;
1863 data.raw = NULL;
1860 1864
1861 cpuc = &__get_cpu_var(cpu_hw_events); 1865 cpuc = &__get_cpu_var(cpu_hw_events);
1862 1866
@@ -2062,12 +2066,6 @@ static __init int p6_pmu_init(void)
2062 2066
2063 x86_pmu = p6_pmu; 2067 x86_pmu = p6_pmu;
2064 2068
2065 if (!cpu_has_apic) {
2066 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
2067 pr_info("no hardware sampling interrupt available.\n");
2068 x86_pmu.apic = 0;
2069 }
2070
2071 return 0; 2069 return 0;
2072} 2070}
2073 2071
@@ -2159,6 +2157,16 @@ static __init int amd_pmu_init(void)
2159 return 0; 2157 return 0;
2160} 2158}
2161 2159
2160static void __init pmu_check_apic(void)
2161{
2162 if (cpu_has_apic)
2163 return;
2164
2165 x86_pmu.apic = 0;
2166 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
2167 pr_info("no hardware sampling interrupt available.\n");
2168}
2169
2162void __init init_hw_perf_events(void) 2170void __init init_hw_perf_events(void)
2163{ 2171{
2164 int err; 2172 int err;
@@ -2180,6 +2188,8 @@ void __init init_hw_perf_events(void)
2180 return; 2188 return;
2181 } 2189 }
2182 2190
2191 pmu_check_apic();
2192
2183 pr_cont("%s PMU driver.\n", x86_pmu.name); 2193 pr_cont("%s PMU driver.\n", x86_pmu.name);
2184 2194
2185 if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { 2195 if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
@@ -2287,7 +2297,7 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
2287 2297
2288static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); 2298static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
2289static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); 2299static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
2290static DEFINE_PER_CPU(int, in_nmi_frame); 2300static DEFINE_PER_CPU(int, in_ignored_frame);
2291 2301
2292 2302
2293static void 2303static void
@@ -2303,8 +2313,9 @@ static void backtrace_warning(void *data, char *msg)
2303 2313
2304static int backtrace_stack(void *data, char *name) 2314static int backtrace_stack(void *data, char *name)
2305{ 2315{
2306 per_cpu(in_nmi_frame, smp_processor_id()) = 2316 per_cpu(in_ignored_frame, smp_processor_id()) =
2307 x86_is_stack_id(NMI_STACK, name); 2317 x86_is_stack_id(NMI_STACK, name) ||
2318 x86_is_stack_id(DEBUG_STACK, name);
2308 2319
2309 return 0; 2320 return 0;
2310} 2321}
@@ -2313,7 +2324,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
2313{ 2324{
2314 struct perf_callchain_entry *entry = data; 2325 struct perf_callchain_entry *entry = data;
2315 2326
2316 if (per_cpu(in_nmi_frame, smp_processor_id())) 2327 if (per_cpu(in_ignored_frame, smp_processor_id()))
2317 return; 2328 return;
2318 2329
2319 if (reliable) 2330 if (reliable)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 8e740934bd1f..b13af53883aa 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -103,6 +103,35 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
103 return NULL; 103 return NULL;
104} 104}
105 105
106static inline int
107in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
108 unsigned long *irq_stack_end)
109{
110 return (stack >= irq_stack && stack < irq_stack_end);
111}
112
113/*
114 * We are returning from the irq stack and go to the previous one.
115 * If the previous stack is also in the irq stack, then bp in the first
116 * frame of the irq stack points to the previous, interrupted one.
117 * Otherwise we have another level of indirection: We first save
118 * the bp of the previous stack, then we switch the stack to the irq one
119 * and save a new bp that links to the previous one.
120 * (See save_args())
121 */
122static inline unsigned long
123fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
124 unsigned long *irq_stack, unsigned long *irq_stack_end)
125{
126#ifdef CONFIG_FRAME_POINTER
127 struct stack_frame *frame = (struct stack_frame *)bp;
128
129 if (!in_irq_stack(stack, irq_stack, irq_stack_end))
130 return (unsigned long)frame->next_frame;
131#endif
132 return bp;
133}
134
106/* 135/*
107 * x86-64 can have up to three kernel stacks: 136 * x86-64 can have up to three kernel stacks:
108 * process stack 137 * process stack
@@ -175,7 +204,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
175 irq_stack = irq_stack_end - 204 irq_stack = irq_stack_end -
176 (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); 205 (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
177 206
178 if (stack >= irq_stack && stack < irq_stack_end) { 207 if (in_irq_stack(stack, irq_stack, irq_stack_end)) {
179 if (ops->stack(data, "IRQ") < 0) 208 if (ops->stack(data, "IRQ") < 0)
180 break; 209 break;
181 bp = print_context_stack(tinfo, stack, bp, 210 bp = print_context_stack(tinfo, stack, bp,
@@ -186,6 +215,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
186 * pointer (index -1 to end) in the IRQ stack: 215 * pointer (index -1 to end) in the IRQ stack:
187 */ 216 */
188 stack = (unsigned long *) (irq_stack_end[-1]); 217 stack = (unsigned long *) (irq_stack_end[-1]);
218 bp = fixup_bp_irq_link(bp, stack, irq_stack,
219 irq_stack_end);
189 irq_stack_end = NULL; 220 irq_stack_end = NULL;
190 ops->stack(data, "EOI"); 221 ops->stack(data, "EOI");
191 continue; 222 continue;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 63bca794c8f9..673f693fb451 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1076,10 +1076,10 @@ ENTRY(\sym)
1076 TRACE_IRQS_OFF 1076 TRACE_IRQS_OFF
1077 movq %rsp,%rdi /* pt_regs pointer */ 1077 movq %rsp,%rdi /* pt_regs pointer */
1078 xorl %esi,%esi /* no error code */ 1078 xorl %esi,%esi /* no error code */
1079 PER_CPU(init_tss, %rbp) 1079 PER_CPU(init_tss, %r12)
1080 subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) 1080 subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12)
1081 call \do_sym 1081 call \do_sym
1082 addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) 1082 addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12)
1083 jmp paranoid_exit /* %ebx: no swapgs flag */ 1083 jmp paranoid_exit /* %ebx: no swapgs flag */
1084 CFI_ENDPROC 1084 CFI_ENDPROC
1085END(\sym) 1085END(\sym)
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index d42f65ac4927..05d5fec64a94 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -362,8 +362,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
362 return ret; 362 return ret;
363 } 363 }
364 364
365 if (bp->callback) 365 ret = arch_store_info(bp);
366 ret = arch_store_info(bp);
367 366
368 if (ret < 0) 367 if (ret < 0)
369 return ret; 368 return ret;
@@ -519,7 +518,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
519 break; 518 break;
520 } 519 }
521 520
522 (bp->callback)(bp, args->regs); 521 perf_bp_event(bp, args->regs);
523 522
524 rcu_read_unlock(); 523 rcu_read_unlock();
525 } 524 }
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 63123d902103..37542b67c57e 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -13,6 +13,9 @@
13 * Licensed under the terms of the GNU General Public 13 * Licensed under the terms of the GNU General Public
14 * License version 2. See file COPYING for details. 14 * License version 2. See file COPYING for details.
15 */ 15 */
16
17#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18
16#include <linux/firmware.h> 19#include <linux/firmware.h>
17#include <linux/pci_ids.h> 20#include <linux/pci_ids.h>
18#include <linux/uaccess.h> 21#include <linux/uaccess.h>
@@ -81,7 +84,7 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
81 84
82 memset(csig, 0, sizeof(*csig)); 85 memset(csig, 0, sizeof(*csig));
83 rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); 86 rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
84 pr_info("microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev); 87 pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev);
85 return 0; 88 return 0;
86} 89}
87 90
@@ -111,8 +114,8 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
111 114
112 /* ucode might be chipset specific -- currently we don't support this */ 115 /* ucode might be chipset specific -- currently we don't support this */
113 if (mc_header->nb_dev_id || mc_header->sb_dev_id) { 116 if (mc_header->nb_dev_id || mc_header->sb_dev_id) {
114 pr_err(KERN_ERR "microcode: CPU%d: loading of chipset " 117 pr_err("CPU%d: loading of chipset specific code not yet supported\n",
115 "specific code not yet supported\n", cpu); 118 cpu);
116 return 0; 119 return 0;
117 } 120 }
118 121
@@ -141,12 +144,12 @@ static int apply_microcode_amd(int cpu)
141 144
142 /* check current patch id and patch's id for match */ 145 /* check current patch id and patch's id for match */
143 if (rev != mc_amd->hdr.patch_id) { 146 if (rev != mc_amd->hdr.patch_id) {
144 pr_err("microcode: CPU%d: update failed " 147 pr_err("CPU%d: update failed (for patch_level=0x%x)\n",
145 "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); 148 cpu, mc_amd->hdr.patch_id);
146 return -1; 149 return -1;
147 } 150 }
148 151
149 pr_info("microcode: CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); 152 pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev);
150 uci->cpu_sig.rev = rev; 153 uci->cpu_sig.rev = rev;
151 154
152 return 0; 155 return 0;
@@ -169,15 +172,14 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
169 return NULL; 172 return NULL;
170 173
171 if (section_hdr[0] != UCODE_UCODE_TYPE) { 174 if (section_hdr[0] != UCODE_UCODE_TYPE) {
172 pr_err("microcode: error: invalid type field in " 175 pr_err("error: invalid type field in container file section header\n");
173 "container file section header\n");
174 return NULL; 176 return NULL;
175 } 177 }
176 178
177 total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); 179 total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
178 180
179 if (total_size > size || total_size > UCODE_MAX_SIZE) { 181 if (total_size > size || total_size > UCODE_MAX_SIZE) {
180 pr_err("microcode: error: size mismatch\n"); 182 pr_err("error: size mismatch\n");
181 return NULL; 183 return NULL;
182 } 184 }
183 185
@@ -206,14 +208,13 @@ static int install_equiv_cpu_table(const u8 *buf)
206 size = buf_pos[2]; 208 size = buf_pos[2];
207 209
208 if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { 210 if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
209 pr_err("microcode: error: invalid type field in " 211 pr_err("error: invalid type field in container file section header\n");
210 "container file section header\n");
211 return 0; 212 return 0;
212 } 213 }
213 214
214 equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); 215 equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size);
215 if (!equiv_cpu_table) { 216 if (!equiv_cpu_table) {
216 pr_err("microcode: failed to allocate equivalent CPU table\n"); 217 pr_err("failed to allocate equivalent CPU table\n");
217 return 0; 218 return 0;
218 } 219 }
219 220
@@ -246,7 +247,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
246 247
247 offset = install_equiv_cpu_table(ucode_ptr); 248 offset = install_equiv_cpu_table(ucode_ptr);
248 if (!offset) { 249 if (!offset) {
249 pr_err("microcode: failed to create equivalent cpu table\n"); 250 pr_err("failed to create equivalent cpu table\n");
250 return UCODE_ERROR; 251 return UCODE_ERROR;
251 } 252 }
252 253
@@ -277,8 +278,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
277 if (!leftover) { 278 if (!leftover) {
278 vfree(uci->mc); 279 vfree(uci->mc);
279 uci->mc = new_mc; 280 uci->mc = new_mc;
280 pr_debug("microcode: CPU%d found a matching microcode " 281 pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
281 "update with version 0x%x (current=0x%x)\n",
282 cpu, new_rev, uci->cpu_sig.rev); 282 cpu, new_rev, uci->cpu_sig.rev);
283 } else { 283 } else {
284 vfree(new_mc); 284 vfree(new_mc);
@@ -300,7 +300,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device)
300 return UCODE_NFOUND; 300 return UCODE_NFOUND;
301 301
302 if (*(u32 *)firmware->data != UCODE_MAGIC) { 302 if (*(u32 *)firmware->data != UCODE_MAGIC) {
303 pr_err("microcode: invalid UCODE_MAGIC (0x%08x)\n", 303 pr_err("invalid UCODE_MAGIC (0x%08x)\n",
304 *(u32 *)firmware->data); 304 *(u32 *)firmware->data);
305 return UCODE_ERROR; 305 return UCODE_ERROR;
306 } 306 }
@@ -313,8 +313,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device)
313static enum ucode_state 313static enum ucode_state
314request_microcode_user(int cpu, const void __user *buf, size_t size) 314request_microcode_user(int cpu, const void __user *buf, size_t size)
315{ 315{
316 pr_info("microcode: AMD microcode update via " 316 pr_info("AMD microcode update via /dev/cpu/microcode not supported\n");
317 "/dev/cpu/microcode not supported\n");
318 return UCODE_ERROR; 317 return UCODE_ERROR;
319} 318}
320 319
@@ -334,14 +333,13 @@ void init_microcode_amd(struct device *device)
334 WARN_ON(c->x86_vendor != X86_VENDOR_AMD); 333 WARN_ON(c->x86_vendor != X86_VENDOR_AMD);
335 334
336 if (c->x86 < 0x10) { 335 if (c->x86 < 0x10) {
337 pr_warning("microcode: AMD CPU family 0x%x not supported\n", 336 pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
338 c->x86);
339 return; 337 return;
340 } 338 }
341 supported_cpu = 1; 339 supported_cpu = 1;
342 340
343 if (request_firmware(&firmware, fw_name, device)) 341 if (request_firmware(&firmware, fw_name, device))
344 pr_err("microcode: failed to load file %s\n", fw_name); 342 pr_err("failed to load file %s\n", fw_name);
345} 343}
346 344
347void fini_microcode_amd(void) 345void fini_microcode_amd(void)
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index e68aae397869..844c02c65fcb 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -70,6 +70,9 @@
70 * Fix sigmatch() macro to handle old CPUs with pf == 0. 70 * Fix sigmatch() macro to handle old CPUs with pf == 0.
71 * Thanks to Stuart Swales for pointing out this bug. 71 * Thanks to Stuart Swales for pointing out this bug.
72 */ 72 */
73
74#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
75
73#include <linux/platform_device.h> 76#include <linux/platform_device.h>
74#include <linux/miscdevice.h> 77#include <linux/miscdevice.h>
75#include <linux/capability.h> 78#include <linux/capability.h>
@@ -209,7 +212,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
209 ssize_t ret = -EINVAL; 212 ssize_t ret = -EINVAL;
210 213
211 if ((len >> PAGE_SHIFT) > totalram_pages) { 214 if ((len >> PAGE_SHIFT) > totalram_pages) {
212 pr_err("microcode: too much data (max %ld pages)\n", totalram_pages); 215 pr_err("too much data (max %ld pages)\n", totalram_pages);
213 return ret; 216 return ret;
214 } 217 }
215 218
@@ -244,7 +247,7 @@ static int __init microcode_dev_init(void)
244 247
245 error = misc_register(&microcode_dev); 248 error = misc_register(&microcode_dev);
246 if (error) { 249 if (error) {
247 pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR); 250 pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR);
248 return error; 251 return error;
249 } 252 }
250 253
@@ -359,7 +362,7 @@ static enum ucode_state microcode_resume_cpu(int cpu)
359 if (!uci->mc) 362 if (!uci->mc)
360 return UCODE_NFOUND; 363 return UCODE_NFOUND;
361 364
362 pr_debug("microcode: CPU%d updated upon resume\n", cpu); 365 pr_debug("CPU%d updated upon resume\n", cpu);
363 apply_microcode_on_target(cpu); 366 apply_microcode_on_target(cpu);
364 367
365 return UCODE_OK; 368 return UCODE_OK;
@@ -379,7 +382,7 @@ static enum ucode_state microcode_init_cpu(int cpu)
379 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev); 382 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
380 383
381 if (ustate == UCODE_OK) { 384 if (ustate == UCODE_OK) {
382 pr_debug("microcode: CPU%d updated upon init\n", cpu); 385 pr_debug("CPU%d updated upon init\n", cpu);
383 apply_microcode_on_target(cpu); 386 apply_microcode_on_target(cpu);
384 } 387 }
385 388
@@ -406,7 +409,7 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
406 if (!cpu_online(cpu)) 409 if (!cpu_online(cpu))
407 return 0; 410 return 0;
408 411
409 pr_debug("microcode: CPU%d added\n", cpu); 412 pr_debug("CPU%d added\n", cpu);
410 413
411 err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); 414 err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
412 if (err) 415 if (err)
@@ -425,7 +428,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
425 if (!cpu_online(cpu)) 428 if (!cpu_online(cpu))
426 return 0; 429 return 0;
427 430
428 pr_debug("microcode: CPU%d removed\n", cpu); 431 pr_debug("CPU%d removed\n", cpu);
429 microcode_fini_cpu(cpu); 432 microcode_fini_cpu(cpu);
430 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); 433 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
431 return 0; 434 return 0;
@@ -473,15 +476,15 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
473 microcode_update_cpu(cpu); 476 microcode_update_cpu(cpu);
474 case CPU_DOWN_FAILED: 477 case CPU_DOWN_FAILED:
475 case CPU_DOWN_FAILED_FROZEN: 478 case CPU_DOWN_FAILED_FROZEN:
476 pr_debug("microcode: CPU%d added\n", cpu); 479 pr_debug("CPU%d added\n", cpu);
477 if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) 480 if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
478 pr_err("microcode: Failed to create group for CPU%d\n", cpu); 481 pr_err("Failed to create group for CPU%d\n", cpu);
479 break; 482 break;
480 case CPU_DOWN_PREPARE: 483 case CPU_DOWN_PREPARE:
481 case CPU_DOWN_PREPARE_FROZEN: 484 case CPU_DOWN_PREPARE_FROZEN:
482 /* Suspend is in progress, only remove the interface */ 485 /* Suspend is in progress, only remove the interface */
483 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); 486 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
484 pr_debug("microcode: CPU%d removed\n", cpu); 487 pr_debug("CPU%d removed\n", cpu);
485 break; 488 break;
486 case CPU_DEAD: 489 case CPU_DEAD:
487 case CPU_UP_CANCELED_FROZEN: 490 case CPU_UP_CANCELED_FROZEN:
@@ -507,7 +510,7 @@ static int __init microcode_init(void)
507 microcode_ops = init_amd_microcode(); 510 microcode_ops = init_amd_microcode();
508 511
509 if (!microcode_ops) { 512 if (!microcode_ops) {
510 pr_err("microcode: no support for this CPU vendor\n"); 513 pr_err("no support for this CPU vendor\n");
511 return -ENODEV; 514 return -ENODEV;
512 } 515 }
513 516
@@ -541,8 +544,7 @@ static int __init microcode_init(void)
541 register_hotcpu_notifier(&mc_cpu_notifier); 544 register_hotcpu_notifier(&mc_cpu_notifier);
542 545
543 pr_info("Microcode Update Driver: v" MICROCODE_VERSION 546 pr_info("Microcode Update Driver: v" MICROCODE_VERSION
544 " <tigran@aivazian.fsnet.co.uk>," 547 " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n");
545 " Peter Oruba\n");
546 548
547 return 0; 549 return 0;
548} 550}
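The mc_cpu_callback() hunk above only reworks its log strings, but for context this is the general shape of a CPU hotplug notifier of that era (names are illustrative, not from the patch):

	#include <linux/cpu.h>
	#include <linux/notifier.h>
	#include <linux/kernel.h>

	static int example_cpu_callback(struct notifier_block *nb,
					unsigned long action, void *hcpu)
	{
		unsigned int cpu = (unsigned long)hcpu;

		switch (action) {
		case CPU_ONLINE:
		case CPU_ONLINE_FROZEN:
			pr_debug("CPU%d is up\n", cpu);
			break;
		case CPU_DOWN_PREPARE:
		case CPU_DOWN_PREPARE_FROZEN:
			pr_debug("CPU%d is going down\n", cpu);
			break;
		}
		return NOTIFY_OK;
	}

	static struct notifier_block example_cpu_notifier = {
		.notifier_call = example_cpu_callback,
	};

	/* registered once at init: register_hotcpu_notifier(&example_cpu_notifier); */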
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 0d334ddd0a96..ebd193e476ca 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -70,6 +70,9 @@
70 * Fix sigmatch() macro to handle old CPUs with pf == 0. 70 * Fix sigmatch() macro to handle old CPUs with pf == 0.
71 * Thanks to Stuart Swales for pointing out this bug. 71 * Thanks to Stuart Swales for pointing out this bug.
72 */ 72 */
73
74#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
75
73#include <linux/firmware.h> 76#include <linux/firmware.h>
74#include <linux/uaccess.h> 77#include <linux/uaccess.h>
75#include <linux/kernel.h> 78#include <linux/kernel.h>
@@ -146,8 +149,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
146 149
147 if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || 150 if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
148 cpu_has(c, X86_FEATURE_IA64)) { 151 cpu_has(c, X86_FEATURE_IA64)) {
149 printk(KERN_ERR "microcode: CPU%d not a capable Intel " 152 pr_err("CPU%d not a capable Intel processor\n", cpu_num);
150 "processor\n", cpu_num);
151 return -1; 153 return -1;
152 } 154 }
153 155
@@ -165,8 +167,8 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
165 /* get the current revision from MSR 0x8B */ 167 /* get the current revision from MSR 0x8B */
166 rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); 168 rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
167 169
168 printk(KERN_INFO "microcode: CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", 170 pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
169 cpu_num, csig->sig, csig->pf, csig->rev); 171 cpu_num, csig->sig, csig->pf, csig->rev);
170 172
171 return 0; 173 return 0;
172} 174}
@@ -194,28 +196,24 @@ static int microcode_sanity_check(void *mc)
194 data_size = get_datasize(mc_header); 196 data_size = get_datasize(mc_header);
195 197
196 if (data_size + MC_HEADER_SIZE > total_size) { 198 if (data_size + MC_HEADER_SIZE > total_size) {
197 printk(KERN_ERR "microcode: error! " 199 pr_err("error! Bad data size in microcode data file\n");
198 "Bad data size in microcode data file\n");
199 return -EINVAL; 200 return -EINVAL;
200 } 201 }
201 202
202 if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { 203 if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
203 printk(KERN_ERR "microcode: error! " 204 pr_err("error! Unknown microcode update format\n");
204 "Unknown microcode update format\n");
205 return -EINVAL; 205 return -EINVAL;
206 } 206 }
207 ext_table_size = total_size - (MC_HEADER_SIZE + data_size); 207 ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
208 if (ext_table_size) { 208 if (ext_table_size) {
209 if ((ext_table_size < EXT_HEADER_SIZE) 209 if ((ext_table_size < EXT_HEADER_SIZE)
210 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { 210 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
211 printk(KERN_ERR "microcode: error! " 211 pr_err("error! Small exttable size in microcode data file\n");
212 "Small exttable size in microcode data file\n");
213 return -EINVAL; 212 return -EINVAL;
214 } 213 }
215 ext_header = mc + MC_HEADER_SIZE + data_size; 214 ext_header = mc + MC_HEADER_SIZE + data_size;
216 if (ext_table_size != exttable_size(ext_header)) { 215 if (ext_table_size != exttable_size(ext_header)) {
217 printk(KERN_ERR "microcode: error! " 216 pr_err("error! Bad exttable size in microcode data file\n");
218 "Bad exttable size in microcode data file\n");
219 return -EFAULT; 217 return -EFAULT;
220 } 218 }
221 ext_sigcount = ext_header->count; 219 ext_sigcount = ext_header->count;
@@ -230,8 +228,7 @@ static int microcode_sanity_check(void *mc)
230 while (i--) 228 while (i--)
231 ext_table_sum += ext_tablep[i]; 229 ext_table_sum += ext_tablep[i];
232 if (ext_table_sum) { 230 if (ext_table_sum) {
233 printk(KERN_WARNING "microcode: aborting, " 231 pr_warning("aborting, bad extended signature table checksum\n");
234 "bad extended signature table checksum\n");
235 return -EINVAL; 232 return -EINVAL;
236 } 233 }
237 } 234 }
@@ -242,7 +239,7 @@ static int microcode_sanity_check(void *mc)
242 while (i--) 239 while (i--)
243 orig_sum += ((int *)mc)[i]; 240 orig_sum += ((int *)mc)[i];
244 if (orig_sum) { 241 if (orig_sum) {
245 printk(KERN_ERR "microcode: aborting, bad checksum\n"); 242 pr_err("aborting, bad checksum\n");
246 return -EINVAL; 243 return -EINVAL;
247 } 244 }
248 if (!ext_table_size) 245 if (!ext_table_size)
@@ -255,7 +252,7 @@ static int microcode_sanity_check(void *mc)
255 - (mc_header->sig + mc_header->pf + mc_header->cksum) 252 - (mc_header->sig + mc_header->pf + mc_header->cksum)
256 + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); 253 + (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
257 if (sum) { 254 if (sum) {
258 printk(KERN_ERR "microcode: aborting, bad checksum\n"); 255 pr_err("aborting, bad checksum\n");
259 return -EINVAL; 256 return -EINVAL;
260 } 257 }
261 } 258 }
@@ -327,13 +324,11 @@ static int apply_microcode(int cpu)
327 rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); 324 rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
328 325
329 if (val[1] != mc_intel->hdr.rev) { 326 if (val[1] != mc_intel->hdr.rev) {
330 printk(KERN_ERR "microcode: CPU%d update " 327 pr_err("CPU%d update to revision 0x%x failed\n",
331 "to revision 0x%x failed\n", 328 cpu_num, mc_intel->hdr.rev);
332 cpu_num, mc_intel->hdr.rev);
333 return -1; 329 return -1;
334 } 330 }
335 printk(KERN_INFO "microcode: CPU%d updated to revision " 331 pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x \n",
336 "0x%x, date = %04x-%02x-%02x \n",
337 cpu_num, val[1], 332 cpu_num, val[1],
338 mc_intel->hdr.date & 0xffff, 333 mc_intel->hdr.date & 0xffff,
339 mc_intel->hdr.date >> 24, 334 mc_intel->hdr.date >> 24,
@@ -362,8 +357,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
362 357
363 mc_size = get_totalsize(&mc_header); 358 mc_size = get_totalsize(&mc_header);
364 if (!mc_size || mc_size > leftover) { 359 if (!mc_size || mc_size > leftover) {
365 printk(KERN_ERR "microcode: error!" 360 pr_err("error! Bad data in microcode data file\n");
366 "Bad data in microcode data file\n");
367 break; 361 break;
368 } 362 }
369 363
@@ -405,9 +399,8 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
405 vfree(uci->mc); 399 vfree(uci->mc);
406 uci->mc = (struct microcode_intel *)new_mc; 400 uci->mc = (struct microcode_intel *)new_mc;
407 401
408 pr_debug("microcode: CPU%d found a matching microcode update with" 402 pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
409 " version 0x%x (current=0x%x)\n", 403 cpu, new_rev, uci->cpu_sig.rev);
410 cpu, new_rev, uci->cpu_sig.rev);
411out: 404out:
412 return state; 405 return state;
413} 406}
@@ -429,7 +422,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device)
429 c->x86, c->x86_model, c->x86_mask); 422 c->x86, c->x86_model, c->x86_mask);
430 423
431 if (request_firmware(&firmware, name, device)) { 424 if (request_firmware(&firmware, name, device)) {
432 pr_debug("microcode: data file %s load failed\n", name); 425 pr_debug("data file %s load failed\n", name);
433 return UCODE_NFOUND; 426 return UCODE_NFOUND;
434 } 427 }
435 428
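microcode_sanity_check() above relies on the convention that a valid Intel update image, header included, sums to zero when added up as 32-bit words. An illustrative helper showing that property (not part of the patch):

	#include <linux/types.h>

	static int example_image_checksum_ok(const void *image, unsigned int total_size)
	{
		const u32 *words = image;
		u32 sum = 0;
		unsigned int i;

		for (i = 0; i < total_size / sizeof(u32); i++)
			sum += words[i];

		return sum == 0;	/* any corruption shows up as a non-zero sum */
	}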
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5e2ba634ea15..7a7bd4e3ec49 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -10,6 +10,8 @@
10#include <linux/clockchips.h> 10#include <linux/clockchips.h>
11#include <linux/random.h> 11#include <linux/random.h>
12#include <linux/user-return-notifier.h> 12#include <linux/user-return-notifier.h>
13#include <linux/dmi.h>
14#include <linux/utsname.h>
13#include <trace/events/power.h> 15#include <trace/events/power.h>
14#include <linux/hw_breakpoint.h> 16#include <linux/hw_breakpoint.h>
15#include <asm/system.h> 17#include <asm/system.h>
@@ -90,6 +92,25 @@ void exit_thread(void)
90 } 92 }
91} 93}
92 94
95void show_regs_common(void)
96{
97 const char *board, *product;
98
99 board = dmi_get_system_info(DMI_BOARD_NAME);
100 if (!board)
101 board = "";
102 product = dmi_get_system_info(DMI_PRODUCT_NAME);
103 if (!product)
104 product = "";
105
106 printk("\n");
107 printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n",
108 current->pid, current->comm, print_tainted(),
109 init_utsname()->release,
110 (int)strcspn(init_utsname()->version, " "),
111 init_utsname()->version, board, product);
112}
113
93void flush_thread(void) 114void flush_thread(void)
94{ 115{
95 struct task_struct *tsk = current; 116 struct task_struct *tsk = current;
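The next two files switch their __show_regs() banner code over to this new helper, so a prototype has to be visible to both callers; a sketch of what such a declaration would look like (assumed, it is not shown in this excerpt):

	/* shared header (assumed location) */
	void show_regs_common(void);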
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 075580b35682..120b88797a75 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -23,7 +23,6 @@
23#include <linux/vmalloc.h> 23#include <linux/vmalloc.h>
24#include <linux/user.h> 24#include <linux/user.h>
25#include <linux/interrupt.h> 25#include <linux/interrupt.h>
26#include <linux/utsname.h>
27#include <linux/delay.h> 26#include <linux/delay.h>
28#include <linux/reboot.h> 27#include <linux/reboot.h>
29#include <linux/init.h> 28#include <linux/init.h>
@@ -35,7 +34,6 @@
35#include <linux/tick.h> 34#include <linux/tick.h>
36#include <linux/percpu.h> 35#include <linux/percpu.h>
37#include <linux/prctl.h> 36#include <linux/prctl.h>
38#include <linux/dmi.h>
39#include <linux/ftrace.h> 37#include <linux/ftrace.h>
40#include <linux/uaccess.h> 38#include <linux/uaccess.h>
41#include <linux/io.h> 39#include <linux/io.h>
@@ -128,7 +126,6 @@ void __show_regs(struct pt_regs *regs, int all)
128 unsigned long d0, d1, d2, d3, d6, d7; 126 unsigned long d0, d1, d2, d3, d6, d7;
129 unsigned long sp; 127 unsigned long sp;
130 unsigned short ss, gs; 128 unsigned short ss, gs;
131 const char *board;
132 129
133 if (user_mode_vm(regs)) { 130 if (user_mode_vm(regs)) {
134 sp = regs->sp; 131 sp = regs->sp;
@@ -140,16 +137,7 @@ void __show_regs(struct pt_regs *regs, int all)
140 savesegment(gs, gs); 137 savesegment(gs, gs);
141 } 138 }
142 139
143 printk("\n"); 140 show_regs_common();
144
145 board = dmi_get_system_info(DMI_PRODUCT_NAME);
146 if (!board)
147 board = "";
148 printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
149 task_pid_nr(current), current->comm,
150 print_tainted(), init_utsname()->release,
151 (int)strcspn(init_utsname()->version, " "),
152 init_utsname()->version, board);
153 141
154 printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", 142 printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
155 (u16)regs->cs, regs->ip, regs->flags, 143 (u16)regs->cs, regs->ip, regs->flags,
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c95c8f4e790a..e5ab0cd0ef36 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -26,7 +26,6 @@
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/user.h> 27#include <linux/user.h>
28#include <linux/interrupt.h> 28#include <linux/interrupt.h>
29#include <linux/utsname.h>
30#include <linux/delay.h> 29#include <linux/delay.h>
31#include <linux/module.h> 30#include <linux/module.h>
32#include <linux/ptrace.h> 31#include <linux/ptrace.h>
@@ -38,7 +37,6 @@
38#include <linux/uaccess.h> 37#include <linux/uaccess.h>
39#include <linux/io.h> 38#include <linux/io.h>
40#include <linux/ftrace.h> 39#include <linux/ftrace.h>
41#include <linux/dmi.h>
42 40
43#include <asm/pgtable.h> 41#include <asm/pgtable.h>
44#include <asm/system.h> 42#include <asm/system.h>
@@ -163,18 +161,8 @@ void __show_regs(struct pt_regs *regs, int all)
163 unsigned long d0, d1, d2, d3, d6, d7; 161 unsigned long d0, d1, d2, d3, d6, d7;
164 unsigned int fsindex, gsindex; 162 unsigned int fsindex, gsindex;
165 unsigned int ds, cs, es; 163 unsigned int ds, cs, es;
166 const char *board; 164
167 165 show_regs_common();
168 printk("\n");
169 print_modules();
170 board = dmi_get_system_info(DMI_PRODUCT_NAME);
171 if (!board)
172 board = "";
173 printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
174 current->pid, current->comm, print_tainted(),
175 init_utsname()->release,
176 (int)strcspn(init_utsname()->version, " "),
177 init_utsname()->version, board);
178 printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); 166 printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
179 printk_address(regs->ip, 1); 167 printk_address(regs->ip, 1);
180 printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, 168 printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 04d182a7cfdb..7079ddaf0731 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -555,7 +555,9 @@ static int genregs_set(struct task_struct *target,
555 return ret; 555 return ret;
556} 556}
557 557
558static void ptrace_triggered(struct perf_event *bp, void *data) 558static void ptrace_triggered(struct perf_event *bp, int nmi,
559 struct perf_sample_data *data,
560 struct pt_regs *regs)
559{ 561{
560 int i; 562 int i;
561 struct thread_struct *thread = &(current->thread); 563 struct thread_struct *thread = &(current->thread);
@@ -593,13 +595,13 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[])
593 return dr7; 595 return dr7;
594} 596}
595 597
596static struct perf_event * 598static int
597ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, 599ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
598 struct task_struct *tsk, int disabled) 600 struct task_struct *tsk, int disabled)
599{ 601{
600 int err; 602 int err;
601 int gen_len, gen_type; 603 int gen_len, gen_type;
602 DEFINE_BREAKPOINT_ATTR(attr); 604 struct perf_event_attr attr;
603 605
604 /* 606 /*
605 * We shoud have at least an inactive breakpoint at this 607 * We shoud have at least an inactive breakpoint at this
@@ -607,18 +609,18 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
607 * written the address register first 609 * written the address register first
608 */ 610 */
609 if (!bp) 611 if (!bp)
610 return ERR_PTR(-EINVAL); 612 return -EINVAL;
611 613
612 err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); 614 err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
613 if (err) 615 if (err)
614 return ERR_PTR(err); 616 return err;
615 617
616 attr = bp->attr; 618 attr = bp->attr;
617 attr.bp_len = gen_len; 619 attr.bp_len = gen_len;
618 attr.bp_type = gen_type; 620 attr.bp_type = gen_type;
619 attr.disabled = disabled; 621 attr.disabled = disabled;
620 622
621 return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); 623 return modify_user_hw_breakpoint(bp, &attr);
622} 624}
623 625
624/* 626/*
@@ -656,28 +658,17 @@ restore:
656 if (!second_pass) 658 if (!second_pass)
657 continue; 659 continue;
658 660
659 thread->ptrace_bps[i] = NULL; 661 rc = ptrace_modify_breakpoint(bp, len, type,
660 bp = ptrace_modify_breakpoint(bp, len, type,
661 tsk, 1); 662 tsk, 1);
662 if (IS_ERR(bp)) { 663 if (rc)
663 rc = PTR_ERR(bp);
664 thread->ptrace_bps[i] = NULL;
665 break; 664 break;
666 }
667 thread->ptrace_bps[i] = bp;
668 } 665 }
669 continue; 666 continue;
670 } 667 }
671 668
672 bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0); 669 rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
673 670 if (rc)
674 /* Incorrect bp, or we have a bug in bp API */
675 if (IS_ERR(bp)) {
676 rc = PTR_ERR(bp);
677 thread->ptrace_bps[i] = NULL;
678 break; 671 break;
679 }
680 thread->ptrace_bps[i] = bp;
681 } 672 }
682 /* 673 /*
683 * Make a second pass to free the remaining unused breakpoints 674 * Make a second pass to free the remaining unused breakpoints
@@ -721,9 +712,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
721{ 712{
722 struct perf_event *bp; 713 struct perf_event *bp;
723 struct thread_struct *t = &tsk->thread; 714 struct thread_struct *t = &tsk->thread;
724 DEFINE_BREAKPOINT_ATTR(attr); 715 struct perf_event_attr attr;
725 716
726 if (!t->ptrace_bps[nr]) { 717 if (!t->ptrace_bps[nr]) {
718 hw_breakpoint_init(&attr);
727 /* 719 /*
728 * Put stub len and type to register (reserve) an inactive but 720 * Put stub len and type to register (reserve) an inactive but
729 * correct bp 721 * correct bp
@@ -734,26 +726,32 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
734 attr.disabled = 1; 726 attr.disabled = 1;
735 727
736 bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); 728 bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
729
730 /*
731 * CHECKME: the previous code returned -EIO if the addr wasn't
732 * a valid task virtual addr. The new one will return -EINVAL in
733 * this case.
734 * -EINVAL may be what we want for in-kernel breakpoints users,
735 * but -EIO looks better for ptrace, since we refuse a register
736 * writing for the user. And anyway this is the previous
737 * behaviour.
738 */
739 if (IS_ERR(bp))
740 return PTR_ERR(bp);
741
742 t->ptrace_bps[nr] = bp;
737 } else { 743 } else {
744 int err;
745
738 bp = t->ptrace_bps[nr]; 746 bp = t->ptrace_bps[nr];
739 t->ptrace_bps[nr] = NULL;
740 747
741 attr = bp->attr; 748 attr = bp->attr;
742 attr.bp_addr = addr; 749 attr.bp_addr = addr;
743 bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); 750 err = modify_user_hw_breakpoint(bp, &attr);
751 if (err)
752 return err;
744 } 753 }
745 /*
746 * CHECKME: the previous code returned -EIO if the addr wasn't a
747 * valid task virtual addr. The new one will return -EINVAL in this
748 * case.
749 * -EINVAL may be what we want for in-kernel breakpoints users, but
750 * -EIO looks better for ptrace, since we refuse a register writing
751 * for the user. And anyway this is the previous behaviour.
752 */
753 if (IS_ERR(bp))
754 return PTR_ERR(bp);
755 754
756 t->ptrace_bps[nr] = bp;
757 755
758 return 0; 756 return 0;
759} 757}
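The ptrace hunks above track an API change: a breakpoint is now adjusted by filling in a plain struct perf_event_attr and calling modify_user_hw_breakpoint(bp, &attr), which returns 0 or a negative errno instead of a new perf_event pointer. A hedged sketch of the new calling pattern (the surrounding helper is illustrative):

	#include <linux/perf_event.h>
	#include <linux/hw_breakpoint.h>

	static int example_move_breakpoint(struct perf_event *bp, unsigned long addr)
	{
		struct perf_event_attr attr;

		attr = bp->attr;	/* start from the current settings */
		attr.bp_addr = addr;	/* ...and change only the address */

		/* 0 on success, -errno on failure; bp itself stays registered */
		return modify_user_hw_breakpoint(bp, &attr);
	}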
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2b97fc5b124e..1545bc0c9845 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -259,6 +259,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
259 DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), 259 DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"),
260 }, 260 },
261 }, 261 },
262 { /* Handle problems with rebooting on ASUS P4S800 */
263 .callback = set_bios_reboot,
264 .ident = "ASUS P4S800",
265 .matches = {
266 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
267 DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
268 },
269 },
262 { } 270 { }
263}; 271};
264 272
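The new reboot_dmi_table entry follows the usual DMI quirk pattern: every .matches[] condition must hit before the .callback runs. Roughly how such a table is consumed at boot (illustrative; reboot.c does this in its own init path):

	#include <linux/dmi.h>

	static int __init example_apply_reboot_quirks(void)
	{
		/* runs set_bios_reboot() for the ASUS P4S800 entry above when
		 * both the board vendor and board name strings match */
		dmi_check_system(reboot_dmi_table);
		return 0;
	}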
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index d559af913e1f..35abcb8b00e9 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -1,3 +1,5 @@
1#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
2
1#include <linux/kernel.h> 3#include <linux/kernel.h>
2#include <linux/module.h> 4#include <linux/module.h>
3#include <linux/init.h> 5#include <linux/init.h>
@@ -20,9 +22,9 @@
20#include <asm/stackprotector.h> 22#include <asm/stackprotector.h>
21 23
22#ifdef CONFIG_DEBUG_PER_CPU_MAPS 24#ifdef CONFIG_DEBUG_PER_CPU_MAPS
23# define DBG(x...) printk(KERN_DEBUG x) 25# define DBG(fmt, ...) pr_dbg(fmt, ##__VA_ARGS__)
24#else 26#else
25# define DBG(x...) 27# define DBG(fmt, ...) do { if (0) pr_dbg(fmt, ##__VA_ARGS__); } while (0)
26#endif 28#endif
27 29
28DEFINE_PER_CPU(int, cpu_number); 30DEFINE_PER_CPU(int, cpu_number);
@@ -116,8 +118,8 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
116 } else { 118 } else {
117 ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node), 119 ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node),
118 size, align, goal); 120 size, align, goal);
119 pr_debug("per cpu data for cpu%d %lu bytes on node%d at " 121 pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
120 "%016lx\n", cpu, size, node, __pa(ptr)); 122 cpu, size, node, __pa(ptr));
121 } 123 }
122 return ptr; 124 return ptr;
123#else 125#else
@@ -198,8 +200,7 @@ void __init setup_per_cpu_areas(void)
198 pcpu_cpu_distance, 200 pcpu_cpu_distance,
199 pcpu_fc_alloc, pcpu_fc_free); 201 pcpu_fc_alloc, pcpu_fc_free);
200 if (rc < 0) 202 if (rc < 0)
201 pr_warning("PERCPU: %s allocator failed (%d), " 203 pr_warning("%s allocator failed (%d), falling back to page size\n",
202 "falling back to page size\n",
203 pcpu_fc_names[pcpu_chosen_fc], rc); 204 pcpu_fc_names[pcpu_chosen_fc], rc);
204 } 205 }
205 if (rc < 0) 206 if (rc < 0)
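The reworked DBG() stub above uses the "if (0)" idiom: the debug call still goes through the compiler, so format strings and arguments stay type-checked and variables used only in debug output do not become "unused", yet no code is generated when the option is off. The same idiom with the standard pr_debug() helper and a hypothetical config symbol:

	#include <linux/kernel.h>

	#ifdef CONFIG_EXAMPLE_DEBUG
	# define EXAMPLE_DBG(fmt, ...) pr_debug(fmt, ##__VA_ARGS__)
	#else
	# define EXAMPLE_DBG(fmt, ...)					\
		do { if (0) pr_debug(fmt, ##__VA_ARGS__); } while (0)
	#endif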
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index fab7440c9bb2..296aba49472a 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -29,6 +29,8 @@
29 * Based on QEMU and Xen. 29 * Based on QEMU and Xen.
30 */ 30 */
31 31
32#define pr_fmt(fmt) "pit: " fmt
33
32#include <linux/kvm_host.h> 34#include <linux/kvm_host.h>
33 35
34#include "irq.h" 36#include "irq.h"
@@ -262,7 +264,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
262 264
263static void destroy_pit_timer(struct kvm_timer *pt) 265static void destroy_pit_timer(struct kvm_timer *pt)
264{ 266{
265 pr_debug("pit: execute del timer!\n"); 267 pr_debug("execute del timer!\n");
266 hrtimer_cancel(&pt->timer); 268 hrtimer_cancel(&pt->timer);
267} 269}
268 270
@@ -284,7 +286,7 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
284 286
285 interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); 287 interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
286 288
287 pr_debug("pit: create pit timer, interval is %llu nsec\n", interval); 289 pr_debug("create pit timer, interval is %llu nsec\n", interval);
288 290
289 /* TODO The new value only affected after the retriggered */ 291 /* TODO The new value only affected after the retriggered */
290 hrtimer_cancel(&pt->timer); 292 hrtimer_cancel(&pt->timer);
@@ -309,7 +311,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
309 311
310 WARN_ON(!mutex_is_locked(&ps->lock)); 312 WARN_ON(!mutex_is_locked(&ps->lock));
311 313
312 pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); 314 pr_debug("load_count val is %d, channel is %d\n", val, channel);
313 315
314 /* 316 /*
315 * The largest possible initial count is 0; this is equivalent 317 * The largest possible initial count is 0; this is equivalent
@@ -395,8 +397,8 @@ static int pit_ioport_write(struct kvm_io_device *this,
395 mutex_lock(&pit_state->lock); 397 mutex_lock(&pit_state->lock);
396 398
397 if (val != 0) 399 if (val != 0)
398 pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n", 400 pr_debug("write addr is 0x%x, len is %d, val is 0x%x\n",
399 (unsigned int)addr, len, val); 401 (unsigned int)addr, len, val);
400 402
401 if (addr == 3) { 403 if (addr == 3) {
402 channel = val >> 6; 404 channel = val >> 6;
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index a2d6472895fb..45b20e486c2f 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -5,7 +5,7 @@
5inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk 5inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
6inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt 6inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
7quiet_cmd_inat_tables = GEN $@ 7quiet_cmd_inat_tables = GEN $@
8 cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ 8 cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
9 9
10$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) 10$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
11 $(call cmd,inat_tables) 11 $(call cmd,inat_tables)
@@ -20,7 +20,7 @@ lib-y := delay.o
20lib-y += thunk_$(BITS).o 20lib-y += thunk_$(BITS).o
21lib-y += usercopy_$(BITS).o getuser.o putuser.o 21lib-y += usercopy_$(BITS).o getuser.o putuser.o
22lib-y += memcpy_$(BITS).o 22lib-y += memcpy_$(BITS).o
23lib-y += insn.o inat.o 23lib-$(CONFIG_KPROBES) += insn.o inat.o
24 24
25obj-y += msr-reg.o msr-reg-export.o 25obj-y += msr-reg.o msr-reg-export.o
26 26
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 07bcc309cfda..c0f6198565eb 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -5,6 +5,8 @@
5 * 2008 Pekka Paalanen <pq@iki.fi> 5 * 2008 Pekka Paalanen <pq@iki.fi>
6 */ 6 */
7 7
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
8#include <linux/list.h> 10#include <linux/list.h>
9#include <linux/rculist.h> 11#include <linux/rculist.h>
10#include <linux/spinlock.h> 12#include <linux/spinlock.h>
@@ -136,7 +138,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
136 pte_t *pte = lookup_address(f->page, &level); 138 pte_t *pte = lookup_address(f->page, &level);
137 139
138 if (!pte) { 140 if (!pte) {
139 pr_err("kmmio: no pte for page 0x%08lx\n", f->page); 141 pr_err("no pte for page 0x%08lx\n", f->page);
140 return -1; 142 return -1;
141 } 143 }
142 144
@@ -148,7 +150,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
148 clear_pte_presence(pte, clear, &f->old_presence); 150 clear_pte_presence(pte, clear, &f->old_presence);
149 break; 151 break;
150 default: 152 default:
151 pr_err("kmmio: unexpected page level 0x%x.\n", level); 153 pr_err("unexpected page level 0x%x.\n", level);
152 return -1; 154 return -1;
153 } 155 }
154 156
@@ -170,13 +172,14 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
170static int arm_kmmio_fault_page(struct kmmio_fault_page *f) 172static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
171{ 173{
172 int ret; 174 int ret;
173 WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); 175 WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
174 if (f->armed) { 176 if (f->armed) {
175 pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", 177 pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n",
176 f->page, f->count, !!f->old_presence); 178 f->page, f->count, !!f->old_presence);
177 } 179 }
178 ret = clear_page_presence(f, true); 180 ret = clear_page_presence(f, true);
179 WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); 181 WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"),
182 f->page);
180 f->armed = true; 183 f->armed = true;
181 return ret; 184 return ret;
182} 185}
@@ -240,24 +243,21 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
240 * condition needs handling by do_page_fault(), the 243 * condition needs handling by do_page_fault(), the
241 * page really not being present is the most common. 244 * page really not being present is the most common.
242 */ 245 */
243 pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n", 246 pr_debug("secondary hit for 0x%08lx CPU %d.\n",
244 addr, smp_processor_id()); 247 addr, smp_processor_id());
245 248
246 if (!faultpage->old_presence) 249 if (!faultpage->old_presence)
247 pr_info("kmmio: unexpected secondary hit for " 250 pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n",
248 "address 0x%08lx on CPU %d.\n", addr, 251 addr, smp_processor_id());
249 smp_processor_id());
250 } else { 252 } else {
251 /* 253 /*
252 * Prevent overwriting already in-flight context. 254 * Prevent overwriting already in-flight context.
253 * This should not happen, let's hope disarming at 255 * This should not happen, let's hope disarming at
254 * least prevents a panic. 256 * least prevents a panic.
255 */ 257 */
256 pr_emerg("kmmio: recursive probe hit on CPU %d, " 258 pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n",
257 "for address 0x%08lx. Ignoring.\n", 259 smp_processor_id(), addr);
258 smp_processor_id(), addr); 260 pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr);
259 pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
260 ctx->addr);
261 disarm_kmmio_fault_page(faultpage); 261 disarm_kmmio_fault_page(faultpage);
262 } 262 }
263 goto no_kmmio_ctx; 263 goto no_kmmio_ctx;
@@ -316,8 +316,8 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
316 * something external causing them (f.e. using a debugger while 316 * something external causing them (f.e. using a debugger while
317 * mmio tracing enabled), or erroneous behaviour 317 * mmio tracing enabled), or erroneous behaviour
318 */ 318 */
319 pr_warning("kmmio: unexpected debug trap on CPU %d.\n", 319 pr_warning("unexpected debug trap on CPU %d.\n",
320 smp_processor_id()); 320 smp_processor_id());
321 goto out; 321 goto out;
322 } 322 }
323 323
@@ -425,7 +425,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
425 list_add_rcu(&p->list, &kmmio_probes); 425 list_add_rcu(&p->list, &kmmio_probes);
426 while (size < size_lim) { 426 while (size < size_lim) {
427 if (add_kmmio_fault_page(p->addr + size)) 427 if (add_kmmio_fault_page(p->addr + size))
428 pr_err("kmmio: Unable to set page fault.\n"); 428 pr_err("Unable to set page fault.\n");
429 size += PAGE_SIZE; 429 size += PAGE_SIZE;
430 } 430 }
431out: 431out:
@@ -490,7 +490,7 @@ static void remove_kmmio_fault_pages(struct rcu_head *head)
490 * 2. remove_kmmio_fault_pages() 490 * 2. remove_kmmio_fault_pages()
491 * Remove the pages from kmmio_page_table. 491 * Remove the pages from kmmio_page_table.
492 * 3. rcu_free_kmmio_fault_pages() 492 * 3. rcu_free_kmmio_fault_pages()
493 * Actally free the kmmio_fault_page structs as with RCU. 493 * Actually free the kmmio_fault_page structs as with RCU.
494 */ 494 */
495void unregister_kmmio_probe(struct kmmio_probe *p) 495void unregister_kmmio_probe(struct kmmio_probe *p)
496{ 496{
@@ -511,7 +511,7 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
511 511
512 drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); 512 drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
513 if (!drelease) { 513 if (!drelease) {
514 pr_crit("kmmio: leaking kmmio_fault_page objects.\n"); 514 pr_crit("leaking kmmio_fault_page objects.\n");
515 return; 515 return;
516 } 516 }
517 drelease->release_list = release_list; 517 drelease->release_list = release_list;
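One detail in the kmmio.c hunks: pr_fmt() is only applied automatically by the pr_*() helpers, so the WARN_ONCE() call sites have to wrap their format strings in pr_fmt() by hand to keep the same prefix. Minimal illustration of that pattern:

	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

	#include <linux/kernel.h>

	static void example_arm_check(bool armed)
	{
		/* prefix expanded explicitly, since WARN_ONCE() bypasses pr_*() */
		WARN_ONCE(armed, KERN_ERR pr_fmt("page already armed\n"));
	}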
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 132772a8ec57..4c765e9c4664 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -19,6 +19,9 @@
19 * 19 *
20 * Derived from the read-mod example from relay-examples by Tom Zanussi. 20 * Derived from the read-mod example from relay-examples by Tom Zanussi.
21 */ 21 */
22
 23#define pr_fmt(fmt) "mmiotrace: " fmt
24
22#define DEBUG 1 25#define DEBUG 1
23 26
24#include <linux/module.h> 27#include <linux/module.h>
@@ -36,8 +39,6 @@
36 39
37#include "pf_in.h" 40#include "pf_in.h"
38 41
39#define NAME "mmiotrace: "
40
41struct trap_reason { 42struct trap_reason {
42 unsigned long addr; 43 unsigned long addr;
43 unsigned long ip; 44 unsigned long ip;
@@ -96,17 +97,18 @@ static void print_pte(unsigned long address)
96 pte_t *pte = lookup_address(address, &level); 97 pte_t *pte = lookup_address(address, &level);
97 98
98 if (!pte) { 99 if (!pte) {
99 pr_err(NAME "Error in %s: no pte for page 0x%08lx\n", 100 pr_err("Error in %s: no pte for page 0x%08lx\n",
100 __func__, address); 101 __func__, address);
101 return; 102 return;
102 } 103 }
103 104
104 if (level == PG_LEVEL_2M) { 105 if (level == PG_LEVEL_2M) {
105 pr_emerg(NAME "4MB pages are not currently supported: " 106 pr_emerg("4MB pages are not currently supported: 0x%08lx\n",
106 "0x%08lx\n", address); 107 address);
107 BUG(); 108 BUG();
108 } 109 }
109 pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address, 110 pr_info("pte for 0x%lx: 0x%llx 0x%llx\n",
111 address,
110 (unsigned long long)pte_val(*pte), 112 (unsigned long long)pte_val(*pte),
111 (unsigned long long)pte_val(*pte) & _PAGE_PRESENT); 113 (unsigned long long)pte_val(*pte) & _PAGE_PRESENT);
112} 114}
@@ -118,22 +120,21 @@ static void print_pte(unsigned long address)
118static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) 120static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
119{ 121{
120 const struct trap_reason *my_reason = &get_cpu_var(pf_reason); 122 const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
121 pr_emerg(NAME "unexpected fault for address: 0x%08lx, " 123 pr_emerg("unexpected fault for address: 0x%08lx, last fault for address: 0x%08lx\n",
122 "last fault for address: 0x%08lx\n", 124 addr, my_reason->addr);
123 addr, my_reason->addr);
124 print_pte(addr); 125 print_pte(addr);
125 print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); 126 print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip);
126 print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); 127 print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip);
127#ifdef __i386__ 128#ifdef __i386__
128 pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", 129 pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
129 regs->ax, regs->bx, regs->cx, regs->dx); 130 regs->ax, regs->bx, regs->cx, regs->dx);
130 pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", 131 pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
131 regs->si, regs->di, regs->bp, regs->sp); 132 regs->si, regs->di, regs->bp, regs->sp);
132#else 133#else
133 pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", 134 pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n",
134 regs->ax, regs->cx, regs->dx); 135 regs->ax, regs->cx, regs->dx);
135 pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", 136 pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n",
136 regs->si, regs->di, regs->bp, regs->sp); 137 regs->si, regs->di, regs->bp, regs->sp);
137#endif 138#endif
138 put_cpu_var(pf_reason); 139 put_cpu_var(pf_reason);
139 BUG(); 140 BUG();
@@ -213,7 +214,7 @@ static void post(struct kmmio_probe *p, unsigned long condition,
213 /* this should always return the active_trace count to 0 */ 214 /* this should always return the active_trace count to 0 */
214 my_reason->active_traces--; 215 my_reason->active_traces--;
215 if (my_reason->active_traces) { 216 if (my_reason->active_traces) {
216 pr_emerg(NAME "unexpected post handler"); 217 pr_emerg("unexpected post handler");
217 BUG(); 218 BUG();
218 } 219 }
219 220
@@ -244,7 +245,7 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size,
244 }; 245 };
245 246
246 if (!trace) { 247 if (!trace) {
247 pr_err(NAME "kmalloc failed in ioremap\n"); 248 pr_err("kmalloc failed in ioremap\n");
248 return; 249 return;
249 } 250 }
250 251
@@ -282,8 +283,8 @@ void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
282 if (!is_enabled()) /* recheck and proper locking in *_core() */ 283 if (!is_enabled()) /* recheck and proper locking in *_core() */
283 return; 284 return;
284 285
285 pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n", 286 pr_debug("ioremap_*(0x%llx, 0x%lx) = %p\n",
286 (unsigned long long)offset, size, addr); 287 (unsigned long long)offset, size, addr);
287 if ((filter_offset) && (offset != filter_offset)) 288 if ((filter_offset) && (offset != filter_offset))
288 return; 289 return;
289 ioremap_trace_core(offset, size, addr); 290 ioremap_trace_core(offset, size, addr);
@@ -301,7 +302,7 @@ static void iounmap_trace_core(volatile void __iomem *addr)
301 struct remap_trace *tmp; 302 struct remap_trace *tmp;
302 struct remap_trace *found_trace = NULL; 303 struct remap_trace *found_trace = NULL;
303 304
304 pr_debug(NAME "Unmapping %p.\n", addr); 305 pr_debug("Unmapping %p.\n", addr);
305 306
306 spin_lock_irq(&trace_lock); 307 spin_lock_irq(&trace_lock);
307 if (!is_enabled()) 308 if (!is_enabled())
@@ -363,9 +364,8 @@ static void clear_trace_list(void)
363 * Caller also ensures is_enabled() cannot change. 364 * Caller also ensures is_enabled() cannot change.
364 */ 365 */
365 list_for_each_entry(trace, &trace_list, list) { 366 list_for_each_entry(trace, &trace_list, list) {
366 pr_notice(NAME "purging non-iounmapped " 367 pr_notice("purging non-iounmapped trace @0x%08lx, size 0x%lx.\n",
367 "trace @0x%08lx, size 0x%lx.\n", 368 trace->probe.addr, trace->probe.len);
368 trace->probe.addr, trace->probe.len);
369 if (!nommiotrace) 369 if (!nommiotrace)
370 unregister_kmmio_probe(&trace->probe); 370 unregister_kmmio_probe(&trace->probe);
371 } 371 }
@@ -387,7 +387,7 @@ static void enter_uniprocessor(void)
387 387
388 if (downed_cpus == NULL && 388 if (downed_cpus == NULL &&
389 !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { 389 !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) {
390 pr_notice(NAME "Failed to allocate mask\n"); 390 pr_notice("Failed to allocate mask\n");
391 goto out; 391 goto out;
392 } 392 }
393 393
@@ -395,20 +395,19 @@ static void enter_uniprocessor(void)
395 cpumask_copy(downed_cpus, cpu_online_mask); 395 cpumask_copy(downed_cpus, cpu_online_mask);
396 cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus); 396 cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus);
397 if (num_online_cpus() > 1) 397 if (num_online_cpus() > 1)
398 pr_notice(NAME "Disabling non-boot CPUs...\n"); 398 pr_notice("Disabling non-boot CPUs...\n");
399 put_online_cpus(); 399 put_online_cpus();
400 400
401 for_each_cpu(cpu, downed_cpus) { 401 for_each_cpu(cpu, downed_cpus) {
402 err = cpu_down(cpu); 402 err = cpu_down(cpu);
403 if (!err) 403 if (!err)
404 pr_info(NAME "CPU%d is down.\n", cpu); 404 pr_info("CPU%d is down.\n", cpu);
405 else 405 else
406 pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); 406 pr_err("Error taking CPU%d down: %d\n", cpu, err);
407 } 407 }
408out: 408out:
409 if (num_online_cpus() > 1) 409 if (num_online_cpus() > 1)
410 pr_warning(NAME "multiple CPUs still online, " 410 pr_warning("multiple CPUs still online, may miss events.\n");
411 "may miss events.\n");
412} 411}
413 412
414/* __ref because leave_uniprocessor calls cpu_up which is __cpuinit, 413/* __ref because leave_uniprocessor calls cpu_up which is __cpuinit,
@@ -420,13 +419,13 @@ static void __ref leave_uniprocessor(void)
420 419
421 if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) 420 if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0)
422 return; 421 return;
423 pr_notice(NAME "Re-enabling CPUs...\n"); 422 pr_notice("Re-enabling CPUs...\n");
424 for_each_cpu(cpu, downed_cpus) { 423 for_each_cpu(cpu, downed_cpus) {
425 err = cpu_up(cpu); 424 err = cpu_up(cpu);
426 if (!err) 425 if (!err)
427 pr_info(NAME "enabled CPU%d.\n", cpu); 426 pr_info("enabled CPU%d.\n", cpu);
428 else 427 else
429 pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err); 428 pr_err("cannot re-enable CPU%d: %d\n", cpu, err);
430 } 429 }
431} 430}
432 431
@@ -434,8 +433,8 @@ static void __ref leave_uniprocessor(void)
434static void enter_uniprocessor(void) 433static void enter_uniprocessor(void)
435{ 434{
436 if (num_online_cpus() > 1) 435 if (num_online_cpus() > 1)
437 pr_warning(NAME "multiple CPUs are online, may miss events. " 436 pr_warning("multiple CPUs are online, may miss events. "
438 "Suggest booting with maxcpus=1 kernel argument.\n"); 437 "Suggest booting with maxcpus=1 kernel argument.\n");
439} 438}
440 439
441static void leave_uniprocessor(void) 440static void leave_uniprocessor(void)
@@ -450,13 +449,13 @@ void enable_mmiotrace(void)
450 goto out; 449 goto out;
451 450
452 if (nommiotrace) 451 if (nommiotrace)
453 pr_info(NAME "MMIO tracing disabled.\n"); 452 pr_info("MMIO tracing disabled.\n");
454 kmmio_init(); 453 kmmio_init();
455 enter_uniprocessor(); 454 enter_uniprocessor();
456 spin_lock_irq(&trace_lock); 455 spin_lock_irq(&trace_lock);
457 atomic_inc(&mmiotrace_enabled); 456 atomic_inc(&mmiotrace_enabled);
458 spin_unlock_irq(&trace_lock); 457 spin_unlock_irq(&trace_lock);
459 pr_info(NAME "enabled.\n"); 458 pr_info("enabled.\n");
460out: 459out:
461 mutex_unlock(&mmiotrace_mutex); 460 mutex_unlock(&mmiotrace_mutex);
462} 461}
@@ -475,7 +474,7 @@ void disable_mmiotrace(void)
475 clear_trace_list(); /* guarantees: no more kmmio callbacks */ 474 clear_trace_list(); /* guarantees: no more kmmio callbacks */
476 leave_uniprocessor(); 475 leave_uniprocessor();
477 kmmio_cleanup(); 476 kmmio_cleanup();
478 pr_info(NAME "disabled.\n"); 477 pr_info("disabled.\n");
479out: 478out:
480 mutex_unlock(&mmiotrace_mutex); 479 mutex_unlock(&mmiotrace_mutex);
481} 480}
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c
index d8214dc03fa7..bee8d6ac2691 100644
--- a/arch/x86/tools/test_get_len.c
+++ b/arch/x86/tools/test_get_len.c
@@ -113,7 +113,7 @@ int main(int argc, char **argv)
113 char line[BUFSIZE], sym[BUFSIZE] = "<unknown>"; 113 char line[BUFSIZE], sym[BUFSIZE] = "<unknown>";
114 unsigned char insn_buf[16]; 114 unsigned char insn_buf[16];
115 struct insn insn; 115 struct insn insn;
116 int insns = 0, c; 116 int insns = 0;
117 int warnings = 0; 117 int warnings = 0;
118 118
119 parse_args(argc, argv); 119 parse_args(argc, argv);
diff --git a/drivers/char/lp.c b/drivers/char/lp.c
index e444c2dba160..938a3a273886 100644
--- a/drivers/char/lp.c
+++ b/drivers/char/lp.c
@@ -127,6 +127,7 @@
127#include <linux/wait.h> 127#include <linux/wait.h>
128#include <linux/jiffies.h> 128#include <linux/jiffies.h>
129#include <linux/smp_lock.h> 129#include <linux/smp_lock.h>
130#include <linux/compat.h>
130 131
131#include <linux/parport.h> 132#include <linux/parport.h>
132#undef LP_STATS 133#undef LP_STATS
@@ -571,13 +572,11 @@ static int lp_release(struct inode * inode, struct file * file)
571 return 0; 572 return 0;
572} 573}
573 574
574static int lp_ioctl(struct inode *inode, struct file *file, 575static int lp_do_ioctl(unsigned int minor, unsigned int cmd,
575 unsigned int cmd, unsigned long arg) 576 unsigned long arg, void __user *argp)
576{ 577{
577 unsigned int minor = iminor(inode);
578 int status; 578 int status;
579 int retval = 0; 579 int retval = 0;
580 void __user *argp = (void __user *)arg;
581 580
582#ifdef LP_DEBUG 581#ifdef LP_DEBUG
583 printk(KERN_DEBUG "lp%d ioctl, cmd: 0x%x, arg: 0x%lx\n", minor, cmd, arg); 582 printk(KERN_DEBUG "lp%d ioctl, cmd: 0x%x, arg: 0x%lx\n", minor, cmd, arg);
@@ -587,9 +586,6 @@ static int lp_ioctl(struct inode *inode, struct file *file,
587 if ((LP_F(minor) & LP_EXIST) == 0) 586 if ((LP_F(minor) & LP_EXIST) == 0)
588 return -ENODEV; 587 return -ENODEV;
589 switch ( cmd ) { 588 switch ( cmd ) {
590 struct timeval par_timeout;
591 long to_jiffies;
592
593 case LPTIME: 589 case LPTIME:
594 LP_TIME(minor) = arg * HZ/100; 590 LP_TIME(minor) = arg * HZ/100;
595 break; 591 break;
@@ -652,34 +648,101 @@ static int lp_ioctl(struct inode *inode, struct file *file,
652 return -EFAULT; 648 return -EFAULT;
653 break; 649 break;
654 650
655 case LPSETTIMEOUT:
656 if (copy_from_user (&par_timeout, argp,
657 sizeof (struct timeval))) {
658 return -EFAULT;
659 }
660 /* Convert to jiffies, place in lp_table */
661 if ((par_timeout.tv_sec < 0) ||
662 (par_timeout.tv_usec < 0)) {
663 return -EINVAL;
664 }
665 to_jiffies = DIV_ROUND_UP(par_timeout.tv_usec, 1000000/HZ);
666 to_jiffies += par_timeout.tv_sec * (long) HZ;
667 if (to_jiffies <= 0) {
668 return -EINVAL;
669 }
670 lp_table[minor].timeout = to_jiffies;
671 break;
672
673 default: 651 default:
674 retval = -EINVAL; 652 retval = -EINVAL;
675 } 653 }
676 return retval; 654 return retval;
677} 655}
678 656
657static int lp_set_timeout(unsigned int minor, struct timeval *par_timeout)
658{
659 long to_jiffies;
660
661 /* Convert to jiffies, place in lp_table */
662 if ((par_timeout->tv_sec < 0) ||
663 (par_timeout->tv_usec < 0)) {
664 return -EINVAL;
665 }
666 to_jiffies = DIV_ROUND_UP(par_timeout->tv_usec, 1000000/HZ);
667 to_jiffies += par_timeout->tv_sec * (long) HZ;
668 if (to_jiffies <= 0) {
669 return -EINVAL;
670 }
671 lp_table[minor].timeout = to_jiffies;
672 return 0;
673}
674
675static long lp_ioctl(struct file *file, unsigned int cmd,
676 unsigned long arg)
677{
678 unsigned int minor;
679 struct timeval par_timeout;
680 int ret;
681
682 minor = iminor(file->f_path.dentry->d_inode);
683 lock_kernel();
684 switch (cmd) {
685 case LPSETTIMEOUT:
686 if (copy_from_user(&par_timeout, (void __user *)arg,
687 sizeof (struct timeval))) {
688 ret = -EFAULT;
689 break;
690 }
691 ret = lp_set_timeout(minor, &par_timeout);
692 break;
693 default:
694 ret = lp_do_ioctl(minor, cmd, arg, (void __user *)arg);
695 break;
696 }
697 unlock_kernel();
698
699 return ret;
700}
701
702#ifdef CONFIG_COMPAT
703static long lp_compat_ioctl(struct file *file, unsigned int cmd,
704 unsigned long arg)
705{
706 unsigned int minor;
707 struct timeval par_timeout;
708 struct compat_timeval __user *tc;
709 int ret;
710
711 minor = iminor(file->f_path.dentry->d_inode);
712 lock_kernel();
713 switch (cmd) {
714 case LPSETTIMEOUT:
715 tc = compat_ptr(arg);
716 if (get_user(par_timeout.tv_sec, &tc->tv_sec) ||
717 get_user(par_timeout.tv_usec, &tc->tv_usec)) {
718 ret = -EFAULT;
719 break;
720 }
721 ret = lp_set_timeout(minor, &par_timeout);
722 break;
723#ifdef LP_STATS
724 case LPGETSTATS:
725 /* FIXME: add an implementation if you set LP_STATS */
726 ret = -EINVAL;
727 break;
728#endif
729 default:
730 ret = lp_do_ioctl(minor, cmd, arg, compat_ptr(arg));
731 break;
732 }
733 unlock_kernel();
734
735 return ret;
736}
737#endif
738
679static const struct file_operations lp_fops = { 739static const struct file_operations lp_fops = {
680 .owner = THIS_MODULE, 740 .owner = THIS_MODULE,
681 .write = lp_write, 741 .write = lp_write,
682 .ioctl = lp_ioctl, 742 .unlocked_ioctl = lp_ioctl,
743#ifdef CONFIG_COMPAT
744 .compat_ioctl = lp_compat_ioctl,
745#endif
683 .open = lp_open, 746 .open = lp_open,
684 .release = lp_release, 747 .release = lp_release,
685#ifdef CONFIG_PARPORT_1284 748#ifdef CONFIG_PARPORT_1284
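The lp.c changes above follow the standard conversion from the old .ioctl hook to .unlocked_ioctl plus an explicit .compat_ioctl: the handler now takes the file pointer directly, and the Big Kernel Lock that the old path took implicitly is now taken by hand. A compressed sketch of that shape (names are illustrative, not the driver's):

	#include <linux/fs.h>
	#include <linux/module.h>
	#include <linux/errno.h>
	#include <linux/smp_lock.h>

	static long example_do_ioctl(struct file *file, unsigned int cmd,
				     void __user *argp)
	{
		return -ENOTTY;		/* real per-command dispatch goes here */
	}

	static long example_unlocked_ioctl(struct file *file, unsigned int cmd,
					   unsigned long arg)
	{
		long ret;

		lock_kernel();		/* keep the old BKL-protected semantics */
		ret = example_do_ioctl(file, cmd, (void __user *)arg);
		unlock_kernel();

		return ret;
	}

	static const struct file_operations example_fops = {
		.owner		= THIS_MODULE,
		.unlocked_ioctl	= example_unlocked_ioctl,
	};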
diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c
index dba4600bcdb4..b31946e0b4ca 100644
--- a/drivers/mmc/host/msm_sdcc.c
+++ b/drivers/mmc/host/msm_sdcc.c
@@ -38,10 +38,9 @@
38#include <asm/div64.h> 38#include <asm/div64.h>
39#include <asm/sizes.h> 39#include <asm/sizes.h>
40 40
41#include <asm/mach/mmc.h> 41#include <mach/mmc.h>
42#include <mach/msm_iomap.h> 42#include <mach/msm_iomap.h>
43#include <mach/dma.h> 43#include <mach/dma.h>
44#include <mach/htc_pwrsink.h>
45 44
46#include "msm_sdcc.h" 45#include "msm_sdcc.h"
47 46
@@ -775,13 +774,11 @@ msmsdcc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
775 774
776 switch (ios->power_mode) { 775 switch (ios->power_mode) {
777 case MMC_POWER_OFF: 776 case MMC_POWER_OFF:
778 htc_pwrsink_set(PWRSINK_SDCARD, 0);
779 break; 777 break;
780 case MMC_POWER_UP: 778 case MMC_POWER_UP:
781 pwr |= MCI_PWR_UP; 779 pwr |= MCI_PWR_UP;
782 break; 780 break;
783 case MMC_POWER_ON: 781 case MMC_POWER_ON:
784 htc_pwrsink_set(PWRSINK_SDCARD, 100);
785 pwr |= MCI_PWR_ON; 782 pwr |= MCI_PWR_ON;
786 break; 783 break;
787 } 784 }
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 181f78c84105..6e8bcdfd23b4 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -1388,6 +1388,46 @@ static int proc_reapurbnonblock(struct dev_state *ps, void __user *arg)
1388} 1388}
1389 1389
1390#ifdef CONFIG_COMPAT 1390#ifdef CONFIG_COMPAT
1391static int proc_control_compat(struct dev_state *ps,
1392 struct usbdevfs_ctrltransfer32 __user *p32)
1393{
1394 struct usbdevfs_ctrltransfer __user *p;
1395 __u32 udata;
1396 p = compat_alloc_user_space(sizeof(*p));
1397 if (copy_in_user(p, p32, (sizeof(*p32) - sizeof(compat_caddr_t))) ||
1398 get_user(udata, &p32->data) ||
1399 put_user(compat_ptr(udata), &p->data))
1400 return -EFAULT;
1401 return proc_control(ps, p);
1402}
1403
1404static int proc_bulk_compat(struct dev_state *ps,
1405 struct usbdevfs_bulktransfer32 __user *p32)
1406{
1407 struct usbdevfs_bulktransfer __user *p;
1408 compat_uint_t n;
1409 compat_caddr_t addr;
1410
1411 p = compat_alloc_user_space(sizeof(*p));
1412
1413 if (get_user(n, &p32->ep) || put_user(n, &p->ep) ||
1414 get_user(n, &p32->len) || put_user(n, &p->len) ||
1415 get_user(n, &p32->timeout) || put_user(n, &p->timeout) ||
1416 get_user(addr, &p32->data) || put_user(compat_ptr(addr), &p->data))
1417 return -EFAULT;
1418
1419 return proc_bulk(ps, p);
1420}
1421static int proc_disconnectsignal_compat(struct dev_state *ps, void __user *arg)
1422{
1423 struct usbdevfs_disconnectsignal32 ds;
1424
1425 if (copy_from_user(&ds, arg, sizeof(ds)))
1426 return -EFAULT;
1427 ps->discsignr = ds.signr;
1428 ps->disccontext = compat_ptr(ds.context);
1429 return 0;
1430}
1391 1431
1392static int get_urb32(struct usbdevfs_urb *kurb, 1432static int get_urb32(struct usbdevfs_urb *kurb,
1393 struct usbdevfs_urb32 __user *uurb) 1433 struct usbdevfs_urb32 __user *uurb)
@@ -1482,6 +1522,7 @@ static int proc_reapurbnonblock_compat(struct dev_state *ps, void __user *arg)
1482 return processcompl_compat(as, (void __user * __user *)arg); 1522 return processcompl_compat(as, (void __user * __user *)arg);
1483} 1523}
1484 1524
1525
1485#endif 1526#endif
1486 1527
1487static int proc_disconnectsignal(struct dev_state *ps, void __user *arg) 1528static int proc_disconnectsignal(struct dev_state *ps, void __user *arg)
@@ -1648,12 +1689,12 @@ static int proc_release_port(struct dev_state *ps, void __user *arg)
1648 * are assuming that somehow the configuration has been prevented from 1689 * are assuming that somehow the configuration has been prevented from
1649 * changing. But there's no mechanism to ensure that... 1690 * changing. But there's no mechanism to ensure that...
1650 */ 1691 */
1651static int usbdev_ioctl(struct inode *inode, struct file *file, 1692static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
1652 unsigned int cmd, unsigned long arg) 1693 void __user *p)
1653{ 1694{
1654 struct dev_state *ps = file->private_data; 1695 struct dev_state *ps = file->private_data;
1696 struct inode *inode = file->f_path.dentry->d_inode;
1655 struct usb_device *dev = ps->dev; 1697 struct usb_device *dev = ps->dev;
1656 void __user *p = (void __user *)arg;
1657 int ret = -ENOTTY; 1698 int ret = -ENOTTY;
1658 1699
1659 if (!(file->f_mode & FMODE_WRITE)) 1700 if (!(file->f_mode & FMODE_WRITE))
@@ -1726,6 +1767,24 @@ static int usbdev_ioctl(struct inode *inode, struct file *file,
1726 break; 1767 break;
1727 1768
1728#ifdef CONFIG_COMPAT 1769#ifdef CONFIG_COMPAT
1770 case USBDEVFS_CONTROL32:
1771 snoop(&dev->dev, "%s: CONTROL32\n", __func__);
1772 ret = proc_control_compat(ps, p);
1773 if (ret >= 0)
1774 inode->i_mtime = CURRENT_TIME;
1775 break;
1776
1777 case USBDEVFS_BULK32:
1778 snoop(&dev->dev, "%s: BULK32\n", __func__);
1779 ret = proc_bulk_compat(ps, p);
1780 if (ret >= 0)
1781 inode->i_mtime = CURRENT_TIME;
1782 break;
1783
1784 case USBDEVFS_DISCSIGNAL32:
1785 snoop(&dev->dev, "%s: DISCSIGNAL32\n", __func__);
1786 ret = proc_disconnectsignal_compat(ps, p);
1787 break;
1729 1788
1730 case USBDEVFS_SUBMITURB32: 1789 case USBDEVFS_SUBMITURB32:
1731 snoop(&dev->dev, "%s: SUBMITURB32\n", __func__); 1790 snoop(&dev->dev, "%s: SUBMITURB32\n", __func__);
@@ -1745,7 +1804,7 @@ static int usbdev_ioctl(struct inode *inode, struct file *file,
1745 break; 1804 break;
1746 1805
1747 case USBDEVFS_IOCTL32: 1806 case USBDEVFS_IOCTL32:
1748 snoop(&dev->dev, "%s: IOCTL\n", __func__); 1807 snoop(&dev->dev, "%s: IOCTL32\n", __func__);
1749 ret = proc_ioctl_compat(ps, ptr_to_compat(p)); 1808 ret = proc_ioctl_compat(ps, ptr_to_compat(p));
1750 break; 1809 break;
1751#endif 1810#endif
@@ -1801,6 +1860,32 @@ static int usbdev_ioctl(struct inode *inode, struct file *file,
1801 return ret; 1860 return ret;
1802} 1861}
1803 1862
1863static long usbdev_ioctl(struct file *file, unsigned int cmd,
1864 unsigned long arg)
1865{
1866 int ret;
1867
1868 lock_kernel();
1869 ret = usbdev_do_ioctl(file, cmd, (void __user *)arg);
1870 unlock_kernel();
1871
1872 return ret;
1873}
1874
1875#ifdef CONFIG_COMPAT
1876static long usbdev_compat_ioctl(struct file *file, unsigned int cmd,
1877 unsigned long arg)
1878{
1879 int ret;
1880
1881 lock_kernel();
1882 ret = usbdev_do_ioctl(file, cmd, compat_ptr(arg));
1883 unlock_kernel();
1884
1885 return ret;
1886}
1887#endif
1888
1804/* No kernel lock - fine */ 1889/* No kernel lock - fine */
1805static unsigned int usbdev_poll(struct file *file, 1890static unsigned int usbdev_poll(struct file *file,
1806 struct poll_table_struct *wait) 1891 struct poll_table_struct *wait)
@@ -1817,13 +1902,16 @@ static unsigned int usbdev_poll(struct file *file,
1817} 1902}
1818 1903
1819const struct file_operations usbdev_file_operations = { 1904const struct file_operations usbdev_file_operations = {
1820 .owner = THIS_MODULE, 1905 .owner = THIS_MODULE,
1821 .llseek = usbdev_lseek, 1906 .llseek = usbdev_lseek,
1822 .read = usbdev_read, 1907 .read = usbdev_read,
1823 .poll = usbdev_poll, 1908 .poll = usbdev_poll,
1824 .ioctl = usbdev_ioctl, 1909 .unlocked_ioctl = usbdev_ioctl,
1825 .open = usbdev_open, 1910#ifdef CONFIG_COMPAT
1826 .release = usbdev_release, 1911 .compat_ioctl = usbdev_compat_ioctl,
1912#endif
1913 .open = usbdev_open,
1914 .release = usbdev_release,
1827}; 1915};
1828 1916
1829static void usbdev_remove(struct usb_device *udev) 1917static void usbdev_remove(struct usb_device *udev)
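proc_control_compat() and proc_bulk_compat() above use the classic compat thunk: allocate a native-layout argument block with compat_alloc_user_space(), copy the plain fields across, widen the 32-bit pointer member with compat_ptr(), then reuse the regular handler. The same pattern with made-up structures (illustrative only):

	#include <linux/compat.h>
	#include <linux/uaccess.h>
	#include <linux/errno.h>
	#include <linux/types.h>

	struct example_req {
		__u32 len;
		void __user *data;
	};

	struct example_req32 {
		__u32 len;
		compat_caddr_t data;	/* 32-bit user pointer */
	};

	static int example_compat_fixup(struct example_req32 __user *p32,
					struct example_req __user **native)
	{
		struct example_req __user *p = compat_alloc_user_space(sizeof(*p));
		compat_caddr_t addr;
		__u32 len;

		if (get_user(len, &p32->len) || put_user(len, &p->len) ||
		    get_user(addr, &p32->data) ||
		    put_user(compat_ptr(addr), &p->data))
			return -EFAULT;

		*native = p;		/* hand this to the native ioctl handler */
		return 0;
	}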
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index bb5fbed89e7f..99c0df1c7ebf 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -2131,7 +2131,7 @@ config FB_PRE_INIT_FB
2131 the bootloader. 2131 the bootloader.
2132 2132
2133config FB_MSM 2133config FB_MSM
2134 tristate 2134 tristate "MSM Framebuffer support"
2135 depends on FB && ARCH_MSM 2135 depends on FB && ARCH_MSM
2136 select FB_CFB_FILLRECT 2136 select FB_CFB_FILLRECT
2137 select FB_CFB_COPYAREA 2137 select FB_CFB_COPYAREA
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 2346895b3a77..278020d2449c 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -111,43 +111,40 @@
111#include <linux/dvb/frontend.h> 111#include <linux/dvb/frontend.h>
112#include <linux/dvb/video.h> 112#include <linux/dvb/video.h>
113 113
114#include <linux/sort.h>
115
114#ifdef CONFIG_SPARC 116#ifdef CONFIG_SPARC
115#include <asm/fbio.h> 117#include <asm/fbio.h>
116#endif 118#endif
117 119
118static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd, 120static int w_long(unsigned int fd, unsigned int cmd,
119 unsigned long arg, struct file *f) 121 compat_ulong_t __user *argp)
120{
121 return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
122}
123
124static int w_long(unsigned int fd, unsigned int cmd, unsigned long arg)
125{ 122{
126 mm_segment_t old_fs = get_fs(); 123 mm_segment_t old_fs = get_fs();
127 int err; 124 int err;
128 unsigned long val; 125 unsigned long val;
129 126
130 set_fs (KERNEL_DS); 127 set_fs (KERNEL_DS);
131 err = sys_ioctl(fd, cmd, (unsigned long)&val); 128 err = sys_ioctl(fd, cmd, (unsigned long)&val);
132 set_fs (old_fs); 129 set_fs (old_fs);
133 if (!err && put_user(val, (u32 __user *)compat_ptr(arg))) 130 if (!err && put_user(val, argp))
134 return -EFAULT; 131 return -EFAULT;
135 return err; 132 return err;
136} 133}
137 134
138static int rw_long(unsigned int fd, unsigned int cmd, unsigned long arg) 135static int rw_long(unsigned int fd, unsigned int cmd,
136 compat_ulong_t __user *argp)
139{ 137{
140 mm_segment_t old_fs = get_fs(); 138 mm_segment_t old_fs = get_fs();
141 u32 __user *argptr = compat_ptr(arg);
142 int err; 139 int err;
143 unsigned long val; 140 unsigned long val;
144 141
145 if(get_user(val, argptr)) 142 if(get_user(val, argp))
146 return -EFAULT; 143 return -EFAULT;
147 set_fs (KERNEL_DS); 144 set_fs (KERNEL_DS);
148 err = sys_ioctl(fd, cmd, (unsigned long)&val); 145 err = sys_ioctl(fd, cmd, (unsigned long)&val);
149 set_fs (old_fs); 146 set_fs (old_fs);
150 if (!err && put_user(val, argptr)) 147 if (!err && put_user(val, argp))
151 return -EFAULT; 148 return -EFAULT;
152 return err; 149 return err;
153} 150}
@@ -161,7 +158,8 @@ struct compat_video_event {
161 } u; 158 } u;
162}; 159};
163 160
164static int do_video_get_event(unsigned int fd, unsigned int cmd, unsigned long arg) 161static int do_video_get_event(unsigned int fd, unsigned int cmd,
162 struct compat_video_event __user *up)
165{ 163{
166 struct video_event kevent; 164 struct video_event kevent;
167 mm_segment_t old_fs = get_fs(); 165 mm_segment_t old_fs = get_fs();
@@ -172,8 +170,6 @@ static int do_video_get_event(unsigned int fd, unsigned int cmd, unsigned long a
172 set_fs(old_fs); 170 set_fs(old_fs);
173 171
174 if (!err) { 172 if (!err) {
175 struct compat_video_event __user *up = compat_ptr(arg);
176
177 err = put_user(kevent.type, &up->type); 173 err = put_user(kevent.type, &up->type);
178 err |= put_user(kevent.timestamp, &up->timestamp); 174 err |= put_user(kevent.timestamp, &up->timestamp);
179 err |= put_user(kevent.u.size.w, &up->u.size.w); 175 err |= put_user(kevent.u.size.w, &up->u.size.w);
@@ -192,15 +188,14 @@ struct compat_video_still_picture {
192 int32_t size; 188 int32_t size;
193}; 189};
194 190
195static int do_video_stillpicture(unsigned int fd, unsigned int cmd, unsigned long arg) 191static int do_video_stillpicture(unsigned int fd, unsigned int cmd,
192 struct compat_video_still_picture __user *up)
196{ 193{
197 struct compat_video_still_picture __user *up;
198 struct video_still_picture __user *up_native; 194 struct video_still_picture __user *up_native;
199 compat_uptr_t fp; 195 compat_uptr_t fp;
200 int32_t size; 196 int32_t size;
201 int err; 197 int err;
202 198
203 up = (struct compat_video_still_picture __user *) arg;
204 err = get_user(fp, &up->iFrame); 199 err = get_user(fp, &up->iFrame);
205 err |= get_user(size, &up->size); 200 err |= get_user(size, &up->size);
206 if (err) 201 if (err)
@@ -224,14 +219,13 @@ struct compat_video_spu_palette {
224 compat_uptr_t palette; 219 compat_uptr_t palette;
225}; 220};
226 221
227static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd, unsigned long arg) 222static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd,
223 struct compat_video_spu_palette __user *up)
228{ 224{
229 struct compat_video_spu_palette __user *up;
230 struct video_spu_palette __user *up_native; 225 struct video_spu_palette __user *up_native;
231 compat_uptr_t palp; 226 compat_uptr_t palp;
232 int length, err; 227 int length, err;
233 228
234 up = (struct compat_video_spu_palette __user *) arg;
235 err = get_user(palp, &up->palette); 229 err = get_user(palp, &up->palette);
236 err |= get_user(length, &up->length); 230 err |= get_user(length, &up->length);
237 231
@@ -299,16 +293,15 @@ static int sg_build_iovec(sg_io_hdr_t __user *sgio, void __user *dxferp, u16 iov
299 return 0; 293 return 0;
300} 294}
301 295
302static int sg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) 296static int sg_ioctl_trans(unsigned int fd, unsigned int cmd,
297 sg_io_hdr32_t __user *sgio32)
303{ 298{
304 sg_io_hdr_t __user *sgio; 299 sg_io_hdr_t __user *sgio;
305 sg_io_hdr32_t __user *sgio32;
306 u16 iovec_count; 300 u16 iovec_count;
307 u32 data; 301 u32 data;
308 void __user *dxferp; 302 void __user *dxferp;
309 int err; 303 int err;
310 304
311 sgio32 = compat_ptr(arg);
312 if (get_user(iovec_count, &sgio32->iovec_count)) 305 if (get_user(iovec_count, &sgio32->iovec_count))
313 return -EFAULT; 306 return -EFAULT;
314 307
@@ -398,11 +391,11 @@ struct compat_sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
398 int unused; 391 int unused;
399}; 392};
400 393
401static int sg_grt_trans(unsigned int fd, unsigned int cmd, unsigned long arg) 394static int sg_grt_trans(unsigned int fd, unsigned int cmd, struct
395 compat_sg_req_info __user *o)
402{ 396{
403 int err, i; 397 int err, i;
404 sg_req_info_t __user *r; 398 sg_req_info_t __user *r;
405 struct compat_sg_req_info __user *o = (void __user *)arg;
406 r = compat_alloc_user_space(sizeof(sg_req_info_t)*SG_MAX_QUEUE); 399 r = compat_alloc_user_space(sizeof(sg_req_info_t)*SG_MAX_QUEUE);
407 err = sys_ioctl(fd,cmd,(unsigned long)r); 400 err = sys_ioctl(fd,cmd,(unsigned long)r);
408 if (err < 0) 401 if (err < 0)
@@ -430,9 +423,9 @@ struct sock_fprog32 {
430#define PPPIOCSPASS32 _IOW('t', 71, struct sock_fprog32) 423#define PPPIOCSPASS32 _IOW('t', 71, struct sock_fprog32)
431#define PPPIOCSACTIVE32 _IOW('t', 70, struct sock_fprog32) 424#define PPPIOCSACTIVE32 _IOW('t', 70, struct sock_fprog32)
432 425
433static int ppp_sock_fprog_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) 426static int ppp_sock_fprog_ioctl_trans(unsigned int fd, unsigned int cmd,
427 struct sock_fprog32 __user *u_fprog32)
434{ 428{
435 struct sock_fprog32 __user *u_fprog32 = compat_ptr(arg);
436 struct sock_fprog __user *u_fprog64 = compat_alloc_user_space(sizeof(struct sock_fprog)); 429 struct sock_fprog __user *u_fprog64 = compat_alloc_user_space(sizeof(struct sock_fprog));
437 void __user *fptr64; 430 void __user *fptr64;
438 u32 fptr32; 431 u32 fptr32;
@@ -469,15 +462,14 @@ struct ppp_idle32 {
469}; 462};
470#define PPPIOCGIDLE32 _IOR('t', 63, struct ppp_idle32) 463#define PPPIOCGIDLE32 _IOR('t', 63, struct ppp_idle32)
471 464
472static int ppp_gidle(unsigned int fd, unsigned int cmd, unsigned long arg) 465static int ppp_gidle(unsigned int fd, unsigned int cmd,
466 struct ppp_idle32 __user *idle32)
473{ 467{
474 struct ppp_idle __user *idle; 468 struct ppp_idle __user *idle;
475 struct ppp_idle32 __user *idle32;
476 __kernel_time_t xmit, recv; 469 __kernel_time_t xmit, recv;
477 int err; 470 int err;
478 471
479 idle = compat_alloc_user_space(sizeof(*idle)); 472 idle = compat_alloc_user_space(sizeof(*idle));
480 idle32 = compat_ptr(arg);
481 473
482 err = sys_ioctl(fd, PPPIOCGIDLE, (unsigned long) idle); 474 err = sys_ioctl(fd, PPPIOCGIDLE, (unsigned long) idle);
483 475
@@ -491,15 +483,14 @@ static int ppp_gidle(unsigned int fd, unsigned int cmd, unsigned long arg)
491 return err; 483 return err;
492} 484}
493 485
494static int ppp_scompress(unsigned int fd, unsigned int cmd, unsigned long arg) 486static int ppp_scompress(unsigned int fd, unsigned int cmd,
487 struct ppp_option_data32 __user *odata32)
495{ 488{
496 struct ppp_option_data __user *odata; 489 struct ppp_option_data __user *odata;
497 struct ppp_option_data32 __user *odata32;
498 __u32 data; 490 __u32 data;
499 void __user *datap; 491 void __user *datap;
500 492
501 odata = compat_alloc_user_space(sizeof(*odata)); 493 odata = compat_alloc_user_space(sizeof(*odata));
502 odata32 = compat_ptr(arg);
503 494
504 if (get_user(data, &odata32->ptr)) 495 if (get_user(data, &odata32->ptr))
505 return -EFAULT; 496 return -EFAULT;
@@ -515,35 +506,6 @@ static int ppp_scompress(unsigned int fd, unsigned int cmd, unsigned long arg)
515 return sys_ioctl(fd, PPPIOCSCOMPRESS, (unsigned long) odata); 506 return sys_ioctl(fd, PPPIOCSCOMPRESS, (unsigned long) odata);
516} 507}
517 508
518static int ppp_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
519{
520 int err;
521
522 switch (cmd) {
523 case PPPIOCGIDLE32:
524 err = ppp_gidle(fd, cmd, arg);
525 break;
526
527 case PPPIOCSCOMPRESS32:
528 err = ppp_scompress(fd, cmd, arg);
529 break;
530
531 default:
532 do {
533 static int count;
534 if (++count <= 20)
535 printk("ppp_ioctl: Unknown cmd fd(%d) "
536 "cmd(%08x) arg(%08x)\n",
537 (int)fd, (unsigned int)cmd, (unsigned int)arg);
538 } while(0);
539 err = -EINVAL;
540 break;
541 };
542
543 return err;
544}
545
546
547#ifdef CONFIG_BLOCK 509#ifdef CONFIG_BLOCK
548struct mtget32 { 510struct mtget32 {
549 compat_long_t mt_type; 511 compat_long_t mt_type;
@@ -561,7 +523,7 @@ struct mtpos32 {
561}; 523};
562#define MTIOCPOS32 _IOR('m', 3, struct mtpos32) 524#define MTIOCPOS32 _IOR('m', 3, struct mtpos32)
563 525
564static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) 526static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
565{ 527{
566 mm_segment_t old_fs = get_fs(); 528 mm_segment_t old_fs = get_fs();
567 struct mtget get; 529 struct mtget get;
@@ -581,15 +543,6 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
581 kcmd = MTIOCGET; 543 kcmd = MTIOCGET;
582 karg = &get; 544 karg = &get;
583 break; 545 break;
584 default:
585 do {
586 static int count;
587 if (++count <= 20)
588 printk("mt_ioctl: Unknown cmd fd(%d) "
589 "cmd(%08x) arg(%08x)\n",
590 (int)fd, (unsigned int)cmd, (unsigned int)arg);
591 } while(0);
592 return -EINVAL;
593 } 546 }
594 set_fs (KERNEL_DS); 547 set_fs (KERNEL_DS);
595 err = sys_ioctl (fd, kcmd, (unsigned long)karg); 548 err = sys_ioctl (fd, kcmd, (unsigned long)karg);
@@ -598,11 +551,11 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
598 return err; 551 return err;
599 switch (cmd) { 552 switch (cmd) {
600 case MTIOCPOS32: 553 case MTIOCPOS32:
601 upos32 = compat_ptr(arg); 554 upos32 = argp;
602 err = __put_user(pos.mt_blkno, &upos32->mt_blkno); 555 err = __put_user(pos.mt_blkno, &upos32->mt_blkno);
603 break; 556 break;
604 case MTIOCGET32: 557 case MTIOCGET32:
605 umget32 = compat_ptr(arg); 558 umget32 = argp;
606 err = __put_user(get.mt_type, &umget32->mt_type); 559 err = __put_user(get.mt_type, &umget32->mt_type);
607 err |= __put_user(get.mt_resid, &umget32->mt_resid); 560 err |= __put_user(get.mt_resid, &umget32->mt_resid);
608 err |= __put_user(get.mt_dsreg, &umget32->mt_dsreg); 561 err |= __put_user(get.mt_dsreg, &umget32->mt_dsreg);
@@ -617,162 +570,8 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
617 570
618#endif /* CONFIG_BLOCK */ 571#endif /* CONFIG_BLOCK */
619 572
620#ifdef CONFIG_VT 573static int do_smb_getmountuid(unsigned int fd, unsigned int cmd,
621 574 compat_uid_t __user *argp)
622static int vt_check(struct file *file)
623{
624 struct tty_struct *tty;
625 struct inode *inode = file->f_path.dentry->d_inode;
626 struct vc_data *vc;
627
628 if (file->f_op->unlocked_ioctl != tty_ioctl)
629 return -EINVAL;
630
631 tty = (struct tty_struct *)file->private_data;
632 if (tty_paranoia_check(tty, inode, "tty_ioctl"))
633 return -EINVAL;
634
635 if (tty->ops->ioctl != vt_ioctl)
636 return -EINVAL;
637
638 vc = (struct vc_data *)tty->driver_data;
639 if (!vc_cons_allocated(vc->vc_num)) /* impossible? */
640 return -ENOIOCTLCMD;
641
642 /*
643 * To have permissions to do most of the vt ioctls, we either have
644 * to be the owner of the tty, or have CAP_SYS_TTY_CONFIG.
645 */
646 if (current->signal->tty == tty || capable(CAP_SYS_TTY_CONFIG))
647 return 1;
648 return 0;
649}
650
651struct consolefontdesc32 {
652 unsigned short charcount; /* characters in font (256 or 512) */
653 unsigned short charheight; /* scan lines per character (1-32) */
654 compat_caddr_t chardata; /* font data in expanded form */
655};
656
657static int do_fontx_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg, struct file *file)
658{
659 struct consolefontdesc32 __user *user_cfd = compat_ptr(arg);
660 struct console_font_op op;
661 compat_caddr_t data;
662 int i, perm;
663
664 perm = vt_check(file);
665 if (perm < 0) return perm;
666
667 switch (cmd) {
668 case PIO_FONTX:
669 if (!perm)
670 return -EPERM;
671 op.op = KD_FONT_OP_SET;
672 op.flags = 0;
673 op.width = 8;
674 if (get_user(op.height, &user_cfd->charheight) ||
675 get_user(op.charcount, &user_cfd->charcount) ||
676 get_user(data, &user_cfd->chardata))
677 return -EFAULT;
678 op.data = compat_ptr(data);
679 return con_font_op(vc_cons[fg_console].d, &op);
680 case GIO_FONTX:
681 op.op = KD_FONT_OP_GET;
682 op.flags = 0;
683 op.width = 8;
684 if (get_user(op.height, &user_cfd->charheight) ||
685 get_user(op.charcount, &user_cfd->charcount) ||
686 get_user(data, &user_cfd->chardata))
687 return -EFAULT;
688 if (!data)
689 return 0;
690 op.data = compat_ptr(data);
691 i = con_font_op(vc_cons[fg_console].d, &op);
692 if (i)
693 return i;
694 if (put_user(op.height, &user_cfd->charheight) ||
695 put_user(op.charcount, &user_cfd->charcount) ||
696 put_user((compat_caddr_t)(unsigned long)op.data,
697 &user_cfd->chardata))
698 return -EFAULT;
699 return 0;
700 }
701 return -EINVAL;
702}
703
704struct console_font_op32 {
705 compat_uint_t op; /* operation code KD_FONT_OP_* */
706 compat_uint_t flags; /* KD_FONT_FLAG_* */
707 compat_uint_t width, height; /* font size */
708 compat_uint_t charcount;
709 compat_caddr_t data; /* font data with height fixed to 32 */
710};
711
712static int do_kdfontop_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg, struct file *file)
713{
714 struct console_font_op op;
715 struct console_font_op32 __user *fontop = compat_ptr(arg);
716 int perm = vt_check(file), i;
717 struct vc_data *vc;
718
719 if (perm < 0) return perm;
720
721 if (copy_from_user(&op, fontop, sizeof(struct console_font_op32)))
722 return -EFAULT;
723 if (!perm && op.op != KD_FONT_OP_GET)
724 return -EPERM;
725 op.data = compat_ptr(((struct console_font_op32 *)&op)->data);
726 op.flags |= KD_FONT_FLAG_OLD;
727 vc = ((struct tty_struct *)file->private_data)->driver_data;
728 i = con_font_op(vc, &op);
729 if (i)
730 return i;
731 ((struct console_font_op32 *)&op)->data = (unsigned long)op.data;
732 if (copy_to_user(fontop, &op, sizeof(struct console_font_op32)))
733 return -EFAULT;
734 return 0;
735}
736
737struct unimapdesc32 {
738 unsigned short entry_ct;
739 compat_caddr_t entries;
740};
741
742static int do_unimap_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg, struct file *file)
743{
744 struct unimapdesc32 tmp;
745 struct unimapdesc32 __user *user_ud = compat_ptr(arg);
746 int perm = vt_check(file);
747 struct vc_data *vc;
748
749 if (perm < 0)
750 return perm;
751 if (copy_from_user(&tmp, user_ud, sizeof tmp))
752 return -EFAULT;
753 if (tmp.entries)
754 if (!access_ok(VERIFY_WRITE, compat_ptr(tmp.entries),
755 tmp.entry_ct*sizeof(struct unipair)))
756 return -EFAULT;
757 vc = ((struct tty_struct *)file->private_data)->driver_data;
758 switch (cmd) {
759 case PIO_UNIMAP:
760 if (!perm)
761 return -EPERM;
762 return con_set_unimap(vc, tmp.entry_ct,
763 compat_ptr(tmp.entries));
764 case GIO_UNIMAP:
765 if (!perm && fg_console != vc->vc_num)
766 return -EPERM;
767 return con_get_unimap(vc, tmp.entry_ct, &(user_ud->entry_ct),
768 compat_ptr(tmp.entries));
769 }
770 return 0;
771}
772
773#endif /* CONFIG_VT */
774
775static int do_smb_getmountuid(unsigned int fd, unsigned int cmd, unsigned long arg)
776{ 575{
777 mm_segment_t old_fs = get_fs(); 576 mm_segment_t old_fs = get_fs();
778 __kernel_uid_t kuid; 577 __kernel_uid_t kuid;
@@ -785,20 +584,15 @@ static int do_smb_getmountuid(unsigned int fd, unsigned int cmd, unsigned long a
785 set_fs(old_fs); 584 set_fs(old_fs);
786 585
787 if (err >= 0) 586 if (err >= 0)
788 err = put_user(kuid, (compat_uid_t __user *)compat_ptr(arg)); 587 err = put_user(kuid, argp);
789 588
790 return err; 589 return err;
791} 590}
792 591
793static __used int 592static int ioc_settimeout(unsigned int fd, unsigned int cmd,
794ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg) 593 compat_ulong_t __user *argp)
795{
796 return -EINVAL;
797}
798
799static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
800{ 594{
801 return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg); 595 return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, argp);
802} 596}
803 597
804/* Bluetooth ioctls */ 598/* Bluetooth ioctls */
@@ -856,7 +650,8 @@ static int set_raw32_request(struct raw_config_request *req, struct raw32_config
856 return ret ? -EFAULT : 0; 650 return ret ? -EFAULT : 0;
857} 651}
858 652
859static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg) 653static int raw_ioctl(unsigned fd, unsigned cmd,
654 struct raw32_config_request __user *user_req)
860{ 655{
861 int ret; 656 int ret;
862 657
@@ -864,7 +659,6 @@ static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
864 case RAW_SETBIND: 659 case RAW_SETBIND:
865 case RAW_GETBIND: { 660 case RAW_GETBIND: {
866 struct raw_config_request req; 661 struct raw_config_request req;
867 struct raw32_config_request __user *user_req = compat_ptr(arg);
868 mm_segment_t oldfs = get_fs(); 662 mm_segment_t oldfs = get_fs();
869 663
870 if ((ret = get_raw32_request(&req, user_req))) 664 if ((ret = get_raw32_request(&req, user_req)))
@@ -879,9 +673,6 @@ static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
879 } 673 }
880 break; 674 break;
881 } 675 }
882 default:
883 ret = sys_ioctl(fd, cmd, arg);
884 break;
885 } 676 }
886 return ret; 677 return ret;
887} 678}
@@ -909,11 +700,11 @@ struct serial_struct32 {
909 compat_int_t reserved[1]; 700 compat_int_t reserved[1];
910}; 701};
911 702
912static int serial_struct_ioctl(unsigned fd, unsigned cmd, unsigned long arg) 703static int serial_struct_ioctl(unsigned fd, unsigned cmd,
704 struct serial_struct32 __user *ss32)
913{ 705{
914 typedef struct serial_struct SS; 706 typedef struct serial_struct SS;
915 typedef struct serial_struct32 SS32; 707 typedef struct serial_struct32 SS32;
916 struct serial_struct32 __user *ss32 = compat_ptr(arg);
917 int err; 708 int err;
918 struct serial_struct ss; 709 struct serial_struct ss;
919 mm_segment_t oldseg = get_fs(); 710 mm_segment_t oldseg = get_fs();
@@ -951,96 +742,6 @@ static int serial_struct_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
951 return err; 742 return err;
952} 743}
953 744
954struct usbdevfs_ctrltransfer32 {
955 u8 bRequestType;
956 u8 bRequest;
957 u16 wValue;
958 u16 wIndex;
959 u16 wLength;
960 u32 timeout; /* in milliseconds */
961 compat_caddr_t data;
962};
963
964#define USBDEVFS_CONTROL32 _IOWR('U', 0, struct usbdevfs_ctrltransfer32)
965
966static int do_usbdevfs_control(unsigned int fd, unsigned int cmd, unsigned long arg)
967{
968 struct usbdevfs_ctrltransfer32 __user *p32 = compat_ptr(arg);
969 struct usbdevfs_ctrltransfer __user *p;
970 __u32 udata;
971 p = compat_alloc_user_space(sizeof(*p));
972 if (copy_in_user(p, p32, (sizeof(*p32) - sizeof(compat_caddr_t))) ||
973 get_user(udata, &p32->data) ||
974 put_user(compat_ptr(udata), &p->data))
975 return -EFAULT;
976 return sys_ioctl(fd, USBDEVFS_CONTROL, (unsigned long)p);
977}
978
979
980struct usbdevfs_bulktransfer32 {
981 compat_uint_t ep;
982 compat_uint_t len;
983 compat_uint_t timeout; /* in milliseconds */
984 compat_caddr_t data;
985};
986
987#define USBDEVFS_BULK32 _IOWR('U', 2, struct usbdevfs_bulktransfer32)
988
989static int do_usbdevfs_bulk(unsigned int fd, unsigned int cmd, unsigned long arg)
990{
991 struct usbdevfs_bulktransfer32 __user *p32 = compat_ptr(arg);
992 struct usbdevfs_bulktransfer __user *p;
993 compat_uint_t n;
994 compat_caddr_t addr;
995
996 p = compat_alloc_user_space(sizeof(*p));
997
998 if (get_user(n, &p32->ep) || put_user(n, &p->ep) ||
999 get_user(n, &p32->len) || put_user(n, &p->len) ||
1000 get_user(n, &p32->timeout) || put_user(n, &p->timeout) ||
1001 get_user(addr, &p32->data) || put_user(compat_ptr(addr), &p->data))
1002 return -EFAULT;
1003
1004 return sys_ioctl(fd, USBDEVFS_BULK, (unsigned long)p);
1005}
1006
1007
1008/*
1009 * USBDEVFS_SUBMITURB, USBDEVFS_REAPURB and USBDEVFS_REAPURBNDELAY
1010 * are handled in usbdevfs core. -Christopher Li
1011 */
1012
1013struct usbdevfs_disconnectsignal32 {
1014 compat_int_t signr;
1015 compat_caddr_t context;
1016};
1017
1018#define USBDEVFS_DISCSIGNAL32 _IOR('U', 14, struct usbdevfs_disconnectsignal32)
1019
1020static int do_usbdevfs_discsignal(unsigned int fd, unsigned int cmd, unsigned long arg)
1021{
1022 struct usbdevfs_disconnectsignal kdis;
1023 struct usbdevfs_disconnectsignal32 __user *udis;
1024 mm_segment_t old_fs;
1025 u32 uctx;
1026 int err;
1027
1028 udis = compat_ptr(arg);
1029
1030 if (get_user(kdis.signr, &udis->signr) ||
1031 __get_user(uctx, &udis->context))
1032 return -EFAULT;
1033
1034 kdis.context = compat_ptr(uctx);
1035
1036 old_fs = get_fs();
1037 set_fs(KERNEL_DS);
1038 err = sys_ioctl(fd, USBDEVFS_DISCSIGNAL, (unsigned long) &kdis);
1039 set_fs(old_fs);
1040
1041 return err;
1042}
1043
1044/* 745/*
1045 * I2C layer ioctls 746 * I2C layer ioctls
1046 */ 747 */
@@ -1069,9 +770,9 @@ struct i2c_rdwr_aligned {
1069 struct i2c_msg msgs[0]; 770 struct i2c_msg msgs[0];
1070}; 771};
1071 772
1072static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) 773static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd,
774 struct i2c_rdwr_ioctl_data32 __user *udata)
1073{ 775{
1074 struct i2c_rdwr_ioctl_data32 __user *udata = compat_ptr(arg);
1075 struct i2c_rdwr_aligned __user *tdata; 776 struct i2c_rdwr_aligned __user *tdata;
1076 struct i2c_msg __user *tmsgs; 777 struct i2c_msg __user *tmsgs;
1077 struct i2c_msg32 __user *umsgs; 778 struct i2c_msg32 __user *umsgs;
@@ -1105,10 +806,10 @@ static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd, unsigned long ar
1105 return sys_ioctl(fd, cmd, (unsigned long)tdata); 806 return sys_ioctl(fd, cmd, (unsigned long)tdata);
1106} 807}
1107 808
1108static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) 809static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd,
810 struct i2c_smbus_ioctl_data32 __user *udata)
1109{ 811{
1110 struct i2c_smbus_ioctl_data __user *tdata; 812 struct i2c_smbus_ioctl_data __user *tdata;
1111 struct i2c_smbus_ioctl_data32 __user *udata;
1112 compat_caddr_t datap; 813 compat_caddr_t datap;
1113 814
1114 tdata = compat_alloc_user_space(sizeof(*tdata)); 815 tdata = compat_alloc_user_space(sizeof(*tdata));
@@ -1117,7 +818,6 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a
1117 if (!access_ok(VERIFY_WRITE, tdata, sizeof(*tdata))) 818 if (!access_ok(VERIFY_WRITE, tdata, sizeof(*tdata)))
1118 return -EFAULT; 819 return -EFAULT;
1119 820
1120 udata = compat_ptr(arg);
1121 if (!access_ok(VERIFY_READ, udata, sizeof(*udata))) 821 if (!access_ok(VERIFY_READ, udata, sizeof(*udata)))
1122 return -EFAULT; 822 return -EFAULT;
1123 823
@@ -1137,7 +837,7 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a
1137#define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t) 837#define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t)
1138#define RTC_EPOCH_SET32 _IOW('p', 0x0e, compat_ulong_t) 838#define RTC_EPOCH_SET32 _IOW('p', 0x0e, compat_ulong_t)
1139 839
1140static int rtc_ioctl(unsigned fd, unsigned cmd, unsigned long arg) 840static int rtc_ioctl(unsigned fd, unsigned cmd, void __user *argp)
1141{ 841{
1142 mm_segment_t oldfs = get_fs(); 842 mm_segment_t oldfs = get_fs();
1143 compat_ulong_t val32; 843 compat_ulong_t val32;
@@ -1155,29 +855,14 @@ static int rtc_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
1155 if (ret) 855 if (ret)
1156 return ret; 856 return ret;
1157 val32 = kval; 857 val32 = kval;
1158 return put_user(val32, (unsigned int __user *)arg); 858 return put_user(val32, (unsigned int __user *)argp);
1159 case RTC_IRQP_SET32: 859 case RTC_IRQP_SET32:
1160 return sys_ioctl(fd, RTC_IRQP_SET, arg); 860 return sys_ioctl(fd, RTC_IRQP_SET, (unsigned long)argp);
1161 case RTC_EPOCH_SET32: 861 case RTC_EPOCH_SET32:
1162 return sys_ioctl(fd, RTC_EPOCH_SET, arg); 862 return sys_ioctl(fd, RTC_EPOCH_SET, (unsigned long)argp);
1163 default:
1164 /* unreached */
1165 return -ENOIOCTLCMD;
1166 } 863 }
1167}
1168 864
1169static int 865 return -ENOIOCTLCMD;
1170lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1171{
1172 struct compat_timeval __user *tc = (struct compat_timeval __user *)arg;
1173 struct timeval __user *tn = compat_alloc_user_space(sizeof(struct timeval));
1174 struct timeval ts;
1175 if (get_user(ts.tv_sec, &tc->tv_sec) ||
1176 get_user(ts.tv_usec, &tc->tv_usec) ||
1177 put_user(ts.tv_sec, &tn->tv_sec) ||
1178 put_user(ts.tv_usec, &tn->tv_usec))
1179 return -EFAULT;
1180 return sys_ioctl(fd, cmd, (unsigned long)tn);
1181} 866}
1182 867
1183/* on ia32 l_start is on a 32-bit boundary */ 868/* on ia32 l_start is on a 32-bit boundary */
@@ -1197,9 +882,9 @@ struct space_resv_32 {
1197#define FS_IOC_RESVSP64_32 _IOW ('X', 42, struct space_resv_32) 882#define FS_IOC_RESVSP64_32 _IOW ('X', 42, struct space_resv_32)
1198 883
1199/* just account for different alignment */ 884/* just account for different alignment */
1200static int compat_ioctl_preallocate(struct file *file, unsigned long arg) 885static int compat_ioctl_preallocate(struct file *file,
886 struct space_resv_32 __user *p32)
1201{ 887{
1202 struct space_resv_32 __user *p32 = compat_ptr(arg);
1203 struct space_resv __user *p = compat_alloc_user_space(sizeof(*p)); 888 struct space_resv __user *p = compat_alloc_user_space(sizeof(*p));
1204 889
1205 if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) || 890 if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) ||
@@ -1215,27 +900,13 @@ static int compat_ioctl_preallocate(struct file *file, unsigned long arg)
1215} 900}
1216#endif 901#endif
1217 902
903/*
904 * simple reversible transform to make our table more evenly
905 * distributed after sorting.
906 */
907#define XFORM(i) (((i) ^ ((i) << 27) ^ ((i) << 17)) & 0xffffffff)
1218 908
1219typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int, 909#define COMPATIBLE_IOCTL(cmd) XFORM(cmd),
1220 unsigned long, struct file *);
1221
1222struct ioctl_trans {
1223 unsigned long cmd;
1224 ioctl_trans_handler_t handler;
1225 struct ioctl_trans *next;
1226};
1227
1228#define HANDLE_IOCTL(cmd,handler) \
1229 { (cmd), (ioctl_trans_handler_t)(handler) },
1230
1231/* pointer to compatible structure or no argument */
1232#define COMPATIBLE_IOCTL(cmd) \
1233 { (cmd), do_ioctl32_pointer },
1234
1235/* argument is an unsigned long integer, not a pointer */
1236#define ULONG_IOCTL(cmd) \
1237 { (cmd), (ioctl_trans_handler_t)sys_ioctl },
1238
1239/* ioctl should not be warned about even if it's not implemented. 910/* ioctl should not be warned about even if it's not implemented.
1240 Valid reasons to use this: 911 Valid reasons to use this:
1241 - It is implemented with ->compat_ioctl on some device, but programs 912 - It is implemented with ->compat_ioctl on some device, but programs
@@ -1245,7 +916,7 @@ struct ioctl_trans {
1245 Most other reasons are not valid. */ 916 Most other reasons are not valid. */
1246#define IGNORE_IOCTL(cmd) COMPATIBLE_IOCTL(cmd) 917#define IGNORE_IOCTL(cmd) COMPATIBLE_IOCTL(cmd)
1247 918
1248static struct ioctl_trans ioctl_start[] = { 919static unsigned int ioctl_pointer[] = {
1249/* compatible ioctls first */ 920/* compatible ioctls first */
1250COMPATIBLE_IOCTL(0x4B50) /* KDGHWCLK - not in the kernel, but don't complain */ 921COMPATIBLE_IOCTL(0x4B50) /* KDGHWCLK - not in the kernel, but don't complain */
1251COMPATIBLE_IOCTL(0x4B51) /* KDSHWCLK - not in the kernel, but don't complain */ 922COMPATIBLE_IOCTL(0x4B51) /* KDSHWCLK - not in the kernel, but don't complain */
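
The table entries now store XFORM(cmd) rather than the raw command number. The transform XORs shifted copies of the low bits into the high bits; because it is reversible, as the comment says, distinct commands cannot collide, and since most ioctl numbers differ only in their low bits, the transformed keys spread across the whole 32-bit range, which is what the interpolation guess in compat_ioctl_check_table() further down relies on. A small user-space illustration in C (the sample command values are made up):

    #include <stdio.h>

    /* same transform as the patch: mixes the low bits into the high bits */
    #define XFORM(i)  (((i) ^ ((i) << 27) ^ ((i) << 17)) & 0xffffffff)

    int main(void)
    {
            /* two commands that share their high (direction/size) bits */
            unsigned int a = 0x80045401u;
            unsigned int b = 0x80045402u;

            printf("%08x -> %08x\n", a, XFORM(a));
            printf("%08x -> %08x\n", b, XFORM(b));
            return 0;
    }
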
@@ -1256,7 +927,6 @@ COMPATIBLE_IOCTL(TCSETA)
1256COMPATIBLE_IOCTL(TCSETAW) 927COMPATIBLE_IOCTL(TCSETAW)
1257COMPATIBLE_IOCTL(TCSETAF) 928COMPATIBLE_IOCTL(TCSETAF)
1258COMPATIBLE_IOCTL(TCSBRK) 929COMPATIBLE_IOCTL(TCSBRK)
1259ULONG_IOCTL(TCSBRKP)
1260COMPATIBLE_IOCTL(TCXONC) 930COMPATIBLE_IOCTL(TCXONC)
1261COMPATIBLE_IOCTL(TCFLSH) 931COMPATIBLE_IOCTL(TCFLSH)
1262COMPATIBLE_IOCTL(TCGETS) 932COMPATIBLE_IOCTL(TCGETS)
@@ -1266,7 +936,6 @@ COMPATIBLE_IOCTL(TCSETSF)
1266COMPATIBLE_IOCTL(TIOCLINUX) 936COMPATIBLE_IOCTL(TIOCLINUX)
1267COMPATIBLE_IOCTL(TIOCSBRK) 937COMPATIBLE_IOCTL(TIOCSBRK)
1268COMPATIBLE_IOCTL(TIOCCBRK) 938COMPATIBLE_IOCTL(TIOCCBRK)
1269ULONG_IOCTL(TIOCMIWAIT)
1270COMPATIBLE_IOCTL(TIOCGICOUNT) 939COMPATIBLE_IOCTL(TIOCGICOUNT)
1271/* Little t */ 940/* Little t */
1272COMPATIBLE_IOCTL(TIOCGETD) 941COMPATIBLE_IOCTL(TIOCGETD)
@@ -1288,7 +957,6 @@ COMPATIBLE_IOCTL(TIOCSTI)
1288COMPATIBLE_IOCTL(TIOCOUTQ) 957COMPATIBLE_IOCTL(TIOCOUTQ)
1289COMPATIBLE_IOCTL(TIOCSPGRP) 958COMPATIBLE_IOCTL(TIOCSPGRP)
1290COMPATIBLE_IOCTL(TIOCGPGRP) 959COMPATIBLE_IOCTL(TIOCGPGRP)
1291ULONG_IOCTL(TIOCSCTTY)
1292COMPATIBLE_IOCTL(TIOCGPTN) 960COMPATIBLE_IOCTL(TIOCGPTN)
1293COMPATIBLE_IOCTL(TIOCSPTLCK) 961COMPATIBLE_IOCTL(TIOCSPTLCK)
1294COMPATIBLE_IOCTL(TIOCSERGETLSR) 962COMPATIBLE_IOCTL(TIOCSERGETLSR)
@@ -1319,36 +987,21 @@ COMPATIBLE_IOCTL(PRINT_RAID_DEBUG)
1319COMPATIBLE_IOCTL(RAID_AUTORUN) 987COMPATIBLE_IOCTL(RAID_AUTORUN)
1320COMPATIBLE_IOCTL(CLEAR_ARRAY) 988COMPATIBLE_IOCTL(CLEAR_ARRAY)
1321COMPATIBLE_IOCTL(ADD_NEW_DISK) 989COMPATIBLE_IOCTL(ADD_NEW_DISK)
1322ULONG_IOCTL(HOT_REMOVE_DISK)
1323COMPATIBLE_IOCTL(SET_ARRAY_INFO) 990COMPATIBLE_IOCTL(SET_ARRAY_INFO)
1324COMPATIBLE_IOCTL(SET_DISK_INFO) 991COMPATIBLE_IOCTL(SET_DISK_INFO)
1325COMPATIBLE_IOCTL(WRITE_RAID_INFO) 992COMPATIBLE_IOCTL(WRITE_RAID_INFO)
1326COMPATIBLE_IOCTL(UNPROTECT_ARRAY) 993COMPATIBLE_IOCTL(UNPROTECT_ARRAY)
1327COMPATIBLE_IOCTL(PROTECT_ARRAY) 994COMPATIBLE_IOCTL(PROTECT_ARRAY)
1328ULONG_IOCTL(HOT_ADD_DISK)
1329ULONG_IOCTL(SET_DISK_FAULTY)
1330COMPATIBLE_IOCTL(RUN_ARRAY) 995COMPATIBLE_IOCTL(RUN_ARRAY)
1331COMPATIBLE_IOCTL(STOP_ARRAY) 996COMPATIBLE_IOCTL(STOP_ARRAY)
1332COMPATIBLE_IOCTL(STOP_ARRAY_RO) 997COMPATIBLE_IOCTL(STOP_ARRAY_RO)
1333COMPATIBLE_IOCTL(RESTART_ARRAY_RW) 998COMPATIBLE_IOCTL(RESTART_ARRAY_RW)
1334COMPATIBLE_IOCTL(GET_BITMAP_FILE) 999COMPATIBLE_IOCTL(GET_BITMAP_FILE)
1335ULONG_IOCTL(SET_BITMAP_FILE)
1336/* Big K */
1337COMPATIBLE_IOCTL(PIO_FONT)
1338COMPATIBLE_IOCTL(GIO_FONT)
1339COMPATIBLE_IOCTL(PIO_CMAP)
1340COMPATIBLE_IOCTL(GIO_CMAP)
1341ULONG_IOCTL(KDSIGACCEPT)
1342COMPATIBLE_IOCTL(KDGETKEYCODE) 1000COMPATIBLE_IOCTL(KDGETKEYCODE)
1343COMPATIBLE_IOCTL(KDSETKEYCODE) 1001COMPATIBLE_IOCTL(KDSETKEYCODE)
1344ULONG_IOCTL(KIOCSOUND)
1345ULONG_IOCTL(KDMKTONE)
1346COMPATIBLE_IOCTL(KDGKBTYPE) 1002COMPATIBLE_IOCTL(KDGKBTYPE)
1347ULONG_IOCTL(KDSETMODE)
1348COMPATIBLE_IOCTL(KDGETMODE) 1003COMPATIBLE_IOCTL(KDGETMODE)
1349ULONG_IOCTL(KDSKBMODE)
1350COMPATIBLE_IOCTL(KDGKBMODE) 1004COMPATIBLE_IOCTL(KDGKBMODE)
1351ULONG_IOCTL(KDSKBMETA)
1352COMPATIBLE_IOCTL(KDGKBMETA) 1005COMPATIBLE_IOCTL(KDGKBMETA)
1353COMPATIBLE_IOCTL(KDGKBENT) 1006COMPATIBLE_IOCTL(KDGKBENT)
1354COMPATIBLE_IOCTL(KDSKBENT) 1007COMPATIBLE_IOCTL(KDSKBENT)
@@ -1358,15 +1011,7 @@ COMPATIBLE_IOCTL(KDGKBDIACR)
1358COMPATIBLE_IOCTL(KDSKBDIACR) 1011COMPATIBLE_IOCTL(KDSKBDIACR)
1359COMPATIBLE_IOCTL(KDKBDREP) 1012COMPATIBLE_IOCTL(KDKBDREP)
1360COMPATIBLE_IOCTL(KDGKBLED) 1013COMPATIBLE_IOCTL(KDGKBLED)
1361ULONG_IOCTL(KDSKBLED)
1362COMPATIBLE_IOCTL(KDGETLED) 1014COMPATIBLE_IOCTL(KDGETLED)
1363ULONG_IOCTL(KDSETLED)
1364COMPATIBLE_IOCTL(GIO_SCRNMAP)
1365COMPATIBLE_IOCTL(PIO_SCRNMAP)
1366COMPATIBLE_IOCTL(GIO_UNISCRNMAP)
1367COMPATIBLE_IOCTL(PIO_UNISCRNMAP)
1368COMPATIBLE_IOCTL(PIO_FONTRESET)
1369COMPATIBLE_IOCTL(PIO_UNIMAPCLR)
1370#ifdef CONFIG_BLOCK 1015#ifdef CONFIG_BLOCK
1371/* Big S */ 1016/* Big S */
1372COMPATIBLE_IOCTL(SCSI_IOCTL_GET_IDLUN) 1017COMPATIBLE_IOCTL(SCSI_IOCTL_GET_IDLUN)
@@ -1378,20 +1023,6 @@ COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND)
1378COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST) 1023COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST)
1379COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI) 1024COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI)
1380#endif 1025#endif
1381/* Big V */
1382COMPATIBLE_IOCTL(VT_SETMODE)
1383COMPATIBLE_IOCTL(VT_GETMODE)
1384COMPATIBLE_IOCTL(VT_GETSTATE)
1385COMPATIBLE_IOCTL(VT_OPENQRY)
1386ULONG_IOCTL(VT_ACTIVATE)
1387ULONG_IOCTL(VT_WAITACTIVE)
1388ULONG_IOCTL(VT_RELDISP)
1389ULONG_IOCTL(VT_DISALLOCATE)
1390COMPATIBLE_IOCTL(VT_RESIZE)
1391COMPATIBLE_IOCTL(VT_RESIZEX)
1392COMPATIBLE_IOCTL(VT_LOCKSWITCH)
1393COMPATIBLE_IOCTL(VT_UNLOCKSWITCH)
1394COMPATIBLE_IOCTL(VT_GETHIFONTMASK)
1395/* Little p (/dev/rtc, /dev/envctrl, etc.) */ 1026/* Little p (/dev/rtc, /dev/envctrl, etc.) */
1396COMPATIBLE_IOCTL(RTC_AIE_ON) 1027COMPATIBLE_IOCTL(RTC_AIE_ON)
1397COMPATIBLE_IOCTL(RTC_AIE_OFF) 1028COMPATIBLE_IOCTL(RTC_AIE_OFF)
@@ -1420,11 +1051,12 @@ COMPATIBLE_IOCTL(MTIOCTOP)
1420/* Socket level stuff */ 1051/* Socket level stuff */
1421COMPATIBLE_IOCTL(FIOQSIZE) 1052COMPATIBLE_IOCTL(FIOQSIZE)
1422#ifdef CONFIG_BLOCK 1053#ifdef CONFIG_BLOCK
1054/* loop */
1055IGNORE_IOCTL(LOOP_CLR_FD)
1423/* SG stuff */ 1056/* SG stuff */
1424COMPATIBLE_IOCTL(SG_SET_TIMEOUT) 1057COMPATIBLE_IOCTL(SG_SET_TIMEOUT)
1425COMPATIBLE_IOCTL(SG_GET_TIMEOUT) 1058COMPATIBLE_IOCTL(SG_GET_TIMEOUT)
1426COMPATIBLE_IOCTL(SG_EMULATED_HOST) 1059COMPATIBLE_IOCTL(SG_EMULATED_HOST)
1427ULONG_IOCTL(SG_SET_TRANSFORM)
1428COMPATIBLE_IOCTL(SG_GET_TRANSFORM) 1060COMPATIBLE_IOCTL(SG_GET_TRANSFORM)
1429COMPATIBLE_IOCTL(SG_SET_RESERVED_SIZE) 1061COMPATIBLE_IOCTL(SG_SET_RESERVED_SIZE)
1430COMPATIBLE_IOCTL(SG_GET_RESERVED_SIZE) 1062COMPATIBLE_IOCTL(SG_GET_RESERVED_SIZE)
@@ -1478,8 +1110,6 @@ COMPATIBLE_IOCTL(PPPIOCGCHAN)
1478/* PPPOX */ 1110/* PPPOX */
1479COMPATIBLE_IOCTL(PPPOEIOCSFWD) 1111COMPATIBLE_IOCTL(PPPOEIOCSFWD)
1480COMPATIBLE_IOCTL(PPPOEIOCDFWD) 1112COMPATIBLE_IOCTL(PPPOEIOCDFWD)
1481/* LP */
1482COMPATIBLE_IOCTL(LPGETSTATUS)
1483/* ppdev */ 1113/* ppdev */
1484COMPATIBLE_IOCTL(PPSETMODE) 1114COMPATIBLE_IOCTL(PPSETMODE)
1485COMPATIBLE_IOCTL(PPRSTATUS) 1115COMPATIBLE_IOCTL(PPRSTATUS)
@@ -1661,8 +1291,6 @@ COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS)
1661COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS) 1291COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS)
1662COMPATIBLE_IOCTL(OSS_GETVERSION) 1292COMPATIBLE_IOCTL(OSS_GETVERSION)
1663/* AUTOFS */ 1293/* AUTOFS */
1664ULONG_IOCTL(AUTOFS_IOC_READY)
1665ULONG_IOCTL(AUTOFS_IOC_FAIL)
1666COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC) 1294COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC)
1667COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER) 1295COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
1668COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE) 1296COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
@@ -1755,30 +1383,11 @@ COMPATIBLE_IOCTL(PCIIOC_CONTROLLER)
1755COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_IO) 1383COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_IO)
1756COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_MEM) 1384COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_MEM)
1757COMPATIBLE_IOCTL(PCIIOC_WRITE_COMBINE) 1385COMPATIBLE_IOCTL(PCIIOC_WRITE_COMBINE)
1758/* USB */
1759COMPATIBLE_IOCTL(USBDEVFS_RESETEP)
1760COMPATIBLE_IOCTL(USBDEVFS_SETINTERFACE)
1761COMPATIBLE_IOCTL(USBDEVFS_SETCONFIGURATION)
1762COMPATIBLE_IOCTL(USBDEVFS_GETDRIVER)
1763COMPATIBLE_IOCTL(USBDEVFS_DISCARDURB)
1764COMPATIBLE_IOCTL(USBDEVFS_CLAIMINTERFACE)
1765COMPATIBLE_IOCTL(USBDEVFS_RELEASEINTERFACE)
1766COMPATIBLE_IOCTL(USBDEVFS_CONNECTINFO)
1767COMPATIBLE_IOCTL(USBDEVFS_HUB_PORTINFO)
1768COMPATIBLE_IOCTL(USBDEVFS_RESET)
1769COMPATIBLE_IOCTL(USBDEVFS_SUBMITURB32)
1770COMPATIBLE_IOCTL(USBDEVFS_REAPURB32)
1771COMPATIBLE_IOCTL(USBDEVFS_REAPURBNDELAY32)
1772COMPATIBLE_IOCTL(USBDEVFS_CLEAR_HALT)
1773/* NBD */ 1386/* NBD */
1774ULONG_IOCTL(NBD_SET_SOCK)
1775ULONG_IOCTL(NBD_SET_BLKSIZE)
1776ULONG_IOCTL(NBD_SET_SIZE)
1777COMPATIBLE_IOCTL(NBD_DO_IT) 1387COMPATIBLE_IOCTL(NBD_DO_IT)
1778COMPATIBLE_IOCTL(NBD_CLEAR_SOCK) 1388COMPATIBLE_IOCTL(NBD_CLEAR_SOCK)
1779COMPATIBLE_IOCTL(NBD_CLEAR_QUE) 1389COMPATIBLE_IOCTL(NBD_CLEAR_QUE)
1780COMPATIBLE_IOCTL(NBD_PRINT_DEBUG) 1390COMPATIBLE_IOCTL(NBD_PRINT_DEBUG)
1781ULONG_IOCTL(NBD_SET_SIZE_BLOCKS)
1782COMPATIBLE_IOCTL(NBD_DISCONNECT) 1391COMPATIBLE_IOCTL(NBD_DISCONNECT)
1783/* i2c */ 1392/* i2c */
1784COMPATIBLE_IOCTL(I2C_SLAVE) 1393COMPATIBLE_IOCTL(I2C_SLAVE)
@@ -1878,42 +1487,6 @@ COMPATIBLE_IOCTL(JSIOCGAXES)
1878COMPATIBLE_IOCTL(JSIOCGBUTTONS) 1487COMPATIBLE_IOCTL(JSIOCGBUTTONS)
1879COMPATIBLE_IOCTL(JSIOCGNAME(0)) 1488COMPATIBLE_IOCTL(JSIOCGNAME(0))
1880 1489
1881/* now things that need handlers */
1882#ifdef CONFIG_BLOCK
1883HANDLE_IOCTL(SG_IO,sg_ioctl_trans)
1884HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans)
1885#endif
1886HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans)
1887HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans)
1888HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans)
1889HANDLE_IOCTL(PPPIOCSACTIVE32, ppp_sock_fprog_ioctl_trans)
1890#ifdef CONFIG_BLOCK
1891HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans)
1892HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans)
1893#endif
1894#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
1895HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout)
1896#ifdef CONFIG_VT
1897HANDLE_IOCTL(PIO_FONTX, do_fontx_ioctl)
1898HANDLE_IOCTL(GIO_FONTX, do_fontx_ioctl)
1899HANDLE_IOCTL(PIO_UNIMAP, do_unimap_ioctl)
1900HANDLE_IOCTL(GIO_UNIMAP, do_unimap_ioctl)
1901HANDLE_IOCTL(KDFONTOP, do_kdfontop_ioctl)
1902#endif
1903/* One SMB ioctl needs translations. */
1904#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
1905HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid)
1906/* block stuff */
1907#ifdef CONFIG_BLOCK
1908/* loop */
1909IGNORE_IOCTL(LOOP_CLR_FD)
1910/* Raw devices */
1911HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
1912HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
1913#endif
1914/* Serial */
1915HANDLE_IOCTL(TIOCGSERIAL, serial_struct_ioctl)
1916HANDLE_IOCTL(TIOCSSERIAL, serial_struct_ioctl)
1917#ifdef TIOCGLTC 1490#ifdef TIOCGLTC
1918COMPATIBLE_IOCTL(TIOCGLTC) 1491COMPATIBLE_IOCTL(TIOCGLTC)
1919COMPATIBLE_IOCTL(TIOCSLTC) 1492COMPATIBLE_IOCTL(TIOCSLTC)
@@ -1928,39 +1501,6 @@ COMPATIBLE_IOCTL(TIOCSLTC)
1928COMPATIBLE_IOCTL(TIOCSTART) 1501COMPATIBLE_IOCTL(TIOCSTART)
1929COMPATIBLE_IOCTL(TIOCSTOP) 1502COMPATIBLE_IOCTL(TIOCSTOP)
1930#endif 1503#endif
1931/* Usbdevfs */
1932HANDLE_IOCTL(USBDEVFS_CONTROL32, do_usbdevfs_control)
1933HANDLE_IOCTL(USBDEVFS_BULK32, do_usbdevfs_bulk)
1934HANDLE_IOCTL(USBDEVFS_DISCSIGNAL32, do_usbdevfs_discsignal)
1935COMPATIBLE_IOCTL(USBDEVFS_IOCTL32)
1936/* i2c */
1937HANDLE_IOCTL(I2C_FUNCS, w_long)
1938HANDLE_IOCTL(I2C_RDWR, do_i2c_rdwr_ioctl)
1939HANDLE_IOCTL(I2C_SMBUS, do_i2c_smbus_ioctl)
1940/* Not implemented in the native kernel */
1941HANDLE_IOCTL(RTC_IRQP_READ32, rtc_ioctl)
1942HANDLE_IOCTL(RTC_IRQP_SET32, rtc_ioctl)
1943HANDLE_IOCTL(RTC_EPOCH_READ32, rtc_ioctl)
1944HANDLE_IOCTL(RTC_EPOCH_SET32, rtc_ioctl)
1945
1946/* dvb */
1947HANDLE_IOCTL(VIDEO_GET_EVENT, do_video_get_event)
1948HANDLE_IOCTL(VIDEO_STILLPICTURE, do_video_stillpicture)
1949HANDLE_IOCTL(VIDEO_SET_SPU_PALETTE, do_video_set_spu_palette)
1950
1951/* parport */
1952COMPATIBLE_IOCTL(LPTIME)
1953COMPATIBLE_IOCTL(LPCHAR)
1954COMPATIBLE_IOCTL(LPABORTOPEN)
1955COMPATIBLE_IOCTL(LPCAREFUL)
1956COMPATIBLE_IOCTL(LPWAIT)
1957COMPATIBLE_IOCTL(LPSETIRQ)
1958COMPATIBLE_IOCTL(LPGETSTATUS)
1959COMPATIBLE_IOCTL(LPGETSTATUS)
1960COMPATIBLE_IOCTL(LPRESET)
1961/*LPGETSTATS not implemented, but no kernels seem to compile it in anyways*/
1962COMPATIBLE_IOCTL(LPGETFLAGS)
1963HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans)
1964 1504
1965/* fat 'r' ioctls. These are handled by fat with ->compat_ioctl, 1505/* fat 'r' ioctls. These are handled by fat with ->compat_ioctl,
1966 but we don't want warnings on other file systems. So declare 1506 but we don't want warnings on other file systems. So declare
@@ -1988,12 +1528,110 @@ IGNORE_IOCTL(FBIOGCURSOR32)
1988#endif 1528#endif
1989}; 1529};
1990 1530
1991#define IOCTL_HASHSIZE 256 1531/*
1992static struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE]; 1532 * Convert common ioctl arguments based on their command number
1993 1533 *
1994static inline unsigned long ioctl32_hash(unsigned long cmd) 1534 * Please do not add any code in here. Instead, implement
	 1535	 * a compat_ioctl operation in the place that handles the
1536 * ioctl for the native case.
1537 */
1538static long do_ioctl_trans(int fd, unsigned int cmd,
1539 unsigned long arg, struct file *file)
1995{ 1540{
1996 return (((cmd >> 6) ^ (cmd >> 4) ^ cmd)) % IOCTL_HASHSIZE; 1541 void __user *argp = compat_ptr(arg);
1542
1543 switch (cmd) {
1544 case PPPIOCGIDLE32:
1545 return ppp_gidle(fd, cmd, argp);
1546 case PPPIOCSCOMPRESS32:
1547 return ppp_scompress(fd, cmd, argp);
1548 case PPPIOCSPASS32:
1549 case PPPIOCSACTIVE32:
1550 return ppp_sock_fprog_ioctl_trans(fd, cmd, argp);
1551#ifdef CONFIG_BLOCK
1552 case SG_IO:
1553 return sg_ioctl_trans(fd, cmd, argp);
1554 case SG_GET_REQUEST_TABLE:
1555 return sg_grt_trans(fd, cmd, argp);
1556 case MTIOCGET32:
1557 case MTIOCPOS32:
1558 return mt_ioctl_trans(fd, cmd, argp);
1559 /* Raw devices */
1560 case RAW_SETBIND:
1561 case RAW_GETBIND:
1562 return raw_ioctl(fd, cmd, argp);
1563#endif
1564#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
1565 case AUTOFS_IOC_SETTIMEOUT32:
1566 return ioc_settimeout(fd, cmd, argp);
1567 /* One SMB ioctl needs translations. */
1568#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
1569 case SMB_IOC_GETMOUNTUID_32:
1570 return do_smb_getmountuid(fd, cmd, argp);
1571 /* Serial */
1572 case TIOCGSERIAL:
1573 case TIOCSSERIAL:
1574 return serial_struct_ioctl(fd, cmd, argp);
1575 /* i2c */
1576 case I2C_FUNCS:
1577 return w_long(fd, cmd, argp);
1578 case I2C_RDWR:
1579 return do_i2c_rdwr_ioctl(fd, cmd, argp);
1580 case I2C_SMBUS:
1581 return do_i2c_smbus_ioctl(fd, cmd, argp);
1582 /* Not implemented in the native kernel */
1583 case RTC_IRQP_READ32:
1584 case RTC_IRQP_SET32:
1585 case RTC_EPOCH_READ32:
1586 case RTC_EPOCH_SET32:
1587 return rtc_ioctl(fd, cmd, argp);
1588
1589 /* dvb */
1590 case VIDEO_GET_EVENT:
1591 return do_video_get_event(fd, cmd, argp);
1592 case VIDEO_STILLPICTURE:
1593 return do_video_stillpicture(fd, cmd, argp);
1594 case VIDEO_SET_SPU_PALETTE:
1595 return do_video_set_spu_palette(fd, cmd, argp);
1596 }
1597
1598 /*
1599 * These take an integer instead of a pointer as 'arg',
1600 * so we must not do a compat_ptr() translation.
1601 */
1602 switch (cmd) {
1603 /* Big T */
1604 case TCSBRKP:
1605 case TIOCMIWAIT:
1606 case TIOCSCTTY:
1607 /* RAID */
1608 case HOT_REMOVE_DISK:
1609 case HOT_ADD_DISK:
1610 case SET_DISK_FAULTY:
1611 case SET_BITMAP_FILE:
1612 /* Big K */
1613 case KDSIGACCEPT:
1614 case KIOCSOUND:
1615 case KDMKTONE:
1616 case KDSETMODE:
1617 case KDSKBMODE:
1618 case KDSKBMETA:
1619 case KDSKBLED:
1620 case KDSETLED:
1621 /* SG stuff */
1622 case SG_SET_TRANSFORM:
1623 /* AUTOFS */
1624 case AUTOFS_IOC_READY:
1625 case AUTOFS_IOC_FAIL:
1626 /* NBD */
1627 case NBD_SET_SOCK:
1628 case NBD_SET_BLKSIZE:
1629 case NBD_SET_SIZE:
1630 case NBD_SET_SIZE_BLOCKS:
1631 return do_vfs_ioctl(file, fd, cmd, arg);
1632 }
1633
1634 return -ENOIOCTLCMD;
1997} 1635}
1998 1636
1999static void compat_ioctl_error(struct file *filp, unsigned int fd, 1637static void compat_ioctl_error(struct file *filp, unsigned int fd,
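
The replacement dispatcher keeps two switch statements: the first covers commands whose argument is a pointer and therefore goes through compat_ptr(), the second covers commands whose argument is a plain integer and must be passed through untranslated. The comment asks that new translations live next to the native handler instead of here; a minimal sketch of what that looks like in a driver (FOO_* commands and foo_ioctl() are hypothetical, not part of the patch):

    #include <linux/fs.h>
    #include <linux/ioctl.h>
    #include <linux/compat.h>

    #define FOO_SET_TIMEOUT  _IO('f', 1)            /* integer argument */
    #define FOO_GET_STATS    _IOR('f', 2, int)      /* pointer argument */

    /* native handler, registered as .unlocked_ioctl */
    static long foo_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
    {
            return 0;
    }

    #ifdef CONFIG_COMPAT
    /* registered as .compat_ioctl: lives with the native handler,
     * as the comment above requests, not in fs/compat_ioctl.c */
    static long foo_compat_ioctl(struct file *file, unsigned int cmd,
                                 unsigned long arg)
    {
            switch (cmd) {
            case FOO_SET_TIMEOUT:   /* integer: pass it through unchanged */
                    return foo_ioctl(file, cmd, arg);
            case FOO_GET_STATS:     /* pointer: translate it */
                    return foo_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
            default:
                    return -ENOIOCTLCMD;
            }
    }
    #endif
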
@@ -2025,12 +1663,33 @@ static void compat_ioctl_error(struct file *filp, unsigned int fd,
2025 free_page((unsigned long)path); 1663 free_page((unsigned long)path);
2026} 1664}
2027 1665
1666static int compat_ioctl_check_table(unsigned int xcmd)
1667{
1668 int i;
1669 const int max = ARRAY_SIZE(ioctl_pointer) - 1;
1670
1671 BUILD_BUG_ON(max >= (1 << 16));
1672
1673 /* guess initial offset into table, assuming a
1674 normalized distribution */
1675 i = ((xcmd >> 16) * max) >> 16;
1676
1677 /* do linear search up first, until greater or equal */
1678 while (ioctl_pointer[i] < xcmd && i < max)
1679 i++;
1680
1681 /* then do linear search down */
1682 while (ioctl_pointer[i] > xcmd && i > 0)
1683 i--;
1684
1685 return ioctl_pointer[i] == xcmd;
1686}
1687
2028asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, 1688asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2029 unsigned long arg) 1689 unsigned long arg)
2030{ 1690{
2031 struct file *filp; 1691 struct file *filp;
2032 int error = -EBADF; 1692 int error = -EBADF;
2033 struct ioctl_trans *t;
2034 int fput_needed; 1693 int fput_needed;
2035 1694
2036 filp = fget_light(fd, &fput_needed); 1695 filp = fget_light(fd, &fput_needed);
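
compat_ioctl_check_table() above replaces the old hash-chain walk: the array is sorted once at init, the starting index is interpolated from the top 16 bits of the transformed command, and a short linear scan up and then down settles on the exact slot. A user-space C sketch of the same lookup over a made-up sorted table:

    #include <stdio.h>

    static const unsigned int table[] = {
            0x11110000, 0x33330000, 0x77770000, 0xaaaa0000, 0xdddd0000,
    };

    static int check_table(unsigned int xcmd)
    {
            const int max = sizeof(table) / sizeof(table[0]) - 1;
            int i = ((xcmd >> 16) * max) >> 16;     /* interpolation guess */

            while (table[i] < xcmd && i < max)      /* scan up ... */
                    i++;
            while (table[i] > xcmd && i > 0)        /* ... then down */
                    i--;
            return table[i] == xcmd;
    }

    int main(void)
    {
            printf("%d %d\n", check_table(0x77770000), check_table(0x12340000));
            return 0;       /* prints "1 0" */
    }
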
@@ -2058,7 +1717,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2058#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) 1717#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
2059 case FS_IOC_RESVSP_32: 1718 case FS_IOC_RESVSP_32:
2060 case FS_IOC_RESVSP64_32: 1719 case FS_IOC_RESVSP64_32:
2061 error = compat_ioctl_preallocate(filp, arg); 1720 error = compat_ioctl_preallocate(filp, compat_ptr(arg));
2062 goto out_fput; 1721 goto out_fput;
2063#else 1722#else
2064 case FS_IOC_RESVSP: 1723 case FS_IOC_RESVSP:
@@ -2087,12 +1746,11 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2087 break; 1746 break;
2088 } 1747 }
2089 1748
2090 for (t = ioctl32_hash_table[ioctl32_hash(cmd)]; t; t = t->next) { 1749 if (compat_ioctl_check_table(XFORM(cmd)))
2091 if (t->cmd == cmd) 1750 goto found_handler;
2092 goto found_handler;
2093 }
2094 1751
2095 { 1752 error = do_ioctl_trans(fd, cmd, arg, filp);
1753 if (error == -ENOIOCTLCMD) {
2096 static int count; 1754 static int count;
2097 1755
2098 if (++count <= 50) 1756 if (++count <= 50)
@@ -2103,13 +1761,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2103 goto out_fput; 1761 goto out_fput;
2104 1762
2105 found_handler: 1763 found_handler:
2106 if (t->handler) { 1764 arg = (unsigned long)compat_ptr(arg);
2107 lock_kernel();
2108 error = t->handler(fd, cmd, arg, filp);
2109 unlock_kernel();
2110 goto out_fput;
2111 }
2112
2113 do_ioctl: 1765 do_ioctl:
2114 error = do_vfs_ioctl(filp, fd, cmd, arg); 1766 error = do_vfs_ioctl(filp, fd, cmd, arg);
2115 out_fput: 1767 out_fput:
@@ -2118,35 +1770,22 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2118 return error; 1770 return error;
2119} 1771}
2120 1772
2121static void ioctl32_insert_translation(struct ioctl_trans *trans) 1773static int __init init_sys32_ioctl_cmp(const void *p, const void *q)
2122{ 1774{
2123 unsigned long hash; 1775 unsigned int a, b;
2124 struct ioctl_trans *t; 1776 a = *(unsigned int *)p;
2125 1777 b = *(unsigned int *)q;
2126 hash = ioctl32_hash (trans->cmd); 1778 if (a > b)
2127 if (!ioctl32_hash_table[hash]) 1779 return 1;
2128 ioctl32_hash_table[hash] = trans; 1780 if (a < b)
2129 else { 1781 return -1;
2130 t = ioctl32_hash_table[hash]; 1782 return 0;
2131 while (t->next)
2132 t = t->next;
2133 trans->next = NULL;
2134 t->next = trans;
2135 }
2136} 1783}
2137 1784
2138static int __init init_sys32_ioctl(void) 1785static int __init init_sys32_ioctl(void)
2139{ 1786{
2140 int i; 1787 sort(ioctl_pointer, ARRAY_SIZE(ioctl_pointer), sizeof(*ioctl_pointer),
2141 1788 init_sys32_ioctl_cmp, NULL);
2142 for (i = 0; i < ARRAY_SIZE(ioctl_start); i++) {
2143 if (ioctl_start[i].next) {
2144 printk("ioctl translation %d bad\n",i);
2145 return -1;
2146 }
2147
2148 ioctl32_insert_translation(&ioctl_start[i]);
2149 }
2150 return 0; 1789 return 0;
2151} 1790}
2152__initcall(init_sys32_ioctl); 1791__initcall(init_sys32_ioctl);
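
Initialisation is now a single sort() call over the array of transformed command numbers. Note that the comparator compares explicitly instead of returning a - b, so large unsigned values cannot wrap around in the signed return value. A user-space analogue in C using qsort() (the data is made up):

    #include <stdio.h>
    #include <stdlib.h>

    /* comparator in the same style as init_sys32_ioctl_cmp() */
    static int cmp_uint(const void *p, const void *q)
    {
            unsigned int a = *(const unsigned int *)p;
            unsigned int b = *(const unsigned int *)q;

            if (a > b)
                    return 1;
            if (a < b)
                    return -1;
            return 0;
    }

    int main(void)
    {
            unsigned int tbl[] = { 0xdddd0000, 0x1111, 0x77770000, 0x2222 };
            size_t i;

            qsort(tbl, sizeof(tbl) / sizeof(tbl[0]), sizeof(tbl[0]), cmp_uint);
            for (i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++)
                    printf("%08x\n", tbl[i]);
            return 0;
    }
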
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 66d6106a2067..204bed37e82d 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -28,6 +28,7 @@ static inline int gpio_is_valid(int number)
28 return ((unsigned)number) < ARCH_NR_GPIOS; 28 return ((unsigned)number) < ARCH_NR_GPIOS;
29} 29}
30 30
31struct device;
31struct seq_file; 32struct seq_file;
32struct module; 33struct module;
33 34
@@ -181,6 +182,8 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value)
181 182
182#ifndef CONFIG_GPIO_SYSFS 183#ifndef CONFIG_GPIO_SYSFS
183 184
185struct device;
186
184/* sysfs support is only available with gpiolib, where it's optional */ 187/* sysfs support is only available with gpiolib, where it's optional */
185 188
186static inline int gpio_export(unsigned gpio, bool direction_may_change) 189static inline int gpio_export(unsigned gpio, bool direction_may_change)
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index 7c38c147e5e6..6a0b30f78a62 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -622,9 +622,13 @@ __SYSCALL(__NR_move_pages, sys_move_pages)
622__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) 622__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
623#define __NR_perf_event_open 241 623#define __NR_perf_event_open 241
624__SYSCALL(__NR_perf_event_open, sys_perf_event_open) 624__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
625#define __NR_accept4 242
626__SYSCALL(__NR_accept4, sys_accept4)
627#define __NR_recvmmsg 243
628__SYSCALL(__NR_recvmmsg, sys_recvmmsg)
625 629
626#undef __NR_syscalls 630#undef __NR_syscalls
627#define __NR_syscalls 242 631#define __NR_syscalls 244
628 632
629/* 633/*
630 * All syscalls below here should go away really, 634 * All syscalls below here should go away really,
@@ -802,7 +806,7 @@ __SYSCALL(__NR_fork, sys_ni_syscall)
802#define __NR_statfs __NR3264_statfs 806#define __NR_statfs __NR3264_statfs
803#define __NR_fstatfs __NR3264_fstatfs 807#define __NR_fstatfs __NR3264_fstatfs
804#define __NR_truncate __NR3264_truncate 808#define __NR_truncate __NR3264_truncate
805#define __NR_ftruncate __NR3264_truncate 809#define __NR_ftruncate __NR3264_ftruncate
806#define __NR_lseek __NR3264_lseek 810#define __NR_lseek __NR3264_lseek
807#define __NR_sendfile __NR3264_sendfile 811#define __NR_sendfile __NR3264_sendfile
808#define __NR_newfstatat __NR3264_fstatat 812#define __NR_newfstatat __NR3264_fstatat
@@ -818,7 +822,7 @@ __SYSCALL(__NR_fork, sys_ni_syscall)
818#define __NR_statfs64 __NR3264_statfs 822#define __NR_statfs64 __NR3264_statfs
819#define __NR_fstatfs64 __NR3264_fstatfs 823#define __NR_fstatfs64 __NR3264_fstatfs
820#define __NR_truncate64 __NR3264_truncate 824#define __NR_truncate64 __NR3264_truncate
821#define __NR_ftruncate64 __NR3264_truncate 825#define __NR_ftruncate64 __NR3264_ftruncate
822#define __NR_llseek __NR3264_lseek 826#define __NR_llseek __NR3264_lseek
823#define __NR_sendfile64 __NR3264_sendfile 827#define __NR_sendfile64 __NR3264_sendfile
824#define __NR_fstatat64 __NR3264_fstatat 828#define __NR_fstatat64 __NR3264_fstatat
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 789cf5f920ce..d77b54733c5b 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -84,6 +84,7 @@ extern const struct cpumask *const cpu_active_mask;
84#define num_online_cpus() cpumask_weight(cpu_online_mask) 84#define num_online_cpus() cpumask_weight(cpu_online_mask)
85#define num_possible_cpus() cpumask_weight(cpu_possible_mask) 85#define num_possible_cpus() cpumask_weight(cpu_possible_mask)
86#define num_present_cpus() cpumask_weight(cpu_present_mask) 86#define num_present_cpus() cpumask_weight(cpu_present_mask)
87#define num_active_cpus() cpumask_weight(cpu_active_mask)
87#define cpu_online(cpu) cpumask_test_cpu((cpu), cpu_online_mask) 88#define cpu_online(cpu) cpumask_test_cpu((cpu), cpu_online_mask)
88#define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask) 89#define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask)
89#define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask) 90#define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask)
@@ -92,6 +93,7 @@ extern const struct cpumask *const cpu_active_mask;
92#define num_online_cpus() 1 93#define num_online_cpus() 1
93#define num_possible_cpus() 1 94#define num_possible_cpus() 1
94#define num_present_cpus() 1 95#define num_present_cpus() 1
96#define num_active_cpus() 1
95#define cpu_online(cpu) ((cpu) == 0) 97#define cpu_online(cpu) ((cpu) == 0)
96#define cpu_possible(cpu) ((cpu) == 0) 98#define cpu_possible(cpu) ((cpu) == 0)
97#define cpu_present(cpu) ((cpu) == 0) 99#define cpu_present(cpu) ((cpu) == 0)
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 47bbdf9c38d0..38f8d6553831 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -57,6 +57,7 @@ struct trace_iterator {
57 /* The below is zeroed out in pipe_read */ 57 /* The below is zeroed out in pipe_read */
58 struct trace_seq seq; 58 struct trace_seq seq;
59 struct trace_entry *ent; 59 struct trace_entry *ent;
60 int leftover;
60 int cpu; 61 int cpu;
61 u64 ts; 62 u64 ts;
62 63
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 9bace4b9f4fe..af634e95871d 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -162,10 +162,11 @@ struct hrtimer_clock_base {
162 * @expires_next: absolute time of the next event which was scheduled 162 * @expires_next: absolute time of the next event which was scheduled
163 * via clock_set_next_event() 163 * via clock_set_next_event()
164 * @hres_active: State of high resolution mode 164 * @hres_active: State of high resolution mode
165 * @check_clocks: Indictator, when set evaluate time source and clock 165 * @hang_detected: The last hrtimer interrupt detected a hang
166 * event devices whether high resolution mode can be 166 * @nr_events: Total number of hrtimer interrupt events
167 * activated. 167 * @nr_retries: Total number of hrtimer interrupt retries
168 * @nr_events: Total number of timer interrupt events 168 * @nr_hangs: Total number of hrtimer interrupt hangs
169 * @max_hang_time: Maximum time spent in hrtimer_interrupt
169 */ 170 */
170struct hrtimer_cpu_base { 171struct hrtimer_cpu_base {
171 spinlock_t lock; 172 spinlock_t lock;
@@ -173,7 +174,11 @@ struct hrtimer_cpu_base {
173#ifdef CONFIG_HIGH_RES_TIMERS 174#ifdef CONFIG_HIGH_RES_TIMERS
174 ktime_t expires_next; 175 ktime_t expires_next;
175 int hres_active; 176 int hres_active;
177 int hang_detected;
176 unsigned long nr_events; 178 unsigned long nr_events;
179 unsigned long nr_retries;
180 unsigned long nr_hangs;
181 ktime_t max_hang_time;
177#endif 182#endif
178}; 183};
179 184
@@ -435,47 +440,4 @@ extern u64 ktime_divns(const ktime_t kt, s64 div);
435/* Show pending timers: */ 440/* Show pending timers: */
436extern void sysrq_timer_list_show(void); 441extern void sysrq_timer_list_show(void);
437 442
438/*
439 * Timer-statistics info:
440 */
441#ifdef CONFIG_TIMER_STATS
442
443extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
444 void *timerf, char *comm,
445 unsigned int timer_flag);
446
447static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
448{
449 if (likely(!timer_stats_active))
450 return;
451 timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
452 timer->function, timer->start_comm, 0);
453}
454
455extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer,
456 void *addr);
457
458static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
459{
460 __timer_stats_hrtimer_set_start_info(timer, __builtin_return_address(0));
461}
462
463static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
464{
465 timer->start_site = NULL;
466}
467#else
468static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
469{
470}
471
472static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
473{
474}
475
476static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
477{
478}
479#endif
480
481#endif 443#endif
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index a03daed08c59..69f07a9f1277 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -20,19 +20,18 @@ enum {
20 20
21#ifdef CONFIG_HAVE_HW_BREAKPOINT 21#ifdef CONFIG_HAVE_HW_BREAKPOINT
22 22
23/* As it's for in-kernel or ptrace use, we want it to be pinned */
24#define DEFINE_BREAKPOINT_ATTR(name) \
25struct perf_event_attr name = { \
26 .type = PERF_TYPE_BREAKPOINT, \
27 .size = sizeof(name), \
28 .pinned = 1, \
29};
30
31static inline void hw_breakpoint_init(struct perf_event_attr *attr) 23static inline void hw_breakpoint_init(struct perf_event_attr *attr)
32{ 24{
25 memset(attr, 0, sizeof(*attr));
26
33 attr->type = PERF_TYPE_BREAKPOINT; 27 attr->type = PERF_TYPE_BREAKPOINT;
34 attr->size = sizeof(*attr); 28 attr->size = sizeof(*attr);
29 /*
30 * As it's for in-kernel or ptrace use, we want it to be pinned
 31	 * and to call its callback on every hit.
32 */
35 attr->pinned = 1; 33 attr->pinned = 1;
34 attr->sample_period = 1;
36} 35}
37 36
38static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) 37static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
@@ -52,27 +51,24 @@ static inline int hw_breakpoint_len(struct perf_event *bp)
52 51
53extern struct perf_event * 52extern struct perf_event *
54register_user_hw_breakpoint(struct perf_event_attr *attr, 53register_user_hw_breakpoint(struct perf_event_attr *attr,
55 perf_callback_t triggered, 54 perf_overflow_handler_t triggered,
56 struct task_struct *tsk); 55 struct task_struct *tsk);
57 56
58/* FIXME: only change from the attr, and don't unregister */ 57/* FIXME: only change from the attr, and don't unregister */
59extern struct perf_event * 58extern int
60modify_user_hw_breakpoint(struct perf_event *bp, 59modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr);
61 struct perf_event_attr *attr,
62 perf_callback_t triggered,
63 struct task_struct *tsk);
64 60
65/* 61/*
66 * Kernel breakpoints are not associated with any particular thread. 62 * Kernel breakpoints are not associated with any particular thread.
67 */ 63 */
68extern struct perf_event * 64extern struct perf_event *
69register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, 65register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
70 perf_callback_t triggered, 66 perf_overflow_handler_t triggered,
71 int cpu); 67 int cpu);
72 68
73extern struct perf_event ** 69extern struct perf_event **
74register_wide_hw_breakpoint(struct perf_event_attr *attr, 70register_wide_hw_breakpoint(struct perf_event_attr *attr,
75 perf_callback_t triggered); 71 perf_overflow_handler_t triggered);
76 72
77extern int register_perf_hw_breakpoint(struct perf_event *bp); 73extern int register_perf_hw_breakpoint(struct perf_event *bp);
78extern int __register_perf_hw_breakpoint(struct perf_event *bp); 74extern int __register_perf_hw_breakpoint(struct perf_event *bp);
@@ -93,20 +89,18 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
93 89
94static inline struct perf_event * 90static inline struct perf_event *
95register_user_hw_breakpoint(struct perf_event_attr *attr, 91register_user_hw_breakpoint(struct perf_event_attr *attr,
96 perf_callback_t triggered, 92 perf_overflow_handler_t triggered,
97 struct task_struct *tsk) { return NULL; } 93 struct task_struct *tsk) { return NULL; }
98static inline struct perf_event * 94static inline int
99modify_user_hw_breakpoint(struct perf_event *bp, 95modify_user_hw_breakpoint(struct perf_event *bp,
100                          struct perf_event_attr *attr,                  96                          struct perf_event_attr *attr) { return -ENOSYS; }
101 perf_callback_t triggered,
102 struct task_struct *tsk) { return NULL; }
103static inline struct perf_event * 97static inline struct perf_event *
104register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, 98register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
105 perf_callback_t triggered, 99 perf_overflow_handler_t triggered,
106 int cpu) { return NULL; } 100 int cpu) { return NULL; }
107static inline struct perf_event ** 101static inline struct perf_event **
108register_wide_hw_breakpoint(struct perf_event_attr *attr, 102register_wide_hw_breakpoint(struct perf_event_attr *attr,
109 perf_callback_t triggered) { return NULL; } 103 perf_overflow_handler_t triggered) { return NULL; }
110static inline int 104static inline int
111register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } 105register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
112static inline int 106static inline int
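Reviewer note: with this series the breakpoint API takes a perf_overflow_handler_t instead of the old perf_callback_t, hw_breakpoint_init() now zeroes the attr and sets sample_period = 1, and modify_user_hw_breakpoint() updates an existing event in place and returns int instead of re-registering. A minimal sketch of a caller against the new interface (the helper, handler and address arithmetic below are illustrative, not part of this patch):

	static void wp_handler(struct perf_event *bp, int nmi,
			       struct perf_sample_data *data, struct pt_regs *regs)
	{
		printk(KERN_INFO "breakpoint hit at 0x%llx\n",
		       (unsigned long long)bp->attr.bp_addr);
	}

	static int watch_word(struct task_struct *tsk, unsigned long addr)
	{
		struct perf_event_attr attr;
		struct perf_event *bp;

		hw_breakpoint_init(&attr);	/* zeroes attr, sets pinned and sample_period = 1 */
		attr.bp_addr = addr;
		attr.bp_len  = HW_BREAKPOINT_LEN_4;
		attr.bp_type = HW_BREAKPOINT_W;

		bp = register_user_hw_breakpoint(&attr, wp_handler, tsk);
		if (IS_ERR(bp))
			return PTR_ERR(bp);

		/* retarget in place; the new signature returns 0 or -errno */
		attr.bp_addr = addr + sizeof(unsigned long);
		return modify_user_hw_breakpoint(bp, &attr);
	}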
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 43adbd7f0010..64a53f74c9a9 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -18,10 +18,6 @@
18#include <linux/ioctl.h> 18#include <linux/ioctl.h>
19#include <asm/byteorder.h> 19#include <asm/byteorder.h>
20 20
21#ifdef CONFIG_HAVE_HW_BREAKPOINT
22#include <asm/hw_breakpoint.h>
23#endif
24
25/* 21/*
26 * User-space ABI bits: 22 * User-space ABI bits:
27 */ 23 */
@@ -215,12 +211,12 @@ struct perf_event_attr {
215 __u32 wakeup_watermark; /* bytes before wakeup */ 211 __u32 wakeup_watermark; /* bytes before wakeup */
216 }; 212 };
217 213
218 union { 214 struct { /* Hardware breakpoint info */
219 struct { /* Hardware breakpoint info */ 215 __u64 bp_addr;
220 __u64 bp_addr; 216 __u32 bp_type;
221 __u32 bp_type; 217 __u32 bp_len;
222 __u32 bp_len; 218 __u64 __bp_reserved_1;
223 }; 219 __u64 __bp_reserved_2;
224 }; 220 };
225 221
226 __u32 __reserved_2; 222 __u32 __reserved_2;
@@ -451,6 +447,10 @@ enum perf_callchain_context {
451# include <asm/perf_event.h> 447# include <asm/perf_event.h>
452#endif 448#endif
453 449
450#ifdef CONFIG_HAVE_HW_BREAKPOINT
451#include <asm/hw_breakpoint.h>
452#endif
453
454#include <linux/list.h> 454#include <linux/list.h>
455#include <linux/mutex.h> 455#include <linux/mutex.h>
456#include <linux/rculist.h> 456#include <linux/rculist.h>
@@ -565,10 +565,12 @@ struct perf_pending_entry {
565 void (*func)(struct perf_pending_entry *); 565 void (*func)(struct perf_pending_entry *);
566}; 566};
567 567
568typedef void (*perf_callback_t)(struct perf_event *, void *);
569
570struct perf_sample_data; 568struct perf_sample_data;
571 569
570typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
571 struct perf_sample_data *,
572 struct pt_regs *regs);
573
572/** 574/**
573 * struct perf_event - performance event kernel representation: 575 * struct perf_event - performance event kernel representation:
574 */ 576 */
@@ -660,18 +662,12 @@ struct perf_event {
660 struct pid_namespace *ns; 662 struct pid_namespace *ns;
661 u64 id; 663 u64 id;
662 664
663 void (*overflow_handler)(struct perf_event *event, 665 perf_overflow_handler_t overflow_handler;
664 int nmi, struct perf_sample_data *data,
665 struct pt_regs *regs);
666 666
667#ifdef CONFIG_EVENT_PROFILE 667#ifdef CONFIG_EVENT_PROFILE
668 struct event_filter *filter; 668 struct event_filter *filter;
669#endif 669#endif
670 670
671 perf_callback_t callback;
672
673 perf_callback_t event_callback;
674
675#endif /* CONFIG_PERF_EVENTS */ 671#endif /* CONFIG_PERF_EVENTS */
676}; 672};
677 673
@@ -781,7 +777,7 @@ extern struct perf_event *
781perf_event_create_kernel_counter(struct perf_event_attr *attr, 777perf_event_create_kernel_counter(struct perf_event_attr *attr,
782 int cpu, 778 int cpu,
783 pid_t pid, 779 pid_t pid,
784 perf_callback_t callback); 780 perf_overflow_handler_t callback);
785extern u64 perf_event_read_value(struct perf_event *event, 781extern u64 perf_event_read_value(struct perf_event *event,
786 u64 *enabled, u64 *running); 782 u64 *enabled, u64 *running);
787 783
@@ -876,6 +872,8 @@ extern void perf_output_copy(struct perf_output_handle *handle,
876 const void *buf, unsigned int len); 872 const void *buf, unsigned int len);
877extern int perf_swevent_get_recursion_context(void); 873extern int perf_swevent_get_recursion_context(void);
878extern void perf_swevent_put_recursion_context(int rctx); 874extern void perf_swevent_put_recursion_context(int rctx);
875extern void perf_event_enable(struct perf_event *event);
876extern void perf_event_disable(struct perf_event *event);
879#else 877#else
880static inline void 878static inline void
881perf_event_task_sched_in(struct task_struct *task, int cpu) { } 879perf_event_task_sched_in(struct task_struct *task, int cpu) { }
@@ -906,7 +904,8 @@ static inline void perf_event_fork(struct task_struct *tsk) { }
906static inline void perf_event_init(void) { } 904static inline void perf_event_init(void) { }
907static inline int perf_swevent_get_recursion_context(void) { return -1; } 905static inline int perf_swevent_get_recursion_context(void) { return -1; }
908static inline void perf_swevent_put_recursion_context(int rctx) { } 906static inline void perf_swevent_put_recursion_context(int rctx) { }
909 907static inline void perf_event_enable(struct perf_event *event) { }
908static inline void perf_event_disable(struct perf_event *event) { }
910#endif 909#endif
911 910
912#define perf_output_put(handle, x) \ 911#define perf_output_put(handle, x) \
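Reviewer note: perf_callback_t and the callback/event_callback members are gone; in-kernel users now hand a perf_overflow_handler_t directly to perf_event_create_kernel_counter(), and perf_event_enable()/perf_event_disable() become part of the public interface (with empty stubs when perf is compiled out). Rough sketch of a caller under the new prototype; the counter choice and handler body are illustrative only:

	static void cycles_overflow(struct perf_event *event, int nmi,
				    struct perf_sample_data *data,
				    struct pt_regs *regs)
	{
		/* invoked on every sample period of the kernel counter */
	}

	static int start_cycle_counter(int cpu)
	{
		struct perf_event_attr attr = {
			.type		= PERF_TYPE_HARDWARE,
			.config		= PERF_COUNT_HW_CPU_CYCLES,
			.size		= sizeof(attr),
			.sample_period	= 1000000,
			.pinned		= 1,
		};
		struct perf_event *event;

		event = perf_event_create_kernel_counter(&attr, cpu,
							 -1 /* not task bound */,
							 cycles_overflow);
		if (IS_ERR(event))
			return PTR_ERR(event);

		perf_event_disable(event);	/* both now callable from outside kernel/perf_event.c */
		perf_event_enable(event);
		return 0;
	}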
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 89115ec7d43f..294eb2f80144 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1102,7 +1102,7 @@ struct sched_class {
1102 1102
1103 void (*set_curr_task) (struct rq *rq); 1103 void (*set_curr_task) (struct rq *rq);
1104 void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); 1104 void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
1105 void (*task_new) (struct rq *rq, struct task_struct *p); 1105 void (*task_fork) (struct task_struct *p);
1106 1106
1107 void (*switched_from) (struct rq *this_rq, struct task_struct *task, 1107 void (*switched_from) (struct rq *this_rq, struct task_struct *task,
1108 int running); 1108 int running);
@@ -1111,7 +1111,8 @@ struct sched_class {
1111 void (*prio_changed) (struct rq *this_rq, struct task_struct *task, 1111 void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
1112 int oldprio, int running); 1112 int oldprio, int running);
1113 1113
1114 unsigned int (*get_rr_interval) (struct task_struct *task); 1114 unsigned int (*get_rr_interval) (struct rq *rq,
1115 struct task_struct *task);
1115 1116
1116#ifdef CONFIG_FAIR_GROUP_SCHED 1117#ifdef CONFIG_FAIR_GROUP_SCHED
1117 void (*moved_group) (struct task_struct *p); 1118 void (*moved_group) (struct task_struct *p);
@@ -1151,8 +1152,6 @@ struct sched_entity {
1151 u64 start_runtime; 1152 u64 start_runtime;
1152 u64 avg_wakeup; 1153 u64 avg_wakeup;
1153 1154
1154 u64 avg_running;
1155
1156#ifdef CONFIG_SCHEDSTATS 1155#ifdef CONFIG_SCHEDSTATS
1157 u64 wait_start; 1156 u64 wait_start;
1158 u64 wait_max; 1157 u64 wait_max;
@@ -1175,7 +1174,6 @@ struct sched_entity {
1175 u64 nr_failed_migrations_running; 1174 u64 nr_failed_migrations_running;
1176 u64 nr_failed_migrations_hot; 1175 u64 nr_failed_migrations_hot;
1177 u64 nr_forced_migrations; 1176 u64 nr_forced_migrations;
1178 u64 nr_forced2_migrations;
1179 1177
1180 u64 nr_wakeups; 1178 u64 nr_wakeups;
1181 u64 nr_wakeups_sync; 1179 u64 nr_wakeups_sync;
@@ -1840,7 +1838,8 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
1840extern int sched_clock_stable; 1838extern int sched_clock_stable;
1841#endif 1839#endif
1842 1840
1843extern unsigned long long sched_clock(void); 1841/* ftrace calls sched_clock() directly */
1842extern unsigned long long notrace sched_clock(void);
1844 1843
1845extern void sched_clock_init(void); 1844extern void sched_clock_init(void);
1846extern u64 sched_clock_cpu(int cpu); 1845extern u64 sched_clock_cpu(int cpu);
@@ -1903,14 +1902,22 @@ extern unsigned int sysctl_sched_wakeup_granularity;
1903extern unsigned int sysctl_sched_shares_ratelimit; 1902extern unsigned int sysctl_sched_shares_ratelimit;
1904extern unsigned int sysctl_sched_shares_thresh; 1903extern unsigned int sysctl_sched_shares_thresh;
1905extern unsigned int sysctl_sched_child_runs_first; 1904extern unsigned int sysctl_sched_child_runs_first;
1905
1906enum sched_tunable_scaling {
1907 SCHED_TUNABLESCALING_NONE,
1908 SCHED_TUNABLESCALING_LOG,
1909 SCHED_TUNABLESCALING_LINEAR,
1910 SCHED_TUNABLESCALING_END,
1911};
1912extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
1913
1906#ifdef CONFIG_SCHED_DEBUG 1914#ifdef CONFIG_SCHED_DEBUG
1907extern unsigned int sysctl_sched_features;
1908extern unsigned int sysctl_sched_migration_cost; 1915extern unsigned int sysctl_sched_migration_cost;
1909extern unsigned int sysctl_sched_nr_migrate; 1916extern unsigned int sysctl_sched_nr_migrate;
1910extern unsigned int sysctl_sched_time_avg; 1917extern unsigned int sysctl_sched_time_avg;
1911extern unsigned int sysctl_timer_migration; 1918extern unsigned int sysctl_timer_migration;
1912 1919
1913int sched_nr_latency_handler(struct ctl_table *table, int write, 1920int sched_proc_update_handler(struct ctl_table *table, int write,
1914 void __user *buffer, size_t *length, 1921 void __user *buffer, size_t *length,
1915 loff_t *ppos); 1922 loff_t *ppos);
1916#endif 1923#endif
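Reviewer note: the new sysctl_sched_tunable_scaling knob (updated through the renamed sched_proc_update_handler) selects how the base scheduler tunables are rescaled with the online CPU count via get_update_sysctl_factor(). If memory serves, the factors are 1 for NONE, 1 + ilog2(ncpus) for LOG and ncpus for LINEAR; as a worked example with a 6ms base latency on an 8-CPU box:

	SCHED_TUNABLESCALING_NONE:   factor 1              -> latency stays   6ms
	SCHED_TUNABLESCALING_LOG:    factor 1+ilog2(8) = 4 -> latency becomes 24ms
	SCHED_TUNABLESCALING_LINEAR: factor 8              -> latency becomes 48ms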
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index 09077f6ed128..5cf397ceb726 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -14,6 +14,7 @@ struct trace_seq {
14 unsigned char buffer[PAGE_SIZE]; 14 unsigned char buffer[PAGE_SIZE];
15 unsigned int len; 15 unsigned int len;
16 unsigned int readpos; 16 unsigned int readpos;
17 int full;
17}; 18};
18 19
19static inline void 20static inline void
@@ -21,6 +22,7 @@ trace_seq_init(struct trace_seq *s)
21{ 22{
22 s->len = 0; 23 s->len = 0;
23 s->readpos = 0; 24 s->readpos = 0;
25 s->full = 0;
24} 26}
25 27
26/* 28/*
@@ -33,7 +35,7 @@ extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
33 __attribute__ ((format (printf, 2, 0))); 35 __attribute__ ((format (printf, 2, 0)));
34extern int 36extern int
35trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); 37trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
36extern void trace_print_seq(struct seq_file *m, struct trace_seq *s); 38extern int trace_print_seq(struct seq_file *m, struct trace_seq *s);
37extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, 39extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
38 size_t cnt); 40 size_t cnt);
39extern int trace_seq_puts(struct trace_seq *s, const char *str); 41extern int trace_seq_puts(struct trace_seq *s, const char *str);
@@ -55,8 +57,9 @@ trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
55 return 0; 57 return 0;
56} 58}
57 59
58static inline void trace_print_seq(struct seq_file *m, struct trace_seq *s) 60static inline int trace_print_seq(struct seq_file *m, struct trace_seq *s)
59{ 61{
62 return 0;
60} 63}
61static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, 64static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
62 size_t cnt) 65 size_t cnt)
diff --git a/include/linux/usbdevice_fs.h b/include/linux/usbdevice_fs.h
index b2a7d8ba6ee3..15591d2ea400 100644
--- a/include/linux/usbdevice_fs.h
+++ b/include/linux/usbdevice_fs.h
@@ -128,6 +128,29 @@ struct usbdevfs_hub_portinfo {
128#ifdef __KERNEL__ 128#ifdef __KERNEL__
129#ifdef CONFIG_COMPAT 129#ifdef CONFIG_COMPAT
130#include <linux/compat.h> 130#include <linux/compat.h>
131
132struct usbdevfs_ctrltransfer32 {
133 u8 bRequestType;
134 u8 bRequest;
135 u16 wValue;
136 u16 wIndex;
137 u16 wLength;
138 u32 timeout; /* in milliseconds */
139 compat_caddr_t data;
140};
141
142struct usbdevfs_bulktransfer32 {
143 compat_uint_t ep;
144 compat_uint_t len;
145 compat_uint_t timeout; /* in milliseconds */
146 compat_caddr_t data;
147};
148
149struct usbdevfs_disconnectsignal32 {
150 compat_int_t signr;
151 compat_caddr_t context;
152};
153
131struct usbdevfs_urb32 { 154struct usbdevfs_urb32 {
132 unsigned char type; 155 unsigned char type;
133 unsigned char endpoint; 156 unsigned char endpoint;
@@ -153,7 +176,9 @@ struct usbdevfs_ioctl32 {
153#endif /* __KERNEL__ */ 176#endif /* __KERNEL__ */
154 177
155#define USBDEVFS_CONTROL _IOWR('U', 0, struct usbdevfs_ctrltransfer) 178#define USBDEVFS_CONTROL _IOWR('U', 0, struct usbdevfs_ctrltransfer)
179#define USBDEVFS_CONTROL32 _IOWR('U', 0, struct usbdevfs_ctrltransfer32)
156#define USBDEVFS_BULK _IOWR('U', 2, struct usbdevfs_bulktransfer) 180#define USBDEVFS_BULK _IOWR('U', 2, struct usbdevfs_bulktransfer)
181#define USBDEVFS_BULK32 _IOWR('U', 2, struct usbdevfs_bulktransfer32)
157#define USBDEVFS_RESETEP _IOR('U', 3, unsigned int) 182#define USBDEVFS_RESETEP _IOR('U', 3, unsigned int)
158#define USBDEVFS_SETINTERFACE _IOR('U', 4, struct usbdevfs_setinterface) 183#define USBDEVFS_SETINTERFACE _IOR('U', 4, struct usbdevfs_setinterface)
159#define USBDEVFS_SETCONFIGURATION _IOR('U', 5, unsigned int) 184#define USBDEVFS_SETCONFIGURATION _IOR('U', 5, unsigned int)
@@ -166,6 +191,7 @@ struct usbdevfs_ioctl32 {
166#define USBDEVFS_REAPURBNDELAY _IOW('U', 13, void *) 191#define USBDEVFS_REAPURBNDELAY _IOW('U', 13, void *)
167#define USBDEVFS_REAPURBNDELAY32 _IOW('U', 13, __u32) 192#define USBDEVFS_REAPURBNDELAY32 _IOW('U', 13, __u32)
168#define USBDEVFS_DISCSIGNAL _IOR('U', 14, struct usbdevfs_disconnectsignal) 193#define USBDEVFS_DISCSIGNAL _IOR('U', 14, struct usbdevfs_disconnectsignal)
194#define USBDEVFS_DISCSIGNAL32 _IOR('U', 14, struct usbdevfs_disconnectsignal32)
169#define USBDEVFS_CLAIMINTERFACE _IOR('U', 15, unsigned int) 195#define USBDEVFS_CLAIMINTERFACE _IOR('U', 15, unsigned int)
170#define USBDEVFS_RELEASEINTERFACE _IOR('U', 16, unsigned int) 196#define USBDEVFS_RELEASEINTERFACE _IOR('U', 16, unsigned int)
171#define USBDEVFS_CONNECTINFO _IOW('U', 17, struct usbdevfs_connectinfo) 197#define USBDEVFS_CONNECTINFO _IOW('U', 17, struct usbdevfs_connectinfo)
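Reviewer note: the 32-bit compat layouts and their *32 ioctl numbers now sit next to the native definitions, so the usbfs compat path can be handled in the driver rather than in fs/compat_ioctl.c. A hedged sketch of the kind of conversion a handler would perform for USBDEVFS_BULK32 (helper name and exact flow are illustrative; the real devio.c code may differ):

	static int get_bulk32(struct usbdevfs_bulktransfer *kbulk,
			      const struct usbdevfs_bulktransfer32 __user *ubulk)
	{
		struct usbdevfs_bulktransfer32 b32;

		if (copy_from_user(&b32, ubulk, sizeof(b32)))
			return -EFAULT;

		kbulk->ep      = b32.ep;
		kbulk->len     = b32.len;
		kbulk->timeout = b32.timeout;
		kbulk->data    = compat_ptr(b32.data);	/* widen the 32-bit user pointer */
		return 0;
	}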
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index e5ce87a0498d..9496b965d62a 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -301,8 +301,8 @@ TRACE_EVENT(itimer_state,
301 __entry->interval_usec = value->it_interval.tv_usec; 301 __entry->interval_usec = value->it_interval.tv_usec;
302 ), 302 ),
303 303
304 TP_printk("which=%d expires=%lu it_value=%lu.%lu it_interval=%lu.%lu", 304 TP_printk("which=%d expires=%llu it_value=%ld.%ld it_interval=%ld.%ld",
305 __entry->which, __entry->expires, 305 __entry->which, (unsigned long long)__entry->expires,
306 __entry->value_sec, __entry->value_usec, 306 __entry->value_sec, __entry->value_usec,
307 __entry->interval_sec, __entry->interval_usec) 307 __entry->interval_sec, __entry->interval_usec)
308); 308);
@@ -331,8 +331,8 @@ TRACE_EVENT(itimer_expire,
331 __entry->pid = pid_nr(pid); 331 __entry->pid = pid_nr(pid);
332 ), 332 ),
333 333
334 TP_printk("which=%d pid=%d now=%lu", __entry->which, 334 TP_printk("which=%d pid=%d now=%llu", __entry->which,
335 (int) __entry->pid, __entry->now) 335 (int) __entry->pid, (unsigned long long)__entry->now)
336); 336);
337 337
338#endif /* _TRACE_TIMER_H */ 338#endif /* _TRACE_TIMER_H */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 7c4e2713df0a..291ac586f37f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -212,6 +212,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
212 err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, 212 err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
213 hcpu, -1, &nr_calls); 213 hcpu, -1, &nr_calls);
214 if (err == NOTIFY_BAD) { 214 if (err == NOTIFY_BAD) {
215 set_cpu_active(cpu, true);
216
215 nr_calls--; 217 nr_calls--;
216 __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, 218 __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
217 hcpu, nr_calls, NULL); 219 hcpu, nr_calls, NULL);
@@ -223,11 +225,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
223 225
224 /* Ensure that we are not runnable on dying cpu */ 226 /* Ensure that we are not runnable on dying cpu */
225 cpumask_copy(old_allowed, &current->cpus_allowed); 227 cpumask_copy(old_allowed, &current->cpus_allowed);
226 set_cpus_allowed_ptr(current, 228 set_cpus_allowed_ptr(current, cpu_active_mask);
227 cpumask_of(cpumask_any_but(cpu_online_mask, cpu)));
228 229
229 err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); 230 err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
230 if (err) { 231 if (err) {
232 set_cpu_active(cpu, true);
231 /* CPU didn't die: tell everyone. Can't complain. */ 233 /* CPU didn't die: tell everyone. Can't complain. */
232 if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, 234 if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
233 hcpu) == NOTIFY_BAD) 235 hcpu) == NOTIFY_BAD)
@@ -292,9 +294,6 @@ int __ref cpu_down(unsigned int cpu)
292 294
293 err = _cpu_down(cpu, 0); 295 err = _cpu_down(cpu, 0);
294 296
295 if (cpu_online(cpu))
296 set_cpu_active(cpu, true);
297
298out: 297out:
299 cpu_maps_update_done(); 298 cpu_maps_update_done();
300 stop_machine_destroy(); 299 stop_machine_destroy();
@@ -387,6 +386,15 @@ int disable_nonboot_cpus(void)
387 * with the userspace trying to use the CPU hotplug at the same time 386 * with the userspace trying to use the CPU hotplug at the same time
388 */ 387 */
389 cpumask_clear(frozen_cpus); 388 cpumask_clear(frozen_cpus);
389
390 for_each_online_cpu(cpu) {
391 if (cpu == first_cpu)
392 continue;
393 set_cpu_active(cpu, false);
394 }
395
396 synchronize_sched();
397
390 printk("Disabling non-boot CPUs ...\n"); 398 printk("Disabling non-boot CPUs ...\n");
391 for_each_online_cpu(cpu) { 399 for_each_online_cpu(cpu) {
392 if (cpu == first_cpu) 400 if (cpu == first_cpu)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3cf2183b472d..ba401fab459f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -737,7 +737,7 @@ static void do_rebuild_sched_domains(struct work_struct *unused)
737{ 737{
738} 738}
739 739
740static int generate_sched_domains(struct cpumask **domains, 740static int generate_sched_domains(cpumask_var_t **domains,
741 struct sched_domain_attr **attributes) 741 struct sched_domain_attr **attributes)
742{ 742{
743 *domains = NULL; 743 *domains = NULL;
@@ -872,7 +872,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
872 if (retval < 0) 872 if (retval < 0)
873 return retval; 873 return retval;
874 874
875 if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask)) 875 if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask))
876 return -EINVAL; 876 return -EINVAL;
877 } 877 }
878 retval = validate_change(cs, trialcs); 878 retval = validate_change(cs, trialcs);
@@ -2010,7 +2010,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
2010 } 2010 }
2011 2011
2012 /* Continue past cpusets with all cpus, mems online */ 2012 /* Continue past cpusets with all cpus, mems online */
2013 if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) && 2013 if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) &&
2014 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) 2014 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
2015 continue; 2015 continue;
2016 2016
@@ -2019,7 +2019,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
2019 /* Remove offline cpus and mems from this cpuset. */ 2019 /* Remove offline cpus and mems from this cpuset. */
2020 mutex_lock(&callback_mutex); 2020 mutex_lock(&callback_mutex);
2021 cpumask_and(cp->cpus_allowed, cp->cpus_allowed, 2021 cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
2022 cpu_online_mask); 2022 cpu_active_mask);
2023 nodes_and(cp->mems_allowed, cp->mems_allowed, 2023 nodes_and(cp->mems_allowed, cp->mems_allowed,
2024 node_states[N_HIGH_MEMORY]); 2024 node_states[N_HIGH_MEMORY]);
2025 mutex_unlock(&callback_mutex); 2025 mutex_unlock(&callback_mutex);
@@ -2057,8 +2057,10 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
2057 switch (phase) { 2057 switch (phase) {
2058 case CPU_ONLINE: 2058 case CPU_ONLINE:
2059 case CPU_ONLINE_FROZEN: 2059 case CPU_ONLINE_FROZEN:
2060 case CPU_DEAD: 2060 case CPU_DOWN_PREPARE:
2061 case CPU_DEAD_FROZEN: 2061 case CPU_DOWN_PREPARE_FROZEN:
2062 case CPU_DOWN_FAILED:
2063 case CPU_DOWN_FAILED_FROZEN:
2062 break; 2064 break;
2063 2065
2064 default: 2066 default:
@@ -2067,7 +2069,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
2067 2069
2068 cgroup_lock(); 2070 cgroup_lock();
2069 mutex_lock(&callback_mutex); 2071 mutex_lock(&callback_mutex);
2070 cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask); 2072 cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
2071 mutex_unlock(&callback_mutex); 2073 mutex_unlock(&callback_mutex);
2072 scan_for_empty_cpusets(&top_cpuset); 2074 scan_for_empty_cpusets(&top_cpuset);
2073 ndoms = generate_sched_domains(&doms, &attr); 2075 ndoms = generate_sched_domains(&doms, &attr);
@@ -2114,7 +2116,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
2114 2116
2115void __init cpuset_init_smp(void) 2117void __init cpuset_init_smp(void)
2116{ 2118{
2117 cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask); 2119 cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
2118 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; 2120 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2119 2121
2120 hotcpu_notifier(cpuset_track_online_cpus, 0); 2122 hotcpu_notifier(cpuset_track_online_cpus, 0);
diff --git a/kernel/futex.c b/kernel/futex.c
index fb65e822fc41..d73ef1f3e55d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -304,8 +304,14 @@ void put_futex_key(int fshared, union futex_key *key)
304 */ 304 */
305static int fault_in_user_writeable(u32 __user *uaddr) 305static int fault_in_user_writeable(u32 __user *uaddr)
306{ 306{
307 int ret = get_user_pages(current, current->mm, (unsigned long)uaddr, 307 struct mm_struct *mm = current->mm;
308 1, 1, 0, NULL, NULL); 308 int ret;
309
310 down_read(&mm->mmap_sem);
311 ret = get_user_pages(current, mm, (unsigned long)uaddr,
312 1, 1, 0, NULL, NULL);
313 up_read(&mm->mmap_sem);
314
309 return ret < 0 ? ret : 0; 315 return ret < 0 ? ret : 0;
310} 316}
311 317
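Reviewer note: get_user_pages() must be called with the caller's mmap_sem held (at least for read); the old fault-in path invoked it bare, so the fix is simply to bracket the call with down_read()/up_read() on current->mm->mmap_sem as done above.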
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ede527708123..d2f9239dc6ba 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -557,7 +557,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
557static int hrtimer_reprogram(struct hrtimer *timer, 557static int hrtimer_reprogram(struct hrtimer *timer,
558 struct hrtimer_clock_base *base) 558 struct hrtimer_clock_base *base)
559{ 559{
560 ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next; 560 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
561 ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); 561 ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
562 int res; 562 int res;
563 563
@@ -582,7 +582,16 @@ static int hrtimer_reprogram(struct hrtimer *timer,
582 if (expires.tv64 < 0) 582 if (expires.tv64 < 0)
583 return -ETIME; 583 return -ETIME;
584 584
585 if (expires.tv64 >= expires_next->tv64) 585 if (expires.tv64 >= cpu_base->expires_next.tv64)
586 return 0;
587
588 /*
589 * If a hang was detected in the last timer interrupt then we
590 * do not schedule a timer which is earlier than the expiry
591 * which we enforced in the hang detection. We want the system
592 * to make progress.
593 */
594 if (cpu_base->hang_detected)
586 return 0; 595 return 0;
587 596
588 /* 597 /*
@@ -590,7 +599,7 @@ static int hrtimer_reprogram(struct hrtimer *timer,
590 */ 599 */
591 res = tick_program_event(expires, 0); 600 res = tick_program_event(expires, 0);
592 if (!IS_ERR_VALUE(res)) 601 if (!IS_ERR_VALUE(res))
593 *expires_next = expires; 602 cpu_base->expires_next = expires;
594 return res; 603 return res;
595} 604}
596 605
@@ -747,17 +756,33 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
747 756
748#endif /* CONFIG_HIGH_RES_TIMERS */ 757#endif /* CONFIG_HIGH_RES_TIMERS */
749 758
750#ifdef CONFIG_TIMER_STATS 759static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
751void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
752{ 760{
761#ifdef CONFIG_TIMER_STATS
753 if (timer->start_site) 762 if (timer->start_site)
754 return; 763 return;
755 764 timer->start_site = __builtin_return_address(0);
756 timer->start_site = addr;
757 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); 765 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
758 timer->start_pid = current->pid; 766 timer->start_pid = current->pid;
767#endif
759} 768}
769
770static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
771{
772#ifdef CONFIG_TIMER_STATS
773 timer->start_site = NULL;
774#endif
775}
776
777static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
778{
779#ifdef CONFIG_TIMER_STATS
780 if (likely(!timer_stats_active))
781 return;
782 timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
783 timer->function, timer->start_comm, 0);
760#endif 784#endif
785}
761 786
762/* 787/*
763 * Counterpart to lock_hrtimer_base above: 788 * Counterpart to lock_hrtimer_base above:
@@ -1217,30 +1242,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
1217 1242
1218#ifdef CONFIG_HIGH_RES_TIMERS 1243#ifdef CONFIG_HIGH_RES_TIMERS
1219 1244
1220static int force_clock_reprogram;
1221
1222/*
1223 * After 5 iteration's attempts, we consider that hrtimer_interrupt()
1224 * is hanging, which could happen with something that slows the interrupt
1225 * such as the tracing. Then we force the clock reprogramming for each future
1226 * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
1227 * threshold that we will overwrite.
1228 * The next tick event will be scheduled to 3 times we currently spend on
1229 * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
1230 * 1/4 of their time to process the hrtimer interrupts. This is enough to
1231 * let it running without serious starvation.
1232 */
1233
1234static inline void
1235hrtimer_interrupt_hanging(struct clock_event_device *dev,
1236 ktime_t try_time)
1237{
1238 force_clock_reprogram = 1;
1239 dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
1240 printk(KERN_WARNING "hrtimer: interrupt too slow, "
1241 "forcing clock min delta to %llu ns\n",
1242 (unsigned long long) dev->min_delta_ns);
1243}
1244/* 1245/*
1245 * High resolution timer interrupt 1246 * High resolution timer interrupt
1246 * Called with interrupts disabled 1247 * Called with interrupts disabled
@@ -1249,21 +1250,15 @@ void hrtimer_interrupt(struct clock_event_device *dev)
1249{ 1250{
1250 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); 1251 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1251 struct hrtimer_clock_base *base; 1252 struct hrtimer_clock_base *base;
1252 ktime_t expires_next, now; 1253 ktime_t expires_next, now, entry_time, delta;
1253 int nr_retries = 0; 1254 int i, retries = 0;
1254 int i;
1255 1255
1256 BUG_ON(!cpu_base->hres_active); 1256 BUG_ON(!cpu_base->hres_active);
1257 cpu_base->nr_events++; 1257 cpu_base->nr_events++;
1258 dev->next_event.tv64 = KTIME_MAX; 1258 dev->next_event.tv64 = KTIME_MAX;
1259 1259
1260 retry: 1260 entry_time = now = ktime_get();
1261 /* 5 retries is enough to notice a hang */ 1261retry:
1262 if (!(++nr_retries % 5))
1263 hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
1264
1265 now = ktime_get();
1266
1267 expires_next.tv64 = KTIME_MAX; 1262 expires_next.tv64 = KTIME_MAX;
1268 1263
1269 spin_lock(&cpu_base->lock); 1264 spin_lock(&cpu_base->lock);
@@ -1325,10 +1320,48 @@ void hrtimer_interrupt(struct clock_event_device *dev)
1325 spin_unlock(&cpu_base->lock); 1320 spin_unlock(&cpu_base->lock);
1326 1321
1327 /* Reprogramming necessary ? */ 1322 /* Reprogramming necessary ? */
1328 if (expires_next.tv64 != KTIME_MAX) { 1323 if (expires_next.tv64 == KTIME_MAX ||
1329 if (tick_program_event(expires_next, force_clock_reprogram)) 1324 !tick_program_event(expires_next, 0)) {
1330 goto retry; 1325 cpu_base->hang_detected = 0;
1326 return;
1331 } 1327 }
1328
1329 /*
1330 * The next timer was already expired due to:
1331 * - tracing
1332 * - long lasting callbacks
1333 * - being scheduled away when running in a VM
1334 *
1335 * We need to prevent that we loop forever in the hrtimer
1336 * interrupt routine. We give it 3 attempts to avoid
1337 * overreacting on some spurious event.
1338 */
1339 now = ktime_get();
1340 cpu_base->nr_retries++;
1341 if (++retries < 3)
1342 goto retry;
1343 /*
1344 * Give the system a chance to do something else than looping
1345 * here. We stored the entry time, so we know exactly how long
1346 * we spent here. We schedule the next event this amount of
1347 * time away.
1348 */
1349 cpu_base->nr_hangs++;
1350 cpu_base->hang_detected = 1;
1351 delta = ktime_sub(now, entry_time);
1352 if (delta.tv64 > cpu_base->max_hang_time.tv64)
1353 cpu_base->max_hang_time = delta;
1354 /*
1355 * Limit it to a sensible value as we enforce a longer
1356 * delay. Give the CPU at least 100ms to catch up.
1357 */
1358 if (delta.tv64 > 100 * NSEC_PER_MSEC)
1359 expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
1360 else
1361 expires_next = ktime_add(now, delta);
1362 tick_program_event(expires_next, 1);
1363 printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
1364 ktime_to_ns(delta));
1332} 1365}
1333 1366
1334/* 1367/*
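Reviewer note: the old global force_clock_reprogram heuristic (trip after 5 retries, scale min_delta_ns by 3) is replaced by per-cpu hang accounting: up to 3 in-interrupt retries, then program the next event as far away as this interrupt has already run, clamped to 100ms, and let hang_detected keep hrtimer_reprogram() from pulling it back in earlier. Worked example: if entry_time to now is 1.5ms, the next event lands 1.5ms out; if the interrupt somehow ran for 400ms (tracing, being scheduled away in a VM), the delay is clamped to 100ms and max_hang_time records the 400ms.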
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index cf5ee1628411..366eedf949c0 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -52,7 +52,7 @@
52static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned); 52static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
53 53
54/* Number of pinned task breakpoints in a cpu */ 54/* Number of pinned task breakpoints in a cpu */
55static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]); 55static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
56 56
57/* Number of non-pinned cpu/task breakpoints in a cpu */ 57/* Number of non-pinned cpu/task breakpoints in a cpu */
58static DEFINE_PER_CPU(unsigned int, nr_bp_flexible); 58static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
@@ -73,7 +73,7 @@ static DEFINE_MUTEX(nr_bp_mutex);
73static unsigned int max_task_bp_pinned(int cpu) 73static unsigned int max_task_bp_pinned(int cpu)
74{ 74{
75 int i; 75 int i;
76 unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu); 76 unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
77 77
78 for (i = HBP_NUM -1; i >= 0; i--) { 78 for (i = HBP_NUM -1; i >= 0; i--) {
79 if (tsk_pinned[i] > 0) 79 if (tsk_pinned[i] > 0)
@@ -83,15 +83,51 @@ static unsigned int max_task_bp_pinned(int cpu)
83 return 0; 83 return 0;
84} 84}
85 85
86static int task_bp_pinned(struct task_struct *tsk)
87{
88 struct perf_event_context *ctx = tsk->perf_event_ctxp;
89 struct list_head *list;
90 struct perf_event *bp;
91 unsigned long flags;
92 int count = 0;
93
94 if (WARN_ONCE(!ctx, "No perf context for this task"))
95 return 0;
96
97 list = &ctx->event_list;
98
99 spin_lock_irqsave(&ctx->lock, flags);
100
101 /*
102 * The current breakpoint counter is not included in the list
103 * at the open() callback time
104 */
105 list_for_each_entry(bp, list, event_entry) {
106 if (bp->attr.type == PERF_TYPE_BREAKPOINT)
107 count++;
108 }
109
110 spin_unlock_irqrestore(&ctx->lock, flags);
111
112 return count;
113}
114
86/* 115/*
87 * Report the number of pinned/un-pinned breakpoints we have in 116 * Report the number of pinned/un-pinned breakpoints we have in
88 * a given cpu (cpu > -1) or in all of them (cpu = -1). 117 * a given cpu (cpu > -1) or in all of them (cpu = -1).
89 */ 118 */
90static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) 119static void
120fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
91{ 121{
122 int cpu = bp->cpu;
123 struct task_struct *tsk = bp->ctx->task;
124
92 if (cpu >= 0) { 125 if (cpu >= 0) {
93 slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu); 126 slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
94 slots->pinned += max_task_bp_pinned(cpu); 127 if (!tsk)
128 slots->pinned += max_task_bp_pinned(cpu);
129 else
130 slots->pinned += task_bp_pinned(tsk);
95 slots->flexible = per_cpu(nr_bp_flexible, cpu); 131 slots->flexible = per_cpu(nr_bp_flexible, cpu);
96 132
97 return; 133 return;
@@ -101,7 +137,10 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
101 unsigned int nr; 137 unsigned int nr;
102 138
103 nr = per_cpu(nr_cpu_bp_pinned, cpu); 139 nr = per_cpu(nr_cpu_bp_pinned, cpu);
104 nr += max_task_bp_pinned(cpu); 140 if (!tsk)
141 nr += max_task_bp_pinned(cpu);
142 else
143 nr += task_bp_pinned(tsk);
105 144
106 if (nr > slots->pinned) 145 if (nr > slots->pinned)
107 slots->pinned = nr; 146 slots->pinned = nr;
@@ -118,35 +157,12 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
118 */ 157 */
119static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) 158static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
120{ 159{
121 int count = 0;
122 struct perf_event *bp;
123 struct perf_event_context *ctx = tsk->perf_event_ctxp;
124 unsigned int *tsk_pinned; 160 unsigned int *tsk_pinned;
125 struct list_head *list; 161 int count = 0;
126 unsigned long flags;
127
128 if (WARN_ONCE(!ctx, "No perf context for this task"))
129 return;
130
131 list = &ctx->event_list;
132
133 spin_lock_irqsave(&ctx->lock, flags);
134
135 /*
136 * The current breakpoint counter is not included in the list
137 * at the open() callback time
138 */
139 list_for_each_entry(bp, list, event_entry) {
140 if (bp->attr.type == PERF_TYPE_BREAKPOINT)
141 count++;
142 }
143 162
144 spin_unlock_irqrestore(&ctx->lock, flags); 163 count = task_bp_pinned(tsk);
145 164
146 if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list")) 165 tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
147 return;
148
149 tsk_pinned = per_cpu(task_bp_pinned, cpu);
150 if (enable) { 166 if (enable) {
151 tsk_pinned[count]++; 167 tsk_pinned[count]++;
152 if (count > 0) 168 if (count > 0)
@@ -193,7 +209,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
193 * - If attached to a single cpu, check: 209 * - If attached to a single cpu, check:
194 * 210 *
195 * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu) 211 * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
196 * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM 212 * + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM
197 * 213 *
198 * -> If there are already non-pinned counters in this cpu, it means 214 * -> If there are already non-pinned counters in this cpu, it means
199 * there is already a free slot for them. 215 * there is already a free slot for them.
@@ -204,7 +220,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
204 * - If attached to every cpus, check: 220 * - If attached to every cpus, check:
205 * 221 *
206 * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *)) 222 * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
207 * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM 223 * + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM
208 * 224 *
209 * -> This is roughly the same, except we check the number of per cpu 225 * -> This is roughly the same, except we check the number of per cpu
210 * bp for every cpu and we keep the max one. Same for the per tasks 226 * bp for every cpu and we keep the max one. Same for the per tasks
@@ -216,7 +232,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
216 * - If attached to a single cpu, check: 232 * - If attached to a single cpu, check:
217 * 233 *
218 * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu) 234 * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
219 * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM 235 * + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM
220 * 236 *
221 * -> Same checks as before. But now the nr_bp_flexible, if any, must keep 237 * -> Same checks as before. But now the nr_bp_flexible, if any, must keep
222 * one register at least (or they will never be fed). 238 * one register at least (or they will never be fed).
@@ -224,7 +240,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
224 * - If attached to every cpus, check: 240 * - If attached to every cpus, check:
225 * 241 *
226 * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) 242 * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
227 * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM 243 * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
228 */ 244 */
229int reserve_bp_slot(struct perf_event *bp) 245int reserve_bp_slot(struct perf_event *bp)
230{ 246{
@@ -233,7 +249,7 @@ int reserve_bp_slot(struct perf_event *bp)
233 249
234 mutex_lock(&nr_bp_mutex); 250 mutex_lock(&nr_bp_mutex);
235 251
236 fetch_bp_busy_slots(&slots, bp->cpu); 252 fetch_bp_busy_slots(&slots, bp);
237 253
238 /* Flexible counters need to keep at least one slot */ 254 /* Flexible counters need to keep at least one slot */
239 if (slots.pinned + (!!slots.flexible) == HBP_NUM) { 255 if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
@@ -259,7 +275,7 @@ void release_bp_slot(struct perf_event *bp)
259} 275}
260 276
261 277
262int __register_perf_hw_breakpoint(struct perf_event *bp) 278int register_perf_hw_breakpoint(struct perf_event *bp)
263{ 279{
264 int ret; 280 int ret;
265 281
@@ -276,19 +292,12 @@ int __register_perf_hw_breakpoint(struct perf_event *bp)
276 * This is a quick hack that will be removed soon, once we remove 292 * This is a quick hack that will be removed soon, once we remove
277 * the tmp breakpoints from ptrace 293 * the tmp breakpoints from ptrace
278 */ 294 */
279 if (!bp->attr.disabled || bp->callback == perf_bp_event) 295 if (!bp->attr.disabled || !bp->overflow_handler)
280 ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); 296 ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
281 297
282 return ret; 298 return ret;
283} 299}
284 300
285int register_perf_hw_breakpoint(struct perf_event *bp)
286{
287 bp->callback = perf_bp_event;
288
289 return __register_perf_hw_breakpoint(bp);
290}
291
292/** 301/**
293 * register_user_hw_breakpoint - register a hardware breakpoint for user space 302 * register_user_hw_breakpoint - register a hardware breakpoint for user space
294 * @attr: breakpoint attributes 303 * @attr: breakpoint attributes
@@ -297,7 +306,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
297 */ 306 */
298struct perf_event * 307struct perf_event *
299register_user_hw_breakpoint(struct perf_event_attr *attr, 308register_user_hw_breakpoint(struct perf_event_attr *attr,
300 perf_callback_t triggered, 309 perf_overflow_handler_t triggered,
301 struct task_struct *tsk) 310 struct task_struct *tsk)
302{ 311{
303 return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); 312 return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
@@ -311,19 +320,40 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
311 * @triggered: callback to trigger when we hit the breakpoint 320 * @triggered: callback to trigger when we hit the breakpoint
312 * @tsk: pointer to 'task_struct' of the process to which the address belongs 321 * @tsk: pointer to 'task_struct' of the process to which the address belongs
313 */ 322 */
314struct perf_event * 323int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
315modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr,
316 perf_callback_t triggered,
317 struct task_struct *tsk)
318{ 324{
319 /* 325 u64 old_addr = bp->attr.bp_addr;
320 * FIXME: do it without unregistering 326 int old_type = bp->attr.bp_type;
321 * - We don't want to lose our slot 327 int old_len = bp->attr.bp_len;
322 * - If the new bp is incorrect, don't lose the older one 328 int err = 0;
323 */
324 unregister_hw_breakpoint(bp);
325 329
326 return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); 330 perf_event_disable(bp);
331
332 bp->attr.bp_addr = attr->bp_addr;
333 bp->attr.bp_type = attr->bp_type;
334 bp->attr.bp_len = attr->bp_len;
335
336 if (attr->disabled)
337 goto end;
338
339 err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
340 if (!err)
341 perf_event_enable(bp);
342
343 if (err) {
344 bp->attr.bp_addr = old_addr;
345 bp->attr.bp_type = old_type;
346 bp->attr.bp_len = old_len;
347 if (!bp->attr.disabled)
348 perf_event_enable(bp);
349
350 return err;
351 }
352
353end:
354 bp->attr.disabled = attr->disabled;
355
356 return 0;
327} 357}
328EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); 358EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
329 359
@@ -348,7 +378,7 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
348 */ 378 */
349struct perf_event ** 379struct perf_event **
350register_wide_hw_breakpoint(struct perf_event_attr *attr, 380register_wide_hw_breakpoint(struct perf_event_attr *attr,
351 perf_callback_t triggered) 381 perf_overflow_handler_t triggered)
352{ 382{
353 struct perf_event **cpu_events, **pevent, *bp; 383 struct perf_event **cpu_events, **pevent, *bp;
354 long err; 384 long err;
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index f5dcd36d3151..4f8df01dbe51 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -168,7 +168,7 @@ static void lock_time_inc(struct lock_time *lt, u64 time)
168 if (time > lt->max) 168 if (time > lt->max)
169 lt->max = time; 169 lt->max = time;
170 170
171	if (time < lt->min || !lt->min)			171	if (time < lt->min || !lt->nr)
172 lt->min = time; 172 lt->min = time;
173 173
174 lt->total += time; 174 lt->total += time;
@@ -177,8 +177,15 @@ static void lock_time_inc(struct lock_time *lt, u64 time)
177 177
178static inline void lock_time_add(struct lock_time *src, struct lock_time *dst) 178static inline void lock_time_add(struct lock_time *src, struct lock_time *dst)
179{ 179{
180 dst->min += src->min; 180 if (!src->nr)
181 dst->max += src->max; 181 return;
182
183 if (src->max > dst->max)
184 dst->max = src->max;
185
186 if (src->min < dst->min || !dst->nr)
187 dst->min = src->min;
188
182 dst->total += src->total; 189 dst->total += src->total;
183 dst->nr += src->nr; 190 dst->nr += src->nr;
184} 191}
@@ -379,7 +386,8 @@ static int save_trace(struct stack_trace *trace)
379 * complete trace that maxes out the entries provided will be reported 386 * complete trace that maxes out the entries provided will be reported
380 * as incomplete, friggin useless </rant> 387 * as incomplete, friggin useless </rant>
381 */ 388 */
382 if (trace->entries[trace->nr_entries-1] == ULONG_MAX) 389 if (trace->nr_entries != 0 &&
390 trace->entries[trace->nr_entries-1] == ULONG_MAX)
383 trace->nr_entries--; 391 trace->nr_entries--;
384 392
385 trace->max_entries = trace->nr_entries; 393 trace->max_entries = trace->nr_entries;
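Reviewer note: lock_time_add() used to sum the per-source minima and maxima when folding per-cpu lock_time stats, which produces nonsense; merging {min=2us, max=10us} with {min=5us, max=7us} previously yielded min=7us/max=17us, whereas the corrected code keeps min=2us/max=10us. The !src->nr / !dst->nr checks also stop an empty (all-zero) source from dragging the merged minimum to 0, matching the !lt->nr test now used in lock_time_inc().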
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 40a996ec39fa..e73e53c7582f 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -36,7 +36,7 @@
36/* 36/*
37 * Each CPU has a list of per CPU events: 37 * Each CPU has a list of per CPU events:
38 */ 38 */
39DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); 39static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
40 40
41int perf_max_events __read_mostly = 1; 41int perf_max_events __read_mostly = 1;
42static int perf_reserved_percpu __read_mostly; 42static int perf_reserved_percpu __read_mostly;
@@ -567,7 +567,7 @@ static void __perf_event_disable(void *info)
567 * is the current context on this CPU and preemption is disabled, 567 * is the current context on this CPU and preemption is disabled,
568 * hence we can't get into perf_event_task_sched_out for this context. 568 * hence we can't get into perf_event_task_sched_out for this context.
569 */ 569 */
570static void perf_event_disable(struct perf_event *event) 570void perf_event_disable(struct perf_event *event)
571{ 571{
572 struct perf_event_context *ctx = event->ctx; 572 struct perf_event_context *ctx = event->ctx;
573 struct task_struct *task = ctx->task; 573 struct task_struct *task = ctx->task;
@@ -971,7 +971,7 @@ static void __perf_event_enable(void *info)
971 * perf_event_for_each_child or perf_event_for_each as described 971 * perf_event_for_each_child or perf_event_for_each as described
972 * for perf_event_disable. 972 * for perf_event_disable.
973 */ 973 */
974static void perf_event_enable(struct perf_event *event) 974void perf_event_enable(struct perf_event *event)
975{ 975{
976 struct perf_event_context *ctx = event->ctx; 976 struct perf_event_context *ctx = event->ctx;
977 struct task_struct *task = ctx->task; 977 struct task_struct *task = ctx->task;
@@ -1579,7 +1579,6 @@ static void
1579__perf_event_init_context(struct perf_event_context *ctx, 1579__perf_event_init_context(struct perf_event_context *ctx,
1580 struct task_struct *task) 1580 struct task_struct *task)
1581{ 1581{
1582 memset(ctx, 0, sizeof(*ctx));
1583 spin_lock_init(&ctx->lock); 1582 spin_lock_init(&ctx->lock);
1584 mutex_init(&ctx->mutex); 1583 mutex_init(&ctx->mutex);
1585 INIT_LIST_HEAD(&ctx->group_list); 1584 INIT_LIST_HEAD(&ctx->group_list);
@@ -1654,7 +1653,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1654 } 1653 }
1655 1654
1656 if (!ctx) { 1655 if (!ctx) {
1657 ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); 1656 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
1658 err = -ENOMEM; 1657 err = -ENOMEM;
1659 if (!ctx) 1658 if (!ctx)
1660 goto errout; 1659 goto errout;
@@ -4011,6 +4010,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4011 event->pmu->read(event); 4010 event->pmu->read(event);
4012 4011
4013 data.addr = 0; 4012 data.addr = 0;
4013 data.raw = NULL;
4014 data.period = event->hw.last_period; 4014 data.period = event->hw.last_period;
4015 regs = get_irq_regs(); 4015 regs = get_irq_regs();
4016 /* 4016 /*
@@ -4080,8 +4080,7 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
4080 u64 now; 4080 u64 now;
4081 4081
4082 now = cpu_clock(cpu); 4082 now = cpu_clock(cpu);
4083 prev = atomic64_read(&event->hw.prev_count); 4083 prev = atomic64_xchg(&event->hw.prev_count, now);
4084 atomic64_set(&event->hw.prev_count, now);
4085 atomic64_add(now - prev, &event->count); 4084 atomic64_add(now - prev, &event->count);
4086} 4085}
4087 4086
@@ -4286,15 +4285,8 @@ static void bp_perf_event_destroy(struct perf_event *event)
4286static const struct pmu *bp_perf_event_init(struct perf_event *bp) 4285static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4287{ 4286{
4288 int err; 4287 int err;
4289 /* 4288
4290 * The breakpoint is already filled if we haven't created the counter 4289 err = register_perf_hw_breakpoint(bp);
4291 * through perf syscall
4292 * FIXME: manage to get trigerred to NULL if it comes from syscalls
4293 */
4294 if (!bp->callback)
4295 err = register_perf_hw_breakpoint(bp);
4296 else
4297 err = __register_perf_hw_breakpoint(bp);
4298 if (err) 4290 if (err)
4299 return ERR_PTR(err); 4291 return ERR_PTR(err);
4300 4292
@@ -4308,6 +4300,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
4308 struct perf_sample_data sample; 4300 struct perf_sample_data sample;
4309 struct pt_regs *regs = data; 4301 struct pt_regs *regs = data;
4310 4302
4303 sample.raw = NULL;
4311 sample.addr = bp->attr.bp_addr; 4304 sample.addr = bp->attr.bp_addr;
4312 4305
4313 if (!perf_exclude_event(bp, regs)) 4306 if (!perf_exclude_event(bp, regs))
@@ -4390,7 +4383,7 @@ perf_event_alloc(struct perf_event_attr *attr,
4390 struct perf_event_context *ctx, 4383 struct perf_event_context *ctx,
4391 struct perf_event *group_leader, 4384 struct perf_event *group_leader,
4392 struct perf_event *parent_event, 4385 struct perf_event *parent_event,
4393 perf_callback_t callback, 4386 perf_overflow_handler_t overflow_handler,
4394 gfp_t gfpflags) 4387 gfp_t gfpflags)
4395{ 4388{
4396 const struct pmu *pmu; 4389 const struct pmu *pmu;
@@ -4433,10 +4426,10 @@ perf_event_alloc(struct perf_event_attr *attr,
4433 4426
4434 event->state = PERF_EVENT_STATE_INACTIVE; 4427 event->state = PERF_EVENT_STATE_INACTIVE;
4435 4428
4436 if (!callback && parent_event) 4429 if (!overflow_handler && parent_event)
4437 callback = parent_event->callback; 4430 overflow_handler = parent_event->overflow_handler;
4438 4431
4439 event->callback = callback; 4432 event->overflow_handler = overflow_handler;
4440 4433
4441 if (attr->disabled) 4434 if (attr->disabled)
4442 event->state = PERF_EVENT_STATE_OFF; 4435 event->state = PERF_EVENT_STATE_OFF;
@@ -4776,7 +4769,8 @@ err_put_context:
4776 */ 4769 */
4777struct perf_event * 4770struct perf_event *
4778perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, 4771perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
4779 pid_t pid, perf_callback_t callback) 4772 pid_t pid,
4773 perf_overflow_handler_t overflow_handler)
4780{ 4774{
4781 struct perf_event *event; 4775 struct perf_event *event;
4782 struct perf_event_context *ctx; 4776 struct perf_event_context *ctx;
@@ -4793,7 +4787,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
4793 } 4787 }
4794 4788
4795 event = perf_event_alloc(attr, cpu, ctx, NULL, 4789 event = perf_event_alloc(attr, cpu, ctx, NULL,
4796 NULL, callback, GFP_KERNEL); 4790 NULL, overflow_handler, GFP_KERNEL);
4797 if (IS_ERR(event)) { 4791 if (IS_ERR(event)) {
4798 err = PTR_ERR(event); 4792 err = PTR_ERR(event);
4799 goto err_put_context; 4793 goto err_put_context;
@@ -5090,7 +5084,7 @@ again:
5090 */ 5084 */
5091int perf_event_init_task(struct task_struct *child) 5085int perf_event_init_task(struct task_struct *child)
5092{ 5086{
5093 struct perf_event_context *child_ctx, *parent_ctx; 5087 struct perf_event_context *child_ctx = NULL, *parent_ctx;
5094 struct perf_event_context *cloned_ctx; 5088 struct perf_event_context *cloned_ctx;
5095 struct perf_event *event; 5089 struct perf_event *event;
5096 struct task_struct *parent = current; 5090 struct task_struct *parent = current;
@@ -5106,20 +5100,6 @@ int perf_event_init_task(struct task_struct *child)
5106 return 0; 5100 return 0;
5107 5101
5108 /* 5102 /*
5109 * This is executed from the parent task context, so inherit
5110 * events that have been marked for cloning.
5111 * First allocate and initialize a context for the child.
5112 */
5113
5114 child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
5115 if (!child_ctx)
5116 return -ENOMEM;
5117
5118 __perf_event_init_context(child_ctx, child);
5119 child->perf_event_ctxp = child_ctx;
5120 get_task_struct(child);
5121
5122 /*
5123 * If the parent's context is a clone, pin it so it won't get 5103 * If the parent's context is a clone, pin it so it won't get
5124 * swapped under us. 5104 * swapped under us.
5125 */ 5105 */
@@ -5149,6 +5129,26 @@ int perf_event_init_task(struct task_struct *child)
5149 continue; 5129 continue;
5150 } 5130 }
5151 5131
5132 if (!child->perf_event_ctxp) {
5133 /*
5134 * This is executed from the parent task context, so
5135 * inherit events that have been marked for cloning.
5136 * First allocate and initialize a context for the
5137 * child.
5138 */
5139
5140 child_ctx = kzalloc(sizeof(struct perf_event_context),
5141 GFP_KERNEL);
5142 if (!child_ctx) {
5143 ret = -ENOMEM;
5144 goto exit;
5145 }
5146
5147 __perf_event_init_context(child_ctx, child);
5148 child->perf_event_ctxp = child_ctx;
5149 get_task_struct(child);
5150 }
5151
5152 ret = inherit_group(event, parent, parent_ctx, 5152 ret = inherit_group(event, parent, parent_ctx,
5153 child, child_ctx); 5153 child, child_ctx);
5154 if (ret) { 5154 if (ret) {
@@ -5177,6 +5177,7 @@ int perf_event_init_task(struct task_struct *child)
5177 get_ctx(child_ctx->parent_ctx); 5177 get_ctx(child_ctx->parent_ctx);
5178 } 5178 }
5179 5179
5180exit:
5180 mutex_unlock(&parent_ctx->mutex); 5181 mutex_unlock(&parent_ctx->mutex);
5181 5182
5182 perf_unpin_context(parent_ctx); 5183 perf_unpin_context(parent_ctx);
diff --git a/kernel/sched.c b/kernel/sched.c
index e7f2cfa6a257..ff39cadf621e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -814,6 +814,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
814 * default: 0.25ms 814 * default: 0.25ms
815 */ 815 */
816unsigned int sysctl_sched_shares_ratelimit = 250000; 816unsigned int sysctl_sched_shares_ratelimit = 250000;
817unsigned int normalized_sysctl_sched_shares_ratelimit = 250000;
817 818
818/* 819/*
819 * Inject some fuzzyness into changing the per-cpu group shares 820 * Inject some fuzzyness into changing the per-cpu group shares
@@ -1614,7 +1615,7 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu,
1614 */ 1615 */
1615static int tg_shares_up(struct task_group *tg, void *data) 1616static int tg_shares_up(struct task_group *tg, void *data)
1616{ 1617{
1617 unsigned long weight, rq_weight = 0, shares = 0; 1618 unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0;
1618 unsigned long *usd_rq_weight; 1619 unsigned long *usd_rq_weight;
1619 struct sched_domain *sd = data; 1620 struct sched_domain *sd = data;
1620 unsigned long flags; 1621 unsigned long flags;
@@ -1630,6 +1631,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
1630 weight = tg->cfs_rq[i]->load.weight; 1631 weight = tg->cfs_rq[i]->load.weight;
1631 usd_rq_weight[i] = weight; 1632 usd_rq_weight[i] = weight;
1632 1633
1634 rq_weight += weight;
1633 /* 1635 /*
1634 * If there are currently no tasks on the cpu pretend there 1636 * If there are currently no tasks on the cpu pretend there
1635 * is one of average load so that when a new task gets to 1637 * is one of average load so that when a new task gets to
@@ -1638,10 +1640,13 @@ static int tg_shares_up(struct task_group *tg, void *data)
1638 if (!weight) 1640 if (!weight)
1639 weight = NICE_0_LOAD; 1641 weight = NICE_0_LOAD;
1640 1642
1641 rq_weight += weight; 1643 sum_weight += weight;
1642 shares += tg->cfs_rq[i]->shares; 1644 shares += tg->cfs_rq[i]->shares;
1643 } 1645 }
1644 1646
1647 if (!rq_weight)
1648 rq_weight = sum_weight;
1649
1645 if ((!shares && rq_weight) || shares > tg->shares) 1650 if ((!shares && rq_weight) || shares > tg->shares)
1646 shares = tg->shares; 1651 shares = tg->shares;
1647 1652
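In the tg_shares_up() hunks, rq_weight now sums the raw per-cpu loads before the idle-cpu boost, while the boosted total moves into sum_weight; sum_weight is only substituted back when every cpu in the span is idle. A toy recomputation with made-up weights, just to show how the two sums diverge (the final per-cpu split printed below is illustrative, not the exact kernel formula):

#include <stdio.h>

#define NICE_0_LOAD 1024UL

int main(void)
{
        unsigned long cpu_weight[4] = { 2048, 0, 1024, 0 };     /* invented loads */
        unsigned long tg_shares = 1024;
        unsigned long rq_weight = 0, sum_weight = 0;
        int i;

        for (i = 0; i < 4; i++) {
                unsigned long w = cpu_weight[i];

                rq_weight += w;                 /* real load, may stay 0 */
                if (!w)
                        w = NICE_0_LOAD;        /* pretend an idle cpu runs one task */
                sum_weight += w;
        }

        if (!rq_weight)                         /* fully idle group: fall back */
                rq_weight = sum_weight;

        printf("rq_weight=%lu sum_weight=%lu\n", rq_weight, sum_weight);
        for (i = 0; i < 4; i++)
                printf("cpu%d gets roughly %lu of %lu shares\n",
                       i, tg_shares * cpu_weight[i] / rq_weight, tg_shares);
        return 0;
}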
@@ -1810,6 +1815,22 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
1810#endif 1815#endif
1811 1816
1812static void calc_load_account_active(struct rq *this_rq); 1817static void calc_load_account_active(struct rq *this_rq);
1818static void update_sysctl(void);
1819static int get_update_sysctl_factor(void);
1820
1821static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1822{
1823 set_task_rq(p, cpu);
1824#ifdef CONFIG_SMP
1825 /*
1826 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
1827 * successfuly executed on another CPU. We must ensure that updates of
1828 * per-task data have been completed by this moment.
1829 */
1830 smp_wmb();
1831 task_thread_info(p)->cpu = cpu;
1832#endif
1833}
1813 1834
1814#include "sched_stats.h" 1835#include "sched_stats.h"
1815#include "sched_idletask.c" 1836#include "sched_idletask.c"
@@ -1967,20 +1988,6 @@ inline int task_curr(const struct task_struct *p)
1967 return cpu_curr(task_cpu(p)) == p; 1988 return cpu_curr(task_cpu(p)) == p;
1968} 1989}
1969 1990
1970static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1971{
1972 set_task_rq(p, cpu);
1973#ifdef CONFIG_SMP
1974 /*
1975 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
1976 * successfuly executed on another CPU. We must ensure that updates of
1977 * per-task data have been completed by this moment.
1978 */
1979 smp_wmb();
1980 task_thread_info(p)->cpu = cpu;
1981#endif
1982}
1983
1984static inline void check_class_changed(struct rq *rq, struct task_struct *p, 1991static inline void check_class_changed(struct rq *rq, struct task_struct *p,
1985 const struct sched_class *prev_class, 1992 const struct sched_class *prev_class,
1986 int oldprio, int running) 1993 int oldprio, int running)
@@ -2060,29 +2067,13 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
2060void set_task_cpu(struct task_struct *p, unsigned int new_cpu) 2067void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
2061{ 2068{
2062 int old_cpu = task_cpu(p); 2069 int old_cpu = task_cpu(p);
2063 struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu);
2064 struct cfs_rq *old_cfsrq = task_cfs_rq(p), 2070 struct cfs_rq *old_cfsrq = task_cfs_rq(p),
2065 *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); 2071 *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
2066 u64 clock_offset;
2067
2068 clock_offset = old_rq->clock - new_rq->clock;
2069 2072
2070 trace_sched_migrate_task(p, new_cpu); 2073 trace_sched_migrate_task(p, new_cpu);
2071 2074
2072#ifdef CONFIG_SCHEDSTATS
2073 if (p->se.wait_start)
2074 p->se.wait_start -= clock_offset;
2075 if (p->se.sleep_start)
2076 p->se.sleep_start -= clock_offset;
2077 if (p->se.block_start)
2078 p->se.block_start -= clock_offset;
2079#endif
2080 if (old_cpu != new_cpu) { 2075 if (old_cpu != new_cpu) {
2081 p->se.nr_migrations++; 2076 p->se.nr_migrations++;
2082#ifdef CONFIG_SCHEDSTATS
2083 if (task_hot(p, old_rq->clock, NULL))
2084 schedstat_inc(p, se.nr_forced2_migrations);
2085#endif
2086 perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 2077 perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
2087 1, 1, NULL, 0); 2078 1, 1, NULL, 0);
2088 } 2079 }
@@ -2323,6 +2314,14 @@ void task_oncpu_function_call(struct task_struct *p,
2323 preempt_enable(); 2314 preempt_enable();
2324} 2315}
2325 2316
2317#ifdef CONFIG_SMP
2318static inline
2319int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
2320{
2321 return p->sched_class->select_task_rq(p, sd_flags, wake_flags);
2322}
2323#endif
2324
2326/*** 2325/***
2327 * try_to_wake_up - wake up a thread 2326 * try_to_wake_up - wake up a thread
2328 * @p: the to-be-woken-up thread 2327 * @p: the to-be-woken-up thread
@@ -2374,17 +2373,14 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2374 if (task_contributes_to_load(p)) 2373 if (task_contributes_to_load(p))
2375 rq->nr_uninterruptible--; 2374 rq->nr_uninterruptible--;
2376 p->state = TASK_WAKING; 2375 p->state = TASK_WAKING;
2377 task_rq_unlock(rq, &flags); 2376 __task_rq_unlock(rq);
2378 2377
2379 cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); 2378 cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
2380 if (cpu != orig_cpu) { 2379 if (cpu != orig_cpu)
2381 local_irq_save(flags);
2382 rq = cpu_rq(cpu);
2383 update_rq_clock(rq);
2384 set_task_cpu(p, cpu); 2380 set_task_cpu(p, cpu);
2385 local_irq_restore(flags); 2381
2386 } 2382 rq = __task_rq_lock(p);
2387 rq = task_rq_lock(p, &flags); 2383 update_rq_clock(rq);
2388 2384
2389 WARN_ON(p->state != TASK_WAKING); 2385 WARN_ON(p->state != TASK_WAKING);
2390 cpu = task_cpu(p); 2386 cpu = task_cpu(p);
@@ -2499,7 +2495,6 @@ static void __sched_fork(struct task_struct *p)
2499 p->se.avg_overlap = 0; 2495 p->se.avg_overlap = 0;
2500 p->se.start_runtime = 0; 2496 p->se.start_runtime = 0;
2501 p->se.avg_wakeup = sysctl_sched_wakeup_granularity; 2497 p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
2502 p->se.avg_running = 0;
2503 2498
2504#ifdef CONFIG_SCHEDSTATS 2499#ifdef CONFIG_SCHEDSTATS
2505 p->se.wait_start = 0; 2500 p->se.wait_start = 0;
@@ -2521,7 +2516,6 @@ static void __sched_fork(struct task_struct *p)
2521 p->se.nr_failed_migrations_running = 0; 2516 p->se.nr_failed_migrations_running = 0;
2522 p->se.nr_failed_migrations_hot = 0; 2517 p->se.nr_failed_migrations_hot = 0;
2523 p->se.nr_forced_migrations = 0; 2518 p->se.nr_forced_migrations = 0;
2524 p->se.nr_forced2_migrations = 0;
2525 2519
2526 p->se.nr_wakeups = 0; 2520 p->se.nr_wakeups = 0;
2527 p->se.nr_wakeups_sync = 0; 2521 p->se.nr_wakeups_sync = 0;
@@ -2558,7 +2552,6 @@ static void __sched_fork(struct task_struct *p)
2558void sched_fork(struct task_struct *p, int clone_flags) 2552void sched_fork(struct task_struct *p, int clone_flags)
2559{ 2553{
2560 int cpu = get_cpu(); 2554 int cpu = get_cpu();
2561 unsigned long flags;
2562 2555
2563 __sched_fork(p); 2556 __sched_fork(p);
2564 2557
@@ -2592,13 +2585,13 @@ void sched_fork(struct task_struct *p, int clone_flags)
2592 if (!rt_prio(p->prio)) 2585 if (!rt_prio(p->prio))
2593 p->sched_class = &fair_sched_class; 2586 p->sched_class = &fair_sched_class;
2594 2587
2588 if (p->sched_class->task_fork)
2589 p->sched_class->task_fork(p);
2590
2595#ifdef CONFIG_SMP 2591#ifdef CONFIG_SMP
2596 cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); 2592 cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
2597#endif 2593#endif
2598 local_irq_save(flags);
2599 update_rq_clock(cpu_rq(cpu));
2600 set_task_cpu(p, cpu); 2594 set_task_cpu(p, cpu);
2601 local_irq_restore(flags);
2602 2595
2603#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 2596#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
2604 if (likely(sched_info_on())) 2597 if (likely(sched_info_on()))
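sched_fork() now invokes an optional per-class task_fork() hook and picks the child's cpu through the common select_task_rq() wrapper rather than calling the class method directly. A small sketch of that optional-callback dispatch, with an invented ops table (struct toy_class) in place of struct sched_class:

#include <stdio.h>

struct toy_task { int cpu; };

struct toy_class {
        void (*task_fork)(struct toy_task *p);          /* may be NULL */
        int  (*select_rq)(struct toy_task *p, int sd_flags, int wake_flags);
};

static int toy_select_rq(struct toy_task *p, int sd_flags, int wake_flags)
{
        (void)p; (void)sd_flags; (void)wake_flags;
        return 1;                                       /* pretend cpu 1 is best */
}

static const struct toy_class toy_fair_class = {
        .task_fork = NULL,                              /* hook left unset by this class */
        .select_rq = toy_select_rq,
};

/* Wrapper in the spirit of select_task_rq(): callers never touch the
 * class pointer themselves. */
static int select_rq(const struct toy_class *class, struct toy_task *p, int sd_flags)
{
        return class->select_rq(p, sd_flags, 0);
}

int main(void)
{
        struct toy_task child = { .cpu = 0 };

        if (toy_fair_class.task_fork)                   /* call only if provided */
                toy_fair_class.task_fork(&child);

        child.cpu = select_rq(&toy_fair_class, &child, 0 /* fork balance */);
        printf("child starts on cpu %d\n", child.cpu);
        return 0;
}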
@@ -2631,17 +2624,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2631 rq = task_rq_lock(p, &flags); 2624 rq = task_rq_lock(p, &flags);
2632 BUG_ON(p->state != TASK_RUNNING); 2625 BUG_ON(p->state != TASK_RUNNING);
2633 update_rq_clock(rq); 2626 update_rq_clock(rq);
2634 2627 activate_task(rq, p, 0);
2635 if (!p->sched_class->task_new || !current->se.on_rq) {
2636 activate_task(rq, p, 0);
2637 } else {
2638 /*
2639 * Let the scheduling class do new task startup
2640 * management (if any):
2641 */
2642 p->sched_class->task_new(rq, p);
2643 inc_nr_running(rq);
2644 }
2645 trace_sched_wakeup_new(rq, p, 1); 2628 trace_sched_wakeup_new(rq, p, 1);
2646 check_preempt_curr(rq, p, WF_FORK); 2629 check_preempt_curr(rq, p, WF_FORK);
2647#ifdef CONFIG_SMP 2630#ifdef CONFIG_SMP
@@ -3156,7 +3139,7 @@ out:
3156void sched_exec(void) 3139void sched_exec(void)
3157{ 3140{
3158 int new_cpu, this_cpu = get_cpu(); 3141 int new_cpu, this_cpu = get_cpu();
3159 new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0); 3142 new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0);
3160 put_cpu(); 3143 put_cpu();
3161 if (new_cpu != this_cpu) 3144 if (new_cpu != this_cpu)
3162 sched_migrate_task(current, new_cpu); 3145 sched_migrate_task(current, new_cpu);
@@ -3172,10 +3155,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
3172 deactivate_task(src_rq, p, 0); 3155 deactivate_task(src_rq, p, 0);
3173 set_task_cpu(p, this_cpu); 3156 set_task_cpu(p, this_cpu);
3174 activate_task(this_rq, p, 0); 3157 activate_task(this_rq, p, 0);
3175 /*
3176 * Note that idle threads have a prio of MAX_PRIO, for this test
3177 * to be always true for them.
3178 */
3179 check_preempt_curr(this_rq, p, 0); 3158 check_preempt_curr(this_rq, p, 0);
3180} 3159}
3181 3160
@@ -4134,7 +4113,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
4134 unsigned long flags; 4113 unsigned long flags;
4135 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); 4114 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
4136 4115
4137 cpumask_copy(cpus, cpu_online_mask); 4116 cpumask_copy(cpus, cpu_active_mask);
4138 4117
4139 /* 4118 /*
4140 * When power savings policy is enabled for the parent domain, idle 4119 * When power savings policy is enabled for the parent domain, idle
@@ -4297,7 +4276,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
4297 int all_pinned = 0; 4276 int all_pinned = 0;
4298 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); 4277 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
4299 4278
4300 cpumask_copy(cpus, cpu_online_mask); 4279 cpumask_copy(cpus, cpu_active_mask);
4301 4280
4302 /* 4281 /*
4303 * When power savings policy is enabled for the parent domain, idle 4282 * When power savings policy is enabled for the parent domain, idle
@@ -4694,7 +4673,7 @@ int select_nohz_load_balancer(int stop_tick)
4694 cpumask_set_cpu(cpu, nohz.cpu_mask); 4673 cpumask_set_cpu(cpu, nohz.cpu_mask);
4695 4674
4696 /* time for ilb owner also to sleep */ 4675 /* time for ilb owner also to sleep */
4697 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { 4676 if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) {
4698 if (atomic_read(&nohz.load_balancer) == cpu) 4677 if (atomic_read(&nohz.load_balancer) == cpu)
4699 atomic_set(&nohz.load_balancer, -1); 4678 atomic_set(&nohz.load_balancer, -1);
4700 return 0; 4679 return 0;
@@ -5396,13 +5375,14 @@ static inline void schedule_debug(struct task_struct *prev)
5396#endif 5375#endif
5397} 5376}
5398 5377
5399static void put_prev_task(struct rq *rq, struct task_struct *p) 5378static void put_prev_task(struct rq *rq, struct task_struct *prev)
5400{ 5379{
5401 u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime; 5380 if (prev->state == TASK_RUNNING) {
5381 u64 runtime = prev->se.sum_exec_runtime;
5402 5382
5403 update_avg(&p->se.avg_running, runtime); 5383 runtime -= prev->se.prev_sum_exec_runtime;
5384 runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
5404 5385
5405 if (p->state == TASK_RUNNING) {
5406 /* 5386 /*
5407 * In order to avoid avg_overlap growing stale when we are 5387 * In order to avoid avg_overlap growing stale when we are
5408 * indeed overlapping and hence not getting put to sleep, grow 5388 * indeed overlapping and hence not getting put to sleep, grow
@@ -5412,12 +5392,9 @@ static void put_prev_task(struct rq *rq, struct task_struct *p)
5412 * correlates to the amount of cache footprint a task can 5392 * correlates to the amount of cache footprint a task can
5413 * build up. 5393 * build up.
5414 */ 5394 */
5415 runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); 5395 update_avg(&prev->se.avg_overlap, runtime);
5416 update_avg(&p->se.avg_overlap, runtime);
5417 } else {
5418 update_avg(&p->se.avg_running, 0);
5419 } 5396 }
5420 p->sched_class->put_prev_task(rq, p); 5397 prev->sched_class->put_prev_task(rq, prev);
5421} 5398}
5422 5399
5423/* 5400/*
@@ -6631,6 +6608,8 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
6631long sched_getaffinity(pid_t pid, struct cpumask *mask) 6608long sched_getaffinity(pid_t pid, struct cpumask *mask)
6632{ 6609{
6633 struct task_struct *p; 6610 struct task_struct *p;
6611 unsigned long flags;
6612 struct rq *rq;
6634 int retval; 6613 int retval;
6635 6614
6636 get_online_cpus(); 6615 get_online_cpus();
@@ -6645,7 +6624,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
6645 if (retval) 6624 if (retval)
6646 goto out_unlock; 6625 goto out_unlock;
6647 6626
6627 rq = task_rq_lock(p, &flags);
6648 cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); 6628 cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
6629 task_rq_unlock(rq, &flags);
6649 6630
6650out_unlock: 6631out_unlock:
6651 read_unlock(&tasklist_lock); 6632 read_unlock(&tasklist_lock);
@@ -6883,6 +6864,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
6883{ 6864{
6884 struct task_struct *p; 6865 struct task_struct *p;
6885 unsigned int time_slice; 6866 unsigned int time_slice;
6867 unsigned long flags;
6868 struct rq *rq;
6886 int retval; 6869 int retval;
6887 struct timespec t; 6870 struct timespec t;
6888 6871
@@ -6899,7 +6882,9 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
6899 if (retval) 6882 if (retval)
6900 goto out_unlock; 6883 goto out_unlock;
6901 6884
6902 time_slice = p->sched_class->get_rr_interval(p); 6885 rq = task_rq_lock(p, &flags);
6886 time_slice = p->sched_class->get_rr_interval(rq, p);
6887 task_rq_unlock(rq, &flags);
6903 6888
6904 read_unlock(&tasklist_lock); 6889 read_unlock(&tasklist_lock);
6905 jiffies_to_timespec(time_slice, &t); 6890 jiffies_to_timespec(time_slice, &t);
@@ -7000,7 +6985,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
7000 __sched_fork(idle); 6985 __sched_fork(idle);
7001 idle->se.exec_start = sched_clock(); 6986 idle->se.exec_start = sched_clock();
7002 6987
7003 idle->prio = idle->normal_prio = MAX_PRIO;
7004 cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); 6988 cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
7005 __set_task_cpu(idle, cpu); 6989 __set_task_cpu(idle, cpu);
7006 6990
@@ -7041,22 +7025,43 @@ cpumask_var_t nohz_cpu_mask;
7041 * 7025 *
7042 * This idea comes from the SD scheduler of Con Kolivas: 7026 * This idea comes from the SD scheduler of Con Kolivas:
7043 */ 7027 */
7044static inline void sched_init_granularity(void) 7028static int get_update_sysctl_factor(void)
7045{ 7029{
7046 unsigned int factor = 1 + ilog2(num_online_cpus()); 7030 unsigned int cpus = min_t(int, num_online_cpus(), 8);
7047 const unsigned long limit = 200000000; 7031 unsigned int factor;
7032
7033 switch (sysctl_sched_tunable_scaling) {
7034 case SCHED_TUNABLESCALING_NONE:
7035 factor = 1;
7036 break;
7037 case SCHED_TUNABLESCALING_LINEAR:
7038 factor = cpus;
7039 break;
7040 case SCHED_TUNABLESCALING_LOG:
7041 default:
7042 factor = 1 + ilog2(cpus);
7043 break;
7044 }
7048 7045
7049 sysctl_sched_min_granularity *= factor; 7046 return factor;
7050 if (sysctl_sched_min_granularity > limit) 7047}
7051 sysctl_sched_min_granularity = limit;
7052 7048
7053 sysctl_sched_latency *= factor; 7049static void update_sysctl(void)
7054 if (sysctl_sched_latency > limit) 7050{
7055 sysctl_sched_latency = limit; 7051 unsigned int factor = get_update_sysctl_factor();
7056 7052
7057 sysctl_sched_wakeup_granularity *= factor; 7053#define SET_SYSCTL(name) \
7054 (sysctl_##name = (factor) * normalized_sysctl_##name)
7055 SET_SYSCTL(sched_min_granularity);
7056 SET_SYSCTL(sched_latency);
7057 SET_SYSCTL(sched_wakeup_granularity);
7058 SET_SYSCTL(sched_shares_ratelimit);
7059#undef SET_SYSCTL
7060}
7058 7061
7059 sysctl_sched_shares_ratelimit *= factor; 7062static inline void sched_init_granularity(void)
7063{
7064 update_sysctl();
7060} 7065}
7061 7066
7062#ifdef CONFIG_SMP 7067#ifdef CONFIG_SMP
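The new get_update_sysctl_factor()/update_sysctl() pair above replaces the old one-shot boot-time multiplication: each tunable keeps a normalized_sysctl_* baseline, and the effective value is recomputed as baseline times a factor that depends on sysctl_sched_tunable_scaling and the online cpu count (capped at 8). A standalone arithmetic sketch of that recomputation, using the same three scaling modes; the numeric baselines are the defaults visible in the hunks above:

#include <stdio.h>

enum scaling { SCALE_NONE, SCALE_LOG, SCALE_LINEAR };

static unsigned int ilog2_u(unsigned int v)
{
        unsigned int r = 0;

        while (v >>= 1)
                r++;
        return r;
}

static unsigned int factor_for(enum scaling mode, unsigned int online_cpus)
{
        unsigned int cpus = online_cpus < 8 ? online_cpus : 8;  /* min(ncpus, 8) */

        switch (mode) {
        case SCALE_NONE:        return 1;
        case SCALE_LINEAR:      return cpus;
        case SCALE_LOG:
        default:                return 1 + ilog2_u(cpus);
        }
}

int main(void)
{
        /* normalized baselines from the patch: 5ms latency, 1ms granularity */
        unsigned int normalized_latency = 5000000, normalized_min_gran = 1000000;
        unsigned int cpus = 16;
        enum scaling mode;

        for (mode = SCALE_NONE; mode <= SCALE_LINEAR; mode++) {
                unsigned int f = factor_for(mode, cpus);

                printf("mode=%d factor=%u latency=%u min_granularity=%u\n",
                       mode, f, normalized_latency * f, normalized_min_gran * f);
        }
        return 0;
}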
@@ -7093,7 +7098,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
7093 int ret = 0; 7098 int ret = 0;
7094 7099
7095 rq = task_rq_lock(p, &flags); 7100 rq = task_rq_lock(p, &flags);
7096 if (!cpumask_intersects(new_mask, cpu_online_mask)) { 7101 if (!cpumask_intersects(new_mask, cpu_active_mask)) {
7097 ret = -EINVAL; 7102 ret = -EINVAL;
7098 goto out; 7103 goto out;
7099 } 7104 }
@@ -7115,7 +7120,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
7115 if (cpumask_test_cpu(task_cpu(p), new_mask)) 7120 if (cpumask_test_cpu(task_cpu(p), new_mask))
7116 goto out; 7121 goto out;
7117 7122
7118 if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) { 7123 if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) {
7119 /* Need help from migration thread: drop lock and wait. */ 7124 /* Need help from migration thread: drop lock and wait. */
7120 struct task_struct *mt = rq->migration_thread; 7125 struct task_struct *mt = rq->migration_thread;
7121 7126
@@ -7269,19 +7274,19 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
7269 7274
7270again: 7275again:
7271 /* Look for allowed, online CPU in same node. */ 7276 /* Look for allowed, online CPU in same node. */
7272 for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask) 7277 for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
7273 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) 7278 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
7274 goto move; 7279 goto move;
7275 7280
7276 /* Any allowed, online CPU? */ 7281 /* Any allowed, online CPU? */
7277 dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask); 7282 dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
7278 if (dest_cpu < nr_cpu_ids) 7283 if (dest_cpu < nr_cpu_ids)
7279 goto move; 7284 goto move;
7280 7285
7281 /* No more Mr. Nice Guy. */ 7286 /* No more Mr. Nice Guy. */
7282 if (dest_cpu >= nr_cpu_ids) { 7287 if (dest_cpu >= nr_cpu_ids) {
7283 cpuset_cpus_allowed_locked(p, &p->cpus_allowed); 7288 cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
7284 dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed); 7289 dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
7285 7290
7286 /* 7291 /*
7287 * Don't tell them about moving exiting tasks or 7292 * Don't tell them about moving exiting tasks or
@@ -7310,7 +7315,7 @@ move:
7310 */ 7315 */
7311static void migrate_nr_uninterruptible(struct rq *rq_src) 7316static void migrate_nr_uninterruptible(struct rq *rq_src)
7312{ 7317{
7313 struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask)); 7318 struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
7314 unsigned long flags; 7319 unsigned long flags;
7315 7320
7316 local_irq_save(flags); 7321 local_irq_save(flags);
@@ -7563,7 +7568,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
7563static struct ctl_table_header *sd_sysctl_header; 7568static struct ctl_table_header *sd_sysctl_header;
7564static void register_sched_domain_sysctl(void) 7569static void register_sched_domain_sysctl(void)
7565{ 7570{
7566 int i, cpu_num = num_online_cpus(); 7571 int i, cpu_num = num_possible_cpus();
7567 struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); 7572 struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
7568 char buf[32]; 7573 char buf[32];
7569 7574
@@ -7573,7 +7578,7 @@ static void register_sched_domain_sysctl(void)
7573 if (entry == NULL) 7578 if (entry == NULL)
7574 return; 7579 return;
7575 7580
7576 for_each_online_cpu(i) { 7581 for_each_possible_cpu(i) {
7577 snprintf(buf, 32, "cpu%d", i); 7582 snprintf(buf, 32, "cpu%d", i);
7578 entry->procname = kstrdup(buf, GFP_KERNEL); 7583 entry->procname = kstrdup(buf, GFP_KERNEL);
7579 entry->mode = 0555; 7584 entry->mode = 0555;
@@ -7703,7 +7708,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7703 spin_lock_irq(&rq->lock); 7708 spin_lock_irq(&rq->lock);
7704 update_rq_clock(rq); 7709 update_rq_clock(rq);
7705 deactivate_task(rq, rq->idle, 0); 7710 deactivate_task(rq, rq->idle, 0);
7706 rq->idle->static_prio = MAX_PRIO;
7707 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); 7711 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
7708 rq->idle->sched_class = &idle_sched_class; 7712 rq->idle->sched_class = &idle_sched_class;
7709 migrate_dead_tasks(cpu); 7713 migrate_dead_tasks(cpu);
@@ -9099,7 +9103,7 @@ match1:
9099 if (doms_new == NULL) { 9103 if (doms_new == NULL) {
9100 ndoms_cur = 0; 9104 ndoms_cur = 0;
9101 doms_new = &fallback_doms; 9105 doms_new = &fallback_doms;
9102 cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map); 9106 cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
9103 WARN_ON_ONCE(dattr_new); 9107 WARN_ON_ONCE(dattr_new);
9104 } 9108 }
9105 9109
@@ -9230,8 +9234,10 @@ static int update_sched_domains(struct notifier_block *nfb,
9230 switch (action) { 9234 switch (action) {
9231 case CPU_ONLINE: 9235 case CPU_ONLINE:
9232 case CPU_ONLINE_FROZEN: 9236 case CPU_ONLINE_FROZEN:
9233 case CPU_DEAD: 9237 case CPU_DOWN_PREPARE:
9234 case CPU_DEAD_FROZEN: 9238 case CPU_DOWN_PREPARE_FROZEN:
9239 case CPU_DOWN_FAILED:
9240 case CPU_DOWN_FAILED_FROZEN:
9235 partition_sched_domains(1, NULL, NULL); 9241 partition_sched_domains(1, NULL, NULL);
9236 return NOTIFY_OK; 9242 return NOTIFY_OK;
9237 9243
@@ -9278,7 +9284,7 @@ void __init sched_init_smp(void)
9278#endif 9284#endif
9279 get_online_cpus(); 9285 get_online_cpus();
9280 mutex_lock(&sched_domains_mutex); 9286 mutex_lock(&sched_domains_mutex);
9281 arch_init_sched_domains(cpu_online_mask); 9287 arch_init_sched_domains(cpu_active_mask);
9282 cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); 9288 cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
9283 if (cpumask_empty(non_isolated_cpus)) 9289 if (cpumask_empty(non_isolated_cpus))
9284 cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); 9290 cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
@@ -9842,13 +9848,15 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
9842 se = kzalloc_node(sizeof(struct sched_entity), 9848 se = kzalloc_node(sizeof(struct sched_entity),
9843 GFP_KERNEL, cpu_to_node(i)); 9849 GFP_KERNEL, cpu_to_node(i));
9844 if (!se) 9850 if (!se)
9845 goto err; 9851 goto err_free_rq;
9846 9852
9847 init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); 9853 init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
9848 } 9854 }
9849 9855
9850 return 1; 9856 return 1;
9851 9857
9858 err_free_rq:
9859 kfree(cfs_rq);
9852 err: 9860 err:
9853 return 0; 9861 return 0;
9854} 9862}
@@ -9930,13 +9938,15 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
9930 rt_se = kzalloc_node(sizeof(struct sched_rt_entity), 9938 rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
9931 GFP_KERNEL, cpu_to_node(i)); 9939 GFP_KERNEL, cpu_to_node(i));
9932 if (!rt_se) 9940 if (!rt_se)
9933 goto err; 9941 goto err_free_rq;
9934 9942
9935 init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); 9943 init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
9936 } 9944 }
9937 9945
9938 return 1; 9946 return 1;
9939 9947
9948 err_free_rq:
9949 kfree(rt_rq);
9940 err: 9950 err:
9941 return 0; 9951 return 0;
9942} 9952}
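alloc_fair_sched_group() and alloc_rt_sched_group() gain an err_free_rq label so the cfs_rq/rt_rq allocated in the current loop iteration is freed when the matching entity allocation fails, instead of being leaked. This is the usual kernel staged-unwind idiom; a self-contained sketch with invented names (alloc_pair, struct rq_part, struct ent_part):

#include <stdlib.h>

struct rq_part  { int dummy; };
struct ent_part { int dummy; };

/* Allocate two coupled objects; if the second allocation fails, the
 * first is released on the dedicated error label before falling
 * through to the common failure return. */
static int alloc_pair(struct rq_part **rqp, struct ent_part **entp)
{
        struct rq_part *rq;
        struct ent_part *ent;

        rq = calloc(1, sizeof(*rq));
        if (!rq)
                goto err;

        ent = calloc(1, sizeof(*ent));
        if (!ent)
                goto err_free_rq;

        *rqp = rq;
        *entp = ent;
        return 1;                       /* success, mirroring the kernel's "return 1" */

err_free_rq:
        free(rq);
err:
        return 0;
}

int main(void)
{
        struct rq_part *rq;
        struct ent_part *ent;

        return alloc_pair(&rq, &ent) ? 0 : 1;
}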
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 6988cf08f705..5ae24fc65d75 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -309,6 +309,12 @@ static void print_cpu(struct seq_file *m, int cpu)
309 print_rq(m, rq, cpu); 309 print_rq(m, rq, cpu);
310} 310}
311 311
312static const char *sched_tunable_scaling_names[] = {
313 "none",
314 "logaritmic",
315 "linear"
316};
317
312static int sched_debug_show(struct seq_file *m, void *v) 318static int sched_debug_show(struct seq_file *m, void *v)
313{ 319{
314 u64 now = ktime_to_ns(ktime_get()); 320 u64 now = ktime_to_ns(ktime_get());
@@ -334,6 +340,10 @@ static int sched_debug_show(struct seq_file *m, void *v)
334#undef PN 340#undef PN
335#undef P 341#undef P
336 342
343 SEQ_printf(m, " .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling",
344 sysctl_sched_tunable_scaling,
345 sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
346
337 for_each_online_cpu(cpu) 347 for_each_online_cpu(cpu)
338 print_cpu(m, cpu); 348 print_cpu(m, cpu);
339 349
@@ -399,7 +409,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
399 PN(se.sum_exec_runtime); 409 PN(se.sum_exec_runtime);
400 PN(se.avg_overlap); 410 PN(se.avg_overlap);
401 PN(se.avg_wakeup); 411 PN(se.avg_wakeup);
402 PN(se.avg_running);
403 412
404 nr_switches = p->nvcsw + p->nivcsw; 413 nr_switches = p->nvcsw + p->nivcsw;
405 414
@@ -423,7 +432,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
423 P(se.nr_failed_migrations_running); 432 P(se.nr_failed_migrations_running);
424 P(se.nr_failed_migrations_hot); 433 P(se.nr_failed_migrations_hot);
425 P(se.nr_forced_migrations); 434 P(se.nr_forced_migrations);
426 P(se.nr_forced2_migrations);
427 P(se.nr_wakeups); 435 P(se.nr_wakeups);
428 P(se.nr_wakeups_sync); 436 P(se.nr_wakeups_sync);
429 P(se.nr_wakeups_migrate); 437 P(se.nr_wakeups_migrate);
@@ -499,7 +507,6 @@ void proc_sched_set_task(struct task_struct *p)
499 p->se.nr_failed_migrations_running = 0; 507 p->se.nr_failed_migrations_running = 0;
500 p->se.nr_failed_migrations_hot = 0; 508 p->se.nr_failed_migrations_hot = 0;
501 p->se.nr_forced_migrations = 0; 509 p->se.nr_forced_migrations = 0;
502 p->se.nr_forced2_migrations = 0;
503 p->se.nr_wakeups = 0; 510 p->se.nr_wakeups = 0;
504 p->se.nr_wakeups_sync = 0; 511 p->se.nr_wakeups_sync = 0;
505 p->se.nr_wakeups_migrate = 0; 512 p->se.nr_wakeups_migrate = 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f61837ad336d..804a411838f1 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -21,6 +21,7 @@
21 */ 21 */
22 22
23#include <linux/latencytop.h> 23#include <linux/latencytop.h>
24#include <linux/sched.h>
24 25
25/* 26/*
26 * Targeted preemption latency for CPU-bound tasks: 27 * Targeted preemption latency for CPU-bound tasks:
@@ -35,12 +36,26 @@
35 * run vmstat and monitor the context-switches (cs) field) 36 * run vmstat and monitor the context-switches (cs) field)
36 */ 37 */
37unsigned int sysctl_sched_latency = 5000000ULL; 38unsigned int sysctl_sched_latency = 5000000ULL;
39unsigned int normalized_sysctl_sched_latency = 5000000ULL;
40
41/*
42 * The initial- and re-scaling of tunables is configurable
43 * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
44 *
45 * Options are:
46 * SCHED_TUNABLESCALING_NONE - unscaled, always *1
47 * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
48 * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
49 */
50enum sched_tunable_scaling sysctl_sched_tunable_scaling
51 = SCHED_TUNABLESCALING_LOG;
38 52
39/* 53/*
40 * Minimal preemption granularity for CPU-bound tasks: 54 * Minimal preemption granularity for CPU-bound tasks:
41 * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) 55 * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
42 */ 56 */
43unsigned int sysctl_sched_min_granularity = 1000000ULL; 57unsigned int sysctl_sched_min_granularity = 1000000ULL;
58unsigned int normalized_sysctl_sched_min_granularity = 1000000ULL;
44 59
45/* 60/*
46 * is kept at sysctl_sched_latency / sysctl_sched_min_granularity 61 * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
@@ -70,6 +85,7 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
70 * have immediate wakeup/sleep latencies. 85 * have immediate wakeup/sleep latencies.
71 */ 86 */
72unsigned int sysctl_sched_wakeup_granularity = 1000000UL; 87unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
88unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
73 89
74const_debug unsigned int sysctl_sched_migration_cost = 500000UL; 90const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
75 91
@@ -383,11 +399,12 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
383 */ 399 */
384 400
385#ifdef CONFIG_SCHED_DEBUG 401#ifdef CONFIG_SCHED_DEBUG
386int sched_nr_latency_handler(struct ctl_table *table, int write, 402int sched_proc_update_handler(struct ctl_table *table, int write,
387 void __user *buffer, size_t *lenp, 403 void __user *buffer, size_t *lenp,
388 loff_t *ppos) 404 loff_t *ppos)
389{ 405{
390 int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 406 int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
407 int factor = get_update_sysctl_factor();
391 408
392 if (ret || !write) 409 if (ret || !write)
393 return ret; 410 return ret;
@@ -395,6 +412,14 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
395 sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency, 412 sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
396 sysctl_sched_min_granularity); 413 sysctl_sched_min_granularity);
397 414
415#define WRT_SYSCTL(name) \
416 (normalized_sysctl_##name = sysctl_##name / (factor))
417 WRT_SYSCTL(sched_min_granularity);
418 WRT_SYSCTL(sched_latency);
419 WRT_SYSCTL(sched_wakeup_granularity);
420 WRT_SYSCTL(sched_shares_ratelimit);
421#undef WRT_SYSCTL
422
398 return 0; 423 return 0;
399} 424}
400#endif 425#endif
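sched_proc_update_handler() is the write-side counterpart of update_sysctl(): when an administrator writes a new value, WRT_SYSCTL() divides it by the current scaling factor and stores the result in the normalized_sysctl_* baseline, so that a later rescale (cpu hotplug or a change of sched_tunable_scaling) multiplies from a stable base instead of compounding. A small round-trip sketch of that divide-on-write / multiply-on-rescale behaviour, with made-up values:

#include <stdio.h>

static unsigned int normalized_latency = 5000000;       /* baseline, ns */
static unsigned int sysctl_latency;                     /* effective value, ns */

/* update_sysctl(): effective = baseline * factor */
static void rescale(unsigned int factor)
{
        sysctl_latency = normalized_latency * factor;
}

/* sched_proc_update_handler(): baseline = written value / factor */
static void write_sysctl(unsigned int value, unsigned int factor)
{
        sysctl_latency = value;
        normalized_latency = value / factor;
}

int main(void)
{
        rescale(4);                      /* e.g. 8 cpus with logarithmic scaling */
        printf("effective latency: %u\n", sysctl_latency);

        write_sysctl(24000000, 4);       /* admin writes 24ms while factor is 4 */
        rescale(2);                      /* later rescale with a smaller factor */
        printf("baseline %u, rescaled latency %u\n",
               normalized_latency, sysctl_latency);
        return 0;
}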
@@ -1403,7 +1428,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
1403 new_cpu = prev_cpu; 1428 new_cpu = prev_cpu;
1404 } 1429 }
1405 1430
1406 rcu_read_lock();
1407 for_each_domain(cpu, tmp) { 1431 for_each_domain(cpu, tmp) {
1408 /* 1432 /*
1409 * If power savings logic is enabled for a domain, see if we 1433 * If power savings logic is enabled for a domain, see if we
@@ -1484,10 +1508,8 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
1484 update_shares(tmp); 1508 update_shares(tmp);
1485 } 1509 }
1486 1510
1487 if (affine_sd && wake_affine(affine_sd, p, sync)) { 1511 if (affine_sd && wake_affine(affine_sd, p, sync))
1488 new_cpu = cpu; 1512 return cpu;
1489 goto out;
1490 }
1491 1513
1492 while (sd) { 1514 while (sd) {
1493 int load_idx = sd->forkexec_idx; 1515 int load_idx = sd->forkexec_idx;
@@ -1528,8 +1550,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
1528 /* while loop will break here if sd == NULL */ 1550 /* while loop will break here if sd == NULL */
1529 } 1551 }
1530 1552
1531out:
1532 rcu_read_unlock();
1533 return new_cpu; 1553 return new_cpu;
1534} 1554}
1535#endif /* CONFIG_SMP */ 1555#endif /* CONFIG_SMP */
@@ -1651,12 +1671,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
1651 int sync = wake_flags & WF_SYNC; 1671 int sync = wake_flags & WF_SYNC;
1652 int scale = cfs_rq->nr_running >= sched_nr_latency; 1672 int scale = cfs_rq->nr_running >= sched_nr_latency;
1653 1673
1654 update_curr(cfs_rq); 1674 if (unlikely(rt_prio(p->prio)))
1655 1675 goto preempt;
1656 if (unlikely(rt_prio(p->prio))) {
1657 resched_task(curr);
1658 return;
1659 }
1660 1676
1661 if (unlikely(p->sched_class != &fair_sched_class)) 1677 if (unlikely(p->sched_class != &fair_sched_class))
1662 return; 1678 return;
@@ -1682,50 +1698,44 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
1682 return; 1698 return;
1683 1699
1684 /* Idle tasks are by definition preempted by everybody. */ 1700 /* Idle tasks are by definition preempted by everybody. */
1685 if (unlikely(curr->policy == SCHED_IDLE)) { 1701 if (unlikely(curr->policy == SCHED_IDLE))
1686 resched_task(curr); 1702 goto preempt;
1687 return;
1688 }
1689 1703
1690 if ((sched_feat(WAKEUP_SYNC) && sync) || 1704 if (sched_feat(WAKEUP_SYNC) && sync)
1691 (sched_feat(WAKEUP_OVERLAP) && 1705 goto preempt;
1692 (se->avg_overlap < sysctl_sched_migration_cost &&
1693 pse->avg_overlap < sysctl_sched_migration_cost))) {
1694 resched_task(curr);
1695 return;
1696 }
1697 1706
1698 if (sched_feat(WAKEUP_RUNNING)) { 1707 if (sched_feat(WAKEUP_OVERLAP) &&
1699 if (pse->avg_running < se->avg_running) { 1708 se->avg_overlap < sysctl_sched_migration_cost &&
1700 set_next_buddy(pse); 1709 pse->avg_overlap < sysctl_sched_migration_cost)
1701 resched_task(curr); 1710 goto preempt;
1702 return;
1703 }
1704 }
1705 1711
1706 if (!sched_feat(WAKEUP_PREEMPT)) 1712 if (!sched_feat(WAKEUP_PREEMPT))
1707 return; 1713 return;
1708 1714
1715 update_curr(cfs_rq);
1709 find_matching_se(&se, &pse); 1716 find_matching_se(&se, &pse);
1710
1711 BUG_ON(!pse); 1717 BUG_ON(!pse);
1718 if (wakeup_preempt_entity(se, pse) == 1)
1719 goto preempt;
1712 1720
1713 if (wakeup_preempt_entity(se, pse) == 1) { 1721 return;
1714 resched_task(curr); 1722
1715 /* 1723preempt:
1716 * Only set the backward buddy when the current task is still 1724 resched_task(curr);
1717 * on the rq. This can happen when a wakeup gets interleaved 1725 /*
1718 * with schedule on the ->pre_schedule() or idle_balance() 1726 * Only set the backward buddy when the current task is still
1719 * point, either of which can * drop the rq lock. 1727 * on the rq. This can happen when a wakeup gets interleaved
1720 * 1728 * with schedule on the ->pre_schedule() or idle_balance()
1721 * Also, during early boot the idle thread is in the fair class, 1729 * point, either of which can * drop the rq lock.
1722 * for obvious reasons its a bad idea to schedule back to it. 1730 *
1723 */ 1731 * Also, during early boot the idle thread is in the fair class,
1724 if (unlikely(!se->on_rq || curr == rq->idle)) 1732 * for obvious reasons its a bad idea to schedule back to it.
1725 return; 1733 */
1726 if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) 1734 if (unlikely(!se->on_rq || curr == rq->idle))
1727 set_last_buddy(se); 1735 return;
1728 } 1736
1737 if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
1738 set_last_buddy(se);
1729} 1739}
1730 1740
1731static struct task_struct *pick_next_task_fair(struct rq *rq) 1741static struct task_struct *pick_next_task_fair(struct rq *rq)
@@ -1905,6 +1915,17 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1905 1915
1906 return 0; 1916 return 0;
1907} 1917}
1918
1919static void rq_online_fair(struct rq *rq)
1920{
1921 update_sysctl();
1922}
1923
1924static void rq_offline_fair(struct rq *rq)
1925{
1926 update_sysctl();
1927}
1928
1908#endif /* CONFIG_SMP */ 1929#endif /* CONFIG_SMP */
1909 1930
1910/* 1931/*
@@ -1922,28 +1943,30 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
1922} 1943}
1923 1944
1924/* 1945/*
1925 * Share the fairness runtime between parent and child, thus the 1946 * called on fork with the child task as argument from the parent's context
1926 * total amount of pressure for CPU stays equal - new tasks 1947 * - child not yet on the tasklist
1927 * get a chance to run but frequent forkers are not allowed to 1948 * - preemption disabled
1928 * monopolize the CPU. Note: the parent runqueue is locked,
1929 * the child is not running yet.
1930 */ 1949 */
1931static void task_new_fair(struct rq *rq, struct task_struct *p) 1950static void task_fork_fair(struct task_struct *p)
1932{ 1951{
1933 struct cfs_rq *cfs_rq = task_cfs_rq(p); 1952 struct cfs_rq *cfs_rq = task_cfs_rq(current);
1934 struct sched_entity *se = &p->se, *curr = cfs_rq->curr; 1953 struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
1935 int this_cpu = smp_processor_id(); 1954 int this_cpu = smp_processor_id();
1955 struct rq *rq = this_rq();
1956 unsigned long flags;
1957
1958 spin_lock_irqsave(&rq->lock, flags);
1936 1959
1937 sched_info_queued(p); 1960 if (unlikely(task_cpu(p) != this_cpu))
1961 __set_task_cpu(p, this_cpu);
1938 1962
1939 update_curr(cfs_rq); 1963 update_curr(cfs_rq);
1964
1940 if (curr) 1965 if (curr)
1941 se->vruntime = curr->vruntime; 1966 se->vruntime = curr->vruntime;
1942 place_entity(cfs_rq, se, 1); 1967 place_entity(cfs_rq, se, 1);
1943 1968
1944 /* 'curr' will be NULL if the child belongs to a different group */ 1969 if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
1945 if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
1946 curr && entity_before(curr, se)) {
1947 /* 1970 /*
1948 * Upon rescheduling, sched_class::put_prev_task() will place 1971 * Upon rescheduling, sched_class::put_prev_task() will place
1949 * 'current' within the tree based on its new key value. 1972 * 'current' within the tree based on its new key value.
@@ -1952,7 +1975,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
1952 resched_task(rq->curr); 1975 resched_task(rq->curr);
1953 } 1976 }
1954 1977
1955 enqueue_task_fair(rq, p, 0); 1978 spin_unlock_irqrestore(&rq->lock, flags);
1956} 1979}
1957 1980
1958/* 1981/*
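task_fork_fair() replaces task_new_fair(): it runs from the parent's context with the child not yet on the task list, takes the runqueue lock itself, seeds the child's vruntime from the parent's current entity and lets place_entity() adjust it; with sched_child_runs_first set and the parent still ahead of the child, the current task is rescheduled so the child goes first. A toy model of that placement decision with invented vruntime numbers; the key exchange in the child-runs-first branch is an assumption of the sketch, as that line sits outside the hunk shown above:

#include <stdio.h>

struct toy_se { unsigned long long vruntime; };

static int entity_before(const struct toy_se *a, const struct toy_se *b)
{
        return (long long)(a->vruntime - b->vruntime) < 0;
}

int main(void)
{
        int child_runs_first = 1;                        /* sysctl_sched_child_runs_first */
        struct toy_se parent = { .vruntime = 1000000 };  /* invented numbers */
        struct toy_se child;

        child.vruntime = parent.vruntime;                /* se->vruntime = curr->vruntime */
        child.vruntime += 500000;                        /* stand-in for place_entity()'s
                                                            initial debit for a new task */

        if (child_runs_first && entity_before(&parent, &child)) {
                /* assumed behaviour: make the child leftmost by exchanging
                   the two keys, then reschedule the current task */
                unsigned long long tmp = parent.vruntime;

                parent.vruntime = child.vruntime;
                child.vruntime = tmp;
        }

        printf("parent=%llu child=%llu -> %s runs first\n",
               parent.vruntime, child.vruntime,
               entity_before(&child, &parent) ? "child" : "parent");
        return 0;
}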
@@ -2014,21 +2037,17 @@ static void moved_group_fair(struct task_struct *p)
2014} 2037}
2015#endif 2038#endif
2016 2039
2017unsigned int get_rr_interval_fair(struct task_struct *task) 2040unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
2018{ 2041{
2019 struct sched_entity *se = &task->se; 2042 struct sched_entity *se = &task->se;
2020 unsigned long flags;
2021 struct rq *rq;
2022 unsigned int rr_interval = 0; 2043 unsigned int rr_interval = 0;
2023 2044
2024 /* 2045 /*
2025 * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise 2046 * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
2026 * idle runqueue: 2047 * idle runqueue:
2027 */ 2048 */
2028 rq = task_rq_lock(task, &flags);
2029 if (rq->cfs.load.weight) 2049 if (rq->cfs.load.weight)
2030 rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); 2050 rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
2031 task_rq_unlock(rq, &flags);
2032 2051
2033 return rr_interval; 2052 return rr_interval;
2034} 2053}
@@ -2052,11 +2071,13 @@ static const struct sched_class fair_sched_class = {
2052 2071
2053 .load_balance = load_balance_fair, 2072 .load_balance = load_balance_fair,
2054 .move_one_task = move_one_task_fair, 2073 .move_one_task = move_one_task_fair,
2074 .rq_online = rq_online_fair,
2075 .rq_offline = rq_offline_fair,
2055#endif 2076#endif
2056 2077
2057 .set_curr_task = set_curr_task_fair, 2078 .set_curr_task = set_curr_task_fair,
2058 .task_tick = task_tick_fair, 2079 .task_tick = task_tick_fair,
2059 .task_new = task_new_fair, 2080 .task_fork = task_fork_fair,
2060 2081
2061 .prio_changed = prio_changed_fair, 2082 .prio_changed = prio_changed_fair,
2062 .switched_to = switched_to_fair, 2083 .switched_to = switched_to_fair,
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 0d94083582c7..d5059fd761d9 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -54,11 +54,6 @@ SCHED_FEAT(WAKEUP_SYNC, 0)
54SCHED_FEAT(WAKEUP_OVERLAP, 0) 54SCHED_FEAT(WAKEUP_OVERLAP, 0)
55 55
56/* 56/*
57 * Wakeup preemption towards tasks that run short
58 */
59SCHED_FEAT(WAKEUP_RUNNING, 0)
60
61/*
62 * Use the SYNC wakeup hint, pipes and the likes use this to indicate 57 * Use the SYNC wakeup hint, pipes and the likes use this to indicate
63 * the remote end is likely to consume the data we just wrote, and 58 * the remote end is likely to consume the data we just wrote, and
64 * therefore has cache benefit from being placed on the same cpu, see 59 * therefore has cache benefit from being placed on the same cpu, see
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index b133a28fcde3..33d5384a73a8 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -97,7 +97,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
97 check_preempt_curr(rq, p, 0); 97 check_preempt_curr(rq, p, 0);
98} 98}
99 99
100unsigned int get_rr_interval_idle(struct task_struct *task) 100unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
101{ 101{
102 return 0; 102 return 0;
103} 103}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 5c5fef378415..aecbd9c6b20c 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1721,7 +1721,7 @@ static void set_curr_task_rt(struct rq *rq)
1721 dequeue_pushable_task(rq, p); 1721 dequeue_pushable_task(rq, p);
1722} 1722}
1723 1723
1724unsigned int get_rr_interval_rt(struct task_struct *task) 1724unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
1725{ 1725{
1726 /* 1726 /*
1727 * Time slice is 0 for SCHED_FIFO tasks 1727 * Time slice is 0 for SCHED_FIFO tasks
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9327a26765c5..554ac4894f0f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -244,6 +244,10 @@ static int min_sched_granularity_ns = 100000; /* 100 usecs */
244static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ 244static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */
245static int min_wakeup_granularity_ns; /* 0 usecs */ 245static int min_wakeup_granularity_ns; /* 0 usecs */
246static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ 246static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
247static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
248static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
249static int min_sched_shares_ratelimit = 100000; /* 100 usec */
250static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
247#endif 251#endif
248 252
249static struct ctl_table kern_table[] = { 253static struct ctl_table kern_table[] = {
@@ -260,7 +264,7 @@ static struct ctl_table kern_table[] = {
260 .data = &sysctl_sched_min_granularity, 264 .data = &sysctl_sched_min_granularity,
261 .maxlen = sizeof(unsigned int), 265 .maxlen = sizeof(unsigned int),
262 .mode = 0644, 266 .mode = 0644,
263 .proc_handler = sched_nr_latency_handler, 267 .proc_handler = sched_proc_update_handler,
264 .extra1 = &min_sched_granularity_ns, 268 .extra1 = &min_sched_granularity_ns,
265 .extra2 = &max_sched_granularity_ns, 269 .extra2 = &max_sched_granularity_ns,
266 }, 270 },
@@ -269,7 +273,7 @@ static struct ctl_table kern_table[] = {
269 .data = &sysctl_sched_latency, 273 .data = &sysctl_sched_latency,
270 .maxlen = sizeof(unsigned int), 274 .maxlen = sizeof(unsigned int),
271 .mode = 0644, 275 .mode = 0644,
272 .proc_handler = sched_nr_latency_handler, 276 .proc_handler = sched_proc_update_handler,
273 .extra1 = &min_sched_granularity_ns, 277 .extra1 = &min_sched_granularity_ns,
274 .extra2 = &max_sched_granularity_ns, 278 .extra2 = &max_sched_granularity_ns,
275 }, 279 },
@@ -278,7 +282,7 @@ static struct ctl_table kern_table[] = {
278 .data = &sysctl_sched_wakeup_granularity, 282 .data = &sysctl_sched_wakeup_granularity,
279 .maxlen = sizeof(unsigned int), 283 .maxlen = sizeof(unsigned int),
280 .mode = 0644, 284 .mode = 0644,
281 .proc_handler = proc_dointvec_minmax, 285 .proc_handler = sched_proc_update_handler,
282 .extra1 = &min_wakeup_granularity_ns, 286 .extra1 = &min_wakeup_granularity_ns,
283 .extra2 = &max_wakeup_granularity_ns, 287 .extra2 = &max_wakeup_granularity_ns,
284 }, 288 },
@@ -287,7 +291,18 @@ static struct ctl_table kern_table[] = {
287 .data = &sysctl_sched_shares_ratelimit, 291 .data = &sysctl_sched_shares_ratelimit,
288 .maxlen = sizeof(unsigned int), 292 .maxlen = sizeof(unsigned int),
289 .mode = 0644, 293 .mode = 0644,
290 .proc_handler = proc_dointvec, 294 .proc_handler = sched_proc_update_handler,
295 .extra1 = &min_sched_shares_ratelimit,
296 .extra2 = &max_sched_shares_ratelimit,
297 },
298 {
299 .procname = "sched_tunable_scaling",
300 .data = &sysctl_sched_tunable_scaling,
301 .maxlen = sizeof(enum sched_tunable_scaling),
302 .mode = 0644,
303 .proc_handler = sched_proc_update_handler,
304 .extra1 = &min_sched_tunable_scaling,
305 .extra2 = &max_sched_tunable_scaling,
291 }, 306 },
292 { 307 {
293 .procname = "sched_shares_thresh", 308 .procname = "sched_shares_thresh",
@@ -298,13 +313,6 @@ static struct ctl_table kern_table[] = {
298 .extra1 = &zero, 313 .extra1 = &zero,
299 }, 314 },
300 { 315 {
301 .procname = "sched_features",
302 .data = &sysctl_sched_features,
303 .maxlen = sizeof(unsigned int),
304 .mode = 0644,
305 .proc_handler = proc_dointvec,
306 },
307 {
308 .procname = "sched_migration_cost", 316 .procname = "sched_migration_cost",
309 .data = &sysctl_sched_migration_cost, 317 .data = &sysctl_sched_migration_cost,
310 .maxlen = sizeof(unsigned int), 318 .maxlen = sizeof(unsigned int),
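The new kern_table entry makes the scaling mode tunable at run time; assuming the usual mapping of kern_table entries under /proc/sys/kernel/, the knob should appear as /proc/sys/kernel/sched_tunable_scaling with values 0 (none), 1 (logarithmic) and 2 (linear), clamped by the extra1/extra2 bounds above. A hedged sketch of reading it from user space, with the path and value meanings assumed as just described:

#include <stdio.h>

int main(void)
{
        /* Assumed path: kern_table entries normally surface here. */
        const char *path = "/proc/sys/kernel/sched_tunable_scaling";
        FILE *f = fopen(path, "r");
        int mode;

        if (!f) {
                perror(path);
                return 1;
        }
        if (fscanf(f, "%d", &mode) != 1) {
                fprintf(stderr, "could not parse %s\n", path);
                fclose(f);
                return 1;
        }
        fclose(f);

        printf("sched_tunable_scaling = %d (%s)\n", mode,
               mode == 0 ? "none" : mode == 1 ? "logarithmic" : "linear");
        return 0;
}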
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 665c76edbf17..9d80db4747d4 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -150,6 +150,9 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
150 P_ns(expires_next); 150 P_ns(expires_next);
151 P(hres_active); 151 P(hres_active);
152 P(nr_events); 152 P(nr_events);
153 P(nr_retries);
154 P(nr_hangs);
155 P_ns(max_hang_time);
153#endif 156#endif
154#undef P 157#undef P
155#undef P_ns 158#undef P_ns
@@ -254,7 +257,7 @@ static int timer_list_show(struct seq_file *m, void *v)
254 u64 now = ktime_to_ns(ktime_get()); 257 u64 now = ktime_to_ns(ktime_get());
255 int cpu; 258 int cpu;
256 259
257 SEQ_printf(m, "Timer List Version: v0.4\n"); 260 SEQ_printf(m, "Timer List Version: v0.5\n");
258 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); 261 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
259 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); 262 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
260 263
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 874f2893cff0..88bd9ae2a9ed 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1361,11 +1361,7 @@ int trace_array_vprintk(struct trace_array *tr,
1361 pause_graph_tracing(); 1361 pause_graph_tracing();
1362 raw_local_irq_save(irq_flags); 1362 raw_local_irq_save(irq_flags);
1363 __raw_spin_lock(&trace_buf_lock); 1363 __raw_spin_lock(&trace_buf_lock);
1364 if (args == NULL) { 1364 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1365 strncpy(trace_buf, fmt, TRACE_BUF_SIZE);
1366 len = strlen(trace_buf);
1367 } else
1368 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1369 1365
1370 size = sizeof(*entry) + len + 1; 1366 size = sizeof(*entry) + len + 1;
1371 buffer = tr->buffer; 1367 buffer = tr->buffer;
@@ -1516,6 +1512,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1516 int i = (int)*pos; 1512 int i = (int)*pos;
1517 void *ent; 1513 void *ent;
1518 1514
1515 WARN_ON_ONCE(iter->leftover);
1516
1519 (*pos)++; 1517 (*pos)++;
1520 1518
1521 /* can't go backwards */ 1519 /* can't go backwards */
@@ -1614,8 +1612,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1614 ; 1612 ;
1615 1613
1616 } else { 1614 } else {
1617 l = *pos - 1; 1615 /*
1618 p = s_next(m, p, &l); 1616 * If we overflowed the seq_file before, then we want
1617 * to just reuse the trace_seq buffer again.
1618 */
1619 if (iter->leftover)
1620 p = iter;
1621 else {
1622 l = *pos - 1;
1623 p = s_next(m, p, &l);
1624 }
1619 } 1625 }
1620 1626
1621 trace_event_read_lock(); 1627 trace_event_read_lock();
@@ -1923,6 +1929,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1923static int s_show(struct seq_file *m, void *v) 1929static int s_show(struct seq_file *m, void *v)
1924{ 1930{
1925 struct trace_iterator *iter = v; 1931 struct trace_iterator *iter = v;
1932 int ret;
1926 1933
1927 if (iter->ent == NULL) { 1934 if (iter->ent == NULL) {
1928 if (iter->tr) { 1935 if (iter->tr) {
@@ -1942,9 +1949,27 @@ static int s_show(struct seq_file *m, void *v)
1942 if (!(trace_flags & TRACE_ITER_VERBOSE)) 1949 if (!(trace_flags & TRACE_ITER_VERBOSE))
1943 print_func_help_header(m); 1950 print_func_help_header(m);
1944 } 1951 }
1952 } else if (iter->leftover) {
1953 /*
1954 * If we filled the seq_file buffer earlier, we
1955 * want to just show it now.
1956 */
1957 ret = trace_print_seq(m, &iter->seq);
1958
1959 /* ret should this time be zero, but you never know */
1960 iter->leftover = ret;
1961
1945 } else { 1962 } else {
1946 print_trace_line(iter); 1963 print_trace_line(iter);
1947 trace_print_seq(m, &iter->seq); 1964 ret = trace_print_seq(m, &iter->seq);
1965 /*
1966 * If we overflow the seq_file buffer, then it will
1967 * ask us for this data again at start up.
1968 * Use that instead.
1969 * ret is 0 if seq_file write succeeded.
1970 * -1 otherwise.
1971 */
1972 iter->leftover = ret;
1948 } 1973 }
1949 1974
1950 return 0; 1975 return 0;
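The s_show()/s_start() changes deal with trace lines longer than the space seq_file has left: instead of dropping an entry whose formatted output did not fit, the iterator remembers that it still owns a rendered line (iter->leftover) and re-emits the buffered trace_seq on the next read cycle. A toy model of that keep-and-retry loop, with a deliberately tiny output page and invented helper names:

#include <stdio.h>
#include <string.h>

#define PAGE_SZ 16                      /* deliberately tiny "seq_file" page */

/* Try to append a rendered trace line to the current page.
 * Returns 0 on success, -1 when the page is too full (overflow). */
static int emit(const char *rendered, char *page, size_t *used)
{
        size_t len = strlen(rendered);

        if (*used + len > PAGE_SZ)
                return -1;
        memcpy(page + *used, rendered, len);
        *used += len;
        return 0;
}

int main(void)
{
        const char *lines[] = { "short\n", "a-much-longer-line\n", "tail\n" };
        char page[PAGE_SZ + 1];
        size_t used = 0, i = 0;
        int pageno = 0;

        while (i < 3) {
                if (emit(lines[i], page, &used) == 0) {
                        i++;                            /* line consumed */
                } else if (used > 0) {
                        page[used] = '\0';              /* page full: ship it ... */
                        printf("page %d: %s", pageno++, page);
                        used = 0;                       /* ... and retry the same
                                                           line on the next page */
                } else {
                        i++;                            /* line larger than a page:
                                                           real code truncates, the
                                                           toy just skips it */
                }
        }
        page[used] = '\0';
        printf("page %d: %s", pageno, page);
        return 0;
}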
@@ -2898,6 +2923,10 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
2898 else 2923 else
2899 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); 2924 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2900 2925
2926
2927 if (iter->trace->pipe_close)
2928 iter->trace->pipe_close(iter);
2929
2901 mutex_unlock(&trace_types_lock); 2930 mutex_unlock(&trace_types_lock);
2902 2931
2903 free_cpumask_var(iter->started); 2932 free_cpumask_var(iter->started);
@@ -3320,6 +3349,16 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3320 return cnt; 3349 return cnt;
3321} 3350}
3322 3351
3352static int mark_printk(const char *fmt, ...)
3353{
3354 int ret;
3355 va_list args;
3356 va_start(args, fmt);
3357 ret = trace_vprintk(0, fmt, args);
3358 va_end(args);
3359 return ret;
3360}
3361
3323static ssize_t 3362static ssize_t
3324tracing_mark_write(struct file *filp, const char __user *ubuf, 3363tracing_mark_write(struct file *filp, const char __user *ubuf,
3325 size_t cnt, loff_t *fpos) 3364 size_t cnt, loff_t *fpos)
@@ -3346,7 +3385,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3346 } else 3385 } else
3347 buf[cnt] = '\0'; 3386 buf[cnt] = '\0';
3348 3387
3349 cnt = trace_vprintk(0, buf, NULL); 3388 cnt = mark_printk("%s", buf);
3350 kfree(buf); 3389 kfree(buf);
3351 *fpos += cnt; 3390 *fpos += cnt;
3352 3391
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1d7f4830a80d..7fa33cab6962 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -272,6 +272,7 @@ struct tracer_flags {
272 * @pipe_open: called when the trace_pipe file is opened 272 * @pipe_open: called when the trace_pipe file is opened
273 * @wait_pipe: override how the user waits for traces on trace_pipe 273 * @wait_pipe: override how the user waits for traces on trace_pipe
274 * @close: called when the trace file is released 274 * @close: called when the trace file is released
275 * @pipe_close: called when the trace_pipe file is released
275 * @read: override the default read callback on trace_pipe 276 * @read: override the default read callback on trace_pipe
276 * @splice_read: override the default splice_read callback on trace_pipe 277 * @splice_read: override the default splice_read callback on trace_pipe
277 * @selftest: selftest to run on boot (see trace_selftest.c) 278 * @selftest: selftest to run on boot (see trace_selftest.c)
@@ -290,6 +291,7 @@ struct tracer {
290 void (*pipe_open)(struct trace_iterator *iter); 291 void (*pipe_open)(struct trace_iterator *iter);
291 void (*wait_pipe)(struct trace_iterator *iter); 292 void (*wait_pipe)(struct trace_iterator *iter);
292 void (*close)(struct trace_iterator *iter); 293 void (*close)(struct trace_iterator *iter);
294 void (*pipe_close)(struct trace_iterator *iter);
293 ssize_t (*read)(struct trace_iterator *iter, 295 ssize_t (*read)(struct trace_iterator *iter,
294 struct file *filp, char __user *ubuf, 296 struct file *filp, char __user *ubuf,
295 size_t cnt, loff_t *ppos); 297 size_t cnt, loff_t *ppos);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 45e6c01b2e4d..a43d009c561a 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -14,9 +14,20 @@
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h" 15#include "trace_output.h"
16 16
17struct fgraph_data { 17struct fgraph_cpu_data {
18 pid_t last_pid; 18 pid_t last_pid;
19 int depth; 19 int depth;
20 int ignore;
21};
22
23struct fgraph_data {
24 struct fgraph_cpu_data *cpu_data;
25
26 /* Place to preserve last processed entry. */
27 struct ftrace_graph_ent_entry ent;
28 struct ftrace_graph_ret_entry ret;
29 int failed;
30 int cpu;
20}; 31};
21 32
22#define TRACE_GRAPH_INDENT 2 33#define TRACE_GRAPH_INDENT 2
@@ -384,7 +395,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
384 if (!data) 395 if (!data)
385 return TRACE_TYPE_HANDLED; 396 return TRACE_TYPE_HANDLED;
386 397
387 last_pid = &(per_cpu_ptr(data, cpu)->last_pid); 398 last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
388 399
389 if (*last_pid == pid) 400 if (*last_pid == pid)
390 return TRACE_TYPE_HANDLED; 401 return TRACE_TYPE_HANDLED;
@@ -435,26 +446,49 @@ static struct ftrace_graph_ret_entry *
435get_return_for_leaf(struct trace_iterator *iter, 446get_return_for_leaf(struct trace_iterator *iter,
436 struct ftrace_graph_ent_entry *curr) 447 struct ftrace_graph_ent_entry *curr)
437{ 448{
438 struct ring_buffer_iter *ring_iter; 449 struct fgraph_data *data = iter->private;
450 struct ring_buffer_iter *ring_iter = NULL;
439 struct ring_buffer_event *event; 451 struct ring_buffer_event *event;
440 struct ftrace_graph_ret_entry *next; 452 struct ftrace_graph_ret_entry *next;
441 453
442 ring_iter = iter->buffer_iter[iter->cpu]; 454 /*
455 * If the previous output failed to write to the seq buffer,
456 * then we just reuse the data from before.
457 */
458 if (data && data->failed) {
459 curr = &data->ent;
460 next = &data->ret;
461 } else {
443 462
444 /* First peek to compare current entry and the next one */ 463 ring_iter = iter->buffer_iter[iter->cpu];
445 if (ring_iter) 464
446 event = ring_buffer_iter_peek(ring_iter, NULL); 465 /* First peek to compare current entry and the next one */
447 else { 466 if (ring_iter)
448 /* We need to consume the current entry to see the next one */ 467 event = ring_buffer_iter_peek(ring_iter, NULL);
449 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); 468 else {
450 event = ring_buffer_peek(iter->tr->buffer, iter->cpu, 469 /*
451 NULL); 470 * We need to consume the current entry to see
452 } 471 * the next one.
472 */
473 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
474 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
475 NULL);
476 }
453 477
454 if (!event) 478 if (!event)
455 return NULL; 479 return NULL;
480
481 next = ring_buffer_event_data(event);
456 482
457 next = ring_buffer_event_data(event); 483 if (data) {
484 /*
485 * Save current and next entries for later reference
486 * if the output fails.
487 */
488 data->ent = *curr;
489 data->ret = *next;
490 }
491 }
458 492
459 if (next->ent.type != TRACE_GRAPH_RET) 493 if (next->ent.type != TRACE_GRAPH_RET)
460 return NULL; 494 return NULL;
@@ -640,7 +674,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
640 674
641 if (data) { 675 if (data) {
642 int cpu = iter->cpu; 676 int cpu = iter->cpu;
643 int *depth = &(per_cpu_ptr(data, cpu)->depth); 677 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
644 678
645 /* 679 /*
646 * Comments display at + 1 to depth. Since 680 * Comments display at + 1 to depth. Since
@@ -688,7 +722,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
688 722
689 if (data) { 723 if (data) {
690 int cpu = iter->cpu; 724 int cpu = iter->cpu;
691 int *depth = &(per_cpu_ptr(data, cpu)->depth); 725 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
692 726
693 *depth = call->depth; 727 *depth = call->depth;
694 } 728 }
@@ -782,19 +816,34 @@ static enum print_line_t
782print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 816print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
783 struct trace_iterator *iter) 817 struct trace_iterator *iter)
784{ 818{
785 int cpu = iter->cpu; 819 struct fgraph_data *data = iter->private;
786 struct ftrace_graph_ent *call = &field->graph_ent; 820 struct ftrace_graph_ent *call = &field->graph_ent;
787 struct ftrace_graph_ret_entry *leaf_ret; 821 struct ftrace_graph_ret_entry *leaf_ret;
822 static enum print_line_t ret;
823 int cpu = iter->cpu;
788 824
789 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) 825 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
790 return TRACE_TYPE_PARTIAL_LINE; 826 return TRACE_TYPE_PARTIAL_LINE;
791 827
792 leaf_ret = get_return_for_leaf(iter, field); 828 leaf_ret = get_return_for_leaf(iter, field);
793 if (leaf_ret) 829 if (leaf_ret)
794 return print_graph_entry_leaf(iter, field, leaf_ret, s); 830 ret = print_graph_entry_leaf(iter, field, leaf_ret, s);
795 else 831 else
796 return print_graph_entry_nested(iter, field, s, cpu); 832 ret = print_graph_entry_nested(iter, field, s, cpu);
797 833
834 if (data) {
835 /*
836 * If we failed to write our output, then we need to make
837 * note of it. Because we already consumed our entry.
838 */
839 if (s->full) {
840 data->failed = 1;
841 data->cpu = cpu;
842 } else
843 data->failed = 0;
844 }
845
846 return ret;
798} 847}
799 848
800static enum print_line_t 849static enum print_line_t
@@ -810,7 +859,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
810 859
811 if (data) { 860 if (data) {
812 int cpu = iter->cpu; 861 int cpu = iter->cpu;
813 int *depth = &(per_cpu_ptr(data, cpu)->depth); 862 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
814 863
815 /* 864 /*
816 * Comments display at + 1 to depth. This is the 865 * Comments display at + 1 to depth. This is the
@@ -873,7 +922,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
873 int i; 922 int i;
874 923
875 if (data) 924 if (data)
876 depth = per_cpu_ptr(data, iter->cpu)->depth; 925 depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
877 926
878 if (print_graph_prologue(iter, s, 0, 0)) 927 if (print_graph_prologue(iter, s, 0, 0))
879 return TRACE_TYPE_PARTIAL_LINE; 928 return TRACE_TYPE_PARTIAL_LINE;
@@ -941,8 +990,33 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
941enum print_line_t 990enum print_line_t
942print_graph_function(struct trace_iterator *iter) 991print_graph_function(struct trace_iterator *iter)
943{ 992{
993 struct ftrace_graph_ent_entry *field;
994 struct fgraph_data *data = iter->private;
944 struct trace_entry *entry = iter->ent; 995 struct trace_entry *entry = iter->ent;
945 struct trace_seq *s = &iter->seq; 996 struct trace_seq *s = &iter->seq;
997 int cpu = iter->cpu;
998 int ret;
999
1000 if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
1001 per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
1002 return TRACE_TYPE_HANDLED;
1003 }
1004
1005 /*
1006 * If the last output failed, there's a possibility we need
1007 * to print out the missing entry which would never go out.
1008 */
1009 if (data && data->failed) {
1010 field = &data->ent;
1011 iter->cpu = data->cpu;
1012 ret = print_graph_entry(field, s, iter);
1013 if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
1014 per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
1015 ret = TRACE_TYPE_NO_CONSUME;
1016 }
1017 iter->cpu = cpu;
1018 return ret;
1019 }
946 1020
947 switch (entry->type) { 1021 switch (entry->type) {
948 case TRACE_GRAPH_ENT: { 1022 case TRACE_GRAPH_ENT: {
@@ -952,7 +1026,7 @@ print_graph_function(struct trace_iterator *iter)
952 * sizeof(struct ftrace_graph_ent_entry) is very small, 1026 * sizeof(struct ftrace_graph_ent_entry) is very small,
953 * it can be safely saved at the stack. 1027 * it can be safely saved at the stack.
954 */ 1028 */
955 struct ftrace_graph_ent_entry *field, saved; 1029 struct ftrace_graph_ent_entry saved;
956 trace_assign_type(field, entry); 1030 trace_assign_type(field, entry);
957 saved = *field; 1031 saved = *field;
958 return print_graph_entry(&saved, s, iter); 1032 return print_graph_entry(&saved, s, iter);
@@ -1030,31 +1104,54 @@ static void print_graph_headers(struct seq_file *s)
1030static void graph_trace_open(struct trace_iterator *iter) 1104static void graph_trace_open(struct trace_iterator *iter)
1031{ 1105{
1032 /* pid and depth on the last trace processed */ 1106 /* pid and depth on the last trace processed */
1033 struct fgraph_data *data = alloc_percpu(struct fgraph_data); 1107 struct fgraph_data *data;
1034 int cpu; 1108 int cpu;
1035 1109
1110 iter->private = NULL;
1111
1112 data = kzalloc(sizeof(*data), GFP_KERNEL);
1036 if (!data) 1113 if (!data)
1037 pr_warning("function graph tracer: not enough memory\n"); 1114 goto out_err;
1038 else 1115
1039 for_each_possible_cpu(cpu) { 1116 data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
1040 pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid); 1117 if (!data->cpu_data)
1041 int *depth = &(per_cpu_ptr(data, cpu)->depth); 1118 goto out_err_free;
1042 *pid = -1; 1119
1043 *depth = 0; 1120 for_each_possible_cpu(cpu) {
1044 } 1121 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1122 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1123 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1124 *pid = -1;
1125 *depth = 0;
1126 *ignore = 0;
1127 }
1045 1128
1046 iter->private = data; 1129 iter->private = data;
1130
1131 return;
1132
1133 out_err_free:
1134 kfree(data);
1135 out_err:
1136 pr_warning("function graph tracer: not enough memory\n");
1047} 1137}
1048 1138
1049static void graph_trace_close(struct trace_iterator *iter) 1139static void graph_trace_close(struct trace_iterator *iter)
1050{ 1140{
1051 free_percpu(iter->private); 1141 struct fgraph_data *data = iter->private;
1142
1143 if (data) {
1144 free_percpu(data->cpu_data);
1145 kfree(data);
1146 }
1052} 1147}
1053 1148
1054static struct tracer graph_trace __read_mostly = { 1149static struct tracer graph_trace __read_mostly = {
1055 .name = "function_graph", 1150 .name = "function_graph",
1056 .open = graph_trace_open, 1151 .open = graph_trace_open,
1152 .pipe_open = graph_trace_open,
1057 .close = graph_trace_close, 1153 .close = graph_trace_close,
1154 .pipe_close = graph_trace_close,
1058 .wait_pipe = poll_wait_pipe, 1155 .wait_pipe = poll_wait_pipe,
1059 .init = graph_trace_init, 1156 .init = graph_trace_init,
1060 .reset = graph_trace_reset, 1157 .reset = graph_trace_reset,
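Note: the hunks above make the function-graph output restartable. Per-cpu state moves into fgraph_cpu_data, the current entry/return pair is cached in fgraph_data, and a failed seq write sets data->failed so the same entry is replayed on the next read instead of being dropped. A minimal user-space sketch of that save-and-retry idea; the names (seq, pending, seq_puts_demo) are stand-ins, not the kernel's:

#include <stdio.h>
#include <string.h>

struct seq {                            /* stand-in for trace_seq */
        char buf[32];
        size_t len;
        int full;
};

struct pending {                        /* stand-in for the saved ent/ret copy */
        char text[64];
        int failed;
};

static int seq_puts_demo(struct seq *s, const char *str)
{
        size_t len = strlen(str);

        if (s->full || len > sizeof(s->buf) - 1 - s->len) {
                s->full = 1;            /* sticky, like the new s->full flag */
                return 0;
        }
        memcpy(s->buf + s->len, str, len);
        s->len += len;
        return 1;
}

static void print_entry(struct seq *s, struct pending *p, const char *entry)
{
        const char *out = p->failed ? p->text : entry;

        if (!seq_puts_demo(s, out)) {
                snprintf(p->text, sizeof(p->text), "%s", out);
                p->failed = 1;          /* replay this entry on the next read */
        } else {
                p->failed = 0;
        }
}

int main(void)
{
        struct seq s = { .len = 0, .full = 0 };
        struct pending p = { .failed = 0 };

        print_entry(&s, &p, "func_a() {");
        print_entry(&s, &p, "  some_overly_long_nested_call_that_will_not_fit();");
        printf("full=%d failed=%d buffered=\"%.*s\"\n",
               s.full, p.failed, (int)s.len, s.buf);
        return 0;
}
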
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index aff5f80b59b8..b52d397e57eb 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -606,23 +606,22 @@ static int create_trace_probe(int argc, char **argv)
606 */ 606 */
607 struct trace_probe *tp; 607 struct trace_probe *tp;
608 int i, ret = 0; 608 int i, ret = 0;
609 int is_return = 0; 609 int is_return = 0, is_delete = 0;
610 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; 610 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
611 unsigned long offset = 0; 611 unsigned long offset = 0;
612 void *addr = NULL; 612 void *addr = NULL;
613 char buf[MAX_EVENT_NAME_LEN]; 613 char buf[MAX_EVENT_NAME_LEN];
614 614
615 if (argc < 2) { 615 /* argc must be >= 1 */
616 pr_info("Probe point is not specified.\n");
617 return -EINVAL;
618 }
619
620 if (argv[0][0] == 'p') 616 if (argv[0][0] == 'p')
621 is_return = 0; 617 is_return = 0;
622 else if (argv[0][0] == 'r') 618 else if (argv[0][0] == 'r')
623 is_return = 1; 619 is_return = 1;
620 else if (argv[0][0] == '-')
621 is_delete = 1;
624 else { 622 else {
625 pr_info("Probe definition must be started with 'p' or 'r'.\n"); 623 pr_info("Probe definition must be started with 'p', 'r' or"
624 " '-'.\n");
626 return -EINVAL; 625 return -EINVAL;
627 } 626 }
628 627
@@ -642,7 +641,29 @@ static int create_trace_probe(int argc, char **argv)
642 return -EINVAL; 641 return -EINVAL;
643 } 642 }
644 } 643 }
644 if (!group)
645 group = KPROBE_EVENT_SYSTEM;
645 646
647 if (is_delete) {
648 if (!event) {
649 pr_info("Delete command needs an event name.\n");
650 return -EINVAL;
651 }
652 tp = find_probe_event(event, group);
653 if (!tp) {
654 pr_info("Event %s/%s doesn't exist.\n", group, event);
655 return -ENOENT;
656 }
657 /* delete an event */
658 unregister_trace_probe(tp);
659 free_trace_probe(tp);
660 return 0;
661 }
662
663 if (argc < 2) {
664 pr_info("Probe point is not specified.\n");
665 return -EINVAL;
666 }
646 if (isdigit(argv[1][0])) { 667 if (isdigit(argv[1][0])) {
647 if (is_return) { 668 if (is_return) {
648 pr_info("Return probe point must be a symbol.\n"); 669 pr_info("Return probe point must be a symbol.\n");
@@ -671,8 +692,6 @@ static int create_trace_probe(int argc, char **argv)
671 argc -= 2; argv += 2; 692 argc -= 2; argv += 2;
672 693
673 /* setup a probe */ 694 /* setup a probe */
674 if (!group)
675 group = KPROBE_EVENT_SYSTEM;
676 if (!event) { 695 if (!event) {
677 /* Make a new event name */ 696 /* Make a new event name */
678 if (symbol) 697 if (symbol)
@@ -1114,7 +1133,7 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1114 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1133 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1115 1134
1116 ret = trace_define_common_fields(event_call); 1135 ret = trace_define_common_fields(event_call);
1117 if (!ret) 1136 if (ret)
1118 return ret; 1137 return ret;
1119 1138
1120 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); 1139 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
@@ -1132,7 +1151,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1132 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1151 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1133 1152
1134 ret = trace_define_common_fields(event_call); 1153 ret = trace_define_common_fields(event_call);
1135 if (!ret) 1154 if (ret)
1136 return ret; 1155 return ret;
1137 1156
1138 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); 1157 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
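Note: with the '-' command accepted above, a probe event created through the kprobe_events control file can be removed individually instead of clearing the whole file. A small user-space sketch; the debugfs mount point, the event name "myprobe" and the probed symbol "do_sys_open" are only examples, and writing the file needs root with debugfs mounted:

#include <stdio.h>

static int kprobe_cmd(const char *cmd)
{
        FILE *f = fopen("/sys/kernel/debug/tracing/kprobe_events", "w");

        if (!f)
                return -1;
        fprintf(f, "%s\n", cmd);
        return fclose(f);
}

int main(void)
{
        kprobe_cmd("p:myprobe do_sys_open");    /* add a probe event */
        kprobe_cmd("-:myprobe");                /* delete it (new '-' syntax) */
        return 0;
}
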
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index ddfa0fd43bc0..acb87d4a4ac1 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -79,11 +79,12 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
79} 79}
80#endif /* CONFIG_PROFILE_KSYM_TRACER */ 80#endif /* CONFIG_PROFILE_KSYM_TRACER */
81 81
82void ksym_hbp_handler(struct perf_event *hbp, void *data) 82void ksym_hbp_handler(struct perf_event *hbp, int nmi,
83 struct perf_sample_data *data,
84 struct pt_regs *regs)
83{ 85{
84 struct ring_buffer_event *event; 86 struct ring_buffer_event *event;
85 struct ksym_trace_entry *entry; 87 struct ksym_trace_entry *entry;
86 struct pt_regs *regs = data;
87 struct ring_buffer *buffer; 88 struct ring_buffer *buffer;
88 int pc; 89 int pc;
89 90
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index b6c12c6a1bcd..8e46b3323cdc 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -23,13 +23,21 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
23 23
24static int next_event_type = __TRACE_LAST_TYPE + 1; 24static int next_event_type = __TRACE_LAST_TYPE + 1;
25 25
26void trace_print_seq(struct seq_file *m, struct trace_seq *s) 26int trace_print_seq(struct seq_file *m, struct trace_seq *s)
27{ 27{
28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; 28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
29 int ret;
30
31 ret = seq_write(m, s->buffer, len);
29 32
30 seq_write(m, s->buffer, len); 33 /*
34 * Only reset this buffer if we successfully wrote to the
35 * seq_file buffer.
36 */
37 if (!ret)
38 trace_seq_init(s);
31 39
32 trace_seq_init(s); 40 return ret;
33} 41}
34 42
35enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) 43enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
@@ -85,7 +93,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
85 va_list ap; 93 va_list ap;
86 int ret; 94 int ret;
87 95
88 if (!len) 96 if (s->full || !len)
89 return 0; 97 return 0;
90 98
91 va_start(ap, fmt); 99 va_start(ap, fmt);
@@ -93,8 +101,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
93 va_end(ap); 101 va_end(ap);
94 102
95 /* If we can't write it all, don't bother writing anything */ 103 /* If we can't write it all, don't bother writing anything */
96 if (ret >= len) 104 if (ret >= len) {
105 s->full = 1;
97 return 0; 106 return 0;
107 }
98 108
99 s->len += ret; 109 s->len += ret;
100 110
@@ -119,14 +129,16 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
119 int len = (PAGE_SIZE - 1) - s->len; 129 int len = (PAGE_SIZE - 1) - s->len;
120 int ret; 130 int ret;
121 131
122 if (!len) 132 if (s->full || !len)
123 return 0; 133 return 0;
124 134
125 ret = vsnprintf(s->buffer + s->len, len, fmt, args); 135 ret = vsnprintf(s->buffer + s->len, len, fmt, args);
126 136
127 /* If we can't write it all, don't bother writing anything */ 137 /* If we can't write it all, don't bother writing anything */
128 if (ret >= len) 138 if (ret >= len) {
139 s->full = 1;
129 return 0; 140 return 0;
141 }
130 142
131 s->len += ret; 143 s->len += ret;
132 144
@@ -139,14 +151,16 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
139 int len = (PAGE_SIZE - 1) - s->len; 151 int len = (PAGE_SIZE - 1) - s->len;
140 int ret; 152 int ret;
141 153
142 if (!len) 154 if (s->full || !len)
143 return 0; 155 return 0;
144 156
145 ret = bstr_printf(s->buffer + s->len, len, fmt, binary); 157 ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
146 158
147 /* If we can't write it all, don't bother writing anything */ 159 /* If we can't write it all, don't bother writing anything */
148 if (ret >= len) 160 if (ret >= len) {
161 s->full = 1;
149 return 0; 162 return 0;
163 }
150 164
151 s->len += ret; 165 s->len += ret;
152 166
@@ -167,8 +181,13 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
167{ 181{
168 int len = strlen(str); 182 int len = strlen(str);
169 183
170 if (len > ((PAGE_SIZE - 1) - s->len)) 184 if (s->full)
185 return 0;
186
187 if (len > ((PAGE_SIZE - 1) - s->len)) {
188 s->full = 1;
171 return 0; 189 return 0;
190 }
172 191
173 memcpy(s->buffer + s->len, str, len); 192 memcpy(s->buffer + s->len, str, len);
174 s->len += len; 193 s->len += len;
@@ -178,9 +197,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
178 197
179int trace_seq_putc(struct trace_seq *s, unsigned char c) 198int trace_seq_putc(struct trace_seq *s, unsigned char c)
180{ 199{
181 if (s->len >= (PAGE_SIZE - 1)) 200 if (s->full)
182 return 0; 201 return 0;
183 202
203 if (s->len >= (PAGE_SIZE - 1)) {
204 s->full = 1;
205 return 0;
206 }
207
184 s->buffer[s->len++] = c; 208 s->buffer[s->len++] = c;
185 209
186 return 1; 210 return 1;
@@ -188,9 +212,14 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
188 212
189int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) 213int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
190{ 214{
191 if (len > ((PAGE_SIZE - 1) - s->len)) 215 if (s->full)
192 return 0; 216 return 0;
193 217
218 if (len > ((PAGE_SIZE - 1) - s->len)) {
219 s->full = 1;
220 return 0;
221 }
222
194 memcpy(s->buffer + s->len, mem, len); 223 memcpy(s->buffer + s->len, mem, len);
195 s->len += len; 224 s->len += len;
196 225
@@ -203,6 +232,9 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
203 const unsigned char *data = mem; 232 const unsigned char *data = mem;
204 int i, j; 233 int i, j;
205 234
235 if (s->full)
236 return 0;
237
206#ifdef __BIG_ENDIAN 238#ifdef __BIG_ENDIAN
207 for (i = 0, j = 0; i < len; i++) { 239 for (i = 0, j = 0; i < len; i++) {
208#else 240#else
@@ -220,8 +252,13 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
220{ 252{
221 void *ret; 253 void *ret;
222 254
223 if (len > ((PAGE_SIZE - 1) - s->len)) 255 if (s->full)
256 return 0;
257
258 if (len > ((PAGE_SIZE - 1) - s->len)) {
259 s->full = 1;
224 return NULL; 260 return NULL;
261 }
225 262
226 ret = s->buffer + s->len; 263 ret = s->buffer + s->len;
227 s->len += len; 264 s->len += len;
@@ -233,8 +270,14 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
233{ 270{
234 unsigned char *p; 271 unsigned char *p;
235 272
236 if (s->len >= (PAGE_SIZE - 1)) 273 if (s->full)
274 return 0;
275
276 if (s->len >= (PAGE_SIZE - 1)) {
277 s->full = 1;
237 return 0; 278 return 0;
279 }
280
238 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); 281 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
239 if (!IS_ERR(p)) { 282 if (!IS_ERR(p)) {
240 p = mangle_path(s->buffer + s->len, p, "\n"); 283 p = mangle_path(s->buffer + s->len, p, "\n");
@@ -247,6 +290,7 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
247 return 1; 290 return 1;
248 } 291 }
249 292
293 s->full = 1;
250 return 0; 294 return 0;
251} 295}
252 296
@@ -373,6 +417,9 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
373 unsigned long vmstart = 0; 417 unsigned long vmstart = 0;
374 int ret = 1; 418 int ret = 1;
375 419
420 if (s->full)
421 return 0;
422
376 if (mm) { 423 if (mm) {
377 const struct vm_area_struct *vma; 424 const struct vm_area_struct *vma;
378 425
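Note: the pattern repeated through these trace_seq helpers is a sticky overflow flag: the first write that does not fit sets s->full, and every later helper bails out immediately rather than emitting a partially written line. A compact user-space sketch of the bounded-printf variant (names are stand-ins, not the kernel API):

#include <stdarg.h>
#include <stdio.h>

#define SEQ_SIZE 64

struct seq {
        char buffer[SEQ_SIZE];
        int len;
        int full;
};

static int seq_printf_demo(struct seq *s, const char *fmt, ...)
{
        int room = SEQ_SIZE - 1 - s->len;
        va_list ap;
        int ret;

        if (s->full || !room)
                return 0;

        va_start(ap, fmt);
        ret = vsnprintf(s->buffer + s->len, room, fmt, ap);
        va_end(ap);

        if (ret >= room) {              /* would have been truncated: poison it */
                s->full = 1;
                return 0;
        }
        s->len += ret;
        return ret;
}

int main(void)
{
        struct seq s = { .len = 0, .full = 0 };

        while (seq_printf_demo(&s, "entry %d ", s.len))
                ;
        printf("full=%d len=%d\n", s.full, s.len);
        return 0;
}
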
diff --git a/lib/checksum.c b/lib/checksum.c
index b2e2fd468461..097508732f34 100644
--- a/lib/checksum.c
+++ b/lib/checksum.c
@@ -37,7 +37,8 @@
37 37
38#include <asm/byteorder.h> 38#include <asm/byteorder.h>
39 39
40static inline unsigned short from32to16(unsigned long x) 40#ifndef do_csum
41static inline unsigned short from32to16(unsigned int x)
41{ 42{
42 /* add up 16-bit and 16-bit for 16+c bit */ 43 /* add up 16-bit and 16-bit for 16+c bit */
43 x = (x & 0xffff) + (x >> 16); 44 x = (x & 0xffff) + (x >> 16);
@@ -49,16 +50,16 @@ static inline unsigned short from32to16(unsigned long x)
49static unsigned int do_csum(const unsigned char *buff, int len) 50static unsigned int do_csum(const unsigned char *buff, int len)
50{ 51{
51 int odd, count; 52 int odd, count;
52 unsigned long result = 0; 53 unsigned int result = 0;
53 54
54 if (len <= 0) 55 if (len <= 0)
55 goto out; 56 goto out;
56 odd = 1 & (unsigned long) buff; 57 odd = 1 & (unsigned long) buff;
57 if (odd) { 58 if (odd) {
58#ifdef __LITTLE_ENDIAN 59#ifdef __LITTLE_ENDIAN
59 result = *buff;
60#else
61 result += (*buff << 8); 60 result += (*buff << 8);
61#else
62 result = *buff;
62#endif 63#endif
63 len--; 64 len--;
64 buff++; 65 buff++;
@@ -73,9 +74,9 @@ static unsigned int do_csum(const unsigned char *buff, int len)
73 } 74 }
74 count >>= 1; /* nr of 32-bit words.. */ 75 count >>= 1; /* nr of 32-bit words.. */
75 if (count) { 76 if (count) {
76 unsigned long carry = 0; 77 unsigned int carry = 0;
77 do { 78 do {
78 unsigned long w = *(unsigned int *) buff; 79 unsigned int w = *(unsigned int *) buff;
79 count--; 80 count--;
80 buff += 4; 81 buff += 4;
81 result += carry; 82 result += carry;
@@ -102,6 +103,7 @@ static unsigned int do_csum(const unsigned char *buff, int len)
102out: 103out:
103 return result; 104 return result;
104} 105}
106#endif
105 107
106/* 108/*
107 * This is a version of ip_compute_csum() optimized for IP headers, 109 * This is a version of ip_compute_csum() optimized for IP headers,
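Note: the checksum rework narrows the accumulator to unsigned int, swaps which byte order gets the shifted first odd byte, and lets an architecture provide its own do_csum() by pre-defining the macro. The 32-to-16 folding step is unchanged and easy to check by hand; a stand-alone copy with one worked value:

#include <stdio.h>

static unsigned short from32to16(unsigned int x)
{
        /* add up 16-bit and 16-bit for 16+c bit */
        x = (x & 0xffff) + (x >> 16);
        /* add up carry.. */
        x = (x & 0xffff) + (x >> 16);
        return x;
}

int main(void)
{
        /* 0x0001ffff: 0x0001 + 0xffff = 0x10000, folding the carry gives 1 */
        printf("0x%04x\n", from32to16(0x0001ffffu));
        return 0;
}
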
diff --git a/mm/slab.c b/mm/slab.c
index 7dfa481c96ba..a6c9166996a9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -604,6 +604,26 @@ static struct kmem_cache cache_cache = {
604 604
605#define BAD_ALIEN_MAGIC 0x01020304ul 605#define BAD_ALIEN_MAGIC 0x01020304ul
606 606
607/*
608 * chicken and egg problem: delay the per-cpu array allocation
609 * until the general caches are up.
610 */
611static enum {
612 NONE,
613 PARTIAL_AC,
614 PARTIAL_L3,
615 EARLY,
616 FULL
617} g_cpucache_up;
618
619/*
620 * used by boot code to determine if it can use slab based allocator
621 */
622int slab_is_available(void)
623{
624 return g_cpucache_up >= EARLY;
625}
626
607#ifdef CONFIG_LOCKDEP 627#ifdef CONFIG_LOCKDEP
608 628
609/* 629/*
@@ -620,40 +640,52 @@ static struct kmem_cache cache_cache = {
620static struct lock_class_key on_slab_l3_key; 640static struct lock_class_key on_slab_l3_key;
621static struct lock_class_key on_slab_alc_key; 641static struct lock_class_key on_slab_alc_key;
622 642
623static inline void init_lock_keys(void) 643static void init_node_lock_keys(int q)
624
625{ 644{
626 int q;
627 struct cache_sizes *s = malloc_sizes; 645 struct cache_sizes *s = malloc_sizes;
628 646
629 while (s->cs_size != ULONG_MAX) { 647 if (g_cpucache_up != FULL)
630 for_each_node(q) { 648 return;
631 struct array_cache **alc; 649
632 int r; 650 for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
633 struct kmem_list3 *l3 = s->cs_cachep->nodelists[q]; 651 struct array_cache **alc;
634 if (!l3 || OFF_SLAB(s->cs_cachep)) 652 struct kmem_list3 *l3;
635 continue; 653 int r;
636 lockdep_set_class(&l3->list_lock, &on_slab_l3_key); 654
637 alc = l3->alien; 655 l3 = s->cs_cachep->nodelists[q];
638 /* 656 if (!l3 || OFF_SLAB(s->cs_cachep))
639 * FIXME: This check for BAD_ALIEN_MAGIC 657 return;
640 * should go away when common slab code is taught to 658 lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
641 * work even without alien caches. 659 alc = l3->alien;
642 * Currently, non NUMA code returns BAD_ALIEN_MAGIC 660 /*
643 * for alloc_alien_cache, 661 * FIXME: This check for BAD_ALIEN_MAGIC
644 */ 662 * should go away when common slab code is taught to
645 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) 663 * work even without alien caches.
646 continue; 664 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
647 for_each_node(r) { 665 * for alloc_alien_cache,
648 if (alc[r]) 666 */
649 lockdep_set_class(&alc[r]->lock, 667 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
650 &on_slab_alc_key); 668 return;
651 } 669 for_each_node(r) {
670 if (alc[r])
671 lockdep_set_class(&alc[r]->lock,
672 &on_slab_alc_key);
652 } 673 }
653 s++;
654 } 674 }
655} 675}
676
677static inline void init_lock_keys(void)
678{
679 int node;
680
681 for_each_node(node)
682 init_node_lock_keys(node);
683}
656#else 684#else
685static void init_node_lock_keys(int q)
686{
687}
688
657static inline void init_lock_keys(void) 689static inline void init_lock_keys(void)
658{ 690{
659} 691}
@@ -665,26 +697,6 @@ static inline void init_lock_keys(void)
665static DEFINE_MUTEX(cache_chain_mutex); 697static DEFINE_MUTEX(cache_chain_mutex);
666static struct list_head cache_chain; 698static struct list_head cache_chain;
667 699
668/*
669 * chicken and egg problem: delay the per-cpu array allocation
670 * until the general caches are up.
671 */
672static enum {
673 NONE,
674 PARTIAL_AC,
675 PARTIAL_L3,
676 EARLY,
677 FULL
678} g_cpucache_up;
679
680/*
681 * used by boot code to determine if it can use slab based allocator
682 */
683int slab_is_available(void)
684{
685 return g_cpucache_up >= EARLY;
686}
687
688static DEFINE_PER_CPU(struct delayed_work, reap_work); 700static DEFINE_PER_CPU(struct delayed_work, reap_work);
689 701
690static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) 702static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -1254,6 +1266,8 @@ static int __cpuinit cpuup_prepare(long cpu)
1254 kfree(shared); 1266 kfree(shared);
1255 free_alien_cache(alien); 1267 free_alien_cache(alien);
1256 } 1268 }
1269 init_node_lock_keys(node);
1270
1257 return 0; 1271 return 0;
1258bad: 1272bad:
1259 cpuup_canceled(cpu); 1273 cpuup_canceled(cpu);
@@ -3103,13 +3117,19 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3103 } else { 3117 } else {
3104 STATS_INC_ALLOCMISS(cachep); 3118 STATS_INC_ALLOCMISS(cachep);
3105 objp = cache_alloc_refill(cachep, flags); 3119 objp = cache_alloc_refill(cachep, flags);
3120 /*
3121 * the 'ac' may be updated by cache_alloc_refill(),
3122 * and kmemleak_erase() requires its correct value.
3123 */
3124 ac = cpu_cache_get(cachep);
3106 } 3125 }
3107 /* 3126 /*
3108 * To avoid a false negative, if an object that is in one of the 3127 * To avoid a false negative, if an object that is in one of the
3109 * per-CPU caches is leaked, we need to make sure kmemleak doesn't 3128 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
3110 * treat the array pointers as a reference to the object. 3129 * treat the array pointers as a reference to the object.
3111 */ 3130 */
3112 kmemleak_erase(&ac->entry[ac->avail]); 3131 if (objp)
3132 kmemleak_erase(&ac->entry[ac->avail]);
3113 return objp; 3133 return objp;
3114} 3134}
3115 3135
@@ -3306,7 +3326,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3306 cache_alloc_debugcheck_before(cachep, flags); 3326 cache_alloc_debugcheck_before(cachep, flags);
3307 local_irq_save(save_flags); 3327 local_irq_save(save_flags);
3308 3328
3309 if (unlikely(nodeid == -1)) 3329 if (nodeid == -1)
3310 nodeid = numa_node_id(); 3330 nodeid = numa_node_id();
3311 3331
3312 if (unlikely(!cachep->nodelists[nodeid])) { 3332 if (unlikely(!cachep->nodelists[nodeid])) {
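Note: the kmemleak fix in ____cache_alloc() is a stale-pointer repair: cache_alloc_refill() can install a new per-cpu array_cache, so the ac pointer taken before the call is re-read afterwards, and the kmemleak_erase() is skipped when no object came back. A generic user-space sketch of that reload-after-refill discipline; every name here is illustrative, not slab code:

#include <stdlib.h>

struct ac_demo { int avail; int entry[8]; };

static struct ac_demo *percpu_ac;       /* what cpu_cache_get() would return */

static struct ac_demo *cpu_cache_get_demo(void)
{
        return percpu_ac;
}

static int refill_demo(void)
{
        /* refill may install a brand new array behind the cache ... */
        free(percpu_ac);
        percpu_ac = calloc(1, sizeof(*percpu_ac));
        if (percpu_ac)
                percpu_ac->avail = 8;
        return percpu_ac ? 1 : 0;       /* ... and may still come back empty */
}

int main(void)
{
        struct ac_demo *ac = cpu_cache_get_demo();      /* stale after refill */
        int got;

        got = refill_demo();
        ac = cpu_cache_get_demo();      /* reload, as the fix does */
        if (got && ac)                  /* only touch a slot on success */
                ac->entry[--ac->avail] = 0;
        free(percpu_ac);
        return 0;
}
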
diff --git a/mm/slub.c b/mm/slub.c
index 4996fc719552..da0ce55965dc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1735,7 +1735,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
1735 } 1735 }
1736 local_irq_restore(flags); 1736 local_irq_restore(flags);
1737 1737
1738 if (unlikely((gfpflags & __GFP_ZERO) && object)) 1738 if (unlikely(gfpflags & __GFP_ZERO) && object)
1739 memset(object, 0, objsize); 1739 memset(object, 0, objsize);
1740 1740
1741 kmemcheck_slab_alloc(s, gfpflags, object, c->objsize); 1741 kmemcheck_slab_alloc(s, gfpflags, object, c->objsize);
@@ -4371,12 +4371,28 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
4371 return len + sprintf(buf + len, "\n"); 4371 return len + sprintf(buf + len, "\n");
4372} 4372}
4373 4373
4374static void clear_stat(struct kmem_cache *s, enum stat_item si)
4375{
4376 int cpu;
4377
4378 for_each_online_cpu(cpu)
4379 get_cpu_slab(s, cpu)->stat[si] = 0;
4380}
4381
4374#define STAT_ATTR(si, text) \ 4382#define STAT_ATTR(si, text) \
4375static ssize_t text##_show(struct kmem_cache *s, char *buf) \ 4383static ssize_t text##_show(struct kmem_cache *s, char *buf) \
4376{ \ 4384{ \
4377 return show_stat(s, buf, si); \ 4385 return show_stat(s, buf, si); \
4378} \ 4386} \
4379SLAB_ATTR_RO(text); \ 4387static ssize_t text##_store(struct kmem_cache *s, \
4388 const char *buf, size_t length) \
4389{ \
4390 if (buf[0] != '0') \
4391 return -EINVAL; \
4392 clear_stat(s, si); \
4393 return length; \
4394} \
4395SLAB_ATTR(text); \
4380 4396
4381STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath); 4397STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
4382STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath); 4398STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
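Note: with STAT_ATTR now generating a store method, the per-cpu SLUB statistics exported in sysfs become resettable: writing '0' clears the counter on every online CPU, anything else is rejected with -EINVAL. A hypothetical user-space snippet; the cache and stat names are only examples, and the files exist only when SLUB statistics are enabled:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/sys/kernel/slab/kmalloc-64/alloc_fastpath", "w");

        if (!f) {
                perror("open stat file");
                return 1;
        }
        fputs("0", f);          /* '0' clears the counter; anything else fails */
        return fclose(f) ? 1 : 0;
}
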
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
index 29525500df00..c69cbe9b2426 100644
--- a/samples/hw_breakpoint/data_breakpoint.c
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -41,7 +41,9 @@ module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
41MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any" 41MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
42 " write operations on the kernel symbol"); 42 " write operations on the kernel symbol");
43 43
44static void sample_hbp_handler(struct perf_event *temp, void *data) 44static void sample_hbp_handler(struct perf_event *bp, int nmi,
45 struct perf_sample_data *data,
46 struct pt_regs *regs)
45{ 47{
46 printk(KERN_INFO "%s value is changed\n", ksym_name); 48 printk(KERN_INFO "%s value is changed\n", ksym_name);
47 dump_stack(); 49 dump_stack();
@@ -51,8 +53,9 @@ static void sample_hbp_handler(struct perf_event *temp, void *data)
51static int __init hw_break_module_init(void) 53static int __init hw_break_module_init(void)
52{ 54{
53 int ret; 55 int ret;
54 DEFINE_BREAKPOINT_ATTR(attr); 56 struct perf_event_attr attr;
55 57
58 hw_breakpoint_init(&attr);
56 attr.bp_addr = kallsyms_lookup_name(ksym_name); 59 attr.bp_addr = kallsyms_lookup_name(ksym_name);
57 attr.bp_len = HW_BREAKPOINT_LEN_4; 60 attr.bp_len = HW_BREAKPOINT_LEN_4;
58 attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; 61 attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
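Note: both ksym_hbp_handler() above and this sample handler move to the common perf overflow-handler prototype, and breakpoint attributes become an ordinary perf_event_attr seeded by hw_breakpoint_init(). A condensed module-style sketch of the updated calling convention; register_wide_hw_breakpoint(), unregister_wide_hw_breakpoint() and the watched symbol "pid_max" are assumptions carried over from the sample's surrounding code, not confirmed by the hunks here:

#include <linux/hw_breakpoint.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/perf_event.h>

static struct perf_event **wp;

static void wp_handler(struct perf_event *bp, int nmi,
                       struct perf_sample_data *data, struct pt_regs *regs)
{
        pr_info("watched symbol was touched\n");
        dump_stack();
}

static int __init wp_init(void)
{
        struct perf_event_attr attr;

        hw_breakpoint_init(&attr);      /* replaces DEFINE_BREAKPOINT_ATTR */
        attr.bp_addr = kallsyms_lookup_name("pid_max");  /* example symbol */
        attr.bp_len  = HW_BREAKPOINT_LEN_4;
        attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;

        wp = register_wide_hw_breakpoint(&attr, wp_handler);
        return IS_ERR(wp) ? PTR_ERR(wp) : 0;
}

static void __exit wp_exit(void)
{
        unregister_wide_hw_breakpoint(wp);
}

module_init(wp_init);
module_exit(wp_exit);
MODULE_LICENSE("GPL");
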
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
index 44b0ce35c28a..eac4d852e7cd 100644
--- a/tools/perf/Documentation/perf-kmem.txt
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -8,16 +8,16 @@ perf-kmem - Tool to trace/measure kernel memory(slab) properties
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf kmem' {record} [<options>] 11'perf kmem' {record|stat} [<options>]
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15There's two variants of perf kmem: 15There are two variants of perf kmem:
16 16
17 'perf kmem record <command>' to record the kmem events 17 'perf kmem record <command>' to record the kmem events
18 of an arbitrary workload. 18 of an arbitrary workload.
19 19
20 'perf kmem' to report kernel memory statistics. 20 'perf kmem stat' to report kernel memory statistics.
21 21
22OPTIONS 22OPTIONS
23------- 23-------
@@ -25,8 +25,11 @@ OPTIONS
25--input=<file>:: 25--input=<file>::
26 Select the input file (default: perf.data) 26 Select the input file (default: perf.data)
27 27
28--stat=<caller|alloc>:: 28--caller::
29 Select per callsite or per allocation statistics 29 Show per-callsite statistics
30
31--alloc::
32 Show per-allocation statistics
30 33
31-s <key[,key2...]>:: 34-s <key[,key2...]>::
32--sort=<key[,key2...]>:: 35--sort=<key[,key2...]>::
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 9270594e6dfd..8fa6bf99fcb5 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -8,10 +8,13 @@ perf-probe - Define new dynamic tracepoints
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf probe' [options] --add 'PROBE' [--add 'PROBE' ...] 11'perf probe' [options] --add='PROBE' [...]
12or 12or
13'perf probe' [options] 'PROBE' ['PROBE' ...] 13'perf probe' [options] PROBE
14 14or
15'perf probe' [options] --del='[GROUP:]EVENT' [...]
16or
17'perf probe' --list
15 18
16DESCRIPTION 19DESCRIPTION
17----------- 20-----------
@@ -31,8 +34,16 @@ OPTIONS
31 Be more verbose (show parsed arguments, etc). 34 Be more verbose (show parsed arguments, etc).
32 35
33-a:: 36-a::
34--add:: 37--add=::
35 Define a probe point (see PROBE SYNTAX for detail) 38 Define a probe event (see PROBE SYNTAX for detail).
39
40-d::
41--del=::
42 Delete a probe event.
43
44-l::
45--list::
46 List up current probe events.
36 47
37PROBE SYNTAX 48PROBE SYNTAX
38------------ 49------------
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 7dee9d19ab7a..dcb6143a0002 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -19,7 +19,7 @@ static char const *input_name = "perf.data";
19static int force; 19static int force;
20 20
21static const char *const buildid_list_usage[] = { 21static const char *const buildid_list_usage[] = {
22 "perf report [<options>]", 22 "perf buildid-list [<options>]",
23 NULL 23 NULL
24}; 24};
25 25
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 047fef74bd52..5f209514f657 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -57,11 +57,6 @@ static struct rb_root root_caller_sorted;
57static unsigned long total_requested, total_allocated; 57static unsigned long total_requested, total_allocated;
58static unsigned long nr_allocs, nr_cross_allocs; 58static unsigned long nr_allocs, nr_cross_allocs;
59 59
60struct raw_event_sample {
61 u32 size;
62 char data[0];
63};
64
65#define PATH_SYS_NODE "/sys/devices/system/node" 60#define PATH_SYS_NODE "/sys/devices/system/node"
66 61
67static void init_cpunode_map(void) 62static void init_cpunode_map(void)
@@ -201,7 +196,7 @@ static void insert_caller_stat(unsigned long call_site,
201 } 196 }
202} 197}
203 198
204static void process_alloc_event(struct raw_event_sample *raw, 199static void process_alloc_event(void *data,
205 struct event *event, 200 struct event *event,
206 int cpu, 201 int cpu,
207 u64 timestamp __used, 202 u64 timestamp __used,
@@ -214,10 +209,10 @@ static void process_alloc_event(struct raw_event_sample *raw,
214 int bytes_alloc; 209 int bytes_alloc;
215 int node1, node2; 210 int node1, node2;
216 211
217 ptr = raw_field_value(event, "ptr", raw->data); 212 ptr = raw_field_value(event, "ptr", data);
218 call_site = raw_field_value(event, "call_site", raw->data); 213 call_site = raw_field_value(event, "call_site", data);
219 bytes_req = raw_field_value(event, "bytes_req", raw->data); 214 bytes_req = raw_field_value(event, "bytes_req", data);
220 bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data); 215 bytes_alloc = raw_field_value(event, "bytes_alloc", data);
221 216
222 insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu); 217 insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu);
223 insert_caller_stat(call_site, bytes_req, bytes_alloc); 218 insert_caller_stat(call_site, bytes_req, bytes_alloc);
@@ -227,7 +222,7 @@ static void process_alloc_event(struct raw_event_sample *raw,
227 222
228 if (node) { 223 if (node) {
229 node1 = cpunode_map[cpu]; 224 node1 = cpunode_map[cpu];
230 node2 = raw_field_value(event, "node", raw->data); 225 node2 = raw_field_value(event, "node", data);
231 if (node1 != node2) 226 if (node1 != node2)
232 nr_cross_allocs++; 227 nr_cross_allocs++;
233 } 228 }
@@ -262,7 +257,7 @@ static struct alloc_stat *search_alloc_stat(unsigned long ptr,
262 return NULL; 257 return NULL;
263} 258}
264 259
265static void process_free_event(struct raw_event_sample *raw, 260static void process_free_event(void *data,
266 struct event *event, 261 struct event *event,
267 int cpu, 262 int cpu,
268 u64 timestamp __used, 263 u64 timestamp __used,
@@ -271,7 +266,7 @@ static void process_free_event(struct raw_event_sample *raw,
271 unsigned long ptr; 266 unsigned long ptr;
272 struct alloc_stat *s_alloc, *s_caller; 267 struct alloc_stat *s_alloc, *s_caller;
273 268
274 ptr = raw_field_value(event, "ptr", raw->data); 269 ptr = raw_field_value(event, "ptr", data);
275 270
276 s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); 271 s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
277 if (!s_alloc) 272 if (!s_alloc)
@@ -289,66 +284,53 @@ static void process_free_event(struct raw_event_sample *raw,
289} 284}
290 285
291static void 286static void
292process_raw_event(event_t *raw_event __used, void *more_data, 287process_raw_event(event_t *raw_event __used, void *data,
293 int cpu, u64 timestamp, struct thread *thread) 288 int cpu, u64 timestamp, struct thread *thread)
294{ 289{
295 struct raw_event_sample *raw = more_data;
296 struct event *event; 290 struct event *event;
297 int type; 291 int type;
298 292
299 type = trace_parse_common_type(raw->data); 293 type = trace_parse_common_type(data);
300 event = trace_find_event(type); 294 event = trace_find_event(type);
301 295
302 if (!strcmp(event->name, "kmalloc") || 296 if (!strcmp(event->name, "kmalloc") ||
303 !strcmp(event->name, "kmem_cache_alloc")) { 297 !strcmp(event->name, "kmem_cache_alloc")) {
304 process_alloc_event(raw, event, cpu, timestamp, thread, 0); 298 process_alloc_event(data, event, cpu, timestamp, thread, 0);
305 return; 299 return;
306 } 300 }
307 301
308 if (!strcmp(event->name, "kmalloc_node") || 302 if (!strcmp(event->name, "kmalloc_node") ||
309 !strcmp(event->name, "kmem_cache_alloc_node")) { 303 !strcmp(event->name, "kmem_cache_alloc_node")) {
310 process_alloc_event(raw, event, cpu, timestamp, thread, 1); 304 process_alloc_event(data, event, cpu, timestamp, thread, 1);
311 return; 305 return;
312 } 306 }
313 307
314 if (!strcmp(event->name, "kfree") || 308 if (!strcmp(event->name, "kfree") ||
315 !strcmp(event->name, "kmem_cache_free")) { 309 !strcmp(event->name, "kmem_cache_free")) {
316 process_free_event(raw, event, cpu, timestamp, thread); 310 process_free_event(data, event, cpu, timestamp, thread);
317 return; 311 return;
318 } 312 }
319} 313}
320 314
321static int process_sample_event(event_t *event) 315static int process_sample_event(event_t *event)
322{ 316{
323 u64 ip = event->ip.ip; 317 struct sample_data data;
324 u64 timestamp = -1; 318 struct thread *thread;
325 u32 cpu = -1;
326 u64 period = 1;
327 void *more_data = event->ip.__more_data;
328 struct thread *thread = threads__findnew(event->ip.pid);
329 319
330 if (sample_type & PERF_SAMPLE_TIME) { 320 memset(&data, 0, sizeof(data));
331 timestamp = *(u64 *)more_data; 321 data.time = -1;
332 more_data += sizeof(u64); 322 data.cpu = -1;
333 } 323 data.period = 1;
334
335 if (sample_type & PERF_SAMPLE_CPU) {
336 cpu = *(u32 *)more_data;
337 more_data += sizeof(u32);
338 more_data += sizeof(u32); /* reserved */
339 }
340 324
341 if (sample_type & PERF_SAMPLE_PERIOD) { 325 event__parse_sample(event, sample_type, &data);
342 period = *(u64 *)more_data;
343 more_data += sizeof(u64);
344 }
345 326
346 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", 327 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
347 event->header.misc, 328 event->header.misc,
348 event->ip.pid, event->ip.tid, 329 data.pid, data.tid,
349 (void *)(long)ip, 330 (void *)(long)data.ip,
350 (long long)period); 331 (long long)data.period);
351 332
333 thread = threads__findnew(event->ip.pid);
352 if (thread == NULL) { 334 if (thread == NULL) {
353 pr_debug("problem processing %d event, skipping it.\n", 335 pr_debug("problem processing %d event, skipping it.\n",
354 event->header.type); 336 event->header.type);
@@ -357,7 +339,8 @@ static int process_sample_event(event_t *event)
357 339
358 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 340 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
359 341
360 process_raw_event(event, more_data, cpu, timestamp, thread); 342 process_raw_event(event, data.raw_data, data.cpu,
343 data.time, thread);
361 344
362 return 0; 345 return 0;
363} 346}
@@ -543,7 +526,7 @@ static int __cmd_kmem(void)
543} 526}
544 527
545static const char * const kmem_usage[] = { 528static const char * const kmem_usage[] = {
546 "perf kmem [<options>] {record}", 529 "perf kmem [<options>] {record|stat}",
547 NULL 530 NULL
548}; 531};
549 532
@@ -703,18 +686,17 @@ static int parse_sort_opt(const struct option *opt __used,
703 return 0; 686 return 0;
704} 687}
705 688
706static int parse_stat_opt(const struct option *opt __used, 689static int parse_caller_opt(const struct option *opt __used,
707 const char *arg, int unset __used) 690 const char *arg __used, int unset __used)
708{ 691{
709 if (!arg) 692 caller_flag = (alloc_flag + 1);
710 return -1; 693 return 0;
694}
711 695
712 if (strcmp(arg, "alloc") == 0) 696static int parse_alloc_opt(const struct option *opt __used,
713 alloc_flag = (caller_flag + 1); 697 const char *arg __used, int unset __used)
714 else if (strcmp(arg, "caller") == 0) 698{
715 caller_flag = (alloc_flag + 1); 699 alloc_flag = (caller_flag + 1);
716 else
717 return -1;
718 return 0; 700 return 0;
719} 701}
720 702
@@ -739,14 +721,17 @@ static int parse_line_opt(const struct option *opt __used,
739static const struct option kmem_options[] = { 721static const struct option kmem_options[] = {
740 OPT_STRING('i', "input", &input_name, "file", 722 OPT_STRING('i', "input", &input_name, "file",
741 "input file name"), 723 "input file name"),
742 OPT_CALLBACK(0, "stat", NULL, "<alloc>|<caller>", 724 OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
743 "stat selector, Pass 'alloc' or 'caller'.", 725 "show per-callsite statistics",
744 parse_stat_opt), 726 parse_caller_opt),
727 OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
728 "show per-allocation statistics",
729 parse_alloc_opt),
745 OPT_CALLBACK('s', "sort", NULL, "key[,key2...]", 730 OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
746 "sort by keys: ptr, call_site, bytes, hit, pingpong, frag", 731 "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
747 parse_sort_opt), 732 parse_sort_opt),
748 OPT_CALLBACK('l', "line", NULL, "num", 733 OPT_CALLBACK('l', "line", NULL, "num",
749 "show n lins", 734 "show n lines",
750 parse_line_opt), 735 parse_line_opt),
751 OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), 736 OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
752 OPT_END() 737 OPT_END()
@@ -790,18 +775,22 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used)
790 775
791 argc = parse_options(argc, argv, kmem_options, kmem_usage, 0); 776 argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
792 777
793 if (argc && !strncmp(argv[0], "rec", 3)) 778 if (!argc)
794 return __cmd_record(argc, argv);
795 else if (argc)
796 usage_with_options(kmem_usage, kmem_options); 779 usage_with_options(kmem_usage, kmem_options);
797 780
798 if (list_empty(&caller_sort)) 781 if (!strncmp(argv[0], "rec", 3)) {
799 setup_sorting(&caller_sort, default_sort_order); 782 return __cmd_record(argc, argv);
800 if (list_empty(&alloc_sort)) 783 } else if (!strcmp(argv[0], "stat")) {
801 setup_sorting(&alloc_sort, default_sort_order); 784 setup_cpunode_map();
785
786 if (list_empty(&caller_sort))
787 setup_sorting(&caller_sort, default_sort_order);
788 if (list_empty(&alloc_sort))
789 setup_sorting(&alloc_sort, default_sort_order);
802 790
803 setup_cpunode_map(); 791 return __cmd_kmem();
792 }
804 793
805 return __cmd_kmem(); 794 return 0;
806} 795}
807 796
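Note: process_sample_event() now hands the record to event__parse_sample() instead of stepping a more_data pointer through the optional fields by hand. The layout being parsed is the usual perf sample format: each field is present only when its PERF_SAMPLE_* bit is set, in a fixed order. A trimmed user-space sketch of that walk, reduced to the fields used here:

#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>

struct sample {
        uint64_t ip, time, period;
        uint32_t pid, tid, cpu;
};

static void parse_sample(const void *buf, uint64_t sample_type,
                         struct sample *s)
{
        const uint64_t *p = buf;

        if (sample_type & PERF_SAMPLE_IP)
                s->ip = *p++;
        if (sample_type & PERF_SAMPLE_TID) {
                const uint32_t *q = (const uint32_t *)p;

                s->pid = q[0];
                s->tid = q[1];
                p++;
        }
        if (sample_type & PERF_SAMPLE_TIME)
                s->time = *p++;
        if (sample_type & PERF_SAMPLE_CPU) {
                s->cpu = *(const uint32_t *)p;  /* plus 32 reserved bits */
                p++;
        }
        if (sample_type & PERF_SAMPLE_PERIOD)
                s->period = *p++;
}

int main(void)
{
        const uint64_t buf[] = { 0xffffffff81000000ull, 123456789ull, 1ull };
        struct sample s = { 0 };

        parse_sample(buf, PERF_SAMPLE_IP | PERF_SAMPLE_TIME | PERF_SAMPLE_PERIOD, &s);
        printf("ip=%#llx time=%llu period=%llu\n",
               (unsigned long long)s.ip,
               (unsigned long long)s.time,
               (unsigned long long)s.period);
        return 0;
}
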
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index a58e11b7ea80..5a47c1e11f77 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -35,6 +35,7 @@
35#include "perf.h" 35#include "perf.h"
36#include "builtin.h" 36#include "builtin.h"
37#include "util/util.h" 37#include "util/util.h"
38#include "util/strlist.h"
38#include "util/event.h" 39#include "util/event.h"
39#include "util/debug.h" 40#include "util/debug.h"
40#include "util/parse-options.h" 41#include "util/parse-options.h"
@@ -43,11 +44,12 @@
43#include "util/probe-event.h" 44#include "util/probe-event.h"
44 45
45/* Default vmlinux search paths */ 46/* Default vmlinux search paths */
46#define NR_SEARCH_PATH 3 47#define NR_SEARCH_PATH 4
47const char *default_search_path[NR_SEARCH_PATH] = { 48const char *default_search_path[NR_SEARCH_PATH] = {
48"/lib/modules/%s/build/vmlinux", /* Custom build kernel */ 49"/lib/modules/%s/build/vmlinux", /* Custom build kernel */
49"/usr/lib/debug/lib/modules/%s/vmlinux", /* Red Hat debuginfo */ 50"/usr/lib/debug/lib/modules/%s/vmlinux", /* Red Hat debuginfo */
50"/boot/vmlinux-debug-%s", /* Ubuntu */ 51"/boot/vmlinux-debug-%s", /* Ubuntu */
52"./vmlinux", /* CWD */
51}; 53};
52 54
53#define MAX_PATH_LEN 256 55#define MAX_PATH_LEN 256
@@ -60,6 +62,7 @@ static struct {
60 int need_dwarf; 62 int need_dwarf;
61 int nr_probe; 63 int nr_probe;
62 struct probe_point probes[MAX_PROBES]; 64 struct probe_point probes[MAX_PROBES];
65 struct strlist *dellist;
63} session; 66} session;
64 67
65static bool listing; 68static bool listing;
@@ -79,6 +82,25 @@ static void parse_probe_event(const char *str)
79 pr_debug("%d arguments\n", pp->nr_args); 82 pr_debug("%d arguments\n", pp->nr_args);
80} 83}
81 84
85static void parse_probe_event_argv(int argc, const char **argv)
86{
87 int i, len;
88 char *buf;
89
90 /* Bind up rest arguments */
91 len = 0;
92 for (i = 0; i < argc; i++)
93 len += strlen(argv[i]) + 1;
94 buf = zalloc(len + 1);
95 if (!buf)
96 die("Failed to allocate memory for binding arguments.");
97 len = 0;
98 for (i = 0; i < argc; i++)
99 len += sprintf(&buf[len], "%s ", argv[i]);
100 parse_probe_event(buf);
101 free(buf);
102}
103
82static int opt_add_probe_event(const struct option *opt __used, 104static int opt_add_probe_event(const struct option *opt __used,
83 const char *str, int unset __used) 105 const char *str, int unset __used)
84{ 106{
@@ -87,6 +109,17 @@ static int opt_add_probe_event(const struct option *opt __used,
87 return 0; 109 return 0;
88} 110}
89 111
112static int opt_del_probe_event(const struct option *opt __used,
113 const char *str, int unset __used)
114{
115 if (str) {
116 if (!session.dellist)
117 session.dellist = strlist__new(true, NULL);
118 strlist__add(session.dellist, str);
119 }
120 return 0;
121}
122
90#ifndef NO_LIBDWARF 123#ifndef NO_LIBDWARF
91static int open_default_vmlinux(void) 124static int open_default_vmlinux(void)
92{ 125{
@@ -121,6 +154,7 @@ static int open_default_vmlinux(void)
121static const char * const probe_usage[] = { 154static const char * const probe_usage[] = {
122 "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]", 155 "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
123 "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", 156 "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
157 "perf probe [<options>] --del '[GROUP:]EVENT' ...",
124 "perf probe --list", 158 "perf probe --list",
125 NULL 159 NULL
126}; 160};
@@ -132,7 +166,9 @@ static const struct option options[] = {
132 OPT_STRING('k', "vmlinux", &session.vmlinux, "file", 166 OPT_STRING('k', "vmlinux", &session.vmlinux, "file",
133 "vmlinux/module pathname"), 167 "vmlinux/module pathname"),
134#endif 168#endif
135 OPT_BOOLEAN('l', "list", &listing, "list up current probes"), 169 OPT_BOOLEAN('l', "list", &listing, "list up current probe events"),
170 OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.",
171 opt_del_probe_event),
136 OPT_CALLBACK('a', "add", NULL, 172 OPT_CALLBACK('a', "add", NULL,
137#ifdef NO_LIBDWARF 173#ifdef NO_LIBDWARF
138 "FUNC[+OFFS|%return] [ARG ...]", 174 "FUNC[+OFFS|%return] [ARG ...]",
@@ -160,7 +196,7 @@ static const struct option options[] = {
160 196
161int cmd_probe(int argc, const char **argv, const char *prefix __used) 197int cmd_probe(int argc, const char **argv, const char *prefix __used)
162{ 198{
163 int i, j, ret; 199 int i, ret;
164#ifndef NO_LIBDWARF 200#ifndef NO_LIBDWARF
165 int fd; 201 int fd;
166#endif 202#endif
@@ -168,40 +204,52 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
168 204
169 argc = parse_options(argc, argv, options, probe_usage, 205 argc = parse_options(argc, argv, options, probe_usage,
170 PARSE_OPT_STOP_AT_NON_OPTION); 206 PARSE_OPT_STOP_AT_NON_OPTION);
171 for (i = 0; i < argc; i++) 207 if (argc > 0)
172 parse_probe_event(argv[i]); 208 parse_probe_event_argv(argc, argv);
173 209
174 if ((session.nr_probe == 0 && !listing) || 210 if ((session.nr_probe == 0 && !session.dellist && !listing))
175 (session.nr_probe != 0 && listing))
176 usage_with_options(probe_usage, options); 211 usage_with_options(probe_usage, options);
177 212
178 if (listing) { 213 if (listing) {
214 if (session.nr_probe != 0 || session.dellist) {
215 pr_warning(" Error: Don't use --list with"
216 " --add/--del.\n");
217 usage_with_options(probe_usage, options);
218 }
179 show_perf_probe_events(); 219 show_perf_probe_events();
180 return 0; 220 return 0;
181 } 221 }
182 222
223 if (session.dellist) {
224 del_trace_kprobe_events(session.dellist);
225 strlist__delete(session.dellist);
226 if (session.nr_probe == 0)
227 return 0;
228 }
229
183 if (session.need_dwarf) 230 if (session.need_dwarf)
184#ifdef NO_LIBDWARF 231#ifdef NO_LIBDWARF
185 die("Debuginfo-analysis is not supported"); 232 die("Debuginfo-analysis is not supported");
186#else /* !NO_LIBDWARF */ 233#else /* !NO_LIBDWARF */
187 pr_debug("Some probes require debuginfo.\n"); 234 pr_debug("Some probes require debuginfo.\n");
188 235
189 if (session.vmlinux) 236 if (session.vmlinux) {
237 pr_debug("Try to open %s.", session.vmlinux);
190 fd = open(session.vmlinux, O_RDONLY); 238 fd = open(session.vmlinux, O_RDONLY);
191 else 239 } else
192 fd = open_default_vmlinux(); 240 fd = open_default_vmlinux();
193 if (fd < 0) { 241 if (fd < 0) {
194 if (session.need_dwarf) 242 if (session.need_dwarf)
195 die("Could not open vmlinux/module file."); 243 die("Could not open debuginfo file.");
196 244
197 pr_warning("Could not open vmlinux/module file." 245 pr_debug("Could not open vmlinux/module file."
198 " Try to use symbols.\n"); 246 " Try to use symbols.\n");
199 goto end_dwarf; 247 goto end_dwarf;
200 } 248 }
201 249
202 /* Searching probe points */ 250 /* Searching probe points */
203 for (j = 0; j < session.nr_probe; j++) { 251 for (i = 0; i < session.nr_probe; i++) {
204 pp = &session.probes[j]; 252 pp = &session.probes[i];
205 if (pp->found) 253 if (pp->found)
206 continue; 254 continue;
207 255
@@ -223,8 +271,8 @@ end_dwarf:
223#endif /* !NO_LIBDWARF */ 271#endif /* !NO_LIBDWARF */
224 272
225 /* Synthesize probes without dwarf */ 273 /* Synthesize probes without dwarf */
226 for (j = 0; j < session.nr_probe; j++) { 274 for (i = 0; i < session.nr_probe; i++) {
227 pp = &session.probes[j]; 275 pp = &session.probes[i];
228 if (pp->found) /* This probe is already found. */ 276 if (pp->found) /* This probe is already found. */
229 continue; 277 continue;
230 278
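Note: parse_probe_event_argv() glues the remaining command-line words back into one probe definition; the buffer is sized as the sum of the argument lengths plus one separator each, with one extra byte for the terminating NUL. A stand-alone version of the same join, using plain libc calls in place of perf's zalloc()/die() helpers:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *join_argv(int argc, const char **argv)
{
        int i, len = 0;
        char *buf;

        for (i = 0; i < argc; i++)
                len += strlen(argv[i]) + 1;     /* argument + trailing space */
        buf = calloc(1, len + 1);
        if (!buf)
                return NULL;
        len = 0;
        for (i = 0; i < argc; i++)
                len += sprintf(&buf[len], "%s ", argv[i]);
        return buf;
}

int main(void)
{
        const char *argv[] = { "do_sys_open", "dfd", "filename" };
        char *probe = join_argv(3, argv);

        printf("%s\n", probe ? probe : "(alloc failed)");
        free(probe);
        return 0;
}
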
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 383c4ab4f9af..2b9eb3a553ed 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -605,44 +605,41 @@ static int validate_chain(struct ip_callchain *chain, event_t *event)
605 605
606static int process_sample_event(event_t *event) 606static int process_sample_event(event_t *event)
607{ 607{
608 u64 ip = event->ip.ip; 608 struct sample_data data;
609 u64 period = 1;
610 void *more_data = event->ip.__more_data;
611 struct ip_callchain *chain = NULL;
612 int cpumode; 609 int cpumode;
613 struct addr_location al; 610 struct addr_location al;
614 struct thread *thread = threads__findnew(event->ip.pid); 611 struct thread *thread;
615 612
616 if (sample_type & PERF_SAMPLE_PERIOD) { 613 memset(&data, 0, sizeof(data));
617 period = *(u64 *)more_data; 614 data.period = 1;
618 more_data += sizeof(u64); 615
619 } 616 event__parse_sample(event, sample_type, &data);
620 617
621 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", 618 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
622 event->header.misc, 619 event->header.misc,
623 event->ip.pid, event->ip.tid, 620 data.pid, data.tid,
624 (void *)(long)ip, 621 (void *)(long)data.ip,
625 (long long)period); 622 (long long)data.period);
626 623
627 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 624 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
628 unsigned int i; 625 unsigned int i;
629 626
630 chain = (void *)more_data; 627 dump_printf("... chain: nr:%Lu\n", data.callchain->nr);
631
632 dump_printf("... chain: nr:%Lu\n", chain->nr);
633 628
634 if (validate_chain(chain, event) < 0) { 629 if (validate_chain(data.callchain, event) < 0) {
635 pr_debug("call-chain problem with event, " 630 pr_debug("call-chain problem with event, "
636 "skipping it.\n"); 631 "skipping it.\n");
637 return 0; 632 return 0;
638 } 633 }
639 634
640 if (dump_trace) { 635 if (dump_trace) {
641 for (i = 0; i < chain->nr; i++) 636 for (i = 0; i < data.callchain->nr; i++)
642 dump_printf("..... %2d: %016Lx\n", i, chain->ips[i]); 637 dump_printf("..... %2d: %016Lx\n",
638 i, data.callchain->ips[i]);
643 } 639 }
644 } 640 }
645 641
642 thread = threads__findnew(data.pid);
646 if (thread == NULL) { 643 if (thread == NULL) {
647 pr_debug("problem processing %d event, skipping it.\n", 644 pr_debug("problem processing %d event, skipping it.\n",
648 event->header.type); 645 event->header.type);
@@ -657,7 +654,7 @@ static int process_sample_event(event_t *event)
657 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 654 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
658 655
659 thread__find_addr_location(thread, cpumode, 656 thread__find_addr_location(thread, cpumode,
660 MAP__FUNCTION, ip, &al, NULL); 657 MAP__FUNCTION, data.ip, &al, NULL);
661 /* 658 /*
662 * We have to do this here as we may have a dso with no symbol hit that 659 * We have to do this here as we may have a dso with no symbol hit that
663 * has a name longer than the ones with symbols sampled. 660 * has a name longer than the ones with symbols sampled.
@@ -675,12 +672,12 @@ static int process_sample_event(event_t *event)
675 if (sym_list && al.sym && !strlist__has_entry(sym_list, al.sym->name)) 672 if (sym_list && al.sym && !strlist__has_entry(sym_list, al.sym->name))
676 return 0; 673 return 0;
677 674
678 if (hist_entry__add(&al, chain, period)) { 675 if (hist_entry__add(&al, data.callchain, data.period)) {
679 pr_debug("problem incrementing symbol count, skipping event\n"); 676 pr_debug("problem incrementing symbol count, skipping event\n");
680 return -1; 677 return -1;
681 } 678 }
682 679
683 event__stats.total += period; 680 event__stats.total += data.period;
684 681
685 return 0; 682 return 0;
686} 683}
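Note: the report path now reads the callchain through the parsed sample as well; the record itself is just a count followed by that many instruction pointers. A minimal dump loop over that layout; the struct shape is inferred from the nr/ips accesses above and the addresses are made-up examples:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct ip_callchain {
        uint64_t nr;
        uint64_t ips[];
};

static void dump_chain(const struct ip_callchain *chain)
{
        uint64_t i;

        printf("... chain: nr:%llu\n", (unsigned long long)chain->nr);
        for (i = 0; i < chain->nr; i++)
                printf("..... %2llu: %016llx\n",
                       (unsigned long long)i,
                       (unsigned long long)chain->ips[i]);
}

int main(void)
{
        struct ip_callchain *chain = malloc(sizeof(*chain) + 3 * sizeof(uint64_t));

        if (!chain)
                return 1;
        chain->nr = 3;
        chain->ips[0] = 0xffffffff81012345ull;
        chain->ips[1] = 0xffffffff81067890ull;
        chain->ips[2] = 0x400123ull;
        dump_chain(chain);
        free(chain);
        return 0;
}
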
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 26b782f26ee1..7cca7c15b40a 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -13,7 +13,6 @@
13#include "util/debug.h" 13#include "util/debug.h"
14#include "util/data_map.h" 14#include "util/data_map.h"
15 15
16#include <sys/types.h>
17#include <sys/prctl.h> 16#include <sys/prctl.h>
18 17
19#include <semaphore.h> 18#include <semaphore.h>
@@ -141,6 +140,7 @@ struct work_atoms {
141 struct thread *thread; 140 struct thread *thread;
142 struct rb_node node; 141 struct rb_node node;
143 u64 max_lat; 142 u64 max_lat;
143 u64 max_lat_at;
144 u64 total_lat; 144 u64 total_lat;
145 u64 nb_atoms; 145 u64 nb_atoms;
146 u64 total_runtime; 146 u64 total_runtime;
@@ -414,34 +414,33 @@ static u64 get_cpu_usage_nsec_parent(void)
414 return sum; 414 return sum;
415} 415}
416 416
417static u64 get_cpu_usage_nsec_self(void) 417static int self_open_counters(void)
418{ 418{
419 char filename [] = "/proc/1234567890/sched"; 419 struct perf_event_attr attr;
420 unsigned long msecs, nsecs; 420 int fd;
421 char *line = NULL;
422 u64 total = 0;
423 size_t len = 0;
424 ssize_t chars;
425 FILE *file;
426 int ret;
427 421
428 sprintf(filename, "/proc/%d/sched", getpid()); 422 memset(&attr, 0, sizeof(attr));
429 file = fopen(filename, "r");
430 BUG_ON(!file);
431 423
432 while ((chars = getline(&line, &len, file)) != -1) { 424 attr.type = PERF_TYPE_SOFTWARE;
433 ret = sscanf(line, "se.sum_exec_runtime : %ld.%06ld\n", 425 attr.config = PERF_COUNT_SW_TASK_CLOCK;
434 &msecs, &nsecs); 426
435 if (ret == 2) { 427 fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
436 total = msecs*1e6 + nsecs;
437 break;
438 }
439 }
440 if (line)
441 free(line);
442 fclose(file);
443 428
444 return total; 429 if (fd < 0)
430 die("Error: sys_perf_event_open() syscall returned"
431 "with %d (%s)\n", fd, strerror(errno));
432 return fd;
433}
434
435static u64 get_cpu_usage_nsec_self(int fd)
436{
437 u64 runtime;
438 int ret;
439
440 ret = read(fd, &runtime, sizeof(runtime));
441 BUG_ON(ret != sizeof(runtime));
442
443 return runtime;
445} 444}
446 445
447static void *thread_func(void *ctx) 446static void *thread_func(void *ctx)
@@ -450,9 +449,11 @@ static void *thread_func(void *ctx)
450 u64 cpu_usage_0, cpu_usage_1; 449 u64 cpu_usage_0, cpu_usage_1;
451 unsigned long i, ret; 450 unsigned long i, ret;
452 char comm2[22]; 451 char comm2[22];
452 int fd;
453 453
454 sprintf(comm2, ":%s", this_task->comm); 454 sprintf(comm2, ":%s", this_task->comm);
455 prctl(PR_SET_NAME, comm2); 455 prctl(PR_SET_NAME, comm2);
456 fd = self_open_counters();
456 457
457again: 458again:
458 ret = sem_post(&this_task->ready_for_work); 459 ret = sem_post(&this_task->ready_for_work);
@@ -462,16 +463,15 @@ again:
462 ret = pthread_mutex_unlock(&start_work_mutex); 463 ret = pthread_mutex_unlock(&start_work_mutex);
463 BUG_ON(ret); 464 BUG_ON(ret);
464 465
465 cpu_usage_0 = get_cpu_usage_nsec_self(); 466 cpu_usage_0 = get_cpu_usage_nsec_self(fd);
466 467
467 for (i = 0; i < this_task->nr_events; i++) { 468 for (i = 0; i < this_task->nr_events; i++) {
468 this_task->curr_event = i; 469 this_task->curr_event = i;
469 process_sched_event(this_task, this_task->atoms[i]); 470 process_sched_event(this_task, this_task->atoms[i]);
470 } 471 }
471 472
472 cpu_usage_1 = get_cpu_usage_nsec_self(); 473 cpu_usage_1 = get_cpu_usage_nsec_self(fd);
473 this_task->cpu_usage = cpu_usage_1 - cpu_usage_0; 474 this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
474
475 ret = sem_post(&this_task->work_done_sem); 475 ret = sem_post(&this_task->work_done_sem);
476 BUG_ON(ret); 476 BUG_ON(ret);
477 477
@@ -628,11 +628,6 @@ static void test_calibrations(void)
628 printf("the sleep test took %Ld nsecs\n", T1-T0); 628 printf("the sleep test took %Ld nsecs\n", T1-T0);
629} 629}
630 630
631struct raw_event_sample {
632 u32 size;
633 char data[0];
634};
635
636#define FILL_FIELD(ptr, field, event, data) \ 631#define FILL_FIELD(ptr, field, event, data) \
637 ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data) 632 ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data)
638 633
@@ -1019,8 +1014,10 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
1019 1014
1020 delta = atom->sched_in_time - atom->wake_up_time; 1015 delta = atom->sched_in_time - atom->wake_up_time;
1021 atoms->total_lat += delta; 1016 atoms->total_lat += delta;
1022 if (delta > atoms->max_lat) 1017 if (delta > atoms->max_lat) {
1023 atoms->max_lat = delta; 1018 atoms->max_lat = delta;
1019 atoms->max_lat_at = timestamp;
1020 }
1024 atoms->nb_atoms++; 1021 atoms->nb_atoms++;
1025} 1022}
1026 1023
@@ -1216,10 +1213,11 @@ static void output_lat_thread(struct work_atoms *work_list)
1216 1213
1217 avg = work_list->total_lat / work_list->nb_atoms; 1214 avg = work_list->total_lat / work_list->nb_atoms;
1218 1215
1219 printf("|%11.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms |\n", 1216 printf("|%11.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms | max at: %9.6f s\n",
1220 (double)work_list->total_runtime / 1e6, 1217 (double)work_list->total_runtime / 1e6,
1221 work_list->nb_atoms, (double)avg / 1e6, 1218 work_list->nb_atoms, (double)avg / 1e6,
1222 (double)work_list->max_lat / 1e6); 1219 (double)work_list->max_lat / 1e6,
1220 (double)work_list->max_lat_at / 1e9);
1223} 1221}
1224 1222
1225static int pid_cmp(struct work_atoms *l, struct work_atoms *r) 1223static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
@@ -1356,7 +1354,7 @@ static void sort_lat(void)
1356static struct trace_sched_handler *trace_handler; 1354static struct trace_sched_handler *trace_handler;
1357 1355
1358static void 1356static void
1359process_sched_wakeup_event(struct raw_event_sample *raw, 1357process_sched_wakeup_event(void *data,
1360 struct event *event, 1358 struct event *event,
1361 int cpu __used, 1359 int cpu __used,
1362 u64 timestamp __used, 1360 u64 timestamp __used,
@@ -1364,13 +1362,13 @@ process_sched_wakeup_event(struct raw_event_sample *raw,
1364{ 1362{
1365 struct trace_wakeup_event wakeup_event; 1363 struct trace_wakeup_event wakeup_event;
1366 1364
1367 FILL_COMMON_FIELDS(wakeup_event, event, raw->data); 1365 FILL_COMMON_FIELDS(wakeup_event, event, data);
1368 1366
1369 FILL_ARRAY(wakeup_event, comm, event, raw->data); 1367 FILL_ARRAY(wakeup_event, comm, event, data);
1370 FILL_FIELD(wakeup_event, pid, event, raw->data); 1368 FILL_FIELD(wakeup_event, pid, event, data);
1371 FILL_FIELD(wakeup_event, prio, event, raw->data); 1369 FILL_FIELD(wakeup_event, prio, event, data);
1372 FILL_FIELD(wakeup_event, success, event, raw->data); 1370 FILL_FIELD(wakeup_event, success, event, data);
1373 FILL_FIELD(wakeup_event, cpu, event, raw->data); 1371 FILL_FIELD(wakeup_event, cpu, event, data);
1374 1372
1375 if (trace_handler->wakeup_event) 1373 if (trace_handler->wakeup_event)
1376 trace_handler->wakeup_event(&wakeup_event, event, cpu, timestamp, thread); 1374 trace_handler->wakeup_event(&wakeup_event, event, cpu, timestamp, thread);
@@ -1469,7 +1467,7 @@ map_switch_event(struct trace_switch_event *switch_event,
1469 1467
1470 1468
1471static void 1469static void
1472process_sched_switch_event(struct raw_event_sample *raw, 1470process_sched_switch_event(void *data,
1473 struct event *event, 1471 struct event *event,
1474 int this_cpu, 1472 int this_cpu,
1475 u64 timestamp __used, 1473 u64 timestamp __used,
@@ -1477,15 +1475,15 @@ process_sched_switch_event(struct raw_event_sample *raw,
1477{ 1475{
1478 struct trace_switch_event switch_event; 1476 struct trace_switch_event switch_event;
1479 1477
1480 FILL_COMMON_FIELDS(switch_event, event, raw->data); 1478 FILL_COMMON_FIELDS(switch_event, event, data);
1481 1479
1482 FILL_ARRAY(switch_event, prev_comm, event, raw->data); 1480 FILL_ARRAY(switch_event, prev_comm, event, data);
1483 FILL_FIELD(switch_event, prev_pid, event, raw->data); 1481 FILL_FIELD(switch_event, prev_pid, event, data);
1484 FILL_FIELD(switch_event, prev_prio, event, raw->data); 1482 FILL_FIELD(switch_event, prev_prio, event, data);
1485 FILL_FIELD(switch_event, prev_state, event, raw->data); 1483 FILL_FIELD(switch_event, prev_state, event, data);
1486 FILL_ARRAY(switch_event, next_comm, event, raw->data); 1484 FILL_ARRAY(switch_event, next_comm, event, data);
1487 FILL_FIELD(switch_event, next_pid, event, raw->data); 1485 FILL_FIELD(switch_event, next_pid, event, data);
1488 FILL_FIELD(switch_event, next_prio, event, raw->data); 1486 FILL_FIELD(switch_event, next_prio, event, data);
1489 1487
1490 if (curr_pid[this_cpu] != (u32)-1) { 1488 if (curr_pid[this_cpu] != (u32)-1) {
1491 /* 1489 /*
@@ -1502,7 +1500,7 @@ process_sched_switch_event(struct raw_event_sample *raw,
1502} 1500}
1503 1501
1504static void 1502static void
1505process_sched_runtime_event(struct raw_event_sample *raw, 1503process_sched_runtime_event(void *data,
1506 struct event *event, 1504 struct event *event,
1507 int cpu __used, 1505 int cpu __used,
1508 u64 timestamp __used, 1506 u64 timestamp __used,
@@ -1510,17 +1508,17 @@ process_sched_runtime_event(struct raw_event_sample *raw,
1510{ 1508{
1511 struct trace_runtime_event runtime_event; 1509 struct trace_runtime_event runtime_event;
1512 1510
1513 FILL_ARRAY(runtime_event, comm, event, raw->data); 1511 FILL_ARRAY(runtime_event, comm, event, data);
1514 FILL_FIELD(runtime_event, pid, event, raw->data); 1512 FILL_FIELD(runtime_event, pid, event, data);
1515 FILL_FIELD(runtime_event, runtime, event, raw->data); 1513 FILL_FIELD(runtime_event, runtime, event, data);
1516 FILL_FIELD(runtime_event, vruntime, event, raw->data); 1514 FILL_FIELD(runtime_event, vruntime, event, data);
1517 1515
1518 if (trace_handler->runtime_event) 1516 if (trace_handler->runtime_event)
1519 trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread); 1517 trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread);
1520} 1518}
1521 1519
1522static void 1520static void
1523process_sched_fork_event(struct raw_event_sample *raw, 1521process_sched_fork_event(void *data,
1524 struct event *event, 1522 struct event *event,
1525 int cpu __used, 1523 int cpu __used,
1526 u64 timestamp __used, 1524 u64 timestamp __used,
@@ -1528,12 +1526,12 @@ process_sched_fork_event(struct raw_event_sample *raw,
1528{ 1526{
1529 struct trace_fork_event fork_event; 1527 struct trace_fork_event fork_event;
1530 1528
1531 FILL_COMMON_FIELDS(fork_event, event, raw->data); 1529 FILL_COMMON_FIELDS(fork_event, event, data);
1532 1530
1533 FILL_ARRAY(fork_event, parent_comm, event, raw->data); 1531 FILL_ARRAY(fork_event, parent_comm, event, data);
1534 FILL_FIELD(fork_event, parent_pid, event, raw->data); 1532 FILL_FIELD(fork_event, parent_pid, event, data);
1535 FILL_ARRAY(fork_event, child_comm, event, raw->data); 1533 FILL_ARRAY(fork_event, child_comm, event, data);
1536 FILL_FIELD(fork_event, child_pid, event, raw->data); 1534 FILL_FIELD(fork_event, child_pid, event, data);
1537 1535
1538 if (trace_handler->fork_event) 1536 if (trace_handler->fork_event)
1539 trace_handler->fork_event(&fork_event, event, cpu, timestamp, thread); 1537 trace_handler->fork_event(&fork_event, event, cpu, timestamp, thread);
@@ -1550,7 +1548,7 @@ process_sched_exit_event(struct event *event,
1550} 1548}
1551 1549
1552static void 1550static void
1553process_sched_migrate_task_event(struct raw_event_sample *raw, 1551process_sched_migrate_task_event(void *data,
1554 struct event *event, 1552 struct event *event,
1555 int cpu __used, 1553 int cpu __used,
1556 u64 timestamp __used, 1554 u64 timestamp __used,
@@ -1558,80 +1556,66 @@ process_sched_migrate_task_event(struct raw_event_sample *raw,
1558{ 1556{
1559 struct trace_migrate_task_event migrate_task_event; 1557 struct trace_migrate_task_event migrate_task_event;
1560 1558
1561 FILL_COMMON_FIELDS(migrate_task_event, event, raw->data); 1559 FILL_COMMON_FIELDS(migrate_task_event, event, data);
1562 1560
1563 FILL_ARRAY(migrate_task_event, comm, event, raw->data); 1561 FILL_ARRAY(migrate_task_event, comm, event, data);
1564 FILL_FIELD(migrate_task_event, pid, event, raw->data); 1562 FILL_FIELD(migrate_task_event, pid, event, data);
1565 FILL_FIELD(migrate_task_event, prio, event, raw->data); 1563 FILL_FIELD(migrate_task_event, prio, event, data);
1566 FILL_FIELD(migrate_task_event, cpu, event, raw->data); 1564 FILL_FIELD(migrate_task_event, cpu, event, data);
1567 1565
1568 if (trace_handler->migrate_task_event) 1566 if (trace_handler->migrate_task_event)
1569 trace_handler->migrate_task_event(&migrate_task_event, event, cpu, timestamp, thread); 1567 trace_handler->migrate_task_event(&migrate_task_event, event, cpu, timestamp, thread);
1570} 1568}
1571 1569
1572static void 1570static void
1573process_raw_event(event_t *raw_event __used, void *more_data, 1571process_raw_event(event_t *raw_event __used, void *data,
1574 int cpu, u64 timestamp, struct thread *thread) 1572 int cpu, u64 timestamp, struct thread *thread)
1575{ 1573{
1576 struct raw_event_sample *raw = more_data;
1577 struct event *event; 1574 struct event *event;
1578 int type; 1575 int type;
1579 1576
1580 type = trace_parse_common_type(raw->data); 1577
1578 type = trace_parse_common_type(data);
1581 event = trace_find_event(type); 1579 event = trace_find_event(type);
1582 1580
1583 if (!strcmp(event->name, "sched_switch")) 1581 if (!strcmp(event->name, "sched_switch"))
1584 process_sched_switch_event(raw, event, cpu, timestamp, thread); 1582 process_sched_switch_event(data, event, cpu, timestamp, thread);
1585 if (!strcmp(event->name, "sched_stat_runtime")) 1583 if (!strcmp(event->name, "sched_stat_runtime"))
1586 process_sched_runtime_event(raw, event, cpu, timestamp, thread); 1584 process_sched_runtime_event(data, event, cpu, timestamp, thread);
1587 if (!strcmp(event->name, "sched_wakeup")) 1585 if (!strcmp(event->name, "sched_wakeup"))
1588 process_sched_wakeup_event(raw, event, cpu, timestamp, thread); 1586 process_sched_wakeup_event(data, event, cpu, timestamp, thread);
1589 if (!strcmp(event->name, "sched_wakeup_new")) 1587 if (!strcmp(event->name, "sched_wakeup_new"))
1590 process_sched_wakeup_event(raw, event, cpu, timestamp, thread); 1588 process_sched_wakeup_event(data, event, cpu, timestamp, thread);
1591 if (!strcmp(event->name, "sched_process_fork")) 1589 if (!strcmp(event->name, "sched_process_fork"))
1592 process_sched_fork_event(raw, event, cpu, timestamp, thread); 1590 process_sched_fork_event(data, event, cpu, timestamp, thread);
1593 if (!strcmp(event->name, "sched_process_exit")) 1591 if (!strcmp(event->name, "sched_process_exit"))
1594 process_sched_exit_event(event, cpu, timestamp, thread); 1592 process_sched_exit_event(event, cpu, timestamp, thread);
1595 if (!strcmp(event->name, "sched_migrate_task")) 1593 if (!strcmp(event->name, "sched_migrate_task"))
1596 process_sched_migrate_task_event(raw, event, cpu, timestamp, thread); 1594 process_sched_migrate_task_event(data, event, cpu, timestamp, thread);
1597} 1595}
1598 1596
1599static int process_sample_event(event_t *event) 1597static int process_sample_event(event_t *event)
1600{ 1598{
1599 struct sample_data data;
1601 struct thread *thread; 1600 struct thread *thread;
1602 u64 ip = event->ip.ip;
1603 u64 timestamp = -1;
1604 u32 cpu = -1;
1605 u64 period = 1;
1606 void *more_data = event->ip.__more_data;
1607 1601
1608 if (!(sample_type & PERF_SAMPLE_RAW)) 1602 if (!(sample_type & PERF_SAMPLE_RAW))
1609 return 0; 1603 return 0;
1610 1604
1611 thread = threads__findnew(event->ip.pid); 1605 memset(&data, 0, sizeof(data));
1606 data.time = -1;
1607 data.cpu = -1;
1608 data.period = -1;
1612 1609
1613 if (sample_type & PERF_SAMPLE_TIME) { 1610 event__parse_sample(event, sample_type, &data);
1614 timestamp = *(u64 *)more_data;
1615 more_data += sizeof(u64);
1616 }
1617
1618 if (sample_type & PERF_SAMPLE_CPU) {
1619 cpu = *(u32 *)more_data;
1620 more_data += sizeof(u32);
1621 more_data += sizeof(u32); /* reserved */
1622 }
1623
1624 if (sample_type & PERF_SAMPLE_PERIOD) {
1625 period = *(u64 *)more_data;
1626 more_data += sizeof(u64);
1627 }
1628 1611
1629 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", 1612 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
1630 event->header.misc, 1613 event->header.misc,
1631 event->ip.pid, event->ip.tid, 1614 data.pid, data.tid,
1632 (void *)(long)ip, 1615 (void *)(long)data.ip,
1633 (long long)period); 1616 (long long)data.period);
1634 1617
1618 thread = threads__findnew(data.pid);
1635 if (thread == NULL) { 1619 if (thread == NULL) {
1636 pr_debug("problem processing %d event, skipping it.\n", 1620 pr_debug("problem processing %d event, skipping it.\n",
1637 event->header.type); 1621 event->header.type);
@@ -1640,10 +1624,10 @@ static int process_sample_event(event_t *event)
1640 1624
1641 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 1625 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
1642 1626
1643 if (profile_cpu != -1 && profile_cpu != (int) cpu) 1627 if (profile_cpu != -1 && profile_cpu != (int)data.cpu)
1644 return 0; 1628 return 0;
1645 1629
1646 process_raw_event(event, more_data, cpu, timestamp, thread); 1630 process_raw_event(event, data.raw_data, data.cpu, data.time, thread);
1647 1631
1648 return 0; 1632 return 0;
1649} 1633}
@@ -1724,9 +1708,9 @@ static void __cmd_lat(void)
1724 read_events(); 1708 read_events();
1725 sort_lat(); 1709 sort_lat();
1726 1710
1727 printf("\n -----------------------------------------------------------------------------------------\n"); 1711 printf("\n ---------------------------------------------------------------------------------------------------------------\n");
1728 printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |\n"); 1712 printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
1729 printf(" -----------------------------------------------------------------------------------------\n"); 1713 printf(" ---------------------------------------------------------------------------------------------------------------\n");
1730 1714
1731 next = rb_first(&sorted_atom_root); 1715 next = rb_first(&sorted_atom_root);
1732 1716
@@ -1902,13 +1886,18 @@ static int __cmd_record(int argc, const char **argv)
1902 1886
1903int cmd_sched(int argc, const char **argv, const char *prefix __used) 1887int cmd_sched(int argc, const char **argv, const char *prefix __used)
1904{ 1888{
1905 symbol__init(0);
1906
1907 argc = parse_options(argc, argv, sched_options, sched_usage, 1889 argc = parse_options(argc, argv, sched_options, sched_usage,
1908 PARSE_OPT_STOP_AT_NON_OPTION); 1890 PARSE_OPT_STOP_AT_NON_OPTION);
1909 if (!argc) 1891 if (!argc)
1910 usage_with_options(sched_usage, sched_options); 1892 usage_with_options(sched_usage, sched_options);
1911 1893
1894 /*
1895 * Aliased to 'perf trace' for now:
1896 */
1897 if (!strcmp(argv[0], "trace"))
1898 return cmd_trace(argc, argv, prefix);
1899
1900 symbol__init(0);
1912 if (!strncmp(argv[0], "rec", 3)) { 1901 if (!strncmp(argv[0], "rec", 3)) {
1913 return __cmd_record(argc, argv); 1902 return __cmd_record(argc, argv);
1914 } else if (!strncmp(argv[0], "lat", 3)) { 1903 } else if (!strncmp(argv[0], "lat", 3)) {
@@ -1932,11 +1921,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
1932 usage_with_options(replay_usage, replay_options); 1921 usage_with_options(replay_usage, replay_options);
1933 } 1922 }
1934 __cmd_replay(); 1923 __cmd_replay();
1935 } else if (!strcmp(argv[0], "trace")) {
1936 /*
1937 * Aliased to 'perf trace' for now:
1938 */
1939 return cmd_trace(argc, argv, prefix);
1940 } else { 1924 } else {
1941 usage_with_options(sched_usage, sched_options); 1925 usage_with_options(sched_usage, sched_options);
1942 } 1926 }
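
The change above replaces the /proc/<pid>/sched text parsing in 'perf sched replay' with a per-thread software counter: each worker opens a PERF_COUNT_SW_TASK_CLOCK event on itself and read()s the accumulated nanoseconds before and after its simulated work. A minimal standalone sketch of that pattern (not the tool's own helper; it invokes perf_event_open() via syscall(), since glibc provides no wrapper):

/* Counter-based self-timing, assuming a Linux kernel with perf events. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int self_task_clock_fd(void)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size   = sizeof(attr);
        attr.type   = PERF_TYPE_SOFTWARE;
        attr.config = PERF_COUNT_SW_TASK_CLOCK;
        /* pid = 0, cpu = -1: count the calling thread on any CPU */
        return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

int main(void)
{
        uint64_t t0, t1;
        int fd = self_task_clock_fd();

        if (fd < 0)
                return 1;
        read(fd, &t0, sizeof(t0));
        /* ... the work being measured goes here ... */
        read(fd, &t1, sizeof(t1));
        printf("task clock consumed: %llu ns\n", (unsigned long long)(t1 - t0));
        close(fd);
        return 0;
}
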
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index cb58b6605fcc..f472df9561ee 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -302,12 +302,11 @@ process_exit_event(event_t *event)
302} 302}
303 303
304struct trace_entry { 304struct trace_entry {
305 u32 size;
306 unsigned short type; 305 unsigned short type;
307 unsigned char flags; 306 unsigned char flags;
308 unsigned char preempt_count; 307 unsigned char preempt_count;
309 int pid; 308 int pid;
310 int tgid; 309 int lock_depth;
311}; 310};
312 311
313struct power_entry { 312struct power_entry {
@@ -484,43 +483,22 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
484static int 483static int
485process_sample_event(event_t *event) 484process_sample_event(event_t *event)
486{ 485{
487 int cursor = 0; 486 struct sample_data data;
488 u64 addr = 0;
489 u64 stamp = 0;
490 u32 cpu = 0;
491 u32 pid = 0;
492 struct trace_entry *te; 487 struct trace_entry *te;
493 488
494 if (sample_type & PERF_SAMPLE_IP) 489 memset(&data, 0, sizeof(data));
495 cursor++;
496
497 if (sample_type & PERF_SAMPLE_TID) {
498 pid = event->sample.array[cursor]>>32;
499 cursor++;
500 }
501 if (sample_type & PERF_SAMPLE_TIME) {
502 stamp = event->sample.array[cursor++];
503 490
504 if (!first_time || first_time > stamp) 491 event__parse_sample(event, sample_type, &data);
505 first_time = stamp;
506 if (last_time < stamp)
507 last_time = stamp;
508 492
493 if (sample_type & PERF_SAMPLE_TIME) {
494 if (!first_time || first_time > data.time)
495 first_time = data.time;
496 if (last_time < data.time)
497 last_time = data.time;
509 } 498 }
510 if (sample_type & PERF_SAMPLE_ADDR)
511 addr = event->sample.array[cursor++];
512 if (sample_type & PERF_SAMPLE_ID)
513 cursor++;
514 if (sample_type & PERF_SAMPLE_STREAM_ID)
515 cursor++;
516 if (sample_type & PERF_SAMPLE_CPU)
517 cpu = event->sample.array[cursor++] & 0xFFFFFFFF;
518 if (sample_type & PERF_SAMPLE_PERIOD)
519 cursor++;
520
521 te = (void *)&event->sample.array[cursor];
522 499
523 if (sample_type & PERF_SAMPLE_RAW && te->size > 0) { 500 te = (void *)data.raw_data;
501 if (sample_type & PERF_SAMPLE_RAW && data.raw_size > 0) {
524 char *event_str; 502 char *event_str;
525 struct power_entry *pe; 503 struct power_entry *pe;
526 504
@@ -532,19 +510,19 @@ process_sample_event(event_t *event)
532 return 0; 510 return 0;
533 511
534 if (strcmp(event_str, "power:power_start") == 0) 512 if (strcmp(event_str, "power:power_start") == 0)
535 c_state_start(cpu, stamp, pe->value); 513 c_state_start(data.cpu, data.time, pe->value);
536 514
537 if (strcmp(event_str, "power:power_end") == 0) 515 if (strcmp(event_str, "power:power_end") == 0)
538 c_state_end(cpu, stamp); 516 c_state_end(data.cpu, data.time);
539 517
540 if (strcmp(event_str, "power:power_frequency") == 0) 518 if (strcmp(event_str, "power:power_frequency") == 0)
541 p_state_change(cpu, stamp, pe->value); 519 p_state_change(data.cpu, data.time, pe->value);
542 520
543 if (strcmp(event_str, "sched:sched_wakeup") == 0) 521 if (strcmp(event_str, "sched:sched_wakeup") == 0)
544 sched_wakeup(cpu, stamp, pid, te); 522 sched_wakeup(data.cpu, data.time, data.pid, te);
545 523
546 if (strcmp(event_str, "sched:sched_switch") == 0) 524 if (strcmp(event_str, "sched:sched_switch") == 0)
547 sched_switch(cpu, stamp, te); 525 sched_switch(data.cpu, data.time, te);
548 } 526 }
549 return 0; 527 return 0;
550} 528}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index abb914aa7be6..c2fcc34486f5 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -66,58 +66,40 @@ static u64 sample_type;
66 66
67static int process_sample_event(event_t *event) 67static int process_sample_event(event_t *event)
68{ 68{
69 u64 ip = event->ip.ip; 69 struct sample_data data;
70 u64 timestamp = -1; 70 struct thread *thread;
71 u32 cpu = -1;
72 u64 period = 1;
73 void *more_data = event->ip.__more_data;
74 struct thread *thread = threads__findnew(event->ip.pid);
75
76 if (sample_type & PERF_SAMPLE_TIME) {
77 timestamp = *(u64 *)more_data;
78 more_data += sizeof(u64);
79 }
80 71
81 if (sample_type & PERF_SAMPLE_CPU) { 72 memset(&data, 0, sizeof(data));
82 cpu = *(u32 *)more_data; 73 data.time = -1;
83 more_data += sizeof(u32); 74 data.cpu = -1;
84 more_data += sizeof(u32); /* reserved */ 75 data.period = 1;
85 }
86 76
87 if (sample_type & PERF_SAMPLE_PERIOD) { 77 event__parse_sample(event, sample_type, &data);
88 period = *(u64 *)more_data;
89 more_data += sizeof(u64);
90 }
91 78
92 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", 79 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
93 event->header.misc, 80 event->header.misc,
94 event->ip.pid, event->ip.tid, 81 data.pid, data.tid,
95 (void *)(long)ip, 82 (void *)(long)data.ip,
96 (long long)period); 83 (long long)data.period);
97 84
85 thread = threads__findnew(event->ip.pid);
98 if (thread == NULL) { 86 if (thread == NULL) {
99 pr_debug("problem processing %d event, skipping it.\n", 87 pr_debug("problem processing %d event, skipping it.\n",
100 event->header.type); 88 event->header.type);
101 return -1; 89 return -1;
102 } 90 }
103 91
104 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
105
106 if (sample_type & PERF_SAMPLE_RAW) { 92 if (sample_type & PERF_SAMPLE_RAW) {
107 struct {
108 u32 size;
109 char data[0];
110 } *raw = more_data;
111
112 /* 93 /*
113 * FIXME: better resolve from pid from the struct trace_entry 94 * FIXME: better resolve from pid from the struct trace_entry
114 * field, although it should be the same than this perf 95 * field, although it should be the same than this perf
115 * event pid 96 * event pid
116 */ 97 */
117 scripting_ops->process_event(cpu, raw->data, raw->size, 98 scripting_ops->process_event(data.cpu, data.raw_data,
118 timestamp, thread->comm); 99 data.raw_size,
100 data.time, thread->comm);
119 } 101 }
120 event__stats.total += period; 102 event__stats.total += data.period;
121 103
122 return 0; 104 return 0;
123} 105}
diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c
index ca0bedf637c2..59b65d0bd7c1 100644
--- a/tools/perf/util/data_map.c
+++ b/tools/perf/util/data_map.c
@@ -100,11 +100,11 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
100 } 100 }
101} 101}
102 102
103int perf_header__read_build_ids(int input, off_t offset, off_t size) 103int perf_header__read_build_ids(int input, u64 offset, u64 size)
104{ 104{
105 struct build_id_event bev; 105 struct build_id_event bev;
106 char filename[PATH_MAX]; 106 char filename[PATH_MAX];
107 off_t limit = offset + size; 107 u64 limit = offset + size;
108 int err = -1; 108 int err = -1;
109 109
110 while (offset < limit) { 110 while (offset < limit) {
diff --git a/tools/perf/util/data_map.h b/tools/perf/util/data_map.h
index 3180ff7e3633..258a87bcc4fb 100644
--- a/tools/perf/util/data_map.h
+++ b/tools/perf/util/data_map.h
@@ -27,6 +27,6 @@ int mmap_dispatch_perf_file(struct perf_header **pheader,
27 int full_paths, 27 int full_paths,
28 int *cwdlen, 28 int *cwdlen,
29 char **cwd); 29 char **cwd);
30int perf_header__read_build_ids(int input, off_t offset, off_t file_size); 30int perf_header__read_build_ids(int input, u64 offset, u64 file_size);
31 31
32#endif 32#endif
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 414b89d1bde9..4dcecafa85dc 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -310,3 +310,70 @@ int event__preprocess_sample(const event_t *self, struct addr_location *al,
310 al->level == 'H' ? "[hypervisor]" : "<not found>"); 310 al->level == 'H' ? "[hypervisor]" : "<not found>");
311 return 0; 311 return 0;
312} 312}
313
314int event__parse_sample(event_t *event, u64 type, struct sample_data *data)
315{
316 u64 *array = event->sample.array;
317
318 if (type & PERF_SAMPLE_IP) {
319 data->ip = event->ip.ip;
320 array++;
321 }
322
323 if (type & PERF_SAMPLE_TID) {
324 u32 *p = (u32 *)array;
325 data->pid = p[0];
326 data->tid = p[1];
327 array++;
328 }
329
330 if (type & PERF_SAMPLE_TIME) {
331 data->time = *array;
332 array++;
333 }
334
335 if (type & PERF_SAMPLE_ADDR) {
336 data->addr = *array;
337 array++;
338 }
339
340 if (type & PERF_SAMPLE_ID) {
341 data->id = *array;
342 array++;
343 }
344
345 if (type & PERF_SAMPLE_STREAM_ID) {
346 data->stream_id = *array;
347 array++;
348 }
349
350 if (type & PERF_SAMPLE_CPU) {
351 u32 *p = (u32 *)array;
352 data->cpu = *p;
353 array++;
354 }
355
356 if (type & PERF_SAMPLE_PERIOD) {
357 data->period = *array;
358 array++;
359 }
360
361 if (type & PERF_SAMPLE_READ) {
362 pr_debug("PERF_SAMPLE_READ is unsuported for now\n");
363 return -1;
364 }
365
366 if (type & PERF_SAMPLE_CALLCHAIN) {
367 data->callchain = (struct ip_callchain *)array;
368 array += 1 + data->callchain->nr;
369 }
370
371 if (type & PERF_SAMPLE_RAW) {
372 u32 *p = (u32 *)array;
373 data->raw_size = *p;
374 p++;
375 data->raw_data = p;
376 }
377
378 return 0;
379}
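
event__parse_sample() works because the kernel lays the optional sample fields out back to back in a fixed order, one PERF_SAMPLE_* bit after another, so the parser only has to test the same bits in the same order while advancing a u64 cursor. A self-contained illustration of that cursor walk, limited to the TID/TIME/CPU bits (the struct and the sample values are invented for the sketch):

#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>

struct mini_sample { uint32_t pid, tid, cpu; uint64_t time; };

/* Walk a raw sample body in PERF_SAMPLE_* bit order, as the new helper does. */
static void parse_mini(const uint64_t *array, uint64_t type,
                       struct mini_sample *out)
{
        if (type & PERF_SAMPLE_TID) {
                const uint32_t *p = (const uint32_t *)array;
                out->pid = p[0];
                out->tid = p[1];
                array++;
        }
        if (type & PERF_SAMPLE_TIME)
                out->time = *array++;
        if (type & PERF_SAMPLE_CPU) {
                out->cpu = *(const uint32_t *)array; /* upper half is reserved */
                array++;
        }
}

int main(void)
{
        uint64_t rec[3] = { 0 };
        struct mini_sample s = { 0 };

        ((uint32_t *)&rec[0])[0] = 1234;        /* pid  */
        ((uint32_t *)&rec[0])[1] = 1235;        /* tid  */
        rec[1] = 123456789ULL;                  /* time */
        ((uint32_t *)&rec[2])[0] = 3;           /* cpu  */

        parse_mini(rec, PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU, &s);
        printf("pid=%u tid=%u time=%llu cpu=%u\n",
               s.pid, s.tid, (unsigned long long)s.time, s.cpu);
        return 0;
}

A bit that is set but skipped, or tested out of order, misreads every later field, which is why the tools above now share this single helper instead of re-implementing the cursor arithmetic per builtin.
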
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index a4cc8105cf67..c7a78eef8e52 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -56,11 +56,25 @@ struct read_event {
56 u64 id; 56 u64 id;
57}; 57};
58 58
59struct sample_event{ 59struct sample_event {
60 struct perf_event_header header; 60 struct perf_event_header header;
61 u64 array[]; 61 u64 array[];
62}; 62};
63 63
64struct sample_data {
65 u64 ip;
66 u32 pid, tid;
67 u64 time;
68 u64 addr;
69 u64 id;
70 u64 stream_id;
71 u32 cpu;
72 u64 period;
73 struct ip_callchain *callchain;
74 u32 raw_size;
75 void *raw_data;
76};
77
64#define BUILD_ID_SIZE 20 78#define BUILD_ID_SIZE 20
65 79
66struct build_id_event { 80struct build_id_event {
@@ -155,5 +169,6 @@ int event__process_task(event_t *self);
155struct addr_location; 169struct addr_location;
156int event__preprocess_sample(const event_t *self, struct addr_location *al, 170int event__preprocess_sample(const event_t *self, struct addr_location *al,
157 symbol_filter_t filter); 171 symbol_filter_t filter);
172int event__parse_sample(event_t *event, u64 type, struct sample_data *data);
158 173
159#endif /* __PERF_RECORD_H */ 174#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 4805e6dfd23c..59a9c0b3033e 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -187,7 +187,9 @@ static int do_write(int fd, const void *buf, size_t size)
187 187
188static int __dsos__write_buildid_table(struct list_head *head, int fd) 188static int __dsos__write_buildid_table(struct list_head *head, int fd)
189{ 189{
190#define NAME_ALIGN 64
190 struct dso *pos; 191 struct dso *pos;
192 static const char zero_buf[NAME_ALIGN];
191 193
192 list_for_each_entry(pos, head, node) { 194 list_for_each_entry(pos, head, node) {
193 int err; 195 int err;
@@ -197,14 +199,17 @@ static int __dsos__write_buildid_table(struct list_head *head, int fd)
197 if (!pos->has_build_id) 199 if (!pos->has_build_id)
198 continue; 200 continue;
199 len = pos->long_name_len + 1; 201 len = pos->long_name_len + 1;
200 len = ALIGN(len, 64); 202 len = ALIGN(len, NAME_ALIGN);
201 memset(&b, 0, sizeof(b)); 203 memset(&b, 0, sizeof(b));
202 memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); 204 memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
203 b.header.size = sizeof(b) + len; 205 b.header.size = sizeof(b) + len;
204 err = do_write(fd, &b, sizeof(b)); 206 err = do_write(fd, &b, sizeof(b));
205 if (err < 0) 207 if (err < 0)
206 return err; 208 return err;
207 err = do_write(fd, pos->long_name, len); 209 err = do_write(fd, pos->long_name, pos->long_name_len + 1);
210 if (err < 0)
211 return err;
212 err = do_write(fd, zero_buf, len - pos->long_name_len - 1);
208 if (err < 0) 213 if (err < 0)
209 return err; 214 return err;
210 } 215 }
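
The build-id table fix matters because the old code wrote `len` (the 64-byte aligned length) bytes straight out of pos->long_name, reading past the end of the string; the new code writes the NUL-terminated name and then explicit zeros up to the aligned record size. A reduced sketch of that padded-write pattern (the helper name and the use of stdout are just for the example):

#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define NAME_ALIGN 64
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

/* Write the name plus its terminating NUL, then zero padding up to the
 * aligned record length, instead of reading `len` bytes from a buffer
 * shorter than that. */
static int write_padded_name(int fd, const char *name)
{
        static const char zero_buf[NAME_ALIGN];
        size_t n   = strlen(name) + 1;
        size_t len = ALIGN(n, NAME_ALIGN);

        if (write(fd, name, n) != (ssize_t)n)
                return -1;
        if (write(fd, zero_buf, len - n) != (ssize_t)(len - n))
                return -1;
        return 0;
}

int main(void)
{
        /* Emits the padded record to stdout just to exercise the helper. */
        return write_padded_name(STDOUT_FILENO, "/lib/modules/example.ko") ? 1 : 0;
}
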
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 9e5dbd66d34d..e5bc0fb016b2 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -197,7 +197,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
197 if (id == config) { 197 if (id == config) {
198 closedir(evt_dir); 198 closedir(evt_dir);
199 closedir(sys_dir); 199 closedir(sys_dir);
200 path = zalloc(sizeof(path)); 200 path = zalloc(sizeof(*path));
201 path->system = malloc(MAX_EVENT_LENGTH); 201 path->system = malloc(MAX_EVENT_LENGTH);
202 if (!path->system) { 202 if (!path->system) {
203 free(path); 203 free(path);
@@ -467,7 +467,6 @@ parse_subsystem_tracepoint_event(char *sys_name, char *flags)
467 while ((evt_ent = readdir(evt_dir))) { 467 while ((evt_ent = readdir(evt_dir))) {
468 char event_opt[MAX_EVOPT_LEN + 1]; 468 char event_opt[MAX_EVOPT_LEN + 1];
469 int len; 469 int len;
470 unsigned int rem = MAX_EVOPT_LEN;
471 470
472 if (!strcmp(evt_ent->d_name, ".") 471 if (!strcmp(evt_ent->d_name, ".")
473 || !strcmp(evt_ent->d_name, "..") 472 || !strcmp(evt_ent->d_name, "..")
@@ -475,20 +474,12 @@ parse_subsystem_tracepoint_event(char *sys_name, char *flags)
475 || !strcmp(evt_ent->d_name, "filter")) 474 || !strcmp(evt_ent->d_name, "filter"))
476 continue; 475 continue;
477 476
478 len = snprintf(event_opt, MAX_EVOPT_LEN, "%s:%s", sys_name, 477 len = snprintf(event_opt, MAX_EVOPT_LEN, "%s:%s%s%s", sys_name,
479 evt_ent->d_name); 478 evt_ent->d_name, flags ? ":" : "",
479 flags ?: "");
480 if (len < 0) 480 if (len < 0)
481 return EVT_FAILED; 481 return EVT_FAILED;
482 482
483 rem -= len;
484 if (flags) {
485 if (rem < strlen(flags) + 1)
486 return EVT_FAILED;
487
488 strcat(event_opt, ":");
489 strcat(event_opt, flags);
490 }
491
492 if (parse_events(NULL, event_opt, 0)) 483 if (parse_events(NULL, event_opt, 0))
493 return EVT_FAILED; 484 return EVT_FAILED;
494 } 485 }
diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c
index 6d8af48c925e..efebd5b476b3 100644
--- a/tools/perf/util/parse-options.c
+++ b/tools/perf/util/parse-options.c
@@ -430,6 +430,9 @@ int usage_with_options_internal(const char * const *usagestr,
430 pos = fprintf(stderr, " "); 430 pos = fprintf(stderr, " ");
431 if (opts->short_name) 431 if (opts->short_name)
432 pos += fprintf(stderr, "-%c", opts->short_name); 432 pos += fprintf(stderr, "-%c", opts->short_name);
433 else
434 pos += fprintf(stderr, " ");
435
433 if (opts->long_name && opts->short_name) 436 if (opts->long_name && opts->short_name)
434 pos += fprintf(stderr, ", "); 437 pos += fprintf(stderr, ", ");
435 if (opts->long_name) 438 if (opts->long_name)
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index cd7fbda5e2a5..d14a4585bcaf 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -48,6 +48,9 @@
48 48
49/* If there is no space to write, returns -E2BIG. */ 49/* If there is no space to write, returns -E2BIG. */
50static int e_snprintf(char *str, size_t size, const char *format, ...) 50static int e_snprintf(char *str, size_t size, const char *format, ...)
51 __attribute__((format(printf, 3, 4)));
52
53static int e_snprintf(char *str, size_t size, const char *format, ...)
51{ 54{
52 int ret; 55 int ret;
53 va_list ap; 56 va_list ap;
@@ -258,7 +261,7 @@ int synthesize_perf_probe_event(struct probe_point *pp)
258 ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->function, 261 ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->function,
259 offs, pp->retprobe ? "%return" : "", line); 262 offs, pp->retprobe ? "%return" : "", line);
260 else 263 else
261 ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->file, line); 264 ret = e_snprintf(buf, MAX_CMDLEN, "%s%s", pp->file, line);
262 if (ret <= 0) 265 if (ret <= 0)
263 goto error; 266 goto error;
264 len = ret; 267 len = ret;
@@ -373,14 +376,32 @@ static void clear_probe_point(struct probe_point *pp)
373 free(pp->args); 376 free(pp->args);
374 for (i = 0; i < pp->found; i++) 377 for (i = 0; i < pp->found; i++)
375 free(pp->probes[i]); 378 free(pp->probes[i]);
376 memset(pp, 0, sizeof(pp)); 379 memset(pp, 0, sizeof(*pp));
380}
381
382/* Show an event */
383static void show_perf_probe_event(const char *group, const char *event,
384 const char *place, struct probe_point *pp)
385{
386 int i;
387 char buf[128];
388
389 e_snprintf(buf, 128, "%s:%s", group, event);
390 printf(" %-40s (on %s", buf, place);
391
392 if (pp->nr_args > 0) {
393 printf(" with");
394 for (i = 0; i < pp->nr_args; i++)
395 printf(" %s", pp->args[i]);
396 }
397 printf(")\n");
377} 398}
378 399
379/* List up current perf-probe events */ 400/* List up current perf-probe events */
380void show_perf_probe_events(void) 401void show_perf_probe_events(void)
381{ 402{
382 unsigned int i; 403 unsigned int i;
383 int fd; 404 int fd, nr;
384 char *group, *event; 405 char *group, *event;
385 struct probe_point pp; 406 struct probe_point pp;
386 struct strlist *rawlist; 407 struct strlist *rawlist;
@@ -393,8 +414,13 @@ void show_perf_probe_events(void)
393 for (i = 0; i < strlist__nr_entries(rawlist); i++) { 414 for (i = 0; i < strlist__nr_entries(rawlist); i++) {
394 ent = strlist__entry(rawlist, i); 415 ent = strlist__entry(rawlist, i);
395 parse_trace_kprobe_event(ent->s, &group, &event, &pp); 416 parse_trace_kprobe_event(ent->s, &group, &event, &pp);
417 /* Synthesize only event probe point */
418 nr = pp.nr_args;
419 pp.nr_args = 0;
396 synthesize_perf_probe_event(&pp); 420 synthesize_perf_probe_event(&pp);
397 printf("[%s:%s]\t%s\n", group, event, pp.probes[0]); 421 pp.nr_args = nr;
422 /* Show an event */
423 show_perf_probe_event(group, event, pp.probes[0], &pp);
398 free(group); 424 free(group);
399 free(event); 425 free(event);
400 clear_probe_point(&pp); 426 clear_probe_point(&pp);
@@ -404,21 +430,28 @@ void show_perf_probe_events(void)
404} 430}
405 431
406/* Get current perf-probe event names */ 432/* Get current perf-probe event names */
407static struct strlist *get_perf_event_names(int fd) 433static struct strlist *get_perf_event_names(int fd, bool include_group)
408{ 434{
409 unsigned int i; 435 unsigned int i;
410 char *group, *event; 436 char *group, *event;
437 char buf[128];
411 struct strlist *sl, *rawlist; 438 struct strlist *sl, *rawlist;
412 struct str_node *ent; 439 struct str_node *ent;
413 440
414 rawlist = get_trace_kprobe_event_rawlist(fd); 441 rawlist = get_trace_kprobe_event_rawlist(fd);
415 442
416 sl = strlist__new(false, NULL); 443 sl = strlist__new(true, NULL);
417 for (i = 0; i < strlist__nr_entries(rawlist); i++) { 444 for (i = 0; i < strlist__nr_entries(rawlist); i++) {
418 ent = strlist__entry(rawlist, i); 445 ent = strlist__entry(rawlist, i);
419 parse_trace_kprobe_event(ent->s, &group, &event, NULL); 446 parse_trace_kprobe_event(ent->s, &group, &event, NULL);
420 strlist__add(sl, event); 447 if (include_group) {
448 if (e_snprintf(buf, 128, "%s:%s", group, event) < 0)
449 die("Failed to copy group:event name.");
450 strlist__add(sl, buf);
451 } else
452 strlist__add(sl, event);
421 free(group); 453 free(group);
454 free(event);
422 } 455 }
423 456
424 strlist__delete(rawlist); 457 strlist__delete(rawlist);
@@ -426,24 +459,30 @@ static struct strlist *get_perf_event_names(int fd)
426 return sl; 459 return sl;
427} 460}
428 461
429static int write_trace_kprobe_event(int fd, const char *buf) 462static void write_trace_kprobe_event(int fd, const char *buf)
430{ 463{
431 int ret; 464 int ret;
432 465
466 pr_debug("Writing event: %s\n", buf);
433 ret = write(fd, buf, strlen(buf)); 467 ret = write(fd, buf, strlen(buf));
434 if (ret <= 0) 468 if (ret <= 0)
435 die("Failed to create event."); 469 die("Failed to write event: %s", strerror(errno));
436 else
437 printf("Added new event: %s\n", buf);
438
439 return ret;
440} 470}
441 471
442static void get_new_event_name(char *buf, size_t len, const char *base, 472static void get_new_event_name(char *buf, size_t len, const char *base,
443 struct strlist *namelist) 473 struct strlist *namelist)
444{ 474{
445 int i, ret; 475 int i, ret;
446 for (i = 0; i < MAX_EVENT_INDEX; i++) { 476
477 /* Try no suffix */
478 ret = e_snprintf(buf, len, "%s", base);
479 if (ret < 0)
480 die("snprintf() failed: %s", strerror(-ret));
481 if (!strlist__has_entry(namelist, buf))
482 return;
483
484 /* Try to add suffix */
485 for (i = 1; i < MAX_EVENT_INDEX; i++) {
447 ret = e_snprintf(buf, len, "%s_%d", base, i); 486 ret = e_snprintf(buf, len, "%s_%d", base, i);
448 if (ret < 0) 487 if (ret < 0)
449 die("snprintf() failed: %s", strerror(-ret)); 488 die("snprintf() failed: %s", strerror(-ret));
@@ -464,7 +503,7 @@ void add_trace_kprobe_events(struct probe_point *probes, int nr_probes)
464 503
465 fd = open_kprobe_events(O_RDWR, O_APPEND); 504 fd = open_kprobe_events(O_RDWR, O_APPEND);
466 /* Get current event names */ 505 /* Get current event names */
467 namelist = get_perf_event_names(fd); 506 namelist = get_perf_event_names(fd, false);
468 507
469 for (j = 0; j < nr_probes; j++) { 508 for (j = 0; j < nr_probes; j++) {
470 pp = probes + j; 509 pp = probes + j;
@@ -476,9 +515,73 @@ void add_trace_kprobe_events(struct probe_point *probes, int nr_probes)
476 PERFPROBE_GROUP, event, 515 PERFPROBE_GROUP, event,
477 pp->probes[i]); 516 pp->probes[i]);
478 write_trace_kprobe_event(fd, buf); 517 write_trace_kprobe_event(fd, buf);
518 printf("Added new event:\n");
519 /* Get the first parameter (probe-point) */
520 sscanf(pp->probes[i], "%s", buf);
521 show_perf_probe_event(PERFPROBE_GROUP, event,
522 buf, pp);
479 /* Add added event name to namelist */ 523 /* Add added event name to namelist */
480 strlist__add(namelist, event); 524 strlist__add(namelist, event);
481 } 525 }
482 } 526 }
527 /* Show how to use the event. */
528 printf("\nYou can now use it on all perf tools, such as:\n\n");
529 printf("\tperf record -e %s:%s -a sleep 1\n\n", PERFPROBE_GROUP, event);
530
531 strlist__delete(namelist);
532 close(fd);
533}
534
535static void del_trace_kprobe_event(int fd, const char *group,
536 const char *event, struct strlist *namelist)
537{
538 char buf[128];
539
540 if (e_snprintf(buf, 128, "%s:%s", group, event) < 0)
541 die("Failed to copy event.");
542 if (!strlist__has_entry(namelist, buf)) {
543 pr_warning("Warning: event \"%s\" is not found.\n", buf);
544 return;
545 }
546 /* Convert from perf-probe event to trace-kprobe event */
547 if (e_snprintf(buf, 128, "-:%s/%s", group, event) < 0)
548 die("Failed to copy event.");
549
550 write_trace_kprobe_event(fd, buf);
551 printf("Remove event: %s:%s\n", group, event);
552}
553
554void del_trace_kprobe_events(struct strlist *dellist)
555{
556 int fd;
557 unsigned int i;
558 const char *group, *event;
559 char *p, *str;
560 struct str_node *ent;
561 struct strlist *namelist;
562
563 fd = open_kprobe_events(O_RDWR, O_APPEND);
564 /* Get current event names */
565 namelist = get_perf_event_names(fd, true);
566
567 for (i = 0; i < strlist__nr_entries(dellist); i++) {
568 ent = strlist__entry(dellist, i);
569 str = strdup(ent->s);
570 if (!str)
571 die("Failed to copy event.");
572 p = strchr(str, ':');
573 if (p) {
574 group = str;
575 *p = '\0';
576 event = p + 1;
577 } else {
578 group = PERFPROBE_GROUP;
579 event = str;
580 }
581 del_trace_kprobe_event(fd, group, event, namelist);
582 free(str);
583 }
584 strlist__delete(namelist);
483 close(fd); 585 close(fd);
484} 586}
587
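
del_trace_kprobe_event() drives the ftrace kprobe_events text interface: probes are created with lines such as "p:group/event symbol" and removed by writing "-:group/event" to the same file. A minimal sketch of the removal side outside of perf (the event name and the debugfs mount point are assumptions for the example):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        /* "probe/schedule" is a hypothetical group/event created earlier. */
        const char *cmd = "-:probe/schedule\n";
        int fd = open("/sys/kernel/debug/tracing/kprobe_events",
                      O_WRONLY | O_APPEND);

        if (fd < 0) {
                perror("open kprobe_events");
                return 1;
        }
        if (write(fd, cmd, strlen(cmd)) < 0)
                perror("write");
        close(fd);
        return 0;
}
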
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 0c6fe56fe38a..f752159124ae 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -10,6 +10,7 @@ extern void parse_trace_kprobe_event(const char *str, char **group,
10 char **event, struct probe_point *pp); 10 char **event, struct probe_point *pp);
11extern int synthesize_trace_kprobe_event(struct probe_point *pp); 11extern int synthesize_trace_kprobe_event(struct probe_point *pp);
12extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes); 12extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes);
13extern void del_trace_kprobe_events(struct strlist *dellist);
13extern void show_perf_probe_events(void); 14extern void show_perf_probe_events(void);
14 15
15/* Maximum index number of event-name postfix */ 16/* Maximum index number of event-name postfix */
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 293cdfc1b8ca..4585f1d86792 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -106,7 +106,7 @@ static int strtailcmp(const char *s1, const char *s2)
106{ 106{
107 int i1 = strlen(s1); 107 int i1 = strlen(s1);
108 int i2 = strlen(s2); 108 int i2 = strlen(s2);
109 while (--i1 > 0 && --i2 > 0) { 109 while (--i1 >= 0 && --i2 >= 0) {
110 if (s1[i1] != s2[i2]) 110 if (s1[i1] != s2[i2])
111 return s1[i1] - s2[i2]; 111 return s1[i1] - s2[i2];
112 } 112 }
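
The strtailcmp() change is a one-character off-by-one: with "--i1 > 0" the loop stops before index 0, so strings that differ only in their first character still compare equal. A tiny demonstration of the old versus new bound (tailcmp() is a stand-in, not the perf function):

#include <stdio.h>
#include <string.h>

static int tailcmp(const char *s1, const char *s2, int lowest)
{
        int i1 = strlen(s1);
        int i2 = strlen(s2);

        /* lowest = 1 reproduces the old "--i > 0" bound, lowest = 0 the fix. */
        while (--i1 >= lowest && --i2 >= lowest)
                if (s1[i1] != s2[i2])
                        return s1[i1] - s2[i2];
        return 0;
}

int main(void)
{
        printf("old bound: %d\n", tailcmp("a.c", "b.c", 1)); /* 0: looks equal  */
        printf("new bound: %d\n", tailcmp("a.c", "b.c", 0)); /* 'a'-'b': differ */
        return 0;
}
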
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index fffcb937cdcb..e7508ad3450f 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -938,8 +938,9 @@ static bool __dsos__read_build_ids(struct list_head *head)
938 938
939bool dsos__read_build_ids(void) 939bool dsos__read_build_ids(void)
940{ 940{
941 return __dsos__read_build_ids(&dsos__kernel) || 941 bool kbuildids = __dsos__read_build_ids(&dsos__kernel),
942 __dsos__read_build_ids(&dsos__user); 942 ubuildids = __dsos__read_build_ids(&dsos__user);
943 return kbuildids || ubuildids;
943} 944}
944 945
945/* 946/*
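
The dsos__read_build_ids() change is about short-circuit evaluation: "return a() || b()" never calls b() once a() returns true, so the user DSO list was skipped whenever a kernel DSO already had a build-id. The fix runs both scans and only then combines the results, as in this reduced sketch (scan() is a placeholder):

#include <stdbool.h>
#include <stdio.h>

static bool scan(const char *list)
{
        printf("scanning %s DSOs\n", list);
        return true;
}

int main(void)
{
        /* Short-circuit: "user" is never scanned once "kernel" succeeds. */
        bool buggy = scan("kernel") || scan("user");

        /* Fixed form: both scans always run, then the results are combined. */
        bool kids = scan("kernel");
        bool uids = scan("user");
        bool fixed = kids || uids;

        return (buggy && fixed) ? 0 : 1;
}
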
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 0302405aa2ca..c5c32be040bf 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -177,7 +177,7 @@ void parse_proc_kallsyms(char *file, unsigned int size __unused)
177 func_count++; 177 func_count++;
178 } 178 }
179 179
180 func_list = malloc_or_die(sizeof(*func_list) * func_count + 1); 180 func_list = malloc_or_die(sizeof(*func_list) * (func_count + 1));
181 181
182 i = 0; 182 i = 0;
183 while (list) { 183 while (list) {
@@ -1477,7 +1477,7 @@ process_fields(struct event *event, struct print_flag_sym **list, char **tok)
1477 goto out_free; 1477 goto out_free;
1478 1478
1479 field = malloc_or_die(sizeof(*field)); 1479 field = malloc_or_die(sizeof(*field));
1480 memset(field, 0, sizeof(field)); 1480 memset(field, 0, sizeof(*field));
1481 1481
1482 value = arg_eval(arg); 1482 value = arg_eval(arg);
1483 field->value = strdup(value); 1483 field->value = strdup(value);
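
Both hunks in this file fix the same family of sizeof mistakes: sizeof(field) measures the pointer rather than the structure it points to, and sizeof(*list) * n + 1 adds one byte instead of one element because '*' binds tighter than '+'. A short, self-contained reminder of the difference (the struct is invented for the example):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct field { char *name; char *value; };

int main(void)
{
        struct field *f = malloc(sizeof(*f));
        size_t n = 4;

        if (!f)
                return 1;
        /* memset(f, 0, sizeof(f)) would clear only sizeof(char *) bytes. */
        memset(f, 0, sizeof(*f));               /* clears the whole struct */

        printf("one extra byte:    %zu\n", sizeof(*f) * n + 1);
        printf("one extra element: %zu\n", sizeof(*f) * (n + 1));

        free(f);
        return 0;
}
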
diff --git a/tools/perf/util/trace-event-perl.c b/tools/perf/util/trace-event-perl.c
index 51e833fd58c3..a5ffe60db5d6 100644
--- a/tools/perf/util/trace-event-perl.c
+++ b/tools/perf/util/trace-event-perl.c
@@ -32,9 +32,6 @@
32 32
33void xs_init(pTHX); 33void xs_init(pTHX);
34 34
35void boot_Perf__Trace__Context(pTHX_ CV *cv);
36void boot_DynaLoader(pTHX_ CV *cv);
37
38void xs_init(pTHX) 35void xs_init(pTHX)
39{ 36{
40 const char *file = __FILE__; 37 const char *file = __FILE__;
@@ -573,26 +570,72 @@ struct scripting_ops perl_scripting_ops = {
573 .generate_script = perl_generate_script, 570 .generate_script = perl_generate_script,
574}; 571};
575 572
576#ifdef NO_LIBPERL 573static void print_unsupported_msg(void)
577void setup_perl_scripting(void)
578{ 574{
579 fprintf(stderr, "Perl scripting not supported." 575 fprintf(stderr, "Perl scripting not supported."
580 " Install libperl and rebuild perf to enable it. e.g. " 576 " Install libperl and rebuild perf to enable it.\n"
581 "apt-get install libperl-dev (ubuntu), yum install " 577 "For example:\n # apt-get install libperl-dev (ubuntu)"
582 "perl-ExtUtils-Embed (Fedora), etc.\n"); 578 "\n # yum install perl-ExtUtils-Embed (Fedora)"
579 "\n etc.\n");
583} 580}
584#else 581
585void setup_perl_scripting(void) 582static int perl_start_script_unsupported(const char *script __unused)
583{
584 print_unsupported_msg();
585
586 return -1;
587}
588
589static int perl_stop_script_unsupported(void)
590{
591 return 0;
592}
593
594static void perl_process_event_unsupported(int cpu __unused,
595 void *data __unused,
596 int size __unused,
597 unsigned long long nsecs __unused,
598 char *comm __unused)
599{
600}
601
602static int perl_generate_script_unsupported(const char *outfile __unused)
603{
604 print_unsupported_msg();
605
606 return -1;
607}
608
609struct scripting_ops perl_scripting_unsupported_ops = {
610 .name = "Perl",
611 .start_script = perl_start_script_unsupported,
612 .stop_script = perl_stop_script_unsupported,
613 .process_event = perl_process_event_unsupported,
614 .generate_script = perl_generate_script_unsupported,
615};
616
617static void register_perl_scripting(struct scripting_ops *scripting_ops)
586{ 618{
587 int err; 619 int err;
588 err = script_spec_register("Perl", &perl_scripting_ops); 620 err = script_spec_register("Perl", scripting_ops);
589 if (err) 621 if (err)
590 die("error registering Perl script extension"); 622 die("error registering Perl script extension");
591 623
592 err = script_spec_register("pl", &perl_scripting_ops); 624 err = script_spec_register("pl", scripting_ops);
593 if (err) 625 if (err)
594 die("error registering pl script extension"); 626 die("error registering pl script extension");
595 627
596 scripting_context = malloc(sizeof(struct scripting_context)); 628 scripting_context = malloc(sizeof(struct scripting_context));
597} 629}
630
631#ifdef NO_LIBPERL
632void setup_perl_scripting(void)
633{
634 register_perl_scripting(&perl_scripting_unsupported_ops);
635}
636#else
637void setup_perl_scripting(void)
638{
639 register_perl_scripting(&perl_scripting_ops);
640}
598#endif 641#endif
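
Instead of printing a build-time hint once and leaving the "Perl"/"pl" script specs unregistered, the NO_LIBPERL build now installs a stub ops table, so a script request still resolves and fails with a clear message at the point of use. The pattern reduced to its essentials (types and names are illustrative, not perf's; perf selects the table at build time with #ifdef NO_LIBPERL, while the sketch picks it at run time just to exercise both):

#include <stdio.h>

struct script_ops {
        const char *name;
        int (*start_script)(const char *path);
};

static int perl_start(const char *path)
{
        printf("embedding perl, running %s\n", path);
        return 0;
}

static int perl_start_unsupported(const char *path)
{
        (void)path;
        fprintf(stderr, "Perl scripting not compiled in; rebuild with libperl\n");
        return -1;
}

static struct script_ops perl_real = { "Perl", perl_start };
static struct script_ops perl_stub = { "Perl", perl_start_unsupported };

int main(int argc, char **argv)
{
        struct script_ops *ops = argc > 1 ? &perl_stub : &perl_real;

        (void)argv;
        return ops->start_script("example.pl") < 0;
}
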
diff --git a/tools/perf/util/trace-event-perl.h b/tools/perf/util/trace-event-perl.h
index 8fe0d866fe1a..e88fb26137bb 100644
--- a/tools/perf/util/trace-event-perl.h
+++ b/tools/perf/util/trace-event-perl.h
@@ -34,9 +34,13 @@ typedef int INTERP;
34#define dXSUB_SYS 34#define dXSUB_SYS
35#define pTHX_ 35#define pTHX_
36static inline void newXS(const char *a, void *b, const char *c) {} 36static inline void newXS(const char *a, void *b, const char *c) {}
37static void boot_Perf__Trace__Context(pTHX_ CV *cv) {}
38static void boot_DynaLoader(pTHX_ CV *cv) {}
37#else 39#else
38#include <EXTERN.h> 40#include <EXTERN.h>
39#include <perl.h> 41#include <perl.h>
42void boot_Perf__Trace__Context(pTHX_ CV *cv);
43void boot_DynaLoader(pTHX_ CV *cv);
40typedef PerlInterpreter * INTERP; 44typedef PerlInterpreter * INTERP;
41#endif 45#endif
42 46
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 342dfdd43f87..1744422cafcb 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -145,8 +145,9 @@ static void read_proc_kallsyms(void)
145 if (!size) 145 if (!size)
146 return; 146 return;
147 147
148 buf = malloc_or_die(size); 148 buf = malloc_or_die(size + 1);
149 read_or_die(buf, size); 149 read_or_die(buf, size);
150 buf[size] = '\0';
150 151
151 parse_proc_kallsyms(buf, size); 152 parse_proc_kallsyms(buf, size);
152 153