diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-05 18:30:21 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-05 18:30:21 -0500 |
commit | c3fa27d1367fac63ac8533d6f20ea851d0d70a10 (patch) | |
tree | e7731554085e22b6b63411b1ebb401079f3e0bbb /arch/x86/kernel | |
parent | 96fa2b508d2d3fe040cf4ef2fffb955f0a537ea1 (diff) | |
parent | d103d01e4b19f185d3c85f77402b605534c32e89 (diff) |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (470 commits)
x86: Fix comments of register/stack access functions
perf tools: Replace %m with %a in sscanf
hw-breakpoints: Keep track of user disabled breakpoints
tracing/syscalls: Make syscall events print callbacks static
tracing: Add DEFINE_EVENT(), DEFINE_SINGLE_EVENT() support to docbook
perf: Don't free perf_mmap_data until work has been done
perf_event: Fix compile error
perf tools: Fix _GNU_SOURCE macro related strndup() build error
trace_syscalls: Remove unused syscall_name_to_nr()
trace_syscalls: Simplify syscall profile
trace_syscalls: Remove duplicate init_enter_##sname()
trace_syscalls: Add syscall_nr field to struct syscall_metadata
trace_syscalls: Remove enter_id exit_id
trace_syscalls: Set event_enter_##sname->data to its metadata
trace_syscalls: Remove unused event_syscall_enter and event_syscall_exit
perf_event: Initialize data.period in perf_swevent_hrtimer()
perf probe: Simplify event naming
perf probe: Add --list option for listing current probe events
perf probe: Add argv_split() from lib/argv_split.c
perf probe: Move probe event utility functions to probe-event.c
...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 103 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 29 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 205 | ||||
-rw-r--r-- | arch/x86/kernel/entry_32.S | 24 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 8 | ||||
-rw-r--r-- | arch/x86/kernel/hw_breakpoint.c | 555 | ||||
-rw-r--r-- | arch/x86/kernel/irq.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/kgdb.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes.c | 243 | ||||
-rw-r--r-- | arch/x86/kernel/machine_kexec_32.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/machine_kexec_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 21 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 415 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/signal.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 73 |
21 files changed, 1365 insertions, 365 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d8e5d0cdd678..4f2e66e29ecc 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | |||
40 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o | 40 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o |
41 | obj-y += bootflag.o e820.o | 41 | obj-y += bootflag.o e820.o |
42 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o | 42 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o |
43 | obj-y += alternative.o i8253.o pci-nommu.o | 43 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o |
44 | obj-y += tsc.o io_delay.o rtc.o | 44 | obj-y += tsc.o io_delay.o rtc.o |
45 | 45 | ||
46 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | 46 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 68537e957a9b..1d2cb383410e 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -5,6 +5,7 @@ | |||
5 | # Don't trace early stages of a secondary CPU boot | 5 | # Don't trace early stages of a secondary CPU boot |
6 | ifdef CONFIG_FUNCTION_TRACER | 6 | ifdef CONFIG_FUNCTION_TRACER |
7 | CFLAGS_REMOVE_common.o = -pg | 7 | CFLAGS_REMOVE_common.o = -pg |
8 | CFLAGS_REMOVE_perf_event.o = -pg | ||
8 | endif | 9 | endif |
9 | 10 | ||
10 | # Make sure load_percpu_segment has no stackprotector | 11 | # Make sure load_percpu_segment has no stackprotector |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cc25c2b4a567..9053be5d95cd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -837,10 +837,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
837 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | 837 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; |
838 | } | 838 | } |
839 | 839 | ||
840 | #ifdef CONFIG_X86_MCE | ||
841 | /* Init Machine Check Exception if available. */ | 840 | /* Init Machine Check Exception if available. */ |
842 | mcheck_init(c); | 841 | mcheck_cpu_init(c); |
843 | #endif | ||
844 | 842 | ||
845 | select_idle_routine(c); | 843 | select_idle_routine(c); |
846 | 844 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 721a77ca8115..0bcaa3875863 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -46,6 +46,9 @@ | |||
46 | 46 | ||
47 | #include "mce-internal.h" | 47 | #include "mce-internal.h" |
48 | 48 | ||
49 | #define CREATE_TRACE_POINTS | ||
50 | #include <trace/events/mce.h> | ||
51 | |||
49 | int mce_disabled __read_mostly; | 52 | int mce_disabled __read_mostly; |
50 | 53 | ||
51 | #define MISC_MCELOG_MINOR 227 | 54 | #define MISC_MCELOG_MINOR 227 |
@@ -85,18 +88,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | |||
85 | static DEFINE_PER_CPU(struct mce, mces_seen); | 88 | static DEFINE_PER_CPU(struct mce, mces_seen); |
86 | static int cpu_missing; | 89 | static int cpu_missing; |
87 | 90 | ||
88 | static void default_decode_mce(struct mce *m) | 91 | /* |
92 | * CPU/chipset specific EDAC code can register a notifier call here to print | ||
93 | * MCE errors in a human-readable form. | ||
94 | */ | ||
95 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | ||
96 | EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); | ||
97 | |||
98 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, | ||
99 | void *data) | ||
89 | { | 100 | { |
90 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); | 101 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); |
91 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); | 102 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); |
103 | |||
104 | return NOTIFY_STOP; | ||
92 | } | 105 | } |
93 | 106 | ||
94 | /* | 107 | static struct notifier_block mce_dec_nb = { |
95 | * CPU/chipset specific EDAC code can register a callback here to print | 108 | .notifier_call = default_decode_mce, |
96 | * MCE errors in a human-readable form: | 109 | .priority = -1, |
97 | */ | 110 | }; |
98 | void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; | ||
99 | EXPORT_SYMBOL(x86_mce_decode_callback); | ||
100 | 111 | ||
101 | /* MCA banks polled by the period polling timer for corrected events */ | 112 | /* MCA banks polled by the period polling timer for corrected events */ |
102 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | 113 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { |
@@ -141,6 +152,9 @@ void mce_log(struct mce *mce) | |||
141 | { | 152 | { |
142 | unsigned next, entry; | 153 | unsigned next, entry; |
143 | 154 | ||
155 | /* Emit the trace record: */ | ||
156 | trace_mce_record(mce); | ||
157 | |||
144 | mce->finished = 0; | 158 | mce->finished = 0; |
145 | wmb(); | 159 | wmb(); |
146 | for (;;) { | 160 | for (;;) { |
@@ -204,9 +218,9 @@ static void print_mce(struct mce *m) | |||
204 | 218 | ||
205 | /* | 219 | /* |
206 | * Print out human-readable details about the MCE error, | 220 | * Print out human-readable details about the MCE error, |
207 | * (if the CPU has an implementation for that): | 221 | * (if the CPU has an implementation for that) |
208 | */ | 222 | */ |
209 | x86_mce_decode_callback(m); | 223 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); |
210 | } | 224 | } |
211 | 225 | ||
212 | static void print_mce_head(void) | 226 | static void print_mce_head(void) |
@@ -1122,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */ | |||
1122 | static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ | 1136 | static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ |
1123 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | 1137 | static DEFINE_PER_CPU(struct timer_list, mce_timer); |
1124 | 1138 | ||
1125 | static void mcheck_timer(unsigned long data) | 1139 | static void mce_start_timer(unsigned long data) |
1126 | { | 1140 | { |
1127 | struct timer_list *t = &per_cpu(mce_timer, data); | 1141 | struct timer_list *t = &per_cpu(mce_timer, data); |
1128 | int *n; | 1142 | int *n; |
@@ -1187,7 +1201,7 @@ int mce_notify_irq(void) | |||
1187 | } | 1201 | } |
1188 | EXPORT_SYMBOL_GPL(mce_notify_irq); | 1202 | EXPORT_SYMBOL_GPL(mce_notify_irq); |
1189 | 1203 | ||
1190 | static int mce_banks_init(void) | 1204 | static int __cpuinit __mcheck_cpu_mce_banks_init(void) |
1191 | { | 1205 | { |
1192 | int i; | 1206 | int i; |
1193 | 1207 | ||
@@ -1206,7 +1220,7 @@ static int mce_banks_init(void) | |||
1206 | /* | 1220 | /* |
1207 | * Initialize Machine Checks for a CPU. | 1221 | * Initialize Machine Checks for a CPU. |
1208 | */ | 1222 | */ |
1209 | static int __cpuinit mce_cap_init(void) | 1223 | static int __cpuinit __mcheck_cpu_cap_init(void) |
1210 | { | 1224 | { |
1211 | unsigned b; | 1225 | unsigned b; |
1212 | u64 cap; | 1226 | u64 cap; |
@@ -1228,7 +1242,7 @@ static int __cpuinit mce_cap_init(void) | |||
1228 | WARN_ON(banks != 0 && b != banks); | 1242 | WARN_ON(banks != 0 && b != banks); |
1229 | banks = b; | 1243 | banks = b; |
1230 | if (!mce_banks) { | 1244 | if (!mce_banks) { |
1231 | int err = mce_banks_init(); | 1245 | int err = __mcheck_cpu_mce_banks_init(); |
1232 | 1246 | ||
1233 | if (err) | 1247 | if (err) |
1234 | return err; | 1248 | return err; |
@@ -1244,7 +1258,7 @@ static int __cpuinit mce_cap_init(void) | |||
1244 | return 0; | 1258 | return 0; |
1245 | } | 1259 | } |
1246 | 1260 | ||
1247 | static void mce_init(void) | 1261 | static void __mcheck_cpu_init_generic(void) |
1248 | { | 1262 | { |
1249 | mce_banks_t all_banks; | 1263 | mce_banks_t all_banks; |
1250 | u64 cap; | 1264 | u64 cap; |
@@ -1273,7 +1287,7 @@ static void mce_init(void) | |||
1273 | } | 1287 | } |
1274 | 1288 | ||
1275 | /* Add per CPU specific workarounds here */ | 1289 | /* Add per CPU specific workarounds here */ |
1276 | static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) | 1290 | static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) |
1277 | { | 1291 | { |
1278 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { | 1292 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { |
1279 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); | 1293 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); |
@@ -1341,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1341 | return 0; | 1355 | return 0; |
1342 | } | 1356 | } |
1343 | 1357 | ||
1344 | static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | 1358 | static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) |
1345 | { | 1359 | { |
1346 | if (c->x86 != 5) | 1360 | if (c->x86 != 5) |
1347 | return; | 1361 | return; |
@@ -1355,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | |||
1355 | } | 1369 | } |
1356 | } | 1370 | } |
1357 | 1371 | ||
1358 | static void mce_cpu_features(struct cpuinfo_x86 *c) | 1372 | static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) |
1359 | { | 1373 | { |
1360 | switch (c->x86_vendor) { | 1374 | switch (c->x86_vendor) { |
1361 | case X86_VENDOR_INTEL: | 1375 | case X86_VENDOR_INTEL: |
@@ -1369,7 +1383,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) | |||
1369 | } | 1383 | } |
1370 | } | 1384 | } |
1371 | 1385 | ||
1372 | static void mce_init_timer(void) | 1386 | static void __mcheck_cpu_init_timer(void) |
1373 | { | 1387 | { |
1374 | struct timer_list *t = &__get_cpu_var(mce_timer); | 1388 | struct timer_list *t = &__get_cpu_var(mce_timer); |
1375 | int *n = &__get_cpu_var(mce_next_interval); | 1389 | int *n = &__get_cpu_var(mce_next_interval); |
@@ -1380,7 +1394,7 @@ static void mce_init_timer(void) | |||
1380 | *n = check_interval * HZ; | 1394 | *n = check_interval * HZ; |
1381 | if (!*n) | 1395 | if (!*n) |
1382 | return; | 1396 | return; |
1383 | setup_timer(t, mcheck_timer, smp_processor_id()); | 1397 | setup_timer(t, mce_start_timer, smp_processor_id()); |
1384 | t->expires = round_jiffies(jiffies + *n); | 1398 | t->expires = round_jiffies(jiffies + *n); |
1385 | add_timer_on(t, smp_processor_id()); | 1399 | add_timer_on(t, smp_processor_id()); |
1386 | } | 1400 | } |
@@ -1400,27 +1414,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = | |||
1400 | * Called for each booted CPU to set up machine checks. | 1414 | * Called for each booted CPU to set up machine checks. |
1401 | * Must be called with preempt off: | 1415 | * Must be called with preempt off: |
1402 | */ | 1416 | */ |
1403 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | 1417 | void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) |
1404 | { | 1418 | { |
1405 | if (mce_disabled) | 1419 | if (mce_disabled) |
1406 | return; | 1420 | return; |
1407 | 1421 | ||
1408 | mce_ancient_init(c); | 1422 | __mcheck_cpu_ancient_init(c); |
1409 | 1423 | ||
1410 | if (!mce_available(c)) | 1424 | if (!mce_available(c)) |
1411 | return; | 1425 | return; |
1412 | 1426 | ||
1413 | if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { | 1427 | if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { |
1414 | mce_disabled = 1; | 1428 | mce_disabled = 1; |
1415 | return; | 1429 | return; |
1416 | } | 1430 | } |
1417 | 1431 | ||
1418 | machine_check_vector = do_machine_check; | 1432 | machine_check_vector = do_machine_check; |
1419 | 1433 | ||
1420 | mce_init(); | 1434 | __mcheck_cpu_init_generic(); |
1421 | mce_cpu_features(c); | 1435 | __mcheck_cpu_init_vendor(c); |
1422 | mce_init_timer(); | 1436 | __mcheck_cpu_init_timer(); |
1423 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); | 1437 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); |
1438 | |||
1424 | } | 1439 | } |
1425 | 1440 | ||
1426 | /* | 1441 | /* |
@@ -1640,6 +1655,15 @@ static int __init mcheck_enable(char *str) | |||
1640 | } | 1655 | } |
1641 | __setup("mce", mcheck_enable); | 1656 | __setup("mce", mcheck_enable); |
1642 | 1657 | ||
1658 | int __init mcheck_init(void) | ||
1659 | { | ||
1660 | atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); | ||
1661 | |||
1662 | mcheck_intel_therm_init(); | ||
1663 | |||
1664 | return 0; | ||
1665 | } | ||
1666 | |||
1643 | /* | 1667 | /* |
1644 | * Sysfs support | 1668 | * Sysfs support |
1645 | */ | 1669 | */ |
@@ -1648,7 +1672,7 @@ __setup("mce", mcheck_enable); | |||
1648 | * Disable machine checks on suspend and shutdown. We can't really handle | 1672 | * Disable machine checks on suspend and shutdown. We can't really handle |
1649 | * them later. | 1673 | * them later. |
1650 | */ | 1674 | */ |
1651 | static int mce_disable(void) | 1675 | static int mce_disable_error_reporting(void) |
1652 | { | 1676 | { |
1653 | int i; | 1677 | int i; |
1654 | 1678 | ||
@@ -1663,12 +1687,12 @@ static int mce_disable(void) | |||
1663 | 1687 | ||
1664 | static int mce_suspend(struct sys_device *dev, pm_message_t state) | 1688 | static int mce_suspend(struct sys_device *dev, pm_message_t state) |
1665 | { | 1689 | { |
1666 | return mce_disable(); | 1690 | return mce_disable_error_reporting(); |
1667 | } | 1691 | } |
1668 | 1692 | ||
1669 | static int mce_shutdown(struct sys_device *dev) | 1693 | static int mce_shutdown(struct sys_device *dev) |
1670 | { | 1694 | { |
1671 | return mce_disable(); | 1695 | return mce_disable_error_reporting(); |
1672 | } | 1696 | } |
1673 | 1697 | ||
1674 | /* | 1698 | /* |
@@ -1678,8 +1702,8 @@ static int mce_shutdown(struct sys_device *dev) | |||
1678 | */ | 1702 | */ |
1679 | static int mce_resume(struct sys_device *dev) | 1703 | static int mce_resume(struct sys_device *dev) |
1680 | { | 1704 | { |
1681 | mce_init(); | 1705 | __mcheck_cpu_init_generic(); |
1682 | mce_cpu_features(¤t_cpu_data); | 1706 | __mcheck_cpu_init_vendor(¤t_cpu_data); |
1683 | 1707 | ||
1684 | return 0; | 1708 | return 0; |
1685 | } | 1709 | } |
@@ -1689,8 +1713,8 @@ static void mce_cpu_restart(void *data) | |||
1689 | del_timer_sync(&__get_cpu_var(mce_timer)); | 1713 | del_timer_sync(&__get_cpu_var(mce_timer)); |
1690 | if (!mce_available(¤t_cpu_data)) | 1714 | if (!mce_available(¤t_cpu_data)) |
1691 | return; | 1715 | return; |
1692 | mce_init(); | 1716 | __mcheck_cpu_init_generic(); |
1693 | mce_init_timer(); | 1717 | __mcheck_cpu_init_timer(); |
1694 | } | 1718 | } |
1695 | 1719 | ||
1696 | /* Reinit MCEs after user configuration changes */ | 1720 | /* Reinit MCEs after user configuration changes */ |
@@ -1716,7 +1740,7 @@ static void mce_enable_ce(void *all) | |||
1716 | cmci_reenable(); | 1740 | cmci_reenable(); |
1717 | cmci_recheck(); | 1741 | cmci_recheck(); |
1718 | if (all) | 1742 | if (all) |
1719 | mce_init_timer(); | 1743 | __mcheck_cpu_init_timer(); |
1720 | } | 1744 | } |
1721 | 1745 | ||
1722 | static struct sysdev_class mce_sysclass = { | 1746 | static struct sysdev_class mce_sysclass = { |
@@ -1929,13 +1953,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu) | |||
1929 | } | 1953 | } |
1930 | 1954 | ||
1931 | /* Make sure there are no machine checks on offlined CPUs. */ | 1955 | /* Make sure there are no machine checks on offlined CPUs. */ |
1932 | static void mce_disable_cpu(void *h) | 1956 | static void __cpuinit mce_disable_cpu(void *h) |
1933 | { | 1957 | { |
1934 | unsigned long action = *(unsigned long *)h; | 1958 | unsigned long action = *(unsigned long *)h; |
1935 | int i; | 1959 | int i; |
1936 | 1960 | ||
1937 | if (!mce_available(¤t_cpu_data)) | 1961 | if (!mce_available(¤t_cpu_data)) |
1938 | return; | 1962 | return; |
1963 | |||
1939 | if (!(action & CPU_TASKS_FROZEN)) | 1964 | if (!(action & CPU_TASKS_FROZEN)) |
1940 | cmci_clear(); | 1965 | cmci_clear(); |
1941 | for (i = 0; i < banks; i++) { | 1966 | for (i = 0; i < banks; i++) { |
@@ -1946,7 +1971,7 @@ static void mce_disable_cpu(void *h) | |||
1946 | } | 1971 | } |
1947 | } | 1972 | } |
1948 | 1973 | ||
1949 | static void mce_reenable_cpu(void *h) | 1974 | static void __cpuinit mce_reenable_cpu(void *h) |
1950 | { | 1975 | { |
1951 | unsigned long action = *(unsigned long *)h; | 1976 | unsigned long action = *(unsigned long *)h; |
1952 | int i; | 1977 | int i; |
@@ -2025,7 +2050,7 @@ static __init void mce_init_banks(void) | |||
2025 | } | 2050 | } |
2026 | } | 2051 | } |
2027 | 2052 | ||
2028 | static __init int mce_init_device(void) | 2053 | static __init int mcheck_init_device(void) |
2029 | { | 2054 | { |
2030 | int err; | 2055 | int err; |
2031 | int i = 0; | 2056 | int i = 0; |
@@ -2053,7 +2078,7 @@ static __init int mce_init_device(void) | |||
2053 | return err; | 2078 | return err; |
2054 | } | 2079 | } |
2055 | 2080 | ||
2056 | device_initcall(mce_init_device); | 2081 | device_initcall(mcheck_init_device); |
2057 | 2082 | ||
2058 | /* | 2083 | /* |
2059 | * Old style boot options parsing. Only for compatibility. | 2084 | * Old style boot options parsing. Only for compatibility. |
@@ -2101,7 +2126,7 @@ static int fake_panic_set(void *data, u64 val) | |||
2101 | DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, | 2126 | DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, |
2102 | fake_panic_set, "%llu\n"); | 2127 | fake_panic_set, "%llu\n"); |
2103 | 2128 | ||
2104 | static int __init mce_debugfs_init(void) | 2129 | static int __init mcheck_debugfs_init(void) |
2105 | { | 2130 | { |
2106 | struct dentry *dmce, *ffake_panic; | 2131 | struct dentry *dmce, *ffake_panic; |
2107 | 2132 | ||
@@ -2115,5 +2140,5 @@ static int __init mce_debugfs_init(void) | |||
2115 | 2140 | ||
2116 | return 0; | 2141 | return 0; |
2117 | } | 2142 | } |
2118 | late_initcall(mce_debugfs_init); | 2143 | late_initcall(mcheck_debugfs_init); |
2119 | #endif | 2144 | #endif |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index b3a1dba75330..4fef985fc221 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state); | |||
49 | 49 | ||
50 | static atomic_t therm_throt_en = ATOMIC_INIT(0); | 50 | static atomic_t therm_throt_en = ATOMIC_INIT(0); |
51 | 51 | ||
52 | static u32 lvtthmr_init __read_mostly; | ||
53 | |||
52 | #ifdef CONFIG_SYSFS | 54 | #ifdef CONFIG_SYSFS |
53 | #define define_therm_throt_sysdev_one_ro(_name) \ | 55 | #define define_therm_throt_sysdev_one_ro(_name) \ |
54 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) | 56 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) |
@@ -254,6 +256,18 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) | |||
254 | ack_APIC_irq(); | 256 | ack_APIC_irq(); |
255 | } | 257 | } |
256 | 258 | ||
259 | void __init mcheck_intel_therm_init(void) | ||
260 | { | ||
261 | /* | ||
262 | * This function is only called on boot CPU. Save the init thermal | ||
263 | * LVT value on BSP and use that value to restore APs' thermal LVT | ||
264 | * entry BIOS programmed later | ||
265 | */ | ||
266 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) && | ||
267 | cpu_has(&boot_cpu_data, X86_FEATURE_ACC)) | ||
268 | lvtthmr_init = apic_read(APIC_LVTTHMR); | ||
269 | } | ||
270 | |||
257 | void intel_init_thermal(struct cpuinfo_x86 *c) | 271 | void intel_init_thermal(struct cpuinfo_x86 *c) |
258 | { | 272 | { |
259 | unsigned int cpu = smp_processor_id(); | 273 | unsigned int cpu = smp_processor_id(); |
@@ -270,7 +284,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
270 | * since it might be delivered via SMI already: | 284 | * since it might be delivered via SMI already: |
271 | */ | 285 | */ |
272 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 286 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
273 | h = apic_read(APIC_LVTTHMR); | 287 | |
288 | /* | ||
289 | * The initial value of thermal LVT entries on all APs always reads | ||
290 | * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI | ||
291 | * sequence to them and LVT registers are reset to 0s except for | ||
292 | * the mask bits which are set to 1s when APs receive INIT IPI. | ||
293 | * Always restore the value that BIOS has programmed on AP based on | ||
294 | * BSP's info we saved since BIOS is always setting the same value | ||
295 | * for all threads/cores | ||
296 | */ | ||
297 | apic_write(APIC_LVTTHMR, lvtthmr_init); | ||
298 | |||
299 | h = lvtthmr_init; | ||
300 | |||
274 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | 301 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { |
275 | printk(KERN_DEBUG | 302 | printk(KERN_DEBUG |
276 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | 303 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b5801c311846..c1bbed1021d9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -77,6 +77,18 @@ struct cpu_hw_events { | |||
77 | struct debug_store *ds; | 77 | struct debug_store *ds; |
78 | }; | 78 | }; |
79 | 79 | ||
80 | struct event_constraint { | ||
81 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
82 | int code; | ||
83 | }; | ||
84 | |||
85 | #define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } | ||
86 | #define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } | ||
87 | |||
88 | #define for_each_event_constraint(e, c) \ | ||
89 | for ((e) = (c); (e)->idxmsk[0]; (e)++) | ||
90 | |||
91 | |||
80 | /* | 92 | /* |
81 | * struct x86_pmu - generic x86 pmu | 93 | * struct x86_pmu - generic x86 pmu |
82 | */ | 94 | */ |
@@ -102,6 +114,8 @@ struct x86_pmu { | |||
102 | u64 intel_ctrl; | 114 | u64 intel_ctrl; |
103 | void (*enable_bts)(u64 config); | 115 | void (*enable_bts)(u64 config); |
104 | void (*disable_bts)(void); | 116 | void (*disable_bts)(void); |
117 | int (*get_event_idx)(struct cpu_hw_events *cpuc, | ||
118 | struct hw_perf_event *hwc); | ||
105 | }; | 119 | }; |
106 | 120 | ||
107 | static struct x86_pmu x86_pmu __read_mostly; | 121 | static struct x86_pmu x86_pmu __read_mostly; |
@@ -110,6 +124,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | |||
110 | .enabled = 1, | 124 | .enabled = 1, |
111 | }; | 125 | }; |
112 | 126 | ||
127 | static const struct event_constraint *event_constraints; | ||
128 | |||
113 | /* | 129 | /* |
114 | * Not sure about some of these | 130 | * Not sure about some of these |
115 | */ | 131 | */ |
@@ -155,6 +171,16 @@ static u64 p6_pmu_raw_event(u64 hw_event) | |||
155 | return hw_event & P6_EVNTSEL_MASK; | 171 | return hw_event & P6_EVNTSEL_MASK; |
156 | } | 172 | } |
157 | 173 | ||
174 | static const struct event_constraint intel_p6_event_constraints[] = | ||
175 | { | ||
176 | EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | ||
177 | EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
178 | EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ | ||
179 | EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
180 | EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
181 | EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
182 | EVENT_CONSTRAINT_END | ||
183 | }; | ||
158 | 184 | ||
159 | /* | 185 | /* |
160 | * Intel PerfMon v3. Used on Core2 and later. | 186 | * Intel PerfMon v3. Used on Core2 and later. |
@@ -170,6 +196,35 @@ static const u64 intel_perfmon_event_map[] = | |||
170 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | 196 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, |
171 | }; | 197 | }; |
172 | 198 | ||
199 | static const struct event_constraint intel_core_event_constraints[] = | ||
200 | { | ||
201 | EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
202 | EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
203 | EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
204 | EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
205 | EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
206 | EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ | ||
207 | EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
208 | EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ | ||
209 | EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ | ||
210 | EVENT_CONSTRAINT_END | ||
211 | }; | ||
212 | |||
213 | static const struct event_constraint intel_nehalem_event_constraints[] = | ||
214 | { | ||
215 | EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ | ||
216 | EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ | ||
217 | EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ | ||
218 | EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ | ||
219 | EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ | ||
220 | EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ | ||
221 | EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
222 | EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ | ||
223 | EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ | ||
224 | EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ | ||
225 | EVENT_CONSTRAINT_END | ||
226 | }; | ||
227 | |||
173 | static u64 intel_pmu_event_map(int hw_event) | 228 | static u64 intel_pmu_event_map(int hw_event) |
174 | { | 229 | { |
175 | return intel_perfmon_event_map[hw_event]; | 230 | return intel_perfmon_event_map[hw_event]; |
@@ -190,7 +245,7 @@ static u64 __read_mostly hw_cache_event_ids | |||
190 | [PERF_COUNT_HW_CACHE_OP_MAX] | 245 | [PERF_COUNT_HW_CACHE_OP_MAX] |
191 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | 246 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; |
192 | 247 | ||
193 | static const u64 nehalem_hw_cache_event_ids | 248 | static __initconst u64 nehalem_hw_cache_event_ids |
194 | [PERF_COUNT_HW_CACHE_MAX] | 249 | [PERF_COUNT_HW_CACHE_MAX] |
195 | [PERF_COUNT_HW_CACHE_OP_MAX] | 250 | [PERF_COUNT_HW_CACHE_OP_MAX] |
196 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 251 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -281,7 +336,7 @@ static const u64 nehalem_hw_cache_event_ids | |||
281 | }, | 336 | }, |
282 | }; | 337 | }; |
283 | 338 | ||
284 | static const u64 core2_hw_cache_event_ids | 339 | static __initconst u64 core2_hw_cache_event_ids |
285 | [PERF_COUNT_HW_CACHE_MAX] | 340 | [PERF_COUNT_HW_CACHE_MAX] |
286 | [PERF_COUNT_HW_CACHE_OP_MAX] | 341 | [PERF_COUNT_HW_CACHE_OP_MAX] |
287 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 342 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -372,7 +427,7 @@ static const u64 core2_hw_cache_event_ids | |||
372 | }, | 427 | }, |
373 | }; | 428 | }; |
374 | 429 | ||
375 | static const u64 atom_hw_cache_event_ids | 430 | static __initconst u64 atom_hw_cache_event_ids |
376 | [PERF_COUNT_HW_CACHE_MAX] | 431 | [PERF_COUNT_HW_CACHE_MAX] |
377 | [PERF_COUNT_HW_CACHE_OP_MAX] | 432 | [PERF_COUNT_HW_CACHE_OP_MAX] |
378 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 433 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -469,7 +524,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) | |||
469 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | 524 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL |
470 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | 525 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL |
471 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | 526 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL |
472 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | 527 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL |
473 | 528 | ||
474 | #define CORE_EVNTSEL_MASK \ | 529 | #define CORE_EVNTSEL_MASK \ |
475 | (CORE_EVNTSEL_EVENT_MASK | \ | 530 | (CORE_EVNTSEL_EVENT_MASK | \ |
@@ -481,7 +536,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) | |||
481 | return hw_event & CORE_EVNTSEL_MASK; | 536 | return hw_event & CORE_EVNTSEL_MASK; |
482 | } | 537 | } |
483 | 538 | ||
484 | static const u64 amd_hw_cache_event_ids | 539 | static __initconst u64 amd_hw_cache_event_ids |
485 | [PERF_COUNT_HW_CACHE_MAX] | 540 | [PERF_COUNT_HW_CACHE_MAX] |
486 | [PERF_COUNT_HW_CACHE_OP_MAX] | 541 | [PERF_COUNT_HW_CACHE_OP_MAX] |
487 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 542 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -932,6 +987,8 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
932 | */ | 987 | */ |
933 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | 988 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; |
934 | 989 | ||
990 | hwc->idx = -1; | ||
991 | |||
935 | /* | 992 | /* |
936 | * Count user and OS events unless requested not to. | 993 | * Count user and OS events unless requested not to. |
937 | */ | 994 | */ |
@@ -1334,8 +1391,7 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) | |||
1334 | x86_pmu_enable_event(hwc, idx); | 1391 | x86_pmu_enable_event(hwc, idx); |
1335 | } | 1392 | } |
1336 | 1393 | ||
1337 | static int | 1394 | static int fixed_mode_idx(struct hw_perf_event *hwc) |
1338 | fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) | ||
1339 | { | 1395 | { |
1340 | unsigned int hw_event; | 1396 | unsigned int hw_event; |
1341 | 1397 | ||
@@ -1349,6 +1405,12 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) | |||
1349 | if (!x86_pmu.num_events_fixed) | 1405 | if (!x86_pmu.num_events_fixed) |
1350 | return -1; | 1406 | return -1; |
1351 | 1407 | ||
1408 | /* | ||
1409 | * fixed counters do not take all possible filters | ||
1410 | */ | ||
1411 | if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) | ||
1412 | return -1; | ||
1413 | |||
1352 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | 1414 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) |
1353 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; | 1415 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; |
1354 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) | 1416 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) |
@@ -1360,22 +1422,57 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) | |||
1360 | } | 1422 | } |
1361 | 1423 | ||
1362 | /* | 1424 | /* |
1363 | * Find a PMC slot for the freshly enabled / scheduled in event: | 1425 | * generic counter allocator: get next free counter |
1364 | */ | 1426 | */ |
1365 | static int x86_pmu_enable(struct perf_event *event) | 1427 | static int |
1428 | gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | ||
1429 | { | ||
1430 | int idx; | ||
1431 | |||
1432 | idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); | ||
1433 | return idx == x86_pmu.num_events ? -1 : idx; | ||
1434 | } | ||
1435 | |||
1436 | /* | ||
1437 | * intel-specific counter allocator: check event constraints | ||
1438 | */ | ||
1439 | static int | ||
1440 | intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | ||
1441 | { | ||
1442 | const struct event_constraint *event_constraint; | ||
1443 | int i, code; | ||
1444 | |||
1445 | if (!event_constraints) | ||
1446 | goto skip; | ||
1447 | |||
1448 | code = hwc->config & CORE_EVNTSEL_EVENT_MASK; | ||
1449 | |||
1450 | for_each_event_constraint(event_constraint, event_constraints) { | ||
1451 | if (code == event_constraint->code) { | ||
1452 | for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { | ||
1453 | if (!test_and_set_bit(i, cpuc->used_mask)) | ||
1454 | return i; | ||
1455 | } | ||
1456 | return -1; | ||
1457 | } | ||
1458 | } | ||
1459 | skip: | ||
1460 | return gen_get_event_idx(cpuc, hwc); | ||
1461 | } | ||
1462 | |||
1463 | static int | ||
1464 | x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | ||
1366 | { | 1465 | { |
1367 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1368 | struct hw_perf_event *hwc = &event->hw; | ||
1369 | int idx; | 1466 | int idx; |
1370 | 1467 | ||
1371 | idx = fixed_mode_idx(event, hwc); | 1468 | idx = fixed_mode_idx(hwc); |
1372 | if (idx == X86_PMC_IDX_FIXED_BTS) { | 1469 | if (idx == X86_PMC_IDX_FIXED_BTS) { |
1373 | /* BTS is already occupied. */ | 1470 | /* BTS is already occupied. */ |
1374 | if (test_and_set_bit(idx, cpuc->used_mask)) | 1471 | if (test_and_set_bit(idx, cpuc->used_mask)) |
1375 | return -EAGAIN; | 1472 | return -EAGAIN; |
1376 | 1473 | ||
1377 | hwc->config_base = 0; | 1474 | hwc->config_base = 0; |
1378 | hwc->event_base = 0; | 1475 | hwc->event_base = 0; |
1379 | hwc->idx = idx; | 1476 | hwc->idx = idx; |
1380 | } else if (idx >= 0) { | 1477 | } else if (idx >= 0) { |
1381 | /* | 1478 | /* |
@@ -1396,20 +1493,35 @@ static int x86_pmu_enable(struct perf_event *event) | |||
1396 | } else { | 1493 | } else { |
1397 | idx = hwc->idx; | 1494 | idx = hwc->idx; |
1398 | /* Try to get the previous generic event again */ | 1495 | /* Try to get the previous generic event again */ |
1399 | if (test_and_set_bit(idx, cpuc->used_mask)) { | 1496 | if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { |
1400 | try_generic: | 1497 | try_generic: |
1401 | idx = find_first_zero_bit(cpuc->used_mask, | 1498 | idx = x86_pmu.get_event_idx(cpuc, hwc); |
1402 | x86_pmu.num_events); | 1499 | if (idx == -1) |
1403 | if (idx == x86_pmu.num_events) | ||
1404 | return -EAGAIN; | 1500 | return -EAGAIN; |
1405 | 1501 | ||
1406 | set_bit(idx, cpuc->used_mask); | 1502 | set_bit(idx, cpuc->used_mask); |
1407 | hwc->idx = idx; | 1503 | hwc->idx = idx; |
1408 | } | 1504 | } |
1409 | hwc->config_base = x86_pmu.eventsel; | 1505 | hwc->config_base = x86_pmu.eventsel; |
1410 | hwc->event_base = x86_pmu.perfctr; | 1506 | hwc->event_base = x86_pmu.perfctr; |
1411 | } | 1507 | } |
1412 | 1508 | ||
1509 | return idx; | ||
1510 | } | ||
1511 | |||
1512 | /* | ||
1513 | * Find a PMC slot for the freshly enabled / scheduled in event: | ||
1514 | */ | ||
1515 | static int x86_pmu_enable(struct perf_event *event) | ||
1516 | { | ||
1517 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1518 | struct hw_perf_event *hwc = &event->hw; | ||
1519 | int idx; | ||
1520 | |||
1521 | idx = x86_schedule_event(cpuc, hwc); | ||
1522 | if (idx < 0) | ||
1523 | return idx; | ||
1524 | |||
1413 | perf_events_lapic_init(); | 1525 | perf_events_lapic_init(); |
1414 | 1526 | ||
1415 | x86_pmu.disable(hwc, idx); | 1527 | x86_pmu.disable(hwc, idx); |
@@ -1852,7 +1964,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { | |||
1852 | .priority = 1 | 1964 | .priority = 1 |
1853 | }; | 1965 | }; |
1854 | 1966 | ||
1855 | static struct x86_pmu p6_pmu = { | 1967 | static __initconst struct x86_pmu p6_pmu = { |
1856 | .name = "p6", | 1968 | .name = "p6", |
1857 | .handle_irq = p6_pmu_handle_irq, | 1969 | .handle_irq = p6_pmu_handle_irq, |
1858 | .disable_all = p6_pmu_disable_all, | 1970 | .disable_all = p6_pmu_disable_all, |
@@ -1877,9 +1989,10 @@ static struct x86_pmu p6_pmu = { | |||
1877 | */ | 1989 | */ |
1878 | .event_bits = 32, | 1990 | .event_bits = 32, |
1879 | .event_mask = (1ULL << 32) - 1, | 1991 | .event_mask = (1ULL << 32) - 1, |
1992 | .get_event_idx = intel_get_event_idx, | ||
1880 | }; | 1993 | }; |
1881 | 1994 | ||
1882 | static struct x86_pmu intel_pmu = { | 1995 | static __initconst struct x86_pmu intel_pmu = { |
1883 | .name = "Intel", | 1996 | .name = "Intel", |
1884 | .handle_irq = intel_pmu_handle_irq, | 1997 | .handle_irq = intel_pmu_handle_irq, |
1885 | .disable_all = intel_pmu_disable_all, | 1998 | .disable_all = intel_pmu_disable_all, |
@@ -1900,9 +2013,10 @@ static struct x86_pmu intel_pmu = { | |||
1900 | .max_period = (1ULL << 31) - 1, | 2013 | .max_period = (1ULL << 31) - 1, |
1901 | .enable_bts = intel_pmu_enable_bts, | 2014 | .enable_bts = intel_pmu_enable_bts, |
1902 | .disable_bts = intel_pmu_disable_bts, | 2015 | .disable_bts = intel_pmu_disable_bts, |
2016 | .get_event_idx = intel_get_event_idx, | ||
1903 | }; | 2017 | }; |
1904 | 2018 | ||
1905 | static struct x86_pmu amd_pmu = { | 2019 | static __initconst struct x86_pmu amd_pmu = { |
1906 | .name = "AMD", | 2020 | .name = "AMD", |
1907 | .handle_irq = amd_pmu_handle_irq, | 2021 | .handle_irq = amd_pmu_handle_irq, |
1908 | .disable_all = amd_pmu_disable_all, | 2022 | .disable_all = amd_pmu_disable_all, |
@@ -1920,9 +2034,10 @@ static struct x86_pmu amd_pmu = { | |||
1920 | .apic = 1, | 2034 | .apic = 1, |
1921 | /* use highest bit to detect overflow */ | 2035 | /* use highest bit to detect overflow */ |
1922 | .max_period = (1ULL << 47) - 1, | 2036 | .max_period = (1ULL << 47) - 1, |
2037 | .get_event_idx = gen_get_event_idx, | ||
1923 | }; | 2038 | }; |
1924 | 2039 | ||
1925 | static int p6_pmu_init(void) | 2040 | static __init int p6_pmu_init(void) |
1926 | { | 2041 | { |
1927 | switch (boot_cpu_data.x86_model) { | 2042 | switch (boot_cpu_data.x86_model) { |
1928 | case 1: | 2043 | case 1: |
@@ -1932,10 +2047,12 @@ static int p6_pmu_init(void) | |||
1932 | case 7: | 2047 | case 7: |
1933 | case 8: | 2048 | case 8: |
1934 | case 11: /* Pentium III */ | 2049 | case 11: /* Pentium III */ |
2050 | event_constraints = intel_p6_event_constraints; | ||
1935 | break; | 2051 | break; |
1936 | case 9: | 2052 | case 9: |
1937 | case 13: | 2053 | case 13: |
1938 | /* Pentium M */ | 2054 | /* Pentium M */ |
2055 | event_constraints = intel_p6_event_constraints; | ||
1939 | break; | 2056 | break; |
1940 | default: | 2057 | default: |
1941 | pr_cont("unsupported p6 CPU model %d ", | 2058 | pr_cont("unsupported p6 CPU model %d ", |
@@ -1954,7 +2071,7 @@ static int p6_pmu_init(void) | |||
1954 | return 0; | 2071 | return 0; |
1955 | } | 2072 | } |
1956 | 2073 | ||
1957 | static int intel_pmu_init(void) | 2074 | static __init int intel_pmu_init(void) |
1958 | { | 2075 | { |
1959 | union cpuid10_edx edx; | 2076 | union cpuid10_edx edx; |
1960 | union cpuid10_eax eax; | 2077 | union cpuid10_eax eax; |
@@ -2007,12 +2124,14 @@ static int intel_pmu_init(void) | |||
2007 | sizeof(hw_cache_event_ids)); | 2124 | sizeof(hw_cache_event_ids)); |
2008 | 2125 | ||
2009 | pr_cont("Core2 events, "); | 2126 | pr_cont("Core2 events, "); |
2127 | event_constraints = intel_core_event_constraints; | ||
2010 | break; | 2128 | break; |
2011 | default: | 2129 | default: |
2012 | case 26: | 2130 | case 26: |
2013 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | 2131 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, |
2014 | sizeof(hw_cache_event_ids)); | 2132 | sizeof(hw_cache_event_ids)); |
2015 | 2133 | ||
2134 | event_constraints = intel_nehalem_event_constraints; | ||
2016 | pr_cont("Nehalem/Corei7 events, "); | 2135 | pr_cont("Nehalem/Corei7 events, "); |
2017 | break; | 2136 | break; |
2018 | case 28: | 2137 | case 28: |
@@ -2025,7 +2144,7 @@ static int intel_pmu_init(void) | |||
2025 | return 0; | 2144 | return 0; |
2026 | } | 2145 | } |
2027 | 2146 | ||
2028 | static int amd_pmu_init(void) | 2147 | static __init int amd_pmu_init(void) |
2029 | { | 2148 | { |
2030 | /* Performance-monitoring supported from K7 and later: */ | 2149 | /* Performance-monitoring supported from K7 and later: */ |
2031 | if (boot_cpu_data.x86 < 6) | 2150 | if (boot_cpu_data.x86 < 6) |
@@ -2105,11 +2224,47 @@ static const struct pmu pmu = { | |||
2105 | .unthrottle = x86_pmu_unthrottle, | 2224 | .unthrottle = x86_pmu_unthrottle, |
2106 | }; | 2225 | }; |
2107 | 2226 | ||
2227 | static int | ||
2228 | validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
2229 | { | ||
2230 | struct hw_perf_event fake_event = event->hw; | ||
2231 | |||
2232 | if (event->pmu && event->pmu != &pmu) | ||
2233 | return 0; | ||
2234 | |||
2235 | return x86_schedule_event(cpuc, &fake_event) >= 0; | ||
2236 | } | ||
2237 | |||
2238 | static int validate_group(struct perf_event *event) | ||
2239 | { | ||
2240 | struct perf_event *sibling, *leader = event->group_leader; | ||
2241 | struct cpu_hw_events fake_pmu; | ||
2242 | |||
2243 | memset(&fake_pmu, 0, sizeof(fake_pmu)); | ||
2244 | |||
2245 | if (!validate_event(&fake_pmu, leader)) | ||
2246 | return -ENOSPC; | ||
2247 | |||
2248 | list_for_each_entry(sibling, &leader->sibling_list, group_entry) { | ||
2249 | if (!validate_event(&fake_pmu, sibling)) | ||
2250 | return -ENOSPC; | ||
2251 | } | ||
2252 | |||
2253 | if (!validate_event(&fake_pmu, event)) | ||
2254 | return -ENOSPC; | ||
2255 | |||
2256 | return 0; | ||
2257 | } | ||
2258 | |||
2108 | const struct pmu *hw_perf_event_init(struct perf_event *event) | 2259 | const struct pmu *hw_perf_event_init(struct perf_event *event) |
2109 | { | 2260 | { |
2110 | int err; | 2261 | int err; |
2111 | 2262 | ||
2112 | err = __hw_perf_event_init(event); | 2263 | err = __hw_perf_event_init(event); |
2264 | if (!err) { | ||
2265 | if (event->group_leader != event) | ||
2266 | err = validate_group(event); | ||
2267 | } | ||
2113 | if (err) { | 2268 | if (err) { |
2114 | if (event->destroy) | 2269 | if (event->destroy) |
2115 | event->destroy(event); | 2270 | event->destroy(event); |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 7d52e9da5e0c..50b9c220e121 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -334,6 +334,10 @@ ENTRY(ret_from_fork) | |||
334 | END(ret_from_fork) | 334 | END(ret_from_fork) |
335 | 335 | ||
336 | /* | 336 | /* |
337 | * Interrupt exit functions should be protected against kprobes | ||
338 | */ | ||
339 | .pushsection .kprobes.text, "ax" | ||
340 | /* | ||
337 | * Return to user mode is not as complex as all this looks, | 341 | * Return to user mode is not as complex as all this looks, |
338 | * but we want the default path for a system call return to | 342 | * but we want the default path for a system call return to |
339 | * go as quickly as possible which is why some of this is | 343 | * go as quickly as possible which is why some of this is |
@@ -383,6 +387,10 @@ need_resched: | |||
383 | END(resume_kernel) | 387 | END(resume_kernel) |
384 | #endif | 388 | #endif |
385 | CFI_ENDPROC | 389 | CFI_ENDPROC |
390 | /* | ||
391 | * End of kprobes section | ||
392 | */ | ||
393 | .popsection | ||
386 | 394 | ||
387 | /* SYSENTER_RETURN points to after the "sysenter" instruction in | 395 | /* SYSENTER_RETURN points to after the "sysenter" instruction in |
388 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ | 396 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ |
@@ -513,6 +521,10 @@ sysexit_audit: | |||
513 | PTGS_TO_GS_EX | 521 | PTGS_TO_GS_EX |
514 | ENDPROC(ia32_sysenter_target) | 522 | ENDPROC(ia32_sysenter_target) |
515 | 523 | ||
524 | /* | ||
525 | * syscall stub including irq exit should be protected against kprobes | ||
526 | */ | ||
527 | .pushsection .kprobes.text, "ax" | ||
516 | # system call handler stub | 528 | # system call handler stub |
517 | ENTRY(system_call) | 529 | ENTRY(system_call) |
518 | RING0_INT_FRAME # can't unwind into user space anyway | 530 | RING0_INT_FRAME # can't unwind into user space anyway |
@@ -705,6 +717,10 @@ syscall_badsys: | |||
705 | jmp resume_userspace | 717 | jmp resume_userspace |
706 | END(syscall_badsys) | 718 | END(syscall_badsys) |
707 | CFI_ENDPROC | 719 | CFI_ENDPROC |
720 | /* | ||
721 | * End of kprobes section | ||
722 | */ | ||
723 | .popsection | ||
708 | 724 | ||
709 | /* | 725 | /* |
710 | * System calls that need a pt_regs pointer. | 726 | * System calls that need a pt_regs pointer. |
@@ -814,6 +830,10 @@ common_interrupt: | |||
814 | ENDPROC(common_interrupt) | 830 | ENDPROC(common_interrupt) |
815 | CFI_ENDPROC | 831 | CFI_ENDPROC |
816 | 832 | ||
833 | /* | ||
834 | * Irq entries should be protected against kprobes | ||
835 | */ | ||
836 | .pushsection .kprobes.text, "ax" | ||
817 | #define BUILD_INTERRUPT3(name, nr, fn) \ | 837 | #define BUILD_INTERRUPT3(name, nr, fn) \ |
818 | ENTRY(name) \ | 838 | ENTRY(name) \ |
819 | RING0_INT_FRAME; \ | 839 | RING0_INT_FRAME; \ |
@@ -980,6 +1000,10 @@ ENTRY(spurious_interrupt_bug) | |||
980 | jmp error_code | 1000 | jmp error_code |
981 | CFI_ENDPROC | 1001 | CFI_ENDPROC |
982 | END(spurious_interrupt_bug) | 1002 | END(spurious_interrupt_bug) |
1003 | /* | ||
1004 | * End of kprobes section | ||
1005 | */ | ||
1006 | .popsection | ||
983 | 1007 | ||
984 | ENTRY(kernel_thread_helper) | 1008 | ENTRY(kernel_thread_helper) |
985 | pushl $0 # fake return address for unwinder | 1009 | pushl $0 # fake return address for unwinder |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index bd5bbddddf91..722df1b1152d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -803,6 +803,10 @@ END(interrupt) | |||
803 | call \func | 803 | call \func |
804 | .endm | 804 | .endm |
805 | 805 | ||
806 | /* | ||
807 | * Interrupt entry/exit should be protected against kprobes | ||
808 | */ | ||
809 | .pushsection .kprobes.text, "ax" | ||
806 | /* | 810 | /* |
807 | * The interrupt stubs push (~vector+0x80) onto the stack and | 811 | * The interrupt stubs push (~vector+0x80) onto the stack and |
808 | * then jump to common_interrupt. | 812 | * then jump to common_interrupt. |
@@ -941,6 +945,10 @@ ENTRY(retint_kernel) | |||
941 | 945 | ||
942 | CFI_ENDPROC | 946 | CFI_ENDPROC |
943 | END(common_interrupt) | 947 | END(common_interrupt) |
948 | /* | ||
949 | * End of kprobes section | ||
950 | */ | ||
951 | .popsection | ||
944 | 952 | ||
945 | /* | 953 | /* |
946 | * APIC interrupts. | 954 | * APIC interrupts. |
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c new file mode 100644 index 000000000000..d42f65ac4927 --- /dev/null +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -0,0 +1,555 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
15 | * | ||
16 | * Copyright (C) 2007 Alan Stern | ||
17 | * Copyright (C) 2009 IBM Corporation | ||
18 | * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com> | ||
19 | * | ||
20 | * Authors: Alan Stern <stern@rowland.harvard.edu> | ||
21 | * K.Prasad <prasad@linux.vnet.ibm.com> | ||
22 | * Frederic Weisbecker <fweisbec@gmail.com> | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, | ||
27 | * using the CPU's debug registers. | ||
28 | */ | ||
29 | |||
30 | #include <linux/perf_event.h> | ||
31 | #include <linux/hw_breakpoint.h> | ||
32 | #include <linux/irqflags.h> | ||
33 | #include <linux/notifier.h> | ||
34 | #include <linux/kallsyms.h> | ||
35 | #include <linux/kprobes.h> | ||
36 | #include <linux/percpu.h> | ||
37 | #include <linux/kdebug.h> | ||
38 | #include <linux/kernel.h> | ||
39 | #include <linux/module.h> | ||
40 | #include <linux/sched.h> | ||
41 | #include <linux/init.h> | ||
42 | #include <linux/smp.h> | ||
43 | |||
44 | #include <asm/hw_breakpoint.h> | ||
45 | #include <asm/processor.h> | ||
46 | #include <asm/debugreg.h> | ||
47 | |||
48 | /* Per cpu debug control register value */ | ||
49 | DEFINE_PER_CPU(unsigned long, cpu_dr7); | ||
50 | EXPORT_PER_CPU_SYMBOL(cpu_dr7); | ||
51 | |||
52 | /* Per cpu debug address registers values */ | ||
53 | static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); | ||
54 | |||
55 | /* | ||
56 | * Stores the breakpoints currently in use on each breakpoint address | ||
57 | * register for each cpus | ||
58 | */ | ||
59 | static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); | ||
60 | |||
61 | |||
62 | static inline unsigned long | ||
63 | __encode_dr7(int drnum, unsigned int len, unsigned int type) | ||
64 | { | ||
65 | unsigned long bp_info; | ||
66 | |||
67 | bp_info = (len | type) & 0xf; | ||
68 | bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); | ||
69 | bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); | ||
70 | |||
71 | return bp_info; | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Encode the length, type, Exact, and Enable bits for a particular breakpoint | ||
76 | * as stored in debug register 7. | ||
77 | */ | ||
78 | unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) | ||
79 | { | ||
80 | return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN; | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Decode the length and type bits for a particular breakpoint as | ||
85 | * stored in debug register 7. Return the "enabled" status. | ||
86 | */ | ||
87 | int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) | ||
88 | { | ||
89 | int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); | ||
90 | |||
91 | *len = (bp_info & 0xc) | 0x40; | ||
92 | *type = (bp_info & 0x3) | 0x80; | ||
93 | |||
94 | return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * Install a perf counter breakpoint. | ||
99 | * | ||
100 | * We seek a free debug address register and use it for this | ||
101 | * breakpoint. Eventually we enable it in the debug control register. | ||
102 | * | ||
103 | * Atomic: we hold the counter->ctx->lock and we only handle variables | ||
104 | * and registers local to this cpu. | ||
105 | */ | ||
106 | int arch_install_hw_breakpoint(struct perf_event *bp) | ||
107 | { | ||
108 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
109 | unsigned long *dr7; | ||
110 | int i; | ||
111 | |||
112 | for (i = 0; i < HBP_NUM; i++) { | ||
113 | struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); | ||
114 | |||
115 | if (!*slot) { | ||
116 | *slot = bp; | ||
117 | break; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) | ||
122 | return -EBUSY; | ||
123 | |||
124 | set_debugreg(info->address, i); | ||
125 | __get_cpu_var(cpu_debugreg[i]) = info->address; | ||
126 | |||
127 | dr7 = &__get_cpu_var(cpu_dr7); | ||
128 | *dr7 |= encode_dr7(i, info->len, info->type); | ||
129 | |||
130 | set_debugreg(*dr7, 7); | ||
131 | |||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * Uninstall the breakpoint contained in the given counter. | ||
137 | * | ||
138 | * First we search the debug address register it uses and then we disable | ||
139 | * it. | ||
140 | * | ||
141 | * Atomic: we hold the counter->ctx->lock and we only handle variables | ||
142 | * and registers local to this cpu. | ||
143 | */ | ||
144 | void arch_uninstall_hw_breakpoint(struct perf_event *bp) | ||
145 | { | ||
146 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
147 | unsigned long *dr7; | ||
148 | int i; | ||
149 | |||
150 | for (i = 0; i < HBP_NUM; i++) { | ||
151 | struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); | ||
152 | |||
153 | if (*slot == bp) { | ||
154 | *slot = NULL; | ||
155 | break; | ||
156 | } | ||
157 | } | ||
158 | |||
159 | if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) | ||
160 | return; | ||
161 | |||
162 | dr7 = &__get_cpu_var(cpu_dr7); | ||
163 | *dr7 &= ~__encode_dr7(i, info->len, info->type); | ||
164 | |||
165 | set_debugreg(*dr7, 7); | ||
166 | } | ||
167 | |||
168 | static int get_hbp_len(u8 hbp_len) | ||
169 | { | ||
170 | unsigned int len_in_bytes = 0; | ||
171 | |||
172 | switch (hbp_len) { | ||
173 | case X86_BREAKPOINT_LEN_1: | ||
174 | len_in_bytes = 1; | ||
175 | break; | ||
176 | case X86_BREAKPOINT_LEN_2: | ||
177 | len_in_bytes = 2; | ||
178 | break; | ||
179 | case X86_BREAKPOINT_LEN_4: | ||
180 | len_in_bytes = 4; | ||
181 | break; | ||
182 | #ifdef CONFIG_X86_64 | ||
183 | case X86_BREAKPOINT_LEN_8: | ||
184 | len_in_bytes = 8; | ||
185 | break; | ||
186 | #endif | ||
187 | } | ||
188 | return len_in_bytes; | ||
189 | } | ||
190 | |||
191 | /* | ||
192 | * Check for virtual address in user space. | ||
193 | */ | ||
194 | int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) | ||
195 | { | ||
196 | unsigned int len; | ||
197 | |||
198 | len = get_hbp_len(hbp_len); | ||
199 | |||
200 | return (va <= TASK_SIZE - len); | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * Check for virtual address in kernel space. | ||
205 | */ | ||
206 | static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) | ||
207 | { | ||
208 | unsigned int len; | ||
209 | |||
210 | len = get_hbp_len(hbp_len); | ||
211 | |||
212 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); | ||
213 | } | ||
214 | |||
215 | /* | ||
216 | * Store a breakpoint's encoded address, length, and type. | ||
217 | */ | ||
218 | static int arch_store_info(struct perf_event *bp) | ||
219 | { | ||
220 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
221 | /* | ||
222 | * For kernel-addresses, either the address or symbol name can be | ||
223 | * specified. | ||
224 | */ | ||
225 | if (info->name) | ||
226 | info->address = (unsigned long) | ||
227 | kallsyms_lookup_name(info->name); | ||
228 | if (info->address) | ||
229 | return 0; | ||
230 | |||
231 | return -EINVAL; | ||
232 | } | ||
233 | |||
234 | int arch_bp_generic_fields(int x86_len, int x86_type, | ||
235 | int *gen_len, int *gen_type) | ||
236 | { | ||
237 | /* Len */ | ||
238 | switch (x86_len) { | ||
239 | case X86_BREAKPOINT_LEN_1: | ||
240 | *gen_len = HW_BREAKPOINT_LEN_1; | ||
241 | break; | ||
242 | case X86_BREAKPOINT_LEN_2: | ||
243 | *gen_len = HW_BREAKPOINT_LEN_2; | ||
244 | break; | ||
245 | case X86_BREAKPOINT_LEN_4: | ||
246 | *gen_len = HW_BREAKPOINT_LEN_4; | ||
247 | break; | ||
248 | #ifdef CONFIG_X86_64 | ||
249 | case X86_BREAKPOINT_LEN_8: | ||
250 | *gen_len = HW_BREAKPOINT_LEN_8; | ||
251 | break; | ||
252 | #endif | ||
253 | default: | ||
254 | return -EINVAL; | ||
255 | } | ||
256 | |||
257 | /* Type */ | ||
258 | switch (x86_type) { | ||
259 | case X86_BREAKPOINT_EXECUTE: | ||
260 | *gen_type = HW_BREAKPOINT_X; | ||
261 | break; | ||
262 | case X86_BREAKPOINT_WRITE: | ||
263 | *gen_type = HW_BREAKPOINT_W; | ||
264 | break; | ||
265 | case X86_BREAKPOINT_RW: | ||
266 | *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; | ||
267 | break; | ||
268 | default: | ||
269 | return -EINVAL; | ||
270 | } | ||
271 | |||
272 | return 0; | ||
273 | } | ||
274 | |||
275 | |||
276 | static int arch_build_bp_info(struct perf_event *bp) | ||
277 | { | ||
278 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
279 | |||
280 | info->address = bp->attr.bp_addr; | ||
281 | |||
282 | /* Len */ | ||
283 | switch (bp->attr.bp_len) { | ||
284 | case HW_BREAKPOINT_LEN_1: | ||
285 | info->len = X86_BREAKPOINT_LEN_1; | ||
286 | break; | ||
287 | case HW_BREAKPOINT_LEN_2: | ||
288 | info->len = X86_BREAKPOINT_LEN_2; | ||
289 | break; | ||
290 | case HW_BREAKPOINT_LEN_4: | ||
291 | info->len = X86_BREAKPOINT_LEN_4; | ||
292 | break; | ||
293 | #ifdef CONFIG_X86_64 | ||
294 | case HW_BREAKPOINT_LEN_8: | ||
295 | info->len = X86_BREAKPOINT_LEN_8; | ||
296 | break; | ||
297 | #endif | ||
298 | default: | ||
299 | return -EINVAL; | ||
300 | } | ||
301 | |||
302 | /* Type */ | ||
303 | switch (bp->attr.bp_type) { | ||
304 | case HW_BREAKPOINT_W: | ||
305 | info->type = X86_BREAKPOINT_WRITE; | ||
306 | break; | ||
307 | case HW_BREAKPOINT_W | HW_BREAKPOINT_R: | ||
308 | info->type = X86_BREAKPOINT_RW; | ||
309 | break; | ||
310 | case HW_BREAKPOINT_X: | ||
311 | info->type = X86_BREAKPOINT_EXECUTE; | ||
312 | break; | ||
313 | default: | ||
314 | return -EINVAL; | ||
315 | } | ||
316 | |||
317 | return 0; | ||
318 | } | ||
319 | /* | ||
320 | * Validate the arch-specific HW Breakpoint register settings | ||
321 | */ | ||
322 | int arch_validate_hwbkpt_settings(struct perf_event *bp, | ||
323 | struct task_struct *tsk) | ||
324 | { | ||
325 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
326 | unsigned int align; | ||
327 | int ret; | ||
328 | |||
329 | |||
330 | ret = arch_build_bp_info(bp); | ||
331 | if (ret) | ||
332 | return ret; | ||
333 | |||
334 | ret = -EINVAL; | ||
335 | |||
336 | if (info->type == X86_BREAKPOINT_EXECUTE) | ||
337 | /* | ||
338 | * Ptrace-refactoring code | ||
339 | * For now, we'll allow instruction breakpoint only for user-space | ||
340 | * addresses | ||
341 | */ | ||
342 | if ((!arch_check_va_in_userspace(info->address, info->len)) && | ||
343 | info->len != X86_BREAKPOINT_EXECUTE) | ||
344 | return ret; | ||
345 | |||
346 | switch (info->len) { | ||
347 | case X86_BREAKPOINT_LEN_1: | ||
348 | align = 0; | ||
349 | break; | ||
350 | case X86_BREAKPOINT_LEN_2: | ||
351 | align = 1; | ||
352 | break; | ||
353 | case X86_BREAKPOINT_LEN_4: | ||
354 | align = 3; | ||
355 | break; | ||
356 | #ifdef CONFIG_X86_64 | ||
357 | case X86_BREAKPOINT_LEN_8: | ||
358 | align = 7; | ||
359 | break; | ||
360 | #endif | ||
361 | default: | ||
362 | return ret; | ||
363 | } | ||
364 | |||
365 | if (bp->callback) | ||
366 | ret = arch_store_info(bp); | ||
367 | |||
368 | if (ret < 0) | ||
369 | return ret; | ||
370 | /* | ||
371 | * Check that the low-order bits of the address are appropriate | ||
372 | * for the alignment implied by len. | ||
373 | */ | ||
374 | if (info->address & align) | ||
375 | return -EINVAL; | ||
376 | |||
377 | /* Check that the virtual address is in the proper range */ | ||
378 | if (tsk) { | ||
379 | if (!arch_check_va_in_userspace(info->address, info->len)) | ||
380 | return -EFAULT; | ||
381 | } else { | ||
382 | if (!arch_check_va_in_kernelspace(info->address, info->len)) | ||
383 | return -EFAULT; | ||
384 | } | ||
385 | |||
386 | return 0; | ||
387 | } | ||
388 | |||
389 | /* | ||
390 | * Dump the debug register contents to the user. | ||
391 | * We can't dump our per cpu values because it | ||
392 | * may contain cpu wide breakpoint, something that | ||
393 | * doesn't belong to the current task. | ||
394 | * | ||
395 | * TODO: include non-ptrace user breakpoints (perf) | ||
396 | */ | ||
397 | void aout_dump_debugregs(struct user *dump) | ||
398 | { | ||
399 | int i; | ||
400 | int dr7 = 0; | ||
401 | struct perf_event *bp; | ||
402 | struct arch_hw_breakpoint *info; | ||
403 | struct thread_struct *thread = ¤t->thread; | ||
404 | |||
405 | for (i = 0; i < HBP_NUM; i++) { | ||
406 | bp = thread->ptrace_bps[i]; | ||
407 | |||
408 | if (bp && !bp->attr.disabled) { | ||
409 | dump->u_debugreg[i] = bp->attr.bp_addr; | ||
410 | info = counter_arch_bp(bp); | ||
411 | dr7 |= encode_dr7(i, info->len, info->type); | ||
412 | } else { | ||
413 | dump->u_debugreg[i] = 0; | ||
414 | } | ||
415 | } | ||
416 | |||
417 | dump->u_debugreg[4] = 0; | ||
418 | dump->u_debugreg[5] = 0; | ||
419 | dump->u_debugreg[6] = current->thread.debugreg6; | ||
420 | |||
421 | dump->u_debugreg[7] = dr7; | ||
422 | } | ||
423 | EXPORT_SYMBOL_GPL(aout_dump_debugregs); | ||
424 | |||
425 | /* | ||
426 | * Release the user breakpoints used by ptrace | ||
427 | */ | ||
428 | void flush_ptrace_hw_breakpoint(struct task_struct *tsk) | ||
429 | { | ||
430 | int i; | ||
431 | struct thread_struct *t = &tsk->thread; | ||
432 | |||
433 | for (i = 0; i < HBP_NUM; i++) { | ||
434 | unregister_hw_breakpoint(t->ptrace_bps[i]); | ||
435 | t->ptrace_bps[i] = NULL; | ||
436 | } | ||
437 | } | ||
438 | |||
439 | void hw_breakpoint_restore(void) | ||
440 | { | ||
441 | set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); | ||
442 | set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); | ||
443 | set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); | ||
444 | set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); | ||
445 | set_debugreg(current->thread.debugreg6, 6); | ||
446 | set_debugreg(__get_cpu_var(cpu_dr7), 7); | ||
447 | } | ||
448 | EXPORT_SYMBOL_GPL(hw_breakpoint_restore); | ||
449 | |||
450 | /* | ||
451 | * Handle debug exception notifications. | ||
452 | * | ||
453 | * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. | ||
454 | * | ||
455 | * NOTIFY_DONE returned if one of the following conditions is true. | ||
456 | * i) When the causative address is from user-space and the exception | ||
457 | * is a valid one, i.e. not triggered as a result of lazy debug register | ||
458 | * switching | ||
459 | * ii) When there are more bits than trap<n> set in DR6 register (such | ||
460 | * as BD, BS or BT) indicating that more than one debug condition is | ||
461 | * met and requires some more action in do_debug(). | ||
462 | * | ||
463 | * NOTIFY_STOP returned for all other cases | ||
464 | * | ||
465 | */ | ||
466 | static int __kprobes hw_breakpoint_handler(struct die_args *args) | ||
467 | { | ||
468 | int i, cpu, rc = NOTIFY_STOP; | ||
469 | struct perf_event *bp; | ||
470 | unsigned long dr7, dr6; | ||
471 | unsigned long *dr6_p; | ||
472 | |||
473 | /* The DR6 value is pointed by args->err */ | ||
474 | dr6_p = (unsigned long *)ERR_PTR(args->err); | ||
475 | dr6 = *dr6_p; | ||
476 | |||
477 | /* Do an early return if no trap bits are set in DR6 */ | ||
478 | if ((dr6 & DR_TRAP_BITS) == 0) | ||
479 | return NOTIFY_DONE; | ||
480 | |||
481 | get_debugreg(dr7, 7); | ||
482 | /* Disable breakpoints during exception handling */ | ||
483 | set_debugreg(0UL, 7); | ||
484 | /* | ||
485 | * Assert that local interrupts are disabled | ||
486 | * Reset the DRn bits in the virtualized register value. | ||
487 | * The ptrace trigger routine will add in whatever is needed. | ||
488 | */ | ||
489 | current->thread.debugreg6 &= ~DR_TRAP_BITS; | ||
490 | cpu = get_cpu(); | ||
491 | |||
492 | /* Handle all the breakpoints that were triggered */ | ||
493 | for (i = 0; i < HBP_NUM; ++i) { | ||
494 | if (likely(!(dr6 & (DR_TRAP0 << i)))) | ||
495 | continue; | ||
496 | |||
497 | /* | ||
498 | * The counter may be concurrently released but that can only | ||
499 | * occur from a call_rcu() path. We can then safely fetch | ||
500 | * the breakpoint, use its callback, touch its counter | ||
501 | * while we are in an rcu_read_lock() path. | ||
502 | */ | ||
503 | rcu_read_lock(); | ||
504 | |||
505 | bp = per_cpu(bp_per_reg[i], cpu); | ||
506 | if (bp) | ||
507 | rc = NOTIFY_DONE; | ||
508 | /* | ||
509 | * Reset the 'i'th TRAP bit in dr6 to denote completion of | ||
510 | * exception handling | ||
511 | */ | ||
512 | (*dr6_p) &= ~(DR_TRAP0 << i); | ||
513 | /* | ||
514 | * bp can be NULL due to lazy debug register switching | ||
515 | * or due to concurrent perf counter removing. | ||
516 | */ | ||
517 | if (!bp) { | ||
518 | rcu_read_unlock(); | ||
519 | break; | ||
520 | } | ||
521 | |||
522 | (bp->callback)(bp, args->regs); | ||
523 | |||
524 | rcu_read_unlock(); | ||
525 | } | ||
526 | if (dr6 & (~DR_TRAP_BITS)) | ||
527 | rc = NOTIFY_DONE; | ||
528 | |||
529 | set_debugreg(dr7, 7); | ||
530 | put_cpu(); | ||
531 | |||
532 | return rc; | ||
533 | } | ||
534 | |||
535 | /* | ||
536 | * Handle debug exception notifications. | ||
537 | */ | ||
538 | int __kprobes hw_breakpoint_exceptions_notify( | ||
539 | struct notifier_block *unused, unsigned long val, void *data) | ||
540 | { | ||
541 | if (val != DIE_DEBUG) | ||
542 | return NOTIFY_DONE; | ||
543 | |||
544 | return hw_breakpoint_handler(data); | ||
545 | } | ||
546 | |||
547 | void hw_breakpoint_pmu_read(struct perf_event *bp) | ||
548 | { | ||
549 | /* TODO */ | ||
550 | } | ||
551 | |||
552 | void hw_breakpoint_pmu_unthrottle(struct perf_event *bp) | ||
553 | { | ||
554 | /* TODO */ | ||
555 | } | ||
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 04bbd5278568..19212cb01558 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
92 | seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); | 92 | seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); |
93 | seq_printf(p, " TLB shootdowns\n"); | 93 | seq_printf(p, " TLB shootdowns\n"); |
94 | #endif | 94 | #endif |
95 | #ifdef CONFIG_X86_MCE | 95 | #ifdef CONFIG_X86_THERMAL_VECTOR |
96 | seq_printf(p, "%*s: ", prec, "TRM"); | 96 | seq_printf(p, "%*s: ", prec, "TRM"); |
97 | for_each_online_cpu(j) | 97 | for_each_online_cpu(j) |
98 | seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); | 98 | seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); |
99 | seq_printf(p, " Thermal event interrupts\n"); | 99 | seq_printf(p, " Thermal event interrupts\n"); |
100 | # ifdef CONFIG_X86_MCE_THRESHOLD | 100 | #endif |
101 | #ifdef CONFIG_X86_MCE_THRESHOLD | ||
101 | seq_printf(p, "%*s: ", prec, "THR"); | 102 | seq_printf(p, "%*s: ", prec, "THR"); |
102 | for_each_online_cpu(j) | 103 | for_each_online_cpu(j) |
103 | seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); | 104 | seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); |
104 | seq_printf(p, " Threshold APIC interrupts\n"); | 105 | seq_printf(p, " Threshold APIC interrupts\n"); |
105 | # endif | ||
106 | #endif | 106 | #endif |
107 | #ifdef CONFIG_X86_MCE | 107 | #ifdef CONFIG_X86_MCE |
108 | seq_printf(p, "%*s: ", prec, "MCE"); | 108 | seq_printf(p, "%*s: ", prec, "MCE"); |
@@ -194,11 +194,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
194 | sum += irq_stats(cpu)->irq_call_count; | 194 | sum += irq_stats(cpu)->irq_call_count; |
195 | sum += irq_stats(cpu)->irq_tlb_count; | 195 | sum += irq_stats(cpu)->irq_tlb_count; |
196 | #endif | 196 | #endif |
197 | #ifdef CONFIG_X86_MCE | 197 | #ifdef CONFIG_X86_THERMAL_VECTOR |
198 | sum += irq_stats(cpu)->irq_thermal_count; | 198 | sum += irq_stats(cpu)->irq_thermal_count; |
199 | # ifdef CONFIG_X86_MCE_THRESHOLD | 199 | #endif |
200 | #ifdef CONFIG_X86_MCE_THRESHOLD | ||
200 | sum += irq_stats(cpu)->irq_threshold_count; | 201 | sum += irq_stats(cpu)->irq_threshold_count; |
201 | # endif | ||
202 | #endif | 202 | #endif |
203 | #ifdef CONFIG_X86_MCE | 203 | #ifdef CONFIG_X86_MCE |
204 | sum += per_cpu(mce_exception_count, cpu); | 204 | sum += per_cpu(mce_exception_count, cpu); |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8d82a77a3f3b..34e86b67550c 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/smp.h> | 43 | #include <linux/smp.h> |
44 | #include <linux/nmi.h> | 44 | #include <linux/nmi.h> |
45 | 45 | ||
46 | #include <asm/debugreg.h> | ||
46 | #include <asm/apicdef.h> | 47 | #include <asm/apicdef.h> |
47 | #include <asm/system.h> | 48 | #include <asm/system.h> |
48 | 49 | ||
@@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) | |||
434 | "resuming...\n"); | 435 | "resuming...\n"); |
435 | kgdb_arch_handle_exception(args->trapnr, args->signr, | 436 | kgdb_arch_handle_exception(args->trapnr, args->signr, |
436 | args->err, "c", "", regs); | 437 | args->err, "c", "", regs); |
438 | /* | ||
439 | * Reset the BS bit in dr6 (pointed by args->err) to | ||
440 | * denote completion of processing | ||
441 | */ | ||
442 | (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; | ||
437 | 443 | ||
438 | return NOTIFY_STOP; | 444 | return NOTIFY_STOP; |
439 | } | 445 | } |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b000..3fe86d706a14 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -48,12 +48,15 @@ | |||
48 | #include <linux/preempt.h> | 48 | #include <linux/preempt.h> |
49 | #include <linux/module.h> | 49 | #include <linux/module.h> |
50 | #include <linux/kdebug.h> | 50 | #include <linux/kdebug.h> |
51 | #include <linux/kallsyms.h> | ||
51 | 52 | ||
52 | #include <asm/cacheflush.h> | 53 | #include <asm/cacheflush.h> |
53 | #include <asm/desc.h> | 54 | #include <asm/desc.h> |
54 | #include <asm/pgtable.h> | 55 | #include <asm/pgtable.h> |
55 | #include <asm/uaccess.h> | 56 | #include <asm/uaccess.h> |
56 | #include <asm/alternative.h> | 57 | #include <asm/alternative.h> |
58 | #include <asm/insn.h> | ||
59 | #include <asm/debugreg.h> | ||
57 | 60 | ||
58 | void jprobe_return_end(void); | 61 | void jprobe_return_end(void); |
59 | 62 | ||
@@ -106,50 +109,6 @@ static const u32 twobyte_is_boostable[256 / 32] = { | |||
106 | /* ----------------------------------------------- */ | 109 | /* ----------------------------------------------- */ |
107 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 110 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
108 | }; | 111 | }; |
109 | static const u32 onebyte_has_modrm[256 / 32] = { | ||
110 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
111 | /* ----------------------------------------------- */ | ||
112 | W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */ | ||
113 | W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */ | ||
114 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */ | ||
115 | W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */ | ||
116 | W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ | ||
117 | W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ | ||
118 | W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */ | ||
119 | W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */ | ||
120 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
121 | W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */ | ||
122 | W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */ | ||
123 | W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */ | ||
124 | W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */ | ||
125 | W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
126 | W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */ | ||
127 | W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */ | ||
128 | /* ----------------------------------------------- */ | ||
129 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
130 | }; | ||
131 | static const u32 twobyte_has_modrm[256 / 32] = { | ||
132 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
133 | /* ----------------------------------------------- */ | ||
134 | W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */ | ||
135 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */ | ||
136 | W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */ | ||
137 | W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */ | ||
138 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */ | ||
139 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */ | ||
140 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */ | ||
141 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */ | ||
142 | W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */ | ||
143 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */ | ||
144 | W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */ | ||
145 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */ | ||
146 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */ | ||
147 | W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */ | ||
148 | W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */ | ||
149 | W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */ | ||
150 | /* ----------------------------------------------- */ | ||
151 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
152 | }; | ||
153 | #undef W | 112 | #undef W |
154 | 113 | ||
155 | struct kretprobe_blackpoint kretprobe_blacklist[] = { | 114 | struct kretprobe_blackpoint kretprobe_blacklist[] = { |
@@ -244,6 +203,75 @@ retry: | |||
244 | } | 203 | } |
245 | } | 204 | } |
246 | 205 | ||
206 | /* Recover the probed instruction at addr for further analysis. */ | ||
207 | static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | ||
208 | { | ||
209 | struct kprobe *kp; | ||
210 | kp = get_kprobe((void *)addr); | ||
211 | if (!kp) | ||
212 | return -EINVAL; | ||
213 | |||
214 | /* | ||
215 | * Basically, kp->ainsn.insn has an original instruction. | ||
216 | * However, RIP-relative instruction can not do single-stepping | ||
217 | * at different place, fix_riprel() tweaks the displacement of | ||
218 | * that instruction. In that case, we can't recover the instruction | ||
219 | * from the kp->ainsn.insn. | ||
220 | * | ||
221 | * On the other hand, kp->opcode has a copy of the first byte of | ||
222 | * the probed instruction, which is overwritten by int3. And | ||
223 | * the instruction at kp->addr is not modified by kprobes except | ||
224 | * for the first byte, we can recover the original instruction | ||
225 | * from it and kp->opcode. | ||
226 | */ | ||
227 | memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | ||
228 | buf[0] = kp->opcode; | ||
229 | return 0; | ||
230 | } | ||
231 | |||
232 | /* Dummy buffers for kallsyms_lookup */ | ||
233 | static char __dummy_buf[KSYM_NAME_LEN]; | ||
234 | |||
235 | /* Check if paddr is at an instruction boundary */ | ||
236 | static int __kprobes can_probe(unsigned long paddr) | ||
237 | { | ||
238 | int ret; | ||
239 | unsigned long addr, offset = 0; | ||
240 | struct insn insn; | ||
241 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
242 | |||
243 | if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) | ||
244 | return 0; | ||
245 | |||
246 | /* Decode instructions */ | ||
247 | addr = paddr - offset; | ||
248 | while (addr < paddr) { | ||
249 | kernel_insn_init(&insn, (void *)addr); | ||
250 | insn_get_opcode(&insn); | ||
251 | |||
252 | /* | ||
253 | * Check if the instruction has been modified by another | ||
254 | * kprobe, in which case we replace the breakpoint by the | ||
255 | * original instruction in our buffer. | ||
256 | */ | ||
257 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
258 | ret = recover_probed_instruction(buf, addr); | ||
259 | if (ret) | ||
260 | /* | ||
261 | * Another debugging subsystem might insert | ||
262 | * this breakpoint. In that case, we can't | ||
263 | * recover it. | ||
264 | */ | ||
265 | return 0; | ||
266 | kernel_insn_init(&insn, buf); | ||
267 | } | ||
268 | insn_get_length(&insn); | ||
269 | addr += insn.length; | ||
270 | } | ||
271 | |||
272 | return (addr == paddr); | ||
273 | } | ||
274 | |||
247 | /* | 275 | /* |
248 | * Returns non-zero if opcode modifies the interrupt flag. | 276 | * Returns non-zero if opcode modifies the interrupt flag. |
249 | */ | 277 | */ |
@@ -277,68 +305,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | |||
277 | static void __kprobes fix_riprel(struct kprobe *p) | 305 | static void __kprobes fix_riprel(struct kprobe *p) |
278 | { | 306 | { |
279 | #ifdef CONFIG_X86_64 | 307 | #ifdef CONFIG_X86_64 |
280 | u8 *insn = p->ainsn.insn; | 308 | struct insn insn; |
281 | s64 disp; | 309 | kernel_insn_init(&insn, p->ainsn.insn); |
282 | int need_modrm; | ||
283 | |||
284 | /* Skip legacy instruction prefixes. */ | ||
285 | while (1) { | ||
286 | switch (*insn) { | ||
287 | case 0x66: | ||
288 | case 0x67: | ||
289 | case 0x2e: | ||
290 | case 0x3e: | ||
291 | case 0x26: | ||
292 | case 0x64: | ||
293 | case 0x65: | ||
294 | case 0x36: | ||
295 | case 0xf0: | ||
296 | case 0xf3: | ||
297 | case 0xf2: | ||
298 | ++insn; | ||
299 | continue; | ||
300 | } | ||
301 | break; | ||
302 | } | ||
303 | 310 | ||
304 | /* Skip REX instruction prefix. */ | 311 | if (insn_rip_relative(&insn)) { |
305 | if (is_REX_prefix(insn)) | 312 | s64 newdisp; |
306 | ++insn; | 313 | u8 *disp; |
307 | 314 | insn_get_displacement(&insn); | |
308 | if (*insn == 0x0f) { | 315 | /* |
309 | /* Two-byte opcode. */ | 316 | * The copied instruction uses the %rip-relative addressing |
310 | ++insn; | 317 | * mode. Adjust the displacement for the difference between |
311 | need_modrm = test_bit(*insn, | 318 | * the original location of this instruction and the location |
312 | (unsigned long *)twobyte_has_modrm); | 319 | * of the copy that will actually be run. The tricky bit here |
313 | } else | 320 | * is making sure that the sign extension happens correctly in |
314 | /* One-byte opcode. */ | 321 | * this calculation, since we need a signed 32-bit result to |
315 | need_modrm = test_bit(*insn, | 322 | * be sign-extended to 64 bits when it's added to the %rip |
316 | (unsigned long *)onebyte_has_modrm); | 323 | * value and yield the same 64-bit result that the sign- |
317 | 324 | * extension of the original signed 32-bit displacement would | |
318 | if (need_modrm) { | 325 | * have given. |
319 | u8 modrm = *++insn; | 326 | */ |
320 | if ((modrm & 0xc7) == 0x05) { | 327 | newdisp = (u8 *) p->addr + (s64) insn.displacement.value - |
321 | /* %rip+disp32 addressing mode */ | 328 | (u8 *) p->ainsn.insn; |
322 | /* Displacement follows ModRM byte. */ | 329 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ |
323 | ++insn; | 330 | disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); |
324 | /* | 331 | *(s32 *) disp = (s32) newdisp; |
325 | * The copied instruction uses the %rip-relative | ||
326 | * addressing mode. Adjust the displacement for the | ||
327 | * difference between the original location of this | ||
328 | * instruction and the location of the copy that will | ||
329 | * actually be run. The tricky bit here is making sure | ||
330 | * that the sign extension happens correctly in this | ||
331 | * calculation, since we need a signed 32-bit result to | ||
332 | * be sign-extended to 64 bits when it's added to the | ||
333 | * %rip value and yield the same 64-bit result that the | ||
334 | * sign-extension of the original signed 32-bit | ||
335 | * displacement would have given. | ||
336 | */ | ||
337 | disp = (u8 *) p->addr + *((s32 *) insn) - | ||
338 | (u8 *) p->ainsn.insn; | ||
339 | BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ | ||
340 | *(s32 *)insn = (s32) disp; | ||
341 | } | ||
342 | } | 332 | } |
343 | #endif | 333 | #endif |
344 | } | 334 | } |
@@ -359,6 +349,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) | |||
359 | 349 | ||
360 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | 350 | int __kprobes arch_prepare_kprobe(struct kprobe *p) |
361 | { | 351 | { |
352 | if (!can_probe((unsigned long)p->addr)) | ||
353 | return -EILSEQ; | ||
362 | /* insn: must be on special executable page on x86. */ | 354 | /* insn: must be on special executable page on x86. */ |
363 | p->ainsn.insn = get_insn_slot(); | 355 | p->ainsn.insn = get_insn_slot(); |
364 | if (!p->ainsn.insn) | 356 | if (!p->ainsn.insn) |
@@ -472,17 +464,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
472 | { | 464 | { |
473 | switch (kcb->kprobe_status) { | 465 | switch (kcb->kprobe_status) { |
474 | case KPROBE_HIT_SSDONE: | 466 | case KPROBE_HIT_SSDONE: |
475 | #ifdef CONFIG_X86_64 | ||
476 | /* TODO: Provide re-entrancy from post_kprobes_handler() and | ||
477 | * avoid exception stack corruption while single-stepping on | ||
478 | * the instruction of the new probe. | ||
479 | */ | ||
480 | arch_disarm_kprobe(p); | ||
481 | regs->ip = (unsigned long)p->addr; | ||
482 | reset_current_kprobe(); | ||
483 | preempt_enable_no_resched(); | ||
484 | break; | ||
485 | #endif | ||
486 | case KPROBE_HIT_ACTIVE: | 467 | case KPROBE_HIT_ACTIVE: |
487 | save_previous_kprobe(kcb); | 468 | save_previous_kprobe(kcb); |
488 | set_current_kprobe(p, regs, kcb); | 469 | set_current_kprobe(p, regs, kcb); |
@@ -491,18 +472,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
491 | kcb->kprobe_status = KPROBE_REENTER; | 472 | kcb->kprobe_status = KPROBE_REENTER; |
492 | break; | 473 | break; |
493 | case KPROBE_HIT_SS: | 474 | case KPROBE_HIT_SS: |
494 | if (p == kprobe_running()) { | 475 | /* A probe has been hit in the codepath leading up to, or just |
495 | regs->flags &= ~X86_EFLAGS_TF; | 476 | * after, single-stepping of a probed instruction. This entire |
496 | regs->flags |= kcb->kprobe_saved_flags; | 477 | * codepath should strictly reside in .kprobes.text section. |
497 | return 0; | 478 | * Raise a BUG or we'll continue in an endless reentering loop |
498 | } else { | 479 | * and eventually a stack overflow. |
499 | /* A probe has been hit in the codepath leading up | 480 | */ |
500 | * to, or just after, single-stepping of a probed | 481 | printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", |
501 | * instruction. This entire codepath should strictly | 482 | p->addr); |
502 | * reside in .kprobes.text section. Raise a warning | 483 | dump_kprobe(p); |
503 | * to highlight this peculiar case. | 484 | BUG(); |
504 | */ | ||
505 | } | ||
506 | default: | 485 | default: |
507 | /* impossible cases */ | 486 | /* impossible cases */ |
508 | WARN_ON(1); | 487 | WARN_ON(1); |
@@ -967,8 +946,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | |||
967 | ret = NOTIFY_STOP; | 946 | ret = NOTIFY_STOP; |
968 | break; | 947 | break; |
969 | case DIE_DEBUG: | 948 | case DIE_DEBUG: |
970 | if (post_kprobe_handler(args->regs)) | 949 | if (post_kprobe_handler(args->regs)) { |
950 | /* | ||
951 | * Reset the BS bit in dr6 (pointed by args->err) to | ||
952 | * denote completion of processing | ||
953 | */ | ||
954 | (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; | ||
971 | ret = NOTIFY_STOP; | 955 | ret = NOTIFY_STOP; |
956 | } | ||
972 | break; | 957 | break; |
973 | case DIE_GPF: | 958 | case DIE_GPF: |
974 | /* | 959 | /* |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c1c429d00130..c843f8406da2 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <asm/desc.h> | 25 | #include <asm/desc.h> |
26 | #include <asm/system.h> | 26 | #include <asm/system.h> |
27 | #include <asm/cacheflush.h> | 27 | #include <asm/cacheflush.h> |
28 | #include <asm/debugreg.h> | ||
28 | 29 | ||
29 | static void set_idt(void *newidt, __u16 limit) | 30 | static void set_idt(void *newidt, __u16 limit) |
30 | { | 31 | { |
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image) | |||
202 | 203 | ||
203 | /* Interrupts aren't acceptable while we reboot */ | 204 | /* Interrupts aren't acceptable while we reboot */ |
204 | local_irq_disable(); | 205 | local_irq_disable(); |
206 | hw_breakpoint_disable(); | ||
205 | 207 | ||
206 | if (image->preserve_context) { | 208 | if (image->preserve_context) { |
207 | #ifdef CONFIG_X86_IO_APIC | 209 | #ifdef CONFIG_X86_IO_APIC |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 84c3bf209e98..4a8bb82248ae 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
19 | #include <asm/tlbflush.h> | 19 | #include <asm/tlbflush.h> |
20 | #include <asm/mmu_context.h> | 20 | #include <asm/mmu_context.h> |
21 | #include <asm/debugreg.h> | ||
21 | 22 | ||
22 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, | 23 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, |
23 | unsigned long addr) | 24 | unsigned long addr) |
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image) | |||
282 | 283 | ||
283 | /* Interrupts aren't acceptable while we reboot */ | 284 | /* Interrupts aren't acceptable while we reboot */ |
284 | local_irq_disable(); | 285 | local_irq_disable(); |
286 | hw_breakpoint_disable(); | ||
285 | 287 | ||
286 | if (image->preserve_context) { | 288 | if (image->preserve_context) { |
287 | #ifdef CONFIG_X86_IO_APIC | 289 | #ifdef CONFIG_X86_IO_APIC |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5284cd2b5776..744508e7cfdd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/clockchips.h> | 10 | #include <linux/clockchips.h> |
11 | #include <linux/random.h> | 11 | #include <linux/random.h> |
12 | #include <trace/events/power.h> | 12 | #include <trace/events/power.h> |
13 | #include <linux/hw_breakpoint.h> | ||
13 | #include <asm/system.h> | 14 | #include <asm/system.h> |
14 | #include <asm/apic.h> | 15 | #include <asm/apic.h> |
15 | #include <asm/syscalls.h> | 16 | #include <asm/syscalls.h> |
@@ -17,6 +18,7 @@ | |||
17 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
18 | #include <asm/i387.h> | 19 | #include <asm/i387.h> |
19 | #include <asm/ds.h> | 20 | #include <asm/ds.h> |
21 | #include <asm/debugreg.h> | ||
20 | 22 | ||
21 | unsigned long idle_halt; | 23 | unsigned long idle_halt; |
22 | EXPORT_SYMBOL(idle_halt); | 24 | EXPORT_SYMBOL(idle_halt); |
@@ -103,14 +105,7 @@ void flush_thread(void) | |||
103 | } | 105 | } |
104 | #endif | 106 | #endif |
105 | 107 | ||
106 | clear_tsk_thread_flag(tsk, TIF_DEBUG); | 108 | flush_ptrace_hw_breakpoint(tsk); |
107 | |||
108 | tsk->thread.debugreg0 = 0; | ||
109 | tsk->thread.debugreg1 = 0; | ||
110 | tsk->thread.debugreg2 = 0; | ||
111 | tsk->thread.debugreg3 = 0; | ||
112 | tsk->thread.debugreg6 = 0; | ||
113 | tsk->thread.debugreg7 = 0; | ||
114 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | 109 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); |
115 | /* | 110 | /* |
116 | * Forget coprocessor state.. | 111 | * Forget coprocessor state.. |
@@ -192,16 +187,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
192 | else if (next->debugctlmsr != prev->debugctlmsr) | 187 | else if (next->debugctlmsr != prev->debugctlmsr) |
193 | update_debugctlmsr(next->debugctlmsr); | 188 | update_debugctlmsr(next->debugctlmsr); |
194 | 189 | ||
195 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | ||
196 | set_debugreg(next->debugreg0, 0); | ||
197 | set_debugreg(next->debugreg1, 1); | ||
198 | set_debugreg(next->debugreg2, 2); | ||
199 | set_debugreg(next->debugreg3, 3); | ||
200 | /* no 4 and 5 */ | ||
201 | set_debugreg(next->debugreg6, 6); | ||
202 | set_debugreg(next->debugreg7, 7); | ||
203 | } | ||
204 | |||
205 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ | 190 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ |
206 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { | 191 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { |
207 | /* prev and next are different */ | 192 | /* prev and next are different */ |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4cf79567cdab..d5bd3132ee70 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #include <asm/idle.h> | 58 | #include <asm/idle.h> |
59 | #include <asm/syscalls.h> | 59 | #include <asm/syscalls.h> |
60 | #include <asm/ds.h> | 60 | #include <asm/ds.h> |
61 | #include <asm/debugreg.h> | ||
61 | 62 | ||
62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 63 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
63 | 64 | ||
@@ -259,7 +260,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
259 | 260 | ||
260 | task_user_gs(p) = get_user_gs(regs); | 261 | task_user_gs(p) = get_user_gs(regs); |
261 | 262 | ||
263 | p->thread.io_bitmap_ptr = NULL; | ||
262 | tsk = current; | 264 | tsk = current; |
265 | err = -ENOMEM; | ||
266 | |||
267 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | ||
268 | |||
263 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | 269 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
264 | p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, | 270 | p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, |
265 | IO_BITMAP_BYTES, GFP_KERNEL); | 271 | IO_BITMAP_BYTES, GFP_KERNEL); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index eb62cbcaa490..70cf15873f3d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -52,6 +52,7 @@ | |||
52 | #include <asm/idle.h> | 52 | #include <asm/idle.h> |
53 | #include <asm/syscalls.h> | 53 | #include <asm/syscalls.h> |
54 | #include <asm/ds.h> | 54 | #include <asm/ds.h> |
55 | #include <asm/debugreg.h> | ||
55 | 56 | ||
56 | asmlinkage extern void ret_from_fork(void); | 57 | asmlinkage extern void ret_from_fork(void); |
57 | 58 | ||
@@ -297,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
297 | 298 | ||
298 | p->thread.fs = me->thread.fs; | 299 | p->thread.fs = me->thread.fs; |
299 | p->thread.gs = me->thread.gs; | 300 | p->thread.gs = me->thread.gs; |
301 | p->thread.io_bitmap_ptr = NULL; | ||
300 | 302 | ||
301 | savesegment(gs, p->thread.gsindex); | 303 | savesegment(gs, p->thread.gsindex); |
302 | savesegment(fs, p->thread.fsindex); | 304 | savesegment(fs, p->thread.fsindex); |
303 | savesegment(es, p->thread.es); | 305 | savesegment(es, p->thread.es); |
304 | savesegment(ds, p->thread.ds); | 306 | savesegment(ds, p->thread.ds); |
305 | 307 | ||
308 | err = -ENOMEM; | ||
309 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | ||
310 | |||
306 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { | 311 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { |
307 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | 312 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); |
308 | if (!p->thread.io_bitmap_ptr) { | 313 | if (!p->thread.io_bitmap_ptr) { |
@@ -341,6 +346,7 @@ out: | |||
341 | kfree(p->thread.io_bitmap_ptr); | 346 | kfree(p->thread.io_bitmap_ptr); |
342 | p->thread.io_bitmap_max = 0; | 347 | p->thread.io_bitmap_max = 0; |
343 | } | 348 | } |
349 | |||
344 | return err; | 350 | return err; |
345 | } | 351 | } |
346 | 352 | ||
@@ -495,6 +501,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
495 | */ | 501 | */ |
496 | if (preload_fpu) | 502 | if (preload_fpu) |
497 | __math_state_restore(); | 503 | __math_state_restore(); |
504 | |||
498 | return prev_p; | 505 | return prev_p; |
499 | } | 506 | } |
500 | 507 | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7b058a2dc66a..04d182a7cfdb 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -22,6 +22,8 @@ | |||
22 | #include <linux/seccomp.h> | 22 | #include <linux/seccomp.h> |
23 | #include <linux/signal.h> | 23 | #include <linux/signal.h> |
24 | #include <linux/workqueue.h> | 24 | #include <linux/workqueue.h> |
25 | #include <linux/perf_event.h> | ||
26 | #include <linux/hw_breakpoint.h> | ||
25 | 27 | ||
26 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
27 | #include <asm/pgtable.h> | 29 | #include <asm/pgtable.h> |
@@ -34,6 +36,7 @@ | |||
34 | #include <asm/prctl.h> | 36 | #include <asm/prctl.h> |
35 | #include <asm/proto.h> | 37 | #include <asm/proto.h> |
36 | #include <asm/ds.h> | 38 | #include <asm/ds.h> |
39 | #include <asm/hw_breakpoint.h> | ||
37 | 40 | ||
38 | #include "tls.h" | 41 | #include "tls.h" |
39 | 42 | ||
@@ -49,6 +52,118 @@ enum x86_regset { | |||
49 | REGSET_IOPERM32, | 52 | REGSET_IOPERM32, |
50 | }; | 53 | }; |
51 | 54 | ||
55 | struct pt_regs_offset { | ||
56 | const char *name; | ||
57 | int offset; | ||
58 | }; | ||
59 | |||
60 | #define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} | ||
61 | #define REG_OFFSET_END {.name = NULL, .offset = 0} | ||
62 | |||
63 | static const struct pt_regs_offset regoffset_table[] = { | ||
64 | #ifdef CONFIG_X86_64 | ||
65 | REG_OFFSET_NAME(r15), | ||
66 | REG_OFFSET_NAME(r14), | ||
67 | REG_OFFSET_NAME(r13), | ||
68 | REG_OFFSET_NAME(r12), | ||
69 | REG_OFFSET_NAME(r11), | ||
70 | REG_OFFSET_NAME(r10), | ||
71 | REG_OFFSET_NAME(r9), | ||
72 | REG_OFFSET_NAME(r8), | ||
73 | #endif | ||
74 | REG_OFFSET_NAME(bx), | ||
75 | REG_OFFSET_NAME(cx), | ||
76 | REG_OFFSET_NAME(dx), | ||
77 | REG_OFFSET_NAME(si), | ||
78 | REG_OFFSET_NAME(di), | ||
79 | REG_OFFSET_NAME(bp), | ||
80 | REG_OFFSET_NAME(ax), | ||
81 | #ifdef CONFIG_X86_32 | ||
82 | REG_OFFSET_NAME(ds), | ||
83 | REG_OFFSET_NAME(es), | ||
84 | REG_OFFSET_NAME(fs), | ||
85 | REG_OFFSET_NAME(gs), | ||
86 | #endif | ||
87 | REG_OFFSET_NAME(orig_ax), | ||
88 | REG_OFFSET_NAME(ip), | ||
89 | REG_OFFSET_NAME(cs), | ||
90 | REG_OFFSET_NAME(flags), | ||
91 | REG_OFFSET_NAME(sp), | ||
92 | REG_OFFSET_NAME(ss), | ||
93 | REG_OFFSET_END, | ||
94 | }; | ||
95 | |||
96 | /** | ||
97 | * regs_query_register_offset() - query register offset from its name | ||
98 | * @name: the name of a register | ||
99 | * | ||
100 | * regs_query_register_offset() returns the offset of a register in struct | ||
101 | * pt_regs from its name. If the name is invalid, this returns -EINVAL; | ||
102 | */ | ||
103 | int regs_query_register_offset(const char *name) | ||
104 | { | ||
105 | const struct pt_regs_offset *roff; | ||
106 | for (roff = regoffset_table; roff->name != NULL; roff++) | ||
107 | if (!strcmp(roff->name, name)) | ||
108 | return roff->offset; | ||
109 | return -EINVAL; | ||
110 | } | ||
111 | |||
112 | /** | ||
113 | * regs_query_register_name() - query register name from its offset | ||
114 | * @offset: the offset of a register in struct pt_regs. | ||
115 | * | ||
116 | * regs_query_register_name() returns the name of a register from its | ||
117 | * offset in struct pt_regs. If the @offset is invalid, this returns NULL; | ||
118 | */ | ||
119 | const char *regs_query_register_name(unsigned int offset) | ||
120 | { | ||
121 | const struct pt_regs_offset *roff; | ||
122 | for (roff = regoffset_table; roff->name != NULL; roff++) | ||
123 | if (roff->offset == offset) | ||
124 | return roff->name; | ||
125 | return NULL; | ||
126 | } | ||
127 | |||
128 | static const int arg_offs_table[] = { | ||
129 | #ifdef CONFIG_X86_32 | ||
130 | [0] = offsetof(struct pt_regs, ax), | ||
131 | [1] = offsetof(struct pt_regs, dx), | ||
132 | [2] = offsetof(struct pt_regs, cx) | ||
133 | #else /* CONFIG_X86_64 */ | ||
134 | [0] = offsetof(struct pt_regs, di), | ||
135 | [1] = offsetof(struct pt_regs, si), | ||
136 | [2] = offsetof(struct pt_regs, dx), | ||
137 | [3] = offsetof(struct pt_regs, cx), | ||
138 | [4] = offsetof(struct pt_regs, r8), | ||
139 | [5] = offsetof(struct pt_regs, r9) | ||
140 | #endif | ||
141 | }; | ||
142 | |||
143 | /** | ||
144 | * regs_get_argument_nth() - get Nth argument at function call | ||
145 | * @regs: pt_regs which contains registers at function entry. | ||
146 | * @n: argument number. | ||
147 | * | ||
148 | * regs_get_argument_nth() returns @n th argument of a function call. | ||
149 | * Since usually the kernel stack will be changed right after function entry, | ||
150 | * you must use this at function entry. If the @n th entry is NOT in the | ||
151 | * kernel stack or pt_regs, this returns 0. | ||
152 | */ | ||
153 | unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) | ||
154 | { | ||
155 | if (n < ARRAY_SIZE(arg_offs_table)) | ||
156 | return *(unsigned long *)((char *)regs + arg_offs_table[n]); | ||
157 | else { | ||
158 | /* | ||
159 | * The typical case: arg n is on the stack. | ||
160 | * (Note: stack[0] = return address, so skip it) | ||
161 | */ | ||
162 | n -= ARRAY_SIZE(arg_offs_table); | ||
163 | return regs_get_kernel_stack_nth(regs, 1 + n); | ||
164 | } | ||
165 | } | ||
166 | |||
52 | /* | 167 | /* |
53 | * does not yet catch signals sent when the child dies. | 168 | * does not yet catch signals sent when the child dies. |
54 | * in exit.c or in signal.c. | 169 | * in exit.c or in signal.c. |
@@ -137,11 +252,6 @@ static int set_segment_reg(struct task_struct *task, | |||
137 | return 0; | 252 | return 0; |
138 | } | 253 | } |
139 | 254 | ||
140 | static unsigned long debugreg_addr_limit(struct task_struct *task) | ||
141 | { | ||
142 | return TASK_SIZE - 3; | ||
143 | } | ||
144 | |||
145 | #else /* CONFIG_X86_64 */ | 255 | #else /* CONFIG_X86_64 */ |
146 | 256 | ||
147 | #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) | 257 | #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) |
@@ -266,15 +376,6 @@ static int set_segment_reg(struct task_struct *task, | |||
266 | return 0; | 376 | return 0; |
267 | } | 377 | } |
268 | 378 | ||
269 | static unsigned long debugreg_addr_limit(struct task_struct *task) | ||
270 | { | ||
271 | #ifdef CONFIG_IA32_EMULATION | ||
272 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
273 | return IA32_PAGE_OFFSET - 3; | ||
274 | #endif | ||
275 | return TASK_SIZE_MAX - 7; | ||
276 | } | ||
277 | |||
278 | #endif /* CONFIG_X86_32 */ | 379 | #endif /* CONFIG_X86_32 */ |
279 | 380 | ||
280 | static unsigned long get_flags(struct task_struct *task) | 381 | static unsigned long get_flags(struct task_struct *task) |
@@ -454,99 +555,239 @@ static int genregs_set(struct task_struct *target, | |||
454 | return ret; | 555 | return ret; |
455 | } | 556 | } |
456 | 557 | ||
558 | static void ptrace_triggered(struct perf_event *bp, void *data) | ||
559 | { | ||
560 | int i; | ||
561 | struct thread_struct *thread = &(current->thread); | ||
562 | |||
563 | /* | ||
564 | * Store in the virtual DR6 register the fact that the breakpoint | ||
565 | * was hit so the thread's debugger will see it. | ||
566 | */ | ||
567 | for (i = 0; i < HBP_NUM; i++) { | ||
568 | if (thread->ptrace_bps[i] == bp) | ||
569 | break; | ||
570 | } | ||
571 | |||
572 | thread->debugreg6 |= (DR_TRAP0 << i); | ||
573 | } | ||
574 | |||
457 | /* | 575 | /* |
458 | * This function is trivial and will be inlined by the compiler. | 576 | * Walk through every ptrace breakpoints for this thread and |
459 | * Having it separates the implementation details of debug | 577 | * build the dr7 value on top of their attributes. |
460 | * registers from the interface details of ptrace. | 578 | * |
461 | */ | 579 | */ |
462 | static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) | 580 | static unsigned long ptrace_get_dr7(struct perf_event *bp[]) |
463 | { | 581 | { |
464 | switch (n) { | 582 | int i; |
465 | case 0: return child->thread.debugreg0; | 583 | int dr7 = 0; |
466 | case 1: return child->thread.debugreg1; | 584 | struct arch_hw_breakpoint *info; |
467 | case 2: return child->thread.debugreg2; | 585 | |
468 | case 3: return child->thread.debugreg3; | 586 | for (i = 0; i < HBP_NUM; i++) { |
469 | case 6: return child->thread.debugreg6; | 587 | if (bp[i] && !bp[i]->attr.disabled) { |
470 | case 7: return child->thread.debugreg7; | 588 | info = counter_arch_bp(bp[i]); |
589 | dr7 |= encode_dr7(i, info->len, info->type); | ||
590 | } | ||
471 | } | 591 | } |
472 | return 0; | 592 | |
593 | return dr7; | ||
473 | } | 594 | } |
474 | 595 | ||
475 | static int ptrace_set_debugreg(struct task_struct *child, | 596 | static struct perf_event * |
476 | int n, unsigned long data) | 597 | ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, |
598 | struct task_struct *tsk, int disabled) | ||
477 | { | 599 | { |
478 | int i; | 600 | int err; |
601 | int gen_len, gen_type; | ||
602 | DEFINE_BREAKPOINT_ATTR(attr); | ||
479 | 603 | ||
480 | if (unlikely(n == 4 || n == 5)) | 604 | /* |
481 | return -EIO; | 605 | * We shoud have at least an inactive breakpoint at this |
606 | * slot. It means the user is writing dr7 without having | ||
607 | * written the address register first | ||
608 | */ | ||
609 | if (!bp) | ||
610 | return ERR_PTR(-EINVAL); | ||
482 | 611 | ||
483 | if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) | 612 | err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); |
484 | return -EIO; | 613 | if (err) |
614 | return ERR_PTR(err); | ||
485 | 615 | ||
486 | switch (n) { | 616 | attr = bp->attr; |
487 | case 0: child->thread.debugreg0 = data; break; | 617 | attr.bp_len = gen_len; |
488 | case 1: child->thread.debugreg1 = data; break; | 618 | attr.bp_type = gen_type; |
489 | case 2: child->thread.debugreg2 = data; break; | 619 | attr.disabled = disabled; |
490 | case 3: child->thread.debugreg3 = data; break; | ||
491 | 620 | ||
492 | case 6: | 621 | return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); |
493 | if ((data & ~0xffffffffUL) != 0) | 622 | } |
494 | return -EIO; | 623 | |
495 | child->thread.debugreg6 = data; | 624 | /* |
496 | break; | 625 | * Handle ptrace writes to debug register 7. |
626 | */ | ||
627 | static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) | ||
628 | { | ||
629 | struct thread_struct *thread = &(tsk->thread); | ||
630 | unsigned long old_dr7; | ||
631 | int i, orig_ret = 0, rc = 0; | ||
632 | int enabled, second_pass = 0; | ||
633 | unsigned len, type; | ||
634 | struct perf_event *bp; | ||
635 | |||
636 | data &= ~DR_CONTROL_RESERVED; | ||
637 | old_dr7 = ptrace_get_dr7(thread->ptrace_bps); | ||
638 | restore: | ||
639 | /* | ||
640 | * Loop through all the hardware breakpoints, making the | ||
641 | * appropriate changes to each. | ||
642 | */ | ||
643 | for (i = 0; i < HBP_NUM; i++) { | ||
644 | enabled = decode_dr7(data, i, &len, &type); | ||
645 | bp = thread->ptrace_bps[i]; | ||
646 | |||
647 | if (!enabled) { | ||
648 | if (bp) { | ||
649 | /* | ||
650 | * Don't unregister the breakpoints right-away, | ||
651 | * unless all register_user_hw_breakpoint() | ||
652 | * requests have succeeded. This prevents | ||
653 | * any window of opportunity for debug | ||
654 | * register grabbing by other users. | ||
655 | */ | ||
656 | if (!second_pass) | ||
657 | continue; | ||
658 | |||
659 | thread->ptrace_bps[i] = NULL; | ||
660 | bp = ptrace_modify_breakpoint(bp, len, type, | ||
661 | tsk, 1); | ||
662 | if (IS_ERR(bp)) { | ||
663 | rc = PTR_ERR(bp); | ||
664 | thread->ptrace_bps[i] = NULL; | ||
665 | break; | ||
666 | } | ||
667 | thread->ptrace_bps[i] = bp; | ||
668 | } | ||
669 | continue; | ||
670 | } | ||
671 | |||
672 | bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0); | ||
673 | |||
674 | /* Incorrect bp, or we have a bug in bp API */ | ||
675 | if (IS_ERR(bp)) { | ||
676 | rc = PTR_ERR(bp); | ||
677 | thread->ptrace_bps[i] = NULL; | ||
678 | break; | ||
679 | } | ||
680 | thread->ptrace_bps[i] = bp; | ||
681 | } | ||
682 | /* | ||
683 | * Make a second pass to free the remaining unused breakpoints | ||
684 | * or to restore the original breakpoints if an error occurred. | ||
685 | */ | ||
686 | if (!second_pass) { | ||
687 | second_pass = 1; | ||
688 | if (rc < 0) { | ||
689 | orig_ret = rc; | ||
690 | data = old_dr7; | ||
691 | } | ||
692 | goto restore; | ||
693 | } | ||
694 | return ((orig_ret < 0) ? orig_ret : rc); | ||
695 | } | ||
696 | |||
697 | /* | ||
698 | * Handle PTRACE_PEEKUSR calls for the debug register area. | ||
699 | */ | ||
700 | static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) | ||
701 | { | ||
702 | struct thread_struct *thread = &(tsk->thread); | ||
703 | unsigned long val = 0; | ||
497 | 704 | ||
498 | case 7: | 705 | if (n < HBP_NUM) { |
706 | struct perf_event *bp; | ||
707 | bp = thread->ptrace_bps[n]; | ||
708 | if (!bp) | ||
709 | return 0; | ||
710 | val = bp->hw.info.address; | ||
711 | } else if (n == 6) { | ||
712 | val = thread->debugreg6; | ||
713 | } else if (n == 7) { | ||
714 | val = ptrace_get_dr7(thread->ptrace_bps); | ||
715 | } | ||
716 | return val; | ||
717 | } | ||
718 | |||
719 | static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, | ||
720 | unsigned long addr) | ||
721 | { | ||
722 | struct perf_event *bp; | ||
723 | struct thread_struct *t = &tsk->thread; | ||
724 | DEFINE_BREAKPOINT_ATTR(attr); | ||
725 | |||
726 | if (!t->ptrace_bps[nr]) { | ||
499 | /* | 727 | /* |
500 | * Sanity-check data. Take one half-byte at once with | 728 | * Put stub len and type to register (reserve) an inactive but |
501 | * check = (val >> (16 + 4*i)) & 0xf. It contains the | 729 | * correct bp |
502 | * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits | ||
503 | * 2 and 3 are LENi. Given a list of invalid values, | ||
504 | * we do mask |= 1 << invalid_value, so that | ||
505 | * (mask >> check) & 1 is a correct test for invalid | ||
506 | * values. | ||
507 | * | ||
508 | * R/Wi contains the type of the breakpoint / | ||
509 | * watchpoint, LENi contains the length of the watched | ||
510 | * data in the watchpoint case. | ||
511 | * | ||
512 | * The invalid values are: | ||
513 | * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] | ||
514 | * - R/Wi == 0x10 (break on I/O reads or writes), so | ||
515 | * mask |= 0x4444. | ||
516 | * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= | ||
517 | * 0x1110. | ||
518 | * | ||
519 | * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. | ||
520 | * | ||
521 | * See the Intel Manual "System Programming Guide", | ||
522 | * 15.2.4 | ||
523 | * | ||
524 | * Note that LENi == 0x10 is defined on x86_64 in long | ||
525 | * mode (i.e. even for 32-bit userspace software, but | ||
526 | * 64-bit kernel), so the x86_64 mask value is 0x5454. | ||
527 | * See the AMD manual no. 24593 (AMD64 System Programming) | ||
528 | */ | 730 | */ |
529 | #ifdef CONFIG_X86_32 | 731 | attr.bp_addr = addr; |
530 | #define DR7_MASK 0x5f54 | 732 | attr.bp_len = HW_BREAKPOINT_LEN_1; |
531 | #else | 733 | attr.bp_type = HW_BREAKPOINT_W; |
532 | #define DR7_MASK 0x5554 | 734 | attr.disabled = 1; |
533 | #endif | 735 | |
534 | data &= ~DR_CONTROL_RESERVED; | 736 | bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); |
535 | for (i = 0; i < 4; i++) | 737 | } else { |
536 | if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) | 738 | bp = t->ptrace_bps[nr]; |
537 | return -EIO; | 739 | t->ptrace_bps[nr] = NULL; |
538 | child->thread.debugreg7 = data; | 740 | |
539 | if (data) | 741 | attr = bp->attr; |
540 | set_tsk_thread_flag(child, TIF_DEBUG); | 742 | attr.bp_addr = addr; |
541 | else | 743 | bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); |
542 | clear_tsk_thread_flag(child, TIF_DEBUG); | ||
543 | break; | ||
544 | } | 744 | } |
745 | /* | ||
746 | * CHECKME: the previous code returned -EIO if the addr wasn't a | ||
747 | * valid task virtual addr. The new one will return -EINVAL in this | ||
748 | * case. | ||
749 | * -EINVAL may be what we want for in-kernel breakpoints users, but | ||
750 | * -EIO looks better for ptrace, since we refuse a register writing | ||
751 | * for the user. And anyway this is the previous behaviour. | ||
752 | */ | ||
753 | if (IS_ERR(bp)) | ||
754 | return PTR_ERR(bp); | ||
755 | |||
756 | t->ptrace_bps[nr] = bp; | ||
545 | 757 | ||
546 | return 0; | 758 | return 0; |
547 | } | 759 | } |
548 | 760 | ||
549 | /* | 761 | /* |
762 | * Handle PTRACE_POKEUSR calls for the debug register area. | ||
763 | */ | ||
764 | int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) | ||
765 | { | ||
766 | struct thread_struct *thread = &(tsk->thread); | ||
767 | int rc = 0; | ||
768 | |||
769 | /* There are no DR4 or DR5 registers */ | ||
770 | if (n == 4 || n == 5) | ||
771 | return -EIO; | ||
772 | |||
773 | if (n == 6) { | ||
774 | thread->debugreg6 = val; | ||
775 | goto ret_path; | ||
776 | } | ||
777 | if (n < HBP_NUM) { | ||
778 | rc = ptrace_set_breakpoint_addr(tsk, n, val); | ||
779 | if (rc) | ||
780 | return rc; | ||
781 | } | ||
782 | /* All that's left is DR7 */ | ||
783 | if (n == 7) | ||
784 | rc = ptrace_write_dr7(tsk, val); | ||
785 | |||
786 | ret_path: | ||
787 | return rc; | ||
788 | } | ||
789 | |||
790 | /* | ||
550 | * These access the current or another (stopped) task's io permission | 791 | * These access the current or another (stopped) task's io permission |
551 | * bitmap for debugging or core dump. | 792 | * bitmap for debugging or core dump. |
552 | */ | 793 | */ |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2a34f9c5be21..c0ca8f921c91 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -109,6 +109,7 @@ | |||
109 | #ifdef CONFIG_X86_64 | 109 | #ifdef CONFIG_X86_64 |
110 | #include <asm/numa_64.h> | 110 | #include <asm/numa_64.h> |
111 | #endif | 111 | #endif |
112 | #include <asm/mce.h> | ||
112 | 113 | ||
113 | /* | 114 | /* |
114 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | 115 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. |
@@ -1031,6 +1032,8 @@ void __init setup_arch(char **cmdline_p) | |||
1031 | #endif | 1032 | #endif |
1032 | #endif | 1033 | #endif |
1033 | x86_init.oem.banner(); | 1034 | x86_init.oem.banner(); |
1035 | |||
1036 | mcheck_init(); | ||
1034 | } | 1037 | } |
1035 | 1038 | ||
1036 | #ifdef CONFIG_X86_32 | 1039 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6a44a76055ad..fbf3b07c8567 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs) | |||
799 | 799 | ||
800 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); | 800 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); |
801 | if (signr > 0) { | 801 | if (signr > 0) { |
802 | /* | ||
803 | * Re-enable any watchpoints before delivering the | ||
804 | * signal to user space. The processor register will | ||
805 | * have been cleared if the watchpoint triggered | ||
806 | * inside the kernel. | ||
807 | */ | ||
808 | if (current->thread.debugreg7) | ||
809 | set_debugreg(current->thread.debugreg7, 7); | ||
810 | |||
811 | /* Whee! Actually deliver the signal. */ | 802 | /* Whee! Actually deliver the signal. */ |
812 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { | 803 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { |
813 | /* | 804 | /* |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7e37dcee0cc3..33399176512a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
529 | dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | 529 | dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) |
530 | { | 530 | { |
531 | struct task_struct *tsk = current; | 531 | struct task_struct *tsk = current; |
532 | unsigned long condition; | 532 | unsigned long dr6; |
533 | int si_code; | 533 | int si_code; |
534 | 534 | ||
535 | get_debugreg(condition, 6); | 535 | get_debugreg(dr6, 6); |
536 | 536 | ||
537 | /* Catch kmemcheck conditions first of all! */ | 537 | /* Catch kmemcheck conditions first of all! */ |
538 | if (condition & DR_STEP && kmemcheck_trap(regs)) | 538 | if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) |
539 | return; | 539 | return; |
540 | 540 | ||
541 | /* DR6 may or may not be cleared by the CPU */ | ||
542 | set_debugreg(0, 6); | ||
541 | /* | 543 | /* |
542 | * The processor cleared BTF, so don't mark that we need it set. | 544 | * The processor cleared BTF, so don't mark that we need it set. |
543 | */ | 545 | */ |
544 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); | 546 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); |
545 | tsk->thread.debugctlmsr = 0; | 547 | tsk->thread.debugctlmsr = 0; |
546 | 548 | ||
547 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | 549 | /* Store the virtualized DR6 value */ |
548 | SIGTRAP) == NOTIFY_STOP) | 550 | tsk->thread.debugreg6 = dr6; |
551 | |||
552 | if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, | ||
553 | SIGTRAP) == NOTIFY_STOP) | ||
549 | return; | 554 | return; |
550 | 555 | ||
551 | /* It's safe to allow irq's after DR6 has been saved */ | 556 | /* It's safe to allow irq's after DR6 has been saved */ |
552 | preempt_conditional_sti(regs); | 557 | preempt_conditional_sti(regs); |
553 | 558 | ||
554 | /* Mask out spurious debug traps due to lazy DR7 setting */ | 559 | if (regs->flags & X86_VM_MASK) { |
555 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | 560 | handle_vm86_trap((struct kernel_vm86_regs *) regs, |
556 | if (!tsk->thread.debugreg7) | 561 | error_code, 1); |
557 | goto clear_dr7; | 562 | return; |
558 | } | 563 | } |
559 | 564 | ||
560 | #ifdef CONFIG_X86_32 | ||
561 | if (regs->flags & X86_VM_MASK) | ||
562 | goto debug_vm86; | ||
563 | #endif | ||
564 | |||
565 | /* Save debug status register where ptrace can see it */ | ||
566 | tsk->thread.debugreg6 = condition; | ||
567 | |||
568 | /* | 565 | /* |
569 | * Single-stepping through TF: make sure we ignore any events in | 566 | * Single-stepping through system calls: ignore any exceptions in |
570 | * kernel space (but re-enable TF when returning to user mode). | 567 | * kernel space, but re-enable TF when returning to user mode. |
568 | * | ||
569 | * We already checked v86 mode above, so we can check for kernel mode | ||
570 | * by just checking the CPL of CS. | ||
571 | */ | 571 | */ |
572 | if (condition & DR_STEP) { | 572 | if ((dr6 & DR_STEP) && !user_mode(regs)) { |
573 | if (!user_mode(regs)) | 573 | tsk->thread.debugreg6 &= ~DR_STEP; |
574 | goto clear_TF_reenable; | 574 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); |
575 | regs->flags &= ~X86_EFLAGS_TF; | ||
575 | } | 576 | } |
576 | 577 | si_code = get_si_code(tsk->thread.debugreg6); | |
577 | si_code = get_si_code(condition); | 578 | if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) |
578 | /* Ok, finally something we can handle */ | 579 | send_sigtrap(tsk, regs, error_code, si_code); |
579 | send_sigtrap(tsk, regs, error_code, si_code); | ||
580 | |||
581 | /* | ||
582 | * Disable additional traps. They'll be re-enabled when | ||
583 | * the signal is delivered. | ||
584 | */ | ||
585 | clear_dr7: | ||
586 | set_debugreg(0, 7); | ||
587 | preempt_conditional_cli(regs); | 580 | preempt_conditional_cli(regs); |
588 | return; | ||
589 | 581 | ||
590 | #ifdef CONFIG_X86_32 | ||
591 | debug_vm86: | ||
592 | /* reenable preemption: handle_vm86_trap() might sleep */ | ||
593 | dec_preempt_count(); | ||
594 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); | ||
595 | conditional_cli(regs); | ||
596 | return; | ||
597 | #endif | ||
598 | |||
599 | clear_TF_reenable: | ||
600 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | ||
601 | regs->flags &= ~X86_EFLAGS_TF; | ||
602 | preempt_conditional_cli(regs); | ||
603 | return; | 582 | return; |
604 | } | 583 | } |
605 | 584 | ||