 arch/x86/include/asm/perf_event.h         | 11
 arch/x86/kernel/cpu/perf_event.c          | 89
 arch/x86/kernel/cpu/perf_event.h          | 20
 arch/x86/kernel/cpu/perf_event_amd_ibs.c  |  4
 arch/x86/kernel/cpu/perf_event_intel_ds.c |  7
 5 files changed, 114 insertions(+), 17 deletions(-)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index dab39350e51e..cb4e43bce98a 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -196,11 +196,16 @@ static inline u32 get_ibs_caps(void)	{ return 0; }
 extern void perf_events_lapic_init(void);
 
 /*
- * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
- * This flag is otherwise unused and ABI specified to be 0, so nobody should
- * care what we do with it.
+ * Abuse bits {3,5} of the cpu eflags register. These flags are otherwise
+ * unused and ABI specified to be 0, so nobody should care what we do with
+ * them.
+ *
+ * EXACT - the IP points to the exact instruction that triggered the
+ *         event (HW bugs exempt).
+ * VM    - original X86_VM_MASK; see set_linear_ip().
  */
 #define PERF_EFLAGS_EXACT	(1UL << 3)
+#define PERF_EFLAGS_VM		(1UL << 5)
 
 struct pt_regs;
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
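
For context on how the EXACT bit becomes user-visible: perf_misc_flags() translates it into PERF_RECORD_MISC_EXACT_IP on each sample. A minimal sketch of that translation (existing code elsewhere in perf_event.c, not part of this diff):

	/* sketch: how PERF_EFLAGS_EXACT reaches userspace */
	if (regs->flags & PERF_EFLAGS_EXACT)
		misc |= PERF_RECORD_MISC_EXACT_IP;
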
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 29557aa06dda..915b876edd1e 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -32,6 +32,8 @@
 #include <asm/smp.h>
 #include <asm/alternative.h>
 #include <asm/timer.h>
+#include <asm/desc.h>
+#include <asm/ldt.h>
 
 #include "perf_event.h"
 
@@ -1738,6 +1740,29 @@ valid_user_frame(const void __user *fp, unsigned long size)
 	return (__range_not_ok(fp, size, TASK_SIZE) == 0);
 }
 
+static unsigned long get_segment_base(unsigned int segment)
+{
+	struct desc_struct *desc;
+	int idx = segment >> 3;
+
+	if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
+		if (idx > LDT_ENTRIES)
+			return 0;
+
+		if (idx > current->active_mm->context.size)
+			return 0;
+
+		desc = current->active_mm->context.ldt;
+	} else {
+		if (idx > GDT_ENTRIES)
+			return 0;
+
+		desc = __this_cpu_ptr(&gdt_page.gdt[0]);
+	}
+
+	return get_desc_base(desc + idx);
+}
+
 #ifdef CONFIG_COMPAT
 
 #include <asm/compat.h>
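
get_segment_base() relies on the architectural layout of an x86 segment selector: bits 15:3 index the descriptor table, bit 2 (SEGMENT_TI_MASK) picks the LDT over the GDT, and bits 1:0 carry the requested privilege level. A sketch of that decoding, with a hypothetical selector value:

	unsigned int segment = 0x2b;			/* hypothetical selector */
	unsigned int idx = segment >> 3;		/* descriptor index: 5 */
	unsigned int ti  = segment & SEGMENT_TI_MASK;	/* 0 = GDT, 4 = LDT; here 0 */
	unsigned int rpl = segment & 3;			/* privilege level: 3 (user) */
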
@@ -1746,13 +1771,17 @@ static inline int
 perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
 	/* 32-bit process in 64-bit kernel. */
+	unsigned long ss_base, cs_base;
 	struct stack_frame_ia32 frame;
 	const void __user *fp;
 
 	if (!test_thread_flag(TIF_IA32))
 		return 0;
 
-	fp = compat_ptr(regs->bp);
+	cs_base = get_segment_base(regs->cs);
+	ss_base = get_segment_base(regs->ss);
+
+	fp = compat_ptr(ss_base + regs->bp);
 	while (entry->nr < PERF_MAX_STACK_DEPTH) {
 		unsigned long bytes;
 		frame.next_frame	     = 0;
@@ -1765,8 +1794,8 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
-		perf_callchain_store(entry, frame.return_address);
-		fp = compat_ptr(frame.next_frame);
+		perf_callchain_store(entry, cs_base + frame.return_address);
+		fp = compat_ptr(ss_base + frame.next_frame);
 	}
 	return 1;
 }
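
These fixups follow from x86 segmented addressing: a linear address is always the segment base plus the effective (segment-relative) address, and compat tasks may run on non-zero-based (e.g. LDT) segments. A worked example under assumed values:

	unsigned long ss_base = 0x10000000;	/* assumed non-zero stack segment base */
	unsigned long bp      = 0x0000ff00;	/* effective frame pointer from the sample */
	const void __user *fp = compat_ptr(ss_base + bp);	/* linear 0x1000ff00 */
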
@@ -1789,6 +1818,12 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		return;
 	}
 
+	/*
+	 * We don't know what to do with VM86 stacks.. ignore them for now.
+	 */
+	if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
+		return;
+
 	fp = (void __user *)regs->bp;
 
 	perf_callchain_store(entry, regs->ip);
@@ -1816,16 +1851,50 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 	}
 }
 
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
+/*
+ * Deal with code segment offsets for the various execution modes:
+ *
+ *   VM86 - the good olde 16 bit days, where the linear address is
+ *          20 bits and we use regs->ip + 0x10 * regs->cs.
+ *
+ *   IA32 - Where we need to look at GDT/LDT segment descriptor tables
+ *          to figure out what the 32bit base address is.
+ *
+ *    X32 - has TIF_X32 set, but is running in x86_64
+ *
+ * X86_64 - CS,DS,SS,ES are all zero based.
+ */
+static unsigned long code_segment_base(struct pt_regs *regs)
 {
-	unsigned long ip;
+	/*
+	 * If we are in VM86 mode, add the segment offset to convert to a
+	 * linear address.
+	 */
+	if (regs->flags & X86_VM_MASK)
+		return 0x10 * regs->cs;
+
+	/*
+	 * For IA32 we look at the GDT/LDT segment base to convert the
+	 * effective IP to a linear address.
+	 */
+#ifdef CONFIG_X86_32
+	if (user_mode(regs) && regs->cs != __USER_CS)
+		return get_segment_base(regs->cs);
+#else
+	if (test_thread_flag(TIF_IA32)) {
+		if (user_mode(regs) && regs->cs != __USER32_CS)
+			return get_segment_base(regs->cs);
+	}
+#endif
+	return 0;
+}
 
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
 	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
-		ip = perf_guest_cbs->get_guest_ip();
-	else
-		ip = instruction_pointer(regs);
+		return perf_guest_cbs->get_guest_ip();
 
-	return ip;
+	return regs->ip + code_segment_base(regs);
 }
 
 unsigned long perf_misc_flags(struct pt_regs *regs)
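
The VM86 branch is plain real-mode address arithmetic: the 16-bit segment is scaled by 16 (0x10) and added to the offset, yielding a 20-bit linear address. A worked example with assumed register values:

	unsigned long cs = 0xb800;		/* assumed 16-bit code segment */
	unsigned long ip = 0x0123;		/* assumed 16-bit offset */
	unsigned long linear = 0x10 * cs + ip;	/* 0xb8000 + 0x123 = 0xb8123 */
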
@@ -1838,7 +1907,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
 		else
 			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
 	} else {
-		if (!kernel_ip(regs->ip))
+		if (user_mode(regs))
 			misc |= PERF_RECORD_MISC_USER;
 		else
 			misc |= PERF_RECORD_MISC_KERNEL;
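
user_mode() keys off the privilege level in the sampled CS rather than the shape of the IP, which is exactly the field set_linear_ip() below keeps meaningful on faked regs. Roughly (a sketch, not the literal implementation on every config):

	/* sketch: user_mode() amounts to a CS privilege-level test */
	return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL;	/* RPL 3 = user */
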
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 821d53b696d1..6605a81ba339 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -516,6 +516,26 @@ static inline bool kernel_ip(unsigned long ip)
 #endif
 }
 
+/*
+ * Not all PMUs provide the right context information to place the reported IP
+ * into full context. Specifically segment registers are typically not
+ * supplied.
+ *
+ * Assuming the address is a linear address (it is for IBS), we fake the CS and
+ * vm86 mode using the known zero-based code segment and 'fix up' the registers
+ * to reflect this.
+ *
+ * Intel PEBS/LBR appear to typically provide the effective address, nothing
+ * much we can do about that but pray and treat it like a linear address.
+ */
+static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
+{
+	regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
+	if (regs->flags & X86_VM_MASK)
+		regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
+	regs->ip = ip;
+}
+
 #ifdef CONFIG_CPU_SUP_AMD
 
 int amd_pmu_init(void);
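
The XOR in set_linear_ip() works because bit 5 (PERF_EFLAGS_VM) is ABI-guaranteed clear on entry, so XOR-ing both bits clears X86_VM_MASK while recording that it was set; perf_callchain_user() above tests for either bit. An illustration with assumed flags:

	unsigned long flags = X86_VM_MASK;		/* assumed: sample taken in vm86 mode */
	flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);	/* bit 17 -> off, bit 5 -> on */
	/* now (flags & X86_VM_MASK) == 0 and (flags & PERF_EFLAGS_VM) != 0 */
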
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index da9bcdcd9856..7bfb5bec8630 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -13,6 +13,8 @@
 
 #include <asm/apic.h>
 
+#include "perf_event.h"
+
 static u32 ibs_caps;
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
@@ -536,7 +538,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 	if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 	} else {
-		instruction_pointer_set(&regs, ibs_data.regs[1]);
+		set_linear_ip(&regs, ibs_data.regs[1]);
 		regs.flags |= PERF_EFLAGS_EXACT;
 	}
 
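
The replaced helper only wrote the IP. A sketch of what it expands to (per asm/ptrace.h), which shows why it was insufficient here: it leaves whatever CS and flags the interrupted context had, while the IBS-provided RIP is a linear address that needs matching segment state:

	static inline void instruction_pointer_set(struct pt_regs *regs,
						   unsigned long val)
	{
		regs->ip = val;		/* IP only; CS and flags stay stale */
	}
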
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 629ae0b7ad90..e38d97bf4259 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -499,7 +499,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	 * We sampled a branch insn, rewind using the LBR stack
 	 */
 	if (ip == to) {
-		regs->ip = from;
+		set_linear_ip(regs, from);
 		return 1;
 	}
 
@@ -529,7 +529,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	} while (to < ip);
 
 	if (to == ip) {
-		regs->ip = old_to;
+		set_linear_ip(regs, old_to);
 		return 1;
 	}
 
@@ -569,7 +569,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
 	 */
 	regs = *iregs;
-	regs.ip = pebs->ip;
+	regs.flags = pebs->flags;
+	set_linear_ip(&regs, pebs->ip);
 	regs.bp = pebs->bp;
 	regs.sp = pebs->sp;
 
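
Note the ordering in the last hunk: set_linear_ip() consults regs->flags for X86_VM_MASK, so the PEBS-captured flags have to be installed before the call. Spelled out:

	regs.flags = pebs->flags;	/* first: supplies X86_VM_MASK to the check */
	set_linear_ip(&regs, pebs->ip);	/* then: may fold the VM bit into PERF_EFLAGS_VM */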