author	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-11 22:55:20 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-11 22:55:20 -0500
commit	0019fab3558c8c8135c88c3d38f24ab0a5408aa6 (patch)
tree	d51fe9edb3e05efe5089e3360510f513cccb6b0b
parent	b64c5fda3868cb29d5dae0909561aa7d93fb7330 (diff)
parent	6c8d8b3c69cef1330e0c5cbc2a8b9268024927a0 (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm changes from Ingo Molnar:
 "Two fixlets and a cleanup."

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86_32: Return actual stack when requesting sp from regs
  x86: Don't clobber top of pt_regs in nested NMI
  x86/asm: Clean up copy_page_*() comments and code
-rw-r--r--	arch/x86/include/asm/ptrace.h	9
-rw-r--r--	arch/x86/kernel/entry_64.S	41
-rw-r--r--	arch/x86/lib/copy_page_64.S	120
3 files changed, 95 insertions(+), 75 deletions(-)
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 19f16ebaf4fa..54d80fddb739 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -239,6 +239,15 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
 {
 	if (unlikely(offset > MAX_REG_OFFSET))
 		return 0;
+#ifdef CONFIG_X86_32
+	/*
+	 * Traps from the kernel do not save sp and ss.
+	 * Use the helper function to retrieve sp.
+	 */
+	if (offset == offsetof(struct pt_regs, sp) &&
+	    regs->cs == __KERNEL_CS)
+		return kernel_stack_pointer(regs);
+#endif
 	return *(unsigned long *)((unsigned long)regs + offset);
 }
 
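[Annotation] The x86_32 fixlet above matters to consumers of regs_get_register() such as kprobes: a trap that stays at kernel privilege pushes no sp/ss, so reading the sp slot of pt_regs yields whatever happens to sit there. A minimal C sketch of the idea, assuming a simplified hypothetical 32-bit frame layout (fake_pt_regs and sketch_stack_pointer are illustrative names, not the kernel's pt_regs or kernel_stack_pointer()):

#include <stddef.h>

/* Hypothetical, simplified 32-bit frame layout for illustration. */
struct fake_pt_regs {
	unsigned long bx, cx, dx, si, di, bp, ax;
	unsigned long ds, es, fs, gs, orig_ax;
	unsigned long ip, cs, flags;
	unsigned long sp, ss;	/* pushed by hardware only on a CPL change */
};

/*
 * For a trap from the kernel, the interrupted stack pointer is where
 * the hardware frame ends, i.e. the address of the missing sp slot;
 * that is the value the patch returns via kernel_stack_pointer().
 */
static unsigned long sketch_stack_pointer(struct fake_pt_regs *regs,
					  int from_kernel)
{
	if (from_kernel)
		return (unsigned long)&regs->sp;
	return regs->sp;	/* valid: hardware saved it on the CPL change */
}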
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 2a3806b95831..31b46128a63d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1699,9 +1699,10 @@ nested_nmi:
 
 1:
 	/* Set up the interrupted NMIs stack to jump to repeat_nmi */
-	leaq -6*8(%rsp), %rdx
+	leaq -1*8(%rsp), %rdx
 	movq %rdx, %rsp
-	CFI_ADJUST_CFA_OFFSET 6*8
+	CFI_ADJUST_CFA_OFFSET 1*8
+	leaq -10*8(%rsp), %rdx
 	pushq_cfi $__KERNEL_DS
 	pushq_cfi %rdx
 	pushfq_cfi
@@ -1709,8 +1710,8 @@ nested_nmi:
 	pushq_cfi $repeat_nmi
 
 	/* Put stack back */
-	addq $(11*8), %rsp
-	CFI_ADJUST_CFA_OFFSET -11*8
+	addq $(6*8), %rsp
+	CFI_ADJUST_CFA_OFFSET -6*8
 
 nested_nmi_out:
 	popq_cfi %rdx
@@ -1736,18 +1737,18 @@ first_nmi:
 	 * +-------------------------+
 	 * | NMI executing variable  |
 	 * +-------------------------+
-	 * | Saved SS                |
-	 * | Saved Return RSP        |
-	 * | Saved RFLAGS            |
-	 * | Saved CS                |
-	 * | Saved RIP               |
-	 * +-------------------------+
 	 * | copied SS               |
 	 * | copied Return RSP       |
 	 * | copied RFLAGS           |
 	 * | copied CS               |
 	 * | copied RIP              |
 	 * +-------------------------+
+	 * | Saved SS                |
+	 * | Saved Return RSP        |
+	 * | Saved RFLAGS            |
+	 * | Saved CS                |
+	 * | Saved RIP               |
+	 * +-------------------------+
 	 * | pt_regs                 |
 	 * +-------------------------+
 	 *
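[Annotation] Read the reordered diagram from the bottom of memory up: pt_regs lowest, then the pristine "Saved" iret frame, then the "copied" frame that iretq will actually consume and that a nested NMI may rewrite. A rough C model of the new layout (an illustrative assumption; the assembly works purely with the 5*8/10*8 offsets used below):

/* One hardware iret frame: five 8-byte words. */
struct iret_frame {
	unsigned long ip, cs, flags, sp, ss;
};

/* Hypothetical model, lowest address first; the stack grows down. */
struct nmi_stack_top {
	/* pt_regs sits below this in memory */
	struct iret_frame saved;	/* pristine copy kept for repeat_nmi */
	struct iret_frame copied;	/* consumed by iretq; a nested NMI
					 * redirects its ip to repeat_nmi */
	unsigned long nmi_executing;	/* the "NMI executing variable" */
};

With the copied frame moved to the top, a nested NMI's rewrite lands beside the NMI-executing variable instead of next to pt_regs, which is the clobbering named in the commit message.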
@@ -1763,9 +1764,14 @@ first_nmi:
 	/* Set the NMI executing variable on the stack. */
 	pushq_cfi $1
 
+	/*
+	 * Leave room for the "copied" frame
+	 */
+	subq $(5*8), %rsp
+
 	/* Copy the stack frame to the Saved frame */
 	.rept 5
-	pushq_cfi 6*8(%rsp)
+	pushq_cfi 11*8(%rsp)
 	.endr
 	CFI_DEF_CFA_OFFSET SS+8-RIP
 
@@ -1786,12 +1792,15 @@ repeat_nmi:
 	 * is benign for the non-repeat case, where 1 was pushed just above
 	 * to this very stack slot).
 	 */
-	movq $1, 5*8(%rsp)
+	movq $1, 10*8(%rsp)
 
 	/* Make another copy, this one may be modified by nested NMIs */
+	addq $(10*8), %rsp
+	CFI_ADJUST_CFA_OFFSET -10*8
 	.rept 5
-	pushq_cfi 4*8(%rsp)
+	pushq_cfi -6*8(%rsp)
 	.endr
+	subq $(5*8), %rsp
 	CFI_DEF_CFA_OFFSET SS+8-RIP
 end_repeat_nmi:
 
@@ -1842,8 +1851,12 @@ nmi_swapgs:
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
 	RESTORE_ALL 8
+
+	/* Pop the extra iret frame */
+	addq $(5*8), %rsp
+
 	/* Clear the NMI executing stack variable */
-	movq $0, 10*8(%rsp)
+	movq $0, 5*8(%rsp)
 	jmp irq_return
 	CFI_ENDPROC
 END(nmi)
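[Annotation] The new constants in the repeat_nmi and nmi_restore hunks are bookkeeping for that reordered layout. A hedged cross-check of the word arithmetic, stated in terms of the hypothetical model above (names are illustrative only):

/* Word offsets counted upward from the frame %rsp points at. */
enum {
	FRAME_WORDS         = 5,		/* ip, cs, flags, sp, ss */
	/* %rsp at the saved RIP: 5 saved + 5 copied words below the flag */
	NMI_VAR_FROM_SAVED  = 2 * FRAME_WORDS,	/* movq $1, 10*8(%rsp) */
	/* after addq $(5*8) pops the saved frame in nmi_restore */
	NMI_VAR_FROM_COPIED = FRAME_WORDS,	/* movq $0, 5*8(%rsp) */
};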
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 6b34d04d096a..176cca67212b 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -5,91 +5,89 @@
 #include <asm/alternative-asm.h>
 
 	ALIGN
-copy_page_c:
+copy_page_rep:
 	CFI_STARTPROC
-	movl	$4096/8,%ecx
+	movl	$4096/8, %ecx
 	rep movsq
 	ret
 	CFI_ENDPROC
-ENDPROC(copy_page_c)
+ENDPROC(copy_page_rep)
 
-/* Don't use streaming store because it's better when the target
-   ends up in cache. */
-
-/* Could vary the prefetch distance based on SMP/UP */
+/*
+ * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
+ * Could vary the prefetch distance based on SMP/UP.
+*/
 
 ENTRY(copy_page)
 	CFI_STARTPROC
-	subq	$2*8,%rsp
+	subq	$2*8, %rsp
 	CFI_ADJUST_CFA_OFFSET 2*8
-	movq	%rbx,(%rsp)
+	movq	%rbx, (%rsp)
 	CFI_REL_OFFSET rbx, 0
-	movq	%r12,1*8(%rsp)
+	movq	%r12, 1*8(%rsp)
 	CFI_REL_OFFSET r12, 1*8
 
-	movl	$(4096/64)-5,%ecx
+	movl	$(4096/64)-5, %ecx
 	.p2align 4
 .Loop64:
 	dec	%rcx
-
-	movq	(%rsi), %rax
-	movq	8 (%rsi), %rbx
-	movq	16 (%rsi), %rdx
-	movq	24 (%rsi), %r8
-	movq	32 (%rsi), %r9
-	movq	40 (%rsi), %r10
-	movq	48 (%rsi), %r11
-	movq	56 (%rsi), %r12
+	movq	0x8*0(%rsi), %rax
+	movq	0x8*1(%rsi), %rbx
+	movq	0x8*2(%rsi), %rdx
+	movq	0x8*3(%rsi), %r8
+	movq	0x8*4(%rsi), %r9
+	movq	0x8*5(%rsi), %r10
+	movq	0x8*6(%rsi), %r11
+	movq	0x8*7(%rsi), %r12
 
 	prefetcht0 5*64(%rsi)
 
-	movq	%rax, (%rdi)
-	movq	%rbx, 8 (%rdi)
-	movq	%rdx, 16 (%rdi)
-	movq	%r8, 24 (%rdi)
-	movq	%r9, 32 (%rdi)
-	movq	%r10, 40 (%rdi)
-	movq	%r11, 48 (%rdi)
-	movq	%r12, 56 (%rdi)
+	movq	%rax, 0x8*0(%rdi)
+	movq	%rbx, 0x8*1(%rdi)
+	movq	%rdx, 0x8*2(%rdi)
+	movq	%r8,  0x8*3(%rdi)
+	movq	%r9,  0x8*4(%rdi)
+	movq	%r10, 0x8*5(%rdi)
+	movq	%r11, 0x8*6(%rdi)
+	movq	%r12, 0x8*7(%rdi)
 
 	leaq	64 (%rsi), %rsi
 	leaq	64 (%rdi), %rdi
 
 	jnz	.Loop64
 
-	movl	$5,%ecx
+	movl	$5, %ecx
 	.p2align 4
 .Loop2:
 	decl	%ecx
 
-	movq	(%rsi), %rax
-	movq	8 (%rsi), %rbx
-	movq	16 (%rsi), %rdx
-	movq	24 (%rsi), %r8
-	movq	32 (%rsi), %r9
-	movq	40 (%rsi), %r10
-	movq	48 (%rsi), %r11
-	movq	56 (%rsi), %r12
+	movq	0x8*0(%rsi), %rax
+	movq	0x8*1(%rsi), %rbx
+	movq	0x8*2(%rsi), %rdx
+	movq	0x8*3(%rsi), %r8
+	movq	0x8*4(%rsi), %r9
+	movq	0x8*5(%rsi), %r10
+	movq	0x8*6(%rsi), %r11
+	movq	0x8*7(%rsi), %r12
 
-	movq	%rax, (%rdi)
-	movq	%rbx, 8 (%rdi)
-	movq	%rdx, 16 (%rdi)
-	movq	%r8, 24 (%rdi)
-	movq	%r9, 32 (%rdi)
-	movq	%r10, 40 (%rdi)
-	movq	%r11, 48 (%rdi)
-	movq	%r12, 56 (%rdi)
+	movq	%rax, 0x8*0(%rdi)
+	movq	%rbx, 0x8*1(%rdi)
+	movq	%rdx, 0x8*2(%rdi)
+	movq	%r8,  0x8*3(%rdi)
+	movq	%r9,  0x8*4(%rdi)
+	movq	%r10, 0x8*5(%rdi)
+	movq	%r11, 0x8*6(%rdi)
+	movq	%r12, 0x8*7(%rdi)
 
-	leaq	64(%rdi),%rdi
-	leaq	64(%rsi),%rsi
-
+	leaq	64(%rdi), %rdi
+	leaq	64(%rsi), %rsi
 	jnz	.Loop2
 
-	movq	(%rsp),%rbx
+	movq	(%rsp), %rbx
 	CFI_RESTORE rbx
-	movq	1*8(%rsp),%r12
+	movq	1*8(%rsp), %r12
 	CFI_RESTORE r12
-	addq	$2*8,%rsp
+	addq	$2*8, %rsp
 	CFI_ADJUST_CFA_OFFSET -2*8
 	ret
 .Lcopy_page_end:
@@ -103,7 +101,7 @@ ENDPROC(copy_page)
 
 	.section .altinstr_replacement,"ax"
 1:	.byte 0xeb					/* jmp <disp8> */
-	.byte (copy_page_c - copy_page) - (2f - 1b)	/* offset */
+	.byte (copy_page_rep - copy_page) - (2f - 1b)	/* offset */
 2:
 	.previous
 	.section .altinstructions,"a"
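[Annotation] For readers following the cleaned-up copy loop, a C analogue of the technique (a sketch under hypothetical naming, not kernel code): copy a 4096-byte page in 64-byte chunks through eight temporaries, prefetching five cache lines ahead, with the final five chunks done without prefetch so no read strays past the end of the source page:

#include <stdint.h>
#include <stddef.h>

static void copy_page_sketch(void *to, const void *from)
{
	uint64_t *d = to;
	const uint64_t *s = from;
	size_t chunk;

	/* 4096/64 chunks; mirrors the $(4096/64)-5 and $5 loop split */
	for (chunk = 0; chunk < 4096 / 64; chunk++) {
		if (chunk < 4096 / 64 - 5)
			/* prefetcht0 5*64(%rsi): fetch five lines ahead */
			__builtin_prefetch((const char *)s + 5 * 64);

		/* eight loads then eight stores, as in .Loop64/.Loop2 */
		d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; d[3] = s[3];
		d[4] = s[4]; d[5] = s[5]; d[6] = s[6]; d[7] = s[7];

		s += 8;
		d += 8;
	}
}

The rep-movsq variant (copy_page_rep) is patched in at boot by the alternatives entry above on CPUs that advertise X86_FEATURE_REP_GOOD, where the string instruction beats the unrolled loop.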