author    Linus Torvalds <torvalds@linux-foundation.org>  2012-03-22 12:13:24 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2012-03-22 12:13:24 -0400
commit    e17fdf5c6778ff77d93dd769910992e4073b9348 (patch)
tree      d1a7ca2b1faf4301b39300fbd82f9b91e605a77e /arch
parent    95211279c5ad00a317c98221d7e4365e02f20836 (diff)
parent    a240ada241dafe290e7532d1ddeb98fdf1419068 (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86/asm changes from Ingo Molnar.

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86: Include probe_roms.h in probe_roms.c
  x86/32: Print control and debug registers for kerenel context
  x86: Tighten dependencies of CPU_SUP_*_32
  x86/numa: Improve internode cache alignment
  x86: Fix the NMI nesting comments
  x86-64: Improve insn scheduling in SAVE_ARGS_IRQ
  x86-64: Fix CFI annotations for NMI nesting code
  bitops: Add missing parentheses to new get_order macro
  bitops: Optimise get_order()
  bitops: Adjust the comment on get_order() to describe the size==0 case
  x86/spinlocks: Eliminate TICKET_MASK
  x86-64: Handle byte-wise tail copying in memcpy() without a loop
  x86-64: Fix memcpy() to support sizes of 4Gb and above
  x86-64: Fix memset() to support sizes of 4Gb and above
  x86-64: Slightly shorten copy_page()
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/Kconfig.cpu                   |  5
-rw-r--r--  arch/x86/include/asm/spinlock.h        |  4
-rw-r--r--  arch/x86/include/asm/spinlock_types.h  |  1
-rw-r--r--  arch/x86/kernel/dumpstack_32.c         |  2
-rw-r--r--  arch/x86/kernel/entry_64.S             | 71
-rw-r--r--  arch/x86/kernel/probe_roms.c           |  1
-rw-r--r--  arch/x86/lib/copy_page_64.S            | 12
-rw-r--r--  arch/x86/lib/memcpy_64.S               | 44
-rw-r--r--  arch/x86/lib/memset_64.S               | 33
9 files changed, 83 insertions(+), 90 deletions(-)
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 3c57033e2211..706e12e9984b 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -303,7 +303,6 @@ config X86_GENERIC
 config X86_INTERNODE_CACHE_SHIFT
         int
         default "12" if X86_VSMP
-        default "7" if NUMA
         default X86_L1_CACHE_SHIFT
 
 config X86_CMPXCHG
@@ -441,7 +440,7 @@ config CPU_SUP_INTEL
 config CPU_SUP_CYRIX_32
         default y
         bool "Support Cyrix processors" if PROCESSOR_SELECT
-        depends on !64BIT
+        depends on M386 || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
         ---help---
           This enables detection, tunings and quirks for Cyrix processors
 
@@ -495,7 +494,7 @@ config CPU_SUP_TRANSMETA_32
 config CPU_SUP_UMC_32
         default y
         bool "Support UMC processors" if PROCESSOR_SELECT
-        depends on !64BIT
+        depends on M386 || M486 || (EXPERT && !64BIT)
         ---help---
           This enables detection, tunings and quirks for UMC processors
 
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index a82c2bf504b6..76bfa2cf301d 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -88,14 +88,14 @@ static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
 {
         struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
 
-        return !!(tmp.tail ^ tmp.head);
+        return tmp.tail != tmp.head;
 }
 
 static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
 {
         struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
 
-        return ((tmp.tail - tmp.head) & TICKET_MASK) > 1;
+        return (__ticket_t)(tmp.tail - tmp.head) > 1;
 }
 
 #ifndef CONFIG_PARAVIRT_SPINLOCKS
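
The TICKET_MASK removal works because converting the ticket difference back to the
narrow ticket type truncates it exactly as the mask did. A minimal user-space sketch
of that equivalence (the typedef and the mask here are illustrative stand-ins, not
the kernel's definitions):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint8_t ticket_t;            /* stand-in for the kernel's __ticket_t */
#define TICKET_MASK ((ticket_t)~0)   /* what the removed macro evaluated to   */

int main(void)
{
        /* tail has wrapped around past head, as it does in a long-lived lock */
        ticket_t head = 0xfe, tail = 0x01;

        int old_contended = ((tail - head) & TICKET_MASK) > 1;
        int new_contended = (ticket_t)(tail - head) > 1;

        /* the cast truncates exactly like the mask did, so both agree */
        assert(old_contended == new_contended);
        printf("contended: %d\n", new_contended);
        return 0;
}

With an 8- or 16-bit ticket type the cast and the mask are the same operation, so the
extra macro carried no information.
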
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 8ebd5df7451e..ad0ad07fc006 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -16,7 +16,6 @@ typedef u32 __ticketpair_t;
 #endif
 
 #define TICKET_SHIFT        (sizeof(__ticket_t) * 8)
-#define TICKET_MASK        ((__ticket_t)((1 << TICKET_SHIFT) - 1))
 
 typedef struct arch_spinlock {
         union {
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index c99f9ed013d5..88ec9129271d 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -87,7 +87,7 @@ void show_registers(struct pt_regs *regs)
         int i;
 
         print_modules();
-        __show_regs(regs, 0);
+        __show_regs(regs, !user_mode_vm(regs));
 
         printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
                 TASK_COMM_LEN, current->comm, task_pid_nr(current),
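
For context, __show_regs() on 32-bit takes a second argument that decides whether the
control and debug registers are dumped as well; passing !user_mode_vm(regs) turns that
on exactly when the trapping context was the kernel. A toy model of the call
(user_mode_vm() and __show_regs() are mocked here, not the kernel implementations):

#include <stdio.h>

struct pt_regs { unsigned long cs; };

static int user_mode_vm(struct pt_regs *regs)
{
        return (regs->cs & 3) == 3;     /* toy check: CPL 3 means user mode */
}

static void __show_regs(struct pt_regs *regs, int all)
{
        printf("EIP/ESP/segment registers always printed\n");
        if (all)
                printf("CR0/CR2/CR3/CR4 and DR0..DR7 printed too\n");
}

int main(void)
{
        struct pt_regs kernel_fault = { .cs = 0x60 };   /* ring-0 selector */

        /* the old call passed a literal 0 and never showed CRs/DRs */
        __show_regs(&kernel_fault, !user_mode_vm(&kernel_fault));
        return 0;
}
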
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1333d9851778..a63dabe153ca 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -320,7 +320,7 @@ ENDPROC(native_usergs_sysret64)
         movq %rsp, %rsi
 
         leaq -RBP(%rsp),%rdi        /* arg1 for handler */
-        testl $3, CS(%rdi)
+        testl $3, CS-RBP(%rsi)
         je 1f
         SWAPGS
         /*
@@ -330,11 +330,10 @@ ENDPROC(native_usergs_sysret64)
          * moving irq_enter into assembly, which would be too much work)
          */
 1:        incl PER_CPU_VAR(irq_count)
-        jne 2f
-        mov PER_CPU_VAR(irq_stack_ptr),%rsp
+        cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
         CFI_DEF_CFA_REGISTER        rsi
 
-2:        /* Store previous stack value */
+        /* Store previous stack value */
         pushq %rsi
         CFI_ESCAPE        0x0f /* DW_CFA_def_cfa_expression */, 6, \
                         0x77 /* DW_OP_breg7 */, 0, \
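
The jne/mov pair became a single cmovzq: after the incl, the zero flag is set only on
the outermost interrupt entry (the per-CPU irq_count starts at -1), and that is the one
case where execution should switch to the dedicated IRQ stack. A rough, branch-based C
model of the same decision (all names here are invented for illustration):

#include <stdio.h>

static long irq_count = -1;            /* per-CPU in the real kernel */
static char irq_stack[8192];
static char *irq_stack_ptr = irq_stack + sizeof(irq_stack);

static char *pick_stack(char *current_sp)
{
        /* ++irq_count == 0 exactly on the outermost entry; the cmovzq
         * performs this select without a branch */
        return (++irq_count == 0) ? irq_stack_ptr : current_sp;
}

int main(void)
{
        char dummy;
        char *outer  = pick_stack(&dummy);   /* switches to the IRQ stack */
        char *nested = pick_stack(outer);    /* stays where it is */

        printf("switched: %d, stayed: %d\n",
               outer == irq_stack_ptr, nested == outer);
        return 0;
}
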
@@ -1530,6 +1529,7 @@ ENTRY(nmi)
 
         /* Use %rdx as out temp variable throughout */
         pushq_cfi %rdx
+        CFI_REL_OFFSET rdx, 0
 
         /*
          * If %cs was not the kernel segment, then the NMI triggered in user
@@ -1554,6 +1554,7 @@ ENTRY(nmi)
          */
         lea 6*8(%rsp), %rdx
         test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+        CFI_REMEMBER_STATE
 
 nested_nmi:
         /*
@@ -1585,10 +1586,12 @@ nested_nmi:
 
 nested_nmi_out:
         popq_cfi %rdx
+        CFI_RESTORE rdx
 
         /* No need to check faults here */
         INTERRUPT_RETURN
 
+        CFI_RESTORE_STATE
 first_nmi:
         /*
          * Because nested NMIs will use the pushed location that we
@@ -1620,10 +1623,15 @@ first_nmi:
          * | pt_regs |
          * +-------------------------+
          *
-         * The saved RIP is used to fix up the copied RIP that a nested
-         * NMI may zero out. The original stack frame and the temp storage
+         * The saved stack frame is used to fix up the copied stack frame
+         * that a nested NMI may change to make the interrupted NMI iret jump
+         * to the repeat_nmi. The original stack frame and the temp storage
          * is also used by nested NMIs and can not be trusted on exit.
          */
+        /* Do not pop rdx, nested NMIs will corrupt that part of the stack */
+        movq (%rsp), %rdx
+        CFI_RESTORE rdx
+
         /* Set the NMI executing variable on the stack. */
         pushq_cfi $1
 
@@ -1631,22 +1639,39 @@ first_nmi:
         .rept 5
         pushq_cfi 6*8(%rsp)
         .endr
+        CFI_DEF_CFA_OFFSET SS+8-RIP
+
+        /* Everything up to here is safe from nested NMIs */
+
+        /*
+         * If there was a nested NMI, the first NMI's iret will return
+         * here. But NMIs are still enabled and we can take another
+         * nested NMI. The nested NMI checks the interrupted RIP to see
+         * if it is between repeat_nmi and end_repeat_nmi, and if so
+         * it will just return, as we are about to repeat an NMI anyway.
+         * This makes it safe to copy to the stack frame that a nested
+         * NMI will update.
+         */
+repeat_nmi:
+        /*
+         * Update the stack variable to say we are still in NMI (the update
+         * is benign for the non-repeat case, where 1 was pushed just above
+         * to this very stack slot).
+         */
+        movq $1, 5*8(%rsp)
 
         /* Make another copy, this one may be modified by nested NMIs */
         .rept 5
         pushq_cfi 4*8(%rsp)
         .endr
-
-        /* Do not pop rdx, nested NMIs will corrupt it */
-        movq 11*8(%rsp), %rdx
+        CFI_DEF_CFA_OFFSET SS+8-RIP
+end_repeat_nmi:
 
         /*
          * Everything below this point can be preempted by a nested
-         * NMI if the first NMI took an exception. Repeated NMIs
-         * caused by an exception and nested NMI will start here, and
-         * can still be preempted by another NMI.
+         * NMI if the first NMI took an exception and reset our iret stack
+         * so that we repeat another NMI.
          */
-restart_nmi:
         pushq_cfi $-1                /* ORIG_RAX: no syscall to restart */
         subq $ORIG_RAX-R15, %rsp
         CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
@@ -1675,26 +1700,6 @@ nmi_restore:
         CFI_ENDPROC
 END(nmi)
 
-        /*
-         * If an NMI hit an iret because of an exception or breakpoint,
-         * it can lose its NMI context, and a nested NMI may come in.
-         * In that case, the nested NMI will change the preempted NMI's
-         * stack to jump to here when it does the final iret.
-         */
-repeat_nmi:
-        INTR_FRAME
-        /* Update the stack variable to say we are still in NMI */
-        movq $1, 5*8(%rsp)
-
-        /* copy the saved stack back to copy stack */
-        .rept 5
-        pushq_cfi 4*8(%rsp)
-        .endr
-
-        jmp restart_nmi
-        CFI_ENDPROC
-end_repeat_nmi:
-
 ENTRY(ignore_sysret)
         CFI_STARTPROC
         mov $-ENOSYS,%eax
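
The reworked NMI path is easier to follow as pseudocode: the first NMI keeps three
copies of its iret frame on its own stack, and a nested NMI only redirects the copied
frame so that the interrupted NMI repeats itself, unless it interrupted the
repeat_nmi..end_repeat_nmi copy loop. A very loose C model of that decision (every
name and address below is made up for illustration):

#include <stdbool.h>
#include <stdio.h>

struct nmi_state {
        bool nmi_executing;             /* the $1 pushed on the stack */
        unsigned long saved_rip;        /* "saved" iret frame */
        unsigned long copied_rip;       /* "copy" frame the final iret uses */
};

static const unsigned long repeat_nmi = 0x1000, end_repeat_nmi = 0x1040;

static void nested_nmi(struct nmi_state *s, unsigned long interrupted_rip)
{
        if (!s->nmi_executing)
                return;                         /* not nested: run normally */
        if (interrupted_rip >= repeat_nmi && interrupted_rip < end_repeat_nmi)
                return;                         /* copy in progress: just return */
        s->copied_rip = repeat_nmi;             /* make the first NMI repeat */
}

int main(void)
{
        struct nmi_state s = { .nmi_executing = true,
                               .saved_rip = 0x2000, .copied_rip = 0x2000 };
        nested_nmi(&s, 0x3000);
        printf("first NMI will iret to %#lx (repeat_nmi)\n", s.copied_rip);
        return 0;
}
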
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 34e06e84ce31..0bc72e2069e3 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -12,6 +12,7 @@
 #include <linux/pci.h>
 #include <linux/export.h>
 
+#include <asm/probe_roms.h>
 #include <asm/pci-direct.h>
 #include <asm/e820.h>
 #include <asm/mmzone.h>
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 01c805ba5359..6b34d04d096a 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -20,14 +20,12 @@ ENDPROC(copy_page_c)
 
 ENTRY(copy_page)
         CFI_STARTPROC
-        subq $3*8,%rsp
-        CFI_ADJUST_CFA_OFFSET 3*8
+        subq $2*8,%rsp
+        CFI_ADJUST_CFA_OFFSET 2*8
         movq %rbx,(%rsp)
         CFI_REL_OFFSET rbx, 0
         movq %r12,1*8(%rsp)
         CFI_REL_OFFSET r12, 1*8
-        movq %r13,2*8(%rsp)
-        CFI_REL_OFFSET r13, 2*8
 
         movl $(4096/64)-5,%ecx
         .p2align 4
@@ -91,10 +89,8 @@ ENTRY(copy_page)
         CFI_RESTORE rbx
         movq 1*8(%rsp),%r12
         CFI_RESTORE r12
-        movq 2*8(%rsp),%r13
-        CFI_RESTORE r13
-        addq $3*8,%rsp
-        CFI_ADJUST_CFA_OFFSET -3*8
+        addq $2*8,%rsp
+        CFI_ADJUST_CFA_OFFSET -2*8
         ret
 .Lcopy_page_end:
         CFI_ENDPROC
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index efbf2a0ecdea..1c273be7c97e 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -27,9 +27,8 @@
 .section .altinstr_replacement, "ax", @progbits
 .Lmemcpy_c:
         movq %rdi, %rax
-
-        movl %edx, %ecx
-        shrl $3, %ecx
+        movq %rdx, %rcx
+        shrq $3, %rcx
         andl $7, %edx
         rep movsq
         movl %edx, %ecx
@@ -48,8 +47,7 @@
 .section .altinstr_replacement, "ax", @progbits
 .Lmemcpy_c_e:
         movq %rdi, %rax
-
-        movl %edx, %ecx
+        movq %rdx, %rcx
         rep movsb
         ret
 .Lmemcpy_e_e:
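
Both rep-string variants used to stage the length through %ecx, so a request of
4 GiB or more was reduced modulo 2^32 before the copy even started; the fix keeps the
count in full 64-bit registers. A small host-side sketch of the truncation itself
(no large buffer is actually touched):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t len = (1ULL << 32) + 16;     /* 4 GiB + 16 bytes */

        uint32_t truncated = (uint32_t)len;   /* what "movl %edx,%ecx" kept  */
        uint64_t full      = len;             /* what "movq %rdx,%rcx" keeps */

        printf("requested %llu, 32-bit copy would move %u bytes\n",
               (unsigned long long)full, truncated);   /* moves only 16 */
        return 0;
}
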
@@ -60,10 +58,7 @@ ENTRY(memcpy)
         CFI_STARTPROC
         movq %rdi, %rax
 
-        /*
-         * Use 32bit CMP here to avoid long NOP padding.
-         */
-        cmp $0x20, %edx
+        cmpq $0x20, %rdx
         jb .Lhandle_tail
 
         /*
@@ -72,7 +67,7 @@ ENTRY(memcpy)
          */
         cmp %dil, %sil
         jl .Lcopy_backward
-        subl $0x20, %edx
+        subq $0x20, %rdx
 .Lcopy_forward_loop:
         subq $0x20, %rdx
 
@@ -91,7 +86,7 @@ ENTRY(memcpy)
         movq %r11, 3*8(%rdi)
         leaq 4*8(%rdi), %rdi
         jae .Lcopy_forward_loop
-        addq $0x20, %rdx
+        addl $0x20, %edx
         jmp .Lhandle_tail
 
 .Lcopy_backward:
@@ -123,11 +118,11 @@ ENTRY(memcpy)
         /*
          * Calculate copy position to head.
          */
-        addq $0x20, %rdx
+        addl $0x20, %edx
         subq %rdx, %rsi
         subq %rdx, %rdi
 .Lhandle_tail:
-        cmpq $16, %rdx
+        cmpl $16, %edx
         jb .Lless_16bytes
 
         /*
@@ -144,7 +139,7 @@ ENTRY(memcpy)
         retq
         .p2align 4
 .Lless_16bytes:
-        cmpq $8, %rdx
+        cmpl $8, %edx
         jb .Lless_8bytes
         /*
          * Move data from 8 bytes to 15 bytes.
@@ -156,7 +151,7 @@ ENTRY(memcpy)
         retq
         .p2align 4
 .Lless_8bytes:
-        cmpq $4, %rdx
+        cmpl $4, %edx
         jb .Lless_3bytes
 
         /*
@@ -169,18 +164,19 @@ ENTRY(memcpy)
         retq
         .p2align 4
 .Lless_3bytes:
-        cmpl $0, %edx
-        je .Lend
+        subl $1, %edx
+        jb .Lend
         /*
          * Move data from 1 bytes to 3 bytes.
          */
-.Lloop_1:
-        movb (%rsi), %r8b
-        movb %r8b, (%rdi)
-        incq %rdi
-        incq %rsi
-        decl %edx
-        jnz .Lloop_1
+        movzbl (%rsi), %ecx
+        jz .Lstore_1byte
+        movzbq 1(%rsi), %r8
+        movzbq (%rsi, %rdx), %r9
+        movb %r8b, 1(%rdi)
+        movb %r9b, (%rdi, %rdx)
+.Lstore_1byte:
+        movb %cl, (%rdi)
 
 .Lend:
         retq
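
The new .Lless_3bytes path replaces the byte loop with three overlapping loads and
stores: the first byte, the second byte and the last byte cover every length from 1
to 3. A C rendering of the same idea, as a sketch rather than the kernel code:

#include <stdio.h>

static void copy_tail(unsigned char *dst, const unsigned char *src, unsigned len)
{
        if (len == 0)
                return;
        unsigned char first = src[0];
        if (len > 1) {
                unsigned char second = src[1];
                unsigned char last = src[len - 1];  /* == second when len == 2 */
                dst[1] = second;
                dst[len - 1] = last;
        }
        dst[0] = first;
}

int main(void)
{
        unsigned char out[4] = { 0 };
        copy_tail(out, (const unsigned char *)"abc", 3);
        printf("%.3s\n", (char *)out);      /* prints "abc" */
        return 0;
}
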
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 79bd454b78a3..2dcb3808cbda 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -19,16 +19,15 @@
 .section .altinstr_replacement, "ax", @progbits
 .Lmemset_c:
         movq %rdi,%r9
-        movl %edx,%r8d
-        andl $7,%r8d
-        movl %edx,%ecx
-        shrl $3,%ecx
+        movq %rdx,%rcx
+        andl $7,%edx
+        shrq $3,%rcx
         /* expand byte value */
         movzbl %sil,%esi
         movabs $0x0101010101010101,%rax
-        mulq %rsi                /* with rax, clobbers rdx */
+        imulq %rsi,%rax
         rep stosq
-        movl %r8d,%ecx
+        movl %edx,%ecx
         rep stosb
         movq %r9,%rax
         ret
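
Two things change in the rep-string variant: the count now travels through the full
64-bit %rcx/%rdx registers, and the byte-replication multiply switches from mulq to a
two-operand imulq, because mulq writes its high half into %rdx and %rdx now still
holds live length information. The replication trick itself is easy to check in C:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint8_t  c = 0xab;
        uint64_t pattern = (uint64_t)c * 0x0101010101010101ULL;

        printf("%#018llx\n", (unsigned long long)pattern);
        /* prints 0xabababababababab: one stosq now stores 8 fill bytes */
        return 0;
}
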
@@ -50,7 +49,7 @@
 .Lmemset_c_e:
         movq %rdi,%r9
         movb %sil,%al
-        movl %edx,%ecx
+        movq %rdx,%rcx
         rep stosb
         movq %r9,%rax
         ret
@@ -61,12 +60,11 @@ ENTRY(memset)
 ENTRY(__memset)
         CFI_STARTPROC
         movq %rdi,%r10
-        movq %rdx,%r11
 
         /* expand byte value */
         movzbl %sil,%ecx
         movabs $0x0101010101010101,%rax
-        mul %rcx                /* with rax, clobbers rdx */
+        imulq %rcx,%rax
 
         /* align dst */
         movl %edi,%r9d
@@ -75,13 +73,13 @@ ENTRY(__memset)
         CFI_REMEMBER_STATE
 .Lafter_bad_alignment:
 
-        movl %r11d,%ecx
-        shrl $6,%ecx
+        movq %rdx,%rcx
+        shrq $6,%rcx
         jz .Lhandle_tail
 
         .p2align 4
 .Lloop_64:
-        decl %ecx
+        decq %rcx
         movq %rax,(%rdi)
         movq %rax,8(%rdi)
         movq %rax,16(%rdi)
@@ -97,7 +95,7 @@ ENTRY(__memset)
            to predict jump tables. */
         .p2align 4
 .Lhandle_tail:
-        movl %r11d,%ecx
+        movl %edx,%ecx
         andl $63&(~7),%ecx
         jz .Lhandle_7
         shrl $3,%ecx
@@ -109,12 +107,11 @@ ENTRY(__memset)
         jnz .Lloop_8
 
 .Lhandle_7:
-        movl %r11d,%ecx
-        andl $7,%ecx
+        andl $7,%edx
         jz .Lende
         .p2align 4
 .Lloop_1:
-        decl %ecx
+        decl %edx
         movb %al,(%rdi)
         leaq 1(%rdi),%rdi
         jnz .Lloop_1
@@ -125,13 +122,13 @@ ENTRY(__memset)
 
         CFI_RESTORE_STATE
 .Lbad_alignment:
-        cmpq $7,%r11
+        cmpq $7,%rdx
         jbe .Lhandle_7
         movq %rax,(%rdi)        /* unaligned store */
         movq $8,%r8
         subq %r9,%r8
         addq %r8,%rdi
-        subq %r8,%r11
+        subq %r8,%rdx
         jmp .Lafter_bad_alignment
 .Lfinal:
         CFI_ENDPROC