author     Linus Torvalds <torvalds@linux-foundation.org>    2012-03-22 12:13:24 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2012-03-22 12:13:24 -0400
commit     e17fdf5c6778ff77d93dd769910992e4073b9348 (patch)
tree       d1a7ca2b1faf4301b39300fbd82f9b91e605a77e /arch
parent     95211279c5ad00a317c98221d7e4365e02f20836 (diff)
parent     a240ada241dafe290e7532d1ddeb98fdf1419068 (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86/asm changes from Ingo Molnar
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86: Include probe_roms.h in probe_roms.c
x86/32: Print control and debug registers for kernel context
x86: Tighten dependencies of CPU_SUP_*_32
x86/numa: Improve internode cache alignment
x86: Fix the NMI nesting comments
x86-64: Improve insn scheduling in SAVE_ARGS_IRQ
x86-64: Fix CFI annotations for NMI nesting code
bitops: Add missing parentheses to new get_order macro
bitops: Optimise get_order()
bitops: Adjust the comment on get_order() to describe the size==0 case
x86/spinlocks: Eliminate TICKET_MASK
x86-64: Handle byte-wise tail copying in memcpy() without a loop
x86-64: Fix memcpy() to support sizes of 4Gb and above
x86-64: Fix memset() to support sizes of 4Gb and above
x86-64: Slightly shorten copy_page()
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/Kconfig.cpu                    5
-rw-r--r--  arch/x86/include/asm/spinlock.h         4
-rw-r--r--  arch/x86/include/asm/spinlock_types.h   1
-rw-r--r--  arch/x86/kernel/dumpstack_32.c          2
-rw-r--r--  arch/x86/kernel/entry_64.S             71
-rw-r--r--  arch/x86/kernel/probe_roms.c            1
-rw-r--r--  arch/x86/lib/copy_page_64.S            12
-rw-r--r--  arch/x86/lib/memcpy_64.S               44
-rw-r--r--  arch/x86/lib/memset_64.S               33
9 files changed, 83 insertions, 90 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 3c57033e2211..706e12e9984b 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -303,7 +303,6 @@ config X86_GENERIC
 config X86_INTERNODE_CACHE_SHIFT
 	int
 	default "12" if X86_VSMP
-	default "7" if NUMA
 	default X86_L1_CACHE_SHIFT
 
 config X86_CMPXCHG
@@ -441,7 +440,7 @@ config CPU_SUP_INTEL
 config CPU_SUP_CYRIX_32
 	default y
 	bool "Support Cyrix processors" if PROCESSOR_SELECT
-	depends on !64BIT
+	depends on M386 || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
 	---help---
 	  This enables detection, tunings and quirks for Cyrix processors
 
@@ -495,7 +494,7 @@ config CPU_SUP_TRANSMETA_32
 config CPU_SUP_UMC_32
 	default y
 	bool "Support UMC processors" if PROCESSOR_SELECT
-	depends on !64BIT
+	depends on M386 || M486 || (EXPERT && !64BIT)
 	---help---
 	  This enables detection, tunings and quirks for UMC processors
 
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index a82c2bf504b6..76bfa2cf301d 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -88,14 +88,14 @@ static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
 
-	return !!(tmp.tail ^ tmp.head);
+	return tmp.tail != tmp.head;
 }
 
 static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
 
-	return ((tmp.tail - tmp.head) & TICKET_MASK) > 1;
+	return (__ticket_t)(tmp.tail - tmp.head) > 1;
 }
 
 #ifndef CONFIG_PARAVIRT_SPINLOCKS
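The TICKET_MASK elimination relies on ordinary unsigned wrap-around: once the difference tail - head is evaluated in the ticket type itself, it is already reduced modulo 2^TICKET_SHIFT, so the explicit mask adds nothing. A standalone sketch of that property (plain C with an illustrative 8-bit ticket type, not the kernel's headers):

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative 8-bit ticket type: casting the difference to the ticket
 * type wraps it modulo 256, which is exactly what the old
 * ((tail - head) & TICKET_MASK) computed. */
typedef uint8_t ticket_t;

static int is_contended(ticket_t head, ticket_t tail)
{
	return (ticket_t)(tail - head) > 1;
}

int main(void)
{
	/* tail has wrapped past 255 while head has not caught up yet */
	printf("%d\n", is_contended(255, 0)); /* 0: one ticket out, not contended */
	printf("%d\n", is_contended(255, 2)); /* 1: three tickets out, contended */
	return 0;
}
```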
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 8ebd5df7451e..ad0ad07fc006 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -16,7 +16,6 @@ typedef u32 __ticketpair_t;
 #endif
 
 #define TICKET_SHIFT	(sizeof(__ticket_t) * 8)
-#define TICKET_MASK	((__ticket_t)((1 << TICKET_SHIFT) - 1))
 
 typedef struct arch_spinlock {
 	union {
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index c99f9ed013d5..88ec9129271d 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -87,7 +87,7 @@ void show_registers(struct pt_regs *regs)
 	int i;
 
 	print_modules();
-	__show_regs(regs, 0);
+	__show_regs(regs, !user_mode_vm(regs));
 
 	printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
 		TASK_COMM_LEN, current->comm, task_pid_nr(current),
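The second argument to __show_regs() is the "show everything" flag; the fix above derives it from the trapped context so that control and debug registers are printed only for kernel-mode faults. A toy standalone illustration of the flag's effect (the helper names here are made up, not the kernel's):

```c
#include <stdbool.h>
#include <stdio.h>

/* Made-up stand-in for __show_regs(regs, all): the boolean selects
 * whether the control/debug registers are included in the dump. */
static void show_regs_model(bool show_control_regs)
{
	puts("EIP, ESP, EFLAGS, segment registers ...");
	if (show_control_regs)
		puts("CR0, CR2, CR3, CR4, DR0-DR7 ...");
}

int main(void)
{
	bool from_kernel = true;       /* models !user_mode_vm(regs) */
	show_regs_model(from_kernel);  /* kernel context: full dump */
	show_regs_model(false);        /* user context: skip them   */
	return 0;
}
```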
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1333d9851778..a63dabe153ca 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -320,7 +320,7 @@ ENDPROC(native_usergs_sysret64)
 	movq %rsp, %rsi
 
 	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
-	testl $3, CS(%rdi)
+	testl $3, CS-RBP(%rsi)
 	je 1f
 	SWAPGS
 	/*
@@ -330,11 +330,10 @@ ENDPROC(native_usergs_sysret64)
 	 * moving irq_enter into assembly, which would be too much work)
 	 */
 1:	incl PER_CPU_VAR(irq_count)
-	jne 2f
-	mov PER_CPU_VAR(irq_stack_ptr),%rsp
+	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
 	CFI_DEF_CFA_REGISTER rsi
 
-2:	/* Store previous stack value */
+	/* Store previous stack value */
 	pushq %rsi
 	CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
 			0x77 /* DW_OP_breg7 */, 0, \
@@ -1530,6 +1529,7 @@ ENTRY(nmi)
 
 	/* Use %rdx as out temp variable throughout */
 	pushq_cfi %rdx
+	CFI_REL_OFFSET rdx, 0
 
 	/*
 	 * If %cs was not the kernel segment, then the NMI triggered in user
@@ -1554,6 +1554,7 @@ ENTRY(nmi)
 	 */
 	lea 6*8(%rsp), %rdx
 	test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+	CFI_REMEMBER_STATE
 
 nested_nmi:
 	/*
@@ -1585,10 +1586,12 @@ nested_nmi:
 
 nested_nmi_out:
 	popq_cfi %rdx
+	CFI_RESTORE rdx
 
 	/* No need to check faults here */
 	INTERRUPT_RETURN
 
+	CFI_RESTORE_STATE
 first_nmi:
 	/*
 	 * Because nested NMIs will use the pushed location that we
@@ -1620,10 +1623,15 @@ first_nmi:
 	 * | pt_regs                 |
 	 * +-------------------------+
 	 *
-	 * The saved RIP is used to fix up the copied RIP that a nested
-	 * NMI may zero out. The original stack frame and the temp storage
+	 * The saved stack frame is used to fix up the copied stack frame
+	 * that a nested NMI may change to make the interrupted NMI iret jump
+	 * to the repeat_nmi. The original stack frame and the temp storage
 	 * is also used by nested NMIs and can not be trusted on exit.
 	 */
+	/* Do not pop rdx, nested NMIs will corrupt that part of the stack */
+	movq (%rsp), %rdx
+	CFI_RESTORE rdx
+
 	/* Set the NMI executing variable on the stack. */
 	pushq_cfi $1
 
@@ -1631,22 +1639,39 @@ first_nmi:
 	.rept 5
 	pushq_cfi 6*8(%rsp)
 	.endr
+	CFI_DEF_CFA_OFFSET SS+8-RIP
+
+	/* Everything up to here is safe from nested NMIs */
+
+	/*
+	 * If there was a nested NMI, the first NMI's iret will return
+	 * here. But NMIs are still enabled and we can take another
+	 * nested NMI. The nested NMI checks the interrupted RIP to see
+	 * if it is between repeat_nmi and end_repeat_nmi, and if so
+	 * it will just return, as we are about to repeat an NMI anyway.
+	 * This makes it safe to copy to the stack frame that a nested
+	 * NMI will update.
+	 */
+repeat_nmi:
+	/*
+	 * Update the stack variable to say we are still in NMI (the update
+	 * is benign for the non-repeat case, where 1 was pushed just above
+	 * to this very stack slot).
+	 */
+	movq $1, 5*8(%rsp)
 
 	/* Make another copy, this one may be modified by nested NMIs */
 	.rept 5
 	pushq_cfi 4*8(%rsp)
 	.endr
-
-	/* Do not pop rdx, nested NMIs will corrupt it */
-	movq 11*8(%rsp), %rdx
+	CFI_DEF_CFA_OFFSET SS+8-RIP
+end_repeat_nmi:
 
 	/*
 	 * Everything below this point can be preempted by a nested
-	 * NMI if the first NMI took an exception. Repeated NMIs
-	 * caused by an exception and nested NMI will start here, and
-	 * can still be preempted by another NMI.
+	 * NMI if the first NMI took an exception and reset our iret stack
+	 * so that we repeat another NMI.
 	 */
-restart_nmi:
 	pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
 	subq $ORIG_RAX-R15, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
@@ -1675,26 +1700,6 @@ nmi_restore:
 	CFI_ENDPROC
 END(nmi)
 
-	/*
-	 * If an NMI hit an iret because of an exception or breakpoint,
-	 * it can lose its NMI context, and a nested NMI may come in.
-	 * In that case, the nested NMI will change the preempted NMI's
-	 * stack to jump to here when it does the final iret.
-	 */
-repeat_nmi:
-	INTR_FRAME
-	/* Update the stack variable to say we are still in NMI */
-	movq $1, 5*8(%rsp)
-
-	/* copy the saved stack back to copy stack */
-	.rept 5
-	pushq_cfi 4*8(%rsp)
-	.endr
-
-	jmp restart_nmi
-	CFI_ENDPROC
-end_repeat_nmi:
-
 ENTRY(ignore_sysret)
 	CFI_STARTPROC
 	mov $-ENOSYS,%eax
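The comment added before repeat_nmi states the key invariant: a nested NMI that finds the interrupted RIP inside repeat_nmi..end_repeat_nmi can simply return, because the first NMI is about to be repeated anyway. A rough standalone model of just that range check (plain C, illustrative names and addresses, not the kernel's code):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative model only: a nested NMI reads the RIP it interrupted from
 * the stack; if that RIP falls inside the repeat_nmi..end_repeat_nmi copy
 * region, the first NMI will rerun itself, so the nested one just returns. */
static bool nested_nmi_can_just_return(uint64_t interrupted_rip,
				       uint64_t repeat_nmi,
				       uint64_t end_repeat_nmi)
{
	return interrupted_rip >= repeat_nmi && interrupted_rip < end_repeat_nmi;
}

int main(void)
{
	uint64_t repeat_nmi = 0x1000, end_repeat_nmi = 0x1040;

	printf("%d\n", nested_nmi_can_just_return(0x1010, repeat_nmi,
						  end_repeat_nmi)); /* 1 */
	printf("%d\n", nested_nmi_can_just_return(0x2000, repeat_nmi,
						  end_repeat_nmi)); /* 0 */
	return 0;
}
```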
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 34e06e84ce31..0bc72e2069e3 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -12,6 +12,7 @@
 #include <linux/pci.h>
 #include <linux/export.h>
 
+#include <asm/probe_roms.h>
 #include <asm/pci-direct.h>
 #include <asm/e820.h>
 #include <asm/mmzone.h>
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 01c805ba5359..6b34d04d096a 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -20,14 +20,12 @@ ENDPROC(copy_page_c)
 
 ENTRY(copy_page)
 	CFI_STARTPROC
-	subq $3*8,%rsp
-	CFI_ADJUST_CFA_OFFSET 3*8
+	subq $2*8,%rsp
+	CFI_ADJUST_CFA_OFFSET 2*8
 	movq %rbx,(%rsp)
 	CFI_REL_OFFSET rbx, 0
 	movq %r12,1*8(%rsp)
 	CFI_REL_OFFSET r12, 1*8
-	movq %r13,2*8(%rsp)
-	CFI_REL_OFFSET r13, 2*8
 
 	movl $(4096/64)-5,%ecx
 	.p2align 4
@@ -91,10 +89,8 @@ ENTRY(copy_page)
 	CFI_RESTORE rbx
 	movq 1*8(%rsp),%r12
 	CFI_RESTORE r12
-	movq 2*8(%rsp),%r13
-	CFI_RESTORE r13
-	addq $3*8,%rsp
-	CFI_ADJUST_CFA_OFFSET -3*8
+	addq $2*8,%rsp
+	CFI_ADJUST_CFA_OFFSET -2*8
 	ret
 .Lcopy_page_end:
 	CFI_ENDPROC
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index efbf2a0ecdea..1c273be7c97e 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -27,9 +27,8 @@
 .section .altinstr_replacement, "ax", @progbits
 .Lmemcpy_c:
 	movq %rdi, %rax
-
-	movl %edx, %ecx
-	shrl $3, %ecx
+	movq %rdx, %rcx
+	shrq $3, %rcx
 	andl $7, %edx
 	rep movsq
 	movl %edx, %ecx
@@ -48,8 +47,7 @@
 .section .altinstr_replacement, "ax", @progbits
 .Lmemcpy_c_e:
 	movq %rdi, %rax
-
-	movl %edx, %ecx
+	movq %rdx, %rcx
 	rep movsb
 	ret
 .Lmemcpy_e_e:
@@ -60,10 +58,7 @@ ENTRY(memcpy)
 	CFI_STARTPROC
 	movq %rdi, %rax
 
-	/*
-	 * Use 32bit CMP here to avoid long NOP padding.
-	 */
-	cmp $0x20, %edx
+	cmpq $0x20, %rdx
 	jb .Lhandle_tail
 
 	/*
@@ -72,7 +67,7 @@ ENTRY(memcpy)
 	 */
 	cmp %dil, %sil
 	jl .Lcopy_backward
-	subl $0x20, %edx
+	subq $0x20, %rdx
 .Lcopy_forward_loop:
 	subq $0x20, %rdx
 
@@ -91,7 +86,7 @@ ENTRY(memcpy)
 	movq %r11, 3*8(%rdi)
 	leaq 4*8(%rdi), %rdi
 	jae .Lcopy_forward_loop
-	addq $0x20, %rdx
+	addl $0x20, %edx
 	jmp .Lhandle_tail
 
 .Lcopy_backward:
@@ -123,11 +118,11 @@ ENTRY(memcpy)
 	/*
 	 * Calculate copy position to head.
 	 */
-	addq $0x20, %rdx
+	addl $0x20, %edx
 	subq %rdx, %rsi
 	subq %rdx, %rdi
 .Lhandle_tail:
-	cmpq $16, %rdx
+	cmpl $16, %edx
 	jb .Lless_16bytes
 
 	/*
@@ -144,7 +139,7 @@ ENTRY(memcpy)
 	retq
 	.p2align 4
 .Lless_16bytes:
-	cmpq $8, %rdx
+	cmpl $8, %edx
 	jb .Lless_8bytes
 	/*
 	 * Move data from 8 bytes to 15 bytes.
@@ -156,7 +151,7 @@ ENTRY(memcpy)
 	retq
 	.p2align 4
 .Lless_8bytes:
-	cmpq $4, %rdx
+	cmpl $4, %edx
 	jb .Lless_3bytes
 
 	/*
@@ -169,18 +164,19 @@ ENTRY(memcpy)
 	retq
 	.p2align 4
 .Lless_3bytes:
-	cmpl $0, %edx
-	je .Lend
+	subl $1, %edx
+	jb .Lend
 	/*
 	 * Move data from 1 bytes to 3 bytes.
 	 */
-.Lloop_1:
-	movb (%rsi), %r8b
-	movb %r8b, (%rdi)
-	incq %rdi
-	incq %rsi
-	decl %edx
-	jnz .Lloop_1
+	movzbl (%rsi), %ecx
+	jz .Lstore_1byte
+	movzbq 1(%rsi), %r8
+	movzbq (%rsi, %rdx), %r9
+	movb %r8b, 1(%rdi)
+	movb %r9b, (%rdi, %rdx)
+.Lstore_1byte:
+	movb %cl, (%rdi)
 
 .Lend:
 	retq
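The rewritten .Lless_3bytes tail above replaces a byte-at-a-time loop with at most three possibly-overlapping loads and stores. A standalone C sketch of the same idea (not the kernel's memcpy, just the tail-handling technique):

```c
#include <stdio.h>
#include <string.h>

/* Copy a 0..3 byte tail without a loop: read the first byte, and for
 * lengths 2 and 3 also read byte 1 and the last byte (which alias for
 * length 2), then store them. Mirrors the new .Lless_3bytes sequence. */
static void copy_tail_upto3(unsigned char *dst, const unsigned char *src,
			    size_t len)
{
	if (len == 0)
		return;
	unsigned char first = src[0];
	if (len > 1) {
		unsigned char second = src[1];
		unsigned char last   = src[len - 1]; /* == src[1] when len == 2 */
		dst[1]       = second;
		dst[len - 1] = last;
	}
	dst[0] = first;
}

int main(void)
{
	unsigned char out[4] = { 0 };
	copy_tail_upto3(out, (const unsigned char *)"abc", 3);
	printf("%.3s\n", out);	/* prints "abc" */
	return 0;
}
```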
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 79bd454b78a3..2dcb3808cbda 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -19,16 +19,15 @@
 .section .altinstr_replacement, "ax", @progbits
 .Lmemset_c:
 	movq %rdi,%r9
-	movl %edx,%r8d
-	andl $7,%r8d
-	movl %edx,%ecx
-	shrl $3,%ecx
+	movq %rdx,%rcx
+	andl $7,%edx
+	shrq $3,%rcx
 	/* expand byte value */
 	movzbl %sil,%esi
 	movabs $0x0101010101010101,%rax
-	mulq %rsi	/* with rax, clobbers rdx */
+	imulq %rsi,%rax
 	rep stosq
-	movl %r8d,%ecx
+	movl %edx,%ecx
 	rep stosb
 	movq %r9,%rax
 	ret
@@ -50,7 +49,7 @@
 .Lmemset_c_e:
 	movq %rdi,%r9
 	movb %sil,%al
-	movl %edx,%ecx
+	movq %rdx,%rcx
 	rep stosb
 	movq %r9,%rax
 	ret
@@ -61,12 +60,11 @@ ENTRY(memset)
 ENTRY(__memset)
 	CFI_STARTPROC
 	movq %rdi,%r10
-	movq %rdx,%r11
 
 	/* expand byte value */
 	movzbl %sil,%ecx
 	movabs $0x0101010101010101,%rax
-	mul %rcx	/* with rax, clobbers rdx */
+	imulq %rcx,%rax
 
 	/* align dst */
 	movl %edi,%r9d
@@ -75,13 +73,13 @@ ENTRY(__memset)
 	CFI_REMEMBER_STATE
 .Lafter_bad_alignment:
 
-	movl %r11d,%ecx
-	shrl $6,%ecx
+	movq %rdx,%rcx
+	shrq $6,%rcx
 	jz .Lhandle_tail
 
 	.p2align 4
 .Lloop_64:
-	decl %ecx
+	decq %rcx
 	movq %rax,(%rdi)
 	movq %rax,8(%rdi)
 	movq %rax,16(%rdi)
@@ -97,7 +95,7 @@ ENTRY(__memset)
 	   to predict jump tables. */
 	.p2align 4
 .Lhandle_tail:
-	movl %r11d,%ecx
+	movl %edx,%ecx
 	andl $63&(~7),%ecx
 	jz .Lhandle_7
 	shrl $3,%ecx
@@ -109,12 +107,11 @@ ENTRY(__memset)
 	jnz .Lloop_8
 
 .Lhandle_7:
-	movl %r11d,%ecx
-	andl $7,%ecx
+	andl $7,%edx
 	jz .Lende
 	.p2align 4
.Lloop_1:
-	decl %ecx
+	decl %edx
 	movb %al,(%rdi)
 	leaq 1(%rdi),%rdi
 	jnz .Lloop_1
@@ -125,13 +122,13 @@ ENTRY(__memset)
 
 	CFI_RESTORE_STATE
 .Lbad_alignment:
-	cmpq $7,%r11
+	cmpq $7,%rdx
 	jbe .Lhandle_7
 	movq %rax,(%rdi)	/* unaligned store */
 	movq $8,%r8
 	subq %r9,%r8
 	addq %r8,%rdi
-	subq %r8,%r11
+	subq %r8,%rdx
 	jmp .Lafter_bad_alignment
 .Lfinal:
 	CFI_ENDPROC
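For reference, the byte-expansion and 64-bit length handling that both memset variants rely on can be modelled in a few lines of standalone C (a sketch of the technique, not the kernel's implementation):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Replicate the fill byte into all 8 lanes of a 64-bit word, the same
 * trick as "movabs $0x0101010101010101,%rax; imulq %rsi,%rax". */
static uint64_t expand_byte(unsigned char c)
{
	return (uint64_t)c * 0x0101010101010101ULL;
}

/* Sketch of the qword/byte split: keeping the count in a full-width
 * size_t is what the movl->movq changes are about, so lengths of 4 GiB
 * and above are not truncated. */
static void *memset_sketch(void *s, int c, size_t n)
{
	uint64_t pattern = expand_byte((unsigned char)c);
	unsigned char *p = s;
	size_t qwords = n >> 3;	/* like shrq $3,%rcx */
	size_t tail   = n & 7;	/* like andl $7,%edx */

	while (qwords--) {
		memcpy(p, &pattern, 8);	/* stands in for rep stosq */
		p += 8;
	}
	while (tail--)
		*p++ = (unsigned char)c;
	return s;
}

int main(void)
{
	char buf[20];
	memset_sketch(buf, 'x', sizeof(buf) - 1);
	buf[19] = '\0';
	printf("%s\n", buf);	/* prints 19 'x' characters */
	return 0;
}
```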