From 2a6c7b6bbfefbe37dbbc31a209dbf0166ee51a07 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:15:55 +0200 Subject: x86_64: prepare shared lib/memset.S Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86_64/lib/memset.S | 133 ----------------------------------------------- 1 file changed, 133 deletions(-) delete mode 100644 arch/x86_64/lib/memset.S (limited to 'arch/x86_64/lib/memset.S') diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S deleted file mode 100644 index 2c5948116bd2..000000000000 --- a/arch/x86_64/lib/memset.S +++ /dev/null @@ -1,133 +0,0 @@ -/* Copyright 2002 Andi Kleen, SuSE Labs */ - -#include -#include - -/* - * ISO C memset - set a memory block to a byte value. - * - * rdi destination - * rsi value (char) - * rdx count (bytes) - * - * rax original destination - */ - ALIGN -memset_c: - CFI_STARTPROC - movq %rdi,%r9 - movl %edx,%r8d - andl $7,%r8d - movl %edx,%ecx - shrl $3,%ecx - /* expand byte value */ - movzbl %sil,%esi - movabs $0x0101010101010101,%rax - mulq %rsi /* with rax, clobbers rdx */ - rep stosq - movl %r8d,%ecx - rep stosb - movq %r9,%rax - ret - CFI_ENDPROC -ENDPROC(memset_c) - -ENTRY(memset) -ENTRY(__memset) - CFI_STARTPROC - movq %rdi,%r10 - movq %rdx,%r11 - - /* expand byte value */ - movzbl %sil,%ecx - movabs $0x0101010101010101,%rax - mul %rcx /* with rax, clobbers rdx */ - - /* align dst */ - movl %edi,%r9d - andl $7,%r9d - jnz .Lbad_alignment - CFI_REMEMBER_STATE -.Lafter_bad_alignment: - - movl %r11d,%ecx - shrl $6,%ecx - jz .Lhandle_tail - - .p2align 4 -.Lloop_64: - decl %ecx - movq %rax,(%rdi) - movq %rax,8(%rdi) - movq %rax,16(%rdi) - movq %rax,24(%rdi) - movq %rax,32(%rdi) - movq %rax,40(%rdi) - movq %rax,48(%rdi) - movq %rax,56(%rdi) - leaq 64(%rdi),%rdi - jnz .Lloop_64 - - /* Handle tail in loops. The loops should be faster than hard - to predict jump tables. */ - .p2align 4 -.Lhandle_tail: - movl %r11d,%ecx - andl $63&(~7),%ecx - jz .Lhandle_7 - shrl $3,%ecx - .p2align 4 -.Lloop_8: - decl %ecx - movq %rax,(%rdi) - leaq 8(%rdi),%rdi - jnz .Lloop_8 - -.Lhandle_7: - movl %r11d,%ecx - andl $7,%ecx - jz .Lende - .p2align 4 -.Lloop_1: - decl %ecx - movb %al,(%rdi) - leaq 1(%rdi),%rdi - jnz .Lloop_1 - -.Lende: - movq %r10,%rax - ret - - CFI_RESTORE_STATE -.Lbad_alignment: - cmpq $7,%r11 - jbe .Lhandle_7 - movq %rax,(%rdi) /* unaligned store */ - movq $8,%r8 - subq %r9,%r8 - addq %r8,%rdi - subq %r8,%r11 - jmp .Lafter_bad_alignment -.Lfinal: - CFI_ENDPROC -ENDPROC(memset) -ENDPROC(__memset) - - /* Some CPUs run faster using the string instructions. - It is also a lot simpler. Use this when possible */ - -#include - - .section .altinstr_replacement,"ax" -1: .byte 0xeb /* jmp */ - .byte (memset_c - memset) - (2f - 1b) /* offset */ -2: - .previous - .section .altinstructions,"a" - .align 8 - .quad memset - .quad 1b - .byte X86_FEATURE_REP_GOOD - .byte .Lfinal - memset - .byte 2b - 1b - .previous -- cgit v1.2.2