Linux-2.6.12-rc2 (v2.6.12-rc2)

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/x86_64/lib/copy_user.S
1 files changed, 294 insertions, 0 deletions
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S
new file mode 100644
index 000000000000..bd556c804248
--- /dev/null
+++ b/arch/x86_64/lib/copy_user.S
@@ -0,0 +1,294 @@
+/* Copyright 2002 Andi Kleen, SuSE Labs.
+ * Subject to the GNU Public License v2.
+ * 
+ * Functions to copy from and to user space.            
+ */              
+#define FIX_ALIGNMENT 1
+                
+        #include <asm/current.h>
+        #include <asm/offset.h>
+        #include <asm/thread_info.h>
+        #include <asm/cpufeature.h>
+/* Standard copy_to_user with segment limit checking. In: rdi dest, rsi source, rdx count. */
+        .globl copy_to_user
+        .p2align 4      
+copy_to_user:
+        GET_THREAD_INFO(%rax)   /* macro from the included headers; presumably rax = current thread_info - confirm */
+        movq %rdi,%rcx
+        addq %rdx,%rcx          /* rcx = dest + count, the end of the destination range */
+        jc  bad_to_user         /* the addition wrapped around */
+        cmpq threadinfo_addr_limit(%rax),%rcx
+        jae bad_to_user         /* end >= address limit: return with eax = count */
+2:                              /* start of the 5 bytes named in the .altinstructions record below */
+        .byte 0xe9      /* 32bit jump, hand-encoded: opcode + 4 byte displacement */
+        .long .Lcug-1f  /* target .Lcug, relative to the end of the jump (label 1) */
+1:
+        .section .altinstr_replacement,"ax"
+3:      .byte 0xe9                      /* replacement jmp with 32 bit displacement */
+        .long copy_user_generic_c-1b    /* offset, relative to label 1 in .text above */
+        .previous
+        .section .altinstructions,"a"
+        .align 8
+        .quad  2b               /* code to patch */
+        .quad  3b               /* replacement bytes */
+        .byte  X86_FEATURE_K8_C
+        .byte  5                /* presumably original length - confirm against struct alt_instr */
+        .byte  5                /* presumably replacement length - confirm against struct alt_instr */
+        .previous
+/* Standard copy_from_user with segment limit checking. In: rdi dest, rsi source, rdx count. */
+        .globl copy_from_user
+        .p2align 4      
+copy_from_user:
+        GET_THREAD_INFO(%rax)   /* macro from the included headers; presumably rax = current thread_info - confirm */
+        movq %rsi,%rcx
+        addq %rdx,%rcx          /* rcx = source + count, the end of the source range */
+        jc  bad_from_user       /* the addition wrapped around */
+        cmpq threadinfo_addr_limit(%rax),%rcx
+        jae  bad_from_user      /* end >= address limit: zero dest, return count */
+        /* FALL THROUGH to copy_user_generic (the next code in this section) */
+        
+        .section .fixup,"ax"
+        /* must zero dest; only the low 32 bits of the count are used here */
+bad_from_user:
+        movl %edx,%ecx          /* ecx = count, the rep counter */
+        xorl %eax,%eax          /* al = 0, the fill byte */
+        rep
+        stosb
+bad_to_user:                    /* also reached by falling through from bad_from_user */
+        movl    %edx,%eax       /* nothing was copied: return the count */
+        ret
+        .previous
+        
+                
+/*
+ * copy_user_generic - memory copy with exception handling.
+ * A faulting load or store is redirected via __ex_table to the fixup code below.
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count
+ * Clobbers rcx, rdx, rsi, rdi, r8-r11 and flags; rbx is saved and restored.
+ * Output:
+ * eax uncopied bytes or 0 if successful. After a fault the rest of dest is zeroed.
+ */
+        .globl copy_user_generic
+        .p2align 4
+copy_user_generic:
+        .byte 0x66,0x66,0x90    /* 5 byte nop for replacement jump */
+        .byte 0x66,0x90
+1:                              /* end of the 5 patchable bytes; reference point for the offset below */
+        .section .altinstr_replacement,"ax"
+2:      .byte 0xe9                   /* near jump with 32bit immediate */
+        .long copy_user_generic_c-1b /* offset */
+        .previous
+        .section .altinstructions,"a"
+        .align 8
+        .quad  copy_user_generic        /* code to patch */
+        .quad  2b                       /* replacement bytes */
+        .byte  X86_FEATURE_K8_C
+        .byte  5                        /* presumably original length - confirm against struct alt_instr */
+        .byte  5                        /* presumably replacement length - confirm against struct alt_instr */
+        .previous
+.Lcug:                          /* copy_to_user jumps here, past the patchable nop */
+        pushq %rbx
+        xorl %eax,%eax          /* zero for the exception handler */
+#ifdef FIX_ALIGNMENT
+        /* check for bad alignment of destination */
+        movl %edi,%ecx
+        andl $7,%ecx            /* ecx = dest & 7 */
+        jnz  .Lbad_alignment
+.Lafter_bad_alignment:
+#endif
+        movq %rdx,%rcx          /* rcx keeps the byte count for the tail code and the fixups */
+        movl $64,%ebx           /* block size, added to rdi by the main loop fixup */
+        shrq $6,%rdx
+        decq %rdx               /* rdx = number of 64 byte blocks - 1 */
+        js   .Lhandle_tail      /* fewer than 64 bytes to copy */
+        
+        .p2align 4
+.Lloop:                         /* 64 bytes per pass: four loads, four stores, twice */
+.Ls1:   movq (%rsi),%r11
+.Ls2:   movq 1*8(%rsi),%r8
+.Ls3:   movq 2*8(%rsi),%r9
+.Ls4:   movq 3*8(%rsi),%r10
+.Ld1:   movq %r11,(%rdi)
+.Ld2:   movq %r8,1*8(%rdi)
+.Ld3:   movq %r9,2*8(%rdi)
+.Ld4:   movq %r10,3*8(%rdi)
+                
+.Ls5:   movq 4*8(%rsi),%r11
+.Ls6:   movq 5*8(%rsi),%r8
+.Ls7:   movq 6*8(%rsi),%r9
+.Ls8:   movq 7*8(%rsi),%r10
+.Ld5:   movq %r11,4*8(%rdi)
+.Ld6:   movq %r8,5*8(%rdi)
+.Ld7:   movq %r9,6*8(%rdi)
+.Ld8:   movq %r10,7*8(%rdi)
+        
+        decq %rdx
+        leaq 64(%rsi),%rsi      /* lea leaves the flags of decq intact for the jns below */
+        leaq 64(%rdi),%rdi
+        
+        jns  .Lloop
+        .p2align 4
+.Lhandle_tail:
+        movl %ecx,%edx          /* edx = byte count, kept for .Lhandle_7 and the fixups */
+        andl $63,%ecx
+        shrl $3,%ecx            /* ecx = whole quadwords left after the 64 byte blocks */
+        jz   .Lhandle_7
+        movl $8,%ebx
+        .p2align 4
+.Lloop_8:
+.Ls9:   movq (%rsi),%r8
+.Ld9:   movq %r8,(%rdi)
+        decl %ecx
+        leaq 8(%rdi),%rdi       /* lea leaves the flags of decl intact for the jnz below */
+        leaq 8(%rsi),%rsi
+        jnz .Lloop_8
+        
+.Lhandle_7:             
+        movl %edx,%ecx  
+        andl $7,%ecx            /* ecx = trailing bytes (count & 7) */
+        jz   .Lende
+        .p2align 4
+.Lloop_1:
+.Ls10:  movb (%rsi),%bl
+.Ld10:  movb %bl,(%rdi)
+        incq %rdi
+        incq %rsi
+        decl %ecx
+        jnz .Lloop_1
+                        
+.Lende:                         /* common exit; eax is 0 unless a fixup set it */
+        popq %rbx
+        ret     
+#ifdef FIX_ALIGNMENT                            
+        /* align destination; on entry ecx = dest & 7 (non-zero) */
+        .p2align 4
+.Lbad_alignment:
+        movl $8,%r9d
+        subl %ecx,%r9d          /* r9 = bytes needed to reach 8 byte alignment */
+        movl %r9d,%ecx
+        cmpq %r9,%rdx
+        jz   .Lhandle_7         /* count <= r9: the byte loop copies everything */
+        js   .Lhandle_7
+.Lalign_1:              
+.Ls11:  movb (%rsi),%bl
+.Ld11:  movb %bl,(%rdi)
+        incq %rsi
+        incq %rdi
+        decq %rdx               /* keep rdx = bytes left, so a fault here zeroes exactly the rest */
+        decl %ecx               /* must stay last: the jnz tests its flags */
+        jnz .Lalign_1
+        jmp .Lafter_bad_alignment       /* rdx is now count - r9 */
+#endif
+        
+        /* table sorted by exception address: faulting instruction, fixup */
+        .section __ex_table,"a"
+        .align 8
+        .quad .Ls1,.Ls1e        /* .Ls1-.Ls4 fault before any store of this block */
+        .quad .Ls2,.Ls1e
+        .quad .Ls3,.Ls1e
+        .quad .Ls4,.Ls1e
+        .quad .Ld1,.Ls1e        /* .Ld1-.Ld4 fault with 0-24 bytes of this block stored */
+        .quad .Ld2,.Ls2e
+        .quad .Ld3,.Ls3e
+        .quad .Ld4,.Ls4e
+        .quad .Ls5,.Ls5e        /* .Ls5-.Ls8 fault with exactly 32 bytes stored */
+        .quad .Ls6,.Ls5e
+        .quad .Ls7,.Ls5e
+        .quad .Ls8,.Ls5e
+        .quad .Ld5,.Ls5e        /* .Ld5-.Ld8 fault with 32-56 bytes stored */
+        .quad .Ld6,.Ls6e
+        .quad .Ld7,.Ls7e
+        .quad .Ld8,.Ls8e
+        .quad .Ls9,.Le_quad
+        .quad .Ld9,.Le_quad
+        .quad .Ls10,.Le_byte
+        .quad .Ld10,.Le_byte
+#ifdef FIX_ALIGNMENT    
+        .quad .Ls11,.Lzero_rest /* rdx is already the exact number of bytes left */
+        .quad .Ld11,.Lzero_rest
+#endif
+        .quad .Le5,.Le_zero
+        .previous
+        /* main loop fixups. entering at .LsNe leaves eax = bytes of the current
+           64 byte block not stored yet (.Ls1e: 64 ... .Ls8e: 8). this is gross.
+           it would be better to fix the interface. */
+        /* eax: zero, ebx: 64 */
+.Ls1e:  addl $8,%eax
+.Ls2e:  addl $8,%eax
+.Ls3e:  addl $8,%eax
+.Ls4e:  addl $8,%eax
+.Ls5e:  addl $8,%eax
+.Ls6e:  addl $8,%eax
+.Ls7e:  addl $8,%eax
+.Ls8e:  addl $8,%eax
+        addq %rbx,%rdi  /* +64 */
+        subq %rax,%rdi  /* correct destination with computed offset */
+        shlq $6,%rdx    /* loop counter * 64 (stride length) */
+        addq %rax,%rdx  /* add offset to loopcnt */
+        andl $63,%ecx   /* remaining bytes */
+        addq %rcx,%rdx  /* add them */
+        jmp .Lzero_rest
+        /* exception on quad word loop in tail handling */
+        /* ecx: quadwords left, %edx: length, rdi: correct */
+.Le_quad:
+        shll $3,%ecx            /* quadwords left -> bytes */
+        andl $7,%edx            /* plus the trailing bytes */
+        addl %ecx,%edx
+        /* edx: bytes to zero, rdi: dest, eax:zero */
+.Lzero_rest:
+        movq %rdx,%rcx
+.Le_byte:                       /* byte loop fault: ecx = bytes left; edx (the return value) is not reduced */
+        xorl %eax,%eax
+.Le5:   rep 
+        stosb
+        /* when there is another exception while zeroing the rest just return */
+.Le_zero:               
+        movq %rdx,%rax          /* return value: uncopied bytes */
+        jmp .Lende
+        /* C stepping K8 run faster using the string copy instructions.
+           This is also a lot simpler. Use them when possible.
+           Patch in jmps to this code instead of copying it fully
+           to avoid unwanted aliasing in the exception tables. */
+                
+ /* rdi destination
+  * rsi source
+  * rdx count (only the low 32 bits are used)
+  * NOTE(review): the fault paths below return without zeroing dest, unlike the other routines in this file - confirm callers cope.
+  * Output:             
+  * eax uncopied bytes or 0 if successful.
+  */                    
+copy_user_generic_c:
+        movl %edx,%ecx
+        shrl $3,%ecx            /* ecx = number of whole quadwords */
+        andl $7,%edx            /* edx = trailing bytes */
+1:      rep 
+        movsq 
+        movl %edx,%ecx
+2:      rep
+        movsb
+4:      movl %ecx,%eax          /* ecx is 0 here on success, or the bytes left after a fault at 2 */
+        ret
+3:      lea (%rdx,%rcx,8),%rax  /* fault at 1: quadwords left * 8 + trailing bytes */
+        ret
+        
+        .section __ex_table,"a"
+        .quad 1b,3b             /* faulting instruction, fixup */
+        .quad 2b,4b
+        .previous
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/x86_64/lib/copy_user.S

diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S new file mode 100644 index 000000000000..bd556c804248 --- /dev/null +++ b/arch/x86_64/lib/copy_user.S
@@ -0,0 +1,294 @@
	1	/* Copyright 2002 Andi Kleen, SuSE Labs.
	2	* Subject to the GNU Public License v2.
	3	*
	4	* Functions to copy from and to user space.
	5	*/
	6
	7	#define FIX_ALIGNMENT 1
	8
	9	#include <asm/current.h>
	10	#include <asm/offset.h>
	11	#include <asm/thread_info.h>
	12	#include <asm/cpufeature.h>
	13
	14	/* Standard copy_to_user with segment limit checking */
	15	.globl copy_to_user
	16	.p2align 4
	17	copy_to_user:
	18	GET_THREAD_INFO(%rax)
	19	movq %rdi,%rcx
	20	addq %rdx,%rcx
	21	jc bad_to_user
	22	cmpq threadinfo_addr_limit(%rax),%rcx
	23	jae bad_to_user
	24	2:
	25	.byte 0xe9 /* 32bit jump */
	26	.long .Lcug-1f
	27	1:
	28
	29	.section .altinstr_replacement,"ax"
	30	3: .byte 0xe9 /* replacement jmp with 32 bit displacement */
	31	.long copy_user_generic_c-1b /* offset */
	32	.previous
	33	.section .altinstructions,"a"
	34	.align 8
	35	.quad 2b
	36	.quad 3b
	37	.byte X86_FEATURE_K8_C
	38	.byte 5
	39	.byte 5
	40	.previous
	41
	42	/* Standard copy_from_user with segment limit checking */
	43	.globl copy_from_user
	44	.p2align 4
	45	copy_from_user:
	46	GET_THREAD_INFO(%rax)
	47	movq %rsi,%rcx
	48	addq %rdx,%rcx
	49	jc bad_from_user
	50	cmpq threadinfo_addr_limit(%rax),%rcx
	51	jae bad_from_user
	52	/* FALL THROUGH to copy_user_generic */
	53
	54	.section .fixup,"ax"
	55	/* must zero dest */
	56	bad_from_user:
	57	movl %edx,%ecx
	58	xorl %eax,%eax
	59	rep
	60	stosb
	61	bad_to_user:
	62	movl %edx,%eax
	63	ret
	64	.previous
	65
	66
	67	/*
	68	* copy_user_generic - memory copy with exception handling.
	69	*
	70	* Input:
	71	* rdi destination
	72	* rsi source
	73	* rdx count
	74	*
	75	* Output:
	76	* eax uncopied bytes or 0 if successful.
	77	*/
	78	.globl copy_user_generic
	79	.p2align 4
	80	copy_user_generic:
	81	.byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */
	82	.byte 0x66,0x90
	83	1:
	84	.section .altinstr_replacement,"ax"
	85	2: .byte 0xe9 /* near jump with 32bit immediate */
	86	.long copy_user_generic_c-1b /* offset */
	87	.previous
	88	.section .altinstructions,"a"
	89	.align 8
	90	.quad copy_user_generic
	91	.quad 2b
	92	.byte X86_FEATURE_K8_C
	93	.byte 5
	94	.byte 5
	95	.previous
	96	.Lcug:
	97	pushq %rbx
	98	xorl %eax,%eax /* zero for the exception handler */
	99
	100	#ifdef FIX_ALIGNMENT
	101	/* check for bad alignment of destination */
	102	movl %edi,%ecx
	103	andl $7,%ecx
	104	jnz .Lbad_alignment
	105	.Lafter_bad_alignment:
	106	#endif
	107
	108	movq %rdx,%rcx
	109
	110	movl $64,%ebx
	111	shrq $6,%rdx
	112	decq %rdx
	113	js .Lhandle_tail
	114
	115	.p2align 4
	116	.Lloop:
	117	.Ls1: movq (%rsi),%r11
	118	.Ls2: movq 1*8(%rsi),%r8
	119	.Ls3: movq 2*8(%rsi),%r9
	120	.Ls4: movq 3*8(%rsi),%r10
	121	.Ld1: movq %r11,(%rdi)
	122	.Ld2: movq %r8,1*8(%rdi)
	123	.Ld3: movq %r9,2*8(%rdi)
	124	.Ld4: movq %r10,3*8(%rdi)
	125
	126	.Ls5: movq 4*8(%rsi),%r11
	127	.Ls6: movq 5*8(%rsi),%r8
	128	.Ls7: movq 6*8(%rsi),%r9
	129	.Ls8: movq 7*8(%rsi),%r10
	130	.Ld5: movq %r11,4*8(%rdi)
	131	.Ld6: movq %r8,5*8(%rdi)
	132	.Ld7: movq %r9,6*8(%rdi)
	133	.Ld8: movq %r10,7*8(%rdi)
	134
	135	decq %rdx
	136
	137	leaq 64(%rsi),%rsi
	138	leaq 64(%rdi),%rdi
	139
	140	jns .Lloop
	141
	142	.p2align 4
	143	.Lhandle_tail:
	144	movl %ecx,%edx
	145	andl $63,%ecx
	146	shrl $3,%ecx
	147	jz .Lhandle_7
	148	movl $8,%ebx
	149	.p2align 4
	150	.Lloop_8:
	151	.Ls9: movq (%rsi),%r8
	152	.Ld9: movq %r8,(%rdi)
	153	decl %ecx
	154	leaq 8(%rdi),%rdi
	155	leaq 8(%rsi),%rsi
	156	jnz .Lloop_8
	157
	158	.Lhandle_7:
	159	movl %edx,%ecx
	160	andl $7,%ecx
	161	jz .Lende
	162	.p2align 4
	163	.Lloop_1:
	164	.Ls10: movb (%rsi),%bl
	165	.Ld10: movb %bl,(%rdi)
	166	incq %rdi
	167	incq %rsi
	168	decl %ecx
	169	jnz .Lloop_1
	170
	171	.Lende:
	172	popq %rbx
	173	ret
	174
	175	#ifdef FIX_ALIGNMENT
	176	/* align destination */
	177	.p2align 4
	178	.Lbad_alignment:
	179	movl $8,%r9d
	180	subl %ecx,%r9d
	181	movl %r9d,%ecx
	182	cmpq %r9,%rdx
	183	jz .Lhandle_7
	184	js .Lhandle_7
	185	.Lalign_1:
	186	.Ls11: movb (%rsi),%bl
	187	.Ld11: movb %bl,(%rdi)
	188	incq %rsi
	189	incq %rdi
	190	decl %ecx
	191	jnz .Lalign_1
	192	subq %r9,%rdx
	193	jmp .Lafter_bad_alignment
	194	#endif
	195
	196	/* table sorted by exception address */
	197	.section __ex_table,"a"
	198	.align 8
	199	.quad .Ls1,.Ls1e
	200	.quad .Ls2,.Ls2e
	201	.quad .Ls3,.Ls3e
	202	.quad .Ls4,.Ls4e
	203	.quad .Ld1,.Ls1e
	204	.quad .Ld2,.Ls2e
	205	.quad .Ld3,.Ls3e
	206	.quad .Ld4,.Ls4e
	207	.quad .Ls5,.Ls5e
	208	.quad .Ls6,.Ls6e
	209	.quad .Ls7,.Ls7e
	210	.quad .Ls8,.Ls8e
	211	.quad .Ld5,.Ls5e
	212	.quad .Ld6,.Ls6e
	213	.quad .Ld7,.Ls7e
	214	.quad .Ld8,.Ls8e
	215	.quad .Ls9,.Le_quad
	216	.quad .Ld9,.Le_quad
	217	.quad .Ls10,.Le_byte
	218	.quad .Ld10,.Le_byte
	219	#ifdef FIX_ALIGNMENT
	220	.quad .Ls11,.Lzero_rest
	221	.quad .Ld11,.Lzero_rest
	222	#endif
	223	.quad .Le5,.Le_zero
	224	.previous
	225
	226	/* compute 64-offset for main loop. 8 bytes accuracy with error on the
	227	pessimistic side. this is gross. it would be better to fix the
	228	interface. */
	229	/* eax: zero, ebx: 64 */
	230	.Ls1e: addl $8,%eax
	231	.Ls2e: addl $8,%eax
	232	.Ls3e: addl $8,%eax
	233	.Ls4e: addl $8,%eax
	234	.Ls5e: addl $8,%eax
	235	.Ls6e: addl $8,%eax
	236	.Ls7e: addl $8,%eax
	237	.Ls8e: addl $8,%eax
	238	addq %rbx,%rdi /* +64 */
	239	subq %rax,%rdi /* correct destination with computed offset */
	240
	241	shlq $6,%rdx /* loop counter * 64 (stride length) */
	242	addq %rax,%rdx /* add offset to loopcnt */
	243	andl $63,%ecx /* remaining bytes */
	244	addq %rcx,%rdx /* add them */
	245	jmp .Lzero_rest
	246
	247	/* exception on quad word loop in tail handling */
	248	/* ecx: loopcnt/8, %edx: length, rdi: correct */
	249	.Le_quad:
	250	shll $3,%ecx
	251	andl $7,%edx
	252	addl %ecx,%edx
	253	/* edx: bytes to zero, rdi: dest, eax:zero */
	254	.Lzero_rest:
	255	movq %rdx,%rcx
	256	.Le_byte:
	257	xorl %eax,%eax
	258	.Le5: rep
	259	stosb
	260	/* when there is another exception while zeroing the rest just return */
	261	.Le_zero:
	262	movq %rdx,%rax
	263	jmp .Lende
	264
	265	/* C stepping K8 run faster using the string copy instructions.
	266	This is also a lot simpler. Use them when possible.
	267	Patch in jmps to this code instead of copying it fully
	268	to avoid unwanted aliasing in the exception tables. */
	269
	270	/* rdi destination
	271	* rsi source
	272	* rdx count
	273	*
	274	* Output:
	275	* eax uncopied bytes or 0 if successful.
	276	*/
	277	copy_user_generic_c:
	278	movl %edx,%ecx
	279	shrl $3,%ecx
	280	andl $7,%edx
	281	1: rep
	282	movsq
	283	movl %edx,%ecx
	284	2: rep
	285	movsb
	286	4: movl %ecx,%eax
	287	ret
	288	3: lea (%rdx,%rcx,8),%rax
	289	ret
	290
	291	.section __ex_table,"a"
	292	.quad 1b,3b
	293	.quad 2b,4b
	294	.previous