author     Jonathan Herman <hermanjl@cs.unc.edu>    2013-01-17 16:15:55 -0500
committer  Jonathan Herman <hermanjl@cs.unc.edu>    2013-01-17 16:15:55 -0500
commit     8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree       a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /arch/x86/lib/copy_page_64.S
parent     406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'arch/x86/lib/copy_page_64.S')
-rw-r--r--   arch/x86/lib/copy_page_64.S   128
1 file changed, 67 insertions, 61 deletions
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 176cca67212..01c805ba535 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -5,90 +5,96 @@
 #include <asm/alternative-asm.h>
 
 	ALIGN
-copy_page_rep:
+copy_page_c:
 	CFI_STARTPROC
-	movl $4096/8, %ecx
+	movl $4096/8,%ecx
 	rep movsq
 	ret
 	CFI_ENDPROC
-ENDPROC(copy_page_rep)
+ENDPROC(copy_page_c)
 
-/*
- * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
- * Could vary the prefetch distance based on SMP/UP.
- */
+/* Don't use streaming store because it's better when the target
+   ends up in cache. */
+
+/* Could vary the prefetch distance based on SMP/UP */
 
 ENTRY(copy_page)
 	CFI_STARTPROC
-	subq $2*8, %rsp
-	CFI_ADJUST_CFA_OFFSET 2*8
-	movq %rbx, (%rsp)
+	subq $3*8,%rsp
+	CFI_ADJUST_CFA_OFFSET 3*8
+	movq %rbx,(%rsp)
 	CFI_REL_OFFSET rbx, 0
-	movq %r12, 1*8(%rsp)
+	movq %r12,1*8(%rsp)
 	CFI_REL_OFFSET r12, 1*8
+	movq %r13,2*8(%rsp)
+	CFI_REL_OFFSET r13, 2*8
 
-	movl $(4096/64)-5, %ecx
+	movl $(4096/64)-5,%ecx
 	.p2align 4
 .Loop64:
 	dec %rcx
-	movq 0x8*0(%rsi), %rax
-	movq 0x8*1(%rsi), %rbx
-	movq 0x8*2(%rsi), %rdx
-	movq 0x8*3(%rsi), %r8
-	movq 0x8*4(%rsi), %r9
-	movq 0x8*5(%rsi), %r10
-	movq 0x8*6(%rsi), %r11
-	movq 0x8*7(%rsi), %r12
+
+	movq (%rsi), %rax
+	movq 8 (%rsi), %rbx
+	movq 16 (%rsi), %rdx
+	movq 24 (%rsi), %r8
+	movq 32 (%rsi), %r9
+	movq 40 (%rsi), %r10
+	movq 48 (%rsi), %r11
+	movq 56 (%rsi), %r12
 
 	prefetcht0 5*64(%rsi)
 
-	movq %rax, 0x8*0(%rdi)
-	movq %rbx, 0x8*1(%rdi)
-	movq %rdx, 0x8*2(%rdi)
-	movq %r8, 0x8*3(%rdi)
-	movq %r9, 0x8*4(%rdi)
-	movq %r10, 0x8*5(%rdi)
-	movq %r11, 0x8*6(%rdi)
-	movq %r12, 0x8*7(%rdi)
+	movq %rax, (%rdi)
+	movq %rbx, 8 (%rdi)
+	movq %rdx, 16 (%rdi)
+	movq %r8, 24 (%rdi)
+	movq %r9, 32 (%rdi)
+	movq %r10, 40 (%rdi)
+	movq %r11, 48 (%rdi)
+	movq %r12, 56 (%rdi)
 
 	leaq 64 (%rsi), %rsi
 	leaq 64 (%rdi), %rdi
 
 	jnz .Loop64
 
-	movl $5, %ecx
+	movl $5,%ecx
 	.p2align 4
 .Loop2:
 	decl %ecx
 
-	movq 0x8*0(%rsi), %rax
-	movq 0x8*1(%rsi), %rbx
-	movq 0x8*2(%rsi), %rdx
-	movq 0x8*3(%rsi), %r8
-	movq 0x8*4(%rsi), %r9
-	movq 0x8*5(%rsi), %r10
-	movq 0x8*6(%rsi), %r11
-	movq 0x8*7(%rsi), %r12
+	movq (%rsi), %rax
+	movq 8 (%rsi), %rbx
+	movq 16 (%rsi), %rdx
+	movq 24 (%rsi), %r8
+	movq 32 (%rsi), %r9
+	movq 40 (%rsi), %r10
+	movq 48 (%rsi), %r11
+	movq 56 (%rsi), %r12
 
-	movq %rax, 0x8*0(%rdi)
-	movq %rbx, 0x8*1(%rdi)
-	movq %rdx, 0x8*2(%rdi)
-	movq %r8, 0x8*3(%rdi)
-	movq %r9, 0x8*4(%rdi)
-	movq %r10, 0x8*5(%rdi)
-	movq %r11, 0x8*6(%rdi)
-	movq %r12, 0x8*7(%rdi)
+	movq %rax, (%rdi)
+	movq %rbx, 8 (%rdi)
+	movq %rdx, 16 (%rdi)
+	movq %r8, 24 (%rdi)
+	movq %r9, 32 (%rdi)
+	movq %r10, 40 (%rdi)
+	movq %r11, 48 (%rdi)
+	movq %r12, 56 (%rdi)
 
-	leaq 64(%rdi), %rdi
-	leaq 64(%rsi), %rsi
+	leaq 64(%rdi),%rdi
+	leaq 64(%rsi),%rsi
+
 	jnz .Loop2
 
-	movq (%rsp), %rbx
+	movq (%rsp),%rbx
 	CFI_RESTORE rbx
-	movq 1*8(%rsp), %r12
+	movq 1*8(%rsp),%r12
 	CFI_RESTORE r12
-	addq $2*8, %rsp
-	CFI_ADJUST_CFA_OFFSET -2*8
+	movq 2*8(%rsp),%r13
+	CFI_RESTORE r13
+	addq $3*8,%rsp
+	CFI_ADJUST_CFA_OFFSET -3*8
 	ret
 .Lcopy_page_end:
 	CFI_ENDPROC
@@ -101,7 +107,7 @@ ENDPROC(copy_page)
 
 	.section .altinstr_replacement,"ax"
 1:	.byte 0xeb	/* jmp <disp8> */
-	.byte (copy_page_rep - copy_page) - (2f - 1b)	/* offset */
+	.byte (copy_page_c - copy_page) - (2f - 1b)	/* offset */
 2:
 	.previous
 	.section .altinstructions,"a"
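
For readers skimming the hunks above: copy_page saves the callee-saved registers it clobbers, copies the 4096-byte page 64 bytes (eight quadwords) per iteration while issuing prefetcht0 five cache lines ahead, and finishes the last five cache lines in a second loop (.Loop2) that does not prefetch, so the prefetch stream stops at the end of the source page. The .altinstr_replacement/.altinstructions hunks let CPUs with fast string operations (X86_FEATURE_REP_GOOD, mentioned in the comment removed on the left-hand side) patch the entry point into a jump to the plain rep movsq variant. The C sketch below is only an illustration of that structure, not kernel code; copy_page_unrolled, copy_page_select, and the rep_good flag are names invented for the example.

/*
 * Illustrative sketch only. It mirrors the shape of copy_page above in C:
 * an unrolled main loop moving 64 bytes (eight 8-byte words) per iteration
 * with a software prefetch five cache lines ahead, followed by a short tail
 * loop with no prefetch, so nothing is prefetched past the source page.
 * The rep_good flag stands in for the X86_FEATURE_REP_GOOD alternative
 * that selects the "rep movsq" path at boot.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096

static void copy_page_unrolled(void *dst, const void *src)
{
	uint64_t *d = dst;
	const uint64_t *s = src;
	size_t i;

	/* Main loop: (4096/64 - 5) iterations, like .Loop64. */
	for (i = 0; i < PAGE_SIZE / 64 - 5; i++) {
#if defined(__GNUC__) || defined(__clang__)
		__builtin_prefetch(s + 5 * 8, 0, 3);	/* like prefetcht0 5*64(%rsi) */
#endif
		d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; d[3] = s[3];
		d[4] = s[4]; d[5] = s[5]; d[6] = s[6]; d[7] = s[7];
		s += 8;
		d += 8;
	}

	/* Tail: the last five cache lines, copied without prefetching (.Loop2). */
	for (i = 0; i < 5; i++) {
		d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; d[3] = s[3];
		d[4] = s[4]; d[5] = s[5]; d[6] = s[6]; d[7] = s[7];
		s += 8;
		d += 8;
	}
}

static void copy_page_select(void *dst, const void *src, int rep_good)
{
	if (rep_good)
		memcpy(dst, src, PAGE_SIZE);	/* stands in for the "rep movsq" variant */
	else
		copy_page_unrolled(dst, src);
}

int main(void)
{
	static uint64_t src[PAGE_SIZE / 8], dst[PAGE_SIZE / 8];
	size_t i;

	for (i = 0; i < PAGE_SIZE / 8; i++)
		src[i] = i;

	copy_page_select(dst, src, 0);
	printf("pages match: %d\n", memcmp(dst, src, PAGE_SIZE) == 0);
	return 0;
}

Splitting off the short, un-prefetched tail keeps the hot loop free of page-boundary checks while still stopping the prefetch at the page boundary, which matches why the assembly runs (4096/64)-5 iterations in .Loop64 and exactly 5 in .Loop2.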