diff options
| author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-02 13:35:28 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-02 13:35:28 -0400 |
| commit | 114d5b1ca265f8a582dcbf0030da20ccdddbe8e1 (patch) | |
| tree | 7c3ada09fe64eadf184a0b59e83b176fd2a72b65 /arch | |
| parent | 2b3b29080d702e5488f214276170ab46adc40ee5 (diff) | |
| parent | 25e5566ed38650f7990041fcd20571d6ddd2a040 (diff) | |
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6
* 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6:
[SPARC64]: Fix missing load-twin usage in Niagara-1 memcpy.
[SPARC64]: Fix put_user() calls in binfmt_aout32.c
[SPARC]: Fix EBUS use of uninitialized variable.
Diffstat (limited to 'arch')

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | arch/sparc/kernel/ebus.c | 2 |
| -rw-r--r-- | arch/sparc64/kernel/binfmt_aout32.c | 4 |
| -rw-r--r-- | arch/sparc64/kernel/ebus.c | 5 |
| -rw-r--r-- | arch/sparc64/lib/NGcopy_from_user.S | 8 |
| -rw-r--r-- | arch/sparc64/lib/NGcopy_to_user.S | 8 |
| -rw-r--r-- | arch/sparc64/lib/NGmemcpy.S | 371 |

6 files changed, 229 insertions, 169 deletions
```diff
diff --git a/arch/sparc/kernel/ebus.c b/arch/sparc/kernel/ebus.c
index e2d02fd13f35..d850785b2080 100644
--- a/arch/sparc/kernel/ebus.c
+++ b/arch/sparc/kernel/ebus.c
```
| @@ -156,6 +156,8 @@ void __init fill_ebus_device(struct device_node *dp, struct linux_ebus_device *d | |||
| 156 | dev->prom_node = dp; | 156 | dev->prom_node = dp; |
| 157 | 157 | ||
| 158 | regs = of_get_property(dp, "reg", &len); | 158 | regs = of_get_property(dp, "reg", &len); |
| 159 | if (!regs) | ||
| 160 | len = 0; | ||
| 159 | if (len % sizeof(struct linux_prom_registers)) { | 161 | if (len % sizeof(struct linux_prom_registers)) { |
| 160 | prom_printf("UGH: proplen for %s was %d, need multiple of %d\n", | 162 | prom_printf("UGH: proplen for %s was %d, need multiple of %d\n", |
| 161 | dev->prom_node->name, len, | 163 | dev->prom_node->name, len, |
```diff
diff --git a/arch/sparc64/kernel/binfmt_aout32.c b/arch/sparc64/kernel/binfmt_aout32.c
index f205fc7cbcd0..d208cc7804f2 100644
--- a/arch/sparc64/kernel/binfmt_aout32.c
+++ b/arch/sparc64/kernel/binfmt_aout32.c
```
| @@ -177,7 +177,7 @@ static u32 __user *create_aout32_tables(char __user *p, struct linux_binprm *bpr | |||
| 177 | get_user(c,p++); | 177 | get_user(c,p++); |
| 178 | } while (c); | 178 | } while (c); |
| 179 | } | 179 | } |
| 180 | put_user(NULL,argv); | 180 | put_user(0,argv); |
| 181 | current->mm->arg_end = current->mm->env_start = (unsigned long) p; | 181 | current->mm->arg_end = current->mm->env_start = (unsigned long) p; |
| 182 | while (envc-->0) { | 182 | while (envc-->0) { |
| 183 | char c; | 183 | char c; |
| @@ -186,7 +186,7 @@ static u32 __user *create_aout32_tables(char __user *p, struct linux_binprm *bpr | |||
| 186 | get_user(c,p++); | 186 | get_user(c,p++); |
| 187 | } while (c); | 187 | } while (c); |
| 188 | } | 188 | } |
| 189 | put_user(NULL,envp); | 189 | put_user(0,envp); |
| 190 | current->mm->env_end = (unsigned long) p; | 190 | current->mm->env_end = (unsigned long) p; |
| 191 | return sp; | 191 | return sp; |
| 192 | } | 192 | } |
```diff
diff --git a/arch/sparc64/kernel/ebus.c b/arch/sparc64/kernel/ebus.c
index bc9ae36f7a43..04ab81cb4f48 100644
--- a/arch/sparc64/kernel/ebus.c
+++ b/arch/sparc64/kernel/ebus.c
```
| @@ -375,7 +375,10 @@ static void __init fill_ebus_device(struct device_node *dp, struct linux_ebus_de | |||
| 375 | dev->num_addrs = 0; | 375 | dev->num_addrs = 0; |
| 376 | dev->num_irqs = 0; | 376 | dev->num_irqs = 0; |
| 377 | } else { | 377 | } else { |
| 378 | (void) of_get_property(dp, "reg", &len); | 378 | const int *regs = of_get_property(dp, "reg", &len); |
| 379 | |||
| 380 | if (!regs) | ||
| 381 | len = 0; | ||
| 379 | dev->num_addrs = len / sizeof(struct linux_prom_registers); | 382 | dev->num_addrs = len / sizeof(struct linux_prom_registers); |
| 380 | 383 | ||
| 381 | for (i = 0; i < dev->num_addrs; i++) | 384 | for (i = 0; i < dev->num_addrs; i++) |
```diff
diff --git a/arch/sparc64/lib/NGcopy_from_user.S b/arch/sparc64/lib/NGcopy_from_user.S
index 2d93456f76dd..e7f433f71b42 100644
--- a/arch/sparc64/lib/NGcopy_from_user.S
+++ b/arch/sparc64/lib/NGcopy_from_user.S
```
| @@ -1,6 +1,6 @@ | |||
| 1 | /* NGcopy_from_user.S: Niagara optimized copy from userspace. | 1 | /* NGcopy_from_user.S: Niagara optimized copy from userspace. |
| 2 | * | 2 | * |
| 3 | * Copyright (C) 2006 David S. Miller (davem@davemloft.net) | 3 | * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) |
| 4 | */ | 4 | */ |
| 5 | 5 | ||
| 6 | #define EX_LD(x) \ | 6 | #define EX_LD(x) \ |
| @@ -8,8 +8,8 @@ | |||
| 8 | .section .fixup; \ | 8 | .section .fixup; \ |
| 9 | .align 4; \ | 9 | .align 4; \ |
| 10 | 99: wr %g0, ASI_AIUS, %asi;\ | 10 | 99: wr %g0, ASI_AIUS, %asi;\ |
| 11 | retl; \ | 11 | ret; \ |
| 12 | mov 1, %o0; \ | 12 | restore %g0, 1, %o0; \ |
| 13 | .section __ex_table,"a";\ | 13 | .section __ex_table,"a";\ |
| 14 | .align 4; \ | 14 | .align 4; \ |
| 15 | .word 98b, 99b; \ | 15 | .word 98b, 99b; \ |
| @@ -24,7 +24,7 @@ | |||
| 24 | #define LOAD(type,addr,dest) type##a [addr] ASI_AIUS, dest | 24 | #define LOAD(type,addr,dest) type##a [addr] ASI_AIUS, dest |
| 25 | #define LOAD_TWIN(addr_reg,dest0,dest1) \ | 25 | #define LOAD_TWIN(addr_reg,dest0,dest1) \ |
| 26 | ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_AIUS, dest0 | 26 | ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_AIUS, dest0 |
| 27 | #define EX_RETVAL(x) 0 | 27 | #define EX_RETVAL(x) %g0 |
| 28 | 28 | ||
| 29 | #ifdef __KERNEL__ | 29 | #ifdef __KERNEL__ |
| 30 | #define PREAMBLE \ | 30 | #define PREAMBLE \ |
```diff
diff --git a/arch/sparc64/lib/NGcopy_to_user.S b/arch/sparc64/lib/NGcopy_to_user.S
index 34112d5054ef..6ea01c5532a0 100644
--- a/arch/sparc64/lib/NGcopy_to_user.S
+++ b/arch/sparc64/lib/NGcopy_to_user.S
```
| @@ -1,6 +1,6 @@ | |||
| 1 | /* NGcopy_to_user.S: Niagara optimized copy to userspace. | 1 | /* NGcopy_to_user.S: Niagara optimized copy to userspace. |
| 2 | * | 2 | * |
| 3 | * Copyright (C) 2006 David S. Miller (davem@davemloft.net) | 3 | * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) |
| 4 | */ | 4 | */ |
| 5 | 5 | ||
| 6 | #define EX_ST(x) \ | 6 | #define EX_ST(x) \ |
| @@ -8,8 +8,8 @@ | |||
| 8 | .section .fixup; \ | 8 | .section .fixup; \ |
| 9 | .align 4; \ | 9 | .align 4; \ |
| 10 | 99: wr %g0, ASI_AIUS, %asi;\ | 10 | 99: wr %g0, ASI_AIUS, %asi;\ |
| 11 | retl; \ | 11 | ret; \ |
| 12 | mov 1, %o0; \ | 12 | restore %g0, 1, %o0; \ |
| 13 | .section __ex_table,"a";\ | 13 | .section __ex_table,"a";\ |
| 14 | .align 4; \ | 14 | .align 4; \ |
| 15 | .word 98b, 99b; \ | 15 | .word 98b, 99b; \ |
| @@ -23,7 +23,7 @@ | |||
| 23 | #define FUNC_NAME NGcopy_to_user | 23 | #define FUNC_NAME NGcopy_to_user |
| 24 | #define STORE(type,src,addr) type##a src, [addr] ASI_AIUS | 24 | #define STORE(type,src,addr) type##a src, [addr] ASI_AIUS |
| 25 | #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS | 25 | #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS |
| 26 | #define EX_RETVAL(x) 0 | 26 | #define EX_RETVAL(x) %g0 |
| 27 | 27 | ||
| 28 | #ifdef __KERNEL__ | 28 | #ifdef __KERNEL__ |
| 29 | /* Writing to %asi is _expensive_ so we hardcode it. | 29 | /* Writing to %asi is _expensive_ so we hardcode it. |
```diff
diff --git a/arch/sparc64/lib/NGmemcpy.S b/arch/sparc64/lib/NGmemcpy.S
index 66063a9a66b8..605cb3f09900 100644
--- a/arch/sparc64/lib/NGmemcpy.S
+++ b/arch/sparc64/lib/NGmemcpy.S
```
| @@ -1,6 +1,6 @@ | |||
| 1 | /* NGmemcpy.S: Niagara optimized memcpy. | 1 | /* NGmemcpy.S: Niagara optimized memcpy. |
| 2 | * | 2 | * |
| 3 | * Copyright (C) 2006 David S. Miller (davem@davemloft.net) | 3 | * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) |
| 4 | */ | 4 | */ |
| 5 | 5 | ||
| 6 | #ifdef __KERNEL__ | 6 | #ifdef __KERNEL__ |
| @@ -16,6 +16,12 @@ | |||
| 16 | wr %g0, ASI_PNF, %asi | 16 | wr %g0, ASI_PNF, %asi |
| 17 | #endif | 17 | #endif |
| 18 | 18 | ||
| 19 | #ifdef __sparc_v9__ | ||
| 20 | #define SAVE_AMOUNT 128 | ||
| 21 | #else | ||
| 22 | #define SAVE_AMOUNT 64 | ||
| 23 | #endif | ||
| 24 | |||
| 19 | #ifndef STORE_ASI | 25 | #ifndef STORE_ASI |
| 20 | #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P | 26 | #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P |
| 21 | #endif | 27 | #endif |
| @@ -50,7 +56,11 @@ | |||
| 50 | #endif | 56 | #endif |
| 51 | 57 | ||
| 52 | #ifndef STORE_INIT | 58 | #ifndef STORE_INIT |
| 59 | #ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA | ||
| 53 | #define STORE_INIT(src,addr) stxa src, [addr] %asi | 60 | #define STORE_INIT(src,addr) stxa src, [addr] %asi |
| 61 | #else | ||
| 62 | #define STORE_INIT(src,addr) stx src, [addr + 0x00] | ||
| 63 | #endif | ||
| 54 | #endif | 64 | #endif |
| 55 | 65 | ||
| 56 | #ifndef FUNC_NAME | 66 | #ifndef FUNC_NAME |
| @@ -73,18 +83,19 @@ | |||
| 73 | 83 | ||
| 74 | .globl FUNC_NAME | 84 | .globl FUNC_NAME |
| 75 | .type FUNC_NAME,#function | 85 | .type FUNC_NAME,#function |
| 76 | FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | 86 | FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ |
| 77 | srlx %o2, 31, %g2 | 87 | PREAMBLE |
| 88 | save %sp, -SAVE_AMOUNT, %sp | ||
| 89 | srlx %i2, 31, %g2 | ||
| 78 | cmp %g2, 0 | 90 | cmp %g2, 0 |
| 79 | tne %xcc, 5 | 91 | tne %xcc, 5 |
| 80 | PREAMBLE | 92 | mov %i0, %o0 |
| 81 | mov %o0, GLOBAL_SPARE | 93 | cmp %i2, 0 |
| 82 | cmp %o2, 0 | ||
| 83 | be,pn %XCC, 85f | 94 | be,pn %XCC, 85f |
| 84 | or %o0, %o1, %o3 | 95 | or %o0, %i1, %i3 |
| 85 | cmp %o2, 16 | 96 | cmp %i2, 16 |
| 86 | blu,a,pn %XCC, 80f | 97 | blu,a,pn %XCC, 80f |
| 87 | or %o3, %o2, %o3 | 98 | or %i3, %i2, %i3 |
| 88 | 99 | ||
| 89 | /* 2 blocks (128 bytes) is the minimum we can do the block | 100 | /* 2 blocks (128 bytes) is the minimum we can do the block |
| 90 | * copy with. We need to ensure that we'll iterate at least | 101 | * copy with. We need to ensure that we'll iterate at least |
| @@ -93,31 +104,31 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
| 93 | * to (64 - 1) bytes from the length before we perform the | 104 | * to (64 - 1) bytes from the length before we perform the |
| 94 | * block copy loop. | 105 | * block copy loop. |
| 95 | */ | 106 | */ |
| 96 | cmp %o2, (2 * 64) | 107 | cmp %i2, (2 * 64) |
| 97 | blu,pt %XCC, 70f | 108 | blu,pt %XCC, 70f |
| 98 | andcc %o3, 0x7, %g0 | 109 | andcc %i3, 0x7, %g0 |
| 99 | 110 | ||
| 100 | /* %o0: dst | 111 | /* %o0: dst |
| 101 | * %o1: src | 112 | * %i1: src |
| 102 | * %o2: len (known to be >= 128) | 113 | * %i2: len (known to be >= 128) |
| 103 | * | 114 | * |
| 104 | * The block copy loops will use %o4/%o5,%g2/%g3 as | 115 | * The block copy loops will use %i4/%i5,%g2/%g3 as |
| 105 | * temporaries while copying the data. | 116 | * temporaries while copying the data. |
| 106 | */ | 117 | */ |
| 107 | 118 | ||
| 108 | LOAD(prefetch, %o1, #one_read) | 119 | LOAD(prefetch, %i1, #one_read) |
| 109 | wr %g0, STORE_ASI, %asi | 120 | wr %g0, STORE_ASI, %asi |
| 110 | 121 | ||
| 111 | /* Align destination on 64-byte boundary. */ | 122 | /* Align destination on 64-byte boundary. */ |
| 112 | andcc %o0, (64 - 1), %o4 | 123 | andcc %o0, (64 - 1), %i4 |
| 113 | be,pt %XCC, 2f | 124 | be,pt %XCC, 2f |
| 114 | sub %o4, 64, %o4 | 125 | sub %i4, 64, %i4 |
| 115 | sub %g0, %o4, %o4 ! bytes to align dst | 126 | sub %g0, %i4, %i4 ! bytes to align dst |
| 116 | sub %o2, %o4, %o2 | 127 | sub %i2, %i4, %i2 |
| 117 | 1: subcc %o4, 1, %o4 | 128 | 1: subcc %i4, 1, %i4 |
| 118 | EX_LD(LOAD(ldub, %o1, %g1)) | 129 | EX_LD(LOAD(ldub, %i1, %g1)) |
| 119 | EX_ST(STORE(stb, %g1, %o0)) | 130 | EX_ST(STORE(stb, %g1, %o0)) |
| 120 | add %o1, 1, %o1 | 131 | add %i1, 1, %i1 |
| 121 | bne,pt %XCC, 1b | 132 | bne,pt %XCC, 1b |
| 122 | add %o0, 1, %o0 | 133 | add %o0, 1, %o0 |
| 123 | 134 | ||
| @@ -136,111 +147,155 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
| 136 | * aligned store data at a time, this is easy to ensure. | 147 | * aligned store data at a time, this is easy to ensure. |
| 137 | */ | 148 | */ |
| 138 | 2: | 149 | 2: |
| 139 | andcc %o1, (16 - 1), %o4 | 150 | andcc %i1, (16 - 1), %i4 |
| 140 | andn %o2, (64 - 1), %g1 ! block copy loop iterator | 151 | andn %i2, (64 - 1), %g1 ! block copy loop iterator |
| 141 | sub %o2, %g1, %o2 ! final sub-block copy bytes | ||
| 142 | be,pt %XCC, 50f | 152 | be,pt %XCC, 50f |
| 143 | cmp %o4, 8 | 153 | sub %i2, %g1, %i2 ! final sub-block copy bytes |
| 144 | be,a,pt %XCC, 10f | 154 | |
| 145 | sub %o1, 0x8, %o1 | 155 | cmp %i4, 8 |
| 156 | be,pt %XCC, 10f | ||
| 157 | sub %i1, %i4, %i1 | ||
| 146 | 158 | ||
| 147 | /* Neither 8-byte nor 16-byte aligned, shift and mask. */ | 159 | /* Neither 8-byte nor 16-byte aligned, shift and mask. */ |
| 148 | mov %g1, %o4 | 160 | and %i4, 0x7, GLOBAL_SPARE |
| 149 | and %o1, 0x7, %g1 | 161 | sll GLOBAL_SPARE, 3, GLOBAL_SPARE |
| 150 | sll %g1, 3, %g1 | 162 | mov 64, %i5 |
| 151 | mov 64, %o3 | 163 | EX_LD(LOAD_TWIN(%i1, %g2, %g3)) |
| 152 | andn %o1, 0x7, %o1 | 164 | sub %i5, GLOBAL_SPARE, %i5 |
| 153 | EX_LD(LOAD(ldx, %o1, %g2)) | 165 | mov 16, %o4 |
| 154 | sub %o3, %g1, %o3 | 166 | mov 32, %o5 |
| 155 | sllx %g2, %g1, %g2 | 167 | mov 48, %o7 |
| 168 | mov 64, %i3 | ||
| 169 | |||
| 170 | bg,pn %XCC, 9f | ||
| 171 | nop | ||
| 156 | 172 | ||
| 157 | #define SWIVEL_ONE_DWORD(SRC, TMP1, TMP2, PRE_VAL, PRE_SHIFT, POST_SHIFT, DST)\ | 173 | #define MIX_THREE_WORDS(WORD1, WORD2, WORD3, PRE_SHIFT, POST_SHIFT, TMP) \ |
| 158 | EX_LD(LOAD(ldx, SRC, TMP1)); \ | 174 | sllx WORD1, POST_SHIFT, WORD1; \ |
| 159 | srlx TMP1, PRE_SHIFT, TMP2; \ | 175 | srlx WORD2, PRE_SHIFT, TMP; \ |
| 160 | or TMP2, PRE_VAL, TMP2; \ | 176 | sllx WORD2, POST_SHIFT, WORD2; \ |
| 161 | EX_ST(STORE_INIT(TMP2, DST)); \ | 177 | or WORD1, TMP, WORD1; \ |
| 162 | sllx TMP1, POST_SHIFT, PRE_VAL; | 178 | srlx WORD3, PRE_SHIFT, TMP; \ |
| 163 | 179 | or WORD2, TMP, WORD2; | |
| 164 | 1: add %o1, 0x8, %o1 | 180 | |
| 165 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x00) | 181 | 8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) |
| 166 | add %o1, 0x8, %o1 | 182 | MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) |
| 167 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x08) | 183 | LOAD(prefetch, %i1 + %i3, #one_read) |
| 168 | add %o1, 0x8, %o1 | 184 | |
| 169 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x10) | 185 | EX_ST(STORE_INIT(%g2, %o0 + 0x00)) |
| 170 | add %o1, 0x8, %o1 | 186 | EX_ST(STORE_INIT(%g3, %o0 + 0x08)) |
| 171 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x18) | 187 | |
| 172 | add %o1, 32, %o1 | 188 | EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) |
| 173 | LOAD(prefetch, %o1, #one_read) | 189 | MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) |
| 174 | sub %o1, 32 - 8, %o1 | 190 | |
| 175 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x20) | 191 | EX_ST(STORE_INIT(%o2, %o0 + 0x10)) |
| 176 | add %o1, 8, %o1 | 192 | EX_ST(STORE_INIT(%o3, %o0 + 0x18)) |
| 177 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x28) | 193 | |
| 178 | add %o1, 8, %o1 | 194 | EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) |
| 179 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x30) | 195 | MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) |
| 180 | add %o1, 8, %o1 | 196 | |
| 181 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x38) | 197 | EX_ST(STORE_INIT(%g2, %o0 + 0x20)) |
| 182 | subcc %o4, 64, %o4 | 198 | EX_ST(STORE_INIT(%g3, %o0 + 0x28)) |
| 183 | bne,pt %XCC, 1b | 199 | |
| 200 | EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) | ||
| 201 | add %i1, 64, %i1 | ||
| 202 | MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) | ||
| 203 | |||
| 204 | EX_ST(STORE_INIT(%o2, %o0 + 0x30)) | ||
| 205 | EX_ST(STORE_INIT(%o3, %o0 + 0x38)) | ||
| 206 | |||
| 207 | subcc %g1, 64, %g1 | ||
| 208 | bne,pt %XCC, 8b | ||
| 184 | add %o0, 64, %o0 | 209 | add %o0, 64, %o0 |
| 185 | 210 | ||
| 186 | #undef SWIVEL_ONE_DWORD | 211 | ba,pt %XCC, 60f |
| 212 | add %i1, %i4, %i1 | ||
| 213 | |||
| 214 | 9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) | ||
| 215 | MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) | ||
| 216 | LOAD(prefetch, %i1 + %i3, #one_read) | ||
| 217 | |||
| 218 | EX_ST(STORE_INIT(%g3, %o0 + 0x00)) | ||
| 219 | EX_ST(STORE_INIT(%o2, %o0 + 0x08)) | ||
| 220 | |||
| 221 | EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) | ||
| 222 | MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) | ||
| 223 | |||
| 224 | EX_ST(STORE_INIT(%o3, %o0 + 0x10)) | ||
| 225 | EX_ST(STORE_INIT(%g2, %o0 + 0x18)) | ||
| 226 | |||
| 227 | EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) | ||
| 228 | MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) | ||
| 229 | |||
| 230 | EX_ST(STORE_INIT(%g3, %o0 + 0x20)) | ||
| 231 | EX_ST(STORE_INIT(%o2, %o0 + 0x28)) | ||
| 232 | |||
| 233 | EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) | ||
| 234 | add %i1, 64, %i1 | ||
| 235 | MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) | ||
| 236 | |||
| 237 | EX_ST(STORE_INIT(%o3, %o0 + 0x30)) | ||
| 238 | EX_ST(STORE_INIT(%g2, %o0 + 0x38)) | ||
| 239 | |||
| 240 | subcc %g1, 64, %g1 | ||
| 241 | bne,pt %XCC, 9b | ||
| 242 | add %o0, 64, %o0 | ||
| 187 | 243 | ||
| 188 | srl %g1, 3, %g1 | ||
| 189 | ba,pt %XCC, 60f | 244 | ba,pt %XCC, 60f |
| 190 | add %o1, %g1, %o1 | 245 | add %i1, %i4, %i1 |
| 191 | 246 | ||
| 192 | 10: /* Destination is 64-byte aligned, source was only 8-byte | 247 | 10: /* Destination is 64-byte aligned, source was only 8-byte |
| 193 | * aligned but it has been subtracted by 8 and we perform | 248 | * aligned but it has been subtracted by 8 and we perform |
| 194 | * one twin load ahead, then add 8 back into source when | 249 | * one twin load ahead, then add 8 back into source when |
| 195 | * we finish the loop. | 250 | * we finish the loop. |
| 196 | */ | 251 | */ |
| 197 | EX_LD(LOAD_TWIN(%o1, %o4, %o5)) | 252 | EX_LD(LOAD_TWIN(%i1, %o4, %o5)) |
| 198 | 1: add %o1, 16, %o1 | 253 | mov 16, %o7 |
| 199 | EX_LD(LOAD_TWIN(%o1, %g2, %g3)) | 254 | mov 32, %g2 |
| 200 | add %o1, 16 + 32, %o1 | 255 | mov 48, %g3 |
| 201 | LOAD(prefetch, %o1, #one_read) | 256 | mov 64, %o1 |
| 202 | sub %o1, 32, %o1 | 257 | 1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) |
| 258 | LOAD(prefetch, %i1 + %o1, #one_read) | ||
| 203 | EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line | 259 | EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line |
| 204 | EX_ST(STORE_INIT(%g2, %o0 + 0x08)) | 260 | EX_ST(STORE_INIT(%o2, %o0 + 0x08)) |
| 205 | EX_LD(LOAD_TWIN(%o1, %o4, %o5)) | 261 | EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) |
| 206 | add %o1, 16, %o1 | 262 | EX_ST(STORE_INIT(%o3, %o0 + 0x10)) |
| 207 | EX_ST(STORE_INIT(%g3, %o0 + 0x10)) | ||
| 208 | EX_ST(STORE_INIT(%o4, %o0 + 0x18)) | 263 | EX_ST(STORE_INIT(%o4, %o0 + 0x18)) |
| 209 | EX_LD(LOAD_TWIN(%o1, %g2, %g3)) | 264 | EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) |
| 210 | add %o1, 16, %o1 | ||
| 211 | EX_ST(STORE_INIT(%o5, %o0 + 0x20)) | 265 | EX_ST(STORE_INIT(%o5, %o0 + 0x20)) |
| 212 | EX_ST(STORE_INIT(%g2, %o0 + 0x28)) | 266 | EX_ST(STORE_INIT(%o2, %o0 + 0x28)) |
| 213 | EX_LD(LOAD_TWIN(%o1, %o4, %o5)) | 267 | EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5)) |
| 214 | EX_ST(STORE_INIT(%g3, %o0 + 0x30)) | 268 | add %i1, 64, %i1 |
| 269 | EX_ST(STORE_INIT(%o3, %o0 + 0x30)) | ||
| 215 | EX_ST(STORE_INIT(%o4, %o0 + 0x38)) | 270 | EX_ST(STORE_INIT(%o4, %o0 + 0x38)) |
| 216 | subcc %g1, 64, %g1 | 271 | subcc %g1, 64, %g1 |
| 217 | bne,pt %XCC, 1b | 272 | bne,pt %XCC, 1b |
| 218 | add %o0, 64, %o0 | 273 | add %o0, 64, %o0 |
| 219 | 274 | ||
| 220 | ba,pt %XCC, 60f | 275 | ba,pt %XCC, 60f |
| 221 | add %o1, 0x8, %o1 | 276 | add %i1, 0x8, %i1 |
| 222 | 277 | ||
| 223 | 50: /* Destination is 64-byte aligned, and source is 16-byte | 278 | 50: /* Destination is 64-byte aligned, and source is 16-byte |
| 224 | * aligned. | 279 | * aligned. |
| 225 | */ | 280 | */ |
| 226 | 1: EX_LD(LOAD_TWIN(%o1, %o4, %o5)) | 281 | mov 16, %o7 |
| 227 | add %o1, 16, %o1 | 282 | mov 32, %g2 |
| 228 | EX_LD(LOAD_TWIN(%o1, %g2, %g3)) | 283 | mov 48, %g3 |
| 229 | add %o1, 16 + 32, %o1 | 284 | mov 64, %o1 |
| 230 | LOAD(prefetch, %o1, #one_read) | 285 | 1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5)) |
| 231 | sub %o1, 32, %o1 | 286 | EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) |
| 287 | LOAD(prefetch, %i1 + %o1, #one_read) | ||
| 232 | EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line | 288 | EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line |
| 233 | EX_ST(STORE_INIT(%o5, %o0 + 0x08)) | 289 | EX_ST(STORE_INIT(%o5, %o0 + 0x08)) |
| 234 | EX_LD(LOAD_TWIN(%o1, %o4, %o5)) | 290 | EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) |
| 235 | add %o1, 16, %o1 | 291 | EX_ST(STORE_INIT(%o2, %o0 + 0x10)) |
| 236 | EX_ST(STORE_INIT(%g2, %o0 + 0x10)) | 292 | EX_ST(STORE_INIT(%o3, %o0 + 0x18)) |
| 237 | EX_ST(STORE_INIT(%g3, %o0 + 0x18)) | 293 | EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) |
| 238 | EX_LD(LOAD_TWIN(%o1, %g2, %g3)) | 294 | add %i1, 64, %i1 |
| 239 | add %o1, 16, %o1 | ||
| 240 | EX_ST(STORE_INIT(%o4, %o0 + 0x20)) | 295 | EX_ST(STORE_INIT(%o4, %o0 + 0x20)) |
| 241 | EX_ST(STORE_INIT(%o5, %o0 + 0x28)) | 296 | EX_ST(STORE_INIT(%o5, %o0 + 0x28)) |
| 242 | EX_ST(STORE_INIT(%g2, %o0 + 0x30)) | 297 | EX_ST(STORE_INIT(%o2, %o0 + 0x30)) |
| 243 | EX_ST(STORE_INIT(%g3, %o0 + 0x38)) | 298 | EX_ST(STORE_INIT(%o3, %o0 + 0x38)) |
| 244 | subcc %g1, 64, %g1 | 299 | subcc %g1, 64, %g1 |
| 245 | bne,pt %XCC, 1b | 300 | bne,pt %XCC, 1b |
| 246 | add %o0, 64, %o0 | 301 | add %o0, 64, %o0 |
| @@ -249,47 +304,47 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
| 249 | 60: | 304 | 60: |
| 250 | membar #Sync | 305 | membar #Sync |
| 251 | 306 | ||
| 252 | /* %o2 contains any final bytes still needed to be copied | 307 | /* %i2 contains any final bytes still needed to be copied |
| 253 | * over. If anything is left, we copy it one byte at a time. | 308 | * over. If anything is left, we copy it one byte at a time. |
| 254 | */ | 309 | */ |
| 255 | RESTORE_ASI(%o3) | 310 | RESTORE_ASI(%i3) |
| 256 | brz,pt %o2, 85f | 311 | brz,pt %i2, 85f |
| 257 | sub %o0, %o1, %o3 | 312 | sub %o0, %i1, %i3 |
| 258 | ba,a,pt %XCC, 90f | 313 | ba,a,pt %XCC, 90f |
| 259 | 314 | ||
| 260 | .align 64 | 315 | .align 64 |
| 261 | 70: /* 16 < len <= 64 */ | 316 | 70: /* 16 < len <= 64 */ |
| 262 | bne,pn %XCC, 75f | 317 | bne,pn %XCC, 75f |
| 263 | sub %o0, %o1, %o3 | 318 | sub %o0, %i1, %i3 |
| 264 | 319 | ||
| 265 | 72: | 320 | 72: |
| 266 | andn %o2, 0xf, %o4 | 321 | andn %i2, 0xf, %i4 |
| 267 | and %o2, 0xf, %o2 | 322 | and %i2, 0xf, %i2 |
| 268 | 1: subcc %o4, 0x10, %o4 | 323 | 1: subcc %i4, 0x10, %i4 |
| 269 | EX_LD(LOAD(ldx, %o1, %o5)) | 324 | EX_LD(LOAD(ldx, %i1, %i5)) |
| 270 | add %o1, 0x08, %o1 | 325 | add %i1, 0x08, %i1 |
| 271 | EX_LD(LOAD(ldx, %o1, %g1)) | 326 | EX_LD(LOAD(ldx, %i1, %g1)) |
| 272 | sub %o1, 0x08, %o1 | 327 | sub %i1, 0x08, %i1 |
| 273 | EX_ST(STORE(stx, %o5, %o1 + %o3)) | 328 | EX_ST(STORE(stx, %i5, %i1 + %i3)) |
| 274 | add %o1, 0x8, %o1 | 329 | add %i1, 0x8, %i1 |
| 275 | EX_ST(STORE(stx, %g1, %o1 + %o3)) | 330 | EX_ST(STORE(stx, %g1, %i1 + %i3)) |
| 276 | bgu,pt %XCC, 1b | 331 | bgu,pt %XCC, 1b |
| 277 | add %o1, 0x8, %o1 | 332 | add %i1, 0x8, %i1 |
| 278 | 73: andcc %o2, 0x8, %g0 | 333 | 73: andcc %i2, 0x8, %g0 |
| 279 | be,pt %XCC, 1f | 334 | be,pt %XCC, 1f |
| 280 | nop | 335 | nop |
| 281 | sub %o2, 0x8, %o2 | 336 | sub %i2, 0x8, %i2 |
| 282 | EX_LD(LOAD(ldx, %o1, %o5)) | 337 | EX_LD(LOAD(ldx, %i1, %i5)) |
| 283 | EX_ST(STORE(stx, %o5, %o1 + %o3)) | 338 | EX_ST(STORE(stx, %i5, %i1 + %i3)) |
| 284 | add %o1, 0x8, %o1 | 339 | add %i1, 0x8, %i1 |
| 285 | 1: andcc %o2, 0x4, %g0 | 340 | 1: andcc %i2, 0x4, %g0 |
| 286 | be,pt %XCC, 1f | 341 | be,pt %XCC, 1f |
| 287 | nop | 342 | nop |
| 288 | sub %o2, 0x4, %o2 | 343 | sub %i2, 0x4, %i2 |
| 289 | EX_LD(LOAD(lduw, %o1, %o5)) | 344 | EX_LD(LOAD(lduw, %i1, %i5)) |
| 290 | EX_ST(STORE(stw, %o5, %o1 + %o3)) | 345 | EX_ST(STORE(stw, %i5, %i1 + %i3)) |
| 291 | add %o1, 0x4, %o1 | 346 | add %i1, 0x4, %i1 |
| 292 | 1: cmp %o2, 0 | 347 | 1: cmp %i2, 0 |
| 293 | be,pt %XCC, 85f | 348 | be,pt %XCC, 85f |
| 294 | nop | 349 | nop |
| 295 | ba,pt %xcc, 90f | 350 | ba,pt %xcc, 90f |
| @@ -300,71 +355,71 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
| 300 | sub %g1, 0x8, %g1 | 355 | sub %g1, 0x8, %g1 |
| 301 | be,pn %icc, 2f | 356 | be,pn %icc, 2f |
| 302 | sub %g0, %g1, %g1 | 357 | sub %g0, %g1, %g1 |
| 303 | sub %o2, %g1, %o2 | 358 | sub %i2, %g1, %i2 |
| 304 | 359 | ||
| 305 | 1: subcc %g1, 1, %g1 | 360 | 1: subcc %g1, 1, %g1 |
| 306 | EX_LD(LOAD(ldub, %o1, %o5)) | 361 | EX_LD(LOAD(ldub, %i1, %i5)) |
| 307 | EX_ST(STORE(stb, %o5, %o1 + %o3)) | 362 | EX_ST(STORE(stb, %i5, %i1 + %i3)) |
| 308 | bgu,pt %icc, 1b | 363 | bgu,pt %icc, 1b |
| 309 | add %o1, 1, %o1 | 364 | add %i1, 1, %i1 |
| 310 | 365 | ||
| 311 | 2: add %o1, %o3, %o0 | 366 | 2: add %i1, %i3, %o0 |
| 312 | andcc %o1, 0x7, %g1 | 367 | andcc %i1, 0x7, %g1 |
| 313 | bne,pt %icc, 8f | 368 | bne,pt %icc, 8f |
| 314 | sll %g1, 3, %g1 | 369 | sll %g1, 3, %g1 |
| 315 | 370 | ||
| 316 | cmp %o2, 16 | 371 | cmp %i2, 16 |
| 317 | bgeu,pt %icc, 72b | 372 | bgeu,pt %icc, 72b |
| 318 | nop | 373 | nop |
| 319 | ba,a,pt %xcc, 73b | 374 | ba,a,pt %xcc, 73b |
| 320 | 375 | ||
| 321 | 8: mov 64, %o3 | 376 | 8: mov 64, %i3 |
| 322 | andn %o1, 0x7, %o1 | 377 | andn %i1, 0x7, %i1 |
| 323 | EX_LD(LOAD(ldx, %o1, %g2)) | 378 | EX_LD(LOAD(ldx, %i1, %g2)) |
| 324 | sub %o3, %g1, %o3 | 379 | sub %i3, %g1, %i3 |
| 325 | andn %o2, 0x7, %o4 | 380 | andn %i2, 0x7, %i4 |
| 326 | sllx %g2, %g1, %g2 | 381 | sllx %g2, %g1, %g2 |
| 327 | 1: add %o1, 0x8, %o1 | 382 | 1: add %i1, 0x8, %i1 |
| 328 | EX_LD(LOAD(ldx, %o1, %g3)) | 383 | EX_LD(LOAD(ldx, %i1, %g3)) |
| 329 | subcc %o4, 0x8, %o4 | 384 | subcc %i4, 0x8, %i4 |
| 330 | srlx %g3, %o3, %o5 | 385 | srlx %g3, %i3, %i5 |
| 331 | or %o5, %g2, %o5 | 386 | or %i5, %g2, %i5 |
| 332 | EX_ST(STORE(stx, %o5, %o0)) | 387 | EX_ST(STORE(stx, %i5, %o0)) |
| 333 | add %o0, 0x8, %o0 | 388 | add %o0, 0x8, %o0 |
| 334 | bgu,pt %icc, 1b | 389 | bgu,pt %icc, 1b |
| 335 | sllx %g3, %g1, %g2 | 390 | sllx %g3, %g1, %g2 |
| 336 | 391 | ||
| 337 | srl %g1, 3, %g1 | 392 | srl %g1, 3, %g1 |
| 338 | andcc %o2, 0x7, %o2 | 393 | andcc %i2, 0x7, %i2 |
| 339 | be,pn %icc, 85f | 394 | be,pn %icc, 85f |
| 340 | add %o1, %g1, %o1 | 395 | add %i1, %g1, %i1 |
| 341 | ba,pt %xcc, 90f | 396 | ba,pt %xcc, 90f |
| 342 | sub %o0, %o1, %o3 | 397 | sub %o0, %i1, %i3 |
| 343 | 398 | ||
| 344 | .align 64 | 399 | .align 64 |
| 345 | 80: /* 0 < len <= 16 */ | 400 | 80: /* 0 < len <= 16 */ |
| 346 | andcc %o3, 0x3, %g0 | 401 | andcc %i3, 0x3, %g0 |
| 347 | bne,pn %XCC, 90f | 402 | bne,pn %XCC, 90f |
| 348 | sub %o0, %o1, %o3 | 403 | sub %o0, %i1, %i3 |
| 349 | 404 | ||
| 350 | 1: | 405 | 1: |
| 351 | subcc %o2, 4, %o2 | 406 | subcc %i2, 4, %i2 |
| 352 | EX_LD(LOAD(lduw, %o1, %g1)) | 407 | EX_LD(LOAD(lduw, %i1, %g1)) |
| 353 | EX_ST(STORE(stw, %g1, %o1 + %o3)) | 408 | EX_ST(STORE(stw, %g1, %i1 + %i3)) |
| 354 | bgu,pt %XCC, 1b | 409 | bgu,pt %XCC, 1b |
| 355 | add %o1, 4, %o1 | 410 | add %i1, 4, %i1 |
| 356 | 411 | ||
| 357 | 85: retl | 412 | 85: ret |
| 358 | mov EX_RETVAL(GLOBAL_SPARE), %o0 | 413 | restore EX_RETVAL(%i0), %g0, %o0 |
| 359 | 414 | ||
| 360 | .align 32 | 415 | .align 32 |
| 361 | 90: | 416 | 90: |
| 362 | subcc %o2, 1, %o2 | 417 | subcc %i2, 1, %i2 |
| 363 | EX_LD(LOAD(ldub, %o1, %g1)) | 418 | EX_LD(LOAD(ldub, %i1, %g1)) |
| 364 | EX_ST(STORE(stb, %g1, %o1 + %o3)) | 419 | EX_ST(STORE(stb, %g1, %i1 + %i3)) |
| 365 | bgu,pt %XCC, 90b | 420 | bgu,pt %XCC, 90b |
| 366 | add %o1, 1, %o1 | 421 | add %i1, 1, %i1 |
| 367 | retl | 422 | ret |
| 368 | mov EX_RETVAL(GLOBAL_SPARE), %o0 | 423 | restore EX_RETVAL(%i0), %g0, %o0 |
| 369 | 424 | ||
| 370 | .size FUNC_NAME, .-FUNC_NAME | 425 | .size FUNC_NAME, .-FUNC_NAME |
