aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-10-31 11:03:56 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-10-31 11:03:56 -0400
commit: 04ed7d9c78c7497146eb760377d031ffd1c91fb5 (patch)
tree: 683ccf47c7a2552a4baa47f5771f71da1e6871dd
parent: 2a26d99b251b8625d27aed14e97fc10707a3a81f (diff)
parent: a74ad5e660a9ee1d071665e7e8ad822784a2dc7f (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
Pull sparc fixes from David Miller:
 "Several sparc64 bug fixes here:

  1) Make the user copy routines on sparc64 return a properly accurate
     residual length when an exception occurs.

  2) We can get enormous kernel TLB range flush requests from vmalloc
     unmaps, so handle these more gracefully by doing full flushes
     instead of going page-by-page.

  3) Cope properly with negative branch offsets in sparc jump-label
     support, from James Clarke.

  4) Some old-style decl GCC warning fixups from Tobias Klauser"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
  sparc64: Handle extremely large kernel TLB range flushes more gracefully.
  sparc64: Fix illegal relative branches in hypervisor patched TLB cross-call code.
  sparc64: Fix instruction count in comment for __hypervisor_flush_tlb_pending.
  sparc64: Handle extremely large kernel TSB range flushes sanely.
  sparc: Handle negative offsets in arch_jump_label_transform
  sparc64: Fix illegal relative branches in hypervisor patched TLB code.
  sparc64: Delete now unused user copy fixup functions.
  sparc64: Delete now unused user copy assembler helpers.
  sparc64: Convert U3copy_{from,to}_user to accurate exception reporting.
  sparc64: Convert NG2copy_{from,to}_user to accurate exception reporting.
  sparc64: Convert NGcopy_{from,to}_user to accurate exception reporting.
  sparc64: Convert NG4copy_{from,to}_user to accurate exception reporting.
  sparc64: Convert U1copy_{from,to}_user to accurate exception reporting.
  sparc64: Convert GENcopy_{from,to}_user to accurate exception reporting.
  sparc64: Convert copy_in_user to accurate exception reporting.
  sparc64: Prepare to move to more saner user copy exception handling.
  sparc64: Delete __ret_efault.
  sparc32: Fix old style declaration GCC warnings
  sparc64: Fix old style declaration GCC warnings
  sparc64: Setup a scheduling domain for highest level cache.
-rw-r--r-- arch/sparc/include/asm/cpudata_64.h 5
-rw-r--r-- arch/sparc/include/asm/spinlock_32.h 2
-rw-r--r-- arch/sparc/include/asm/spinlock_64.h 12
-rw-r--r-- arch/sparc/include/asm/topology_64.h 8
-rw-r--r-- arch/sparc/include/asm/uaccess_64.h 28
-rw-r--r-- arch/sparc/kernel/head_64.S 37
-rw-r--r-- arch/sparc/kernel/jump_label.c 23
-rw-r--r-- arch/sparc/kernel/mdesc.c 46
-rw-r--r-- arch/sparc/kernel/smp_64.c 8
-rw-r--r-- arch/sparc/lib/GENcopy_from_user.S 4
-rw-r--r-- arch/sparc/lib/GENcopy_to_user.S 4
-rw-r--r-- arch/sparc/lib/GENmemcpy.S 48
-rw-r--r-- arch/sparc/lib/Makefile 2
-rw-r--r-- arch/sparc/lib/NG2copy_from_user.S 8
-rw-r--r-- arch/sparc/lib/NG2copy_to_user.S 8
-rw-r--r-- arch/sparc/lib/NG2memcpy.S 228
-rw-r--r-- arch/sparc/lib/NG4copy_from_user.S 8
-rw-r--r-- arch/sparc/lib/NG4copy_to_user.S 8
-rw-r--r-- arch/sparc/lib/NG4memcpy.S 294
-rw-r--r-- arch/sparc/lib/NGcopy_from_user.S 4
-rw-r--r-- arch/sparc/lib/NGcopy_to_user.S 4
-rw-r--r-- arch/sparc/lib/NGmemcpy.S 233
-rw-r--r-- arch/sparc/lib/U1copy_from_user.S 8
-rw-r--r-- arch/sparc/lib/U1copy_to_user.S 8
-rw-r--r-- arch/sparc/lib/U1memcpy.S 345
-rw-r--r-- arch/sparc/lib/U3copy_from_user.S 8
-rw-r--r-- arch/sparc/lib/U3copy_to_user.S 8
-rw-r--r-- arch/sparc/lib/U3memcpy.S 227
-rw-r--r-- arch/sparc/lib/copy_in_user.S 35
-rw-r--r-- arch/sparc/lib/user_fixup.c 71
-rw-r--r-- arch/sparc/mm/tsb.c 17
-rw-r--r-- arch/sparc/mm/ultra.S 374
32 files changed, 1400 insertions, 723 deletions
diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h
index a6cfdabb6054..5b0ed48e5b0c 100644
--- a/arch/sparc/include/asm/cpudata_64.h
+++ b/arch/sparc/include/asm/cpudata_64.h
@@ -24,9 +24,10 @@ typedef struct {
24 unsigned int icache_line_size; 24 unsigned int icache_line_size;
25 unsigned int ecache_size; 25 unsigned int ecache_size;
26 unsigned int ecache_line_size; 26 unsigned int ecache_line_size;
27 unsigned short sock_id; 27 unsigned short sock_id; /* physical package */
28 unsigned short core_id; 28 unsigned short core_id;
29 int proc_id; 29 unsigned short max_cache_id; /* groupings of highest shared cache */
30 unsigned short proc_id; /* strand (aka HW thread) id */
30} cpuinfo_sparc; 31} cpuinfo_sparc;
31 32
32DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); 33DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index d9c5876c6121..8011e79f59c9 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -134,7 +134,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
134 *(volatile __u32 *)&lp->lock = ~0U; 134 *(volatile __u32 *)&lp->lock = ~0U;
135} 135}
136 136
137static void inline arch_write_unlock(arch_rwlock_t *lock) 137static inline void arch_write_unlock(arch_rwlock_t *lock)
138{ 138{
139 __asm__ __volatile__( 139 __asm__ __volatile__(
140" st %%g0, [%0]" 140" st %%g0, [%0]"
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h
index 87990b7c6b0d..07c9f2e9bf57 100644
--- a/arch/sparc/include/asm/spinlock_64.h
+++ b/arch/sparc/include/asm/spinlock_64.h
@@ -96,7 +96,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long fla
96 96
97/* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */ 97/* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */
98 98
99static void inline arch_read_lock(arch_rwlock_t *lock) 99static inline void arch_read_lock(arch_rwlock_t *lock)
100{ 100{
101 unsigned long tmp1, tmp2; 101 unsigned long tmp1, tmp2;
102 102
@@ -119,7 +119,7 @@ static void inline arch_read_lock(arch_rwlock_t *lock)
119 : "memory"); 119 : "memory");
120} 120}
121 121
122static int inline arch_read_trylock(arch_rwlock_t *lock) 122static inline int arch_read_trylock(arch_rwlock_t *lock)
123{ 123{
124 int tmp1, tmp2; 124 int tmp1, tmp2;
125 125
@@ -140,7 +140,7 @@ static int inline arch_read_trylock(arch_rwlock_t *lock)
140 return tmp1; 140 return tmp1;
141} 141}
142 142
143static void inline arch_read_unlock(arch_rwlock_t *lock) 143static inline void arch_read_unlock(arch_rwlock_t *lock)
144{ 144{
145 unsigned long tmp1, tmp2; 145 unsigned long tmp1, tmp2;
146 146
@@ -156,7 +156,7 @@ static void inline arch_read_unlock(arch_rwlock_t *lock)
156 : "memory"); 156 : "memory");
157} 157}
158 158
159static void inline arch_write_lock(arch_rwlock_t *lock) 159static inline void arch_write_lock(arch_rwlock_t *lock)
160{ 160{
161 unsigned long mask, tmp1, tmp2; 161 unsigned long mask, tmp1, tmp2;
162 162
@@ -181,7 +181,7 @@ static void inline arch_write_lock(arch_rwlock_t *lock)
181 : "memory"); 181 : "memory");
182} 182}
183 183
184static void inline arch_write_unlock(arch_rwlock_t *lock) 184static inline void arch_write_unlock(arch_rwlock_t *lock)
185{ 185{
186 __asm__ __volatile__( 186 __asm__ __volatile__(
187" stw %%g0, [%0]" 187" stw %%g0, [%0]"
@@ -190,7 +190,7 @@ static void inline arch_write_unlock(arch_rwlock_t *lock)
190 : "memory"); 190 : "memory");
191} 191}
192 192
193static int inline arch_write_trylock(arch_rwlock_t *lock) 193static inline int arch_write_trylock(arch_rwlock_t *lock)
194{ 194{
195 unsigned long mask, tmp1, tmp2, result; 195 unsigned long mask, tmp1, tmp2, result;
196 196
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index bec481aaca16..7b4898a36eee 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -44,14 +44,20 @@ int __node_distance(int, int);
44#define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id) 44#define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id)
45#define topology_core_id(cpu) (cpu_data(cpu).core_id) 45#define topology_core_id(cpu) (cpu_data(cpu).core_id)
46#define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu]) 46#define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu])
47#define topology_core_cache_cpumask(cpu) (&cpu_core_sib_cache_map[cpu])
47#define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) 48#define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
48#endif /* CONFIG_SMP */ 49#endif /* CONFIG_SMP */
49 50
50extern cpumask_t cpu_core_map[NR_CPUS]; 51extern cpumask_t cpu_core_map[NR_CPUS];
51extern cpumask_t cpu_core_sib_map[NR_CPUS]; 52extern cpumask_t cpu_core_sib_map[NR_CPUS];
53extern cpumask_t cpu_core_sib_cache_map[NR_CPUS];
54
55/**
56 * Return cores that shares the last level cache.
57 */
52static inline const struct cpumask *cpu_coregroup_mask(int cpu) 58static inline const struct cpumask *cpu_coregroup_mask(int cpu)
53{ 59{
54 return &cpu_core_map[cpu]; 60 return &cpu_core_sib_cache_map[cpu];
55} 61}
56 62
57#endif /* _ASM_SPARC64_TOPOLOGY_H */ 63#endif /* _ASM_SPARC64_TOPOLOGY_H */
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index b68acc563235..5373136c412b 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -82,7 +82,6 @@ static inline int access_ok(int type, const void __user * addr, unsigned long si
82 return 1; 82 return 1;
83} 83}
84 84
85void __ret_efault(void);
86void __retl_efault(void); 85void __retl_efault(void);
87 86
88/* Uh, these should become the main single-value transfer routines.. 87/* Uh, these should become the main single-value transfer routines..
@@ -189,55 +188,34 @@ int __get_user_bad(void);
189unsigned long __must_check ___copy_from_user(void *to, 188unsigned long __must_check ___copy_from_user(void *to,
190 const void __user *from, 189 const void __user *from,
191 unsigned long size); 190 unsigned long size);
192unsigned long copy_from_user_fixup(void *to, const void __user *from,
193 unsigned long size);
194static inline unsigned long __must_check 191static inline unsigned long __must_check
195copy_from_user(void *to, const void __user *from, unsigned long size) 192copy_from_user(void *to, const void __user *from, unsigned long size)
196{ 193{
197 unsigned long ret;
198
199 check_object_size(to, size, false); 194 check_object_size(to, size, false);
200 195
201 ret = ___copy_from_user(to, from, size); 196 return ___copy_from_user(to, from, size);
202 if (unlikely(ret))
203 ret = copy_from_user_fixup(to, from, size);
204
205 return ret;
206} 197}
207#define __copy_from_user copy_from_user 198#define __copy_from_user copy_from_user
208 199
209unsigned long __must_check ___copy_to_user(void __user *to, 200unsigned long __must_check ___copy_to_user(void __user *to,
210 const void *from, 201 const void *from,
211 unsigned long size); 202 unsigned long size);
212unsigned long copy_to_user_fixup(void __user *to, const void *from,
213 unsigned long size);
214static inline unsigned long __must_check 203static inline unsigned long __must_check
215copy_to_user(void __user *to, const void *from, unsigned long size) 204copy_to_user(void __user *to, const void *from, unsigned long size)
216{ 205{
217 unsigned long ret;
218
219 check_object_size(from, size, true); 206 check_object_size(from, size, true);
220 207
221 ret = ___copy_to_user(to, from, size); 208 return ___copy_to_user(to, from, size);
222 if (unlikely(ret))
223 ret = copy_to_user_fixup(to, from, size);
224 return ret;
225} 209}
226#define __copy_to_user copy_to_user 210#define __copy_to_user copy_to_user
227 211
228unsigned long __must_check ___copy_in_user(void __user *to, 212unsigned long __must_check ___copy_in_user(void __user *to,
229 const void __user *from, 213 const void __user *from,
230 unsigned long size); 214 unsigned long size);
231unsigned long copy_in_user_fixup(void __user *to, void __user *from,
232 unsigned long size);
233static inline unsigned long __must_check 215static inline unsigned long __must_check
234copy_in_user(void __user *to, void __user *from, unsigned long size) 216copy_in_user(void __user *to, void __user *from, unsigned long size)
235{ 217{
236 unsigned long ret = ___copy_in_user(to, from, size); 218 return ___copy_in_user(to, from, size);
237
238 if (unlikely(ret))
239 ret = copy_in_user_fixup(to, from, size);
240 return ret;
241} 219}
242#define __copy_in_user copy_in_user 220#define __copy_in_user copy_in_user
243 221
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index beba6c11554c..6aa3da152c20 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -926,48 +926,11 @@ tlb_type: .word 0 /* Must NOT end up in BSS */
926EXPORT_SYMBOL(tlb_type) 926EXPORT_SYMBOL(tlb_type)
927 .section ".fixup",#alloc,#execinstr 927 .section ".fixup",#alloc,#execinstr
928 928
929 .globl __ret_efault, __retl_efault, __ret_one, __retl_one
930ENTRY(__ret_efault)
931 ret
932 restore %g0, -EFAULT, %o0
933ENDPROC(__ret_efault)
934EXPORT_SYMBOL(__ret_efault)
935
936ENTRY(__retl_efault) 929ENTRY(__retl_efault)
937 retl 930 retl
938 mov -EFAULT, %o0 931 mov -EFAULT, %o0
939ENDPROC(__retl_efault) 932ENDPROC(__retl_efault)
940 933
941ENTRY(__retl_one)
942 retl
943 mov 1, %o0
944ENDPROC(__retl_one)
945
946ENTRY(__retl_one_fp)
947 VISExitHalf
948 retl
949 mov 1, %o0
950ENDPROC(__retl_one_fp)
951
952ENTRY(__ret_one_asi)
953 wr %g0, ASI_AIUS, %asi
954 ret
955 restore %g0, 1, %o0
956ENDPROC(__ret_one_asi)
957
958ENTRY(__retl_one_asi)
959 wr %g0, ASI_AIUS, %asi
960 retl
961 mov 1, %o0
962ENDPROC(__retl_one_asi)
963
964ENTRY(__retl_one_asi_fp)
965 wr %g0, ASI_AIUS, %asi
966 VISExitHalf
967 retl
968 mov 1, %o0
969ENDPROC(__retl_one_asi_fp)
970
971ENTRY(__retl_o1) 934ENTRY(__retl_o1)
972 retl 935 retl
973 mov %o1, %o0 936 mov %o1, %o0
diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
index 59bbeff55024..07933b9e9ce0 100644
--- a/arch/sparc/kernel/jump_label.c
+++ b/arch/sparc/kernel/jump_label.c
@@ -13,19 +13,30 @@
13void arch_jump_label_transform(struct jump_entry *entry, 13void arch_jump_label_transform(struct jump_entry *entry,
14 enum jump_label_type type) 14 enum jump_label_type type)
15{ 15{
16 u32 val;
17 u32 *insn = (u32 *) (unsigned long) entry->code; 16 u32 *insn = (u32 *) (unsigned long) entry->code;
17 u32 val;
18 18
19 if (type == JUMP_LABEL_JMP) { 19 if (type == JUMP_LABEL_JMP) {
20 s32 off = (s32)entry->target - (s32)entry->code; 20 s32 off = (s32)entry->target - (s32)entry->code;
21 bool use_v9_branch = false;
22
23 BUG_ON(off & 3);
21 24
22#ifdef CONFIG_SPARC64 25#ifdef CONFIG_SPARC64
23 /* ba,pt %xcc, . + (off << 2) */ 26 if (off <= 0xfffff && off >= -0x100000)
24 val = 0x10680000 | ((u32) off >> 2); 27 use_v9_branch = true;
25#else
26 /* ba . + (off << 2) */
27 val = 0x10800000 | ((u32) off >> 2);
28#endif 28#endif
29 if (use_v9_branch) {
30 /* WDISP19 - target is . + immed << 2 */
31 /* ba,pt %xcc, . + off */
32 val = 0x10680000 | (((u32) off >> 2) & 0x7ffff);
33 } else {
34 /* WDISP22 - target is . + immed << 2 */
35 BUG_ON(off > 0x7fffff);
36 BUG_ON(off < -0x800000);
37 /* ba . + off */
38 val = 0x10800000 | (((u32) off >> 2) & 0x3fffff);
39 }
29 } else { 40 } else {
30 val = 0x01000000; 41 val = 0x01000000;
31 } 42 }
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 11228861d9b4..8a6982dfd733 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -645,13 +645,20 @@ static void __mark_core_id(struct mdesc_handle *hp, u64 node,
645 cpu_data(*id).core_id = core_id; 645 cpu_data(*id).core_id = core_id;
646} 646}
647 647
648static void __mark_sock_id(struct mdesc_handle *hp, u64 node, 648static void __mark_max_cache_id(struct mdesc_handle *hp, u64 node,
649 int sock_id) 649 int max_cache_id)
650{ 650{
651 const u64 *id = mdesc_get_property(hp, node, "id", NULL); 651 const u64 *id = mdesc_get_property(hp, node, "id", NULL);
652 652
653 if (*id < num_possible_cpus()) 653 if (*id < num_possible_cpus()) {
654 cpu_data(*id).sock_id = sock_id; 654 cpu_data(*id).max_cache_id = max_cache_id;
655
656 /**
657 * On systems without explicit socket descriptions socket
658 * is max_cache_id
659 */
660 cpu_data(*id).sock_id = max_cache_id;
661 }
655} 662}
656 663
657static void mark_core_ids(struct mdesc_handle *hp, u64 mp, 664static void mark_core_ids(struct mdesc_handle *hp, u64 mp,
@@ -660,10 +667,11 @@ static void mark_core_ids(struct mdesc_handle *hp, u64 mp,
660 find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10); 667 find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10);
661} 668}
662 669
663static void mark_sock_ids(struct mdesc_handle *hp, u64 mp, 670static void mark_max_cache_ids(struct mdesc_handle *hp, u64 mp,
664 int sock_id) 671 int max_cache_id)
665{ 672{
666 find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10); 673 find_back_node_value(hp, mp, "cpu", __mark_max_cache_id,
674 max_cache_id, 10);
667} 675}
668 676
669static void set_core_ids(struct mdesc_handle *hp) 677static void set_core_ids(struct mdesc_handle *hp)
@@ -694,14 +702,15 @@ static void set_core_ids(struct mdesc_handle *hp)
694 } 702 }
695} 703}
696 704
697static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level) 705static int set_max_cache_ids_by_cache(struct mdesc_handle *hp, int level)
698{ 706{
699 u64 mp; 707 u64 mp;
700 int idx = 1; 708 int idx = 1;
701 int fnd = 0; 709 int fnd = 0;
702 710
703 /* Identify unique sockets by looking for cpus backpointed to by 711 /**
704 * shared level n caches. 712 * Identify unique highest level of shared cache by looking for cpus
713 * backpointed to by shared level N caches.
705 */ 714 */
706 mdesc_for_each_node_by_name(hp, mp, "cache") { 715 mdesc_for_each_node_by_name(hp, mp, "cache") {
707 const u64 *cur_lvl; 716 const u64 *cur_lvl;
@@ -709,8 +718,7 @@ static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level)
709 cur_lvl = mdesc_get_property(hp, mp, "level", NULL); 718 cur_lvl = mdesc_get_property(hp, mp, "level", NULL);
710 if (*cur_lvl != level) 719 if (*cur_lvl != level)
711 continue; 720 continue;
712 721 mark_max_cache_ids(hp, mp, idx);
713 mark_sock_ids(hp, mp, idx);
714 idx++; 722 idx++;
715 fnd = 1; 723 fnd = 1;
716 } 724 }
@@ -745,15 +753,17 @@ static void set_sock_ids(struct mdesc_handle *hp)
745{ 753{
746 u64 mp; 754 u64 mp;
747 755
748 /* If machine description exposes sockets data use it. 756 /**
749 * Otherwise fallback to use shared L3 or L2 caches. 757 * Find the highest level of shared cache which pre-T7 is also
758 * the socket.
750 */ 759 */
760 if (!set_max_cache_ids_by_cache(hp, 3))
761 set_max_cache_ids_by_cache(hp, 2);
762
763 /* If machine description exposes sockets data use it.*/
751 mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets"); 764 mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets");
752 if (mp != MDESC_NODE_NULL) 765 if (mp != MDESC_NODE_NULL)
753 return set_sock_ids_by_socket(hp, mp); 766 set_sock_ids_by_socket(hp, mp);
754
755 if (!set_sock_ids_by_cache(hp, 3))
756 set_sock_ids_by_cache(hp, 2);
757} 767}
758 768
759static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id) 769static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index d3035ba6cd31..8182f7caf5b1 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -63,9 +63,13 @@ cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
63cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = { 63cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = {
64 [0 ... NR_CPUS-1] = CPU_MASK_NONE }; 64 [0 ... NR_CPUS-1] = CPU_MASK_NONE };
65 65
66cpumask_t cpu_core_sib_cache_map[NR_CPUS] __read_mostly = {
67 [0 ... NR_CPUS - 1] = CPU_MASK_NONE };
68
66EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); 69EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
67EXPORT_SYMBOL(cpu_core_map); 70EXPORT_SYMBOL(cpu_core_map);
68EXPORT_SYMBOL(cpu_core_sib_map); 71EXPORT_SYMBOL(cpu_core_sib_map);
72EXPORT_SYMBOL(cpu_core_sib_cache_map);
69 73
70static cpumask_t smp_commenced_mask; 74static cpumask_t smp_commenced_mask;
71 75
@@ -1265,6 +1269,10 @@ void smp_fill_in_sib_core_maps(void)
1265 unsigned int j; 1269 unsigned int j;
1266 1270
1267 for_each_present_cpu(j) { 1271 for_each_present_cpu(j) {
1272 if (cpu_data(i).max_cache_id ==
1273 cpu_data(j).max_cache_id)
1274 cpumask_set_cpu(j, &cpu_core_sib_cache_map[i]);
1275
1268 if (cpu_data(i).sock_id == cpu_data(j).sock_id) 1276 if (cpu_data(i).sock_id == cpu_data(j).sock_id)
1269 cpumask_set_cpu(j, &cpu_core_sib_map[i]); 1277 cpumask_set_cpu(j, &cpu_core_sib_map[i]);
1270 } 1278 }
diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S
index b7d0bd6b1406..69a439fa2fc1 100644
--- a/arch/sparc/lib/GENcopy_from_user.S
+++ b/arch/sparc/lib/GENcopy_from_user.S
@@ -3,11 +3,11 @@
3 * Copyright (C) 2007 David S. Miller (davem@davemloft.net) 3 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
4 */ 4 */
5 5
6#define EX_LD(x) \ 6#define EX_LD(x,y) \
798: x; \ 798: x; \
8 .section __ex_table,"a";\ 8 .section __ex_table,"a";\
9 .align 4; \ 9 .align 4; \
10 .word 98b, __retl_one; \ 10 .word 98b, y; \
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S
index 780550e1afc7..9947427ce354 100644
--- a/arch/sparc/lib/GENcopy_to_user.S
+++ b/arch/sparc/lib/GENcopy_to_user.S
@@ -3,11 +3,11 @@
3 * Copyright (C) 2007 David S. Miller (davem@davemloft.net) 3 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
4 */ 4 */
5 5
6#define EX_ST(x) \ 6#define EX_ST(x,y) \
798: x; \ 798: x; \
8 .section __ex_table,"a";\ 8 .section __ex_table,"a";\
9 .align 4; \ 9 .align 4; \
10 .word 98b, __retl_one; \ 10 .word 98b, y; \
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S
index 89358ee94851..059ea24ad73d 100644
--- a/arch/sparc/lib/GENmemcpy.S
+++ b/arch/sparc/lib/GENmemcpy.S
@@ -4,21 +4,18 @@
4 */ 4 */
5 5
6#ifdef __KERNEL__ 6#ifdef __KERNEL__
7#include <linux/linkage.h>
7#define GLOBAL_SPARE %g7 8#define GLOBAL_SPARE %g7
8#else 9#else
9#define GLOBAL_SPARE %g5 10#define GLOBAL_SPARE %g5
10#endif 11#endif
11 12
12#ifndef EX_LD 13#ifndef EX_LD
13#define EX_LD(x) x 14#define EX_LD(x,y) x
14#endif 15#endif
15 16
16#ifndef EX_ST 17#ifndef EX_ST
17#define EX_ST(x) x 18#define EX_ST(x,y) x
18#endif
19
20#ifndef EX_RETVAL
21#define EX_RETVAL(x) x
22#endif 19#endif
23 20
24#ifndef LOAD 21#ifndef LOAD
@@ -45,6 +42,29 @@
45 .register %g3,#scratch 42 .register %g3,#scratch
46 43
47 .text 44 .text
45
46#ifndef EX_RETVAL
47#define EX_RETVAL(x) x
48ENTRY(GEN_retl_o4_1)
49 add %o4, %o2, %o4
50 retl
51 add %o4, 1, %o0
52ENDPROC(GEN_retl_o4_1)
53ENTRY(GEN_retl_g1_8)
54 add %g1, %o2, %g1
55 retl
56 add %g1, 8, %o0
57ENDPROC(GEN_retl_g1_8)
58ENTRY(GEN_retl_o2_4)
59 retl
60 add %o2, 4, %o0
61ENDPROC(GEN_retl_o2_4)
62ENTRY(GEN_retl_o2_1)
63 retl
64 add %o2, 1, %o0
65ENDPROC(GEN_retl_o2_1)
66#endif
67
48 .align 64 68 .align 64
49 69
50 .globl FUNC_NAME 70 .globl FUNC_NAME
@@ -73,8 +93,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
73 sub %g0, %o4, %o4 93 sub %g0, %o4, %o4
74 sub %o2, %o4, %o2 94 sub %o2, %o4, %o2
751: subcc %o4, 1, %o4 951: subcc %o4, 1, %o4
76 EX_LD(LOAD(ldub, %o1, %g1)) 96 EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1)
77 EX_ST(STORE(stb, %g1, %o0)) 97 EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1)
78 add %o1, 1, %o1 98 add %o1, 1, %o1
79 bne,pt %XCC, 1b 99 bne,pt %XCC, 1b
80 add %o0, 1, %o0 100 add %o0, 1, %o0
@@ -82,8 +102,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
82 andn %o2, 0x7, %g1 102 andn %o2, 0x7, %g1
83 sub %o2, %g1, %o2 103 sub %o2, %g1, %o2
841: subcc %g1, 0x8, %g1 1041: subcc %g1, 0x8, %g1
85 EX_LD(LOAD(ldx, %o1, %g2)) 105 EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8)
86 EX_ST(STORE(stx, %g2, %o0)) 106 EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8)
87 add %o1, 0x8, %o1 107 add %o1, 0x8, %o1
88 bne,pt %XCC, 1b 108 bne,pt %XCC, 1b
89 add %o0, 0x8, %o0 109 add %o0, 0x8, %o0
@@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
100 120
1011: 1211:
102 subcc %o2, 4, %o2 122 subcc %o2, 4, %o2
103 EX_LD(LOAD(lduw, %o1, %g1)) 123 EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4)
104 EX_ST(STORE(stw, %g1, %o1 + %o3)) 124 EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4)
105 bgu,pt %XCC, 1b 125 bgu,pt %XCC, 1b
106 add %o1, 4, %o1 126 add %o1, 4, %o1
107 127
@@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
111 .align 32 131 .align 32
11290: 13290:
113 subcc %o2, 1, %o2 133 subcc %o2, 1, %o2
114 EX_LD(LOAD(ldub, %o1, %g1)) 134 EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1)
115 EX_ST(STORE(stb, %g1, %o1 + %o3)) 135 EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1)
116 bgu,pt %XCC, 90b 136 bgu,pt %XCC, 90b
117 add %o1, 1, %o1 137 add %o1, 1, %o1
118 retl 138 retl
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 885f00e81d1a..69912d2f8b54 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o
38lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o 38lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
39lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o 39lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
40 40
41lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o 41lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o
42lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o 42lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
43 43
44obj-$(CONFIG_SPARC64) += iomap.o 44obj-$(CONFIG_SPARC64) += iomap.o
diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S
index d5242b8c4f94..b79a6998d87c 100644
--- a/arch/sparc/lib/NG2copy_from_user.S
+++ b/arch/sparc/lib/NG2copy_from_user.S
@@ -3,19 +3,19 @@
3 * Copyright (C) 2007 David S. Miller (davem@davemloft.net) 3 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
4 */ 4 */
5 5
6#define EX_LD(x) \ 6#define EX_LD(x,y) \
798: x; \ 798: x; \
8 .section __ex_table,"a";\ 8 .section __ex_table,"a";\
9 .align 4; \ 9 .align 4; \
10 .word 98b, __retl_one_asi;\ 10 .word 98b, y; \
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
14#define EX_LD_FP(x) \ 14#define EX_LD_FP(x,y) \
1598: x; \ 1598: x; \
16 .section __ex_table,"a";\ 16 .section __ex_table,"a";\
17 .align 4; \ 17 .align 4; \
18 .word 98b, __retl_one_asi_fp;\ 18 .word 98b, y##_fp; \
19 .text; \ 19 .text; \
20 .align 4; 20 .align 4;
21 21
diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S
index 4e962d993b10..dcec55f254ab 100644
--- a/arch/sparc/lib/NG2copy_to_user.S
+++ b/arch/sparc/lib/NG2copy_to_user.S
@@ -3,19 +3,19 @@
3 * Copyright (C) 2007 David S. Miller (davem@davemloft.net) 3 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
4 */ 4 */
5 5
6#define EX_ST(x) \ 6#define EX_ST(x,y) \
798: x; \ 798: x; \
8 .section __ex_table,"a";\ 8 .section __ex_table,"a";\
9 .align 4; \ 9 .align 4; \
10 .word 98b, __retl_one_asi;\ 10 .word 98b, y; \
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
14#define EX_ST_FP(x) \ 14#define EX_ST_FP(x,y) \
1598: x; \ 1598: x; \
16 .section __ex_table,"a";\ 16 .section __ex_table,"a";\
17 .align 4; \ 17 .align 4; \
18 .word 98b, __retl_one_asi_fp;\ 18 .word 98b, y##_fp; \
19 .text; \ 19 .text; \
20 .align 4; 20 .align 4;
21 21
diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
index d5f585df2f3f..c629dbd121b6 100644
--- a/arch/sparc/lib/NG2memcpy.S
+++ b/arch/sparc/lib/NG2memcpy.S
@@ -4,6 +4,7 @@
4 */ 4 */
5 5
6#ifdef __KERNEL__ 6#ifdef __KERNEL__
7#include <linux/linkage.h>
7#include <asm/visasm.h> 8#include <asm/visasm.h>
8#include <asm/asi.h> 9#include <asm/asi.h>
9#define GLOBAL_SPARE %g7 10#define GLOBAL_SPARE %g7
@@ -32,21 +33,17 @@
32#endif 33#endif
33 34
34#ifndef EX_LD 35#ifndef EX_LD
35#define EX_LD(x) x 36#define EX_LD(x,y) x
36#endif 37#endif
37#ifndef EX_LD_FP 38#ifndef EX_LD_FP
38#define EX_LD_FP(x) x 39#define EX_LD_FP(x,y) x
39#endif 40#endif
40 41
41#ifndef EX_ST 42#ifndef EX_ST
42#define EX_ST(x) x 43#define EX_ST(x,y) x
43#endif 44#endif
44#ifndef EX_ST_FP 45#ifndef EX_ST_FP
45#define EX_ST_FP(x) x 46#define EX_ST_FP(x,y) x
46#endif
47
48#ifndef EX_RETVAL
49#define EX_RETVAL(x) x
50#endif 47#endif
51 48
52#ifndef LOAD 49#ifndef LOAD
@@ -140,45 +137,110 @@
140 fsrc2 %x6, %f12; \ 137 fsrc2 %x6, %f12; \
141 fsrc2 %x7, %f14; 138 fsrc2 %x7, %f14;
/*
 * FREG_LOAD_n(base, x0, ...): issue n consecutive ldd loads from base + 0x00,
 * base + 0x08, ... (8-byte steps) into the named registers %x0, %x1, ....
 * Every load is wrapped in EX_LD_FP; on the new side each one passes
 * NG2_retl_o2_plus_g1 as the fixup argument regardless of its offset.
 */
142#define FREG_LOAD_1(base, x0) \ 139#define FREG_LOAD_1(base, x0) \
143 EX_LD_FP(LOAD(ldd, base + 0x00, %x0)) 140 EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
144#define FREG_LOAD_2(base, x0, x1) \ 141#define FREG_LOAD_2(base, x0, x1) \
145 EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ 142 EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
146 EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); 143 EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
147#define FREG_LOAD_3(base, x0, x1, x2) \ 144#define FREG_LOAD_3(base, x0, x1, x2) \
148 EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ 145 EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
149 EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ 146 EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
150 EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); 147 EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
151#define FREG_LOAD_4(base, x0, x1, x2, x3) \ 148#define FREG_LOAD_4(base, x0, x1, x2, x3) \
152 EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ 149 EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
153 EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ 150 EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
154 EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ 151 EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
155 EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); 152 EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
156#define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ 153#define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
157 EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ 154 EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
158 EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ 155 EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
159 EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ 156 EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
160 EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ 157 EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
161 EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); 158 EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
162#define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ 159#define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
163 EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ 160 EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
164 EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ 161 EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
165 EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ 162 EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
166 EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ 163 EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
167 EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ 164 EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
168 EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); 165 EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
169#define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ 166#define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
170 EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ 167 EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
171 EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ 168 EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
172 EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ 169 EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
173 EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ 170 EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
174 EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ 171 EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
175 EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \ 172 EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
176 EX_LD_FP(LOAD(ldd, base + 0x30, %x6)); 173 EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);
177 174
178 .register %g2,#scratch 175 .register %g2,#scratch
179 .register %g3,#scratch 176 .register %g3,#scratch
180 177
181 .text 178 .text
/*
 * Fault-fixup stubs added by this change. They are assembled only when
 * EX_RETVAL has not already been defined by an including file, and the same
 * guard supplies the pass-through EX_RETVAL(x) default.
 *
 * Each NG2_retl_* entry leaves in %o0 the value spelled out by its name
 * (for example NG2_retl_o2_plus_o4_plus_8 yields %o2 + %o4 + 8); the final
 * add/mov sits in the delay slot of the branch. Entries with an extra
 * constant or mask first modify %o4, %g1 or %o2 in place.
 *
 * Plain entries branch to __restore_asi, which returns with retl and writes
 * ASI_AIUS to %asi in the delay slot. The *_fp entries branch to
 * __restore_fp, which runs VISExitHalf and then falls through into
 * __restore_asi.
 *
 * NOTE(review): %o0 is presumably the number of bytes left uncopied (the
 * "accurate residual length" mentioned in the merge description) -- confirm
 * against the callers of the user-copy routines.
 */
179#ifndef EX_RETVAL
180#define EX_RETVAL(x) x
181__restore_fp:
182 VISExitHalf
183__restore_asi:
184 retl
185 wr %g0, ASI_AIUS, %asi
186ENTRY(NG2_retl_o2)
187 ba,pt %xcc, __restore_asi
188 mov %o2, %o0
189ENDPROC(NG2_retl_o2)
190ENTRY(NG2_retl_o2_plus_1)
191 ba,pt %xcc, __restore_asi
192 add %o2, 1, %o0
193ENDPROC(NG2_retl_o2_plus_1)
194ENTRY(NG2_retl_o2_plus_4)
195 ba,pt %xcc, __restore_asi
196 add %o2, 4, %o0
197ENDPROC(NG2_retl_o2_plus_4)
198ENTRY(NG2_retl_o2_plus_8)
199 ba,pt %xcc, __restore_asi
200 add %o2, 8, %o0
201ENDPROC(NG2_retl_o2_plus_8)
202ENTRY(NG2_retl_o2_plus_o4_plus_1)
203 add %o4, 1, %o4
204 ba,pt %xcc, __restore_asi
205 add %o2, %o4, %o0
206ENDPROC(NG2_retl_o2_plus_o4_plus_1)
207ENTRY(NG2_retl_o2_plus_o4_plus_8)
208 add %o4, 8, %o4
209 ba,pt %xcc, __restore_asi
210 add %o2, %o4, %o0
211ENDPROC(NG2_retl_o2_plus_o4_plus_8)
212ENTRY(NG2_retl_o2_plus_o4_plus_16)
213 add %o4, 16, %o4
214 ba,pt %xcc, __restore_asi
215 add %o2, %o4, %o0
216ENDPROC(NG2_retl_o2_plus_o4_plus_16)
217ENTRY(NG2_retl_o2_plus_g1_fp)
218 ba,pt %xcc, __restore_fp
219 add %o2, %g1, %o0
220ENDPROC(NG2_retl_o2_plus_g1_fp)
221ENTRY(NG2_retl_o2_plus_g1_plus_64_fp)
222 add %g1, 64, %g1
223 ba,pt %xcc, __restore_fp
224 add %o2, %g1, %o0
225ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp)
226ENTRY(NG2_retl_o2_plus_g1_plus_1)
227 add %g1, 1, %g1
228 ba,pt %xcc, __restore_asi
229 add %o2, %g1, %o0
230ENDPROC(NG2_retl_o2_plus_g1_plus_1)
231ENTRY(NG2_retl_o2_and_7_plus_o4)
232 and %o2, 7, %o2
233 ba,pt %xcc, __restore_asi
234 add %o2, %o4, %o0
235ENDPROC(NG2_retl_o2_and_7_plus_o4)
236ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8)
237 and %o2, 7, %o2
238 add %o4, 8, %o4
239 ba,pt %xcc, __restore_asi
240 add %o2, %o4, %o0
241ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8)
242#endif
243
182 .align 64 244 .align 64
183 245
184 .globl FUNC_NAME 246 .globl FUNC_NAME
@@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
230 sub %g0, %o4, %o4 ! bytes to align dst 292 sub %g0, %o4, %o4 ! bytes to align dst
231 sub %o2, %o4, %o2 293 sub %o2, %o4, %o2
2321: subcc %o4, 1, %o4 2941: subcc %o4, 1, %o4
233 EX_LD(LOAD(ldub, %o1, %g1)) 295 EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1)
234 EX_ST(STORE(stb, %g1, %o0)) 296 EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1)
235 add %o1, 1, %o1 297 add %o1, 1, %o1
236 bne,pt %XCC, 1b 298 bne,pt %XCC, 1b
237 add %o0, 1, %o0 299 add %o0, 1, %o0
@@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
281 nop 343 nop
282 /* fall through for 0 < low bits < 8 */ 344 /* fall through for 0 < low bits < 8 */
283110: sub %o4, 64, %g2 345110: sub %o4, 64, %g2
284 EX_LD_FP(LOAD_BLK(%g2, %f0)) 346 EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1)
2851: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 3471: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
286 EX_LD_FP(LOAD_BLK(%o4, %f16)) 348 EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
287 FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) 349 FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
288 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) 350 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
289 FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) 351 FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
290 subcc %g1, 64, %g1 352 subcc %g1, 64, %g1
291 add %o4, 64, %o4 353 add %o4, 64, %o4
@@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
296 358
297120: sub %o4, 56, %g2 359120: sub %o4, 56, %g2
298 FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) 360 FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
2991: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 3611: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
300 EX_LD_FP(LOAD_BLK(%o4, %f16)) 362 EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
301 FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) 363 FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
302 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) 364 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
303 FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) 365 FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
304 subcc %g1, 64, %g1 366 subcc %g1, 64, %g1
305 add %o4, 64, %o4 367 add %o4, 64, %o4
@@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
310 372
311130: sub %o4, 48, %g2 373130: sub %o4, 48, %g2
312 FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) 374 FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
3131: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 3751: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
314 EX_LD_FP(LOAD_BLK(%o4, %f16)) 376 EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
315 FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) 377 FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
316 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) 378 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
317 FREG_MOVE_6(f20, f22, f24, f26, f28, f30) 379 FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
318 subcc %g1, 64, %g1 380 subcc %g1, 64, %g1
319 add %o4, 64, %o4 381 add %o4, 64, %o4
@@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
324 386
325140: sub %o4, 40, %g2 387140: sub %o4, 40, %g2
326 FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) 388 FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
3271: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 3891: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
328 EX_LD_FP(LOAD_BLK(%o4, %f16)) 390 EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
329 FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) 391 FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
330 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) 392 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
331 FREG_MOVE_5(f22, f24, f26, f28, f30) 393 FREG_MOVE_5(f22, f24, f26, f28, f30)
332 subcc %g1, 64, %g1 394 subcc %g1, 64, %g1
333 add %o4, 64, %o4 395 add %o4, 64, %o4
@@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
338 400
339150: sub %o4, 32, %g2 401150: sub %o4, 32, %g2
340 FREG_LOAD_4(%g2, f0, f2, f4, f6) 402 FREG_LOAD_4(%g2, f0, f2, f4, f6)
3411: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 4031: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
342 EX_LD_FP(LOAD_BLK(%o4, %f16)) 404 EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
343 FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) 405 FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
344 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) 406 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
345 FREG_MOVE_4(f24, f26, f28, f30) 407 FREG_MOVE_4(f24, f26, f28, f30)
346 subcc %g1, 64, %g1 408 subcc %g1, 64, %g1
347 add %o4, 64, %o4 409 add %o4, 64, %o4
@@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
352 414
353160: sub %o4, 24, %g2 415160: sub %o4, 24, %g2
354 FREG_LOAD_3(%g2, f0, f2, f4) 416 FREG_LOAD_3(%g2, f0, f2, f4)
3551: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 4171: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
356 EX_LD_FP(LOAD_BLK(%o4, %f16)) 418 EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
357 FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) 419 FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
358 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) 420 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
359 FREG_MOVE_3(f26, f28, f30) 421 FREG_MOVE_3(f26, f28, f30)
360 subcc %g1, 64, %g1 422 subcc %g1, 64, %g1
361 add %o4, 64, %o4 423 add %o4, 64, %o4
@@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
366 428
367170: sub %o4, 16, %g2 429170: sub %o4, 16, %g2
368 FREG_LOAD_2(%g2, f0, f2) 430 FREG_LOAD_2(%g2, f0, f2)
3691: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 4311: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
370 EX_LD_FP(LOAD_BLK(%o4, %f16)) 432 EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
371 FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) 433 FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
372 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) 434 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
373 FREG_MOVE_2(f28, f30) 435 FREG_MOVE_2(f28, f30)
374 subcc %g1, 64, %g1 436 subcc %g1, 64, %g1
375 add %o4, 64, %o4 437 add %o4, 64, %o4
@@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
380 442
381180: sub %o4, 8, %g2 443180: sub %o4, 8, %g2
382 FREG_LOAD_1(%g2, f0) 444 FREG_LOAD_1(%g2, f0)
3831: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 4451: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
384 EX_LD_FP(LOAD_BLK(%o4, %f16)) 446 EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
385 FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) 447 FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
386 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) 448 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
387 FREG_MOVE_1(f30) 449 FREG_MOVE_1(f30)
388 subcc %g1, 64, %g1 450 subcc %g1, 64, %g1
389 add %o4, 64, %o4 451 add %o4, 64, %o4
@@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
393 nop 455 nop
394 456
395190: 457190:
3961: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 4581: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
397 subcc %g1, 64, %g1 459 subcc %g1, 64, %g1
398 EX_LD_FP(LOAD_BLK(%o4, %f0)) 460 EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64)
399 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) 461 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64)
400 add %o4, 64, %o4 462 add %o4, 64, %o4
401 bne,pt %xcc, 1b 463 bne,pt %xcc, 1b
402 LOAD(prefetch, %o4 + 64, #one_read) 464 LOAD(prefetch, %o4 + 64, #one_read)
@@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
423 andn %o2, 0xf, %o4 485 andn %o2, 0xf, %o4
424 and %o2, 0xf, %o2 486 and %o2, 0xf, %o2
4251: subcc %o4, 0x10, %o4 4871: subcc %o4, 0x10, %o4
426 EX_LD(LOAD(ldx, %o1, %o5)) 488 EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16)
427 add %o1, 0x08, %o1 489 add %o1, 0x08, %o1
428 EX_LD(LOAD(ldx, %o1, %g1)) 490 EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16)
429 sub %o1, 0x08, %o1 491 sub %o1, 0x08, %o1
430 EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) 492 EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16)
431 add %o1, 0x8, %o1 493 add %o1, 0x8, %o1
432 EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) 494 EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8)
433 bgu,pt %XCC, 1b 495 bgu,pt %XCC, 1b
434 add %o1, 0x8, %o1 496 add %o1, 0x8, %o1
43573: andcc %o2, 0x8, %g0 49773: andcc %o2, 0x8, %g0
436 be,pt %XCC, 1f 498 be,pt %XCC, 1f
437 nop 499 nop
438 sub %o2, 0x8, %o2 500 sub %o2, 0x8, %o2
439 EX_LD(LOAD(ldx, %o1, %o5)) 501 EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8)
440 EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) 502 EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8)
441 add %o1, 0x8, %o1 503 add %o1, 0x8, %o1
4421: andcc %o2, 0x4, %g0 5041: andcc %o2, 0x4, %g0
443 be,pt %XCC, 1f 505 be,pt %XCC, 1f
444 nop 506 nop
445 sub %o2, 0x4, %o2 507 sub %o2, 0x4, %o2
446 EX_LD(LOAD(lduw, %o1, %o5)) 508 EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4)
447 EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) 509 EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
448 add %o1, 0x4, %o1 510 add %o1, 0x4, %o1
4491: cmp %o2, 0 5111: cmp %o2, 0
450 be,pt %XCC, 85f 512 be,pt %XCC, 85f
@@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
460 sub %o2, %g1, %o2 522 sub %o2, %g1, %o2
461 523
4621: subcc %g1, 1, %g1 5241: subcc %g1, 1, %g1
463 EX_LD(LOAD(ldub, %o1, %o5)) 525 EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1)
464 EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) 526 EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1)
465 bgu,pt %icc, 1b 527 bgu,pt %icc, 1b
466 add %o1, 1, %o1 528 add %o1, 1, %o1
467 529
@@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
477 539
4788: mov 64, GLOBAL_SPARE 5408: mov 64, GLOBAL_SPARE
479 andn %o1, 0x7, %o1 541 andn %o1, 0x7, %o1
480 EX_LD(LOAD(ldx, %o1, %g2)) 542 EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2)
481 sub GLOBAL_SPARE, %g1, GLOBAL_SPARE 543 sub GLOBAL_SPARE, %g1, GLOBAL_SPARE
482 andn %o2, 0x7, %o4 544 andn %o2, 0x7, %o4
483 sllx %g2, %g1, %g2 545 sllx %g2, %g1, %g2
4841: add %o1, 0x8, %o1 5461: add %o1, 0x8, %o1
485 EX_LD(LOAD(ldx, %o1, %g3)) 547 EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4)
486 subcc %o4, 0x8, %o4 548 subcc %o4, 0x8, %o4
487 srlx %g3, GLOBAL_SPARE, %o5 549 srlx %g3, GLOBAL_SPARE, %o5
488 or %o5, %g2, %o5 550 or %o5, %g2, %o5
489 EX_ST(STORE(stx, %o5, %o0)) 551 EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8)
490 add %o0, 0x8, %o0 552 add %o0, 0x8, %o0
491 bgu,pt %icc, 1b 553 bgu,pt %icc, 1b
492 sllx %g3, %g1, %g2 554 sllx %g3, %g1, %g2
@@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
506 568
5071: 5691:
508 subcc %o2, 4, %o2 570 subcc %o2, 4, %o2
509 EX_LD(LOAD(lduw, %o1, %g1)) 571 EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4)
510 EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) 572 EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
511 bgu,pt %XCC, 1b 573 bgu,pt %XCC, 1b
512 add %o1, 4, %o1 574 add %o1, 4, %o1
513 575
@@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
517 .align 32 579 .align 32
51890: 58090:
519 subcc %o2, 1, %o2 581 subcc %o2, 1, %o2
520 EX_LD(LOAD(ldub, %o1, %g1)) 582 EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1)
521 EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) 583 EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1)
522 bgu,pt %XCC, 90b 584 bgu,pt %XCC, 90b
523 add %o1, 1, %o1 585 add %o1, 1, %o1
524 retl 586 retl
diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S
index 2e8ee7ad07a9..16a286c1a528 100644
--- a/arch/sparc/lib/NG4copy_from_user.S
+++ b/arch/sparc/lib/NG4copy_from_user.S
@@ -3,19 +3,19 @@
3 * Copyright (C) 2012 David S. Miller (davem@davemloft.net) 3 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
4 */ 4 */
5 5
/*
 * EX_LD(x, y): emit instruction x at local label 98 and record an __ex_table
 * entry that pairs the address of that instruction (98b) with a fixup target.
 * The old side hard-codes the target as __retl_one_asi; the new side takes it
 * from the second macro argument y.
 */
6#define EX_LD(x) \ 6#define EX_LD(x, y) \
798: x; \ 798: x; \
8 .section __ex_table,"a";\ 8 .section __ex_table,"a";\
9 .align 4; \ 9 .align 4; \
10 .word 98b, __retl_one_asi;\ 10 .word 98b, y; \
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
/*
 * EX_LD_FP(x,y): same shape as EX_LD, but the new side pastes "_fp" onto the
 * fixup name (y##_fp), so the __ex_table entry points at the "_fp" variant of
 * the handler named by y. The old side always used __retl_one_asi_fp.
 */
14#define EX_LD_FP(x) \ 14#define EX_LD_FP(x,y) \
1598: x; \ 1598: x; \
16 .section __ex_table,"a";\ 16 .section __ex_table,"a";\
17 .align 4; \ 17 .align 4; \
18 .word 98b, __retl_one_asi_fp;\ 18 .word 98b, y##_fp; \
19 .text; \ 19 .text; \
20 .align 4; 20 .align 4;
21 21
diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S
index be0bf4590df8..6b0276ffc858 100644
--- a/arch/sparc/lib/NG4copy_to_user.S
+++ b/arch/sparc/lib/NG4copy_to_user.S
@@ -3,19 +3,19 @@
3 * Copyright (C) 2012 David S. Miller (davem@davemloft.net) 3 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
4 */ 4 */
5 5
/*
 * EX_ST(x,y): emit instruction x at local label 98 and record an __ex_table
 * entry that pairs the address of that instruction (98b) with a fixup target.
 * The old side hard-codes the target as __retl_one_asi; the new side takes it
 * from the second macro argument y.
 */
6#define EX_ST(x) \ 6#define EX_ST(x,y) \
798: x; \ 798: x; \
8 .section __ex_table,"a";\ 8 .section __ex_table,"a";\
9 .align 4; \ 9 .align 4; \
10 .word 98b, __retl_one_asi;\ 10 .word 98b, y; \
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
/*
 * EX_ST_FP(x,y): same shape as EX_ST, but the new side pastes "_fp" onto the
 * fixup name (y##_fp), so the __ex_table entry points at the "_fp" variant of
 * the handler named by y. The old side always used __retl_one_asi_fp.
 */
14#define EX_ST_FP(x) \ 14#define EX_ST_FP(x,y) \
1598: x; \ 1598: x; \
16 .section __ex_table,"a";\ 16 .section __ex_table,"a";\
17 .align 4; \ 17 .align 4; \
18 .word 98b, __retl_one_asi_fp;\ 18 .word 98b, y##_fp; \
19 .text; \ 19 .text; \
20 .align 4; 20 .align 4;
21 21
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
index 8e13ee1f4454..75bb93b1437f 100644
--- a/arch/sparc/lib/NG4memcpy.S
+++ b/arch/sparc/lib/NG4memcpy.S
@@ -4,6 +4,7 @@
4 */ 4 */
5 5
6#ifdef __KERNEL__ 6#ifdef __KERNEL__
7#include <linux/linkage.h>
7#include <asm/visasm.h> 8#include <asm/visasm.h>
8#include <asm/asi.h> 9#include <asm/asi.h>
9#define GLOBAL_SPARE %g7 10#define GLOBAL_SPARE %g7
@@ -46,22 +47,19 @@
46#endif 47#endif
47 48
/*
 * Default wrappers, used only when the including file has not already defined
 * them: each one expands to the bare instruction x. On the new side the
 * wrappers accept a second argument y (the fixup label) and discard it.
 * The EX_RETVAL default that the old side defined here is dropped from this
 * position by the change.
 */
48#ifndef EX_LD 49#ifndef EX_LD
49#define EX_LD(x) x 50#define EX_LD(x,y) x
50#endif 51#endif
51#ifndef EX_LD_FP 52#ifndef EX_LD_FP
52#define EX_LD_FP(x) x 53#define EX_LD_FP(x,y) x
53#endif 54#endif
54 55
55#ifndef EX_ST 56#ifndef EX_ST
56#define EX_ST(x) x 57#define EX_ST(x,y) x
57#endif 58#endif
58#ifndef EX_ST_FP 59#ifndef EX_ST_FP
59#define EX_ST_FP(x) x 60#define EX_ST_FP(x,y) x
60#endif 61#endif
61 62
62#ifndef EX_RETVAL
63#define EX_RETVAL(x) x
64#endif
65 63
66#ifndef LOAD 64#ifndef LOAD
67#define LOAD(type,addr,dest) type [addr], dest 65#define LOAD(type,addr,dest) type [addr], dest
@@ -94,6 +92,158 @@
94 .register %g3,#scratch 92 .register %g3,#scratch
95 93
96 .text 94 .text
/*
 * Fault-fixup stubs added by this change. They are assembled only when
 * EX_RETVAL has not already been defined by an including file, and the same
 * guard supplies the pass-through EX_RETVAL(x) default.
 *
 * Each NG4_retl_* entry leaves in %o0 the value spelled out by its name
 * (for example NG4_retl_o2_plus_o5_plus_16 yields %o2 + %o5 + 16); the final
 * add/mov sits in the delay slot of the branch. Entries with an extra
 * constant first add it to %o5, %g1 or %o4 in place.
 *
 * Plain entries branch to __restore_asi, which returns with retl and writes
 * ASI_AIUS to %asi in the delay slot. The *_fp entries branch to
 * __restore_asi_fp, which runs VISExitHalf and then falls through into
 * __restore_asi. Only the %o4-based family has *_fp variants here.
 *
 * NOTE(review): %o0 is presumably the number of bytes left uncopied (the
 * "accurate residual length" mentioned in the merge description) -- confirm
 * against the callers of the user-copy routines.
 */
95#ifndef EX_RETVAL
96#define EX_RETVAL(x) x
97__restore_asi_fp:
98 VISExitHalf
99__restore_asi:
100 retl
101 wr %g0, ASI_AIUS, %asi
102
103ENTRY(NG4_retl_o2)
104 ba,pt %xcc, __restore_asi
105 mov %o2, %o0
106ENDPROC(NG4_retl_o2)
107ENTRY(NG4_retl_o2_plus_1)
108 ba,pt %xcc, __restore_asi
109 add %o2, 1, %o0
110ENDPROC(NG4_retl_o2_plus_1)
111ENTRY(NG4_retl_o2_plus_4)
112 ba,pt %xcc, __restore_asi
113 add %o2, 4, %o0
114ENDPROC(NG4_retl_o2_plus_4)
115ENTRY(NG4_retl_o2_plus_o5)
116 ba,pt %xcc, __restore_asi
117 add %o2, %o5, %o0
118ENDPROC(NG4_retl_o2_plus_o5)
119ENTRY(NG4_retl_o2_plus_o5_plus_4)
120 add %o5, 4, %o5
121 ba,pt %xcc, __restore_asi
122 add %o2, %o5, %o0
123ENDPROC(NG4_retl_o2_plus_o5_plus_4)
124ENTRY(NG4_retl_o2_plus_o5_plus_8)
125 add %o5, 8, %o5
126 ba,pt %xcc, __restore_asi
127 add %o2, %o5, %o0
128ENDPROC(NG4_retl_o2_plus_o5_plus_8)
129ENTRY(NG4_retl_o2_plus_o5_plus_16)
130 add %o5, 16, %o5
131 ba,pt %xcc, __restore_asi
132 add %o2, %o5, %o0
133ENDPROC(NG4_retl_o2_plus_o5_plus_16)
134ENTRY(NG4_retl_o2_plus_o5_plus_24)
135 add %o5, 24, %o5
136 ba,pt %xcc, __restore_asi
137 add %o2, %o5, %o0
138ENDPROC(NG4_retl_o2_plus_o5_plus_24)
139ENTRY(NG4_retl_o2_plus_o5_plus_32)
140 add %o5, 32, %o5
141 ba,pt %xcc, __restore_asi
142 add %o2, %o5, %o0
143ENDPROC(NG4_retl_o2_plus_o5_plus_32)
144ENTRY(NG4_retl_o2_plus_g1)
145 ba,pt %xcc, __restore_asi
146 add %o2, %g1, %o0
147ENDPROC(NG4_retl_o2_plus_g1)
148ENTRY(NG4_retl_o2_plus_g1_plus_1)
149 add %g1, 1, %g1
150 ba,pt %xcc, __restore_asi
151 add %o2, %g1, %o0
152ENDPROC(NG4_retl_o2_plus_g1_plus_1)
153ENTRY(NG4_retl_o2_plus_g1_plus_8)
154 add %g1, 8, %g1
155 ba,pt %xcc, __restore_asi
156 add %o2, %g1, %o0
157ENDPROC(NG4_retl_o2_plus_g1_plus_8)
158ENTRY(NG4_retl_o2_plus_o4)
159 ba,pt %xcc, __restore_asi
160 add %o2, %o4, %o0
161ENDPROC(NG4_retl_o2_plus_o4)
162ENTRY(NG4_retl_o2_plus_o4_plus_8)
163 add %o4, 8, %o4
164 ba,pt %xcc, __restore_asi
165 add %o2, %o4, %o0
166ENDPROC(NG4_retl_o2_plus_o4_plus_8)
167ENTRY(NG4_retl_o2_plus_o4_plus_16)
168 add %o4, 16, %o4
169 ba,pt %xcc, __restore_asi
170 add %o2, %o4, %o0
171ENDPROC(NG4_retl_o2_plus_o4_plus_16)
172ENTRY(NG4_retl_o2_plus_o4_plus_24)
173 add %o4, 24, %o4
174 ba,pt %xcc, __restore_asi
175 add %o2, %o4, %o0
176ENDPROC(NG4_retl_o2_plus_o4_plus_24)
177ENTRY(NG4_retl_o2_plus_o4_plus_32)
178 add %o4, 32, %o4
179 ba,pt %xcc, __restore_asi
180 add %o2, %o4, %o0
181ENDPROC(NG4_retl_o2_plus_o4_plus_32)
182ENTRY(NG4_retl_o2_plus_o4_plus_40)
183 add %o4, 40, %o4
184 ba,pt %xcc, __restore_asi
185 add %o2, %o4, %o0
186ENDPROC(NG4_retl_o2_plus_o4_plus_40)
187ENTRY(NG4_retl_o2_plus_o4_plus_48)
188 add %o4, 48, %o4
189 ba,pt %xcc, __restore_asi
190 add %o2, %o4, %o0
191ENDPROC(NG4_retl_o2_plus_o4_plus_48)
192ENTRY(NG4_retl_o2_plus_o4_plus_56)
193 add %o4, 56, %o4
194 ba,pt %xcc, __restore_asi
195 add %o2, %o4, %o0
196ENDPROC(NG4_retl_o2_plus_o4_plus_56)
197ENTRY(NG4_retl_o2_plus_o4_plus_64)
198 add %o4, 64, %o4
199 ba,pt %xcc, __restore_asi
200 add %o2, %o4, %o0
201ENDPROC(NG4_retl_o2_plus_o4_plus_64)
202ENTRY(NG4_retl_o2_plus_o4_fp)
203 ba,pt %xcc, __restore_asi_fp
204 add %o2, %o4, %o0
205ENDPROC(NG4_retl_o2_plus_o4_fp)
206ENTRY(NG4_retl_o2_plus_o4_plus_8_fp)
207 add %o4, 8, %o4
208 ba,pt %xcc, __restore_asi_fp
209 add %o2, %o4, %o0
210ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp)
211ENTRY(NG4_retl_o2_plus_o4_plus_16_fp)
212 add %o4, 16, %o4
213 ba,pt %xcc, __restore_asi_fp
214 add %o2, %o4, %o0
215ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp)
216ENTRY(NG4_retl_o2_plus_o4_plus_24_fp)
217 add %o4, 24, %o4
218 ba,pt %xcc, __restore_asi_fp
219 add %o2, %o4, %o0
220ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp)
221ENTRY(NG4_retl_o2_plus_o4_plus_32_fp)
222 add %o4, 32, %o4
223 ba,pt %xcc, __restore_asi_fp
224 add %o2, %o4, %o0
225ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp)
226ENTRY(NG4_retl_o2_plus_o4_plus_40_fp)
227 add %o4, 40, %o4
228 ba,pt %xcc, __restore_asi_fp
229 add %o2, %o4, %o0
230ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp)
231ENTRY(NG4_retl_o2_plus_o4_plus_48_fp)
232 add %o4, 48, %o4
233 ba,pt %xcc, __restore_asi_fp
234 add %o2, %o4, %o0
235ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp)
236ENTRY(NG4_retl_o2_plus_o4_plus_56_fp)
237 add %o4, 56, %o4
238 ba,pt %xcc, __restore_asi_fp
239 add %o2, %o4, %o0
240ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp)
241ENTRY(NG4_retl_o2_plus_o4_plus_64_fp)
242 add %o4, 64, %o4
243 ba,pt %xcc, __restore_asi_fp
244 add %o2, %o4, %o0
245ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp)
246#endif
97 .align 64 247 .align 64
98 248
99 .globl FUNC_NAME 249 .globl FUNC_NAME
@@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
124 brz,pt %g1, 51f 274 brz,pt %g1, 51f
125 sub %o2, %g1, %o2 275 sub %o2, %g1, %o2
126 276
1271: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) 277
2781: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
128 add %o1, 1, %o1 279 add %o1, 1, %o1
129 subcc %g1, 1, %g1 280 subcc %g1, 1, %g1
130 add %o0, 1, %o0 281 add %o0, 1, %o0
131 bne,pt %icc, 1b 282 bne,pt %icc, 1b
132 EX_ST(STORE(stb, %g2, %o0 - 0x01)) 283 EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
133 284
13451: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) 28551: LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
135 LOAD(prefetch, %o1 + 0x080, #n_reads_strong) 286 LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
@@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
154 brz,pt %g1, .Llarge_aligned 305 brz,pt %g1, .Llarge_aligned
155 sub %o2, %g1, %o2 306 sub %o2, %g1, %o2
156 307
1571: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) 3081: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
158 add %o1, 8, %o1 309 add %o1, 8, %o1
159 subcc %g1, 8, %g1 310 subcc %g1, 8, %g1
160 add %o0, 8, %o0 311 add %o0, 8, %o0
161 bne,pt %icc, 1b 312 bne,pt %icc, 1b
162 EX_ST(STORE(stx, %g2, %o0 - 0x08)) 313 EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8)
163 314
164.Llarge_aligned: 315.Llarge_aligned:
165 /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ 316 /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
166 andn %o2, 0x3f, %o4 317 andn %o2, 0x3f, %o4
167 sub %o2, %o4, %o2 318 sub %o2, %o4, %o2
168 319
1691: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) 3201: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4)
170 add %o1, 0x40, %o1 321 add %o1, 0x40, %o1
171 EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) 322 EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4)
172 subcc %o4, 0x40, %o4 323 subcc %o4, 0x40, %o4
173 EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) 324 EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64)
174 EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) 325 EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64)
175 EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) 326 EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64)
176 EX_ST(STORE_INIT(%g1, %o0)) 327 EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64)
177 add %o0, 0x08, %o0 328 add %o0, 0x08, %o0
178 EX_ST(STORE_INIT(%g2, %o0)) 329 EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56)
179 add %o0, 0x08, %o0 330 add %o0, 0x08, %o0
180 EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) 331 EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48)
181 EX_ST(STORE_INIT(%g3, %o0)) 332 EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48)
182 add %o0, 0x08, %o0 333 add %o0, 0x08, %o0
183 EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) 334 EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40)
184 EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) 335 EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40)
185 add %o0, 0x08, %o0 336 add %o0, 0x08, %o0
186 EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) 337 EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32)
187 EX_ST(STORE_INIT(%o5, %o0)) 338 EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32)
188 add %o0, 0x08, %o0 339 add %o0, 0x08, %o0
189 EX_ST(STORE_INIT(%g2, %o0)) 340 EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24)
190 add %o0, 0x08, %o0 341 add %o0, 0x08, %o0
191 EX_ST(STORE_INIT(%g3, %o0)) 342 EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16)
192 add %o0, 0x08, %o0 343 add %o0, 0x08, %o0
193 EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) 344 EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8)
194 add %o0, 0x08, %o0 345 add %o0, 0x08, %o0
195 bne,pt %icc, 1b 346 bne,pt %icc, 1b
196 LOAD(prefetch, %o1 + 0x200, #n_reads_strong) 347 LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
@@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
216 sub %o2, %o4, %o2 367 sub %o2, %o4, %o2
217 alignaddr %o1, %g0, %g1 368 alignaddr %o1, %g0, %g1
218 add %o1, %o4, %o1 369 add %o1, %o4, %o1
219 EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0)) 370 EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4)
2201: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2)) 3711: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4)
221 subcc %o4, 0x40, %o4 372 subcc %o4, 0x40, %o4
222 EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4)) 373 EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64)
223 EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6)) 374 EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64)
224 EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8)) 375 EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64)
225 EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10)) 376 EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64)
226 EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12)) 377 EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64)
227 EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14)) 378 EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64)
228 faligndata %f0, %f2, %f16 379 faligndata %f0, %f2, %f16
229 EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0)) 380 EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64)
230 faligndata %f2, %f4, %f18 381 faligndata %f2, %f4, %f18
231 add %g1, 0x40, %g1 382 add %g1, 0x40, %g1
232 faligndata %f4, %f6, %f20 383 faligndata %f4, %f6, %f20
@@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
235 faligndata %f10, %f12, %f26 386 faligndata %f10, %f12, %f26
236 faligndata %f12, %f14, %f28 387 faligndata %f12, %f14, %f28
237 faligndata %f14, %f0, %f30 388 faligndata %f14, %f0, %f30
238 EX_ST_FP(STORE(std, %f16, %o0 + 0x00)) 389 EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64)
239 EX_ST_FP(STORE(std, %f18, %o0 + 0x08)) 390 EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56)
240 EX_ST_FP(STORE(std, %f20, %o0 + 0x10)) 391 EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48)
241 EX_ST_FP(STORE(std, %f22, %o0 + 0x18)) 392 EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40)
242 EX_ST_FP(STORE(std, %f24, %o0 + 0x20)) 393 EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32)
243 EX_ST_FP(STORE(std, %f26, %o0 + 0x28)) 394 EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24)
244 EX_ST_FP(STORE(std, %f28, %o0 + 0x30)) 395 EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16)
245 EX_ST_FP(STORE(std, %f30, %o0 + 0x38)) 396 EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8)
246 add %o0, 0x40, %o0 397 add %o0, 0x40, %o0
247 bne,pt %icc, 1b 398 bne,pt %icc, 1b
248 LOAD(prefetch, %g1 + 0x200, #n_reads_strong) 399 LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
@@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
270 andncc %o2, 0x20 - 1, %o5 421 andncc %o2, 0x20 - 1, %o5
271 be,pn %icc, 2f 422 be,pn %icc, 2f
272 sub %o2, %o5, %o2 423 sub %o2, %o5, %o2
2731: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) 4241: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
274 EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) 425 EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5)
275 EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) 426 EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5)
276 EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) 427 EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5)
277 add %o1, 0x20, %o1 428 add %o1, 0x20, %o1
278 subcc %o5, 0x20, %o5 429 subcc %o5, 0x20, %o5
279 EX_ST(STORE(stx, %g1, %o0 + 0x00)) 430 EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32)
280 EX_ST(STORE(stx, %g2, %o0 + 0x08)) 431 EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24)
281 EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) 432 EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24)
282 EX_ST(STORE(stx, %o4, %o0 + 0x18)) 433 EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8)
283 bne,pt %icc, 1b 434 bne,pt %icc, 1b
284 add %o0, 0x20, %o0 435 add %o0, 0x20, %o0
2852: andcc %o2, 0x18, %o5 4362: andcc %o2, 0x18, %o5
286 be,pt %icc, 3f 437 be,pt %icc, 3f
287 sub %o2, %o5, %o2 438 sub %o2, %o5, %o2
2881: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) 439
4401: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
289 add %o1, 0x08, %o1 441 add %o1, 0x08, %o1
290 add %o0, 0x08, %o0 442 add %o0, 0x08, %o0
291 subcc %o5, 0x08, %o5 443 subcc %o5, 0x08, %o5
292 bne,pt %icc, 1b 444 bne,pt %icc, 1b
293 EX_ST(STORE(stx, %g1, %o0 - 0x08)) 445 EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8)
2943: brz,pt %o2, .Lexit 4463: brz,pt %o2, .Lexit
295 cmp %o2, 0x04 447 cmp %o2, 0x04
296 bl,pn %icc, .Ltiny 448 bl,pn %icc, .Ltiny
297 nop 449 nop
298 EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) 450 EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2)
299 add %o1, 0x04, %o1 451 add %o1, 0x04, %o1
300 add %o0, 0x04, %o0 452 add %o0, 0x04, %o0
301 subcc %o2, 0x04, %o2 453 subcc %o2, 0x04, %o2
302 bne,pn %icc, .Ltiny 454 bne,pn %icc, .Ltiny
303 EX_ST(STORE(stw, %g1, %o0 - 0x04)) 455 EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4)
304 ba,a,pt %icc, .Lexit 456 ba,a,pt %icc, .Lexit
305.Lmedium_unaligned: 457.Lmedium_unaligned:
306 /* First get dest 8 byte aligned. */ 458 /* First get dest 8 byte aligned. */
@@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
309 brz,pt %g1, 2f 461 brz,pt %g1, 2f
310 sub %o2, %g1, %o2 462 sub %o2, %g1, %o2
311 463
3121: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) 4641: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
313 add %o1, 1, %o1 465 add %o1, 1, %o1
314 subcc %g1, 1, %g1 466 subcc %g1, 1, %g1
315 add %o0, 1, %o0 467 add %o0, 1, %o0
316 bne,pt %icc, 1b 468 bne,pt %icc, 1b
317 EX_ST(STORE(stb, %g2, %o0 - 0x01)) 469 EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
3182: 4702:
319 and %o1, 0x7, %g1 471 and %o1, 0x7, %g1
320 brz,pn %g1, .Lmedium_noprefetch 472 brz,pn %g1, .Lmedium_noprefetch
@@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
322 mov 64, %g2 474 mov 64, %g2
323 sub %g2, %g1, %g2 475 sub %g2, %g1, %g2
324 andn %o1, 0x7, %o1 476 andn %o1, 0x7, %o1
325 EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) 477 EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2)
326 sllx %o4, %g1, %o4 478 sllx %o4, %g1, %o4
327 andn %o2, 0x08 - 1, %o5 479 andn %o2, 0x08 - 1, %o5
328 sub %o2, %o5, %o2 480 sub %o2, %o5, %o2
3291: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) 4811: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5)
330 add %o1, 0x08, %o1 482 add %o1, 0x08, %o1
331 subcc %o5, 0x08, %o5 483 subcc %o5, 0x08, %o5
332 srlx %g3, %g2, GLOBAL_SPARE 484 srlx %g3, %g2, GLOBAL_SPARE
333 or GLOBAL_SPARE, %o4, GLOBAL_SPARE 485 or GLOBAL_SPARE, %o4, GLOBAL_SPARE
334 EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) 486 EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8)
335 add %o0, 0x08, %o0 487 add %o0, 0x08, %o0
336 bne,pt %icc, 1b 488 bne,pt %icc, 1b
337 sllx %g3, %g1, %o4 489 sllx %g3, %g1, %o4
@@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
342 ba,pt %icc, .Lsmall_unaligned 494 ba,pt %icc, .Lsmall_unaligned
343 495
344.Ltiny: 496.Ltiny:
345 EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) 497 EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
346 subcc %o2, 1, %o2 498 subcc %o2, 1, %o2
347 be,pn %icc, .Lexit 499 be,pn %icc, .Lexit
348 EX_ST(STORE(stb, %g1, %o0 + 0x00)) 500 EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1)
349 EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) 501 EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2)
350 subcc %o2, 1, %o2 502 subcc %o2, 1, %o2
351 be,pn %icc, .Lexit 503 be,pn %icc, .Lexit
352 EX_ST(STORE(stb, %g1, %o0 + 0x01)) 504 EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1)
353 EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) 505 EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2)
354 ba,pt %icc, .Lexit 506 ba,pt %icc, .Lexit
355 EX_ST(STORE(stb, %g1, %o0 + 0x02)) 507 EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2)
356 508
357.Lsmall: 509.Lsmall:
358 andcc %g2, 0x3, %g0 510 andcc %g2, 0x3, %g0
@@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
360 andn %o2, 0x4 - 1, %o5 512 andn %o2, 0x4 - 1, %o5
361 sub %o2, %o5, %o2 513 sub %o2, %o5, %o2
3621: 5141:
363 EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) 515 EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
364 add %o1, 0x04, %o1 516 add %o1, 0x04, %o1
365 subcc %o5, 0x04, %o5 517 subcc %o5, 0x04, %o5
366 add %o0, 0x04, %o0 518 add %o0, 0x04, %o0
367 bne,pt %icc, 1b 519 bne,pt %icc, 1b
368 EX_ST(STORE(stw, %g1, %o0 - 0x04)) 520 EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4)
369 brz,pt %o2, .Lexit 521 brz,pt %o2, .Lexit
370 nop 522 nop
371 ba,a,pt %icc, .Ltiny 523 ba,a,pt %icc, .Ltiny
372 524
373.Lsmall_unaligned: 525.Lsmall_unaligned:
3741: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) 5261: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
375 add %o1, 1, %o1 527 add %o1, 1, %o1
376 add %o0, 1, %o0 528 add %o0, 1, %o0
377 subcc %o2, 1, %o2 529 subcc %o2, 1, %o2
378 bne,pt %icc, 1b 530 bne,pt %icc, 1b
379 EX_ST(STORE(stb, %g1, %o0 - 0x01)) 531 EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
380 ba,a,pt %icc, .Lexit 532 ba,a,pt %icc, .Lexit
381 .size FUNC_NAME, .-FUNC_NAME 533 .size FUNC_NAME, .-FUNC_NAME
diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S
index 5d1e4d1ac21e..9cd42fcbc781 100644
--- a/arch/sparc/lib/NGcopy_from_user.S
+++ b/arch/sparc/lib/NGcopy_from_user.S
@@ -3,11 +3,11 @@
3 * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) 3 * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
4 */ 4 */
5 5
6#define EX_LD(x) \ 6#define EX_LD(x,y) \
798: x; \ 798: x; \
8 .section __ex_table,"a";\ 8 .section __ex_table,"a";\
9 .align 4; \ 9 .align 4; \
10 .word 98b, __ret_one_asi;\ 10 .word 98b, y; \
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S
index ff630dcb273c..5c358afd464e 100644
--- a/arch/sparc/lib/NGcopy_to_user.S
+++ b/arch/sparc/lib/NGcopy_to_user.S
@@ -3,11 +3,11 @@
3 * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) 3 * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
4 */ 4 */
5 5
6#define EX_ST(x) \ 6#define EX_ST(x,y) \
798: x; \ 798: x; \
8 .section __ex_table,"a";\ 8 .section __ex_table,"a";\
9 .align 4; \ 9 .align 4; \
10 .word 98b, __ret_one_asi;\ 10 .word 98b, y; \
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S
index 96a14caf6966..d88c4ed50a00 100644
--- a/arch/sparc/lib/NGmemcpy.S
+++ b/arch/sparc/lib/NGmemcpy.S
@@ -4,6 +4,7 @@
4 */ 4 */
5 5
6#ifdef __KERNEL__ 6#ifdef __KERNEL__
7#include <linux/linkage.h>
7#include <asm/asi.h> 8#include <asm/asi.h>
8#include <asm/thread_info.h> 9#include <asm/thread_info.h>
9#define GLOBAL_SPARE %g7 10#define GLOBAL_SPARE %g7
@@ -27,15 +28,11 @@
27#endif 28#endif
28 29
29#ifndef EX_LD 30#ifndef EX_LD
30#define EX_LD(x) x 31#define EX_LD(x,y) x
31#endif 32#endif
32 33
33#ifndef EX_ST 34#ifndef EX_ST
34#define EX_ST(x) x 35#define EX_ST(x,y) x
35#endif
36
37#ifndef EX_RETVAL
38#define EX_RETVAL(x) x
39#endif 36#endif
40 37
41#ifndef LOAD 38#ifndef LOAD
@@ -79,6 +76,92 @@
79 .register %g3,#scratch 76 .register %g3,#scratch
80 77
81 .text 78 .text
79#ifndef EX_RETVAL
80#define EX_RETVAL(x) x
81__restore_asi:
82 ret
83 wr %g0, ASI_AIUS, %asi
84 restore
85ENTRY(NG_ret_i2_plus_i4_plus_1)
86 ba,pt %xcc, __restore_asi
87 add %i2, %i5, %i0
88ENDPROC(NG_ret_i2_plus_i4_plus_1)
89ENTRY(NG_ret_i2_plus_g1)
90 ba,pt %xcc, __restore_asi
91 add %i2, %g1, %i0
92ENDPROC(NG_ret_i2_plus_g1)
93ENTRY(NG_ret_i2_plus_g1_minus_8)
94 sub %g1, 8, %g1
95 ba,pt %xcc, __restore_asi
96 add %i2, %g1, %i0
97ENDPROC(NG_ret_i2_plus_g1_minus_8)
98ENTRY(NG_ret_i2_plus_g1_minus_16)
99 sub %g1, 16, %g1
100 ba,pt %xcc, __restore_asi
101 add %i2, %g1, %i0
102ENDPROC(NG_ret_i2_plus_g1_minus_16)
103ENTRY(NG_ret_i2_plus_g1_minus_24)
104 sub %g1, 24, %g1
105 ba,pt %xcc, __restore_asi
106 add %i2, %g1, %i0
107ENDPROC(NG_ret_i2_plus_g1_minus_24)
108ENTRY(NG_ret_i2_plus_g1_minus_32)
109 sub %g1, 32, %g1
110 ba,pt %xcc, __restore_asi
111 add %i2, %g1, %i0
112ENDPROC(NG_ret_i2_plus_g1_minus_32)
113ENTRY(NG_ret_i2_plus_g1_minus_40)
114 sub %g1, 40, %g1
115 ba,pt %xcc, __restore_asi
116 add %i2, %g1, %i0
117ENDPROC(NG_ret_i2_plus_g1_minus_40)
118ENTRY(NG_ret_i2_plus_g1_minus_48)
119 sub %g1, 48, %g1
120 ba,pt %xcc, __restore_asi
121 add %i2, %g1, %i0
122ENDPROC(NG_ret_i2_plus_g1_minus_48)
123ENTRY(NG_ret_i2_plus_g1_minus_56)
124 sub %g1, 56, %g1
125 ba,pt %xcc, __restore_asi
126 add %i2, %g1, %i0
127ENDPROC(NG_ret_i2_plus_g1_minus_56)
128ENTRY(NG_ret_i2_plus_i4)
129 ba,pt %xcc, __restore_asi
130 add %i2, %i4, %i0
131ENDPROC(NG_ret_i2_plus_i4)
132ENTRY(NG_ret_i2_plus_i4_minus_8)
133 sub %i4, 8, %i4
134 ba,pt %xcc, __restore_asi
135 add %i2, %i4, %i0
136ENDPROC(NG_ret_i2_plus_i4_minus_8)
137ENTRY(NG_ret_i2_plus_8)
138 ba,pt %xcc, __restore_asi
139 add %i2, 8, %i0
140ENDPROC(NG_ret_i2_plus_8)
141ENTRY(NG_ret_i2_plus_4)
142 ba,pt %xcc, __restore_asi
143 add %i2, 4, %i0
144ENDPROC(NG_ret_i2_plus_4)
145ENTRY(NG_ret_i2_plus_1)
146 ba,pt %xcc, __restore_asi
147 add %i2, 1, %i0
148ENDPROC(NG_ret_i2_plus_1)
149ENTRY(NG_ret_i2_plus_g1_plus_1)
150 add %g1, 1, %g1
151 ba,pt %xcc, __restore_asi
152 add %i2, %g1, %i0
153ENDPROC(NG_ret_i2_plus_g1_plus_1)
154ENTRY(NG_ret_i2)
155 ba,pt %xcc, __restore_asi
156 mov %i2, %i0
157ENDPROC(NG_ret_i2)
158ENTRY(NG_ret_i2_and_7_plus_i4)
159 and %i2, 7, %i2
160 ba,pt %xcc, __restore_asi
161 add %i2, %i4, %i0
162ENDPROC(NG_ret_i2_and_7_plus_i4)
163#endif
164
82 .align 64 165 .align 64
83 166
84 .globl FUNC_NAME 167 .globl FUNC_NAME
@@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
126 sub %g0, %i4, %i4 ! bytes to align dst 209 sub %g0, %i4, %i4 ! bytes to align dst
127 sub %i2, %i4, %i2 210 sub %i2, %i4, %i2
1281: subcc %i4, 1, %i4 2111: subcc %i4, 1, %i4
129 EX_LD(LOAD(ldub, %i1, %g1)) 212 EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
130 EX_ST(STORE(stb, %g1, %o0)) 213 EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
131 add %i1, 1, %i1 214 add %i1, 1, %i1
132 bne,pt %XCC, 1b 215 bne,pt %XCC, 1b
133 add %o0, 1, %o0 216 add %o0, 1, %o0
@@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
160 and %i4, 0x7, GLOBAL_SPARE 243 and %i4, 0x7, GLOBAL_SPARE
161 sll GLOBAL_SPARE, 3, GLOBAL_SPARE 244 sll GLOBAL_SPARE, 3, GLOBAL_SPARE
162 mov 64, %i5 245 mov 64, %i5
163 EX_LD(LOAD_TWIN(%i1, %g2, %g3)) 246 EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
164 sub %i5, GLOBAL_SPARE, %i5 247 sub %i5, GLOBAL_SPARE, %i5
165 mov 16, %o4 248 mov 16, %o4
166 mov 32, %o5 249 mov 32, %o5
@@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
178 srlx WORD3, PRE_SHIFT, TMP; \ 261 srlx WORD3, PRE_SHIFT, TMP; \
179 or WORD2, TMP, WORD2; 262 or WORD2, TMP, WORD2;
180 263
1818: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) 2648: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
182 MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) 265 MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
183 LOAD(prefetch, %i1 + %i3, #one_read) 266 LOAD(prefetch, %i1 + %i3, #one_read)
184 267
185 EX_ST(STORE_INIT(%g2, %o0 + 0x00)) 268 EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
186 EX_ST(STORE_INIT(%g3, %o0 + 0x08)) 269 EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
187 270
188 EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) 271 EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
189 MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) 272 MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
190 273
191 EX_ST(STORE_INIT(%o2, %o0 + 0x10)) 274 EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
192 EX_ST(STORE_INIT(%o3, %o0 + 0x18)) 275 EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
193 276
194 EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) 277 EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
195 MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) 278 MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
196 279
197 EX_ST(STORE_INIT(%g2, %o0 + 0x20)) 280 EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
198 EX_ST(STORE_INIT(%g3, %o0 + 0x28)) 281 EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
199 282
200 EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) 283 EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
201 add %i1, 64, %i1 284 add %i1, 64, %i1
202 MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) 285 MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
203 286
204 EX_ST(STORE_INIT(%o2, %o0 + 0x30)) 287 EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
205 EX_ST(STORE_INIT(%o3, %o0 + 0x38)) 288 EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
206 289
207 subcc %g1, 64, %g1 290 subcc %g1, 64, %g1
208 bne,pt %XCC, 8b 291 bne,pt %XCC, 8b
@@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
211 ba,pt %XCC, 60f 294 ba,pt %XCC, 60f
212 add %i1, %i4, %i1 295 add %i1, %i4, %i1
213 296
2149: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) 2979: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
215 MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) 298 MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
216 LOAD(prefetch, %i1 + %i3, #one_read) 299 LOAD(prefetch, %i1 + %i3, #one_read)
217 300
218 EX_ST(STORE_INIT(%g3, %o0 + 0x00)) 301 EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
219 EX_ST(STORE_INIT(%o2, %o0 + 0x08)) 302 EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
220 303
221 EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) 304 EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
222 MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) 305 MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
223 306
224 EX_ST(STORE_INIT(%o3, %o0 + 0x10)) 307 EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
225 EX_ST(STORE_INIT(%g2, %o0 + 0x18)) 308 EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
226 309
227 EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) 310 EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
228 MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) 311 MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
229 312
230 EX_ST(STORE_INIT(%g3, %o0 + 0x20)) 313 EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
231 EX_ST(STORE_INIT(%o2, %o0 + 0x28)) 314 EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
232 315
233 EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) 316 EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
234 add %i1, 64, %i1 317 add %i1, 64, %i1
235 MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) 318 MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
236 319
237 EX_ST(STORE_INIT(%o3, %o0 + 0x30)) 320 EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
238 EX_ST(STORE_INIT(%g2, %o0 + 0x38)) 321 EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
239 322
240 subcc %g1, 64, %g1 323 subcc %g1, 64, %g1
241 bne,pt %XCC, 9b 324 bne,pt %XCC, 9b
@@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
249 * one twin load ahead, then add 8 back into source when 332 * one twin load ahead, then add 8 back into source when
250 * we finish the loop. 333 * we finish the loop.
251 */ 334 */
252 EX_LD(LOAD_TWIN(%i1, %o4, %o5)) 335 EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
253 mov 16, %o7 336 mov 16, %o7
254 mov 32, %g2 337 mov 32, %g2
255 mov 48, %g3 338 mov 48, %g3
256 mov 64, %o1 339 mov 64, %o1
2571: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) 3401: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
258 LOAD(prefetch, %i1 + %o1, #one_read) 341 LOAD(prefetch, %i1 + %o1, #one_read)
259 EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line 342 EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
260 EX_ST(STORE_INIT(%o2, %o0 + 0x08)) 343 EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
261 EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) 344 EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
262 EX_ST(STORE_INIT(%o3, %o0 + 0x10)) 345 EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
263 EX_ST(STORE_INIT(%o4, %o0 + 0x18)) 346 EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
264 EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) 347 EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
265 EX_ST(STORE_INIT(%o5, %o0 + 0x20)) 348 EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
266 EX_ST(STORE_INIT(%o2, %o0 + 0x28)) 349 EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
267 EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5)) 350 EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
268 add %i1, 64, %i1 351 add %i1, 64, %i1
269 EX_ST(STORE_INIT(%o3, %o0 + 0x30)) 352 EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
270 EX_ST(STORE_INIT(%o4, %o0 + 0x38)) 353 EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
271 subcc %g1, 64, %g1 354 subcc %g1, 64, %g1
272 bne,pt %XCC, 1b 355 bne,pt %XCC, 1b
273 add %o0, 64, %o0 356 add %o0, 64, %o0
@@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
282 mov 32, %g2 365 mov 32, %g2
283 mov 48, %g3 366 mov 48, %g3
284 mov 64, %o1 367 mov 64, %o1
2851: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5)) 3681: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
286 EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) 369 EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
287 LOAD(prefetch, %i1 + %o1, #one_read) 370 LOAD(prefetch, %i1 + %o1, #one_read)
288 EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line 371 EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
289 EX_ST(STORE_INIT(%o5, %o0 + 0x08)) 372 EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
290 EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) 373 EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
291 EX_ST(STORE_INIT(%o2, %o0 + 0x10)) 374 EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
292 EX_ST(STORE_INIT(%o3, %o0 + 0x18)) 375 EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
293 EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) 376 EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
294 add %i1, 64, %i1 377 add %i1, 64, %i1
295 EX_ST(STORE_INIT(%o4, %o0 + 0x20)) 378 EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
296 EX_ST(STORE_INIT(%o5, %o0 + 0x28)) 379 EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
297 EX_ST(STORE_INIT(%o2, %o0 + 0x30)) 380 EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
298 EX_ST(STORE_INIT(%o3, %o0 + 0x38)) 381 EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
299 subcc %g1, 64, %g1 382 subcc %g1, 64, %g1
300 bne,pt %XCC, 1b 383 bne,pt %XCC, 1b
301 add %o0, 64, %o0 384 add %o0, 64, %o0
@@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
321 andn %i2, 0xf, %i4 404 andn %i2, 0xf, %i4
322 and %i2, 0xf, %i2 405 and %i2, 0xf, %i2
3231: subcc %i4, 0x10, %i4 4061: subcc %i4, 0x10, %i4
324 EX_LD(LOAD(ldx, %i1, %o4)) 407 EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4)
325 add %i1, 0x08, %i1 408 add %i1, 0x08, %i1
326 EX_LD(LOAD(ldx, %i1, %g1)) 409 EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4)
327 sub %i1, 0x08, %i1 410 sub %i1, 0x08, %i1
328 EX_ST(STORE(stx, %o4, %i1 + %i3)) 411 EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4)
329 add %i1, 0x8, %i1 412 add %i1, 0x8, %i1
330 EX_ST(STORE(stx, %g1, %i1 + %i3)) 413 EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8)
331 bgu,pt %XCC, 1b 414 bgu,pt %XCC, 1b
332 add %i1, 0x8, %i1 415 add %i1, 0x8, %i1
33373: andcc %i2, 0x8, %g0 41673: andcc %i2, 0x8, %g0
334 be,pt %XCC, 1f 417 be,pt %XCC, 1f
335 nop 418 nop
336 sub %i2, 0x8, %i2 419 sub %i2, 0x8, %i2
337 EX_LD(LOAD(ldx, %i1, %o4)) 420 EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
338 EX_ST(STORE(stx, %o4, %i1 + %i3)) 421 EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
339 add %i1, 0x8, %i1 422 add %i1, 0x8, %i1
3401: andcc %i2, 0x4, %g0 4231: andcc %i2, 0x4, %g0
341 be,pt %XCC, 1f 424 be,pt %XCC, 1f
342 nop 425 nop
343 sub %i2, 0x4, %i2 426 sub %i2, 0x4, %i2
344 EX_LD(LOAD(lduw, %i1, %i5)) 427 EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
345 EX_ST(STORE(stw, %i5, %i1 + %i3)) 428 EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
346 add %i1, 0x4, %i1 429 add %i1, 0x4, %i1
3471: cmp %i2, 0 4301: cmp %i2, 0
348 be,pt %XCC, 85f 431 be,pt %XCC, 85f
@@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
358 sub %i2, %g1, %i2 441 sub %i2, %g1, %i2
359 442
3601: subcc %g1, 1, %g1 4431: subcc %g1, 1, %g1
361 EX_LD(LOAD(ldub, %i1, %i5)) 444 EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
362 EX_ST(STORE(stb, %i5, %i1 + %i3)) 445 EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
363 bgu,pt %icc, 1b 446 bgu,pt %icc, 1b
364 add %i1, 1, %i1 447 add %i1, 1, %i1
365 448
@@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
375 458
3768: mov 64, %i3 4598: mov 64, %i3
377 andn %i1, 0x7, %i1 460 andn %i1, 0x7, %i1
378 EX_LD(LOAD(ldx, %i1, %g2)) 461 EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
379 sub %i3, %g1, %i3 462 sub %i3, %g1, %i3
380 andn %i2, 0x7, %i4 463 andn %i2, 0x7, %i4
381 sllx %g2, %g1, %g2 464 sllx %g2, %g1, %g2
3821: add %i1, 0x8, %i1 4651: add %i1, 0x8, %i1
383 EX_LD(LOAD(ldx, %i1, %g3)) 466 EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
384 subcc %i4, 0x8, %i4 467 subcc %i4, 0x8, %i4
385 srlx %g3, %i3, %i5 468 srlx %g3, %i3, %i5
386 or %i5, %g2, %i5 469 or %i5, %g2, %i5
387 EX_ST(STORE(stx, %i5, %o0)) 470 EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4)
388 add %o0, 0x8, %o0 471 add %o0, 0x8, %o0
389 bgu,pt %icc, 1b 472 bgu,pt %icc, 1b
390 sllx %g3, %g1, %g2 473 sllx %g3, %g1, %g2
@@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
404 487
4051: 4881:
406 subcc %i2, 4, %i2 489 subcc %i2, 4, %i2
407 EX_LD(LOAD(lduw, %i1, %g1)) 490 EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
408 EX_ST(STORE(stw, %g1, %i1 + %i3)) 491 EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
409 bgu,pt %XCC, 1b 492 bgu,pt %XCC, 1b
410 add %i1, 4, %i1 493 add %i1, 4, %i1
411 494
@@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
415 .align 32 498 .align 32
41690: 49990:
417 subcc %i2, 1, %i2 500 subcc %i2, 1, %i2
418 EX_LD(LOAD(ldub, %i1, %g1)) 501 EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
419 EX_ST(STORE(stb, %g1, %i1 + %i3)) 502 EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
420 bgu,pt %XCC, 90b 503 bgu,pt %XCC, 90b
421 add %i1, 1, %i1 504 add %i1, 1, %i1
422 ret 505 ret
diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S
index ecc5692fa2b4..bb6ff73229e3 100644
--- a/arch/sparc/lib/U1copy_from_user.S
+++ b/arch/sparc/lib/U1copy_from_user.S
@@ -3,19 +3,19 @@
3 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) 3 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
4 */ 4 */
5 5
6#define EX_LD(x) \ 6#define EX_LD(x,y) \
798: x; \ 798: x; \
8 .section __ex_table,"a";\ 8 .section __ex_table,"a";\
9 .align 4; \ 9 .align 4; \
10 .word 98b, __retl_one; \ 10 .word 98b, y; \
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
14#define EX_LD_FP(x) \ 14#define EX_LD_FP(x,y) \
1598: x; \ 1598: x; \
16 .section __ex_table,"a";\ 16 .section __ex_table,"a";\
17 .align 4; \ 17 .align 4; \
18 .word 98b, __retl_one_fp;\ 18 .word 98b, y; \
19 .text; \ 19 .text; \
20 .align 4; 20 .align 4;
21 21
diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S
index 9eea392e44d4..ed92ce739558 100644
--- a/arch/sparc/lib/U1copy_to_user.S
+++ b/arch/sparc/lib/U1copy_to_user.S
@@ -3,19 +3,19 @@
3 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) 3 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
4 */ 4 */
5 5
6#define EX_ST(x) \ 6#define EX_ST(x,y) \
798: x; \ 798: x; \
8 .section __ex_table,"a";\ 8 .section __ex_table,"a";\
9 .align 4; \ 9 .align 4; \
10 .word 98b, __retl_one; \ 10 .word 98b, y; \
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
14#define EX_ST_FP(x) \ 14#define EX_ST_FP(x,y) \
1598: x; \ 1598: x; \
16 .section __ex_table,"a";\ 16 .section __ex_table,"a";\
17 .align 4; \ 17 .align 4; \
18 .word 98b, __retl_one_fp;\ 18 .word 98b, y; \
19 .text; \ 19 .text; \
20 .align 4; 20 .align 4;
21 21
diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S
index 97e1b211090c..4f0d50b33a72 100644
--- a/arch/sparc/lib/U1memcpy.S
+++ b/arch/sparc/lib/U1memcpy.S
@@ -5,6 +5,7 @@
5 */ 5 */
6 6
7#ifdef __KERNEL__ 7#ifdef __KERNEL__
8#include <linux/linkage.h>
8#include <asm/visasm.h> 9#include <asm/visasm.h>
9#include <asm/asi.h> 10#include <asm/asi.h>
10#include <asm/export.h> 11#include <asm/export.h>
@@ -24,21 +25,17 @@
24#endif 25#endif
25 26
26#ifndef EX_LD 27#ifndef EX_LD
27#define EX_LD(x) x 28#define EX_LD(x,y) x
28#endif 29#endif
29#ifndef EX_LD_FP 30#ifndef EX_LD_FP
30#define EX_LD_FP(x) x 31#define EX_LD_FP(x,y) x
31#endif 32#endif
32 33
33#ifndef EX_ST 34#ifndef EX_ST
34#define EX_ST(x) x 35#define EX_ST(x,y) x
35#endif 36#endif
36#ifndef EX_ST_FP 37#ifndef EX_ST_FP
37#define EX_ST_FP(x) x 38#define EX_ST_FP(x,y) x
38#endif
39
40#ifndef EX_RETVAL
41#define EX_RETVAL(x) x
42#endif 39#endif
43 40
44#ifndef LOAD 41#ifndef LOAD
@@ -79,53 +76,169 @@
79 faligndata %f7, %f8, %f60; \ 76 faligndata %f7, %f8, %f60; \
80 faligndata %f8, %f9, %f62; 77 faligndata %f8, %f9, %f62;
81 78
82#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ 79#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \
83 EX_LD_FP(LOAD_BLK(%src, %fdest)); \ 80 EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \
84 EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ 81 EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
85 add %src, 0x40, %src; \ 82 add %src, 0x40, %src; \
86 subcc %len, 0x40, %len; \ 83 subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \
87 be,pn %xcc, jmptgt; \ 84 be,pn %xcc, jmptgt; \
88 add %dest, 0x40, %dest; \ 85 add %dest, 0x40, %dest; \
89 86
90#define LOOP_CHUNK1(src, dest, len, branch_dest) \ 87#define LOOP_CHUNK1(src, dest, branch_dest) \
91 MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) 88 MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest)
92#define LOOP_CHUNK2(src, dest, len, branch_dest) \ 89#define LOOP_CHUNK2(src, dest, branch_dest) \
93 MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) 90 MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
94#define LOOP_CHUNK3(src, dest, len, branch_dest) \ 91#define LOOP_CHUNK3(src, dest, branch_dest) \
95 MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) 92 MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)
96 93
97#define DO_SYNC membar #Sync; 94#define DO_SYNC membar #Sync;
98#define STORE_SYNC(dest, fsrc) \ 95#define STORE_SYNC(dest, fsrc) \
99 EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ 96 EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
100 add %dest, 0x40, %dest; \ 97 add %dest, 0x40, %dest; \
101 DO_SYNC 98 DO_SYNC
102 99
103#define STORE_JUMP(dest, fsrc, target) \ 100#define STORE_JUMP(dest, fsrc, target) \
104 EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ 101 EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \
105 add %dest, 0x40, %dest; \ 102 add %dest, 0x40, %dest; \
106 ba,pt %xcc, target; \ 103 ba,pt %xcc, target; \
107 nop; 104 nop;
108 105
109#define FINISH_VISCHUNK(dest, f0, f1, left) \ 106#define FINISH_VISCHUNK(dest, f0, f1) \
110 subcc %left, 8, %left;\ 107 subcc %g3, 8, %g3; \
111 bl,pn %xcc, 95f; \ 108 bl,pn %xcc, 95f; \
112 faligndata %f0, %f1, %f48; \ 109 faligndata %f0, %f1, %f48; \
113 EX_ST_FP(STORE(std, %f48, %dest)); \ 110 EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \
114 add %dest, 8, %dest; 111 add %dest, 8, %dest;
115 112
116#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ 113#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
117 subcc %left, 8, %left; \ 114 subcc %g3, 8, %g3; \
118 bl,pn %xcc, 95f; \ 115 bl,pn %xcc, 95f; \
119 fsrc2 %f0, %f1; 116 fsrc2 %f0, %f1;
120 117
121#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ 118#define UNEVEN_VISCHUNK(dest, f0, f1) \
122 UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ 119 UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
123 ba,a,pt %xcc, 93f; 120 ba,a,pt %xcc, 93f;
124 121
125 .register %g2,#scratch 122 .register %g2,#scratch
126 .register %g3,#scratch 123 .register %g3,#scratch
127 124
128 .text 125 .text
126#ifndef EX_RETVAL
127#define EX_RETVAL(x) x
128ENTRY(U1_g1_1_fp)
129 VISExitHalf
130 add %g1, 1, %g1
131 add %g1, %g2, %g1
132 retl
133 add %g1, %o2, %o0
134ENDPROC(U1_g1_1_fp)
135ENTRY(U1_g2_0_fp)
136 VISExitHalf
137 retl
138 add %g2, %o2, %o0
139ENDPROC(U1_g2_0_fp)
140ENTRY(U1_g2_8_fp)
141 VISExitHalf
142 add %g2, 8, %g2
143 retl
144 add %g2, %o2, %o0
145ENDPROC(U1_g2_8_fp)
146ENTRY(U1_gs_0_fp)
147 VISExitHalf
148 add %GLOBAL_SPARE, %g3, %o0
149 retl
150 add %o0, %o2, %o0
151ENDPROC(U1_gs_0_fp)
152ENTRY(U1_gs_80_fp)
153 VISExitHalf
154 add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
155 add %GLOBAL_SPARE, %g3, %o0
156 retl
157 add %o0, %o2, %o0
158ENDPROC(U1_gs_80_fp)
159ENTRY(U1_gs_40_fp)
160 VISExitHalf
161 add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
162 add %GLOBAL_SPARE, %g3, %o0
163 retl
164 add %o0, %o2, %o0
165ENDPROC(U1_gs_40_fp)
166ENTRY(U1_g3_0_fp)
167 VISExitHalf
168 retl
169 add %g3, %o2, %o0
170ENDPROC(U1_g3_0_fp)
171ENTRY(U1_g3_8_fp)
172 VISExitHalf
173 add %g3, 8, %g3
174 retl
175 add %g3, %o2, %o0
176ENDPROC(U1_g3_8_fp)
177ENTRY(U1_o2_0_fp)
178 VISExitHalf
179 retl
180 mov %o2, %o0
181ENDPROC(U1_o2_0_fp)
182ENTRY(U1_o2_1_fp)
183 VISExitHalf
184 retl
185 add %o2, 1, %o0
186ENDPROC(U1_o2_1_fp)
187ENTRY(U1_gs_0)
188 VISExitHalf
189 retl
190 add %GLOBAL_SPARE, %o2, %o0
191ENDPROC(U1_gs_0)
192ENTRY(U1_gs_8)
193 VISExitHalf
194 add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
195 retl
196 add %GLOBAL_SPARE, 0x8, %o0
197ENDPROC(U1_gs_8)
198ENTRY(U1_gs_10)
199 VISExitHalf
200 add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
201 retl
202 add %GLOBAL_SPARE, 0x10, %o0
203ENDPROC(U1_gs_10)
204ENTRY(U1_o2_0)
205 retl
206 mov %o2, %o0
207ENDPROC(U1_o2_0)
208ENTRY(U1_o2_8)
209 retl
210 add %o2, 8, %o0
211ENDPROC(U1_o2_8)
212ENTRY(U1_o2_4)
213 retl
214 add %o2, 4, %o0
215ENDPROC(U1_o2_4)
216ENTRY(U1_o2_1)
217 retl
218 add %o2, 1, %o0
219ENDPROC(U1_o2_1)
220ENTRY(U1_g1_0)
221 retl
222 add %g1, %o2, %o0
223ENDPROC(U1_g1_0)
224ENTRY(U1_g1_1)
225 add %g1, 1, %g1
226 retl
227 add %g1, %o2, %o0
228ENDPROC(U1_g1_1)
229ENTRY(U1_gs_0_o2_adj)
230 and %o2, 7, %o2
231 retl
232 add %GLOBAL_SPARE, %o2, %o0
233ENDPROC(U1_gs_0_o2_adj)
234ENTRY(U1_gs_8_o2_adj)
235 and %o2, 7, %o2
236 add %GLOBAL_SPARE, 8, %GLOBAL_SPARE
237 retl
238 add %GLOBAL_SPARE, %o2, %o0
239ENDPROC(U1_gs_8_o2_adj)
240#endif
241
129 .align 64 242 .align 64
130 243
131 .globl FUNC_NAME 244 .globl FUNC_NAME
@@ -167,8 +280,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
167 and %g2, 0x38, %g2 280 and %g2, 0x38, %g2
168 281
1691: subcc %g1, 0x1, %g1 2821: subcc %g1, 0x1, %g1
170 EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) 283 EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
171 EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE)) 284 EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
172 bgu,pt %XCC, 1b 285 bgu,pt %XCC, 1b
173 add %o1, 0x1, %o1 286 add %o1, 0x1, %o1
174 287
@@ -179,20 +292,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
179 be,pt %icc, 3f 292 be,pt %icc, 3f
180 alignaddr %o1, %g0, %o1 293 alignaddr %o1, %g0, %o1
181 294
182 EX_LD_FP(LOAD(ldd, %o1, %f4)) 295 EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
1831: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) 2961: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
184 add %o1, 0x8, %o1 297 add %o1, 0x8, %o1
185 subcc %g2, 0x8, %g2 298 subcc %g2, 0x8, %g2
186 faligndata %f4, %f6, %f0 299 faligndata %f4, %f6, %f0
187 EX_ST_FP(STORE(std, %f0, %o0)) 300 EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
188 be,pn %icc, 3f 301 be,pn %icc, 3f
189 add %o0, 0x8, %o0 302 add %o0, 0x8, %o0
190 303
191 EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) 304 EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
192 add %o1, 0x8, %o1 305 add %o1, 0x8, %o1
193 subcc %g2, 0x8, %g2 306 subcc %g2, 0x8, %g2
194 faligndata %f6, %f4, %f0 307 faligndata %f6, %f4, %f0
195 EX_ST_FP(STORE(std, %f0, %o0)) 308 EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
196 bne,pt %icc, 1b 309 bne,pt %icc, 1b
197 add %o0, 0x8, %o0 310 add %o0, 0x8, %o0
198 311
@@ -215,13 +328,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
215 add %g1, %GLOBAL_SPARE, %g1 328 add %g1, %GLOBAL_SPARE, %g1
216 subcc %o2, %g3, %o2 329 subcc %o2, %g3, %o2
217 330
218 EX_LD_FP(LOAD_BLK(%o1, %f0)) 331 EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
219 add %o1, 0x40, %o1 332 add %o1, 0x40, %o1
220 add %g1, %g3, %g1 333 add %g1, %g3, %g1
221 EX_LD_FP(LOAD_BLK(%o1, %f16)) 334 EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
222 add %o1, 0x40, %o1 335 add %o1, 0x40, %o1
223 sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE 336 sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
224 EX_LD_FP(LOAD_BLK(%o1, %f32)) 337 EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
225 add %o1, 0x40, %o1 338 add %o1, 0x40, %o1
226 339
227 /* There are 8 instances of the unrolled loop, 340 /* There are 8 instances of the unrolled loop,
@@ -241,11 +354,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
241 354
242 .align 64 355 .align 64
2431: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 3561: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
244 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 357 LOOP_CHUNK1(o1, o0, 1f)
245 FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 358 FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
246 LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) 359 LOOP_CHUNK2(o1, o0, 2f)
247 FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) 360 FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
248 LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) 361 LOOP_CHUNK3(o1, o0, 3f)
249 ba,pt %xcc, 1b+4 362 ba,pt %xcc, 1b+4
250 faligndata %f0, %f2, %f48 363 faligndata %f0, %f2, %f48
2511: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 3641: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
@@ -262,11 +375,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
262 STORE_JUMP(o0, f48, 56f) 375 STORE_JUMP(o0, f48, 56f)
263 376
2641: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 3771: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
265 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 378 LOOP_CHUNK1(o1, o0, 1f)
266 FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 379 FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
267 LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) 380 LOOP_CHUNK2(o1, o0, 2f)
268 FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) 381 FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
269 LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) 382 LOOP_CHUNK3(o1, o0, 3f)
270 ba,pt %xcc, 1b+4 383 ba,pt %xcc, 1b+4
271 faligndata %f2, %f4, %f48 384 faligndata %f2, %f4, %f48
2721: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 3851: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
@@ -283,11 +396,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
283 STORE_JUMP(o0, f48, 57f) 396 STORE_JUMP(o0, f48, 57f)
284 397
2851: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 3981: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
286 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 399 LOOP_CHUNK1(o1, o0, 1f)
287 FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 400 FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
288 LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) 401 LOOP_CHUNK2(o1, o0, 2f)
289 FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) 402 FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
290 LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) 403 LOOP_CHUNK3(o1, o0, 3f)
291 ba,pt %xcc, 1b+4 404 ba,pt %xcc, 1b+4
292 faligndata %f4, %f6, %f48 405 faligndata %f4, %f6, %f48
2931: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 4061: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
@@ -304,11 +417,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
304 STORE_JUMP(o0, f48, 58f) 417 STORE_JUMP(o0, f48, 58f)
305 418
3061: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 4191: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
307 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 420 LOOP_CHUNK1(o1, o0, 1f)
308 FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 421 FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
309 LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) 422 LOOP_CHUNK2(o1, o0, 2f)
310 FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 423 FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
311 LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) 424 LOOP_CHUNK3(o1, o0, 3f)
312 ba,pt %xcc, 1b+4 425 ba,pt %xcc, 1b+4
313 faligndata %f6, %f8, %f48 426 faligndata %f6, %f8, %f48
3141: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 4271: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
@@ -325,11 +438,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
325 STORE_JUMP(o0, f48, 59f) 438 STORE_JUMP(o0, f48, 59f)
326 439
3271: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 4401: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
328 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 441 LOOP_CHUNK1(o1, o0, 1f)
329 FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 442 FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
330 LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) 443 LOOP_CHUNK2(o1, o0, 2f)
331 FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) 444 FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
332 LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) 445 LOOP_CHUNK3(o1, o0, 3f)
333 ba,pt %xcc, 1b+4 446 ba,pt %xcc, 1b+4
334 faligndata %f8, %f10, %f48 447 faligndata %f8, %f10, %f48
3351: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 4481: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
@@ -346,11 +459,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
346 STORE_JUMP(o0, f48, 60f) 459 STORE_JUMP(o0, f48, 60f)
347 460
3481: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 4611: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
349 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 462 LOOP_CHUNK1(o1, o0, 1f)
350 FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 463 FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
351 LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) 464 LOOP_CHUNK2(o1, o0, 2f)
352 FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) 465 FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
353 LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) 466 LOOP_CHUNK3(o1, o0, 3f)
354 ba,pt %xcc, 1b+4 467 ba,pt %xcc, 1b+4
355 faligndata %f10, %f12, %f48 468 faligndata %f10, %f12, %f48
3561: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 4691: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
@@ -367,11 +480,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
367 STORE_JUMP(o0, f48, 61f) 480 STORE_JUMP(o0, f48, 61f)
368 481
3691: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 4821: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
370 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 483 LOOP_CHUNK1(o1, o0, 1f)
371 FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 484 FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
372 LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) 485 LOOP_CHUNK2(o1, o0, 2f)
373 FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) 486 FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
374 LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) 487 LOOP_CHUNK3(o1, o0, 3f)
375 ba,pt %xcc, 1b+4 488 ba,pt %xcc, 1b+4
376 faligndata %f12, %f14, %f48 489 faligndata %f12, %f14, %f48
3771: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 4901: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
@@ -388,11 +501,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
388 STORE_JUMP(o0, f48, 62f) 501 STORE_JUMP(o0, f48, 62f)
389 502
3901: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 5031: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
391 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 504 LOOP_CHUNK1(o1, o0, 1f)
392 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 505 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
393 LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) 506 LOOP_CHUNK2(o1, o0, 2f)
394 FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) 507 FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
395 LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) 508 LOOP_CHUNK3(o1, o0, 3f)
396 ba,pt %xcc, 1b+4 509 ba,pt %xcc, 1b+4
397 faligndata %f14, %f16, %f48 510 faligndata %f14, %f16, %f48
3981: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 5111: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
@@ -408,53 +521,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
408 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 521 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
409 STORE_JUMP(o0, f48, 63f) 522 STORE_JUMP(o0, f48, 63f)
410 523
41140: FINISH_VISCHUNK(o0, f0, f2, g3) 52440: FINISH_VISCHUNK(o0, f0, f2)
41241: FINISH_VISCHUNK(o0, f2, f4, g3) 52541: FINISH_VISCHUNK(o0, f2, f4)
41342: FINISH_VISCHUNK(o0, f4, f6, g3) 52642: FINISH_VISCHUNK(o0, f4, f6)
41443: FINISH_VISCHUNK(o0, f6, f8, g3) 52743: FINISH_VISCHUNK(o0, f6, f8)
41544: FINISH_VISCHUNK(o0, f8, f10, g3) 52844: FINISH_VISCHUNK(o0, f8, f10)
41645: FINISH_VISCHUNK(o0, f10, f12, g3) 52945: FINISH_VISCHUNK(o0, f10, f12)
41746: FINISH_VISCHUNK(o0, f12, f14, g3) 53046: FINISH_VISCHUNK(o0, f12, f14)
41847: UNEVEN_VISCHUNK(o0, f14, f0, g3) 53147: UNEVEN_VISCHUNK(o0, f14, f0)
41948: FINISH_VISCHUNK(o0, f16, f18, g3) 53248: FINISH_VISCHUNK(o0, f16, f18)
42049: FINISH_VISCHUNK(o0, f18, f20, g3) 53349: FINISH_VISCHUNK(o0, f18, f20)
42150: FINISH_VISCHUNK(o0, f20, f22, g3) 53450: FINISH_VISCHUNK(o0, f20, f22)
42251: FINISH_VISCHUNK(o0, f22, f24, g3) 53551: FINISH_VISCHUNK(o0, f22, f24)
42352: FINISH_VISCHUNK(o0, f24, f26, g3) 53652: FINISH_VISCHUNK(o0, f24, f26)
42453: FINISH_VISCHUNK(o0, f26, f28, g3) 53753: FINISH_VISCHUNK(o0, f26, f28)
42554: FINISH_VISCHUNK(o0, f28, f30, g3) 53854: FINISH_VISCHUNK(o0, f28, f30)
42655: UNEVEN_VISCHUNK(o0, f30, f0, g3) 53955: UNEVEN_VISCHUNK(o0, f30, f0)
42756: FINISH_VISCHUNK(o0, f32, f34, g3) 54056: FINISH_VISCHUNK(o0, f32, f34)
42857: FINISH_VISCHUNK(o0, f34, f36, g3) 54157: FINISH_VISCHUNK(o0, f34, f36)
42958: FINISH_VISCHUNK(o0, f36, f38, g3) 54258: FINISH_VISCHUNK(o0, f36, f38)
43059: FINISH_VISCHUNK(o0, f38, f40, g3) 54359: FINISH_VISCHUNK(o0, f38, f40)
43160: FINISH_VISCHUNK(o0, f40, f42, g3) 54460: FINISH_VISCHUNK(o0, f40, f42)
43261: FINISH_VISCHUNK(o0, f42, f44, g3) 54561: FINISH_VISCHUNK(o0, f42, f44)
43362: FINISH_VISCHUNK(o0, f44, f46, g3) 54662: FINISH_VISCHUNK(o0, f44, f46)
43463: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3) 54763: UNEVEN_VISCHUNK_LAST(o0, f46, f0)
435 548
43693: EX_LD_FP(LOAD(ldd, %o1, %f2)) 54993: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
437 add %o1, 8, %o1 550 add %o1, 8, %o1
438 subcc %g3, 8, %g3 551 subcc %g3, 8, %g3
439 faligndata %f0, %f2, %f8 552 faligndata %f0, %f2, %f8
440 EX_ST_FP(STORE(std, %f8, %o0)) 553 EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
441 bl,pn %xcc, 95f 554 bl,pn %xcc, 95f
442 add %o0, 8, %o0 555 add %o0, 8, %o0
443 EX_LD_FP(LOAD(ldd, %o1, %f0)) 556 EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
444 add %o1, 8, %o1 557 add %o1, 8, %o1
445 subcc %g3, 8, %g3 558 subcc %g3, 8, %g3
446 faligndata %f2, %f0, %f8 559 faligndata %f2, %f0, %f8
447 EX_ST_FP(STORE(std, %f8, %o0)) 560 EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
448 bge,pt %xcc, 93b 561 bge,pt %xcc, 93b
449 add %o0, 8, %o0 562 add %o0, 8, %o0
450 563
45195: brz,pt %o2, 2f 56495: brz,pt %o2, 2f
452 mov %g1, %o1 565 mov %g1, %o1
453 566
4541: EX_LD_FP(LOAD(ldub, %o1, %o3)) 5671: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
455 add %o1, 1, %o1 568 add %o1, 1, %o1
456 subcc %o2, 1, %o2 569 subcc %o2, 1, %o2
457 EX_ST_FP(STORE(stb, %o3, %o0)) 570 EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
458 bne,pt %xcc, 1b 571 bne,pt %xcc, 1b
459 add %o0, 1, %o0 572 add %o0, 1, %o0
460 573
@@ -470,27 +583,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
470 583
47172: andn %o2, 0xf, %GLOBAL_SPARE 58472: andn %o2, 0xf, %GLOBAL_SPARE
472 and %o2, 0xf, %o2 585 and %o2, 0xf, %o2
4731: EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) 5861: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
474 EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) 587 EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
475 subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE 588 subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
476 EX_ST(STORE(stx, %o5, %o1 + %o3)) 589 EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
477 add %o1, 0x8, %o1 590 add %o1, 0x8, %o1
478 EX_ST(STORE(stx, %g1, %o1 + %o3)) 591 EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
479 bgu,pt %XCC, 1b 592 bgu,pt %XCC, 1b
480 add %o1, 0x8, %o1 593 add %o1, 0x8, %o1
48173: andcc %o2, 0x8, %g0 59473: andcc %o2, 0x8, %g0
482 be,pt %XCC, 1f 595 be,pt %XCC, 1f
483 nop 596 nop
484 EX_LD(LOAD(ldx, %o1, %o5)) 597 EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
485 sub %o2, 0x8, %o2 598 sub %o2, 0x8, %o2
486 EX_ST(STORE(stx, %o5, %o1 + %o3)) 599 EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
487 add %o1, 0x8, %o1 600 add %o1, 0x8, %o1
4881: andcc %o2, 0x4, %g0 6011: andcc %o2, 0x4, %g0
489 be,pt %XCC, 1f 602 be,pt %XCC, 1f
490 nop 603 nop
491 EX_LD(LOAD(lduw, %o1, %o5)) 604 EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
492 sub %o2, 0x4, %o2 605 sub %o2, 0x4, %o2
493 EX_ST(STORE(stw, %o5, %o1 + %o3)) 606 EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
494 add %o1, 0x4, %o1 607 add %o1, 0x4, %o1
4951: cmp %o2, 0 6081: cmp %o2, 0
496 be,pt %XCC, 85f 609 be,pt %XCC, 85f
@@ -504,9 +617,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
504 sub %g0, %g1, %g1 617 sub %g0, %g1, %g1
505 sub %o2, %g1, %o2 618 sub %o2, %g1, %o2
506 619
5071: EX_LD(LOAD(ldub, %o1, %o5)) 6201: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
508 subcc %g1, 1, %g1 621 subcc %g1, 1, %g1
509 EX_ST(STORE(stb, %o5, %o1 + %o3)) 622 EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
510 bgu,pt %icc, 1b 623 bgu,pt %icc, 1b
511 add %o1, 1, %o1 624 add %o1, 1, %o1
512 625
@@ -522,16 +635,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
522 635
5238: mov 64, %o3 6368: mov 64, %o3
524 andn %o1, 0x7, %o1 637 andn %o1, 0x7, %o1
525 EX_LD(LOAD(ldx, %o1, %g2)) 638 EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
526 sub %o3, %g1, %o3 639 sub %o3, %g1, %o3
527 andn %o2, 0x7, %GLOBAL_SPARE 640 andn %o2, 0x7, %GLOBAL_SPARE
528 sllx %g2, %g1, %g2 641 sllx %g2, %g1, %g2
5291: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) 6421: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
530 subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE 643 subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
531 add %o1, 0x8, %o1 644 add %o1, 0x8, %o1
532 srlx %g3, %o3, %o5 645 srlx %g3, %o3, %o5
533 or %o5, %g2, %o5 646 or %o5, %g2, %o5
534 EX_ST(STORE(stx, %o5, %o0)) 647 EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
535 add %o0, 0x8, %o0 648 add %o0, 0x8, %o0
536 bgu,pt %icc, 1b 649 bgu,pt %icc, 1b
537 sllx %g3, %g1, %g2 650 sllx %g3, %g1, %g2
@@ -549,9 +662,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
549 bne,pn %XCC, 90f 662 bne,pn %XCC, 90f
550 sub %o0, %o1, %o3 663 sub %o0, %o1, %o3
551 664
5521: EX_LD(LOAD(lduw, %o1, %g1)) 6651: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
553 subcc %o2, 4, %o2 666 subcc %o2, 4, %o2
554 EX_ST(STORE(stw, %g1, %o1 + %o3)) 667 EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
555 bgu,pt %XCC, 1b 668 bgu,pt %XCC, 1b
556 add %o1, 4, %o1 669 add %o1, 4, %o1
557 670
@@ -559,9 +672,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
559 mov EX_RETVAL(%o4), %o0 672 mov EX_RETVAL(%o4), %o0
560 673
561 .align 32 674 .align 32
56290: EX_LD(LOAD(ldub, %o1, %g1)) 67590: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
563 subcc %o2, 1, %o2 676 subcc %o2, 1, %o2
564 EX_ST(STORE(stb, %g1, %o1 + %o3)) 677 EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
565 bgu,pt %XCC, 90b 678 bgu,pt %XCC, 90b
566 add %o1, 1, %o1 679 add %o1, 1, %o1
567 retl 680 retl
diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S
index 88ad73d86fe4..db73010a1af8 100644
--- a/arch/sparc/lib/U3copy_from_user.S
+++ b/arch/sparc/lib/U3copy_from_user.S
@@ -3,19 +3,19 @@
3 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) 3 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
4 */ 4 */
5 5
6#define EX_LD(x) \ 6#define EX_LD(x,y) \
798: x; \ 798: x; \
8 .section __ex_table,"a";\ 8 .section __ex_table,"a";\
9 .align 4; \ 9 .align 4; \
10 .word 98b, __retl_one; \ 10 .word 98b, y; \
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
14#define EX_LD_FP(x) \ 14#define EX_LD_FP(x,y) \
1598: x; \ 1598: x; \
16 .section __ex_table,"a";\ 16 .section __ex_table,"a";\
17 .align 4; \ 17 .align 4; \
18 .word 98b, __retl_one_fp;\ 18 .word 98b, y##_fp; \
19 .text; \ 19 .text; \
20 .align 4; 20 .align 4;
21 21
diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S
index 845139d75537..c4ee858e352a 100644
--- a/arch/sparc/lib/U3copy_to_user.S
+++ b/arch/sparc/lib/U3copy_to_user.S
@@ -3,19 +3,19 @@
3 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) 3 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
4 */ 4 */
5 5
6#define EX_ST(x) \ 6#define EX_ST(x,y) \
798: x; \ 798: x; \
8 .section __ex_table,"a";\ 8 .section __ex_table,"a";\
9 .align 4; \ 9 .align 4; \
10 .word 98b, __retl_one; \ 10 .word 98b, y; \
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
14#define EX_ST_FP(x) \ 14#define EX_ST_FP(x,y) \
1598: x; \ 1598: x; \
16 .section __ex_table,"a";\ 16 .section __ex_table,"a";\
17 .align 4; \ 17 .align 4; \
18 .word 98b, __retl_one_fp;\ 18 .word 98b, y##_fp; \
19 .text; \ 19 .text; \
20 .align 4; 20 .align 4;
21 21
diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S
index 491ee69e4995..54f98706b03b 100644
--- a/arch/sparc/lib/U3memcpy.S
+++ b/arch/sparc/lib/U3memcpy.S
@@ -4,6 +4,7 @@
4 */ 4 */
5 5
6#ifdef __KERNEL__ 6#ifdef __KERNEL__
7#include <linux/linkage.h>
7#include <asm/visasm.h> 8#include <asm/visasm.h>
8#include <asm/asi.h> 9#include <asm/asi.h>
9#define GLOBAL_SPARE %g7 10#define GLOBAL_SPARE %g7
@@ -22,21 +23,17 @@
22#endif 23#endif
23 24
24#ifndef EX_LD 25#ifndef EX_LD
25#define EX_LD(x) x 26#define EX_LD(x,y) x
26#endif 27#endif
27#ifndef EX_LD_FP 28#ifndef EX_LD_FP
28#define EX_LD_FP(x) x 29#define EX_LD_FP(x,y) x
29#endif 30#endif
30 31
31#ifndef EX_ST 32#ifndef EX_ST
32#define EX_ST(x) x 33#define EX_ST(x,y) x
33#endif 34#endif
34#ifndef EX_ST_FP 35#ifndef EX_ST_FP
35#define EX_ST_FP(x) x 36#define EX_ST_FP(x,y) x
36#endif
37
38#ifndef EX_RETVAL
39#define EX_RETVAL(x) x
40#endif 37#endif
41 38
42#ifndef LOAD 39#ifndef LOAD
@@ -77,6 +74,87 @@
77 */ 74 */
78 75
79 .text 76 .text
77#ifndef EX_RETVAL
78#define EX_RETVAL(x) x
/* Shared tail for the U3_retl_*_fp stubs: each of them branches here with
 * its result already placed in %o0 by the branch delay slot. Runs
 * VISExitHalf and returns to the caller.
 */
79__restore_fp:
80 VISExitHalf
81 retl
82 nop
/* Fixup stub for the byte-alignment loop, whose ldub/stb execute after
 * %g1 has been decremented by 1. Sets %o0 = %o2 + %g2 + %g1 + 1 in the
 * branch delay slot and exits through __restore_fp.
 */
83ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
84 add %g1, 1, %g1
85 add %g2, %g1, %g2
86 ba,pt %xcc, __restore_fp
87 add %o2, %g2, %o0
88ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
/* Fixup stub for ldd loads in the 8-byte FP loops: sets
 * %o0 = %o2 + %g2 in the branch delay slot and exits through __restore_fp.
 */
89ENTRY(U3_retl_o2_plus_g2_fp)
90 ba,pt %xcc, __restore_fp
91 add %o2, %g2, %o0
92ENDPROC(U3_retl_o2_plus_g2_fp)
/* Fixup stub for std stores in the 8-byte FP loops, which execute after
 * %g2 has been decremented by 8: sets %o0 = %o2 + %g2 + 8 in the branch
 * delay slot and exits through __restore_fp.
 */
93ENTRY(U3_retl_o2_plus_g2_plus_8_fp)
94 add %g2, 8, %g2
95 ba,pt %xcc, __restore_fp
96 add %o2, %g2, %o0
97ENDPROC(U3_retl_o2_plus_g2_plus_8_fp)
/* Fixup stub: returns %o2 unchanged in %o0 (mov is in the retl delay slot). */
98ENTRY(U3_retl_o2)
99 retl
100 mov %o2, %o0
101ENDPROC(U3_retl_o2)
/* Fixup stub: returns %o2 + 1 in %o0 (add is in the retl delay slot). */
102ENTRY(U3_retl_o2_plus_1)
103 retl
104 add %o2, 1, %o0
105ENDPROC(U3_retl_o2_plus_1)
/* Fixup stub: returns %o2 + 4 in %o0. Used on the lduw/stw pair that
 * follows "sub %o2, 0x4, %o2", so the 4 is added back.
 */
106ENTRY(U3_retl_o2_plus_4)
107 retl
108 add %o2, 4, %o0
109ENDPROC(U3_retl_o2_plus_4)
/* Fixup stub: returns %o2 + 8 in %o0. Used on the ldx/stx pair that
 * follows "sub %o2, 0x8, %o2", so the 8 is added back.
 */
110ENTRY(U3_retl_o2_plus_8)
111 retl
112 add %o2, 8, %o0
113ENDPROC(U3_retl_o2_plus_8)
/* Fixup stub for the integer byte-alignment loop, whose ldub/stb execute
 * after %g1 has been decremented by 1: returns %o2 + %g1 + 1 in %o0.
 */
114ENTRY(U3_retl_o2_plus_g1_plus_1)
115 add %g1, 1, %g1
116 retl
117 add %o2, %g1, %o0
118ENDPROC(U3_retl_o2_plus_g1_plus_1)
/* Fixup stub for the initial ldd preloads of the block loop: sets
 * %o0 = %o2 in the branch delay slot and exits through __restore_fp.
 */
119ENTRY(U3_retl_o2_fp)
120 ba,pt %xcc, __restore_fp
121 mov %o2, %o0
122ENDPROC(U3_retl_o2_fp)
/* Fixup stub for the 64-byte block loop: sets
 * %o0 = %o2 + (%o3 << 6) + 0x80 in the branch delay slot and exits
 * through __restore_fp.
 * NOTE(review): %o3 looks like a count of remaining 64-byte blocks (it is
 * decremented once per loop pass) -- confirm against the loop setup.
 */
123ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
124 sll %o3, 6, %o3
125 add %o3, 0x80, %o3
126 ba,pt %xcc, __restore_fp
127 add %o2, %o3, %o0
128ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
/* Fixup stub for the 64-byte block loop, used on accesses that follow the
 * block store: sets %o0 = %o2 + (%o3 << 6) + 0x40 in the branch delay
 * slot and exits through __restore_fp.
 */
129ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
130 sll %o3, 6, %o3
131 add %o3, 0x40, %o3
132 ba,pt %xcc, __restore_fp
133 add %o2, %o3, %o0
134ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
/* Fixup stub for the 16-byte loop, whose loads and first stx execute
 * after GLOBAL_SPARE was reduced by 0x10: returns
 * %o2 + GLOBAL_SPARE + 0x10 in %o0.
 */
135ENTRY(U3_retl_o2_plus_GS_plus_0x10)
136 add GLOBAL_SPARE, 0x10, GLOBAL_SPARE
137 retl
138 add %o2, GLOBAL_SPARE, %o0
139ENDPROC(U3_retl_o2_plus_GS_plus_0x10)
/* Fixup stub for the second stx of the 16-byte loop (the first 8 bytes
 * are already stored): returns %o2 + GLOBAL_SPARE + 0x08 in %o0.
 */
140ENTRY(U3_retl_o2_plus_GS_plus_0x08)
141 add GLOBAL_SPARE, 0x08, GLOBAL_SPARE
142 retl
143 add %o2, GLOBAL_SPARE, %o0
144ENDPROC(U3_retl_o2_plus_GS_plus_0x08)
/* Fixup stub: returns (%o2 & 7) + GLOBAL_SPARE in %o0.
 *
 * The sum must be written to %o0, the register every other U3_retl_*
 * stub returns its result in. Writing it to %o2 left %o0 holding
 * whatever it contained at the fault (FUNC_NAME keeps dst there), so the
 * caller received a garbage residual length.
 */
145ENTRY(U3_retl_o2_and_7_plus_GS)
146 and %o2, 7, %o2
147 retl
148 add %o2, GLOBAL_SPARE, %o0
149ENDPROC(U3_retl_o2_and_7_plus_GS)
/* Fixup stub: returns (%o2 & 7) + GLOBAL_SPARE + 8 in %o0.
 *
 * The sum must be written to %o0, the register every other U3_retl_*
 * stub returns its result in. Writing it to %o2 left %o0 holding
 * whatever it contained at the fault (FUNC_NAME keeps dst there), so the
 * caller received a garbage residual length.
 */
150ENTRY(U3_retl_o2_and_7_plus_GS_plus_8)
151 add GLOBAL_SPARE, 8, GLOBAL_SPARE
152 and %o2, 7, %o2
153 retl
154 add %o2, GLOBAL_SPARE, %o0
155ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8)
156#endif
157
80 .align 64 158 .align 64
81 159
82 /* The cheetah's flexible spine, oversized liver, enlarged heart, 160 /* The cheetah's flexible spine, oversized liver, enlarged heart,
@@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
126 and %g2, 0x38, %g2 204 and %g2, 0x38, %g2
127 205
1281: subcc %g1, 0x1, %g1 2061: subcc %g1, 0x1, %g1
129 EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) 207 EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1)
130 EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE)) 208 EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1)
131 bgu,pt %XCC, 1b 209 bgu,pt %XCC, 1b
132 add %o1, 0x1, %o1 210 add %o1, 0x1, %o1
133 211
@@ -138,20 +216,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
138 be,pt %icc, 3f 216 be,pt %icc, 3f
139 alignaddr %o1, %g0, %o1 217 alignaddr %o1, %g0, %o1
140 218
141 EX_LD_FP(LOAD(ldd, %o1, %f4)) 219 EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2)
1421: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) 2201: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2)
143 add %o1, 0x8, %o1 221 add %o1, 0x8, %o1
144 subcc %g2, 0x8, %g2 222 subcc %g2, 0x8, %g2
145 faligndata %f4, %f6, %f0 223 faligndata %f4, %f6, %f0
146 EX_ST_FP(STORE(std, %f0, %o0)) 224 EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8)
147 be,pn %icc, 3f 225 be,pn %icc, 3f
148 add %o0, 0x8, %o0 226 add %o0, 0x8, %o0
149 227
150 EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) 228 EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2)
151 add %o1, 0x8, %o1 229 add %o1, 0x8, %o1
152 subcc %g2, 0x8, %g2 230 subcc %g2, 0x8, %g2
153 faligndata %f6, %f4, %f2 231 faligndata %f6, %f4, %f2
154 EX_ST_FP(STORE(std, %f2, %o0)) 232 EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8)
155 bne,pt %icc, 1b 233 bne,pt %icc, 1b
156 add %o0, 0x8, %o0 234 add %o0, 0x8, %o0
157 235
@@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
161 LOAD(prefetch, %o1 + 0x080, #one_read) 239 LOAD(prefetch, %o1 + 0x080, #one_read)
162 LOAD(prefetch, %o1 + 0x0c0, #one_read) 240 LOAD(prefetch, %o1 + 0x0c0, #one_read)
163 LOAD(prefetch, %o1 + 0x100, #one_read) 241 LOAD(prefetch, %o1 + 0x100, #one_read)
164 EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0)) 242 EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2)
165 LOAD(prefetch, %o1 + 0x140, #one_read) 243 LOAD(prefetch, %o1 + 0x140, #one_read)
166 EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) 244 EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2)
167 LOAD(prefetch, %o1 + 0x180, #one_read) 245 LOAD(prefetch, %o1 + 0x180, #one_read)
168 EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) 246 EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2)
169 LOAD(prefetch, %o1 + 0x1c0, #one_read) 247 LOAD(prefetch, %o1 + 0x1c0, #one_read)
170 faligndata %f0, %f2, %f16 248 faligndata %f0, %f2, %f16
171 EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) 249 EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2)
172 faligndata %f2, %f4, %f18 250 faligndata %f2, %f4, %f18
173 EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) 251 EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2)
174 faligndata %f4, %f6, %f20 252 faligndata %f4, %f6, %f20
175 EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) 253 EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2)
176 faligndata %f6, %f8, %f22 254 faligndata %f6, %f8, %f22
177 255
178 EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) 256 EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2)
179 faligndata %f8, %f10, %f24 257 faligndata %f8, %f10, %f24
180 EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) 258 EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2)
181 faligndata %f10, %f12, %f26 259 faligndata %f10, %f12, %f26
182 EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) 260 EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2)
183 261
184 subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE 262 subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE
185 add %o1, 0x40, %o1 263 add %o1, 0x40, %o1
@@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
190 268
191 .align 64 269 .align 64
1921: 2701:
193 EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) 271 EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
194 faligndata %f12, %f14, %f28 272 faligndata %f12, %f14, %f28
195 EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) 273 EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
196 faligndata %f14, %f0, %f30 274 faligndata %f14, %f0, %f30
197 EX_ST_FP(STORE_BLK(%f16, %o0)) 275 EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
198 EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) 276 EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
199 faligndata %f0, %f2, %f16 277 faligndata %f0, %f2, %f16
200 add %o0, 0x40, %o0 278 add %o0, 0x40, %o0
201 279
202 EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) 280 EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
203 faligndata %f2, %f4, %f18 281 faligndata %f2, %f4, %f18
204 EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) 282 EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
205 faligndata %f4, %f6, %f20 283 faligndata %f4, %f6, %f20
206 EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) 284 EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
207 subcc %o3, 0x01, %o3 285 subcc %o3, 0x01, %o3
208 faligndata %f6, %f8, %f22 286 faligndata %f6, %f8, %f22
209 EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) 287 EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80)
210 288
211 faligndata %f8, %f10, %f24 289 faligndata %f8, %f10, %f24
212 EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) 290 EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
213 LOAD(prefetch, %o1 + 0x1c0, #one_read) 291 LOAD(prefetch, %o1 + 0x1c0, #one_read)
214 faligndata %f10, %f12, %f26 292 faligndata %f10, %f12, %f26
215 bg,pt %XCC, 1b 293 bg,pt %XCC, 1b
@@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
217 295
218 /* Finally we copy the last full 64-byte block. */ 296 /* Finally we copy the last full 64-byte block. */
2192: 2972:
220 EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) 298 EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
221 faligndata %f12, %f14, %f28 299 faligndata %f12, %f14, %f28
222 EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) 300 EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
223 faligndata %f14, %f0, %f30 301 faligndata %f14, %f0, %f30
224 EX_ST_FP(STORE_BLK(%f16, %o0)) 302 EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
225 EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) 303 EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
226 faligndata %f0, %f2, %f16 304 faligndata %f0, %f2, %f16
227 EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) 305 EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
228 faligndata %f2, %f4, %f18 306 faligndata %f2, %f4, %f18
229 EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) 307 EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
230 faligndata %f4, %f6, %f20 308 faligndata %f4, %f6, %f20
231 EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) 309 EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
232 faligndata %f6, %f8, %f22 310 faligndata %f6, %f8, %f22
233 EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) 311 EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40)
234 faligndata %f8, %f10, %f24 312 faligndata %f8, %f10, %f24
235 cmp %g1, 0 313 cmp %g1, 0
236 be,pt %XCC, 1f 314 be,pt %XCC, 1f
237 add %o0, 0x40, %o0 315 add %o0, 0x40, %o0
238 EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) 316 EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
2391: faligndata %f10, %f12, %f26 3171: faligndata %f10, %f12, %f26
240 faligndata %f12, %f14, %f28 318 faligndata %f12, %f14, %f28
241 faligndata %f14, %f0, %f30 319 faligndata %f14, %f0, %f30
242 EX_ST_FP(STORE_BLK(%f16, %o0)) 320 EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
243 add %o0, 0x40, %o0 321 add %o0, 0x40, %o0
244 add %o1, 0x40, %o1 322 add %o1, 0x40, %o1
245 membar #Sync 323 membar #Sync
@@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
259 337
260 sub %o2, %g2, %o2 338 sub %o2, %g2, %o2
261 be,a,pt %XCC, 1f 339 be,a,pt %XCC, 1f
262 EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0)) 340 EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2)
263 341
2641: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2)) 3421: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2)
265 add %o1, 0x8, %o1 343 add %o1, 0x8, %o1
266 subcc %g2, 0x8, %g2 344 subcc %g2, 0x8, %g2
267 faligndata %f0, %f2, %f8 345 faligndata %f0, %f2, %f8
268 EX_ST_FP(STORE(std, %f8, %o0)) 346 EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
269 be,pn %XCC, 2f 347 be,pn %XCC, 2f
270 add %o0, 0x8, %o0 348 add %o0, 0x8, %o0
271 EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0)) 349 EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2)
272 add %o1, 0x8, %o1 350 add %o1, 0x8, %o1
273 subcc %g2, 0x8, %g2 351 subcc %g2, 0x8, %g2
274 faligndata %f2, %f0, %f8 352 faligndata %f2, %f0, %f8
275 EX_ST_FP(STORE(std, %f8, %o0)) 353 EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
276 bne,pn %XCC, 1b 354 bne,pn %XCC, 1b
277 add %o0, 0x8, %o0 355 add %o0, 0x8, %o0
278 356
@@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
292 andcc %o2, 0x8, %g0 370 andcc %o2, 0x8, %g0
293 be,pt %icc, 1f 371 be,pt %icc, 1f
294 nop 372 nop
295 EX_LD(LOAD(ldx, %o1, %o5)) 373 EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2)
296 EX_ST(STORE(stx, %o5, %o1 + %o3)) 374 EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2)
297 add %o1, 0x8, %o1 375 add %o1, 0x8, %o1
376 sub %o2, 8, %o2
298 377
2991: andcc %o2, 0x4, %g0 3781: andcc %o2, 0x4, %g0
300 be,pt %icc, 1f 379 be,pt %icc, 1f
301 nop 380 nop
302 EX_LD(LOAD(lduw, %o1, %o5)) 381 EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2)
303 EX_ST(STORE(stw, %o5, %o1 + %o3)) 382 EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2)
304 add %o1, 0x4, %o1 383 add %o1, 0x4, %o1
384 sub %o2, 4, %o2
305 385
3061: andcc %o2, 0x2, %g0 3861: andcc %o2, 0x2, %g0
307 be,pt %icc, 1f 387 be,pt %icc, 1f
308 nop 388 nop
309 EX_LD(LOAD(lduh, %o1, %o5)) 389 EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2)
310 EX_ST(STORE(sth, %o5, %o1 + %o3)) 390 EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2)
311 add %o1, 0x2, %o1 391 add %o1, 0x2, %o1
392 sub %o2, 2, %o2
312 393
3131: andcc %o2, 0x1, %g0 3941: andcc %o2, 0x1, %g0
314 be,pt %icc, 85f 395 be,pt %icc, 85f
315 nop 396 nop
316 EX_LD(LOAD(ldub, %o1, %o5)) 397 EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2)
317 ba,pt %xcc, 85f 398 ba,pt %xcc, 85f
318 EX_ST(STORE(stb, %o5, %o1 + %o3)) 399 EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2)
319 400
320 .align 64 401 .align 64
32170: /* 16 < len <= 64 */ 40270: /* 16 < len <= 64 */
@@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
326 andn %o2, 0xf, GLOBAL_SPARE 407 andn %o2, 0xf, GLOBAL_SPARE
327 and %o2, 0xf, %o2 408 and %o2, 0xf, %o2
3281: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE 4091: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE
329 EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) 410 EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10)
330 EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) 411 EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10)
331 EX_ST(STORE(stx, %o5, %o1 + %o3)) 412 EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10)
332 add %o1, 0x8, %o1 413 add %o1, 0x8, %o1
333 EX_ST(STORE(stx, %g1, %o1 + %o3)) 414 EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08)
334 bgu,pt %XCC, 1b 415 bgu,pt %XCC, 1b
335 add %o1, 0x8, %o1 416 add %o1, 0x8, %o1
33673: andcc %o2, 0x8, %g0 41773: andcc %o2, 0x8, %g0
337 be,pt %XCC, 1f 418 be,pt %XCC, 1f
338 nop 419 nop
339 sub %o2, 0x8, %o2 420 sub %o2, 0x8, %o2
340 EX_LD(LOAD(ldx, %o1, %o5)) 421 EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8)
341 EX_ST(STORE(stx, %o5, %o1 + %o3)) 422 EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8)
342 add %o1, 0x8, %o1 423 add %o1, 0x8, %o1
3431: andcc %o2, 0x4, %g0 4241: andcc %o2, 0x4, %g0
344 be,pt %XCC, 1f 425 be,pt %XCC, 1f
345 nop 426 nop
346 sub %o2, 0x4, %o2 427 sub %o2, 0x4, %o2
347 EX_LD(LOAD(lduw, %o1, %o5)) 428 EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4)
348 EX_ST(STORE(stw, %o5, %o1 + %o3)) 429 EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4)
349 add %o1, 0x4, %o1 430 add %o1, 0x4, %o1
3501: cmp %o2, 0 4311: cmp %o2, 0
351 be,pt %XCC, 85f 432 be,pt %XCC, 85f
@@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
361 sub %o2, %g1, %o2 442 sub %o2, %g1, %o2
362 443
3631: subcc %g1, 1, %g1 4441: subcc %g1, 1, %g1
364 EX_LD(LOAD(ldub, %o1, %o5)) 445 EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1)
365 EX_ST(STORE(stb, %o5, %o1 + %o3)) 446 EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1)
366 bgu,pt %icc, 1b 447 bgu,pt %icc, 1b
367 add %o1, 1, %o1 448 add %o1, 1, %o1
368 449
@@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
378 459
3798: mov 64, %o3 4608: mov 64, %o3
380 andn %o1, 0x7, %o1 461 andn %o1, 0x7, %o1
381 EX_LD(LOAD(ldx, %o1, %g2)) 462 EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2)
382 sub %o3, %g1, %o3 463 sub %o3, %g1, %o3
383 andn %o2, 0x7, GLOBAL_SPARE 464 andn %o2, 0x7, GLOBAL_SPARE
384 sllx %g2, %g1, %g2 465 sllx %g2, %g1, %g2
3851: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) 4661: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS)
386 subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE 467 subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE
387 add %o1, 0x8, %o1 468 add %o1, 0x8, %o1
388 srlx %g3, %o3, %o5 469 srlx %g3, %o3, %o5
389 or %o5, %g2, %o5 470 or %o5, %g2, %o5
390 EX_ST(STORE(stx, %o5, %o0)) 471 EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8)
391 add %o0, 0x8, %o0 472 add %o0, 0x8, %o0
392 bgu,pt %icc, 1b 473 bgu,pt %icc, 1b
393 sllx %g3, %g1, %g2 474 sllx %g3, %g1, %g2
@@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
407 488
4081: 4891:
409 subcc %o2, 4, %o2 490 subcc %o2, 4, %o2
410 EX_LD(LOAD(lduw, %o1, %g1)) 491 EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4)
411 EX_ST(STORE(stw, %g1, %o1 + %o3)) 492 EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4)
412 bgu,pt %XCC, 1b 493 bgu,pt %XCC, 1b
413 add %o1, 4, %o1 494 add %o1, 4, %o1
414 495
@@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
418 .align 32 499 .align 32
41990: 50090:
420 subcc %o2, 1, %o2 501 subcc %o2, 1, %o2
421 EX_LD(LOAD(ldub, %o1, %g1)) 502 EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1)
422 EX_ST(STORE(stb, %g1, %o1 + %o3)) 503 EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1)
423 bgu,pt %XCC, 90b 504 bgu,pt %XCC, 90b
424 add %o1, 1, %o1 505 add %o1, 1, %o1
425 retl 506 retl
diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S
index 482de093bdae..0252b218de45 100644
--- a/arch/sparc/lib/copy_in_user.S
+++ b/arch/sparc/lib/copy_in_user.S
@@ -9,18 +9,33 @@
9 9
10#define XCC xcc 10#define XCC xcc
11 11
12#define EX(x,y) \ 12#define EX(x,y,z) \
1398: x,y; \ 1398: x,y; \
14 .section __ex_table,"a";\ 14 .section __ex_table,"a";\
15 .align 4; \ 15 .align 4; \
16 .word 98b, __retl_one; \ 16 .word 98b, z; \
17 .text; \ 17 .text; \
18 .align 4; 18 .align 4;
19 19
20#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8)
21#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4)
22#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1)
23
20 .register %g2,#scratch 24 .register %g2,#scratch
21 .register %g3,#scratch 25 .register %g3,#scratch
22 26
23 .text 27 .text
/*
 * Exception-table fixup targets used by the EX_O4 / EX_O2_4 / EX_O2_1
 * wrappers defined above.  Each stub returns to the caller with retl and
 * computes the return value in %o0 in the retl delay slot.  Per the commit
 * description this value is the residual (not yet copied) length.
 */
28__retl_o4_plus_8:
29 add %o4, %o2, %o4 /* %o4 = 8-byte-loop count + tail count */
30 retl
31 add %o4, 8, %o0 /* delay slot: %o0 = %o4 + %o2 + 8 */
32__retl_o2_plus_4:
33 retl
34 add %o2, 4, %o0 /* delay slot: %o0 = %o2 + 4 */
35__retl_o2_plus_1:
36 retl
37 add %o2, 1, %o0 /* delay slot: %o0 = %o2 + 1 */
38
24 .align 32 39 .align 32
25 40
26 /* Don't try to get too fancy here, just nice and 41 /* Don't try to get too fancy here, just nice and
@@ -45,8 +60,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
45 andn %o2, 0x7, %o4 60 andn %o2, 0x7, %o4
46 and %o2, 0x7, %o2 61 and %o2, 0x7, %o2
471: subcc %o4, 0x8, %o4 621: subcc %o4, 0x8, %o4
48 EX(ldxa [%o1] %asi, %o5) 63 EX_O4(ldxa [%o1] %asi, %o5)
49 EX(stxa %o5, [%o0] %asi) 64 EX_O4(stxa %o5, [%o0] %asi)
50 add %o1, 0x8, %o1 65 add %o1, 0x8, %o1
51 bgu,pt %XCC, 1b 66 bgu,pt %XCC, 1b
52 add %o0, 0x8, %o0 67 add %o0, 0x8, %o0
@@ -54,8 +69,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
54 be,pt %XCC, 1f 69 be,pt %XCC, 1f
55 nop 70 nop
56 sub %o2, 0x4, %o2 71 sub %o2, 0x4, %o2
57 EX(lduwa [%o1] %asi, %o5) 72 EX_O2_4(lduwa [%o1] %asi, %o5)
58 EX(stwa %o5, [%o0] %asi) 73 EX_O2_4(stwa %o5, [%o0] %asi)
59 add %o1, 0x4, %o1 74 add %o1, 0x4, %o1
60 add %o0, 0x4, %o0 75 add %o0, 0x4, %o0
611: cmp %o2, 0 761: cmp %o2, 0
@@ -71,8 +86,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
71 86
7282: 8782:
73 subcc %o2, 4, %o2 88 subcc %o2, 4, %o2
74 EX(lduwa [%o1] %asi, %g1) 89 EX_O2_4(lduwa [%o1] %asi, %g1)
75 EX(stwa %g1, [%o0] %asi) 90 EX_O2_4(stwa %g1, [%o0] %asi)
76 add %o1, 4, %o1 91 add %o1, 4, %o1
77 bgu,pt %XCC, 82b 92 bgu,pt %XCC, 82b
78 add %o0, 4, %o0 93 add %o0, 4, %o0
@@ -83,8 +98,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
83 .align 32 98 .align 32
8490: 9990:
85 subcc %o2, 1, %o2 100 subcc %o2, 1, %o2
86 EX(lduba [%o1] %asi, %g1) 101 EX_O2_1(lduba [%o1] %asi, %g1)
87 EX(stba %g1, [%o0] %asi) 102 EX_O2_1(stba %g1, [%o0] %asi)
88 add %o1, 1, %o1 103 add %o1, 1, %o1
89 bgu,pt %XCC, 90b 104 bgu,pt %XCC, 90b
90 add %o0, 1, %o0 105 add %o0, 1, %o0
diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c
deleted file mode 100644
index ac96ae236709..000000000000
--- a/arch/sparc/lib/user_fixup.c
+++ /dev/null
@@ -1,71 +0,0 @@
1/* user_fixup.c: Fix up user copy faults.
2 *
3 * Copyright (C) 2004 David S. Miller <davem@redhat.com>
4 */
5
6#include <linux/compiler.h>
7#include <linux/kernel.h>
8#include <linux/string.h>
9#include <linux/errno.h>
10#include <linux/module.h>
11
12#include <asm/uaccess.h>
13
14/* Calculating the exact fault address when using
15 * block loads and stores can be very complicated.
16 *
17 * Instead of trying to be clever and handling all
18 * of the cases, just fix things up simply here.
19 */
20
/*
 * compute_size - split a copy range at the recorded fault address.
 * @start:  first address of the range being examined
 * @size:   length of that range in bytes
 * @offset: out-parameter; set to fault_addr - start when the fault address
 *          lies inside [start, start + size), otherwise set to 0
 *
 * Returns the number of bytes from the fault address to the end of the
 * range, or @size unchanged when the fault address is outside the range.
 */
21static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset)
22{
23 unsigned long fault_addr = current_thread_info()->fault_address;
24 unsigned long end = start + size;
25
26 if (fault_addr < start || fault_addr >= end) {
27 *offset = 0;
28 } else {
29 *offset = fault_addr - start;
30 size = end - fault_addr;
31 }
32 return size;
33}
34
/*
 * copy_from_user_fixup - fault fixup for a copy from @from into @to.
 *
 * Uses compute_size() on the source range to find how many bytes lie at or
 * beyond the fault address, zeroes that many bytes of the destination
 * starting at the matching offset, and returns that byte count.  When the
 * fault address is outside the source range the offset is 0 and the whole
 * @size bytes of @to are zeroed.
 */
35unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size)
36{
37 unsigned long offset;
38
39 size = compute_size((unsigned long) from, size, &offset);
40 if (likely(size))
41 memset(to + offset, 0, size);
42
43 return size;
44}
45EXPORT_SYMBOL(copy_from_user_fixup);
46
/*
 * copy_to_user_fixup - fault fixup for a copy from @from into @to.
 *
 * Returns compute_size() evaluated on the destination range; the offset it
 * reports is not used, and @from is not examined.
 */
47unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size)
48{
49 unsigned long offset;
50
51 return compute_size((unsigned long) to, size, &offset);
52}
53EXPORT_SYMBOL(copy_to_user_fixup);
54
/*
 * copy_in_user_fixup - fault fixup for a copy between two user ranges.
 *
 * Checks the recorded fault address against the destination range first and
 * then against the source range.  For whichever range contains it, returns
 * the number of bytes from the fault address to the end of that range.
 * Returns @size unchanged when the fault address is in neither range.
 */
55unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size)
56{
57 unsigned long fault_addr = current_thread_info()->fault_address;
58 unsigned long start = (unsigned long) to;
59 unsigned long end = start + size;
60
61 if (fault_addr >= start && fault_addr < end)
62 return end - fault_addr;
63
64 start = (unsigned long) from; /* retry the check against the source range */
65 end = start + size;
66 if (fault_addr >= start && fault_addr < end)
67 return end - fault_addr;
68
69 return size;
70}
71EXPORT_SYMBOL(copy_in_user_fixup);
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index f2b77112e9d8..e20fbbafb0b0 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr)
27 return (tag == (vaddr >> 22)); 27 return (tag == (vaddr >> 22));
28} 28}
29 29
/*
 * flush_tsb_kernel_range_scan - invalidate kernel TSB entries by scanning.
 * @start: first address of the range (inclusive)
 * @end:   end of the range (exclusive)
 *
 * Visits every one of the KERNEL_TSB_NENTRIES slots of swapper_tsb, rebuilds
 * an address from the slot index and the stored tag, and marks the slot
 * invalid when that address falls inside [start, end).  The cost depends on
 * the table size rather than on the size of the range.
 */
30static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end)
31{
32 unsigned long idx;
33
34 for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) {
35 struct tsb *ent = &swapper_tsb[idx];
/* NOTE(review): 13 looks like the base page shift - confirm against PAGE_SHIFT */
36 unsigned long match = idx << 13;
37
/* the tag holds the address bits from bit 22 up, mirroring tag_compare() */
38 match |= (ent->tag << 22);
39 if (match >= start && match < end)
40 ent->tag = (1UL << TSB_TAG_INVALID_BIT);
41 }
42}
43
30/* TSB flushes need only occur on the processor initiating the address 44/* TSB flushes need only occur on the processor initiating the address
31 * space modification, not on each cpu the address space has run on. 45 * space modification, not on each cpu the address space has run on.
32 * Only the TLB flush needs that treatment. 46 * Only the TLB flush needs that treatment.
@@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
36{ 50{
37 unsigned long v; 51 unsigned long v;
38 52
53 if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES)
54 return flush_tsb_kernel_range_scan(start, end);
55
39 for (v = start; v < end; v += PAGE_SIZE) { 56 for (v = start; v < end; v += PAGE_SIZE) {
40 unsigned long hash = tsb_hash(v, PAGE_SHIFT, 57 unsigned long hash = tsb_hash(v, PAGE_SHIFT,
41 KERNEL_TSB_NENTRIES); 58 KERNEL_TSB_NENTRIES);
diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S
index b4f4733abc6e..5d2fd6cd3189 100644
--- a/arch/sparc/mm/ultra.S
+++ b/arch/sparc/mm/ultra.S
@@ -30,7 +30,7 @@
30 .text 30 .text
31 .align 32 31 .align 32
32 .globl __flush_tlb_mm 32 .globl __flush_tlb_mm
33__flush_tlb_mm: /* 18 insns */ 33__flush_tlb_mm: /* 19 insns */
34 /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ 34 /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
35 ldxa [%o1] ASI_DMMU, %g2 35 ldxa [%o1] ASI_DMMU, %g2
36 cmp %g2, %o0 36 cmp %g2, %o0
@@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */
81 81
82 .align 32 82 .align 32
83 .globl __flush_tlb_pending 83 .globl __flush_tlb_pending
84__flush_tlb_pending: /* 26 insns */ 84__flush_tlb_pending: /* 27 insns */
85 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ 85 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
86 rdpr %pstate, %g7 86 rdpr %pstate, %g7
87 sllx %o1, 3, %o1 87 sllx %o1, 3, %o1
@@ -113,12 +113,14 @@ __flush_tlb_pending: /* 26 insns */
113 113
114 .align 32 114 .align 32
115 .globl __flush_tlb_kernel_range 115 .globl __flush_tlb_kernel_range
116__flush_tlb_kernel_range: /* 16 insns */ 116__flush_tlb_kernel_range: /* 31 insns */
117 /* %o0=start, %o1=end */ 117 /* %o0=start, %o1=end */
118 cmp %o0, %o1 118 cmp %o0, %o1
119 be,pn %xcc, 2f 119 be,pn %xcc, 2f
120 sub %o1, %o0, %o3
121 srlx %o3, 18, %o4
122 brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow
120 sethi %hi(PAGE_SIZE), %o4 123 sethi %hi(PAGE_SIZE), %o4
121 sub %o1, %o0, %o3
122 sub %o3, %o4, %o3 124 sub %o3, %o4, %o3
123 or %o0, 0x20, %o0 ! Nucleus 125 or %o0, 0x20, %o0 ! Nucleus
1241: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP 1261: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
@@ -131,6 +133,41 @@ __flush_tlb_kernel_range: /* 16 insns */
131 retl 133 retl
132 nop 134 nop
133 nop 135 nop
136 nop
137 nop
138 nop
139 nop
140 nop
141 nop
142 nop
143 nop
144 nop
145 nop
146 nop
147 nop
148 nop
149
/*
 * Slow path of __flush_tlb_kernel_range, entered when (end - start) >> 18
 * is non-zero.  Instead of demapping page by page it walks the TLB
 * data-access offsets from 63 * 8 down to 0 in steps of 8, first in the
 * I-TLB and then in the D-TLB.  A slot whose data word has bit 0x40 set
 * (_PAGE_L_4U, presumably the "locked" bit - confirm) is skipped; any other
 * slot is cleared by storing zero to TLB_TAG_ACCESS and then to the slot.
 * Uses %o3 and %o4 as scratch and returns with retl.
 */
150__spitfire_flush_tlb_kernel_range_slow:
151 mov 63 * 8, %o4
1521: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3
153 andcc %o3, 0x40, %g0 /* _PAGE_L_4U */
154 bne,pn %xcc, 2f
155 mov TLB_TAG_ACCESS, %o3 /* delay slot: runs whether or not the branch is taken */
156 stxa %g0, [%o3] ASI_IMMU
157 stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS
158 membar #Sync
1592: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3
160 andcc %o3, 0x40, %g0 /* same bit test as for the I-TLB slot */
161 bne,pn %xcc, 2f
162 mov TLB_TAG_ACCESS, %o3 /* delay slot: runs whether or not the branch is taken */
163 stxa %g0, [%o3] ASI_DMMU
164 stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS
165 membar #Sync
1662: sub %o4, 8, %o4 /* step to the next lower slot */
167 brgez,pt %o4, 1b /* keep going while the offset is >= 0 */
168 nop
169 retl
170 nop
134 171
135__spitfire_flush_tlb_mm_slow: 172__spitfire_flush_tlb_mm_slow:
136 rdpr %pstate, %g1 173 rdpr %pstate, %g1
@@ -285,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns */
285 retl 322 retl
286 wrpr %g7, 0x0, %pstate 323 wrpr %g7, 0x0, %pstate
287 324
/*
 * Cheetah replacement for __flush_tlb_kernel_range; cheetah_patch_cachetlbops
 * passes it to tlb_patch_one with an instruction count of 31, which is why
 * the routine is padded with nops to exactly 31 instructions.
 *
 * In:  %o0 = start, %o1 = end.  Scratch: %o3, %o4.
 *
 *  - start == end: skip the demaps, just do the flush at label 2 and return.
 *  - (end - start) >> 18 non-zero: take the bulk path at label 3, which
 *    issues a single D-MMU and I-MMU demap store at address 0x80
 *    (presumably a demap-all operation - confirm against the CPU manual).
 *  - otherwise: demap one page per iteration, walking the offset in %o3
 *    from (end - start - PAGE_SIZE) down to 0.
 */
325__cheetah_flush_tlb_kernel_range: /* 31 insns */
326 /* %o0=start, %o1=end */
327 cmp %o0, %o1
328 be,pn %xcc, 2f
329 sub %o1, %o0, %o3 /* delay slot: %o3 = length of the range */
330 srlx %o3, 18, %o4
331 brnz,pn %o4, 3f
332 sethi %hi(PAGE_SIZE), %o4 /* delay slot: %o4 = loop stride */
333 sub %o3, %o4, %o3
334 or %o0, 0x20, %o0 ! Nucleus
3351: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
336 stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP
337 membar #Sync
338 brnz,pt %o3, 1b
339 sub %o3, %o4, %o3 /* delay slot: step down one page */
3402: sethi %hi(KERNBASE), %o3
341 flush %o3
342 retl
343 nop
3443: mov 0x80, %o4
345 stxa %g0, [%o4] ASI_DMMU_DEMAP
346 membar #Sync
347 stxa %g0, [%o4] ASI_IMMU_DEMAP
348 membar #Sync
349 retl
350 nop
351 nop
352 nop
353 nop
354 nop
355 nop
356 nop
357 nop
358
288#ifdef DCACHE_ALIASING_POSSIBLE 359#ifdef DCACHE_ALIASING_POSSIBLE
289__cheetah_flush_dcache_page: /* 11 insns */ 360__cheetah_flush_dcache_page: /* 11 insns */
290 sethi %hi(PAGE_OFFSET), %g1 361 sethi %hi(PAGE_OFFSET), %g1
@@ -309,19 +380,28 @@ __hypervisor_tlb_tl0_error:
309 ret 380 ret
310 restore 381 restore
311 382
312__hypervisor_flush_tlb_mm: /* 10 insns */ 383__hypervisor_flush_tlb_mm: /* 19 insns */
313 mov %o0, %o2 /* ARG2: mmu context */ 384 mov %o0, %o2 /* ARG2: mmu context */
314 mov 0, %o0 /* ARG0: CPU lists unimplemented */ 385 mov 0, %o0 /* ARG0: CPU lists unimplemented */
315 mov 0, %o1 /* ARG1: CPU lists unimplemented */ 386 mov 0, %o1 /* ARG1: CPU lists unimplemented */
316 mov HV_MMU_ALL, %o3 /* ARG3: flags */ 387 mov HV_MMU_ALL, %o3 /* ARG3: flags */
317 mov HV_FAST_MMU_DEMAP_CTX, %o5 388 mov HV_FAST_MMU_DEMAP_CTX, %o5
318 ta HV_FAST_TRAP 389 ta HV_FAST_TRAP
319 brnz,pn %o0, __hypervisor_tlb_tl0_error 390 brnz,pn %o0, 1f
320 mov HV_FAST_MMU_DEMAP_CTX, %o1 391 mov HV_FAST_MMU_DEMAP_CTX, %o1
321 retl 392 retl
322 nop 393 nop
3941: sethi %hi(__hypervisor_tlb_tl0_error), %o5
395 jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0
396 nop
397 nop
398 nop
399 nop
400 nop
401 nop
402 nop
323 403
324__hypervisor_flush_tlb_page: /* 11 insns */ 404__hypervisor_flush_tlb_page: /* 22 insns */
325 /* %o0 = context, %o1 = vaddr */ 405 /* %o0 = context, %o1 = vaddr */
326 mov %o0, %g2 406 mov %o0, %g2
327 mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */ 407 mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */
@@ -330,12 +410,23 @@ __hypervisor_flush_tlb_page: /* 11 insns */
330 srlx %o0, PAGE_SHIFT, %o0 410 srlx %o0, PAGE_SHIFT, %o0
331 sllx %o0, PAGE_SHIFT, %o0 411 sllx %o0, PAGE_SHIFT, %o0
332 ta HV_MMU_UNMAP_ADDR_TRAP 412 ta HV_MMU_UNMAP_ADDR_TRAP
333 brnz,pn %o0, __hypervisor_tlb_tl0_error 413 brnz,pn %o0, 1f
334 mov HV_MMU_UNMAP_ADDR_TRAP, %o1 414 mov HV_MMU_UNMAP_ADDR_TRAP, %o1
335 retl 415 retl
336 nop 416 nop
4171: sethi %hi(__hypervisor_tlb_tl0_error), %o2
418 jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
419 nop
420 nop
421 nop
422 nop
423 nop
424 nop
425 nop
426 nop
427 nop
337 428
338__hypervisor_flush_tlb_pending: /* 16 insns */ 429__hypervisor_flush_tlb_pending: /* 27 insns */
339 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ 430 /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
340 sllx %o1, 3, %g1 431 sllx %o1, 3, %g1
341 mov %o2, %g2 432 mov %o2, %g2
@@ -347,31 +438,57 @@ __hypervisor_flush_tlb_pending: /* 16 insns */
347 srlx %o0, PAGE_SHIFT, %o0 438 srlx %o0, PAGE_SHIFT, %o0
348 sllx %o0, PAGE_SHIFT, %o0 439 sllx %o0, PAGE_SHIFT, %o0
349 ta HV_MMU_UNMAP_ADDR_TRAP 440 ta HV_MMU_UNMAP_ADDR_TRAP
350 brnz,pn %o0, __hypervisor_tlb_tl0_error 441 brnz,pn %o0, 1f
351 mov HV_MMU_UNMAP_ADDR_TRAP, %o1 442 mov HV_MMU_UNMAP_ADDR_TRAP, %o1
352 brnz,pt %g1, 1b 443 brnz,pt %g1, 1b
353 nop 444 nop
354 retl 445 retl
355 nop 446 nop
4471: sethi %hi(__hypervisor_tlb_tl0_error), %o2
448 jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
449 nop
450 nop
451 nop
452 nop
453 nop
454 nop
455 nop
456 nop
457 nop
356 458
357__hypervisor_flush_tlb_kernel_range: /* 16 insns */ 459__hypervisor_flush_tlb_kernel_range: /* 31 insns */
358 /* %o0=start, %o1=end */ 460 /* %o0=start, %o1=end */
359 cmp %o0, %o1 461 cmp %o0, %o1
360 be,pn %xcc, 2f 462 be,pn %xcc, 2f
361 sethi %hi(PAGE_SIZE), %g3 463 sub %o1, %o0, %g2
362 mov %o0, %g1 464 srlx %g2, 18, %g3
363 sub %o1, %g1, %g2 465 brnz,pn %g3, 4f
466 mov %o0, %g1
467 sethi %hi(PAGE_SIZE), %g3
364 sub %g2, %g3, %g2 468 sub %g2, %g3, %g2
3651: add %g1, %g2, %o0 /* ARG0: virtual address */ 4691: add %g1, %g2, %o0 /* ARG0: virtual address */
366 mov 0, %o1 /* ARG1: mmu context */ 470 mov 0, %o1 /* ARG1: mmu context */
367 mov HV_MMU_ALL, %o2 /* ARG2: flags */ 471 mov HV_MMU_ALL, %o2 /* ARG2: flags */
368 ta HV_MMU_UNMAP_ADDR_TRAP 472 ta HV_MMU_UNMAP_ADDR_TRAP
369 brnz,pn %o0, __hypervisor_tlb_tl0_error 473 brnz,pn %o0, 3f
370 mov HV_MMU_UNMAP_ADDR_TRAP, %o1 474 mov HV_MMU_UNMAP_ADDR_TRAP, %o1
371 brnz,pt %g2, 1b 475 brnz,pt %g2, 1b
372 sub %g2, %g3, %g2 476 sub %g2, %g3, %g2
3732: retl 4772: retl
374 nop 478 nop
4793: sethi %hi(__hypervisor_tlb_tl0_error), %o2
480 jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
481 nop
4824: mov 0, %o0 /* ARG0: CPU lists unimplemented */
483 mov 0, %o1 /* ARG1: CPU lists unimplemented */
484 mov 0, %o2 /* ARG2: mmu context == nucleus */
485 mov HV_MMU_ALL, %o3 /* ARG3: flags */
486 mov HV_FAST_MMU_DEMAP_CTX, %o5
487 ta HV_FAST_TRAP
488 brnz,pn %o0, 3b
489 mov HV_FAST_MMU_DEMAP_CTX, %o1
490 retl
491 nop
375 492
376#ifdef DCACHE_ALIASING_POSSIBLE 493#ifdef DCACHE_ALIASING_POSSIBLE
377 /* XXX Niagara and friends have an 8K cache, so no aliasing is 494 /* XXX Niagara and friends have an 8K cache, so no aliasing is
@@ -394,43 +511,6 @@ tlb_patch_one:
394 retl 511 retl
395 nop 512 nop
396 513
397 .globl cheetah_patch_cachetlbops
398cheetah_patch_cachetlbops:
399 save %sp, -128, %sp
400
401 sethi %hi(__flush_tlb_mm), %o0
402 or %o0, %lo(__flush_tlb_mm), %o0
403 sethi %hi(__cheetah_flush_tlb_mm), %o1
404 or %o1, %lo(__cheetah_flush_tlb_mm), %o1
405 call tlb_patch_one
406 mov 19, %o2
407
408 sethi %hi(__flush_tlb_page), %o0
409 or %o0, %lo(__flush_tlb_page), %o0
410 sethi %hi(__cheetah_flush_tlb_page), %o1
411 or %o1, %lo(__cheetah_flush_tlb_page), %o1
412 call tlb_patch_one
413 mov 22, %o2
414
415 sethi %hi(__flush_tlb_pending), %o0
416 or %o0, %lo(__flush_tlb_pending), %o0
417 sethi %hi(__cheetah_flush_tlb_pending), %o1
418 or %o1, %lo(__cheetah_flush_tlb_pending), %o1
419 call tlb_patch_one
420 mov 27, %o2
421
422#ifdef DCACHE_ALIASING_POSSIBLE
423 sethi %hi(__flush_dcache_page), %o0
424 or %o0, %lo(__flush_dcache_page), %o0
425 sethi %hi(__cheetah_flush_dcache_page), %o1
426 or %o1, %lo(__cheetah_flush_dcache_page), %o1
427 call tlb_patch_one
428 mov 11, %o2
429#endif /* DCACHE_ALIASING_POSSIBLE */
430
431 ret
432 restore
433
434#ifdef CONFIG_SMP 514#ifdef CONFIG_SMP
435 /* These are all called by the slaves of a cross call, at 515 /* These are all called by the slaves of a cross call, at
436 * trap level 1, with interrupts fully disabled. 516 * trap level 1, with interrupts fully disabled.
@@ -447,7 +527,7 @@ cheetah_patch_cachetlbops:
447 */ 527 */
448 .align 32 528 .align 32
449 .globl xcall_flush_tlb_mm 529 .globl xcall_flush_tlb_mm
450xcall_flush_tlb_mm: /* 21 insns */ 530xcall_flush_tlb_mm: /* 24 insns */
451 mov PRIMARY_CONTEXT, %g2 531 mov PRIMARY_CONTEXT, %g2
452 ldxa [%g2] ASI_DMMU, %g3 532 ldxa [%g2] ASI_DMMU, %g3
453 srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4 533 srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4
@@ -469,9 +549,12 @@ xcall_flush_tlb_mm: /* 21 insns */
469 nop 549 nop
470 nop 550 nop
471 nop 551 nop
552 nop
553 nop
554 nop
472 555
473 .globl xcall_flush_tlb_page 556 .globl xcall_flush_tlb_page
474xcall_flush_tlb_page: /* 17 insns */ 557xcall_flush_tlb_page: /* 20 insns */
475 /* %g5=context, %g1=vaddr */ 558 /* %g5=context, %g1=vaddr */
476 mov PRIMARY_CONTEXT, %g4 559 mov PRIMARY_CONTEXT, %g4
477 ldxa [%g4] ASI_DMMU, %g2 560 ldxa [%g4] ASI_DMMU, %g2
@@ -490,15 +573,20 @@ xcall_flush_tlb_page: /* 17 insns */
490 retry 573 retry
491 nop 574 nop
492 nop 575 nop
576 nop
577 nop
578 nop
493 579
494 .globl xcall_flush_tlb_kernel_range 580 .globl xcall_flush_tlb_kernel_range
495xcall_flush_tlb_kernel_range: /* 25 insns */ 581xcall_flush_tlb_kernel_range: /* 44 insns */
496 sethi %hi(PAGE_SIZE - 1), %g2 582 sethi %hi(PAGE_SIZE - 1), %g2
497 or %g2, %lo(PAGE_SIZE - 1), %g2 583 or %g2, %lo(PAGE_SIZE - 1), %g2
498 andn %g1, %g2, %g1 584 andn %g1, %g2, %g1
499 andn %g7, %g2, %g7 585 andn %g7, %g2, %g7
500 sub %g7, %g1, %g3 586 sub %g7, %g1, %g3
501 add %g2, 1, %g2 587 srlx %g3, 18, %g2
588 brnz,pn %g2, 2f
589 add %g2, 1, %g2
502 sub %g3, %g2, %g3 590 sub %g3, %g2, %g3
503 or %g1, 0x20, %g1 ! Nucleus 591 or %g1, 0x20, %g1 ! Nucleus
5041: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP 5921: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
@@ -507,8 +595,25 @@ xcall_flush_tlb_kernel_range: /* 25 insns */
507 brnz,pt %g3, 1b 595 brnz,pt %g3, 1b
508 sub %g3, %g2, %g3 596 sub %g3, %g2, %g3
509 retry 597 retry
510 nop 5982: mov 63 * 8, %g1
511 nop 5991: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2
600 andcc %g2, 0x40, %g0 /* _PAGE_L_4U */
601 bne,pn %xcc, 2f
602 mov TLB_TAG_ACCESS, %g2
603 stxa %g0, [%g2] ASI_IMMU
604 stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS
605 membar #Sync
6062: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2
607 andcc %g2, 0x40, %g0
608 bne,pn %xcc, 2f
609 mov TLB_TAG_ACCESS, %g2
610 stxa %g0, [%g2] ASI_DMMU
611 stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS
612 membar #Sync
6132: sub %g1, 8, %g1
614 brgez,pt %g1, 1b
615 nop
616 retry
512 nop 617 nop
513 nop 618 nop
514 nop 619 nop
@@ -637,6 +742,52 @@ xcall_fetch_glob_pmu_n4:
637 742
638 retry 743 retry
639 744
/*
 * Cheetah replacement for xcall_flush_tlb_kernel_range; when CONFIG_SMP is
 * set, cheetah_patch_cachetlbops passes it to tlb_patch_one with an
 * instruction count of 44, so the routine is padded with nops to exactly
 * 44 instructions.
 *
 * In:  %g1 = start, %g7 = end (same convention as documented on the
 *      hypervisor variant).  Scratch: %g2, %g3.  Ends with retry.
 *
 *  - Both bounds are rounded down to a page boundary.
 *  - (end - start) >> 18 non-zero: take the bulk path at label 2, a single
 *    D-MMU and I-MMU demap store at address 0x80 (presumably demap-all).
 *  - otherwise: demap one page per iteration, walking the offset in %g3
 *    from (end - start - PAGE_SIZE) down to 0.
 *
 * The srlx below reuses %g2, destroying the PAGE_SIZE - 1 mask it held.  On
 * the fall-through path %g2 is therefore 0, so the loop stride has to be
 * reloaded with PAGE_SIZE in the branch delay slot.  Deriving it with
 * "add %g2, 1, %g2" would leave a stride of 1 and feed unaligned offsets
 * into the demap address.
 * NOTE(review): xcall_flush_tlb_kernel_range contains the same
 * srlx/brnz/add sequence and needs the same correction.
 */
745__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */
746 sethi %hi(PAGE_SIZE - 1), %g2
747 or %g2, %lo(PAGE_SIZE - 1), %g2
748 andn %g1, %g2, %g1
749 andn %g7, %g2, %g7
750 sub %g7, %g1, %g3
751 srlx %g3, 18, %g2
752 brnz,pn %g2, 2f
753 sethi %hi(PAGE_SIZE), %g2
754 sub %g3, %g2, %g3
755 or %g1, 0x20, %g1 ! Nucleus
7561: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
757 stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP
758 membar #Sync
759 brnz,pt %g3, 1b
760 sub %g3, %g2, %g3
761 retry
7622: mov 0x80, %g2
763 stxa %g0, [%g2] ASI_DMMU_DEMAP
764 membar #Sync
765 stxa %g0, [%g2] ASI_IMMU_DEMAP
766 membar #Sync
767 retry
768 nop
769 nop
770 nop
771 nop
772 nop
773 nop
774 nop
775 nop
776 nop
777 nop
778 nop
779 nop
780 nop
781 nop
782 nop
783 nop
784 nop
785 nop
786 nop
787 nop
788 nop
789 nop
790
640#ifdef DCACHE_ALIASING_POSSIBLE 791#ifdef DCACHE_ALIASING_POSSIBLE
641 .align 32 792 .align 32
642 .globl xcall_flush_dcache_page_cheetah 793 .globl xcall_flush_dcache_page_cheetah
@@ -700,7 +851,7 @@ __hypervisor_tlb_xcall_error:
700 ba,a,pt %xcc, rtrap 851 ba,a,pt %xcc, rtrap
701 852
702 .globl __hypervisor_xcall_flush_tlb_mm 853 .globl __hypervisor_xcall_flush_tlb_mm
703__hypervisor_xcall_flush_tlb_mm: /* 21 insns */ 854__hypervisor_xcall_flush_tlb_mm: /* 24 insns */
704 /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */ 855 /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */
705 mov %o0, %g2 856 mov %o0, %g2
706 mov %o1, %g3 857 mov %o1, %g3
@@ -714,7 +865,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
714 mov HV_FAST_MMU_DEMAP_CTX, %o5 865 mov HV_FAST_MMU_DEMAP_CTX, %o5
715 ta HV_FAST_TRAP 866 ta HV_FAST_TRAP
716 mov HV_FAST_MMU_DEMAP_CTX, %g6 867 mov HV_FAST_MMU_DEMAP_CTX, %g6
717 brnz,pn %o0, __hypervisor_tlb_xcall_error 868 brnz,pn %o0, 1f
718 mov %o0, %g5 869 mov %o0, %g5
719 mov %g2, %o0 870 mov %g2, %o0
720 mov %g3, %o1 871 mov %g3, %o1
@@ -723,9 +874,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
723 mov %g7, %o5 874 mov %g7, %o5
724 membar #Sync 875 membar #Sync
725 retry 876 retry
8771: sethi %hi(__hypervisor_tlb_xcall_error), %g4
878 jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
879 nop
726 880
727 .globl __hypervisor_xcall_flush_tlb_page 881 .globl __hypervisor_xcall_flush_tlb_page
728__hypervisor_xcall_flush_tlb_page: /* 17 insns */ 882__hypervisor_xcall_flush_tlb_page: /* 20 insns */
729 /* %g5=ctx, %g1=vaddr */ 883 /* %g5=ctx, %g1=vaddr */
730 mov %o0, %g2 884 mov %o0, %g2
731 mov %o1, %g3 885 mov %o1, %g3
@@ -737,42 +891,64 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */
737 sllx %o0, PAGE_SHIFT, %o0 891 sllx %o0, PAGE_SHIFT, %o0
738 ta HV_MMU_UNMAP_ADDR_TRAP 892 ta HV_MMU_UNMAP_ADDR_TRAP
739 mov HV_MMU_UNMAP_ADDR_TRAP, %g6 893 mov HV_MMU_UNMAP_ADDR_TRAP, %g6
740 brnz,a,pn %o0, __hypervisor_tlb_xcall_error 894 brnz,a,pn %o0, 1f
741 mov %o0, %g5 895 mov %o0, %g5
742 mov %g2, %o0 896 mov %g2, %o0
743 mov %g3, %o1 897 mov %g3, %o1
744 mov %g4, %o2 898 mov %g4, %o2
745 membar #Sync 899 membar #Sync
746 retry 900 retry
9011: sethi %hi(__hypervisor_tlb_xcall_error), %g4
902 jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
903 nop
747 904
748 .globl __hypervisor_xcall_flush_tlb_kernel_range 905 .globl __hypervisor_xcall_flush_tlb_kernel_range
749__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ 906__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */
750 /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */ 907 /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */
751 sethi %hi(PAGE_SIZE - 1), %g2 908 sethi %hi(PAGE_SIZE - 1), %g2
752 or %g2, %lo(PAGE_SIZE - 1), %g2 909 or %g2, %lo(PAGE_SIZE - 1), %g2
753 andn %g1, %g2, %g1 910 andn %g1, %g2, %g1
754 andn %g7, %g2, %g7 911 andn %g7, %g2, %g7
755 sub %g7, %g1, %g3 912 sub %g7, %g1, %g3
913 srlx %g3, 18, %g7
756 add %g2, 1, %g2 914 add %g2, 1, %g2
757 sub %g3, %g2, %g3 915 sub %g3, %g2, %g3
758 mov %o0, %g2 916 mov %o0, %g2
759 mov %o1, %g4 917 mov %o1, %g4
760 mov %o2, %g7 918 brnz,pn %g7, 2f
919 mov %o2, %g7
7611: add %g1, %g3, %o0 /* ARG0: virtual address */ 9201: add %g1, %g3, %o0 /* ARG0: virtual address */
762 mov 0, %o1 /* ARG1: mmu context */ 921 mov 0, %o1 /* ARG1: mmu context */
763 mov HV_MMU_ALL, %o2 /* ARG2: flags */ 922 mov HV_MMU_ALL, %o2 /* ARG2: flags */
764 ta HV_MMU_UNMAP_ADDR_TRAP 923 ta HV_MMU_UNMAP_ADDR_TRAP
765 mov HV_MMU_UNMAP_ADDR_TRAP, %g6 924 mov HV_MMU_UNMAP_ADDR_TRAP, %g6
766 brnz,pn %o0, __hypervisor_tlb_xcall_error 925 brnz,pn %o0, 1f
767 mov %o0, %g5 926 mov %o0, %g5
768 sethi %hi(PAGE_SIZE), %o2 927 sethi %hi(PAGE_SIZE), %o2
769 brnz,pt %g3, 1b 928 brnz,pt %g3, 1b
770 sub %g3, %o2, %g3 929 sub %g3, %o2, %g3
771 mov %g2, %o0 9305: mov %g2, %o0
772 mov %g4, %o1 931 mov %g4, %o1
773 mov %g7, %o2 932 mov %g7, %o2
774 membar #Sync 933 membar #Sync
775 retry 934 retry
9351: sethi %hi(__hypervisor_tlb_xcall_error), %g4
936 jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
937 nop
9382: mov %o3, %g1
939 mov %o5, %g3
940 mov 0, %o0 /* ARG0: CPU lists unimplemented */
941 mov 0, %o1 /* ARG1: CPU lists unimplemented */
942 mov 0, %o2 /* ARG2: mmu context == nucleus */
943 mov HV_MMU_ALL, %o3 /* ARG3: flags */
944 mov HV_FAST_MMU_DEMAP_CTX, %o5
945 ta HV_FAST_TRAP
946 mov %g1, %o3
947 brz,pt %o0, 5b
948 mov %g3, %o5
949 mov HV_FAST_MMU_DEMAP_CTX, %g6
950 ba,pt %xcc, 1b
951 clr %g5
776 952
777 /* These just get rescheduled to PIL vectors. */ 953 /* These just get rescheduled to PIL vectors. */
778 .globl xcall_call_function 954 .globl xcall_call_function
@@ -809,6 +985,58 @@ xcall_kgdb_capture:
809 985
810#endif /* CONFIG_SMP */ 986#endif /* CONFIG_SMP */
811 987
/*
 * cheetah_patch_cachetlbops - install the Cheetah TLB/cache flush routines.
 *
 * For each generic routine this loads its address into %o0, the address of
 * the __cheetah_* replacement into %o1 and an instruction count into %o2
 * (set in the call delay slot), then calls tlb_patch_one.  tlb_patch_one
 * presumably copies %o2 instructions from %o1 over %o0 - confirm against
 * its definition.  Each count matches the "N insns" annotation on the
 * corresponding routine, so the routines must stay padded to those sizes.
 */
988 .globl cheetah_patch_cachetlbops
989cheetah_patch_cachetlbops:
990 save %sp, -128, %sp
991
992 sethi %hi(__flush_tlb_mm), %o0
993 or %o0, %lo(__flush_tlb_mm), %o0
994 sethi %hi(__cheetah_flush_tlb_mm), %o1
995 or %o1, %lo(__cheetah_flush_tlb_mm), %o1
996 call tlb_patch_one
997 mov 19, %o2
998
999 sethi %hi(__flush_tlb_page), %o0
1000 or %o0, %lo(__flush_tlb_page), %o0
1001 sethi %hi(__cheetah_flush_tlb_page), %o1
1002 or %o1, %lo(__cheetah_flush_tlb_page), %o1
1003 call tlb_patch_one
1004 mov 22, %o2
1005
1006 sethi %hi(__flush_tlb_pending), %o0
1007 or %o0, %lo(__flush_tlb_pending), %o0
1008 sethi %hi(__cheetah_flush_tlb_pending), %o1
1009 or %o1, %lo(__cheetah_flush_tlb_pending), %o1
1010 call tlb_patch_one
1011 mov 27, %o2
1012
1013 sethi %hi(__flush_tlb_kernel_range), %o0
1014 or %o0, %lo(__flush_tlb_kernel_range), %o0
1015 sethi %hi(__cheetah_flush_tlb_kernel_range), %o1
1016 or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1
1017 call tlb_patch_one
1018 mov 31, %o2
1019
1020#ifdef DCACHE_ALIASING_POSSIBLE
1021 sethi %hi(__flush_dcache_page), %o0
1022 or %o0, %lo(__flush_dcache_page), %o0
1023 sethi %hi(__cheetah_flush_dcache_page), %o1
1024 or %o1, %lo(__cheetah_flush_dcache_page), %o1
1025 call tlb_patch_one
1026 mov 11, %o2
1027#endif /* DCACHE_ALIASING_POSSIBLE */
1028
/* the cross-call handler is only patched when CONFIG_SMP is set */
1029#ifdef CONFIG_SMP
1030 sethi %hi(xcall_flush_tlb_kernel_range), %o0
1031 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
1032 sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1
1033 or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1
1034 call tlb_patch_one
1035 mov 44, %o2
1036#endif /* CONFIG_SMP */
1037
1038 ret
1039 restore
812 1040
813 .globl hypervisor_patch_cachetlbops 1041 .globl hypervisor_patch_cachetlbops
814hypervisor_patch_cachetlbops: 1042hypervisor_patch_cachetlbops:
@@ -819,28 +1047,28 @@ hypervisor_patch_cachetlbops:
819 sethi %hi(__hypervisor_flush_tlb_mm), %o1 1047 sethi %hi(__hypervisor_flush_tlb_mm), %o1
820 or %o1, %lo(__hypervisor_flush_tlb_mm), %o1 1048 or %o1, %lo(__hypervisor_flush_tlb_mm), %o1
821 call tlb_patch_one 1049 call tlb_patch_one
822 mov 10, %o2 1050 mov 19, %o2
823 1051
824 sethi %hi(__flush_tlb_page), %o0 1052 sethi %hi(__flush_tlb_page), %o0
825 or %o0, %lo(__flush_tlb_page), %o0 1053 or %o0, %lo(__flush_tlb_page), %o0
826 sethi %hi(__hypervisor_flush_tlb_page), %o1 1054 sethi %hi(__hypervisor_flush_tlb_page), %o1
827 or %o1, %lo(__hypervisor_flush_tlb_page), %o1 1055 or %o1, %lo(__hypervisor_flush_tlb_page), %o1
828 call tlb_patch_one 1056 call tlb_patch_one
829 mov 11, %o2 1057 mov 22, %o2
830 1058
831 sethi %hi(__flush_tlb_pending), %o0 1059 sethi %hi(__flush_tlb_pending), %o0
832 or %o0, %lo(__flush_tlb_pending), %o0 1060 or %o0, %lo(__flush_tlb_pending), %o0
833 sethi %hi(__hypervisor_flush_tlb_pending), %o1 1061 sethi %hi(__hypervisor_flush_tlb_pending), %o1
834 or %o1, %lo(__hypervisor_flush_tlb_pending), %o1 1062 or %o1, %lo(__hypervisor_flush_tlb_pending), %o1
835 call tlb_patch_one 1063 call tlb_patch_one
836 mov 16, %o2 1064 mov 27, %o2
837 1065
838 sethi %hi(__flush_tlb_kernel_range), %o0 1066 sethi %hi(__flush_tlb_kernel_range), %o0
839 or %o0, %lo(__flush_tlb_kernel_range), %o0 1067 or %o0, %lo(__flush_tlb_kernel_range), %o0
840 sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1 1068 sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1
841 or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1 1069 or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1
842 call tlb_patch_one 1070 call tlb_patch_one
843 mov 16, %o2 1071 mov 31, %o2
844 1072
845#ifdef DCACHE_ALIASING_POSSIBLE 1073#ifdef DCACHE_ALIASING_POSSIBLE
846 sethi %hi(__flush_dcache_page), %o0 1074 sethi %hi(__flush_dcache_page), %o0
@@ -857,21 +1085,21 @@ hypervisor_patch_cachetlbops:
857 sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1 1085 sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1
858 or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1 1086 or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1
859 call tlb_patch_one 1087 call tlb_patch_one
860 mov 21, %o2 1088 mov 24, %o2
861 1089
862 sethi %hi(xcall_flush_tlb_page), %o0 1090 sethi %hi(xcall_flush_tlb_page), %o0
863 or %o0, %lo(xcall_flush_tlb_page), %o0 1091 or %o0, %lo(xcall_flush_tlb_page), %o0
864 sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1 1092 sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1
865 or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1 1093 or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1
866 call tlb_patch_one 1094 call tlb_patch_one
867 mov 17, %o2 1095 mov 20, %o2
868 1096
869 sethi %hi(xcall_flush_tlb_kernel_range), %o0 1097 sethi %hi(xcall_flush_tlb_kernel_range), %o0
870 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 1098 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
871 sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1 1099 sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1
872 or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1 1100 or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1
873 call tlb_patch_one 1101 call tlb_patch_one
874 mov 25, %o2 1102 mov 44, %o2
875#endif /* CONFIG_SMP */ 1103#endif /* CONFIG_SMP */
876 1104
877 ret 1105 ret