aboutsummaryrefslogtreecommitdiffstats
path: root/arch/mips/mm
diff options
context:
space:
mode:
authorThiemo Seufer <ths@networkno.de>2008-02-18 14:32:49 -0500
committerRalf Baechle <ralf@linux-mips.org>2008-04-28 12:14:23 -0400
commitfb2a27e743cd565c25cd896911e494482a8b7251 (patch)
tree791190ead6211e829443185fb1d4b4109fbd9a34 /arch/mips/mm
parent064922a805ec7aadfafdd27aa6b4908d737c3c1d (diff)
[MIPS] Reimplement clear_page/copy_page
Fold the SB-1 specific implementation of clear_page/copy_page into the generic version, and rewrite that one in tlbex style. The immediate benefits: - It converts the compile-time workaround for SB-1 pass 1 prefetches to a more efficient run-time check. - It allows adjustment of loop unrolling, which helps to reduce the number of redundant cdex cache ops. - It fixes some esoteric corner cases (the cache line length calculations can go wrong, and support for 64k pages without prefetch instructions will overflow the addiu immediate). - Somewhat better guesses of "good" prefetch values. Signed-off-by: Thiemo Seufer <ths@networkno.de> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips/mm')
-rw-r--r--arch/mips/mm/Makefile37
-rw-r--r--arch/mips/mm/page.c684
-rw-r--r--arch/mips/mm/pg-r4k.c534
-rw-r--r--arch/mips/mm/pg-sb1.c302
-rw-r--r--arch/mips/mm/uasm.c26
-rw-r--r--arch/mips/mm/uasm.h4
6 files changed, 725 insertions, 862 deletions
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile
index c6f832e0f41c..48731020ca0e 100644
--- a/arch/mips/mm/Makefile
+++ b/arch/mips/mm/Makefile
@@ -4,30 +4,29 @@
4 4
5obj-y += cache.o dma-default.o extable.o fault.o \ 5obj-y += cache.o dma-default.o extable.o fault.o \
6 init.o pgtable.o tlbex.o tlbex-fault.o \ 6 init.o pgtable.o tlbex.o tlbex-fault.o \
7 uasm.o 7 uasm.o page.o
8 8
9obj-$(CONFIG_32BIT) += ioremap.o pgtable-32.o 9obj-$(CONFIG_32BIT) += ioremap.o pgtable-32.o
10obj-$(CONFIG_64BIT) += pgtable-64.o 10obj-$(CONFIG_64BIT) += pgtable-64.o
11obj-$(CONFIG_HIGHMEM) += highmem.o 11obj-$(CONFIG_HIGHMEM) += highmem.o
12 12
13obj-$(CONFIG_CPU_LOONGSON2) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o 13obj-$(CONFIG_CPU_LOONGSON2) += c-r4k.o cex-gen.o tlb-r4k.o
14obj-$(CONFIG_CPU_MIPS32) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o 14obj-$(CONFIG_CPU_MIPS32) += c-r4k.o cex-gen.o tlb-r4k.o
15obj-$(CONFIG_CPU_MIPS64) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o 15obj-$(CONFIG_CPU_MIPS64) += c-r4k.o cex-gen.o tlb-r4k.o
16obj-$(CONFIG_CPU_NEVADA) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o 16obj-$(CONFIG_CPU_NEVADA) += c-r4k.o cex-gen.o tlb-r4k.o
17obj-$(CONFIG_CPU_R10000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o 17obj-$(CONFIG_CPU_R10000) += c-r4k.o cex-gen.o tlb-r4k.o
18obj-$(CONFIG_CPU_R3000) += c-r3k.o tlb-r3k.o pg-r4k.o 18obj-$(CONFIG_CPU_R3000) += c-r3k.o tlb-r3k.o
19obj-$(CONFIG_CPU_R4300) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o 19obj-$(CONFIG_CPU_R4300) += c-r4k.o cex-gen.o tlb-r4k.o
20obj-$(CONFIG_CPU_R4X00) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o 20obj-$(CONFIG_CPU_R4X00) += c-r4k.o cex-gen.o tlb-r4k.o
21obj-$(CONFIG_CPU_R5000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o 21obj-$(CONFIG_CPU_R5000) += c-r4k.o cex-gen.o tlb-r4k.o
22obj-$(CONFIG_CPU_R5432) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o 22obj-$(CONFIG_CPU_R5432) += c-r4k.o cex-gen.o tlb-r4k.o
23obj-$(CONFIG_CPU_R8000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r8k.o 23obj-$(CONFIG_CPU_R8000) += c-r4k.o cex-gen.o tlb-r8k.o
24obj-$(CONFIG_CPU_RM7000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o 24obj-$(CONFIG_CPU_RM7000) += c-r4k.o cex-gen.o tlb-r4k.o
25obj-$(CONFIG_CPU_RM9000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o 25obj-$(CONFIG_CPU_RM9000) += c-r4k.o cex-gen.o tlb-r4k.o
26obj-$(CONFIG_CPU_SB1) += c-r4k.o cerr-sb1.o cex-sb1.o pg-sb1.o \ 26obj-$(CONFIG_CPU_SB1) += c-r4k.o cerr-sb1.o cex-sb1.o tlb-r4k.o
27 tlb-r4k.o 27obj-$(CONFIG_CPU_TX39XX) += c-tx39.o tlb-r3k.o
28obj-$(CONFIG_CPU_TX39XX) += c-tx39.o pg-r4k.o tlb-r3k.o 28obj-$(CONFIG_CPU_TX49XX) += c-r4k.o cex-gen.o tlb-r4k.o
29obj-$(CONFIG_CPU_TX49XX) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o 29obj-$(CONFIG_CPU_VR41XX) += c-r4k.o cex-gen.o tlb-r4k.o
30obj-$(CONFIG_CPU_VR41XX) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
31 30
32obj-$(CONFIG_IP22_CPU_SCACHE) += sc-ip22.o 31obj-$(CONFIG_IP22_CPU_SCACHE) += sc-ip22.o
33obj-$(CONFIG_R5000_CPU_SCACHE) += sc-r5k.o 32obj-$(CONFIG_R5000_CPU_SCACHE) += sc-r5k.o
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
new file mode 100644
index 000000000000..d827d6144369
--- /dev/null
+++ b/arch/mips/mm/page.c
@@ -0,0 +1,684 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
7 * Copyright (C) 2007 Maciej W. Rozycki
8 * Copyright (C) 2008 Thiemo Seufer
9 */
10#include <linux/init.h>
11#include <linux/kernel.h>
12#include <linux/sched.h>
13#include <linux/mm.h>
14#include <linux/module.h>
15#include <linux/proc_fs.h>
16
17#include <asm/bugs.h>
18#include <asm/cacheops.h>
19#include <asm/inst.h>
20#include <asm/io.h>
21#include <asm/page.h>
22#include <asm/pgtable.h>
23#include <asm/prefetch.h>
24#include <asm/system.h>
25#include <asm/bootinfo.h>
26#include <asm/mipsregs.h>
27#include <asm/mmu_context.h>
28#include <asm/cpu.h>
29#include <asm/war.h>
30
31#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
32#include <asm/sibyte/sb1250.h>
33#include <asm/sibyte/sb1250_regs.h>
34#include <asm/sibyte/sb1250_dma.h>
35#endif
36
37#include "uasm.h"
38
39/* Registers used in the assembled routines. */
40#define ZERO 0
41#define AT 2
42#define A0 4
43#define A1 5
44#define A2 6
45#define T0 8
46#define T1 9
47#define T2 10
48#define T3 11
49#define T9 25
50#define RA 31
51
52/* Handle labels (which must be positive integers). */
53enum label_id {
54 label_clear_nopref = 1,
55 label_clear_pref,
56 label_copy_nopref,
57 label_copy_pref_both,
58 label_copy_pref_store,
59};
60
61UASM_L_LA(_clear_nopref)
62UASM_L_LA(_clear_pref)
63UASM_L_LA(_copy_nopref)
64UASM_L_LA(_copy_pref_both)
65UASM_L_LA(_copy_pref_store)
66
67/* We need one branch and therefore one relocation per target label. */
68static struct uasm_label __cpuinitdata labels[5];
69static struct uasm_reloc __cpuinitdata relocs[5];
70
71#define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010)
72#define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020)
73
74/*
75 * Maximum sizes:
76 *
77 * R4000 128 bytes S-cache: 0x058 bytes
78 * R4600 v1.7: 0x05c bytes
79 * R4600 v2.0: 0x060 bytes
80 * With prefetching, 16 word strides 0x120 bytes
81 */
82
83static u32 clear_page_array[0x120 / 4];
84
85#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
86void clear_page_cpu(void *page) __attribute__((alias("clear_page_array")));
87#else
88void clear_page(void *page) __attribute__((alias("clear_page_array")));
89#endif
90
91EXPORT_SYMBOL(clear_page);
92
93/*
94 * Maximum sizes:
95 *
96 * R4000 128 bytes S-cache: 0x11c bytes
97 * R4600 v1.7: 0x080 bytes
98 * R4600 v2.0: 0x07c bytes
99 * With prefetching, 16 word strides 0x540 bytes
100 */
101static u32 copy_page_array[0x540 / 4];
102
103#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
104void
105copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array")));
106#else
107void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
108#endif
109
110EXPORT_SYMBOL(copy_page);
111
112
113static int pref_bias_clear_store __cpuinitdata;
114static int pref_bias_copy_load __cpuinitdata;
115static int pref_bias_copy_store __cpuinitdata;
116
117static u32 pref_src_mode __cpuinitdata;
118static u32 pref_dst_mode __cpuinitdata;
119
120static int clear_word_size __cpuinitdata;
121static int copy_word_size __cpuinitdata;
122
123static int half_clear_loop_size __cpuinitdata;
124static int half_copy_loop_size __cpuinitdata;
125
126static int cache_line_size __cpuinitdata;
127#define cache_line_mask() (cache_line_size - 1)
128
129static inline void __cpuinit
130pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
131{
132 if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) {
133 if (off > 0x7fff) {
134 uasm_i_lui(buf, T9, uasm_rel_hi(off));
135 uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
136 } else
137 uasm_i_addiu(buf, T9, ZERO, off);
138 uasm_i_daddu(buf, reg1, reg2, T9);
139 } else {
140 if (off > 0x7fff) {
141 uasm_i_lui(buf, T9, uasm_rel_hi(off));
142 uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
143 UASM_i_ADDU(buf, reg1, reg2, T9);
144 } else
145 UASM_i_ADDIU(buf, reg1, reg2, off);
146 }
147}
148
149static void __cpuinit set_prefetch_parameters(void)
150{
151 if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg)
152 clear_word_size = 8;
153 else
154 clear_word_size = 4;
155
156 if (cpu_has_64bit_gp_regs)
157 copy_word_size = 8;
158 else
159 copy_word_size = 4;
160
161 /*
162 * The pref's used here are using "streaming" hints, which cause the
163 * copied data to be kicked out of the cache sooner. A page copy often
164 * ends up copying a lot more data than is commonly used, so this seems
165 * to make sense in terms of reducing cache pollution, but I've no real
166 * performance data to back this up.
167 */
168 if (cpu_has_prefetch) {
169 /*
170 * XXX: Most prefetch bias values in here are based on
171 * guesswork.
172 */
173 cache_line_size = cpu_dcache_line_size();
174 switch (current_cpu_type()) {
175 case CPU_TX49XX:
176 /* TX49 supports only Pref_Load */
177 pref_bias_copy_load = 256;
178 break;
179
180 case CPU_RM9000:
181 /*
182 * As a workaround for erratum G105 which make the
183 * PrepareForStore hint unusable we fall back to
184 * StoreRetained on the RM9000. Once it is known which
185 * versions of the RM9000 we'll be able to condition-
186 * alize this.
187 */
188
189 case CPU_R10000:
190 case CPU_R12000:
191 case CPU_R14000:
192 /*
193 * Those values have been experimentally tuned for an
194 * Origin 200.
195 */
196 pref_bias_clear_store = 512;
197 pref_bias_copy_load = 256;
198 pref_bias_copy_store = 256;
199 pref_src_mode = Pref_LoadStreamed;
200 pref_dst_mode = Pref_StoreStreamed;
201 break;
202
203 case CPU_SB1:
204 case CPU_SB1A:
205 pref_bias_clear_store = 128;
206 pref_bias_copy_load = 128;
207 pref_bias_copy_store = 128;
208 /*
209 * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed
210 * hints are broken.
211 */
212 if (current_cpu_type() == CPU_SB1 &&
213 (current_cpu_data.processor_id & 0xff) < 0x02) {
214 pref_src_mode = Pref_Load;
215 pref_dst_mode = Pref_Store;
216 } else {
217 pref_src_mode = Pref_LoadStreamed;
218 pref_dst_mode = Pref_StoreStreamed;
219 }
220 break;
221
222 default:
223 pref_bias_clear_store = 128;
224 pref_bias_copy_load = 256;
225 pref_bias_copy_store = 128;
226 pref_src_mode = Pref_LoadStreamed;
227 pref_dst_mode = Pref_PrepareForStore;
228 break;
229 }
230 } else {
231 if (cpu_has_cache_cdex_s)
232 cache_line_size = cpu_scache_line_size();
233 else if (cpu_has_cache_cdex_p)
234 cache_line_size = cpu_dcache_line_size();
235 }
236 /*
237 * Too much unrolling will overflow the available space in
238 * clear_space_array / copy_page_array. 8 words sounds generous,
239 * but a R4000 with 128 byte L2 line length can exceed even that.
240 */
241 half_clear_loop_size = min(8 * clear_word_size,
242 max(cache_line_size >> 1,
243 4 * clear_word_size));
244 half_copy_loop_size = min(8 * copy_word_size,
245 max(cache_line_size >> 1,
246 4 * copy_word_size));
247}
248
249static void __cpuinit build_clear_store(u32 **buf, int off)
250{
251 if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) {
252 uasm_i_sd(buf, ZERO, off, A0);
253 } else {
254 uasm_i_sw(buf, ZERO, off, A0);
255 }
256}
257
258static inline void __cpuinit build_clear_pref(u32 **buf, int off)
259{
260 if (off & cache_line_mask())
261 return;
262
263 if (pref_bias_clear_store) {
264 uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
265 A0);
266 } else if (cpu_has_cache_cdex_s) {
267 uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
268 } else if (cpu_has_cache_cdex_p) {
269 if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
270 uasm_i_nop(buf);
271 uasm_i_nop(buf);
272 uasm_i_nop(buf);
273 uasm_i_nop(buf);
274 }
275
276 if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
277 uasm_i_lw(buf, ZERO, ZERO, AT);
278
279 uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
280 }
281}
282
283void __cpuinit build_clear_page(void)
284{
285 int off;
286 u32 *buf = (u32 *)&clear_page_array;
287 struct uasm_label *l = labels;
288 struct uasm_reloc *r = relocs;
289 int i;
290
291 memset(labels, 0, sizeof(labels));
292 memset(relocs, 0, sizeof(relocs));
293
294 set_prefetch_parameters();
295
296 /*
297 * This algorithm makes the following assumptions:
298 * - The prefetch bias is a multiple of 2 words.
299 * - The prefetch bias is less than one page.
300 */
301 BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
302 BUG_ON(PAGE_SIZE < pref_bias_clear_store);
303
304 off = PAGE_SIZE - pref_bias_clear_store;
305 if (off > 0xffff || !pref_bias_clear_store)
306 pg_addiu(&buf, A2, A0, off);
307 else
308 uasm_i_ori(&buf, A2, A0, off);
309
310 if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
311 uasm_i_lui(&buf, AT, 0xa000);
312
313 off = min(8, pref_bias_clear_store / cache_line_size) *
314 cache_line_size;
315 while (off) {
316 build_clear_pref(&buf, -off);
317 off -= cache_line_size;
318 }
319 uasm_l_clear_pref(&l, buf);
320 do {
321 build_clear_pref(&buf, off);
322 build_clear_store(&buf, off);
323 off += clear_word_size;
324 } while (off < half_clear_loop_size);
325 pg_addiu(&buf, A0, A0, 2 * off);
326 off = -off;
327 do {
328 build_clear_pref(&buf, off);
329 if (off == -clear_word_size)
330 uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
331 build_clear_store(&buf, off);
332 off += clear_word_size;
333 } while (off < 0);
334
335 if (pref_bias_clear_store) {
336 pg_addiu(&buf, A2, A0, pref_bias_clear_store);
337 uasm_l_clear_nopref(&l, buf);
338 off = 0;
339 do {
340 build_clear_store(&buf, off);
341 off += clear_word_size;
342 } while (off < half_clear_loop_size);
343 pg_addiu(&buf, A0, A0, 2 * off);
344 off = -off;
345 do {
346 if (off == -clear_word_size)
347 uasm_il_bne(&buf, &r, A0, A2,
348 label_clear_nopref);
349 build_clear_store(&buf, off);
350 off += clear_word_size;
351 } while (off < 0);
352 }
353
354 uasm_i_jr(&buf, RA);
355 uasm_i_nop(&buf);
356
357 BUG_ON(buf > clear_page_array + ARRAY_SIZE(clear_page_array));
358
359 uasm_resolve_relocs(relocs, labels);
360
361 pr_debug("Synthesized clear page handler (%u instructions).\n",
362 (u32)(buf - clear_page_array));
363
364 pr_debug("\t.set push\n");
365 pr_debug("\t.set noreorder\n");
366 for (i = 0; i < (buf - clear_page_array); i++)
367 pr_debug("\t.word 0x%08x\n", clear_page_array[i]);
368 pr_debug("\t.set pop\n");
369}
370
371static void __cpuinit build_copy_load(u32 **buf, int reg, int off)
372{
373 if (cpu_has_64bit_gp_regs) {
374 uasm_i_ld(buf, reg, off, A1);
375 } else {
376 uasm_i_lw(buf, reg, off, A1);
377 }
378}
379
380static void __cpuinit build_copy_store(u32 **buf, int reg, int off)
381{
382 if (cpu_has_64bit_gp_regs) {
383 uasm_i_sd(buf, reg, off, A0);
384 } else {
385 uasm_i_sw(buf, reg, off, A0);
386 }
387}
388
389static inline void build_copy_load_pref(u32 **buf, int off)
390{
391 if (off & cache_line_mask())
392 return;
393
394 if (pref_bias_copy_load)
395 uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1);
396}
397
398static inline void build_copy_store_pref(u32 **buf, int off)
399{
400 if (off & cache_line_mask())
401 return;
402
403 if (pref_bias_copy_store) {
404 uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
405 A0);
406 } else if (cpu_has_cache_cdex_s) {
407 uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
408 } else if (cpu_has_cache_cdex_p) {
409 if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
410 uasm_i_nop(buf);
411 uasm_i_nop(buf);
412 uasm_i_nop(buf);
413 uasm_i_nop(buf);
414 }
415
416 if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
417 uasm_i_lw(buf, ZERO, ZERO, AT);
418
419 uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
420 }
421}
422
423void __cpuinit build_copy_page(void)
424{
425 int off;
426 u32 *buf = (u32 *)&copy_page_array;
427 struct uasm_label *l = labels;
428 struct uasm_reloc *r = relocs;
429 int i;
430
431 memset(labels, 0, sizeof(labels));
432 memset(relocs, 0, sizeof(relocs));
433
434 set_prefetch_parameters();
435
436 /*
437 * This algorithm makes the following assumptions:
438 * - All prefetch biases are multiples of 8 words.
439 * - The prefetch biases are less than one page.
440 * - The store prefetch bias isn't greater than the load
441 * prefetch bias.
442 */
443 BUG_ON(pref_bias_copy_load % (8 * copy_word_size));
444 BUG_ON(pref_bias_copy_store % (8 * copy_word_size));
445 BUG_ON(PAGE_SIZE < pref_bias_copy_load);
446 BUG_ON(pref_bias_copy_store > pref_bias_copy_load);
447
448 off = PAGE_SIZE - pref_bias_copy_load;
449 if (off > 0xffff || !pref_bias_copy_load)
450 pg_addiu(&buf, A2, A0, off);
451 else
452 uasm_i_ori(&buf, A2, A0, off);
453
454 if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
455 uasm_i_lui(&buf, AT, 0xa000);
456
457 off = min(8, pref_bias_copy_load / cache_line_size) * cache_line_size;
458 while (off) {
459 build_copy_load_pref(&buf, -off);
460 off -= cache_line_size;
461 }
462 off = min(8, pref_bias_copy_store / cache_line_size) * cache_line_size;
463 while (off) {
464 build_copy_store_pref(&buf, -off);
465 off -= cache_line_size;
466 }
467 uasm_l_copy_pref_both(&l, buf);
468 do {
469 build_copy_load_pref(&buf, off);
470 build_copy_load(&buf, T0, off);
471 build_copy_load_pref(&buf, off + copy_word_size);
472 build_copy_load(&buf, T1, off + copy_word_size);
473 build_copy_load_pref(&buf, off + 2 * copy_word_size);
474 build_copy_load(&buf, T2, off + 2 * copy_word_size);
475 build_copy_load_pref(&buf, off + 3 * copy_word_size);
476 build_copy_load(&buf, T3, off + 3 * copy_word_size);
477 build_copy_store_pref(&buf, off);
478 build_copy_store(&buf, T0, off);
479 build_copy_store_pref(&buf, off + copy_word_size);
480 build_copy_store(&buf, T1, off + copy_word_size);
481 build_copy_store_pref(&buf, off + 2 * copy_word_size);
482 build_copy_store(&buf, T2, off + 2 * copy_word_size);
483 build_copy_store_pref(&buf, off + 3 * copy_word_size);
484 build_copy_store(&buf, T3, off + 3 * copy_word_size);
485 off += 4 * copy_word_size;
486 } while (off < half_copy_loop_size);
487 pg_addiu(&buf, A1, A1, 2 * off);
488 pg_addiu(&buf, A0, A0, 2 * off);
489 off = -off;
490 do {
491 build_copy_load_pref(&buf, off);
492 build_copy_load(&buf, T0, off);
493 build_copy_load_pref(&buf, off + copy_word_size);
494 build_copy_load(&buf, T1, off + copy_word_size);
495 build_copy_load_pref(&buf, off + 2 * copy_word_size);
496 build_copy_load(&buf, T2, off + 2 * copy_word_size);
497 build_copy_load_pref(&buf, off + 3 * copy_word_size);
498 build_copy_load(&buf, T3, off + 3 * copy_word_size);
499 build_copy_store_pref(&buf, off);
500 build_copy_store(&buf, T0, off);
501 build_copy_store_pref(&buf, off + copy_word_size);
502 build_copy_store(&buf, T1, off + copy_word_size);
503 build_copy_store_pref(&buf, off + 2 * copy_word_size);
504 build_copy_store(&buf, T2, off + 2 * copy_word_size);
505 build_copy_store_pref(&buf, off + 3 * copy_word_size);
506 if (off == -(4 * copy_word_size))
507 uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both);
508 build_copy_store(&buf, T3, off + 3 * copy_word_size);
509 off += 4 * copy_word_size;
510 } while (off < 0);
511
512 if (pref_bias_copy_load - pref_bias_copy_store) {
513 pg_addiu(&buf, A2, A0,
514 pref_bias_copy_load - pref_bias_copy_store);
515 uasm_l_copy_pref_store(&l, buf);
516 off = 0;
517 do {
518 build_copy_load(&buf, T0, off);
519 build_copy_load(&buf, T1, off + copy_word_size);
520 build_copy_load(&buf, T2, off + 2 * copy_word_size);
521 build_copy_load(&buf, T3, off + 3 * copy_word_size);
522 build_copy_store_pref(&buf, off);
523 build_copy_store(&buf, T0, off);
524 build_copy_store_pref(&buf, off + copy_word_size);
525 build_copy_store(&buf, T1, off + copy_word_size);
526 build_copy_store_pref(&buf, off + 2 * copy_word_size);
527 build_copy_store(&buf, T2, off + 2 * copy_word_size);
528 build_copy_store_pref(&buf, off + 3 * copy_word_size);
529 build_copy_store(&buf, T3, off + 3 * copy_word_size);
530 off += 4 * copy_word_size;
531 } while (off < half_copy_loop_size);
532 pg_addiu(&buf, A1, A1, 2 * off);
533 pg_addiu(&buf, A0, A0, 2 * off);
534 off = -off;
535 do {
536 build_copy_load(&buf, T0, off);
537 build_copy_load(&buf, T1, off + copy_word_size);
538 build_copy_load(&buf, T2, off + 2 * copy_word_size);
539 build_copy_load(&buf, T3, off + 3 * copy_word_size);
540 build_copy_store_pref(&buf, off);
541 build_copy_store(&buf, T0, off);
542 build_copy_store_pref(&buf, off + copy_word_size);
543 build_copy_store(&buf, T1, off + copy_word_size);
544 build_copy_store_pref(&buf, off + 2 * copy_word_size);
545 build_copy_store(&buf, T2, off + 2 * copy_word_size);
546 build_copy_store_pref(&buf, off + 3 * copy_word_size);
547 if (off == -(4 * copy_word_size))
548 uasm_il_bne(&buf, &r, A2, A0,
549 label_copy_pref_store);
550 build_copy_store(&buf, T3, off + 3 * copy_word_size);
551 off += 4 * copy_word_size;
552 } while (off < 0);
553 }
554
555 if (pref_bias_copy_store) {
556 pg_addiu(&buf, A2, A0, pref_bias_copy_store);
557 uasm_l_copy_nopref(&l, buf);
558 off = 0;
559 do {
560 build_copy_load(&buf, T0, off);
561 build_copy_load(&buf, T1, off + copy_word_size);
562 build_copy_load(&buf, T2, off + 2 * copy_word_size);
563 build_copy_load(&buf, T3, off + 3 * copy_word_size);
564 build_copy_store(&buf, T0, off);
565 build_copy_store(&buf, T1, off + copy_word_size);
566 build_copy_store(&buf, T2, off + 2 * copy_word_size);
567 build_copy_store(&buf, T3, off + 3 * copy_word_size);
568 off += 4 * copy_word_size;
569 } while (off < half_copy_loop_size);
570 pg_addiu(&buf, A1, A1, 2 * off);
571 pg_addiu(&buf, A0, A0, 2 * off);
572 off = -off;
573 do {
574 build_copy_load(&buf, T0, off);
575 build_copy_load(&buf, T1, off + copy_word_size);
576 build_copy_load(&buf, T2, off + 2 * copy_word_size);
577 build_copy_load(&buf, T3, off + 3 * copy_word_size);
578 build_copy_store(&buf, T0, off);
579 build_copy_store(&buf, T1, off + copy_word_size);
580 build_copy_store(&buf, T2, off + 2 * copy_word_size);
581 if (off == -(4 * copy_word_size))
582 uasm_il_bne(&buf, &r, A2, A0,
583 label_copy_nopref);
584 build_copy_store(&buf, T3, off + 3 * copy_word_size);
585 off += 4 * copy_word_size;
586 } while (off < 0);
587 }
588
589 uasm_i_jr(&buf, RA);
590 uasm_i_nop(&buf);
591
592 BUG_ON(buf > copy_page_array + ARRAY_SIZE(copy_page_array));
593
594 uasm_resolve_relocs(relocs, labels);
595
596 pr_debug("Synthesized copy page handler (%u instructions).\n",
597 (u32)(buf - copy_page_array));
598
599 pr_debug("\t.set push\n");
600 pr_debug("\t.set noreorder\n");
601 for (i = 0; i < (buf - copy_page_array); i++)
602 pr_debug("\t.word 0x%08x\n", copy_page_array[i]);
603 pr_debug("\t.set pop\n");
604}
605
606#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
607
608/*
609 * Pad descriptors to cacheline, since each is exclusively owned by a
610 * particular CPU.
611 */
612struct dmadscr {
613 u64 dscr_a;
614 u64 dscr_b;
615 u64 pad_a;
616 u64 pad_b;
617} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS];
618
619void sb1_dma_init(void)
620{
621 int i;
622
623 for (i = 0; i < DM_NUM_CHANNELS; i++) {
624 const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) |
625 V_DM_DSCR_BASE_RINGSZ(1);
626 void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE));
627
628 __raw_writeq(base_val, base_reg);
629 __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg);
630 __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg);
631 }
632}
633
634void clear_page(void *page)
635{
636 u64 to_phys = CPHYSADDR((unsigned long)page);
637 unsigned int cpu = smp_processor_id();
638
639 /* if the page is not in KSEG0, use old way */
640 if ((long)KSEGX((unsigned long)page) != (long)CKSEG0)
641 return clear_page_cpu(page);
642
643 page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
644 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
645 page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
646 __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
647
648 /*
649 * Don't really want to do it this way, but there's no
650 * reliable way to delay completion detection.
651 */
652 while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
653 & M_DM_DSCR_BASE_INTERRUPT))
654 ;
655 __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
656}
657
658void copy_page(void *to, void *from)
659{
660 u64 from_phys = CPHYSADDR((unsigned long)from);
661 u64 to_phys = CPHYSADDR((unsigned long)to);
662 unsigned int cpu = smp_processor_id();
663
664 /* if any page is not in KSEG0, use old way */
665 if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
666 || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
667 return copy_page_cpu(to, from);
668
669 page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
670 M_DM_DSCRA_INTERRUPT;
671 page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
672 __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
673
674 /*
675 * Don't really want to do it this way, but there's no
676 * reliable way to delay completion detection.
677 */
678 while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
679 & M_DM_DSCR_BASE_INTERRUPT))
680 ;
681 __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
682}
683
684#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */
diff --git a/arch/mips/mm/pg-r4k.c b/arch/mips/mm/pg-r4k.c
deleted file mode 100644
index 455dedb5b39e..000000000000
--- a/arch/mips/mm/pg-r4k.c
+++ /dev/null
@@ -1,534 +0,0 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
7 * Copyright (C) 2007 Maciej W. Rozycki
8 */
9#include <linux/init.h>
10#include <linux/kernel.h>
11#include <linux/sched.h>
12#include <linux/mm.h>
13#include <linux/module.h>
14#include <linux/proc_fs.h>
15
16#include <asm/bugs.h>
17#include <asm/cacheops.h>
18#include <asm/inst.h>
19#include <asm/io.h>
20#include <asm/page.h>
21#include <asm/pgtable.h>
22#include <asm/prefetch.h>
23#include <asm/system.h>
24#include <asm/bootinfo.h>
25#include <asm/mipsregs.h>
26#include <asm/mmu_context.h>
27#include <asm/cpu.h>
28#include <asm/war.h>
29
30#define half_scache_line_size() (cpu_scache_line_size() >> 1)
31#define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010)
32#define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020)
33
34
35/*
36 * Maximum sizes:
37 *
38 * R4000 128 bytes S-cache: 0x58 bytes
39 * R4600 v1.7: 0x5c bytes
40 * R4600 v2.0: 0x60 bytes
41 * With prefetching, 16 byte strides 0xa0 bytes
42 */
43
44static unsigned int clear_page_array[0x130 / 4];
45
46void clear_page(void * page) __attribute__((alias("clear_page_array")));
47
48EXPORT_SYMBOL(clear_page);
49
50/*
51 * Maximum sizes:
52 *
53 * R4000 128 bytes S-cache: 0x11c bytes
54 * R4600 v1.7: 0x080 bytes
55 * R4600 v2.0: 0x07c bytes
56 * With prefetching, 16 byte strides 0x0b8 bytes
57 */
58static unsigned int copy_page_array[0x148 / 4];
59
60void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
61
62EXPORT_SYMBOL(copy_page);
63
64/*
65 * This is suboptimal for 32-bit kernels; we assume that R10000 is only used
66 * with 64-bit kernels. The prefetch offsets have been experimentally tuned
67 * for an Origin 200.
68 */
69static int pref_offset_clear __cpuinitdata = 512;
70static int pref_offset_copy __cpuinitdata = 256;
71
72static unsigned int pref_src_mode __cpuinitdata;
73static unsigned int pref_dst_mode __cpuinitdata;
74
75static int load_offset __cpuinitdata;
76static int store_offset __cpuinitdata;
77
78static unsigned int __cpuinitdata *dest, *epc;
79
80static unsigned int instruction_pending;
81static union mips_instruction delayed_mi;
82
83static void __cpuinit emit_instruction(union mips_instruction mi)
84{
85 if (instruction_pending)
86 *epc++ = delayed_mi.word;
87
88 instruction_pending = 1;
89 delayed_mi = mi;
90}
91
92static inline void flush_delay_slot_or_nop(void)
93{
94 if (instruction_pending) {
95 *epc++ = delayed_mi.word;
96 instruction_pending = 0;
97 return;
98 }
99
100 *epc++ = 0;
101}
102
103static inline unsigned int *label(void)
104{
105 if (instruction_pending) {
106 *epc++ = delayed_mi.word;
107 instruction_pending = 0;
108 }
109
110 return epc;
111}
112
113static inline void build_insn_word(unsigned int word)
114{
115 union mips_instruction mi;
116
117 mi.word = word;
118
119 emit_instruction(mi);
120}
121
/* Queue a nop, i.e. the all-zero instruction word. */
static inline void build_nop(void)
{
	build_insn_word(0x00000000);
}
126
127static inline void build_src_pref(int advance)
128{
129 if (!(load_offset & (cpu_dcache_line_size() - 1)) && advance) {
130 union mips_instruction mi;
131
132 mi.i_format.opcode = pref_op;
133 mi.i_format.rs = 5; /* $a1 */
134 mi.i_format.rt = pref_src_mode;
135 mi.i_format.simmediate = load_offset + advance;
136
137 emit_instruction(mi);
138 }
139}
140
141static inline void __build_load_reg(int reg)
142{
143 union mips_instruction mi;
144 unsigned int width;
145
146 if (cpu_has_64bit_gp_regs) {
147 mi.i_format.opcode = ld_op;
148 width = 8;
149 } else {
150 mi.i_format.opcode = lw_op;
151 width = 4;
152 }
153 mi.i_format.rs = 5; /* $a1 */
154 mi.i_format.rt = reg; /* $reg */
155 mi.i_format.simmediate = load_offset;
156
157 load_offset += width;
158 emit_instruction(mi);
159}
160
161static inline void build_load_reg(int reg)
162{
163 if (cpu_has_prefetch)
164 build_src_pref(pref_offset_copy);
165
166 __build_load_reg(reg);
167}
168
169static inline void build_dst_pref(int advance)
170{
171 if (!(store_offset & (cpu_dcache_line_size() - 1)) && advance) {
172 union mips_instruction mi;
173
174 mi.i_format.opcode = pref_op;
175 mi.i_format.rs = 4; /* $a0 */
176 mi.i_format.rt = pref_dst_mode;
177 mi.i_format.simmediate = store_offset + advance;
178
179 emit_instruction(mi);
180 }
181}
182
183static inline void build_cdex_s(void)
184{
185 union mips_instruction mi;
186
187 if ((store_offset & (cpu_scache_line_size() - 1)))
188 return;
189
190 mi.c_format.opcode = cache_op;
191 mi.c_format.rs = 4; /* $a0 */
192 mi.c_format.c_op = 3; /* Create Dirty Exclusive */
193 mi.c_format.cache = 3; /* Secondary Data Cache */
194 mi.c_format.simmediate = store_offset;
195
196 emit_instruction(mi);
197}
198
199static inline void build_cdex_p(void)
200{
201 union mips_instruction mi;
202
203 if (store_offset & (cpu_dcache_line_size() - 1))
204 return;
205
206 if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
207 build_nop();
208 build_nop();
209 build_nop();
210 build_nop();
211 }
212
213 if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
214 build_insn_word(0x8c200000); /* lw $zero, ($at) */
215
216 mi.c_format.opcode = cache_op;
217 mi.c_format.rs = 4; /* $a0 */
218 mi.c_format.c_op = 3; /* Create Dirty Exclusive */
219 mi.c_format.cache = 1; /* Data Cache */
220 mi.c_format.simmediate = store_offset;
221
222 emit_instruction(mi);
223}
224
225static void __cpuinit __build_store_reg(int reg)
226{
227 union mips_instruction mi;
228 unsigned int width;
229
230 if (cpu_has_64bit_gp_regs ||
231 (cpu_has_64bit_zero_reg && reg == 0)) {
232 mi.i_format.opcode = sd_op;
233 width = 8;
234 } else {
235 mi.i_format.opcode = sw_op;
236 width = 4;
237 }
238 mi.i_format.rs = 4; /* $a0 */
239 mi.i_format.rt = reg; /* $reg */
240 mi.i_format.simmediate = store_offset;
241
242 store_offset += width;
243 emit_instruction(mi);
244}
245
/*
 * Emit a store of register 'reg', preceded by whichever destination-side
 * helper applies:
 *  - a destination prefetch, if the CPU has prefetch and the selected
 *    prefetch distance is non-zero (pref_offset_copy for a non-zero reg,
 *    pref_offset_clear for reg 0, i.e. the clear-page case);
 *  - otherwise a secondary-cache Create Dirty Exclusive, if available;
 *  - otherwise a primary-cache Create Dirty Exclusive, if available.
 */
246static inline void build_store_reg(int reg)
247{
248 int pref_off = cpu_has_prefetch ?
249 (reg ? pref_offset_copy : pref_offset_clear) : 0;
250 if (pref_off)
251 build_dst_pref(pref_off);
252 else if (cpu_has_cache_cdex_s)
253 build_cdex_s();
254 else if (cpu_has_cache_cdex_p)
255 build_cdex_p();
256
257 __build_store_reg(reg);
258}
259
/*
 * Emit code computing rt = rs + offset.
 *
 * rt, rs:  register numbers of destination and source.
 * offset:  constant to add; values above 0x7fff trigger BUG_ON() because
 *          the constant is placed in a signed 16-bit immediate field.
 *
 * On 64-bit-register CPUs affected by the daddiu erratum (DADDI_WAR and
 * r4k_daddiu_bug()), two instructions are emitted instead of one:
 * "addiu $t9, $zero, offset" followed by "daddu rt, rs, $t9", so $t9 is
 * clobbered in that case.  Otherwise a single daddiu (64-bit) or addiu
 * (32-bit) is emitted.
 */
260static inline void build_addiu_rt_rs(unsigned int rt, unsigned int rs,
261 unsigned long offset)
262{
263 union mips_instruction mi;
264
265 BUG_ON(offset > 0x7fff);
266
267 if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) {
/* First instruction of the workaround pair: load the constant into $t9. */
268 mi.i_format.opcode = addiu_op;
269 mi.i_format.rs = 0; /* $zero */
270 mi.i_format.rt = 25; /* $t9 */
271 mi.i_format.simmediate = offset;
272 emit_instruction(mi);
273
/* Second instruction: register-register add; emitted by the common tail. */
274 mi.r_format.opcode = spec_op;
275 mi.r_format.rs = rs;
276 mi.r_format.rt = 25; /* $t9 */
277 mi.r_format.rd = rt;
278 mi.r_format.re = 0;
279 mi.r_format.func = daddu_op;
280 } else {
281 mi.i_format.opcode = cpu_has_64bit_gp_regs ?
282 daddiu_op : addiu_op;
283 mi.i_format.rs = rs;
284 mi.i_format.rt = rt;
285 mi.i_format.simmediate = offset;
286 }
287 emit_instruction(mi);
288}
289
/* Emit $a2 = $a0 + offset; the page builders use $a2 as the loop end address. */
290static inline void build_addiu_a2_a0(unsigned long offset)
291{
292 build_addiu_rt_rs(6, 4, offset); /* $a2, $a0, offset */
293}
294
/* Emit $a2 = $a2 + offset. */
295static inline void build_addiu_a2(unsigned long offset)
296{
297 build_addiu_rt_rs(6, 6, offset); /* $a2, $a2, offset */
298}
299
/*
 * Emit $a1 = $a1 + offset and lower load_offset by the same amount, so
 * that later loads emitted relative to $a1 continue from the same address.
 */
300static inline void build_addiu_a1(unsigned long offset)
301{
302 build_addiu_rt_rs(5, 5, offset); /* $a1, $a1, offset */
303
304 load_offset -= offset;
305}
306
/*
 * Emit $a0 = $a0 + offset and lower store_offset by the same amount, so
 * that later stores emitted relative to $a0 continue from the same address.
 */
307static inline void build_addiu_a0(unsigned long offset)
308{
309 build_addiu_rt_rs(4, 4, offset); /* $a0, $a0, offset */
310
311 store_offset -= offset;
312}
313
/*
 * Emit "bne $a2, $a0, dest" at the current emit position.
 *
 * dest: address of the already-emitted instruction to branch back to.
 *
 * The immediate is the distance in instruction words from the slot after
 * the branch (hence the "- 1").  The word is written straight to *epc
 * rather than through emit_instruction(); flush_delay_slot_or_nop() then
 * supplies the following slot (presumably the pending instruction or a
 * nop, as its name suggests; its body is not visible here).
 */
314static inline void build_bne(unsigned int *dest)
315{
316 union mips_instruction mi;
317
318 mi.i_format.opcode = bne_op;
319 mi.i_format.rs = 6; /* $a2 */
320 mi.i_format.rt = 4; /* $a0 */
321 mi.i_format.simmediate = dest - epc - 1;
322
323 *epc++ = mi.word;
324 flush_delay_slot_or_nop();
325}
326
/*
 * Emit "jr $ra" (register 31) at the current emit position, followed by
 * whatever flush_delay_slot_or_nop() places in the next slot.
 */
327static inline void build_jr_ra(void)
328{
329 union mips_instruction mi;
330
331 mi.r_format.opcode = spec_op;
332 mi.r_format.rs = 31;
333 mi.r_format.rt = 0;
334 mi.r_format.rd = 0;
335 mi.r_format.re = 0;
336 mi.r_format.func = jr_op;
337
338 *epc++ = mi.word;
339 flush_delay_slot_or_nop();
340}
341
/*
 * Synthesize the clear-page routine into clear_page_array.
 *
 * The generated code takes the page address in $a0 and uses $a2 as the
 * end pointer.  It consists of:
 *  1. a main loop of zero stores with prefetch / cache-op assistance
 *     (build_store_reg), covering PAGE_SIZE minus the prefetch distance;
 *  2. when a non-zero prefetch distance is in use, a second loop of plain
 *     stores (__build_store_reg) covering the remaining pref_offset_clear
 *     bytes;
 *  3. "jr $ra".
 *
 * This function also selects pref_src_mode/pref_dst_mode (and zeroes both
 * prefetch distances on TX49XX) according to the CPU type.
 */
342void __cpuinit build_clear_page(void)
343{
344 unsigned int loop_start;
345 unsigned long off;
346 int i;
347
348 epc = (unsigned int *) &clear_page_array;
349 instruction_pending = 0;
350 store_offset = 0;
351
352 if (cpu_has_prefetch) {
353 switch (current_cpu_type()) {
354 case CPU_TX49XX:
355 /* TX49 supports only Pref_Load */
356 pref_offset_clear = 0;
357 pref_offset_copy = 0;
358 break;
359
360 case CPU_RM9000:
361 /*
362 * As a workaround for erratum G105 which makes the
363 * PrepareForStore hint unusable we fall back to
364 * StoreRetained on the RM9000. Once it is known which
365 * versions of the RM9000 we'll be able to condition-
366 * alize this.
367 */
/*
 * fallthrough: RM9000 shares the hints chosen for the R1x000 cases.
 * NOTE(review): the comment above says StoreRetained, but the shared body
 * selects Pref_StoreStreamed -- confirm which one is intended.
 */
368
369 case CPU_R10000:
370 case CPU_R12000:
371 case CPU_R14000:
372 pref_src_mode = Pref_LoadStreamed;
373 pref_dst_mode = Pref_StoreStreamed;
374 break;
375
376 default:
377 pref_src_mode = Pref_LoadStreamed;
378 pref_dst_mode = Pref_PrepareForStore;
379 break;
380 }
381 }
382
/*
 * $a2 = $a0 + off marks the end of the main loop.  build_addiu_rt_rs()
 * rejects constants above 0x7fff, so a larger value is added in two halves.
 */
383 off = PAGE_SIZE - (cpu_has_prefetch ? pref_offset_clear : 0);
384 if (off > 0x7fff) {
385 build_addiu_a2_a0(off >> 1);
386 build_addiu_a2(off >> 1);
387 } else
388 build_addiu_a2_a0(off);
389
/* Set up $at for the "lw $zero, ($at)" emitted by build_cdex_p(). */
390 if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
391 build_insn_word(0x3c01a000); /* lui $at, 0xa000 */
392
/*
 * Main loop body, first half: stores at increasing positive offsets from
 * $a0 until store_offset reaches half_scache_line_size().
 */
393dest = label();
394 do {
395 build_store_reg(0);
396 build_store_reg(0);
397 build_store_reg(0);
398 build_store_reg(0);
399 } while (store_offset < half_scache_line_size());
/*
 * Advance $a0 past both halves at once.  build_addiu_a0() subtracts the
 * same amount from store_offset, which therefore becomes negative and the
 * second half is addressed with negative offsets from the new $a0.
 */
400 build_addiu_a0(2 * store_offset);
401 loop_start = store_offset;
402 do {
403 build_store_reg(0);
404 build_store_reg(0);
405 build_store_reg(0);
406 build_store_reg(0);
407 } while ((store_offset - loop_start) < half_scache_line_size());
/* Repeat until $a0 reaches $a2. */
408 build_bne(dest);
409
/*
 * Tail loop: the last pref_offset_clear bytes are cleared with plain
 * stores, without destination prefetches or cache ops.
 */
410 if (cpu_has_prefetch && pref_offset_clear) {
411 build_addiu_a2_a0(pref_offset_clear);
412 dest = label();
413 loop_start = store_offset;
414 do {
415 __build_store_reg(0);
416 __build_store_reg(0);
417 __build_store_reg(0);
418 __build_store_reg(0);
419 } while ((store_offset - loop_start) < half_scache_line_size());
420 build_addiu_a0(2 * store_offset);
421 loop_start = store_offset;
422 do {
423 __build_store_reg(0);
424 __build_store_reg(0);
425 __build_store_reg(0);
426 __build_store_reg(0);
427 } while ((store_offset - loop_start) < half_scache_line_size());
428 build_bne(dest);
429 }
430
431 build_jr_ra();
432
/* Checked after emission: this catches an overrun of the buffer post hoc. */
433 BUG_ON(epc > clear_page_array + ARRAY_SIZE(clear_page_array));
434
435 pr_info("Synthesized clear page handler (%u instructions).\n",
436 (unsigned int)(epc - clear_page_array));
437
/* Dump the generated words in a form that can be pasted into assembler. */
438 pr_debug("\t.set push\n");
439 pr_debug("\t.set noreorder\n");
440 for (i = 0; i < (epc - clear_page_array); i++)
441 pr_debug("\t.word 0x%08x\n", clear_page_array[i]);
442 pr_debug("\t.set pop\n");
443}
444
/*
 * Synthesize the copy-page routine into copy_page_array.
 *
 * The generated code stores through $a0 and loads through $a1, moving
 * data via registers $8-$11 ($t0-$t3), with $a2 as the end pointer for
 * $a0.  It consists of:
 *  1. a main loop of loads/stores with prefetch / cache-op assistance
 *     (build_load_reg/build_store_reg), covering PAGE_SIZE minus the
 *     prefetch distance;
 *  2. when a non-zero prefetch distance is in use, a second loop of plain
 *     loads/stores covering the remaining pref_offset_copy bytes;
 *  3. "jr $ra".
 *
 * NOTE(review): the prefetch modes are not assigned here; in the visible
 * code they are assigned in build_clear_page(), so this presumably relies
 * on that function having run first -- confirm against the caller.
 */
445void __cpuinit build_copy_page(void)
446{
447 unsigned int loop_start;
448 unsigned long off;
449 int i;
450
451 epc = (unsigned int *) &copy_page_array;
452 store_offset = load_offset = 0;
453 instruction_pending = 0;
454
/*
 * $a2 = $a0 + off marks the end of the main loop.  build_addiu_rt_rs()
 * rejects constants above 0x7fff, so a larger value is added in two halves.
 */
455 off = PAGE_SIZE - (cpu_has_prefetch ? pref_offset_copy : 0);
456 if (off > 0x7fff) {
457 build_addiu_a2_a0(off >> 1);
458 build_addiu_a2(off >> 1);
459 } else
460 build_addiu_a2_a0(off);
461
/* Set up $at for the "lw $zero, ($at)" emitted by build_cdex_p(). */
462 if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
463 build_insn_word(0x3c01a000); /* lui $at, 0xa000 */
464
/* Main loop body, first half: positive offsets from $a0/$a1. */
465dest = label();
466 loop_start = store_offset;
467 do {
468 build_load_reg( 8);
469 build_load_reg( 9);
470 build_load_reg(10);
471 build_load_reg(11);
472 build_store_reg( 8);
473 build_store_reg( 9);
474 build_store_reg(10);
475 build_store_reg(11);
476 } while ((store_offset - loop_start) < half_scache_line_size());
/*
 * Advance both pointers past both halves at once; the helpers subtract
 * the same amounts from store_offset/load_offset, so the second half is
 * addressed with negative offsets from the new pointer values.
 */
477 build_addiu_a0(2 * store_offset);
478 build_addiu_a1(2 * load_offset);
479 loop_start = store_offset;
480 do {
481 build_load_reg( 8);
482 build_load_reg( 9);
483 build_load_reg(10);
484 build_load_reg(11);
485 build_store_reg( 8);
486 build_store_reg( 9);
487 build_store_reg(10);
488 build_store_reg(11);
489 } while ((store_offset - loop_start) < half_scache_line_size());
/* Repeat until $a0 reaches $a2. */
490 build_bne(dest);
491
/*
 * Tail loop: the last pref_offset_copy bytes are copied with the plain
 * __build_load_reg/__build_store_reg emitters.
 */
492 if (cpu_has_prefetch && pref_offset_copy) {
493 build_addiu_a2_a0(pref_offset_copy);
494 dest = label();
495 loop_start = store_offset;
496 do {
497 __build_load_reg( 8);
498 __build_load_reg( 9);
499 __build_load_reg(10);
500 __build_load_reg(11);
501 __build_store_reg( 8);
502 __build_store_reg( 9);
503 __build_store_reg(10);
504 __build_store_reg(11);
505 } while ((store_offset - loop_start) < half_scache_line_size());
506 build_addiu_a0(2 * store_offset);
507 build_addiu_a1(2 * load_offset);
508 loop_start = store_offset;
509 do {
510 __build_load_reg( 8);
511 __build_load_reg( 9);
512 __build_load_reg(10);
513 __build_load_reg(11);
514 __build_store_reg( 8);
515 __build_store_reg( 9);
516 __build_store_reg(10);
517 __build_store_reg(11);
518 } while ((store_offset - loop_start) < half_scache_line_size());
519 build_bne(dest);
520 }
521
522 build_jr_ra();
523
/* Checked after emission: this catches an overrun of the buffer post hoc. */
524 BUG_ON(epc > copy_page_array + ARRAY_SIZE(copy_page_array));
525
526 pr_info("Synthesized copy page handler (%u instructions).\n",
527 (unsigned int)(epc - copy_page_array));
528
/* Dump the generated words in a form that can be pasted into assembler. */
529 pr_debug("\t.set push\n");
530 pr_debug("\t.set noreorder\n");
531 for (i = 0; i < (epc - copy_page_array); i++)
532 pr_debug("\t.word 0x%08x\n", copy_page_array[i]);
533 pr_debug("\t.set pop\n");
534}
diff --git a/arch/mips/mm/pg-sb1.c b/arch/mips/mm/pg-sb1.c
deleted file mode 100644
index 49e289d05414..000000000000
--- a/arch/mips/mm/pg-sb1.c
+++ /dev/null
@@ -1,302 +0,0 @@
1/*
2 * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
3 * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org)
4 * Copyright (C) 2000 SiByte, Inc.
5 * Copyright (C) 2005 Thiemo Seufer
6 *
7 * Written by Justin Carlson of SiByte, Inc.
8 * and Kip Walker of Broadcom Corp.
9 *
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version 2
14 * of the License, or (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24 */
25#include <linux/module.h>
26#include <linux/sched.h>
27#include <linux/smp.h>
28
29#include <asm/io.h>
30#include <asm/sibyte/sb1250.h>
31#include <asm/sibyte/sb1250_regs.h>
32#include <asm/sibyte/sb1250_dma.h>
33
/*
 * Prefetch hint numbers, as strings, pasted into the "pref" instructions
 * of the inline assembly below.  With CONFIG_SB1_PASS_1_WORKAROUNDS the
 * hints 0 and 1 are substituted for the streamed hints 4 and 5
 * (presumably the plain load/store hints -- confirm against the MIPS ISA).
 */
34#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS
35#define SB1_PREF_LOAD_STREAMED_HINT "0"
36#define SB1_PREF_STORE_STREAMED_HINT "1"
37#else
38#define SB1_PREF_LOAD_STREAMED_HINT "4"
39#define SB1_PREF_STORE_STREAMED_HINT "5"
40#endif
41
/*
 * Zero one page with the CPU, using 64-bit "sd $0" stores in inline asm.
 *
 * page: start address; PAGE_SIZE bytes starting there are written.
 *
 * With CONFIG_CPU_HAS_PREFETCH the pointer is biased by +128 and a first
 * loop stores 32 bytes per iteration while issuing a prefetch for the
 * 32-byte chunk 128 bytes ahead of the one just written.  That loop stops
 * 128 bytes short of the end; the bias is then removed and the second,
 * prefetch-free loop clears the rest.  Without prefetch only the second
 * loop is assembled and it clears the whole page.
 *
 * In both loops the instruction following "bnel" sits in its delay slot.
 */
42static inline void clear_page_cpu(void *page)
43{
44 unsigned char *addr = (unsigned char *) page;
45 unsigned char *end = addr + PAGE_SIZE;
46
47 /*
48 * JDCXXX - This should be bottlenecked by the write buffer, but these
49 * things tend to be mildly unpredictable...should check this on the
50 * performance model
51 *
52 * We prefetch 4 lines ahead. We're also "cheating" slightly here...
53 * since we know we're on an SB1, we force the assembler to take
54 * 64-bit operands to speed things up
55 */
/* Operands: %0 = addr (read/write), %1 = end (read-only). */
56 __asm__ __volatile__(
57 " .set push \n"
58 " .set mips4 \n"
59 " .set noreorder \n"
60#ifdef CONFIG_CPU_HAS_PREFETCH
61 " daddiu %0, %0, 128 \n"
62 " pref " SB1_PREF_STORE_STREAMED_HINT ", -128(%0) \n"
63 /* Prefetch the first 4 lines */
64 " pref " SB1_PREF_STORE_STREAMED_HINT ", -96(%0) \n"
65 " pref " SB1_PREF_STORE_STREAMED_HINT ", -64(%0) \n"
66 " pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%0) \n"
67 "1: sd $0, -128(%0) \n" /* Throw out a cacheline of 0's */
68 " sd $0, -120(%0) \n"
69 " sd $0, -112(%0) \n"
70 " sd $0, -104(%0) \n"
71 " daddiu %0, %0, 32 \n"
72 " bnel %0, %1, 1b \n"
73 " pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%0) \n"
74 " daddiu %0, %0, -128 \n"
75#endif
76 " sd $0, 0(%0) \n" /* Throw out a cacheline of 0's */
77 "1: sd $0, 8(%0) \n"
78 " sd $0, 16(%0) \n"
79 " sd $0, 24(%0) \n"
80 " daddiu %0, %0, 32 \n"
81 " bnel %0, %1, 1b \n"
82 " sd $0, 0(%0) \n"
83 " .set pop \n"
84 : "+r" (addr)
85 : "r" (end)
86 : "memory");
87}
88
/*
 * Copy one page with the CPU using inline assembly.
 *
 * to:   destination page address.
 * from: source page address; PAGE_SIZE bytes are read from it.
 *
 * Asm operands: %0 = src, %1 = dst (both read/write), %2 = end, which is
 * src + PAGE_SIZE, so loop termination is tested on the source pointer.
 *
 * Each iteration moves 32 bytes: four ld/sd pairs through $8-$11 under
 * CONFIG_64BIT, otherwise eight lw/sw pairs through $2, $3, $6-$11.  The
 * pointer updates use daddiu in both variants.
 *
 * With CONFIG_CPU_HAS_PREFETCH both pointers are biased by +128 and a
 * first loop copies while prefetching source and destination 128 bytes
 * ahead; it stops 128 bytes short of the end, the bias is removed, and
 * the second, prefetch-free loop copies the rest.  In the second loop
 * the first load of the next chunk is issued from the "bnel" delay slot,
 * which is why label 1 sits on the second load.
 */
89static inline void copy_page_cpu(void *to, void *from)
90{
91 unsigned char *src = (unsigned char *)from;
92 unsigned char *dst = (unsigned char *)to;
93 unsigned char *end = src + PAGE_SIZE;
94
95 /*
96 * The pref's used here are using "streaming" hints, which cause the
97 * copied data to be kicked out of the cache sooner. A page copy often
98 * ends up copying a lot more data than is commonly used, so this seems
99 * to make sense in terms of reducing cache pollution, but I've no real
100 * performance data to back this up
101 */
102 __asm__ __volatile__(
103 " .set push \n"
104 " .set mips4 \n"
105 " .set noreorder \n"
106#ifdef CONFIG_CPU_HAS_PREFETCH
107 " daddiu %0, %0, 128 \n"
108 " daddiu %1, %1, 128 \n"
109 " pref " SB1_PREF_LOAD_STREAMED_HINT ", -128(%0)\n"
110 /* Prefetch the first 4 lines */
111 " pref " SB1_PREF_STORE_STREAMED_HINT ", -128(%1)\n"
112 " pref " SB1_PREF_LOAD_STREAMED_HINT ", -96(%0)\n"
113 " pref " SB1_PREF_STORE_STREAMED_HINT ", -96(%1)\n"
114 " pref " SB1_PREF_LOAD_STREAMED_HINT ", -64(%0)\n"
115 " pref " SB1_PREF_STORE_STREAMED_HINT ", -64(%1)\n"
116 " pref " SB1_PREF_LOAD_STREAMED_HINT ", -32(%0)\n"
117 "1: pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%1)\n"
118# ifdef CONFIG_64BIT
119 " ld $8, -128(%0) \n" /* Block copy a cacheline */
120 " ld $9, -120(%0) \n"
121 " ld $10, -112(%0) \n"
122 " ld $11, -104(%0) \n"
123 " sd $8, -128(%1) \n"
124 " sd $9, -120(%1) \n"
125 " sd $10, -112(%1) \n"
126 " sd $11, -104(%1) \n"
127# else
128 " lw $2, -128(%0) \n" /* Block copy a cacheline */
129 " lw $3, -124(%0) \n"
130 " lw $6, -120(%0) \n"
131 " lw $7, -116(%0) \n"
132 " lw $8, -112(%0) \n"
133 " lw $9, -108(%0) \n"
134 " lw $10, -104(%0) \n"
135 " lw $11, -100(%0) \n"
136 " sw $2, -128(%1) \n"
137 " sw $3, -124(%1) \n"
138 " sw $6, -120(%1) \n"
139 " sw $7, -116(%1) \n"
140 " sw $8, -112(%1) \n"
141 " sw $9, -108(%1) \n"
142 " sw $10, -104(%1) \n"
143 " sw $11, -100(%1) \n"
144# endif
145 " daddiu %0, %0, 32 \n"
146 " daddiu %1, %1, 32 \n"
147 " bnel %0, %2, 1b \n"
148 " pref " SB1_PREF_LOAD_STREAMED_HINT ", -32(%0)\n"
149 " daddiu %0, %0, -128 \n"
150 " daddiu %1, %1, -128 \n"
151#endif
152#ifdef CONFIG_64BIT
153 " ld $8, 0(%0) \n" /* Block copy a cacheline */
154 "1: ld $9, 8(%0) \n"
155 " ld $10, 16(%0) \n"
156 " ld $11, 24(%0) \n"
157 " sd $8, 0(%1) \n"
158 " sd $9, 8(%1) \n"
159 " sd $10, 16(%1) \n"
160 " sd $11, 24(%1) \n"
161#else
162 " lw $2, 0(%0) \n" /* Block copy a cacheline */
163 "1: lw $3, 4(%0) \n"
164 " lw $6, 8(%0) \n"
165 " lw $7, 12(%0) \n"
166 " lw $8, 16(%0) \n"
167 " lw $9, 20(%0) \n"
168 " lw $10, 24(%0) \n"
169 " lw $11, 28(%0) \n"
170 " sw $2, 0(%1) \n"
171 " sw $3, 4(%1) \n"
172 " sw $6, 8(%1) \n"
173 " sw $7, 12(%1) \n"
174 " sw $8, 16(%1) \n"
175 " sw $9, 20(%1) \n"
176 " sw $10, 24(%1) \n"
177 " sw $11, 28(%1) \n"
178#endif
179 " daddiu %0, %0, 32 \n"
180 " daddiu %1, %1, 32 \n"
181 " bnel %0, %2, 1b \n"
182#ifdef CONFIG_64BIT
183 " ld $8, 0(%0) \n"
184#else
185 " lw $2, 0(%0) \n"
186#endif
187 " .set pop \n"
188 : "+r" (src), "+r" (dst)
189 : "r" (end)
/* The clobber list names exactly the temporaries used by each variant. */
190#ifdef CONFIG_64BIT
191 : "$8", "$9", "$10", "$11", "memory");
192#else
193 : "$2", "$3", "$6", "$7", "$8", "$9", "$10", "$11", "memory");
194#endif
195}
196
197
198#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
199
200/*
201 * Pad descriptors to cacheline, since each is exclusively owned by a
202 * particular CPU.
203 */
/*
 * One data-mover descriptor.  dscr_a and dscr_b are the two words filled
 * in by clear_page()/copy_page(); pad_a and pad_b are never written in the
 * visible code and only bring the structure to four 64-bit words.
 */
204typedef struct dmadscr_s {
205 u64 dscr_a;
206 u64 dscr_b;
207 u64 pad_a;
208 u64 pad_b;
209} dmadscr_t;
210
/*
 * One descriptor per data-mover channel.  clear_page()/copy_page() index
 * this array by smp_processor_id(), i.e. each CPU uses the channel with
 * its own number.
 */
211static dmadscr_t page_descr[DM_NUM_CHANNELS]
212 __attribute__((aligned(SMP_CACHE_BYTES)));
213
/*
 * Program every data-mover channel with the physical address of its
 * descriptor in page_descr[] and a ring size of 1.
 *
 * The descriptor-base register of each channel is written three times:
 * the plain base value, the base value with the RESET bit, and finally
 * the base value with the ENABL bit.
 */
214void sb1_dma_init(void)
215{
216 int i;
217
218 for (i = 0; i < DM_NUM_CHANNELS; i++) {
219 const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) |
220 V_DM_DSCR_BASE_RINGSZ(1);
221 void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE));
222
223 __raw_writeq(base_val, base_reg);
224 __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg);
225 __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg);
226 }
227}
228
/*
 * Zero one page, using the data mover when possible
 * (CONFIG_SIBYTE_DMA_PAGEOPS variant).
 *
 * page: address of the page to clear.
 *
 * Pages outside KSEG0 are cleared by clear_page_cpu().  Otherwise the
 * current CPU's descriptor is filled in for a zero-fill of PAGE_SIZE
 * bytes, the channel's count register is written with 1 to start it, and
 * the function busy-waits until the interrupt bit shows up in the
 * channel's debug register.
 *
 * NOTE(review): "return clear_page_cpu(page);" returns a void expression
 * from a void function; compilers accept it as an extension, but ISO C
 * does not allow it.
 */
229void clear_page(void *page)
230{
231 u64 to_phys = CPHYSADDR((unsigned long)page);
232 unsigned int cpu = smp_processor_id();
233
234 /* if the page is not in KSEG0, use old way */
235 if ((long)KSEGX((unsigned long)page) != (long)CKSEG0)
236 return clear_page_cpu(page);
237
238 page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
239 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
240 page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
241 __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
242
243 /*
244 * Don't really want to do it this way, but there's no
245 * reliable way to delay completion detection.
246 */
247 while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
248 & M_DM_DSCR_BASE_INTERRUPT))
249 ;
/* Result discarded; presumably the read acknowledges the interrupt -- confirm. */
250 __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
251}
252
/*
 * Copy one page, using the data mover when possible
 * (CONFIG_SIBYTE_DMA_PAGEOPS variant).
 *
 * to:   destination page address.
 * from: source page address.
 *
 * If either page lies outside KSEG0 the copy is done by copy_page_cpu().
 * Otherwise the current CPU's descriptor is filled in with the physical
 * destination and source addresses and a length of PAGE_SIZE, the
 * channel's count register is written with 1 to start it, and the
 * function busy-waits until the interrupt bit shows up in the channel's
 * debug register.
 *
 * NOTE(review): "return copy_page_cpu(to, from);" returns a void
 * expression from a void function; compilers accept it as an extension,
 * but ISO C does not allow it.
 */
253void copy_page(void *to, void *from)
254{
255 u64 from_phys = CPHYSADDR((unsigned long)from);
256 u64 to_phys = CPHYSADDR((unsigned long)to);
257 unsigned int cpu = smp_processor_id();
258
259 /* if any page is not in KSEG0, use old way */
260 if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
261 || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
262 return copy_page_cpu(to, from);
263
264 page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
265 M_DM_DSCRA_INTERRUPT;
266 page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
267 __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
268
269 /*
270 * Don't really want to do it this way, but there's no
271 * reliable way to delay completion detection.
272 */
273 while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
274 & M_DM_DSCR_BASE_INTERRUPT))
275 ;
/* Result discarded; presumably the read acknowledges the interrupt -- confirm. */
276 __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
277}
278
279#else /* !CONFIG_SIBYTE_DMA_PAGEOPS */
280
/*
 * Zero one page (variant without CONFIG_SIBYTE_DMA_PAGEOPS): always uses
 * the CPU loop in clear_page_cpu().
 * NOTE(review): returns a void expression from a void function.
 */
281void clear_page(void *page)
282{
283 return clear_page_cpu(page);
284}
285
/*
 * Copy one page (variant without CONFIG_SIBYTE_DMA_PAGEOPS): always uses
 * the CPU loop in copy_page_cpu().
 * NOTE(review): returns a void expression from a void function.
 */
286void copy_page(void *to, void *from)
287{
288 return copy_page_cpu(to, from);
289}
290
291#endif /* !CONFIG_SIBYTE_DMA_PAGEOPS */
292
293EXPORT_SYMBOL(clear_page);
294EXPORT_SYMBOL(copy_page);
295
/*
 * Intentionally empty: in this file clear_page() is ordinary compiled
 * code, so there is nothing to synthesize.  Presumably kept so that common
 * code calling build_clear_page() still links -- confirm against callers.
 */
296void __cpuinit build_clear_page(void)
297{
298}
299
/*
 * Intentionally empty: in this file copy_page() is ordinary compiled
 * code, so there is nothing to synthesize.  Presumably kept so that common
 * code calling build_copy_page() still links -- confirm against callers.
 */
300void __cpuinit build_copy_page(void)
301{
302}
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index 1a6f7704cc89..1655aa69e133 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -58,13 +58,13 @@ enum opcode {
58 insn_invalid, 58 insn_invalid,
59 insn_addu, insn_addiu, insn_and, insn_andi, insn_beq, 59 insn_addu, insn_addiu, insn_and, insn_andi, insn_beq,
60 insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl, 60 insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl,
61 insn_bne, insn_daddu, insn_daddiu, insn_dmfc0, insn_dmtc0, 61 insn_bne, insn_cache, insn_daddu, insn_daddiu, insn_dmfc0,
62 insn_dsll, insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32, 62 insn_dmtc0, insn_dsll, insn_dsll32, insn_dsra, insn_dsrl,
63 insn_dsubu, insn_eret, insn_j, insn_jal, insn_jr, insn_ld, 63 insn_dsrl32, insn_dsubu, insn_eret, insn_j, insn_jal, insn_jr,
64 insn_ll, insn_lld, insn_lui, insn_lw, insn_mfc0, insn_mtc0, 64 insn_ld, insn_ll, insn_lld, insn_lui, insn_lw, insn_mfc0,
65 insn_ori, insn_rfe, insn_sc, insn_scd, insn_sd, insn_sll, 65 insn_mtc0, insn_ori, insn_pref, insn_rfe, insn_sc, insn_scd,
66 insn_sra, insn_srl, insn_subu, insn_sw, insn_tlbp, insn_tlbwi, 66 insn_sd, insn_sll, insn_sra, insn_srl, insn_subu, insn_sw,
67 insn_tlbwr, insn_xor, insn_xori 67 insn_tlbp, insn_tlbwi, insn_tlbwr, insn_xor, insn_xori
68}; 68};
69 69
70struct insn { 70struct insn {
@@ -94,6 +94,7 @@ static struct insn insn_table[] __cpuinitdata = {
94 { insn_bltz, M(bcond_op, 0, bltz_op, 0, 0, 0), RS | BIMM }, 94 { insn_bltz, M(bcond_op, 0, bltz_op, 0, 0, 0), RS | BIMM },
95 { insn_bltzl, M(bcond_op, 0, bltzl_op, 0, 0, 0), RS | BIMM }, 95 { insn_bltzl, M(bcond_op, 0, bltzl_op, 0, 0, 0), RS | BIMM },
96 { insn_bne, M(bne_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, 96 { insn_bne, M(bne_op, 0, 0, 0, 0, 0), RS | RT | BIMM },
97 { insn_cache, M(cache_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
97 { insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, 98 { insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
98 { insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD }, 99 { insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD },
99 { insn_dmfc0, M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET}, 100 { insn_dmfc0, M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET},
@@ -116,6 +117,7 @@ static struct insn insn_table[] __cpuinitdata = {
116 { insn_mfc0, M(cop0_op, mfc_op, 0, 0, 0, 0), RT | RD | SET}, 117 { insn_mfc0, M(cop0_op, mfc_op, 0, 0, 0, 0), RT | RD | SET},
117 { insn_mtc0, M(cop0_op, mtc_op, 0, 0, 0, 0), RT | RD | SET}, 118 { insn_mtc0, M(cop0_op, mtc_op, 0, 0, 0, 0), RT | RD | SET},
118 { insn_ori, M(ori_op, 0, 0, 0, 0, 0), RS | RT | UIMM }, 119 { insn_ori, M(ori_op, 0, 0, 0, 0, 0), RS | RT | UIMM },
120 { insn_pref, M(pref_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
119 { insn_rfe, M(cop0_op, cop_op, 0, 0, 0, rfe_op), 0 }, 121 { insn_rfe, M(cop0_op, cop_op, 0, 0, 0, rfe_op), 0 },
120 { insn_sc, M(sc_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, 122 { insn_sc, M(sc_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
121 { insn_scd, M(scd_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, 123 { insn_scd, M(scd_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
@@ -337,6 +339,7 @@ I_u1s2(_bgezl)
337I_u1s2(_bltz) 339I_u1s2(_bltz)
338I_u1s2(_bltzl) 340I_u1s2(_bltzl)
339I_u1u2s3(_bne) 341I_u1u2s3(_bne)
342I_u2s3u1(_cache)
340I_u1u2u3(_dmfc0) 343I_u1u2u3(_dmfc0)
341I_u1u2u3(_dmtc0) 344I_u1u2u3(_dmtc0)
342I_u2u1s3(_daddiu) 345I_u2u1s3(_daddiu)
@@ -359,6 +362,7 @@ I_u2s3u1(_lw)
359I_u1u2u3(_mfc0) 362I_u1u2u3(_mfc0)
360I_u1u2u3(_mtc0) 363I_u1u2u3(_mtc0)
361I_u2u1u3(_ori) 364I_u2u1u3(_ori)
365I_u2s3u1(_pref)
362I_0(_rfe) 366I_0(_rfe)
363I_u2s3u1(_sc) 367I_u2s3u1(_sc)
364I_u2s3u1(_scd) 368I_u2s3u1(_scd)
@@ -555,6 +559,14 @@ uasm_il_beqzl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid)
555} 559}
556 560
/*
 * Emit "bne reg1, reg2" whose target is the label with id 'lid'.
 *
 * p:   in/out pointer to the current emit position.
 * r:   in/out pointer to the relocation list.
 * lid: label id the branch should reach.
 *
 * uasm_r_mips_pc16() is called for the current position first, then the
 * branch is emitted with an offset of 0 -- presumably a placeholder that
 * is patched when relocations are resolved; confirm against the uasm
 * relocation code.
 */
557void __cpuinit 561void __cpuinit
562uasm_il_bne(u32 **p, struct uasm_reloc **r, unsigned int reg1,
563 unsigned int reg2, int lid)
564{
565 uasm_r_mips_pc16(r, *p, lid);
566 uasm_i_bne(p, reg1, reg2, 0);
567}
568
569void __cpuinit
558uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid) 570uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid)
559{ 571{
560 uasm_r_mips_pc16(r, *p, lid); 572 uasm_r_mips_pc16(r, *p, lid);
diff --git a/arch/mips/mm/uasm.h b/arch/mips/mm/uasm.h
index fe0574f6e77d..0d6a66f32030 100644
--- a/arch/mips/mm/uasm.h
+++ b/arch/mips/mm/uasm.h
@@ -55,6 +55,7 @@ Ip_u1s2(_bgezl);
55Ip_u1s2(_bltz); 55Ip_u1s2(_bltz);
56Ip_u1s2(_bltzl); 56Ip_u1s2(_bltzl);
57Ip_u1u2s3(_bne); 57Ip_u1u2s3(_bne);
58Ip_u2s3u1(_cache);
58Ip_u1u2u3(_dmfc0); 59Ip_u1u2u3(_dmfc0);
59Ip_u1u2u3(_dmtc0); 60Ip_u1u2u3(_dmtc0);
60Ip_u2u1s3(_daddiu); 61Ip_u2u1s3(_daddiu);
@@ -77,6 +78,7 @@ Ip_u2s3u1(_lw);
77Ip_u1u2u3(_mfc0); 78Ip_u1u2u3(_mfc0);
78Ip_u1u2u3(_mtc0); 79Ip_u1u2u3(_mtc0);
79Ip_u2u1u3(_ori); 80Ip_u2u1u3(_ori);
81Ip_u2s3u1(_pref);
80Ip_0(_rfe); 82Ip_0(_rfe);
81Ip_u2s3u1(_sc); 83Ip_u2s3u1(_sc);
82Ip_u2s3u1(_scd); 84Ip_u2s3u1(_scd);
@@ -177,6 +179,8 @@ void uasm_il_bltz(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
177void uasm_il_b(u32 **p, struct uasm_reloc **r, int lid); 179void uasm_il_b(u32 **p, struct uasm_reloc **r, int lid);
178void uasm_il_beqz(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid); 180void uasm_il_beqz(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
179void uasm_il_beqzl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid); 181void uasm_il_beqzl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
182void uasm_il_bne(u32 **p, struct uasm_reloc **r, unsigned int reg1,
183 unsigned int reg2, int lid);
180void uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid); 184void uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
181void uasm_il_bgezl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid); 185void uasm_il_bgezl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
182void uasm_il_bgez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid); 186void uasm_il_bgez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);