diff options
author | John David Anglin <dave.anglin@bell.net> | 2013-02-03 18:00:54 -0500 |
---|---|---|
committer | Helge Deller <deller@gmx.de> | 2013-02-20 16:49:29 -0500 |
commit | 6d2ddc2f9414d5ad0d3f014416020579ccce1baf (patch) | |
tree | 9417c7592d5cb1fe4d5bb4c93985ac16085d9de1 /arch/parisc | |
parent | 7633453978c54874849c5f40487ac9e14d43fc12 (diff) |
parisc: fixes and cleanups in page cache flushing (2/4)
Implement clear_page_asm and copy_page_asm. These are optimized routines to
clear and copy a page. I tested prefetch optimizations in clear_page_asm and
copy_page_asm but didn't see any significant performance improvement on rp3440.
I'm not sure if these routines are significantly faster than memset and/or
memcpy, but they are there for further performance evaluation.
TLB purge operations on PA 1.X SMP machines are now serialized with the help of
the new tlb_lock() and tlb_unlock() macros, since on some PA-RISC machines, TLB
purges need to be serialized in software. Obviously, the lock isn't needed in UP
kernels. On PA 2.0 machines, there is a local TLB instruction which is much
less disruptive to the memory subsystem. No lock is needed for local purge.
Loops are also unrolled in flush_instruction_cache_local and
flush_data_cache_local.
The implementation of what used to be copy_user_page (now copy_user_page_asm)
is now fixed. Additionally, 64-bit support is now added. Read the preceding
comment, which I didn't change. I left the comment, but it is now inaccurate.
Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
Diffstat (limited to 'arch/parisc')
-rw-r--r-- | arch/parisc/kernel/pacache.S | 335 |
1 files changed, 290 insertions, 45 deletions
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index 5d7218ad885c..312b48422a56 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S | |||
@@ -199,7 +199,6 @@ ENTRY(flush_instruction_cache_local) | |||
199 | .callinfo NO_CALLS | 199 | .callinfo NO_CALLS |
200 | .entry | 200 | .entry |
201 | 201 | ||
202 | mtsp %r0, %sr1 | ||
203 | load32 cache_info, %r1 | 202 | load32 cache_info, %r1 |
204 | 203 | ||
205 | /* Flush Instruction Cache */ | 204 | /* Flush Instruction Cache */ |
@@ -208,7 +207,8 @@ ENTRY(flush_instruction_cache_local) | |||
208 | LDREG ICACHE_STRIDE(%r1), %arg1 | 207 | LDREG ICACHE_STRIDE(%r1), %arg1 |
209 | LDREG ICACHE_COUNT(%r1), %arg2 | 208 | LDREG ICACHE_COUNT(%r1), %arg2 |
210 | LDREG ICACHE_LOOP(%r1), %arg3 | 209 | LDREG ICACHE_LOOP(%r1), %arg3 |
211 | rsm PSW_SM_I, %r22 /* No mmgt ops during loop*/ | 210 | rsm PSW_SM_I, %r22 /* No mmgt ops during loop*/ |
211 | mtsp %r0, %sr1 | ||
212 | addib,COND(=) -1, %arg3, fioneloop /* Preadjust and test */ | 212 | addib,COND(=) -1, %arg3, fioneloop /* Preadjust and test */ |
213 | movb,<,n %arg3, %r31, fisync /* If loop < 0, do sync */ | 213 | movb,<,n %arg3, %r31, fisync /* If loop < 0, do sync */ |
214 | 214 | ||
@@ -220,7 +220,33 @@ fimanyloop: /* Loop if LOOP >= 2 */ | |||
220 | addib,COND(<=),n -1, %arg2, fisync /* Outer loop decr */ | 220 | addib,COND(<=),n -1, %arg2, fisync /* Outer loop decr */ |
221 | 221 | ||
222 | fioneloop: /* Loop if LOOP = 1 */ | 222 | fioneloop: /* Loop if LOOP = 1 */ |
223 | addib,COND(>) -1, %arg2, fioneloop /* Outer loop count decr */ | 223 | /* Some implementations may flush with a single fice instruction */ |
224 | cmpib,COND(>>=),n 15, %arg2, fioneloop2 | ||
225 | |||
226 | fioneloop1: | ||
227 | fice,m %arg1(%sr1, %arg0) | ||
228 | fice,m %arg1(%sr1, %arg0) | ||
229 | fice,m %arg1(%sr1, %arg0) | ||
230 | fice,m %arg1(%sr1, %arg0) | ||
231 | fice,m %arg1(%sr1, %arg0) | ||
232 | fice,m %arg1(%sr1, %arg0) | ||
233 | fice,m %arg1(%sr1, %arg0) | ||
234 | fice,m %arg1(%sr1, %arg0) | ||
235 | fice,m %arg1(%sr1, %arg0) | ||
236 | fice,m %arg1(%sr1, %arg0) | ||
237 | fice,m %arg1(%sr1, %arg0) | ||
238 | fice,m %arg1(%sr1, %arg0) | ||
239 | fice,m %arg1(%sr1, %arg0) | ||
240 | fice,m %arg1(%sr1, %arg0) | ||
241 | fice,m %arg1(%sr1, %arg0) | ||
242 | addib,COND(>) -16, %arg2, fioneloop1 | ||
243 | fice,m %arg1(%sr1, %arg0) | ||
244 | |||
245 | /* Check if done */ | ||
246 | cmpb,COND(=),n %arg2, %r0, fisync /* Predict branch taken */ | ||
247 | |||
248 | fioneloop2: | ||
249 | addib,COND(>) -1, %arg2, fioneloop2 /* Outer loop count decr */ | ||
224 | fice,m %arg1(%sr1, %arg0) /* Fice for one loop */ | 250 | fice,m %arg1(%sr1, %arg0) /* Fice for one loop */ |
225 | 251 | ||
226 | fisync: | 252 | fisync: |
@@ -240,8 +266,7 @@ ENTRY(flush_data_cache_local) | |||
240 | .callinfo NO_CALLS | 266 | .callinfo NO_CALLS |
241 | .entry | 267 | .entry |
242 | 268 | ||
243 | mtsp %r0, %sr1 | 269 | load32 cache_info, %r1 |
244 | load32 cache_info, %r1 | ||
245 | 270 | ||
246 | /* Flush Data Cache */ | 271 | /* Flush Data Cache */ |
247 | 272 | ||
@@ -249,7 +274,8 @@ ENTRY(flush_data_cache_local) | |||
249 | LDREG DCACHE_STRIDE(%r1), %arg1 | 274 | LDREG DCACHE_STRIDE(%r1), %arg1 |
250 | LDREG DCACHE_COUNT(%r1), %arg2 | 275 | LDREG DCACHE_COUNT(%r1), %arg2 |
251 | LDREG DCACHE_LOOP(%r1), %arg3 | 276 | LDREG DCACHE_LOOP(%r1), %arg3 |
252 | rsm PSW_SM_I, %r22 | 277 | rsm PSW_SM_I, %r22 /* No mmgt ops during loop*/ |
278 | mtsp %r0, %sr1 | ||
253 | addib,COND(=) -1, %arg3, fdoneloop /* Preadjust and test */ | 279 | addib,COND(=) -1, %arg3, fdoneloop /* Preadjust and test */ |
254 | movb,<,n %arg3, %r31, fdsync /* If loop < 0, do sync */ | 280 | movb,<,n %arg3, %r31, fdsync /* If loop < 0, do sync */ |
255 | 281 | ||
@@ -261,7 +287,33 @@ fdmanyloop: /* Loop if LOOP >= 2 */ | |||
261 | addib,COND(<=),n -1, %arg2, fdsync /* Outer loop decr */ | 287 | addib,COND(<=),n -1, %arg2, fdsync /* Outer loop decr */ |
262 | 288 | ||
263 | fdoneloop: /* Loop if LOOP = 1 */ | 289 | fdoneloop: /* Loop if LOOP = 1 */ |
264 | addib,COND(>) -1, %arg2, fdoneloop /* Outer loop count decr */ | 290 | /* Some implementations may flush with a single fdce instruction */ |
291 | cmpib,COND(>>=),n 15, %arg2, fdoneloop2 | ||
292 | |||
293 | fdoneloop1: | ||
294 | fdce,m %arg1(%sr1, %arg0) | ||
295 | fdce,m %arg1(%sr1, %arg0) | ||
296 | fdce,m %arg1(%sr1, %arg0) | ||
297 | fdce,m %arg1(%sr1, %arg0) | ||
298 | fdce,m %arg1(%sr1, %arg0) | ||
299 | fdce,m %arg1(%sr1, %arg0) | ||
300 | fdce,m %arg1(%sr1, %arg0) | ||
301 | fdce,m %arg1(%sr1, %arg0) | ||
302 | fdce,m %arg1(%sr1, %arg0) | ||
303 | fdce,m %arg1(%sr1, %arg0) | ||
304 | fdce,m %arg1(%sr1, %arg0) | ||
305 | fdce,m %arg1(%sr1, %arg0) | ||
306 | fdce,m %arg1(%sr1, %arg0) | ||
307 | fdce,m %arg1(%sr1, %arg0) | ||
308 | fdce,m %arg1(%sr1, %arg0) | ||
309 | addib,COND(>) -16, %arg2, fdoneloop1 | ||
310 | fdce,m %arg1(%sr1, %arg0) | ||
311 | |||
312 | /* Check if done */ | ||
313 | cmpb,COND(=),n %arg2, %r0, fdsync /* Predict branch taken */ | ||
314 | |||
315 | fdoneloop2: | ||
316 | addib,COND(>) -1, %arg2, fdoneloop2 /* Outer loop count decr */ | ||
265 | fdce,m %arg1(%sr1, %arg0) /* Fdce for one loop */ | 317 | fdce,m %arg1(%sr1, %arg0) /* Fdce for one loop */ |
266 | 318 | ||
267 | fdsync: | 319 | fdsync: |
@@ -277,7 +329,104 @@ ENDPROC(flush_data_cache_local) | |||
277 | 329 | ||
278 | .align 16 | 330 | .align 16 |
279 | 331 | ||
280 | ENTRY(copy_user_page_asm) | 332 | /* Macros to serialize TLB purge operations on SMP. */ |
333 | |||
334 | .macro tlb_lock la,flags,tmp | ||
335 | #ifdef CONFIG_SMP | ||
336 | ldil L%pa_tlb_lock,%r1 | ||
337 | ldo R%pa_tlb_lock(%r1),\la | ||
338 | rsm PSW_SM_I,\flags | ||
339 | 1: LDCW 0(\la),\tmp | ||
340 | cmpib,<>,n 0,\tmp,3f | ||
341 | 2: ldw 0(\la),\tmp | ||
342 | cmpb,<> %r0,\tmp,1b | ||
343 | nop | ||
344 | b,n 2b | ||
345 | 3: | ||
346 | #endif | ||
347 | .endm | ||
348 | |||
349 | .macro tlb_unlock la,flags,tmp | ||
350 | #ifdef CONFIG_SMP | ||
351 | ldi 1,\tmp | ||
352 | stw \tmp,0(\la) | ||
353 | mtsm \flags | ||
354 | #endif | ||
355 | .endm | ||
356 | |||
357 | /* Clear page using kernel mapping. */ | ||
358 | |||
359 | ENTRY(clear_page_asm) | ||
360 | .proc | ||
361 | .callinfo NO_CALLS | ||
362 | .entry | ||
363 | |||
364 | #ifdef CONFIG_64BIT | ||
365 | |||
366 | /* Unroll the loop. */ | ||
367 | ldi (PAGE_SIZE / 128), %r1 | ||
368 | |||
369 | 1: | ||
370 | std %r0, 0(%r26) | ||
371 | std %r0, 8(%r26) | ||
372 | std %r0, 16(%r26) | ||
373 | std %r0, 24(%r26) | ||
374 | std %r0, 32(%r26) | ||
375 | std %r0, 40(%r26) | ||
376 | std %r0, 48(%r26) | ||
377 | std %r0, 56(%r26) | ||
378 | std %r0, 64(%r26) | ||
379 | std %r0, 72(%r26) | ||
380 | std %r0, 80(%r26) | ||
381 | std %r0, 88(%r26) | ||
382 | std %r0, 96(%r26) | ||
383 | std %r0, 104(%r26) | ||
384 | std %r0, 112(%r26) | ||
385 | std %r0, 120(%r26) | ||
386 | |||
387 | /* Note reverse branch hint for addib is taken. */ | ||
388 | addib,COND(>),n -1, %r1, 1b | ||
389 | ldo 128(%r26), %r26 | ||
390 | |||
391 | #else | ||
392 | |||
393 | /* | ||
394 | * Note that until (if) we start saving the full 64-bit register | ||
395 | * values on interrupt, we can't use std on a 32 bit kernel. | ||
396 | */ | ||
397 | ldi (PAGE_SIZE / 64), %r1 | ||
398 | |||
399 | 1: | ||
400 | stw %r0, 0(%r26) | ||
401 | stw %r0, 4(%r26) | ||
402 | stw %r0, 8(%r26) | ||
403 | stw %r0, 12(%r26) | ||
404 | stw %r0, 16(%r26) | ||
405 | stw %r0, 20(%r26) | ||
406 | stw %r0, 24(%r26) | ||
407 | stw %r0, 28(%r26) | ||
408 | stw %r0, 32(%r26) | ||
409 | stw %r0, 36(%r26) | ||
410 | stw %r0, 40(%r26) | ||
411 | stw %r0, 44(%r26) | ||
412 | stw %r0, 48(%r26) | ||
413 | stw %r0, 52(%r26) | ||
414 | stw %r0, 56(%r26) | ||
415 | stw %r0, 60(%r26) | ||
416 | |||
417 | addib,COND(>),n -1, %r1, 1b | ||
418 | ldo 64(%r26), %r26 | ||
419 | #endif | ||
420 | bv %r0(%r2) | ||
421 | nop | ||
422 | .exit | ||
423 | |||
424 | .procend | ||
425 | ENDPROC(clear_page_asm) | ||
426 | |||
427 | /* Copy page using kernel mapping. */ | ||
428 | |||
429 | ENTRY(copy_page_asm) | ||
281 | .proc | 430 | .proc |
282 | .callinfo NO_CALLS | 431 | .callinfo NO_CALLS |
283 | .entry | 432 | .entry |
@@ -285,18 +434,14 @@ ENTRY(copy_user_page_asm) | |||
285 | #ifdef CONFIG_64BIT | 434 | #ifdef CONFIG_64BIT |
286 | /* PA8x00 CPUs can consume 2 loads or 1 store per cycle. | 435 | /* PA8x00 CPUs can consume 2 loads or 1 store per cycle. |
287 | * Unroll the loop by hand and arrange insn appropriately. | 436 | * Unroll the loop by hand and arrange insn appropriately. |
288 | * GCC probably can do this just as well. | 437 | * Prefetch doesn't improve performance on rp3440. |
438 | * GCC probably can do this just as well... | ||
289 | */ | 439 | */ |
290 | 440 | ||
291 | ldd 0(%r25), %r19 | ||
292 | ldi (PAGE_SIZE / 128), %r1 | 441 | ldi (PAGE_SIZE / 128), %r1 |
293 | 442 | ||
294 | ldw 64(%r25), %r0 /* prefetch 1 cacheline ahead */ | 443 | 1: ldd 0(%r25), %r19 |
295 | ldw 128(%r25), %r0 /* prefetch 2 */ | 444 | ldd 8(%r25), %r20 |
296 | |||
297 | 1: ldd 8(%r25), %r20 | ||
298 | ldw 192(%r25), %r0 /* prefetch 3 */ | ||
299 | ldw 256(%r25), %r0 /* prefetch 4 */ | ||
300 | 445 | ||
301 | ldd 16(%r25), %r21 | 446 | ldd 16(%r25), %r21 |
302 | ldd 24(%r25), %r22 | 447 | ldd 24(%r25), %r22 |
@@ -330,20 +475,16 @@ ENTRY(copy_user_page_asm) | |||
330 | 475 | ||
331 | ldd 112(%r25), %r21 | 476 | ldd 112(%r25), %r21 |
332 | ldd 120(%r25), %r22 | 477 | ldd 120(%r25), %r22 |
478 | ldo 128(%r25), %r25 | ||
333 | std %r19, 96(%r26) | 479 | std %r19, 96(%r26) |
334 | std %r20, 104(%r26) | 480 | std %r20, 104(%r26) |
335 | 481 | ||
336 | ldo 128(%r25), %r25 | ||
337 | std %r21, 112(%r26) | 482 | std %r21, 112(%r26) |
338 | std %r22, 120(%r26) | 483 | std %r22, 120(%r26) |
339 | ldo 128(%r26), %r26 | ||
340 | 484 | ||
341 | /* conditional branches nullify on forward taken branch, and on | 485 | /* Note reverse branch hint for addib is taken. */ |
342 | * non-taken backward branch. Note that .+4 is a backwards branch. | 486 | addib,COND(>),n -1, %r1, 1b |
343 | * The ldd should only get executed if the branch is taken. | 487 | ldo 128(%r26), %r26 |
344 | */ | ||
345 | addib,COND(>),n -1, %r1, 1b /* bundle 10 */ | ||
346 | ldd 0(%r25), %r19 /* start next loads */ | ||
347 | 488 | ||
348 | #else | 489 | #else |
349 | 490 | ||
@@ -399,7 +540,7 @@ ENTRY(copy_user_page_asm) | |||
399 | .exit | 540 | .exit |
400 | 541 | ||
401 | .procend | 542 | .procend |
402 | ENDPROC(copy_user_page_asm) | 543 | ENDPROC(copy_page_asm) |
403 | 544 | ||
404 | /* | 545 | /* |
405 | * NOTE: Code in clear_user_page has a hard coded dependency on the | 546 | * NOTE: Code in clear_user_page has a hard coded dependency on the |
@@ -422,8 +563,6 @@ ENDPROC(copy_user_page_asm) | |||
422 | * %r23 physical page (shifted for tlb insert) of "from" translation | 563 | * %r23 physical page (shifted for tlb insert) of "from" translation |
423 | */ | 564 | */ |
424 | 565 | ||
425 | #if 0 | ||
426 | |||
427 | /* | 566 | /* |
428 | * We can't do this since copy_user_page is used to bring in | 567 | * We can't do this since copy_user_page is used to bring in |
429 | * file data that might have instructions. Since the data would | 568 | * file data that might have instructions. Since the data would |
@@ -435,6 +574,7 @@ ENDPROC(copy_user_page_asm) | |||
435 | * use it if more information is passed into copy_user_page(). | 574 | * use it if more information is passed into copy_user_page(). |
436 | * Have to do some measurements to see if it is worthwhile to | 575 | * Have to do some measurements to see if it is worthwhile to |
437 | * lobby for such a change. | 576 | * lobby for such a change. |
577 | * | ||
438 | */ | 578 | */ |
439 | 579 | ||
440 | ENTRY(copy_user_page_asm) | 580 | ENTRY(copy_user_page_asm) |
@@ -442,16 +582,21 @@ ENTRY(copy_user_page_asm) | |||
442 | .callinfo NO_CALLS | 582 | .callinfo NO_CALLS |
443 | .entry | 583 | .entry |
444 | 584 | ||
585 | /* Convert virtual `to' and `from' addresses to physical addresses. | ||
586 | Move `from' physical address to non shadowed register. */ | ||
445 | ldil L%(__PAGE_OFFSET), %r1 | 587 | ldil L%(__PAGE_OFFSET), %r1 |
446 | sub %r26, %r1, %r26 | 588 | sub %r26, %r1, %r26 |
447 | sub %r25, %r1, %r23 /* move physical addr into non shadowed reg */ | 589 | sub %r25, %r1, %r23 |
448 | 590 | ||
449 | ldil L%(TMPALIAS_MAP_START), %r28 | 591 | ldil L%(TMPALIAS_MAP_START), %r28 |
450 | /* FIXME for different page sizes != 4k */ | 592 | /* FIXME for different page sizes != 4k */ |
451 | #ifdef CONFIG_64BIT | 593 | #ifdef CONFIG_64BIT |
452 | extrd,u %r26,56,32, %r26 /* convert phys addr to tlb insert format */ | 594 | #if (TMPALIAS_MAP_START >= 0x80000000) |
453 | extrd,u %r23,56,32, %r23 /* convert phys addr to tlb insert format */ | 595 | depdi 0, 31,32, %r28 /* clear any sign extension */ |
454 | depd %r24,63,22, %r28 /* Form aliased virtual address 'to' */ | 596 | #endif |
597 | extrd,u %r26,56,32, %r26 /* convert phys addr to tlb insert format */ | ||
598 | extrd,u %r23,56,32, %r23 /* convert phys addr to tlb insert format */ | ||
599 | depd %r24,63,22, %r28 /* Form aliased virtual address 'to' */ | ||
455 | depdi 0, 63,12, %r28 /* Clear any offset bits */ | 600 | depdi 0, 63,12, %r28 /* Clear any offset bits */ |
456 | copy %r28, %r29 | 601 | copy %r28, %r29 |
457 | depdi 1, 41,1, %r29 /* Form aliased virtual address 'from' */ | 602 | depdi 1, 41,1, %r29 /* Form aliased virtual address 'from' */ |
@@ -466,10 +611,76 @@ ENTRY(copy_user_page_asm) | |||
466 | 611 | ||
467 | /* Purge any old translations */ | 612 | /* Purge any old translations */ |
468 | 613 | ||
614 | #ifdef CONFIG_PA20 | ||
615 | pdtlb,l 0(%r28) | ||
616 | pdtlb,l 0(%r29) | ||
617 | #else | ||
618 | tlb_lock %r20,%r21,%r22 | ||
469 | pdtlb 0(%r28) | 619 | pdtlb 0(%r28) |
470 | pdtlb 0(%r29) | 620 | pdtlb 0(%r29) |
621 | tlb_unlock %r20,%r21,%r22 | ||
622 | #endif | ||
623 | |||
624 | #ifdef CONFIG_64BIT | ||
625 | /* PA8x00 CPUs can consume 2 loads or 1 store per cycle. | ||
626 | * Unroll the loop by hand and arrange insn appropriately. | ||
627 | * GCC probably can do this just as well. | ||
628 | */ | ||
471 | 629 | ||
472 | ldi 64, %r1 | 630 | ldd 0(%r29), %r19 |
631 | ldi (PAGE_SIZE / 128), %r1 | ||
632 | |||
633 | 1: ldd 8(%r29), %r20 | ||
634 | |||
635 | ldd 16(%r29), %r21 | ||
636 | ldd 24(%r29), %r22 | ||
637 | std %r19, 0(%r28) | ||
638 | std %r20, 8(%r28) | ||
639 | |||
640 | ldd 32(%r29), %r19 | ||
641 | ldd 40(%r29), %r20 | ||
642 | std %r21, 16(%r28) | ||
643 | std %r22, 24(%r28) | ||
644 | |||
645 | ldd 48(%r29), %r21 | ||
646 | ldd 56(%r29), %r22 | ||
647 | std %r19, 32(%r28) | ||
648 | std %r20, 40(%r28) | ||
649 | |||
650 | ldd 64(%r29), %r19 | ||
651 | ldd 72(%r29), %r20 | ||
652 | std %r21, 48(%r28) | ||
653 | std %r22, 56(%r28) | ||
654 | |||
655 | ldd 80(%r29), %r21 | ||
656 | ldd 88(%r29), %r22 | ||
657 | std %r19, 64(%r28) | ||
658 | std %r20, 72(%r28) | ||
659 | |||
660 | ldd 96(%r29), %r19 | ||
661 | ldd 104(%r29), %r20 | ||
662 | std %r21, 80(%r28) | ||
663 | std %r22, 88(%r28) | ||
664 | |||
665 | ldd 112(%r29), %r21 | ||
666 | ldd 120(%r29), %r22 | ||
667 | std %r19, 96(%r28) | ||
668 | std %r20, 104(%r28) | ||
669 | |||
670 | ldo 128(%r29), %r29 | ||
671 | std %r21, 112(%r28) | ||
672 | std %r22, 120(%r28) | ||
673 | ldo 128(%r28), %r28 | ||
674 | |||
675 | /* conditional branches nullify on forward taken branch, and on | ||
676 | * non-taken backward branch. Note that .+4 is a backwards branch. | ||
677 | * The ldd should only get executed if the branch is taken. | ||
678 | */ | ||
679 | addib,COND(>),n -1, %r1, 1b /* bundle 10 */ | ||
680 | ldd 0(%r29), %r19 /* start next loads */ | ||
681 | |||
682 | #else | ||
683 | ldi (PAGE_SIZE / 64), %r1 | ||
473 | 684 | ||
474 | /* | 685 | /* |
475 | * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw | 686 | * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw |
@@ -480,9 +691,7 @@ ENTRY(copy_user_page_asm) | |||
480 | * use ldd/std on a 32 bit kernel. | 691 | * use ldd/std on a 32 bit kernel. |
481 | */ | 692 | */ |
482 | 693 | ||
483 | 694 | 1: ldw 0(%r29), %r19 | |
484 | 1: | ||
485 | ldw 0(%r29), %r19 | ||
486 | ldw 4(%r29), %r20 | 695 | ldw 4(%r29), %r20 |
487 | ldw 8(%r29), %r21 | 696 | ldw 8(%r29), %r21 |
488 | ldw 12(%r29), %r22 | 697 | ldw 12(%r29), %r22 |
@@ -515,8 +724,10 @@ ENTRY(copy_user_page_asm) | |||
515 | stw %r21, 56(%r28) | 724 | stw %r21, 56(%r28) |
516 | stw %r22, 60(%r28) | 725 | stw %r22, 60(%r28) |
517 | ldo 64(%r28), %r28 | 726 | ldo 64(%r28), %r28 |
727 | |||
518 | addib,COND(>) -1, %r1,1b | 728 | addib,COND(>) -1, %r1,1b |
519 | ldo 64(%r29), %r29 | 729 | ldo 64(%r29), %r29 |
730 | #endif | ||
520 | 731 | ||
521 | bv %r0(%r2) | 732 | bv %r0(%r2) |
522 | nop | 733 | nop |
@@ -524,9 +735,8 @@ ENTRY(copy_user_page_asm) | |||
524 | 735 | ||
525 | .procend | 736 | .procend |
526 | ENDPROC(copy_user_page_asm) | 737 | ENDPROC(copy_user_page_asm) |
527 | #endif | ||
528 | 738 | ||
529 | ENTRY(__clear_user_page_asm) | 739 | ENTRY(clear_user_page_asm) |
530 | .proc | 740 | .proc |
531 | .callinfo NO_CALLS | 741 | .callinfo NO_CALLS |
532 | .entry | 742 | .entry |
@@ -550,7 +760,13 @@ ENTRY(__clear_user_page_asm) | |||
550 | 760 | ||
551 | /* Purge any old translation */ | 761 | /* Purge any old translation */ |
552 | 762 | ||
763 | #ifdef CONFIG_PA20 | ||
764 | pdtlb,l 0(%r28) | ||
765 | #else | ||
766 | tlb_lock %r20,%r21,%r22 | ||
553 | pdtlb 0(%r28) | 767 | pdtlb 0(%r28) |
768 | tlb_unlock %r20,%r21,%r22 | ||
769 | #endif | ||
554 | 770 | ||
555 | #ifdef CONFIG_64BIT | 771 | #ifdef CONFIG_64BIT |
556 | ldi (PAGE_SIZE / 128), %r1 | 772 | ldi (PAGE_SIZE / 128), %r1 |
@@ -580,8 +796,7 @@ ENTRY(__clear_user_page_asm) | |||
580 | #else /* ! CONFIG_64BIT */ | 796 | #else /* ! CONFIG_64BIT */ |
581 | ldi (PAGE_SIZE / 64), %r1 | 797 | ldi (PAGE_SIZE / 64), %r1 |
582 | 798 | ||
583 | 1: | 799 | 1: stw %r0, 0(%r28) |
584 | stw %r0, 0(%r28) | ||
585 | stw %r0, 4(%r28) | 800 | stw %r0, 4(%r28) |
586 | stw %r0, 8(%r28) | 801 | stw %r0, 8(%r28) |
587 | stw %r0, 12(%r28) | 802 | stw %r0, 12(%r28) |
@@ -606,7 +821,7 @@ ENTRY(__clear_user_page_asm) | |||
606 | .exit | 821 | .exit |
607 | 822 | ||
608 | .procend | 823 | .procend |
609 | ENDPROC(__clear_user_page_asm) | 824 | ENDPROC(clear_user_page_asm) |
610 | 825 | ||
611 | ENTRY(flush_dcache_page_asm) | 826 | ENTRY(flush_dcache_page_asm) |
612 | .proc | 827 | .proc |
@@ -630,7 +845,13 @@ ENTRY(flush_dcache_page_asm) | |||
630 | 845 | ||
631 | /* Purge any old translation */ | 846 | /* Purge any old translation */ |
632 | 847 | ||
848 | #ifdef CONFIG_PA20 | ||
849 | pdtlb,l 0(%r28) | ||
850 | #else | ||
851 | tlb_lock %r20,%r21,%r22 | ||
633 | pdtlb 0(%r28) | 852 | pdtlb 0(%r28) |
853 | tlb_unlock %r20,%r21,%r22 | ||
854 | #endif | ||
634 | 855 | ||
635 | ldil L%dcache_stride, %r1 | 856 | ldil L%dcache_stride, %r1 |
636 | ldw R%dcache_stride(%r1), %r1 | 857 | ldw R%dcache_stride(%r1), %r1 |
@@ -663,8 +884,17 @@ ENTRY(flush_dcache_page_asm) | |||
663 | fdc,m %r1(%r28) | 884 | fdc,m %r1(%r28) |
664 | 885 | ||
665 | sync | 886 | sync |
887 | |||
888 | #ifdef CONFIG_PA20 | ||
889 | pdtlb,l 0(%r25) | ||
890 | #else | ||
891 | tlb_lock %r20,%r21,%r22 | ||
892 | pdtlb 0(%r25) | ||
893 | tlb_unlock %r20,%r21,%r22 | ||
894 | #endif | ||
895 | |||
666 | bv %r0(%r2) | 896 | bv %r0(%r2) |
667 | pdtlb (%r25) | 897 | nop |
668 | .exit | 898 | .exit |
669 | 899 | ||
670 | .procend | 900 | .procend |
@@ -692,7 +922,13 @@ ENTRY(flush_icache_page_asm) | |||
692 | 922 | ||
693 | /* Purge any old translation */ | 923 | /* Purge any old translation */ |
694 | 924 | ||
695 | pitlb (%sr4,%r28) | 925 | #ifdef CONFIG_PA20 |
926 | pitlb,l %r0(%sr4,%r28) | ||
927 | #else | ||
928 | tlb_lock %r20,%r21,%r22 | ||
929 | pitlb (%sr4,%r28) | ||
930 | tlb_unlock %r20,%r21,%r22 | ||
931 | #endif | ||
696 | 932 | ||
697 | ldil L%icache_stride, %r1 | 933 | ldil L%icache_stride, %r1 |
698 | ldw R%icache_stride(%r1), %r1 | 934 | ldw R%icache_stride(%r1), %r1 |
@@ -727,8 +963,17 @@ ENTRY(flush_icache_page_asm) | |||
727 | fic,m %r1(%sr4,%r28) | 963 | fic,m %r1(%sr4,%r28) |
728 | 964 | ||
729 | sync | 965 | sync |
966 | |||
967 | #ifdef CONFIG_PA20 | ||
968 | pitlb,l %r0(%sr4,%r25) | ||
969 | #else | ||
970 | tlb_lock %r20,%r21,%r22 | ||
971 | pitlb (%sr4,%r25) | ||
972 | tlb_unlock %r20,%r21,%r22 | ||
973 | #endif | ||
974 | |||
730 | bv %r0(%r2) | 975 | bv %r0(%r2) |
731 | pitlb (%sr4,%r25) | 976 | nop |
732 | .exit | 977 | .exit |
733 | 978 | ||
734 | .procend | 979 | .procend |
@@ -777,7 +1022,7 @@ ENTRY(flush_kernel_dcache_page_asm) | |||
777 | .procend | 1022 | .procend |
778 | ENDPROC(flush_kernel_dcache_page_asm) | 1023 | ENDPROC(flush_kernel_dcache_page_asm) |
779 | 1024 | ||
780 | ENTRY(purge_kernel_dcache_page) | 1025 | ENTRY(purge_kernel_dcache_page_asm) |
781 | .proc | 1026 | .proc |
782 | .callinfo NO_CALLS | 1027 | .callinfo NO_CALLS |
783 | .entry | 1028 | .entry |
@@ -817,7 +1062,7 @@ ENTRY(purge_kernel_dcache_page) | |||
817 | .exit | 1062 | .exit |
818 | 1063 | ||
819 | .procend | 1064 | .procend |
820 | ENDPROC(purge_kernel_dcache_page) | 1065 | ENDPROC(purge_kernel_dcache_page_asm) |
821 | 1066 | ||
822 | ENTRY(flush_user_dcache_range_asm) | 1067 | ENTRY(flush_user_dcache_range_asm) |
823 | .proc | 1068 | .proc |