author	David S. Miller <davem@davemloft.net>	2006-01-31 21:29:18 -0500
committer	David S. Miller <davem@sunset.davemloft.net>	2006-03-20 04:11:13 -0500
commit	74bf4312fff083ab25c3f357cc653ada7995e5f6 (patch)
tree	c23dea461e32485f4cd7ca4b8c33c632655eb906 /include
parent	30d4d1ffed7098afe2641536d67eef150499da02 (diff)
[SPARC64]: Move away from virtual page tables, part 1.
We now use the TSB hardware assist features of the UltraSPARC MMUs.

SMP is currently knowingly broken; we need to find another place to store the per-cpu base pointers.  We hid them away in the TSB base register, and that obviously will not work any more :-)

Another known broken case is non-8KB base page size.

Also noticed that flush_tlb_all() is not referenced anywhere, only the internal __flush_tlb_all() (local cpu only) is used by the sparc64 port, so we can get rid of flush_tlb_all().

The kernel gets its own 8KB TSB (swapper_tsb) and each address space gets its own private 8K TSB.  Later we can add code to dynamically increase the size of the per-process TSB as the RSS grows.  An 8KB TSB is good enough for up to about a 4MB RSS, after which the TSB starts to incur many capacity and conflict misses.

We even accumulate OBP translations into the kernel TSB.

Another area for refinement is large page size support.  We could use a secondary address space TSB to handle those.

Signed-off-by: David S. Miller <davem@davemloft.net>
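For orientation only, here is a small C model (not part of this patch; the names and types below are illustrative) of the arithmetic behind the per-process TSB described above: 512 slots of 16 bytes (8-byte tag plus 8-byte PTE) fit in 8KB, and with one slot per 8K page that covers roughly a 4MB RSS before capacity and conflict misses set in.

	/* Illustrative model only -- not code from this patch.
	 * An 8KB TSB holds 512 slots of 16 bytes (8-byte tag + 8-byte PTE).
	 * With 8KB base pages, one slot per page covers 512 * 8KB = 4MB of
	 * RSS before entries start to collide.
	 */
	#include <stdint.h>

	#define TSB_BYTES	8192
	#define TSB_SLOT_SIZE	16
	#define TSB_NENTRIES	(TSB_BYTES / TSB_SLOT_SIZE)	/* 512 */
	#define PAGE_SHIFT_8K	13

	struct tsb_slot {		/* hypothetical layout */
		uint64_t tag;		/* context + vaddr bits 63:22 */
		uint64_t pte;
	};

	/* Index a virtual address into the table the way the 8K-page miss
	 * handler does: drop the page offset, then mask to the table size.
	 */
	static inline unsigned int tsb_index(uint64_t vaddr)
	{
		return (vaddr >> PAGE_SHIFT_8K) & (TSB_NENTRIES - 1);
	}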
Diffstat (limited to 'include')
-rw-r--r--	include/asm-sparc64/mmu.h		|   1
-rw-r--r--	include/asm-sparc64/mmu_context.h	|  46
-rw-r--r--	include/asm-sparc64/pgalloc.h		|   1
-rw-r--r--	include/asm-sparc64/pgtable.h		|   9
-rw-r--r--	include/asm-sparc64/processor.h		|  14
-rw-r--r--	include/asm-sparc64/tlbflush.h		|  25
-rw-r--r--	include/asm-sparc64/tsb.h		| 165
7 files changed, 204 insertions, 57 deletions
diff --git a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h
index 8627eed6e83d..36384cf7faa6 100644
--- a/include/asm-sparc64/mmu.h
+++ b/include/asm-sparc64/mmu.h
@@ -92,6 +92,7 @@
 
 typedef struct {
 	unsigned long	sparc64_ctx_val;
+	unsigned long	*sparc64_tsb;
 } mm_context_t;
 
 #endif /* !__ASSEMBLY__ */
diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
index 57ee7b306189..34640a370ab4 100644
--- a/include/asm-sparc64/mmu_context.h
+++ b/include/asm-sparc64/mmu_context.h
@@ -25,7 +25,13 @@ extern void get_new_mmu_context(struct mm_struct *mm);
  * This just needs to set mm->context to an invalid context.
  */
 #define init_new_context(__tsk, __mm)	\
-	(((__mm)->context.sparc64_ctx_val = 0UL), 0)
+({	unsigned long __pg = get_zeroed_page(GFP_KERNEL); \
+	(__mm)->context.sparc64_ctx_val = 0UL; \
+	(__mm)->context.sparc64_tsb = \
+	 (unsigned long *) __pg; \
+	(__pg ? 0 : -ENOMEM); \
+})
+
 
 /* Destroy a dead context.  This occurs when mmput drops the
  * mm_users count to zero, the mmaps have been released, and
@@ -35,7 +41,8 @@ extern void get_new_mmu_context(struct mm_struct *mm);
  * this task if valid.
  */
 #define destroy_context(__mm)					\
-do {	spin_lock(&ctx_alloc_lock);				\
+do {	free_page((unsigned long)(__mm)->context.sparc64_tsb);	\
+	spin_lock(&ctx_alloc_lock);				\
 	if (CTX_VALID((__mm)->context)) {			\
 		unsigned long nr = CTX_NRBITS((__mm)->context);	\
 		mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));	\
@@ -43,35 +50,7 @@ do {	spin_lock(&ctx_alloc_lock);				\
 	spin_unlock(&ctx_alloc_lock);				\
 } while(0)
 
-/* Reload the two core values used by TLB miss handler
- * processing on sparc64.  They are:
- * 1) The physical address of mm->pgd, when full page
- *    table walks are necessary, this is where the
- *    search begins.
- * 2) A "PGD cache".  For 32-bit tasks only pgd[0] is
- *    ever used since that maps the entire low 4GB
- *    completely.  To speed up TLB miss processing we
- *    make this value available to the handlers.  This
- *    decreases the amount of memory traffic incurred.
- */
-#define reload_tlbmiss_state(__tsk, __mm) \
-do { \
-	register unsigned long paddr asm("o5"); \
-	register unsigned long pgd_cache asm("o4"); \
-	paddr = __pa((__mm)->pgd); \
-	pgd_cache = 0UL; \
-	if (task_thread_info(__tsk)->flags & _TIF_32BIT) \
-		pgd_cache = get_pgd_cache((__mm)->pgd); \
-	__asm__ __volatile__("wrpr	%%g0, 0x494, %%pstate\n\t" \
-			     "mov	%3, %%g4\n\t" \
-			     "mov	%0, %%g7\n\t" \
-			     "stxa	%1, [%%g4] %2\n\t" \
-			     "membar	#Sync\n\t" \
-			     "wrpr	%%g0, 0x096, %%pstate" \
-			     : /* no outputs */ \
-			     : "r" (paddr), "r" (pgd_cache),\
-			       "i" (ASI_DMMU), "i" (TSB_REG)); \
-} while(0)
+extern unsigned long tsb_context_switch(unsigned long pgd_pa, unsigned long *tsb);
 
 /* Set MMU context in the actual hardware. */
 #define load_secondary_context(__mm) \
@@ -101,7 +80,8 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
 
 	if (!ctx_valid || (old_mm != mm)) {
 		load_secondary_context(mm);
-		reload_tlbmiss_state(tsk, mm);
+		tsb_context_switch(__pa(mm->pgd),
+				   mm->context.sparc64_tsb);
 	}
 
 	/* Even if (mm == old_mm) we _must_ check
@@ -139,7 +119,7 @@ static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm
 
 	load_secondary_context(mm);
 	__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
-	reload_tlbmiss_state(current, mm);
+	tsb_context_switch(__pa(mm->pgd), mm->context.sparc64_tsb);
 }
 
 #endif /* !(__ASSEMBLY__) */
diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h
index a96067cca963..baf59c00ea47 100644
--- a/include/asm-sparc64/pgalloc.h
+++ b/include/asm-sparc64/pgalloc.h
@@ -61,6 +61,7 @@ static __inline__ void free_pgd_slow(pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
+/* XXX This crap can die, no longer using virtual page tables... */
 #ifdef DCACHE_ALIASING_POSSIBLE
 #define VPTE_COLOR(address)		(((address) >> (PAGE_SHIFT + 10)) & 1UL)
 #define DCACHE_COLOR(address)		(((address) >> PAGE_SHIFT) & 1UL)
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index f0a9b44d3eb5..f3ba1e058195 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -25,7 +25,8 @@
 #include <asm/const.h>
 
 /* The kernel image occupies 0x4000000 to 0x1000000 (4MB --> 32MB).
- * The page copy blockops can use 0x2000000 to 0x10000000.
+ * The page copy blockops can use 0x2000000 to 0x4000000.
+ * The TSB is mapped in the 0x4000000 to 0x6000000 range.
  * The PROM resides in an area spanning 0xf0000000 to 0x100000000.
  * The vmalloc area spans 0x100000000 to 0x200000000.
  * Since modules need to be in the lowest 32-bits of the address space,
@@ -34,6 +35,7 @@
  * 0x400000000.
  */
 #define TLBTEMP_BASE		_AC(0x0000000002000000,UL)
+#define TSBMAP_BASE		_AC(0x0000000004000000,UL)
 #define MODULES_VADDR		_AC(0x0000000010000000,UL)
 #define MODULES_LEN		_AC(0x00000000e0000000,UL)
 #define MODULES_END		_AC(0x00000000f0000000,UL)
@@ -296,11 +298,6 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
-/* extract the pgd cache used for optimizing the tlb miss
- * slow path when executing 32-bit compat processes
- */
-#define get_pgd_cache(pgd)	((unsigned long) pgd_val(*pgd) << 11)
-
 /* Find an entry in the second-level page table.. */
 #define pmd_offset(pudp, address)	\
 	((pmd_t *) pud_page(*(pudp)) + \
diff --git a/include/asm-sparc64/processor.h b/include/asm-sparc64/processor.h
index cd8d9b4c8658..b3889f3f943a 100644
--- a/include/asm-sparc64/processor.h
+++ b/include/asm-sparc64/processor.h
@@ -28,6 +28,8 @@
  * User lives in his very own context, and cannot reference us. Note
  * that TASK_SIZE is a misnomer, it really gives maximum user virtual
  * address that the kernel will allocate out.
+ *
+ * XXX No longer using virtual page tables, kill this upper limit...
  */
 #define VA_BITS			44
 #ifndef __ASSEMBLY__
@@ -37,18 +39,6 @@
 #endif
 #define TASK_SIZE	((unsigned long)-VPTE_SIZE)
 
-/*
- * The vpte base must be able to hold the entire vpte, half
- * of which lives above, and half below, the base.  And it
- * is placed as close to the highest address range as possible.
- */
-#define VPTE_BASE_SPITFIRE	(-(VPTE_SIZE/2))
-#if 1
-#define VPTE_BASE_CHEETAH	VPTE_BASE_SPITFIRE
-#else
-#define VPTE_BASE_CHEETAH	0xffe0000000000000
-#endif
-
 #ifndef __ASSEMBLY__
 
 typedef struct {
diff --git a/include/asm-sparc64/tlbflush.h b/include/asm-sparc64/tlbflush.h
index 3ef9909ac3ac..9ad5d9c51d42 100644
--- a/include/asm-sparc64/tlbflush.h
+++ b/include/asm-sparc64/tlbflush.h
@@ -5,6 +5,11 @@
 #include <linux/mm.h>
 #include <asm/mmu_context.h>
 
+/* TSB flush operations. */
+struct mmu_gather;
+extern void flush_tsb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_tsb_user(struct mmu_gather *mp);
+
 /* TLB flush operations. */
 
 extern void flush_tlb_pending(void);
@@ -14,28 +19,36 @@ extern void flush_tlb_pending(void);
 #define flush_tlb_page(vma,addr)	flush_tlb_pending()
 #define flush_tlb_mm(mm)		flush_tlb_pending()
 
+/* Local cpu only. */
 extern void __flush_tlb_all(void);
+
 extern void __flush_tlb_page(unsigned long context, unsigned long page, unsigned long r);
 
 extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
 #ifndef CONFIG_SMP
 
-#define flush_tlb_all()		__flush_tlb_all()
 #define flush_tlb_kernel_range(start,end) \
-	__flush_tlb_kernel_range(start,end)
+do {	flush_tsb_kernel_range(start,end); \
+	__flush_tlb_kernel_range(start,end); \
+} while (0)
 
 #else /* CONFIG_SMP */
 
-extern void smp_flush_tlb_all(void);
 extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
-#define flush_tlb_all()		smp_flush_tlb_all()
 #define flush_tlb_kernel_range(start, end) \
-	smp_flush_tlb_kernel_range(start, end)
+do {	flush_tsb_kernel_range(start,end); \
+	smp_flush_tlb_kernel_range(start, end); \
+} while (0)
 
 #endif /* ! CONFIG_SMP */
 
-extern void flush_tlb_pgtables(struct mm_struct *, unsigned long, unsigned long);
+static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+	/* We don't use virtual page tables for TLB miss processing
+	 * any more.  Nowadays we use the TSB.
+	 */
+}
 
 #endif /* _SPARC64_TLBFLUSH_H */
diff --git a/include/asm-sparc64/tsb.h b/include/asm-sparc64/tsb.h
new file mode 100644
index 000000000000..03d272e0e477
--- /dev/null
+++ b/include/asm-sparc64/tsb.h
@@ -0,0 +1,165 @@
+#ifndef _SPARC64_TSB_H
+#define _SPARC64_TSB_H
+
+/* The sparc64 TSB is similar to the powerpc hashtables.  It's a
+ * power-of-2 sized table of TAG/PTE pairs.  The cpu precomputes
+ * pointers into this table for 8K and 64K page sizes, and also a
+ * comparison TAG based upon the virtual address and context which
+ * faults.
+ *
+ * TLB miss trap handler software does the actual lookup via something
+ * of the form:
+ *
+ *	ldxa		[%g0] ASI_{D,I}MMU_TSB_8KB_PTR, %g1
+ *	ldxa		[%g0] ASI_{D,I}MMU, %g6
+ *	ldda		[%g1] ASI_NUCLEUS_QUAD_LDD, %g4
+ *	cmp		%g4, %g6
+ *	bne,pn	%xcc, tsb_miss_{d,i}tlb
+ *	 mov		FAULT_CODE_{D,I}TLB, %g3
+ *	stxa		%g5, [%g0] ASI_{D,I}TLB_DATA_IN
+ *	retry
+ *
+ *
+ * Each 16-byte slot of the TSB is the 8-byte tag and then the 8-byte
+ * PTE.  The TAG is of the same layout as the TLB TAG TARGET mmu
+ * register which is:
+ *
+ *	-------------------------------------------------
+ *	|  -  |  CONTEXT  |  -  |   VADDR bits 63:22    |
+ *	-------------------------------------------------
+ *	 63 61 60       48 47 42 41                    0
+ *
+ * Like the powerpc hashtables we need to use locking in order to
+ * synchronize while we update the entries.  PTE updates need locking
+ * as well.
+ *
+ * We need to carefully choose a lock bit for the TSB entry.  We
+ * choose to use bit 47 in the tag.  Also, since we never map anything
+ * at page zero in context zero, we use zero as an invalid tag entry.
+ * When the lock bit is set, this forces a tag comparison failure.
+ *
+ * Currently, we allocate an 8K TSB per-process and we use it for both
+ * I-TLB and D-TLB misses.  Perhaps at some point we'll add code that
+ * monitors the number of active pages in the process as we get
+ * major/minor faults, and grow the TSB in response.  The only trick
+ * in implementing that is synchronizing the freeing of the old TSB
+ * wrt. parallel TSB updates occurring on other processors.  One
+ * possible solution is to use RCU for the freeing of the TSB.
+ */
+
+#define TSB_TAG_LOCK	(1 << (47 - 32))
+
+#define TSB_MEMBAR	membar	#StoreStore
+
+#define TSB_LOCK_TAG(TSB, REG1, REG2)	\
+99:	lduwa	[TSB] ASI_N, REG1;	\
+	sethi	%hi(TSB_TAG_LOCK), REG2;\
+	andcc	REG1, REG2, %g0;	\
+	bne,pn	%icc, 99b;		\
+	 nop;				\
+	casa	[TSB] ASI_N, REG1, REG2;\
+	cmp	REG1, REG2;		\
+	bne,pn	%icc, 99b;		\
+	 nop;				\
+	TSB_MEMBAR
+
+#define TSB_WRITE(TSB, TTE, TAG)	   \
+	stx		TTE, [TSB + 0x08]; \
+	TSB_MEMBAR;			   \
+	stx		TAG, [TSB + 0x00];
+
+	/* Do a kernel page table walk.  Leaves physical PTE pointer in
+	 * REG1.  Jumps to FAIL_LABEL on early page table walk termination.
+	 * VADDR will not be clobbered, but REG2 will.
+	 */
+#define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL)	\
+	sethi		%hi(swapper_pg_dir), REG1; \
+	or		REG1, %lo(swapper_pg_dir), REG1; \
+	sllx		VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
+	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
+	andn		REG2, 0x3, REG2; \
+	lduw		[REG1 + REG2], REG1; \
+	brz,pn		REG1, FAIL_LABEL; \
+	 sllx		VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
+	sllx		REG1, 11, REG1; \
+	andn		REG2, 0x3, REG2; \
+	lduwa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+	brz,pn		REG1, FAIL_LABEL; \
+	 sllx		VADDR, 64 - PMD_SHIFT, REG2; \
+	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
+	sllx		REG1, 11, REG1; \
+	andn		REG2, 0x7, REG2; \
+	add		REG1, REG2, REG1;
+
+	/* Do a user page table walk in MMU globals.  Leaves physical PTE
+	 * pointer in REG1.  Jumps to FAIL_LABEL on early page table walk
+	 * termination.  Physical base of page tables is in PHYS_PGD which
+	 * will not be modified.
+	 *
+	 * VADDR will not be clobbered, but REG1 and REG2 will.
+	 */
+#define USER_PGTABLE_WALK_TL1(VADDR, PHYS_PGD, REG1, REG2, FAIL_LABEL)	\
+	sllx		VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
+	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
+	andn		REG2, 0x3, REG2; \
+	lduwa		[PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \
+	brz,pn		REG1, FAIL_LABEL; \
+	 sllx		VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
+	sllx		REG1, 11, REG1; \
+	andn		REG2, 0x3, REG2; \
+	lduwa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+	brz,pn		REG1, FAIL_LABEL; \
+	 sllx		VADDR, 64 - PMD_SHIFT, REG2; \
+	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
+	sllx		REG1, 11, REG1; \
+	andn		REG2, 0x7, REG2; \
+	add		REG1, REG2, REG1;
+
+/* Lookup an OBP mapping on VADDR in the prom_trans[] table at TL>0.
+ * If no entry is found, FAIL_LABEL will be branched to.  On success
+ * the resulting PTE value will be left in REG1.  VADDR is preserved
+ * by this routine.
+ */
+#define OBP_TRANS_LOOKUP(VADDR, REG1, REG2, REG3, FAIL_LABEL)	\
+	sethi		%hi(prom_trans), REG1; \
+	or		REG1, %lo(prom_trans), REG1; \
+97:	ldx		[REG1 + 0x00], REG2; \
+	brz,pn		REG2, FAIL_LABEL; \
+	 nop; \
+	ldx		[REG1 + 0x08], REG3; \
+	add		REG2, REG3, REG3; \
+	cmp		REG2, VADDR; \
+	bgu,pt		%xcc, 98f; \
+	 cmp		VADDR, REG3; \
+	bgeu,pt		%xcc, 98f; \
+	 ldx		[REG1 + 0x10], REG3; \
+	sub		VADDR, REG2, REG2; \
+	ba,pt		%xcc, 99f; \
+	 add		REG3, REG2, REG1; \
+98:	ba,pt		%xcc, 97b; \
+	 add		REG1, (3 * 8), REG1; \
+99:
+
+	/* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL
+	 * on TSB hit.  REG1, REG2, REG3, and REG4 are used as temporaries
+	 * and the found TTE will be left in REG1.  REG3 and REG4 must
+	 * be an even/odd pair of registers.
+	 *
+	 * VADDR and TAG will be preserved and not clobbered by this macro.
+	 */
+	/* XXX non-8K base page size support... */
+#define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
+	sethi		%hi(swapper_tsb), REG1; \
+	or		REG1, %lo(swapper_tsb), REG1; \
+	srlx		VADDR, 13, REG2; \
+	and		REG2, (512 - 1), REG2; \
+	sllx		REG2, 4, REG2; \
+	add		REG1, REG2, REG2; \
+	ldda		[REG2] ASI_NUCLEUS_QUAD_LDD, REG3; \
+	cmp		REG3, TAG; \
+	be,a,pt		%xcc, OK_LABEL; \
+	 mov		REG4, REG1;
+
+#endif /* !(_SPARC64_TSB_H) */
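As a rough, non-authoritative C sketch of the tag-lock protocol the tsb.h comment above describes (bit 47 of the tag as the lock, zero as the invalid tag, PTE stored before the tag with a store-store barrier in between): the real work is done by the assembly macros in the diff, and the helper names and GCC atomic builtins below are purely illustrative, not kernel code.

	/* Illustrative model of the TSB_LOCK_TAG / TSB_WRITE / lookup
	 * protocol described in the tsb.h comment.  All names here are
	 * hypothetical; the real code operates on the tag's upper 32-bit
	 * word with lduwa/casa in assembly.
	 */
	#include <stdint.h>
	#include <stdbool.h>

	#define TSB_TAG_LOCK_BIT	(1ULL << 47)	/* lock bit chosen in the comment */
	#define TSB_TAG_INVALID		0ULL		/* ctx 0 never maps page 0 */

	struct tsb_entry {
		uint64_t tag;	/* TLB TAG TARGET format: context | vaddr 63:22 */
		uint64_t pte;
	};

	/* Spin until the lock bit is clear, then atomically set it.
	 * Loosely mirrors TSB_LOCK_TAG's load/compare-and-swap loop.
	 * Returns the tag value observed before locking.
	 */
	static uint64_t tsb_lock_entry(struct tsb_entry *ent)
	{
		for (;;) {
			uint64_t old = __atomic_load_n(&ent->tag, __ATOMIC_RELAXED);
			if (old & TSB_TAG_LOCK_BIT)
				continue;	/* another cpu holds the lock */
			if (__atomic_compare_exchange_n(&ent->tag, &old,
							old | TSB_TAG_LOCK_BIT,
							false, __ATOMIC_ACQUIRE,
							__ATOMIC_RELAXED))
				return old;
		}
	}

	/* Publish a new translation: PTE first, then the unlocked tag,
	 * with a store-store barrier in between (TSB_MEMBAR in the macro).
	 */
	static void tsb_write_entry(struct tsb_entry *ent, uint64_t tag, uint64_t pte)
	{
		ent->pte = pte;
		__atomic_thread_fence(__ATOMIC_RELEASE);
		__atomic_store_n(&ent->tag, tag, __ATOMIC_RELAXED);
	}

	/* Lookup as the miss handler does: a locked tag can never match,
	 * because the lock bit forces the tag comparison to fail.
	 */
	static bool tsb_lookup(const struct tsb_entry *ent, uint64_t want_tag,
			       uint64_t *pte_out)
	{
		if (ent->tag != want_tag)
			return false;
		*pte_out = ent->pte;
		return true;
	}

Because a locked tag can never equal a valid lookup tag, readers in the TLB miss fast path need no locking at all; only updaters serialize on the per-entry lock bit.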