diff options
author | David S. Miller <davem@davemloft.net> | 2008-03-21 20:01:38 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-03-21 20:01:38 -0400 |
commit | 64658743fdd40021e3ac91e8ff260ad06578dd23 (patch) | |
tree | da9bd83e54702156d66ae9b6d282d610376feda8 /arch/sparc64/kernel/trampoline.S | |
parent | 4cfea5a7dfcc2766251e50ca30271a782d5004ad (diff) |
[SPARC64]: Remove most limitations to kernel image size.
Currently kernel images are limited to 8MB in size, and this causes
problems especially when enabling features that take up a lot of
kernel image space such as lockdep.
The code now rounds the kernel image size up to a multiple of 4MB and
maps it with one locked TLB entry per 4MB. So, the only practical limitation is the
number of available locked TLB entries which is 16 on Cheetah and 64
on pre-Cheetah sparc64 cpus. Niagara cpus don't actually have hw
locked TLB entry support. Rather, the hypervisor transparently
provides support for "locked" TLB entries since it runs with physical
addressing and does the initial TLB miss processing.
Fully utilizing this change requires some help from SILO, a patch for
which will be submitted to the maintainer. Essentially, SILO currently
maps only up to 8MB for the kernel image, and that limit needs to be
increased.
Note that neither this patch nor the SILO bits will help with network
booting. The openfirmware code will only map up to a certain amount
of kernel image during a network boot and there isn't much we can do
about that other than to implement a layered network booting
facility. Solaris has this, and calls it "wanboot" and we may
implement something similar at some point.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc64/kernel/trampoline.S')
-rw-r--r-- | arch/sparc64/kernel/trampoline.S | 188 |
1 files changed, 64 insertions, 124 deletions
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index 4ae2e525d68b..56ff55211341 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S | |||
@@ -105,7 +105,7 @@ startup_continue: | |||
105 | wr %g2, 0, %tick_cmpr | 105 | wr %g2, 0, %tick_cmpr |
106 | 106 | ||
107 | /* Call OBP by hand to lock KERNBASE into i/d tlbs. | 107 | /* Call OBP by hand to lock KERNBASE into i/d tlbs. |
108 | * We lock 2 consequetive entries if we are 'bigkernel'. | 108 | * We lock 'num_kernel_image_mappings' consequetive entries. |
109 | */ | 109 | */ |
110 | sethi %hi(prom_entry_lock), %g2 | 110 | sethi %hi(prom_entry_lock), %g2 |
111 | 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 | 111 | 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 |
@@ -119,6 +119,29 @@ startup_continue: | |||
119 | add %l2, -(192 + 128), %sp | 119 | add %l2, -(192 + 128), %sp |
120 | flushw | 120 | flushw |
121 | 121 | ||
122 | /* Setup the loop variables: | ||
123 | * %l3: VADDR base | ||
124 | * %l4: TTE base | ||
125 | * %l5: Loop iterator, iterates from 0 to 'num_kernel_image_mappings' | ||
126 | * %l6: Number of TTE entries to map | ||
127 | * %l7: Highest TTE entry number, we count down | ||
128 | */ | ||
129 | sethi %hi(KERNBASE), %l3 | ||
130 | sethi %hi(kern_locked_tte_data), %l4 | ||
131 | ldx [%l4 + %lo(kern_locked_tte_data)], %l4 | ||
132 | clr %l5 | ||
133 | sethi %hi(num_kernel_image_mappings), %l6 | ||
134 | lduw [%l6 + %lo(num_kernel_image_mappings)], %l6 | ||
135 | add %l6, 1, %l6 | ||
136 | |||
137 | mov 15, %l7 | ||
138 | BRANCH_IF_ANY_CHEETAH(g1,g5,2f) | ||
139 | |||
140 | mov 63, %l7 | ||
141 | 2: | ||
142 | |||
143 | 3: | ||
144 | /* Lock into I-MMU */ | ||
122 | sethi %hi(call_method), %g2 | 145 | sethi %hi(call_method), %g2 |
123 | or %g2, %lo(call_method), %g2 | 146 | or %g2, %lo(call_method), %g2 |
124 | stx %g2, [%sp + 2047 + 128 + 0x00] | 147 | stx %g2, [%sp + 2047 + 128 + 0x00] |
@@ -132,63 +155,26 @@ startup_continue: | |||
132 | sethi %hi(prom_mmu_ihandle_cache), %g2 | 155 | sethi %hi(prom_mmu_ihandle_cache), %g2 |
133 | lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 | 156 | lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 |
134 | stx %g2, [%sp + 2047 + 128 + 0x20] | 157 | stx %g2, [%sp + 2047 + 128 + 0x20] |
135 | sethi %hi(KERNBASE), %g2 | ||
136 | stx %g2, [%sp + 2047 + 128 + 0x28] | ||
137 | sethi %hi(kern_locked_tte_data), %g2 | ||
138 | ldx [%g2 + %lo(kern_locked_tte_data)], %g2 | ||
139 | stx %g2, [%sp + 2047 + 128 + 0x30] | ||
140 | |||
141 | mov 15, %g2 | ||
142 | BRANCH_IF_ANY_CHEETAH(g1,g5,1f) | ||
143 | 158 | ||
144 | mov 63, %g2 | 159 | /* Each TTE maps 4MB, convert index to offset. */ |
145 | 1: | 160 | sllx %l5, 22, %g1 |
146 | stx %g2, [%sp + 2047 + 128 + 0x38] | ||
147 | sethi %hi(p1275buf), %g2 | ||
148 | or %g2, %lo(p1275buf), %g2 | ||
149 | ldx [%g2 + 0x08], %o1 | ||
150 | call %o1 | ||
151 | add %sp, (2047 + 128), %o0 | ||
152 | 161 | ||
153 | sethi %hi(bigkernel), %g2 | 162 | add %l3, %g1, %g2 |
154 | lduw [%g2 + %lo(bigkernel)], %g2 | 163 | stx %g2, [%sp + 2047 + 128 + 0x28] ! VADDR |
155 | brz,pt %g2, do_dtlb | 164 | add %l4, %g1, %g2 |
156 | nop | 165 | stx %g2, [%sp + 2047 + 128 + 0x30] ! TTE |
157 | 166 | ||
158 | sethi %hi(call_method), %g2 | 167 | /* TTE index is highest minus loop index. */ |
159 | or %g2, %lo(call_method), %g2 | 168 | sub %l7, %l5, %g2 |
160 | stx %g2, [%sp + 2047 + 128 + 0x00] | ||
161 | mov 5, %g2 | ||
162 | stx %g2, [%sp + 2047 + 128 + 0x08] | ||
163 | mov 1, %g2 | ||
164 | stx %g2, [%sp + 2047 + 128 + 0x10] | ||
165 | sethi %hi(itlb_load), %g2 | ||
166 | or %g2, %lo(itlb_load), %g2 | ||
167 | stx %g2, [%sp + 2047 + 128 + 0x18] | ||
168 | sethi %hi(prom_mmu_ihandle_cache), %g2 | ||
169 | lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 | ||
170 | stx %g2, [%sp + 2047 + 128 + 0x20] | ||
171 | sethi %hi(KERNBASE + 0x400000), %g2 | ||
172 | stx %g2, [%sp + 2047 + 128 + 0x28] | ||
173 | sethi %hi(kern_locked_tte_data), %g2 | ||
174 | ldx [%g2 + %lo(kern_locked_tte_data)], %g2 | ||
175 | sethi %hi(0x400000), %g1 | ||
176 | add %g2, %g1, %g2 | ||
177 | stx %g2, [%sp + 2047 + 128 + 0x30] | ||
178 | |||
179 | mov 14, %g2 | ||
180 | BRANCH_IF_ANY_CHEETAH(g1,g5,1f) | ||
181 | |||
182 | mov 62, %g2 | ||
183 | 1: | ||
184 | stx %g2, [%sp + 2047 + 128 + 0x38] | 169 | stx %g2, [%sp + 2047 + 128 + 0x38] |
170 | |||
185 | sethi %hi(p1275buf), %g2 | 171 | sethi %hi(p1275buf), %g2 |
186 | or %g2, %lo(p1275buf), %g2 | 172 | or %g2, %lo(p1275buf), %g2 |
187 | ldx [%g2 + 0x08], %o1 | 173 | ldx [%g2 + 0x08], %o1 |
188 | call %o1 | 174 | call %o1 |
189 | add %sp, (2047 + 128), %o0 | 175 | add %sp, (2047 + 128), %o0 |
190 | 176 | ||
191 | do_dtlb: | 177 | /* Lock into D-MMU */ |
192 | sethi %hi(call_method), %g2 | 178 | sethi %hi(call_method), %g2 |
193 | or %g2, %lo(call_method), %g2 | 179 | or %g2, %lo(call_method), %g2 |
194 | stx %g2, [%sp + 2047 + 128 + 0x00] | 180 | stx %g2, [%sp + 2047 + 128 + 0x00] |
@@ -202,65 +188,30 @@ do_dtlb: | |||
202 | sethi %hi(prom_mmu_ihandle_cache), %g2 | 188 | sethi %hi(prom_mmu_ihandle_cache), %g2 |
203 | lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 | 189 | lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 |
204 | stx %g2, [%sp + 2047 + 128 + 0x20] | 190 | stx %g2, [%sp + 2047 + 128 + 0x20] |
205 | sethi %hi(KERNBASE), %g2 | ||
206 | stx %g2, [%sp + 2047 + 128 + 0x28] | ||
207 | sethi %hi(kern_locked_tte_data), %g2 | ||
208 | ldx [%g2 + %lo(kern_locked_tte_data)], %g2 | ||
209 | stx %g2, [%sp + 2047 + 128 + 0x30] | ||
210 | 191 | ||
211 | mov 15, %g2 | 192 | /* Each TTE maps 4MB, convert index to offset. */ |
212 | BRANCH_IF_ANY_CHEETAH(g1,g5,1f) | 193 | sllx %l5, 22, %g1 |
213 | 194 | ||
214 | mov 63, %g2 | 195 | add %l3, %g1, %g2 |
215 | 1: | 196 | stx %g2, [%sp + 2047 + 128 + 0x28] ! VADDR |
197 | add %l4, %g1, %g2 | ||
198 | stx %g2, [%sp + 2047 + 128 + 0x30] ! TTE | ||
216 | 199 | ||
200 | /* TTE index is highest minus loop index. */ | ||
201 | sub %l7, %l5, %g2 | ||
217 | stx %g2, [%sp + 2047 + 128 + 0x38] | 202 | stx %g2, [%sp + 2047 + 128 + 0x38] |
203 | |||
218 | sethi %hi(p1275buf), %g2 | 204 | sethi %hi(p1275buf), %g2 |
219 | or %g2, %lo(p1275buf), %g2 | 205 | or %g2, %lo(p1275buf), %g2 |
220 | ldx [%g2 + 0x08], %o1 | 206 | ldx [%g2 + 0x08], %o1 |
221 | call %o1 | 207 | call %o1 |
222 | add %sp, (2047 + 128), %o0 | 208 | add %sp, (2047 + 128), %o0 |
223 | 209 | ||
224 | sethi %hi(bigkernel), %g2 | 210 | add %l5, 1, %l5 |
225 | lduw [%g2 + %lo(bigkernel)], %g2 | 211 | cmp %l5, %l6 |
226 | brz,pt %g2, do_unlock | 212 | bne,pt %xcc, 3b |
227 | nop | 213 | nop |
228 | 214 | ||
229 | sethi %hi(call_method), %g2 | ||
230 | or %g2, %lo(call_method), %g2 | ||
231 | stx %g2, [%sp + 2047 + 128 + 0x00] | ||
232 | mov 5, %g2 | ||
233 | stx %g2, [%sp + 2047 + 128 + 0x08] | ||
234 | mov 1, %g2 | ||
235 | stx %g2, [%sp + 2047 + 128 + 0x10] | ||
236 | sethi %hi(dtlb_load), %g2 | ||
237 | or %g2, %lo(dtlb_load), %g2 | ||
238 | stx %g2, [%sp + 2047 + 128 + 0x18] | ||
239 | sethi %hi(prom_mmu_ihandle_cache), %g2 | ||
240 | lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 | ||
241 | stx %g2, [%sp + 2047 + 128 + 0x20] | ||
242 | sethi %hi(KERNBASE + 0x400000), %g2 | ||
243 | stx %g2, [%sp + 2047 + 128 + 0x28] | ||
244 | sethi %hi(kern_locked_tte_data), %g2 | ||
245 | ldx [%g2 + %lo(kern_locked_tte_data)], %g2 | ||
246 | sethi %hi(0x400000), %g1 | ||
247 | add %g2, %g1, %g2 | ||
248 | stx %g2, [%sp + 2047 + 128 + 0x30] | ||
249 | |||
250 | mov 14, %g2 | ||
251 | BRANCH_IF_ANY_CHEETAH(g1,g5,1f) | ||
252 | |||
253 | mov 62, %g2 | ||
254 | 1: | ||
255 | |||
256 | stx %g2, [%sp + 2047 + 128 + 0x38] | ||
257 | sethi %hi(p1275buf), %g2 | ||
258 | or %g2, %lo(p1275buf), %g2 | ||
259 | ldx [%g2 + 0x08], %o1 | ||
260 | call %o1 | ||
261 | add %sp, (2047 + 128), %o0 | ||
262 | |||
263 | do_unlock: | ||
264 | sethi %hi(prom_entry_lock), %g2 | 215 | sethi %hi(prom_entry_lock), %g2 |
265 | stb %g0, [%g2 + %lo(prom_entry_lock)] | 216 | stb %g0, [%g2 + %lo(prom_entry_lock)] |
266 | membar #StoreStore | #StoreLoad | 217 | membar #StoreStore | #StoreLoad |
@@ -269,47 +220,36 @@ do_unlock: | |||
269 | nop | 220 | nop |
270 | 221 | ||
271 | niagara_lock_tlb: | 222 | niagara_lock_tlb: |
223 | sethi %hi(KERNBASE), %l3 | ||
224 | sethi %hi(kern_locked_tte_data), %l4 | ||
225 | ldx [%l4 + %lo(kern_locked_tte_data)], %l4 | ||
226 | clr %l5 | ||
227 | sethi %hi(num_kernel_image_mappings), %l6 | ||
228 | lduw [%l6 + %lo(num_kernel_image_mappings)], %l6 | ||
229 | add %l6, 1, %l6 | ||
230 | |||
231 | 1: | ||
272 | mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 | 232 | mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 |
273 | sethi %hi(KERNBASE), %o0 | 233 | sllx %l5, 22, %g2 |
234 | add %l3, %g2, %o0 | ||
274 | clr %o1 | 235 | clr %o1 |
275 | sethi %hi(kern_locked_tte_data), %o2 | 236 | add %l4, %g2, %o2 |
276 | ldx [%o2 + %lo(kern_locked_tte_data)], %o2 | ||
277 | mov HV_MMU_IMMU, %o3 | 237 | mov HV_MMU_IMMU, %o3 |
278 | ta HV_FAST_TRAP | 238 | ta HV_FAST_TRAP |
279 | 239 | ||
280 | mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 | 240 | mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 |
281 | sethi %hi(KERNBASE), %o0 | 241 | sllx %l5, 22, %g2 |
242 | add %l3, %g2, %o0 | ||
282 | clr %o1 | 243 | clr %o1 |
283 | sethi %hi(kern_locked_tte_data), %o2 | 244 | add %l4, %g2, %o2 |
284 | ldx [%o2 + %lo(kern_locked_tte_data)], %o2 | ||
285 | mov HV_MMU_DMMU, %o3 | 245 | mov HV_MMU_DMMU, %o3 |
286 | ta HV_FAST_TRAP | 246 | ta HV_FAST_TRAP |
287 | 247 | ||
288 | sethi %hi(bigkernel), %g2 | 248 | add %l5, 1, %l5 |
289 | lduw [%g2 + %lo(bigkernel)], %g2 | 249 | cmp %l5, %l6 |
290 | brz,pt %g2, after_lock_tlb | 250 | bne,pt %xcc, 1b |
291 | nop | 251 | nop |
292 | 252 | ||
293 | mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 | ||
294 | sethi %hi(KERNBASE + 0x400000), %o0 | ||
295 | clr %o1 | ||
296 | sethi %hi(kern_locked_tte_data), %o2 | ||
297 | ldx [%o2 + %lo(kern_locked_tte_data)], %o2 | ||
298 | sethi %hi(0x400000), %o3 | ||
299 | add %o2, %o3, %o2 | ||
300 | mov HV_MMU_IMMU, %o3 | ||
301 | ta HV_FAST_TRAP | ||
302 | |||
303 | mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 | ||
304 | sethi %hi(KERNBASE + 0x400000), %o0 | ||
305 | clr %o1 | ||
306 | sethi %hi(kern_locked_tte_data), %o2 | ||
307 | ldx [%o2 + %lo(kern_locked_tte_data)], %o2 | ||
308 | sethi %hi(0x400000), %o3 | ||
309 | add %o2, %o3, %o2 | ||
310 | mov HV_MMU_DMMU, %o3 | ||
311 | ta HV_FAST_TRAP | ||
312 | |||
313 | after_lock_tlb: | 253 | after_lock_tlb: |
314 | wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate | 254 | wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate |
315 | wr %g0, 0, %fprs | 255 | wr %g0, 0, %fprs |