aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc64/kernel
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2008-03-21 20:01:38 -0400
committerDavid S. Miller <davem@davemloft.net>2008-03-21 20:01:38 -0400
commit64658743fdd40021e3ac91e8ff260ad06578dd23 (patch)
treeda9bd83e54702156d66ae9b6d282d610376feda8 /arch/sparc64/kernel
parent4cfea5a7dfcc2766251e50ca30271a782d5004ad (diff)
[SPARC64]: Remove most limitations to kernel image size.
Currently kernel images are limited to 8MB in size, and this causes problems especially when enabling features that take up a lot of kernel image space such as lockdep. The code now will align the kernel image size up to 4MB and map that many locked TLB entries. So, the only practical limitation is the number of available locked TLB entries which is 16 on Cheetah and 64 on pre-Cheetah sparc64 cpus. Niagara cpus don't actually have hw locked TLB entry support. Rather, the hypervisor transparently provides support for "locked" TLB entries since it runs with physical addressing and does the initial TLB miss processing. Fully utilizing this change requires some help from SILO, a patch for which will be submitted to the maintainer. Essentially, SILO will only currently map up to 8MB for the kernel image and that needs to be increased. Note that neither this patch nor the SILO bits will help with network booting. The openfirmware code will only map up to a certain amount of kernel image during a network boot and there isn't much we can to about that other than to implemented a layered network booting facility. Solaris has this, and calls it "wanboot" and we may implement something similar at some point. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc64/kernel')
-rw-r--r--arch/sparc64/kernel/head.S8
-rw-r--r--arch/sparc64/kernel/smp.c17
-rw-r--r--arch/sparc64/kernel/trampoline.S188
3 files changed, 79 insertions, 134 deletions
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index 44b105c04dd3..34f8ff57c56b 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -288,8 +288,12 @@ sun4v_chip_type:
288 /* Leave arg2 as-is, prom_mmu_ihandle_cache */ 288 /* Leave arg2 as-is, prom_mmu_ihandle_cache */
289 mov -1, %l3 289 mov -1, %l3
290 stx %l3, [%sp + 2047 + 128 + 0x28] ! arg3: mode (-1 default) 290 stx %l3, [%sp + 2047 + 128 + 0x28] ! arg3: mode (-1 default)
291 sethi %hi(8 * 1024 * 1024), %l3 291 /* 4MB align the kernel image size. */
292 stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4: size (8MB) 292 set (_end - KERNBASE), %l3
293 set ((4 * 1024 * 1024) - 1), %l4
294 add %l3, %l4, %l3
295 andn %l3, %l4, %l3
296 stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4: roundup(ksize, 4MB)
293 sethi %hi(KERNBASE), %l3 297 sethi %hi(KERNBASE), %l3
294 stx %l3, [%sp + 2047 + 128 + 0x38] ! arg5: vaddr (KERNBASE) 298 stx %l3, [%sp + 2047 + 128 + 0x38] ! arg5: vaddr (KERNBASE)
295 stx %g0, [%sp + 2047 + 128 + 0x40] ! arg6: empty 299 stx %g0, [%sp + 2047 + 128 + 0x40] ! arg6: empty
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index cc454731d879..5a1126b363a4 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -284,14 +284,17 @@ static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
284{ 284{
285 extern unsigned long sparc64_ttable_tl0; 285 extern unsigned long sparc64_ttable_tl0;
286 extern unsigned long kern_locked_tte_data; 286 extern unsigned long kern_locked_tte_data;
287 extern int bigkernel;
288 struct hvtramp_descr *hdesc; 287 struct hvtramp_descr *hdesc;
289 unsigned long trampoline_ra; 288 unsigned long trampoline_ra;
290 struct trap_per_cpu *tb; 289 struct trap_per_cpu *tb;
291 u64 tte_vaddr, tte_data; 290 u64 tte_vaddr, tte_data;
292 unsigned long hv_err; 291 unsigned long hv_err;
292 int i;
293 293
294 hdesc = kzalloc(sizeof(*hdesc), GFP_KERNEL); 294 hdesc = kzalloc(sizeof(*hdesc) +
295 (sizeof(struct hvtramp_mapping) *
296 num_kernel_image_mappings - 1),
297 GFP_KERNEL);
295 if (!hdesc) { 298 if (!hdesc) {
296 printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate " 299 printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
297 "hvtramp_descr.\n"); 300 "hvtramp_descr.\n");
@@ -299,7 +302,7 @@ static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
299 } 302 }
300 303
301 hdesc->cpu = cpu; 304 hdesc->cpu = cpu;
302 hdesc->num_mappings = (bigkernel ? 2 : 1); 305 hdesc->num_mappings = num_kernel_image_mappings;
303 306
304 tb = &trap_block[cpu]; 307 tb = &trap_block[cpu];
305 tb->hdesc = hdesc; 308 tb->hdesc = hdesc;
@@ -312,13 +315,11 @@ static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
312 tte_vaddr = (unsigned long) KERNBASE; 315 tte_vaddr = (unsigned long) KERNBASE;
313 tte_data = kern_locked_tte_data; 316 tte_data = kern_locked_tte_data;
314 317
315 hdesc->maps[0].vaddr = tte_vaddr; 318 for (i = 0; i < hdesc->num_mappings; i++) {
316 hdesc->maps[0].tte = tte_data; 319 hdesc->maps[i].vaddr = tte_vaddr;
317 if (bigkernel) { 320 hdesc->maps[i].tte = tte_data;
318 tte_vaddr += 0x400000; 321 tte_vaddr += 0x400000;
319 tte_data += 0x400000; 322 tte_data += 0x400000;
320 hdesc->maps[1].vaddr = tte_vaddr;
321 hdesc->maps[1].tte = tte_data;
322 } 323 }
323 324
324 trampoline_ra = kimage_addr_to_ra(hv_cpu_startup); 325 trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
index 4ae2e525d68b..56ff55211341 100644
--- a/arch/sparc64/kernel/trampoline.S
+++ b/arch/sparc64/kernel/trampoline.S
@@ -105,7 +105,7 @@ startup_continue:
105 wr %g2, 0, %tick_cmpr 105 wr %g2, 0, %tick_cmpr
106 106
107 /* Call OBP by hand to lock KERNBASE into i/d tlbs. 107 /* Call OBP by hand to lock KERNBASE into i/d tlbs.
108 * We lock 2 consequetive entries if we are 'bigkernel'. 108 * We lock 'num_kernel_image_mappings' consequetive entries.
109 */ 109 */
110 sethi %hi(prom_entry_lock), %g2 110 sethi %hi(prom_entry_lock), %g2
1111: ldstub [%g2 + %lo(prom_entry_lock)], %g1 1111: ldstub [%g2 + %lo(prom_entry_lock)], %g1
@@ -119,6 +119,29 @@ startup_continue:
119 add %l2, -(192 + 128), %sp 119 add %l2, -(192 + 128), %sp
120 flushw 120 flushw
121 121
122 /* Setup the loop variables:
123 * %l3: VADDR base
124 * %l4: TTE base
125 * %l5: Loop iterator, iterates from 0 to 'num_kernel_image_mappings'
126 * %l6: Number of TTE entries to map
127 * %l7: Highest TTE entry number, we count down
128 */
129 sethi %hi(KERNBASE), %l3
130 sethi %hi(kern_locked_tte_data), %l4
131 ldx [%l4 + %lo(kern_locked_tte_data)], %l4
132 clr %l5
133 sethi %hi(num_kernel_image_mappings), %l6
134 lduw [%l6 + %lo(num_kernel_image_mappings)], %l6
135 add %l6, 1, %l6
136
137 mov 15, %l7
138 BRANCH_IF_ANY_CHEETAH(g1,g5,2f)
139
140 mov 63, %l7
1412:
142
1433:
144 /* Lock into I-MMU */
122 sethi %hi(call_method), %g2 145 sethi %hi(call_method), %g2
123 or %g2, %lo(call_method), %g2 146 or %g2, %lo(call_method), %g2
124 stx %g2, [%sp + 2047 + 128 + 0x00] 147 stx %g2, [%sp + 2047 + 128 + 0x00]
@@ -132,63 +155,26 @@ startup_continue:
132 sethi %hi(prom_mmu_ihandle_cache), %g2 155 sethi %hi(prom_mmu_ihandle_cache), %g2
133 lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 156 lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2
134 stx %g2, [%sp + 2047 + 128 + 0x20] 157 stx %g2, [%sp + 2047 + 128 + 0x20]
135 sethi %hi(KERNBASE), %g2
136 stx %g2, [%sp + 2047 + 128 + 0x28]
137 sethi %hi(kern_locked_tte_data), %g2
138 ldx [%g2 + %lo(kern_locked_tte_data)], %g2
139 stx %g2, [%sp + 2047 + 128 + 0x30]
140
141 mov 15, %g2
142 BRANCH_IF_ANY_CHEETAH(g1,g5,1f)
143 158
144 mov 63, %g2 159 /* Each TTE maps 4MB, convert index to offset. */
1451: 160 sllx %l5, 22, %g1
146 stx %g2, [%sp + 2047 + 128 + 0x38]
147 sethi %hi(p1275buf), %g2
148 or %g2, %lo(p1275buf), %g2
149 ldx [%g2 + 0x08], %o1
150 call %o1
151 add %sp, (2047 + 128), %o0
152 161
153 sethi %hi(bigkernel), %g2 162 add %l3, %g1, %g2
154 lduw [%g2 + %lo(bigkernel)], %g2 163 stx %g2, [%sp + 2047 + 128 + 0x28] ! VADDR
155 brz,pt %g2, do_dtlb 164 add %l4, %g1, %g2
156 nop 165 stx %g2, [%sp + 2047 + 128 + 0x30] ! TTE
157 166
158 sethi %hi(call_method), %g2 167 /* TTE index is highest minus loop index. */
159 or %g2, %lo(call_method), %g2 168 sub %l7, %l5, %g2
160 stx %g2, [%sp + 2047 + 128 + 0x00]
161 mov 5, %g2
162 stx %g2, [%sp + 2047 + 128 + 0x08]
163 mov 1, %g2
164 stx %g2, [%sp + 2047 + 128 + 0x10]
165 sethi %hi(itlb_load), %g2
166 or %g2, %lo(itlb_load), %g2
167 stx %g2, [%sp + 2047 + 128 + 0x18]
168 sethi %hi(prom_mmu_ihandle_cache), %g2
169 lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2
170 stx %g2, [%sp + 2047 + 128 + 0x20]
171 sethi %hi(KERNBASE + 0x400000), %g2
172 stx %g2, [%sp + 2047 + 128 + 0x28]
173 sethi %hi(kern_locked_tte_data), %g2
174 ldx [%g2 + %lo(kern_locked_tte_data)], %g2
175 sethi %hi(0x400000), %g1
176 add %g2, %g1, %g2
177 stx %g2, [%sp + 2047 + 128 + 0x30]
178
179 mov 14, %g2
180 BRANCH_IF_ANY_CHEETAH(g1,g5,1f)
181
182 mov 62, %g2
1831:
184 stx %g2, [%sp + 2047 + 128 + 0x38] 169 stx %g2, [%sp + 2047 + 128 + 0x38]
170
185 sethi %hi(p1275buf), %g2 171 sethi %hi(p1275buf), %g2
186 or %g2, %lo(p1275buf), %g2 172 or %g2, %lo(p1275buf), %g2
187 ldx [%g2 + 0x08], %o1 173 ldx [%g2 + 0x08], %o1
188 call %o1 174 call %o1
189 add %sp, (2047 + 128), %o0 175 add %sp, (2047 + 128), %o0
190 176
191do_dtlb: 177 /* Lock into D-MMU */
192 sethi %hi(call_method), %g2 178 sethi %hi(call_method), %g2
193 or %g2, %lo(call_method), %g2 179 or %g2, %lo(call_method), %g2
194 stx %g2, [%sp + 2047 + 128 + 0x00] 180 stx %g2, [%sp + 2047 + 128 + 0x00]
@@ -202,65 +188,30 @@ do_dtlb:
202 sethi %hi(prom_mmu_ihandle_cache), %g2 188 sethi %hi(prom_mmu_ihandle_cache), %g2
203 lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 189 lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2
204 stx %g2, [%sp + 2047 + 128 + 0x20] 190 stx %g2, [%sp + 2047 + 128 + 0x20]
205 sethi %hi(KERNBASE), %g2
206 stx %g2, [%sp + 2047 + 128 + 0x28]
207 sethi %hi(kern_locked_tte_data), %g2
208 ldx [%g2 + %lo(kern_locked_tte_data)], %g2
209 stx %g2, [%sp + 2047 + 128 + 0x30]
210 191
211 mov 15, %g2 192 /* Each TTE maps 4MB, convert index to offset. */
212 BRANCH_IF_ANY_CHEETAH(g1,g5,1f) 193 sllx %l5, 22, %g1
213 194
214 mov 63, %g2 195 add %l3, %g1, %g2
2151: 196 stx %g2, [%sp + 2047 + 128 + 0x28] ! VADDR
197 add %l4, %g1, %g2
198 stx %g2, [%sp + 2047 + 128 + 0x30] ! TTE
216 199
200 /* TTE index is highest minus loop index. */
201 sub %l7, %l5, %g2
217 stx %g2, [%sp + 2047 + 128 + 0x38] 202 stx %g2, [%sp + 2047 + 128 + 0x38]
203
218 sethi %hi(p1275buf), %g2 204 sethi %hi(p1275buf), %g2
219 or %g2, %lo(p1275buf), %g2 205 or %g2, %lo(p1275buf), %g2
220 ldx [%g2 + 0x08], %o1 206 ldx [%g2 + 0x08], %o1
221 call %o1 207 call %o1
222 add %sp, (2047 + 128), %o0 208 add %sp, (2047 + 128), %o0
223 209
224 sethi %hi(bigkernel), %g2 210 add %l5, 1, %l5
225 lduw [%g2 + %lo(bigkernel)], %g2 211 cmp %l5, %l6
226 brz,pt %g2, do_unlock 212 bne,pt %xcc, 3b
227 nop 213 nop
228 214
229 sethi %hi(call_method), %g2
230 or %g2, %lo(call_method), %g2
231 stx %g2, [%sp + 2047 + 128 + 0x00]
232 mov 5, %g2
233 stx %g2, [%sp + 2047 + 128 + 0x08]
234 mov 1, %g2
235 stx %g2, [%sp + 2047 + 128 + 0x10]
236 sethi %hi(dtlb_load), %g2
237 or %g2, %lo(dtlb_load), %g2
238 stx %g2, [%sp + 2047 + 128 + 0x18]
239 sethi %hi(prom_mmu_ihandle_cache), %g2
240 lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2
241 stx %g2, [%sp + 2047 + 128 + 0x20]
242 sethi %hi(KERNBASE + 0x400000), %g2
243 stx %g2, [%sp + 2047 + 128 + 0x28]
244 sethi %hi(kern_locked_tte_data), %g2
245 ldx [%g2 + %lo(kern_locked_tte_data)], %g2
246 sethi %hi(0x400000), %g1
247 add %g2, %g1, %g2
248 stx %g2, [%sp + 2047 + 128 + 0x30]
249
250 mov 14, %g2
251 BRANCH_IF_ANY_CHEETAH(g1,g5,1f)
252
253 mov 62, %g2
2541:
255
256 stx %g2, [%sp + 2047 + 128 + 0x38]
257 sethi %hi(p1275buf), %g2
258 or %g2, %lo(p1275buf), %g2
259 ldx [%g2 + 0x08], %o1
260 call %o1
261 add %sp, (2047 + 128), %o0
262
263do_unlock:
264 sethi %hi(prom_entry_lock), %g2 215 sethi %hi(prom_entry_lock), %g2
265 stb %g0, [%g2 + %lo(prom_entry_lock)] 216 stb %g0, [%g2 + %lo(prom_entry_lock)]
266 membar #StoreStore | #StoreLoad 217 membar #StoreStore | #StoreLoad
@@ -269,47 +220,36 @@ do_unlock:
269 nop 220 nop
270 221
271niagara_lock_tlb: 222niagara_lock_tlb:
223 sethi %hi(KERNBASE), %l3
224 sethi %hi(kern_locked_tte_data), %l4
225 ldx [%l4 + %lo(kern_locked_tte_data)], %l4
226 clr %l5
227 sethi %hi(num_kernel_image_mappings), %l6
228 lduw [%l6 + %lo(num_kernel_image_mappings)], %l6
229 add %l6, 1, %l6
230
2311:
272 mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 232 mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
273 sethi %hi(KERNBASE), %o0 233 sllx %l5, 22, %g2
234 add %l3, %g2, %o0
274 clr %o1 235 clr %o1
275 sethi %hi(kern_locked_tte_data), %o2 236 add %l4, %g2, %o2
276 ldx [%o2 + %lo(kern_locked_tte_data)], %o2
277 mov HV_MMU_IMMU, %o3 237 mov HV_MMU_IMMU, %o3
278 ta HV_FAST_TRAP 238 ta HV_FAST_TRAP
279 239
280 mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 240 mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
281 sethi %hi(KERNBASE), %o0 241 sllx %l5, 22, %g2
242 add %l3, %g2, %o0
282 clr %o1 243 clr %o1
283 sethi %hi(kern_locked_tte_data), %o2 244 add %l4, %g2, %o2
284 ldx [%o2 + %lo(kern_locked_tte_data)], %o2
285 mov HV_MMU_DMMU, %o3 245 mov HV_MMU_DMMU, %o3
286 ta HV_FAST_TRAP 246 ta HV_FAST_TRAP
287 247
288 sethi %hi(bigkernel), %g2 248 add %l5, 1, %l5
289 lduw [%g2 + %lo(bigkernel)], %g2 249 cmp %l5, %l6
290 brz,pt %g2, after_lock_tlb 250 bne,pt %xcc, 1b
291 nop 251 nop
292 252
293 mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
294 sethi %hi(KERNBASE + 0x400000), %o0
295 clr %o1
296 sethi %hi(kern_locked_tte_data), %o2
297 ldx [%o2 + %lo(kern_locked_tte_data)], %o2
298 sethi %hi(0x400000), %o3
299 add %o2, %o3, %o2
300 mov HV_MMU_IMMU, %o3
301 ta HV_FAST_TRAP
302
303 mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
304 sethi %hi(KERNBASE + 0x400000), %o0
305 clr %o1
306 sethi %hi(kern_locked_tte_data), %o2
307 ldx [%o2 + %lo(kern_locked_tte_data)], %o2
308 sethi %hi(0x400000), %o3
309 add %o2, %o3, %o2
310 mov HV_MMU_DMMU, %o3
311 ta HV_FAST_TRAP
312
313after_lock_tlb: 253after_lock_tlb:
314 wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate 254 wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
315 wr %g0, 0, %fprs 255 wr %g0, 0, %fprs