diff options
author | David S. Miller <davem@sunset.davemloft.net> | 2007-05-26 04:14:43 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-05-29 05:49:49 -0400 |
commit | 22adb358e816ce6aa0afb231ae9d826b0bddc8b0 (patch) | |
tree | 6f9886bf5b4e5c916c72d8d5733211813873c5fc | |
parent | 5cbc30737398b49f62ae8603129ce43ac7db1a41 (diff) |
[SPARC64]: Eliminate NR_CPUS limitations.
Cheetah systems can have cpuids as large as 1023, although physical
systems don't have that many cpus.
Only three limitations existed in the kernel preventing arbitrary
NR_CPUS values:
1) dcache dirty cpu state stored in page->flags on
D-cache aliasing platforms. With some build-time
calculations and some build-time BUG checks on
page->flags layout, this one was easily solved.
2) The cheetah XCALL delivery code could only handle
a cpumask with up to 32 cpus set. Some simple looping
logic clears that up too.
3) thread_info->cpu was a u8, easily changed to a u16.
There are a few spots in the kernel that still put NR_CPUS-sized
arrays on the kernel stack, but that's not a sparc64-specific
problem.
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | arch/sparc64/Kconfig | 6 | ||||
-rw-r--r-- | arch/sparc64/kernel/head.S | 2 | ||||
-rw-r--r-- | arch/sparc64/kernel/smp.c | 19 | ||||
-rw-r--r-- | arch/sparc64/mm/init.c | 22 | ||||
-rw-r--r-- | include/asm-sparc64/cpudata.h | 2 | ||||
-rw-r--r-- | include/asm-sparc64/thread_info.h | 8 |
6 files changed, 43 insertions, 16 deletions
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 831781cab271..bd00f89eed1e 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig | |||
@@ -147,10 +147,10 @@ config SMP | |||
147 | If you don't know what to do here, say N. | 147 | If you don't know what to do here, say N. |
148 | 148 | ||
149 | config NR_CPUS | 149 | config NR_CPUS |
150 | int "Maximum number of CPUs (2-64)" | 150 | int "Maximum number of CPUs (2-1024)" |
151 | range 2 64 | 151 | range 2 1024 |
152 | depends on SMP | 152 | depends on SMP |
153 | default "32" | 153 | default "64" |
154 | 154 | ||
155 | source "drivers/cpufreq/Kconfig" | 155 | source "drivers/cpufreq/Kconfig" |
156 | 156 | ||
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index baea10a98196..5c11529742d4 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S | |||
@@ -523,7 +523,7 @@ tlb_fixup_done: | |||
523 | #else | 523 | #else |
524 | mov 0, %o0 | 524 | mov 0, %o0 |
525 | #endif | 525 | #endif |
526 | stb %o0, [%g6 + TI_CPU] | 526 | sth %o0, [%g6 + TI_CPU] |
527 | 527 | ||
528 | /* Off we go.... */ | 528 | /* Off we go.... */ |
529 | call start_kernel | 529 | call start_kernel |
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index f7fa873c800d..c550bba3490a 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c | |||
@@ -400,7 +400,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c | |||
400 | static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) | 400 | static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) |
401 | { | 401 | { |
402 | u64 pstate, ver; | 402 | u64 pstate, ver; |
403 | int nack_busy_id, is_jbus; | 403 | int nack_busy_id, is_jbus, need_more; |
404 | 404 | ||
405 | if (cpus_empty(mask)) | 405 | if (cpus_empty(mask)) |
406 | return; | 406 | return; |
@@ -416,6 +416,7 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas | |||
416 | __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); | 416 | __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); |
417 | 417 | ||
418 | retry: | 418 | retry: |
419 | need_more = 0; | ||
419 | __asm__ __volatile__("wrpr %0, %1, %%pstate\n\t" | 420 | __asm__ __volatile__("wrpr %0, %1, %%pstate\n\t" |
420 | : : "r" (pstate), "i" (PSTATE_IE)); | 421 | : : "r" (pstate), "i" (PSTATE_IE)); |
421 | 422 | ||
@@ -444,6 +445,10 @@ retry: | |||
444 | : /* no outputs */ | 445 | : /* no outputs */ |
445 | : "r" (target), "i" (ASI_INTR_W)); | 446 | : "r" (target), "i" (ASI_INTR_W)); |
446 | nack_busy_id++; | 447 | nack_busy_id++; |
448 | if (nack_busy_id == 32) { | ||
449 | need_more = 1; | ||
450 | break; | ||
451 | } | ||
447 | } | 452 | } |
448 | } | 453 | } |
449 | 454 | ||
@@ -460,6 +465,16 @@ retry: | |||
460 | if (dispatch_stat == 0UL) { | 465 | if (dispatch_stat == 0UL) { |
461 | __asm__ __volatile__("wrpr %0, 0x0, %%pstate" | 466 | __asm__ __volatile__("wrpr %0, 0x0, %%pstate" |
462 | : : "r" (pstate)); | 467 | : : "r" (pstate)); |
468 | if (unlikely(need_more)) { | ||
469 | int i, cnt = 0; | ||
470 | for_each_cpu_mask(i, mask) { | ||
471 | cpu_clear(i, mask); | ||
472 | cnt++; | ||
473 | if (cnt == 32) | ||
474 | break; | ||
475 | } | ||
476 | goto retry; | ||
477 | } | ||
463 | return; | 478 | return; |
464 | } | 479 | } |
465 | if (!--stuck) | 480 | if (!--stuck) |
@@ -497,6 +512,8 @@ retry: | |||
497 | if ((dispatch_stat & check_mask) == 0) | 512 | if ((dispatch_stat & check_mask) == 0) |
498 | cpu_clear(i, mask); | 513 | cpu_clear(i, mask); |
499 | this_busy_nack += 2; | 514 | this_busy_nack += 2; |
515 | if (this_busy_nack == 64) | ||
516 | break; | ||
500 | } | 517 | } |
501 | 518 | ||
502 | goto retry; | 519 | goto retry; |
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 977698269d3a..087cbf09d0b7 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c | |||
@@ -191,12 +191,9 @@ inline void flush_dcache_page_impl(struct page *page) | |||
191 | } | 191 | } |
192 | 192 | ||
193 | #define PG_dcache_dirty PG_arch_1 | 193 | #define PG_dcache_dirty PG_arch_1 |
194 | #define PG_dcache_cpu_shift 24UL | 194 | #define PG_dcache_cpu_shift 32UL |
195 | #define PG_dcache_cpu_mask (256UL - 1UL) | 195 | #define PG_dcache_cpu_mask \ |
196 | 196 | ((1UL<<ilog2(roundup_pow_of_two(NR_CPUS)))-1UL) | |
197 | #if NR_CPUS > 256 | ||
198 | #error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus | ||
199 | #endif | ||
200 | 197 | ||
201 | #define dcache_dirty_cpu(page) \ | 198 | #define dcache_dirty_cpu(page) \ |
202 | (((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask) | 199 | (((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask) |
@@ -1349,6 +1346,19 @@ void __init paging_init(void) | |||
1349 | unsigned long end_pfn, pages_avail, shift, phys_base; | 1346 | unsigned long end_pfn, pages_avail, shift, phys_base; |
1350 | unsigned long real_end, i; | 1347 | unsigned long real_end, i; |
1351 | 1348 | ||
1349 | /* These build-time checks make sure that the dcache_dirty_cpu() | ||
1350 | * page->flags usage will work. | ||
1351 | * | ||
1352 | * When a page gets marked as dcache-dirty, we store the | ||
1353 | * cpu number starting at bit 32 in the page->flags. Also, | ||
1354 | * functions like clear_dcache_dirty_cpu use the cpu mask | ||
1355 | * in 13-bit signed-immediate instruction fields. | ||
1356 | */ | ||
1357 | BUILD_BUG_ON(FLAGS_RESERVED != 32); | ||
1358 | BUILD_BUG_ON(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH + | ||
1359 | ilog2(roundup_pow_of_two(NR_CPUS)) > FLAGS_RESERVED); | ||
1360 | BUILD_BUG_ON(NR_CPUS > 4096); | ||
1361 | |||
1352 | kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; | 1362 | kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; |
1353 | kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; | 1363 | kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; |
1354 | 1364 | ||
diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index f321b1d21227..03c385de7619 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h | |||
@@ -202,7 +202,7 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, | |||
202 | * the calculations done by the macro mid-stream. | 202 | * the calculations done by the macro mid-stream. |
203 | */ | 203 | */ |
204 | #define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \ | 204 | #define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \ |
205 | ldub [THR + TI_CPU], REG1; \ | 205 | lduh [THR + TI_CPU], REG1; \ |
206 | sethi %hi(__per_cpu_shift), REG3; \ | 206 | sethi %hi(__per_cpu_shift), REG3; \ |
207 | sethi %hi(__per_cpu_base), REG2; \ | 207 | sethi %hi(__per_cpu_base), REG2; \ |
208 | ldx [REG3 + %lo(__per_cpu_shift)], REG3; \ | 208 | ldx [REG3 + %lo(__per_cpu_shift)], REG3; \ |
diff --git a/include/asm-sparc64/thread_info.h b/include/asm-sparc64/thread_info.h index 2ebf7f27bf91..98252cd44dd6 100644 --- a/include/asm-sparc64/thread_info.h +++ b/include/asm-sparc64/thread_info.h | |||
@@ -38,8 +38,8 @@ struct thread_info { | |||
38 | /* D$ line 1 */ | 38 | /* D$ line 1 */ |
39 | struct task_struct *task; | 39 | struct task_struct *task; |
40 | unsigned long flags; | 40 | unsigned long flags; |
41 | __u8 cpu; | ||
42 | __u8 fpsaved[7]; | 41 | __u8 fpsaved[7]; |
42 | __u8 pad; | ||
43 | unsigned long ksp; | 43 | unsigned long ksp; |
44 | 44 | ||
45 | /* D$ line 2 */ | 45 | /* D$ line 2 */ |
@@ -49,7 +49,7 @@ struct thread_info { | |||
49 | int preempt_count; /* 0 => preemptable, <0 => BUG */ | 49 | int preempt_count; /* 0 => preemptable, <0 => BUG */ |
50 | __u8 new_child; | 50 | __u8 new_child; |
51 | __u8 syscall_noerror; | 51 | __u8 syscall_noerror; |
52 | __u16 __pad; | 52 | __u16 cpu; |
53 | 53 | ||
54 | unsigned long *utraps; | 54 | unsigned long *utraps; |
55 | 55 | ||
@@ -83,8 +83,7 @@ struct thread_info { | |||
83 | #define TI_CURRENT_DS (TI_FLAGS + TI_FLAG_BYTE_CURRENT_DS) | 83 | #define TI_CURRENT_DS (TI_FLAGS + TI_FLAG_BYTE_CURRENT_DS) |
84 | #define TI_FPDEPTH (TI_FLAGS + TI_FLAG_BYTE_FPDEPTH) | 84 | #define TI_FPDEPTH (TI_FLAGS + TI_FLAG_BYTE_FPDEPTH) |
85 | #define TI_WSAVED (TI_FLAGS + TI_FLAG_BYTE_WSAVED) | 85 | #define TI_WSAVED (TI_FLAGS + TI_FLAG_BYTE_WSAVED) |
86 | #define TI_CPU 0x00000010 | 86 | #define TI_FPSAVED 0x00000010 |
87 | #define TI_FPSAVED 0x00000011 | ||
88 | #define TI_KSP 0x00000018 | 87 | #define TI_KSP 0x00000018 |
89 | #define TI_FAULT_ADDR 0x00000020 | 88 | #define TI_FAULT_ADDR 0x00000020 |
90 | #define TI_KREGS 0x00000028 | 89 | #define TI_KREGS 0x00000028 |
@@ -92,6 +91,7 @@ struct thread_info { | |||
92 | #define TI_PRE_COUNT 0x00000038 | 91 | #define TI_PRE_COUNT 0x00000038 |
93 | #define TI_NEW_CHILD 0x0000003c | 92 | #define TI_NEW_CHILD 0x0000003c |
94 | #define TI_SYS_NOERROR 0x0000003d | 93 | #define TI_SYS_NOERROR 0x0000003d |
94 | #define TI_CPU 0x0000003e | ||
95 | #define TI_UTRAPS 0x00000040 | 95 | #define TI_UTRAPS 0x00000040 |
96 | #define TI_REG_WINDOW 0x00000048 | 96 | #define TI_REG_WINDOW 0x00000048 |
97 | #define TI_RWIN_SPTRS 0x000003c8 | 97 | #define TI_RWIN_SPTRS 0x000003c8 |