author		David S. Miller <davem@sunset.davemloft.net>	2007-05-26 04:14:43 -0400
committer	David S. Miller <davem@sunset.davemloft.net>	2007-05-29 05:49:49 -0400
commit		22adb358e816ce6aa0afb231ae9d826b0bddc8b0 (patch)
tree		6f9886bf5b4e5c916c72d8d5733211813873c5fc
parent		5cbc30737398b49f62ae8603129ce43ac7db1a41 (diff)
[SPARC64]: Eliminate NR_CPUS limitations.
Cheetah systems can have cpuids as large as 1023, although physical
systems don't have that many cpus.

Only three limitations existed in the kernel preventing arbitrary
NR_CPUS values:

1) dcache dirty cpu state stored in page->flags on D-cache aliasing
   platforms.  With some build time calculations and some build-time
   BUG checks on page->flags layout, this one was easily solved.

2) The cheetah XCALL delivery code could only handle a cpumask with
   up to 32 cpus set.  Some simple looping logic clears that up too.

3) thread_info->cpu was a u8, easily changed to a u16.

There are a few spots in the kernel that still put NR_CPUS sized
arrays on the kernel stack, but that's not a sparc64 specific problem.

Signed-off-by: David S. Miller <davem@davemloft.net>
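[Editor's note] The page->flags arithmetic behind point 1 is compact, so here is a minimal
standalone sketch (plain userspace C, not kernel code) of the calculation that the
BUILD_BUG_ON() checks in the mm/init.c hunk below enforce: the dirty cpu id is stored
starting at bit 32 of page->flags, the field is ilog2(roundup_pow_of_two(NR_CPUS)) bits
wide, and it must fit in the reserved upper 32 bits together with the section/node/zone
fields.  The NR_CPUS and width values used here are illustrative placeholders, not the
kernel's configured values.

	/* Sketch only: mirrors the page->flags layout check done with
	 * BUILD_BUG_ON() in paging_init().  All width values are placeholders.
	 */
	#include <stdio.h>

	#define NR_CPUS			1024	/* example Kconfig value */
	#define FLAGS_RESERVED		32	/* upper page->flags bits available for fields */
	#define SECTIONS_WIDTH		0	/* placeholder */
	#define NODES_WIDTH		10	/* placeholder */
	#define ZONES_WIDTH		2	/* placeholder */
	#define PG_DCACHE_CPU_SHIFT	32UL	/* cpu field starts at bit 32 */

	/* Equivalent of ilog2(roundup_pow_of_two(n)): bits needed to hold
	 * any cpu id below n.
	 */
	static unsigned int cpu_field_bits(unsigned long n)
	{
		unsigned int bits = 0;

		while ((1UL << bits) < n)
			bits++;
		return bits;
	}

	int main(void)
	{
		unsigned int bits = cpu_field_bits(NR_CPUS);
		unsigned long pg_dcache_cpu_mask = (1UL << bits) - 1UL;
		int fits = SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH + bits
			   <= FLAGS_RESERVED;

		printf("cpu field: %u bits, mask 0x%lx, shift %lu\n",
		       bits, pg_dcache_cpu_mask, PG_DCACHE_CPU_SHIFT);
		printf("fits in reserved page->flags bits: %s\n",
		       fits ? "yes" : "no");
		return 0;
	}

With NR_CPUS set to 1024 this reports a 10-bit field (mask 0x3ff), which fits in the 32
reserved bits alongside the placeholder section/node/zone widths.  The separate
BUILD_BUG_ON(NR_CPUS > 4096) in the actual patch exists because, as its comment notes,
the dcache-dirty helpers encode the cpu mask in 13-bit signed-immediate instruction
fields.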
-rw-r--r--	arch/sparc64/Kconfig			 6
-rw-r--r--	arch/sparc64/kernel/head.S		 2
-rw-r--r--	arch/sparc64/kernel/smp.c		19
-rw-r--r--	arch/sparc64/mm/init.c			22
-rw-r--r--	include/asm-sparc64/cpudata.h		 2
-rw-r--r--	include/asm-sparc64/thread_info.h	 8
6 files changed, 43 insertions(+), 16 deletions(-)
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 831781cab271..bd00f89eed1e 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -147,10 +147,10 @@ config SMP
 	  If you don't know what to do here, say N.
 
 config NR_CPUS
-	int "Maximum number of CPUs (2-64)"
-	range 2 64
+	int "Maximum number of CPUs (2-1024)"
+	range 2 1024
 	depends on SMP
-	default "32"
+	default "64"
 
 source "drivers/cpufreq/Kconfig"
 
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index baea10a98196..5c11529742d4 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -523,7 +523,7 @@ tlb_fixup_done:
 #else
 	mov	0, %o0
 #endif
-	stb	%o0, [%g6 + TI_CPU]
+	sth	%o0, [%g6 + TI_CPU]
 
 	/* Off we go.... */
 	call	start_kernel
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index f7fa873c800d..c550bba3490a 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -400,7 +400,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c
 static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
 {
 	u64 pstate, ver;
-	int nack_busy_id, is_jbus;
+	int nack_busy_id, is_jbus, need_more;
 
 	if (cpus_empty(mask))
 		return;
@@ -416,6 +416,7 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
 	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
 
 retry:
+	need_more = 0;
 	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
			     : : "r" (pstate), "i" (PSTATE_IE));
 
@@ -444,6 +445,10 @@ retry:
				: /* no outputs */
				: "r" (target), "i" (ASI_INTR_W));
 			nack_busy_id++;
+			if (nack_busy_id == 32) {
+				need_more = 1;
+				break;
+			}
 		}
 	}
 
@@ -460,6 +465,16 @@ retry:
 			if (dispatch_stat == 0UL) {
 				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
						     : : "r" (pstate));
+				if (unlikely(need_more)) {
+					int i, cnt = 0;
+					for_each_cpu_mask(i, mask) {
+						cpu_clear(i, mask);
+						cnt++;
+						if (cnt == 32)
+							break;
+					}
+					goto retry;
+				}
 				return;
 			}
 			if (!--stuck)
@@ -497,6 +512,8 @@ retry:
 				if ((dispatch_stat & check_mask) == 0)
 					cpu_clear(i, mask);
 				this_busy_nack += 2;
+				if (this_busy_nack == 64)
+					break;
 			}
 
 			goto retry;
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 977698269d3a..087cbf09d0b7 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -191,12 +191,9 @@ inline void flush_dcache_page_impl(struct page *page)
 }
 
 #define PG_dcache_dirty		PG_arch_1
-#define PG_dcache_cpu_shift	24UL
-#define PG_dcache_cpu_mask	(256UL - 1UL)
-
-#if NR_CPUS > 256
-#error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus
-#endif
+#define PG_dcache_cpu_shift	32UL
+#define PG_dcache_cpu_mask	\
+	((1UL<<ilog2(roundup_pow_of_two(NR_CPUS)))-1UL)
 
 #define dcache_dirty_cpu(page) \
 	(((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask)
@@ -1349,6 +1346,19 @@ void __init paging_init(void)
 	unsigned long end_pfn, pages_avail, shift, phys_base;
 	unsigned long real_end, i;
 
+	/* These build time checkes make sure that the dcache_dirty_cpu()
+	 * page->flags usage will work.
+	 *
+	 * When a page gets marked as dcache-dirty, we store the
+	 * cpu number starting at bit 32 in the page->flags.  Also,
+	 * functions like clear_dcache_dirty_cpu use the cpu mask
+	 * in 13-bit signed-immediate instruction fields.
+	 */
+	BUILD_BUG_ON(FLAGS_RESERVED != 32);
+	BUILD_BUG_ON(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH +
+		     ilog2(roundup_pow_of_two(NR_CPUS)) > FLAGS_RESERVED);
+	BUILD_BUG_ON(NR_CPUS > 4096);
+
 	kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
 	kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;
 
diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h
index f321b1d21227..03c385de7619 100644
--- a/include/asm-sparc64/cpudata.h
+++ b/include/asm-sparc64/cpudata.h
@@ -202,7 +202,7 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
  * the calculations done by the macro mid-stream.
  */
 #define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)	\
-	ldub	[THR + TI_CPU], REG1;			\
+	lduh	[THR + TI_CPU], REG1;			\
 	sethi	%hi(__per_cpu_shift), REG3;		\
 	sethi	%hi(__per_cpu_base), REG2;		\
 	ldx	[REG3 + %lo(__per_cpu_shift)], REG3;	\
diff --git a/include/asm-sparc64/thread_info.h b/include/asm-sparc64/thread_info.h
index 2ebf7f27bf91..98252cd44dd6 100644
--- a/include/asm-sparc64/thread_info.h
+++ b/include/asm-sparc64/thread_info.h
@@ -38,8 +38,8 @@ struct thread_info {
 	/* D$ line 1 */
 	struct task_struct	*task;
 	unsigned long		flags;
-	__u8			cpu;
 	__u8			fpsaved[7];
+	__u8			pad;
 	unsigned long		ksp;
 
 	/* D$ line 2 */
@@ -49,7 +49,7 @@ struct thread_info {
 	int			preempt_count;	/* 0 => preemptable, <0 => BUG */
 	__u8			new_child;
 	__u8			syscall_noerror;
-	__u16			__pad;
+	__u16			cpu;
 
 	unsigned long		*utraps;
 
@@ -83,8 +83,7 @@ struct thread_info {
 #define TI_CURRENT_DS	(TI_FLAGS + TI_FLAG_BYTE_CURRENT_DS)
 #define TI_FPDEPTH	(TI_FLAGS + TI_FLAG_BYTE_FPDEPTH)
 #define TI_WSAVED	(TI_FLAGS + TI_FLAG_BYTE_WSAVED)
-#define TI_CPU		0x00000010
-#define TI_FPSAVED	0x00000011
+#define TI_FPSAVED	0x00000010
 #define TI_KSP		0x00000018
 #define TI_FAULT_ADDR	0x00000020
 #define TI_KREGS	0x00000028
@@ -92,6 +91,7 @@ struct thread_info {
 #define TI_PRE_COUNT	0x00000038
 #define TI_NEW_CHILD	0x0000003c
 #define TI_SYS_NOERROR	0x0000003d
+#define TI_CPU		0x0000003e
 #define TI_UTRAPS	0x00000040
 #define TI_REG_WINDOW	0x00000048
 #define TI_RWIN_SPTRS	0x000003c8