Diffstat (limited to 'arch/sparc64/kernel/smp.c')
-rw-r--r--  arch/sparc64/kernel/smp.c | 155
1 file changed, 50 insertions(+), 105 deletions(-)
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 24fdf1d0adc5..c550bba3490a 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -40,6 +40,7 @@
 #include <asm/tlb.h>
 #include <asm/sections.h>
 #include <asm/prom.h>
+#include <asm/mdesc.h>
 
 extern void calibrate_delay(void);
 
@@ -75,53 +76,6 @@ void smp_bogo(struct seq_file *m)
			   i, cpu_data(i).clock_tick);
 }
 
-void __init smp_store_cpu_info(int id)
-{
-	struct device_node *dp;
-	int def;
-
-	cpu_data(id).udelay_val = loops_per_jiffy;
-
-	cpu_find_by_mid(id, &dp);
-	cpu_data(id).clock_tick =
-		of_getintprop_default(dp, "clock-frequency", 0);
-
-	def = ((tlb_type == hypervisor) ? (8 * 1024) : (16 * 1024));
-	cpu_data(id).dcache_size =
-		of_getintprop_default(dp, "dcache-size", def);
-
-	def = 32;
-	cpu_data(id).dcache_line_size =
-		of_getintprop_default(dp, "dcache-line-size", def);
-
-	def = 16 * 1024;
-	cpu_data(id).icache_size =
-		of_getintprop_default(dp, "icache-size", def);
-
-	def = 32;
-	cpu_data(id).icache_line_size =
-		of_getintprop_default(dp, "icache-line-size", def);
-
-	def = ((tlb_type == hypervisor) ?
-	       (3 * 1024 * 1024) :
-	       (4 * 1024 * 1024));
-	cpu_data(id).ecache_size =
-		of_getintprop_default(dp, "ecache-size", def);
-
-	def = 64;
-	cpu_data(id).ecache_line_size =
-		of_getintprop_default(dp, "ecache-line-size", def);
-
-	printk("CPU[%d]: Caches "
-	       "D[sz(%d):line_sz(%d)] "
-	       "I[sz(%d):line_sz(%d)] "
-	       "E[sz(%d):line_sz(%d)]\n",
-	       id,
-	       cpu_data(id).dcache_size, cpu_data(id).dcache_line_size,
-	       cpu_data(id).icache_size, cpu_data(id).icache_line_size,
-	       cpu_data(id).ecache_size, cpu_data(id).ecache_line_size);
-}
-
 extern void setup_sparc64_timer(void);
 
 static volatile unsigned long callin_flag = 0;
@@ -145,7 +99,7 @@ void __init smp_callin(void)
 	local_irq_enable();
 
 	calibrate_delay();
-	smp_store_cpu_info(cpuid);
+	cpu_data(cpuid).udelay_val = loops_per_jiffy;
 	callin_flag = 1;
 	__asm__ __volatile__("membar #Sync\n\t"
			     "flush %%g6" : : : "memory");
@@ -340,9 +294,8 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu)
 
		prom_startcpu_cpuid(cpu, entry, cookie);
 	} else {
-		struct device_node *dp;
+		struct device_node *dp = of_find_node_by_cpuid(cpu);
 
-		cpu_find_by_mid(cpu, &dp);
		prom_startcpu(dp->node, entry, cookie);
 	}
 
@@ -447,7 +400,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c
 static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
 {
 	u64 pstate, ver;
-	int nack_busy_id, is_jbus;
+	int nack_busy_id, is_jbus, need_more;
 
 	if (cpus_empty(mask))
		return;
@@ -463,6 +416,7 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
 	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
 
 retry:
+	need_more = 0;
 	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
			     : : "r" (pstate), "i" (PSTATE_IE));
 
@@ -491,6 +445,10 @@ retry:
				: /* no outputs */
				: "r" (target), "i" (ASI_INTR_W));
			nack_busy_id++;
+			if (nack_busy_id == 32) {
+				need_more = 1;
+				break;
+			}
		}
 	}
 
@@ -507,6 +465,16 @@ retry:
		if (dispatch_stat == 0UL) {
			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
					     : : "r" (pstate));
+			if (unlikely(need_more)) {
+				int i, cnt = 0;
+				for_each_cpu_mask(i, mask) {
+					cpu_clear(i, mask);
+					cnt++;
+					if (cnt == 32)
+						break;
+				}
+				goto retry;
+			}
			return;
		}
		if (!--stuck)
@@ -544,6 +512,8 @@ retry:
				if ((dispatch_stat & check_mask) == 0)
					cpu_clear(i, mask);
				this_busy_nack += 2;
+				if (this_busy_nack == 64)
+					break;
			}
 
			goto retry;
@@ -1191,23 +1161,14 @@ int setup_profiling_timer(unsigned int multiplier)
 
 static void __init smp_tune_scheduling(void)
 {
-	struct device_node *dp;
-	int instance;
-	unsigned int def, smallest = ~0U;
-
-	def = ((tlb_type == hypervisor) ?
-	       (3 * 1024 * 1024) :
-	       (4 * 1024 * 1024));
+	unsigned int smallest = ~0U;
+	int i;
 
-	instance = 0;
-	while (!cpu_find_by_instance(instance, &dp, NULL)) {
-		unsigned int val;
+	for (i = 0; i < NR_CPUS; i++) {
+		unsigned int val = cpu_data(i).ecache_size;
 
-		val = of_getintprop_default(dp, "ecache-size", def);
-		if (val < smallest)
+		if (val && val < smallest)
			smallest = val;
-
-		instance++;
 	}
 
 	/* Any value less than 256K is nonsense. */
@@ -1230,58 +1191,42 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	int i;
 
 	if (num_possible_cpus() > max_cpus) {
-		int instance, mid;
-
-		instance = 0;
-		while (!cpu_find_by_instance(instance, NULL, &mid)) {
-			if (mid != boot_cpu_id) {
-				cpu_clear(mid, phys_cpu_present_map);
-				cpu_clear(mid, cpu_present_map);
+		for_each_possible_cpu(i) {
+			if (i != boot_cpu_id) {
+				cpu_clear(i, phys_cpu_present_map);
+				cpu_clear(i, cpu_present_map);
				if (num_possible_cpus() <= max_cpus)
					break;
			}
-			instance++;
		}
 	}
 
-	for_each_possible_cpu(i) {
-		if (tlb_type == hypervisor) {
-			int j;
-
-			/* XXX get this mapping from machine description */
-			for_each_possible_cpu(j) {
-				if ((j >> 2) == (i >> 2))
-					cpu_set(j, cpu_sibling_map[i]);
-			}
-		} else {
-			cpu_set(i, cpu_sibling_map[i]);
-		}
-	}
-
-	smp_store_cpu_info(boot_cpu_id);
+	cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy;
 	smp_tune_scheduling();
 }
 
-/* Set this up early so that things like the scheduler can init
- * properly.  We use the same cpu mask for both the present and
- * possible cpu map.
- */
-void __init smp_setup_cpu_possible_map(void)
+void __devinit smp_prepare_boot_cpu(void)
 {
-	int instance, mid;
-
-	instance = 0;
-	while (!cpu_find_by_instance(instance, NULL, &mid)) {
-		if (mid < NR_CPUS) {
-			cpu_set(mid, phys_cpu_present_map);
-			cpu_set(mid, cpu_present_map);
-		}
-		instance++;
-	}
 }
 
-void __devinit smp_prepare_boot_cpu(void)
+void __devinit smp_fill_in_sib_core_maps(void)
 {
+	unsigned int i;
+
+	for_each_possible_cpu(i) {
+		unsigned int j;
+
+		if (cpu_data(i).core_id == 0) {
+			cpu_set(i, cpu_sibling_map[i]);
+			continue;
+		}
+
+		for_each_possible_cpu(j) {
+			if (cpu_data(i).core_id ==
+			    cpu_data(j).core_id)
+				cpu_set(j, cpu_sibling_map[i]);
+		}
+	}
 }
 
 int __cpuinit __cpu_up(unsigned int cpu)
@@ -1337,7 +1282,7 @@ unsigned long __per_cpu_shift __read_mostly;
 EXPORT_SYMBOL(__per_cpu_base);
 EXPORT_SYMBOL(__per_cpu_shift);
 
-void __init setup_per_cpu_areas(void)
+void __init real_setup_per_cpu_areas(void)
 {
 	unsigned long goal, size, i;
 	char *ptr;