author    Li Shaohua <shaohua.li@intel.com>  2005-06-25 17:54:56 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org>  2005-06-25 19:24:30 -0400
commit    e1367daf3eed5cd619ee88c9907e1e6ddaa58406
tree      dce60efefba356e0a914669587586a6174e41b94  /arch/i386/kernel/smpboot.c
parent    0bb3184df537002a742bafddf3f4fb482b7fe610
[PATCH] cpu state clean after hot remove
Clean up CPU state so that the SMP boot code can be reused for CPU hotplug.

Signed-off-by: Li Shaohua <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
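[Editor's note: the diff below wires up a per-CPU life-cycle that the hotplug code relies on: __cpu_up() marks the target CPU_UP_PREPARE, the freshly booted CPU marks itself CPU_ONLINE in start_secondary(), and __cpu_die() polls until the dying CPU acks with CPU_DEAD. The following is a minimal userspace model of that state machine, not kernel code: pthreads stand in for CPUs, a volatile flag stands in for the kernel's properly barriered per-cpu variable, and the state names merely mirror the kernel's.]

/*
 * Illustrative sketch only: a userspace model of the cpu_state
 * life cycle this patch completes. Threads stand in for CPUs;
 * the kernel uses memory barriers, not volatile.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

enum cpu_state { CPU_UP_PREPARE, CPU_ONLINE, CPU_DEAD };

#define NR_CPUS 4
static volatile enum cpu_state cpu_state[NR_CPUS];

/* stands in for start_secondary() followed by play_dead() */
static void *secondary(void *arg)
{
	int cpu = *(int *)arg;

	cpu_state[cpu] = CPU_ONLINE;	/* "officially up" */
	/* ... the CPU would run its idle loop here ... */
	cpu_state[cpu] = CPU_DEAD;	/* ack the hot remove */
	return NULL;
}

int main(void)
{
	int cpu = 1;
	pthread_t t;

	cpu_state[cpu] = CPU_UP_PREPARE;	/* __cpu_up() path */
	pthread_create(&t, NULL, secondary, &cpu);

	while (cpu_state[cpu] != CPU_DEAD)	/* __cpu_die() path */
		usleep(1000);
	printf("CPU %d is now offline\n", cpu);

	pthread_join(t, NULL);
	return 0;
}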
Diffstat (limited to 'arch/i386/kernel/smpboot.c')
-rw-r--r--  arch/i386/kernel/smpboot.c  175
1 file changed, 144 insertions(+), 31 deletions(-)
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index fb0b200d1d85..d66bf489a2e9 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -90,6 +90,12 @@ cpumask_t cpu_callout_map;
 EXPORT_SYMBOL(cpu_callout_map);
 static cpumask_t smp_commenced_mask;
 
+/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there
+ * is no way to resync one AP against BP. TBD: for prescott and above, we
+ * should use IA64's algorithm
+ */
+static int __devinitdata tsc_sync_disabled;
+
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_data);
@@ -427,7 +433,7 @@ static void __devinit smp_callin(void)
 	/*
 	 * Synchronize the TSC with the BP
 	 */
-	if (cpu_has_tsc && cpu_khz)
+	if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
 		synchronize_tsc_ap();
 }
 
@@ -507,6 +513,7 @@ static void __devinit start_secondary(void *unused)
 	lock_ipi_call_lock();
 	cpu_set(smp_processor_id(), cpu_online_map);
 	unlock_ipi_call_lock();
+	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 
 	/* We can take interrupts now: we're officially "up". */
 	local_irq_enable();
@@ -816,8 +823,43 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 #endif	/* WAKE_SECONDARY_VIA_INIT */
 
 extern cpumask_t cpu_initialized;
+static inline int alloc_cpu_id(void)
+{
+	cpumask_t tmp_map;
+	int cpu;
+	cpus_complement(tmp_map, cpu_present_map);
+	cpu = first_cpu(tmp_map);
+	if (cpu >= NR_CPUS)
+		return -ENODEV;
+	return cpu;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS];
+static inline struct task_struct * alloc_idle_task(int cpu)
+{
+	struct task_struct *idle;
+
+	if ((idle = cpu_idle_tasks[cpu]) != NULL) {
+		/* initialize thread_struct.  we really want to avoid destroy
+		 * idle tread
+		 */
+		idle->thread.esp = (unsigned long)(((struct pt_regs *)
+			(THREAD_SIZE + (unsigned long) idle->thread_info)) - 1);
+		init_idle(idle, cpu);
+		return idle;
+	}
+	idle = fork_idle(cpu);
+
+	if (!IS_ERR(idle))
+		cpu_idle_tasks[cpu] = idle;
+	return idle;
+}
+#else
+#define alloc_idle_task(cpu) fork_idle(cpu)
+#endif
 
-static int __devinit do_boot_cpu(int apicid)
+static int __devinit do_boot_cpu(int apicid, int cpu)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -826,16 +868,17 @@ static int __devinit do_boot_cpu(int apicid)
 {
 	struct task_struct *idle;
 	unsigned long boot_error;
-	int timeout, cpu;
+	int timeout;
 	unsigned long start_eip;
 	unsigned short nmi_high = 0, nmi_low = 0;
 
-	cpu = ++cpucount;
+	++cpucount;
+
 	/*
 	 * We can't use kernel_thread since we must avoid to
 	 * reschedule the child.
 	 */
-	idle = fork_idle(cpu);
+	idle = alloc_idle_task(cpu);
 	if (IS_ERR(idle))
 		panic("failed fork for CPU %d", cpu);
 	idle->thread.eip = (unsigned long) start_secondary;
@@ -902,13 +945,16 @@ static int __devinit do_boot_cpu(int apicid)
 			inquire_remote_apic(apicid);
 		}
 	}
-	x86_cpu_to_apicid[cpu] = apicid;
+
 	if (boot_error) {
 		/* Try to put things back the way they were before ... */
 		unmap_cpu_to_logical_apicid(cpu);
 		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
 		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
 		cpucount--;
+	} else {
+		x86_cpu_to_apicid[cpu] = apicid;
+		cpu_set(cpu, cpu_present_map);
 	}
 
 	/* mark "stuck" area as not stuck */
@@ -917,6 +963,75 @@ static int __devinit do_boot_cpu(int apicid)
 	return boot_error;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+void cpu_exit_clear(void)
+{
+	int cpu = raw_smp_processor_id();
+
+	idle_task_exit();
+
+	cpucount --;
+	cpu_uninit();
+	irq_ctx_exit(cpu);
+
+	cpu_clear(cpu, cpu_callout_map);
+	cpu_clear(cpu, cpu_callin_map);
+	cpu_clear(cpu, cpu_present_map);
+
+	cpu_clear(cpu, smp_commenced_mask);
+	unmap_cpu_to_logical_apicid(cpu);
+}
+
+struct warm_boot_cpu_info {
+	struct completion *complete;
+	int apicid;
+	int cpu;
+};
+
+static void __devinit do_warm_boot_cpu(void *p)
+{
+	struct warm_boot_cpu_info *info = p;
+	do_boot_cpu(info->apicid, info->cpu);
+	complete(info->complete);
+}
+
+int __devinit smp_prepare_cpu(int cpu)
+{
+	DECLARE_COMPLETION(done);
+	struct warm_boot_cpu_info info;
+	struct work_struct task;
+	int apicid, ret;
+
+	lock_cpu_hotplug();
+	apicid = x86_cpu_to_apicid[cpu];
+	if (apicid == BAD_APICID) {
+		ret = -ENODEV;
+		goto exit;
+	}
+
+	info.complete = &done;
+	info.apicid = apicid;
+	info.cpu = cpu;
+	INIT_WORK(&task, do_warm_boot_cpu, &info);
+
+	tsc_sync_disabled = 1;
+
+	/* init low mem mapping */
+	memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+			sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS);
+	flush_tlb_all();
+	schedule_work(&task);
+	wait_for_completion(&done);
+
+	tsc_sync_disabled = 0;
+	zap_low_mappings();
+	ret = 0;
+exit:
+	unlock_cpu_hotplug();
+	return ret;
+}
+#endif
+
 static void smp_tune_scheduling (void)
 {
 	unsigned long cachesize;	/* kB */
@@ -1069,7 +1184,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 		if (max_cpus <= cpucount+1)
 			continue;
 
-		if (do_boot_cpu(apicid))
+		if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
 			printk("CPU #%d not responding - cannot use it.\n",
 								apicid);
 		else
@@ -1149,25 +1264,24 @@ void __devinit smp_prepare_boot_cpu(void)
 {
 	cpu_set(smp_processor_id(), cpu_online_map);
 	cpu_set(smp_processor_id(), cpu_callout_map);
+	cpu_set(smp_processor_id(), cpu_present_map);
+	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-
-/* must be called with the cpucontrol mutex held */
-static int __devinit cpu_enable(unsigned int cpu)
+static void
+remove_siblinginfo(int cpu)
 {
-	/* get the target out of its holding state */
-	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
-	wmb();
-
-	/* wait for the processor to ack it. timeout? */
-	while (!cpu_online(cpu))
-		cpu_relax();
-
-	fixup_irqs(cpu_online_map);
-	/* counter the disable in fixup_irqs() */
-	local_irq_enable();
-	return 0;
+	int sibling;
+
+	for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
+		cpu_clear(cpu, cpu_sibling_map[sibling]);
+	for_each_cpu_mask(sibling, cpu_core_map[cpu])
+		cpu_clear(cpu, cpu_core_map[sibling]);
+	cpus_clear(cpu_sibling_map[cpu]);
+	cpus_clear(cpu_core_map[cpu]);
+	phys_proc_id[cpu] = BAD_APICID;
+	cpu_core_id[cpu] = BAD_APICID;
 }
 
 int __cpu_disable(void)
@@ -1193,6 +1307,8 @@ int __cpu_disable(void)
 	mdelay(1);
 	local_irq_disable();
 
+	remove_siblinginfo(cpu);
+
 	cpu_clear(cpu, map);
 	fixup_irqs(map);
 	/* It's now safe to remove this processor from the online map */
@@ -1207,8 +1323,10 @@ void __cpu_die(unsigned int cpu)
 
 	for (i = 0; i < 10; i++) {
 		/* They ack this in play_dead by setting CPU_DEAD */
-		if (per_cpu(cpu_state, cpu) == CPU_DEAD)
+		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
+			printk ("CPU %d is now offline\n", cpu);
 			return;
+		}
 		current->state = TASK_UNINTERRUPTIBLE;
 		schedule_timeout(HZ/10);
 	}
@@ -1236,15 +1354,8 @@ int __devinit __cpu_up(unsigned int cpu)
 		return -EIO;
 	}
 
-#ifdef CONFIG_HOTPLUG_CPU
-	/* Already up, and in cpu_quiescent now? */
-	if (cpu_isset(cpu, smp_commenced_mask)) {
-		cpu_enable(cpu);
-		return 0;
-	}
-#endif
-
 	local_irq_enable();
+	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 	/* Unleash the CPU! */
 	cpu_set(cpu, smp_commenced_mask);
 	while (!cpu_isset(cpu, cpu_online_map))
@@ -1258,10 +1369,12 @@ void __init smp_cpus_done(unsigned int max_cpus)
 	setup_ioapic_dest();
 #endif
 	zap_low_mappings();
+#ifndef CONFIG_HOTPLUG_CPU
 	/*
 	 * Disable executability of the SMP trampoline:
 	 */
 	set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+#endif
 }
 
 void __init smp_intr_init(void)
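[Editor's note: the new smp_prepare_cpu() above does not boot the CPU directly; it queues do_warm_boot_cpu() as a work item and blocks on a completion until the warm boot has finished, so the boot itself runs from the generic workqueue's process context. Here is a minimal userspace analog of that dispatch-and-wait shape, under the assumption that a pthread can stand in for the workqueue thread and a mutex/condvar pair for the kernel's struct completion; none of this is kernel API.]

/*
 * Illustrative sketch: hand work to another context and block on a
 * "completion" until it is done, mirroring the schedule_work() +
 * wait_for_completion() pairing in smp_prepare_cpu() above.
 */
#include <pthread.h>
#include <stdio.h>

struct completion {
	pthread_mutex_t lock;
	pthread_cond_t wait;
	int done;
};

static void complete(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	c->done = 1;
	pthread_cond_signal(&c->wait);
	pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	while (!c->done)
		pthread_cond_wait(&c->wait, &c->lock);
	pthread_mutex_unlock(&c->lock);
}

struct warm_boot_cpu_info {
	struct completion *complete;
	int apicid, cpu;
};

/* stands in for do_warm_boot_cpu() running off the workqueue */
static void *worker(void *p)
{
	struct warm_boot_cpu_info *info = p;

	printf("booting cpu %d (apicid %d)\n", info->cpu, info->apicid);
	complete(info->complete);	/* ack the waiter */
	return NULL;
}

int main(void)
{
	struct completion done = { PTHREAD_MUTEX_INITIALIZER,
				   PTHREAD_COND_INITIALIZER, 0 };
	struct warm_boot_cpu_info info = { &done, 1, 1 };
	pthread_t t;

	pthread_create(&t, NULL, worker, &info);	/* ~ schedule_work() */
	wait_for_completion(&done);			/* block until acked */
	pthread_join(t, NULL);
	return 0;
}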