diff options
author | Li Shaohua <shaohua.li@intel.com> | 2005-06-25 17:54:56 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-25 19:24:30 -0400 |
commit | e1367daf3eed5cd619ee88c9907e1e6ddaa58406 (patch) | |
tree | dce60efefba356e0a914669587586a6174e41b94 /arch/i386/kernel/smpboot.c | |
parent | 0bb3184df537002a742bafddf3f4fb482b7fe610 (diff) |
[PATCH] cpu state clean after hot remove
Clean CPU states in order to reuse smp boot code for CPU hotplug.
Signed-off-by: Li Shaohua <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/i386/kernel/smpboot.c')
-rw-r--r-- | arch/i386/kernel/smpboot.c | 175 |
1 files changed, 144 insertions, 31 deletions
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index fb0b200d1d85..d66bf489a2e9 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c | |||
@@ -90,6 +90,12 @@ cpumask_t cpu_callout_map; | |||
90 | EXPORT_SYMBOL(cpu_callout_map); | 90 | EXPORT_SYMBOL(cpu_callout_map); |
91 | static cpumask_t smp_commenced_mask; | 91 | static cpumask_t smp_commenced_mask; |
92 | 92 | ||
93 | /* TSC's upper 32 bits can't be written in earlier CPU (before prescott), there | ||
94 | * is no way to resync one AP against BP. TBD: for prescott and above, we | ||
95 | * should use IA64's algorithm | ||
96 | */ | ||
97 | static int __devinitdata tsc_sync_disabled; | ||
98 | |||
93 | /* Per CPU bogomips and other parameters */ | 99 | /* Per CPU bogomips and other parameters */ |
94 | struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; | 100 | struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; |
95 | EXPORT_SYMBOL(cpu_data); | 101 | EXPORT_SYMBOL(cpu_data); |
@@ -427,7 +433,7 @@ static void __devinit smp_callin(void) | |||
427 | /* | 433 | /* |
428 | * Synchronize the TSC with the BP | 434 | * Synchronize the TSC with the BP |
429 | */ | 435 | */ |
430 | if (cpu_has_tsc && cpu_khz) | 436 | if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled) |
431 | synchronize_tsc_ap(); | 437 | synchronize_tsc_ap(); |
432 | } | 438 | } |
433 | 439 | ||
@@ -507,6 +513,7 @@ static void __devinit start_secondary(void *unused) | |||
507 | lock_ipi_call_lock(); | 513 | lock_ipi_call_lock(); |
508 | cpu_set(smp_processor_id(), cpu_online_map); | 514 | cpu_set(smp_processor_id(), cpu_online_map); |
509 | unlock_ipi_call_lock(); | 515 | unlock_ipi_call_lock(); |
516 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | ||
510 | 517 | ||
511 | /* We can take interrupts now: we're officially "up". */ | 518 | /* We can take interrupts now: we're officially "up". */ |
512 | local_irq_enable(); | 519 | local_irq_enable(); |
@@ -816,8 +823,43 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
816 | #endif /* WAKE_SECONDARY_VIA_INIT */ | 823 | #endif /* WAKE_SECONDARY_VIA_INIT */ |
817 | 824 | ||
818 | extern cpumask_t cpu_initialized; | 825 | extern cpumask_t cpu_initialized; |
826 | static inline int alloc_cpu_id(void) | ||
827 | { | ||
828 | cpumask_t tmp_map; | ||
829 | int cpu; | ||
830 | cpus_complement(tmp_map, cpu_present_map); | ||
831 | cpu = first_cpu(tmp_map); | ||
832 | if (cpu >= NR_CPUS) | ||
833 | return -ENODEV; | ||
834 | return cpu; | ||
835 | } | ||
836 | |||
837 | #ifdef CONFIG_HOTPLUG_CPU | ||
838 | static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS]; | ||
839 | static inline struct task_struct * alloc_idle_task(int cpu) | ||
840 | { | ||
841 | struct task_struct *idle; | ||
842 | |||
843 | if ((idle = cpu_idle_tasks[cpu]) != NULL) { | ||
844 | /* initialize thread_struct. we really want to avoid destroy | ||
845 | * idle thread | ||
846 | */ | ||
847 | idle->thread.esp = (unsigned long)(((struct pt_regs *) | ||
848 | (THREAD_SIZE + (unsigned long) idle->thread_info)) - 1); | ||
849 | init_idle(idle, cpu); | ||
850 | return idle; | ||
851 | } | ||
852 | idle = fork_idle(cpu); | ||
853 | |||
854 | if (!IS_ERR(idle)) | ||
855 | cpu_idle_tasks[cpu] = idle; | ||
856 | return idle; | ||
857 | } | ||
858 | #else | ||
859 | #define alloc_idle_task(cpu) fork_idle(cpu) | ||
860 | #endif | ||
819 | 861 | ||
820 | static int __devinit do_boot_cpu(int apicid) | 862 | static int __devinit do_boot_cpu(int apicid, int cpu) |
821 | /* | 863 | /* |
822 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad | 864 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad |
823 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. | 865 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. |
@@ -826,16 +868,17 @@ static int __devinit do_boot_cpu(int apicid) | |||
826 | { | 868 | { |
827 | struct task_struct *idle; | 869 | struct task_struct *idle; |
828 | unsigned long boot_error; | 870 | unsigned long boot_error; |
829 | int timeout, cpu; | 871 | int timeout; |
830 | unsigned long start_eip; | 872 | unsigned long start_eip; |
831 | unsigned short nmi_high = 0, nmi_low = 0; | 873 | unsigned short nmi_high = 0, nmi_low = 0; |
832 | 874 | ||
833 | cpu = ++cpucount; | 875 | ++cpucount; |
876 | |||
834 | /* | 877 | /* |
835 | * We can't use kernel_thread since we must avoid to | 878 | * We can't use kernel_thread since we must avoid to |
836 | * reschedule the child. | 879 | * reschedule the child. |
837 | */ | 880 | */ |
838 | idle = fork_idle(cpu); | 881 | idle = alloc_idle_task(cpu); |
839 | if (IS_ERR(idle)) | 882 | if (IS_ERR(idle)) |
840 | panic("failed fork for CPU %d", cpu); | 883 | panic("failed fork for CPU %d", cpu); |
841 | idle->thread.eip = (unsigned long) start_secondary; | 884 | idle->thread.eip = (unsigned long) start_secondary; |
@@ -902,13 +945,16 @@ static int __devinit do_boot_cpu(int apicid) | |||
902 | inquire_remote_apic(apicid); | 945 | inquire_remote_apic(apicid); |
903 | } | 946 | } |
904 | } | 947 | } |
905 | x86_cpu_to_apicid[cpu] = apicid; | 948 | |
906 | if (boot_error) { | 949 | if (boot_error) { |
907 | /* Try to put things back the way they were before ... */ | 950 | /* Try to put things back the way they were before ... */ |
908 | unmap_cpu_to_logical_apicid(cpu); | 951 | unmap_cpu_to_logical_apicid(cpu); |
909 | cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ | 952 | cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ |
910 | cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ | 953 | cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ |
911 | cpucount--; | 954 | cpucount--; |
955 | } else { | ||
956 | x86_cpu_to_apicid[cpu] = apicid; | ||
957 | cpu_set(cpu, cpu_present_map); | ||
912 | } | 958 | } |
913 | 959 | ||
914 | /* mark "stuck" area as not stuck */ | 960 | /* mark "stuck" area as not stuck */ |
@@ -917,6 +963,75 @@ static int __devinit do_boot_cpu(int apicid) | |||
917 | return boot_error; | 963 | return boot_error; |
918 | } | 964 | } |
919 | 965 | ||
966 | #ifdef CONFIG_HOTPLUG_CPU | ||
967 | void cpu_exit_clear(void) | ||
968 | { | ||
969 | int cpu = raw_smp_processor_id(); | ||
970 | |||
971 | idle_task_exit(); | ||
972 | |||
973 | cpucount --; | ||
974 | cpu_uninit(); | ||
975 | irq_ctx_exit(cpu); | ||
976 | |||
977 | cpu_clear(cpu, cpu_callout_map); | ||
978 | cpu_clear(cpu, cpu_callin_map); | ||
979 | cpu_clear(cpu, cpu_present_map); | ||
980 | |||
981 | cpu_clear(cpu, smp_commenced_mask); | ||
982 | unmap_cpu_to_logical_apicid(cpu); | ||
983 | } | ||
984 | |||
985 | struct warm_boot_cpu_info { | ||
986 | struct completion *complete; | ||
987 | int apicid; | ||
988 | int cpu; | ||
989 | }; | ||
990 | |||
991 | static void __devinit do_warm_boot_cpu(void *p) | ||
992 | { | ||
993 | struct warm_boot_cpu_info *info = p; | ||
994 | do_boot_cpu(info->apicid, info->cpu); | ||
995 | complete(info->complete); | ||
996 | } | ||
997 | |||
998 | int __devinit smp_prepare_cpu(int cpu) | ||
999 | { | ||
1000 | DECLARE_COMPLETION(done); | ||
1001 | struct warm_boot_cpu_info info; | ||
1002 | struct work_struct task; | ||
1003 | int apicid, ret; | ||
1004 | |||
1005 | lock_cpu_hotplug(); | ||
1006 | apicid = x86_cpu_to_apicid[cpu]; | ||
1007 | if (apicid == BAD_APICID) { | ||
1008 | ret = -ENODEV; | ||
1009 | goto exit; | ||
1010 | } | ||
1011 | |||
1012 | info.complete = &done; | ||
1013 | info.apicid = apicid; | ||
1014 | info.cpu = cpu; | ||
1015 | INIT_WORK(&task, do_warm_boot_cpu, &info); | ||
1016 | |||
1017 | tsc_sync_disabled = 1; | ||
1018 | |||
1019 | /* init low mem mapping */ | ||
1020 | memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, | ||
1021 | sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS); | ||
1022 | flush_tlb_all(); | ||
1023 | schedule_work(&task); | ||
1024 | wait_for_completion(&done); | ||
1025 | |||
1026 | tsc_sync_disabled = 0; | ||
1027 | zap_low_mappings(); | ||
1028 | ret = 0; | ||
1029 | exit: | ||
1030 | unlock_cpu_hotplug(); | ||
1031 | return ret; | ||
1032 | } | ||
1033 | #endif | ||
1034 | |||
920 | static void smp_tune_scheduling (void) | 1035 | static void smp_tune_scheduling (void) |
921 | { | 1036 | { |
922 | unsigned long cachesize; /* kB */ | 1037 | unsigned long cachesize; /* kB */ |
@@ -1069,7 +1184,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) | |||
1069 | if (max_cpus <= cpucount+1) | 1184 | if (max_cpus <= cpucount+1) |
1070 | continue; | 1185 | continue; |
1071 | 1186 | ||
1072 | if (do_boot_cpu(apicid)) | 1187 | if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu)) |
1073 | printk("CPU #%d not responding - cannot use it.\n", | 1188 | printk("CPU #%d not responding - cannot use it.\n", |
1074 | apicid); | 1189 | apicid); |
1075 | else | 1190 | else |
@@ -1149,25 +1264,24 @@ void __devinit smp_prepare_boot_cpu(void) | |||
1149 | { | 1264 | { |
1150 | cpu_set(smp_processor_id(), cpu_online_map); | 1265 | cpu_set(smp_processor_id(), cpu_online_map); |
1151 | cpu_set(smp_processor_id(), cpu_callout_map); | 1266 | cpu_set(smp_processor_id(), cpu_callout_map); |
1267 | cpu_set(smp_processor_id(), cpu_present_map); | ||
1268 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | ||
1152 | } | 1269 | } |
1153 | 1270 | ||
1154 | #ifdef CONFIG_HOTPLUG_CPU | 1271 | #ifdef CONFIG_HOTPLUG_CPU |
1155 | 1272 | static void | |
1156 | /* must be called with the cpucontrol mutex held */ | 1273 | remove_siblinginfo(int cpu) |
1157 | static int __devinit cpu_enable(unsigned int cpu) | ||
1158 | { | 1274 | { |
1159 | /* get the target out of its holding state */ | 1275 | int sibling; |
1160 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | 1276 | |
1161 | wmb(); | 1277 | for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) |
1162 | 1278 | cpu_clear(cpu, cpu_sibling_map[sibling]); | |
1163 | /* wait for the processor to ack it. timeout? */ | 1279 | for_each_cpu_mask(sibling, cpu_core_map[cpu]) |
1164 | while (!cpu_online(cpu)) | 1280 | cpu_clear(cpu, cpu_core_map[sibling]); |
1165 | cpu_relax(); | 1281 | cpus_clear(cpu_sibling_map[cpu]); |
1166 | 1282 | cpus_clear(cpu_core_map[cpu]); | |
1167 | fixup_irqs(cpu_online_map); | 1283 | phys_proc_id[cpu] = BAD_APICID; |
1168 | /* counter the disable in fixup_irqs() */ | 1284 | cpu_core_id[cpu] = BAD_APICID; |
1169 | local_irq_enable(); | ||
1170 | return 0; | ||
1171 | } | 1285 | } |
1172 | 1286 | ||
1173 | int __cpu_disable(void) | 1287 | int __cpu_disable(void) |
@@ -1193,6 +1307,8 @@ int __cpu_disable(void) | |||
1193 | mdelay(1); | 1307 | mdelay(1); |
1194 | local_irq_disable(); | 1308 | local_irq_disable(); |
1195 | 1309 | ||
1310 | remove_siblinginfo(cpu); | ||
1311 | |||
1196 | cpu_clear(cpu, map); | 1312 | cpu_clear(cpu, map); |
1197 | fixup_irqs(map); | 1313 | fixup_irqs(map); |
1198 | /* It's now safe to remove this processor from the online map */ | 1314 | /* It's now safe to remove this processor from the online map */ |
@@ -1207,8 +1323,10 @@ void __cpu_die(unsigned int cpu) | |||
1207 | 1323 | ||
1208 | for (i = 0; i < 10; i++) { | 1324 | for (i = 0; i < 10; i++) { |
1209 | /* They ack this in play_dead by setting CPU_DEAD */ | 1325 | /* They ack this in play_dead by setting CPU_DEAD */ |
1210 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) | 1326 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { |
1327 | printk ("CPU %d is now offline\n", cpu); | ||
1211 | return; | 1328 | return; |
1329 | } | ||
1212 | current->state = TASK_UNINTERRUPTIBLE; | 1330 | current->state = TASK_UNINTERRUPTIBLE; |
1213 | schedule_timeout(HZ/10); | 1331 | schedule_timeout(HZ/10); |
1214 | } | 1332 | } |
@@ -1236,15 +1354,8 @@ int __devinit __cpu_up(unsigned int cpu) | |||
1236 | return -EIO; | 1354 | return -EIO; |
1237 | } | 1355 | } |
1238 | 1356 | ||
1239 | #ifdef CONFIG_HOTPLUG_CPU | ||
1240 | /* Already up, and in cpu_quiescent now? */ | ||
1241 | if (cpu_isset(cpu, smp_commenced_mask)) { | ||
1242 | cpu_enable(cpu); | ||
1243 | return 0; | ||
1244 | } | ||
1245 | #endif | ||
1246 | |||
1247 | local_irq_enable(); | 1357 | local_irq_enable(); |
1358 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | ||
1248 | /* Unleash the CPU! */ | 1359 | /* Unleash the CPU! */ |
1249 | cpu_set(cpu, smp_commenced_mask); | 1360 | cpu_set(cpu, smp_commenced_mask); |
1250 | while (!cpu_isset(cpu, cpu_online_map)) | 1361 | while (!cpu_isset(cpu, cpu_online_map)) |
@@ -1258,10 +1369,12 @@ void __init smp_cpus_done(unsigned int max_cpus) | |||
1258 | setup_ioapic_dest(); | 1369 | setup_ioapic_dest(); |
1259 | #endif | 1370 | #endif |
1260 | zap_low_mappings(); | 1371 | zap_low_mappings(); |
1372 | #ifndef CONFIG_HOTPLUG_CPU | ||
1261 | /* | 1373 | /* |
1262 | * Disable executability of the SMP trampoline: | 1374 | * Disable executability of the SMP trampoline: |
1263 | */ | 1375 | */ |
1264 | set_kernel_exec((unsigned long)trampoline_base, trampoline_exec); | 1376 | set_kernel_exec((unsigned long)trampoline_base, trampoline_exec); |
1377 | #endif | ||
1265 | } | 1378 | } |
1266 | 1379 | ||
1267 | void __init smp_intr_init(void) | 1380 | void __init smp_intr_init(void) |