author		Jeff Garzik <jgarzik@pobox.com>	2005-08-10 13:46:28 -0400
committer	Jeff Garzik <jgarzik@pobox.com>	2005-08-10 13:46:28 -0400
commit		2f058256cb64e346f4fb4499ff4e0f1c2791a4b4
tree		91e06602f4d3abb6812ea8c9bc9ba4501e14c84e /arch/i386/kernel
parent		0274aa2506fd2fe89a58dd6cd64d3b3f7b976af8
parent		86b3786078d63242d3194ffc58ae8dae1d1bbef3

    Merge /spare/repo/linux-2.6/
Diffstat (limited to 'arch/i386/kernel')
 arch/i386/kernel/Makefile                          |   1
 arch/i386/kernel/acpi/Makefile                     |   4
 arch/i386/kernel/acpi/boot.c                       | 277
 arch/i386/kernel/acpi/cstate.c                     | 103
 arch/i386/kernel/acpi/sleep.c                      |  27
 arch/i386/kernel/acpi/wakeup.S                     |   5
 arch/i386/kernel/apic.c                            |  84
 arch/i386/kernel/apm.c                             |  20
 arch/i386/kernel/cpu/common.c                      |  52
 arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c        |   7
 arch/i386/kernel/cpu/cpufreq/gx-suspmod.c          |   2
 arch/i386/kernel/cpu/cpufreq/powernow-k7.c         |   4
 arch/i386/kernel/cpu/cpufreq/powernow-k8.c         |  43
 arch/i386/kernel/cpu/cpufreq/powernow-k8.h         |  32
 arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c  |   2
 arch/i386/kernel/cpu/intel.c                       |  14
 arch/i386/kernel/cpu/intel_cacheinfo.c             |  24
 arch/i386/kernel/cpu/mcheck/k7.c                   |   2
 arch/i386/kernel/cpu/mcheck/mce.c                  |   4
 arch/i386/kernel/cpu/mcheck/p4.c                   |   4
 arch/i386/kernel/cpu/mcheck/p5.c                   |   2
 arch/i386/kernel/cpu/mcheck/p6.c                   |   2
 arch/i386/kernel/cpu/mcheck/winchip.c              |   2
 arch/i386/kernel/cpu/mtrr/generic.c                |  23
 arch/i386/kernel/cpu/mtrr/main.c                   |  99
 arch/i386/kernel/cpu/mtrr/mtrr.h                   |   1
 arch/i386/kernel/cpu/proc.c                        |   2
 arch/i386/kernel/cpu/transmeta.c                   |   6
 arch/i386/kernel/crash.c                           | 223
 arch/i386/kernel/dmi_scan.c                        | 391
 arch/i386/kernel/efi.c                             |   4
 arch/i386/kernel/head.S                            |   6
 arch/i386/kernel/i386_ksyms.c                      | 160
 arch/i386/kernel/i387.c                            |  14
 arch/i386/kernel/i8259.c                           |  12
 arch/i386/kernel/io_apic.c                         |  58
 arch/i386/kernel/irq.c                             |  72
 arch/i386/kernel/kprobes.c                         | 181
 arch/i386/kernel/machine_kexec.c                   | 226
 arch/i386/kernel/mpparse.c                         |  41
 arch/i386/kernel/nmi.c                             |  24
 arch/i386/kernel/numaq.c                           |   9
 arch/i386/kernel/pci-dma.c                         |   3
 arch/i386/kernel/process.c                         | 114
 arch/i386/kernel/ptrace.c                          |   2
 arch/i386/kernel/reboot.c                          |  98
 arch/i386/kernel/relocate_kernel.S                 | 120
 arch/i386/kernel/setup.c                           | 111
 arch/i386/kernel/signal.c                          |  35
 arch/i386/kernel/smp.c                             |  37
 arch/i386/kernel/smpboot.c                         | 367
 arch/i386/kernel/syscall_table.S                   |   9
 arch/i386/kernel/sysenter.c                        |  12
 arch/i386/kernel/time.c                            |  13
 arch/i386/kernel/time_hpet.c                       |   2
 arch/i386/kernel/timers/common.c                   |  14
 arch/i386/kernel/timers/timer.c                    |   9
 arch/i386/kernel/timers/timer_cyclone.c            |   4
 arch/i386/kernel/timers/timer_hpet.c               |   7
 arch/i386/kernel/timers/timer_pit.c                |   4
 arch/i386/kernel/timers/timer_pm.c                 |   1
 arch/i386/kernel/timers/timer_tsc.c                |  13
 arch/i386/kernel/traps.c                           |  72
 arch/i386/kernel/vmlinux.lds.S                     |  62
 64 files changed, 2397 insertions(+), 981 deletions(-)
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 51ecd512603d..4cc83b322b36 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o nmi.o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups.o
+obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o crash.o
 obj-$(CONFIG_X86_NUMAQ)	+= numaq.o
 obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile
index ee75cb286cfe..5e291a20c03d 100644
--- a/arch/i386/kernel/acpi/Makefile
+++ b/arch/i386/kernel/acpi/Makefile
@@ -2,3 +2,7 @@ obj-$(CONFIG_ACPI_BOOT) := boot.o
 obj-$(CONFIG_X86_IO_APIC)	+= earlyquirk.o
 obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup.o
 
+ifneq ($(CONFIG_ACPI_PROCESSOR),)
+obj-y				+= cstate.o
+endif
+
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index 848bb97af7ca..b7808a89d945 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -29,6 +29,7 @@
 #include <linux/efi.h>
 #include <linux/irq.h>
 #include <linux/module.h>
+#include <linux/dmi.h>
 
 #include <asm/pgtable.h>
 #include <asm/io_apic.h>
@@ -158,9 +159,15 @@ char *__acpi_map_table(unsigned long phys, unsigned long size)
 #endif
 
 #ifdef CONFIG_PCI_MMCONFIG
-static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
+struct acpi_table_mcfg_config *pci_mmcfg_config;
+int pci_mmcfg_config_num;
+
+int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
 {
 	struct acpi_table_mcfg *mcfg;
+	unsigned long i;
+	int config_size;
 
 	if (!phys_addr || !size)
 		return -EINVAL;
@@ -171,18 +178,38 @@ static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
 		return -ENODEV;
 	}
 
-	if (mcfg->base_reserved) {
-		printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
+	/* how many config structures do we have */
+	pci_mmcfg_config_num = 0;
+	i = size - sizeof(struct acpi_table_mcfg);
+	while (i >= sizeof(struct acpi_table_mcfg_config)) {
+		++pci_mmcfg_config_num;
+		i -= sizeof(struct acpi_table_mcfg_config);
+	};
+	if (pci_mmcfg_config_num == 0) {
+		printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
 		return -ENODEV;
 	}
 
-	pci_mmcfg_base_addr = mcfg->base_address;
+	config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
+	pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
+	if (!pci_mmcfg_config) {
+		printk(KERN_WARNING PREFIX
+		       "No memory for MCFG config tables\n");
+		return -ENOMEM;
+	}
+
+	memcpy(pci_mmcfg_config, &mcfg->config, config_size);
+	for (i = 0; i < pci_mmcfg_config_num; ++i) {
+		if (mcfg->config[i].base_reserved) {
+			printk(KERN_ERR PREFIX
+			       "MMCONFIG not in low 4GB of memory\n");
+			return -ENODEV;
+		}
+	}
 
 	return 0;
 }
-#else
-#define acpi_parse_mcfg NULL
-#endif /* !CONFIG_PCI_MMCONFIG */
+#endif /* CONFIG_PCI_MMCONFIG */
 
 #ifdef CONFIG_X86_LOCAL_APIC
 static int __init
@@ -506,6 +533,22 @@ acpi_unmap_lsapic(int cpu)
 EXPORT_SYMBOL(acpi_unmap_lsapic);
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
+int
+acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
+{
+	/* TBD */
+	return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_register_ioapic);
+
+int
+acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
+{
+	/* TBD */
+	return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_unregister_ioapic);
+
 static unsigned long __init
 acpi_scan_rsdp (
 	unsigned long		start,
@@ -815,6 +858,219 @@ acpi_process_madt(void)
 	return;
 }
 
+extern int acpi_force;
+
+#ifdef __i386__
+
+#ifdef CONFIG_ACPI_PCI
+static int __init disable_acpi_irq(struct dmi_system_id *d)
+{
+	if (!acpi_force) {
+		printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
+		       d->ident);
+		acpi_noirq_set();
+	}
+	return 0;
+}
+
+static int __init disable_acpi_pci(struct dmi_system_id *d)
+{
+	if (!acpi_force) {
+		printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
+		       d->ident);
+		acpi_disable_pci();
+	}
+	return 0;
+}
+#endif
+
+static int __init dmi_disable_acpi(struct dmi_system_id *d)
+{
+	if (!acpi_force) {
+		printk(KERN_NOTICE "%s detected: acpi off\n",d->ident);
+		disable_acpi();
+	} else {
+		printk(KERN_NOTICE
+		       "Warning: DMI blacklist says broken, but acpi forced\n");
+	}
+	return 0;
+}
+
+/*
+ * Limit ACPI to CPU enumeration for HT
+ */
+static int __init force_acpi_ht(struct dmi_system_id *d)
+{
+	if (!acpi_force) {
+		printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident);
+		disable_acpi();
+		acpi_ht = 1;
+	} else {
+		printk(KERN_NOTICE
+		       "Warning: acpi=force overrules DMI blacklist: acpi=ht\n");
+	}
+	return 0;
+}
+
+/*
+ * If your system is blacklisted here, but you find that acpi=force
+ * works for you, please contact acpi-devel@sourceforge.net
+ */
+static struct dmi_system_id __initdata acpi_dmi_table[] = {
+	/*
+	 * Boxes that need ACPI disabled
+	 */
+	{
+		.callback = dmi_disable_acpi,
+		.ident = "IBM Thinkpad",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+			DMI_MATCH(DMI_BOARD_NAME, "2629H1G"),
+		},
+	},
+
+	/*
+	 * Boxes that need acpi=ht
+	 */
+	{
+		.callback = force_acpi_ht,
+		.ident = "FSC Primergy T850",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
+		},
+	},
+	{
+		.callback = force_acpi_ht,
+		.ident = "DELL GX240",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
+			DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
+		},
+	},
+	{
+		.callback = force_acpi_ht,
+		.ident = "HP VISUALIZE NT Workstation",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
+		},
+	},
+	{
+		.callback = force_acpi_ht,
+		.ident = "Compaq Workstation W8000",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
+		},
+	},
+	{
+		.callback = force_acpi_ht,
+		.ident = "ASUS P4B266",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+			DMI_MATCH(DMI_BOARD_NAME, "P4B266"),
+		},
+	},
+	{
+		.callback = force_acpi_ht,
+		.ident = "ASUS P2B-DS",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+			DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"),
+		},
+	},
+	{
+		.callback = force_acpi_ht,
+		.ident = "ASUS CUR-DLS",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+			DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"),
+		},
+	},
+	{
+		.callback = force_acpi_ht,
+		.ident = "ABIT i440BX-W83977",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
+			DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
+		},
+	},
+	{
+		.callback = force_acpi_ht,
+		.ident = "IBM Bladecenter",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+			DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
+		},
+	},
+	{
+		.callback = force_acpi_ht,
+		.ident = "IBM eServer xSeries 360",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+			DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
+		},
+	},
+	{
+		.callback = force_acpi_ht,
+		.ident = "IBM eserver xSeries 330",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+			DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
+		},
+	},
+	{
+		.callback = force_acpi_ht,
+		.ident = "IBM eserver xSeries 440",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
+		},
+	},
+
+#ifdef CONFIG_ACPI_PCI
+	/*
+	 * Boxes that need ACPI PCI IRQ routing disabled
+	 */
+	{
+		.callback = disable_acpi_irq,
+		.ident = "ASUS A7V",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
+			DMI_MATCH(DMI_BOARD_NAME, "<A7V>"),
+			/* newer BIOS, Revision 1011, does work */
+			DMI_MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"),
+		},
+	},
+
+	/*
+	 * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
+	 */
+	{	/* _BBN 0 bug */
+		.callback = disable_acpi_pci,
+		.ident = "ASUS PR-DLS",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+			DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"),
+			DMI_MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"),
+			DMI_MATCH(DMI_BIOS_DATE, "03/21/2003")
+		},
+	},
+	{
+		.callback = disable_acpi_pci,
+		.ident = "Acer TravelMate 36x Laptop",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
+		},
+	},
+#endif
+	{ }
+};
+
+#endif /* __i386__ */
+
 /*
  * acpi_boot_table_init() and acpi_boot_init()
  * called from setup_arch(), always.
@@ -843,6 +1099,10 @@ acpi_boot_table_init(void)
 {
 	int error;
 
+#ifdef __i386__
+	dmi_check_system(acpi_dmi_table);
+#endif
+
 	/*
 	 * If acpi_disabled, bail out
 	 * One exception: acpi=ht continues far enough to enumerate LAPICs
@@ -870,8 +1130,6 @@
 	 */
 	error = acpi_blacklisted();
 	if (error) {
-		extern int acpi_force;
-
 		if (acpi_force) {
 			printk(KERN_WARNING PREFIX "acpi=force override\n");
 		} else {
@@ -907,7 +1165,6 @@ int __init acpi_boot_init(void)
 	acpi_process_madt();
 
 	acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
-	acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
 
 	return 0;
 }
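Note on the DMI blacklist above: dmi_check_system() walks a NULL-terminated array of dmi_system_id entries and fires an entry's callback only when every DMI_MATCH (field, substring) pair is found in the machine's DMI data. A minimal userspace sketch of that matching logic follows -- the field names and sample data are hypothetical, not the kernel API:

/*
 * Userspace sketch of the dmi_check_system() matching logic: an entry
 * matches when every listed (field, substring) pair is found in the
 * system's DMI strings. Field indices and sample data are made up.
 */
#include <stdio.h>
#include <string.h>

enum { FLD_BOARD_VENDOR, FLD_BOARD_NAME, FLD_MAX };

struct match { int field; const char *substr; };

struct id {
	int (*callback)(const struct id *);
	const char *ident;
	struct match matches[4];	/* unused slots have substr == NULL */
};

static int say_hit(const struct id *d)
{
	printf("%s detected\n", d->ident);
	return 0;
}

static int check_system(const struct id *table, const char *dmi[FLD_MAX])
{
	int count = 0;

	for (const struct id *d = table; d->ident; d++) {
		int ok = 1;

		for (int i = 0; i < 4 && d->matches[i].substr; i++)
			if (!strstr(dmi[d->matches[i].field],
				    d->matches[i].substr))
				ok = 0;
		if (ok && d->callback(d) == 0)
			count++;	/* number of matching entries */
	}
	return count;
}

int main(void)
{
	static const struct id table[] = {
		{ say_hit, "IBM Thinkpad",
		  { { FLD_BOARD_VENDOR, "IBM" },
		    { FLD_BOARD_NAME, "2629H1G" } } },
		{ 0 }
	};
	const char *dmi[FLD_MAX] = { "IBM", "2629H1G" };

	return !check_system(table, dmi);
}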
diff --git a/arch/i386/kernel/acpi/cstate.c b/arch/i386/kernel/acpi/cstate.c
new file mode 100644
index 000000000000..4c3036ba65df
--- /dev/null
+++ b/arch/i386/kernel/acpi/cstate.c
@@ -0,0 +1,103 @@
+/*
+ * arch/i386/kernel/acpi/cstate.c
+ *
+ * Copyright (C) 2005 Intel Corporation
+ * 	Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ * 	- Added _PDC for SMP C-states on Intel CPUs
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+
+#include <acpi/processor.h>
+#include <asm/acpi.h>
+
+static void acpi_processor_power_init_intel_pdc(struct acpi_processor_power
+						*pow)
+{
+	struct acpi_object_list *obj_list;
+	union acpi_object *obj;
+	u32 *buf;
+
+	/* allocate and initialize pdc. It will be used later. */
+	obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL);
+	if (!obj_list) {
+		printk(KERN_ERR "Memory allocation error\n");
+		return;
+	}
+
+	obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL);
+	if (!obj) {
+		printk(KERN_ERR "Memory allocation error\n");
+		kfree(obj_list);
+		return;
+	}
+
+	buf = kmalloc(12, GFP_KERNEL);
+	if (!buf) {
+		printk(KERN_ERR "Memory allocation error\n");
+		kfree(obj);
+		kfree(obj_list);
+		return;
+	}
+
+	buf[0] = ACPI_PDC_REVISION_ID;
+	buf[1] = 1;
+	buf[2] = ACPI_PDC_C_CAPABILITY_SMP;
+
+	obj->type = ACPI_TYPE_BUFFER;
+	obj->buffer.length = 12;
+	obj->buffer.pointer = (u8 *) buf;
+	obj_list->count = 1;
+	obj_list->pointer = obj;
+	pow->pdc = obj_list;
+
+	return;
+}
+
+/* Initialize _PDC data based on the CPU vendor */
+void acpi_processor_power_init_pdc(struct acpi_processor_power *pow,
+				   unsigned int cpu)
+{
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+
+	pow->pdc = NULL;
+	if (c->x86_vendor == X86_VENDOR_INTEL)
+		acpi_processor_power_init_intel_pdc(pow);
+
+	return;
+}
+
+EXPORT_SYMBOL(acpi_processor_power_init_pdc);
+
+/*
+ * Initialize bm_flags based on the CPU cache properties
+ * On SMP it depends on cache configuration
+ * - When cache is not shared among all CPUs, we flush cache
+ *   before entering C3.
+ * - When cache is shared among all CPUs, we use bm_check
+ *   mechanism as in UP case
+ *
+ * This routine is called only after all the CPUs are online
+ */
+void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
+					unsigned int cpu)
+{
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+
+	flags->bm_check = 0;
+	if (num_online_cpus() == 1)
+		flags->bm_check = 1;
+	else if (c->x86_vendor == X86_VENDOR_INTEL) {
+		/*
+		 * Today all CPUs that support C3 share cache.
+		 * TBD: This needs to look at cache shared map, once
+		 * multi-core detection patch makes to the base.
+		 */
+		flags->bm_check = 1;
+	}
+}
+
+EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
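Note on cstate.c above: the _PDC object handed to ACPI is a 12-byte buffer of three 32-bit words -- revision ID, a count of capability dwords, and the capability bits. The sketch below mirrors that layout in plain C; the two constant values are assumed placeholders, not the kernel's definitions:

/*
 * Sketch of the 12-byte _PDC capability buffer built by
 * acpi_processor_power_init_intel_pdc(): three u32 words.
 * The constant values here are illustrative assumptions.
 */
#include <stdint.h>
#include <stdio.h>

#define ACPI_PDC_REVISION_ID		0x1	/* assumed value */
#define ACPI_PDC_C_CAPABILITY_SMP	0x1	/* assumed value */

int main(void)
{
	uint32_t buf[3];

	buf[0] = ACPI_PDC_REVISION_ID;		/* interface revision */
	buf[1] = 1;				/* number of capability dwords */
	buf[2] = ACPI_PDC_C_CAPABILITY_SMP;	/* C-state caps on SMP */

	printf("%zu-byte _PDC buffer: %#x %#x %#x\n", sizeof(buf),
	       (unsigned)buf[0], (unsigned)buf[1], (unsigned)buf[2]);
	return 0;
}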
diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c
index 28bb0514bb6e..c1af93032ff3 100644
--- a/arch/i386/kernel/acpi/sleep.c
+++ b/arch/i386/kernel/acpi/sleep.c
@@ -7,6 +7,7 @@
 
 #include <linux/acpi.h>
 #include <linux/bootmem.h>
+#include <linux/dmi.h>
 #include <asm/smp.h>
 #include <asm/tlbflush.h>
 
@@ -91,3 +92,29 @@ static int __init acpi_sleep_setup(char *str)
 
 
 __setup("acpi_sleep=", acpi_sleep_setup);
+
+
+static __init int reset_videomode_after_s3(struct dmi_system_id *d)
+{
+	acpi_video_flags |= 2;
+	return 0;
+}
+
+static __initdata struct dmi_system_id acpisleep_dmi_table[] = {
+	{	/* Reset video mode after returning from ACPI S3 sleep */
+		.callback = reset_videomode_after_s3,
+		.ident = "Toshiba Satellite 4030cdt",
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
+		},
+	},
+	{ }
+};
+
+static int __init acpisleep_dmi_init(void)
+{
+	dmi_check_system(acpisleep_dmi_table);
+	return 0;
+}
+
+core_initcall(acpisleep_dmi_init);
diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S
index 39d32484f6f5..44d886c745ec 100644
--- a/arch/i386/kernel/acpi/wakeup.S
+++ b/arch/i386/kernel/acpi/wakeup.S
@@ -74,8 +74,9 @@ wakeup_code:
 	movw	%ax, %fs
 	movw	$0x0e00 + 'i', %fs:(0x12)
 
-	# need a gdt
-	lgdt	real_save_gdt - wakeup_code
+	# need a gdt -- use lgdtl to force 32-bit operands, in case
+	# the GDT is located past 16 megabytes.
+	lgdtl	real_save_gdt - wakeup_code
 
 	movl	real_save_cr0 - wakeup_code, %eax
 	movl	%eax, %cr0
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index d509836b70c3..bd1dbf3bd223 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -26,6 +26,7 @@
 #include <linux/mc146818rtc.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
+#include <linux/cpu.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -34,12 +35,18 @@
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
+#include <asm/i8253.h>
 
 #include <mach_apic.h>
 
 #include "io_ports.h"
 
 /*
+ * Knob to control our willingness to enable the local APIC.
+ */
+int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+
+/*
  * Debug level
  */
 int apic_verbosity;
@@ -205,7 +212,7 @@ void __init connect_bsp_APIC(void)
 	enable_apic_mode();
 }
 
-void disconnect_bsp_APIC(void)
+void disconnect_bsp_APIC(int virt_wire_setup)
 {
 	if (pic_mode) {
 		/*
@@ -219,6 +226,42 @@
 		outb(0x70, 0x22);
 		outb(0x00, 0x23);
 	}
+	else {
+		/* Go back to Virtual Wire compatibility mode */
+		unsigned long value;
+
+		/* For the spurious interrupt use vector F, and enable it */
+		value = apic_read(APIC_SPIV);
+		value &= ~APIC_VECTOR_MASK;
+		value |= APIC_SPIV_APIC_ENABLED;
+		value |= 0xf;
+		apic_write_around(APIC_SPIV, value);
+
+		if (!virt_wire_setup) {
+			/* For LVT0 make it edge triggered, active high, external and enabled */
+			value = apic_read(APIC_LVT0);
+			value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+				APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+				APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
+			value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+			value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
+			apic_write_around(APIC_LVT0, value);
+		}
+		else {
+			/* Disable LVT0 */
+			apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
+		}
+
+		/* For LVT1 make it edge triggered, active high, nmi and enabled */
+		value = apic_read(APIC_LVT1);
+		value &= ~(
+			APIC_MODE_MASK | APIC_SEND_PENDING |
+			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
+		apic_write_around(APIC_LVT1, value);
+	}
 }
 
 void disable_local_APIC(void)
@@ -363,7 +406,7 @@ void __init init_bsp_APIC(void)
 	apic_write_around(APIC_LVT1, value);
 }
 
-void __init setup_local_APIC (void)
+void __devinit setup_local_APIC(void)
 {
 	unsigned long oldvalue, value, ver, maxlvt;
 
@@ -634,7 +677,7 @@ static struct sys_device device_lapic = {
 	.cls	= &lapic_sysclass,
 };
 
-static void __init apic_pm_activate(void)
+static void __devinit apic_pm_activate(void)
 {
 	apic_pm_state.active = 1;
 }
@@ -665,26 +708,6 @@ static void apic_pm_activate(void) { }
  * Original code written by Keir Fraser.
  */
 
-/*
- * Knob to control our willingness to enable the local APIC.
- */
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
-
-static int __init lapic_disable(char *str)
-{
-	enable_local_apic = -1;
-	clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
-	return 0;
-}
-__setup("nolapic", lapic_disable);
-
-static int __init lapic_enable(char *str)
-{
-	enable_local_apic = 1;
-	return 0;
-}
-__setup("lapic", lapic_enable);
-
 static int __init apic_set_verbosity(char *str)
 {
 	if (strcmp("debug", str) == 0)
@@ -855,9 +878,8 @@ fake_ioapic_page:
  * but we do not accept timer interrupts yet. We only allow the BP
  * to calibrate.
  */
-static unsigned int __init get_8254_timer_count(void)
+static unsigned int __devinit get_8254_timer_count(void)
 {
-	extern spinlock_t i8253_lock;
 	unsigned long flags;
 
 	unsigned int count;
@@ -874,7 +896,7 @@ static unsigned int __init get_8254_timer_count(void)
 }
 
 /* next tick in 8254 can be caught by catching timer wraparound */
-static void __init wait_8254_wraparound(void)
+static void __devinit wait_8254_wraparound(void)
 {
 	unsigned int curr_count, prev_count;
 
@@ -894,7 +916,7 @@ static void __init wait_8254_wraparound(void)
  * Default initialization for 8254 timers. If we use other timers like HPET,
  * we override this later
  */
-void (*wait_timer_tick)(void) __initdata = wait_8254_wraparound;
+void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound;
 
 /*
  * This function sets up the local APIC timer, with a timeout of
@@ -930,7 +952,7 @@ static void __setup_APIC_LVTT(unsigned int clocks)
 	apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
 }
 
-static void __init setup_APIC_timer(unsigned int clocks)
+static void __devinit setup_APIC_timer(unsigned int clocks)
 {
 	unsigned long flags;
 
@@ -1043,12 +1065,12 @@ void __init setup_boot_APIC_clock(void)
 	local_irq_enable();
 }
 
-void __init setup_secondary_APIC_clock(void)
+void __devinit setup_secondary_APIC_clock(void)
 {
 	setup_APIC_timer(calibration_result);
 }
 
-void __init disable_APIC_timer(void)
+void __devinit disable_APIC_timer(void)
 {
 	if (using_apic_timer) {
 		unsigned long v;
@@ -1133,7 +1155,7 @@ inline void smp_local_timer_interrupt(struct pt_regs * regs)
 	}
 
 #ifdef CONFIG_SMP
-		update_process_times(user_mode(regs));
+		update_process_times(user_mode_vm(regs));
 #endif
 	}
 
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 0ff65abcd56c..d7811c4e8b50 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -228,10 +228,10 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/desc.h>
+#include <asm/i8253.h>
 
 #include "io_ports.h"
 
-extern spinlock_t i8253_lock;
 extern unsigned long get_cmos_time(void);
 extern void machine_real_restart(unsigned char *, int);
 
@@ -346,10 +346,10 @@ extern int (*console_blank_hook)(int);
 struct apm_user {
 	int		magic;
 	struct apm_user *next;
-	int		suser: 1;
-	int		writer: 1;
-	int		reader: 1;
-	int		suspend_wait: 1;
+	unsigned int	suser: 1;
+	unsigned int	writer: 1;
+	unsigned int	reader: 1;
+	unsigned int	suspend_wait: 1;
 	int		suspend_result;
 	int		suspends_pending;
 	int		standbys_pending;
@@ -911,14 +911,7 @@ static void apm_power_off(void)
 		0xcd, 0x15		/* int $0x15 */
 	};
 
-	/*
-	 * This may be called on an SMP machine.
-	 */
-#ifdef CONFIG_SMP
 	/* Some bioses don't like being called from CPU != 0 */
-	set_cpus_allowed(current, cpumask_of_cpu(0));
-	BUG_ON(smp_processor_id() != 0);
-#endif
 	if (apm_info.realmode_power_off)
 	{
 		(void)apm_save_cpus();
@@ -1168,8 +1161,7 @@ static void get_time_diff(void)
 static void reinit_timer(void)
 {
 #ifdef INIT_TIMER_AFTER_SUSPEND
 	unsigned long flags;
-	extern spinlock_t i8253_lock;
 
 	spin_lock_irqsave(&i8253_lock, flags);
 	/* set the clock to 100 Hz */
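Note on the apm_user change above: a plain 'int' bit-field may be signed, and a signed 1-bit field can hold only 0 and -1 on common ABIs, so comparisons against 1 silently fail; 'unsigned int' makes the flag hold 0 or 1 as intended. A runnable demonstration:

/*
 * Why one-bit flags should be 'unsigned int': a signed 1-bit field
 * typically holds only 0 and -1, so (f.s == 1) is never true.
 */
#include <stdio.h>

struct flags {
	int		s : 1;	/* old style: plain int, may be signed */
	unsigned int	u : 1;	/* new style: always 0 or 1 */
};

int main(void)
{
	/* storing 1 in a signed 1-bit field is implementation-defined */
	struct flags f = { .s = 1, .u = 1 };

	printf("signed 1-bit: %d, unsigned 1-bit: %u\n", f.s, f.u);
	printf("f.s == 1 is %s\n", f.s == 1 ? "true" : "false");
	return 0;
}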
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index d199e525680a..4553ffd94b1f 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -24,9 +24,9 @@ EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
 DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
 EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
 
-static int cachesize_override __initdata = -1;
-static int disable_x86_fxsr __initdata = 0;
-static int disable_x86_serial_nr __initdata = 1;
+static int cachesize_override __devinitdata = -1;
+static int disable_x86_fxsr __devinitdata = 0;
+static int disable_x86_serial_nr __devinitdata = 1;
 
 struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
 
@@ -59,7 +59,7 @@ static int __init cachesize_setup(char *str)
 }
 __setup("cachesize=", cachesize_setup);
 
-int __init get_model_name(struct cpuinfo_x86 *c)
+int __devinit get_model_name(struct cpuinfo_x86 *c)
 {
 	unsigned int *v;
 	char *p, *q;
@@ -89,7 +89,7 @@ int __init get_model_name(struct cpuinfo_x86 *c)
 }
 
 
-void __init display_cacheinfo(struct cpuinfo_x86 *c)
+void __devinit display_cacheinfo(struct cpuinfo_x86 *c)
 {
 	unsigned int n, dummy, ecx, edx, l2size;
 
@@ -130,7 +130,7 @@ void __init display_cacheinfo(struct cpuinfo_x86 *c)
 /* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
 
 /* Look up CPU names by table lookup. */
-static char __init *table_lookup_model(struct cpuinfo_x86 *c)
+static char __devinit *table_lookup_model(struct cpuinfo_x86 *c)
 {
 	struct cpu_model_info *info;
 
@@ -151,7 +151,7 @@ static char __init *table_lookup_model(struct cpuinfo_x86 *c)
 }
 
 
-void __init get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
 {
 	char *v = c->x86_vendor_id;
 	int i;
@@ -202,7 +202,7 @@ static inline int flag_is_changeable_p(u32 flag)
 
 
 /* Probe for the CPUID instruction */
-static int __init have_cpuid_p(void)
+static int __devinit have_cpuid_p(void)
 {
 	return flag_is_changeable_p(X86_EFLAGS_ID);
 }
@@ -249,7 +249,7 @@ static void __init early_cpu_detect(void)
 #endif
 }
 
-void __init generic_identify(struct cpuinfo_x86 * c)
+void __devinit generic_identify(struct cpuinfo_x86 * c)
 {
 	u32 tfms, xlvl;
 	int junk;
@@ -296,7 +296,7 @@ void __init generic_identify(struct cpuinfo_x86 * c)
 	}
 }
 
-static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 {
 	if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
 		/* Disable processor serial number */
@@ -324,7 +324,7 @@ __setup("serialnumber", x86_serial_nr_setup);
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
-void __init identify_cpu(struct cpuinfo_x86 *c)
+void __devinit identify_cpu(struct cpuinfo_x86 *c)
 {
 	int i;
 
@@ -432,10 +432,18 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
 #ifdef CONFIG_X86_MCE
 	mcheck_init(c);
 #endif
+	if (c == &boot_cpu_data)
+		sysenter_setup();
+	enable_sep_cpu();
+
+	if (c == &boot_cpu_data)
+		mtrr_bp_init();
+	else
+		mtrr_ap_init();
 }
 
 #ifdef CONFIG_X86_HT
-void __init detect_ht(struct cpuinfo_x86 *c)
+void __devinit detect_ht(struct cpuinfo_x86 *c)
 {
 	u32 eax, ebx, ecx, edx;
 	int index_msb, tmp;
@@ -490,7 +498,7 @@ void __init detect_ht(struct cpuinfo_x86 *c)
 }
 #endif
 
-void __init print_cpu_info(struct cpuinfo_x86 *c)
+void __devinit print_cpu_info(struct cpuinfo_x86 *c)
 {
 	char *vendor = NULL;
 
@@ -513,7 +521,7 @@ void __init print_cpu_info(struct cpuinfo_x86 *c)
 	printk("\n");
 }
 
-cpumask_t cpu_initialized __initdata = CPU_MASK_NONE;
+cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE;
 
 /* This is hacky. :)
  * We're emulating future behavior.
@@ -560,7 +568,7 @@ void __init early_cpu_init(void)
  * and IDT. We reload them nevertheless, this function acts as a
  * 'CPU state barrier', nothing should get across.
  */
-void __init cpu_init (void)
+void __devinit cpu_init(void)
 {
 	int cpu = smp_processor_id();
 	struct tss_struct * t = &per_cpu(init_tss, cpu);
@@ -635,7 +643,7 @@ void __init cpu_init (void)
 
 	/* Clear all 6 debug registers: */
 
-#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
+#define CD(register) set_debugreg(0, register)
 
 	CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
 
@@ -648,3 +656,15 @@ void __init cpu_init (void)
 	clear_used_math();
 	mxcsr_feature_mask_init();
 }
+
+#ifdef CONFIG_HOTPLUG_CPU
+void __devinit cpu_uninit(void)
+{
+	int cpu = raw_smp_processor_id();
+	cpu_clear(cpu, cpu_initialized);
+
+	/* lazy TLB state */
+	per_cpu(cpu_tlbstate, cpu).state = 0;
+	per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
+}
+#endif
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index 963e17aa205d..60a9e54dd20e 100644
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -442,6 +442,13 @@ acpi_cpufreq_cpu_init (
 		    (u32) data->acpi_data.states[i].transition_latency);
 
 	cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
+
+	/*
+	 * the first call to ->target() should result in us actually
+	 * writing something to the appropriate registers.
+	 */
+	data->resume = 1;
+
 	return (result);
 
  err_freqfree:
diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
index 1a49adb1f4a6..e86ea486c311 100644
--- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
@@ -190,7 +190,7 @@ static __init struct pci_dev *gx_detect_chipset(void)
 
 	/* detect which companion chip is used */
 	while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) {
-		if ((pci_match_device (gx_chipset_tbl, gx_pci)) != NULL) {
+		if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) {
 			return gx_pci;
 		}
 	}
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
index 5c530064eb74..73a5dc5b26b8 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
@@ -648,9 +648,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) {
 	}
 #endif
 
-	if (powernow_table)
-		kfree(powernow_table);
-
+	kfree(powernow_table);
 	return 0;
 }
 
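Note on the powernow-k7 cleanup above: kfree(NULL) is defined as a no-op, so the guard was redundant. C's free() carries the same guarantee, as this small program shows:

/* free(NULL) is a guaranteed no-op (C89 onward), so guards like
 * "if (p) free(p);" are redundant -- the same holds for kfree(). */
#include <stdlib.h>
#include <stdio.h>

int main(void)
{
	char *p = NULL;

	free(p);		/* defined behaviour: does nothing */
	p = malloc(16);
	free(p);		/* normal case */
	puts("both frees ok");
	return 0;
}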
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
index 10cc096c0ade..ab6e0611303d 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
@@ -1,5 +1,5 @@
 /*
- *   (c) 2003, 2004 Advanced Micro Devices, Inc.
+ *   (c) 2003, 2004, 2005 Advanced Micro Devices, Inc.
  *  Your use of this code is subject to the terms and conditions of the
  *  GNU general public license version 2. See "COPYING" or
  *  http://www.gnu.org/licenses/gpl.html
@@ -44,7 +44,7 @@
 
 #define PFX "powernow-k8: "
 #define BFX PFX "BIOS error: "
-#define VERSION "version 1.40.2"
+#define VERSION "version 1.50.3"
 #include "powernow-k8.h"
 
 /* serialize freq changes */
@@ -110,14 +110,13 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
 	u32 lo, hi;
 	u32 i = 0;
 
-	lo = MSR_S_LO_CHANGE_PENDING;
-	while (lo & MSR_S_LO_CHANGE_PENDING) {
+	do {
 		if (i++ > 0x1000000) {
 			printk(KERN_ERR PFX "detected change pending stuck\n");
 			return 1;
 		}
 		rdmsr(MSR_FIDVID_STATUS, lo, hi);
-	}
+	} while (lo & MSR_S_LO_CHANGE_PENDING);
 
 	data->currvid = hi & MSR_S_HI_CURRENT_VID;
 	data->currfid = lo & MSR_S_LO_CURRENT_FID;
@@ -232,7 +231,7 @@
 /*
  * Reduce the vid by the max of step or reqvid.
  * Decreasing vid codes represent increasing voltages:
- * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of 0x1f is off.
+ * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
  */
 static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step)
 {
@@ -467,7 +466,7 @@ static int check_supported_cpu(unsigned int cpu)
 	eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
 	if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
 	    ((eax & CPUID_XFAM) != CPUID_XFAM_K8) ||
-	    ((eax & CPUID_XMOD) > CPUID_XMOD_REV_E)) {
+	    ((eax & CPUID_XMOD) > CPUID_XMOD_REV_F)) {
 		printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
 		goto out;
 	}
@@ -696,6 +695,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned
 
 	data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
 	data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
+	data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
 	data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
 	data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
 	data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
@@ -735,8 +735,16 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	}
 
 	for (i = 0; i < data->acpi_data.state_count; i++) {
-		u32 fid = data->acpi_data.states[i].control & FID_MASK;
-		u32 vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
+		u32 fid;
+		u32 vid;
+
+		if (data->exttype) {
+			fid = data->acpi_data.states[i].status & FID_MASK;
+			vid = (data->acpi_data.states[i].status >> VID_SHIFT) & VID_MASK;
+		} else {
+			fid = data->acpi_data.states[i].control & FID_MASK;
+			vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
+		}
 
 		dprintk("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
 
@@ -753,7 +761,7 @@
 		}
 
 		/* verify voltage is OK - BIOSs are using "off" to indicate invalid */
-		if (vid == 0x1f) {
+		if (vid == VID_OFF) {
 			dprintk("invalid vid %u, ignoring\n", vid);
 			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
 			continue;
@@ -930,15 +938,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
 
 	down(&fidvid_sem);
 
-	for_each_cpu_mask(i, cpu_core_map[pol->cpu]) {
-		/* make sure the sibling is initialized */
-		if (!powernow_data[i]) {
-			ret = 0;
-			up(&fidvid_sem);
-			goto err_out;
-		}
-	}
-
 	powernow_k8_acpi_pst_values(data, newstate);
 
 	if (transition_frequency(data, newstate)) {
@@ -978,7 +977,7 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
 	struct powernow_k8_data *data;
 	cpumask_t oldmask = CPU_MASK_ALL;
-	int rc;
+	int rc, i;
 
 	if (!check_supported_cpu(pol->cpu))
 		return -ENODEV;
@@ -1064,7 +1063,9 @@
 	printk("cpu_init done, current fid 0x%x, vid 0x%x\n",
 	       data->currfid, data->currvid);
 
-	powernow_data[pol->cpu] = data;
+	for_each_cpu_mask(i, cpu_core_map[pol->cpu]) {
+		powernow_data[i] = data;
+	}
 
 	return 0;
 
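Note on the query_current_values_with_pending_wait() rewrite above: the do/while form reads the MSR at least once before testing the pending bit, instead of priming the loop variable with a fake value. The same poll-with-bailout shape as a userspace sketch -- read_status() is a hypothetical stand-in for rdmsr(MSR_FIDVID_STATUS, ...):

/*
 * Sketch of the do/while poll used above: always sample the status
 * at least once, bail out after too many iterations.
 */
#include <stdio.h>
#include <stdint.h>

#define CHANGE_PENDING 0x80000000u

static uint32_t read_status(void)
{
	static int calls;

	return ++calls < 3 ? CHANGE_PENDING : 0;	/* clears on 3rd read */
}

static int wait_for_idle(void)
{
	uint32_t lo;
	uint32_t i = 0;

	do {
		if (i++ > 0x1000000) {
			fprintf(stderr, "change pending stuck\n");
			return 1;
		}
		lo = read_status();
	} while (lo & CHANGE_PENDING);

	return 0;
}

int main(void)
{
	return wait_for_idle();
}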
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
index 9ed5bf221cb7..b1e85bb36396 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
@@ -1,5 +1,5 @@
 /*
- *   (c) 2003, 2004 Advanced Micro Devices, Inc.
+ *   (c) 2003, 2004, 2005 Advanced Micro Devices, Inc.
  *  Your use of this code is subject to the terms and conditions of the
  *  GNU general public license version 2. See "COPYING" or
  *  http://www.gnu.org/licenses/gpl.html
@@ -19,6 +19,7 @@ struct powernow_k8_data {
 	u32 vidmvs;  /* usable value calculated from mvs */
 	u32 vstable; /* voltage stabilization time, units 20 us */
 	u32 plllock; /* pll lock time, units 1 us */
+	u32 exttype; /* extended interface = 1 */
 
 	/* keep track of the current fid / vid */
 	u32 currvid, currfid;
@@ -41,7 +42,7 @@
 #define CPUID_XFAM			0x0ff00000	/* extended family */
 #define CPUID_XFAM_K8			0
 #define CPUID_XMOD			0x000f0000	/* extended model */
-#define CPUID_XMOD_REV_E		0x00020000
+#define CPUID_XMOD_REV_F		0x00040000
 #define CPUID_USE_XFAM_XMOD		0x00000f00
 #define CPUID_GET_MAX_CAPABILITIES	0x80000000
 #define CPUID_FREQ_VOLT_CAPABILITIES	0x80000007
@@ -57,25 +58,26 @@
 
 /* Field definitions within the FID VID Low Control MSR : */
 #define MSR_C_LO_INIT_FID_VID		0x00010000
-#define MSR_C_LO_NEW_VID		0x00001f00
-#define MSR_C_LO_NEW_FID		0x0000002f
+#define MSR_C_LO_NEW_VID		0x00003f00
+#define MSR_C_LO_NEW_FID		0x0000003f
 #define MSR_C_LO_VID_SHIFT		8
 
 /* Field definitions within the FID VID High Control MSR : */
 #define MSR_C_HI_STP_GNT_TO		0x000fffff
 
 /* Field definitions within the FID VID Low Status MSR : */
 #define MSR_S_LO_CHANGE_PENDING		0x80000000	/* cleared when completed */
-#define MSR_S_LO_MAX_RAMP_VID		0x1f000000
+#define MSR_S_LO_MAX_RAMP_VID		0x3f000000
 #define MSR_S_LO_MAX_FID		0x003f0000
 #define MSR_S_LO_START_FID		0x00003f00
 #define MSR_S_LO_CURRENT_FID		0x0000003f
 
 /* Field definitions within the FID VID High Status MSR : */
-#define MSR_S_HI_MAX_WORKING_VID	0x001f0000
-#define MSR_S_HI_START_VID		0x00001f00
-#define MSR_S_HI_CURRENT_VID		0x0000001f
-#define MSR_C_HI_STP_GNT_BENIGN		0x00000001
+#define MSR_S_HI_MIN_WORKING_VID	0x3f000000
+#define MSR_S_HI_MAX_WORKING_VID	0x003f0000
+#define MSR_S_HI_START_VID		0x00003f00
+#define MSR_S_HI_CURRENT_VID		0x0000003f
+#define MSR_C_HI_STP_GNT_BENIGN		0x00000001
 
 /*
  * There are restrictions frequencies have to follow:
@@ -99,13 +101,15 @@ struct powernow_k8_data {
 #define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */
 
 #define MAX_FID 0x2a	/* Spec only gives FID values as far as 5 GHz */
-#define LEAST_VID 0x1e	/* Lowest (numerically highest) useful vid value */
+#define LEAST_VID 0x3e	/* Lowest (numerically highest) useful vid value */
 
 #define MIN_FREQ 800	/* Min and max freqs, per spec */
 #define MAX_FREQ 5000
 
 #define INVALID_FID_MASK 0xffffffc1  /* not a valid fid if these bits are set */
-#define INVALID_VID_MASK 0xffffffe0  /* not a valid vid if these bits are set */
+#define INVALID_VID_MASK 0xffffffc0  /* not a valid vid if these bits are set */
+
+#define VID_OFF 0x3f
 
 #define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */
 
@@ -121,12 +125,14 @@
 
 #define IRT_SHIFT      30
 #define RVO_SHIFT      28
+#define EXT_TYPE_SHIFT 27
 #define PLL_L_SHIFT    20
 #define MVS_SHIFT      18
 #define VST_SHIFT      11
 #define VID_SHIFT       6
 #define IRT_MASK        3
 #define RVO_MASK        3
+#define EXT_TYPE_MASK   1
 #define PLL_L_MASK   0x7f
 #define MVS_MASK        3
 #define VST_MASK     0x7f
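Note on the widened constants above: all of these fields decode with the same (word >> SHIFT) & MASK pattern. A small sketch using the EXT_TYPE and VID fields; the sample control word and the six-bit VID_MASK value are assumptions for illustration:

/*
 * Sketch of the (word >> SHIFT) & MASK field decode used with the
 * powernow-k8 constants; the sample control word is made up and
 * VID_MASK is assumed to be the six-bit 0x3f after the widening.
 */
#include <stdio.h>
#include <stdint.h>

#define EXT_TYPE_SHIFT	27
#define EXT_TYPE_MASK	1
#define VID_SHIFT	6
#define VID_MASK	0x3f

int main(void)
{
	uint32_t control = (1u << EXT_TYPE_SHIFT) | (0x2au << VID_SHIFT);

	printf("exttype=%u vid=%#x\n",
	       (unsigned)((control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK),
	       (unsigned)((control >> VID_SHIFT) & VID_MASK));
	return 0;
}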
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
index 7dcbf70fc16f..327a55d4d1c6 100644
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -375,7 +375,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
 	arg0.buffer.pointer = (u8 *) arg0_buf;
 	arg0_buf[0] = ACPI_PDC_REVISION_ID;
 	arg0_buf[1] = 1;
-	arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_MSR;
+	arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP_MSR;
 
 	p.pdc = &arg_list;
 
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 121aa2176e69..a2c33c1a46c5 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -25,10 +25,10 @@ extern int trap_init_f00f_bug(void);
 /*
  * Alignment at which movsl is preferred for bulk memory copies.
  */
-struct movsl_mask movsl_mask;
+struct movsl_mask movsl_mask __read_mostly;
 #endif
 
-void __init early_intel_workaround(struct cpuinfo_x86 *c)
+void __devinit early_intel_workaround(struct cpuinfo_x86 *c)
 {
 	if (c->x86_vendor != X86_VENDOR_INTEL)
 		return;
@@ -43,7 +43,7 @@ void __init early_intel_workaround(struct cpuinfo_x86 *c)
  * This is called before we do cpu ident work
  */
 
-int __init ppro_with_ram_bug(void)
+int __devinit ppro_with_ram_bug(void)
 {
 	/* Uses data from early_cpu_detect now */
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -61,7 +61,7 @@ int __init ppro_with_ram_bug(void)
  * P4 Xeon errata 037 workaround.
  * Hardware prefetcher may cause stale data to be loaded into the cache.
  */
-static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c)
+static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
 {
 	unsigned long lo, hi;
 
@@ -80,7 +80,7 @@ static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c)
 /*
  * find out the number of processor cores on the die
  */
-static int __init num_cpu_cores(struct cpuinfo_x86 *c)
+static int __devinit num_cpu_cores(struct cpuinfo_x86 *c)
 {
 	unsigned int eax;
 
@@ -98,7 +98,7 @@ static int __init num_cpu_cores(struct cpuinfo_x86 *c)
 	return 1;
 }
 
-static void __init init_intel(struct cpuinfo_x86 *c)
+static void __devinit init_intel(struct cpuinfo_x86 *c)
 {
 	unsigned int l2 = 0;
 	char *p = NULL;
@@ -204,7 +204,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
 	return size;
 }
 
-static struct cpu_dev intel_cpu_dev __initdata = {
+static struct cpu_dev intel_cpu_dev __devinitdata = {
 	.c_vendor	= "Intel",
 	.c_ident	= { "GenuineIntel" },
 	.c_models = {
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index a710dc4eb20e..6c55b50cf048 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -28,7 +28,7 @@ struct _cache_table
28}; 28};
29 29
30/* all the cache descriptor types we care about (no TLB or trace cache entries) */ 30/* all the cache descriptor types we care about (no TLB or trace cache entries) */
31static struct _cache_table cache_table[] __initdata = 31static struct _cache_table cache_table[] __devinitdata =
32{ 32{
33 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ 33 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
34 { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ 34 { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
@@ -128,7 +128,7 @@ static int __devinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le
128 cpuid_count(4, index, &eax, &ebx, &ecx, &edx); 128 cpuid_count(4, index, &eax, &ebx, &ecx, &edx);
129 cache_eax.full = eax; 129 cache_eax.full = eax;
130 if (cache_eax.split.type == CACHE_TYPE_NULL) 130 if (cache_eax.split.type == CACHE_TYPE_NULL)
131 return -1; 131 return -EIO; /* better error ? */
132 132
133 this_leaf->eax.full = eax; 133 this_leaf->eax.full = eax;
134 this_leaf->ebx.full = ebx; 134 this_leaf->ebx.full = ebx;
@@ -160,7 +160,7 @@ static int __init find_num_cache_leaves(void)
160 return retval; 160 return retval;
161} 161}
162 162
163unsigned int __init init_intel_cacheinfo(struct cpuinfo_x86 *c) 163unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
164{ 164{
165 unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ 165 unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
166 unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ 166 unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
@@ -334,6 +334,7 @@ static int __devinit detect_cache_attributes(unsigned int cpu)
334 struct _cpuid4_info *this_leaf; 334 struct _cpuid4_info *this_leaf;
335 unsigned long j; 335 unsigned long j;
336 int retval; 336 int retval;
337 cpumask_t oldmask;
337 338
338 if (num_cache_leaves == 0) 339 if (num_cache_leaves == 0)
339 return -ENOENT; 340 return -ENOENT;
@@ -345,19 +346,26 @@ static int __devinit detect_cache_attributes(unsigned int cpu)
345 memset(cpuid4_info[cpu], 0, 346 memset(cpuid4_info[cpu], 0,
346 sizeof(struct _cpuid4_info) * num_cache_leaves); 347 sizeof(struct _cpuid4_info) * num_cache_leaves);
347 348
349 oldmask = current->cpus_allowed;
350 retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
351 if (retval)
352 goto out;
353
348 /* Do cpuid and store the results */ 354 /* Do cpuid and store the results */
355 retval = 0;
349 for (j = 0; j < num_cache_leaves; j++) { 356 for (j = 0; j < num_cache_leaves; j++) {
350 this_leaf = CPUID4_INFO_IDX(cpu, j); 357 this_leaf = CPUID4_INFO_IDX(cpu, j);
351 retval = cpuid4_cache_lookup(j, this_leaf); 358 retval = cpuid4_cache_lookup(j, this_leaf);
352 if (unlikely(retval < 0)) 359 if (unlikely(retval < 0))
353 goto err_out; 360 break;
354 cache_shared_cpu_map_setup(cpu, j); 361 cache_shared_cpu_map_setup(cpu, j);
355 } 362 }
356 return 0; 363 set_cpus_allowed(current, oldmask);
357 364
358err_out: 365out:
359 free_cache_attributes(cpu); 366 if (retval)
360 return -ENOMEM; 367 free_cache_attributes(cpu);
368 return retval;
361} 369}
362 370
363#ifdef CONFIG_SYSFS 371#ifdef CONFIG_SYSFS
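
The hunk above makes detect_cache_attributes() bounce the calling task onto the target CPU before issuing cpuid, then restore the original affinity — that is why oldmask is saved first and the mask is restored on every exit path. A minimal userspace analogue of the same save/pin/restore pattern, sketched with sched_setaffinity() rather than the kernel's set_cpus_allowed(); the target CPU number and the work done while pinned are invented:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t oldmask, newmask;
	int cpu = 0;	/* placeholder target CPU */

	/* save the current affinity, as the kernel code saves cpus_allowed */
	if (sched_getaffinity(0, sizeof(oldmask), &oldmask))
		return 1;

	CPU_ZERO(&newmask);
	CPU_SET(cpu, &newmask);
	if (sched_setaffinity(0, sizeof(newmask), &newmask))
		return 1;

	/* anything executed here runs on 'cpu'; the kernel runs cpuid here */
	printf("pinned to CPU %d\n", sched_getcpu());

	/* restore, mirroring set_cpus_allowed(current, oldmask) */
	sched_setaffinity(0, sizeof(oldmask), &oldmask);
	return 0;
}
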
diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c
index 8df52e86c4d2..c4abe7657397 100644
--- a/arch/i386/kernel/cpu/mcheck/k7.c
+++ b/arch/i386/kernel/cpu/mcheck/k7.c
@@ -69,7 +69,7 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
69 69
70 70
71/* AMD K7 machine check is Intel like */ 71/* AMD K7 machine check is Intel like */
72void __init amd_mcheck_init(struct cpuinfo_x86 *c) 72void __devinit amd_mcheck_init(struct cpuinfo_x86 *c)
73{ 73{
74 u32 l, h; 74 u32 l, h;
75 int i; 75 int i;
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c
index bf6d1aefafc0..2cf25d2ba0f1 100644
--- a/arch/i386/kernel/cpu/mcheck/mce.c
+++ b/arch/i386/kernel/cpu/mcheck/mce.c
@@ -16,7 +16,7 @@
16 16
17#include "mce.h" 17#include "mce.h"
18 18
19int mce_disabled __initdata = 0; 19int mce_disabled __devinitdata = 0;
20int nr_mce_banks; 20int nr_mce_banks;
21 21
22EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ 22EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
@@ -31,7 +31,7 @@ static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_
31void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; 31void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
32 32
33/* This has to be run for each processor */ 33/* This has to be run for each processor */
34void __init mcheck_init(struct cpuinfo_x86 *c) 34void __devinit mcheck_init(struct cpuinfo_x86 *c)
35{ 35{
36 if (mce_disabled==1) 36 if (mce_disabled==1)
37 return; 37 return;
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index 8b16ceb929b4..0abccb6fdf9e 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -78,7 +78,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs)
78} 78}
79 79
80/* P4/Xeon Thermal regulation detect and init */ 80/* P4/Xeon Thermal regulation detect and init */
81static void __init intel_init_thermal(struct cpuinfo_x86 *c) 81static void __devinit intel_init_thermal(struct cpuinfo_x86 *c)
82{ 82{
83 u32 l, h; 83 u32 l, h;
84 unsigned int cpu = smp_processor_id(); 84 unsigned int cpu = smp_processor_id();
@@ -232,7 +232,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
232} 232}
233 233
234 234
235void __init intel_p4_mcheck_init(struct cpuinfo_x86 *c) 235void __devinit intel_p4_mcheck_init(struct cpuinfo_x86 *c)
236{ 236{
237 u32 l, h; 237 u32 l, h;
238 int i; 238 int i;
diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c
index c45a1b485c80..ec0614cd2925 100644
--- a/arch/i386/kernel/cpu/mcheck/p5.c
+++ b/arch/i386/kernel/cpu/mcheck/p5.c
@@ -29,7 +29,7 @@ static fastcall void pentium_machine_check(struct pt_regs * regs, long error_cod
29} 29}
30 30
31/* Set up machine check reporting for processors with Intel style MCE */ 31/* Set up machine check reporting for processors with Intel style MCE */
32void __init intel_p5_mcheck_init(struct cpuinfo_x86 *c) 32void __devinit intel_p5_mcheck_init(struct cpuinfo_x86 *c)
33{ 33{
34 u32 l, h; 34 u32 l, h;
35 35
diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c
index 46640f8c2494..f01b73f947e1 100644
--- a/arch/i386/kernel/cpu/mcheck/p6.c
+++ b/arch/i386/kernel/cpu/mcheck/p6.c
@@ -80,7 +80,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
80} 80}
81 81
82/* Set up machine check reporting for processors with Intel style MCE */ 82/* Set up machine check reporting for processors with Intel style MCE */
83void __init intel_p6_mcheck_init(struct cpuinfo_x86 *c) 83void __devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c)
84{ 84{
85 u32 l, h; 85 u32 l, h;
86 int i; 86 int i;
diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c
index 753fa7acb984..7bae68fa168f 100644
--- a/arch/i386/kernel/cpu/mcheck/winchip.c
+++ b/arch/i386/kernel/cpu/mcheck/winchip.c
@@ -23,7 +23,7 @@ static fastcall void winchip_machine_check(struct pt_regs * regs, long error_cod
23} 23}
24 24
25/* Set up machine check reporting on the Winchip C6 series */ 25/* Set up machine check reporting on the Winchip C6 series */
26void __init winchip_mcheck_init(struct cpuinfo_x86 *c) 26void __devinit winchip_mcheck_init(struct cpuinfo_x86 *c)
27{ 27{
28 u32 lo, hi; 28 u32 lo, hi;
29 machine_check_vector = winchip_machine_check; 29 machine_check_vector = winchip_machine_check;
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index f468a979e9aa..169ac8e0db68 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -67,14 +67,6 @@ void __init get_mtrr_state(void)
67 mtrr_state.enabled = (lo & 0xc00) >> 10; 67 mtrr_state.enabled = (lo & 0xc00) >> 10;
68} 68}
69 69
70/* Free resources associated with a struct mtrr_state */
71void __init finalize_mtrr_state(void)
72{
73 if (mtrr_state.var_ranges)
74 kfree(mtrr_state.var_ranges);
75 mtrr_state.var_ranges = NULL;
76}
77
 78/* Some BIOSes are broken and don't set all MTRRs the same! */ 70/* Some BIOSes are broken and don't set all MTRRs the same! */
79void __init mtrr_state_warn(void) 71void __init mtrr_state_warn(void)
80{ 72{
@@ -335,6 +327,9 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
335*/ 327*/
336{ 328{
337 unsigned long flags; 329 unsigned long flags;
330 struct mtrr_var_range *vr;
331
332 vr = &mtrr_state.var_ranges[reg];
338 333
339 local_irq_save(flags); 334 local_irq_save(flags);
340 prepare_set(); 335 prepare_set();
@@ -343,11 +338,15 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
343 /* The invalid bit is kept in the mask, so we simply clear the 338 /* The invalid bit is kept in the mask, so we simply clear the
344 relevant mask register to disable a range. */ 339 relevant mask register to disable a range. */
345 mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); 340 mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0);
341 memset(vr, 0, sizeof(struct mtrr_var_range));
346 } else { 342 } else {
347 mtrr_wrmsr(MTRRphysBase_MSR(reg), base << PAGE_SHIFT | type, 343 vr->base_lo = base << PAGE_SHIFT | type;
348 (base & size_and_mask) >> (32 - PAGE_SHIFT)); 344 vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
349 mtrr_wrmsr(MTRRphysMask_MSR(reg), -size << PAGE_SHIFT | 0x800, 345 vr->mask_lo = -size << PAGE_SHIFT | 0x800;
350 (-size & size_and_mask) >> (32 - PAGE_SHIFT)); 346 vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);
347
348 mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi);
349 mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi);
351 } 350 }
352 351
353 post_set(); 352 post_set();
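
Caching the programmed values in mtrr_state.var_ranges (above) is what later lets mtrr_ap_init() replay them with set_all() on hot-added CPUs and on resume. The base/mask packing itself can be checked in isolation; a self-contained sketch, with an invented region (256 MiB at 1 GiB, write-back) and size_and_mask hard-coded as an assumption rather than derived from CPUID:

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	/* all values below are invented for illustration */
	unsigned int size_and_mask = 0x00f00000; /* assumed phys address width */
	unsigned int base = 0x40000000u >> PAGE_SHIFT; /* 1 GiB, in pages */
	unsigned int size = 0x10000000u >> PAGE_SHIFT; /* 256 MiB, in pages */
	unsigned int type = 6;                         /* write-back */

	unsigned int base_lo = base << PAGE_SHIFT | type;
	unsigned int base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
	unsigned int mask_lo = -size << PAGE_SHIFT | 0x800; /* bit 11 = valid */
	unsigned int mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);

	printf("PHYSBASE = %08x:%08x  PHYSMASK = %08x:%08x\n",
	       base_hi, base_lo, mask_hi, mask_lo);
	return 0;
}
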
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index e1c2042b9b7e..764cac64e211 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -332,6 +332,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
332 332
333 error = -EINVAL; 333 error = -EINVAL;
334 334
335 /* No CPU hotplug when we change MTRR entries */
336 lock_cpu_hotplug();
335 /* Search for existing MTRR */ 337 /* Search for existing MTRR */
336 down(&main_lock); 338 down(&main_lock);
337 for (i = 0; i < num_var_ranges; ++i) { 339 for (i = 0; i < num_var_ranges; ++i) {
@@ -372,9 +374,23 @@ int mtrr_add_page(unsigned long base, unsigned long size,
372 error = i; 374 error = i;
373 out: 375 out:
374 up(&main_lock); 376 up(&main_lock);
377 unlock_cpu_hotplug();
375 return error; 378 return error;
376} 379}
377 380
381static int mtrr_check(unsigned long base, unsigned long size)
382{
383 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
384 printk(KERN_WARNING
385 "mtrr: size and base must be multiples of 4 kiB\n");
386 printk(KERN_DEBUG
387 "mtrr: size: 0x%lx base: 0x%lx\n", size, base);
388 dump_stack();
389 return -1;
390 }
391 return 0;
392}
393
378/** 394/**
379 * mtrr_add - Add a memory type region 395 * mtrr_add - Add a memory type region
380 * @base: Physical base address of region 396 * @base: Physical base address of region
@@ -415,11 +431,8 @@ int
415mtrr_add(unsigned long base, unsigned long size, unsigned int type, 431mtrr_add(unsigned long base, unsigned long size, unsigned int type,
416 char increment) 432 char increment)
417{ 433{
418 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { 434 if (mtrr_check(base, size))
419 printk(KERN_WARNING "mtrr: size and base must be multiples of 4 kiB\n");
420 printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base);
421 return -EINVAL; 435 return -EINVAL;
422 }
423 return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, 436 return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
424 increment); 437 increment);
425} 438}
@@ -451,6 +464,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
451 return -ENXIO; 464 return -ENXIO;
452 465
453 max = num_var_ranges; 466 max = num_var_ranges;
467 /* No CPU hotplug when we change MTRR entries */
468 lock_cpu_hotplug();
454 down(&main_lock); 469 down(&main_lock);
455 if (reg < 0) { 470 if (reg < 0) {
456 /* Search for existing MTRR */ 471 /* Search for existing MTRR */
@@ -491,6 +506,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
491 error = reg; 506 error = reg;
492 out: 507 out:
493 up(&main_lock); 508 up(&main_lock);
509 unlock_cpu_hotplug();
494 return error; 510 return error;
495} 511}
496/** 512/**
@@ -511,11 +527,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
511int 527int
512mtrr_del(int reg, unsigned long base, unsigned long size) 528mtrr_del(int reg, unsigned long base, unsigned long size)
513{ 529{
514 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { 530 if (mtrr_check(base, size))
515 printk(KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n");
516 printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base);
517 return -EINVAL; 531 return -EINVAL;
518 }
519 return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); 532 return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
520} 533}
521 534
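
Both wrappers now funnel through mtrr_check() above instead of carrying their own copies of the alignment test. The predicate is plain bit arithmetic and easy to verify standalone (the sample addresses are invented):

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* mirrors mtrr_check(): both base and size must be 4 KiB aligned */
static int mtrr_check(unsigned long base, unsigned long size)
{
	return ((base | size) & (PAGE_SIZE - 1)) ? -1 : 0;
}

int main(void)
{
	printf("%d\n", mtrr_check(0xd0000000UL, 0x08000000UL)); /* 0: aligned  */
	printf("%d\n", mtrr_check(0xd0000100UL, 0x08000000UL)); /* -1: base    */
	return 0;
}
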
@@ -537,21 +550,9 @@ static void __init init_ifs(void)
537 centaur_init_mtrr(); 550 centaur_init_mtrr();
538} 551}
539 552
540static void __init init_other_cpus(void) 553/* The suspend/resume methods are only for CPUs without MTRR. CPUs using the
541{ 554 * generic MTRR driver don't require this.
542 if (use_intel()) 555 */
543 get_mtrr_state();
544
545 /* bring up the other processors */
546 set_mtrr(~0U,0,0,0);
547
548 if (use_intel()) {
549 finalize_mtrr_state();
550 mtrr_state_warn();
551 }
552}
553
554
555struct mtrr_value { 556struct mtrr_value {
556 mtrr_type ltype; 557 mtrr_type ltype;
557 unsigned long lbase; 558 unsigned long lbase;
@@ -604,13 +605,13 @@ static struct sysdev_driver mtrr_sysdev_driver = {
604 605
605 606
606/** 607/**
607 * mtrr_init - initialize mtrrs on the boot CPU 608 * mtrr_bp_init - initialize mtrrs on the boot CPU
608 * 609 *
609 * This needs to be called early; before any of the other CPUs are 610 * This needs to be called early; before any of the other CPUs are
610 * initialized (i.e. before smp_init()). 611 * initialized (i.e. before smp_init()).
611 * 612 *
612 */ 613 */
613static int __init mtrr_init(void) 614void __init mtrr_bp_init(void)
614{ 615{
615 init_ifs(); 616 init_ifs();
616 617
@@ -667,12 +668,48 @@ static int __init mtrr_init(void)
667 if (mtrr_if) { 668 if (mtrr_if) {
668 set_num_var_ranges(); 669 set_num_var_ranges();
669 init_table(); 670 init_table();
670 init_other_cpus(); 671 if (use_intel())
671 672 get_mtrr_state();
672 return sysdev_driver_register(&cpu_sysdev_class,
673 &mtrr_sysdev_driver);
674 } 673 }
675 return -ENXIO;
676} 674}
677 675
678subsys_initcall(mtrr_init); 676void mtrr_ap_init(void)
677{
678 unsigned long flags;
679
680 if (!mtrr_if || !use_intel())
681 return;
682 /*
 683 * Ideally we should hold main_lock here to avoid MTRR entries being
 684 * changed, but this routine is called at cpu boot time and holding the
 685 * lock there breaks it. This routine is called in two cases: 1. very
 686 * early in software resume, when there are absolutely no MTRR entry
 687 * changes; 2. at cpu hot-add time. We let mtrr_add/del_page hold the
 688 * cpu hotplug lock to prevent MTRR entry changes.
689 */
690 local_irq_save(flags);
691
692 mtrr_if->set_all();
693
694 local_irq_restore(flags);
695}
696
697static int __init mtrr_init_finialize(void)
698{
699 if (!mtrr_if)
700 return 0;
701 if (use_intel())
702 mtrr_state_warn();
703 else {
 704 /* These CPUs have no MTRR and seem not to support SMP. They have
 705 * specific drivers; we use a tricky method to support
 706 * suspend/resume for them.
 707 * TBD: is there any system with such a CPU that supports
 708 * suspend/resume? If not, we should remove this code.
709 */
710 sysdev_driver_register(&cpu_sysdev_class,
711 &mtrr_sysdev_driver);
712 }
713 return 0;
714}
715subsys_initcall(mtrr_init_finialize);
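
The old monolithic mtrr_init() initcall is now split three ways: mtrr_bp_init() runs early on the boot CPU, mtrr_ap_init() replays the state on each secondary CPU and on resume, and mtrr_init_finialize() (spelling as in the patch) runs as a late initcall. A stubbed ordering sketch — the puts() bodies merely stand in for the real work, and the actual call sites live in setup.c, smpboot.c and the suspend path:

#include <stdio.h>

static void mtrr_bp_init(void)       { puts("BP: detect MTRR, get_mtrr_state()"); }
static void mtrr_ap_init(void)       { puts("AP/resume: mtrr_if->set_all()"); }
static void mtrr_init_finialize(void){ puts("initcall: warn / register sysdev"); }

int main(void)
{
	mtrr_bp_init();         /* boot CPU, before smp_init() */
	mtrr_ap_init();         /* each AP as it comes up, and on resume */
	mtrr_init_finialize();  /* late, via subsys_initcall() */
	return 0;
}
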
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
index de1351245599..99c9f2682041 100644
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -91,7 +91,6 @@ extern struct mtrr_ops * mtrr_if;
91 91
92extern unsigned int num_var_ranges; 92extern unsigned int num_var_ranges;
93 93
94void finalize_mtrr_state(void);
95void mtrr_state_warn(void); 94void mtrr_state_warn(void);
96char *mtrr_attrib_to_str(int x); 95char *mtrr_attrib_to_str(int x);
97void mtrr_wrmsr(unsigned, unsigned, unsigned); 96void mtrr_wrmsr(unsigned, unsigned, unsigned);
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index 7323c19f354e..8bd77d948a84 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -86,7 +86,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
86 seq_printf(m, "stepping\t: unknown\n"); 86 seq_printf(m, "stepping\t: unknown\n");
87 87
88 if ( cpu_has(c, X86_FEATURE_TSC) ) { 88 if ( cpu_has(c, X86_FEATURE_TSC) ) {
89 seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", 89 seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
90 cpu_khz / 1000, (cpu_khz % 1000)); 90 cpu_khz / 1000, (cpu_khz % 1000));
91 } 91 }
92 92
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c
index f57e5ee94943..fc426380366b 100644
--- a/arch/i386/kernel/cpu/transmeta.c
+++ b/arch/i386/kernel/cpu/transmeta.c
@@ -76,6 +76,12 @@ static void __init init_transmeta(struct cpuinfo_x86 *c)
76#define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV) 76#define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV)
77 if ( c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686 ) 77 if ( c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686 )
78 c->x86 = 6; 78 c->x86 = 6;
79
80#ifdef CONFIG_SYSCTL
81 /* randomize_va_space slows us down enormously;
82 it probably triggers retranslation of x86->native bytecode */
83 randomize_va_space = 0;
84#endif
79} 85}
80 86
81static void transmeta_identify(struct cpuinfo_x86 * c) 87static void transmeta_identify(struct cpuinfo_x86 * c)
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
new file mode 100644
index 000000000000..e5fab12f7926
--- /dev/null
+++ b/arch/i386/kernel/crash.c
@@ -0,0 +1,223 @@
1/*
2 * Architecture specific (i386) functions for kexec based crash dumps.
3 *
4 * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
5 *
6 * Copyright (C) IBM Corporation, 2004. All rights reserved.
7 *
8 */
9
10#include <linux/init.h>
11#include <linux/types.h>
12#include <linux/kernel.h>
13#include <linux/smp.h>
14#include <linux/irq.h>
15#include <linux/reboot.h>
16#include <linux/kexec.h>
17#include <linux/irq.h>
18#include <linux/delay.h>
19#include <linux/elf.h>
20#include <linux/elfcore.h>
21
22#include <asm/processor.h>
23#include <asm/hardirq.h>
24#include <asm/nmi.h>
25#include <asm/hw_irq.h>
26#include <asm/apic.h>
27#include <mach_ipi.h>
28
29
30note_buf_t crash_notes[NR_CPUS];
 31/* This keeps track of which cpu is the crashing one. */
32static int crashing_cpu;
33
34static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
35 size_t data_len)
36{
37 struct elf_note note;
38
39 note.n_namesz = strlen(name) + 1;
40 note.n_descsz = data_len;
41 note.n_type = type;
42 memcpy(buf, &note, sizeof(note));
43 buf += (sizeof(note) +3)/4;
44 memcpy(buf, name, note.n_namesz);
45 buf += (note.n_namesz + 3)/4;
46 memcpy(buf, data, note.n_descsz);
47 buf += (note.n_descsz + 3)/4;
48
49 return buf;
50}
51
52static void final_note(u32 *buf)
53{
54 struct elf_note note;
55
56 note.n_namesz = 0;
57 note.n_descsz = 0;
58 note.n_type = 0;
59 memcpy(buf, &note, sizeof(note));
60}
61
62static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
63{
64 struct elf_prstatus prstatus;
65 u32 *buf;
66
67 if ((cpu < 0) || (cpu >= NR_CPUS))
68 return;
69
70 /* Using ELF notes here is opportunistic.
71 * I need a well defined structure format
72 * for the data I pass, and I need tags
73 * on the data to indicate what information I have
74 * squirrelled away. ELF notes happen to provide
 75 * all of that, so there is no need to invent something new.
76 */
77 buf = &crash_notes[cpu][0];
78 memset(&prstatus, 0, sizeof(prstatus));
79 prstatus.pr_pid = current->pid;
80 elf_core_copy_regs(&prstatus.pr_reg, regs);
81 buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
82 sizeof(prstatus));
83 final_note(buf);
84}
85
86static void crash_get_current_regs(struct pt_regs *regs)
87{
88 __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx));
89 __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx));
90 __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx));
91 __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi));
92 __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi));
93 __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp));
94 __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax));
95 __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp));
96 __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss));
97 __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs));
98 __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds));
99 __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes));
100 __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags));
101
102 regs->eip = (unsigned long)current_text_addr();
103}
104
 105/* The CPU does not save ss and esp on the stack if execution is already
 106 * running in kernel mode at the time of the NMI. This code
 107 * fixes that up.
108 */
109static void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs)
110{
111 memcpy(newregs, oldregs, sizeof(*newregs));
112 newregs->esp = (unsigned long)&(oldregs->esp);
113 __asm__ __volatile__("xorl %eax, %eax;");
114 __asm__ __volatile__ ("movw %%ss, %%ax;" :"=a"(newregs->xss));
115}
116
 117/* We may have saved_regs from where the error came from,
 118 * or NULL if we got here via a direct panic().
119 */
120static void crash_save_self(struct pt_regs *saved_regs)
121{
122 struct pt_regs regs;
123 int cpu;
124
125 cpu = smp_processor_id();
126 if (saved_regs)
127 crash_setup_regs(&regs, saved_regs);
128 else
129 crash_get_current_regs(&regs);
130 crash_save_this_cpu(&regs, cpu);
131}
132
133#ifdef CONFIG_SMP
134static atomic_t waiting_for_crash_ipi;
135
136static int crash_nmi_callback(struct pt_regs *regs, int cpu)
137{
138 struct pt_regs fixed_regs;
139
 140 /* Don't do anything if this handler is invoked on the crashing cpu.
 141 * Otherwise, the system will completely hang. The crashing cpu can get
 142 * an NMI if the system was initially booted with the nmi_watchdog parameter.
143 */
144 if (cpu == crashing_cpu)
145 return 1;
146 local_irq_disable();
147
148 if (!user_mode(regs)) {
149 crash_setup_regs(&fixed_regs, regs);
150 regs = &fixed_regs;
151 }
152 crash_save_this_cpu(regs, cpu);
153 disable_local_APIC();
154 atomic_dec(&waiting_for_crash_ipi);
155 /* Assume hlt works */
156 __asm__("hlt");
157 for(;;);
158
159 return 1;
160}
161
162/*
 163 * By using the NMI code instead of a vector we just sneak through the
164 * word generator coming out with just what we want. AND it does
165 * not matter if clustered_apic_mode is set or not.
166 */
167static void smp_send_nmi_allbutself(void)
168{
169 send_IPI_allbutself(APIC_DM_NMI);
170}
171
172static void nmi_shootdown_cpus(void)
173{
174 unsigned long msecs;
175
176 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
177 /* Would it be better to replace the trap vector here? */
178 set_nmi_callback(crash_nmi_callback);
179 /* Ensure the new callback function is set before sending
180 * out the NMI
181 */
182 wmb();
183
184 smp_send_nmi_allbutself();
185
186 msecs = 1000; /* Wait at most a second for the other cpus to stop */
187 while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
188 mdelay(1);
189 msecs--;
190 }
191
192 /* Leave the nmi callback set */
193 disable_local_APIC();
194}
195#else
196static void nmi_shootdown_cpus(void)
197{
 198 /* There are no cpus to shoot down */
199}
200#endif
201
202void machine_crash_shutdown(struct pt_regs *regs)
203{
204 /* This function is only called after the system
 205 * has panicked or is otherwise in a critical state.
206 * The minimum amount of code to allow a kexec'd kernel
207 * to run successfully needs to happen here.
208 *
209 * In practice this means shooting down the other cpus in
210 * an SMP system.
211 */
212 /* The kernel is broken so disable interrupts */
213 local_irq_disable();
214
215 /* Make a note of crashing cpu. Will be used in NMI callback.*/
216 crashing_cpu = smp_processor_id();
217 nmi_shootdown_cpus();
218 lapic_shutdown();
219#if defined(CONFIG_X86_IO_APIC)
220 disable_IO_APIC();
221#endif
222 crash_save_self(regs);
223}
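
append_elf_note() in the new file above advances the buffer in u32 units, padding the header, name and descriptor each to a 4-byte boundary. A self-contained check of that size arithmetic (the note name and payload length are invented):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
	const char *name = "CORE";          /* sample note name */
	size_t desc_len = 13;               /* sample payload length */
	size_t hdr = 3 * sizeof(uint32_t);  /* n_namesz, n_descsz, n_type */

	/* each piece is advanced in u32 units: (len + 3) / 4 words */
	size_t words = (hdr + 3) / 4
		     + (strlen(name) + 1 + 3) / 4
		     + (desc_len + 3) / 4;

	printf("note occupies %zu bytes\n", words * 4);
	return 0;
}
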
diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c
index 6ed7e28f306c..a3cdf894302b 100644
--- a/arch/i386/kernel/dmi_scan.c
+++ b/arch/i386/kernel/dmi_scan.c
@@ -1,22 +1,15 @@
1#include <linux/types.h> 1#include <linux/types.h>
2#include <linux/kernel.h>
3#include <linux/string.h> 2#include <linux/string.h>
4#include <linux/init.h> 3#include <linux/init.h>
5#include <linux/module.h> 4#include <linux/module.h>
6#include <linux/slab.h>
7#include <linux/acpi.h>
8#include <asm/io.h>
9#include <linux/pm.h>
10#include <asm/system.h>
11#include <linux/dmi.h> 5#include <linux/dmi.h>
12#include <linux/bootmem.h> 6#include <linux/bootmem.h>
13 7
14 8
15struct dmi_header 9struct dmi_header {
16{ 10 u8 type;
17 u8 type; 11 u8 length;
18 u8 length; 12 u16 handle;
19 u16 handle;
20}; 13};
21 14
22#undef DMI_DEBUG 15#undef DMI_DEBUG
@@ -29,15 +22,13 @@ struct dmi_header
29 22
30static char * __init dmi_string(struct dmi_header *dm, u8 s) 23static char * __init dmi_string(struct dmi_header *dm, u8 s)
31{ 24{
32 u8 *bp=(u8 *)dm; 25 u8 *bp = ((u8 *) dm) + dm->length;
33 bp+=dm->length; 26
34 if(!s) 27 if (!s)
35 return ""; 28 return "";
36 s--; 29 s--;
37 while(s>0 && *bp) 30 while (s > 0 && *bp) {
38 { 31 bp += strlen(bp) + 1;
39 bp+=strlen(bp);
40 bp++;
41 s--; 32 s--;
42 } 33 }
43 return bp; 34 return bp;
@@ -47,16 +38,14 @@ static char * __init dmi_string(struct dmi_header *dm, u8 s)
47 * We have to be cautious here. We have seen BIOSes with DMI pointers 38 * We have to be cautious here. We have seen BIOSes with DMI pointers
 48 * pointing to completely the wrong place, for example 39 * pointing to completely the wrong place, for example
49 */ 40 */
50 41static int __init dmi_table(u32 base, int len, int num,
51static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dmi_header *)) 42 void (*decode)(struct dmi_header *))
52{ 43{
53 u8 *buf; 44 u8 *buf, *data;
54 struct dmi_header *dm; 45 int i = 0;
55 u8 *data;
56 int i=0;
57 46
58 buf = bt_ioremap(base, len); 47 buf = bt_ioremap(base, len);
59 if(buf==NULL) 48 if (buf == NULL)
60 return -1; 49 return -1;
61 50
62 data = buf; 51 data = buf;
@@ -65,36 +54,34 @@ static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dm
65 * Stop when we see all the items the table claimed to have 54 * Stop when we see all the items the table claimed to have
66 * OR we run off the end of the table (also happens) 55 * OR we run off the end of the table (also happens)
67 */ 56 */
68 57 while ((i < num) && (data - buf + sizeof(struct dmi_header)) <= len) {
69 while(i<num && data-buf+sizeof(struct dmi_header)<=len) 58 struct dmi_header *dm = (struct dmi_header *)data;
70 {
71 dm=(struct dmi_header *)data;
72 /* 59 /*
 73 * We want to know the total length (formatted area and strings) 60 * We want to know the total length (formatted area and strings)
74 * before decoding to make sure we won't run off the table in 61 * before decoding to make sure we won't run off the table in
75 * dmi_decode or dmi_string 62 * dmi_decode or dmi_string
76 */ 63 */
77 data+=dm->length; 64 data += dm->length;
78 while(data-buf<len-1 && (data[0] || data[1])) 65 while ((data - buf < len - 1) && (data[0] || data[1]))
79 data++; 66 data++;
80 if(data-buf<len-1) 67 if (data - buf < len - 1)
81 decode(dm); 68 decode(dm);
82 data+=2; 69 data += 2;
83 i++; 70 i++;
84 } 71 }
85 bt_iounmap(buf, len); 72 bt_iounmap(buf, len);
86 return 0; 73 return 0;
87} 74}
88 75
89 76static int __init dmi_checksum(u8 *buf)
90inline static int __init dmi_checksum(u8 *buf)
91{ 77{
92 u8 sum=0; 78 u8 sum = 0;
93 int a; 79 int a;
94 80
95 for(a=0; a<15; a++) 81 for (a = 0; a < 15; a++)
96 sum+=buf[a]; 82 sum += buf[a];
97 return (sum==0); 83
84 return sum == 0;
98} 85}
99 86
100static int __init dmi_iterate(void (*decode)(struct dmi_header *)) 87static int __init dmi_iterate(void (*decode)(struct dmi_header *))
@@ -110,28 +97,30 @@ static int __init dmi_iterate(void (*decode)(struct dmi_header *))
110 p = ioremap(0xF0000, 0x10000); 97 p = ioremap(0xF0000, 0x10000);
111 if (p == NULL) 98 if (p == NULL)
112 return -1; 99 return -1;
100
113 for (q = p; q < p + 0x10000; q += 16) { 101 for (q = p; q < p + 0x10000; q += 16) {
114 memcpy_fromio(buf, q, 15); 102 memcpy_fromio(buf, q, 15);
115 if(memcmp(buf, "_DMI_", 5)==0 && dmi_checksum(buf)) 103 if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) {
116 { 104 u16 num = (buf[13] << 8) | buf[12];
117 u16 num=buf[13]<<8|buf[12]; 105 u16 len = (buf[7] << 8) | buf[6];
118 u16 len=buf[7]<<8|buf[6]; 106 u32 base = (buf[11] << 24) | (buf[10] << 16) |
119 u32 base=buf[11]<<24|buf[10]<<16|buf[9]<<8|buf[8]; 107 (buf[9] << 8) | buf[8];
120 108
121 /* 109 /*
122 * DMI version 0.0 means that the real version is taken from 110 * DMI version 0.0 means that the real version is taken from
123 * the SMBIOS version, which we don't know at this point. 111 * the SMBIOS version, which we don't know at this point.
124 */ 112 */
125 if(buf[14]!=0) 113 if (buf[14] != 0)
126 printk(KERN_INFO "DMI %d.%d present.\n", 114 printk(KERN_INFO "DMI %d.%d present.\n",
127 buf[14]>>4, buf[14]&0x0F); 115 buf[14] >> 4, buf[14] & 0xF);
128 else 116 else
129 printk(KERN_INFO "DMI present.\n"); 117 printk(KERN_INFO "DMI present.\n");
118
130 dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n", 119 dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n",
131 num, len)); 120 num, len));
132 dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", 121 dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", base));
133 base)); 122
134 if(dmi_table(base,len, num, decode)==0) 123 if (dmi_table(base,len, num, decode) == 0)
135 return 0; 124 return 0;
136 } 125 }
137 } 126 }
@@ -143,16 +132,17 @@ static char *dmi_ident[DMI_STRING_MAX];
143/* 132/*
144 * Save a DMI string 133 * Save a DMI string
145 */ 134 */
146
147static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) 135static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
148{ 136{
149 char *d = (char*)dm; 137 char *d = (char*)dm;
150 char *p = dmi_string(dm, d[string]); 138 char *p = dmi_string(dm, d[string]);
151 if(p==NULL || *p == 0) 139
140 if (p == NULL || *p == 0)
152 return; 141 return;
153 if (dmi_ident[slot]) 142 if (dmi_ident[slot])
154 return; 143 return;
155 dmi_ident[slot] = alloc_bootmem(strlen(p)+1); 144
145 dmi_ident[slot] = alloc_bootmem(strlen(p) + 1);
156 if(dmi_ident[slot]) 146 if(dmi_ident[slot])
157 strcpy(dmi_ident[slot], p); 147 strcpy(dmi_ident[slot], p);
158 else 148 else
@@ -160,281 +150,47 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
160} 150}
161 151
162/* 152/*
163 * Ugly compatibility crap.
164 */
165#define dmi_blacklist dmi_system_id
166#define NO_MATCH { DMI_NONE, NULL}
167#define MATCH DMI_MATCH
168
169/*
170 * Toshiba keyboard likes to repeat keys when they are not repeated.
171 */
172
173static __init int broken_toshiba_keyboard(struct dmi_blacklist *d)
174{
175 printk(KERN_WARNING "Toshiba with broken keyboard detected. If your keyboard sometimes generates 3 keypresses instead of one, see http://davyd.ucc.asn.au/projects/toshiba/README\n");
176 return 0;
177}
178
179
180#ifdef CONFIG_ACPI_SLEEP
181static __init int reset_videomode_after_s3(struct dmi_blacklist *d)
182{
183 /* See acpi_wakeup.S */
184 extern long acpi_video_flags;
185 acpi_video_flags |= 2;
186 return 0;
187}
188#endif
189
190
191#ifdef CONFIG_ACPI_BOOT
192extern int acpi_force;
193
194static __init __attribute__((unused)) int dmi_disable_acpi(struct dmi_blacklist *d)
195{
196 if (!acpi_force) {
197 printk(KERN_NOTICE "%s detected: acpi off\n",d->ident);
198 disable_acpi();
199 } else {
200 printk(KERN_NOTICE
201 "Warning: DMI blacklist says broken, but acpi forced\n");
202 }
203 return 0;
204}
205
206/*
207 * Limit ACPI to CPU enumeration for HT
208 */
209static __init __attribute__((unused)) int force_acpi_ht(struct dmi_blacklist *d)
210{
211 if (!acpi_force) {
212 printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident);
213 disable_acpi();
214 acpi_ht = 1;
215 } else {
216 printk(KERN_NOTICE
217 "Warning: acpi=force overrules DMI blacklist: acpi=ht\n");
218 }
219 return 0;
220}
221#endif
222
223#ifdef CONFIG_ACPI_PCI
224static __init int disable_acpi_irq(struct dmi_blacklist *d)
225{
226 if (!acpi_force) {
227 printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
228 d->ident);
229 acpi_noirq_set();
230 }
231 return 0;
232}
233static __init int disable_acpi_pci(struct dmi_blacklist *d)
234{
235 if (!acpi_force) {
236 printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
237 d->ident);
238 acpi_disable_pci();
239 }
240 return 0;
241}
242#endif
243
244/*
245 * Process the DMI blacklists
246 */
247
248
249/*
250 * This will be expanded over time to force things like the APM
251 * interrupt mask settings according to the laptop
252 */
253
254static __initdata struct dmi_blacklist dmi_blacklist[]={
255
256 { broken_toshiba_keyboard, "Toshiba Satellite 4030cdt", { /* Keyboard generates spurious repeats */
257 MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
258 NO_MATCH, NO_MATCH, NO_MATCH
259 } },
260#ifdef CONFIG_ACPI_SLEEP
261 { reset_videomode_after_s3, "Toshiba Satellite 4030cdt", { /* Reset video mode after returning from ACPI S3 sleep */
262 MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
263 NO_MATCH, NO_MATCH, NO_MATCH
264 } },
265#endif
266
267#ifdef CONFIG_ACPI_BOOT
268 /*
269 * If your system is blacklisted here, but you find that acpi=force
270 * works for you, please contact acpi-devel@sourceforge.net
271 */
272
273 /*
274 * Boxes that need ACPI disabled
275 */
276
277 { dmi_disable_acpi, "IBM Thinkpad", {
278 MATCH(DMI_BOARD_VENDOR, "IBM"),
279 MATCH(DMI_BOARD_NAME, "2629H1G"),
280 NO_MATCH, NO_MATCH }},
281
282 /*
283 * Boxes that need acpi=ht
284 */
285
286 { force_acpi_ht, "FSC Primergy T850", {
287 MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
288 MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
289 NO_MATCH, NO_MATCH }},
290
291 { force_acpi_ht, "DELL GX240", {
292 MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
293 MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
294 NO_MATCH, NO_MATCH }},
295
296 { force_acpi_ht, "HP VISUALIZE NT Workstation", {
297 MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
298 MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
299 NO_MATCH, NO_MATCH }},
300
301 { force_acpi_ht, "Compaq Workstation W8000", {
302 MATCH(DMI_SYS_VENDOR, "Compaq"),
303 MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
304 NO_MATCH, NO_MATCH }},
305
306 { force_acpi_ht, "ASUS P4B266", {
307 MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
308 MATCH(DMI_BOARD_NAME, "P4B266"),
309 NO_MATCH, NO_MATCH }},
310
311 { force_acpi_ht, "ASUS P2B-DS", {
312 MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
313 MATCH(DMI_BOARD_NAME, "P2B-DS"),
314 NO_MATCH, NO_MATCH }},
315
316 { force_acpi_ht, "ASUS CUR-DLS", {
317 MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
318 MATCH(DMI_BOARD_NAME, "CUR-DLS"),
319 NO_MATCH, NO_MATCH }},
320
321 { force_acpi_ht, "ABIT i440BX-W83977", {
322 MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
323 MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
324 NO_MATCH, NO_MATCH }},
325
326 { force_acpi_ht, "IBM Bladecenter", {
327 MATCH(DMI_BOARD_VENDOR, "IBM"),
328 MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
329 NO_MATCH, NO_MATCH }},
330
331 { force_acpi_ht, "IBM eServer xSeries 360", {
332 MATCH(DMI_BOARD_VENDOR, "IBM"),
333 MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
334 NO_MATCH, NO_MATCH }},
335
336 { force_acpi_ht, "IBM eserver xSeries 330", {
337 MATCH(DMI_BOARD_VENDOR, "IBM"),
338 MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
339 NO_MATCH, NO_MATCH }},
340
341 { force_acpi_ht, "IBM eserver xSeries 440", {
342 MATCH(DMI_BOARD_VENDOR, "IBM"),
343 MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
344 NO_MATCH, NO_MATCH }},
345
346#endif // CONFIG_ACPI_BOOT
347
348#ifdef CONFIG_ACPI_PCI
349 /*
350 * Boxes that need ACPI PCI IRQ routing disabled
351 */
352
353 { disable_acpi_irq, "ASUS A7V", {
354 MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
355 MATCH(DMI_BOARD_NAME, "<A7V>"),
356 /* newer BIOS, Revision 1011, does work */
357 MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"),
358 NO_MATCH }},
359
360 /*
361 * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
362 */
363 { disable_acpi_pci, "ASUS PR-DLS", { /* _BBN 0 bug */
364 MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
365 MATCH(DMI_BOARD_NAME, "PR-DLS"),
366 MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"),
367 MATCH(DMI_BIOS_DATE, "03/21/2003") }},
368
369 { disable_acpi_pci, "Acer TravelMate 36x Laptop", {
370 MATCH(DMI_SYS_VENDOR, "Acer"),
371 MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
372 NO_MATCH, NO_MATCH
373 } },
374
375#endif
376
377 { NULL, }
378};
379
380/*
381 * Process a DMI table entry. Right now all we care about are the BIOS 153 * Process a DMI table entry. Right now all we care about are the BIOS
382 * and machine entries. For 2.5 we should pull the smbus controller info 154 * and machine entries. For 2.5 we should pull the smbus controller info
383 * out of here. 155 * out of here.
384 */ 156 */
385
386static void __init dmi_decode(struct dmi_header *dm) 157static void __init dmi_decode(struct dmi_header *dm)
387{ 158{
388#ifdef DMI_DEBUG 159 u8 *data __attribute__((__unused__)) = (u8 *)dm;
389 u8 *data = (u8 *)dm;
390#endif
391 160
392 switch(dm->type) 161 switch(dm->type) {
393 { 162 case 0:
394 case 0: 163 dmi_printk(("BIOS Vendor: %s\n", dmi_string(dm, data[4])));
395 dmi_printk(("BIOS Vendor: %s\n", 164 dmi_save_ident(dm, DMI_BIOS_VENDOR, 4);
396 dmi_string(dm, data[4]))); 165 dmi_printk(("BIOS Version: %s\n", dmi_string(dm, data[5])));
397 dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); 166 dmi_save_ident(dm, DMI_BIOS_VERSION, 5);
398 dmi_printk(("BIOS Version: %s\n", 167 dmi_printk(("BIOS Release: %s\n", dmi_string(dm, data[8])));
399 dmi_string(dm, data[5]))); 168 dmi_save_ident(dm, DMI_BIOS_DATE, 8);
400 dmi_save_ident(dm, DMI_BIOS_VERSION, 5); 169 break;
401 dmi_printk(("BIOS Release: %s\n", 170 case 1:
402 dmi_string(dm, data[8]))); 171 dmi_printk(("System Vendor: %s\n", dmi_string(dm, data[4])));
403 dmi_save_ident(dm, DMI_BIOS_DATE, 8); 172 dmi_save_ident(dm, DMI_SYS_VENDOR, 4);
404 break; 173 dmi_printk(("Product Name: %s\n", dmi_string(dm, data[5])));
405 case 1: 174 dmi_save_ident(dm, DMI_PRODUCT_NAME, 5);
406 dmi_printk(("System Vendor: %s\n", 175 dmi_printk(("Version: %s\n", dmi_string(dm, data[6])));
407 dmi_string(dm, data[4]))); 176 dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
408 dmi_save_ident(dm, DMI_SYS_VENDOR, 4); 177 dmi_printk(("Serial Number: %s\n", dmi_string(dm, data[7])));
409 dmi_printk(("Product Name: %s\n", 178 dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7);
410 dmi_string(dm, data[5]))); 179 break;
411 dmi_save_ident(dm, DMI_PRODUCT_NAME, 5); 180 case 2:
412 dmi_printk(("Version: %s\n", 181 dmi_printk(("Board Vendor: %s\n", dmi_string(dm, data[4])));
413 dmi_string(dm, data[6]))); 182 dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
414 dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); 183 dmi_printk(("Board Name: %s\n", dmi_string(dm, data[5])));
415 dmi_printk(("Serial Number: %s\n", 184 dmi_save_ident(dm, DMI_BOARD_NAME, 5);
416 dmi_string(dm, data[7]))); 185 dmi_printk(("Board Version: %s\n", dmi_string(dm, data[6])));
417 break; 186 dmi_save_ident(dm, DMI_BOARD_VERSION, 6);
418 case 2: 187 break;
419 dmi_printk(("Board Vendor: %s\n",
420 dmi_string(dm, data[4])));
421 dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
422 dmi_printk(("Board Name: %s\n",
423 dmi_string(dm, data[5])));
424 dmi_save_ident(dm, DMI_BOARD_NAME, 5);
425 dmi_printk(("Board Version: %s\n",
426 dmi_string(dm, data[6])));
427 dmi_save_ident(dm, DMI_BOARD_VERSION, 6);
428 break;
429 } 188 }
430} 189}
431 190
432void __init dmi_scan_machine(void) 191void __init dmi_scan_machine(void)
433{ 192{
434 int err = dmi_iterate(dmi_decode); 193 if (dmi_iterate(dmi_decode))
435 if(err == 0)
436 dmi_check_system(dmi_blacklist);
437 else
438 printk(KERN_INFO "DMI not present.\n"); 194 printk(KERN_INFO "DMI not present.\n");
439} 195}
440 196
@@ -470,7 +226,6 @@ fail: d++;
470 226
471 return count; 227 return count;
472} 228}
473
474EXPORT_SYMBOL(dmi_check_system); 229EXPORT_SYMBOL(dmi_check_system);
475 230
476/** 231/**
@@ -480,8 +235,8 @@ EXPORT_SYMBOL(dmi_check_system);
480 * Returns one DMI data value, can be used to perform 235 * Returns one DMI data value, can be used to perform
481 * complex DMI data checks. 236 * complex DMI data checks.
482 */ 237 */
483char * dmi_get_system_info(int field) 238char *dmi_get_system_info(int field)
484{ 239{
485 return dmi_ident[field]; 240 return dmi_ident[field];
486} 241}
487 242EXPORT_SYMBOL(dmi_get_system_info);
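
With the blacklist tables moved out of dmi_scan.c, callers now build their own dmi_system_id arrays and hand them to the exported dmi_check_system(). A kernel-style sketch of that usage — it will not build outside the tree, and the vendor, board name and callback are all invented:

#include <linux/kernel.h>
#include <linux/dmi.h>

static int __init my_quirk(struct dmi_system_id *d)
{
	printk(KERN_INFO "%s detected, applying quirk\n", d->ident);
	return 0;
}

static struct dmi_system_id __initdata my_dmi_table[] = {
	{
		.callback = my_quirk,
		.ident    = "Example Board",            /* invented */
		.matches  = {
			DMI_MATCH(DMI_BOARD_VENDOR, "Example Vendor"),
			DMI_MATCH(DMI_BOARD_NAME, "EX-1000"),
		},
	},
	{ }	/* terminator */
};

/* somewhere in the driver's init path: dmi_check_system(my_dmi_table); */
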
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index f732f427b418..385883ea8c19 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -30,6 +30,7 @@
30#include <linux/ioport.h> 30#include <linux/ioport.h>
31#include <linux/module.h> 31#include <linux/module.h>
32#include <linux/efi.h> 32#include <linux/efi.h>
33#include <linux/kexec.h>
33 34
34#include <asm/setup.h> 35#include <asm/setup.h>
35#include <asm/io.h> 36#include <asm/io.h>
@@ -598,6 +599,9 @@ efi_initialize_iomem_resources(struct resource *code_resource,
598 if (md->type == EFI_CONVENTIONAL_MEMORY) { 599 if (md->type == EFI_CONVENTIONAL_MEMORY) {
599 request_resource(res, code_resource); 600 request_resource(res, code_resource);
600 request_resource(res, data_resource); 601 request_resource(res, data_resource);
602#ifdef CONFIG_KEXEC
603 request_resource(res, &crashk_res);
604#endif
601 } 605 }
602 } 606 }
603} 607}
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index e966fc8c44c4..4477bb107098 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -299,7 +299,6 @@ is386: movl $2,%ecx # set MP
299 movl %eax,%cr0 299 movl %eax,%cr0
300 300
301 call check_x87 301 call check_x87
302 incb ready
303 lgdt cpu_gdt_descr 302 lgdt cpu_gdt_descr
304 lidt idt_descr 303 lidt idt_descr
305 ljmp $(__KERNEL_CS),$1f 304 ljmp $(__KERNEL_CS),$1f
@@ -316,8 +315,9 @@ is386: movl $2,%ecx # set MP
316 lldt %ax 315 lldt %ax
317 cld # gcc2 wants the direction flag cleared at all times 316 cld # gcc2 wants the direction flag cleared at all times
318#ifdef CONFIG_SMP 317#ifdef CONFIG_SMP
319 movb ready, %cl 318 movb ready, %cl
320 cmpb $1,%cl 319 movb $1, ready
320 cmpb $0,%cl
321 je 1f # the first CPU calls start_kernel 321 je 1f # the first CPU calls start_kernel
322 # all other CPUs call initialize_secondary 322 # all other CPUs call initialize_secondary
323 call initialize_secondary 323 call initialize_secondary
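
The reworked head.S sequence reads the old value of ready before storing 1, so only the first CPU through sees 0 and calls start_kernel; every later CPU branches to initialize_secondary. A C rendering of that (non-atomic) test-and-set — safe in head.S only because the boot protocol serializes CPU startup:

#include <stdio.h>

static unsigned char ready;	/* mirrors the 'ready' byte in head.S */

static const char *boot_dispatch(void)
{
	unsigned char was_ready = ready;        /* movb ready, %cl */
	ready = 1;                              /* movb $1, ready  */
	return was_ready == 0 ? "start_kernel"  /* cmpb $0, %cl; je */
			      : "initialize_secondary";
}

int main(void)
{
	printf("CPU0 -> %s\n", boot_dispatch());
	printf("CPU1 -> %s\n", boot_dispatch());
	return 0;
}
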
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
index 903190a4b3ff..180f070d03cb 100644
--- a/arch/i386/kernel/i386_ksyms.c
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -1,97 +1,17 @@
1#include <linux/config.h> 1#include <linux/config.h>
2#include <linux/module.h> 2#include <linux/module.h>
3#include <linux/smp.h>
4#include <linux/user.h>
5#include <linux/elfcore.h>
6#include <linux/mca.h>
7#include <linux/sched.h>
8#include <linux/in6.h>
9#include <linux/interrupt.h>
10#include <linux/smp_lock.h>
11#include <linux/pm.h>
12#include <linux/pci.h>
13#include <linux/apm_bios.h>
14#include <linux/kernel.h>
15#include <linux/string.h>
16#include <linux/tty.h>
17#include <linux/highmem.h>
18#include <linux/time.h>
19
20#include <asm/semaphore.h>
21#include <asm/processor.h>
22#include <asm/i387.h>
23#include <asm/uaccess.h>
24#include <asm/checksum.h> 3#include <asm/checksum.h>
25#include <asm/io.h>
26#include <asm/delay.h>
27#include <asm/irq.h>
28#include <asm/mmx.h>
29#include <asm/desc.h> 4#include <asm/desc.h>
30#include <asm/pgtable.h>
31#include <asm/tlbflush.h>
32#include <asm/nmi.h>
33#include <asm/ist.h>
34#include <asm/kdebug.h>
35
36extern void dump_thread(struct pt_regs *, struct user *);
37extern spinlock_t rtc_lock;
38 5
39/* This is definitely a GPL-only symbol */ 6/* This is definitely a GPL-only symbol */
40EXPORT_SYMBOL_GPL(cpu_gdt_table); 7EXPORT_SYMBOL_GPL(cpu_gdt_table);
41 8
42#if defined(CONFIG_APM_MODULE)
43extern void machine_real_restart(unsigned char *, int);
44EXPORT_SYMBOL(machine_real_restart);
45extern void default_idle(void);
46EXPORT_SYMBOL(default_idle);
47#endif
48
49#ifdef CONFIG_SMP
50extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
51extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
52#endif
53
54#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
55extern struct drive_info_struct drive_info;
56EXPORT_SYMBOL(drive_info);
57#endif
58
59extern unsigned long cpu_khz;
60extern unsigned long get_cmos_time(void);
61
62/* platform dependent support */
63EXPORT_SYMBOL(boot_cpu_data);
64#ifdef CONFIG_DISCONTIGMEM
65EXPORT_SYMBOL(node_data);
66EXPORT_SYMBOL(physnode_map);
67#endif
68#ifdef CONFIG_X86_NUMAQ
69EXPORT_SYMBOL(xquad_portio);
70#endif
71EXPORT_SYMBOL(dump_thread);
72EXPORT_SYMBOL(dump_fpu);
73EXPORT_SYMBOL_GPL(kernel_fpu_begin);
74EXPORT_SYMBOL(__ioremap);
75EXPORT_SYMBOL(ioremap_nocache);
76EXPORT_SYMBOL(iounmap);
77EXPORT_SYMBOL(kernel_thread);
78EXPORT_SYMBOL(pm_idle);
79EXPORT_SYMBOL(pm_power_off);
80EXPORT_SYMBOL(get_cmos_time);
81EXPORT_SYMBOL(cpu_khz);
82EXPORT_SYMBOL(apm_info);
83
84EXPORT_SYMBOL(__down_failed); 9EXPORT_SYMBOL(__down_failed);
85EXPORT_SYMBOL(__down_failed_interruptible); 10EXPORT_SYMBOL(__down_failed_interruptible);
86EXPORT_SYMBOL(__down_failed_trylock); 11EXPORT_SYMBOL(__down_failed_trylock);
87EXPORT_SYMBOL(__up_wakeup); 12EXPORT_SYMBOL(__up_wakeup);
88/* Networking helper routines. */ 13/* Networking helper routines. */
89EXPORT_SYMBOL(csum_partial_copy_generic); 14EXPORT_SYMBOL(csum_partial_copy_generic);
90/* Delay loops */
91EXPORT_SYMBOL(__ndelay);
92EXPORT_SYMBOL(__udelay);
93EXPORT_SYMBOL(__delay);
94EXPORT_SYMBOL(__const_udelay);
95 15
96EXPORT_SYMBOL(__get_user_1); 16EXPORT_SYMBOL(__get_user_1);
97EXPORT_SYMBOL(__get_user_2); 17EXPORT_SYMBOL(__get_user_2);
@@ -105,87 +25,11 @@ EXPORT_SYMBOL(__put_user_8);
105EXPORT_SYMBOL(strpbrk); 25EXPORT_SYMBOL(strpbrk);
106EXPORT_SYMBOL(strstr); 26EXPORT_SYMBOL(strstr);
107 27
108EXPORT_SYMBOL(strncpy_from_user);
109EXPORT_SYMBOL(__strncpy_from_user);
110EXPORT_SYMBOL(clear_user);
111EXPORT_SYMBOL(__clear_user);
112EXPORT_SYMBOL(__copy_from_user_ll);
113EXPORT_SYMBOL(__copy_to_user_ll);
114EXPORT_SYMBOL(strnlen_user);
115
116EXPORT_SYMBOL(dma_alloc_coherent);
117EXPORT_SYMBOL(dma_free_coherent);
118
119#ifdef CONFIG_PCI
120EXPORT_SYMBOL(pci_mem_start);
121#endif
122
123#ifdef CONFIG_PCI_BIOS
124EXPORT_SYMBOL(pcibios_set_irq_routing);
125EXPORT_SYMBOL(pcibios_get_irq_routing_table);
126#endif
127
128#ifdef CONFIG_X86_USE_3DNOW
129EXPORT_SYMBOL(_mmx_memcpy);
130EXPORT_SYMBOL(mmx_clear_page);
131EXPORT_SYMBOL(mmx_copy_page);
132#endif
133
134#ifdef CONFIG_X86_HT
135EXPORT_SYMBOL(smp_num_siblings);
136EXPORT_SYMBOL(cpu_sibling_map);
137#endif
138
139#ifdef CONFIG_SMP 28#ifdef CONFIG_SMP
140EXPORT_SYMBOL(cpu_data); 29extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
141EXPORT_SYMBOL(cpu_online_map); 30extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
142EXPORT_SYMBOL(cpu_callout_map);
143EXPORT_SYMBOL(__write_lock_failed); 31EXPORT_SYMBOL(__write_lock_failed);
144EXPORT_SYMBOL(__read_lock_failed); 32EXPORT_SYMBOL(__read_lock_failed);
145
146/* Global SMP stuff */
147EXPORT_SYMBOL(smp_call_function);
148
149/* TLB flushing */
150EXPORT_SYMBOL(flush_tlb_page);
151#endif
152
153#ifdef CONFIG_X86_IO_APIC
154EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
155#endif
156
157#ifdef CONFIG_MCA
158EXPORT_SYMBOL(machine_id);
159#endif
160
161#ifdef CONFIG_VT
162EXPORT_SYMBOL(screen_info);
163#endif
164
165EXPORT_SYMBOL(get_wchan);
166
167EXPORT_SYMBOL(rtc_lock);
168
169EXPORT_SYMBOL_GPL(set_nmi_callback);
170EXPORT_SYMBOL_GPL(unset_nmi_callback);
171
172EXPORT_SYMBOL(register_die_notifier);
173#ifdef CONFIG_HAVE_DEC_LOCK
174EXPORT_SYMBOL(_atomic_dec_and_lock);
175#endif
176
177EXPORT_SYMBOL(__PAGE_KERNEL);
178
179#ifdef CONFIG_HIGHMEM
180EXPORT_SYMBOL(kmap);
181EXPORT_SYMBOL(kunmap);
182EXPORT_SYMBOL(kmap_atomic);
183EXPORT_SYMBOL(kunmap_atomic);
184EXPORT_SYMBOL(kmap_atomic_to_page);
185#endif
186
187#if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
188EXPORT_SYMBOL(ist_info);
189#endif 33#endif
190 34
191EXPORT_SYMBOL(csum_partial); 35EXPORT_SYMBOL(csum_partial);
diff --git a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c
index c55e037f08f7..d75524758daf 100644
--- a/arch/i386/kernel/i387.c
+++ b/arch/i386/kernel/i387.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/config.h> 11#include <linux/config.h>
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/module.h>
13#include <asm/processor.h> 14#include <asm/processor.h>
14#include <asm/i387.h> 15#include <asm/i387.h>
15#include <asm/math_emu.h> 16#include <asm/math_emu.h>
@@ -79,17 +80,7 @@ void kernel_fpu_begin(void)
79 } 80 }
80 clts(); 81 clts();
81} 82}
82 83EXPORT_SYMBOL_GPL(kernel_fpu_begin);
83void restore_fpu( struct task_struct *tsk )
84{
85 if ( cpu_has_fxsr ) {
86 asm volatile( "fxrstor %0"
87 : : "m" (tsk->thread.i387.fxsave) );
88 } else {
89 asm volatile( "frstor %0"
90 : : "m" (tsk->thread.i387.fsave) );
91 }
92}
93 84
94/* 85/*
95 * FPU tag word conversions. 86 * FPU tag word conversions.
@@ -526,6 +517,7 @@ int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu )
526 517
527 return fpvalid; 518 return fpvalid;
528} 519}
520EXPORT_SYMBOL(dump_fpu);
529 521
530int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu) 522int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu)
531{ 523{
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
index 2c4813b47e57..178f4e9bac9d 100644
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -268,10 +268,22 @@ static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
268 return 0; 268 return 0;
269} 269}
270 270
271static int i8259A_shutdown(struct sys_device *dev)
272{
273 /* Put the i8259A into a quiescent state that
274 * the kernel initialization code can get it
275 * out of.
276 */
277 outb(0xff, 0x21); /* mask all of 8259A-1 */
 278 outb(0xff, 0xA1); /* mask all of 8259A-2 */
279 return 0;
280}
281
271static struct sysdev_class i8259_sysdev_class = { 282static struct sysdev_class i8259_sysdev_class = {
272 set_kset_name("i8259"), 283 set_kset_name("i8259"),
273 .suspend = i8259A_suspend, 284 .suspend = i8259A_suspend,
274 .resume = i8259A_resume, 285 .resume = i8259A_resume,
286 .shutdown = i8259A_shutdown,
275}; 287};
276 288
277static struct sys_device device_i8259A = { 289static struct sys_device device_i8259A = {
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 7a324e8b86f9..6578f40bd501 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -31,12 +31,13 @@
31#include <linux/mc146818rtc.h> 31#include <linux/mc146818rtc.h>
32#include <linux/compiler.h> 32#include <linux/compiler.h>
33#include <linux/acpi.h> 33#include <linux/acpi.h>
34 34#include <linux/module.h>
35#include <linux/sysdev.h> 35#include <linux/sysdev.h>
36#include <asm/io.h> 36#include <asm/io.h>
37#include <asm/smp.h> 37#include <asm/smp.h>
38#include <asm/desc.h> 38#include <asm/desc.h>
39#include <asm/timer.h> 39#include <asm/timer.h>
40#include <asm/i8259.h>
40 41
41#include <mach_apic.h> 42#include <mach_apic.h>
42 43
@@ -573,12 +574,14 @@ static int balanced_irq(void *unused)
573 for ( ; ; ) { 574 for ( ; ; ) {
574 set_current_state(TASK_INTERRUPTIBLE); 575 set_current_state(TASK_INTERRUPTIBLE);
575 time_remaining = schedule_timeout(time_remaining); 576 time_remaining = schedule_timeout(time_remaining);
576 try_to_freeze(PF_FREEZE); 577 try_to_freeze();
577 if (time_after(jiffies, 578 if (time_after(jiffies,
578 prev_balance_time+balanced_irq_interval)) { 579 prev_balance_time+balanced_irq_interval)) {
580 preempt_disable();
579 do_irq_balance(); 581 do_irq_balance();
580 prev_balance_time = jiffies; 582 prev_balance_time = jiffies;
581 time_remaining = balanced_irq_interval; 583 time_remaining = balanced_irq_interval;
584 preempt_enable();
582 } 585 }
583 } 586 }
584 return 0; 587 return 0;
@@ -630,10 +633,8 @@ static int __init balanced_irq_init(void)
630 printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); 633 printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
631failed: 634failed:
632 for (i = 0; i < NR_CPUS; i++) { 635 for (i = 0; i < NR_CPUS; i++) {
633 if(irq_cpu_data[i].irq_delta) 636 kfree(irq_cpu_data[i].irq_delta);
634 kfree(irq_cpu_data[i].irq_delta); 637 kfree(irq_cpu_data[i].last_irq);
635 if(irq_cpu_data[i].last_irq)
636 kfree(irq_cpu_data[i].last_irq);
637 } 638 }
638 return 0; 639 return 0;
639} 640}
@@ -812,6 +813,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
812 } 813 }
813 return best_guess; 814 return best_guess;
814} 815}
816EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
815 817
816/* 818/*
817 * This function currently is only a helper for the i386 smp boot process where 819 * This function currently is only a helper for the i386 smp boot process where
@@ -1565,7 +1567,6 @@ void print_all_local_APICs (void)
1565 1567
1566void /*__init*/ print_PIC(void) 1568void /*__init*/ print_PIC(void)
1567{ 1569{
1568 extern spinlock_t i8259A_lock;
1569 unsigned int v; 1570 unsigned int v;
1570 unsigned long flags; 1571 unsigned long flags;
1571 1572
@@ -1633,12 +1634,43 @@ static void __init enable_IO_APIC(void)
1633 */ 1634 */
1634void disable_IO_APIC(void) 1635void disable_IO_APIC(void)
1635{ 1636{
1637 int pin;
1636 /* 1638 /*
1637 * Clear the IO-APIC before rebooting: 1639 * Clear the IO-APIC before rebooting:
1638 */ 1640 */
1639 clear_IO_APIC(); 1641 clear_IO_APIC();
1640 1642
1641 disconnect_bsp_APIC(); 1643 /*
 1644 * If the i8259 is routed through an IOAPIC,
 1645 * put that IOAPIC in virtual wire mode
 1646 * so legacy interrupts can be delivered.
1647 */
1648 pin = find_isa_irq_pin(0, mp_ExtINT);
1649 if (pin != -1) {
1650 struct IO_APIC_route_entry entry;
1651 unsigned long flags;
1652
1653 memset(&entry, 0, sizeof(entry));
1654 entry.mask = 0; /* Enabled */
1655 entry.trigger = 0; /* Edge */
1656 entry.irr = 0;
1657 entry.polarity = 0; /* High */
1658 entry.delivery_status = 0;
1659 entry.dest_mode = 0; /* Physical */
1660 entry.delivery_mode = 7; /* ExtInt */
1661 entry.vector = 0;
1662 entry.dest.physical.physical_dest = 0;
1663
1664
1665 /*
1666 * Add it to the IO-APIC irq-routing table:
1667 */
1668 spin_lock_irqsave(&ioapic_lock, flags);
1669 io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
1670 io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
1671 spin_unlock_irqrestore(&ioapic_lock, flags);
1672 }
1673 disconnect_bsp_APIC(pin != -1);
1642} 1674}
1643 1675
1644/* 1676/*
@@ -1659,6 +1691,12 @@ static void __init setup_ioapic_ids_from_mpc(void)
1659 unsigned long flags; 1691 unsigned long flags;
1660 1692
1661 /* 1693 /*
1694 * Don't check I/O APIC IDs for xAPIC systems. They have
1695 * no meaning without the serial APIC bus.
1696 */
1697 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15))
1698 return;
1699 /*
1662 * This is broken; anything with a real cpu count has to 1700 * This is broken; anything with a real cpu count has to
1663 * circumvent this idiocy regardless. 1701 * circumvent this idiocy regardless.
1664 */ 1702 */
@@ -1684,10 +1722,6 @@ static void __init setup_ioapic_ids_from_mpc(void)
1684 mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; 1722 mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
1685 } 1723 }
1686 1724
1687 /* Don't check I/O APIC IDs for some xAPIC systems. They have
1688 * no meaning without the serial APIC bus. */
1689 if (NO_IOAPIC_CHECK)
1690 continue;
1691 /* 1725 /*
1692 * Sanity check, is the ID really free? Every APIC in a 1726 * Sanity check, is the ID really free? Every APIC in a
1693 * system must have a unique ID or we get lots of nice 1727 * system must have a unique ID or we get lots of nice
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 73945a3c53c4..ce66dcc26d90 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -15,6 +15,9 @@
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
 
 DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
 EXPORT_PER_CPU_SYMBOL(irq_stat);
@@ -153,6 +156,11 @@ void irq_ctx_init(int cpu)
 	       cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
 }
 
+void irq_ctx_exit(int cpu)
+{
+	hardirq_ctx[cpu] = NULL;
+}
+
 extern asmlinkage void __do_softirq(void);
 
 asmlinkage void do_softirq(void)
@@ -210,9 +218,8 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	if (i == 0) {
 		seq_printf(p, " ");
-		for (j=0; j<NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "CPU%d ",j);
+		for_each_cpu(j)
+			seq_printf(p, "CPU%d ",j);
 		seq_putc(p, '\n');
 	}
 
@@ -225,9 +232,8 @@ int show_interrupts(struct seq_file *p, void *v)
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+		for_each_cpu(j)
+			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
 		seq_printf(p, " %14s", irq_desc[i].handler->typename);
 		seq_printf(p, " %s", action->name);
@@ -240,16 +246,14 @@ skip:
 		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	} else if (i == NR_IRQS) {
 		seq_printf(p, "NMI: ");
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ", nmi_count(j));
+		for_each_cpu(j)
+			seq_printf(p, "%10u ", nmi_count(j));
 		seq_putc(p, '\n');
 #ifdef CONFIG_X86_LOCAL_APIC
 		seq_printf(p, "LOC: ");
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ",
-					per_cpu(irq_stat,j).apic_timer_irqs);
+		for_each_cpu(j)
+			seq_printf(p, "%10u ",
+				per_cpu(irq_stat,j).apic_timer_irqs);
 		seq_putc(p, '\n');
 #endif
 		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
@@ -259,3 +263,45 @@ skip:
 	}
 	return 0;
 }
+
+#ifdef CONFIG_HOTPLUG_CPU
+#include <mach_apic.h>
+
+void fixup_irqs(cpumask_t map)
+{
+	unsigned int irq;
+	static int warned;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		cpumask_t mask;
+		if (irq == 2)
+			continue;
+
+		cpus_and(mask, irq_affinity[irq], map);
+		if (any_online_cpu(mask) == NR_CPUS) {
+			printk("Breaking affinity for irq %i\n", irq);
+			mask = map;
+		}
+		if (irq_desc[irq].handler->set_affinity)
+			irq_desc[irq].handler->set_affinity(irq, mask);
+		else if (irq_desc[irq].action && !(warned++))
+			printk("Cannot set affinity for irq %i\n", irq);
+	}
+
+#if 0
+	barrier();
+	/* Ingo Molnar says: "after the IO-APIC masks have been redirected
+	   [note the nop - the interrupt-enable boundary on x86 is two
+	   instructions from sti] - to flush out pending hardirqs and
+	   IPIs. After this point nothing is supposed to reach this CPU." */
+	__asm__ __volatile__("sti; nop; cli");
+	barrier();
+#else
+	/* That doesn't seem sufficient. Give it 1ms. */
+	local_irq_enable();
+	mdelay(1);
+	local_irq_disable();
+#endif
+}
+#endif
+
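
fixup_irqs() is the CPU-hotplug half of this change: before a CPU goes away, every IRQ whose affinity would otherwise point only at the departing CPU is re-targeted to the surviving mask. A hedged sketch of the expected call site on the offline path (the wrapper name here is illustrative; the real caller lives in the SMP teardown code):

static void offline_this_cpu_irqs(void)
{
	cpumask_t map = cpu_online_map;

	/* everyone but the CPU that is going down */
	cpu_clear(smp_processor_id(), map);
	fixup_irqs(map);
}
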
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index 59ff9b455069..a6d8c45961d3 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -23,6 +23,9 @@
  *		Rusty Russell).
  * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
  *		interface to access function arguments.
+ * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
+ *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
+ *		<prasanna@in.ibm.com> added function-return probes.
  */
 
 #include <linux/config.h>
@@ -30,15 +33,14 @@
 #include <linux/ptrace.h>
 #include <linux/spinlock.h>
 #include <linux/preempt.h>
+#include <asm/cacheflush.h>
 #include <asm/kdebug.h>
 #include <asm/desc.h>
 
-/* kprobe_status settings */
-#define KPROBE_HIT_ACTIVE	0x00000001
-#define KPROBE_HIT_SS		0x00000002
-
 static struct kprobe *current_kprobe;
 static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags;
+static struct kprobe *kprobe_prev;
+static unsigned long kprobe_status_prev, kprobe_old_eflags_prev, kprobe_saved_eflags_prev;
 static struct pt_regs jprobe_saved_regs;
 static long *jprobe_saved_esp;
 /* copy of the kernel stack at the probe fire time */
@@ -68,16 +70,50 @@ int arch_prepare_kprobe(struct kprobe *p)
 void arch_copy_kprobe(struct kprobe *p)
 {
 	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	p->opcode = *p->addr;
 }
 
-void arch_remove_kprobe(struct kprobe *p)
+void arch_arm_kprobe(struct kprobe *p)
 {
+	*p->addr = BREAKPOINT_INSTRUCTION;
+	flush_icache_range((unsigned long) p->addr,
+			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
 }
 
-static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs)
+void arch_disarm_kprobe(struct kprobe *p)
 {
 	*p->addr = p->opcode;
-	regs->eip = (unsigned long)p->addr;
+	flush_icache_range((unsigned long) p->addr,
+			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+}
+
+void arch_remove_kprobe(struct kprobe *p)
+{
+}
+
+static inline void save_previous_kprobe(void)
+{
+	kprobe_prev = current_kprobe;
+	kprobe_status_prev = kprobe_status;
+	kprobe_old_eflags_prev = kprobe_old_eflags;
+	kprobe_saved_eflags_prev = kprobe_saved_eflags;
+}
+
+static inline void restore_previous_kprobe(void)
+{
+	current_kprobe = kprobe_prev;
+	kprobe_status = kprobe_status_prev;
+	kprobe_old_eflags = kprobe_old_eflags_prev;
+	kprobe_saved_eflags = kprobe_saved_eflags_prev;
+}
+
+static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs)
+{
+	current_kprobe = p;
+	kprobe_saved_eflags = kprobe_old_eflags
+		= (regs->eflags & (TF_MASK | IF_MASK));
+	if (is_IF_modifier(p->opcode))
+		kprobe_saved_eflags &= ~IF_MASK;
 }
 
 static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
@@ -91,6 +127,25 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 	regs->eip = (unsigned long)&p->ainsn.insn;
 }
 
+void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
+{
+	unsigned long *sara = (unsigned long *)&regs->esp;
+	struct kretprobe_instance *ri;
+
+	if ((ri = get_free_rp_inst(rp)) != NULL) {
+		ri->rp = rp;
+		ri->task = current;
+		ri->ret_addr = (kprobe_opcode_t *) *sara;
+
+		/* Replace the return addr with trampoline addr */
+		*sara = (unsigned long) &kretprobe_trampoline;
+
+		add_rp_inst(ri);
+	} else {
+		rp->nmissed++;
+	}
+}
+
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
  * remain disabled throughout this function.
@@ -127,8 +182,18 @@ static int kprobe_handler(struct pt_regs *regs)
 				unlock_kprobes();
 				goto no_kprobe;
 			}
-			disarm_kprobe(p, regs);
-			ret = 1;
+			/* We have reentered the kprobe_handler(), since
+			 * another probe was hit while within the handler.
+			 * We here save the original kprobe variables and
+			 * just single step on the instruction of the new probe
+			 * without calling any user handlers.
+			 */
+			save_previous_kprobe();
+			set_current_kprobe(p, regs);
+			p->nmissed++;
+			prepare_singlestep(p, regs);
+			kprobe_status = KPROBE_REENTER;
+			return 1;
 		} else {
 			p = current_kprobe;
 			if (p->break_handler && p->break_handler(p, regs)) {
@@ -163,11 +228,7 @@ static int kprobe_handler(struct pt_regs *regs)
 	}
 
 	kprobe_status = KPROBE_HIT_ACTIVE;
-	current_kprobe = p;
-	kprobe_saved_eflags = kprobe_old_eflags
-		= (regs->eflags & (TF_MASK | IF_MASK));
-	if (is_IF_modifier(p->opcode))
-		kprobe_saved_eflags &= ~IF_MASK;
+	set_current_kprobe(p, regs);
 
 	if (p->pre_handler && p->pre_handler(p, regs))
 		/* handler has already set things up, so skip ss setup */
@@ -184,6 +245,78 @@ no_kprobe:
 }
 
 /*
+ * For function-return probes, init_kprobes() establishes a probepoint
+ * here. When a retprobed function returns, this probe is hit and
+ * trampoline_probe_handler() runs, calling the kretprobe's handler.
+ */
+void kretprobe_trampoline_holder(void)
+{
+	asm volatile (  ".global kretprobe_trampoline\n"
+			"kretprobe_trampoline: \n"
+			"nop\n");
+}
+
+/*
+ * Called when we hit the probe point at kretprobe_trampoline
+ */
+int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri = NULL;
+	struct hlist_head *head;
+	struct hlist_node *node, *tmp;
+	unsigned long orig_ret_address = 0;
+	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+
+	head = kretprobe_inst_table_head(current);
+
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because multiple functions in the call path
+	 * have a return probe installed on them, and/or more than one
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+	 *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		recycle_rp_inst(ri);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+	regs->eip = orig_ret_address;
+
+	unlock_kprobes();
+	preempt_enable_no_resched();
+
+	/*
+	 * By returning a non-zero value, we are telling
+	 * kprobe_handler() that we have handled unlocking
+	 * and re-enabling preemption.
+	 */
+	return 1;
+}
+
+/*
  * Called after single-stepping.  p->addr is the address of the
  * instruction whose first byte has been replaced by the "int 3"
  * instruction.  To avoid the SMP problems that can occur when we
@@ -263,13 +396,21 @@ static inline int post_kprobe_handler(struct pt_regs *regs)
 	if (!kprobe_running())
 		return 0;
 
-	if (current_kprobe->post_handler)
+	if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) {
+		kprobe_status = KPROBE_HIT_SSDONE;
 		current_kprobe->post_handler(current_kprobe, regs, 0);
+	}
 
 	resume_execution(current_kprobe, regs);
 	regs->eflags |= kprobe_saved_eflags;
 
+	/* Restore the original saved kprobe variables and continue. */
+	if (kprobe_status == KPROBE_REENTER) {
+		restore_previous_kprobe();
+		goto out;
+	}
 	unlock_kprobes();
+out:
 	preempt_enable_no_resched();
 
 	/*
@@ -390,3 +531,13 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	}
 	return 0;
 }
+
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+	return register_kprobe(&trampoline_p);
+}
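
With the trampoline registered at boot by arch_init_kprobes(), a module can attach a return probe and inspect the return value in %eax. A hedged sketch of a consumer; the target address lookup is assumed (e.g. from System.map) and error handling is trimmed:

static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	/* on i386 the return value comes back in %eax */
	printk("probed function returned %ld\n", (long)regs->eax);
	return 0;
}

static struct kretprobe my_rp = {
	.handler   = ret_handler,
	.maxactive = 4,		/* instances preallocated for concurrency */
};

static int __init my_probe_init(void)
{
	my_rp.kp.addr = (kprobe_opcode_t *)TARGET_FUNCTION_ADDR; /* assumed */
	return register_kretprobe(&my_rp);
}
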
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
new file mode 100644
index 000000000000..cb699a2aa1f8
--- /dev/null
+++ b/arch/i386/kernel/machine_kexec.c
@@ -0,0 +1,226 @@
+/*
+ * machine_kexec.c - handle transition of Linux booting another kernel
+ * Copyright (C) 2002-2005 Eric Biederman  <ebiederm@xmission.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/delay.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/cpufeature.h>
+#include <asm/desc.h>
+
+static inline unsigned long read_cr3(void)
+{
+	unsigned long cr3;
+	asm volatile("movl %%cr3,%0": "=r"(cr3));
+	return cr3;
+}
+
+#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+
+#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L2_ATTR (_PAGE_PRESENT)
+
+#define LEVEL0_SIZE (1UL << 12UL)
+
+#ifndef CONFIG_X86_PAE
+#define LEVEL1_SIZE (1UL << 22UL)
+static u32 pgtable_level1[1024] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+	unsigned long level1_index, level2_index;
+	u32 *pgtable_level2;
+
+	/* Find the current page table */
+	pgtable_level2 = __va(read_cr3());
+
+	/* Find the indexes of the physical address to identity map */
+	level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+	level2_index = address / LEVEL1_SIZE;
+
+	/* Identity map the page table entry */
+	pgtable_level1[level1_index] = address | L0_ATTR;
+	pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+
+	/* Flush the tlb so the new mapping takes effect.
+	 * Global tlb entries are not flushed but that is not an issue.
+	 */
+	load_cr3(pgtable_level2);
+}
+
+#else
+#define LEVEL1_SIZE (1UL << 21UL)
+#define LEVEL2_SIZE (1UL << 30UL)
+static u64 pgtable_level1[512] PAGE_ALIGNED;
+static u64 pgtable_level2[512] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+	unsigned long level1_index, level2_index, level3_index;
+	u64 *pgtable_level3;
+
+	/* Find the current page table */
+	pgtable_level3 = __va(read_cr3());
+
+	/* Find the indexes of the physical address to identity map */
+	level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+	level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
+	level3_index = address / LEVEL2_SIZE;
+
+	/* Identity map the page table entry */
+	pgtable_level1[level1_index] = address | L0_ATTR;
+	pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+	set_64bit(&pgtable_level3[level3_index],
+		  __pa(pgtable_level2) | L2_ATTR);
+
+	/* Flush the tlb so the new mapping takes effect.
+	 * Global tlb entries are not flushed but that is not an issue.
+	 */
+	load_cr3(pgtable_level3);
+}
+#endif
+
+static void set_idt(void *newidt, __u16 limit)
+{
+	struct Xgt_desc_struct curidt;
+
+	/* ia32 supports unaligned loads & stores */
+	curidt.size    = limit;
+	curidt.address = (unsigned long)newidt;
+
+	__asm__ __volatile__ (
+		"lidtl %0\n"
+		: : "m" (curidt)
+		);
+};
+
+
+static void set_gdt(void *newgdt, __u16 limit)
+{
+	struct Xgt_desc_struct curgdt;
+
+	/* ia32 supports unaligned loads & stores */
+	curgdt.size    = limit;
+	curgdt.address = (unsigned long)newgdt;
+
+	__asm__ __volatile__ (
+		"lgdtl %0\n"
+		: : "m" (curgdt)
+		);
+};
+
+static void load_segments(void)
+{
+#define __STR(X) #X
+#define STR(X) __STR(X)
+
+	__asm__ __volatile__ (
+		"\tljmp $"STR(__KERNEL_CS)",$1f\n"
+		"\t1:\n"
+		"\tmovl $"STR(__KERNEL_DS)",%eax\n"
+		"\tmovl %eax,%ds\n"
+		"\tmovl %eax,%es\n"
+		"\tmovl %eax,%fs\n"
+		"\tmovl %eax,%gs\n"
+		"\tmovl %eax,%ss\n"
+		);
+#undef STR
+#undef __STR
+}
+
+typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
+	unsigned long indirection_page,
+	unsigned long reboot_code_buffer,
+	unsigned long start_address,
+	unsigned int has_pae) ATTRIB_NORET;
+
+const extern unsigned char relocate_new_kernel[];
+extern void relocate_new_kernel_end(void);
+const extern unsigned int relocate_new_kernel_size;
+
+/*
+ * An architecture hook called to validate the
+ * proposed image and prepare the control pages
+ * as needed.  The pages for KEXEC_CONTROL_CODE_SIZE
+ * have been allocated, but the segments have not yet
+ * been copied into the kernel.
+ *
+ * Do whatever setup is needed on the image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ *
+ * Currently nothing.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+	return 0;
+}
+
+/*
+ * Undo anything leftover by machine_kexec_prepare
+ * when an image is freed.
+ */
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+NORET_TYPE void machine_kexec(struct kimage *image)
+{
+	unsigned long page_list;
+	unsigned long reboot_code_buffer;
+
+	relocate_new_kernel_t rnk;
+
+	/* Interrupts aren't acceptable while we reboot */
+	local_irq_disable();
+
+	/* Compute some offsets */
+	reboot_code_buffer = page_to_pfn(image->control_code_page)
+						<< PAGE_SHIFT;
+	page_list = image->head;
+
+	/* Set up an identity mapping for the reboot_code_buffer */
+	identity_map_page(reboot_code_buffer);
+
+	/* copy it out */
+	memcpy((void *)reboot_code_buffer, relocate_new_kernel,
+	       relocate_new_kernel_size);
+
+	/* The segment registers are funny things: they are
+	 * automatically loaded from a table in memory whenever you
+	 * set them to a specific selector, but this table is never
+	 * accessed again unless you set the segment to a different
+	 * selector.
+	 *
+	 * The more common model is that they are caches, where the
+	 * behind-the-scenes work is done, but which may also be
+	 * dropped at arbitrary times.
+	 *
+	 * I take advantage of this here by force loading the
+	 * segments, before I zap the gdt with an invalid value.
+	 */
+	load_segments();
+	/* The gdt & idt are now invalid.
+	 * If you want to load them you must set up your own idt & gdt.
+	 */
+	set_gdt(phys_to_virt(0),0);
+	set_idt(phys_to_virt(0),0);
+
+	/* now call it */
+	rnk = (relocate_new_kernel_t) reboot_code_buffer;
+	(*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
+}
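
The index arithmetic in identity_map_page() is plain integer division on the physical address. A runnable userspace illustration for the non-PAE layout (4 KB pages, 4 MB per page-directory entry):

#include <stdio.h>

#define LEVEL0_SIZE (1UL << 12)	/* 4 KB page */
#define LEVEL1_SIZE (1UL << 22)	/* 4 MB PDE reach, non-PAE */

int main(void)
{
	unsigned long address = 0x00345000UL;	/* arbitrary example */
	unsigned long l1 = (address % LEVEL1_SIZE) / LEVEL0_SIZE;
	unsigned long l2 = address / LEVEL1_SIZE;

	/* prints "PDE index 0, PTE index 837": the page lands in the
	 * first 4 MB slot, at entry 0x345 of the page table */
	printf("PDE index %lu, PTE index %lu\n", l2, l1);
	return 0;
}
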
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
index 1347ab4939e7..ce838abb27d8 100644
--- a/arch/i386/kernel/mpparse.c
+++ b/arch/i386/kernel/mpparse.c
@@ -67,7 +67,6 @@ unsigned long mp_lapic_addr
 
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_physical_apicid = -1U;
-unsigned int boot_cpu_logical_apicid = -1U;
 /* Internal processor count */
 static unsigned int __initdata num_processors;
 
@@ -180,7 +179,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
 	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
 		Dprintk("    Bootup CPU\n");
 		boot_cpu_physical_apicid = m->mpc_apicid;
-		boot_cpu_logical_apicid = apicid;
 	}
 
 	if (num_processors >= NR_CPUS) {
@@ -914,7 +912,10 @@ void __init mp_register_ioapic (
 	mp_ioapics[idx].mpc_apicaddr = address;
 
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-	mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15))
+		mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
+	else
+		mp_ioapics[idx].mpc_apicid = id;
 	mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
 
 	/*
@@ -1055,11 +1056,20 @@ void __init mp_config_acpi_legacy_irqs (void)
 	}
 }
 
+#define MAX_GSI_NUM	4096
+
 int mp_register_gsi (u32 gsi, int edge_level, int active_high_low)
 {
 	int ioapic = -1;
 	int ioapic_pin = 0;
 	int idx, bit = 0;
+	static int pci_irq = 16;
+	/*
+	 * Mapping between Global System Interrupts, which
+	 * represent all possible interrupts, and IRQs
+	 * assigned to actual devices.
+	 */
+	static int gsi_to_irq[MAX_GSI_NUM];
 
 #ifdef CONFIG_ACPI_BUS
 	/* Don't set up the ACPI SCI because it's already set up */
@@ -1094,11 +1104,34 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low)
 	if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
 		Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
 			mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
-		return gsi;
+		return gsi_to_irq[gsi];
 	}
 
 	mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
 
+	if (edge_level) {
+		/*
+		 * For PCI devices assign IRQs in order, avoiding gaps
+		 * due to unused I/O APIC pins.
+		 */
+		int irq = gsi;
+		if (gsi < MAX_GSI_NUM) {
+			if (gsi > 15)
+				gsi = pci_irq++;
+#ifdef CONFIG_ACPI_BUS
+			/*
+			 * Don't assign IRQ used by ACPI SCI
+			 */
+			if (gsi == acpi_fadt.sci_int)
+				gsi = pci_irq++;
+#endif
+			gsi_to_irq[irq] = gsi;
+		} else {
+			printk(KERN_ERR "GSI %u is too high\n", gsi);
+			return gsi;
+		}
+	}
+
 	io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
 		edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1,
 		active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1);
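
The static gsi_to_irq[] table is what lets a second registration of an already-programmed GSI hand back the same IRQ as the first. A small userspace model of the allocation policy (SCI avoidance and the actual I/O APIC programming are omitted):

#include <stdio.h>

#define MAX_GSI_NUM 4096

static int gsi_to_irq[MAX_GSI_NUM];
static int pci_irq = 16;

/* Model: legacy GSIs 0-15 keep their numbers; higher GSIs are packed
 * into sequential IRQs from 16, leaving no gaps for unused pins. */
static int register_gsi(unsigned int gsi)
{
	if (gsi >= MAX_GSI_NUM)
		return -1;
	if (gsi_to_irq[gsi])
		return gsi_to_irq[gsi];		/* already programmed */
	gsi_to_irq[gsi] = gsi > 15 ? pci_irq++ : (int)gsi;
	return gsi_to_irq[gsi];
}

int main(void)
{
	int a = register_gsi(9);
	int b = register_gsi(40);
	int c = register_gsi(72);

	/* prints "9 16 17": high GSIs 40 and 72 become dense IRQs */
	printf("%d %d %d\n", a, b, c);
	return 0;
}
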
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 2c0ee9c2d020..da6c46d667cb 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -28,8 +28,7 @@
 #include <linux/sysctl.h>
 
 #include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
+#include <asm/div64.h>
 #include <asm/nmi.h>
 
 #include "mach_traps.h"
@@ -324,6 +323,16 @@ static void clear_msr_range(unsigned int base, unsigned int n)
 		wrmsr(base+i, 0, 0);
 }
 
+static inline void write_watchdog_counter(const char *descr)
+{
+	u64 count = (u64)cpu_khz * 1000;
+
+	do_div(count, nmi_hz);
+	if (descr)
+		Dprintk("setting %s to -0x%08Lx\n", descr, count);
+	wrmsrl(nmi_perfctr_msr, 0 - count);
+}
+
 static void setup_k7_watchdog(void)
 {
 	unsigned int evntsel;
@@ -339,8 +348,7 @@ static void setup_k7_watchdog(void)
 		| K7_NMI_EVENT;
 
 	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
-	Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
-	wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
+	write_watchdog_counter("K7_PERFCTR0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= K7_EVNTSEL_ENABLE;
 	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
@@ -361,8 +369,7 @@ static void setup_p6_watchdog(void)
 		| P6_NMI_EVENT;
 
 	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
-	Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
-	wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
+	write_watchdog_counter("P6_PERFCTR0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= P6_EVNTSEL0_ENABLE;
 	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
@@ -402,8 +409,7 @@ static int setup_p4_watchdog(void)
 
 	wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
 	wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
-	Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
-	wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
+	write_watchdog_counter("P4_IQ_COUNTER0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
 	return 1;
@@ -518,7 +524,7 @@ void nmi_watchdog_tick (struct pt_regs * regs)
 		 * other P6 variant */
 		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	}
-	wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
+	write_watchdog_counter(NULL);
 }
 
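
write_watchdog_counter() widens the reload computation to 64 bits, so do_div() yields cpu_khz * 1000 / nmi_hz; the old expression divided first, throwing away the remainder, and risked overflow once the multiply no longer fit in 32 bits. A userspace comparison of the two orderings:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned long cpu_khz = 1234567;	/* ~1.23 GHz example */
	unsigned long nmi_hz = 1000;

	/* old order: 32-bit divide first, remainder lost */
	unsigned long old = cpu_khz / nmi_hz * 1000;

	/* new order: widen, multiply, then divide (what do_div does) */
	uint64_t count = (uint64_t)cpu_khz * 1000;
	count /= nmi_hz;

	/* prints "old=1234000 new=1234567" */
	printf("old=%lu new=%llu\n", old, (unsigned long long)count);
	return 0;
}
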
diff --git a/arch/i386/kernel/numaq.c b/arch/i386/kernel/numaq.c
index e51edf0a6564..5f5b075f860a 100644
--- a/arch/i386/kernel/numaq.c
+++ b/arch/i386/kernel/numaq.c
@@ -31,6 +31,7 @@
 #include <linux/nodemask.h>
 #include <asm/numaq.h>
 #include <asm/topology.h>
+#include <asm/processor.h>
 
 #define	MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
 
@@ -77,3 +78,11 @@ int __init get_memcfg_numaq(void)
 	smp_dump_qct();
 	return 1;
 }
+
+static int __init numaq_dsc_disable(void)
+{
+	printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
+	tsc_disable = 1;
+	return 0;
+}
+core_initcall(numaq_dsc_disable);
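
core_initcall() slots this hook near the front of the initcall sequence, ahead of device and late initcalls, so the flag is raised before most TSC consumers initialize. The same pattern fits any hypothetical platform quirk (the flag below is assumed, standing in for tsc_disable):

static int __init my_platform_quirk(void)
{
	printk(KERN_DEBUG "quirk: disabling feature X\n");
	feature_x_disable = 1;		/* assumed flag, like tsc_disable */
	return 0;
}
core_initcall(my_platform_quirk);
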
diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c
index 4de2e03c7b45..1e51427cc9eb 100644
--- a/arch/i386/kernel/pci-dma.c
+++ b/arch/i386/kernel/pci-dma.c
@@ -11,6 +11,7 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/pci.h>
+#include <linux/module.h>
 #include <asm/io.h>
 
 struct dma_coherent_mem {
@@ -54,6 +55,7 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 	}
 	return ret;
 }
+EXPORT_SYMBOL(dma_alloc_coherent);
 
 void dma_free_coherent(struct device *dev, size_t size,
 			 void *vaddr, dma_addr_t dma_handle)
@@ -68,6 +70,7 @@ void dma_free_coherent(struct device *dev, size_t size,
 	} else
 		free_pages((unsigned long)vaddr, order);
 }
+EXPORT_SYMBOL(dma_free_coherent);
 
 int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
 				dma_addr_t device_addr, size_t size, int flags)
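
With the two EXPORT_SYMBOLs in place, modular drivers can use the coherent DMA API directly. A hedged sketch of typical usage; the ring size and names are arbitrary:

static void *ring;
static dma_addr_t ring_dma;

static int setup_ring(struct pci_dev *pdev)
{
	/* one page, visible to the CPU at `ring` and to the device
	 * at the bus address ring_dma */
	ring = dma_alloc_coherent(&pdev->dev, 4096, &ring_dma, GFP_KERNEL);
	if (!ring)
		return -ENOMEM;
	return 0;
}

static void teardown_ring(struct pci_dev *pdev)
{
	dma_free_coherent(&pdev->dev, 4096, ring, ring_dma);
}
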
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 96e3ea6b17c7..e3f362e8af5b 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -13,6 +13,7 @@
 
 #include <stdarg.h>
 
+#include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/fs.h>
@@ -37,6 +38,7 @@
 #include <linux/kallsyms.h>
 #include <linux/ptrace.h>
 #include <linux/random.h>
+#include <linux/kprobes.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -54,6 +56,9 @@
 #include <linux/irq.h>
 #include <linux/err.h>
 
+#include <asm/tlbflush.h>
+#include <asm/cpu.h>
+
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
 static int hlt_counter;
@@ -73,6 +78,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
  * Power management idle function, if any..
  */
 void (*pm_idle)(void);
+EXPORT_SYMBOL(pm_idle);
 static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
 void disable_hlt(void)
@@ -105,6 +111,9 @@ void default_idle(void)
 		cpu_relax();
 	}
 }
+#ifdef CONFIG_APM_MODULE
+EXPORT_SYMBOL(default_idle);
+#endif
 
 /*
  * On SMP it's slightly faster (but much more power-consuming!)
@@ -138,14 +147,42 @@ static void poll_idle (void)
 	}
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+#include <asm/nmi.h>
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+	/* This must be done before dead CPU ack */
+	cpu_exit_clear();
+	wbinvd();
+	mb();
+	/* Ack it */
+	__get_cpu_var(cpu_state) = CPU_DEAD;
+
+	/*
+	 * With physical CPU hotplug, we should halt the cpu
+	 */
+	local_irq_disable();
+	while (1)
+		__asm__ __volatile__("hlt":::"memory");
+}
+#else
+static inline void play_dead(void)
+{
+	BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
 /*
  * The idle thread. There's no useful work to be
  * done, so just try to conserve power and have a
  * low exit latency (ie sit in a loop waiting for
  * somebody to say that they'd like to reschedule)
  */
-void cpu_idle (void)
+void cpu_idle(void)
 {
+	int cpu = raw_smp_processor_id();
+
 	/* endless idle loop with no priority at all */
 	while (1) {
 		while (!need_resched()) {
@@ -160,6 +197,9 @@ void cpu_idle (void)
 			if (!idle)
 				idle = default_idle;
 
+			if (cpu_is_offline(cpu))
+				play_dead();
+
 			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
 			idle();
 		}
@@ -218,7 +258,7 @@ static void mwait_idle(void)
 	}
 }
 
-void __init select_idle_routine(const struct cpuinfo_x86 *c)
+void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 	if (cpu_has(c, X86_FEATURE_MWAIT)) {
 		printk("monitor/mwait feature present.\n");
@@ -262,7 +302,7 @@ void show_regs(struct pt_regs * regs)
 	printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
 	print_symbol("EIP is at %s\n", regs->eip);
 
-	if (regs->xcs & 3)
+	if (user_mode(regs))
 		printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
 	printk(" EFLAGS: %08lx %s (%s)\n",
 	       regs->eflags, print_tainted(), system_utsname.release);
@@ -325,6 +365,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 	/* Ok, create the new process.. */
 	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
 }
+EXPORT_SYMBOL(kernel_thread);
 
 /*
  * Free current thread data structures etc..
@@ -334,6 +375,13 @@ void exit_thread(void)
 	struct task_struct *tsk = current;
 	struct thread_struct *t = &tsk->thread;
 
+	/*
+	 * Remove function-return probe instances associated with this task
+	 * and put them back on the free list. Do not insert an exit probe for
+	 * this function, it will be disabled by kprobe_flush_task if you do.
+	 */
+	kprobe_flush_task(tsk);
+
 	/* The process may have allocated an io port bitmap... nuke it. */
 	if (unlikely(NULL != t->io_bitmap_ptr)) {
 		int cpu = get_cpu();
@@ -357,6 +405,13 @@ void flush_thread(void)
 {
 	struct task_struct *tsk = current;
 
+	/*
+	 * Remove function-return probe instances associated with this task
+	 * and put them back on the free list. Do not insert an exit probe for
+	 * this function, it will be disabled by kprobe_flush_task if you do.
+	 */
+	kprobe_flush_task(tsk);
+
 	memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 	/*
@@ -508,6 +563,7 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
 
 	dump->u_fpvalid = dump_fpu (regs, &dump->i387);
 }
+EXPORT_SYMBOL(dump_thread);
 
 /*
  * Capture the user space registers if the task is not running (in user space)
@@ -561,6 +617,33 @@ handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
 }
 
 /*
+ * This function selects if the context switch from prev to next
+ * has to tweak the TSC disable bit in the cr4.
+ */
+static inline void disable_tsc(struct task_struct *prev_p,
+			       struct task_struct *next_p)
+{
+	struct thread_info *prev, *next;
+
+	/*
+	 * gcc should eliminate the ->thread_info dereference if
+	 * has_secure_computing returns 0 at compile time (SECCOMP=n).
+	 */
+	prev = prev_p->thread_info;
+	next = next_p->thread_info;
+
+	if (has_secure_computing(prev) || has_secure_computing(next)) {
+		/* slow path here */
+		if (has_secure_computing(prev) &&
+		    !has_secure_computing(next)) {
+			write_cr4(read_cr4() & ~X86_CR4_TSD);
+		} else if (!has_secure_computing(prev) &&
+			   has_secure_computing(next))
+			write_cr4(read_cr4() | X86_CR4_TSD);
+	}
+}
+
+/*
  * switch_to(x,y) should switch tasks from x to y.
 *
  * We fsave/fwait so that an exception goes off at the right time
@@ -617,28 +700,34 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
 
 	/*
 	 * Restore %fs and %gs if needed.
+	 *
+	 * Glibc normally makes %fs be zero, and %gs is one of
+	 * the TLS segments.
 	 */
-	if (unlikely(prev->fs | prev->gs | next->fs | next->gs)) {
+	if (unlikely(prev->fs | next->fs))
 		loadsegment(fs, next->fs);
+
+	if (prev->gs | next->gs)
 		loadsegment(gs, next->gs);
-	}
 
 	/*
 	 * Now maybe reload the debug registers
 	 */
 	if (unlikely(next->debugreg[7])) {
-		loaddebug(next, 0);
-		loaddebug(next, 1);
-		loaddebug(next, 2);
-		loaddebug(next, 3);
+		set_debugreg(next->debugreg[0], 0);
+		set_debugreg(next->debugreg[1], 1);
+		set_debugreg(next->debugreg[2], 2);
+		set_debugreg(next->debugreg[3], 3);
 		/* no 4 and 5 */
-		loaddebug(next, 6);
-		loaddebug(next, 7);
+		set_debugreg(next->debugreg[6], 6);
+		set_debugreg(next->debugreg[7], 7);
 	}
 
 	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
 		handle_io_bitmap(next, tss);
 
+	disable_tsc(prev_p, next_p);
+
 	return prev_p;
 }
 
@@ -731,6 +820,7 @@ unsigned long get_wchan(struct task_struct *p)
 	} while (count++ < 16);
 	return 0;
 }
+EXPORT_SYMBOL(get_wchan);
 
 /*
  * sys_alloc_thread_area: get a yet unused TLS descriptor index.
@@ -827,6 +917,8 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
 	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
 
+	memset(&info, 0, sizeof(info));
+
 	desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
 
 	info.entry_number = idx;
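
disable_tsc() sets CR4.TSD whenever the incoming task is under seccomp, so RDTSC from user mode faults instead of handing a timing channel to the sandbox. A userspace illustration against a kernel built with CONFIG_SECCOMP; this era enabled seccomp through a /proc file (the prctl() interface came later):

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	unsigned int lo, hi;
	int fd = open("/proc/self/seccomp", O_WRONLY);

	if (fd < 0 || write(fd, "1", 1) != 1)
		return 1;
	/* Strict mode is now on (read/write/exit/sigreturn only), and
	 * the next context switch sets CR4.TSD for this task; the RDTSC
	 * below then raises #GP at CPL 3 and the process is killed
	 * rather than reading the counter. */
	__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
	return 0;	/* not reached once TSD is in effect */
}
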
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index e34f651fa13c..0da59b42843c 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -668,7 +668,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
 	info.si_code = TRAP_BRKPT;
 
 	/* User-mode eip? */
-	info.si_addr = user_mode(regs) ? (void __user *) regs->eip : NULL;
+	info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL;
 
 	/* Send us the fakey SIGTRAP */
 	force_sig_info(SIGTRAP, &info, tsk);
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index 6dc27eb70ee7..c71fef31dc47 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -2,6 +2,7 @@
  *  linux/arch/i386/kernel/reboot.c
  */
 
+#include <linux/config.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/delay.h>
@@ -19,12 +20,12 @@
  * Power off function, if any
  */
 void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
 
 static int reboot_mode;
 static int reboot_thru_bios;
 
 #ifdef CONFIG_SMP
-int reboot_smp = 0;
 static int reboot_cpu = -1;
 /* shamelessly grabbed from lib/vsprintf.c for readability */
 #define is_digit(c)	((c) >= '0' && (c) <= '9')
@@ -47,7 +48,6 @@ static int __init reboot_setup(char *str)
 			break;
 #ifdef CONFIG_SMP
 		case 's': /* "smp" reboot by executing reset on BSP or other CPU*/
-			reboot_smp = 1;
 			if (is_digit(*(str+1))) {
 				reboot_cpu = (int) (*(str+1) - '0');
 				if (is_digit(*(str+2)))
@@ -86,33 +86,9 @@ static int __init set_bios_reboot(struct dmi_system_id *d)
 	return 0;
 }
 
-/*
- * Some machines require the "reboot=s" commandline option, this quirk makes that automatic.
- */
-static int __init set_smp_reboot(struct dmi_system_id *d)
-{
-#ifdef CONFIG_SMP
-	if (!reboot_smp) {
-		reboot_smp = 1;
-		printk(KERN_INFO "%s series board detected. Selecting SMP-method for reboots.\n", d->ident);
-	}
-#endif
-	return 0;
-}
-
-/*
- * Some machines require the "reboot=b,s" commandline option, this quirk makes that automatic.
- */
-static int __init set_smp_bios_reboot(struct dmi_system_id *d)
-{
-	set_smp_reboot(d);
-	set_bios_reboot(d);
-	return 0;
-}
-
 static struct dmi_system_id __initdata reboot_dmi_table[] = {
 	{	/* Handle problems with rebooting on Dell 1300's */
-		.callback = set_smp_bios_reboot,
+		.callback = set_bios_reboot,
 		.ident = "Dell PowerEdge 1300",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
@@ -295,42 +271,36 @@ void machine_real_restart(unsigned char *code, int length)
 		:
 		: "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100)));
 }
+#ifdef CONFIG_APM_MODULE
+EXPORT_SYMBOL(machine_real_restart);
+#endif
 
-void machine_restart(char * __unused)
+void machine_shutdown(void)
 {
 #ifdef CONFIG_SMP
-	int cpuid;
-
-	cpuid = GET_APIC_ID(apic_read(APIC_ID));
-
-	if (reboot_smp) {
-
-		/* check to see if reboot_cpu is valid;
-		   if it's not, default to the BSP */
-		if ((reboot_cpu == -1) ||
-		      (reboot_cpu > (NR_CPUS -1))  ||
-		      !physid_isset(cpuid, phys_cpu_present_map))
-			reboot_cpu = boot_cpu_physical_apicid;
-
-		reboot_smp = 0;  /* use this as a flag to only go through this once*/
-		/* re-run this function on the other CPUs;
-		   it will fall through this section since we have
-		   cleared reboot_smp, and do the reboot if it is the
-		   correct CPU, otherwise it halts. */
-		if (reboot_cpu != cpuid)
-			smp_call_function((void *)machine_restart , NULL, 1, 0);
-	}
-
-	/* if reboot_cpu is still -1, then we want a traditional reboot,
-	   and if we are not running on the reboot_cpu, halt */
-	if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
-		for (;;)
-		__asm__ __volatile__ ("hlt");
-	}
-	/*
-	 * Stop all CPUs and turn off local APICs and the IO-APIC, so
-	 * other OSs see a clean IRQ state.
+	int reboot_cpu_id;
+
+	/* The boot cpu is always logical cpu 0 */
+	reboot_cpu_id = 0;
+
+	/* See if there has been given a command line override */
+	if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
+		cpu_isset(reboot_cpu, cpu_online_map)) {
+		reboot_cpu_id = reboot_cpu;
+	}
+
+	/* Make certain the cpu I'm rebooting on is online */
+	if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
+		reboot_cpu_id = smp_processor_id();
+	}
+
+	/* Make certain I only run on the appropriate processor */
+	set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
+
+	/* O.K. Now that I'm on the appropriate processor, stop
+	 * all of the others, and disable their local APICs.
 	 */
+
 	smp_send_stop();
 #endif /* CONFIG_SMP */
 
@@ -339,7 +309,10 @@ void machine_restart(char * __unused)
 #ifdef CONFIG_X86_IO_APIC
 	disable_IO_APIC();
 #endif
+}
 
+void machine_emergency_restart(void)
+{
 	if (!reboot_thru_bios) {
 		if (efi_enabled) {
 			efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL);
@@ -362,23 +335,22 @@ void machine_restart(char * __unused)
 	machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
 }
 
-EXPORT_SYMBOL(machine_restart);
+void machine_restart(char * __unused)
+{
+	machine_shutdown();
+	machine_emergency_restart();
+}
 
 void machine_halt(void)
 {
 }
 
-EXPORT_SYMBOL(machine_halt);
-
 void machine_power_off(void)
 {
-	lapic_shutdown();
+	machine_shutdown();
 
-	if (efi_enabled)
-		efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL);
 	if (pm_power_off)
 		pm_power_off();
 }
 
-EXPORT_SYMBOL(machine_power_off);
-
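
The restart path is now split so kexec can reuse the quiesce step: machine_shutdown() herds execution onto one CPU and silences the others, and machine_emergency_restart() performs the actual reset. The pinning trick it uses is a general pattern for running code on one specific CPU; a hedged sketch in the 2.6-era API (the wrapper name is ours):

static void run_on_cpu(int cpu, void (*fn)(void))
{
	/* the scheduler migrates us onto `cpu` before this returns */
	set_cpus_allowed(current, cpumask_of_cpu(cpu));
	fn();
}
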
diff --git a/arch/i386/kernel/relocate_kernel.S b/arch/i386/kernel/relocate_kernel.S
new file mode 100644
index 000000000000..d312616effa1
--- /dev/null
+++ b/arch/i386/kernel/relocate_kernel.S
@@ -0,0 +1,120 @@
+/*
+ * relocate_kernel.S - put the kernel image in place to boot
+ * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <linux/linkage.h>
+
+	/*
+	 * Must be relocatable PIC code callable as a C function; once
+	 * it starts it can not use the previous process's stack.
+	 */
+	.globl relocate_new_kernel
+relocate_new_kernel:
+	/* read the arguments and say goodbye to the stack */
+	movl  4(%esp), %ebx /* page_list */
+	movl  8(%esp), %ebp /* reboot_code_buffer */
+	movl  12(%esp), %edx /* start address */
+	movl  16(%esp), %ecx /* cpu_has_pae */
+
+	/* zero out flags, and disable interrupts */
+	pushl $0
+	popfl
+
+	/* set a new stack at the bottom of our page... */
+	lea   4096(%ebp), %esp
+
+	/* store the parameters back on the stack */
+	pushl %edx /* store the start address */
+
+	/* Set cr0 to a known state:
+	 * 31 0 == Paging disabled
+	 * 18 0 == Alignment check disabled
+	 * 16 0 == Write protect disabled
+	 *  3 0 == No task switch
+	 *  2 0 == Don't do FP software emulation.
+	 *  0 1 == Protected mode enabled
+	 */
+	movl  %cr0, %eax
+	andl  $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax
+	orl   $(1<<0), %eax
+	movl  %eax, %cr0
+
+	/* clear cr4 if applicable */
+	testl %ecx, %ecx
+	jz    1f
+	/* Set cr4 to a known state:
+	 * Setting everything to zero seems safe.
+	 */
+	movl  %cr4, %eax
+	andl  $0, %eax
+	movl  %eax, %cr4
+
+	jmp   1f
+1:
+
+	/* Flush the TLB (needed?) */
+	xorl  %eax, %eax
+	movl  %eax, %cr3
+
+	/* Do the copies */
+	movl  %ebx, %ecx
+	jmp   1f
+
+0:	/* top, read another word from the indirection page */
+	movl  (%ebx), %ecx
+	addl  $4, %ebx
+1:
+	testl $0x1, %ecx /* is it a destination page */
+	jz    2f
+	movl  %ecx, %edi
+	andl  $0xfffff000, %edi
+	jmp   0b
+2:
+	testl $0x2, %ecx /* is it an indirection page */
+	jz    2f
+	movl  %ecx, %ebx
+	andl  $0xfffff000, %ebx
+	jmp   0b
+2:
+	testl $0x4, %ecx /* is it the done indicator */
+	jz    2f
+	jmp   3f
+2:
+	testl $0x8, %ecx /* is it the source indicator */
+	jz    0b	 /* Ignore it otherwise */
+	movl  %ecx, %esi /* For every source page do a copy */
+	andl  $0xfffff000, %esi
+
+	movl  $1024, %ecx
+	rep ; movsl
+	jmp   0b
+
+3:
+
+	/* To be certain of avoiding problems with self-modifying code
+	 * I need to execute a serializing instruction here.
+	 * So I flush the TLB: it's handy, and not processor dependent.
+	 */
+	xorl  %eax, %eax
+	movl  %eax, %cr3
+
+	/* set all of the registers to known values */
+	/* leave %esp alone */
+
+	xorl  %eax, %eax
+	xorl  %ebx, %ebx
+	xorl  %ecx, %ecx
+	xorl  %edx, %edx
+	xorl  %esi, %esi
+	xorl  %edi, %edi
+	xorl  %ebp, %ebp
+	ret
+relocate_new_kernel_end:
+
+	.globl relocate_new_kernel_size
+relocate_new_kernel_size:
+	.long relocate_new_kernel_end - relocate_new_kernel
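
The copy loop treats the low bits of each word fetched from the indirection page as a tag and the high bits as a page address. A runnable C model of the decoder; the IND_* values match the flags the assembly above tests:

#include <stdio.h>

#define IND_DESTINATION 0x1
#define IND_INDIRECTION 0x2
#define IND_DONE        0x4
#define IND_SOURCE      0x8

static void decode(unsigned long word)
{
	unsigned long page = word & 0xfffff000;

	if (word & IND_DESTINATION)
		printf("copy target is now %#lx\n", page);
	else if (word & IND_INDIRECTION)
		printf("continue fetching words at %#lx\n", page);
	else if (word & IND_DONE)
		printf("done, jump to the new kernel\n");
	else if (word & IND_SOURCE)
		printf("copy 4096 bytes from %#lx\n", page);
}

int main(void)
{
	decode(0x00100001);	/* destination page */
	decode(0x00200008);	/* source page */
	decode(0x00000004);	/* done marker */
	return 0;
}
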
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 2bfbddebdbf8..af4de58cab54 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -23,8 +23,10 @@
23 * This file handles the architecture-dependent parts of initialization 23 * This file handles the architecture-dependent parts of initialization
24 */ 24 */
25 25
26#include <linux/config.h>
26#include <linux/sched.h> 27#include <linux/sched.h>
27#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/mmzone.h>
28#include <linux/tty.h> 30#include <linux/tty.h>
29#include <linux/ioport.h> 31#include <linux/ioport.h>
30#include <linux/acpi.h> 32#include <linux/acpi.h>
@@ -41,7 +43,12 @@
41#include <linux/init.h> 43#include <linux/init.h>
42#include <linux/edd.h> 44#include <linux/edd.h>
43#include <linux/nodemask.h> 45#include <linux/nodemask.h>
46#include <linux/kexec.h>
47#include <linux/crash_dump.h>
48
44#include <video/edid.h> 49#include <video/edid.h>
50
51#include <asm/apic.h>
45#include <asm/e820.h> 52#include <asm/e820.h>
46#include <asm/mpspec.h> 53#include <asm/mpspec.h>
47#include <asm/setup.h> 54#include <asm/setup.h>
@@ -53,12 +60,15 @@
53#include "setup_arch_pre.h" 60#include "setup_arch_pre.h"
54#include <bios_ebda.h> 61#include <bios_ebda.h>
55 62
63/* Forward Declaration. */
64void __init find_max_pfn(void);
65
56/* This value is set up by the early boot code to point to the value 66/* This value is set up by the early boot code to point to the value
57 immediately after the boot time page tables. It contains a *physical* 67 immediately after the boot time page tables. It contains a *physical*
58 address, and must not be in the .bss segment! */ 68 address, and must not be in the .bss segment! */
59unsigned long init_pg_tables_end __initdata = ~0UL; 69unsigned long init_pg_tables_end __initdata = ~0UL;
60 70
61int disable_pse __initdata = 0; 71int disable_pse __devinitdata = 0;
62 72
63/* 73/*
64 * Machine setup.. 74 * Machine setup..
@@ -73,6 +83,7 @@ EXPORT_SYMBOL(efi_enabled);
73struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; 83struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
74/* common cpu data for all cpus */ 84/* common cpu data for all cpus */
75struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; 85struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
86EXPORT_SYMBOL(boot_cpu_data);
76 87
77unsigned long mmu_cr4_features; 88unsigned long mmu_cr4_features;
78 89
@@ -90,12 +101,18 @@ extern acpi_interrupt_flags acpi_sci_flags;
90 101
91/* for MCA, but anyone else can use it if they want */ 102/* for MCA, but anyone else can use it if they want */
92unsigned int machine_id; 103unsigned int machine_id;
104#ifdef CONFIG_MCA
105EXPORT_SYMBOL(machine_id);
106#endif
93unsigned int machine_submodel_id; 107unsigned int machine_submodel_id;
94unsigned int BIOS_revision; 108unsigned int BIOS_revision;
95unsigned int mca_pentium_flag; 109unsigned int mca_pentium_flag;
96 110
97/* For PCI or other memory-mapped resources */ 111/* For PCI or other memory-mapped resources */
98unsigned long pci_mem_start = 0x10000000; 112unsigned long pci_mem_start = 0x10000000;
113#ifdef CONFIG_PCI
114EXPORT_SYMBOL(pci_mem_start);
115#endif
99 116
100/* Boot loader ID as an integer, for the benefit of proc_dointvec */ 117/* Boot loader ID as an integer, for the benefit of proc_dointvec */
101int bootloader_type; 118int bootloader_type;
@@ -107,14 +124,26 @@ static unsigned int highmem_pages = -1;
107 * Setup options 124 * Setup options
108 */ 125 */
109struct drive_info_struct { char dummy[32]; } drive_info; 126struct drive_info_struct { char dummy[32]; } drive_info;
127#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
128 defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
129EXPORT_SYMBOL(drive_info);
130#endif
110struct screen_info screen_info; 131struct screen_info screen_info;
132#ifdef CONFIG_VT
133EXPORT_SYMBOL(screen_info);
134#endif
111struct apm_info apm_info; 135struct apm_info apm_info;
136EXPORT_SYMBOL(apm_info);
112struct sys_desc_table_struct { 137struct sys_desc_table_struct {
113 unsigned short length; 138 unsigned short length;
114 unsigned char table[0]; 139 unsigned char table[0];
115}; 140};
116struct edid_info edid_info; 141struct edid_info edid_info;
117struct ist_info ist_info; 142struct ist_info ist_info;
143#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
144 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
145EXPORT_SYMBOL(ist_info);
146#endif
118struct e820map e820; 147struct e820map e820;
119 148
120extern void early_cpu_init(void); 149extern void early_cpu_init(void);
@@ -711,6 +740,15 @@ static void __init parse_cmdline_early (char ** cmdline_p)
711 if (to != command_line) 740 if (to != command_line)
712 to--; 741 to--;
713 if (!memcmp(from+7, "exactmap", 8)) { 742 if (!memcmp(from+7, "exactmap", 8)) {
743#ifdef CONFIG_CRASH_DUMP
744 /* If we are doing a crash dump, we
745 * still need to know the real mem
746 * size before original memory map is
747 * reset.
748 */
749 find_max_pfn();
750 saved_max_pfn = max_pfn;
751#endif
714 from += 8+7; 752 from += 8+7;
715 e820.nr_map = 0; 753 e820.nr_map = 0;
716 userdef = 1; 754 userdef = 1;
@@ -814,6 +852,44 @@ static void __init parse_cmdline_early (char ** cmdline_p)
814#endif /* CONFIG_X86_LOCAL_APIC */ 852#endif /* CONFIG_X86_LOCAL_APIC */
815#endif /* CONFIG_ACPI_BOOT */ 853#endif /* CONFIG_ACPI_BOOT */
816 854
855#ifdef CONFIG_X86_LOCAL_APIC
856 /* enable local APIC */
857 else if (!memcmp(from, "lapic", 5))
858 lapic_enable();
859
860 /* disable local APIC */
861 else if (!memcmp(from, "nolapic", 6))
862 lapic_disable();
863#endif /* CONFIG_X86_LOCAL_APIC */
864
865#ifdef CONFIG_KEXEC
866 /* crashkernel=size@addr specifies the location to reserve for
867 * a crash kernel. By reserving this memory we guarantee
868 * that linux never set's it up as a DMA target.
869 * Useful for holding code to do something appropriate
870 * after a kernel panic.
871 */
872 else if (!memcmp(from, "crashkernel=", 12)) {
873 unsigned long size, base;
874 size = memparse(from+12, &from);
875 if (*from == '@') {
876 base = memparse(from+1, &from);
877 /* FIXME: Do I want a sanity check
878 * to validate the memory range?
879 */
880 crashk_res.start = base;
881 crashk_res.end = base + size - 1;
882 }
883 }
884#endif
885#ifdef CONFIG_CRASH_DUMP
886 /* elfcorehdr= specifies the location of elf core header
887 * stored by the crashed kernel.
888 */
889 else if (!memcmp(from, "elfcorehdr=", 11))
890 elfcorehdr_addr = memparse(from+11, &from);
891#endif
892
817 /* 893 /*
818 * highmem=size forces highmem to be exactly 'size' bytes. 894 * highmem=size forces highmem to be exactly 'size' bytes.
819 * This works even on boxes that have no highmem otherwise. 895 * This works even on boxes that have no highmem otherwise.
@@ -1022,7 +1098,7 @@ static void __init reserve_ebda_region(void)
1022 reserve_bootmem(addr, PAGE_SIZE); 1098 reserve_bootmem(addr, PAGE_SIZE);
1023} 1099}
1024 1100
1025#ifndef CONFIG_DISCONTIGMEM 1101#ifndef CONFIG_NEED_MULTIPLE_NODES
1026void __init setup_bootmem_allocator(void); 1102void __init setup_bootmem_allocator(void);
1027static unsigned long __init setup_memory(void) 1103static unsigned long __init setup_memory(void)
1028{ 1104{
@@ -1072,9 +1148,9 @@ void __init zone_sizes_init(void)
1072 free_area_init(zones_size); 1148 free_area_init(zones_size);
1073} 1149}
1074#else 1150#else
1075extern unsigned long setup_memory(void); 1151extern unsigned long __init setup_memory(void);
1076extern void zone_sizes_init(void); 1152extern void zone_sizes_init(void);
1077#endif /* !CONFIG_DISCONTIGMEM */ 1153#endif /* !CONFIG_NEED_MULTIPLE_NODES */
1078 1154
1079void __init setup_bootmem_allocator(void) 1155void __init setup_bootmem_allocator(void)
1080{ 1156{
@@ -1092,8 +1168,8 @@ void __init setup_bootmem_allocator(void)
1092 * the (very unlikely) case of us accidentally initializing the 1168 * the (very unlikely) case of us accidentally initializing the
1093 * bootmem allocator with an invalid RAM area. 1169 * bootmem allocator with an invalid RAM area.
1094 */ 1170 */
1095 reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) + 1171 reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
1096 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY)); 1172 bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
1097 1173
1098 /* 1174 /*
1099 * reserve physical page 0 - it's a special BIOS page on many boxes, 1175 * reserve physical page 0 - it's a special BIOS page on many boxes,
@@ -1149,6 +1225,11 @@ void __init setup_bootmem_allocator(void)
1149 } 1225 }
1150 } 1226 }
1151#endif 1227#endif
1228#ifdef CONFIG_KEXEC
1229 if (crashk_res.start != crashk_res.end)
1230 reserve_bootmem(crashk_res.start,
1231 crashk_res.end - crashk_res.start + 1);
1232#endif
1152} 1233}
1153 1234
1154/* 1235/*
@@ -1202,6 +1283,9 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat
1202 */ 1283 */
1203 request_resource(res, code_resource); 1284 request_resource(res, code_resource);
1204 request_resource(res, data_resource); 1285 request_resource(res, data_resource);
1286#ifdef CONFIG_KEXEC
1287 request_resource(res, &crashk_res);
1288#endif
1205 } 1289 }
1206 } 1290 }
1207} 1291}
@@ -1330,7 +1414,7 @@ static struct nop {
1330 This runs before SMP is initialized to avoid SMP problems with 1414 This runs before SMP is initialized to avoid SMP problems with
1331 self-modifying code. This implies that asymmetric systems where 1415 self-modifying code. This implies that asymmetric systems where
1332 APs have fewer capabilities than the boot processor are not handled. 1416 APs have fewer capabilities than the boot processor are not handled.
1333 In this case boot with "noreplacement". */ 1417 Tough. Make sure you disable such features by hand. */
1334void apply_alternatives(void *start, void *end) 1418void apply_alternatives(void *start, void *end)
1335{ 1419{
1336 struct alt_instr *a; 1420 struct alt_instr *a;
@@ -1358,24 +1442,12 @@ void apply_alternatives(void *start, void *end)
1358 } 1442 }
1359} 1443}
1360 1444
1361static int no_replacement __initdata = 0;
1362
1363void __init alternative_instructions(void) 1445void __init alternative_instructions(void)
1364{ 1446{
1365 extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 1447 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1366 if (no_replacement)
1367 return;
1368 apply_alternatives(__alt_instructions, __alt_instructions_end); 1448 apply_alternatives(__alt_instructions, __alt_instructions_end);
1369} 1449}
1370 1450
1371static int __init noreplacement_setup(char *s)
1372{
1373 no_replacement = 1;
1374 return 0;
1375}
1376
1377__setup("noreplacement", noreplacement_setup);
1378
1379static char * __init machine_specific_memory_setup(void); 1451static char * __init machine_specific_memory_setup(void);
1380 1452
1381#ifdef CONFIG_MCA 1453#ifdef CONFIG_MCA
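For orientation, the apply_alternatives() being made unconditional here walks the alt_instr records collected from the .altinstructions section and, for every site whose CPU feature bit is set, overwrites the original instructions with the replacement sequence. A rough sketch of the loop (field names follow the 2.6-era struct alt_instr; the real code pads with optimized multi-byte NOPs rather than plain 0x90):

    /* Sketch only; struct alt_instr and boot_cpu_has() come from the
     * i386 headers of this era. */
    void apply_alternatives_sketch(struct alt_instr *start, struct alt_instr *end)
    {
            struct alt_instr *a;

            for (a = start; a < end; a++) {
                    if (!boot_cpu_has(a->cpuid))
                            continue;       /* feature absent: keep original code */
                    memcpy(a->instr, a->replacement, a->replacementlen);
                    /* pad the rest of the patched site so the instruction
                     * stream stays well formed */
                    memset(a->instr + a->replacementlen, 0x90,
                           a->instrlen - a->replacementlen);
            }
    }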
@@ -1475,6 +1547,7 @@ void __init setup_arch(char **cmdline_p)
1475#endif 1547#endif
1476 paging_init(); 1548 paging_init();
1477 remapped_pgdat_init(); 1549 remapped_pgdat_init();
1550 sparse_init();
1478 zone_sizes_init(); 1551 zone_sizes_init();
1479 1552
1480 /* 1553 /*
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index ea46d028af08..89ef7adc63a4 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -346,8 +346,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
346extern void __user __kernel_sigreturn; 346extern void __user __kernel_sigreturn;
347extern void __user __kernel_rt_sigreturn; 347extern void __user __kernel_rt_sigreturn;
348 348
349static void setup_frame(int sig, struct k_sigaction *ka, 349static int setup_frame(int sig, struct k_sigaction *ka,
350 sigset_t *set, struct pt_regs * regs) 350 sigset_t *set, struct pt_regs * regs)
351{ 351{
352 void __user *restorer; 352 void __user *restorer;
353 struct sigframe __user *frame; 353 struct sigframe __user *frame;
@@ -429,13 +429,14 @@ static void setup_frame(int sig, struct k_sigaction *ka,
429 current->comm, current->pid, frame, regs->eip, frame->pretcode); 429 current->comm, current->pid, frame, regs->eip, frame->pretcode);
430#endif 430#endif
431 431
432 return; 432 return 1;
433 433
434give_sigsegv: 434give_sigsegv:
435 force_sigsegv(sig, current); 435 force_sigsegv(sig, current);
436 return 0;
436} 437}
437 438
438static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 439static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
439 sigset_t *set, struct pt_regs * regs) 440 sigset_t *set, struct pt_regs * regs)
440{ 441{
441 void __user *restorer; 442 void __user *restorer;
@@ -522,20 +523,23 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
522 current->comm, current->pid, frame, regs->eip, frame->pretcode); 523 current->comm, current->pid, frame, regs->eip, frame->pretcode);
523#endif 524#endif
524 525
525 return; 526 return 1;
526 527
527give_sigsegv: 528give_sigsegv:
528 force_sigsegv(sig, current); 529 force_sigsegv(sig, current);
530 return 0;
529} 531}
530 532
531/* 533/*
532 * OK, we're invoking a handler 534 * OK, we're invoking a handler
533 */ 535 */
534 536
535static void 537static int
536handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, 538handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
537 sigset_t *oldset, struct pt_regs * regs) 539 sigset_t *oldset, struct pt_regs * regs)
538{ 540{
541 int ret;
542
539 /* Are we from a system call? */ 543 /* Are we from a system call? */
540 if (regs->orig_eax >= 0) { 544 if (regs->orig_eax >= 0) {
541 /* If so, check system call restarting.. */ 545 /* If so, check system call restarting.. */
@@ -569,17 +573,19 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
569 573
570 /* Set up the stack frame */ 574 /* Set up the stack frame */
571 if (ka->sa.sa_flags & SA_SIGINFO) 575 if (ka->sa.sa_flags & SA_SIGINFO)
572 setup_rt_frame(sig, ka, info, oldset, regs); 576 ret = setup_rt_frame(sig, ka, info, oldset, regs);
573 else 577 else
574 setup_frame(sig, ka, oldset, regs); 578 ret = setup_frame(sig, ka, oldset, regs);
575 579
576 if (!(ka->sa.sa_flags & SA_NODEFER)) { 580 if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
577 spin_lock_irq(&current->sighand->siglock); 581 spin_lock_irq(&current->sighand->siglock);
578 sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); 582 sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
579 sigaddset(&current->blocked,sig); 583 sigaddset(&current->blocked,sig);
580 recalc_sigpending(); 584 recalc_sigpending();
581 spin_unlock_irq(&current->sighand->siglock); 585 spin_unlock_irq(&current->sighand->siglock);
582 } 586 }
587
588 return ret;
583} 589}
584 590
585/* 591/*
@@ -599,13 +605,11 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
599 * kernel mode. Just return without doing anything 605 * kernel mode. Just return without doing anything
600 * if so. 606 * if so.
601 */ 607 */
602 if ((regs->xcs & 3) != 3) 608 if (!user_mode(regs))
603 return 1; 609 return 1;
604 610
605 if (current->flags & PF_FREEZE) { 611 if (try_to_freeze())
606 refrigerator(0);
607 goto no_signal; 612 goto no_signal;
608 }
609 613
610 if (!oldset) 614 if (!oldset)
611 oldset = &current->blocked; 615 oldset = &current->blocked;
@@ -618,12 +622,11 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
618 * inside the kernel. 622 * inside the kernel.
619 */ 623 */
620 if (unlikely(current->thread.debugreg[7])) { 624 if (unlikely(current->thread.debugreg[7])) {
621 loaddebug(&current->thread, 7); 625 set_debugreg(current->thread.debugreg[7], 7);
622 } 626 }
623 627
624 /* Whee! Actually deliver the signal. */ 628 /* Whee! Actually deliver the signal. */
625 handle_signal(signr, &info, &ka, oldset, regs); 629 return handle_signal(signr, &info, &ka, oldset, regs);
626 return 1;
627 } 630 }
628 631
629 no_signal: 632 no_signal:
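The thread running through the signal.c changes above: setup_frame() and setup_rt_frame() now report whether the user stack frame was actually written, and handle_signal()/do_signal() propagate that result, so a failed frame setup no longer updates current->blocked or counts as a delivered signal. A compressed user-space illustration of the control-flow shape (the stubs are hypothetical, standing in for the frame writers):

    #include <stdio.h>

    /* Hypothetical stand-in for setup_frame(): 1 = frame written,
     * 0 = user stack unwritable (the kernel forces SIGSEGV there). */
    static int setup_frame_stub(int stack_ok)
    {
            return stack_ok;
    }

    static int handle_signal_stub(int stack_ok)
    {
            int ret = setup_frame_stub(stack_ok);

            if (ret)    /* mirrors: if (ret && !(sa_flags & SA_NODEFER)) */
                    printf("frame written: blocking signal in ->blocked\n");
            else
                    printf("frame failed: signal mask left untouched\n");
            return ret;
    }

    int main(void)
    {
            handle_signal_stub(1);  /* normal delivery */
            handle_signal_stub(0);  /* e.g. SIGSEGV on the stack write */
            return 0;
    }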
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 6223c33ac91c..cec4bde67161 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -19,6 +19,8 @@
19#include <linux/mc146818rtc.h> 19#include <linux/mc146818rtc.h>
20#include <linux/cache.h> 20#include <linux/cache.h>
21#include <linux/interrupt.h> 21#include <linux/interrupt.h>
22#include <linux/cpu.h>
23#include <linux/module.h>
22 24
23#include <asm/mtrr.h> 25#include <asm/mtrr.h>
24#include <asm/tlbflush.h> 26#include <asm/tlbflush.h>
@@ -163,7 +165,7 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
163 unsigned long flags; 165 unsigned long flags;
164 166
165 local_irq_save(flags); 167 local_irq_save(flags);
166 168 WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
167 /* 169 /*
168 * Wait for idle. 170 * Wait for idle.
169 */ 171 */
@@ -345,21 +347,21 @@ out:
345static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, 347static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
346 unsigned long va) 348 unsigned long va)
347{ 349{
348 cpumask_t tmp;
349 /* 350 /*
350 * A couple of (to be removed) sanity checks: 351 * A couple of (to be removed) sanity checks:
351 * 352 *
352 * - we do not send IPIs to not-yet booted CPUs.
353 * - current CPU must not be in mask 353 * - current CPU must not be in mask
354 * - mask must exist :) 354 * - mask must exist :)
355 */ 355 */
356 BUG_ON(cpus_empty(cpumask)); 356 BUG_ON(cpus_empty(cpumask));
357
358 cpus_and(tmp, cpumask, cpu_online_map);
359 BUG_ON(!cpus_equal(cpumask, tmp));
360 BUG_ON(cpu_isset(smp_processor_id(), cpumask)); 357 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
361 BUG_ON(!mm); 358 BUG_ON(!mm);
362 359
 360 /* If a CPU which we ran on has gone down, that is OK. */
361 cpus_and(cpumask, cpumask, cpu_online_map);
362 if (cpus_empty(cpumask))
363 return;
364
363 /* 365 /*
 364 * I'm not happy about this global shared spinlock in the 366 * I'm not happy about this global shared spinlock in the
365 * MM hot path, but we'll see how contended it is. 367 * MM hot path, but we'll see how contended it is.
@@ -452,6 +454,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
452 454
453 preempt_enable(); 455 preempt_enable();
454} 456}
457EXPORT_SYMBOL(flush_tlb_page);
455 458
456static void do_flush_tlb_all(void* info) 459static void do_flush_tlb_all(void* info)
457{ 460{
@@ -474,6 +477,7 @@ void flush_tlb_all(void)
474 */ 477 */
475void smp_send_reschedule(int cpu) 478void smp_send_reschedule(int cpu)
476{ 479{
480 WARN_ON(cpu_is_offline(cpu));
477 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); 481 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
478} 482}
479 483
@@ -491,6 +495,16 @@ struct call_data_struct {
491 int wait; 495 int wait;
492}; 496};
493 497
498void lock_ipi_call_lock(void)
499{
500 spin_lock_irq(&call_lock);
501}
502
503void unlock_ipi_call_lock(void)
504{
505 spin_unlock_irq(&call_lock);
506}
507
494static struct call_data_struct * call_data; 508static struct call_data_struct * call_data;
495 509
496/* 510/*
@@ -514,10 +528,15 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
514 */ 528 */
515{ 529{
516 struct call_data_struct data; 530 struct call_data_struct data;
517 int cpus = num_online_cpus()-1; 531 int cpus;
518 532
519 if (!cpus) 533 /* Holding any lock stops cpus from going down. */
534 spin_lock(&call_lock);
535 cpus = num_online_cpus() - 1;
536 if (!cpus) {
537 spin_unlock(&call_lock);
520 return 0; 538 return 0;
539 }
521 540
522 /* Can deadlock when called with interrupts disabled */ 541 /* Can deadlock when called with interrupts disabled */
523 WARN_ON(irqs_disabled()); 542 WARN_ON(irqs_disabled());
@@ -529,7 +548,6 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
529 if (wait) 548 if (wait)
530 atomic_set(&data.finished, 0); 549 atomic_set(&data.finished, 0);
531 550
532 spin_lock(&call_lock);
533 call_data = &data; 551 call_data = &data;
534 mb(); 552 mb();
535 553
@@ -547,6 +565,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
547 565
548 return 0; 566 return 0;
549} 567}
568EXPORT_SYMBOL(smp_call_function);
550 569
551static void stop_this_cpu (void * dummy) 570static void stop_this_cpu (void * dummy)
552{ 571{
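Worth spelling out why smp_call_function() above moves the spin_lock(&call_lock) before the num_online_cpus() sample: in this hotplug scheme a CPU cannot finish going offline while any such lock is held, so counting recipients under the lock guarantees the count cannot go stale before the IPI is sent. The fixed ordering, reduced to its skeleton (call_data plumbing elided; this is a sketch, not the full function):

    static int smp_call_function_sketch(void (*func)(void *), void *info)
    {
            int cpus;

            spin_lock(&call_lock);          /* stops CPUs from going down */
            cpus = num_online_cpus() - 1;
            if (!cpus) {
                    spin_unlock(&call_lock);
                    return 0;               /* nobody else to interrupt */
            }

            /* ... set up call_data and send_IPI_allbutself() as above ... */

            spin_unlock(&call_lock);
            return 0;
    }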
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index bc1bb6919e6a..8ac8e9fd5614 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -44,6 +44,9 @@
44#include <linux/smp_lock.h> 44#include <linux/smp_lock.h>
45#include <linux/irq.h> 45#include <linux/irq.h>
46#include <linux/bootmem.h> 46#include <linux/bootmem.h>
47#include <linux/notifier.h>
48#include <linux/cpu.h>
49#include <linux/percpu.h>
47 50
48#include <linux/delay.h> 51#include <linux/delay.h>
49#include <linux/mc146818rtc.h> 52#include <linux/mc146818rtc.h>
@@ -56,26 +59,48 @@
56#include <smpboot_hooks.h> 59#include <smpboot_hooks.h>
57 60
58/* Set if we find a B stepping CPU */ 61/* Set if we find a B stepping CPU */
59static int __initdata smp_b_stepping; 62static int __devinitdata smp_b_stepping;
60 63
61/* Number of siblings per CPU package */ 64/* Number of siblings per CPU package */
62int smp_num_siblings = 1; 65int smp_num_siblings = 1;
63int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ 66#ifdef CONFIG_X86_HT
67EXPORT_SYMBOL(smp_num_siblings);
68#endif
69
70/* Package ID of each logical CPU */
71int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
64EXPORT_SYMBOL(phys_proc_id); 72EXPORT_SYMBOL(phys_proc_id);
65int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */ 73
74/* Core ID of each logical CPU */
75int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
66EXPORT_SYMBOL(cpu_core_id); 76EXPORT_SYMBOL(cpu_core_id);
67 77
78cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
79EXPORT_SYMBOL(cpu_sibling_map);
80
81cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
82EXPORT_SYMBOL(cpu_core_map);
83
68/* bitmap of online cpus */ 84/* bitmap of online cpus */
69cpumask_t cpu_online_map; 85cpumask_t cpu_online_map __read_mostly;
86EXPORT_SYMBOL(cpu_online_map);
70 87
71cpumask_t cpu_callin_map; 88cpumask_t cpu_callin_map;
72cpumask_t cpu_callout_map; 89cpumask_t cpu_callout_map;
90EXPORT_SYMBOL(cpu_callout_map);
73static cpumask_t smp_commenced_mask; 91static cpumask_t smp_commenced_mask;
74 92
93/* TSC's upper 32 bits can't be written on earlier CPUs (before Prescott), so
94 * there is no way to resync one AP against the BP. TBD: for Prescott and
95 * above, we should use IA64's algorithm
96 */
97static int __devinitdata tsc_sync_disabled;
98
75/* Per CPU bogomips and other parameters */ 99/* Per CPU bogomips and other parameters */
76struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; 100struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
101EXPORT_SYMBOL(cpu_data);
77 102
78u8 x86_cpu_to_apicid[NR_CPUS] = 103u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
79 { [0 ... NR_CPUS-1] = 0xff }; 104 { [0 ... NR_CPUS-1] = 0xff };
80EXPORT_SYMBOL(x86_cpu_to_apicid); 105EXPORT_SYMBOL(x86_cpu_to_apicid);
81 106
@@ -90,13 +115,16 @@ static int trampoline_exec;
90 115
91static void map_cpu_to_logical_apicid(void); 116static void map_cpu_to_logical_apicid(void);
92 117
118/* State of each CPU. */
119DEFINE_PER_CPU(int, cpu_state) = { 0 };
120
93/* 121/*
94 * Currently trivial. Write the real->protected mode 122 * Currently trivial. Write the real->protected mode
95 * bootstrap into the page concerned. The caller 123 * bootstrap into the page concerned. The caller
96 * has made sure it's suitably aligned. 124 * has made sure it's suitably aligned.
97 */ 125 */
98 126
99static unsigned long __init setup_trampoline(void) 127static unsigned long __devinit setup_trampoline(void)
100{ 128{
101 memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); 129 memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
102 return virt_to_phys(trampoline_base); 130 return virt_to_phys(trampoline_base);
@@ -126,7 +154,7 @@ void __init smp_alloc_memory(void)
126 * a given CPU 154 * a given CPU
127 */ 155 */
128 156
129static void __init smp_store_cpu_info(int id) 157static void __devinit smp_store_cpu_info(int id)
130{ 158{
131 struct cpuinfo_x86 *c = cpu_data + id; 159 struct cpuinfo_x86 *c = cpu_data + id;
132 160
@@ -199,7 +227,7 @@ static void __init synchronize_tsc_bp (void)
199 unsigned long long t0; 227 unsigned long long t0;
200 unsigned long long sum, avg; 228 unsigned long long sum, avg;
201 long long delta; 229 long long delta;
202 unsigned long one_usec; 230 unsigned int one_usec;
203 int buggy = 0; 231 int buggy = 0;
204 232
205 printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); 233 printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
@@ -320,7 +348,7 @@ extern void calibrate_delay(void);
320 348
321static atomic_t init_deasserted; 349static atomic_t init_deasserted;
322 350
323static void __init smp_callin(void) 351static void __devinit smp_callin(void)
324{ 352{
325 int cpuid, phys_id; 353 int cpuid, phys_id;
326 unsigned long timeout; 354 unsigned long timeout;
@@ -405,16 +433,48 @@ static void __init smp_callin(void)
405 /* 433 /*
406 * Synchronize the TSC with the BP 434 * Synchronize the TSC with the BP
407 */ 435 */
408 if (cpu_has_tsc && cpu_khz) 436 if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
409 synchronize_tsc_ap(); 437 synchronize_tsc_ap();
410} 438}
411 439
412static int cpucount; 440static int cpucount;
413 441
442static inline void
443set_cpu_sibling_map(int cpu)
444{
445 int i;
446
447 if (smp_num_siblings > 1) {
448 for (i = 0; i < NR_CPUS; i++) {
449 if (!cpu_isset(i, cpu_callout_map))
450 continue;
451 if (cpu_core_id[cpu] == cpu_core_id[i]) {
452 cpu_set(i, cpu_sibling_map[cpu]);
453 cpu_set(cpu, cpu_sibling_map[i]);
454 }
455 }
456 } else {
457 cpu_set(cpu, cpu_sibling_map[cpu]);
458 }
459
460 if (current_cpu_data.x86_num_cores > 1) {
461 for (i = 0; i < NR_CPUS; i++) {
462 if (!cpu_isset(i, cpu_callout_map))
463 continue;
464 if (phys_proc_id[cpu] == phys_proc_id[i]) {
465 cpu_set(i, cpu_core_map[cpu]);
466 cpu_set(cpu, cpu_core_map[i]);
467 }
468 }
469 } else {
470 cpu_core_map[cpu] = cpu_sibling_map[cpu];
471 }
472}
473
414/* 474/*
415 * Activate a secondary processor. 475 * Activate a secondary processor.
416 */ 476 */
417static void __init start_secondary(void *unused) 477static void __devinit start_secondary(void *unused)
418{ 478{
419 /* 479 /*
420 * Dont put anything before smp_callin(), SMP 480 * Dont put anything before smp_callin(), SMP
@@ -437,7 +497,23 @@ static void __init start_secondary(void *unused)
437 * the local TLBs too. 497 * the local TLBs too.
438 */ 498 */
439 local_flush_tlb(); 499 local_flush_tlb();
500
501 /* This must be done before setting cpu_online_map */
502 set_cpu_sibling_map(raw_smp_processor_id());
503 wmb();
504
505 /*
 506 * We need to hold call_lock, so there is no inconsistency
 507 * between the time smp_call_function() determines the number of
 508 * IPI recipients and the time when the determination is made
 509 * as to which cpus receive the IPI. Holding this
 510 * lock helps us avoid including this cpu in a currently
 511 * in-progress smp_call_function().
512 */
513 lock_ipi_call_lock();
440 cpu_set(smp_processor_id(), cpu_online_map); 514 cpu_set(smp_processor_id(), cpu_online_map);
515 unlock_ipi_call_lock();
516 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
441 517
442 /* We can take interrupts now: we're officially "up". */ 518 /* We can take interrupts now: we're officially "up". */
443 local_irq_enable(); 519 local_irq_enable();
@@ -452,7 +528,7 @@ static void __init start_secondary(void *unused)
452 * from the task structure 528 * from the task structure
453 * This function must not return. 529 * This function must not return.
454 */ 530 */
455void __init initialize_secondary(void) 531void __devinit initialize_secondary(void)
456{ 532{
457 /* 533 /*
458 * We don't actually need to load the full TSS, 534 * We don't actually need to load the full TSS,
@@ -474,10 +550,10 @@ extern struct {
474#ifdef CONFIG_NUMA 550#ifdef CONFIG_NUMA
475 551
476/* which logical CPUs are on which nodes */ 552/* which logical CPUs are on which nodes */
477cpumask_t node_2_cpu_mask[MAX_NUMNODES] = 553cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly =
478 { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; 554 { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
479/* which node each logical CPU is on */ 555/* which node each logical CPU is on */
480int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 }; 556int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
481EXPORT_SYMBOL(cpu_2_node); 557EXPORT_SYMBOL(cpu_2_node);
482 558
483/* set up a mapping between cpu and node. */ 559/* set up a mapping between cpu and node. */
@@ -505,7 +581,7 @@ static inline void unmap_cpu_to_node(int cpu)
505 581
506#endif /* CONFIG_NUMA */ 582#endif /* CONFIG_NUMA */
507 583
508u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; 584u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
509 585
510static void map_cpu_to_logical_apicid(void) 586static void map_cpu_to_logical_apicid(void)
511{ 587{
@@ -566,7 +642,7 @@ static inline void __inquire_remote_apic(int apicid)
566 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this 642 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
567 * won't ... remember to clear down the APIC, etc later. 643 * won't ... remember to clear down the APIC, etc later.
568 */ 644 */
569static int __init 645static int __devinit
570wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) 646wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
571{ 647{
572 unsigned long send_status = 0, accept_status = 0; 648 unsigned long send_status = 0, accept_status = 0;
@@ -612,7 +688,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
612#endif /* WAKE_SECONDARY_VIA_NMI */ 688#endif /* WAKE_SECONDARY_VIA_NMI */
613 689
614#ifdef WAKE_SECONDARY_VIA_INIT 690#ifdef WAKE_SECONDARY_VIA_INIT
615static int __init 691static int __devinit
616wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) 692wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
617{ 693{
618 unsigned long send_status = 0, accept_status = 0; 694 unsigned long send_status = 0, accept_status = 0;
@@ -747,8 +823,43 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
747#endif /* WAKE_SECONDARY_VIA_INIT */ 823#endif /* WAKE_SECONDARY_VIA_INIT */
748 824
749extern cpumask_t cpu_initialized; 825extern cpumask_t cpu_initialized;
826static inline int alloc_cpu_id(void)
827{
828 cpumask_t tmp_map;
829 int cpu;
830 cpus_complement(tmp_map, cpu_present_map);
831 cpu = first_cpu(tmp_map);
832 if (cpu >= NR_CPUS)
833 return -ENODEV;
834 return cpu;
835}
836
837#ifdef CONFIG_HOTPLUG_CPU
838static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS];
839static inline struct task_struct * alloc_idle_task(int cpu)
840{
841 struct task_struct *idle;
842
843 if ((idle = cpu_idle_tasks[cpu]) != NULL) {
844 /* initialize thread_struct. we really want to avoid destroy
845 * idle tread
846 */
847 idle->thread.esp = (unsigned long)(((struct pt_regs *)
848 (THREAD_SIZE + (unsigned long) idle->thread_info)) - 1);
849 init_idle(idle, cpu);
850 return idle;
851 }
852 idle = fork_idle(cpu);
750 853
751static int __init do_boot_cpu(int apicid) 854 if (!IS_ERR(idle))
855 cpu_idle_tasks[cpu] = idle;
856 return idle;
857}
858#else
859#define alloc_idle_task(cpu) fork_idle(cpu)
860#endif
861
862static int __devinit do_boot_cpu(int apicid, int cpu)
752/* 863/*
753 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 864 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
754 * (ie clustered apic addressing mode), this is a LOGICAL apic ID. 865 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -757,16 +868,17 @@ static int __init do_boot_cpu(int apicid)
757{ 868{
758 struct task_struct *idle; 869 struct task_struct *idle;
759 unsigned long boot_error; 870 unsigned long boot_error;
760 int timeout, cpu; 871 int timeout;
761 unsigned long start_eip; 872 unsigned long start_eip;
762 unsigned short nmi_high = 0, nmi_low = 0; 873 unsigned short nmi_high = 0, nmi_low = 0;
763 874
764 cpu = ++cpucount; 875 ++cpucount;
876
765 /* 877 /*
766 * We can't use kernel_thread since we must avoid to 878 * We can't use kernel_thread since we must avoid to
767 * reschedule the child. 879 * reschedule the child.
768 */ 880 */
769 idle = fork_idle(cpu); 881 idle = alloc_idle_task(cpu);
770 if (IS_ERR(idle)) 882 if (IS_ERR(idle))
771 panic("failed fork for CPU %d", cpu); 883 panic("failed fork for CPU %d", cpu);
772 idle->thread.eip = (unsigned long) start_secondary; 884 idle->thread.eip = (unsigned long) start_secondary;
@@ -833,13 +945,16 @@ static int __init do_boot_cpu(int apicid)
833 inquire_remote_apic(apicid); 945 inquire_remote_apic(apicid);
834 } 946 }
835 } 947 }
836 x86_cpu_to_apicid[cpu] = apicid; 948
837 if (boot_error) { 949 if (boot_error) {
838 /* Try to put things back the way they were before ... */ 950 /* Try to put things back the way they were before ... */
839 unmap_cpu_to_logical_apicid(cpu); 951 unmap_cpu_to_logical_apicid(cpu);
840 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ 952 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
841 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ 953 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
842 cpucount--; 954 cpucount--;
955 } else {
956 x86_cpu_to_apicid[cpu] = apicid;
957 cpu_set(cpu, cpu_present_map);
843 } 958 }
844 959
845 /* mark "stuck" area as not stuck */ 960 /* mark "stuck" area as not stuck */
@@ -848,6 +963,75 @@ static int __init do_boot_cpu(int apicid)
848 return boot_error; 963 return boot_error;
849} 964}
850 965
966#ifdef CONFIG_HOTPLUG_CPU
967void cpu_exit_clear(void)
968{
969 int cpu = raw_smp_processor_id();
970
971 idle_task_exit();
972
973 cpucount --;
974 cpu_uninit();
975 irq_ctx_exit(cpu);
976
977 cpu_clear(cpu, cpu_callout_map);
978 cpu_clear(cpu, cpu_callin_map);
979 cpu_clear(cpu, cpu_present_map);
980
981 cpu_clear(cpu, smp_commenced_mask);
982 unmap_cpu_to_logical_apicid(cpu);
983}
984
985struct warm_boot_cpu_info {
986 struct completion *complete;
987 int apicid;
988 int cpu;
989};
990
991static void __devinit do_warm_boot_cpu(void *p)
992{
993 struct warm_boot_cpu_info *info = p;
994 do_boot_cpu(info->apicid, info->cpu);
995 complete(info->complete);
996}
997
998int __devinit smp_prepare_cpu(int cpu)
999{
1000 DECLARE_COMPLETION(done);
1001 struct warm_boot_cpu_info info;
1002 struct work_struct task;
1003 int apicid, ret;
1004
1005 lock_cpu_hotplug();
1006 apicid = x86_cpu_to_apicid[cpu];
1007 if (apicid == BAD_APICID) {
1008 ret = -ENODEV;
1009 goto exit;
1010 }
1011
1012 info.complete = &done;
1013 info.apicid = apicid;
1014 info.cpu = cpu;
1015 INIT_WORK(&task, do_warm_boot_cpu, &info);
1016
1017 tsc_sync_disabled = 1;
1018
1019 /* init low mem mapping */
1020 memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
1021 sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS);
1022 flush_tlb_all();
1023 schedule_work(&task);
1024 wait_for_completion(&done);
1025
1026 tsc_sync_disabled = 0;
1027 zap_low_mappings();
1028 ret = 0;
1029exit:
1030 unlock_cpu_hotplug();
1031 return ret;
1032}
1033#endif
1034
851static void smp_tune_scheduling (void) 1035static void smp_tune_scheduling (void)
852{ 1036{
853 unsigned long cachesize; /* kB */ 1037 unsigned long cachesize; /* kB */
@@ -885,10 +1069,9 @@ static void smp_tune_scheduling (void)
885static int boot_cpu_logical_apicid; 1069static int boot_cpu_logical_apicid;
886/* Where the IO area was mapped on multiquad, always 0 otherwise */ 1070/* Where the IO area was mapped on multiquad, always 0 otherwise */
887void *xquad_portio; 1071void *xquad_portio;
888 1072#ifdef CONFIG_X86_NUMAQ
889cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; 1073EXPORT_SYMBOL(xquad_portio);
890cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; 1074#endif
891EXPORT_SYMBOL(cpu_core_map);
892 1075
893static void __init smp_boot_cpus(unsigned int max_cpus) 1076static void __init smp_boot_cpus(unsigned int max_cpus)
894{ 1077{
@@ -1001,7 +1184,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
1001 if (max_cpus <= cpucount+1) 1184 if (max_cpus <= cpucount+1)
1002 continue; 1185 continue;
1003 1186
1004 if (do_boot_cpu(apicid)) 1187 if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
1005 printk("CPU #%d not responding - cannot use it.\n", 1188 printk("CPU #%d not responding - cannot use it.\n",
1006 apicid); 1189 apicid);
1007 else 1190 else
@@ -1053,44 +1236,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
1053 cpus_clear(cpu_core_map[cpu]); 1236 cpus_clear(cpu_core_map[cpu]);
1054 } 1237 }
1055 1238
1056 for (cpu = 0; cpu < NR_CPUS; cpu++) { 1239 cpu_set(0, cpu_sibling_map[0]);
1057 struct cpuinfo_x86 *c = cpu_data + cpu; 1240 cpu_set(0, cpu_core_map[0]);
1058 int siblings = 0;
1059 int i;
1060 if (!cpu_isset(cpu, cpu_callout_map))
1061 continue;
1062
1063 if (smp_num_siblings > 1) {
1064 for (i = 0; i < NR_CPUS; i++) {
1065 if (!cpu_isset(i, cpu_callout_map))
1066 continue;
1067 if (cpu_core_id[cpu] == cpu_core_id[i]) {
1068 siblings++;
1069 cpu_set(i, cpu_sibling_map[cpu]);
1070 }
1071 }
1072 } else {
1073 siblings++;
1074 cpu_set(cpu, cpu_sibling_map[cpu]);
1075 }
1076
1077 if (siblings != smp_num_siblings) {
1078 printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
1079 smp_num_siblings = siblings;
1080 }
1081
1082 if (c->x86_num_cores > 1) {
1083 for (i = 0; i < NR_CPUS; i++) {
1084 if (!cpu_isset(i, cpu_callout_map))
1085 continue;
1086 if (phys_proc_id[cpu] == phys_proc_id[i]) {
1087 cpu_set(i, cpu_core_map[cpu]);
1088 }
1089 }
1090 } else {
1091 cpu_core_map[cpu] = cpu_sibling_map[cpu];
1092 }
1093 }
1094 1241
1095 smpboot_setup_io_apic(); 1242 smpboot_setup_io_apic();
1096 1243
@@ -1107,6 +1254,9 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
1107 who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ 1254 who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
1108void __init smp_prepare_cpus(unsigned int max_cpus) 1255void __init smp_prepare_cpus(unsigned int max_cpus)
1109{ 1256{
1257 smp_commenced_mask = cpumask_of_cpu(0);
1258 cpu_callin_map = cpumask_of_cpu(0);
1259 mb();
1110 smp_boot_cpus(max_cpus); 1260 smp_boot_cpus(max_cpus);
1111} 1261}
1112 1262
@@ -1114,23 +1264,98 @@ void __devinit smp_prepare_boot_cpu(void)
1114{ 1264{
1115 cpu_set(smp_processor_id(), cpu_online_map); 1265 cpu_set(smp_processor_id(), cpu_online_map);
1116 cpu_set(smp_processor_id(), cpu_callout_map); 1266 cpu_set(smp_processor_id(), cpu_callout_map);
1267 cpu_set(smp_processor_id(), cpu_present_map);
1268 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
1117} 1269}
1118 1270
1119int __devinit __cpu_up(unsigned int cpu) 1271#ifdef CONFIG_HOTPLUG_CPU
1272static void
1273remove_siblinginfo(int cpu)
1120{ 1274{
1121 /* This only works at boot for x86. See "rewrite" above. */ 1275 int sibling;
1122 if (cpu_isset(cpu, smp_commenced_mask)) { 1276
1123 local_irq_enable(); 1277 for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
1124 return -ENOSYS; 1278 cpu_clear(cpu, cpu_sibling_map[sibling]);
1279 for_each_cpu_mask(sibling, cpu_core_map[cpu])
1280 cpu_clear(cpu, cpu_core_map[sibling]);
1281 cpus_clear(cpu_sibling_map[cpu]);
1282 cpus_clear(cpu_core_map[cpu]);
1283 phys_proc_id[cpu] = BAD_APICID;
1284 cpu_core_id[cpu] = BAD_APICID;
1285}
1286
1287int __cpu_disable(void)
1288{
1289 cpumask_t map = cpu_online_map;
1290 int cpu = smp_processor_id();
1291
1292 /*
1293 * Perhaps use cpufreq to drop frequency, but that could go
1294 * into generic code.
1295 *
1296 * We won't take down the boot processor on i386 because some
1297 * interrupts can only be serviced by the BSP.
1298 * Especially so if we're not using an IOAPIC -zwane
1299 */
1300 if (cpu == 0)
1301 return -EBUSY;
1302
1303 /* We enable the timer again on the exit path of the death loop */
1304 disable_APIC_timer();
1305 /* Allow any queued timer interrupts to get serviced */
1306 local_irq_enable();
1307 mdelay(1);
1308 local_irq_disable();
1309
1310 remove_siblinginfo(cpu);
1311
1312 cpu_clear(cpu, map);
1313 fixup_irqs(map);
1314 /* It's now safe to remove this processor from the online map */
1315 cpu_clear(cpu, cpu_online_map);
1316 return 0;
1317}
1318
1319void __cpu_die(unsigned int cpu)
1320{
1321 /* We don't do anything here: idle task is faking death itself. */
1322 unsigned int i;
1323
1324 for (i = 0; i < 10; i++) {
1325 /* They ack this in play_dead by setting CPU_DEAD */
1326 if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
1327 printk ("CPU %d is now offline\n", cpu);
1328 return;
1329 }
1330 current->state = TASK_UNINTERRUPTIBLE;
1331 schedule_timeout(HZ/10);
1125 } 1332 }
1333 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1334}
1335#else /* ... !CONFIG_HOTPLUG_CPU */
1336int __cpu_disable(void)
1337{
1338 return -ENOSYS;
1339}
1126 1340
1341void __cpu_die(unsigned int cpu)
1342{
1343 /* We said "no" in __cpu_disable */
1344 BUG();
1345}
1346#endif /* CONFIG_HOTPLUG_CPU */
1347
1348int __devinit __cpu_up(unsigned int cpu)
1349{
1127 /* In case one didn't come up */ 1350 /* In case one didn't come up */
1128 if (!cpu_isset(cpu, cpu_callin_map)) { 1351 if (!cpu_isset(cpu, cpu_callin_map)) {
1352 printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
1129 local_irq_enable(); 1353 local_irq_enable();
1130 return -EIO; 1354 return -EIO;
1131 } 1355 }
1132 1356
1133 local_irq_enable(); 1357 local_irq_enable();
1358 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
1134 /* Unleash the CPU! */ 1359 /* Unleash the CPU! */
1135 cpu_set(cpu, smp_commenced_mask); 1360 cpu_set(cpu, smp_commenced_mask);
1136 while (!cpu_isset(cpu, cpu_online_map)) 1361 while (!cpu_isset(cpu, cpu_online_map))
@@ -1144,10 +1369,12 @@ void __init smp_cpus_done(unsigned int max_cpus)
1144 setup_ioapic_dest(); 1369 setup_ioapic_dest();
1145#endif 1370#endif
1146 zap_low_mappings(); 1371 zap_low_mappings();
1372#ifndef CONFIG_HOTPLUG_CPU
1147 /* 1373 /*
1148 * Disable executability of the SMP trampoline: 1374 * Disable executability of the SMP trampoline:
1149 */ 1375 */
1150 set_kernel_exec((unsigned long)trampoline_base, trampoline_exec); 1376 set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
1377#endif
1151} 1378}
1152 1379
1153void __init smp_intr_init(void) 1380void __init smp_intr_init(void)
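The other half of the __cpu_die() handshake added above lives in the idle loop: once a CPU has been evicted from cpu_online_map it parks itself and acknowledges by setting its cpu_state to CPU_DEAD, which __cpu_die() polls for. A sketch of that dying side (the real play_dead() sits in process.c, outside this diff, so the details here are illustrative):

    static void play_dead_sketch(void)
    {
            /* ack __cpu_die(): it polls per_cpu(cpu_state, cpu) for this */
            __get_cpu_var(cpu_state) = CPU_DEAD;

            /* park forever; a physical-hotplug port would power off here */
            local_irq_disable();
            for (;;)
                    __asm__ __volatile__("hlt");
    }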
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index d408afaf6495..9b21a31d4f4e 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -251,7 +251,7 @@ ENTRY(sys_call_table)
251 .long sys_io_submit 251 .long sys_io_submit
252 .long sys_io_cancel 252 .long sys_io_cancel
253 .long sys_fadvise64 /* 250 */ 253 .long sys_fadvise64 /* 250 */
254 .long sys_set_zone_reclaim 254 .long sys_ni_syscall
255 .long sys_exit_group 255 .long sys_exit_group
256 .long sys_lookup_dcookie 256 .long sys_lookup_dcookie
257 .long sys_epoll_create 257 .long sys_epoll_create
@@ -283,9 +283,14 @@ ENTRY(sys_call_table)
283 .long sys_mq_timedreceive /* 280 */ 283 .long sys_mq_timedreceive /* 280 */
284 .long sys_mq_notify 284 .long sys_mq_notify
285 .long sys_mq_getsetattr 285 .long sys_mq_getsetattr
286 .long sys_ni_syscall /* reserved for kexec */ 286 .long sys_kexec_load
287 .long sys_waitid 287 .long sys_waitid
288 .long sys_ni_syscall /* 285 */ /* available */ 288 .long sys_ni_syscall /* 285 */ /* available */
289 .long sys_add_key 289 .long sys_add_key
290 .long sys_request_key 290 .long sys_request_key
291 .long sys_keyctl 291 .long sys_keyctl
292 .long sys_ioprio_set
293 .long sys_ioprio_get /* 290 */
294 .long sys_inotify_init
295 .long sys_inotify_add_watch
296 .long sys_inotify_rm_watch
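Once these slots land, the new system calls are reachable by number even before libc grows wrappers. A small user-space example under that assumption, taking 291 for inotify_init as the /* 290 */ comment on sys_ioprio_get implies (the local define is ours, not a header constant):

    #include <stdio.h>
    #include <errno.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #define NR_INOTIFY_INIT 291     /* i386 slot per the table above */

    int main(void)
    {
            long fd = syscall(NR_INOTIFY_INIT);

            if (fd < 0)
                    fprintf(stderr, "inotify_init: %s\n", strerror(errno));
            else
                    printf("inotify fd = %ld\n", fd);
            return 0;
    }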
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 960d8bd137d0..0bada1870bdf 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -21,11 +21,16 @@
21 21
22extern asmlinkage void sysenter_entry(void); 22extern asmlinkage void sysenter_entry(void);
23 23
24void enable_sep_cpu(void *info) 24void enable_sep_cpu(void)
25{ 25{
26 int cpu = get_cpu(); 26 int cpu = get_cpu();
27 struct tss_struct *tss = &per_cpu(init_tss, cpu); 27 struct tss_struct *tss = &per_cpu(init_tss, cpu);
28 28
29 if (!boot_cpu_has(X86_FEATURE_SEP)) {
30 put_cpu();
31 return;
32 }
33
29 tss->ss1 = __KERNEL_CS; 34 tss->ss1 = __KERNEL_CS;
30 tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; 35 tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
31 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); 36 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
@@ -41,7 +46,7 @@ void enable_sep_cpu(void *info)
41extern const char vsyscall_int80_start, vsyscall_int80_end; 46extern const char vsyscall_int80_start, vsyscall_int80_end;
42extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; 47extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
43 48
44static int __init sysenter_setup(void) 49int __init sysenter_setup(void)
45{ 50{
46 void *page = (void *)get_zeroed_page(GFP_ATOMIC); 51 void *page = (void *)get_zeroed_page(GFP_ATOMIC);
47 52
@@ -58,8 +63,5 @@ static int __init sysenter_setup(void)
58 &vsyscall_sysenter_start, 63 &vsyscall_sysenter_start,
59 &vsyscall_sysenter_end - &vsyscall_sysenter_start); 64 &vsyscall_sysenter_end - &vsyscall_sysenter_start);
60 65
61 on_each_cpu(enable_sep_cpu, NULL, 1, 1);
62 return 0; 66 return 0;
63} 67}
64
65__initcall(sysenter_setup);
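The deleted on_each_cpu()/__initcall pair is the point of this hunk: the SYSENTER MSRs are per-CPU state, and with hotplug a processor brought up later would never have seen a boot-time broadcast, so each CPU now calls enable_sep_cpu() for itself during bring-up. Consolidated, the per-CPU idiom looks like this (a sketch restating the function above plus the known wrmsr tail; the exact call site in cpu/common.c is outside this diff):

    void enable_sep_cpu_sketch(void)
    {
            int cpu = get_cpu();    /* pin to the CPU being configured */
            struct tss_struct *tss = &per_cpu(init_tss, cpu);

            if (!boot_cpu_has(X86_FEATURE_SEP)) {
                    put_cpu();
                    return;         /* pre-SEP CPU: int 0x80 path only */
            }

            tss->ss1 = __KERNEL_CS;
            tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
            wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
            wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0);
            wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
            put_cpu();
    }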
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index a0dcb7c87c30..0ee9dee8af06 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -68,7 +68,8 @@
68 68
69#include "io_ports.h" 69#include "io_ports.h"
70 70
71extern spinlock_t i8259A_lock; 71#include <asm/i8259.h>
72
72int pit_latch_buggy; /* extern */ 73int pit_latch_buggy; /* extern */
73 74
74#include "do_timer.h" 75#include "do_timer.h"
@@ -77,16 +78,20 @@ u64 jiffies_64 = INITIAL_JIFFIES;
77 78
78EXPORT_SYMBOL(jiffies_64); 79EXPORT_SYMBOL(jiffies_64);
79 80
80unsigned long cpu_khz; /* Detected as we calibrate the TSC */ 81unsigned int cpu_khz; /* Detected as we calibrate the TSC */
82EXPORT_SYMBOL(cpu_khz);
81 83
82extern unsigned long wall_jiffies; 84extern unsigned long wall_jiffies;
83 85
84DEFINE_SPINLOCK(rtc_lock); 86DEFINE_SPINLOCK(rtc_lock);
87EXPORT_SYMBOL(rtc_lock);
88
89#include <asm/i8253.h>
85 90
86DEFINE_SPINLOCK(i8253_lock); 91DEFINE_SPINLOCK(i8253_lock);
87EXPORT_SYMBOL(i8253_lock); 92EXPORT_SYMBOL(i8253_lock);
88 93
89struct timer_opts *cur_timer = &timer_none; 94struct timer_opts *cur_timer __read_mostly = &timer_none;
90 95
91/* 96/*
92 * This is a special lock that is owned by the CPU and holds the index 97 * This is a special lock that is owned by the CPU and holds the index
@@ -324,6 +329,8 @@ unsigned long get_cmos_time(void)
324 329
325 return retval; 330 return retval;
326} 331}
332EXPORT_SYMBOL(get_cmos_time);
333
327static void sync_cmos_clock(unsigned long dummy); 334static void sync_cmos_clock(unsigned long dummy);
328 335
329static struct timer_list sync_cmos_timer = 336static struct timer_list sync_cmos_timer =
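Note the knock-on effect of narrowing cpu_khz from unsigned long to unsigned int: every printk() that prints it must switch from %lu to %u, which is why the format strings change in the timer files below. Passing an unsigned int through a %lu conversion is a varargs type mismatch and trips gcc's -Wformat even on i386, where the widths happen to agree:

    #include <stdio.h>

    int main(void)
    {
            unsigned int cpu_khz = 2400123;         /* kHz, as in time.c */

            /* printf("... %lu ...", cpu_khz) would now be a -Wformat
             * warning; the matching conversion for unsigned int is %u. */
            printf("Detected %u.%03u MHz processor.\n",
                   cpu_khz / 1000, cpu_khz % 1000);
            return 0;
    }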
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c
index 10a0cbb88e75..658c0629ba6a 100644
--- a/arch/i386/kernel/time_hpet.c
+++ b/arch/i386/kernel/time_hpet.c
@@ -50,7 +50,7 @@ static void hpet_writel(unsigned long d, unsigned long a)
50 * comparator value and continue. Next tick can be caught by checking 50 * comparator value and continue. Next tick can be caught by checking
51 * for a change in the comparator value. Used in apic.c. 51 * for a change in the comparator value. Used in apic.c.
52 */ 52 */
53static void __init wait_hpet_tick(void) 53static void __devinit wait_hpet_tick(void)
54{ 54{
55 unsigned int start_cmp_val, end_cmp_val; 55 unsigned int start_cmp_val, end_cmp_val;
56 56
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c
index 8e201219f525..8163fe0cf1f0 100644
--- a/arch/i386/kernel/timers/common.c
+++ b/arch/i386/kernel/timers/common.c
@@ -86,7 +86,7 @@ bad_ctc:
86#define CALIBRATE_CNT_HPET (5 * hpet_tick) 86#define CALIBRATE_CNT_HPET (5 * hpet_tick)
87#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC) 87#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
88 88
89unsigned long __init calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr) 89unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
90{ 90{
91 unsigned long tsc_startlow, tsc_starthigh; 91 unsigned long tsc_startlow, tsc_starthigh;
92 unsigned long tsc_endlow, tsc_endhigh; 92 unsigned long tsc_endlow, tsc_endhigh;
@@ -139,6 +139,15 @@ bad_calibration:
139} 139}
140#endif 140#endif
141 141
142
143unsigned long read_timer_tsc(void)
144{
145 unsigned long retval;
146 rdtscl(retval);
147 return retval;
148}
149
150
142/* calculate cpu_khz */ 151/* calculate cpu_khz */
143void init_cpu_khz(void) 152void init_cpu_khz(void)
144{ 153{
@@ -154,7 +163,8 @@ void init_cpu_khz(void)
154 :"=a" (cpu_khz), "=d" (edx) 163 :"=a" (cpu_khz), "=d" (edx)
155 :"r" (tsc_quotient), 164 :"r" (tsc_quotient),
156 "0" (eax), "1" (edx)); 165 "0" (eax), "1" (edx));
157 printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); 166 printk("Detected %u.%03u MHz processor.\n",
167 cpu_khz / 1000, cpu_khz % 1000);
158 } 168 }
159 } 169 }
160 } 170 }
diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c
index a3d6a288088b..7e39ed8e33f8 100644
--- a/arch/i386/kernel/timers/timer.c
+++ b/arch/i386/kernel/timers/timer.c
@@ -64,3 +64,12 @@ struct timer_opts* __init select_timer(void)
64 panic("select_timer: Cannot find a suitable timer\n"); 64 panic("select_timer: Cannot find a suitable timer\n");
65 return NULL; 65 return NULL;
66} 66}
67
68int read_current_timer(unsigned long *timer_val)
69{
70 if (cur_timer->read_timer) {
71 *timer_val = cur_timer->read_timer();
72 return 0;
73 }
74 return -1;
75}
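read_current_timer() above is the generic hook that lets callers sample whichever timer is active without knowing its type; in this period its consumer is the delay-loop calibration path, which prefers a direct hardware read over counting jiffies. A caller would look roughly like this sketch (the real logic in init/calibrate.c is considerably more defensive):

    /* Sample the active timer around a busy-wait; returns -1 when the
     * current timer_opts has no ->read_timer hook registered. */
    static int measure_ticks_sketch(unsigned long *elapsed)
    {
            unsigned long start, end;

            if (read_current_timer(&start))
                    return -1;

            /* ... spin for a fixed number of loops ... */

            if (read_current_timer(&end))
                    return -1;

            *elapsed = end - start;   /* TSC counts for the opts patched below */
            return 0;
    }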
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
index f6f1206a11bb..13892a65c941 100644
--- a/arch/i386/kernel/timers/timer_cyclone.c
+++ b/arch/i386/kernel/timers/timer_cyclone.c
@@ -17,9 +17,9 @@
17#include <asm/io.h> 17#include <asm/io.h>
18#include <asm/pgtable.h> 18#include <asm/pgtable.h>
19#include <asm/fixmap.h> 19#include <asm/fixmap.h>
20#include "io_ports.h" 20#include <asm/i8253.h>
21 21
22extern spinlock_t i8253_lock; 22#include "io_ports.h"
23 23
24/* Number of usecs that the last interrupt was delayed */ 24/* Number of usecs that the last interrupt was delayed */
25static int delay_at_last_interrupt; 25static int delay_at_last_interrupt;
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
index f778f471a09a..ef8dac5dd33b 100644
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ b/arch/i386/kernel/timers/timer_hpet.c
@@ -18,7 +18,7 @@
18#include "mach_timer.h" 18#include "mach_timer.h"
19#include <asm/hpet.h> 19#include <asm/hpet.h>
20 20
21static unsigned long hpet_usec_quotient; /* convert hpet clks to usec */ 21static unsigned long __read_mostly hpet_usec_quotient; /* convert hpet clks to usec */
22static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */ 22static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */
23static unsigned long hpet_last; /* hpet counter value at last tick*/ 23static unsigned long hpet_last; /* hpet counter value at last tick*/
24static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ 24static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
@@ -158,7 +158,7 @@ static int __init init_hpet(char* override)
158 { unsigned long eax=0, edx=1000; 158 { unsigned long eax=0, edx=1000;
159 ASM_DIV64_REG(cpu_khz, edx, tsc_quotient, 159 ASM_DIV64_REG(cpu_khz, edx, tsc_quotient,
160 eax, edx); 160 eax, edx);
161 printk("Detected %lu.%03lu MHz processor.\n", 161 printk("Detected %u.%03u MHz processor.\n",
162 cpu_khz / 1000, cpu_khz % 1000); 162 cpu_khz / 1000, cpu_khz % 1000);
163 } 163 }
164 set_cyc2ns_scale(cpu_khz/1000); 164 set_cyc2ns_scale(cpu_khz/1000);
@@ -180,12 +180,13 @@ static int __init init_hpet(char* override)
180/************************************************************/ 180/************************************************************/
181 181
182/* tsc timer_opts struct */ 182/* tsc timer_opts struct */
183static struct timer_opts timer_hpet = { 183static struct timer_opts timer_hpet __read_mostly = {
184 .name = "hpet", 184 .name = "hpet",
185 .mark_offset = mark_offset_hpet, 185 .mark_offset = mark_offset_hpet,
186 .get_offset = get_offset_hpet, 186 .get_offset = get_offset_hpet,
187 .monotonic_clock = monotonic_clock_hpet, 187 .monotonic_clock = monotonic_clock_hpet,
188 .delay = delay_hpet, 188 .delay = delay_hpet,
189 .read_timer = read_timer_tsc,
189}; 190};
190 191
191struct init_timer_opts __initdata timer_hpet_init = { 192struct init_timer_opts __initdata timer_hpet_init = {
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
index 967d5453cd0e..06de036a820c 100644
--- a/arch/i386/kernel/timers/timer_pit.c
+++ b/arch/i386/kernel/timers/timer_pit.c
@@ -15,9 +15,8 @@
15#include <asm/smp.h> 15#include <asm/smp.h>
16#include <asm/io.h> 16#include <asm/io.h>
17#include <asm/arch_hooks.h> 17#include <asm/arch_hooks.h>
18#include <asm/i8253.h>
18 19
19extern spinlock_t i8259A_lock;
20extern spinlock_t i8253_lock;
21#include "do_timer.h" 20#include "do_timer.h"
22#include "io_ports.h" 21#include "io_ports.h"
23 22
@@ -166,7 +165,6 @@ struct init_timer_opts __initdata timer_pit_init = {
166 165
167void setup_pit_timer(void) 166void setup_pit_timer(void)
168{ 167{
169 extern spinlock_t i8253_lock;
170 unsigned long flags; 168 unsigned long flags;
171 169
172 spin_lock_irqsave(&i8253_lock, flags); 170 spin_lock_irqsave(&i8253_lock, flags);
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
index d77f22030fe6..4ef20e663498 100644
--- a/arch/i386/kernel/timers/timer_pm.c
+++ b/arch/i386/kernel/timers/timer_pm.c
@@ -246,6 +246,7 @@ static struct timer_opts timer_pmtmr = {
246 .get_offset = get_offset_pmtmr, 246 .get_offset = get_offset_pmtmr,
247 .monotonic_clock = monotonic_clock_pmtmr, 247 .monotonic_clock = monotonic_clock_pmtmr,
248 .delay = delay_pmtmr, 248 .delay = delay_pmtmr,
249 .read_timer = read_timer_tsc,
249}; 250};
250 251
251struct init_timer_opts __initdata timer_pmtmr_init = { 252struct init_timer_opts __initdata timer_pmtmr_init = {
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
index 180444d87824..8f4e4d5bc560 100644
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -24,6 +24,7 @@
24#include "mach_timer.h" 24#include "mach_timer.h"
25 25
26#include <asm/hpet.h> 26#include <asm/hpet.h>
27#include <asm/i8253.h>
27 28
28#ifdef CONFIG_HPET_TIMER 29#ifdef CONFIG_HPET_TIMER
29static unsigned long hpet_usec_quotient; 30static unsigned long hpet_usec_quotient;
@@ -33,9 +34,7 @@ static struct timer_opts timer_tsc;
33 34
34static inline void cpufreq_delayed_get(void); 35static inline void cpufreq_delayed_get(void);
35 36
36int tsc_disable __initdata = 0; 37int tsc_disable __devinitdata = 0;
37
38extern spinlock_t i8253_lock;
39 38
40static int use_tsc; 39static int use_tsc;
41/* Number of usecs that the last interrupt was delayed */ 40/* Number of usecs that the last interrupt was delayed */
@@ -256,7 +255,7 @@ static unsigned long loops_per_jiffy_ref = 0;
256 255
257#ifndef CONFIG_SMP 256#ifndef CONFIG_SMP
258static unsigned long fast_gettimeoffset_ref = 0; 257static unsigned long fast_gettimeoffset_ref = 0;
259static unsigned long cpu_khz_ref = 0; 258static unsigned int cpu_khz_ref = 0;
260#endif 259#endif
261 260
262static int 261static int
@@ -323,7 +322,7 @@ static inline void cpufreq_delayed_get(void) { return; }
323int recalibrate_cpu_khz(void) 322int recalibrate_cpu_khz(void)
324{ 323{
325#ifndef CONFIG_SMP 324#ifndef CONFIG_SMP
326 unsigned long cpu_khz_old = cpu_khz; 325 unsigned int cpu_khz_old = cpu_khz;
327 326
328 if (cpu_has_tsc) { 327 if (cpu_has_tsc) {
329 init_cpu_khz(); 328 init_cpu_khz();
@@ -534,7 +533,8 @@ static int __init init_tsc(char* override)
534 :"=a" (cpu_khz), "=d" (edx) 533 :"=a" (cpu_khz), "=d" (edx)
535 :"r" (tsc_quotient), 534 :"r" (tsc_quotient),
536 "0" (eax), "1" (edx)); 535 "0" (eax), "1" (edx));
537 printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); 536 printk("Detected %u.%03u MHz processor.\n",
537 cpu_khz / 1000, cpu_khz % 1000);
538 } 538 }
539 set_cyc2ns_scale(cpu_khz/1000); 539 set_cyc2ns_scale(cpu_khz/1000);
540 return 0; 540 return 0;
@@ -572,6 +572,7 @@ static struct timer_opts timer_tsc = {
572 .get_offset = get_offset_tsc, 572 .get_offset = get_offset_tsc,
573 .monotonic_clock = monotonic_clock_tsc, 573 .monotonic_clock = monotonic_clock_tsc,
574 .delay = delay_tsc, 574 .delay = delay_tsc,
575 .read_timer = read_timer_tsc,
575}; 576};
576 577
577struct init_timer_opts __initdata timer_tsc_init = { 578struct init_timer_opts __initdata timer_tsc_init = {
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 83c579e82a81..a61f33d06ea3 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -27,6 +27,7 @@
27#include <linux/ptrace.h> 27#include <linux/ptrace.h>
28#include <linux/utsname.h> 28#include <linux/utsname.h>
29#include <linux/kprobes.h> 29#include <linux/kprobes.h>
30#include <linux/kexec.h>
30 31
31#ifdef CONFIG_EISA 32#ifdef CONFIG_EISA
32#include <linux/ioport.h> 33#include <linux/ioport.h>
@@ -104,6 +105,7 @@ int register_die_notifier(struct notifier_block *nb)
104 spin_unlock_irqrestore(&die_notifier_lock, flags); 105 spin_unlock_irqrestore(&die_notifier_lock, flags);
105 return err; 106 return err;
106} 107}
108EXPORT_SYMBOL(register_die_notifier);
107 109
108static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) 110static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
109{ 111{
@@ -209,7 +211,7 @@ void show_registers(struct pt_regs *regs)
209 211
210 esp = (unsigned long) (&regs->esp); 212 esp = (unsigned long) (&regs->esp);
211 ss = __KERNEL_DS; 213 ss = __KERNEL_DS;
212 if (regs->xcs & 3) { 214 if (user_mode(regs)) {
213 in_kernel = 0; 215 in_kernel = 0;
214 esp = regs->esp; 216 esp = regs->esp;
215 ss = regs->xss & 0xffff; 217 ss = regs->xss & 0xffff;
@@ -233,22 +235,22 @@ void show_registers(struct pt_regs *regs)
233 * time of the fault.. 235 * time of the fault..
234 */ 236 */
235 if (in_kernel) { 237 if (in_kernel) {
236 u8 *eip; 238 u8 __user *eip;
237 239
238 printk("\nStack: "); 240 printk("\nStack: ");
239 show_stack(NULL, (unsigned long*)esp); 241 show_stack(NULL, (unsigned long*)esp);
240 242
241 printk("Code: "); 243 printk("Code: ");
242 244
243 eip = (u8 *)regs->eip - 43; 245 eip = (u8 __user *)regs->eip - 43;
244 for (i = 0; i < 64; i++, eip++) { 246 for (i = 0; i < 64; i++, eip++) {
245 unsigned char c; 247 unsigned char c;
246 248
247 if (eip < (u8 *)PAGE_OFFSET || __get_user(c, eip)) { 249 if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
248 printk(" Bad EIP value."); 250 printk(" Bad EIP value.");
249 break; 251 break;
250 } 252 }
251 if (eip == (u8 *)regs->eip) 253 if (eip == (u8 __user *)regs->eip)
252 printk("<%02x> ", c); 254 printk("<%02x> ", c);
253 else 255 else
254 printk("%02x ", c); 256 printk("%02x ", c);
@@ -265,20 +267,20 @@ static void handle_BUG(struct pt_regs *regs)
265 char c; 267 char c;
266 unsigned long eip; 268 unsigned long eip;
267 269
268 if (regs->xcs & 3) 270 if (user_mode(regs))
269 goto no_bug; /* Not in kernel */ 271 goto no_bug; /* Not in kernel */
270 272
271 eip = regs->eip; 273 eip = regs->eip;
272 274
273 if (eip < PAGE_OFFSET) 275 if (eip < PAGE_OFFSET)
274 goto no_bug; 276 goto no_bug;
275 if (__get_user(ud2, (unsigned short *)eip)) 277 if (__get_user(ud2, (unsigned short __user *)eip))
276 goto no_bug; 278 goto no_bug;
277 if (ud2 != 0x0b0f) 279 if (ud2 != 0x0b0f)
278 goto no_bug; 280 goto no_bug;
279 if (__get_user(line, (unsigned short *)(eip + 2))) 281 if (__get_user(line, (unsigned short __user *)(eip + 2)))
280 goto bug; 282 goto bug;
281 if (__get_user(file, (char **)(eip + 4)) || 283 if (__get_user(file, (char * __user *)(eip + 4)) ||
282 (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) 284 (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
283 file = "<bad filename>"; 285 file = "<bad filename>";
284 286
@@ -293,6 +295,9 @@ bug:
293 printk("Kernel BUG\n"); 295 printk("Kernel BUG\n");
294} 296}
295 297
298/* This path is taken when something in the kernel
299 * has done something bad and is about to be terminated.
300 */
296void die(const char * str, struct pt_regs * regs, long err) 301void die(const char * str, struct pt_regs * regs, long err)
297{ 302{
298 static struct { 303 static struct {
@@ -340,6 +345,10 @@ void die(const char * str, struct pt_regs * regs, long err)
340 bust_spinlocks(0); 345 bust_spinlocks(0);
341 die.lock_owner = -1; 346 die.lock_owner = -1;
342 spin_unlock_irq(&die.lock); 347 spin_unlock_irq(&die.lock);
348
349 if (kexec_should_crash(current))
350 crash_kexec(regs);
351
343 if (in_interrupt()) 352 if (in_interrupt())
344 panic("Fatal exception in interrupt"); 353 panic("Fatal exception in interrupt");
345 354
@@ -353,26 +362,27 @@ void die(const char * str, struct pt_regs * regs, long err)
353 362
354static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) 363static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
355{ 364{
356 if (!(regs->eflags & VM_MASK) && !(3 & regs->xcs)) 365 if (!user_mode_vm(regs))
357 die(str, regs, err); 366 die(str, regs, err);
358} 367}
359 368
360static void do_trap(int trapnr, int signr, char *str, int vm86, 369static void do_trap(int trapnr, int signr, char *str, int vm86,
361 struct pt_regs * regs, long error_code, siginfo_t *info) 370 struct pt_regs * regs, long error_code, siginfo_t *info)
362{ 371{
372 struct task_struct *tsk = current;
373 tsk->thread.error_code = error_code;
374 tsk->thread.trap_no = trapnr;
375
363 if (regs->eflags & VM_MASK) { 376 if (regs->eflags & VM_MASK) {
364 if (vm86) 377 if (vm86)
365 goto vm86_trap; 378 goto vm86_trap;
366 goto trap_signal; 379 goto trap_signal;
367 } 380 }
368 381
369 if (!(regs->xcs & 3)) 382 if (!user_mode(regs))
370 goto kernel_trap; 383 goto kernel_trap;
371 384
372 trap_signal: { 385 trap_signal: {
373 struct task_struct *tsk = current;
374 tsk->thread.error_code = error_code;
375 tsk->thread.trap_no = trapnr;
376 if (info) 386 if (info)
377 force_sig_info(signr, info, tsk); 387 force_sig_info(signr, info, tsk);
378 else 388 else
@@ -485,10 +495,13 @@ fastcall void do_general_protection(struct pt_regs * regs, long error_code)
485 } 495 }
486 put_cpu(); 496 put_cpu();
487 497
498 current->thread.error_code = error_code;
499 current->thread.trap_no = 13;
500
488 if (regs->eflags & VM_MASK) 501 if (regs->eflags & VM_MASK)
489 goto gp_in_vm86; 502 goto gp_in_vm86;
490 503
491 if (!(regs->xcs & 3)) 504 if (!user_mode(regs))
492 goto gp_in_kernel; 505 goto gp_in_kernel;
493 506
494 current->thread.error_code = error_code; 507 current->thread.error_code = error_code;
@@ -569,6 +582,15 @@ void die_nmi (struct pt_regs *regs, const char *msg)
569 console_silent(); 582 console_silent();
570 spin_unlock(&nmi_print_lock); 583 spin_unlock(&nmi_print_lock);
571 bust_spinlocks(0); 584 bust_spinlocks(0);
585
 586 /* If we are in the kernel we are probably nested up pretty badly
 587 * and might as well get out now while we still can.
588 */
589 if (!user_mode(regs)) {
590 current->thread.trap_no = 2;
591 crash_kexec(regs);
592 }
593
572 do_exit(SIGSEGV); 594 do_exit(SIGSEGV);
573} 595}
574 596
@@ -624,6 +646,14 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
624 nmi_enter(); 646 nmi_enter();
625 647
626 cpu = smp_processor_id(); 648 cpu = smp_processor_id();
649
650#ifdef CONFIG_HOTPLUG_CPU
651 if (!cpu_online(cpu)) {
652 nmi_exit();
653 return;
654 }
655#endif
656
627 ++nmi_count(cpu); 657 ++nmi_count(cpu);
628 658
629 if (!nmi_callback(regs, cpu)) 659 if (!nmi_callback(regs, cpu))
@@ -636,11 +666,13 @@ void set_nmi_callback(nmi_callback_t callback)
636{ 666{
637 nmi_callback = callback; 667 nmi_callback = callback;
638} 668}
669EXPORT_SYMBOL_GPL(set_nmi_callback);
639 670
640void unset_nmi_callback(void) 671void unset_nmi_callback(void)
641{ 672{
642 nmi_callback = dummy_nmi_callback; 673 nmi_callback = dummy_nmi_callback;
643} 674}
675EXPORT_SYMBOL_GPL(unset_nmi_callback);
644 676
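
Exporting set_nmi_callback() and unset_nmi_callback() as GPL symbols lets modules hook NMIs; the oprofile profiler is the classic consumer. A hypothetical module-side sketch (handler and init/exit names are invented; the return convention follows nmi_callback_t as dispatched above: nonzero claims the NMI):

    static int my_nmi_handler(struct pt_regs *regs, int cpu)
    {
            /* ... sample the interrupted context ... */
            return 1;       /* handled: skip the default unknown-NMI path */
    }

    static int __init my_module_init(void)
    {
            set_nmi_callback(my_nmi_handler);
            return 0;
    }

    static void __exit my_module_exit(void)
    {
            unset_nmi_callback();
    }
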
645#ifdef CONFIG_KPROBES 677#ifdef CONFIG_KPROBES
646fastcall void do_int3(struct pt_regs *regs, long error_code) 678fastcall void do_int3(struct pt_regs *regs, long error_code)
@@ -682,7 +714,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code)
682 unsigned int condition; 714 unsigned int condition;
683 struct task_struct *tsk = current; 715 struct task_struct *tsk = current;
684 716
685 __asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); 717 get_debugreg(condition, 6);
686 718
687 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, 719 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
688 SIGTRAP) == NOTIFY_STOP) 720 SIGTRAP) == NOTIFY_STOP)
@@ -713,7 +745,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code)
713 * check for kernel mode by just checking the CPL 745 * check for kernel mode by just checking the CPL
714 * of CS. 746 * of CS.
715 */ 747 */
716 if ((regs->xcs & 3) == 0) 748 if (!user_mode(regs))
717 goto clear_TF_reenable; 749 goto clear_TF_reenable;
718 } 750 }
719 751
@@ -724,9 +756,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code)
724 * the signal is delivered. 756 * the signal is delivered.
725 */ 757 */
726clear_dr7: 758clear_dr7:
727 __asm__("movl %0,%%db7" 759 set_debugreg(0, 7);
728 : /* no output */
729 : "r" (0));
730 return; 760 return;
731 761
732debug_vm86: 762debug_vm86:
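
Both debug-register hunks trade raw inline assembly for the get_debugreg()/set_debugreg() accessors. A hedged sketch of the shape such macros take in <asm-i386/processor.h>; the stringized index selects %db0..%db7, though the exact definitions in this tree may differ.

    #define get_debugreg(var, register)                             \
            __asm__("movl %%db" #register ", %0" : "=r" (var))

    #define set_debugreg(value, register)                           \
            __asm__("movl %0, %%db" #register                       \
                    : /* no output */                               \
                    : "r" (value))
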
@@ -871,9 +901,9 @@ fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
871 error_code); 901 error_code);
872 return; 902 return;
873 } 903 }
874 die_if_kernel("cache flush denied", regs, error_code);
875 current->thread.trap_no = 19; 904 current->thread.trap_no = 19;
876 current->thread.error_code = error_code; 905 current->thread.error_code = error_code;
906 die_if_kernel("cache flush denied", regs, error_code);
877 force_sig(SIGSEGV, current); 907 force_sig(SIGSEGV, current);
878 } 908 }
879} 909}
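
The final traps.c hunk is a pure ordering fix: die_if_kernel() may never return for a kernel-mode fault, so the trap bookkeeping has to happen first for the oops or crash dump to see it. Sketched (vector 19 is #XF, SIMD floating-point; the wrapper is hypothetical):

    static void simd_tail_sketch(struct pt_regs *regs, long error_code)
    {
            /* set before die_if_kernel(): it may not come back */
            current->thread.trap_no = 19;
            current->thread.error_code = error_code;
            die_if_kernel("cache flush denied", regs, error_code);
            force_sig(SIGSEGV, current);
    }
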
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index e0512cc8bea7..761972f8cb6c 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -2,20 +2,23 @@
2 * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>; 2 * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
3 */ 3 */
4 4
5#define LOAD_OFFSET __PAGE_OFFSET
6
5#include <asm-generic/vmlinux.lds.h> 7#include <asm-generic/vmlinux.lds.h>
6#include <asm/thread_info.h> 8#include <asm/thread_info.h>
7#include <asm/page.h> 9#include <asm/page.h>
8 10
9OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") 11OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
10OUTPUT_ARCH(i386) 12OUTPUT_ARCH(i386)
11ENTRY(startup_32) 13ENTRY(phys_startup_32)
12jiffies = jiffies_64; 14jiffies = jiffies_64;
13SECTIONS 15SECTIONS
14{ 16{
15 . = __PAGE_OFFSET + 0x100000; 17 . = __KERNEL_START;
18 phys_startup_32 = startup_32 - LOAD_OFFSET;
16 /* read-only */ 19 /* read-only */
17 _text = .; /* Text and read-only data */ 20 _text = .; /* Text and read-only data */
18 .text : { 21 .text : AT(ADDR(.text) - LOAD_OFFSET) {
19 *(.text) 22 *(.text)
20 SCHED_TEXT 23 SCHED_TEXT
21 LOCK_TEXT 24 LOCK_TEXT
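
The linker-script changes give every output section an explicit load address (LMA) next to its virtual address (VMA): AT(ADDR(section) - LOAD_OFFSET) strips the kernel's linear-map offset, and phys_startup_32 hands loaders such as kexec the entry point as a physical address. The arithmetic, sketched in C; 0xC0000000 is the conventional default __PAGE_OFFSET and is an assumption here, not read from this configuration.

    #define LOAD_OFFSET_SKETCH      0xC0000000UL    /* default __PAGE_OFFSET */

    /* mirrors AT(ADDR(section) - LOAD_OFFSET) for any linked symbol */
    static unsigned long lma_of(unsigned long vma)
    {
            return vma - LOAD_OFFSET_SKETCH;
    }
    /* e.g. startup_32 at 0xc0100000 loads at physical 0x00100000 */
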
@@ -27,49 +30,58 @@ SECTIONS
27 30
28 . = ALIGN(16); /* Exception table */ 31 . = ALIGN(16); /* Exception table */
29 __start___ex_table = .; 32 __start___ex_table = .;
30 __ex_table : { *(__ex_table) } 33 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
31 __stop___ex_table = .; 34 __stop___ex_table = .;
32 35
33 RODATA 36 RODATA
34 37
35 /* writeable */ 38 /* writeable */
36 .data : { /* Data */ 39 .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
37 *(.data) 40 *(.data)
38 CONSTRUCTORS 41 CONSTRUCTORS
39 } 42 }
40 43
41 . = ALIGN(4096); 44 . = ALIGN(4096);
42 __nosave_begin = .; 45 __nosave_begin = .;
43 .data_nosave : { *(.data.nosave) } 46 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
44 . = ALIGN(4096); 47 . = ALIGN(4096);
45 __nosave_end = .; 48 __nosave_end = .;
46 49
47 . = ALIGN(4096); 50 . = ALIGN(4096);
48 .data.page_aligned : { *(.data.idt) } 51 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
52 *(.data.idt)
53 }
49 54
50 . = ALIGN(32); 55 . = ALIGN(32);
51 .data.cacheline_aligned : { *(.data.cacheline_aligned) } 56 .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
57 *(.data.cacheline_aligned)
58 }
52 59
60 /* rarely changed data like cpu maps */
61 . = ALIGN(32);
62 .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
53 _edata = .; /* End of data section */ 63 _edata = .; /* End of data section */
54 64
55 . = ALIGN(THREAD_SIZE); /* init_task */ 65 . = ALIGN(THREAD_SIZE); /* init_task */
56 .data.init_task : { *(.data.init_task) } 66 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
67 *(.data.init_task)
68 }
57 69
58 /* will be freed after init */ 70 /* will be freed after init */
59 . = ALIGN(4096); /* Init code and data */ 71 . = ALIGN(4096); /* Init code and data */
60 __init_begin = .; 72 __init_begin = .;
61 .init.text : { 73 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
62 _sinittext = .; 74 _sinittext = .;
63 *(.init.text) 75 *(.init.text)
64 _einittext = .; 76 _einittext = .;
65 } 77 }
66 .init.data : { *(.init.data) } 78 .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) }
67 . = ALIGN(16); 79 . = ALIGN(16);
68 __setup_start = .; 80 __setup_start = .;
69 .init.setup : { *(.init.setup) } 81 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) }
70 __setup_end = .; 82 __setup_end = .;
71 __initcall_start = .; 83 __initcall_start = .;
72 .initcall.init : { 84 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
73 *(.initcall1.init) 85 *(.initcall1.init)
74 *(.initcall2.init) 86 *(.initcall2.init)
75 *(.initcall3.init) 87 *(.initcall3.init)
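
Among the relocated sections, .data.read_mostly is new: it gathers rarely written globals (cpu maps and the like) away from write-hot data so they are not evicted by false sharing. A hedged sketch of the C side, assuming the usual __read_mostly definition from <asm/cache.h>; the variable is invented for illustration.

    #define __read_mostly \
            __attribute__((__section__(".data.read_mostly")))

    /* read on every fast path, written only during boot */
    static int sketch_cpu_khz __read_mostly;
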
@@ -80,33 +92,41 @@ SECTIONS
80 } 92 }
81 __initcall_end = .; 93 __initcall_end = .;
82 __con_initcall_start = .; 94 __con_initcall_start = .;
83 .con_initcall.init : { *(.con_initcall.init) } 95 .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
96 *(.con_initcall.init)
97 }
84 __con_initcall_end = .; 98 __con_initcall_end = .;
85 SECURITY_INIT 99 SECURITY_INIT
86 . = ALIGN(4); 100 . = ALIGN(4);
87 __alt_instructions = .; 101 __alt_instructions = .;
88 .altinstructions : { *(.altinstructions) } 102 .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
103 *(.altinstructions)
104 }
89 __alt_instructions_end = .; 105 __alt_instructions_end = .;
90 .altinstr_replacement : { *(.altinstr_replacement) } 106 .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
107 *(.altinstr_replacement)
108 }
91 /* .exit.text is discarded at runtime, not link time, to deal with references 109
92 from .altinstructions and .eh_frame */ 110
93 .exit.text : { *(.exit.text) } 111 .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
94 .exit.data : { *(.exit.data) } 112 .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
95 . = ALIGN(4096); 113 . = ALIGN(4096);
96 __initramfs_start = .; 114 __initramfs_start = .;
97 .init.ramfs : { *(.init.ramfs) } 115 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
98 __initramfs_end = .; 116 __initramfs_end = .;
99 . = ALIGN(32); 117 . = ALIGN(32);
100 __per_cpu_start = .; 118 __per_cpu_start = .;
101 .data.percpu : { *(.data.percpu) } 119 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
102 __per_cpu_end = .; 120 __per_cpu_end = .;
103 . = ALIGN(4096); 121 . = ALIGN(4096);
104 __init_end = .; 122 __init_end = .;
105 /* freed after init ends here */ 123 /* freed after init ends here */
106 124
107 __bss_start = .; /* BSS */ 125 __bss_start = .; /* BSS */
108 .bss : { 126 .bss.page_aligned : AT(ADDR(.bss.page_aligned) - LOAD_OFFSET) {
109 *(.bss.page_aligned) 127 *(.bss.page_aligned)
128 }
129 .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
110 *(.bss) 130 *(.bss)
111 } 131 }
112 . = ALIGN(4); 132 . = ALIGN(4);
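
Page-aligned BSS objects now get their own output section ahead of ordinary .bss, so nothing linked earlier can disturb their alignment (head.S places the boot page tables and empty_zero_page here). A hypothetical declaration that would land in the new section:

    /* one zeroed page; alignment comes from the .bss.page_aligned
     * output section carved out above */
    static char sketch_page[4096]
            __attribute__((__section__(".bss.page_aligned")));
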