diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/alpha/kernel/osf_sys.c | 7 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/amd.c | 7 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/common.c | 7 | ||||
-rw-r--r-- | arch/i386/pci/fixup.c | 2 | ||||
-rw-r--r-- | arch/ia64/ia32/ia32_ioctl.c | 1 | ||||
-rw-r--r-- | arch/mips/vr41xx/common/pmu.c | 55 | ||||
-rw-r--r-- | arch/ppc/kernel/setup.c | 2 | ||||
-rw-r--r-- | arch/um/kernel/irq_user.c | 10 | ||||
-rw-r--r-- | arch/x86_64/Kconfig | 14 | ||||
-rw-r--r-- | arch/x86_64/defconfig | 58 | ||||
-rw-r--r-- | arch/x86_64/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/x86_64/kernel/apic.c | 5 | ||||
-rw-r--r-- | arch/x86_64/kernel/entry.S | 11 | ||||
-rw-r--r-- | arch/x86_64/kernel/io_apic.c | 70 | ||||
-rw-r--r-- | arch/x86_64/kernel/mpparse.c | 22 | ||||
-rw-r--r-- | arch/x86_64/kernel/nmi.c | 248 | ||||
-rw-r--r-- | arch/x86_64/kernel/pmtimer.c | 101 | ||||
-rw-r--r-- | arch/x86_64/kernel/ptrace.c | 13 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup.c | 17 | ||||
-rw-r--r-- | arch/x86_64/kernel/smpboot.c | 262 | ||||
-rw-r--r-- | arch/x86_64/kernel/time.c | 62 | ||||
-rw-r--r-- | arch/x86_64/kernel/vsyscall.c | 5 | ||||
-rw-r--r-- | arch/x86_64/mm/fault.c | 11 | ||||
-rw-r--r-- | arch/x86_64/mm/ioremap.c | 2 |
24 files changed, 683 insertions, 310 deletions
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 64e450dddb49..167fd89f8707 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c | |||
@@ -1150,16 +1150,13 @@ osf_usleep_thread(struct timeval32 __user *sleep, struct timeval32 __user *remai | |||
1150 | if (get_tv32(&tmp, sleep)) | 1150 | if (get_tv32(&tmp, sleep)) |
1151 | goto fault; | 1151 | goto fault; |
1152 | 1152 | ||
1153 | ticks = tmp.tv_usec; | 1153 | ticks = timeval_to_jiffies(&tmp); |
1154 | ticks = (ticks + (1000000 / HZ) - 1) / (1000000 / HZ); | ||
1155 | ticks += tmp.tv_sec * HZ; | ||
1156 | 1154 | ||
1157 | current->state = TASK_INTERRUPTIBLE; | 1155 | current->state = TASK_INTERRUPTIBLE; |
1158 | ticks = schedule_timeout(ticks); | 1156 | ticks = schedule_timeout(ticks); |
1159 | 1157 | ||
1160 | if (remain) { | 1158 | if (remain) { |
1161 | tmp.tv_sec = ticks / HZ; | 1159 | jiffies_to_timeval(ticks, &tmp); |
1162 | tmp.tv_usec = ticks % HZ; | ||
1163 | if (put_tv32(remain, &tmp)) | 1160 | if (put_tv32(remain, &tmp)) |
1164 | goto fault; | 1161 | goto fault; |
1165 | } | 1162 | } |
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 16dbc4151be4..fa34a06c0d79 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c | |||
@@ -24,9 +24,6 @@ __asm__(".align 4\nvide: ret"); | |||
24 | 24 | ||
25 | static void __init init_amd(struct cpuinfo_x86 *c) | 25 | static void __init init_amd(struct cpuinfo_x86 *c) |
26 | { | 26 | { |
27 | #ifdef CONFIG_X86_SMP | ||
28 | int cpu = c == &boot_cpu_data ? 0 : c - cpu_data; | ||
29 | #endif | ||
30 | u32 l, h; | 27 | u32 l, h; |
31 | int mbytes = num_physpages >> (20-PAGE_SHIFT); | 28 | int mbytes = num_physpages >> (20-PAGE_SHIFT); |
32 | int r; | 29 | int r; |
@@ -205,7 +202,9 @@ static void __init init_amd(struct cpuinfo_x86 *c) | |||
205 | * of two. | 202 | * of two. |
206 | */ | 203 | */ |
207 | if (c->x86_num_cores > 1) { | 204 | if (c->x86_num_cores > 1) { |
208 | cpu_core_id[cpu] = cpu >> hweight32(c->x86_num_cores - 1); | 205 | int cpu = smp_processor_id(); |
206 | /* Fix up the APIC ID following AMD specifications. */ | ||
207 | cpu_core_id[cpu] >>= hweight32(c->x86_num_cores - 1); | ||
209 | printk(KERN_INFO "CPU %d(%d) -> Core %d\n", | 208 | printk(KERN_INFO "CPU %d(%d) -> Core %d\n", |
210 | cpu, c->x86_num_cores, cpu_core_id[cpu]); | 209 | cpu, c->x86_num_cores, cpu_core_id[cpu]); |
211 | } | 210 | } |
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 6be0310e3cd3..11e6e6f23fa0 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c | |||
@@ -243,6 +243,13 @@ static void __init early_cpu_detect(void) | |||
243 | } | 243 | } |
244 | 244 | ||
245 | early_intel_workaround(c); | 245 | early_intel_workaround(c); |
246 | |||
247 | #ifdef CONFIG_SMP | ||
248 | #ifdef CONFIG_X86_HT | ||
249 | phys_proc_id[smp_processor_id()] = | ||
250 | #endif | ||
251 | cpu_core_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; | ||
252 | #endif | ||
246 | } | 253 | } |
247 | 254 | ||
248 | void __init generic_identify(struct cpuinfo_x86 * c) | 255 | void __init generic_identify(struct cpuinfo_x86 * c) |
diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c index be52c5ac4e05..8e8e895e1b5a 100644 --- a/arch/i386/pci/fixup.c +++ b/arch/i386/pci/fixup.c | |||
@@ -253,7 +253,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, pci | |||
253 | #define MAX_PCIEROOT 6 | 253 | #define MAX_PCIEROOT 6 |
254 | static int quirk_aspm_offset[MAX_PCIEROOT << 3]; | 254 | static int quirk_aspm_offset[MAX_PCIEROOT << 3]; |
255 | 255 | ||
256 | #define GET_INDEX(a, b) (((a - PCI_DEVICE_ID_INTEL_MCH_PA) << 3) + b) | 256 | #define GET_INDEX(a, b) ((((a) - PCI_DEVICE_ID_INTEL_MCH_PA) << 3) + ((b) & 7)) |
257 | 257 | ||
258 | static int quirk_pcie_aspm_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) | 258 | static int quirk_pcie_aspm_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) |
259 | { | 259 | { |
diff --git a/arch/ia64/ia32/ia32_ioctl.c b/arch/ia64/ia32/ia32_ioctl.c index 9845dabe2613..164b211f4174 100644 --- a/arch/ia64/ia32/ia32_ioctl.c +++ b/arch/ia64/ia32/ia32_ioctl.c | |||
@@ -13,7 +13,6 @@ | |||
13 | 13 | ||
14 | #define INCLUDES | 14 | #define INCLUDES |
15 | #include "compat_ioctl.c" | 15 | #include "compat_ioctl.c" |
16 | #include <asm/ioctl32.h> | ||
17 | 16 | ||
18 | #define IOCTL_NR(a) ((a) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) | 17 | #define IOCTL_NR(a) ((a) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) |
19 | 18 | ||
diff --git a/arch/mips/vr41xx/common/pmu.c b/arch/mips/vr41xx/common/pmu.c index c5f1043de938..53166f3598b2 100644 --- a/arch/mips/vr41xx/common/pmu.c +++ b/arch/mips/vr41xx/common/pmu.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * pmu.c, Power Management Unit routines for NEC VR4100 series. | 2 | * pmu.c, Power Management Unit routines for NEC VR4100 series. |
3 | * | 3 | * |
4 | * Copyright (C) 2003-2004 Yoichi Yuasa <yuasa@hh.iij4u.or.jp> | 4 | * Copyright (C) 2003-2005 Yoichi Yuasa <yuasa@hh.iij4u.or.jp> |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
@@ -17,7 +17,9 @@ | |||
17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, write to the Free Software |
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 | */ | 19 | */ |
20 | #include <linux/errno.h> | ||
20 | #include <linux/init.h> | 21 | #include <linux/init.h> |
22 | #include <linux/ioport.h> | ||
21 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
22 | #include <linux/smp.h> | 24 | #include <linux/smp.h> |
23 | #include <linux/types.h> | 25 | #include <linux/types.h> |
@@ -27,20 +29,31 @@ | |||
27 | #include <asm/reboot.h> | 29 | #include <asm/reboot.h> |
28 | #include <asm/system.h> | 30 | #include <asm/system.h> |
29 | 31 | ||
30 | #define PMUCNT2REG KSEG1ADDR(0x0f0000c6) | 32 | #define PMU_TYPE1_BASE 0x0b0000a0UL |
33 | #define PMU_TYPE1_SIZE 0x0eUL | ||
34 | |||
35 | #define PMU_TYPE2_BASE 0x0f0000c0UL | ||
36 | #define PMU_TYPE2_SIZE 0x10UL | ||
37 | |||
38 | #define PMUCNT2REG 0x06 | ||
31 | #define SOFTRST 0x0010 | 39 | #define SOFTRST 0x0010 |
32 | 40 | ||
41 | static void __iomem *pmu_base; | ||
42 | |||
43 | #define pmu_read(offset) readw(pmu_base + (offset)) | ||
44 | #define pmu_write(offset, value) writew((value), pmu_base + (offset)) | ||
45 | |||
33 | static inline void software_reset(void) | 46 | static inline void software_reset(void) |
34 | { | 47 | { |
35 | uint16_t val; | 48 | uint16_t pmucnt2; |
36 | 49 | ||
37 | switch (current_cpu_data.cputype) { | 50 | switch (current_cpu_data.cputype) { |
38 | case CPU_VR4122: | 51 | case CPU_VR4122: |
39 | case CPU_VR4131: | 52 | case CPU_VR4131: |
40 | case CPU_VR4133: | 53 | case CPU_VR4133: |
41 | val = readw(PMUCNT2REG); | 54 | pmucnt2 = pmu_read(PMUCNT2REG); |
42 | val |= SOFTRST; | 55 | pmucnt2 |= SOFTRST; |
43 | writew(val, PMUCNT2REG); | 56 | pmu_write(PMUCNT2REG, pmucnt2); |
44 | break; | 57 | break; |
45 | default: | 58 | default: |
46 | break; | 59 | break; |
@@ -71,6 +84,34 @@ static void vr41xx_power_off(void) | |||
71 | 84 | ||
72 | static int __init vr41xx_pmu_init(void) | 85 | static int __init vr41xx_pmu_init(void) |
73 | { | 86 | { |
87 | unsigned long start, size; | ||
88 | |||
89 | switch (current_cpu_data.cputype) { | ||
90 | case CPU_VR4111: | ||
91 | case CPU_VR4121: | ||
92 | start = PMU_TYPE1_BASE; | ||
93 | size = PMU_TYPE1_SIZE; | ||
94 | break; | ||
95 | case CPU_VR4122: | ||
96 | case CPU_VR4131: | ||
97 | case CPU_VR4133: | ||
98 | start = PMU_TYPE2_BASE; | ||
99 | size = PMU_TYPE2_SIZE; | ||
100 | break; | ||
101 | default: | ||
102 | printk("Unexpected CPU of NEC VR4100 series\n"); | ||
103 | return -ENODEV; | ||
104 | } | ||
105 | |||
106 | if (request_mem_region(start, size, "PMU") == NULL) | ||
107 | return -EBUSY; | ||
108 | |||
109 | pmu_base = ioremap(start, size); | ||
110 | if (pmu_base == NULL) { | ||
111 | release_mem_region(start, size); | ||
112 | return -EBUSY; | ||
113 | } | ||
114 | |||
74 | _machine_restart = vr41xx_restart; | 115 | _machine_restart = vr41xx_restart; |
75 | _machine_halt = vr41xx_halt; | 116 | _machine_halt = vr41xx_halt; |
76 | _machine_power_off = vr41xx_power_off; | 117 | _machine_power_off = vr41xx_power_off; |
@@ -78,4 +119,4 @@ static int __init vr41xx_pmu_init(void) | |||
78 | return 0; | 119 | return 0; |
79 | } | 120 | } |
80 | 121 | ||
81 | early_initcall(vr41xx_pmu_init); | 122 | core_initcall(vr41xx_pmu_init); |
diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c index 5dfb42f1a152..309797d7f96d 100644 --- a/arch/ppc/kernel/setup.c +++ b/arch/ppc/kernel/setup.c | |||
@@ -753,6 +753,8 @@ void __init setup_arch(char **cmdline_p) | |||
753 | strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); | 753 | strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); |
754 | *cmdline_p = cmd_line; | 754 | *cmdline_p = cmd_line; |
755 | 755 | ||
756 | parse_early_param(); | ||
757 | |||
756 | /* set up the bootmem stuff with available memory */ | 758 | /* set up the bootmem stuff with available memory */ |
757 | do_init_bootmem(); | 759 | do_init_bootmem(); |
758 | if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab); | 760 | if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab); |
diff --git a/arch/um/kernel/irq_user.c b/arch/um/kernel/irq_user.c index 6d6f9484b884..b3074cbaa479 100644 --- a/arch/um/kernel/irq_user.c +++ b/arch/um/kernel/irq_user.c | |||
@@ -236,9 +236,15 @@ static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) | |||
236 | (*prev)->fd, pollfds[i].fd); | 236 | (*prev)->fd, pollfds[i].fd); |
237 | goto out; | 237 | goto out; |
238 | } | 238 | } |
239 | memcpy(&pollfds[i], &pollfds[i + 1], | 239 | |
240 | (pollfds_num - i - 1) * sizeof(pollfds[0])); | ||
241 | pollfds_num--; | 240 | pollfds_num--; |
241 | |||
242 | /* This moves the *whole* array after pollfds[i] (though | ||
243 | * it doesn't spot as such)! */ | ||
244 | |||
245 | memmove(&pollfds[i], &pollfds[i + 1], | ||
246 | (pollfds_num - i) * sizeof(pollfds[0])); | ||
247 | |||
242 | if(last_irq_ptr == &old_fd->next) | 248 | if(last_irq_ptr == &old_fd->next) |
243 | last_irq_ptr = prev; | 249 | last_irq_ptr = prev; |
244 | *prev = (*prev)->next; | 250 | *prev = (*prev)->next; |
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 44ee7f6acf7b..82cb2a3f127a 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig | |||
@@ -303,6 +303,20 @@ config HPET_TIMER | |||
303 | as it is off-chip. You can find the HPET spec at | 303 | as it is off-chip. You can find the HPET spec at |
304 | <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>. | 304 | <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>. |
305 | 305 | ||
306 | config X86_PM_TIMER | ||
307 | bool "PM timer" | ||
308 | default y | ||
309 | help | ||
310 | Support the ACPI PM timer for time keeping. This is slow, | ||
311 | but is useful on some chipsets without HPET on systems with more | ||
312 | than one CPU. On a single processor or single socket multi core | ||
313 | system it is normally not required. | ||
314 | When the PM timer is active 64bit vsyscalls are disabled | ||
315 | and should not be enabled (/proc/sys/kernel/vsyscall64 should | ||
316 | not be changed). | ||
317 | The kernel selects the PM timer only as a last resort, so it is | ||
318 | useful to enable just in case. | ||
319 | |||
306 | config HPET_EMULATE_RTC | 320 | config HPET_EMULATE_RTC |
307 | bool "Provide RTC interrupt" | 321 | bool "Provide RTC interrupt" |
308 | depends on HPET_TIMER && RTC=y | 322 | depends on HPET_TIMER && RTC=y |
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 9ce51dee30b3..569595b74c7c 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig | |||
@@ -1,7 +1,7 @@ | |||
1 | # | 1 | # |
2 | # Automatically generated make config: don't edit | 2 | # Automatically generated make config: don't edit |
3 | # Linux kernel version: 2.6.11-bk7 | 3 | # Linux kernel version: 2.6.12-rc4 |
4 | # Sat Mar 12 23:43:44 2005 | 4 | # Fri May 13 06:39:11 2005 |
5 | # | 5 | # |
6 | CONFIG_X86_64=y | 6 | CONFIG_X86_64=y |
7 | CONFIG_64BIT=y | 7 | CONFIG_64BIT=y |
@@ -11,8 +11,6 @@ CONFIG_RWSEM_GENERIC_SPINLOCK=y | |||
11 | CONFIG_GENERIC_CALIBRATE_DELAY=y | 11 | CONFIG_GENERIC_CALIBRATE_DELAY=y |
12 | CONFIG_X86_CMPXCHG=y | 12 | CONFIG_X86_CMPXCHG=y |
13 | CONFIG_EARLY_PRINTK=y | 13 | CONFIG_EARLY_PRINTK=y |
14 | CONFIG_HPET_TIMER=y | ||
15 | CONFIG_HPET_EMULATE_RTC=y | ||
16 | CONFIG_GENERIC_ISA_DMA=y | 14 | CONFIG_GENERIC_ISA_DMA=y |
17 | CONFIG_GENERIC_IOMAP=y | 15 | CONFIG_GENERIC_IOMAP=y |
18 | 16 | ||
@@ -22,6 +20,7 @@ CONFIG_GENERIC_IOMAP=y | |||
22 | CONFIG_EXPERIMENTAL=y | 20 | CONFIG_EXPERIMENTAL=y |
23 | CONFIG_CLEAN_COMPILE=y | 21 | CONFIG_CLEAN_COMPILE=y |
24 | CONFIG_LOCK_KERNEL=y | 22 | CONFIG_LOCK_KERNEL=y |
23 | CONFIG_INIT_ENV_ARG_LIMIT=32 | ||
25 | 24 | ||
26 | # | 25 | # |
27 | # General setup | 26 | # General setup |
@@ -33,7 +32,6 @@ CONFIG_POSIX_MQUEUE=y | |||
33 | # CONFIG_BSD_PROCESS_ACCT is not set | 32 | # CONFIG_BSD_PROCESS_ACCT is not set |
34 | CONFIG_SYSCTL=y | 33 | CONFIG_SYSCTL=y |
35 | # CONFIG_AUDIT is not set | 34 | # CONFIG_AUDIT is not set |
36 | CONFIG_LOG_BUF_SHIFT=18 | ||
37 | # CONFIG_HOTPLUG is not set | 35 | # CONFIG_HOTPLUG is not set |
38 | CONFIG_KOBJECT_UEVENT=y | 36 | CONFIG_KOBJECT_UEVENT=y |
39 | CONFIG_IKCONFIG=y | 37 | CONFIG_IKCONFIG=y |
@@ -43,10 +41,11 @@ CONFIG_IKCONFIG_PROC=y | |||
43 | CONFIG_KALLSYMS=y | 41 | CONFIG_KALLSYMS=y |
44 | CONFIG_KALLSYMS_ALL=y | 42 | CONFIG_KALLSYMS_ALL=y |
45 | # CONFIG_KALLSYMS_EXTRA_PASS is not set | 43 | # CONFIG_KALLSYMS_EXTRA_PASS is not set |
44 | CONFIG_PRINTK=y | ||
45 | CONFIG_BUG=y | ||
46 | CONFIG_BASE_FULL=y | 46 | CONFIG_BASE_FULL=y |
47 | CONFIG_FUTEX=y | 47 | CONFIG_FUTEX=y |
48 | CONFIG_EPOLL=y | 48 | CONFIG_EPOLL=y |
49 | # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set | ||
50 | CONFIG_SHMEM=y | 49 | CONFIG_SHMEM=y |
51 | CONFIG_CC_ALIGN_FUNCTIONS=0 | 50 | CONFIG_CC_ALIGN_FUNCTIONS=0 |
52 | CONFIG_CC_ALIGN_LABELS=0 | 51 | CONFIG_CC_ALIGN_LABELS=0 |
@@ -93,6 +92,9 @@ CONFIG_DISCONTIGMEM=y | |||
93 | CONFIG_NUMA=y | 92 | CONFIG_NUMA=y |
94 | CONFIG_HAVE_DEC_LOCK=y | 93 | CONFIG_HAVE_DEC_LOCK=y |
95 | CONFIG_NR_CPUS=8 | 94 | CONFIG_NR_CPUS=8 |
95 | CONFIG_HPET_TIMER=y | ||
96 | CONFIG_X86_PM_TIMER=y | ||
97 | CONFIG_HPET_EMULATE_RTC=y | ||
96 | CONFIG_GART_IOMMU=y | 98 | CONFIG_GART_IOMMU=y |
97 | CONFIG_SWIOTLB=y | 99 | CONFIG_SWIOTLB=y |
98 | CONFIG_X86_MCE=y | 100 | CONFIG_X86_MCE=y |
@@ -100,6 +102,7 @@ CONFIG_X86_MCE_INTEL=y | |||
100 | CONFIG_SECCOMP=y | 102 | CONFIG_SECCOMP=y |
101 | CONFIG_GENERIC_HARDIRQS=y | 103 | CONFIG_GENERIC_HARDIRQS=y |
102 | CONFIG_GENERIC_IRQ_PROBE=y | 104 | CONFIG_GENERIC_IRQ_PROBE=y |
105 | CONFIG_ISA_DMA_API=y | ||
103 | 106 | ||
104 | # | 107 | # |
105 | # Power management options | 108 | # Power management options |
@@ -129,7 +132,7 @@ CONFIG_ACPI_NUMA=y | |||
129 | # CONFIG_ACPI_IBM is not set | 132 | # CONFIG_ACPI_IBM is not set |
130 | CONFIG_ACPI_TOSHIBA=y | 133 | CONFIG_ACPI_TOSHIBA=y |
131 | CONFIG_ACPI_BLACKLIST_YEAR=2001 | 134 | CONFIG_ACPI_BLACKLIST_YEAR=2001 |
132 | CONFIG_ACPI_DEBUG=y | 135 | # CONFIG_ACPI_DEBUG is not set |
133 | CONFIG_ACPI_BUS=y | 136 | CONFIG_ACPI_BUS=y |
134 | CONFIG_ACPI_EC=y | 137 | CONFIG_ACPI_EC=y |
135 | CONFIG_ACPI_POWER=y | 138 | CONFIG_ACPI_POWER=y |
@@ -141,6 +144,7 @@ CONFIG_ACPI_SYSTEM=y | |||
141 | # CPU Frequency scaling | 144 | # CPU Frequency scaling |
142 | # | 145 | # |
143 | CONFIG_CPU_FREQ=y | 146 | CONFIG_CPU_FREQ=y |
147 | CONFIG_CPU_FREQ_TABLE=y | ||
144 | # CONFIG_CPU_FREQ_DEBUG is not set | 148 | # CONFIG_CPU_FREQ_DEBUG is not set |
145 | CONFIG_CPU_FREQ_STAT=y | 149 | CONFIG_CPU_FREQ_STAT=y |
146 | # CONFIG_CPU_FREQ_STAT_DETAILS is not set | 150 | # CONFIG_CPU_FREQ_STAT_DETAILS is not set |
@@ -150,7 +154,6 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y | |||
150 | # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set | 154 | # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set |
151 | CONFIG_CPU_FREQ_GOV_USERSPACE=y | 155 | CONFIG_CPU_FREQ_GOV_USERSPACE=y |
152 | CONFIG_CPU_FREQ_GOV_ONDEMAND=y | 156 | CONFIG_CPU_FREQ_GOV_ONDEMAND=y |
153 | CONFIG_CPU_FREQ_TABLE=y | ||
154 | 157 | ||
155 | # | 158 | # |
156 | # CPUFreq processor drivers | 159 | # CPUFreq processor drivers |
@@ -164,6 +167,7 @@ CONFIG_X86_ACPI_CPUFREQ=y | |||
164 | # shared options | 167 | # shared options |
165 | # | 168 | # |
166 | CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y | 169 | CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y |
170 | # CONFIG_X86_SPEEDSTEP_LIB is not set | ||
167 | 171 | ||
168 | # | 172 | # |
169 | # Bus options (PCI etc.) | 173 | # Bus options (PCI etc.) |
@@ -172,9 +176,11 @@ CONFIG_PCI=y | |||
172 | CONFIG_PCI_DIRECT=y | 176 | CONFIG_PCI_DIRECT=y |
173 | CONFIG_PCI_MMCONFIG=y | 177 | CONFIG_PCI_MMCONFIG=y |
174 | CONFIG_UNORDERED_IO=y | 178 | CONFIG_UNORDERED_IO=y |
179 | # CONFIG_PCIEPORTBUS is not set | ||
175 | CONFIG_PCI_MSI=y | 180 | CONFIG_PCI_MSI=y |
176 | # CONFIG_PCI_LEGACY_PROC is not set | 181 | # CONFIG_PCI_LEGACY_PROC is not set |
177 | # CONFIG_PCI_NAMES is not set | 182 | # CONFIG_PCI_NAMES is not set |
183 | # CONFIG_PCI_DEBUG is not set | ||
178 | 184 | ||
179 | # | 185 | # |
180 | # PCCARD (PCMCIA/CardBus) support | 186 | # PCCARD (PCMCIA/CardBus) support |
@@ -182,10 +188,6 @@ CONFIG_PCI_MSI=y | |||
182 | # CONFIG_PCCARD is not set | 188 | # CONFIG_PCCARD is not set |
183 | 189 | ||
184 | # | 190 | # |
185 | # PC-card bridges | ||
186 | # | ||
187 | |||
188 | # | ||
189 | # PCI Hotplug Support | 191 | # PCI Hotplug Support |
190 | # | 192 | # |
191 | # CONFIG_HOTPLUG_PCI is not set | 193 | # CONFIG_HOTPLUG_PCI is not set |
@@ -254,7 +256,7 @@ CONFIG_LBD=y | |||
254 | # IO Schedulers | 256 | # IO Schedulers |
255 | # | 257 | # |
256 | CONFIG_IOSCHED_NOOP=y | 258 | CONFIG_IOSCHED_NOOP=y |
257 | CONFIG_IOSCHED_AS=y | 259 | # CONFIG_IOSCHED_AS is not set |
258 | CONFIG_IOSCHED_DEADLINE=y | 260 | CONFIG_IOSCHED_DEADLINE=y |
259 | CONFIG_IOSCHED_CFQ=y | 261 | CONFIG_IOSCHED_CFQ=y |
260 | # CONFIG_ATA_OVER_ETH is not set | 262 | # CONFIG_ATA_OVER_ETH is not set |
@@ -308,7 +310,8 @@ CONFIG_BLK_DEV_AMD74XX=y | |||
308 | CONFIG_BLK_DEV_PIIX=y | 310 | CONFIG_BLK_DEV_PIIX=y |
309 | # CONFIG_BLK_DEV_NS87415 is not set | 311 | # CONFIG_BLK_DEV_NS87415 is not set |
310 | # CONFIG_BLK_DEV_PDC202XX_OLD is not set | 312 | # CONFIG_BLK_DEV_PDC202XX_OLD is not set |
311 | # CONFIG_BLK_DEV_PDC202XX_NEW is not set | 313 | CONFIG_BLK_DEV_PDC202XX_NEW=y |
314 | # CONFIG_PDC202XX_FORCE is not set | ||
312 | # CONFIG_BLK_DEV_SVWKS is not set | 315 | # CONFIG_BLK_DEV_SVWKS is not set |
313 | # CONFIG_BLK_DEV_SIIMAGE is not set | 316 | # CONFIG_BLK_DEV_SIIMAGE is not set |
314 | # CONFIG_BLK_DEV_SIS5513 is not set | 317 | # CONFIG_BLK_DEV_SIS5513 is not set |
@@ -353,7 +356,7 @@ CONFIG_BLK_DEV_SD=y | |||
353 | # | 356 | # |
354 | # SCSI low-level drivers | 357 | # SCSI low-level drivers |
355 | # | 358 | # |
356 | CONFIG_BLK_DEV_3W_XXXX_RAID=y | 359 | # CONFIG_BLK_DEV_3W_XXXX_RAID is not set |
357 | # CONFIG_SCSI_3W_9XXX is not set | 360 | # CONFIG_SCSI_3W_9XXX is not set |
358 | # CONFIG_SCSI_ACARD is not set | 361 | # CONFIG_SCSI_ACARD is not set |
359 | # CONFIG_SCSI_AACRAID is not set | 362 | # CONFIG_SCSI_AACRAID is not set |
@@ -384,7 +387,6 @@ CONFIG_SCSI_SATA_VIA=y | |||
384 | # CONFIG_SCSI_BUSLOGIC is not set | 387 | # CONFIG_SCSI_BUSLOGIC is not set |
385 | # CONFIG_SCSI_DMX3191D is not set | 388 | # CONFIG_SCSI_DMX3191D is not set |
386 | # CONFIG_SCSI_EATA is not set | 389 | # CONFIG_SCSI_EATA is not set |
387 | # CONFIG_SCSI_EATA_PIO is not set | ||
388 | # CONFIG_SCSI_FUTURE_DOMAIN is not set | 390 | # CONFIG_SCSI_FUTURE_DOMAIN is not set |
389 | # CONFIG_SCSI_GDTH is not set | 391 | # CONFIG_SCSI_GDTH is not set |
390 | # CONFIG_SCSI_IPS is not set | 392 | # CONFIG_SCSI_IPS is not set |
@@ -392,7 +394,6 @@ CONFIG_SCSI_SATA_VIA=y | |||
392 | # CONFIG_SCSI_INIA100 is not set | 394 | # CONFIG_SCSI_INIA100 is not set |
393 | # CONFIG_SCSI_SYM53C8XX_2 is not set | 395 | # CONFIG_SCSI_SYM53C8XX_2 is not set |
394 | # CONFIG_SCSI_IPR is not set | 396 | # CONFIG_SCSI_IPR is not set |
395 | # CONFIG_SCSI_QLOGIC_ISP is not set | ||
396 | # CONFIG_SCSI_QLOGIC_FC is not set | 397 | # CONFIG_SCSI_QLOGIC_FC is not set |
397 | # CONFIG_SCSI_QLOGIC_1280 is not set | 398 | # CONFIG_SCSI_QLOGIC_1280 is not set |
398 | CONFIG_SCSI_QLA2XXX=y | 399 | CONFIG_SCSI_QLA2XXX=y |
@@ -401,6 +402,7 @@ CONFIG_SCSI_QLA2XXX=y | |||
401 | # CONFIG_SCSI_QLA2300 is not set | 402 | # CONFIG_SCSI_QLA2300 is not set |
402 | # CONFIG_SCSI_QLA2322 is not set | 403 | # CONFIG_SCSI_QLA2322 is not set |
403 | # CONFIG_SCSI_QLA6312 is not set | 404 | # CONFIG_SCSI_QLA6312 is not set |
405 | # CONFIG_SCSI_LPFC is not set | ||
404 | # CONFIG_SCSI_DC395x is not set | 406 | # CONFIG_SCSI_DC395x is not set |
405 | # CONFIG_SCSI_DC390T is not set | 407 | # CONFIG_SCSI_DC390T is not set |
406 | # CONFIG_SCSI_DEBUG is not set | 408 | # CONFIG_SCSI_DEBUG is not set |
@@ -437,7 +439,6 @@ CONFIG_NET=y | |||
437 | # | 439 | # |
438 | CONFIG_PACKET=y | 440 | CONFIG_PACKET=y |
439 | # CONFIG_PACKET_MMAP is not set | 441 | # CONFIG_PACKET_MMAP is not set |
440 | # CONFIG_NETLINK_DEV is not set | ||
441 | CONFIG_UNIX=y | 442 | CONFIG_UNIX=y |
442 | # CONFIG_NET_KEY is not set | 443 | # CONFIG_NET_KEY is not set |
443 | CONFIG_INET=y | 444 | CONFIG_INET=y |
@@ -502,7 +503,7 @@ CONFIG_NETDEVICES=y | |||
502 | # CONFIG_DUMMY is not set | 503 | # CONFIG_DUMMY is not set |
503 | # CONFIG_BONDING is not set | 504 | # CONFIG_BONDING is not set |
504 | # CONFIG_EQUALIZER is not set | 505 | # CONFIG_EQUALIZER is not set |
505 | # CONFIG_TUN is not set | 506 | CONFIG_TUN=y |
506 | 507 | ||
507 | # | 508 | # |
508 | # ARCnet devices | 509 | # ARCnet devices |
@@ -525,8 +526,7 @@ CONFIG_MII=y | |||
525 | # CONFIG_HP100 is not set | 526 | # CONFIG_HP100 is not set |
526 | CONFIG_NET_PCI=y | 527 | CONFIG_NET_PCI=y |
527 | # CONFIG_PCNET32 is not set | 528 | # CONFIG_PCNET32 is not set |
528 | CONFIG_AMD8111_ETH=y | 529 | # CONFIG_AMD8111_ETH is not set |
529 | # CONFIG_AMD8111E_NAPI is not set | ||
530 | # CONFIG_ADAPTEC_STARFIRE is not set | 530 | # CONFIG_ADAPTEC_STARFIRE is not set |
531 | # CONFIG_B44 is not set | 531 | # CONFIG_B44 is not set |
532 | CONFIG_FORCEDETH=y | 532 | CONFIG_FORCEDETH=y |
@@ -536,7 +536,7 @@ CONFIG_FORCEDETH=y | |||
536 | # CONFIG_FEALNX is not set | 536 | # CONFIG_FEALNX is not set |
537 | # CONFIG_NATSEMI is not set | 537 | # CONFIG_NATSEMI is not set |
538 | # CONFIG_NE2K_PCI is not set | 538 | # CONFIG_NE2K_PCI is not set |
539 | CONFIG_8139CP=m | 539 | CONFIG_8139CP=y |
540 | CONFIG_8139TOO=y | 540 | CONFIG_8139TOO=y |
541 | # CONFIG_8139TOO_PIO is not set | 541 | # CONFIG_8139TOO_PIO is not set |
542 | # CONFIG_8139TOO_TUNE_TWISTER is not set | 542 | # CONFIG_8139TOO_TUNE_TWISTER is not set |
@@ -671,6 +671,7 @@ CONFIG_SERIAL_8250_NR_UARTS=4 | |||
671 | # | 671 | # |
672 | CONFIG_SERIAL_CORE=y | 672 | CONFIG_SERIAL_CORE=y |
673 | CONFIG_SERIAL_CORE_CONSOLE=y | 673 | CONFIG_SERIAL_CORE_CONSOLE=y |
674 | # CONFIG_SERIAL_JSM is not set | ||
674 | CONFIG_UNIX98_PTYS=y | 675 | CONFIG_UNIX98_PTYS=y |
675 | CONFIG_LEGACY_PTYS=y | 676 | CONFIG_LEGACY_PTYS=y |
676 | CONFIG_LEGACY_PTY_COUNT=256 | 677 | CONFIG_LEGACY_PTY_COUNT=256 |
@@ -696,6 +697,7 @@ CONFIG_RTC=y | |||
696 | # | 697 | # |
697 | CONFIG_AGP=y | 698 | CONFIG_AGP=y |
698 | CONFIG_AGP_AMD64=y | 699 | CONFIG_AGP_AMD64=y |
700 | CONFIG_AGP_INTEL=y | ||
699 | # CONFIG_DRM is not set | 701 | # CONFIG_DRM is not set |
700 | # CONFIG_MWAVE is not set | 702 | # CONFIG_MWAVE is not set |
701 | CONFIG_RAW_DRIVER=y | 703 | CONFIG_RAW_DRIVER=y |
@@ -703,7 +705,7 @@ CONFIG_HPET=y | |||
703 | # CONFIG_HPET_RTC_IRQ is not set | 705 | # CONFIG_HPET_RTC_IRQ is not set |
704 | CONFIG_HPET_MMAP=y | 706 | CONFIG_HPET_MMAP=y |
705 | CONFIG_MAX_RAW_DEVS=256 | 707 | CONFIG_MAX_RAW_DEVS=256 |
706 | CONFIG_HANGCHECK_TIMER=y | 708 | # CONFIG_HANGCHECK_TIMER is not set |
707 | 709 | ||
708 | # | 710 | # |
709 | # TPM devices | 711 | # TPM devices |
@@ -786,6 +788,8 @@ CONFIG_SOUND_ICH=y | |||
786 | # | 788 | # |
787 | # USB support | 789 | # USB support |
788 | # | 790 | # |
791 | CONFIG_USB_ARCH_HAS_HCD=y | ||
792 | CONFIG_USB_ARCH_HAS_OHCI=y | ||
789 | CONFIG_USB=y | 793 | CONFIG_USB=y |
790 | # CONFIG_USB_DEBUG is not set | 794 | # CONFIG_USB_DEBUG is not set |
791 | 795 | ||
@@ -797,8 +801,6 @@ CONFIG_USB_DEVICEFS=y | |||
797 | # CONFIG_USB_DYNAMIC_MINORS is not set | 801 | # CONFIG_USB_DYNAMIC_MINORS is not set |
798 | # CONFIG_USB_SUSPEND is not set | 802 | # CONFIG_USB_SUSPEND is not set |
799 | # CONFIG_USB_OTG is not set | 803 | # CONFIG_USB_OTG is not set |
800 | CONFIG_USB_ARCH_HAS_HCD=y | ||
801 | CONFIG_USB_ARCH_HAS_OHCI=y | ||
802 | 804 | ||
803 | # | 805 | # |
804 | # USB Host Controller Drivers | 806 | # USB Host Controller Drivers |
@@ -826,7 +828,6 @@ CONFIG_USB_PRINTER=y | |||
826 | # | 828 | # |
827 | CONFIG_USB_STORAGE=y | 829 | CONFIG_USB_STORAGE=y |
828 | # CONFIG_USB_STORAGE_DEBUG is not set | 830 | # CONFIG_USB_STORAGE_DEBUG is not set |
829 | # CONFIG_USB_STORAGE_RW_DETECT is not set | ||
830 | # CONFIG_USB_STORAGE_DATAFAB is not set | 831 | # CONFIG_USB_STORAGE_DATAFAB is not set |
831 | # CONFIG_USB_STORAGE_FREECOM is not set | 832 | # CONFIG_USB_STORAGE_FREECOM is not set |
832 | # CONFIG_USB_STORAGE_ISD200 is not set | 833 | # CONFIG_USB_STORAGE_ISD200 is not set |
@@ -965,7 +966,7 @@ CONFIG_AUTOFS_FS=y | |||
965 | # CD-ROM/DVD Filesystems | 966 | # CD-ROM/DVD Filesystems |
966 | # | 967 | # |
967 | CONFIG_ISO9660_FS=y | 968 | CONFIG_ISO9660_FS=y |
968 | # CONFIG_JOLIET is not set | 969 | CONFIG_JOLIET=y |
969 | # CONFIG_ZISOFS is not set | 970 | # CONFIG_ZISOFS is not set |
970 | # CONFIG_UDF_FS is not set | 971 | # CONFIG_UDF_FS is not set |
971 | 972 | ||
@@ -1092,9 +1093,10 @@ CONFIG_OPROFILE=y | |||
1092 | # | 1093 | # |
1093 | # Kernel hacking | 1094 | # Kernel hacking |
1094 | # | 1095 | # |
1096 | # CONFIG_PRINTK_TIME is not set | ||
1095 | CONFIG_DEBUG_KERNEL=y | 1097 | CONFIG_DEBUG_KERNEL=y |
1096 | CONFIG_MAGIC_SYSRQ=y | 1098 | CONFIG_MAGIC_SYSRQ=y |
1097 | # CONFIG_PRINTK_TIME is not set | 1099 | CONFIG_LOG_BUF_SHIFT=18 |
1098 | # CONFIG_SCHEDSTATS is not set | 1100 | # CONFIG_SCHEDSTATS is not set |
1099 | # CONFIG_DEBUG_SLAB is not set | 1101 | # CONFIG_DEBUG_SLAB is not set |
1100 | # CONFIG_DEBUG_SPINLOCK is not set | 1102 | # CONFIG_DEBUG_SPINLOCK is not set |
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 0a3318e08ab6..5ca4a4598fda 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile | |||
@@ -28,6 +28,7 @@ obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o | |||
28 | obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o | 28 | obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o |
29 | obj-$(CONFIG_SWIOTLB) += swiotlb.o | 29 | obj-$(CONFIG_SWIOTLB) += swiotlb.o |
30 | obj-$(CONFIG_KPROBES) += kprobes.o | 30 | obj-$(CONFIG_KPROBES) += kprobes.o |
31 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o | ||
31 | 32 | ||
32 | obj-$(CONFIG_MODULES) += module.o | 33 | obj-$(CONFIG_MODULES) += module.o |
33 | 34 | ||
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 7e13545748e0..f8e6cc4fecd4 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/mpspec.h> | 33 | #include <asm/mpspec.h> |
34 | #include <asm/pgalloc.h> | 34 | #include <asm/pgalloc.h> |
35 | #include <asm/mach_apic.h> | 35 | #include <asm/mach_apic.h> |
36 | #include <asm/nmi.h> | ||
36 | 37 | ||
37 | int apic_verbosity; | 38 | int apic_verbosity; |
38 | 39 | ||
@@ -925,7 +926,7 @@ __init int oem_force_hpet_timer(void) | |||
925 | unsigned id; | 926 | unsigned id; |
926 | DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); | 927 | DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); |
927 | 928 | ||
928 | bitmap_empty(clustermap, NUM_APIC_CLUSTERS); | 929 | bitmap_zero(clustermap, NUM_APIC_CLUSTERS); |
929 | 930 | ||
930 | for (i = 0; i < NR_CPUS; i++) { | 931 | for (i = 0; i < NR_CPUS; i++) { |
931 | id = bios_cpu_apicid[i]; | 932 | id = bios_cpu_apicid[i]; |
@@ -1056,7 +1057,7 @@ int __init APIC_init_uniprocessor (void) | |||
1056 | nr_ioapics = 0; | 1057 | nr_ioapics = 0; |
1057 | #endif | 1058 | #endif |
1058 | setup_boot_APIC_clock(); | 1059 | setup_boot_APIC_clock(); |
1059 | 1060 | check_nmi_watchdog(); | |
1060 | return 0; | 1061 | return 0; |
1061 | } | 1062 | } |
1062 | 1063 | ||
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 1086b5fcac21..28817490fdc6 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S | |||
@@ -220,13 +220,18 @@ sysret_careful: | |||
220 | jmp sysret_check | 220 | jmp sysret_check |
221 | 221 | ||
222 | /* Handle a signal */ | 222 | /* Handle a signal */ |
223 | /* edx: work flags (arg3) */ | ||
224 | sysret_signal: | 223 | sysret_signal: |
225 | sti | 224 | sti |
225 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx | ||
226 | jz 1f | ||
227 | |||
228 | /* Really a signal */ | ||
229 | /* edx: work flags (arg3) */ | ||
226 | leaq do_notify_resume(%rip),%rax | 230 | leaq do_notify_resume(%rip),%rax |
227 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 | 231 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 |
228 | xorl %esi,%esi # oldset -> arg2 | 232 | xorl %esi,%esi # oldset -> arg2 |
229 | call ptregscall_common | 233 | call ptregscall_common |
234 | 1: movl $_TIF_NEED_RESCHED,%edi | ||
230 | jmp sysret_check | 235 | jmp sysret_check |
231 | 236 | ||
232 | /* Do syscall tracing */ | 237 | /* Do syscall tracing */ |
@@ -484,6 +489,8 @@ retint_careful: | |||
484 | jmp retint_check | 489 | jmp retint_check |
485 | 490 | ||
486 | retint_signal: | 491 | retint_signal: |
492 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx | ||
493 | jz retint_swapgs | ||
487 | sti | 494 | sti |
488 | SAVE_REST | 495 | SAVE_REST |
489 | movq $-1,ORIG_RAX(%rsp) | 496 | movq $-1,ORIG_RAX(%rsp) |
@@ -492,8 +499,8 @@ retint_signal: | |||
492 | call do_notify_resume | 499 | call do_notify_resume |
493 | RESTORE_REST | 500 | RESTORE_REST |
494 | cli | 501 | cli |
502 | movl $_TIF_NEED_RESCHED,%edi | ||
495 | GET_THREAD_INFO(%rcx) | 503 | GET_THREAD_INFO(%rcx) |
496 | movl $_TIF_WORK_MASK,%edi | ||
497 | jmp retint_check | 504 | jmp retint_check |
498 | 505 | ||
499 | #ifdef CONFIG_PREEMPT | 506 | #ifdef CONFIG_PREEMPT |
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 60be58617eb9..ac7684324954 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c | |||
@@ -1804,76 +1804,6 @@ device_initcall(ioapic_init_sysfs); | |||
1804 | 1804 | ||
1805 | #define IO_APIC_MAX_ID 0xFE | 1805 | #define IO_APIC_MAX_ID 0xFE |
1806 | 1806 | ||
1807 | int __init io_apic_get_unique_id (int ioapic, int apic_id) | ||
1808 | { | ||
1809 | union IO_APIC_reg_00 reg_00; | ||
1810 | static physid_mask_t apic_id_map; | ||
1811 | unsigned long flags; | ||
1812 | int i = 0; | ||
1813 | |||
1814 | /* | ||
1815 | * The P4 platform supports up to 256 APIC IDs on two separate APIC | ||
1816 | * buses (one for LAPICs, one for IOAPICs), where predecessors only | ||
1817 | * supports up to 16 on one shared APIC bus. | ||
1818 | * | ||
1819 | * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full | ||
1820 | * advantage of new APIC bus architecture. | ||
1821 | */ | ||
1822 | |||
1823 | if (physids_empty(apic_id_map)) | ||
1824 | apic_id_map = phys_cpu_present_map; | ||
1825 | |||
1826 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1827 | reg_00.raw = io_apic_read(ioapic, 0); | ||
1828 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1829 | |||
1830 | if (apic_id >= IO_APIC_MAX_ID) { | ||
1831 | apic_printk(APIC_QUIET, KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " | ||
1832 | "%d\n", ioapic, apic_id, reg_00.bits.ID); | ||
1833 | apic_id = reg_00.bits.ID; | ||
1834 | } | ||
1835 | |||
1836 | /* | ||
1837 | * Every APIC in a system must have a unique ID or we get lots of nice | ||
1838 | * 'stuck on smp_invalidate_needed IPI wait' messages. | ||
1839 | */ | ||
1840 | if (physid_isset(apic_id, apic_id_map)) { | ||
1841 | |||
1842 | for (i = 0; i < IO_APIC_MAX_ID; i++) { | ||
1843 | if (!physid_isset(i, apic_id_map)) | ||
1844 | break; | ||
1845 | } | ||
1846 | |||
1847 | if (i == IO_APIC_MAX_ID) | ||
1848 | panic("Max apic_id exceeded!\n"); | ||
1849 | |||
1850 | apic_printk(APIC_VERBOSE, KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " | ||
1851 | "trying %d\n", ioapic, apic_id, i); | ||
1852 | |||
1853 | apic_id = i; | ||
1854 | } | ||
1855 | |||
1856 | physid_set(apic_id, apic_id_map); | ||
1857 | |||
1858 | if (reg_00.bits.ID != apic_id) { | ||
1859 | reg_00.bits.ID = apic_id; | ||
1860 | |||
1861 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1862 | io_apic_write(ioapic, 0, reg_00.raw); | ||
1863 | reg_00.raw = io_apic_read(ioapic, 0); | ||
1864 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1865 | |||
1866 | /* Sanity check */ | ||
1867 | if (reg_00.bits.ID != apic_id) | ||
1868 | panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic); | ||
1869 | } | ||
1870 | |||
1871 | apic_printk(APIC_VERBOSE,KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); | ||
1872 | |||
1873 | return apic_id; | ||
1874 | } | ||
1875 | |||
1876 | |||
1877 | int __init io_apic_get_version (int ioapic) | 1807 | int __init io_apic_get_version (int ioapic) |
1878 | { | 1808 | { |
1879 | union IO_APIC_reg_01 reg_01; | 1809 | union IO_APIC_reg_01 reg_01; |
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index 7ec031c6ca10..f86d9db94bfc 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c | |||
@@ -107,6 +107,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) | |||
107 | static void __init MP_processor_info (struct mpc_config_processor *m) | 107 | static void __init MP_processor_info (struct mpc_config_processor *m) |
108 | { | 108 | { |
109 | int ver; | 109 | int ver; |
110 | static int found_bsp=0; | ||
110 | 111 | ||
111 | if (!(m->mpc_cpuflag & CPU_ENABLED)) | 112 | if (!(m->mpc_cpuflag & CPU_ENABLED)) |
112 | return; | 113 | return; |
@@ -126,11 +127,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m) | |||
126 | " Processor ignored.\n", NR_CPUS); | 127 | " Processor ignored.\n", NR_CPUS); |
127 | return; | 128 | return; |
128 | } | 129 | } |
129 | if (num_processors >= maxcpus) { | ||
130 | printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." | ||
131 | " Processor ignored.\n", maxcpus); | ||
132 | return; | ||
133 | } | ||
134 | 130 | ||
135 | num_processors++; | 131 | num_processors++; |
136 | 132 | ||
@@ -150,7 +146,19 @@ static void __init MP_processor_info (struct mpc_config_processor *m) | |||
150 | ver = 0x10; | 146 | ver = 0x10; |
151 | } | 147 | } |
152 | apic_version[m->mpc_apicid] = ver; | 148 | apic_version[m->mpc_apicid] = ver; |
153 | bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; | 149 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { |
150 | /* | ||
151 | * bios_cpu_apicid is required to have processors listed | ||
152 | * in same order as logical cpu numbers. Hence the first | ||
153 | * entry is BSP, and so on. | ||
154 | */ | ||
155 | bios_cpu_apicid[0] = m->mpc_apicid; | ||
156 | x86_cpu_to_apicid[0] = m->mpc_apicid; | ||
157 | found_bsp = 1; | ||
158 | } else { | ||
159 | bios_cpu_apicid[num_processors - found_bsp] = m->mpc_apicid; | ||
160 | x86_cpu_to_apicid[num_processors - found_bsp] = m->mpc_apicid; | ||
161 | } | ||
154 | } | 162 | } |
155 | 163 | ||
156 | static void __init MP_bus_info (struct mpc_config_bus *m) | 164 | static void __init MP_bus_info (struct mpc_config_bus *m) |
@@ -759,7 +767,7 @@ void __init mp_register_ioapic ( | |||
759 | mp_ioapics[idx].mpc_apicaddr = address; | 767 | mp_ioapics[idx].mpc_apicaddr = address; |
760 | 768 | ||
761 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); | 769 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); |
762 | mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); | 770 | mp_ioapics[idx].mpc_apicid = id; |
763 | mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); | 771 | mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); |
764 | 772 | ||
765 | /* | 773 | /* |
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 61de0b34a01e..31c0f2e6ac91 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/msr.h> | 33 | #include <asm/msr.h> |
34 | #include <asm/proto.h> | 34 | #include <asm/proto.h> |
35 | #include <asm/kdebug.h> | 35 | #include <asm/kdebug.h> |
36 | #include <asm/local.h> | ||
36 | 37 | ||
37 | /* | 38 | /* |
38 | * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: | 39 | * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: |
@@ -59,7 +60,8 @@ int panic_on_timeout; | |||
59 | 60 | ||
60 | unsigned int nmi_watchdog = NMI_DEFAULT; | 61 | unsigned int nmi_watchdog = NMI_DEFAULT; |
61 | static unsigned int nmi_hz = HZ; | 62 | static unsigned int nmi_hz = HZ; |
62 | unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ | 63 | static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ |
64 | static unsigned int nmi_p4_cccr_val; | ||
63 | 65 | ||
64 | /* Note that these events don't tick when the CPU idles. This means | 66 | /* Note that these events don't tick when the CPU idles. This means |
65 | the frequency varies with CPU load. */ | 67 | the frequency varies with CPU load. */ |
@@ -71,61 +73,87 @@ unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ | |||
71 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | 73 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 |
72 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | 74 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING |
73 | 75 | ||
74 | #define P6_EVNTSEL0_ENABLE (1 << 22) | 76 | #define MSR_P4_MISC_ENABLE 0x1A0 |
75 | #define P6_EVNTSEL_INT (1 << 20) | 77 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) |
76 | #define P6_EVNTSEL_OS (1 << 17) | 78 | #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) |
77 | #define P6_EVNTSEL_USR (1 << 16) | 79 | #define MSR_P4_PERFCTR0 0x300 |
78 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | 80 | #define MSR_P4_CCCR0 0x360 |
79 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | 81 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) |
82 | #define P4_ESCR_OS (1<<3) | ||
83 | #define P4_ESCR_USR (1<<2) | ||
84 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
85 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
86 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
87 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
88 | #define P4_CCCR_COMPARE (1<<18) | ||
89 | #define P4_CCCR_REQUIRED (3<<16) | ||
90 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
91 | #define P4_CCCR_ENABLE (1<<12) | ||
92 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
93 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
94 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
95 | #define MSR_P4_IQ_COUNTER0 0x30C | ||
96 | #define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) | ||
97 | #define P4_NMI_IQ_CCCR0 \ | ||
98 | (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ | ||
99 | P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) | ||
100 | |||
101 | static __init inline int nmi_known_cpu(void) | ||
102 | { | ||
103 | switch (boot_cpu_data.x86_vendor) { | ||
104 | case X86_VENDOR_AMD: | ||
105 | return boot_cpu_data.x86 == 15; | ||
106 | case X86_VENDOR_INTEL: | ||
107 | return boot_cpu_data.x86 == 15; | ||
108 | } | ||
109 | return 0; | ||
110 | } | ||
80 | 111 | ||
81 | /* Run after command line and cpu_init init, but before all other checks */ | 112 | /* Run after command line and cpu_init init, but before all other checks */ |
82 | void __init nmi_watchdog_default(void) | 113 | void __init nmi_watchdog_default(void) |
83 | { | 114 | { |
84 | if (nmi_watchdog != NMI_DEFAULT) | 115 | if (nmi_watchdog != NMI_DEFAULT) |
85 | return; | 116 | return; |
86 | 117 | if (nmi_known_cpu()) | |
87 | /* For some reason the IO APIC watchdog doesn't work on the AMD | 118 | nmi_watchdog = NMI_LOCAL_APIC; |
88 | 8111 chipset. For now switch to local APIC mode using | 119 | else |
89 | perfctr0 there. On Intel CPUs we don't have code to handle | ||
90 | the perfctr and the IO-APIC seems to work, so use that. */ | ||
91 | |||
92 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | ||
93 | nmi_watchdog = NMI_LOCAL_APIC; | ||
94 | printk(KERN_INFO | ||
95 | "Using local APIC NMI watchdog using perfctr0\n"); | ||
96 | } else { | ||
97 | printk(KERN_INFO "Using IO APIC NMI watchdog\n"); | ||
98 | nmi_watchdog = NMI_IO_APIC; | 120 | nmi_watchdog = NMI_IO_APIC; |
99 | } | ||
100 | } | 121 | } |
101 | 122 | ||
102 | /* Why is there no CPUID flag for this? */ | 123 | #ifdef CONFIG_SMP |
103 | static __init int cpu_has_lapic(void) | 124 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when |
125 | * the CPU is idle. To make sure the NMI watchdog really ticks on all | ||
126 | * CPUs during the test make them busy. | ||
127 | */ | ||
128 | static __init void nmi_cpu_busy(void *data) | ||
104 | { | 129 | { |
105 | switch (boot_cpu_data.x86_vendor) { | 130 | volatile int *endflag = data; |
106 | case X86_VENDOR_INTEL: | 131 | local_irq_enable(); |
107 | case X86_VENDOR_AMD: | 132 | /* Intentionally don't use cpu_relax here. This is |
108 | return boot_cpu_data.x86 >= 6; | 133 | to make sure that the performance counter really ticks, |
109 | /* .... add more cpus here or find a different way to figure this out. */ | 134 | even if there is a simulator or similar that catches the |
110 | default: | 135 | pause instruction. On a real HT machine this is fine because |
111 | return 0; | 136 | all other CPUs are busy with "useless" delay loops and don't |
137 | care if they get somewhat fewer cycles. */ | ||
138 | while (*endflag == 0) | ||
139 | barrier(); | ||
113 | } | 140 | } |
141 | #endif | ||
114 | 142 | ||
115 | static int __init check_nmi_watchdog (void) | 143 | int __init check_nmi_watchdog (void) |
116 | { | 144 | { |
117 | int counts[NR_CPUS]; | 145 | volatile int endflag = 0; |
146 | int *counts; | ||
118 | int cpu; | 147 | int cpu; |
119 | 148 | ||
120 | if (nmi_watchdog == NMI_NONE) | 149 | counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); |
121 | return 0; | 150 | if (!counts) |
151 | return -1; | ||
122 | 152 | ||
123 | if (nmi_watchdog == NMI_LOCAL_APIC && !cpu_has_lapic()) { | 153 | printk(KERN_INFO "testing NMI watchdog ... "); |
124 | nmi_watchdog = NMI_NONE; | ||
125 | return -1; | ||
126 | } | ||
127 | 154 | ||
128 | printk(KERN_INFO "Testing NMI watchdog ... "); | 155 | if (nmi_watchdog == NMI_LOCAL_APIC) |
156 | smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); | ||
129 | 157 | ||
130 | for (cpu = 0; cpu < NR_CPUS; cpu++) | 158 | for (cpu = 0; cpu < NR_CPUS; cpu++) |
131 | counts[cpu] = cpu_pda[cpu].__nmi_count; | 159 | counts[cpu] = cpu_pda[cpu].__nmi_count; |
@@ -133,15 +161,22 @@ static int __init check_nmi_watchdog (void) | |||
133 | mdelay((10*1000)/nmi_hz); // wait 10 ticks | 161 | mdelay((10*1000)/nmi_hz); // wait 10 ticks |
134 | 162 | ||
135 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 163 | for (cpu = 0; cpu < NR_CPUS; cpu++) { |
164 | if (!cpu_online(cpu)) | ||
165 | continue; | ||
136 | if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) { | 166 | if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) { |
137 | printk("CPU#%d: NMI appears to be stuck (%d)!\n", | 167 | endflag = 1; |
168 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", | ||
138 | cpu, | 169 | cpu, |
170 | counts[cpu], | ||
139 | cpu_pda[cpu].__nmi_count); | 171 | cpu_pda[cpu].__nmi_count); |
140 | nmi_active = 0; | 172 | nmi_active = 0; |
141 | lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; | 173 | lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; |
174 | nmi_perfctr_msr = 0; | ||
175 | kfree(counts); | ||
142 | return -1; | 176 | return -1; |
143 | } | 177 | } |
144 | } | 178 | } |
179 | endflag = 1; | ||
145 | printk("OK.\n"); | 180 | printk("OK.\n"); |
146 | 181 | ||
147 | /* now that we know it works we can reduce NMI frequency to | 182 | /* now that we know it works we can reduce NMI frequency to |
@@ -149,10 +184,9 @@ static int __init check_nmi_watchdog (void) | |||
149 | if (nmi_watchdog == NMI_LOCAL_APIC) | 184 | if (nmi_watchdog == NMI_LOCAL_APIC) |
150 | nmi_hz = 1; | 185 | nmi_hz = 1; |
151 | 186 | ||
187 | kfree(counts); | ||
152 | return 0; | 188 | return 0; |
153 | } | 189 | } |
154 | /* Have this called later during boot so counters are updating */ | ||
155 | late_initcall(check_nmi_watchdog); | ||
156 | 190 | ||
157 | int __init setup_nmi_watchdog(char *str) | 191 | int __init setup_nmi_watchdog(char *str) |
158 | { | 192 | { |
@@ -170,7 +204,7 @@ int __init setup_nmi_watchdog(char *str) | |||
170 | 204 | ||
171 | if (nmi >= NMI_INVALID) | 205 | if (nmi >= NMI_INVALID) |
172 | return 0; | 206 | return 0; |
173 | nmi_watchdog = nmi; | 207 | nmi_watchdog = nmi; |
174 | return 1; | 208 | return 1; |
175 | } | 209 | } |
176 | 210 | ||
@@ -185,7 +219,10 @@ static void disable_lapic_nmi_watchdog(void) | |||
185 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); | 219 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); |
186 | break; | 220 | break; |
187 | case X86_VENDOR_INTEL: | 221 | case X86_VENDOR_INTEL: |
188 | wrmsr(MSR_IA32_EVNTSEL0, 0, 0); | 222 | if (boot_cpu_data.x86 == 15) { |
223 | wrmsr(MSR_P4_IQ_CCCR0, 0, 0); | ||
224 | wrmsr(MSR_P4_CRU_ESCR0, 0, 0); | ||
225 | } | ||
189 | break; | 226 | break; |
190 | } | 227 | } |
191 | nmi_active = -1; | 228 | nmi_active = -1; |
@@ -253,7 +290,7 @@ void enable_timer_nmi_watchdog(void) | |||
253 | 290 | ||
254 | static int nmi_pm_active; /* nmi_active before suspend */ | 291 | static int nmi_pm_active; /* nmi_active before suspend */ |
255 | 292 | ||
256 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) | 293 | static int lapic_nmi_suspend(struct sys_device *dev, u32 state) |
257 | { | 294 | { |
258 | nmi_pm_active = nmi_active; | 295 | nmi_pm_active = nmi_active; |
259 | disable_lapic_nmi_watchdog(); | 296 | disable_lapic_nmi_watchdog(); |
@@ -300,22 +337,27 @@ late_initcall(init_lapic_nmi_sysfs); | |||
300 | * Original code written by Keith Owens. | 337 | * Original code written by Keith Owens. |
301 | */ | 338 | */ |
302 | 339 | ||
340 | static void clear_msr_range(unsigned int base, unsigned int n) | ||
341 | { | ||
342 | unsigned int i; | ||
343 | |||
344 | for(i = 0; i < n; ++i) | ||
345 | wrmsr(base+i, 0, 0); | ||
346 | } | ||
347 | |||
303 | static void setup_k7_watchdog(void) | 348 | static void setup_k7_watchdog(void) |
304 | { | 349 | { |
305 | int i; | 350 | int i; |
306 | unsigned int evntsel; | 351 | unsigned int evntsel; |
307 | 352 | ||
308 | /* No check, so can start with slow frequency */ | ||
309 | nmi_hz = 1; | ||
310 | |||
311 | /* XXX should check these in EFER */ | ||
312 | |||
313 | nmi_perfctr_msr = MSR_K7_PERFCTR0; | 353 | nmi_perfctr_msr = MSR_K7_PERFCTR0; |
314 | 354 | ||
315 | for(i = 0; i < 4; ++i) { | 355 | for(i = 0; i < 4; ++i) { |
316 | /* Simulator may not support it */ | 356 | /* Simulator may not support it */ |
317 | if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) | 357 | if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) { |
358 | nmi_perfctr_msr = 0; | ||
318 | return; | 359 | return; |
360 | } | ||
319 | wrmsrl(MSR_K7_PERFCTR0+i, 0UL); | 361 | wrmsrl(MSR_K7_PERFCTR0+i, 0UL); |
320 | } | 362 | } |
321 | 363 | ||
@@ -325,12 +367,54 @@ static void setup_k7_watchdog(void) | |||
325 | | K7_NMI_EVENT; | 367 | | K7_NMI_EVENT; |
326 | 368 | ||
327 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 369 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); |
328 | wrmsrl(MSR_K7_PERFCTR0, -((u64)cpu_khz*1000) / nmi_hz); | 370 | wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1); |
329 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 371 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
330 | evntsel |= K7_EVNTSEL_ENABLE; | 372 | evntsel |= K7_EVNTSEL_ENABLE; |
331 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 373 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); |
332 | } | 374 | } |
333 | 375 | ||
376 | |||
377 | static int setup_p4_watchdog(void) | ||
378 | { | ||
379 | unsigned int misc_enable, dummy; | ||
380 | |||
381 | rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); | ||
382 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | ||
383 | return 0; | ||
384 | |||
385 | nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; | ||
386 | nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; | ||
387 | #ifdef CONFIG_SMP | ||
388 | if (smp_num_siblings == 2) | ||
389 | nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; | ||
390 | #endif | ||
391 | |||
392 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) | ||
393 | clear_msr_range(0x3F1, 2); | ||
394 | /* MSR 0x3F0 seems to have a default value of 0xFC00, but current | ||
395 | docs don't fully define it, so leave it alone for now. */ | ||
396 | if (boot_cpu_data.x86_model >= 0x3) { | ||
397 | /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ | ||
398 | clear_msr_range(0x3A0, 26); | ||
399 | clear_msr_range(0x3BC, 3); | ||
400 | } else { | ||
401 | clear_msr_range(0x3A0, 31); | ||
402 | } | ||
403 | clear_msr_range(0x3C0, 6); | ||
404 | clear_msr_range(0x3C8, 6); | ||
405 | clear_msr_range(0x3E0, 2); | ||
406 | clear_msr_range(MSR_P4_CCCR0, 18); | ||
407 | clear_msr_range(MSR_P4_PERFCTR0, 18); | ||
408 | |||
409 | wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); | ||
410 | wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); | ||
411 | Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); | ||
412 | wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); | ||
413 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
414 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | ||
415 | return 1; | ||
416 | } | ||
417 | |||
334 | void setup_apic_nmi_watchdog(void) | 418 | void setup_apic_nmi_watchdog(void) |
335 | { | 419 | { |
336 | switch (boot_cpu_data.x86_vendor) { | 420 | switch (boot_cpu_data.x86_vendor) { |
@@ -341,6 +425,13 @@ void setup_apic_nmi_watchdog(void) | |||
341 | return; | 425 | return; |
342 | setup_k7_watchdog(); | 426 | setup_k7_watchdog(); |
343 | break; | 427 | break; |
428 | case X86_VENDOR_INTEL: | ||
429 | if (boot_cpu_data.x86 != 15) | ||
430 | return; | ||
431 | if (!setup_p4_watchdog()) | ||
432 | return; | ||
433 | break; | ||
434 | |||
344 | default: | 435 | default: |
345 | return; | 436 | return; |
346 | } | 437 | } |
@@ -355,56 +446,67 @@ void setup_apic_nmi_watchdog(void) | |||
355 | * | 446 | * |
356 | * as these watchdog NMI IRQs are generated on every CPU, we only | 447 | * as these watchdog NMI IRQs are generated on every CPU, we only |
357 | * have to check the current processor. | 448 | * have to check the current processor. |
358 | * | ||
359 | * since NMIs don't listen to _any_ locks, we have to be extremely | ||
360 | * careful not to rely on unsafe variables. The printk might lock | ||
361 | * up though, so we have to break up any console locks first ... | ||
362 | * [when there will be more tty-related locks, break them up | ||
363 | * here too!] | ||
364 | */ | 449 | */ |
365 | 450 | ||
366 | static unsigned int | 451 | static DEFINE_PER_CPU(unsigned, last_irq_sum); |
367 | last_irq_sums [NR_CPUS], | 452 | static DEFINE_PER_CPU(local_t, alert_counter); |
368 | alert_counter [NR_CPUS]; | 453 | static DEFINE_PER_CPU(int, nmi_touch); |
369 | 454 | ||
370 | void touch_nmi_watchdog (void) | 455 | void touch_nmi_watchdog (void) |
371 | { | 456 | { |
372 | int i; | 457 | int i; |
373 | 458 | ||
374 | /* | 459 | /* |
375 | * Just reset the alert counters, (other CPUs might be | 460 | * Tell other CPUs to reset their alert counters. We cannot |
376 | * spinning on locks we hold): | 461 | * do it ourselves because the alert count increase is not |
462 | * atomic. | ||
377 | */ | 463 | */ |
378 | for (i = 0; i < NR_CPUS; i++) | 464 | for (i = 0; i < NR_CPUS; i++) |
379 | alert_counter[i] = 0; | 465 | per_cpu(nmi_touch, i) = 1; |
380 | } | 466 | } |
381 | 467 | ||
382 | void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) | 468 | void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) |
383 | { | 469 | { |
384 | int sum, cpu; | 470 | int sum; |
471 | int touched = 0; | ||
385 | 472 | ||
386 | cpu = safe_smp_processor_id(); | ||
387 | sum = read_pda(apic_timer_irqs); | 473 | sum = read_pda(apic_timer_irqs); |
388 | if (last_irq_sums[cpu] == sum) { | 474 | if (__get_cpu_var(nmi_touch)) { |
475 | __get_cpu_var(nmi_touch) = 0; | ||
476 | touched = 1; | ||
477 | } | ||
478 | if (!touched && __get_cpu_var(last_irq_sum) == sum) { | ||
389 | /* | 479 | /* |
390 | * Ayiee, looks like this CPU is stuck ... | 480 | * Ayiee, looks like this CPU is stuck ... |
391 | * wait a few IRQs (5 seconds) before doing the oops ... | 481 | * wait a few IRQs (5 seconds) before doing the oops ... |
392 | */ | 482 | */ |
393 | alert_counter[cpu]++; | 483 | local_inc(&__get_cpu_var(alert_counter)); |
394 | if (alert_counter[cpu] == 5*nmi_hz) { | 484 | if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) { |
395 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | 485 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) |
396 | == NOTIFY_STOP) { | 486 | == NOTIFY_STOP) { |
397 | alert_counter[cpu] = 0; | 487 | local_set(&__get_cpu_var(alert_counter), 0); |
398 | return; | 488 | return; |
399 | } | 489 | } |
400 | die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs); | 490 | die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs); |
401 | } | 491 | } |
402 | } else { | 492 | } else { |
403 | last_irq_sums[cpu] = sum; | 493 | __get_cpu_var(last_irq_sum) = sum; |
404 | alert_counter[cpu] = 0; | 494 | local_set(&__get_cpu_var(alert_counter), 0); |
405 | } | 495 | } |
406 | if (nmi_perfctr_msr) | 496 | if (nmi_perfctr_msr) { |
497 | if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { | ||
498 | /* | ||
499 | * P4 quirks: | ||
500 | * - An overflown perfctr will assert its interrupt | ||
501 | * until the OVF flag in its CCCR is cleared. | ||
502 | * - LVTPC is masked on interrupt and must be | ||
503 | * unmasked by the LVTPC handler. | ||
504 | */ | ||
505 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | ||
506 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
507 | } | ||
407 | wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); | 508 | wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); |
509 | } | ||
408 | } | 510 | } |
409 | 511 | ||
410 | static int dummy_nmi_callback(struct pt_regs * regs, int cpu) | 512 | static int dummy_nmi_callback(struct pt_regs * regs, int cpu) |
diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c new file mode 100644 index 000000000000..feb5f108dd26 --- /dev/null +++ b/arch/x86_64/kernel/pmtimer.c | |||
@@ -0,0 +1,101 @@ | |||
1 | /* Ported over from i386 by AK, original copyright was: | ||
2 | * | ||
3 | * (C) Dominik Brodowski <linux@brodo.de> 2003 | ||
4 | * | ||
5 | * Driver to use the Power Management Timer (PMTMR) available in some | ||
6 | * southbridges as primary timing source for the Linux kernel. | ||
7 | * | ||
8 | * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, | ||
9 | * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. | ||
10 | * | ||
11 | * This file is licensed under the GPL v2. | ||
12 | * | ||
13 | * Dropped all the hardware bug workarounds for now. Hopefully they | ||
14 | * are not needed on 64bit chipsets. | ||
15 | */ | ||
16 | |||
17 | #include <linux/jiffies.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/time.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/cpumask.h> | ||
22 | #include <asm/io.h> | ||
23 | #include <asm/proto.h> | ||
24 | #include <asm/msr.h> | ||
25 | #include <asm/vsyscall.h> | ||
26 | |||
27 | /* The I/O port the PMTMR resides at. | ||
28 | * The location is detected during setup_arch(), | ||
29 | * in arch/i386/kernel/acpi/boot.c */ | ||
30 | u32 pmtmr_ioport; | ||
31 | |||
32 | /* value of the Power timer at last timer interrupt */ | ||
33 | static u32 offset_delay; | ||
34 | static u32 last_pmtmr_tick; | ||
35 | |||
36 | #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ | ||
37 | |||
38 | static inline u32 cyc2us(u32 cycles) | ||
39 | { | ||
40 | /* The Power Management Timer ticks at 3.579545 ticks per microsecond. | ||
41 | * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] | ||
42 | * | ||
43 | * Even with HZ = 100, delta is at maximum 35796 ticks, so it can | ||
44 | * easily be multiplied with 286 (=0x11E) without having to fear | ||
45 | * u32 overflows. | ||
46 | */ | ||
47 | cycles *= 286; | ||
48 | return (cycles >> 10); | ||
49 | } | ||
50 | |||
51 | int pmtimer_mark_offset(void) | ||
52 | { | ||
53 | static int first_run = 1; | ||
54 | unsigned long tsc; | ||
55 | u32 lost; | ||
56 | |||
57 | u32 tick = inl(pmtmr_ioport); | ||
58 | u32 delta; | ||
59 | |||
60 | delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK); | ||
61 | |||
62 | last_pmtmr_tick = tick; | ||
63 | monotonic_base += delta * NSEC_PER_USEC; | ||
64 | |||
65 | delta += offset_delay; | ||
66 | |||
67 | lost = delta / (USEC_PER_SEC / HZ); | ||
68 | offset_delay = delta % (USEC_PER_SEC / HZ); | ||
69 | |||
70 | rdtscll(tsc); | ||
71 | vxtime.last_tsc = tsc - offset_delay * cpu_khz; | ||
72 | |||
73 | /* don't calculate delay for first run, | ||
74 | or if we've got less than a tick */ | ||
75 | if (first_run || (lost < 1)) { | ||
76 | first_run = 0; | ||
77 | offset_delay = 0; | ||
78 | } | ||
79 | |||
80 | return lost - 1; | ||
81 | } | ||
82 | |||
83 | unsigned int do_gettimeoffset_pm(void) | ||
84 | { | ||
85 | u32 now, offset, delta = 0; | ||
86 | |||
87 | offset = last_pmtmr_tick; | ||
88 | now = inl(pmtmr_ioport); | ||
89 | delta = (now - offset) & ACPI_PM_MASK; | ||
90 | |||
91 | return offset_delay + cyc2us(delta); | ||
92 | } | ||
93 | |||
94 | |||
95 | static int __init nopmtimer_setup(char *s) | ||
96 | { | ||
97 | pmtmr_ioport = 0; | ||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | __setup("nopmtimer", nopmtimer_setup); | ||
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index e26e86bb56fe..60dc9b98951d 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c | |||
@@ -257,13 +257,13 @@ static int putreg(struct task_struct *child, | |||
257 | value &= 0xffff; | 257 | value &= 0xffff; |
258 | return 0; | 258 | return 0; |
259 | case offsetof(struct user_regs_struct,fs_base): | 259 | case offsetof(struct user_regs_struct,fs_base): |
260 | if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) | 260 | if (value >= TASK_SIZE) |
261 | return -EIO; | 261 | return -EIO; |
262 | child->thread.fs = value; | 262 | child->thread.fs = value; |
263 | return 0; | 263 | return 0; |
264 | case offsetof(struct user_regs_struct,gs_base): | 264 | case offsetof(struct user_regs_struct,gs_base): |
265 | if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) | 265 | if (value >= TASK_SIZE) |
266 | return -EIO; | 266 | return -EIO; |
267 | child->thread.gs = value; | 267 | child->thread.gs = value; |
268 | return 0; | 268 | return 0; |
269 | case offsetof(struct user_regs_struct, eflags): | 269 | case offsetof(struct user_regs_struct, eflags): |
@@ -277,6 +277,11 @@ static int putreg(struct task_struct *child, | |||
277 | return -EIO; | 277 | return -EIO; |
278 | value &= 0xffff; | 278 | value &= 0xffff; |
279 | break; | 279 | break; |
280 | case offsetof(struct user_regs_struct, rip): | ||
281 | /* Check if the new RIP address is canonical */ | ||
282 | if (value >= TASK_SIZE) | ||
283 | return -EIO; | ||
284 | break; | ||
280 | } | 285 | } |
281 | put_stack_long(child, regno - sizeof(struct pt_regs), value); | 286 | put_stack_long(child, regno - sizeof(struct pt_regs), value); |
282 | return 0; | 287 | return 0; |
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 2129cf9ba6b2..b9fd0252c279 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c | |||
@@ -727,11 +727,12 @@ static void __init display_cacheinfo(struct cpuinfo_x86 *c) | |||
727 | static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | 727 | static void __init amd_detect_cmp(struct cpuinfo_x86 *c) |
728 | { | 728 | { |
729 | #ifdef CONFIG_SMP | 729 | #ifdef CONFIG_SMP |
730 | int cpu = c->x86_apicid; | 730 | int cpu = smp_processor_id(); |
731 | int node = 0; | 731 | int node = 0; |
732 | if (c->x86_num_cores == 1) | 732 | if (c->x86_num_cores == 1) |
733 | return; | 733 | return; |
734 | cpu_core_id[cpu] = cpu >> hweight32(c->x86_num_cores - 1); | 734 | /* Fix up the APIC ID following the AMD specification. */ |
735 | cpu_core_id[cpu] >>= hweight32(c->x86_num_cores - 1); | ||
735 | 736 | ||
736 | #ifdef CONFIG_NUMA | 737 | #ifdef CONFIG_NUMA |
737 | /* When an ACPI SRAT table is available use the mappings from SRAT | 738 | /* When an ACPI SRAT table is available use the mappings from SRAT |
@@ -745,6 +746,9 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |||
745 | node = cpu_to_node[cpu]; | 746 | node = cpu_to_node[cpu]; |
746 | } | 747 | } |
747 | #endif | 748 | #endif |
749 | /* For now: - better than BAD_APIC_ID at least*/ | ||
750 | phys_proc_id[cpu] = cpu_core_id[cpu]; | ||
751 | |||
748 | printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", | 752 | printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", |
749 | cpu, c->x86_num_cores, node, cpu_core_id[cpu]); | 753 | cpu, c->x86_num_cores, node, cpu_core_id[cpu]); |
750 | #endif | 754 | #endif |
@@ -925,7 +929,6 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c) | |||
925 | c->x86_clflush_size = 64; | 929 | c->x86_clflush_size = 64; |
926 | c->x86_cache_alignment = c->x86_clflush_size; | 930 | c->x86_cache_alignment = c->x86_clflush_size; |
927 | c->x86_num_cores = 1; | 931 | c->x86_num_cores = 1; |
928 | c->x86_apicid = c == &boot_cpu_data ? 0 : c - cpu_data; | ||
929 | c->extended_cpuid_level = 0; | 932 | c->extended_cpuid_level = 0; |
930 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | 933 | memset(&c->x86_capability, 0, sizeof c->x86_capability); |
931 | 934 | ||
@@ -954,11 +957,15 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c) | |||
954 | } | 957 | } |
955 | if (c->x86_capability[0] & (1<<19)) | 958 | if (c->x86_capability[0] & (1<<19)) |
956 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; | 959 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; |
957 | c->x86_apicid = misc >> 24; | ||
958 | } else { | 960 | } else { |
959 | /* Have CPUID level 0 only - unheard of */ | 961 | /* Have CPUID level 0 only - unheard of */ |
960 | c->x86 = 4; | 962 | c->x86 = 4; |
961 | } | 963 | } |
964 | |||
965 | #ifdef CONFIG_SMP | ||
966 | phys_proc_id[smp_processor_id()] = | ||
967 | cpu_core_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; | ||
968 | #endif | ||
962 | } | 969 | } |
963 | 970 | ||
964 | /* | 971 | /* |
@@ -1088,7 +1095,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1088 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1095 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1089 | 1096 | ||
1090 | /* Other (Linux-defined) */ | 1097 | /* Other (Linux-defined) */ |
1091 | "cxmmx", NULL, "cyrix_arr", "centaur_mcr", "k8c+", | 1098 | "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL, |
1092 | "constant_tsc", NULL, NULL, | 1099 | "constant_tsc", NULL, NULL, |
1093 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1100 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1094 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1101 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 73f7e8b9543a..5abdee1e16a5 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c | |||
@@ -56,6 +56,7 @@ | |||
56 | #include <asm/kdebug.h> | 56 | #include <asm/kdebug.h> |
57 | #include <asm/tlbflush.h> | 57 | #include <asm/tlbflush.h> |
58 | #include <asm/proto.h> | 58 | #include <asm/proto.h> |
59 | #include <asm/nmi.h> | ||
59 | 60 | ||
60 | /* Change for real CPU hotplug. Note other files need to be fixed | 61 | /* Change for real CPU hotplug. Note other files need to be fixed |
61 | first too. */ | 62 | first too. */ |
@@ -125,96 +126,210 @@ static void __cpuinit smp_store_cpu_info(int id) | |||
125 | 126 | ||
126 | *c = boot_cpu_data; | 127 | *c = boot_cpu_data; |
127 | identify_cpu(c); | 128 | identify_cpu(c); |
129 | print_cpu_info(c); | ||
128 | } | 130 | } |
129 | 131 | ||
130 | /* | 132 | /* |
131 | * Synchronize TSCs of CPUs | 133 | * New Funky TSC sync algorithm borrowed from IA64. |
134 | * Main advantage is that it doesn't reset the TSCs fully and | ||
135 | * in general looks more robust and it works better than my earlier | ||
136 | * attempts. I believe it was written by David Mosberger. Some minor | ||
137 | * adjustments for x86-64 by me -AK | ||
132 | * | 138 | * |
133 | * This new algorithm is less accurate than the old "zero TSCs" | 139 | * Original comment reproduced below. |
134 | * one, but we cannot zero TSCs anymore in the new hotplug CPU | 140 | * |
135 | * model. | 141 | * Synchronize TSC of the current (slave) CPU with the TSC of the |
142 | * MASTER CPU (normally the time-keeper CPU). We use a closed loop to | ||
143 | * eliminate the possibility of unaccounted-for errors (such as | ||
144 | * getting a machine check in the middle of a calibration step). The | ||
145 | * basic idea is for the slave to ask the master what itc value it has | ||
146 | * and to read its own itc before and after the master responds. Each | ||
147 | * iteration gives us three timestamps: | ||
148 | * | ||
149 | * slave master | ||
150 | * | ||
151 | * t0 ---\ | ||
152 | * ---\ | ||
153 | * ---> | ||
154 | * tm | ||
155 | * /--- | ||
156 | * /--- | ||
157 | * t1 <--- | ||
158 | * | ||
159 | * | ||
160 | * The goal is to adjust the slave's TSC such that tm falls exactly | ||
161 | * half-way between t0 and t1. If we achieve this, the clocks are | ||
162 | * synchronized provided the interconnect between the slave and the | ||
163 | * master is symmetric. Even if the interconnect were asymmetric, we | ||
164 | * would still know that the synchronization error is smaller than the | ||
165 | * roundtrip latency (t1 - t0). | ||
166 | * | ||
167 | * When the interconnect is quiet and symmetric, this lets us | ||
168 | * synchronize the TSC to within one or two cycles. However, we can | ||
169 | * only *guarantee* that the synchronization is accurate to within a | ||
170 | * round-trip time, which is typically in the range of several hundred | ||
171 | * cycles (e.g., ~500 cycles). In practice, this means that the TSCs | ||
172 | * are usually almost perfectly synchronized, but we shouldn't assume | ||
173 | * that the accuracy is much better than half a microsecond or so. | ||
174 | * | ||
175 | * [there are other errors like the latency of RDTSC and of the | ||
176 | * WRMSR. These can also amount to hundreds of cycles. So it's | ||
177 | * probably worse. It claims 153 cycles error on a dual Opteron, | ||
178 | * but I suspect the numbers are actually somewhat worse -AK] | ||
136 | */ | 179 | */ |
137 | 180 | ||
138 | static atomic_t __cpuinitdata tsc_flag; | 181 | #define MASTER 0 |
182 | #define SLAVE (SMP_CACHE_BYTES/8) | ||
183 | |||
184 | /* Intentionally don't use cpu_relax() during TSC synchronization | ||
185 | because we don't want to go into funky power save modes or cause | ||
186 | hypervisors to schedule us away. Going to sleep would likely affect | ||
187 | latency and low latency is the primary objective here. -AK */ | ||
188 | #define no_cpu_relax() barrier() | ||
189 | |||
139 | static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock); | 190 | static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock); |
140 | static unsigned long long __cpuinitdata bp_tsc, ap_tsc; | 191 | static volatile __cpuinitdata unsigned long go[SLAVE + 1]; |
192 | static int notscsync __cpuinitdata; | ||
193 | |||
194 | #undef DEBUG_TSC_SYNC | ||
141 | 195 | ||
142 | #define NR_LOOPS 5 | 196 | #define NUM_ROUNDS 64 /* magic value */ |
197 | #define NUM_ITERS 5 /* likewise */ | ||
143 | 198 | ||
144 | static void __cpuinit sync_tsc_bp_init(int init) | 199 | /* Callback on boot CPU */ |
200 | static __cpuinit void sync_master(void *arg) | ||
145 | { | 201 | { |
146 | if (init) | 202 | unsigned long flags, i; |
147 | _raw_spin_lock(&tsc_sync_lock); | 203 | |
148 | else | 204 | if (smp_processor_id() != boot_cpu_id) |
149 | _raw_spin_unlock(&tsc_sync_lock); | 205 | return; |
150 | atomic_set(&tsc_flag, 0); | 206 | |
207 | go[MASTER] = 0; | ||
208 | |||
209 | local_irq_save(flags); | ||
210 | { | ||
211 | for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { | ||
212 | while (!go[MASTER]) | ||
213 | no_cpu_relax(); | ||
214 | go[MASTER] = 0; | ||
215 | rdtscll(go[SLAVE]); | ||
216 | } | ||
217 | } | ||
218 | local_irq_restore(flags); | ||
151 | } | 219 | } |
152 | 220 | ||
153 | /* | 221 | /* |
154 | * Synchronize TSC on AP with BP. | 222 | * Return the number of cycles by which our tsc differs from the tsc |
223 | * on the master (time-keeper) CPU. A positive number indicates our | ||
224 | * tsc is ahead of the master, negative that it is behind. | ||
155 | */ | 225 | */ |
156 | static void __cpuinit __sync_tsc_ap(void) | 226 | static inline long |
227 | get_delta(long *rt, long *master) | ||
157 | { | 228 | { |
158 | if (!cpu_has_tsc) | 229 | unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; |
159 | return; | 230 | unsigned long tcenter, t0, t1, tm; |
160 | Dprintk("AP %d syncing TSC\n", smp_processor_id()); | 231 | int i; |
161 | 232 | ||
162 | while (atomic_read(&tsc_flag) != 0) | 233 | for (i = 0; i < NUM_ITERS; ++i) { |
163 | cpu_relax(); | 234 | rdtscll(t0); |
164 | atomic_inc(&tsc_flag); | 235 | go[MASTER] = 1; |
165 | mb(); | 236 | while (!(tm = go[SLAVE])) |
166 | _raw_spin_lock(&tsc_sync_lock); | 237 | no_cpu_relax(); |
167 | wrmsrl(MSR_IA32_TSC, bp_tsc); | 238 | go[SLAVE] = 0; |
168 | _raw_spin_unlock(&tsc_sync_lock); | 239 | rdtscll(t1); |
169 | rdtscll(ap_tsc); | 240 | |
170 | mb(); | 241 | if (t1 - t0 < best_t1 - best_t0) |
171 | atomic_inc(&tsc_flag); | 242 | best_t0 = t0, best_t1 = t1, best_tm = tm; |
172 | mb(); | 243 | } |
244 | |||
245 | *rt = best_t1 - best_t0; | ||
246 | *master = best_tm - best_t0; | ||
247 | |||
248 | /* average best_t0 and best_t1 without overflow: */ | ||
249 | tcenter = (best_t0/2 + best_t1/2); | ||
250 | if (best_t0 % 2 + best_t1 % 2 == 2) | ||
251 | ++tcenter; | ||
252 | return tcenter - best_tm; | ||
173 | } | 253 | } |
174 | 254 | ||
175 | static void __cpuinit sync_tsc_ap(void) | 255 | static __cpuinit void sync_tsc(void) |
176 | { | 256 | { |
177 | int i; | 257 | int i, done = 0; |
178 | for (i = 0; i < NR_LOOPS; i++) | 258 | long delta, adj, adjust_latency = 0; |
179 | __sync_tsc_ap(); | 259 | unsigned long flags, rt, master_time_stamp, bound; |
260 | #if DEBUG_TSC_SYNC | ||
261 | static struct syncdebug { | ||
262 | long rt; /* roundtrip time */ | ||
263 | long master; /* master's timestamp */ | ||
264 | long diff; /* difference between midpoint and master's timestamp */ | ||
265 | long lat; /* estimate of tsc adjustment latency */ | ||
266 | } t[NUM_ROUNDS] __cpuinitdata; | ||
267 | #endif | ||
268 | |||
269 | go[MASTER] = 1; | ||
270 | |||
271 | smp_call_function(sync_master, NULL, 1, 0); | ||
272 | |||
273 | while (go[MASTER]) /* wait for master to be ready */ | ||
274 | no_cpu_relax(); | ||
275 | |||
276 | spin_lock_irqsave(&tsc_sync_lock, flags); | ||
277 | { | ||
278 | for (i = 0; i < NUM_ROUNDS; ++i) { | ||
279 | delta = get_delta(&rt, &master_time_stamp); | ||
280 | if (delta == 0) { | ||
281 | done = 1; /* let's lock on to this... */ | ||
282 | bound = rt; | ||
283 | } | ||
284 | |||
285 | if (!done) { | ||
286 | unsigned long t; | ||
287 | if (i > 0) { | ||
288 | adjust_latency += -delta; | ||
289 | adj = -delta + adjust_latency/4; | ||
290 | } else | ||
291 | adj = -delta; | ||
292 | |||
293 | rdtscll(t); | ||
294 | wrmsrl(MSR_IA32_TSC, t + adj); | ||
295 | } | ||
296 | #if DEBUG_TSC_SYNC | ||
297 | t[i].rt = rt; | ||
298 | t[i].master = master_time_stamp; | ||
299 | t[i].diff = delta; | ||
300 | t[i].lat = adjust_latency/4; | ||
301 | #endif | ||
302 | } | ||
303 | } | ||
304 | spin_unlock_irqrestore(&tsc_sync_lock, flags); | ||
305 | |||
306 | #if DEBUG_TSC_SYNC | ||
307 | for (i = 0; i < NUM_ROUNDS; ++i) | ||
308 | printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", | ||
309 | t[i].rt, t[i].master, t[i].diff, t[i].lat); | ||
310 | #endif | ||
311 | |||
312 | printk(KERN_INFO | ||
313 | "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, " | ||
314 | "maxerr %lu cycles)\n", | ||
315 | smp_processor_id(), boot_cpu_id, delta, rt); | ||
180 | } | 316 | } |
181 | 317 | ||
182 | /* | 318 | static void __cpuinit tsc_sync_wait(void) |
183 | * Synchronize TSC from BP to AP. | ||
184 | */ | ||
185 | static void __cpuinit __sync_tsc_bp(int cpu) | ||
186 | { | 319 | { |
187 | if (!cpu_has_tsc) | 320 | if (notscsync || !cpu_has_tsc) |
188 | return; | 321 | return; |
189 | 322 | printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(), | |
190 | /* Wait for AP */ | 323 | boot_cpu_id); |
191 | while (atomic_read(&tsc_flag) == 0) | 324 | sync_tsc(); |
192 | cpu_relax(); | ||
193 | /* Save BPs TSC */ | ||
194 | sync_core(); | ||
195 | rdtscll(bp_tsc); | ||
196 | /* Don't do the sync core here to avoid too much latency. */ | ||
197 | mb(); | ||
198 | /* Start the AP */ | ||
199 | _raw_spin_unlock(&tsc_sync_lock); | ||
200 | /* Wait for AP again */ | ||
201 | while (atomic_read(&tsc_flag) < 2) | ||
202 | cpu_relax(); | ||
203 | rdtscl(bp_tsc); | ||
204 | barrier(); | ||
205 | } | 325 | } |
206 | 326 | ||
207 | static void __cpuinit sync_tsc_bp(int cpu) | 327 | static __init int notscsync_setup(char *s) |
208 | { | 328 | { |
209 | int i; | 329 | notscsync = 1; |
210 | for (i = 0; i < NR_LOOPS - 1; i++) { | 330 | return 0; |
211 | __sync_tsc_bp(cpu); | ||
212 | sync_tsc_bp_init(1); | ||
213 | } | ||
214 | __sync_tsc_bp(cpu); | ||
215 | printk(KERN_INFO "Synced TSC of CPU %d difference %Ld\n", | ||
216 | cpu, ap_tsc - bp_tsc); | ||
217 | } | 331 | } |
332 | __setup("notscsync", notscsync_setup); | ||
218 | 333 | ||
219 | static atomic_t init_deasserted __cpuinitdata; | 334 | static atomic_t init_deasserted __cpuinitdata; |
220 | 335 | ||
@@ -315,11 +430,6 @@ void __cpuinit start_secondary(void) | |||
315 | cpu_init(); | 430 | cpu_init(); |
316 | smp_callin(); | 431 | smp_callin(); |
317 | 432 | ||
318 | /* | ||
319 | * Synchronize the TSC with the BP | ||
320 | */ | ||
321 | sync_tsc_ap(); | ||
322 | |||
323 | /* otherwise gcc will move up the smp_processor_id before the cpu_init */ | 433 | /* otherwise gcc will move up the smp_processor_id before the cpu_init */ |
324 | barrier(); | 434 | barrier(); |
325 | 435 | ||
@@ -334,7 +444,6 @@ void __cpuinit start_secondary(void) | |||
334 | enable_8259A_irq(0); | 444 | enable_8259A_irq(0); |
335 | } | 445 | } |
336 | 446 | ||
337 | |||
338 | enable_APIC_timer(); | 447 | enable_APIC_timer(); |
339 | 448 | ||
340 | /* | 449 | /* |
@@ -343,6 +452,11 @@ void __cpuinit start_secondary(void) | |||
343 | cpu_set(smp_processor_id(), cpu_online_map); | 452 | cpu_set(smp_processor_id(), cpu_online_map); |
344 | mb(); | 453 | mb(); |
345 | 454 | ||
455 | /* Wait for TSC sync so that nothing is scheduled before it completes. | ||
456 | We still process interrupts, which could see an inconsistent | ||
457 | time in that window unfortunately. */ | ||
458 | tsc_sync_wait(); | ||
459 | |||
346 | cpu_idle(); | 460 | cpu_idle(); |
347 | } | 461 | } |
348 | 462 | ||
@@ -531,7 +645,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) | |||
531 | printk("failed fork for CPU %d\n", cpu); | 645 | printk("failed fork for CPU %d\n", cpu); |
532 | return PTR_ERR(idle); | 646 | return PTR_ERR(idle); |
533 | } | 647 | } |
534 | x86_cpu_to_apicid[cpu] = apicid; | ||
535 | 648 | ||
536 | cpu_pda[cpu].pcurrent = idle; | 649 | cpu_pda[cpu].pcurrent = idle; |
537 | 650 | ||
@@ -600,8 +713,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) | |||
600 | 713 | ||
601 | if (cpu_isset(cpu, cpu_callin_map)) { | 714 | if (cpu_isset(cpu, cpu_callin_map)) { |
602 | /* number CPUs logically, starting from 1 (BSP is 0) */ | 715 | /* number CPUs logically, starting from 1 (BSP is 0) */ |
603 | Dprintk("OK.\n"); | ||
604 | print_cpu_info(&cpu_data[cpu]); | ||
605 | Dprintk("CPU has booted.\n"); | 716 | Dprintk("CPU has booted.\n"); |
606 | } else { | 717 | } else { |
607 | boot_error = 1; | 718 | boot_error = 1; |
@@ -842,7 +953,6 @@ void __cpuinit smp_prepare_cpus(unsigned int max_cpus) | |||
842 | GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id); | 953 | GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id); |
843 | /* Or can we switch back to PIC here? */ | 954 | /* Or can we switch back to PIC here? */ |
844 | } | 955 | } |
845 | x86_cpu_to_apicid[0] = boot_cpu_id; | ||
846 | 956 | ||
847 | /* | 957 | /* |
848 | * Now start the IO-APICs | 958 | * Now start the IO-APICs |
@@ -889,18 +999,14 @@ int __cpuinit __cpu_up(unsigned int cpu) | |||
889 | printk("__cpu_up: bad cpu %d\n", cpu); | 999 | printk("__cpu_up: bad cpu %d\n", cpu); |
890 | return -EINVAL; | 1000 | return -EINVAL; |
891 | } | 1001 | } |
892 | sync_tsc_bp_init(1); | ||
893 | 1002 | ||
894 | /* Boot it! */ | 1003 | /* Boot it! */ |
895 | err = do_boot_cpu(cpu, apicid); | 1004 | err = do_boot_cpu(cpu, apicid); |
896 | if (err < 0) { | 1005 | if (err < 0) { |
897 | sync_tsc_bp_init(0); | ||
898 | Dprintk("do_boot_cpu failed %d\n", err); | 1006 | Dprintk("do_boot_cpu failed %d\n", err); |
899 | return err; | 1007 | return err; |
900 | } | 1008 | } |
901 | 1009 | ||
902 | sync_tsc_bp(cpu); | ||
903 | |||
904 | /* Unleash the CPU! */ | 1010 | /* Unleash the CPU! */ |
905 | Dprintk("waiting for cpu %d\n", cpu); | 1011 | Dprintk("waiting for cpu %d\n", cpu); |
906 | 1012 | ||
@@ -923,4 +1029,6 @@ void __cpuinit smp_cpus_done(unsigned int max_cpus) | |||
923 | 1029 | ||
924 | detect_siblings(); | 1030 | detect_siblings(); |
925 | time_init_gtod(); | 1031 | time_init_gtod(); |
1032 | |||
1033 | check_nmi_watchdog(); | ||
926 | } | 1034 | } |
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 0e3b60e1ac5b..735b6767c8ed 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -26,6 +26,8 @@ | |||
26 | #include <linux/sysdev.h> | 26 | #include <linux/sysdev.h> |
27 | #include <linux/bcd.h> | 27 | #include <linux/bcd.h> |
28 | #include <linux/kallsyms.h> | 28 | #include <linux/kallsyms.h> |
29 | #include <linux/acpi.h> | ||
30 | #include <acpi/achware.h> /* for PM timer frequency */ | ||
29 | #include <asm/8253pit.h> | 31 | #include <asm/8253pit.h> |
30 | #include <asm/pgtable.h> | 32 | #include <asm/pgtable.h> |
31 | #include <asm/vsyscall.h> | 33 | #include <asm/vsyscall.h> |
@@ -396,6 +398,10 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) | |||
396 | (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; | 398 | (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; |
397 | 399 | ||
398 | vxtime.last = offset; | 400 | vxtime.last = offset; |
401 | #ifdef CONFIG_X86_PM_TIMER | ||
402 | } else if (vxtime.mode == VXTIME_PMTMR) { | ||
403 | lost = pmtimer_mark_offset(); | ||
404 | #endif | ||
399 | } else { | 405 | } else { |
400 | offset = (((tsc - vxtime.last_tsc) * | 406 | offset = (((tsc - vxtime.last_tsc) * |
401 | vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); | 407 | vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); |
@@ -898,6 +904,13 @@ void __init time_init(void) | |||
898 | hpet_period; | 904 | hpet_period; |
899 | cpu_khz = hpet_calibrate_tsc(); | 905 | cpu_khz = hpet_calibrate_tsc(); |
900 | timename = "HPET"; | 906 | timename = "HPET"; |
907 | #ifdef CONFIG_X86_PM_TIMER | ||
908 | } else if (pmtmr_ioport) { | ||
909 | vxtime_hz = PM_TIMER_FREQUENCY; | ||
910 | timename = "PM"; | ||
911 | pit_init(); | ||
912 | cpu_khz = pit_calibrate_tsc(); | ||
913 | #endif | ||
901 | } else { | 914 | } else { |
902 | pit_init(); | 915 | pit_init(); |
903 | cpu_khz = pit_calibrate_tsc(); | 916 | cpu_khz = pit_calibrate_tsc(); |
@@ -923,35 +936,50 @@ void __init time_init(void) | |||
923 | } | 936 | } |
924 | 937 | ||
925 | /* | 938 | /* |
939 | * Make an educated guess if the TSC is trustworthy and synchronized | ||
940 | * over all CPUs. | ||
941 | */ | ||
942 | static __init int unsynchronized_tsc(void) | ||
943 | { | ||
944 | #ifdef CONFIG_SMP | ||
945 | if (oem_force_hpet_timer()) | ||
946 | return 1; | ||
947 | /* Intel systems are normally all synchronized. Exceptions | ||
948 | are handled in the OEM check above. */ | ||
949 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | ||
950 | return 0; | ||
951 | /* All in a single socket - should be synchronized */ | ||
952 | if (cpus_weight(cpu_core_map[0]) == num_online_cpus()) | ||
953 | return 0; | ||
954 | #endif | ||
955 | /* Assume multi socket systems are not synchronized */ | ||
956 | return num_online_cpus() > 1; | ||
957 | } | ||
958 | |||
959 | /* | ||
926 | * Decide after all CPUs are booted what mode gettimeofday should use. | 960 | * Decide after all CPUs are booted what mode gettimeofday should use. |
927 | */ | 961 | */ |
928 | void __init time_init_gtod(void) | 962 | void __init time_init_gtod(void) |
929 | { | 963 | { |
930 | char *timetype; | 964 | char *timetype; |
931 | 965 | ||
932 | /* | 966 | if (unsynchronized_tsc()) |
933 | * AMD systems with more than one CPU don't have fully synchronized | ||
934 | * TSCs. Always use HPET gettimeofday for these, although it is slower. | ||
935 | * Intel SMP systems usually have synchronized TSCs, so use always | ||
936 | * the TSC. | ||
937 | * | ||
938 | * Exceptions: | ||
939 | * IBM Summit2 checked by oem_force_hpet_timer(). | ||
940 | * AMD dual core may also not need HPET. Check me. | ||
941 | * | ||
942 | * Can be turned off with "notsc". | ||
943 | */ | ||
944 | if (num_online_cpus() > 1 && | ||
945 | boot_cpu_data.x86_vendor == X86_VENDOR_AMD) | ||
946 | notsc = 1; | ||
947 | /* Some systems will want to disable TSC and use HPET. */ | ||
948 | if (oem_force_hpet_timer()) | ||
949 | notsc = 1; | 967 | notsc = 1; |
950 | if (vxtime.hpet_address && notsc) { | 968 | if (vxtime.hpet_address && notsc) { |
951 | timetype = "HPET"; | 969 | timetype = "HPET"; |
952 | vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; | 970 | vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; |
953 | vxtime.mode = VXTIME_HPET; | 971 | vxtime.mode = VXTIME_HPET; |
954 | do_gettimeoffset = do_gettimeoffset_hpet; | 972 | do_gettimeoffset = do_gettimeoffset_hpet; |
973 | #ifdef CONFIG_X86_PM_TIMER | ||
974 | /* Using PM for gettimeofday is quite slow, but we have no other | ||
975 | choice because the TSC is too unreliable on some systems. */ | ||
976 | } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) { | ||
977 | timetype = "PM"; | ||
978 | do_gettimeoffset = do_gettimeoffset_pm; | ||
979 | vxtime.mode = VXTIME_PMTMR; | ||
980 | sysctl_vsyscall = 0; | ||
981 | printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n"); | ||
982 | #endif | ||
955 | } else { | 983 | } else { |
956 | timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC"; | 984 | timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC"; |
957 | vxtime.mode = VXTIME_TSC; | 985 | vxtime.mode = VXTIME_TSC; |
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index b4b8dc59663a..2e5734425949 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
@@ -65,7 +65,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv) | |||
65 | usec = (__xtime.tv_nsec / 1000) + | 65 | usec = (__xtime.tv_nsec / 1000) + |
66 | (__jiffies - __wall_jiffies) * (1000000 / HZ); | 66 | (__jiffies - __wall_jiffies) * (1000000 / HZ); |
67 | 67 | ||
68 | if (__vxtime.mode == VXTIME_TSC) { | 68 | if (__vxtime.mode != VXTIME_HPET) { |
69 | sync_core(); | 69 | sync_core(); |
70 | rdtscll(t); | 70 | rdtscll(t); |
71 | if (t < __vxtime.last_tsc) | 71 | if (t < __vxtime.last_tsc) |
@@ -217,8 +217,9 @@ static int __init vsyscall_init(void) | |||
217 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); | 217 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); |
218 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); | 218 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); |
219 | map_vsyscall(); | 219 | map_vsyscall(); |
220 | sysctl_vsyscall = 1; | 220 | #ifdef CONFIG_SYSCTL |
221 | register_sysctl_table(kernel_root_table2, 0); | 221 | register_sysctl_table(kernel_root_table2, 0); |
222 | #endif | ||
222 | return 0; | 223 | return 0; |
223 | } | 224 | } |
224 | 225 | ||
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index e03309216764..5d6b2114cc9a 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c | |||
@@ -234,6 +234,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, | |||
234 | 234 | ||
235 | /* | 235 | /* |
236 | * Handle a fault on the vmalloc or module mapping area | 236 | * Handle a fault on the vmalloc or module mapping area |
237 | * | ||
238 | * This assumes no large pages in there. | ||
237 | */ | 239 | */ |
238 | static int vmalloc_fault(unsigned long address) | 240 | static int vmalloc_fault(unsigned long address) |
239 | { | 241 | { |
@@ -272,7 +274,10 @@ static int vmalloc_fault(unsigned long address) | |||
272 | if (!pte_present(*pte_ref)) | 274 | if (!pte_present(*pte_ref)) |
273 | return -1; | 275 | return -1; |
274 | pte = pte_offset_kernel(pmd, address); | 276 | pte = pte_offset_kernel(pmd, address); |
275 | if (!pte_present(*pte) || pte_page(*pte) != pte_page(*pte_ref)) | 277 | /* Don't use pte_page here, because the mappings can point |
278 | outside mem_map, and the NUMA hash lookup cannot handle | ||
279 | that. */ | ||
280 | if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) | ||
276 | BUG(); | 281 | BUG(); |
277 | __flush_tlb_all(); | 282 | __flush_tlb_all(); |
278 | return 0; | 283 | return 0; |
@@ -346,7 +351,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
346 | * protection error (error_code & 1) == 0. | 351 | * protection error (error_code & 1) == 0. |
347 | */ | 352 | */ |
348 | if (unlikely(address >= TASK_SIZE)) { | 353 | if (unlikely(address >= TASK_SIZE)) { |
349 | if (!(error_code & 5)) { | 354 | if (!(error_code & 5) && |
355 | ((address >= VMALLOC_START && address < VMALLOC_END) || | ||
356 | (address >= MODULES_VADDR && address < MODULES_END))) { | ||
350 | if (vmalloc_fault(address) < 0) | 357 | if (vmalloc_fault(address) < 0) |
351 | goto bad_area_nosemaphore; | 358 | goto bad_area_nosemaphore; |
352 | return; | 359 | return; |
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c index 74ec8554b195..c6fb0cb69992 100644 --- a/arch/x86_64/mm/ioremap.c +++ b/arch/x86_64/mm/ioremap.c | |||
@@ -272,7 +272,7 @@ void iounmap(volatile void __iomem *addr) | |||
272 | if ((p->flags >> 20) && | 272 | if ((p->flags >> 20) && |
273 | p->phys_addr + p->size - 1 < virt_to_phys(high_memory)) { | 273 | p->phys_addr + p->size - 1 < virt_to_phys(high_memory)) { |
274 | /* p->size includes the guard page, but cpa doesn't like that */ | 274 | /* p->size includes the guard page, but cpa doesn't like that */ |
275 | change_page_attr(virt_to_page(__va(p->phys_addr)), | 275 | change_page_attr_addr((unsigned long)__va(p->phys_addr), |
276 | p->size >> PAGE_SHIFT, | 276 | p->size >> PAGE_SHIFT, |
277 | PAGE_KERNEL); | 277 | PAGE_KERNEL); |
278 | global_flush_tlb(); | 278 | global_flush_tlb(); |