aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/kernel/osf_sys.c7
-rw-r--r--arch/i386/kernel/cpu/amd.c7
-rw-r--r--arch/i386/kernel/cpu/common.c7
-rw-r--r--arch/i386/pci/fixup.c2
-rw-r--r--arch/ia64/ia32/ia32_ioctl.c1
-rw-r--r--arch/mips/vr41xx/common/pmu.c55
-rw-r--r--arch/ppc/kernel/setup.c2
-rw-r--r--arch/um/kernel/irq_user.c10
-rw-r--r--arch/x86_64/Kconfig14
-rw-r--r--arch/x86_64/defconfig58
-rw-r--r--arch/x86_64/kernel/Makefile1
-rw-r--r--arch/x86_64/kernel/apic.c5
-rw-r--r--arch/x86_64/kernel/entry.S11
-rw-r--r--arch/x86_64/kernel/io_apic.c70
-rw-r--r--arch/x86_64/kernel/mpparse.c22
-rw-r--r--arch/x86_64/kernel/nmi.c248
-rw-r--r--arch/x86_64/kernel/pmtimer.c101
-rw-r--r--arch/x86_64/kernel/ptrace.c13
-rw-r--r--arch/x86_64/kernel/setup.c17
-rw-r--r--arch/x86_64/kernel/smpboot.c262
-rw-r--r--arch/x86_64/kernel/time.c62
-rw-r--r--arch/x86_64/kernel/vsyscall.c5
-rw-r--r--arch/x86_64/mm/fault.c11
-rw-r--r--arch/x86_64/mm/ioremap.c2
24 files changed, 683 insertions, 310 deletions
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 64e450dddb49..167fd89f8707 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1150,16 +1150,13 @@ osf_usleep_thread(struct timeval32 __user *sleep, struct timeval32 __user *remai
1150 if (get_tv32(&tmp, sleep)) 1150 if (get_tv32(&tmp, sleep))
1151 goto fault; 1151 goto fault;
1152 1152
1153 ticks = tmp.tv_usec; 1153 ticks = timeval_to_jiffies(&tmp);
1154 ticks = (ticks + (1000000 / HZ) - 1) / (1000000 / HZ);
1155 ticks += tmp.tv_sec * HZ;
1156 1154
1157 current->state = TASK_INTERRUPTIBLE; 1155 current->state = TASK_INTERRUPTIBLE;
1158 ticks = schedule_timeout(ticks); 1156 ticks = schedule_timeout(ticks);
1159 1157
1160 if (remain) { 1158 if (remain) {
1161 tmp.tv_sec = ticks / HZ; 1159 jiffies_to_timeval(ticks, &tmp);
1162 tmp.tv_usec = ticks % HZ;
1163 if (put_tv32(remain, &tmp)) 1160 if (put_tv32(remain, &tmp))
1164 goto fault; 1161 goto fault;
1165 } 1162 }
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 16dbc4151be4..fa34a06c0d79 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -24,9 +24,6 @@ __asm__(".align 4\nvide: ret");
24 24
25static void __init init_amd(struct cpuinfo_x86 *c) 25static void __init init_amd(struct cpuinfo_x86 *c)
26{ 26{
27#ifdef CONFIG_X86_SMP
28 int cpu = c == &boot_cpu_data ? 0 : c - cpu_data;
29#endif
30 u32 l, h; 27 u32 l, h;
31 int mbytes = num_physpages >> (20-PAGE_SHIFT); 28 int mbytes = num_physpages >> (20-PAGE_SHIFT);
32 int r; 29 int r;
@@ -205,7 +202,9 @@ static void __init init_amd(struct cpuinfo_x86 *c)
205 * of two. 202 * of two.
206 */ 203 */
207 if (c->x86_num_cores > 1) { 204 if (c->x86_num_cores > 1) {
208 cpu_core_id[cpu] = cpu >> hweight32(c->x86_num_cores - 1); 205 int cpu = smp_processor_id();
206 /* Fix up the APIC ID following AMD specifications. */
207 cpu_core_id[cpu] >>= hweight32(c->x86_num_cores - 1);
209 printk(KERN_INFO "CPU %d(%d) -> Core %d\n", 208 printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
210 cpu, c->x86_num_cores, cpu_core_id[cpu]); 209 cpu, c->x86_num_cores, cpu_core_id[cpu]);
211 } 210 }
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 6be0310e3cd3..11e6e6f23fa0 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -243,6 +243,13 @@ static void __init early_cpu_detect(void)
243 } 243 }
244 244
245 early_intel_workaround(c); 245 early_intel_workaround(c);
246
247#ifdef CONFIG_SMP
248#ifdef CONFIG_X86_HT
249 phys_proc_id[smp_processor_id()] =
250#endif
251 cpu_core_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
252#endif
246} 253}
247 254
248void __init generic_identify(struct cpuinfo_x86 * c) 255void __init generic_identify(struct cpuinfo_x86 * c)
diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c
index be52c5ac4e05..8e8e895e1b5a 100644
--- a/arch/i386/pci/fixup.c
+++ b/arch/i386/pci/fixup.c
@@ -253,7 +253,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, pci
253#define MAX_PCIEROOT 6 253#define MAX_PCIEROOT 6
254static int quirk_aspm_offset[MAX_PCIEROOT << 3]; 254static int quirk_aspm_offset[MAX_PCIEROOT << 3];
255 255
256#define GET_INDEX(a, b) (((a - PCI_DEVICE_ID_INTEL_MCH_PA) << 3) + b) 256#define GET_INDEX(a, b) ((((a) - PCI_DEVICE_ID_INTEL_MCH_PA) << 3) + ((b) & 7))
257 257
258static int quirk_pcie_aspm_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) 258static int quirk_pcie_aspm_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
259{ 259{
diff --git a/arch/ia64/ia32/ia32_ioctl.c b/arch/ia64/ia32/ia32_ioctl.c
index 9845dabe2613..164b211f4174 100644
--- a/arch/ia64/ia32/ia32_ioctl.c
+++ b/arch/ia64/ia32/ia32_ioctl.c
@@ -13,7 +13,6 @@
13 13
14#define INCLUDES 14#define INCLUDES
15#include "compat_ioctl.c" 15#include "compat_ioctl.c"
16#include <asm/ioctl32.h>
17 16
18#define IOCTL_NR(a) ((a) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) 17#define IOCTL_NR(a) ((a) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT))
19 18
diff --git a/arch/mips/vr41xx/common/pmu.c b/arch/mips/vr41xx/common/pmu.c
index c5f1043de938..53166f3598b2 100644
--- a/arch/mips/vr41xx/common/pmu.c
+++ b/arch/mips/vr41xx/common/pmu.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * pmu.c, Power Management Unit routines for NEC VR4100 series. 2 * pmu.c, Power Management Unit routines for NEC VR4100 series.
3 * 3 *
4 * Copyright (C) 2003-2004 Yoichi Yuasa <yuasa@hh.iij4u.or.jp> 4 * Copyright (C) 2003-2005 Yoichi Yuasa <yuasa@hh.iij4u.or.jp>
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
@@ -17,7 +17,9 @@
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */ 19 */
20#include <linux/errno.h>
20#include <linux/init.h> 21#include <linux/init.h>
22#include <linux/ioport.h>
21#include <linux/kernel.h> 23#include <linux/kernel.h>
22#include <linux/smp.h> 24#include <linux/smp.h>
23#include <linux/types.h> 25#include <linux/types.h>
@@ -27,20 +29,31 @@
27#include <asm/reboot.h> 29#include <asm/reboot.h>
28#include <asm/system.h> 30#include <asm/system.h>
29 31
30#define PMUCNT2REG KSEG1ADDR(0x0f0000c6) 32#define PMU_TYPE1_BASE 0x0b0000a0UL
33#define PMU_TYPE1_SIZE 0x0eUL
34
35#define PMU_TYPE2_BASE 0x0f0000c0UL
36#define PMU_TYPE2_SIZE 0x10UL
37
38#define PMUCNT2REG 0x06
31 #define SOFTRST 0x0010 39 #define SOFTRST 0x0010
32 40
41static void __iomem *pmu_base;
42
43#define pmu_read(offset) readw(pmu_base + (offset))
44#define pmu_write(offset, value) writew((value), pmu_base + (offset))
45
33static inline void software_reset(void) 46static inline void software_reset(void)
34{ 47{
35 uint16_t val; 48 uint16_t pmucnt2;
36 49
37 switch (current_cpu_data.cputype) { 50 switch (current_cpu_data.cputype) {
38 case CPU_VR4122: 51 case CPU_VR4122:
39 case CPU_VR4131: 52 case CPU_VR4131:
40 case CPU_VR4133: 53 case CPU_VR4133:
41 val = readw(PMUCNT2REG); 54 pmucnt2 = pmu_read(PMUCNT2REG);
42 val |= SOFTRST; 55 pmucnt2 |= SOFTRST;
43 writew(val, PMUCNT2REG); 56 pmu_write(PMUCNT2REG, pmucnt2);
44 break; 57 break;
45 default: 58 default:
46 break; 59 break;
@@ -71,6 +84,34 @@ static void vr41xx_power_off(void)
71 84
72static int __init vr41xx_pmu_init(void) 85static int __init vr41xx_pmu_init(void)
73{ 86{
87 unsigned long start, size;
88
89 switch (current_cpu_data.cputype) {
90 case CPU_VR4111:
91 case CPU_VR4121:
92 start = PMU_TYPE1_BASE;
93 size = PMU_TYPE1_SIZE;
94 break;
95 case CPU_VR4122:
96 case CPU_VR4131:
97 case CPU_VR4133:
98 start = PMU_TYPE2_BASE;
99 size = PMU_TYPE2_SIZE;
100 break;
101 default:
102 printk("Unexpected CPU of NEC VR4100 series\n");
103 return -ENODEV;
104 }
105
106 if (request_mem_region(start, size, "PMU") == NULL)
107 return -EBUSY;
108
109 pmu_base = ioremap(start, size);
110 if (pmu_base == NULL) {
111 release_mem_region(start, size);
112 return -EBUSY;
113 }
114
74 _machine_restart = vr41xx_restart; 115 _machine_restart = vr41xx_restart;
75 _machine_halt = vr41xx_halt; 116 _machine_halt = vr41xx_halt;
76 _machine_power_off = vr41xx_power_off; 117 _machine_power_off = vr41xx_power_off;
@@ -78,4 +119,4 @@ static int __init vr41xx_pmu_init(void)
78 return 0; 119 return 0;
79} 120}
80 121
81early_initcall(vr41xx_pmu_init); 122core_initcall(vr41xx_pmu_init);
diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c
index 5dfb42f1a152..309797d7f96d 100644
--- a/arch/ppc/kernel/setup.c
+++ b/arch/ppc/kernel/setup.c
@@ -753,6 +753,8 @@ void __init setup_arch(char **cmdline_p)
753 strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); 753 strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE);
754 *cmdline_p = cmd_line; 754 *cmdline_p = cmd_line;
755 755
756 parse_early_param();
757
756 /* set up the bootmem stuff with available memory */ 758 /* set up the bootmem stuff with available memory */
757 do_init_bootmem(); 759 do_init_bootmem();
758 if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab); 760 if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab);
diff --git a/arch/um/kernel/irq_user.c b/arch/um/kernel/irq_user.c
index 6d6f9484b884..b3074cbaa479 100644
--- a/arch/um/kernel/irq_user.c
+++ b/arch/um/kernel/irq_user.c
@@ -236,9 +236,15 @@ static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
236 (*prev)->fd, pollfds[i].fd); 236 (*prev)->fd, pollfds[i].fd);
237 goto out; 237 goto out;
238 } 238 }
239 memcpy(&pollfds[i], &pollfds[i + 1], 239
240 (pollfds_num - i - 1) * sizeof(pollfds[0]));
241 pollfds_num--; 240 pollfds_num--;
241
242 /* This moves the *whole* array after pollfds[i] (though
243 * it doesn't spot as such)! */
244
245 memmove(&pollfds[i], &pollfds[i + 1],
246 (pollfds_num - i) * sizeof(pollfds[0]));
247
242 if(last_irq_ptr == &old_fd->next) 248 if(last_irq_ptr == &old_fd->next)
243 last_irq_ptr = prev; 249 last_irq_ptr = prev;
244 *prev = (*prev)->next; 250 *prev = (*prev)->next;
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 44ee7f6acf7b..82cb2a3f127a 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -303,6 +303,20 @@ config HPET_TIMER
303 as it is off-chip. You can find the HPET spec at 303 as it is off-chip. You can find the HPET spec at
304 <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>. 304 <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>.
305 305
306config X86_PM_TIMER
307 bool "PM timer"
308 default y
309 help
310 Support the ACPI PM timer for time keeping. This is slow,
311 but is useful on some chipsets without HPET on systems with more
312 than one CPU. On a single processor or single socket multi core
313 system it is normally not required.
314 When the PM timer is active 64bit vsyscalls are disabled
315 and should not be enabled (/proc/sys/kernel/vsyscall64 should
316 not be changed).
317 The kernel selects the PM timer only as a last resort, so it is
318 useful to enable just in case.
319
306config HPET_EMULATE_RTC 320config HPET_EMULATE_RTC
307 bool "Provide RTC interrupt" 321 bool "Provide RTC interrupt"
308 depends on HPET_TIMER && RTC=y 322 depends on HPET_TIMER && RTC=y
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 9ce51dee30b3..569595b74c7c 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.11-bk7 3# Linux kernel version: 2.6.12-rc4
4# Sat Mar 12 23:43:44 2005 4# Fri May 13 06:39:11 2005
5# 5#
6CONFIG_X86_64=y 6CONFIG_X86_64=y
7CONFIG_64BIT=y 7CONFIG_64BIT=y
@@ -11,8 +11,6 @@ CONFIG_RWSEM_GENERIC_SPINLOCK=y
11CONFIG_GENERIC_CALIBRATE_DELAY=y 11CONFIG_GENERIC_CALIBRATE_DELAY=y
12CONFIG_X86_CMPXCHG=y 12CONFIG_X86_CMPXCHG=y
13CONFIG_EARLY_PRINTK=y 13CONFIG_EARLY_PRINTK=y
14CONFIG_HPET_TIMER=y
15CONFIG_HPET_EMULATE_RTC=y
16CONFIG_GENERIC_ISA_DMA=y 14CONFIG_GENERIC_ISA_DMA=y
17CONFIG_GENERIC_IOMAP=y 15CONFIG_GENERIC_IOMAP=y
18 16
@@ -22,6 +20,7 @@ CONFIG_GENERIC_IOMAP=y
22CONFIG_EXPERIMENTAL=y 20CONFIG_EXPERIMENTAL=y
23CONFIG_CLEAN_COMPILE=y 21CONFIG_CLEAN_COMPILE=y
24CONFIG_LOCK_KERNEL=y 22CONFIG_LOCK_KERNEL=y
23CONFIG_INIT_ENV_ARG_LIMIT=32
25 24
26# 25#
27# General setup 26# General setup
@@ -33,7 +32,6 @@ CONFIG_POSIX_MQUEUE=y
33# CONFIG_BSD_PROCESS_ACCT is not set 32# CONFIG_BSD_PROCESS_ACCT is not set
34CONFIG_SYSCTL=y 33CONFIG_SYSCTL=y
35# CONFIG_AUDIT is not set 34# CONFIG_AUDIT is not set
36CONFIG_LOG_BUF_SHIFT=18
37# CONFIG_HOTPLUG is not set 35# CONFIG_HOTPLUG is not set
38CONFIG_KOBJECT_UEVENT=y 36CONFIG_KOBJECT_UEVENT=y
39CONFIG_IKCONFIG=y 37CONFIG_IKCONFIG=y
@@ -43,10 +41,11 @@ CONFIG_IKCONFIG_PROC=y
43CONFIG_KALLSYMS=y 41CONFIG_KALLSYMS=y
44CONFIG_KALLSYMS_ALL=y 42CONFIG_KALLSYMS_ALL=y
45# CONFIG_KALLSYMS_EXTRA_PASS is not set 43# CONFIG_KALLSYMS_EXTRA_PASS is not set
44CONFIG_PRINTK=y
45CONFIG_BUG=y
46CONFIG_BASE_FULL=y 46CONFIG_BASE_FULL=y
47CONFIG_FUTEX=y 47CONFIG_FUTEX=y
48CONFIG_EPOLL=y 48CONFIG_EPOLL=y
49# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
50CONFIG_SHMEM=y 49CONFIG_SHMEM=y
51CONFIG_CC_ALIGN_FUNCTIONS=0 50CONFIG_CC_ALIGN_FUNCTIONS=0
52CONFIG_CC_ALIGN_LABELS=0 51CONFIG_CC_ALIGN_LABELS=0
@@ -93,6 +92,9 @@ CONFIG_DISCONTIGMEM=y
93CONFIG_NUMA=y 92CONFIG_NUMA=y
94CONFIG_HAVE_DEC_LOCK=y 93CONFIG_HAVE_DEC_LOCK=y
95CONFIG_NR_CPUS=8 94CONFIG_NR_CPUS=8
95CONFIG_HPET_TIMER=y
96CONFIG_X86_PM_TIMER=y
97CONFIG_HPET_EMULATE_RTC=y
96CONFIG_GART_IOMMU=y 98CONFIG_GART_IOMMU=y
97CONFIG_SWIOTLB=y 99CONFIG_SWIOTLB=y
98CONFIG_X86_MCE=y 100CONFIG_X86_MCE=y
@@ -100,6 +102,7 @@ CONFIG_X86_MCE_INTEL=y
100CONFIG_SECCOMP=y 102CONFIG_SECCOMP=y
101CONFIG_GENERIC_HARDIRQS=y 103CONFIG_GENERIC_HARDIRQS=y
102CONFIG_GENERIC_IRQ_PROBE=y 104CONFIG_GENERIC_IRQ_PROBE=y
105CONFIG_ISA_DMA_API=y
103 106
104# 107#
105# Power management options 108# Power management options
@@ -129,7 +132,7 @@ CONFIG_ACPI_NUMA=y
129# CONFIG_ACPI_IBM is not set 132# CONFIG_ACPI_IBM is not set
130CONFIG_ACPI_TOSHIBA=y 133CONFIG_ACPI_TOSHIBA=y
131CONFIG_ACPI_BLACKLIST_YEAR=2001 134CONFIG_ACPI_BLACKLIST_YEAR=2001
132CONFIG_ACPI_DEBUG=y 135# CONFIG_ACPI_DEBUG is not set
133CONFIG_ACPI_BUS=y 136CONFIG_ACPI_BUS=y
134CONFIG_ACPI_EC=y 137CONFIG_ACPI_EC=y
135CONFIG_ACPI_POWER=y 138CONFIG_ACPI_POWER=y
@@ -141,6 +144,7 @@ CONFIG_ACPI_SYSTEM=y
141# CPU Frequency scaling 144# CPU Frequency scaling
142# 145#
143CONFIG_CPU_FREQ=y 146CONFIG_CPU_FREQ=y
147CONFIG_CPU_FREQ_TABLE=y
144# CONFIG_CPU_FREQ_DEBUG is not set 148# CONFIG_CPU_FREQ_DEBUG is not set
145CONFIG_CPU_FREQ_STAT=y 149CONFIG_CPU_FREQ_STAT=y
146# CONFIG_CPU_FREQ_STAT_DETAILS is not set 150# CONFIG_CPU_FREQ_STAT_DETAILS is not set
@@ -150,7 +154,6 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
150# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set 154# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
151CONFIG_CPU_FREQ_GOV_USERSPACE=y 155CONFIG_CPU_FREQ_GOV_USERSPACE=y
152CONFIG_CPU_FREQ_GOV_ONDEMAND=y 156CONFIG_CPU_FREQ_GOV_ONDEMAND=y
153CONFIG_CPU_FREQ_TABLE=y
154 157
155# 158#
156# CPUFreq processor drivers 159# CPUFreq processor drivers
@@ -164,6 +167,7 @@ CONFIG_X86_ACPI_CPUFREQ=y
164# shared options 167# shared options
165# 168#
166CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y 169CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y
170# CONFIG_X86_SPEEDSTEP_LIB is not set
167 171
168# 172#
169# Bus options (PCI etc.) 173# Bus options (PCI etc.)
@@ -172,9 +176,11 @@ CONFIG_PCI=y
172CONFIG_PCI_DIRECT=y 176CONFIG_PCI_DIRECT=y
173CONFIG_PCI_MMCONFIG=y 177CONFIG_PCI_MMCONFIG=y
174CONFIG_UNORDERED_IO=y 178CONFIG_UNORDERED_IO=y
179# CONFIG_PCIEPORTBUS is not set
175CONFIG_PCI_MSI=y 180CONFIG_PCI_MSI=y
176# CONFIG_PCI_LEGACY_PROC is not set 181# CONFIG_PCI_LEGACY_PROC is not set
177# CONFIG_PCI_NAMES is not set 182# CONFIG_PCI_NAMES is not set
183# CONFIG_PCI_DEBUG is not set
178 184
179# 185#
180# PCCARD (PCMCIA/CardBus) support 186# PCCARD (PCMCIA/CardBus) support
@@ -182,10 +188,6 @@ CONFIG_PCI_MSI=y
182# CONFIG_PCCARD is not set 188# CONFIG_PCCARD is not set
183 189
184# 190#
185# PC-card bridges
186#
187
188#
189# PCI Hotplug Support 191# PCI Hotplug Support
190# 192#
191# CONFIG_HOTPLUG_PCI is not set 193# CONFIG_HOTPLUG_PCI is not set
@@ -254,7 +256,7 @@ CONFIG_LBD=y
254# IO Schedulers 256# IO Schedulers
255# 257#
256CONFIG_IOSCHED_NOOP=y 258CONFIG_IOSCHED_NOOP=y
257CONFIG_IOSCHED_AS=y 259# CONFIG_IOSCHED_AS is not set
258CONFIG_IOSCHED_DEADLINE=y 260CONFIG_IOSCHED_DEADLINE=y
259CONFIG_IOSCHED_CFQ=y 261CONFIG_IOSCHED_CFQ=y
260# CONFIG_ATA_OVER_ETH is not set 262# CONFIG_ATA_OVER_ETH is not set
@@ -308,7 +310,8 @@ CONFIG_BLK_DEV_AMD74XX=y
308CONFIG_BLK_DEV_PIIX=y 310CONFIG_BLK_DEV_PIIX=y
309# CONFIG_BLK_DEV_NS87415 is not set 311# CONFIG_BLK_DEV_NS87415 is not set
310# CONFIG_BLK_DEV_PDC202XX_OLD is not set 312# CONFIG_BLK_DEV_PDC202XX_OLD is not set
311# CONFIG_BLK_DEV_PDC202XX_NEW is not set 313CONFIG_BLK_DEV_PDC202XX_NEW=y
314# CONFIG_PDC202XX_FORCE is not set
312# CONFIG_BLK_DEV_SVWKS is not set 315# CONFIG_BLK_DEV_SVWKS is not set
313# CONFIG_BLK_DEV_SIIMAGE is not set 316# CONFIG_BLK_DEV_SIIMAGE is not set
314# CONFIG_BLK_DEV_SIS5513 is not set 317# CONFIG_BLK_DEV_SIS5513 is not set
@@ -353,7 +356,7 @@ CONFIG_BLK_DEV_SD=y
353# 356#
354# SCSI low-level drivers 357# SCSI low-level drivers
355# 358#
356CONFIG_BLK_DEV_3W_XXXX_RAID=y 359# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
357# CONFIG_SCSI_3W_9XXX is not set 360# CONFIG_SCSI_3W_9XXX is not set
358# CONFIG_SCSI_ACARD is not set 361# CONFIG_SCSI_ACARD is not set
359# CONFIG_SCSI_AACRAID is not set 362# CONFIG_SCSI_AACRAID is not set
@@ -384,7 +387,6 @@ CONFIG_SCSI_SATA_VIA=y
384# CONFIG_SCSI_BUSLOGIC is not set 387# CONFIG_SCSI_BUSLOGIC is not set
385# CONFIG_SCSI_DMX3191D is not set 388# CONFIG_SCSI_DMX3191D is not set
386# CONFIG_SCSI_EATA is not set 389# CONFIG_SCSI_EATA is not set
387# CONFIG_SCSI_EATA_PIO is not set
388# CONFIG_SCSI_FUTURE_DOMAIN is not set 390# CONFIG_SCSI_FUTURE_DOMAIN is not set
389# CONFIG_SCSI_GDTH is not set 391# CONFIG_SCSI_GDTH is not set
390# CONFIG_SCSI_IPS is not set 392# CONFIG_SCSI_IPS is not set
@@ -392,7 +394,6 @@ CONFIG_SCSI_SATA_VIA=y
392# CONFIG_SCSI_INIA100 is not set 394# CONFIG_SCSI_INIA100 is not set
393# CONFIG_SCSI_SYM53C8XX_2 is not set 395# CONFIG_SCSI_SYM53C8XX_2 is not set
394# CONFIG_SCSI_IPR is not set 396# CONFIG_SCSI_IPR is not set
395# CONFIG_SCSI_QLOGIC_ISP is not set
396# CONFIG_SCSI_QLOGIC_FC is not set 397# CONFIG_SCSI_QLOGIC_FC is not set
397# CONFIG_SCSI_QLOGIC_1280 is not set 398# CONFIG_SCSI_QLOGIC_1280 is not set
398CONFIG_SCSI_QLA2XXX=y 399CONFIG_SCSI_QLA2XXX=y
@@ -401,6 +402,7 @@ CONFIG_SCSI_QLA2XXX=y
401# CONFIG_SCSI_QLA2300 is not set 402# CONFIG_SCSI_QLA2300 is not set
402# CONFIG_SCSI_QLA2322 is not set 403# CONFIG_SCSI_QLA2322 is not set
403# CONFIG_SCSI_QLA6312 is not set 404# CONFIG_SCSI_QLA6312 is not set
405# CONFIG_SCSI_LPFC is not set
404# CONFIG_SCSI_DC395x is not set 406# CONFIG_SCSI_DC395x is not set
405# CONFIG_SCSI_DC390T is not set 407# CONFIG_SCSI_DC390T is not set
406# CONFIG_SCSI_DEBUG is not set 408# CONFIG_SCSI_DEBUG is not set
@@ -437,7 +439,6 @@ CONFIG_NET=y
437# 439#
438CONFIG_PACKET=y 440CONFIG_PACKET=y
439# CONFIG_PACKET_MMAP is not set 441# CONFIG_PACKET_MMAP is not set
440# CONFIG_NETLINK_DEV is not set
441CONFIG_UNIX=y 442CONFIG_UNIX=y
442# CONFIG_NET_KEY is not set 443# CONFIG_NET_KEY is not set
443CONFIG_INET=y 444CONFIG_INET=y
@@ -502,7 +503,7 @@ CONFIG_NETDEVICES=y
502# CONFIG_DUMMY is not set 503# CONFIG_DUMMY is not set
503# CONFIG_BONDING is not set 504# CONFIG_BONDING is not set
504# CONFIG_EQUALIZER is not set 505# CONFIG_EQUALIZER is not set
505# CONFIG_TUN is not set 506CONFIG_TUN=y
506 507
507# 508#
508# ARCnet devices 509# ARCnet devices
@@ -525,8 +526,7 @@ CONFIG_MII=y
525# CONFIG_HP100 is not set 526# CONFIG_HP100 is not set
526CONFIG_NET_PCI=y 527CONFIG_NET_PCI=y
527# CONFIG_PCNET32 is not set 528# CONFIG_PCNET32 is not set
528CONFIG_AMD8111_ETH=y 529# CONFIG_AMD8111_ETH is not set
529# CONFIG_AMD8111E_NAPI is not set
530# CONFIG_ADAPTEC_STARFIRE is not set 530# CONFIG_ADAPTEC_STARFIRE is not set
531# CONFIG_B44 is not set 531# CONFIG_B44 is not set
532CONFIG_FORCEDETH=y 532CONFIG_FORCEDETH=y
@@ -536,7 +536,7 @@ CONFIG_FORCEDETH=y
536# CONFIG_FEALNX is not set 536# CONFIG_FEALNX is not set
537# CONFIG_NATSEMI is not set 537# CONFIG_NATSEMI is not set
538# CONFIG_NE2K_PCI is not set 538# CONFIG_NE2K_PCI is not set
539CONFIG_8139CP=m 539CONFIG_8139CP=y
540CONFIG_8139TOO=y 540CONFIG_8139TOO=y
541# CONFIG_8139TOO_PIO is not set 541# CONFIG_8139TOO_PIO is not set
542# CONFIG_8139TOO_TUNE_TWISTER is not set 542# CONFIG_8139TOO_TUNE_TWISTER is not set
@@ -671,6 +671,7 @@ CONFIG_SERIAL_8250_NR_UARTS=4
671# 671#
672CONFIG_SERIAL_CORE=y 672CONFIG_SERIAL_CORE=y
673CONFIG_SERIAL_CORE_CONSOLE=y 673CONFIG_SERIAL_CORE_CONSOLE=y
674# CONFIG_SERIAL_JSM is not set
674CONFIG_UNIX98_PTYS=y 675CONFIG_UNIX98_PTYS=y
675CONFIG_LEGACY_PTYS=y 676CONFIG_LEGACY_PTYS=y
676CONFIG_LEGACY_PTY_COUNT=256 677CONFIG_LEGACY_PTY_COUNT=256
@@ -696,6 +697,7 @@ CONFIG_RTC=y
696# 697#
697CONFIG_AGP=y 698CONFIG_AGP=y
698CONFIG_AGP_AMD64=y 699CONFIG_AGP_AMD64=y
700CONFIG_AGP_INTEL=y
699# CONFIG_DRM is not set 701# CONFIG_DRM is not set
700# CONFIG_MWAVE is not set 702# CONFIG_MWAVE is not set
701CONFIG_RAW_DRIVER=y 703CONFIG_RAW_DRIVER=y
@@ -703,7 +705,7 @@ CONFIG_HPET=y
703# CONFIG_HPET_RTC_IRQ is not set 705# CONFIG_HPET_RTC_IRQ is not set
704CONFIG_HPET_MMAP=y 706CONFIG_HPET_MMAP=y
705CONFIG_MAX_RAW_DEVS=256 707CONFIG_MAX_RAW_DEVS=256
706CONFIG_HANGCHECK_TIMER=y 708# CONFIG_HANGCHECK_TIMER is not set
707 709
708# 710#
709# TPM devices 711# TPM devices
@@ -786,6 +788,8 @@ CONFIG_SOUND_ICH=y
786# 788#
787# USB support 789# USB support
788# 790#
791CONFIG_USB_ARCH_HAS_HCD=y
792CONFIG_USB_ARCH_HAS_OHCI=y
789CONFIG_USB=y 793CONFIG_USB=y
790# CONFIG_USB_DEBUG is not set 794# CONFIG_USB_DEBUG is not set
791 795
@@ -797,8 +801,6 @@ CONFIG_USB_DEVICEFS=y
797# CONFIG_USB_DYNAMIC_MINORS is not set 801# CONFIG_USB_DYNAMIC_MINORS is not set
798# CONFIG_USB_SUSPEND is not set 802# CONFIG_USB_SUSPEND is not set
799# CONFIG_USB_OTG is not set 803# CONFIG_USB_OTG is not set
800CONFIG_USB_ARCH_HAS_HCD=y
801CONFIG_USB_ARCH_HAS_OHCI=y
802 804
803# 805#
804# USB Host Controller Drivers 806# USB Host Controller Drivers
@@ -826,7 +828,6 @@ CONFIG_USB_PRINTER=y
826# 828#
827CONFIG_USB_STORAGE=y 829CONFIG_USB_STORAGE=y
828# CONFIG_USB_STORAGE_DEBUG is not set 830# CONFIG_USB_STORAGE_DEBUG is not set
829# CONFIG_USB_STORAGE_RW_DETECT is not set
830# CONFIG_USB_STORAGE_DATAFAB is not set 831# CONFIG_USB_STORAGE_DATAFAB is not set
831# CONFIG_USB_STORAGE_FREECOM is not set 832# CONFIG_USB_STORAGE_FREECOM is not set
832# CONFIG_USB_STORAGE_ISD200 is not set 833# CONFIG_USB_STORAGE_ISD200 is not set
@@ -965,7 +966,7 @@ CONFIG_AUTOFS_FS=y
965# CD-ROM/DVD Filesystems 966# CD-ROM/DVD Filesystems
966# 967#
967CONFIG_ISO9660_FS=y 968CONFIG_ISO9660_FS=y
968# CONFIG_JOLIET is not set 969CONFIG_JOLIET=y
969# CONFIG_ZISOFS is not set 970# CONFIG_ZISOFS is not set
970# CONFIG_UDF_FS is not set 971# CONFIG_UDF_FS is not set
971 972
@@ -1092,9 +1093,10 @@ CONFIG_OPROFILE=y
1092# 1093#
1093# Kernel hacking 1094# Kernel hacking
1094# 1095#
1096# CONFIG_PRINTK_TIME is not set
1095CONFIG_DEBUG_KERNEL=y 1097CONFIG_DEBUG_KERNEL=y
1096CONFIG_MAGIC_SYSRQ=y 1098CONFIG_MAGIC_SYSRQ=y
1097# CONFIG_PRINTK_TIME is not set 1099CONFIG_LOG_BUF_SHIFT=18
1098# CONFIG_SCHEDSTATS is not set 1100# CONFIG_SCHEDSTATS is not set
1099# CONFIG_DEBUG_SLAB is not set 1101# CONFIG_DEBUG_SLAB is not set
1100# CONFIG_DEBUG_SPINLOCK is not set 1102# CONFIG_DEBUG_SPINLOCK is not set
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 0a3318e08ab6..5ca4a4598fda 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
28obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o 28obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
29obj-$(CONFIG_SWIOTLB) += swiotlb.o 29obj-$(CONFIG_SWIOTLB) += swiotlb.o
30obj-$(CONFIG_KPROBES) += kprobes.o 30obj-$(CONFIG_KPROBES) += kprobes.o
31obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
31 32
32obj-$(CONFIG_MODULES) += module.o 33obj-$(CONFIG_MODULES) += module.o
33 34
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 7e13545748e0..f8e6cc4fecd4 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -33,6 +33,7 @@
33#include <asm/mpspec.h> 33#include <asm/mpspec.h>
34#include <asm/pgalloc.h> 34#include <asm/pgalloc.h>
35#include <asm/mach_apic.h> 35#include <asm/mach_apic.h>
36#include <asm/nmi.h>
36 37
37int apic_verbosity; 38int apic_verbosity;
38 39
@@ -925,7 +926,7 @@ __init int oem_force_hpet_timer(void)
925 unsigned id; 926 unsigned id;
926 DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); 927 DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
927 928
928 bitmap_empty(clustermap, NUM_APIC_CLUSTERS); 929 bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
929 930
930 for (i = 0; i < NR_CPUS; i++) { 931 for (i = 0; i < NR_CPUS; i++) {
931 id = bios_cpu_apicid[i]; 932 id = bios_cpu_apicid[i];
@@ -1056,7 +1057,7 @@ int __init APIC_init_uniprocessor (void)
1056 nr_ioapics = 0; 1057 nr_ioapics = 0;
1057#endif 1058#endif
1058 setup_boot_APIC_clock(); 1059 setup_boot_APIC_clock();
1059 1060 check_nmi_watchdog();
1060 return 0; 1061 return 0;
1061} 1062}
1062 1063
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 1086b5fcac21..28817490fdc6 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -220,13 +220,18 @@ sysret_careful:
220 jmp sysret_check 220 jmp sysret_check
221 221
222 /* Handle a signal */ 222 /* Handle a signal */
223 /* edx: work flags (arg3) */
224sysret_signal: 223sysret_signal:
225 sti 224 sti
225 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
226 jz 1f
227
228 /* Really a signal */
229 /* edx: work flags (arg3) */
226 leaq do_notify_resume(%rip),%rax 230 leaq do_notify_resume(%rip),%rax
227 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 231 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
228 xorl %esi,%esi # oldset -> arg2 232 xorl %esi,%esi # oldset -> arg2
229 call ptregscall_common 233 call ptregscall_common
2341: movl $_TIF_NEED_RESCHED,%edi
230 jmp sysret_check 235 jmp sysret_check
231 236
232 /* Do syscall tracing */ 237 /* Do syscall tracing */
@@ -484,6 +489,8 @@ retint_careful:
484 jmp retint_check 489 jmp retint_check
485 490
486retint_signal: 491retint_signal:
492 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
493 jz retint_swapgs
487 sti 494 sti
488 SAVE_REST 495 SAVE_REST
489 movq $-1,ORIG_RAX(%rsp) 496 movq $-1,ORIG_RAX(%rsp)
@@ -492,8 +499,8 @@ retint_signal:
492 call do_notify_resume 499 call do_notify_resume
493 RESTORE_REST 500 RESTORE_REST
494 cli 501 cli
502 movl $_TIF_NEED_RESCHED,%edi
495 GET_THREAD_INFO(%rcx) 503 GET_THREAD_INFO(%rcx)
496 movl $_TIF_WORK_MASK,%edi
497 jmp retint_check 504 jmp retint_check
498 505
499#ifdef CONFIG_PREEMPT 506#ifdef CONFIG_PREEMPT
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 60be58617eb9..ac7684324954 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -1804,76 +1804,6 @@ device_initcall(ioapic_init_sysfs);
1804 1804
1805#define IO_APIC_MAX_ID 0xFE 1805#define IO_APIC_MAX_ID 0xFE
1806 1806
1807int __init io_apic_get_unique_id (int ioapic, int apic_id)
1808{
1809 union IO_APIC_reg_00 reg_00;
1810 static physid_mask_t apic_id_map;
1811 unsigned long flags;
1812 int i = 0;
1813
1814 /*
1815 * The P4 platform supports up to 256 APIC IDs on two separate APIC
1816 * buses (one for LAPICs, one for IOAPICs), where predecessors only
1817 * supports up to 16 on one shared APIC bus.
1818 *
1819 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
1820 * advantage of new APIC bus architecture.
1821 */
1822
1823 if (physids_empty(apic_id_map))
1824 apic_id_map = phys_cpu_present_map;
1825
1826 spin_lock_irqsave(&ioapic_lock, flags);
1827 reg_00.raw = io_apic_read(ioapic, 0);
1828 spin_unlock_irqrestore(&ioapic_lock, flags);
1829
1830 if (apic_id >= IO_APIC_MAX_ID) {
1831 apic_printk(APIC_QUIET, KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
1832 "%d\n", ioapic, apic_id, reg_00.bits.ID);
1833 apic_id = reg_00.bits.ID;
1834 }
1835
1836 /*
1837 * Every APIC in a system must have a unique ID or we get lots of nice
1838 * 'stuck on smp_invalidate_needed IPI wait' messages.
1839 */
1840 if (physid_isset(apic_id, apic_id_map)) {
1841
1842 for (i = 0; i < IO_APIC_MAX_ID; i++) {
1843 if (!physid_isset(i, apic_id_map))
1844 break;
1845 }
1846
1847 if (i == IO_APIC_MAX_ID)
1848 panic("Max apic_id exceeded!\n");
1849
1850 apic_printk(APIC_VERBOSE, KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
1851 "trying %d\n", ioapic, apic_id, i);
1852
1853 apic_id = i;
1854 }
1855
1856 physid_set(apic_id, apic_id_map);
1857
1858 if (reg_00.bits.ID != apic_id) {
1859 reg_00.bits.ID = apic_id;
1860
1861 spin_lock_irqsave(&ioapic_lock, flags);
1862 io_apic_write(ioapic, 0, reg_00.raw);
1863 reg_00.raw = io_apic_read(ioapic, 0);
1864 spin_unlock_irqrestore(&ioapic_lock, flags);
1865
1866 /* Sanity check */
1867 if (reg_00.bits.ID != apic_id)
1868 panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
1869 }
1870
1871 apic_printk(APIC_VERBOSE,KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
1872
1873 return apic_id;
1874}
1875
1876
1877int __init io_apic_get_version (int ioapic) 1807int __init io_apic_get_version (int ioapic)
1878{ 1808{
1879 union IO_APIC_reg_01 reg_01; 1809 union IO_APIC_reg_01 reg_01;
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 7ec031c6ca10..f86d9db94bfc 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -107,6 +107,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
107static void __init MP_processor_info (struct mpc_config_processor *m) 107static void __init MP_processor_info (struct mpc_config_processor *m)
108{ 108{
109 int ver; 109 int ver;
110 static int found_bsp=0;
110 111
111 if (!(m->mpc_cpuflag & CPU_ENABLED)) 112 if (!(m->mpc_cpuflag & CPU_ENABLED))
112 return; 113 return;
@@ -126,11 +127,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
126 " Processor ignored.\n", NR_CPUS); 127 " Processor ignored.\n", NR_CPUS);
127 return; 128 return;
128 } 129 }
129 if (num_processors >= maxcpus) {
130 printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
131 " Processor ignored.\n", maxcpus);
132 return;
133 }
134 130
135 num_processors++; 131 num_processors++;
136 132
@@ -150,7 +146,19 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
150 ver = 0x10; 146 ver = 0x10;
151 } 147 }
152 apic_version[m->mpc_apicid] = ver; 148 apic_version[m->mpc_apicid] = ver;
153 bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; 149 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
150 /*
151 * bios_cpu_apicid is required to have processors listed
152 * in same order as logical cpu numbers. Hence the first
153 * entry is BSP, and so on.
154 */
155 bios_cpu_apicid[0] = m->mpc_apicid;
156 x86_cpu_to_apicid[0] = m->mpc_apicid;
157 found_bsp = 1;
158 } else {
159 bios_cpu_apicid[num_processors - found_bsp] = m->mpc_apicid;
160 x86_cpu_to_apicid[num_processors - found_bsp] = m->mpc_apicid;
161 }
154} 162}
155 163
156static void __init MP_bus_info (struct mpc_config_bus *m) 164static void __init MP_bus_info (struct mpc_config_bus *m)
@@ -759,7 +767,7 @@ void __init mp_register_ioapic (
759 mp_ioapics[idx].mpc_apicaddr = address; 767 mp_ioapics[idx].mpc_apicaddr = address;
760 768
761 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 769 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
762 mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); 770 mp_ioapics[idx].mpc_apicid = id;
763 mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); 771 mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
764 772
765 /* 773 /*
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 61de0b34a01e..31c0f2e6ac91 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -33,6 +33,7 @@
33#include <asm/msr.h> 33#include <asm/msr.h>
34#include <asm/proto.h> 34#include <asm/proto.h>
35#include <asm/kdebug.h> 35#include <asm/kdebug.h>
36#include <asm/local.h>
36 37
37/* 38/*
38 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: 39 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -59,7 +60,8 @@ int panic_on_timeout;
59 60
60unsigned int nmi_watchdog = NMI_DEFAULT; 61unsigned int nmi_watchdog = NMI_DEFAULT;
61static unsigned int nmi_hz = HZ; 62static unsigned int nmi_hz = HZ;
62unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ 63static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
64static unsigned int nmi_p4_cccr_val;
63 65
64/* Note that these events don't tick when the CPU idles. This means 66/* Note that these events don't tick when the CPU idles. This means
65 the frequency varies with CPU load. */ 67 the frequency varies with CPU load. */
@@ -71,61 +73,87 @@ unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
71#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 73#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
72#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 74#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
73 75
74#define P6_EVNTSEL0_ENABLE (1 << 22) 76#define MSR_P4_MISC_ENABLE 0x1A0
75#define P6_EVNTSEL_INT (1 << 20) 77#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
76#define P6_EVNTSEL_OS (1 << 17) 78#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
77#define P6_EVNTSEL_USR (1 << 16) 79#define MSR_P4_PERFCTR0 0x300
78#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 80#define MSR_P4_CCCR0 0x360
79#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED 81#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
82#define P4_ESCR_OS (1<<3)
83#define P4_ESCR_USR (1<<2)
84#define P4_CCCR_OVF_PMI0 (1<<26)
85#define P4_CCCR_OVF_PMI1 (1<<27)
86#define P4_CCCR_THRESHOLD(N) ((N)<<20)
87#define P4_CCCR_COMPLEMENT (1<<19)
88#define P4_CCCR_COMPARE (1<<18)
89#define P4_CCCR_REQUIRED (3<<16)
90#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
91#define P4_CCCR_ENABLE (1<<12)
92/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
93 CRU_ESCR0 (with any non-null event selector) through a complemented
94 max threshold. [IA32-Vol3, Section 14.9.9] */
95#define MSR_P4_IQ_COUNTER0 0x30C
96#define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
97#define P4_NMI_IQ_CCCR0 \
98 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
99 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
100
101static __init inline int nmi_known_cpu(void)
102{
103 switch (boot_cpu_data.x86_vendor) {
104 case X86_VENDOR_AMD:
105 return boot_cpu_data.x86 == 15;
106 case X86_VENDOR_INTEL:
107 return boot_cpu_data.x86 == 15;
108 }
109 return 0;
110}
80 111
81/* Run after command line and cpu_init init, but before all other checks */ 112/* Run after command line and cpu_init init, but before all other checks */
82void __init nmi_watchdog_default(void) 113void __init nmi_watchdog_default(void)
83{ 114{
84 if (nmi_watchdog != NMI_DEFAULT) 115 if (nmi_watchdog != NMI_DEFAULT)
85 return; 116 return;
86 117 if (nmi_known_cpu())
87 /* For some reason the IO APIC watchdog doesn't work on the AMD 118 nmi_watchdog = NMI_LOCAL_APIC;
88 8111 chipset. For now switch to local APIC mode using 119 else
89 perfctr0 there. On Intel CPUs we don't have code to handle
90 the perfctr and the IO-APIC seems to work, so use that. */
91
92 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
93 nmi_watchdog = NMI_LOCAL_APIC;
94 printk(KERN_INFO
95 "Using local APIC NMI watchdog using perfctr0\n");
96 } else {
97 printk(KERN_INFO "Using IO APIC NMI watchdog\n");
98 nmi_watchdog = NMI_IO_APIC; 120 nmi_watchdog = NMI_IO_APIC;
99 }
100} 121}
101 122
102/* Why is there no CPUID flag for this? */ 123#ifdef CONFIG_SMP
103static __init int cpu_has_lapic(void) 124/* The performance counters used by NMI_LOCAL_APIC don't trigger when
125 * the CPU is idle. To make sure the NMI watchdog really ticks on all
126 * CPUs during the test make them busy.
127 */
128static __init void nmi_cpu_busy(void *data)
104{ 129{
105 switch (boot_cpu_data.x86_vendor) { 130 volatile int *endflag = data;
106 case X86_VENDOR_INTEL: 131 local_irq_enable();
107 case X86_VENDOR_AMD: 132 /* Intentionally don't use cpu_relax here. This is
108 return boot_cpu_data.x86 >= 6; 133 to make sure that the performance counter really ticks,
109 /* .... add more cpus here or find a different way to figure this out. */ 134 even if there is a simulator or similar that catches the
110 default: 135 pause instruction. On a real HT machine this is fine because
111 return 0; 136 all other CPUs are busy with "useless" delay loops and don't
112 } 137 care if they get somewhat less cycles. */
138 while (*endflag == 0)
139 barrier();
113} 140}
141#endif
114 142
115static int __init check_nmi_watchdog (void) 143int __init check_nmi_watchdog (void)
116{ 144{
117 int counts[NR_CPUS]; 145 volatile int endflag = 0;
146 int *counts;
118 int cpu; 147 int cpu;
119 148
120 if (nmi_watchdog == NMI_NONE) 149 counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
121 return 0; 150 if (!counts)
151 return -1;
122 152
123 if (nmi_watchdog == NMI_LOCAL_APIC && !cpu_has_lapic()) { 153 printk(KERN_INFO "testing NMI watchdog ... ");
124 nmi_watchdog = NMI_NONE;
125 return -1;
126 }
127 154
128 printk(KERN_INFO "Testing NMI watchdog ... "); 155 if (nmi_watchdog == NMI_LOCAL_APIC)
156 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
129 157
130 for (cpu = 0; cpu < NR_CPUS; cpu++) 158 for (cpu = 0; cpu < NR_CPUS; cpu++)
131 counts[cpu] = cpu_pda[cpu].__nmi_count; 159 counts[cpu] = cpu_pda[cpu].__nmi_count;
@@ -133,15 +161,22 @@ static int __init check_nmi_watchdog (void)
133 mdelay((10*1000)/nmi_hz); // wait 10 ticks 161 mdelay((10*1000)/nmi_hz); // wait 10 ticks
134 162
135 for (cpu = 0; cpu < NR_CPUS; cpu++) { 163 for (cpu = 0; cpu < NR_CPUS; cpu++) {
164 if (!cpu_online(cpu))
165 continue;
136 if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) { 166 if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) {
137 printk("CPU#%d: NMI appears to be stuck (%d)!\n", 167 endflag = 1;
168 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
138 cpu, 169 cpu,
170 counts[cpu],
139 cpu_pda[cpu].__nmi_count); 171 cpu_pda[cpu].__nmi_count);
140 nmi_active = 0; 172 nmi_active = 0;
141 lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; 173 lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
174 nmi_perfctr_msr = 0;
175 kfree(counts);
142 return -1; 176 return -1;
143 } 177 }
144 } 178 }
179 endflag = 1;
145 printk("OK.\n"); 180 printk("OK.\n");
146 181
147 /* now that we know it works we can reduce NMI frequency to 182 /* now that we know it works we can reduce NMI frequency to
@@ -149,10 +184,9 @@ static int __init check_nmi_watchdog (void)
149 if (nmi_watchdog == NMI_LOCAL_APIC) 184 if (nmi_watchdog == NMI_LOCAL_APIC)
150 nmi_hz = 1; 185 nmi_hz = 1;
151 186
187 kfree(counts);
152 return 0; 188 return 0;
153} 189}
154/* Have this called later during boot so counters are updating */
155late_initcall(check_nmi_watchdog);
156 190
157int __init setup_nmi_watchdog(char *str) 191int __init setup_nmi_watchdog(char *str)
158{ 192{
@@ -170,7 +204,7 @@ int __init setup_nmi_watchdog(char *str)
170 204
171 if (nmi >= NMI_INVALID) 205 if (nmi >= NMI_INVALID)
172 return 0; 206 return 0;
173 nmi_watchdog = nmi; 207 nmi_watchdog = nmi;
174 return 1; 208 return 1;
175} 209}
176 210
@@ -185,7 +219,10 @@ static void disable_lapic_nmi_watchdog(void)
185 wrmsr(MSR_K7_EVNTSEL0, 0, 0); 219 wrmsr(MSR_K7_EVNTSEL0, 0, 0);
186 break; 220 break;
187 case X86_VENDOR_INTEL: 221 case X86_VENDOR_INTEL:
188 wrmsr(MSR_IA32_EVNTSEL0, 0, 0); 222 if (boot_cpu_data.x86 == 15) {
223 wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
224 wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
225 }
189 break; 226 break;
190 } 227 }
191 nmi_active = -1; 228 nmi_active = -1;
@@ -253,7 +290,7 @@ void enable_timer_nmi_watchdog(void)
253 290
254static int nmi_pm_active; /* nmi_active before suspend */ 291static int nmi_pm_active; /* nmi_active before suspend */
255 292
256static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) 293static int lapic_nmi_suspend(struct sys_device *dev, u32 state)
257{ 294{
258 nmi_pm_active = nmi_active; 295 nmi_pm_active = nmi_active;
259 disable_lapic_nmi_watchdog(); 296 disable_lapic_nmi_watchdog();
@@ -300,22 +337,27 @@ late_initcall(init_lapic_nmi_sysfs);
300 * Original code written by Keith Owens. 337 * Original code written by Keith Owens.
301 */ 338 */
302 339
340static void clear_msr_range(unsigned int base, unsigned int n)
341{
342 unsigned int i;
343
344 for(i = 0; i < n; ++i)
345 wrmsr(base+i, 0, 0);
346}
347
303static void setup_k7_watchdog(void) 348static void setup_k7_watchdog(void)
304{ 349{
305 int i; 350 int i;
306 unsigned int evntsel; 351 unsigned int evntsel;
307 352
308 /* No check, so can start with slow frequency */
309 nmi_hz = 1;
310
311 /* XXX should check these in EFER */
312
313 nmi_perfctr_msr = MSR_K7_PERFCTR0; 353 nmi_perfctr_msr = MSR_K7_PERFCTR0;
314 354
315 for(i = 0; i < 4; ++i) { 355 for(i = 0; i < 4; ++i) {
316 /* Simulator may not support it */ 356 /* Simulator may not support it */
317 if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) 357 if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) {
358 nmi_perfctr_msr = 0;
318 return; 359 return;
360 }
319 wrmsrl(MSR_K7_PERFCTR0+i, 0UL); 361 wrmsrl(MSR_K7_PERFCTR0+i, 0UL);
320 } 362 }
321 363
@@ -325,12 +367,54 @@ static void setup_k7_watchdog(void)
325 | K7_NMI_EVENT; 367 | K7_NMI_EVENT;
326 368
327 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); 369 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
328 wrmsrl(MSR_K7_PERFCTR0, -((u64)cpu_khz*1000) / nmi_hz); 370 wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
329 apic_write(APIC_LVTPC, APIC_DM_NMI); 371 apic_write(APIC_LVTPC, APIC_DM_NMI);
330 evntsel |= K7_EVNTSEL_ENABLE; 372 evntsel |= K7_EVNTSEL_ENABLE;
331 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); 373 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
332} 374}
333 375
376
377static int setup_p4_watchdog(void)
378{
379 unsigned int misc_enable, dummy;
380
381 rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
382 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
383 return 0;
384
385 nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
386 nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
387#ifdef CONFIG_SMP
388 if (smp_num_siblings == 2)
389 nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
390#endif
391
392 if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
393 clear_msr_range(0x3F1, 2);
394 /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
395 docs doesn't fully define it, so leave it alone for now. */
396 if (boot_cpu_data.x86_model >= 0x3) {
397 /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
398 clear_msr_range(0x3A0, 26);
399 clear_msr_range(0x3BC, 3);
400 } else {
401 clear_msr_range(0x3A0, 31);
402 }
403 clear_msr_range(0x3C0, 6);
404 clear_msr_range(0x3C8, 6);
405 clear_msr_range(0x3E0, 2);
406 clear_msr_range(MSR_P4_CCCR0, 18);
407 clear_msr_range(MSR_P4_PERFCTR0, 18);
408
409 wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
410 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
411 Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
412 wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
413 apic_write(APIC_LVTPC, APIC_DM_NMI);
414 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
415 return 1;
416}
417
334void setup_apic_nmi_watchdog(void) 418void setup_apic_nmi_watchdog(void)
335{ 419{
336 switch (boot_cpu_data.x86_vendor) { 420 switch (boot_cpu_data.x86_vendor) {
@@ -341,6 +425,13 @@ void setup_apic_nmi_watchdog(void)
341 return; 425 return;
342 setup_k7_watchdog(); 426 setup_k7_watchdog();
343 break; 427 break;
428 case X86_VENDOR_INTEL:
429 if (boot_cpu_data.x86 != 15)
430 return;
431 if (!setup_p4_watchdog())
432 return;
433 break;
434
344 default: 435 default:
345 return; 436 return;
346 } 437 }
@@ -355,56 +446,67 @@ void setup_apic_nmi_watchdog(void)
355 * 446 *
356 * as these watchdog NMI IRQs are generated on every CPU, we only 447 * as these watchdog NMI IRQs are generated on every CPU, we only
357 * have to check the current processor. 448 * have to check the current processor.
358 *
359 * since NMIs don't listen to _any_ locks, we have to be extremely
360 * careful not to rely on unsafe variables. The printk might lock
361 * up though, so we have to break up any console locks first ...
362 * [when there will be more tty-related locks, break them up
363 * here too!]
364 */ 449 */
365 450
366static unsigned int 451static DEFINE_PER_CPU(unsigned, last_irq_sum);
367 last_irq_sums [NR_CPUS], 452static DEFINE_PER_CPU(local_t, alert_counter);
368 alert_counter [NR_CPUS]; 453static DEFINE_PER_CPU(int, nmi_touch);
369 454
370void touch_nmi_watchdog (void) 455void touch_nmi_watchdog (void)
371{ 456{
372 int i; 457 int i;
373 458
374 /* 459 /*
375 * Just reset the alert counters, (other CPUs might be 460 * Tell other CPUs to reset their alert counters. We cannot
376 * spinning on locks we hold): 461 * do it ourselves because the alert count increase is not
462 * atomic.
377 */ 463 */
378 for (i = 0; i < NR_CPUS; i++) 464 for (i = 0; i < NR_CPUS; i++)
379 alert_counter[i] = 0; 465 per_cpu(nmi_touch, i) = 1;
380} 466}
381 467
382void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) 468void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
383{ 469{
384 int sum, cpu; 470 int sum;
471 int touched = 0;
385 472
386 cpu = safe_smp_processor_id();
387 sum = read_pda(apic_timer_irqs); 473 sum = read_pda(apic_timer_irqs);
388 if (last_irq_sums[cpu] == sum) { 474 if (__get_cpu_var(nmi_touch)) {
475 __get_cpu_var(nmi_touch) = 0;
476 touched = 1;
477 }
478 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
389 /* 479 /*
390 * Ayiee, looks like this CPU is stuck ... 480 * Ayiee, looks like this CPU is stuck ...
391 * wait a few IRQs (5 seconds) before doing the oops ... 481 * wait a few IRQs (5 seconds) before doing the oops ...
392 */ 482 */
393 alert_counter[cpu]++; 483 local_inc(&__get_cpu_var(alert_counter));
394 if (alert_counter[cpu] == 5*nmi_hz) { 484 if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) {
395 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) 485 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
396 == NOTIFY_STOP) { 486 == NOTIFY_STOP) {
397 alert_counter[cpu] = 0; 487 local_set(&__get_cpu_var(alert_counter), 0);
398 return; 488 return;
399 } 489 }
400 die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs); 490 die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs);
401 } 491 }
402 } else { 492 } else {
403 last_irq_sums[cpu] = sum; 493 __get_cpu_var(last_irq_sum) = sum;
404 alert_counter[cpu] = 0; 494 local_set(&__get_cpu_var(alert_counter), 0);
405 } 495 }
406 if (nmi_perfctr_msr) 496 if (nmi_perfctr_msr) {
497 if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
498 /*
499 * P4 quirks:
500 * - An overflown perfctr will assert its interrupt
501 * until the OVF flag in its CCCR is cleared.
502 * - LVTPC is masked on interrupt and must be
503 * unmasked by the LVTPC handler.
504 */
505 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
506 apic_write(APIC_LVTPC, APIC_DM_NMI);
507 }
407 wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); 508 wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
509 }
408} 510}
409 511
410static int dummy_nmi_callback(struct pt_regs * regs, int cpu) 512static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c
new file mode 100644
index 000000000000..feb5f108dd26
--- /dev/null
+++ b/arch/x86_64/kernel/pmtimer.c
@@ -0,0 +1,101 @@
1/* Ported over from i386 by AK, original copyright was:
2 *
3 * (C) Dominik Brodowski <linux@brodo.de> 2003
4 *
5 * Driver to use the Power Management Timer (PMTMR) available in some
6 * southbridges as primary timing source for the Linux kernel.
7 *
8 * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
9 * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
10 *
11 * This file is licensed under the GPL v2.
12 *
13 * Dropped all the hardware bug workarounds for now. Hopefully they
14 * are not needed on 64bit chipsets.
15 */
16
17#include <linux/jiffies.h>
18#include <linux/kernel.h>
19#include <linux/time.h>
20#include <linux/init.h>
21#include <linux/cpumask.h>
22#include <asm/io.h>
23#include <asm/proto.h>
24#include <asm/msr.h>
25#include <asm/vsyscall.h>
26
27/* The I/O port the PMTMR resides at.
28 * The location is detected during setup_arch(),
29 * in arch/i386/kernel/acpi/boot.c */
30u32 pmtmr_ioport;
31
32/* value of the Power timer at last timer interrupt */
33static u32 offset_delay;
34static u32 last_pmtmr_tick;
35
36#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
37
38static inline u32 cyc2us(u32 cycles)
39{
40 /* The Power Management Timer ticks at 3.579545 ticks per microsecond.
41 * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
42 *
43 * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
44 * easily be multiplied with 286 (=0x11E) without having to fear
45 * u32 overflows.
46 */
47 cycles *= 286;
48 return (cycles >> 10);
49}
50
51int pmtimer_mark_offset(void)
52{
53 static int first_run = 1;
54 unsigned long tsc;
55 u32 lost;
56
57 u32 tick = inl(pmtmr_ioport);
58 u32 delta;
59
60 delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK);
61
62 last_pmtmr_tick = tick;
63 monotonic_base += delta * NSEC_PER_USEC;
64
65 delta += offset_delay;
66
67 lost = delta / (USEC_PER_SEC / HZ);
68 offset_delay = delta % (USEC_PER_SEC / HZ);
69
70 rdtscll(tsc);
71 vxtime.last_tsc = tsc - offset_delay * cpu_khz;
72
73 /* don't calculate delay for first run,
74 or if we've got less then a tick */
75 if (first_run || (lost < 1)) {
76 first_run = 0;
77 offset_delay = 0;
78 }
79
80 return lost - 1;
81}
82
83unsigned int do_gettimeoffset_pm(void)
84{
85 u32 now, offset, delta = 0;
86
87 offset = last_pmtmr_tick;
88 now = inl(pmtmr_ioport);
89 delta = (now - offset) & ACPI_PM_MASK;
90
91 return offset_delay + cyc2us(delta);
92}
93
94
95static int __init nopmtimer_setup(char *s)
96{
97 pmtmr_ioport = 0;
98 return 0;
99}
100
101__setup("nopmtimer", nopmtimer_setup);
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c
index e26e86bb56fe..60dc9b98951d 100644
--- a/arch/x86_64/kernel/ptrace.c
+++ b/arch/x86_64/kernel/ptrace.c
@@ -257,13 +257,13 @@ static int putreg(struct task_struct *child,
257 value &= 0xffff; 257 value &= 0xffff;
258 return 0; 258 return 0;
259 case offsetof(struct user_regs_struct,fs_base): 259 case offsetof(struct user_regs_struct,fs_base):
260 if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) 260 if (value >= TASK_SIZE)
261 return -EIO; 261 return -EIO;
262 child->thread.fs = value; 262 child->thread.fs = value;
263 return 0; 263 return 0;
264 case offsetof(struct user_regs_struct,gs_base): 264 case offsetof(struct user_regs_struct,gs_base):
265 if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) 265 if (value >= TASK_SIZE)
266 return -EIO; 266 return -EIO;
267 child->thread.gs = value; 267 child->thread.gs = value;
268 return 0; 268 return 0;
269 case offsetof(struct user_regs_struct, eflags): 269 case offsetof(struct user_regs_struct, eflags):
@@ -277,6 +277,11 @@ static int putreg(struct task_struct *child,
277 return -EIO; 277 return -EIO;
278 value &= 0xffff; 278 value &= 0xffff;
279 break; 279 break;
280 case offsetof(struct user_regs_struct, rip):
281 /* Check if the new RIP address is canonical */
282 if (value >= TASK_SIZE)
283 return -EIO;
284 break;
280 } 285 }
281 put_stack_long(child, regno - sizeof(struct pt_regs), value); 286 put_stack_long(child, regno - sizeof(struct pt_regs), value);
282 return 0; 287 return 0;
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 2129cf9ba6b2..b9fd0252c279 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -727,11 +727,12 @@ static void __init display_cacheinfo(struct cpuinfo_x86 *c)
727static void __init amd_detect_cmp(struct cpuinfo_x86 *c) 727static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
728{ 728{
729#ifdef CONFIG_SMP 729#ifdef CONFIG_SMP
730 int cpu = c->x86_apicid; 730 int cpu = smp_processor_id();
731 int node = 0; 731 int node = 0;
732 if (c->x86_num_cores == 1) 732 if (c->x86_num_cores == 1)
733 return; 733 return;
734 cpu_core_id[cpu] = cpu >> hweight32(c->x86_num_cores - 1); 734 /* Fix up the APIC ID following the AMD specification. */
735 cpu_core_id[cpu] >>= hweight32(c->x86_num_cores - 1);
735 736
736#ifdef CONFIG_NUMA 737#ifdef CONFIG_NUMA
737 /* When an ACPI SRAT table is available use the mappings from SRAT 738 /* When an ACPI SRAT table is available use the mappings from SRAT
@@ -745,6 +746,9 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
745 node = cpu_to_node[cpu]; 746 node = cpu_to_node[cpu];
746 } 747 }
747#endif 748#endif
749 /* For now: - better than BAD_APIC_ID at least*/
750 phys_proc_id[cpu] = cpu_core_id[cpu];
751
748 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", 752 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
749 cpu, c->x86_num_cores, node, cpu_core_id[cpu]); 753 cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
750#endif 754#endif
@@ -925,7 +929,6 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c)
925 c->x86_clflush_size = 64; 929 c->x86_clflush_size = 64;
926 c->x86_cache_alignment = c->x86_clflush_size; 930 c->x86_cache_alignment = c->x86_clflush_size;
927 c->x86_num_cores = 1; 931 c->x86_num_cores = 1;
928 c->x86_apicid = c == &boot_cpu_data ? 0 : c - cpu_data;
929 c->extended_cpuid_level = 0; 932 c->extended_cpuid_level = 0;
930 memset(&c->x86_capability, 0, sizeof c->x86_capability); 933 memset(&c->x86_capability, 0, sizeof c->x86_capability);
931 934
@@ -954,11 +957,15 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c)
954 } 957 }
955 if (c->x86_capability[0] & (1<<19)) 958 if (c->x86_capability[0] & (1<<19))
956 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; 959 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
957 c->x86_apicid = misc >> 24;
958 } else { 960 } else {
959 /* Have CPUID level 0 only - unheard of */ 961 /* Have CPUID level 0 only - unheard of */
960 c->x86 = 4; 962 c->x86 = 4;
961 } 963 }
964
965#ifdef CONFIG_SMP
966 phys_proc_id[smp_processor_id()] =
967 cpu_core_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
968#endif
962} 969}
963 970
964/* 971/*
@@ -1088,7 +1095,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1088 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1095 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1089 1096
1090 /* Other (Linux-defined) */ 1097 /* Other (Linux-defined) */
1091 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", "k8c+", 1098 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
1092 "constant_tsc", NULL, NULL, 1099 "constant_tsc", NULL, NULL,
1093 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1100 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1094 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1101 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 73f7e8b9543a..5abdee1e16a5 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -56,6 +56,7 @@
56#include <asm/kdebug.h> 56#include <asm/kdebug.h>
57#include <asm/tlbflush.h> 57#include <asm/tlbflush.h>
58#include <asm/proto.h> 58#include <asm/proto.h>
59#include <asm/nmi.h>
59 60
60/* Change for real CPU hotplug. Note other files need to be fixed 61/* Change for real CPU hotplug. Note other files need to be fixed
61 first too. */ 62 first too. */
@@ -125,96 +126,210 @@ static void __cpuinit smp_store_cpu_info(int id)
125 126
126 *c = boot_cpu_data; 127 *c = boot_cpu_data;
127 identify_cpu(c); 128 identify_cpu(c);
129 print_cpu_info(c);
128} 130}
129 131
130/* 132/*
131 * Synchronize TSCs of CPUs 133 * New Funky TSC sync algorithm borrowed from IA64.
134 * Main advantage is that it doesn't reset the TSCs fully and
135 * in general looks more robust and it works better than my earlier
136 * attempts. I believe it was written by David Mosberger. Some minor
137 * adjustments for x86-64 by me -AK
132 * 138 *
133 * This new algorithm is less accurate than the old "zero TSCs" 139 * Original comment reproduced below.
134 * one, but we cannot zero TSCs anymore in the new hotplug CPU 140 *
135 * model. 141 * Synchronize TSC of the current (slave) CPU with the TSC of the
142 * MASTER CPU (normally the time-keeper CPU). We use a closed loop to
143 * eliminate the possibility of unaccounted-for errors (such as
144 * getting a machine check in the middle of a calibration step). The
145 * basic idea is for the slave to ask the master what itc value it has
146 * and to read its own itc before and after the master responds. Each
147 * iteration gives us three timestamps:
148 *
149 * slave master
150 *
151 * t0 ---\
152 * ---\
153 * --->
154 * tm
155 * /---
156 * /---
157 * t1 <---
158 *
159 *
160 * The goal is to adjust the slave's TSC such that tm falls exactly
161 * half-way between t0 and t1. If we achieve this, the clocks are
162 * synchronized provided the interconnect between the slave and the
163 * master is symmetric. Even if the interconnect were asymmetric, we
164 * would still know that the synchronization error is smaller than the
165 * roundtrip latency (t0 - t1).
166 *
167 * When the interconnect is quiet and symmetric, this lets us
168 * synchronize the TSC to within one or two cycles. However, we can
169 * only *guarantee* that the synchronization is accurate to within a
170 * round-trip time, which is typically in the range of several hundred
171 * cycles (e.g., ~500 cycles). In practice, this means that the TSCs
172 * are usually almost perfectly synchronized, but we shouldn't assume
173 * that the accuracy is much better than half a micro second or so.
174 *
175 * [there are other errors like the latency of RDTSC and of the
176 * WRMSR. These can also account to hundreds of cycles. So it's
177 * probably worse. It claims 153 cycles error on a dual Opteron,
178 * but I suspect the numbers are actually somewhat worse -AK]
136 */ 179 */
137 180
138static atomic_t __cpuinitdata tsc_flag; 181#define MASTER 0
182#define SLAVE (SMP_CACHE_BYTES/8)
183
184/* Intentionally don't use cpu_relax() while TSC synchronization
185 because we don't want to go into funky power save modi or cause
186 hypervisors to schedule us away. Going to sleep would likely affect
187 latency and low latency is the primary objective here. -AK */
188#define no_cpu_relax() barrier()
189
139static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock); 190static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
140static unsigned long long __cpuinitdata bp_tsc, ap_tsc; 191static volatile __cpuinitdata unsigned long go[SLAVE + 1];
192static int notscsync __cpuinitdata;
193
194#undef DEBUG_TSC_SYNC
141 195
142#define NR_LOOPS 5 196#define NUM_ROUNDS 64 /* magic value */
197#define NUM_ITERS 5 /* likewise */
143 198
144static void __cpuinit sync_tsc_bp_init(int init) 199/* Callback on boot CPU */
200static __cpuinit void sync_master(void *arg)
145{ 201{
146 if (init) 202 unsigned long flags, i;
147 _raw_spin_lock(&tsc_sync_lock); 203
148 else 204 if (smp_processor_id() != boot_cpu_id)
149 _raw_spin_unlock(&tsc_sync_lock); 205 return;
150 atomic_set(&tsc_flag, 0); 206
207 go[MASTER] = 0;
208
209 local_irq_save(flags);
210 {
211 for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
212 while (!go[MASTER])
213 no_cpu_relax();
214 go[MASTER] = 0;
215 rdtscll(go[SLAVE]);
216 }
217 }
218 local_irq_restore(flags);
151} 219}
152 220
153/* 221/*
154 * Synchronize TSC on AP with BP. 222 * Return the number of cycles by which our tsc differs from the tsc
223 * on the master (time-keeper) CPU. A positive number indicates our
224 * tsc is ahead of the master, negative that it is behind.
155 */ 225 */
156static void __cpuinit __sync_tsc_ap(void) 226static inline long
227get_delta(long *rt, long *master)
157{ 228{
158 if (!cpu_has_tsc) 229 unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
159 return; 230 unsigned long tcenter, t0, t1, tm;
160 Dprintk("AP %d syncing TSC\n", smp_processor_id()); 231 int i;
161 232
162 while (atomic_read(&tsc_flag) != 0) 233 for (i = 0; i < NUM_ITERS; ++i) {
163 cpu_relax(); 234 rdtscll(t0);
164 atomic_inc(&tsc_flag); 235 go[MASTER] = 1;
165 mb(); 236 while (!(tm = go[SLAVE]))
166 _raw_spin_lock(&tsc_sync_lock); 237 no_cpu_relax();
167 wrmsrl(MSR_IA32_TSC, bp_tsc); 238 go[SLAVE] = 0;
168 _raw_spin_unlock(&tsc_sync_lock); 239 rdtscll(t1);
169 rdtscll(ap_tsc); 240
170 mb(); 241 if (t1 - t0 < best_t1 - best_t0)
171 atomic_inc(&tsc_flag); 242 best_t0 = t0, best_t1 = t1, best_tm = tm;
172 mb(); 243 }
244
245 *rt = best_t1 - best_t0;
246 *master = best_tm - best_t0;
247
248 /* average best_t0 and best_t1 without overflow: */
249 tcenter = (best_t0/2 + best_t1/2);
250 if (best_t0 % 2 + best_t1 % 2 == 2)
251 ++tcenter;
252 return tcenter - best_tm;
173} 253}
174 254
175static void __cpuinit sync_tsc_ap(void) 255static __cpuinit void sync_tsc(void)
176{ 256{
177 int i; 257 int i, done = 0;
178 for (i = 0; i < NR_LOOPS; i++) 258 long delta, adj, adjust_latency = 0;
179 __sync_tsc_ap(); 259 unsigned long flags, rt, master_time_stamp, bound;
260#if DEBUG_TSC_SYNC
261 static struct syncdebug {
262 long rt; /* roundtrip time */
263 long master; /* master's timestamp */
264 long diff; /* difference between midpoint and master's timestamp */
265 long lat; /* estimate of tsc adjustment latency */
266 } t[NUM_ROUNDS] __cpuinitdata;
267#endif
268
269 go[MASTER] = 1;
270
271 smp_call_function(sync_master, NULL, 1, 0);
272
273 while (go[MASTER]) /* wait for master to be ready */
274 no_cpu_relax();
275
276 spin_lock_irqsave(&tsc_sync_lock, flags);
277 {
278 for (i = 0; i < NUM_ROUNDS; ++i) {
279 delta = get_delta(&rt, &master_time_stamp);
280 if (delta == 0) {
281 done = 1; /* let's lock on to this... */
282 bound = rt;
283 }
284
285 if (!done) {
286 unsigned long t;
287 if (i > 0) {
288 adjust_latency += -delta;
289 adj = -delta + adjust_latency/4;
290 } else
291 adj = -delta;
292
293 rdtscll(t);
294 wrmsrl(MSR_IA32_TSC, t + adj);
295 }
296#if DEBUG_TSC_SYNC
297 t[i].rt = rt;
298 t[i].master = master_time_stamp;
299 t[i].diff = delta;
300 t[i].lat = adjust_latency/4;
301#endif
302 }
303 }
304 spin_unlock_irqrestore(&tsc_sync_lock, flags);
305
306#if DEBUG_TSC_SYNC
307 for (i = 0; i < NUM_ROUNDS; ++i)
308 printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
309 t[i].rt, t[i].master, t[i].diff, t[i].lat);
310#endif
311
312 printk(KERN_INFO
313 "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
314 "maxerr %lu cycles)\n",
315 smp_processor_id(), boot_cpu_id, delta, rt);
180} 316}
181 317
182/* 318static void __cpuinit tsc_sync_wait(void)
183 * Synchronize TSC from BP to AP.
184 */
185static void __cpuinit __sync_tsc_bp(int cpu)
186{ 319{
187 if (!cpu_has_tsc) 320 if (notscsync || !cpu_has_tsc)
188 return; 321 return;
189 322 printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
190 /* Wait for AP */ 323 boot_cpu_id);
191 while (atomic_read(&tsc_flag) == 0) 324 sync_tsc();
192 cpu_relax();
193 /* Save BPs TSC */
194 sync_core();
195 rdtscll(bp_tsc);
196 /* Don't do the sync core here to avoid too much latency. */
197 mb();
198 /* Start the AP */
199 _raw_spin_unlock(&tsc_sync_lock);
200 /* Wait for AP again */
201 while (atomic_read(&tsc_flag) < 2)
202 cpu_relax();
203 rdtscl(bp_tsc);
204 barrier();
205} 325}
206 326
207static void __cpuinit sync_tsc_bp(int cpu) 327static __init int notscsync_setup(char *s)
208{ 328{
209 int i; 329 notscsync = 1;
210 for (i = 0; i < NR_LOOPS - 1; i++) { 330 return 0;
211 __sync_tsc_bp(cpu);
212 sync_tsc_bp_init(1);
213 }
214 __sync_tsc_bp(cpu);
215 printk(KERN_INFO "Synced TSC of CPU %d difference %Ld\n",
216 cpu, ap_tsc - bp_tsc);
217} 331}
332__setup("notscsync", notscsync_setup);
218 333
219static atomic_t init_deasserted __cpuinitdata; 334static atomic_t init_deasserted __cpuinitdata;
220 335
@@ -315,11 +430,6 @@ void __cpuinit start_secondary(void)
315 cpu_init(); 430 cpu_init();
316 smp_callin(); 431 smp_callin();
317 432
318 /*
319 * Synchronize the TSC with the BP
320 */
321 sync_tsc_ap();
322
323 /* otherwise gcc will move up the smp_processor_id before the cpu_init */ 433 /* otherwise gcc will move up the smp_processor_id before the cpu_init */
324 barrier(); 434 barrier();
325 435
@@ -334,7 +444,6 @@ void __cpuinit start_secondary(void)
334 enable_8259A_irq(0); 444 enable_8259A_irq(0);
335 } 445 }
336 446
337
338 enable_APIC_timer(); 447 enable_APIC_timer();
339 448
340 /* 449 /*
@@ -343,6 +452,11 @@ void __cpuinit start_secondary(void)
343 cpu_set(smp_processor_id(), cpu_online_map); 452 cpu_set(smp_processor_id(), cpu_online_map);
344 mb(); 453 mb();
345 454
455 /* Wait for TSC sync to not schedule things before.
456 We still process interrupts, which could see an inconsistent
457 time in that window unfortunately. */
458 tsc_sync_wait();
459
346 cpu_idle(); 460 cpu_idle();
347} 461}
348 462
@@ -531,7 +645,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
531 printk("failed fork for CPU %d\n", cpu); 645 printk("failed fork for CPU %d\n", cpu);
532 return PTR_ERR(idle); 646 return PTR_ERR(idle);
533 } 647 }
534 x86_cpu_to_apicid[cpu] = apicid;
535 648
536 cpu_pda[cpu].pcurrent = idle; 649 cpu_pda[cpu].pcurrent = idle;
537 650
@@ -600,8 +713,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
600 713
601 if (cpu_isset(cpu, cpu_callin_map)) { 714 if (cpu_isset(cpu, cpu_callin_map)) {
602 /* number CPUs logically, starting from 1 (BSP is 0) */ 715 /* number CPUs logically, starting from 1 (BSP is 0) */
603 Dprintk("OK.\n");
604 print_cpu_info(&cpu_data[cpu]);
605 Dprintk("CPU has booted.\n"); 716 Dprintk("CPU has booted.\n");
606 } else { 717 } else {
607 boot_error = 1; 718 boot_error = 1;
@@ -842,7 +953,6 @@ void __cpuinit smp_prepare_cpus(unsigned int max_cpus)
842 GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id); 953 GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
843 /* Or can we switch back to PIC here? */ 954 /* Or can we switch back to PIC here? */
844 } 955 }
845 x86_cpu_to_apicid[0] = boot_cpu_id;
846 956
847 /* 957 /*
848 * Now start the IO-APICs 958 * Now start the IO-APICs
@@ -889,18 +999,14 @@ int __cpuinit __cpu_up(unsigned int cpu)
889 printk("__cpu_up: bad cpu %d\n", cpu); 999 printk("__cpu_up: bad cpu %d\n", cpu);
890 return -EINVAL; 1000 return -EINVAL;
891 } 1001 }
892 sync_tsc_bp_init(1);
893 1002
894 /* Boot it! */ 1003 /* Boot it! */
895 err = do_boot_cpu(cpu, apicid); 1004 err = do_boot_cpu(cpu, apicid);
896 if (err < 0) { 1005 if (err < 0) {
897 sync_tsc_bp_init(0);
898 Dprintk("do_boot_cpu failed %d\n", err); 1006 Dprintk("do_boot_cpu failed %d\n", err);
899 return err; 1007 return err;
900 } 1008 }
901 1009
902 sync_tsc_bp(cpu);
903
904 /* Unleash the CPU! */ 1010 /* Unleash the CPU! */
905 Dprintk("waiting for cpu %d\n", cpu); 1011 Dprintk("waiting for cpu %d\n", cpu);
906 1012
@@ -923,4 +1029,6 @@ void __cpuinit smp_cpus_done(unsigned int max_cpus)
923 1029
924 detect_siblings(); 1030 detect_siblings();
925 time_init_gtod(); 1031 time_init_gtod();
1032
1033 check_nmi_watchdog();
926} 1034}
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 0e3b60e1ac5b..735b6767c8ed 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -26,6 +26,8 @@
26#include <linux/sysdev.h> 26#include <linux/sysdev.h>
27#include <linux/bcd.h> 27#include <linux/bcd.h>
28#include <linux/kallsyms.h> 28#include <linux/kallsyms.h>
29#include <linux/acpi.h>
30#include <acpi/achware.h> /* for PM timer frequency */
29#include <asm/8253pit.h> 31#include <asm/8253pit.h>
30#include <asm/pgtable.h> 32#include <asm/pgtable.h>
31#include <asm/vsyscall.h> 33#include <asm/vsyscall.h>
@@ -396,6 +398,10 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
396 (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; 398 (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick;
397 399
398 vxtime.last = offset; 400 vxtime.last = offset;
401#ifdef CONFIG_X86_PM_TIMER
402 } else if (vxtime.mode == VXTIME_PMTMR) {
403 lost = pmtimer_mark_offset();
404#endif
399 } else { 405 } else {
400 offset = (((tsc - vxtime.last_tsc) * 406 offset = (((tsc - vxtime.last_tsc) *
401 vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); 407 vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ);
@@ -898,6 +904,13 @@ void __init time_init(void)
898 hpet_period; 904 hpet_period;
899 cpu_khz = hpet_calibrate_tsc(); 905 cpu_khz = hpet_calibrate_tsc();
900 timename = "HPET"; 906 timename = "HPET";
907#ifdef CONFIG_X86_PM_TIMER
908 } else if (pmtmr_ioport) {
909 vxtime_hz = PM_TIMER_FREQUENCY;
910 timename = "PM";
911 pit_init();
912 cpu_khz = pit_calibrate_tsc();
913#endif
901 } else { 914 } else {
902 pit_init(); 915 pit_init();
903 cpu_khz = pit_calibrate_tsc(); 916 cpu_khz = pit_calibrate_tsc();
@@ -923,35 +936,50 @@ void __init time_init(void)
923} 936}
924 937
925/* 938/*
939 * Make an educated guess if the TSC is trustworthy and synchronized
940 * over all CPUs.
941 */
942static __init int unsynchronized_tsc(void)
943{
944#ifdef CONFIG_SMP
945 if (oem_force_hpet_timer())
946 return 1;
947 /* Intel systems are normally all synchronized. Exceptions
948 are handled in the OEM check above. */
949 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
950 return 0;
951 /* All in a single socket - should be synchronized */
952 if (cpus_weight(cpu_core_map[0]) == num_online_cpus())
953 return 0;
954#endif
955 /* Assume multi socket systems are not synchronized */
956 return num_online_cpus() > 1;
957}
958
959/*
926 * Decide after all CPUs are booted what mode gettimeofday should use. 960 * Decide after all CPUs are booted what mode gettimeofday should use.
927 */ 961 */
928void __init time_init_gtod(void) 962void __init time_init_gtod(void)
929{ 963{
930 char *timetype; 964 char *timetype;
931 965
932 /* 966 if (unsynchronized_tsc())
933 * AMD systems with more than one CPU don't have fully synchronized
934 * TSCs. Always use HPET gettimeofday for these, although it is slower.
935 * Intel SMP systems usually have synchronized TSCs, so use always
936 * the TSC.
937 *
938 * Exceptions:
939 * IBM Summit2 checked by oem_force_hpet_timer().
940 * AMD dual core may also not need HPET. Check me.
941 *
942 * Can be turned off with "notsc".
943 */
944 if (num_online_cpus() > 1 &&
945 boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
946 notsc = 1;
947 /* Some systems will want to disable TSC and use HPET. */
948 if (oem_force_hpet_timer())
949 notsc = 1; 967 notsc = 1;
950 if (vxtime.hpet_address && notsc) { 968 if (vxtime.hpet_address && notsc) {
951 timetype = "HPET"; 969 timetype = "HPET";
952 vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; 970 vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
953 vxtime.mode = VXTIME_HPET; 971 vxtime.mode = VXTIME_HPET;
954 do_gettimeoffset = do_gettimeoffset_hpet; 972 do_gettimeoffset = do_gettimeoffset_hpet;
973#ifdef CONFIG_X86_PM_TIMER
974 /* Using PM for gettimeofday is quite slow, but we have no other
975 choice because the TSC is too unreliable on some systems. */
976 } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) {
977 timetype = "PM";
978 do_gettimeoffset = do_gettimeoffset_pm;
979 vxtime.mode = VXTIME_PMTMR;
980 sysctl_vsyscall = 0;
981 printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n");
982#endif
955 } else { 983 } else {
956 timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC"; 984 timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC";
957 vxtime.mode = VXTIME_TSC; 985 vxtime.mode = VXTIME_TSC;
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index b4b8dc59663a..2e5734425949 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -65,7 +65,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv)
65 usec = (__xtime.tv_nsec / 1000) + 65 usec = (__xtime.tv_nsec / 1000) +
66 (__jiffies - __wall_jiffies) * (1000000 / HZ); 66 (__jiffies - __wall_jiffies) * (1000000 / HZ);
67 67
68 if (__vxtime.mode == VXTIME_TSC) { 68 if (__vxtime.mode != VXTIME_HPET) {
69 sync_core(); 69 sync_core();
70 rdtscll(t); 70 rdtscll(t);
71 if (t < __vxtime.last_tsc) 71 if (t < __vxtime.last_tsc)
@@ -217,8 +217,9 @@ static int __init vsyscall_init(void)
217 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); 217 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
218 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); 218 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
219 map_vsyscall(); 219 map_vsyscall();
220 sysctl_vsyscall = 1; 220#ifdef CONFIG_SYSCTL
221 register_sysctl_table(kernel_root_table2, 0); 221 register_sysctl_table(kernel_root_table2, 0);
222#endif
222 return 0; 223 return 0;
223} 224}
224 225
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index e03309216764..5d6b2114cc9a 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -234,6 +234,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
234 234
235/* 235/*
236 * Handle a fault on the vmalloc or module mapping area 236 * Handle a fault on the vmalloc or module mapping area
237 *
238 * This assumes no large pages in there.
237 */ 239 */
238static int vmalloc_fault(unsigned long address) 240static int vmalloc_fault(unsigned long address)
239{ 241{
@@ -272,7 +274,10 @@ static int vmalloc_fault(unsigned long address)
272 if (!pte_present(*pte_ref)) 274 if (!pte_present(*pte_ref))
273 return -1; 275 return -1;
274 pte = pte_offset_kernel(pmd, address); 276 pte = pte_offset_kernel(pmd, address);
275 if (!pte_present(*pte) || pte_page(*pte) != pte_page(*pte_ref)) 277 /* Don't use pte_page here, because the mappings can point
278 outside mem_map, and the NUMA hash lookup cannot handle
279 that. */
280 if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
276 BUG(); 281 BUG();
277 __flush_tlb_all(); 282 __flush_tlb_all();
278 return 0; 283 return 0;
@@ -346,7 +351,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
346 * protection error (error_code & 1) == 0. 351 * protection error (error_code & 1) == 0.
347 */ 352 */
348 if (unlikely(address >= TASK_SIZE)) { 353 if (unlikely(address >= TASK_SIZE)) {
349 if (!(error_code & 5)) { 354 if (!(error_code & 5) &&
355 ((address >= VMALLOC_START && address < VMALLOC_END) ||
356 (address >= MODULES_VADDR && address < MODULES_END))) {
350 if (vmalloc_fault(address) < 0) 357 if (vmalloc_fault(address) < 0)
351 goto bad_area_nosemaphore; 358 goto bad_area_nosemaphore;
352 return; 359 return;
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
index 74ec8554b195..c6fb0cb69992 100644
--- a/arch/x86_64/mm/ioremap.c
+++ b/arch/x86_64/mm/ioremap.c
@@ -272,7 +272,7 @@ void iounmap(volatile void __iomem *addr)
272 if ((p->flags >> 20) && 272 if ((p->flags >> 20) &&
273 p->phys_addr + p->size - 1 < virt_to_phys(high_memory)) { 273 p->phys_addr + p->size - 1 < virt_to_phys(high_memory)) {
274 /* p->size includes the guard page, but cpa doesn't like that */ 274 /* p->size includes the guard page, but cpa doesn't like that */
275 change_page_attr(virt_to_page(__va(p->phys_addr)), 275 change_page_attr_addr((unsigned long)__va(p->phys_addr),
276 p->size >> PAGE_SHIFT, 276 p->size >> PAGE_SHIFT,
277 PAGE_KERNEL); 277 PAGE_KERNEL);
278 global_flush_tlb(); 278 global_flush_tlb();