aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64/kernel')
-rw-r--r--arch/x86_64/kernel/Makefile1
-rw-r--r--arch/x86_64/kernel/apic.c5
-rw-r--r--arch/x86_64/kernel/entry.S11
-rw-r--r--arch/x86_64/kernel/io_apic.c81
-rw-r--r--arch/x86_64/kernel/kprobes.c7
-rw-r--r--arch/x86_64/kernel/module.c4
-rw-r--r--arch/x86_64/kernel/mpparse.c22
-rw-r--r--arch/x86_64/kernel/nmi.c248
-rw-r--r--arch/x86_64/kernel/pmtimer.c101
-rw-r--r--arch/x86_64/kernel/ptrace.c17
-rw-r--r--arch/x86_64/kernel/setup.c30
-rw-r--r--arch/x86_64/kernel/signal.c4
-rw-r--r--arch/x86_64/kernel/smpboot.c263
-rw-r--r--arch/x86_64/kernel/time.c62
-rw-r--r--arch/x86_64/kernel/vsyscall.c5
-rw-r--r--arch/x86_64/kernel/x8664_ksyms.c13
16 files changed, 591 insertions, 283 deletions
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 0a3318e08ab6..5ca4a4598fda 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
28obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o 28obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
29obj-$(CONFIG_SWIOTLB) += swiotlb.o 29obj-$(CONFIG_SWIOTLB) += swiotlb.o
30obj-$(CONFIG_KPROBES) += kprobes.o 30obj-$(CONFIG_KPROBES) += kprobes.o
31obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
31 32
32obj-$(CONFIG_MODULES) += module.o 33obj-$(CONFIG_MODULES) += module.o
33 34
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 7e13545748e0..f8e6cc4fecd4 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -33,6 +33,7 @@
33#include <asm/mpspec.h> 33#include <asm/mpspec.h>
34#include <asm/pgalloc.h> 34#include <asm/pgalloc.h>
35#include <asm/mach_apic.h> 35#include <asm/mach_apic.h>
36#include <asm/nmi.h>
36 37
37int apic_verbosity; 38int apic_verbosity;
38 39
@@ -925,7 +926,7 @@ __init int oem_force_hpet_timer(void)
925 unsigned id; 926 unsigned id;
926 DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); 927 DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
927 928
928 bitmap_empty(clustermap, NUM_APIC_CLUSTERS); 929 bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
929 930
930 for (i = 0; i < NR_CPUS; i++) { 931 for (i = 0; i < NR_CPUS; i++) {
931 id = bios_cpu_apicid[i]; 932 id = bios_cpu_apicid[i];
@@ -1056,7 +1057,7 @@ int __init APIC_init_uniprocessor (void)
1056 nr_ioapics = 0; 1057 nr_ioapics = 0;
1057#endif 1058#endif
1058 setup_boot_APIC_clock(); 1059 setup_boot_APIC_clock();
1059 1060 check_nmi_watchdog();
1060 return 0; 1061 return 0;
1061} 1062}
1062 1063
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 1086b5fcac21..28817490fdc6 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -220,13 +220,18 @@ sysret_careful:
220 jmp sysret_check 220 jmp sysret_check
221 221
222 /* Handle a signal */ 222 /* Handle a signal */
223 /* edx: work flags (arg3) */
224sysret_signal: 223sysret_signal:
225 sti 224 sti
225 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
226 jz 1f
227
228 /* Really a signal */
229 /* edx: work flags (arg3) */
226 leaq do_notify_resume(%rip),%rax 230 leaq do_notify_resume(%rip),%rax
227 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 231 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
228 xorl %esi,%esi # oldset -> arg2 232 xorl %esi,%esi # oldset -> arg2
229 call ptregscall_common 233 call ptregscall_common
2341: movl $_TIF_NEED_RESCHED,%edi
230 jmp sysret_check 235 jmp sysret_check
231 236
232 /* Do syscall tracing */ 237 /* Do syscall tracing */
@@ -484,6 +489,8 @@ retint_careful:
484 jmp retint_check 489 jmp retint_check
485 490
486retint_signal: 491retint_signal:
492 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
493 jz retint_swapgs
487 sti 494 sti
488 SAVE_REST 495 SAVE_REST
489 movq $-1,ORIG_RAX(%rsp) 496 movq $-1,ORIG_RAX(%rsp)
@@ -492,8 +499,8 @@ retint_signal:
492 call do_notify_resume 499 call do_notify_resume
493 RESTORE_REST 500 RESTORE_REST
494 cli 501 cli
502 movl $_TIF_NEED_RESCHED,%edi
495 GET_THREAD_INFO(%rcx) 503 GET_THREAD_INFO(%rcx)
496 movl $_TIF_WORK_MASK,%edi
497 jmp retint_check 504 jmp retint_check
498 505
499#ifdef CONFIG_PREEMPT 506#ifdef CONFIG_PREEMPT
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 60be58617eb9..80e9b498c443 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -42,6 +42,8 @@
42 42
43int sis_apic_bug; /* not actually supported, dummy for compile */ 43int sis_apic_bug; /* not actually supported, dummy for compile */
44 44
45static int no_timer_check;
46
45static DEFINE_SPINLOCK(ioapic_lock); 47static DEFINE_SPINLOCK(ioapic_lock);
46 48
47/* 49/*
@@ -1601,7 +1603,7 @@ static inline void check_timer(void)
1601 * Ok, does IRQ0 through the IOAPIC work? 1603 * Ok, does IRQ0 through the IOAPIC work?
1602 */ 1604 */
1603 unmask_IO_APIC_irq(0); 1605 unmask_IO_APIC_irq(0);
1604 if (timer_irq_works()) { 1606 if (!no_timer_check && timer_irq_works()) {
1605 nmi_watchdog_default(); 1607 nmi_watchdog_default();
1606 if (nmi_watchdog == NMI_IO_APIC) { 1608 if (nmi_watchdog == NMI_IO_APIC) {
1607 disable_8259A_irq(0); 1609 disable_8259A_irq(0);
@@ -1671,6 +1673,13 @@ static inline void check_timer(void)
1671 panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n"); 1673 panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n");
1672} 1674}
1673 1675
1676static int __init notimercheck(char *s)
1677{
1678 no_timer_check = 1;
1679 return 1;
1680}
1681__setup("no_timer_check", notimercheck);
1682
1674/* 1683/*
1675 * 1684 *
1676 * IRQ's that are handled by the PIC in the MPS IOAPIC case. 1685 * IRQ's that are handled by the PIC in the MPS IOAPIC case.
@@ -1804,76 +1813,6 @@ device_initcall(ioapic_init_sysfs);
1804 1813
1805#define IO_APIC_MAX_ID 0xFE 1814#define IO_APIC_MAX_ID 0xFE
1806 1815
1807int __init io_apic_get_unique_id (int ioapic, int apic_id)
1808{
1809 union IO_APIC_reg_00 reg_00;
1810 static physid_mask_t apic_id_map;
1811 unsigned long flags;
1812 int i = 0;
1813
1814 /*
1815 * The P4 platform supports up to 256 APIC IDs on two separate APIC
1816 * buses (one for LAPICs, one for IOAPICs), where predecessors only
1817 * supports up to 16 on one shared APIC bus.
1818 *
1819 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
1820 * advantage of new APIC bus architecture.
1821 */
1822
1823 if (physids_empty(apic_id_map))
1824 apic_id_map = phys_cpu_present_map;
1825
1826 spin_lock_irqsave(&ioapic_lock, flags);
1827 reg_00.raw = io_apic_read(ioapic, 0);
1828 spin_unlock_irqrestore(&ioapic_lock, flags);
1829
1830 if (apic_id >= IO_APIC_MAX_ID) {
1831 apic_printk(APIC_QUIET, KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
1832 "%d\n", ioapic, apic_id, reg_00.bits.ID);
1833 apic_id = reg_00.bits.ID;
1834 }
1835
1836 /*
1837 * Every APIC in a system must have a unique ID or we get lots of nice
1838 * 'stuck on smp_invalidate_needed IPI wait' messages.
1839 */
1840 if (physid_isset(apic_id, apic_id_map)) {
1841
1842 for (i = 0; i < IO_APIC_MAX_ID; i++) {
1843 if (!physid_isset(i, apic_id_map))
1844 break;
1845 }
1846
1847 if (i == IO_APIC_MAX_ID)
1848 panic("Max apic_id exceeded!\n");
1849
1850 apic_printk(APIC_VERBOSE, KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
1851 "trying %d\n", ioapic, apic_id, i);
1852
1853 apic_id = i;
1854 }
1855
1856 physid_set(apic_id, apic_id_map);
1857
1858 if (reg_00.bits.ID != apic_id) {
1859 reg_00.bits.ID = apic_id;
1860
1861 spin_lock_irqsave(&ioapic_lock, flags);
1862 io_apic_write(ioapic, 0, reg_00.raw);
1863 reg_00.raw = io_apic_read(ioapic, 0);
1864 spin_unlock_irqrestore(&ioapic_lock, flags);
1865
1866 /* Sanity check */
1867 if (reg_00.bits.ID != apic_id)
1868 panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
1869 }
1870
1871 apic_printk(APIC_VERBOSE,KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
1872
1873 return apic_id;
1874}
1875
1876
1877int __init io_apic_get_version (int ioapic) 1816int __init io_apic_get_version (int ioapic)
1878{ 1817{
1879 union IO_APIC_reg_01 reg_01; 1818 union IO_APIC_reg_01 reg_01;
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index 4f2a852299b6..f77f8a0ff187 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -355,6 +355,13 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs)
355 *tos &= ~(TF_MASK | IF_MASK); 355 *tos &= ~(TF_MASK | IF_MASK);
356 *tos |= kprobe_old_rflags; 356 *tos |= kprobe_old_rflags;
357 break; 357 break;
358 case 0xc3: /* ret/lret */
359 case 0xcb:
360 case 0xc2:
361 case 0xca:
362 regs->eflags &= ~TF_MASK;
363 /* rip is already adjusted, no more changes required*/
364 return;
358 case 0xe8: /* call relative - Fix return addr */ 365 case 0xe8: /* call relative - Fix return addr */
359 *tos = orig_rip + (*tos - copy_rip); 366 *tos = orig_rip + (*tos - copy_rip);
360 break; 367 break;
diff --git a/arch/x86_64/kernel/module.c b/arch/x86_64/kernel/module.c
index c2ffea8845ed..bac195c74bcc 100644
--- a/arch/x86_64/kernel/module.c
+++ b/arch/x86_64/kernel/module.c
@@ -30,9 +30,12 @@
30 30
31#define DEBUGP(fmt...) 31#define DEBUGP(fmt...)
32 32
33#ifndef CONFIG_UML
33void module_free(struct module *mod, void *module_region) 34void module_free(struct module *mod, void *module_region)
34{ 35{
35 vfree(module_region); 36 vfree(module_region);
37 /* FIXME: If module_region == mod->init_region, trim exception
38 table entries. */
36} 39}
37 40
38void *module_alloc(unsigned long size) 41void *module_alloc(unsigned long size)
@@ -51,6 +54,7 @@ void *module_alloc(unsigned long size)
51 54
52 return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC); 55 return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC);
53} 56}
57#endif
54 58
55/* We don't need anything special. */ 59/* We don't need anything special. */
56int module_frob_arch_sections(Elf_Ehdr *hdr, 60int module_frob_arch_sections(Elf_Ehdr *hdr,
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 7ec031c6ca10..f86d9db94bfc 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -107,6 +107,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
107static void __init MP_processor_info (struct mpc_config_processor *m) 107static void __init MP_processor_info (struct mpc_config_processor *m)
108{ 108{
109 int ver; 109 int ver;
110 static int found_bsp=0;
110 111
111 if (!(m->mpc_cpuflag & CPU_ENABLED)) 112 if (!(m->mpc_cpuflag & CPU_ENABLED))
112 return; 113 return;
@@ -126,11 +127,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
126 " Processor ignored.\n", NR_CPUS); 127 " Processor ignored.\n", NR_CPUS);
127 return; 128 return;
128 } 129 }
129 if (num_processors >= maxcpus) {
130 printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
131 " Processor ignored.\n", maxcpus);
132 return;
133 }
134 130
135 num_processors++; 131 num_processors++;
136 132
@@ -150,7 +146,19 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
150 ver = 0x10; 146 ver = 0x10;
151 } 147 }
152 apic_version[m->mpc_apicid] = ver; 148 apic_version[m->mpc_apicid] = ver;
153 bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; 149 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
150 /*
151 * bios_cpu_apicid is required to have processors listed
152 * in same order as logical cpu numbers. Hence the first
153 * entry is BSP, and so on.
154 */
155 bios_cpu_apicid[0] = m->mpc_apicid;
156 x86_cpu_to_apicid[0] = m->mpc_apicid;
157 found_bsp = 1;
158 } else {
159 bios_cpu_apicid[num_processors - found_bsp] = m->mpc_apicid;
160 x86_cpu_to_apicid[num_processors - found_bsp] = m->mpc_apicid;
161 }
154} 162}
155 163
156static void __init MP_bus_info (struct mpc_config_bus *m) 164static void __init MP_bus_info (struct mpc_config_bus *m)
@@ -759,7 +767,7 @@ void __init mp_register_ioapic (
759 mp_ioapics[idx].mpc_apicaddr = address; 767 mp_ioapics[idx].mpc_apicaddr = address;
760 768
761 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 769 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
762 mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); 770 mp_ioapics[idx].mpc_apicid = id;
763 mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); 771 mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
764 772
765 /* 773 /*
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 61de0b34a01e..31c0f2e6ac91 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -33,6 +33,7 @@
33#include <asm/msr.h> 33#include <asm/msr.h>
34#include <asm/proto.h> 34#include <asm/proto.h>
35#include <asm/kdebug.h> 35#include <asm/kdebug.h>
36#include <asm/local.h>
36 37
37/* 38/*
38 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: 39 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -59,7 +60,8 @@ int panic_on_timeout;
59 60
60unsigned int nmi_watchdog = NMI_DEFAULT; 61unsigned int nmi_watchdog = NMI_DEFAULT;
61static unsigned int nmi_hz = HZ; 62static unsigned int nmi_hz = HZ;
62unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ 63static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
64static unsigned int nmi_p4_cccr_val;
63 65
64/* Note that these events don't tick when the CPU idles. This means 66/* Note that these events don't tick when the CPU idles. This means
65 the frequency varies with CPU load. */ 67 the frequency varies with CPU load. */
@@ -71,61 +73,87 @@ unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
71#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 73#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
72#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 74#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
73 75
74#define P6_EVNTSEL0_ENABLE (1 << 22) 76#define MSR_P4_MISC_ENABLE 0x1A0
75#define P6_EVNTSEL_INT (1 << 20) 77#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
76#define P6_EVNTSEL_OS (1 << 17) 78#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
77#define P6_EVNTSEL_USR (1 << 16) 79#define MSR_P4_PERFCTR0 0x300
78#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 80#define MSR_P4_CCCR0 0x360
79#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED 81#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
82#define P4_ESCR_OS (1<<3)
83#define P4_ESCR_USR (1<<2)
84#define P4_CCCR_OVF_PMI0 (1<<26)
85#define P4_CCCR_OVF_PMI1 (1<<27)
86#define P4_CCCR_THRESHOLD(N) ((N)<<20)
87#define P4_CCCR_COMPLEMENT (1<<19)
88#define P4_CCCR_COMPARE (1<<18)
89#define P4_CCCR_REQUIRED (3<<16)
90#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
91#define P4_CCCR_ENABLE (1<<12)
92/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
93 CRU_ESCR0 (with any non-null event selector) through a complemented
94 max threshold. [IA32-Vol3, Section 14.9.9] */
95#define MSR_P4_IQ_COUNTER0 0x30C
96#define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
97#define P4_NMI_IQ_CCCR0 \
98 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
99 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
100
101static __init inline int nmi_known_cpu(void)
102{
103 switch (boot_cpu_data.x86_vendor) {
104 case X86_VENDOR_AMD:
105 return boot_cpu_data.x86 == 15;
106 case X86_VENDOR_INTEL:
107 return boot_cpu_data.x86 == 15;
108 }
109 return 0;
110}
80 111
81/* Run after command line and cpu_init init, but before all other checks */ 112/* Run after command line and cpu_init init, but before all other checks */
82void __init nmi_watchdog_default(void) 113void __init nmi_watchdog_default(void)
83{ 114{
84 if (nmi_watchdog != NMI_DEFAULT) 115 if (nmi_watchdog != NMI_DEFAULT)
85 return; 116 return;
86 117 if (nmi_known_cpu())
87 /* For some reason the IO APIC watchdog doesn't work on the AMD 118 nmi_watchdog = NMI_LOCAL_APIC;
88 8111 chipset. For now switch to local APIC mode using 119 else
89 perfctr0 there. On Intel CPUs we don't have code to handle
90 the perfctr and the IO-APIC seems to work, so use that. */
91
92 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
93 nmi_watchdog = NMI_LOCAL_APIC;
94 printk(KERN_INFO
95 "Using local APIC NMI watchdog using perfctr0\n");
96 } else {
97 printk(KERN_INFO "Using IO APIC NMI watchdog\n");
98 nmi_watchdog = NMI_IO_APIC; 120 nmi_watchdog = NMI_IO_APIC;
99 }
100} 121}
101 122
102/* Why is there no CPUID flag for this? */ 123#ifdef CONFIG_SMP
103static __init int cpu_has_lapic(void) 124/* The performance counters used by NMI_LOCAL_APIC don't trigger when
125 * the CPU is idle. To make sure the NMI watchdog really ticks on all
126 * CPUs during the test make them busy.
127 */
128static __init void nmi_cpu_busy(void *data)
104{ 129{
105 switch (boot_cpu_data.x86_vendor) { 130 volatile int *endflag = data;
106 case X86_VENDOR_INTEL: 131 local_irq_enable();
107 case X86_VENDOR_AMD: 132 /* Intentionally don't use cpu_relax here. This is
108 return boot_cpu_data.x86 >= 6; 133 to make sure that the performance counter really ticks,
109 /* .... add more cpus here or find a different way to figure this out. */ 134 even if there is a simulator or similar that catches the
110 default: 135 pause instruction. On a real HT machine this is fine because
111 return 0; 136 all other CPUs are busy with "useless" delay loops and don't
112 } 137 care if they get somewhat less cycles. */
138 while (*endflag == 0)
139 barrier();
113} 140}
141#endif
114 142
115static int __init check_nmi_watchdog (void) 143int __init check_nmi_watchdog (void)
116{ 144{
117 int counts[NR_CPUS]; 145 volatile int endflag = 0;
146 int *counts;
118 int cpu; 147 int cpu;
119 148
120 if (nmi_watchdog == NMI_NONE) 149 counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
121 return 0; 150 if (!counts)
151 return -1;
122 152
123 if (nmi_watchdog == NMI_LOCAL_APIC && !cpu_has_lapic()) { 153 printk(KERN_INFO "testing NMI watchdog ... ");
124 nmi_watchdog = NMI_NONE;
125 return -1;
126 }
127 154
128 printk(KERN_INFO "Testing NMI watchdog ... "); 155 if (nmi_watchdog == NMI_LOCAL_APIC)
156 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
129 157
130 for (cpu = 0; cpu < NR_CPUS; cpu++) 158 for (cpu = 0; cpu < NR_CPUS; cpu++)
131 counts[cpu] = cpu_pda[cpu].__nmi_count; 159 counts[cpu] = cpu_pda[cpu].__nmi_count;
@@ -133,15 +161,22 @@ static int __init check_nmi_watchdog (void)
133 mdelay((10*1000)/nmi_hz); // wait 10 ticks 161 mdelay((10*1000)/nmi_hz); // wait 10 ticks
134 162
135 for (cpu = 0; cpu < NR_CPUS; cpu++) { 163 for (cpu = 0; cpu < NR_CPUS; cpu++) {
164 if (!cpu_online(cpu))
165 continue;
136 if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) { 166 if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) {
137 printk("CPU#%d: NMI appears to be stuck (%d)!\n", 167 endflag = 1;
168 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
138 cpu, 169 cpu,
170 counts[cpu],
139 cpu_pda[cpu].__nmi_count); 171 cpu_pda[cpu].__nmi_count);
140 nmi_active = 0; 172 nmi_active = 0;
141 lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; 173 lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
174 nmi_perfctr_msr = 0;
175 kfree(counts);
142 return -1; 176 return -1;
143 } 177 }
144 } 178 }
179 endflag = 1;
145 printk("OK.\n"); 180 printk("OK.\n");
146 181
147 /* now that we know it works we can reduce NMI frequency to 182 /* now that we know it works we can reduce NMI frequency to
@@ -149,10 +184,9 @@ static int __init check_nmi_watchdog (void)
149 if (nmi_watchdog == NMI_LOCAL_APIC) 184 if (nmi_watchdog == NMI_LOCAL_APIC)
150 nmi_hz = 1; 185 nmi_hz = 1;
151 186
187 kfree(counts);
152 return 0; 188 return 0;
153} 189}
154/* Have this called later during boot so counters are updating */
155late_initcall(check_nmi_watchdog);
156 190
157int __init setup_nmi_watchdog(char *str) 191int __init setup_nmi_watchdog(char *str)
158{ 192{
@@ -170,7 +204,7 @@ int __init setup_nmi_watchdog(char *str)
170 204
171 if (nmi >= NMI_INVALID) 205 if (nmi >= NMI_INVALID)
172 return 0; 206 return 0;
173 nmi_watchdog = nmi; 207 nmi_watchdog = nmi;
174 return 1; 208 return 1;
175} 209}
176 210
@@ -185,7 +219,10 @@ static void disable_lapic_nmi_watchdog(void)
185 wrmsr(MSR_K7_EVNTSEL0, 0, 0); 219 wrmsr(MSR_K7_EVNTSEL0, 0, 0);
186 break; 220 break;
187 case X86_VENDOR_INTEL: 221 case X86_VENDOR_INTEL:
188 wrmsr(MSR_IA32_EVNTSEL0, 0, 0); 222 if (boot_cpu_data.x86 == 15) {
223 wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
224 wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
225 }
189 break; 226 break;
190 } 227 }
191 nmi_active = -1; 228 nmi_active = -1;
@@ -253,7 +290,7 @@ void enable_timer_nmi_watchdog(void)
253 290
254static int nmi_pm_active; /* nmi_active before suspend */ 291static int nmi_pm_active; /* nmi_active before suspend */
255 292
256static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) 293static int lapic_nmi_suspend(struct sys_device *dev, u32 state)
257{ 294{
258 nmi_pm_active = nmi_active; 295 nmi_pm_active = nmi_active;
259 disable_lapic_nmi_watchdog(); 296 disable_lapic_nmi_watchdog();
@@ -300,22 +337,27 @@ late_initcall(init_lapic_nmi_sysfs);
300 * Original code written by Keith Owens. 337 * Original code written by Keith Owens.
301 */ 338 */
302 339
/*
 * Write zero to @n consecutive MSRs, starting at MSR number @base.
 * Nothing is written when @n is 0.
 */
static void clear_msr_range(unsigned int base, unsigned int n)
{
	unsigned int msr = base;
	unsigned int remaining = n;

	while (remaining != 0) {
		wrmsr(msr, 0, 0);
		msr++;
		remaining--;
	}
}
347
303static void setup_k7_watchdog(void) 348static void setup_k7_watchdog(void)
304{ 349{
305 int i; 350 int i;
306 unsigned int evntsel; 351 unsigned int evntsel;
307 352
308 /* No check, so can start with slow frequency */
309 nmi_hz = 1;
310
311 /* XXX should check these in EFER */
312
313 nmi_perfctr_msr = MSR_K7_PERFCTR0; 353 nmi_perfctr_msr = MSR_K7_PERFCTR0;
314 354
315 for(i = 0; i < 4; ++i) { 355 for(i = 0; i < 4; ++i) {
316 /* Simulator may not support it */ 356 /* Simulator may not support it */
317 if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) 357 if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) {
358 nmi_perfctr_msr = 0;
318 return; 359 return;
360 }
319 wrmsrl(MSR_K7_PERFCTR0+i, 0UL); 361 wrmsrl(MSR_K7_PERFCTR0+i, 0UL);
320 } 362 }
321 363
@@ -325,12 +367,54 @@ static void setup_k7_watchdog(void)
325 | K7_NMI_EVENT; 367 | K7_NMI_EVENT;
326 368
327 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); 369 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
328 wrmsrl(MSR_K7_PERFCTR0, -((u64)cpu_khz*1000) / nmi_hz); 370 wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
329 apic_write(APIC_LVTPC, APIC_DM_NMI); 371 apic_write(APIC_LVTPC, APIC_DM_NMI);
330 evntsel |= K7_EVNTSEL_ENABLE; 372 evntsel |= K7_EVNTSEL_ENABLE;
331 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); 373 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
332} 374}
333 375
376
377static int setup_p4_watchdog(void)
378{
379 unsigned int misc_enable, dummy;
380
381 rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
382 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
383 return 0;
384
385 nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
386 nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
387#ifdef CONFIG_SMP
388 if (smp_num_siblings == 2)
389 nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
390#endif
391
392 if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
393 clear_msr_range(0x3F1, 2);
394 /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
395 docs doesn't fully define it, so leave it alone for now. */
396 if (boot_cpu_data.x86_model >= 0x3) {
397 /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
398 clear_msr_range(0x3A0, 26);
399 clear_msr_range(0x3BC, 3);
400 } else {
401 clear_msr_range(0x3A0, 31);
402 }
403 clear_msr_range(0x3C0, 6);
404 clear_msr_range(0x3C8, 6);
405 clear_msr_range(0x3E0, 2);
406 clear_msr_range(MSR_P4_CCCR0, 18);
407 clear_msr_range(MSR_P4_PERFCTR0, 18);
408
409 wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
410 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
411 Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
412 wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
413 apic_write(APIC_LVTPC, APIC_DM_NMI);
414 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
415 return 1;
416}
417
334void setup_apic_nmi_watchdog(void) 418void setup_apic_nmi_watchdog(void)
335{ 419{
336 switch (boot_cpu_data.x86_vendor) { 420 switch (boot_cpu_data.x86_vendor) {
@@ -341,6 +425,13 @@ void setup_apic_nmi_watchdog(void)
341 return; 425 return;
342 setup_k7_watchdog(); 426 setup_k7_watchdog();
343 break; 427 break;
428 case X86_VENDOR_INTEL:
429 if (boot_cpu_data.x86 != 15)
430 return;
431 if (!setup_p4_watchdog())
432 return;
433 break;
434
344 default: 435 default:
345 return; 436 return;
346 } 437 }
@@ -355,56 +446,67 @@ void setup_apic_nmi_watchdog(void)
355 * 446 *
356 * as these watchdog NMI IRQs are generated on every CPU, we only 447 * as these watchdog NMI IRQs are generated on every CPU, we only
357 * have to check the current processor. 448 * have to check the current processor.
358 *
359 * since NMIs don't listen to _any_ locks, we have to be extremely
360 * careful not to rely on unsafe variables. The printk might lock
361 * up though, so we have to break up any console locks first ...
362 * [when there will be more tty-related locks, break them up
363 * here too!]
364 */ 449 */
365 450
366static unsigned int 451static DEFINE_PER_CPU(unsigned, last_irq_sum);
367 last_irq_sums [NR_CPUS], 452static DEFINE_PER_CPU(local_t, alert_counter);
368 alert_counter [NR_CPUS]; 453static DEFINE_PER_CPU(int, nmi_touch);
369 454
370void touch_nmi_watchdog (void) 455void touch_nmi_watchdog (void)
371{ 456{
372 int i; 457 int i;
373 458
374 /* 459 /*
375 * Just reset the alert counters, (other CPUs might be 460 * Tell other CPUs to reset their alert counters. We cannot
376 * spinning on locks we hold): 461 * do it ourselves because the alert count increase is not
462 * atomic.
377 */ 463 */
378 for (i = 0; i < NR_CPUS; i++) 464 for (i = 0; i < NR_CPUS; i++)
379 alert_counter[i] = 0; 465 per_cpu(nmi_touch, i) = 1;
380} 466}
381 467
382void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) 468void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
383{ 469{
384 int sum, cpu; 470 int sum;
471 int touched = 0;
385 472
386 cpu = safe_smp_processor_id();
387 sum = read_pda(apic_timer_irqs); 473 sum = read_pda(apic_timer_irqs);
388 if (last_irq_sums[cpu] == sum) { 474 if (__get_cpu_var(nmi_touch)) {
475 __get_cpu_var(nmi_touch) = 0;
476 touched = 1;
477 }
478 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
389 /* 479 /*
390 * Ayiee, looks like this CPU is stuck ... 480 * Ayiee, looks like this CPU is stuck ...
391 * wait a few IRQs (5 seconds) before doing the oops ... 481 * wait a few IRQs (5 seconds) before doing the oops ...
392 */ 482 */
393 alert_counter[cpu]++; 483 local_inc(&__get_cpu_var(alert_counter));
394 if (alert_counter[cpu] == 5*nmi_hz) { 484 if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) {
395 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) 485 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
396 == NOTIFY_STOP) { 486 == NOTIFY_STOP) {
397 alert_counter[cpu] = 0; 487 local_set(&__get_cpu_var(alert_counter), 0);
398 return; 488 return;
399 } 489 }
400 die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs); 490 die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs);
401 } 491 }
402 } else { 492 } else {
403 last_irq_sums[cpu] = sum; 493 __get_cpu_var(last_irq_sum) = sum;
404 alert_counter[cpu] = 0; 494 local_set(&__get_cpu_var(alert_counter), 0);
405 } 495 }
406 if (nmi_perfctr_msr) 496 if (nmi_perfctr_msr) {
497 if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
498 /*
499 * P4 quirks:
500 * - An overflown perfctr will assert its interrupt
501 * until the OVF flag in its CCCR is cleared.
502 * - LVTPC is masked on interrupt and must be
503 * unmasked by the LVTPC handler.
504 */
505 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
506 apic_write(APIC_LVTPC, APIC_DM_NMI);
507 }
407 wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); 508 wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
509 }
408} 510}
409 511
410static int dummy_nmi_callback(struct pt_regs * regs, int cpu) 512static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c
new file mode 100644
index 000000000000..feb5f108dd26
--- /dev/null
+++ b/arch/x86_64/kernel/pmtimer.c
@@ -0,0 +1,101 @@
1/* Ported over from i386 by AK, original copyright was:
2 *
3 * (C) Dominik Brodowski <linux@brodo.de> 2003
4 *
5 * Driver to use the Power Management Timer (PMTMR) available in some
6 * southbridges as primary timing source for the Linux kernel.
7 *
8 * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
9 * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
10 *
11 * This file is licensed under the GPL v2.
12 *
13 * Dropped all the hardware bug workarounds for now. Hopefully they
14 * are not needed on 64bit chipsets.
15 */
16
17#include <linux/jiffies.h>
18#include <linux/kernel.h>
19#include <linux/time.h>
20#include <linux/init.h>
21#include <linux/cpumask.h>
22#include <asm/io.h>
23#include <asm/proto.h>
24#include <asm/msr.h>
25#include <asm/vsyscall.h>
26
27/* The I/O port the PMTMR resides at.
28 * The location is detected during setup_arch(),
29 * in arch/i386/kernel/acpi/boot.c */
30u32 pmtmr_ioport;
31
32/* value of the Power timer at last timer interrupt */
33static u32 offset_delay;
34static u32 last_pmtmr_tick;
35
36#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
37
/*
 * Convert a Power Management Timer tick delta to microseconds.
 *
 * The Power Management Timer ticks at 3.579545 ticks per microsecond.
 * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
 *
 * @cycles: tick delta; the callers in this file mask it with
 *          ACPI_PM_MASK, so it is at most 24 bits wide.
 *
 * Returns cycles * 286 / 1024, rounded down.
 */
static inline u32 cyc2us(u32 cycles)
{
	/*
	 * Widen before multiplying.  With HZ = 100 a normal delta is at
	 * most 35796 ticks, but a full 24-bit delta times 286 does not fit
	 * in 32 bits: anything above 2^32 / 286 (about 15.0M ticks) would
	 * wrap and be reported as a much shorter interval.
	 */
	return (u32)(((u64)cycles * 286) >> 10);
}
50
51int pmtimer_mark_offset(void)
52{
53 static int first_run = 1;
54 unsigned long tsc;
55 u32 lost;
56
57 u32 tick = inl(pmtmr_ioport);
58 u32 delta;
59
60 delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK);
61
62 last_pmtmr_tick = tick;
63 monotonic_base += delta * NSEC_PER_USEC;
64
65 delta += offset_delay;
66
67 lost = delta / (USEC_PER_SEC / HZ);
68 offset_delay = delta % (USEC_PER_SEC / HZ);
69
70 rdtscll(tsc);
71 vxtime.last_tsc = tsc - offset_delay * cpu_khz;
72
73 /* don't calculate delay for first run,
74 or if we've got less then a tick */
75 if (first_run || (lost < 1)) {
76 first_run = 0;
77 offset_delay = 0;
78 }
79
80 return lost - 1;
81}
82
83unsigned int do_gettimeoffset_pm(void)
84{
85 u32 now, offset, delta = 0;
86
87 offset = last_pmtmr_tick;
88 now = inl(pmtmr_ioport);
89 delta = (now - offset) & ACPI_PM_MASK;
90
91 return offset_delay + cyc2us(delta);
92}
93
94
95static int __init nopmtimer_setup(char *s)
96{
97 pmtmr_ioport = 0;
98 return 0;
99}
100
101__setup("nopmtimer", nopmtimer_setup);
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c
index e26e86bb56fe..525f6a128a27 100644
--- a/arch/x86_64/kernel/ptrace.c
+++ b/arch/x86_64/kernel/ptrace.c
@@ -257,13 +257,13 @@ static int putreg(struct task_struct *child,
257 value &= 0xffff; 257 value &= 0xffff;
258 return 0; 258 return 0;
259 case offsetof(struct user_regs_struct,fs_base): 259 case offsetof(struct user_regs_struct,fs_base):
260 if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) 260 if (value >= TASK_SIZE)
261 return -EIO; 261 return -EIO;
262 child->thread.fs = value; 262 child->thread.fs = value;
263 return 0; 263 return 0;
264 case offsetof(struct user_regs_struct,gs_base): 264 case offsetof(struct user_regs_struct,gs_base):
265 if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) 265 if (value >= TASK_SIZE)
266 return -EIO; 266 return -EIO;
267 child->thread.gs = value; 267 child->thread.gs = value;
268 return 0; 268 return 0;
269 case offsetof(struct user_regs_struct, eflags): 269 case offsetof(struct user_regs_struct, eflags):
@@ -277,6 +277,11 @@ static int putreg(struct task_struct *child,
277 return -EIO; 277 return -EIO;
278 value &= 0xffff; 278 value &= 0xffff;
279 break; 279 break;
280 case offsetof(struct user_regs_struct, rip):
281 /* Check if the new RIP address is canonical */
282 if (value >= TASK_SIZE)
283 return -EIO;
284 break;
280 } 285 }
281 put_stack_long(child, regno - sizeof(struct pt_regs), value); 286 put_stack_long(child, regno - sizeof(struct pt_regs), value);
282 return 0; 287 return 0;
@@ -375,7 +380,7 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data
375 break; 380 break;
376 381
377 switch (addr) { 382 switch (addr) {
378 case 0 ... sizeof(struct user_regs_struct): 383 case 0 ... sizeof(struct user_regs_struct) - sizeof(long):
379 tmp = getreg(child, addr); 384 tmp = getreg(child, addr);
380 break; 385 break;
381 case offsetof(struct user, u_debugreg[0]): 386 case offsetof(struct user, u_debugreg[0]):
@@ -420,7 +425,7 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data
420 break; 425 break;
421 426
422 switch (addr) { 427 switch (addr) {
423 case 0 ... sizeof(struct user_regs_struct): 428 case 0 ... sizeof(struct user_regs_struct) - sizeof(long):
424 ret = putreg(child, addr, data); 429 ret = putreg(child, addr, data);
425 break; 430 break;
426 /* Disallows to set a breakpoint into the vsyscall */ 431 /* Disallows to set a breakpoint into the vsyscall */
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 2129cf9ba6b2..99f038ede23c 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -719,7 +719,6 @@ static void __init display_cacheinfo(struct cpuinfo_x86 *c)
719 } 719 }
720} 720}
721 721
722#ifdef CONFIG_SMP
723/* 722/*
724 * On an AMD dual core setup the lower bits of the APIC id distinguish the cores. 723 * On an AMD dual core setup the lower bits of the APIC id distinguish the cores.
725 * Assumes number of cores is a power of two. 724 * Assumes number of cores is a power of two.
@@ -727,17 +726,26 @@ static void __init display_cacheinfo(struct cpuinfo_x86 *c)
727static void __init amd_detect_cmp(struct cpuinfo_x86 *c) 726static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
728{ 727{
729#ifdef CONFIG_SMP 728#ifdef CONFIG_SMP
730 int cpu = c->x86_apicid; 729 int cpu = smp_processor_id();
731 int node = 0; 730 int node = 0;
731 unsigned bits;
732 if (c->x86_num_cores == 1) 732 if (c->x86_num_cores == 1)
733 return; 733 return;
734 cpu_core_id[cpu] = cpu >> hweight32(c->x86_num_cores - 1); 734
735 bits = 0;
736 while ((1 << bits) < c->x86_num_cores)
737 bits++;
738
739 /* Low order bits define the core id (index of core in socket) */
740 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
741 /* Convert the APIC ID into the socket ID */
742 phys_proc_id[cpu] >>= bits;
735 743
736#ifdef CONFIG_NUMA 744#ifdef CONFIG_NUMA
737 /* When an ACPI SRAT table is available use the mappings from SRAT 745 /* When an ACPI SRAT table is available use the mappings from SRAT
738 instead. */ 746 instead. */
739 if (acpi_numa <= 0) { 747 if (acpi_numa <= 0) {
740 node = cpu_core_id[cpu]; 748 node = phys_proc_id[cpu];
741 if (!node_online(node)) 749 if (!node_online(node))
742 node = first_node(node_online_map); 750 node = first_node(node_online_map);
743 cpu_to_node[cpu] = node; 751 cpu_to_node[cpu] = node;
@@ -745,15 +753,11 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
745 node = cpu_to_node[cpu]; 753 node = cpu_to_node[cpu];
746 } 754 }
747#endif 755#endif
756
748 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", 757 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
749 cpu, c->x86_num_cores, node, cpu_core_id[cpu]); 758 cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
750#endif 759#endif
751} 760}
752#else
753static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
754{
755}
756#endif
757 761
758static int __init init_amd(struct cpuinfo_x86 *c) 762static int __init init_amd(struct cpuinfo_x86 *c)
759{ 763{
@@ -925,7 +929,6 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c)
925 c->x86_clflush_size = 64; 929 c->x86_clflush_size = 64;
926 c->x86_cache_alignment = c->x86_clflush_size; 930 c->x86_cache_alignment = c->x86_clflush_size;
927 c->x86_num_cores = 1; 931 c->x86_num_cores = 1;
928 c->x86_apicid = c == &boot_cpu_data ? 0 : c - cpu_data;
929 c->extended_cpuid_level = 0; 932 c->extended_cpuid_level = 0;
930 memset(&c->x86_capability, 0, sizeof c->x86_capability); 933 memset(&c->x86_capability, 0, sizeof c->x86_capability);
931 934
@@ -954,11 +957,14 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c)
954 } 957 }
955 if (c->x86_capability[0] & (1<<19)) 958 if (c->x86_capability[0] & (1<<19))
956 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; 959 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
957 c->x86_apicid = misc >> 24;
958 } else { 960 } else {
959 /* Have CPUID level 0 only - unheard of */ 961 /* Have CPUID level 0 only - unheard of */
960 c->x86 = 4; 962 c->x86 = 4;
961 } 963 }
964
965#ifdef CONFIG_SMP
966 phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
967#endif
962} 968}
963 969
964/* 970/*
@@ -1088,7 +1094,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1088 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1094 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1089 1095
1090 /* Other (Linux-defined) */ 1096 /* Other (Linux-defined) */
1091 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", "k8c+", 1097 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
1092 "constant_tsc", NULL, NULL, 1098 "constant_tsc", NULL, NULL,
1093 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1099 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1094 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1100 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index d439ced150c6..3fdcdba0fec5 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -452,7 +452,9 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
452 regs->rip -= 2; 452 regs->rip -= 2;
453 } 453 }
454 if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) { 454 if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) {
455 regs->rax = __NR_restart_syscall; 455 regs->rax = test_thread_flag(TIF_IA32) ?
456 __NR_ia32_restart_syscall :
457 __NR_restart_syscall;
456 regs->rip -= 2; 458 regs->rip -= 2;
457 } 459 }
458 } 460 }
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 73f7e8b9543a..f1ec0f345941 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -56,6 +56,7 @@
56#include <asm/kdebug.h> 56#include <asm/kdebug.h>
57#include <asm/tlbflush.h> 57#include <asm/tlbflush.h>
58#include <asm/proto.h> 58#include <asm/proto.h>
59#include <asm/nmi.h>
59 60
60/* Change for real CPU hotplug. Note other files need to be fixed 61/* Change for real CPU hotplug. Note other files need to be fixed
61 first too. */ 62 first too. */
@@ -93,6 +94,7 @@ int smp_threads_ready;
93 94
94cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; 95cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
95cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; 96cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
97EXPORT_SYMBOL(cpu_core_map);
96 98
97/* 99/*
98 * Trampoline 80x86 program as an array. 100 * Trampoline 80x86 program as an array.
@@ -125,96 +127,210 @@ static void __cpuinit smp_store_cpu_info(int id)
125 127
126 *c = boot_cpu_data; 128 *c = boot_cpu_data;
127 identify_cpu(c); 129 identify_cpu(c);
130 print_cpu_info(c);
128} 131}
129 132
130/* 133/*
131 * Synchronize TSCs of CPUs 134 * New Funky TSC sync algorithm borrowed from IA64.
135 * Main advantage is that it doesn't reset the TSCs fully and
136 * in general looks more robust and it works better than my earlier
137 * attempts. I believe it was written by David Mosberger. Some minor
138 * adjustments for x86-64 by me -AK
132 * 139 *
133 * This new algorithm is less accurate than the old "zero TSCs" 140 * Original comment reproduced below.
134 * one, but we cannot zero TSCs anymore in the new hotplug CPU 141 *
135 * model. 142 * Synchronize TSC of the current (slave) CPU with the TSC of the
143 * MASTER CPU (normally the time-keeper CPU). We use a closed loop to
144 * eliminate the possibility of unaccounted-for errors (such as
145 * getting a machine check in the middle of a calibration step). The
146 * basic idea is for the slave to ask the master what itc value it has
147 * and to read its own itc before and after the master responds. Each
148 * iteration gives us three timestamps:
149 *
150 * slave master
151 *
152 * t0 ---\
153 * ---\
154 * --->
155 * tm
156 * /---
157 * /---
158 * t1 <---
159 *
160 *
161 * The goal is to adjust the slave's TSC such that tm falls exactly
162 * half-way between t0 and t1. If we achieve this, the clocks are
163 * synchronized provided the interconnect between the slave and the
164 * master is symmetric. Even if the interconnect were asymmetric, we
165 * would still know that the synchronization error is smaller than the
166 * roundtrip latency (t1 - t0).
167 *
168 * When the interconnect is quiet and symmetric, this lets us
169 * synchronize the TSC to within one or two cycles. However, we can
170 * only *guarantee* that the synchronization is accurate to within a
171 * round-trip time, which is typically in the range of several hundred
172 * cycles (e.g., ~500 cycles). In practice, this means that the TSCs
173 * are usually almost perfectly synchronized, but we shouldn't assume
174 * that the accuracy is much better than half a microsecond or so.
175 *
176 * [there are other errors like the latency of RDTSC and of the
177 * WRMSR. These can also account to hundreds of cycles. So it's
178 * probably worse. It claims 153 cycles error on a dual Opteron,
179 * but I suspect the numbers are actually somewhat worse -AK]
136 */ 180 */
137 181
138static atomic_t __cpuinitdata tsc_flag; 182#define MASTER 0
183#define SLAVE (SMP_CACHE_BYTES/8)
184
185/* Intentionally don't use cpu_relax() while TSC synchronization
186 because we don't want to go into funky power save modes or cause
187 hypervisors to schedule us away. Going to sleep would likely affect
188 latency and low latency is the primary objective here. -AK */
189#define no_cpu_relax() barrier()
190
139static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock); 191static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
140static unsigned long long __cpuinitdata bp_tsc, ap_tsc; 192static volatile __cpuinitdata unsigned long go[SLAVE + 1];
193static int notscsync __cpuinitdata;
194
195#undef DEBUG_TSC_SYNC
141 196
142#define NR_LOOPS 5 197#define NUM_ROUNDS 64 /* magic value */
198#define NUM_ITERS 5 /* likewise */
143 199
144static void __cpuinit sync_tsc_bp_init(int init) 200/* Callback on boot CPU */
201static __cpuinit void sync_master(void *arg)
145{ 202{
146 if (init) 203 unsigned long flags, i;
147 _raw_spin_lock(&tsc_sync_lock); 204
148 else 205 if (smp_processor_id() != boot_cpu_id)
149 _raw_spin_unlock(&tsc_sync_lock); 206 return;
150 atomic_set(&tsc_flag, 0); 207
208 go[MASTER] = 0;
209
210 local_irq_save(flags);
211 {
212 for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
213 while (!go[MASTER])
214 no_cpu_relax();
215 go[MASTER] = 0;
216 rdtscll(go[SLAVE]);
217 }
218 }
219 local_irq_restore(flags);
151} 220}
152 221
153/* 222/*
154 * Synchronize TSC on AP with BP. 223 * Return the number of cycles by which our tsc differs from the tsc
224 * on the master (time-keeper) CPU. A positive number indicates our
225 * tsc is ahead of the master, negative that it is behind.
155 */ 226 */
156static void __cpuinit __sync_tsc_ap(void) 227static inline long
228get_delta(long *rt, long *master)
157{ 229{
158 if (!cpu_has_tsc) 230 unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
159 return; 231 unsigned long tcenter, t0, t1, tm;
160 Dprintk("AP %d syncing TSC\n", smp_processor_id()); 232 int i;
161 233
162 while (atomic_read(&tsc_flag) != 0) 234 for (i = 0; i < NUM_ITERS; ++i) {
163 cpu_relax(); 235 rdtscll(t0);
164 atomic_inc(&tsc_flag); 236 go[MASTER] = 1;
165 mb(); 237 while (!(tm = go[SLAVE]))
166 _raw_spin_lock(&tsc_sync_lock); 238 no_cpu_relax();
167 wrmsrl(MSR_IA32_TSC, bp_tsc); 239 go[SLAVE] = 0;
168 _raw_spin_unlock(&tsc_sync_lock); 240 rdtscll(t1);
169 rdtscll(ap_tsc); 241
170 mb(); 242 if (t1 - t0 < best_t1 - best_t0)
171 atomic_inc(&tsc_flag); 243 best_t0 = t0, best_t1 = t1, best_tm = tm;
172 mb(); 244 }
245
246 *rt = best_t1 - best_t0;
247 *master = best_tm - best_t0;
248
249 /* average best_t0 and best_t1 without overflow: */
250 tcenter = (best_t0/2 + best_t1/2);
251 if (best_t0 % 2 + best_t1 % 2 == 2)
252 ++tcenter;
253 return tcenter - best_tm;
173} 254}
174 255
175static void __cpuinit sync_tsc_ap(void) 256static __cpuinit void sync_tsc(void)
176{ 257{
177 int i; 258 int i, done = 0;
178 for (i = 0; i < NR_LOOPS; i++) 259 long delta, adj, adjust_latency = 0;
179 __sync_tsc_ap(); 260 unsigned long flags, rt, master_time_stamp, bound;
261#if DEBUG_TSC_SYNC
262 static struct syncdebug {
263 long rt; /* roundtrip time */
264 long master; /* master's timestamp */
265 long diff; /* difference between midpoint and master's timestamp */
266 long lat; /* estimate of tsc adjustment latency */
267 } t[NUM_ROUNDS] __cpuinitdata;
268#endif
269
270 go[MASTER] = 1;
271
272 smp_call_function(sync_master, NULL, 1, 0);
273
274 while (go[MASTER]) /* wait for master to be ready */
275 no_cpu_relax();
276
277 spin_lock_irqsave(&tsc_sync_lock, flags);
278 {
279 for (i = 0; i < NUM_ROUNDS; ++i) {
280 delta = get_delta(&rt, &master_time_stamp);
281 if (delta == 0) {
282 done = 1; /* let's lock on to this... */
283 bound = rt;
284 }
285
286 if (!done) {
287 unsigned long t;
288 if (i > 0) {
289 adjust_latency += -delta;
290 adj = -delta + adjust_latency/4;
291 } else
292 adj = -delta;
293
294 rdtscll(t);
295 wrmsrl(MSR_IA32_TSC, t + adj);
296 }
297#if DEBUG_TSC_SYNC
298 t[i].rt = rt;
299 t[i].master = master_time_stamp;
300 t[i].diff = delta;
301 t[i].lat = adjust_latency/4;
302#endif
303 }
304 }
305 spin_unlock_irqrestore(&tsc_sync_lock, flags);
306
307#if DEBUG_TSC_SYNC
308 for (i = 0; i < NUM_ROUNDS; ++i)
309 printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
310 t[i].rt, t[i].master, t[i].diff, t[i].lat);
311#endif
312
313 printk(KERN_INFO
314 "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
315 "maxerr %lu cycles)\n",
316 smp_processor_id(), boot_cpu_id, delta, rt);
180} 317}
181 318
182/* 319static void __cpuinit tsc_sync_wait(void)
183 * Synchronize TSC from BP to AP.
184 */
185static void __cpuinit __sync_tsc_bp(int cpu)
186{ 320{
187 if (!cpu_has_tsc) 321 if (notscsync || !cpu_has_tsc)
188 return; 322 return;
189 323 printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
190 /* Wait for AP */ 324 boot_cpu_id);
191 while (atomic_read(&tsc_flag) == 0) 325 sync_tsc();
192 cpu_relax();
193 /* Save BPs TSC */
194 sync_core();
195 rdtscll(bp_tsc);
196 /* Don't do the sync core here to avoid too much latency. */
197 mb();
198 /* Start the AP */
199 _raw_spin_unlock(&tsc_sync_lock);
200 /* Wait for AP again */
201 while (atomic_read(&tsc_flag) < 2)
202 cpu_relax();
203 rdtscl(bp_tsc);
204 barrier();
205} 326}
206 327
207static void __cpuinit sync_tsc_bp(int cpu) 328static __init int notscsync_setup(char *s)
208{ 329{
209 int i; 330 notscsync = 1;
210 for (i = 0; i < NR_LOOPS - 1; i++) { 331 return 0;
211 __sync_tsc_bp(cpu);
212 sync_tsc_bp_init(1);
213 }
214 __sync_tsc_bp(cpu);
215 printk(KERN_INFO "Synced TSC of CPU %d difference %Ld\n",
216 cpu, ap_tsc - bp_tsc);
217} 332}
333__setup("notscsync", notscsync_setup);
218 334
219static atomic_t init_deasserted __cpuinitdata; 335static atomic_t init_deasserted __cpuinitdata;
220 336
@@ -315,11 +431,6 @@ void __cpuinit start_secondary(void)
315 cpu_init(); 431 cpu_init();
316 smp_callin(); 432 smp_callin();
317 433
318 /*
319 * Synchronize the TSC with the BP
320 */
321 sync_tsc_ap();
322
323 /* otherwise gcc will move up the smp_processor_id before the cpu_init */ 434 /* otherwise gcc will move up the smp_processor_id before the cpu_init */
324 barrier(); 435 barrier();
325 436
@@ -334,7 +445,6 @@ void __cpuinit start_secondary(void)
334 enable_8259A_irq(0); 445 enable_8259A_irq(0);
335 } 446 }
336 447
337
338 enable_APIC_timer(); 448 enable_APIC_timer();
339 449
340 /* 450 /*
@@ -343,6 +453,11 @@ void __cpuinit start_secondary(void)
343 cpu_set(smp_processor_id(), cpu_online_map); 453 cpu_set(smp_processor_id(), cpu_online_map);
344 mb(); 454 mb();
345 455
456 /* Wait for TSC sync to not schedule things before.
457 We still process interrupts, which could see an inconsistent
458 time in that window unfortunately. */
459 tsc_sync_wait();
460
346 cpu_idle(); 461 cpu_idle();
347} 462}
348 463
@@ -531,7 +646,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
531 printk("failed fork for CPU %d\n", cpu); 646 printk("failed fork for CPU %d\n", cpu);
532 return PTR_ERR(idle); 647 return PTR_ERR(idle);
533 } 648 }
534 x86_cpu_to_apicid[cpu] = apicid;
535 649
536 cpu_pda[cpu].pcurrent = idle; 650 cpu_pda[cpu].pcurrent = idle;
537 651
@@ -600,8 +714,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
600 714
601 if (cpu_isset(cpu, cpu_callin_map)) { 715 if (cpu_isset(cpu, cpu_callin_map)) {
602 /* number CPUs logically, starting from 1 (BSP is 0) */ 716 /* number CPUs logically, starting from 1 (BSP is 0) */
603 Dprintk("OK.\n");
604 print_cpu_info(&cpu_data[cpu]);
605 Dprintk("CPU has booted.\n"); 717 Dprintk("CPU has booted.\n");
606 } else { 718 } else {
607 boot_error = 1; 719 boot_error = 1;
@@ -842,7 +954,6 @@ void __cpuinit smp_prepare_cpus(unsigned int max_cpus)
842 GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id); 954 GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
843 /* Or can we switch back to PIC here? */ 955 /* Or can we switch back to PIC here? */
844 } 956 }
845 x86_cpu_to_apicid[0] = boot_cpu_id;
846 957
847 /* 958 /*
848 * Now start the IO-APICs 959 * Now start the IO-APICs
@@ -889,18 +1000,14 @@ int __cpuinit __cpu_up(unsigned int cpu)
889 printk("__cpu_up: bad cpu %d\n", cpu); 1000 printk("__cpu_up: bad cpu %d\n", cpu);
890 return -EINVAL; 1001 return -EINVAL;
891 } 1002 }
892 sync_tsc_bp_init(1);
893 1003
894 /* Boot it! */ 1004 /* Boot it! */
895 err = do_boot_cpu(cpu, apicid); 1005 err = do_boot_cpu(cpu, apicid);
896 if (err < 0) { 1006 if (err < 0) {
897 sync_tsc_bp_init(0);
898 Dprintk("do_boot_cpu failed %d\n", err); 1007 Dprintk("do_boot_cpu failed %d\n", err);
899 return err; 1008 return err;
900 } 1009 }
901 1010
902 sync_tsc_bp(cpu);
903
904 /* Unleash the CPU! */ 1011 /* Unleash the CPU! */
905 Dprintk("waiting for cpu %d\n", cpu); 1012 Dprintk("waiting for cpu %d\n", cpu);
906 1013
@@ -923,4 +1030,6 @@ void __cpuinit smp_cpus_done(unsigned int max_cpus)
923 1030
924 detect_siblings(); 1031 detect_siblings();
925 time_init_gtod(); 1032 time_init_gtod();
1033
1034 check_nmi_watchdog();
926} 1035}
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 0e3b60e1ac5b..735b6767c8ed 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -26,6 +26,8 @@
26#include <linux/sysdev.h> 26#include <linux/sysdev.h>
27#include <linux/bcd.h> 27#include <linux/bcd.h>
28#include <linux/kallsyms.h> 28#include <linux/kallsyms.h>
29#include <linux/acpi.h>
30#include <acpi/achware.h> /* for PM timer frequency */
29#include <asm/8253pit.h> 31#include <asm/8253pit.h>
30#include <asm/pgtable.h> 32#include <asm/pgtable.h>
31#include <asm/vsyscall.h> 33#include <asm/vsyscall.h>
@@ -396,6 +398,10 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
396 (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; 398 (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick;
397 399
398 vxtime.last = offset; 400 vxtime.last = offset;
401#ifdef CONFIG_X86_PM_TIMER
402 } else if (vxtime.mode == VXTIME_PMTMR) {
403 lost = pmtimer_mark_offset();
404#endif
399 } else { 405 } else {
400 offset = (((tsc - vxtime.last_tsc) * 406 offset = (((tsc - vxtime.last_tsc) *
401 vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); 407 vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ);
@@ -898,6 +904,13 @@ void __init time_init(void)
898 hpet_period; 904 hpet_period;
899 cpu_khz = hpet_calibrate_tsc(); 905 cpu_khz = hpet_calibrate_tsc();
900 timename = "HPET"; 906 timename = "HPET";
907#ifdef CONFIG_X86_PM_TIMER
908 } else if (pmtmr_ioport) {
909 vxtime_hz = PM_TIMER_FREQUENCY;
910 timename = "PM";
911 pit_init();
912 cpu_khz = pit_calibrate_tsc();
913#endif
901 } else { 914 } else {
902 pit_init(); 915 pit_init();
903 cpu_khz = pit_calibrate_tsc(); 916 cpu_khz = pit_calibrate_tsc();
@@ -923,35 +936,50 @@ void __init time_init(void)
923} 936}
924 937
925/* 938/*
939 * Make an educated guess if the TSC is trustworthy and synchronized
940 * over all CPUs.
941 */
942static __init int unsynchronized_tsc(void)
943{
 /* Returns nonzero when the TSC should be treated as unsynchronized
    (time_init_gtod() then sets notsc), 0 when it looks usable. */
944#ifdef CONFIG_SMP
 /* OEM-specific override is checked first and wins over the vendor test. */
945 if (oem_force_hpet_timer())
946 return 1;
947 /* Intel systems are normally all synchronized. Exceptions
948 are handled in the OEM check above. */
949 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
950 return 0;
951 /* All in a single socket - should be synchronized */
 /* i.e. CPU 0's core map already contains every online CPU */
952 if (cpus_weight(cpu_core_map[0]) == num_online_cpus())
953 return 0;
954#endif
 /* Without CONFIG_SMP only this check is compiled in. */
955 /* Assume multi socket systems are not synchronized */
956 return num_online_cpus() > 1;
957}
958
959/*
926 * Decide after all CPUs are booted what mode gettimeofday should use. 960 * Decide after all CPUs are booted what mode gettimeofday should use.
927 */ 961 */
928void __init time_init_gtod(void) 962void __init time_init_gtod(void)
929{ 963{
930 char *timetype; 964 char *timetype;
931 965
932 /* 966 if (unsynchronized_tsc())
933 * AMD systems with more than one CPU don't have fully synchronized
934 * TSCs. Always use HPET gettimeofday for these, although it is slower.
935 * Intel SMP systems usually have synchronized TSCs, so use always
936 * the TSC.
937 *
938 * Exceptions:
939 * IBM Summit2 checked by oem_force_hpet_timer().
940 * AMD dual core may also not need HPET. Check me.
941 *
942 * Can be turned off with "notsc".
943 */
944 if (num_online_cpus() > 1 &&
945 boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
946 notsc = 1;
947 /* Some systems will want to disable TSC and use HPET. */
948 if (oem_force_hpet_timer())
949 notsc = 1; 967 notsc = 1;
950 if (vxtime.hpet_address && notsc) { 968 if (vxtime.hpet_address && notsc) {
951 timetype = "HPET"; 969 timetype = "HPET";
952 vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; 970 vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
953 vxtime.mode = VXTIME_HPET; 971 vxtime.mode = VXTIME_HPET;
954 do_gettimeoffset = do_gettimeoffset_hpet; 972 do_gettimeoffset = do_gettimeoffset_hpet;
973#ifdef CONFIG_X86_PM_TIMER
974 /* Using PM for gettimeofday is quite slow, but we have no other
975 choice because the TSC is too unreliable on some systems. */
976 } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) {
977 timetype = "PM";
978 do_gettimeoffset = do_gettimeoffset_pm;
979 vxtime.mode = VXTIME_PMTMR;
980 sysctl_vsyscall = 0;
981 printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n");
982#endif
955 } else { 983 } else {
956 timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC"; 984 timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC";
957 vxtime.mode = VXTIME_TSC; 985 vxtime.mode = VXTIME_TSC;
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index b4b8dc59663a..2e5734425949 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -65,7 +65,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv)
65 usec = (__xtime.tv_nsec / 1000) + 65 usec = (__xtime.tv_nsec / 1000) +
66 (__jiffies - __wall_jiffies) * (1000000 / HZ); 66 (__jiffies - __wall_jiffies) * (1000000 / HZ);
67 67
68 if (__vxtime.mode == VXTIME_TSC) { 68 if (__vxtime.mode != VXTIME_HPET) {
69 sync_core(); 69 sync_core();
70 rdtscll(t); 70 rdtscll(t);
71 if (t < __vxtime.last_tsc) 71 if (t < __vxtime.last_tsc)
@@ -217,8 +217,9 @@ static int __init vsyscall_init(void)
217 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); 217 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
218 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); 218 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
219 map_vsyscall(); 219 map_vsyscall();
220 sysctl_vsyscall = 1; 220#ifdef CONFIG_SYSCTL
221 register_sysctl_table(kernel_root_table2, 0); 221 register_sysctl_table(kernel_root_table2, 0);
222#endif
222 return 0; 223 return 0;
223} 224}
224 225
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
index 88626e626886..a43dedb58fa2 100644
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ b/arch/x86_64/kernel/x8664_ksyms.c
@@ -139,35 +139,23 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback);
139#undef memmove 139#undef memmove
140#undef memchr 140#undef memchr
141#undef strlen 141#undef strlen
142#undef strcpy
143#undef strncmp 142#undef strncmp
144#undef strncpy 143#undef strncpy
145#undef strchr 144#undef strchr
146#undef strcmp
147#undef strcpy
148#undef strcat
149#undef memcmp
150 145
151extern void * memset(void *,int,__kernel_size_t); 146extern void * memset(void *,int,__kernel_size_t);
152extern size_t strlen(const char *); 147extern size_t strlen(const char *);
153extern void * memmove(void * dest,const void *src,size_t count); 148extern void * memmove(void * dest,const void *src,size_t count);
154extern char * strcpy(char * dest,const char *src);
155extern int strcmp(const char * cs,const char * ct);
156extern void *memchr(const void *s, int c, size_t n); 149extern void *memchr(const void *s, int c, size_t n);
157extern void * memcpy(void *,const void *,__kernel_size_t); 150extern void * memcpy(void *,const void *,__kernel_size_t);
158extern void * __memcpy(void *,const void *,__kernel_size_t); 151extern void * __memcpy(void *,const void *,__kernel_size_t);
159extern char * strcat(char *, const char *);
160extern int memcmp(const void * cs,const void * ct,size_t count);
161 152
162EXPORT_SYMBOL(memset); 153EXPORT_SYMBOL(memset);
163EXPORT_SYMBOL(strlen); 154EXPORT_SYMBOL(strlen);
164EXPORT_SYMBOL(memmove); 155EXPORT_SYMBOL(memmove);
165EXPORT_SYMBOL(strcpy);
166EXPORT_SYMBOL(strncmp); 156EXPORT_SYMBOL(strncmp);
167EXPORT_SYMBOL(strncpy); 157EXPORT_SYMBOL(strncpy);
168EXPORT_SYMBOL(strchr); 158EXPORT_SYMBOL(strchr);
169EXPORT_SYMBOL(strcmp);
170EXPORT_SYMBOL(strcat);
171EXPORT_SYMBOL(strncat); 159EXPORT_SYMBOL(strncat);
172EXPORT_SYMBOL(memchr); 160EXPORT_SYMBOL(memchr);
173EXPORT_SYMBOL(strrchr); 161EXPORT_SYMBOL(strrchr);
@@ -175,7 +163,6 @@ EXPORT_SYMBOL(strnlen);
175EXPORT_SYMBOL(memscan); 163EXPORT_SYMBOL(memscan);
176EXPORT_SYMBOL(memcpy); 164EXPORT_SYMBOL(memcpy);
177EXPORT_SYMBOL(__memcpy); 165EXPORT_SYMBOL(__memcpy);
178EXPORT_SYMBOL(memcmp);
179 166
180#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM 167#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
181/* prototypes are wrong, these are assembly with custom calling functions */ 168/* prototypes are wrong, these are assembly with custom calling functions */