aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/kernel/osf_sys.c7
-rw-r--r--arch/arm/mach-s3c2410/clock.c2
-rw-r--r--arch/arm/mach-s3c2410/s3c2440.c6
-rw-r--r--arch/arm/mm/Kconfig21
-rw-r--r--arch/arm/mm/copypage-v4mc.S80
-rw-r--r--arch/arm/mm/copypage-v4mc.c111
-rw-r--r--arch/arm/mm/copypage-v6.c28
-rw-r--r--arch/arm/mm/flush.c37
-rw-r--r--arch/arm/mm/mm-armv.c27
-rw-r--r--arch/i386/Kconfig2
-rw-r--r--arch/i386/kernel/cpu/amd.c12
-rw-r--r--arch/i386/kernel/cpu/common.c4
-rw-r--r--arch/i386/kernel/smpboot.c1
-rw-r--r--arch/i386/mach-voyager/voyager_smp.c17
-rw-r--r--arch/i386/mm/ioremap.c10
-rw-r--r--arch/i386/pci/fixup.c2
-rw-r--r--arch/i386/pci/irq.c5
-rw-r--r--arch/ia64/ia32/ia32_ioctl.c1
-rw-r--r--arch/mips/vr41xx/common/pmu.c55
-rw-r--r--arch/ppc/kernel/head_44x.S15
-rw-r--r--arch/ppc/kernel/setup.c4
-rw-r--r--arch/ppc/lib/string.S7
-rw-r--r--arch/ppc/mm/init.c1
-rw-r--r--arch/ppc/syslib/ipic.c2
-rw-r--r--arch/ppc/syslib/mpc83xx_devices.c1
-rw-r--r--arch/ppc/syslib/mpc85xx_devices.c1
-rw-r--r--arch/ppc/syslib/open_pic.c2
-rw-r--r--arch/ppc64/kernel/mf.c85
-rw-r--r--arch/ppc64/kernel/pmac_smp.c28
-rw-r--r--arch/ppc64/kernel/prom_init.c44
-rw-r--r--arch/ppc64/kernel/rtc.c39
-rw-r--r--arch/ppc64/kernel/time.c1
-rw-r--r--arch/sparc64/kernel/pci_iommu.c167
-rw-r--r--arch/sparc64/kernel/sbus.c31
-rw-r--r--arch/sparc64/kernel/setup.c11
-rw-r--r--arch/sparc64/kernel/smp.c3
-rw-r--r--arch/sparc64/kernel/traps.c19
-rw-r--r--arch/um/Kconfig_x86_644
-rw-r--r--arch/um/drivers/chan_kern.c22
-rw-r--r--arch/um/drivers/mcast_kern.c4
-rw-r--r--arch/um/drivers/mcast_user.c47
-rw-r--r--arch/um/drivers/ubd_kern.c296
-rw-r--r--arch/um/include/sysdep-i386/ptrace.h2
-rw-r--r--arch/um/include/sysdep-x86_64/checksum.h26
-rw-r--r--arch/um/include/sysdep-x86_64/ptrace.h62
-rw-r--r--arch/um/kernel/Makefile2
-rw-r--r--arch/um/kernel/checksum.c0
-rw-r--r--arch/um/kernel/initrd.c78
-rw-r--r--arch/um/kernel/irq_user.c10
-rw-r--r--arch/um/kernel/ksyms.c1
-rw-r--r--arch/um/kernel/mem.c40
-rw-r--r--arch/um/kernel/ptrace.c6
-rw-r--r--arch/um/kernel/trap_kern.c36
-rw-r--r--arch/um/kernel/tt/ksyms.c1
-rw-r--r--arch/um/kernel/uml.lds.S2
-rw-r--r--arch/um/sys-i386/Makefile4
-rw-r--r--arch/um/sys-i386/delay.c16
-rw-r--r--arch/um/sys-x86_64/Makefile6
-rw-r--r--arch/um/sys-x86_64/delay.c33
-rw-r--r--arch/um/sys-x86_64/ksyms.c3
-rw-r--r--arch/um/sys-x86_64/ptrace.c9
-rw-r--r--arch/um/sys-x86_64/syscalls.c1
-rw-r--r--arch/um/sys-x86_64/user-offsets.c8
-rw-r--r--arch/x86_64/Kconfig16
-rw-r--r--arch/x86_64/defconfig58
-rw-r--r--arch/x86_64/kernel/Makefile1
-rw-r--r--arch/x86_64/kernel/apic.c5
-rw-r--r--arch/x86_64/kernel/entry.S11
-rw-r--r--arch/x86_64/kernel/io_apic.c81
-rw-r--r--arch/x86_64/kernel/mpparse.c22
-rw-r--r--arch/x86_64/kernel/nmi.c248
-rw-r--r--arch/x86_64/kernel/pmtimer.c101
-rw-r--r--arch/x86_64/kernel/ptrace.c17
-rw-r--r--arch/x86_64/kernel/setup.c30
-rw-r--r--arch/x86_64/kernel/signal.c4
-rw-r--r--arch/x86_64/kernel/smpboot.c263
-rw-r--r--arch/x86_64/kernel/time.c62
-rw-r--r--arch/x86_64/kernel/traps.c2
-rw-r--r--arch/x86_64/kernel/vsyscall.c5
-rw-r--r--arch/x86_64/kernel/x8664_ksyms.c3
-rw-r--r--arch/x86_64/mm/fault.c11
-rw-r--r--arch/x86_64/mm/ioremap.c29
82 files changed, 1435 insertions, 1142 deletions
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 64e450dddb49..167fd89f8707 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1150,16 +1150,13 @@ osf_usleep_thread(struct timeval32 __user *sleep, struct timeval32 __user *remai
1150 if (get_tv32(&tmp, sleep)) 1150 if (get_tv32(&tmp, sleep))
1151 goto fault; 1151 goto fault;
1152 1152
1153 ticks = tmp.tv_usec; 1153 ticks = timeval_to_jiffies(&tmp);
1154 ticks = (ticks + (1000000 / HZ) - 1) / (1000000 / HZ);
1155 ticks += tmp.tv_sec * HZ;
1156 1154
1157 current->state = TASK_INTERRUPTIBLE; 1155 current->state = TASK_INTERRUPTIBLE;
1158 ticks = schedule_timeout(ticks); 1156 ticks = schedule_timeout(ticks);
1159 1157
1160 if (remain) { 1158 if (remain) {
1161 tmp.tv_sec = ticks / HZ; 1159 jiffies_to_timeval(ticks, &tmp);
1162 tmp.tv_usec = ticks % HZ;
1163 if (put_tv32(remain, &tmp)) 1160 if (put_tv32(remain, &tmp))
1164 goto fault; 1161 goto fault;
1165 } 1162 }
diff --git a/arch/arm/mach-s3c2410/clock.c b/arch/arm/mach-s3c2410/clock.c
index e23f534d4e1d..8d986b8401c2 100644
--- a/arch/arm/mach-s3c2410/clock.c
+++ b/arch/arm/mach-s3c2410/clock.c
@@ -478,7 +478,7 @@ static int s3c2440_clk_add(struct sys_device *sysdev)
478{ 478{
479 unsigned long upllcon = __raw_readl(S3C2410_UPLLCON); 479 unsigned long upllcon = __raw_readl(S3C2410_UPLLCON);
480 480
481 s3c2440_clk_upll.rate = s3c2410_get_pll(upllcon, clk_xtal.rate) * 2; 481 s3c2440_clk_upll.rate = s3c2410_get_pll(upllcon, clk_xtal.rate);
482 482
483 printk("S3C2440: Clock Support, UPLL %ld.%03ld MHz\n", 483 printk("S3C2440: Clock Support, UPLL %ld.%03ld MHz\n",
484 print_mhz(s3c2440_clk_upll.rate)); 484 print_mhz(s3c2440_clk_upll.rate));
diff --git a/arch/arm/mach-s3c2410/s3c2440.c b/arch/arm/mach-s3c2410/s3c2440.c
index 9a8cc5ae2255..d4c8281b55f6 100644
--- a/arch/arm/mach-s3c2410/s3c2440.c
+++ b/arch/arm/mach-s3c2410/s3c2440.c
@@ -192,9 +192,11 @@ void __init s3c2440_map_io(struct map_desc *mach_desc, int size)
192 192
193 iotable_init(s3c2440_iodesc, ARRAY_SIZE(s3c2440_iodesc)); 193 iotable_init(s3c2440_iodesc, ARRAY_SIZE(s3c2440_iodesc));
194 iotable_init(mach_desc, size); 194 iotable_init(mach_desc, size);
195
195 /* rename any peripherals used differing from the s3c2410 */ 196 /* rename any peripherals used differing from the s3c2410 */
196 197
197 s3c_device_i2c.name = "s3c2440-i2c"; 198 s3c_device_i2c.name = "s3c2440-i2c";
199 s3c_device_nand.name = "s3c2440-nand";
198 200
199 /* change irq for watchdog */ 201 /* change irq for watchdog */
200 202
@@ -225,7 +227,7 @@ void __init s3c2440_init_clocks(int xtal)
225 break; 227 break;
226 228
227 case S3C2440_CLKDIVN_HDIVN_2: 229 case S3C2440_CLKDIVN_HDIVN_2:
228 hdiv = 1; 230 hdiv = 2;
229 break; 231 break;
230 232
231 case S3C2440_CLKDIVN_HDIVN_4_8: 233 case S3C2440_CLKDIVN_HDIVN_4_8:
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index c4fc6be629de..48bac7da8c70 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -412,21 +412,20 @@ config CPU_BPREDICT_DISABLE
412 412
413config TLS_REG_EMUL 413config TLS_REG_EMUL
414 bool 414 bool
415 default y if (SMP || CPU_32v6) && (CPU_32v5 || CPU_32v4 || CPU_32v3) 415 default y if SMP && (CPU_32v5 || CPU_32v4 || CPU_32v3)
416 help 416 help
417 We might be running on an ARMv6+ processor which should have the TLS 417 An SMP system using a pre-ARMv6 processor (there are apparently
418 register but for some reason we can't use it, or maybe an SMP system 418 a few prototypes like that in existence) and therefore access to
419 using a pre-ARMv6 processor (there are apparently a few prototypes 419 that required register must be emulated.
420 like that in existence) and therefore access to that register must
421 be emulated.
422 420
423config HAS_TLS_REG 421config HAS_TLS_REG
424 bool 422 bool
425 depends on CPU_32v6 423 depends on !TLS_REG_EMUL
426 default y if !TLS_REG_EMUL 424 default y if SMP || CPU_32v7
427 help 425 help
428 This selects support for the CP15 thread register. 426 This selects support for the CP15 thread register.
429 It is defined to be available on ARMv6 or later. If a particular 427 It is defined to be available on some ARMv6 processors (including
430 ARMv6 or later CPU doesn't support it then it must omc;ide "select 428 all SMP capable ARMv6's) or later processors. User space may
431 TLS_REG_EMUL" along with its other caracteristics. 429 assume directly accessing that register and always obtain the
430 expected value only on ARMv7 and above.
432 431
diff --git a/arch/arm/mm/copypage-v4mc.S b/arch/arm/mm/copypage-v4mc.S
deleted file mode 100644
index 305af3dab3d8..000000000000
--- a/arch/arm/mm/copypage-v4mc.S
+++ /dev/null
@@ -1,80 +0,0 @@
1/*
2 * linux/arch/arm/lib/copy_page-armv4mc.S
3 *
4 * Copyright (C) 1995-2001 Russell King
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * ASM optimised string functions
11 */
12#include <linux/linkage.h>
13#include <linux/init.h>
14#include <asm/constants.h>
15
16 .text
17 .align 5
18/*
19 * ARMv4 mini-dcache optimised copy_user_page
20 *
21 * We flush the destination cache lines just before we write the data into the
22 * corresponding address. Since the Dcache is read-allocate, this removes the
23 * Dcache aliasing issue. The writes will be forwarded to the write buffer,
24 * and merged as appropriate.
25 *
26 * Note: We rely on all ARMv4 processors implementing the "invalidate D line"
27 * instruction. If your processor does not supply this, you have to write your
28 * own copy_user_page that does the right thing.
29 */
30ENTRY(v4_mc_copy_user_page)
31 stmfd sp!, {r4, lr} @ 2
32 mov r4, r0
33 mov r0, r1
34 bl map_page_minicache
35 mov r1, #PAGE_SZ/64 @ 1
36 ldmia r0!, {r2, r3, ip, lr} @ 4
371: mcr p15, 0, r4, c7, c6, 1 @ 1 invalidate D line
38 stmia r4!, {r2, r3, ip, lr} @ 4
39 ldmia r0!, {r2, r3, ip, lr} @ 4+1
40 stmia r4!, {r2, r3, ip, lr} @ 4
41 ldmia r0!, {r2, r3, ip, lr} @ 4
42 mcr p15, 0, r4, c7, c6, 1 @ 1 invalidate D line
43 stmia r4!, {r2, r3, ip, lr} @ 4
44 ldmia r0!, {r2, r3, ip, lr} @ 4
45 subs r1, r1, #1 @ 1
46 stmia r4!, {r2, r3, ip, lr} @ 4
47 ldmneia r0!, {r2, r3, ip, lr} @ 4
48 bne 1b @ 1
49 ldmfd sp!, {r4, pc} @ 3
50
51 .align 5
52/*
53 * ARMv4 optimised clear_user_page
54 *
55 * Same story as above.
56 */
57ENTRY(v4_mc_clear_user_page)
58 str lr, [sp, #-4]!
59 mov r1, #PAGE_SZ/64 @ 1
60 mov r2, #0 @ 1
61 mov r3, #0 @ 1
62 mov ip, #0 @ 1
63 mov lr, #0 @ 1
641: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line
65 stmia r0!, {r2, r3, ip, lr} @ 4
66 stmia r0!, {r2, r3, ip, lr} @ 4
67 mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line
68 stmia r0!, {r2, r3, ip, lr} @ 4
69 stmia r0!, {r2, r3, ip, lr} @ 4
70 subs r1, r1, #1 @ 1
71 bne 1b @ 1
72 ldr pc, [sp], #4
73
74 __INITDATA
75
76 .type v4_mc_user_fns, #object
77ENTRY(v4_mc_user_fns)
78 .long v4_mc_clear_user_page
79 .long v4_mc_copy_user_page
80 .size v4_mc_user_fns, . - v4_mc_user_fns
diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c
new file mode 100644
index 000000000000..fc69dccdace1
--- /dev/null
+++ b/arch/arm/mm/copypage-v4mc.c
@@ -0,0 +1,111 @@
1/*
2 * linux/arch/arm/lib/copypage-armv4mc.S
3 *
4 * Copyright (C) 1995-2005 Russell King
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This handles the mini data cache, as found on SA11x0 and XScale
11 * processors. When we copy a user page page, we map it in such a way
12 * that accesses to this page will not touch the main data cache, but
13 * will be cached in the mini data cache. This prevents us thrashing
14 * the main data cache on page faults.
15 */
16#include <linux/init.h>
17#include <linux/mm.h>
18
19#include <asm/page.h>
20#include <asm/pgtable.h>
21#include <asm/tlbflush.h>
22
23/*
24 * 0xffff8000 to 0xffffffff is reserved for any ARM architecture
25 * specific hacks for copying pages efficiently.
26 */
27#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
28 L_PTE_CACHEABLE)
29
30#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
31
32static DEFINE_SPINLOCK(minicache_lock);
33
34/*
35 * ARMv4 mini-dcache optimised copy_user_page
36 *
37 * We flush the destination cache lines just before we write the data into the
38 * corresponding address. Since the Dcache is read-allocate, this removes the
39 * Dcache aliasing issue. The writes will be forwarded to the write buffer,
40 * and merged as appropriate.
41 *
42 * Note: We rely on all ARMv4 processors implementing the "invalidate D line"
43 * instruction. If your processor does not supply this, you have to write your
44 * own copy_user_page that does the right thing.
45 */
46static void __attribute__((naked))
47mc_copy_user_page(void *from, void *to)
48{
49 asm volatile(
50 "stmfd sp!, {r4, lr} @ 2\n\
51 mov r4, %2 @ 1\n\
52 ldmia %0!, {r2, r3, ip, lr} @ 4\n\
531: mcr p15, 0, %1, c7, c6, 1 @ 1 invalidate D line\n\
54 stmia %1!, {r2, r3, ip, lr} @ 4\n\
55 ldmia %0!, {r2, r3, ip, lr} @ 4+1\n\
56 stmia %1!, {r2, r3, ip, lr} @ 4\n\
57 ldmia %0!, {r2, r3, ip, lr} @ 4\n\
58 mcr p15, 0, %1, c7, c6, 1 @ 1 invalidate D line\n\
59 stmia %1!, {r2, r3, ip, lr} @ 4\n\
60 ldmia %0!, {r2, r3, ip, lr} @ 4\n\
61 subs r4, r4, #1 @ 1\n\
62 stmia %1!, {r2, r3, ip, lr} @ 4\n\
63 ldmneia %0!, {r2, r3, ip, lr} @ 4\n\
64 bne 1b @ 1\n\
65 ldmfd sp!, {r4, pc} @ 3"
66 :
67 : "r" (from), "r" (to), "I" (PAGE_SIZE / 64));
68}
69
70void v4_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr)
71{
72 spin_lock(&minicache_lock);
73
74 set_pte(TOP_PTE(0xffff8000), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot));
75 flush_tlb_kernel_page(0xffff8000);
76
77 mc_copy_user_page((void *)0xffff8000, kto);
78
79 spin_unlock(&minicache_lock);
80}
81
82/*
83 * ARMv4 optimised clear_user_page
84 */
85void __attribute__((naked))
86v4_mc_clear_user_page(void *kaddr, unsigned long vaddr)
87{
88 asm volatile(
89 "str lr, [sp, #-4]!\n\
90 mov r1, %0 @ 1\n\
91 mov r2, #0 @ 1\n\
92 mov r3, #0 @ 1\n\
93 mov ip, #0 @ 1\n\
94 mov lr, #0 @ 1\n\
951: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line\n\
96 stmia r0!, {r2, r3, ip, lr} @ 4\n\
97 stmia r0!, {r2, r3, ip, lr} @ 4\n\
98 mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line\n\
99 stmia r0!, {r2, r3, ip, lr} @ 4\n\
100 stmia r0!, {r2, r3, ip, lr} @ 4\n\
101 subs r1, r1, #1 @ 1\n\
102 bne 1b @ 1\n\
103 ldr pc, [sp], #4"
104 :
105 : "I" (PAGE_SIZE / 64));
106}
107
108struct cpu_user_fns v4_mc_user_fns __initdata = {
109 .cpu_clear_user_page = v4_mc_clear_user_page,
110 .cpu_copy_user_page = v4_mc_copy_user_page,
111};
diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c
index 694ac8208858..a8c00236bd3d 100644
--- a/arch/arm/mm/copypage-v6.c
+++ b/arch/arm/mm/copypage-v6.c
@@ -26,8 +26,8 @@
26#define to_address (0xffffc000) 26#define to_address (0xffffc000)
27#define to_pgprot PAGE_KERNEL 27#define to_pgprot PAGE_KERNEL
28 28
29static pte_t *from_pte; 29#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
30static pte_t *to_pte; 30
31static DEFINE_SPINLOCK(v6_lock); 31static DEFINE_SPINLOCK(v6_lock);
32 32
33#define DCACHE_COLOUR(vaddr) ((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT) 33#define DCACHE_COLOUR(vaddr) ((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT)
@@ -74,8 +74,8 @@ void v6_copy_user_page_aliasing(void *kto, const void *kfrom, unsigned long vadd
74 */ 74 */
75 spin_lock(&v6_lock); 75 spin_lock(&v6_lock);
76 76
77 set_pte(from_pte + offset, pfn_pte(__pa(kfrom) >> PAGE_SHIFT, from_pgprot)); 77 set_pte(TOP_PTE(from_address) + offset, pfn_pte(__pa(kfrom) >> PAGE_SHIFT, from_pgprot));
78 set_pte(to_pte + offset, pfn_pte(__pa(kto) >> PAGE_SHIFT, to_pgprot)); 78 set_pte(TOP_PTE(to_address) + offset, pfn_pte(__pa(kto) >> PAGE_SHIFT, to_pgprot));
79 79
80 from = from_address + (offset << PAGE_SHIFT); 80 from = from_address + (offset << PAGE_SHIFT);
81 to = to_address + (offset << PAGE_SHIFT); 81 to = to_address + (offset << PAGE_SHIFT);
@@ -114,7 +114,7 @@ void v6_clear_user_page_aliasing(void *kaddr, unsigned long vaddr)
114 */ 114 */
115 spin_lock(&v6_lock); 115 spin_lock(&v6_lock);
116 116
117 set_pte(to_pte + offset, pfn_pte(__pa(kaddr) >> PAGE_SHIFT, to_pgprot)); 117 set_pte(TOP_PTE(to_address) + offset, pfn_pte(__pa(kaddr) >> PAGE_SHIFT, to_pgprot));
118 flush_tlb_kernel_page(to); 118 flush_tlb_kernel_page(to);
119 clear_page((void *)to); 119 clear_page((void *)to);
120 120
@@ -129,21 +129,6 @@ struct cpu_user_fns v6_user_fns __initdata = {
129static int __init v6_userpage_init(void) 129static int __init v6_userpage_init(void)
130{ 130{
131 if (cache_is_vipt_aliasing()) { 131 if (cache_is_vipt_aliasing()) {
132 pgd_t *pgd;
133 pmd_t *pmd;
134
135 pgd = pgd_offset_k(from_address);
136 pmd = pmd_alloc(&init_mm, pgd, from_address);
137 if (!pmd)
138 BUG();
139 from_pte = pte_alloc_kernel(&init_mm, pmd, from_address);
140 if (!from_pte)
141 BUG();
142
143 to_pte = pte_alloc_kernel(&init_mm, pmd, to_address);
144 if (!to_pte)
145 BUG();
146
147 cpu_user.cpu_clear_user_page = v6_clear_user_page_aliasing; 132 cpu_user.cpu_clear_user_page = v6_clear_user_page_aliasing;
148 cpu_user.cpu_copy_user_page = v6_copy_user_page_aliasing; 133 cpu_user.cpu_copy_user_page = v6_copy_user_page_aliasing;
149 } 134 }
@@ -151,5 +136,4 @@ static int __init v6_userpage_init(void)
151 return 0; 136 return 0;
152} 137}
153 138
154__initcall(v6_userpage_init); 139core_initcall(v6_userpage_init);
155
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index c6de48d89503..4085ed983e46 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -13,6 +13,29 @@
13 13
14#include <asm/cacheflush.h> 14#include <asm/cacheflush.h>
15#include <asm/system.h> 15#include <asm/system.h>
16#include <asm/tlbflush.h>
17
18#ifdef CONFIG_CPU_CACHE_VIPT
19#define ALIAS_FLUSH_START 0xffff4000
20
21#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
22
23static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr)
24{
25 unsigned long to = ALIAS_FLUSH_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT);
26
27 set_pte(TOP_PTE(to), pfn_pte(pfn, PAGE_KERNEL));
28 flush_tlb_kernel_page(to);
29
30 asm( "mcrr p15, 0, %1, %0, c14\n"
31 " mcrr p15, 0, %1, %0, c5\n"
32 :
33 : "r" (to), "r" (to + PAGE_SIZE - L1_CACHE_BYTES)
34 : "cc");
35}
36#else
37#define flush_pfn_alias(pfn,vaddr) do { } while (0)
38#endif
16 39
17static void __flush_dcache_page(struct address_space *mapping, struct page *page) 40static void __flush_dcache_page(struct address_space *mapping, struct page *page)
18{ 41{
@@ -37,6 +60,18 @@ static void __flush_dcache_page(struct address_space *mapping, struct page *page
37 return; 60 return;
38 61
39 /* 62 /*
63 * This is a page cache page. If we have a VIPT cache, we
64 * only need to do one flush - which would be at the relevant
65 * userspace colour, which is congruent with page->index.
66 */
67 if (cache_is_vipt()) {
68 if (cache_is_vipt_aliasing())
69 flush_pfn_alias(page_to_pfn(page),
70 page->index << PAGE_CACHE_SHIFT);
71 return;
72 }
73
74 /*
40 * There are possible user space mappings of this page: 75 * There are possible user space mappings of this page:
41 * - VIVT cache: we need to also write back and invalidate all user 76 * - VIVT cache: we need to also write back and invalidate all user
42 * data in the current VM view associated with this page. 77 * data in the current VM view associated with this page.
@@ -57,8 +92,6 @@ static void __flush_dcache_page(struct address_space *mapping, struct page *page
57 continue; 92 continue;
58 offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; 93 offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
59 flush_cache_page(mpnt, mpnt->vm_start + offset, page_to_pfn(page)); 94 flush_cache_page(mpnt, mpnt->vm_start + offset, page_to_pfn(page));
60 if (cache_is_vipt())
61 break;
62 } 95 }
63 flush_dcache_mmap_unlock(mapping); 96 flush_dcache_mmap_unlock(mapping);
64} 97}
diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c
index 585dfb8e20b9..2c2b93d77d43 100644
--- a/arch/arm/mm/mm-armv.c
+++ b/arch/arm/mm/mm-armv.c
@@ -37,6 +37,8 @@ pgprot_t pgprot_kernel;
37 37
38EXPORT_SYMBOL(pgprot_kernel); 38EXPORT_SYMBOL(pgprot_kernel);
39 39
40pmd_t *top_pmd;
41
40struct cachepolicy { 42struct cachepolicy {
41 const char policy[16]; 43 const char policy[16];
42 unsigned int cr_mask; 44 unsigned int cr_mask;
@@ -142,6 +144,16 @@ __setup("noalign", noalign_setup);
142 144
143#define FIRST_KERNEL_PGD_NR (FIRST_USER_PGD_NR + USER_PTRS_PER_PGD) 145#define FIRST_KERNEL_PGD_NR (FIRST_USER_PGD_NR + USER_PTRS_PER_PGD)
144 146
147static inline pmd_t *pmd_off(pgd_t *pgd, unsigned long virt)
148{
149 return pmd_offset(pgd, virt);
150}
151
152static inline pmd_t *pmd_off_k(unsigned long virt)
153{
154 return pmd_off(pgd_offset_k(virt), virt);
155}
156
145/* 157/*
146 * need to get a 16k page for level 1 158 * need to get a 16k page for level 1
147 */ 159 */
@@ -220,7 +232,7 @@ void free_pgd_slow(pgd_t *pgd)
220 return; 232 return;
221 233
222 /* pgd is always present and good */ 234 /* pgd is always present and good */
223 pmd = (pmd_t *)pgd; 235 pmd = pmd_off(pgd, 0);
224 if (pmd_none(*pmd)) 236 if (pmd_none(*pmd))
225 goto free; 237 goto free;
226 if (pmd_bad(*pmd)) { 238 if (pmd_bad(*pmd)) {
@@ -246,9 +258,8 @@ free:
246static inline void 258static inline void
247alloc_init_section(unsigned long virt, unsigned long phys, int prot) 259alloc_init_section(unsigned long virt, unsigned long phys, int prot)
248{ 260{
249 pmd_t *pmdp; 261 pmd_t *pmdp = pmd_off_k(virt);
250 262
251 pmdp = pmd_offset(pgd_offset_k(virt), virt);
252 if (virt & (1 << 20)) 263 if (virt & (1 << 20))
253 pmdp++; 264 pmdp++;
254 265
@@ -283,11 +294,9 @@ alloc_init_supersection(unsigned long virt, unsigned long phys, int prot)
283static inline void 294static inline void
284alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pgprot_t prot) 295alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pgprot_t prot)
285{ 296{
286 pmd_t *pmdp; 297 pmd_t *pmdp = pmd_off_k(virt);
287 pte_t *ptep; 298 pte_t *ptep;
288 299
289 pmdp = pmd_offset(pgd_offset_k(virt), virt);
290
291 if (pmd_none(*pmdp)) { 300 if (pmd_none(*pmdp)) {
292 unsigned long pmdval; 301 unsigned long pmdval;
293 ptep = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * 302 ptep = alloc_bootmem_low_pages(2 * PTRS_PER_PTE *
@@ -310,7 +319,7 @@ alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pg
310 */ 319 */
311static inline void clear_mapping(unsigned long virt) 320static inline void clear_mapping(unsigned long virt)
312{ 321{
313 pmd_clear(pmd_offset(pgd_offset_k(virt), virt)); 322 pmd_clear(pmd_off_k(virt));
314} 323}
315 324
316struct mem_types { 325struct mem_types {
@@ -578,7 +587,7 @@ void setup_mm_for_reboot(char mode)
578 PMD_TYPE_SECT; 587 PMD_TYPE_SECT;
579 if (cpu_arch <= CPU_ARCH_ARMv5) 588 if (cpu_arch <= CPU_ARCH_ARMv5)
580 pmdval |= PMD_BIT4; 589 pmdval |= PMD_BIT4;
581 pmd = pmd_offset(pgd + i, i << PGDIR_SHIFT); 590 pmd = pmd_off(pgd, i << PGDIR_SHIFT);
582 pmd[0] = __pmd(pmdval); 591 pmd[0] = __pmd(pmdval);
583 pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1))); 592 pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1)));
584 flush_pmd_entry(pmd); 593 flush_pmd_entry(pmd);
@@ -675,6 +684,8 @@ void __init memtable_init(struct meminfo *mi)
675 684
676 flush_cache_all(); 685 flush_cache_all();
677 flush_tlb_all(); 686 flush_tlb_all();
687
688 top_pmd = pmd_off_k(0xffff0000);
678} 689}
679 690
680/* 691/*
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index e382f32d435e..dfd904f6883b 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -1163,7 +1163,7 @@ config PCI_DIRECT
1163 1163
1164config PCI_MMCONFIG 1164config PCI_MMCONFIG
1165 bool 1165 bool
1166 depends on PCI && (PCI_GOMMCONFIG || (PCI_GOANY && ACPI)) 1166 depends on PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY)
1167 select ACPI_BOOT 1167 select ACPI_BOOT
1168 default y 1168 default y
1169 1169
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 16dbc4151be4..73aeaf5a9d4e 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -24,9 +24,6 @@ __asm__(".align 4\nvide: ret");
24 24
25static void __init init_amd(struct cpuinfo_x86 *c) 25static void __init init_amd(struct cpuinfo_x86 *c)
26{ 26{
27#ifdef CONFIG_X86_SMP
28 int cpu = c == &boot_cpu_data ? 0 : c - cpu_data;
29#endif
30 u32 l, h; 27 u32 l, h;
31 int mbytes = num_physpages >> (20-PAGE_SHIFT); 28 int mbytes = num_physpages >> (20-PAGE_SHIFT);
32 int r; 29 int r;
@@ -198,14 +195,19 @@ static void __init init_amd(struct cpuinfo_x86 *c)
198 c->x86_num_cores = 1; 195 c->x86_num_cores = 1;
199 } 196 }
200 197
201#ifdef CONFIG_X86_SMP 198#ifdef CONFIG_X86_HT
202 /* 199 /*
203 * On a AMD dual core setup the lower bits of the APIC id 200 * On a AMD dual core setup the lower bits of the APIC id
204 * distingush the cores. Assumes number of cores is a power 201 * distingush the cores. Assumes number of cores is a power
205 * of two. 202 * of two.
206 */ 203 */
207 if (c->x86_num_cores > 1) { 204 if (c->x86_num_cores > 1) {
208 cpu_core_id[cpu] = cpu >> hweight32(c->x86_num_cores - 1); 205 int cpu = smp_processor_id();
206 unsigned bits = 0;
207 while ((1 << bits) < c->x86_num_cores)
208 bits++;
209 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
210 phys_proc_id[cpu] >>= bits;
209 printk(KERN_INFO "CPU %d(%d) -> Core %d\n", 211 printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
210 cpu, c->x86_num_cores, cpu_core_id[cpu]); 212 cpu, c->x86_num_cores, cpu_core_id[cpu]);
211 } 213 }
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 6be0310e3cd3..d199e525680a 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -243,6 +243,10 @@ static void __init early_cpu_detect(void)
243 } 243 }
244 244
245 early_intel_workaround(c); 245 early_intel_workaround(c);
246
247#ifdef CONFIG_X86_HT
248 phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
249#endif
246} 250}
247 251
248void __init generic_identify(struct cpuinfo_x86 * c) 252void __init generic_identify(struct cpuinfo_x86 * c)
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index cbea7ac582e5..35bfe138cb1a 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -888,6 +888,7 @@ void *xquad_portio;
888 888
889cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; 889cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
890cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; 890cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
891EXPORT_SYMBOL(cpu_core_map);
891 892
892static void __init smp_boot_cpus(unsigned int max_cpus) 893static void __init smp_boot_cpus(unsigned int max_cpus)
893{ 894{
diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c
index 903d739ca74a..a6e0ddd65bd0 100644
--- a/arch/i386/mach-voyager/voyager_smp.c
+++ b/arch/i386/mach-voyager/voyager_smp.c
@@ -97,7 +97,6 @@ static void ack_vic_irq(unsigned int irq);
97static void vic_enable_cpi(void); 97static void vic_enable_cpi(void);
98static void do_boot_cpu(__u8 cpuid); 98static void do_boot_cpu(__u8 cpuid);
99static void do_quad_bootstrap(void); 99static void do_quad_bootstrap(void);
100static inline void wrapper_smp_local_timer_interrupt(struct pt_regs *);
101 100
102int hard_smp_processor_id(void); 101int hard_smp_processor_id(void);
103 102
@@ -126,6 +125,14 @@ send_QIC_CPI(__u32 cpuset, __u8 cpi)
126} 125}
127 126
128static inline void 127static inline void
128wrapper_smp_local_timer_interrupt(struct pt_regs *regs)
129{
130 irq_enter();
131 smp_local_timer_interrupt(regs);
132 irq_exit();
133}
134
135static inline void
129send_one_CPI(__u8 cpu, __u8 cpi) 136send_one_CPI(__u8 cpu, __u8 cpi)
130{ 137{
131 if(voyager_quad_processors & (1<<cpu)) 138 if(voyager_quad_processors & (1<<cpu))
@@ -1249,14 +1256,6 @@ smp_vic_timer_interrupt(struct pt_regs *regs)
1249 smp_local_timer_interrupt(regs); 1256 smp_local_timer_interrupt(regs);
1250} 1257}
1251 1258
1252static inline void
1253wrapper_smp_local_timer_interrupt(struct pt_regs *regs)
1254{
1255 irq_enter();
1256 smp_local_timer_interrupt(regs);
1257 irq_exit();
1258}
1259
1260/* local (per CPU) timer interrupt. It does both profiling and 1259/* local (per CPU) timer interrupt. It does both profiling and
1261 * process statistics/rescheduling. 1260 * process statistics/rescheduling.
1262 * 1261 *
diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c
index db06f7399913..ab542792b27b 100644
--- a/arch/i386/mm/ioremap.c
+++ b/arch/i386/mm/ioremap.c
@@ -238,19 +238,21 @@ void iounmap(volatile void __iomem *addr)
238 addr < phys_to_virt(ISA_END_ADDRESS)) 238 addr < phys_to_virt(ISA_END_ADDRESS))
239 return; 239 return;
240 240
241 p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr)); 241 write_lock(&vmlist_lock);
242 p = __remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
242 if (!p) { 243 if (!p) {
243 printk("__iounmap: bad address %p\n", addr); 244 printk("iounmap: bad address %p\n", addr);
244 return; 245 goto out_unlock;
245 } 246 }
246 247
247 if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) { 248 if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) {
248 /* p->size includes the guard page, but cpa doesn't like that */
249 change_page_attr(virt_to_page(__va(p->phys_addr)), 249 change_page_attr(virt_to_page(__va(p->phys_addr)),
250 p->size >> PAGE_SHIFT, 250 p->size >> PAGE_SHIFT,
251 PAGE_KERNEL); 251 PAGE_KERNEL);
252 global_flush_tlb(); 252 global_flush_tlb();
253 } 253 }
254out_unlock:
255 write_unlock(&vmlist_lock);
254 kfree(p); 256 kfree(p);
255} 257}
256 258
diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c
index be52c5ac4e05..8e8e895e1b5a 100644
--- a/arch/i386/pci/fixup.c
+++ b/arch/i386/pci/fixup.c
@@ -253,7 +253,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, pci
253#define MAX_PCIEROOT 6 253#define MAX_PCIEROOT 6
254static int quirk_aspm_offset[MAX_PCIEROOT << 3]; 254static int quirk_aspm_offset[MAX_PCIEROOT << 3];
255 255
256#define GET_INDEX(a, b) (((a - PCI_DEVICE_ID_INTEL_MCH_PA) << 3) + b) 256#define GET_INDEX(a, b) ((((a) - PCI_DEVICE_ID_INTEL_MCH_PA) << 3) + ((b) & 7))
257 257
258static int quirk_pcie_aspm_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) 258static int quirk_pcie_aspm_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
259{ 259{
diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c
index d6598da4b67b..da21b1d07c15 100644
--- a/arch/i386/pci/irq.c
+++ b/arch/i386/pci/irq.c
@@ -1029,7 +1029,6 @@ void pcibios_penalize_isa_irq(int irq)
1029static int pirq_enable_irq(struct pci_dev *dev) 1029static int pirq_enable_irq(struct pci_dev *dev)
1030{ 1030{
1031 u8 pin; 1031 u8 pin;
1032 extern int via_interrupt_line_quirk;
1033 struct pci_dev *temp_dev; 1032 struct pci_dev *temp_dev;
1034 1033
1035 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); 1034 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
@@ -1084,10 +1083,6 @@ static int pirq_enable_irq(struct pci_dev *dev)
1084 printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", 1083 printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
1085 'A' + pin, pci_name(dev), msg); 1084 'A' + pin, pci_name(dev), msg);
1086 } 1085 }
1087 /* VIA bridges use interrupt line for apic/pci steering across
1088 the V-Link */
1089 else if (via_interrupt_line_quirk)
1090 pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq & 15);
1091 return 0; 1086 return 0;
1092} 1087}
1093 1088
diff --git a/arch/ia64/ia32/ia32_ioctl.c b/arch/ia64/ia32/ia32_ioctl.c
index 9845dabe2613..164b211f4174 100644
--- a/arch/ia64/ia32/ia32_ioctl.c
+++ b/arch/ia64/ia32/ia32_ioctl.c
@@ -13,7 +13,6 @@
13 13
14#define INCLUDES 14#define INCLUDES
15#include "compat_ioctl.c" 15#include "compat_ioctl.c"
16#include <asm/ioctl32.h>
17 16
18#define IOCTL_NR(a) ((a) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) 17#define IOCTL_NR(a) ((a) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT))
19 18
diff --git a/arch/mips/vr41xx/common/pmu.c b/arch/mips/vr41xx/common/pmu.c
index c5f1043de938..53166f3598b2 100644
--- a/arch/mips/vr41xx/common/pmu.c
+++ b/arch/mips/vr41xx/common/pmu.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * pmu.c, Power Management Unit routines for NEC VR4100 series. 2 * pmu.c, Power Management Unit routines for NEC VR4100 series.
3 * 3 *
4 * Copyright (C) 2003-2004 Yoichi Yuasa <yuasa@hh.iij4u.or.jp> 4 * Copyright (C) 2003-2005 Yoichi Yuasa <yuasa@hh.iij4u.or.jp>
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
@@ -17,7 +17,9 @@
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */ 19 */
20#include <linux/errno.h>
20#include <linux/init.h> 21#include <linux/init.h>
22#include <linux/ioport.h>
21#include <linux/kernel.h> 23#include <linux/kernel.h>
22#include <linux/smp.h> 24#include <linux/smp.h>
23#include <linux/types.h> 25#include <linux/types.h>
@@ -27,20 +29,31 @@
27#include <asm/reboot.h> 29#include <asm/reboot.h>
28#include <asm/system.h> 30#include <asm/system.h>
29 31
30#define PMUCNT2REG KSEG1ADDR(0x0f0000c6) 32#define PMU_TYPE1_BASE 0x0b0000a0UL
33#define PMU_TYPE1_SIZE 0x0eUL
34
35#define PMU_TYPE2_BASE 0x0f0000c0UL
36#define PMU_TYPE2_SIZE 0x10UL
37
38#define PMUCNT2REG 0x06
31 #define SOFTRST 0x0010 39 #define SOFTRST 0x0010
32 40
41static void __iomem *pmu_base;
42
43#define pmu_read(offset) readw(pmu_base + (offset))
44#define pmu_write(offset, value) writew((value), pmu_base + (offset))
45
33static inline void software_reset(void) 46static inline void software_reset(void)
34{ 47{
35 uint16_t val; 48 uint16_t pmucnt2;
36 49
37 switch (current_cpu_data.cputype) { 50 switch (current_cpu_data.cputype) {
38 case CPU_VR4122: 51 case CPU_VR4122:
39 case CPU_VR4131: 52 case CPU_VR4131:
40 case CPU_VR4133: 53 case CPU_VR4133:
41 val = readw(PMUCNT2REG); 54 pmucnt2 = pmu_read(PMUCNT2REG);
42 val |= SOFTRST; 55 pmucnt2 |= SOFTRST;
43 writew(val, PMUCNT2REG); 56 pmu_write(PMUCNT2REG, pmucnt2);
44 break; 57 break;
45 default: 58 default:
46 break; 59 break;
@@ -71,6 +84,34 @@ static void vr41xx_power_off(void)
71 84
72static int __init vr41xx_pmu_init(void) 85static int __init vr41xx_pmu_init(void)
73{ 86{
87 unsigned long start, size;
88
89 switch (current_cpu_data.cputype) {
90 case CPU_VR4111:
91 case CPU_VR4121:
92 start = PMU_TYPE1_BASE;
93 size = PMU_TYPE1_SIZE;
94 break;
95 case CPU_VR4122:
96 case CPU_VR4131:
97 case CPU_VR4133:
98 start = PMU_TYPE2_BASE;
99 size = PMU_TYPE2_SIZE;
100 break;
101 default:
102 printk("Unexpected CPU of NEC VR4100 series\n");
103 return -ENODEV;
104 }
105
106 if (request_mem_region(start, size, "PMU") == NULL)
107 return -EBUSY;
108
109 pmu_base = ioremap(start, size);
110 if (pmu_base == NULL) {
111 release_mem_region(start, size);
112 return -EBUSY;
113 }
114
74 _machine_restart = vr41xx_restart; 115 _machine_restart = vr41xx_restart;
75 _machine_halt = vr41xx_halt; 116 _machine_halt = vr41xx_halt;
76 _machine_power_off = vr41xx_power_off; 117 _machine_power_off = vr41xx_power_off;
@@ -78,4 +119,4 @@ static int __init vr41xx_pmu_init(void)
78 return 0; 119 return 0;
79} 120}
80 121
81early_initcall(vr41xx_pmu_init); 122core_initcall(vr41xx_pmu_init);
diff --git a/arch/ppc/kernel/head_44x.S b/arch/ppc/kernel/head_44x.S
index 9b6a8e513657..6c7ae6052464 100644
--- a/arch/ppc/kernel/head_44x.S
+++ b/arch/ppc/kernel/head_44x.S
@@ -330,8 +330,9 @@ interrupt_base:
330 /* If we are faulting a kernel address, we have to use the 330 /* If we are faulting a kernel address, we have to use the
331 * kernel page tables. 331 * kernel page tables.
332 */ 332 */
333 andis. r11, r10, 0x8000 333 lis r11, TASK_SIZE@h
334 beq 3f 334 cmplw r10, r11
335 blt+ 3f
335 lis r11, swapper_pg_dir@h 336 lis r11, swapper_pg_dir@h
336 ori r11, r11, swapper_pg_dir@l 337 ori r11, r11, swapper_pg_dir@l
337 338
@@ -464,8 +465,9 @@ interrupt_base:
464 /* If we are faulting a kernel address, we have to use the 465 /* If we are faulting a kernel address, we have to use the
465 * kernel page tables. 466 * kernel page tables.
466 */ 467 */
467 andis. r11, r10, 0x8000 468 lis r11, TASK_SIZE@h
468 beq 3f 469 cmplw r10, r11
470 blt+ 3f
469 lis r11, swapper_pg_dir@h 471 lis r11, swapper_pg_dir@h
470 ori r11, r11, swapper_pg_dir@l 472 ori r11, r11, swapper_pg_dir@l
471 473
@@ -533,8 +535,9 @@ interrupt_base:
533 /* If we are faulting a kernel address, we have to use the 535 /* If we are faulting a kernel address, we have to use the
534 * kernel page tables. 536 * kernel page tables.
535 */ 537 */
536 andis. r11, r10, 0x8000 538 lis r11, TASK_SIZE@h
537 beq 3f 539 cmplw r10, r11
540 blt+ 3f
538 lis r11, swapper_pg_dir@h 541 lis r11, swapper_pg_dir@h
539 ori r11, r11, swapper_pg_dir@l 542 ori r11, r11, swapper_pg_dir@l
540 543
diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c
index 5dfb42f1a152..5c20266e3b1f 100644
--- a/arch/ppc/kernel/setup.c
+++ b/arch/ppc/kernel/setup.c
@@ -499,7 +499,7 @@ static int __init set_preferred_console(void)
499{ 499{
500 struct device_node *prom_stdout; 500 struct device_node *prom_stdout;
501 char *name; 501 char *name;
502 int offset; 502 int offset = 0;
503 503
504 if (of_stdout_device == NULL) 504 if (of_stdout_device == NULL)
505 return -ENODEV; 505 return -ENODEV;
@@ -753,6 +753,8 @@ void __init setup_arch(char **cmdline_p)
753 strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); 753 strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE);
754 *cmdline_p = cmd_line; 754 *cmdline_p = cmd_line;
755 755
756 parse_early_param();
757
756 /* set up the bootmem stuff with available memory */ 758 /* set up the bootmem stuff with available memory */
757 do_init_bootmem(); 759 do_init_bootmem();
758 if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab); 760 if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab);
diff --git a/arch/ppc/lib/string.S b/arch/ppc/lib/string.S
index 8d08a2eb225e..36c9b97fd92a 100644
--- a/arch/ppc/lib/string.S
+++ b/arch/ppc/lib/string.S
@@ -446,6 +446,7 @@ _GLOBAL(__copy_tofrom_user)
446#ifdef CONFIG_8xx 446#ifdef CONFIG_8xx
447 /* Don't use prefetch on 8xx */ 447 /* Don't use prefetch on 8xx */
448 mtctr r0 448 mtctr r0
449 li r0,0
44953: COPY_16_BYTES_WITHEX(0) 45053: COPY_16_BYTES_WITHEX(0)
450 bdnz 53b 451 bdnz 53b
451 452
@@ -564,7 +565,9 @@ _GLOBAL(__copy_tofrom_user)
564/* or write fault in cacheline loop */ 565/* or write fault in cacheline loop */
565105: li r9,1 566105: li r9,1
56692: li r3,LG_CACHELINE_BYTES 56792: li r3,LG_CACHELINE_BYTES
567 b 99f 568 mfctr r8
569 add r0,r0,r8
570 b 106f
568/* read fault in final word loop */ 571/* read fault in final word loop */
569108: li r9,0 572108: li r9,0
570 b 93f 573 b 93f
@@ -585,7 +588,7 @@ _GLOBAL(__copy_tofrom_user)
585 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write. 588 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
586 */ 589 */
58799: mfctr r0 59099: mfctr r0
588 slw r3,r0,r3 591106: slw r3,r0,r3
589 add. r3,r3,r5 592 add. r3,r3,r5
590 beq 120f /* shouldn't happen */ 593 beq 120f /* shouldn't happen */
591 cmpwi 0,r9,0 594 cmpwi 0,r9,0
diff --git a/arch/ppc/mm/init.c b/arch/ppc/mm/init.c
index be02a7fec2b7..363c157e3617 100644
--- a/arch/ppc/mm/init.c
+++ b/arch/ppc/mm/init.c
@@ -179,6 +179,7 @@ void free_initmem(void)
179 if (!have_of) 179 if (!have_of)
180 FREESEC(openfirmware); 180 FREESEC(openfirmware);
181 printk("\n"); 181 printk("\n");
182 ppc_md.progress = NULL;
182#undef FREESEC 183#undef FREESEC
183} 184}
184 185
diff --git a/arch/ppc/syslib/ipic.c b/arch/ppc/syslib/ipic.c
index acb2cde3171f..580ed658e872 100644
--- a/arch/ppc/syslib/ipic.c
+++ b/arch/ppc/syslib/ipic.c
@@ -479,7 +479,7 @@ void __init ipic_init(phys_addr_t phys_addr,
479 temp = 0; 479 temp = 0;
480 for (i = 0 ; i < senses_count ; i++) { 480 for (i = 0 ; i < senses_count ; i++) {
481 if ((senses[i] & IRQ_SENSE_MASK) == IRQ_SENSE_EDGE) { 481 if ((senses[i] & IRQ_SENSE_MASK) == IRQ_SENSE_EDGE) {
482 temp |= 1 << (16 - i); 482 temp |= 1 << (15 - i);
483 if (i != 0) 483 if (i != 0)
484 irq_desc[i + irq_offset + MPC83xx_IRQ_EXT1 - 1].status = 0; 484 irq_desc[i + irq_offset + MPC83xx_IRQ_EXT1 - 1].status = 0;
485 else 485 else
diff --git a/arch/ppc/syslib/mpc83xx_devices.c b/arch/ppc/syslib/mpc83xx_devices.c
index 5c1a919eaabf..75c8e9834ae7 100644
--- a/arch/ppc/syslib/mpc83xx_devices.c
+++ b/arch/ppc/syslib/mpc83xx_devices.c
@@ -61,6 +61,7 @@ static struct plat_serial8250_port serial_platform_data[] = {
61 .iotype = UPIO_MEM, 61 .iotype = UPIO_MEM,
62 .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, 62 .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
63 }, 63 },
64 { },
64}; 65};
65 66
66struct platform_device ppc_sys_platform_devices[] = { 67struct platform_device ppc_sys_platform_devices[] = {
diff --git a/arch/ppc/syslib/mpc85xx_devices.c b/arch/ppc/syslib/mpc85xx_devices.c
index a231795ee26f..1e658ef57e75 100644
--- a/arch/ppc/syslib/mpc85xx_devices.c
+++ b/arch/ppc/syslib/mpc85xx_devices.c
@@ -61,6 +61,7 @@ static struct plat_serial8250_port serial_platform_data[] = {
61 .iotype = UPIO_MEM, 61 .iotype = UPIO_MEM,
62 .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ, 62 .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ,
63 }, 63 },
64 { },
64}; 65};
65 66
66struct platform_device ppc_sys_platform_devices[] = { 67struct platform_device ppc_sys_platform_devices[] = {
diff --git a/arch/ppc/syslib/open_pic.c b/arch/ppc/syslib/open_pic.c
index 7619e16fccae..9d4ed68b5804 100644
--- a/arch/ppc/syslib/open_pic.c
+++ b/arch/ppc/syslib/open_pic.c
@@ -557,12 +557,10 @@ static void __init openpic_initipi(u_int ipi, u_int pri, u_int vec)
557 */ 557 */
558void openpic_cause_IPI(u_int ipi, cpumask_t cpumask) 558void openpic_cause_IPI(u_int ipi, cpumask_t cpumask)
559{ 559{
560 cpumask_t phys;
561 DECL_THIS_CPU; 560 DECL_THIS_CPU;
562 561
563 CHECK_THIS_CPU; 562 CHECK_THIS_CPU;
564 check_arg_ipi(ipi); 563 check_arg_ipi(ipi);
565 phys = physmask(cpumask);
566 openpic_write(&OpenPIC->THIS_CPU.IPI_Dispatch(ipi), 564 openpic_write(&OpenPIC->THIS_CPU.IPI_Dispatch(ipi),
567 cpus_addr(physmask(cpumask))[0]); 565 cpus_addr(physmask(cpumask))[0]);
568} 566}
diff --git a/arch/ppc64/kernel/mf.c b/arch/ppc64/kernel/mf.c
index 1bd52ece497c..5aca7e8005a8 100644
--- a/arch/ppc64/kernel/mf.c
+++ b/arch/ppc64/kernel/mf.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * mf.c 2 * mf.c
3 * Copyright (C) 2001 Troy D. Armstrong IBM Corporation 3 * Copyright (C) 2001 Troy D. Armstrong IBM Corporation
4 * Copyright (C) 2004 Stephen Rothwell IBM Corporation 4 * Copyright (C) 2004-2005 Stephen Rothwell IBM Corporation
5 * 5 *
6 * This modules exists as an interface between a Linux secondary partition 6 * This modules exists as an interface between a Linux secondary partition
7 * running on an iSeries and the primary partition's Virtual Service 7 * running on an iSeries and the primary partition's Virtual Service
@@ -36,10 +36,12 @@
36 36
37#include <asm/time.h> 37#include <asm/time.h>
38#include <asm/uaccess.h> 38#include <asm/uaccess.h>
39#include <asm/paca.h>
39#include <asm/iSeries/vio.h> 40#include <asm/iSeries/vio.h>
40#include <asm/iSeries/mf.h> 41#include <asm/iSeries/mf.h>
41#include <asm/iSeries/HvLpConfig.h> 42#include <asm/iSeries/HvLpConfig.h>
42#include <asm/iSeries/ItSpCommArea.h> 43#include <asm/iSeries/ItSpCommArea.h>
44#include <asm/iSeries/ItLpQueue.h>
43 45
44/* 46/*
45 * This is the structure layout for the Machine Facilites LPAR event 47 * This is the structure layout for the Machine Facilites LPAR event
@@ -696,36 +698,23 @@ static void get_rtc_time_complete(void *token, struct ce_msg_data *ce_msg)
696 complete(&rtc->com); 698 complete(&rtc->com);
697} 699}
698 700
699int mf_get_rtc(struct rtc_time *tm) 701static int rtc_set_tm(int rc, u8 *ce_msg, struct rtc_time *tm)
700{ 702{
701 struct ce_msg_comp_data ce_complete;
702 struct rtc_time_data rtc_data;
703 int rc;
704
705 memset(&ce_complete, 0, sizeof(ce_complete));
706 memset(&rtc_data, 0, sizeof(rtc_data));
707 init_completion(&rtc_data.com);
708 ce_complete.handler = &get_rtc_time_complete;
709 ce_complete.token = &rtc_data;
710 rc = signal_ce_msg_simple(0x40, &ce_complete);
711 if (rc)
712 return rc;
713 wait_for_completion(&rtc_data.com);
714 tm->tm_wday = 0; 703 tm->tm_wday = 0;
715 tm->tm_yday = 0; 704 tm->tm_yday = 0;
716 tm->tm_isdst = 0; 705 tm->tm_isdst = 0;
717 if (rtc_data.rc) { 706 if (rc) {
718 tm->tm_sec = 0; 707 tm->tm_sec = 0;
719 tm->tm_min = 0; 708 tm->tm_min = 0;
720 tm->tm_hour = 0; 709 tm->tm_hour = 0;
721 tm->tm_mday = 15; 710 tm->tm_mday = 15;
722 tm->tm_mon = 5; 711 tm->tm_mon = 5;
723 tm->tm_year = 52; 712 tm->tm_year = 52;
724 return rtc_data.rc; 713 return rc;
725 } 714 }
726 715
727 if ((rtc_data.ce_msg.ce_msg[2] == 0xa9) || 716 if ((ce_msg[2] == 0xa9) ||
728 (rtc_data.ce_msg.ce_msg[2] == 0xaf)) { 717 (ce_msg[2] == 0xaf)) {
729 /* TOD clock is not set */ 718 /* TOD clock is not set */
730 tm->tm_sec = 1; 719 tm->tm_sec = 1;
731 tm->tm_min = 1; 720 tm->tm_min = 1;
@@ -736,7 +725,6 @@ int mf_get_rtc(struct rtc_time *tm)
736 mf_set_rtc(tm); 725 mf_set_rtc(tm);
737 } 726 }
738 { 727 {
739 u8 *ce_msg = rtc_data.ce_msg.ce_msg;
740 u8 year = ce_msg[5]; 728 u8 year = ce_msg[5];
741 u8 sec = ce_msg[6]; 729 u8 sec = ce_msg[6];
742 u8 min = ce_msg[7]; 730 u8 min = ce_msg[7];
@@ -765,6 +753,63 @@ int mf_get_rtc(struct rtc_time *tm)
765 return 0; 753 return 0;
766} 754}
767 755
756int mf_get_rtc(struct rtc_time *tm)
757{
758 struct ce_msg_comp_data ce_complete;
759 struct rtc_time_data rtc_data;
760 int rc;
761
762 memset(&ce_complete, 0, sizeof(ce_complete));
763 memset(&rtc_data, 0, sizeof(rtc_data));
764 init_completion(&rtc_data.com);
765 ce_complete.handler = &get_rtc_time_complete;
766 ce_complete.token = &rtc_data;
767 rc = signal_ce_msg_simple(0x40, &ce_complete);
768 if (rc)
769 return rc;
770 wait_for_completion(&rtc_data.com);
771 return rtc_set_tm(rtc_data.rc, rtc_data.ce_msg.ce_msg, tm);
772}
773
774struct boot_rtc_time_data {
775 int busy;
776 struct ce_msg_data ce_msg;
777 int rc;
778};
779
780static void get_boot_rtc_time_complete(void *token, struct ce_msg_data *ce_msg)
781{
782 struct boot_rtc_time_data *rtc = token;
783
784 memcpy(&rtc->ce_msg, ce_msg, sizeof(rtc->ce_msg));
785 rtc->rc = 0;
786 rtc->busy = 0;
787}
788
789int mf_get_boot_rtc(struct rtc_time *tm)
790{
791 struct ce_msg_comp_data ce_complete;
792 struct boot_rtc_time_data rtc_data;
793 int rc;
794
795 memset(&ce_complete, 0, sizeof(ce_complete));
796 memset(&rtc_data, 0, sizeof(rtc_data));
797 rtc_data.busy = 1;
798 ce_complete.handler = &get_boot_rtc_time_complete;
799 ce_complete.token = &rtc_data;
800 rc = signal_ce_msg_simple(0x40, &ce_complete);
801 if (rc)
802 return rc;
803 /* We need to poll here as we are not yet taking interrupts */
804 while (rtc_data.busy) {
805 extern unsigned long lpevent_count;
806 struct ItLpQueue *lpq = get_paca()->lpqueue_ptr;
807 if (lpq && ItLpQueue_isLpIntPending(lpq))
808 lpevent_count += ItLpQueue_process(lpq, NULL);
809 }
810 return rtc_set_tm(rtc_data.rc, rtc_data.ce_msg.ce_msg, tm);
811}
812
768int mf_set_rtc(struct rtc_time *tm) 813int mf_set_rtc(struct rtc_time *tm)
769{ 814{
770 char ce_time[12]; 815 char ce_time[12];
diff --git a/arch/ppc64/kernel/pmac_smp.c b/arch/ppc64/kernel/pmac_smp.c
index c27588ede2fe..a23de37227bf 100644
--- a/arch/ppc64/kernel/pmac_smp.c
+++ b/arch/ppc64/kernel/pmac_smp.c
@@ -68,6 +68,7 @@ extern struct smp_ops_t *smp_ops;
68 68
69static void (*pmac_tb_freeze)(int freeze); 69static void (*pmac_tb_freeze)(int freeze);
70static struct device_node *pmac_tb_clock_chip_host; 70static struct device_node *pmac_tb_clock_chip_host;
71static u8 pmac_tb_pulsar_addr;
71static DEFINE_SPINLOCK(timebase_lock); 72static DEFINE_SPINLOCK(timebase_lock);
72static unsigned long timebase; 73static unsigned long timebase;
73 74
@@ -106,12 +107,9 @@ static void smp_core99_pulsar_tb_freeze(int freeze)
106 u8 data; 107 u8 data;
107 int rc; 108 int rc;
108 109
109 /* Strangely, the device-tree says address is 0xd2, but darwin
110 * accesses 0xd0 ...
111 */
112 pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_combined); 110 pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_combined);
113 rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host, 111 rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host,
114 0xd4 | pmac_low_i2c_read, 112 pmac_tb_pulsar_addr | pmac_low_i2c_read,
115 0x2e, &data, 1); 113 0x2e, &data, 1);
116 if (rc != 0) 114 if (rc != 0)
117 goto bail; 115 goto bail;
@@ -120,7 +118,7 @@ static void smp_core99_pulsar_tb_freeze(int freeze)
120 118
121 pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_stdsub); 119 pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_stdsub);
122 rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host, 120 rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host,
123 0xd4 | pmac_low_i2c_write, 121 pmac_tb_pulsar_addr | pmac_low_i2c_write,
124 0x2e, &data, 1); 122 0x2e, &data, 1);
125 bail: 123 bail:
126 if (rc != 0) { 124 if (rc != 0) {
@@ -185,6 +183,12 @@ static int __init smp_core99_probe(void)
185 if (ncpus <= 1) 183 if (ncpus <= 1)
186 return 1; 184 return 1;
187 185
186 /* HW sync only on these platforms */
187 if (!machine_is_compatible("PowerMac7,2") &&
188 !machine_is_compatible("PowerMac7,3") &&
189 !machine_is_compatible("RackMac3,1"))
190 goto nohwsync;
191
188 /* Look for the clock chip */ 192 /* Look for the clock chip */
189 for (cc = NULL; (cc = of_find_node_by_name(cc, "i2c-hwclock")) != NULL;) { 193 for (cc = NULL; (cc = of_find_node_by_name(cc, "i2c-hwclock")) != NULL;) {
190 struct device_node *p = of_get_parent(cc); 194 struct device_node *p = of_get_parent(cc);
@@ -198,11 +202,18 @@ static int __init smp_core99_probe(void)
198 goto next; 202 goto next;
199 switch (*reg) { 203 switch (*reg) {
200 case 0xd2: 204 case 0xd2:
201 pmac_tb_freeze = smp_core99_cypress_tb_freeze; 205 if (device_is_compatible(cc, "pulsar-legacy-slewing")) {
202 printk(KERN_INFO "Timebase clock is Cypress chip\n"); 206 pmac_tb_freeze = smp_core99_pulsar_tb_freeze;
207 pmac_tb_pulsar_addr = 0xd2;
208 printk(KERN_INFO "Timebase clock is Pulsar chip\n");
209 } else if (device_is_compatible(cc, "cy28508")) {
210 pmac_tb_freeze = smp_core99_cypress_tb_freeze;
211 printk(KERN_INFO "Timebase clock is Cypress chip\n");
212 }
203 break; 213 break;
204 case 0xd4: 214 case 0xd4:
205 pmac_tb_freeze = smp_core99_pulsar_tb_freeze; 215 pmac_tb_freeze = smp_core99_pulsar_tb_freeze;
216 pmac_tb_pulsar_addr = 0xd4;
206 printk(KERN_INFO "Timebase clock is Pulsar chip\n"); 217 printk(KERN_INFO "Timebase clock is Pulsar chip\n");
207 break; 218 break;
208 } 219 }
@@ -210,12 +221,15 @@ static int __init smp_core99_probe(void)
210 pmac_tb_clock_chip_host = p; 221 pmac_tb_clock_chip_host = p;
211 smp_ops->give_timebase = smp_core99_give_timebase; 222 smp_ops->give_timebase = smp_core99_give_timebase;
212 smp_ops->take_timebase = smp_core99_take_timebase; 223 smp_ops->take_timebase = smp_core99_take_timebase;
224 of_node_put(cc);
225 of_node_put(p);
213 break; 226 break;
214 } 227 }
215 next: 228 next:
216 of_node_put(p); 229 of_node_put(p);
217 } 230 }
218 231
232 nohwsync:
219 mpic_request_ipis(); 233 mpic_request_ipis();
220 234
221 return ncpus; 235 return ncpus;
diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c
index 35ec42de962e..6f79b7b9b445 100644
--- a/arch/ppc64/kernel/prom_init.c
+++ b/arch/ppc64/kernel/prom_init.c
@@ -1750,7 +1750,44 @@ static void __init flatten_device_tree(void)
1750 prom_printf("Device tree struct 0x%x -> 0x%x\n", 1750 prom_printf("Device tree struct 0x%x -> 0x%x\n",
1751 RELOC(dt_struct_start), RELOC(dt_struct_end)); 1751 RELOC(dt_struct_start), RELOC(dt_struct_end));
1752 1752
1753 } 1753}
1754
1755
1756static void __init fixup_device_tree(void)
1757{
1758 unsigned long offset = reloc_offset();
1759 phandle u3, i2c, mpic;
1760 u32 u3_rev;
1761 u32 interrupts[2];
1762 u32 parent;
1763
1764 /* Some G5s have a missing interrupt definition, fix it up here */
1765 u3 = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000"));
1766 if ((long)u3 <= 0)
1767 return;
1768 i2c = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/i2c@f8001000"));
1769 if ((long)i2c <= 0)
1770 return;
1771 mpic = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/mpic@f8040000"));
1772 if ((long)mpic <= 0)
1773 return;
1774
1775 /* check if proper rev of u3 */
1776 if (prom_getprop(u3, "device-rev", &u3_rev, sizeof(u3_rev)) <= 0)
1777 return;
1778 if (u3_rev != 0x35)
1779 return;
1780 /* does it need fixup ? */
1781 if (prom_getproplen(i2c, "interrupts") > 0)
1782 return;
1783 /* interrupt on this revision of u3 is number 0 and level */
1784 interrupts[0] = 0;
1785 interrupts[1] = 1;
1786 prom_setprop(i2c, "interrupts", &interrupts, sizeof(interrupts));
1787 parent = (u32)mpic;
1788 prom_setprop(i2c, "interrupt-parent", &parent, sizeof(parent));
1789}
1790
1754 1791
1755static void __init prom_find_boot_cpu(void) 1792static void __init prom_find_boot_cpu(void)
1756{ 1793{
@@ -1920,6 +1957,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long
1920 } 1957 }
1921 1958
1922 /* 1959 /*
1960 * Fixup any known bugs in the device-tree
1961 */
1962 fixup_device_tree();
1963
1964 /*
1923 * Now finally create the flattened device-tree 1965 * Now finally create the flattened device-tree
1924 */ 1966 */
1925 prom_printf("copying OF device tree ...\n"); 1967 prom_printf("copying OF device tree ...\n");
diff --git a/arch/ppc64/kernel/rtc.c b/arch/ppc64/kernel/rtc.c
index 3e70b91375fc..67989055a9fe 100644
--- a/arch/ppc64/kernel/rtc.c
+++ b/arch/ppc64/kernel/rtc.c
@@ -292,47 +292,10 @@ int iSeries_set_rtc_time(struct rtc_time *tm)
292 292
293void iSeries_get_boot_time(struct rtc_time *tm) 293void iSeries_get_boot_time(struct rtc_time *tm)
294{ 294{
295 unsigned long time;
296 static unsigned long lastsec = 1;
297
298 u32 dataWord1 = *((u32 *)(&xSpCommArea.xBcdTimeAtIplStart));
299 u32 dataWord2 = *(((u32 *)&(xSpCommArea.xBcdTimeAtIplStart)) + 1);
300 int year = 1970;
301 int year1 = ( dataWord1 >> 24 ) & 0x000000FF;
302 int year2 = ( dataWord1 >> 16 ) & 0x000000FF;
303 int sec = ( dataWord1 >> 8 ) & 0x000000FF;
304 int min = dataWord1 & 0x000000FF;
305 int hour = ( dataWord2 >> 24 ) & 0x000000FF;
306 int day = ( dataWord2 >> 8 ) & 0x000000FF;
307 int mon = dataWord2 & 0x000000FF;
308
309 if ( piranha_simulator ) 295 if ( piranha_simulator )
310 return; 296 return;
311 297
312 BCD_TO_BIN(sec); 298 mf_get_boot_rtc(tm);
313 BCD_TO_BIN(min);
314 BCD_TO_BIN(hour);
315 BCD_TO_BIN(day);
316 BCD_TO_BIN(mon);
317 BCD_TO_BIN(year1);
318 BCD_TO_BIN(year2);
319 year = year1 * 100 + year2;
320
321 time = mktime(year, mon, day, hour, min, sec);
322 time += ( jiffies / HZ );
323
324 /* Now THIS is a nasty hack!
325 * It ensures that the first two calls get different answers.
326 * That way the loop in init_time (time.c) will not think
327 * the clock is stuck.
328 */
329 if ( lastsec ) {
330 time -= lastsec;
331 --lastsec;
332 }
333
334 to_tm(time, tm);
335 tm->tm_year -= 1900;
336 tm->tm_mon -= 1; 299 tm->tm_mon -= 1;
337} 300}
338#endif 301#endif
diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c
index 772a465b49f9..3d54745108c7 100644
--- a/arch/ppc64/kernel/time.c
+++ b/arch/ppc64/kernel/time.c
@@ -515,6 +515,7 @@ void __init time_init(void)
515 do_gtod.varp = &do_gtod.vars[0]; 515 do_gtod.varp = &do_gtod.vars[0];
516 do_gtod.var_idx = 0; 516 do_gtod.var_idx = 0;
517 do_gtod.varp->tb_orig_stamp = tb_last_stamp; 517 do_gtod.varp->tb_orig_stamp = tb_last_stamp;
518 get_paca()->next_jiffy_update_tb = tb_last_stamp + tb_ticks_per_jiffy;
518 do_gtod.varp->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC; 519 do_gtod.varp->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC;
519 do_gtod.tb_ticks_per_sec = tb_ticks_per_sec; 520 do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
520 do_gtod.varp->tb_to_xs = tb_to_xs; 521 do_gtod.varp->tb_to_xs = tb_to_xs;
diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c
index 292983413ae2..33ca56c90da2 100644
--- a/arch/sparc64/kernel/pci_iommu.c
+++ b/arch/sparc64/kernel/pci_iommu.c
@@ -8,6 +8,7 @@
8#include <linux/kernel.h> 8#include <linux/kernel.h>
9#include <linux/sched.h> 9#include <linux/sched.h>
10#include <linux/mm.h> 10#include <linux/mm.h>
11#include <linux/delay.h>
11 12
12#include <asm/pbm.h> 13#include <asm/pbm.h>
13 14
@@ -379,6 +380,56 @@ bad:
379 return PCI_DMA_ERROR_CODE; 380 return PCI_DMA_ERROR_CODE;
380} 381}
381 382
383static void pci_strbuf_flush(struct pci_strbuf *strbuf, struct pci_iommu *iommu, u32 vaddr, unsigned long ctx, unsigned long npages)
384{
385 int limit;
386
387 PCI_STC_FLUSHFLAG_INIT(strbuf);
388 if (strbuf->strbuf_ctxflush &&
389 iommu->iommu_ctxflush) {
390 unsigned long matchreg, flushreg;
391
392 flushreg = strbuf->strbuf_ctxflush;
393 matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);
394
395 limit = 100000;
396 pci_iommu_write(flushreg, ctx);
397 for(;;) {
398 if (((long)pci_iommu_read(matchreg)) >= 0L)
399 break;
400 limit--;
401 if (!limit)
402 break;
403 udelay(1);
404 }
405 if (!limit)
406 printk(KERN_WARNING "pci_strbuf_flush: ctx flush "
407 "timeout vaddr[%08x] ctx[%lx]\n",
408 vaddr, ctx);
409 } else {
410 unsigned long i;
411
412 for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE)
413 pci_iommu_write(strbuf->strbuf_pflush, vaddr);
414 }
415
416 pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
417 (void) pci_iommu_read(iommu->write_complete_reg);
418
419 limit = 100000;
420 while (!PCI_STC_FLUSHFLAG_SET(strbuf)) {
421 limit--;
422 if (!limit)
423 break;
424 udelay(1);
425 membar("#LoadLoad");
426 }
427 if (!limit)
428 printk(KERN_WARNING "pci_strbuf_flush: flushflag timeout "
429 "vaddr[%08x] ctx[%lx] npages[%ld]\n",
430 vaddr, ctx, npages);
431}
432
382/* Unmap a single streaming mode DMA translation. */ 433/* Unmap a single streaming mode DMA translation. */
383void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction) 434void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
384{ 435{
@@ -386,7 +437,7 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int
386 struct pci_iommu *iommu; 437 struct pci_iommu *iommu;
387 struct pci_strbuf *strbuf; 438 struct pci_strbuf *strbuf;
388 iopte_t *base; 439 iopte_t *base;
389 unsigned long flags, npages, i, ctx; 440 unsigned long flags, npages, ctx;
390 441
391 if (direction == PCI_DMA_NONE) 442 if (direction == PCI_DMA_NONE)
392 BUG(); 443 BUG();
@@ -414,29 +465,8 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int
414 ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL; 465 ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
415 466
416 /* Step 1: Kick data out of streaming buffers if necessary. */ 467 /* Step 1: Kick data out of streaming buffers if necessary. */
417 if (strbuf->strbuf_enabled) { 468 if (strbuf->strbuf_enabled)
418 u32 vaddr = bus_addr; 469 pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);
419
420 PCI_STC_FLUSHFLAG_INIT(strbuf);
421 if (strbuf->strbuf_ctxflush &&
422 iommu->iommu_ctxflush) {
423 unsigned long matchreg, flushreg;
424
425 flushreg = strbuf->strbuf_ctxflush;
426 matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);
427 do {
428 pci_iommu_write(flushreg, ctx);
429 } while(((long)pci_iommu_read(matchreg)) < 0L);
430 } else {
431 for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE)
432 pci_iommu_write(strbuf->strbuf_pflush, vaddr);
433 }
434
435 pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
436 (void) pci_iommu_read(iommu->write_complete_reg);
437 while (!PCI_STC_FLUSHFLAG_SET(strbuf))
438 membar("#LoadLoad");
439 }
440 470
441 /* Step 2: Clear out first TSB entry. */ 471 /* Step 2: Clear out first TSB entry. */
442 iopte_make_dummy(iommu, base); 472 iopte_make_dummy(iommu, base);
@@ -647,29 +677,8 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
647 ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL; 677 ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
648 678
649 /* Step 1: Kick data out of streaming buffers if necessary. */ 679 /* Step 1: Kick data out of streaming buffers if necessary. */
650 if (strbuf->strbuf_enabled) { 680 if (strbuf->strbuf_enabled)
651 u32 vaddr = (u32) bus_addr; 681 pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);
652
653 PCI_STC_FLUSHFLAG_INIT(strbuf);
654 if (strbuf->strbuf_ctxflush &&
655 iommu->iommu_ctxflush) {
656 unsigned long matchreg, flushreg;
657
658 flushreg = strbuf->strbuf_ctxflush;
659 matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);
660 do {
661 pci_iommu_write(flushreg, ctx);
662 } while(((long)pci_iommu_read(matchreg)) < 0L);
663 } else {
664 for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE)
665 pci_iommu_write(strbuf->strbuf_pflush, vaddr);
666 }
667
668 pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
669 (void) pci_iommu_read(iommu->write_complete_reg);
670 while (!PCI_STC_FLUSHFLAG_SET(strbuf))
671 membar("#LoadLoad");
672 }
673 682
674 /* Step 2: Clear out first TSB entry. */ 683 /* Step 2: Clear out first TSB entry. */
675 iopte_make_dummy(iommu, base); 684 iopte_make_dummy(iommu, base);
@@ -715,28 +724,7 @@ void pci_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size
715 } 724 }
716 725
717 /* Step 2: Kick data out of streaming buffers. */ 726 /* Step 2: Kick data out of streaming buffers. */
718 PCI_STC_FLUSHFLAG_INIT(strbuf); 727 pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);
719 if (iommu->iommu_ctxflush &&
720 strbuf->strbuf_ctxflush) {
721 unsigned long matchreg, flushreg;
722
723 flushreg = strbuf->strbuf_ctxflush;
724 matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);
725 do {
726 pci_iommu_write(flushreg, ctx);
727 } while(((long)pci_iommu_read(matchreg)) < 0L);
728 } else {
729 unsigned long i;
730
731 for (i = 0; i < npages; i++, bus_addr += IO_PAGE_SIZE)
732 pci_iommu_write(strbuf->strbuf_pflush, bus_addr);
733 }
734
735 /* Step 3: Perform flush synchronization sequence. */
736 pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
737 (void) pci_iommu_read(iommu->write_complete_reg);
738 while (!PCI_STC_FLUSHFLAG_SET(strbuf))
739 membar("#LoadLoad");
740 728
741 spin_unlock_irqrestore(&iommu->lock, flags); 729 spin_unlock_irqrestore(&iommu->lock, flags);
742} 730}
@@ -749,7 +737,8 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, i
749 struct pcidev_cookie *pcp; 737 struct pcidev_cookie *pcp;
750 struct pci_iommu *iommu; 738 struct pci_iommu *iommu;
751 struct pci_strbuf *strbuf; 739 struct pci_strbuf *strbuf;
752 unsigned long flags, ctx; 740 unsigned long flags, ctx, npages, i;
741 u32 bus_addr;
753 742
754 pcp = pdev->sysdata; 743 pcp = pdev->sysdata;
755 iommu = pcp->pbm->iommu; 744 iommu = pcp->pbm->iommu;
@@ -772,36 +761,14 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, i
772 } 761 }
773 762
774 /* Step 2: Kick data out of streaming buffers. */ 763 /* Step 2: Kick data out of streaming buffers. */
775 PCI_STC_FLUSHFLAG_INIT(strbuf); 764 bus_addr = sglist[0].dma_address & IO_PAGE_MASK;
776 if (iommu->iommu_ctxflush && 765 for(i = 1; i < nelems; i++)
777 strbuf->strbuf_ctxflush) { 766 if (!sglist[i].dma_length)
778 unsigned long matchreg, flushreg; 767 break;
779 768 i--;
780 flushreg = strbuf->strbuf_ctxflush; 769 npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length)
781 matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx); 770 - bus_addr) >> IO_PAGE_SHIFT;
782 do { 771 pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);
783 pci_iommu_write(flushreg, ctx);
784 } while (((long)pci_iommu_read(matchreg)) < 0L);
785 } else {
786 unsigned long i, npages;
787 u32 bus_addr;
788
789 bus_addr = sglist[0].dma_address & IO_PAGE_MASK;
790
791 for(i = 1; i < nelems; i++)
792 if (!sglist[i].dma_length)
793 break;
794 i--;
795 npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> IO_PAGE_SHIFT;
796 for (i = 0; i < npages; i++, bus_addr += IO_PAGE_SIZE)
797 pci_iommu_write(strbuf->strbuf_pflush, bus_addr);
798 }
799
800 /* Step 3: Perform flush synchronization sequence. */
801 pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
802 (void) pci_iommu_read(iommu->write_complete_reg);
803 while (!PCI_STC_FLUSHFLAG_SET(strbuf))
804 membar("#LoadLoad");
805 772
806 spin_unlock_irqrestore(&iommu->lock, flags); 773 spin_unlock_irqrestore(&iommu->lock, flags);
807} 774}
diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c
index 14d9c3a21b9a..76ea6455433f 100644
--- a/arch/sparc64/kernel/sbus.c
+++ b/arch/sparc64/kernel/sbus.c
@@ -117,19 +117,34 @@ static void iommu_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages
117 117
118#define STRBUF_TAG_VALID 0x02UL 118#define STRBUF_TAG_VALID 0x02UL
119 119
120static void strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages) 120static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages)
121{ 121{
122 unsigned long n;
123 int limit;
124
122 iommu->strbuf_flushflag = 0UL; 125 iommu->strbuf_flushflag = 0UL;
123 while (npages--) 126 n = npages;
124 upa_writeq(base + (npages << IO_PAGE_SHIFT), 127 while (n--)
128 upa_writeq(base + (n << IO_PAGE_SHIFT),
125 iommu->strbuf_regs + STRBUF_PFLUSH); 129 iommu->strbuf_regs + STRBUF_PFLUSH);
126 130
127 /* Whoopee cushion! */ 131 /* Whoopee cushion! */
128 upa_writeq(__pa(&iommu->strbuf_flushflag), 132 upa_writeq(__pa(&iommu->strbuf_flushflag),
129 iommu->strbuf_regs + STRBUF_FSYNC); 133 iommu->strbuf_regs + STRBUF_FSYNC);
130 upa_readq(iommu->sbus_control_reg); 134 upa_readq(iommu->sbus_control_reg);
131 while (iommu->strbuf_flushflag == 0UL) 135
136 limit = 100000;
137 while (iommu->strbuf_flushflag == 0UL) {
138 limit--;
139 if (!limit)
140 break;
141 udelay(1);
132 membar("#LoadLoad"); 142 membar("#LoadLoad");
143 }
144 if (!limit)
145 printk(KERN_WARNING "sbus_strbuf_flush: flushflag timeout "
146 "vaddr[%08x] npages[%ld]\n",
147 base, npages);
133} 148}
134 149
135static iopte_t *alloc_streaming_cluster(struct sbus_iommu *iommu, unsigned long npages) 150static iopte_t *alloc_streaming_cluster(struct sbus_iommu *iommu, unsigned long npages)
@@ -406,7 +421,7 @@ void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t dma_addr, size_t size,
406 421
407 spin_lock_irqsave(&iommu->lock, flags); 422 spin_lock_irqsave(&iommu->lock, flags);
408 free_streaming_cluster(iommu, dma_base, size >> IO_PAGE_SHIFT); 423 free_streaming_cluster(iommu, dma_base, size >> IO_PAGE_SHIFT);
409 strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT); 424 sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT);
410 spin_unlock_irqrestore(&iommu->lock, flags); 425 spin_unlock_irqrestore(&iommu->lock, flags);
411} 426}
412 427
@@ -569,7 +584,7 @@ void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int
569 iommu = sdev->bus->iommu; 584 iommu = sdev->bus->iommu;
570 spin_lock_irqsave(&iommu->lock, flags); 585 spin_lock_irqsave(&iommu->lock, flags);
571 free_streaming_cluster(iommu, dvma_base, size >> IO_PAGE_SHIFT); 586 free_streaming_cluster(iommu, dvma_base, size >> IO_PAGE_SHIFT);
572 strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT); 587 sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT);
573 spin_unlock_irqrestore(&iommu->lock, flags); 588 spin_unlock_irqrestore(&iommu->lock, flags);
574} 589}
575 590
@@ -581,7 +596,7 @@ void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t base, size_t
581 size = (IO_PAGE_ALIGN(base + size) - (base & IO_PAGE_MASK)); 596 size = (IO_PAGE_ALIGN(base + size) - (base & IO_PAGE_MASK));
582 597
583 spin_lock_irqsave(&iommu->lock, flags); 598 spin_lock_irqsave(&iommu->lock, flags);
584 strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT); 599 sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT);
585 spin_unlock_irqrestore(&iommu->lock, flags); 600 spin_unlock_irqrestore(&iommu->lock, flags);
586} 601}
587 602
@@ -605,7 +620,7 @@ void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sg, int
605 size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - base; 620 size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - base;
606 621
607 spin_lock_irqsave(&iommu->lock, flags); 622 spin_lock_irqsave(&iommu->lock, flags);
608 strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT); 623 sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT);
609 spin_unlock_irqrestore(&iommu->lock, flags); 624 spin_unlock_irqrestore(&iommu->lock, flags);
610} 625}
611 626
diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
index 12c3d84b7460..b7e6a91952b2 100644
--- a/arch/sparc64/kernel/setup.c
+++ b/arch/sparc64/kernel/setup.c
@@ -383,6 +383,17 @@ static void __init process_switch(char c)
383 /* Use PROM debug console. */ 383 /* Use PROM debug console. */
384 register_console(&prom_debug_console); 384 register_console(&prom_debug_console);
385 break; 385 break;
386 case 'P':
387 /* Force UltraSPARC-III P-Cache on. */
388 if (tlb_type != cheetah) {
389 printk("BOOT: Ignoring P-Cache force option.\n");
390 break;
391 }
392 cheetah_pcache_forced_on = 1;
393 add_taint(TAINT_MACHINE_CHECK);
394 cheetah_enable_pcache();
395 break;
396
386 default: 397 default:
387 printk("Unknown boot switch (-%c)\n", c); 398 printk("Unknown boot switch (-%c)\n", c);
388 break; 399 break;
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 6dff06a44e76..e5b9c7a27789 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -123,6 +123,9 @@ void __init smp_callin(void)
123 123
124 smp_setup_percpu_timer(); 124 smp_setup_percpu_timer();
125 125
126 if (cheetah_pcache_forced_on)
127 cheetah_enable_pcache();
128
126 local_irq_enable(); 129 local_irq_enable();
127 130
128 calibrate_delay(); 131 calibrate_delay();
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 56b203a2af69..a9f4596d7c2b 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -421,6 +421,25 @@ asmlinkage void cee_log(unsigned long ce_status,
421 } 421 }
422} 422}
423 423
424int cheetah_pcache_forced_on;
425
426void cheetah_enable_pcache(void)
427{
428 unsigned long dcr;
429
430 printk("CHEETAH: Enabling P-Cache on cpu %d.\n",
431 smp_processor_id());
432
433 __asm__ __volatile__("ldxa [%%g0] %1, %0"
434 : "=r" (dcr)
435 : "i" (ASI_DCU_CONTROL_REG));
436 dcr |= (DCU_PE | DCU_HPE | DCU_SPE | DCU_SL);
437 __asm__ __volatile__("stxa %0, [%%g0] %1\n\t"
438 "membar #Sync"
439 : /* no outputs */
440 : "r" (dcr), "i" (ASI_DCU_CONTROL_REG));
441}
442
424/* Cheetah error trap handling. */ 443/* Cheetah error trap handling. */
425static unsigned long ecache_flush_physbase; 444static unsigned long ecache_flush_physbase;
426static unsigned long ecache_flush_linesize; 445static unsigned long ecache_flush_linesize;
diff --git a/arch/um/Kconfig_x86_64 b/arch/um/Kconfig_x86_64
index fd8d7e8982b1..f162f50f0b17 100644
--- a/arch/um/Kconfig_x86_64
+++ b/arch/um/Kconfig_x86_64
@@ -6,6 +6,10 @@ config 64BIT
6 bool 6 bool
7 default y 7 default y
8 8
9config TOP_ADDR
10 hex
11 default 0x80000000
12
9config 3_LEVEL_PGTABLES 13config 3_LEVEL_PGTABLES
10 bool 14 bool
11 default y 15 default y
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index 0150038af795..14a12d6b3df6 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -20,9 +20,17 @@
20#include "os.h" 20#include "os.h"
21 21
22#ifdef CONFIG_NOCONFIG_CHAN 22#ifdef CONFIG_NOCONFIG_CHAN
23
24/* The printk's here are wrong because we are complaining that there is no
25 * output device, but printk is printing to that output device. The user will
26 * never see the error. printf would be better, except it can't run on a
27 * kernel stack because it will overflow it.
28 * Use printk for now since that will avoid crashing.
29 */
30
23static void *not_configged_init(char *str, int device, struct chan_opts *opts) 31static void *not_configged_init(char *str, int device, struct chan_opts *opts)
24{ 32{
25 printf(KERN_ERR "Using a channel type which is configured out of " 33 printk(KERN_ERR "Using a channel type which is configured out of "
26 "UML\n"); 34 "UML\n");
27 return(NULL); 35 return(NULL);
28} 36}
@@ -30,27 +38,27 @@ static void *not_configged_init(char *str, int device, struct chan_opts *opts)
30static int not_configged_open(int input, int output, int primary, void *data, 38static int not_configged_open(int input, int output, int primary, void *data,
31 char **dev_out) 39 char **dev_out)
32{ 40{
33 printf(KERN_ERR "Using a channel type which is configured out of " 41 printk(KERN_ERR "Using a channel type which is configured out of "
34 "UML\n"); 42 "UML\n");
35 return(-ENODEV); 43 return(-ENODEV);
36} 44}
37 45
38static void not_configged_close(int fd, void *data) 46static void not_configged_close(int fd, void *data)
39{ 47{
40 printf(KERN_ERR "Using a channel type which is configured out of " 48 printk(KERN_ERR "Using a channel type which is configured out of "
41 "UML\n"); 49 "UML\n");
42} 50}
43 51
44static int not_configged_read(int fd, char *c_out, void *data) 52static int not_configged_read(int fd, char *c_out, void *data)
45{ 53{
46 printf(KERN_ERR "Using a channel type which is configured out of " 54 printk(KERN_ERR "Using a channel type which is configured out of "
47 "UML\n"); 55 "UML\n");
48 return(-EIO); 56 return(-EIO);
49} 57}
50 58
51static int not_configged_write(int fd, const char *buf, int len, void *data) 59static int not_configged_write(int fd, const char *buf, int len, void *data)
52{ 60{
53 printf(KERN_ERR "Using a channel type which is configured out of " 61 printk(KERN_ERR "Using a channel type which is configured out of "
54 "UML\n"); 62 "UML\n");
55 return(-EIO); 63 return(-EIO);
56} 64}
@@ -58,7 +66,7 @@ static int not_configged_write(int fd, const char *buf, int len, void *data)
58static int not_configged_console_write(int fd, const char *buf, int len, 66static int not_configged_console_write(int fd, const char *buf, int len,
59 void *data) 67 void *data)
60{ 68{
61 printf(KERN_ERR "Using a channel type which is configured out of " 69 printk(KERN_ERR "Using a channel type which is configured out of "
62 "UML\n"); 70 "UML\n");
63 return(-EIO); 71 return(-EIO);
64} 72}
@@ -66,7 +74,7 @@ static int not_configged_console_write(int fd, const char *buf, int len,
66static int not_configged_window_size(int fd, void *data, unsigned short *rows, 74static int not_configged_window_size(int fd, void *data, unsigned short *rows,
67 unsigned short *cols) 75 unsigned short *cols)
68{ 76{
69 printf(KERN_ERR "Using a channel type which is configured out of " 77 printk(KERN_ERR "Using a channel type which is configured out of "
70 "UML\n"); 78 "UML\n");
71 return(-ENODEV); 79 return(-ENODEV);
72} 80}
diff --git a/arch/um/drivers/mcast_kern.c b/arch/um/drivers/mcast_kern.c
index faf714e87b5b..217438cdef33 100644
--- a/arch/um/drivers/mcast_kern.c
+++ b/arch/um/drivers/mcast_kern.c
@@ -73,7 +73,6 @@ int mcast_setup(char *str, char **mac_out, void *data)
73 struct mcast_init *init = data; 73 struct mcast_init *init = data;
74 char *port_str = NULL, *ttl_str = NULL, *remain; 74 char *port_str = NULL, *ttl_str = NULL, *remain;
75 char *last; 75 char *last;
76 int n;
77 76
78 *init = ((struct mcast_init) 77 *init = ((struct mcast_init)
79 { .addr = "239.192.168.1", 78 { .addr = "239.192.168.1",
@@ -89,13 +88,12 @@ int mcast_setup(char *str, char **mac_out, void *data)
89 } 88 }
90 89
91 if(port_str != NULL){ 90 if(port_str != NULL){
92 n = simple_strtoul(port_str, &last, 10); 91 init->port = simple_strtoul(port_str, &last, 10);
93 if((*last != '\0') || (last == port_str)){ 92 if((*last != '\0') || (last == port_str)){
94 printk(KERN_ERR "mcast_setup - Bad port : '%s'\n", 93 printk(KERN_ERR "mcast_setup - Bad port : '%s'\n",
95 port_str); 94 port_str);
96 return(0); 95 return(0);
97 } 96 }
98 init->port = htons(n);
99 } 97 }
100 98
101 if(ttl_str != NULL){ 99 if(ttl_str != NULL){
diff --git a/arch/um/drivers/mcast_user.c b/arch/um/drivers/mcast_user.c
index 0fe1d9fa9139..7a0d115b29d0 100644
--- a/arch/um/drivers/mcast_user.c
+++ b/arch/um/drivers/mcast_user.c
@@ -38,7 +38,7 @@ static struct sockaddr_in *new_addr(char *addr, unsigned short port)
38 } 38 }
39 sin->sin_family = AF_INET; 39 sin->sin_family = AF_INET;
40 sin->sin_addr.s_addr = in_aton(addr); 40 sin->sin_addr.s_addr = in_aton(addr);
41 sin->sin_port = port; 41 sin->sin_port = htons(port);
42 return(sin); 42 return(sin);
43} 43}
44 44
@@ -55,28 +55,25 @@ static int mcast_open(void *data)
55 struct mcast_data *pri = data; 55 struct mcast_data *pri = data;
56 struct sockaddr_in *sin = pri->mcast_addr; 56 struct sockaddr_in *sin = pri->mcast_addr;
57 struct ip_mreq mreq; 57 struct ip_mreq mreq;
58 int fd, yes = 1; 58 int fd = -EINVAL, yes = 1, err = -EINVAL;;
59 59
60 60
61 if ((sin->sin_addr.s_addr == 0) || (sin->sin_port == 0)) { 61 if ((sin->sin_addr.s_addr == 0) || (sin->sin_port == 0))
62 fd = -EINVAL;
63 goto out; 62 goto out;
64 }
65 63
66 fd = socket(AF_INET, SOCK_DGRAM, 0); 64 fd = socket(AF_INET, SOCK_DGRAM, 0);
65
67 if (fd < 0){ 66 if (fd < 0){
68 printk("mcast_open : data socket failed, errno = %d\n", 67 printk("mcast_open : data socket failed, errno = %d\n",
69 errno); 68 errno);
70 fd = -ENOMEM; 69 fd = -errno;
71 goto out; 70 goto out;
72 } 71 }
73 72
74 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) { 73 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) {
75 printk("mcast_open: SO_REUSEADDR failed, errno = %d\n", 74 printk("mcast_open: SO_REUSEADDR failed, errno = %d\n",
76 errno); 75 errno);
77 os_close_file(fd); 76 goto out_close;
78 fd = -EINVAL;
79 goto out;
80 } 77 }
81 78
82 /* set ttl according to config */ 79 /* set ttl according to config */
@@ -84,26 +81,20 @@ static int mcast_open(void *data)
84 sizeof(pri->ttl)) < 0) { 81 sizeof(pri->ttl)) < 0) {
85 printk("mcast_open: IP_MULTICAST_TTL failed, error = %d\n", 82 printk("mcast_open: IP_MULTICAST_TTL failed, error = %d\n",
86 errno); 83 errno);
87 os_close_file(fd); 84 goto out_close;
88 fd = -EINVAL;
89 goto out;
90 } 85 }
91 86
92 /* set LOOP, so data does get fed back to local sockets */ 87 /* set LOOP, so data does get fed back to local sockets */
93 if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) { 88 if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) {
94 printk("mcast_open: IP_MULTICAST_LOOP failed, error = %d\n", 89 printk("mcast_open: IP_MULTICAST_LOOP failed, error = %d\n",
95 errno); 90 errno);
96 os_close_file(fd); 91 goto out_close;
97 fd = -EINVAL;
98 goto out;
99 } 92 }
100 93
101 /* bind socket to mcast address */ 94 /* bind socket to mcast address */
102 if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) { 95 if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) {
103 printk("mcast_open : data bind failed, errno = %d\n", errno); 96 printk("mcast_open : data bind failed, errno = %d\n", errno);
104 os_close_file(fd); 97 goto out_close;
105 fd = -EINVAL;
106 goto out;
107 } 98 }
108 99
109 /* subscribe to the multicast group */ 100 /* subscribe to the multicast group */
@@ -117,12 +108,15 @@ static int mcast_open(void *data)
117 "interface on the host.\n"); 108 "interface on the host.\n");
118 printk("eth0 should be configured in order to use the " 109 printk("eth0 should be configured in order to use the "
119 "multicast transport.\n"); 110 "multicast transport.\n");
120 os_close_file(fd); 111 goto out_close;
121 fd = -EINVAL;
122 } 112 }
123 113
124 out: 114 out:
125 return(fd); 115 return fd;
116
117 out_close:
118 os_close_file(fd);
119 return err;
126} 120}
127 121
128static void mcast_close(int fd, void *data) 122static void mcast_close(int fd, void *data)
@@ -164,14 +158,3 @@ struct net_user_info mcast_user_info = {
164 .delete_address = NULL, 158 .delete_address = NULL,
165 .max_packet = MAX_PACKET - ETH_HEADER_OTHER 159 .max_packet = MAX_PACKET - ETH_HEADER_OTHER
166}; 160};
167
168/*
169 * Overrides for Emacs so that we follow Linus's tabbing style.
170 * Emacs will notice this stuff at the end of the file and automatically
171 * adjust the settings for this buffer only. This must remain at the end
172 * of the file.
173 * ---------------------------------------------------------------------------
174 * Local variables:
175 * c-file-style: "linux"
176 * End:
177 */
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 9a56ff94308d..88f956c34fed 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -55,7 +55,7 @@
55#include "mem_kern.h" 55#include "mem_kern.h"
56#include "cow.h" 56#include "cow.h"
57 57
58enum ubd_req { UBD_READ, UBD_WRITE, UBD_MMAP }; 58enum ubd_req { UBD_READ, UBD_WRITE };
59 59
60struct io_thread_req { 60struct io_thread_req {
61 enum ubd_req op; 61 enum ubd_req op;
@@ -68,8 +68,6 @@ struct io_thread_req {
68 unsigned long sector_mask; 68 unsigned long sector_mask;
69 unsigned long long cow_offset; 69 unsigned long long cow_offset;
70 unsigned long bitmap_words[2]; 70 unsigned long bitmap_words[2];
71 int map_fd;
72 unsigned long long map_offset;
73 int error; 71 int error;
74}; 72};
75 73
@@ -122,10 +120,6 @@ static int ubd_ioctl(struct inode * inode, struct file * file,
122 120
123#define MAX_DEV (8) 121#define MAX_DEV (8)
124 122
125/* Changed in early boot */
126static int ubd_do_mmap = 0;
127#define UBD_MMAP_BLOCK_SIZE PAGE_SIZE
128
129static struct block_device_operations ubd_blops = { 123static struct block_device_operations ubd_blops = {
130 .owner = THIS_MODULE, 124 .owner = THIS_MODULE,
131 .open = ubd_open, 125 .open = ubd_open,
@@ -175,12 +169,6 @@ struct ubd {
175 int no_cow; 169 int no_cow;
176 struct cow cow; 170 struct cow cow;
177 struct platform_device pdev; 171 struct platform_device pdev;
178
179 int map_writes;
180 int map_reads;
181 int nomap_writes;
182 int nomap_reads;
183 int write_maps;
184}; 172};
185 173
186#define DEFAULT_COW { \ 174#define DEFAULT_COW { \
@@ -200,11 +188,6 @@ struct ubd {
200 .openflags = OPEN_FLAGS, \ 188 .openflags = OPEN_FLAGS, \
201 .no_cow = 0, \ 189 .no_cow = 0, \
202 .cow = DEFAULT_COW, \ 190 .cow = DEFAULT_COW, \
203 .map_writes = 0, \
204 .map_reads = 0, \
205 .nomap_writes = 0, \
206 .nomap_reads = 0, \
207 .write_maps = 0, \
208} 191}
209 192
210struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD }; 193struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
@@ -314,13 +297,6 @@ static int ubd_setup_common(char *str, int *index_out)
314 int major; 297 int major;
315 298
316 str++; 299 str++;
317 if(!strcmp(str, "mmap")){
318 CHOOSE_MODE(printk("mmap not supported by the ubd "
319 "driver in tt mode\n"),
320 ubd_do_mmap = 1);
321 return(0);
322 }
323
324 if(!strcmp(str, "sync")){ 300 if(!strcmp(str, "sync")){
325 global_openflags = of_sync(global_openflags); 301 global_openflags = of_sync(global_openflags);
326 return(0); 302 return(0);
@@ -524,7 +500,7 @@ static void ubd_handler(void)
524{ 500{
525 struct io_thread_req req; 501 struct io_thread_req req;
526 struct request *rq = elv_next_request(ubd_queue); 502 struct request *rq = elv_next_request(ubd_queue);
527 int n, err; 503 int n;
528 504
529 do_ubd = NULL; 505 do_ubd = NULL;
530 intr_count++; 506 intr_count++;
@@ -538,19 +514,6 @@ static void ubd_handler(void)
538 return; 514 return;
539 } 515 }
540 516
541 if((req.op != UBD_MMAP) &&
542 ((req.offset != ((__u64) (rq->sector)) << 9) ||
543 (req.length != (rq->current_nr_sectors) << 9)))
544 panic("I/O op mismatch");
545
546 if(req.map_fd != -1){
547 err = physmem_subst_mapping(req.buffer, req.map_fd,
548 req.map_offset, 1);
549 if(err)
550 printk("ubd_handler - physmem_subst_mapping failed, "
551 "err = %d\n", -err);
552 }
553
554 ubd_finish(rq, req.error); 517 ubd_finish(rq, req.error);
555 reactivate_fd(thread_fd, UBD_IRQ); 518 reactivate_fd(thread_fd, UBD_IRQ);
556 do_ubd_request(ubd_queue); 519 do_ubd_request(ubd_queue);
@@ -583,14 +546,10 @@ static int ubd_file_size(struct ubd *dev, __u64 *size_out)
583 546
584static void ubd_close(struct ubd *dev) 547static void ubd_close(struct ubd *dev)
585{ 548{
586 if(ubd_do_mmap)
587 physmem_forget_descriptor(dev->fd);
588 os_close_file(dev->fd); 549 os_close_file(dev->fd);
589 if(dev->cow.file == NULL) 550 if(dev->cow.file == NULL)
590 return; 551 return;
591 552
592 if(ubd_do_mmap)
593 physmem_forget_descriptor(dev->cow.fd);
594 os_close_file(dev->cow.fd); 553 os_close_file(dev->cow.fd);
595 vfree(dev->cow.bitmap); 554 vfree(dev->cow.bitmap);
596 dev->cow.bitmap = NULL; 555 dev->cow.bitmap = NULL;
@@ -1010,94 +969,13 @@ static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1010 req->bitmap_words, bitmap_len); 969 req->bitmap_words, bitmap_len);
1011} 970}
1012 971
1013static int mmap_fd(struct request *req, struct ubd *dev, __u64 offset)
1014{
1015 __u64 sector;
1016 unsigned char *bitmap;
1017 int bit, i;
1018
1019 /* mmap must have been requested on the command line */
1020 if(!ubd_do_mmap)
1021 return(-1);
1022
1023 /* The buffer must be page aligned */
1024 if(((unsigned long) req->buffer % UBD_MMAP_BLOCK_SIZE) != 0)
1025 return(-1);
1026
1027 /* The request must be a page long */
1028 if((req->current_nr_sectors << 9) != PAGE_SIZE)
1029 return(-1);
1030
1031 if(dev->cow.file == NULL)
1032 return(dev->fd);
1033
1034 sector = offset >> 9;
1035 bitmap = (unsigned char *) dev->cow.bitmap;
1036 bit = ubd_test_bit(sector, bitmap);
1037
1038 for(i = 1; i < req->current_nr_sectors; i++){
1039 if(ubd_test_bit(sector + i, bitmap) != bit)
1040 return(-1);
1041 }
1042
1043 if(bit || (rq_data_dir(req) == WRITE))
1044 offset += dev->cow.data_offset;
1045
1046 /* The data on disk must be page aligned */
1047 if((offset % UBD_MMAP_BLOCK_SIZE) != 0)
1048 return(-1);
1049
1050 return(bit ? dev->fd : dev->cow.fd);
1051}
1052
1053static int prepare_mmap_request(struct ubd *dev, int fd, __u64 offset,
1054 struct request *req,
1055 struct io_thread_req *io_req)
1056{
1057 int err;
1058
1059 if(rq_data_dir(req) == WRITE){
1060 /* Writes are almost no-ops since the new data is already in the
1061 * host page cache
1062 */
1063 dev->map_writes++;
1064 if(dev->cow.file != NULL)
1065 cowify_bitmap(io_req->offset, io_req->length,
1066 &io_req->sector_mask, &io_req->cow_offset,
1067 dev->cow.bitmap, dev->cow.bitmap_offset,
1068 io_req->bitmap_words,
1069 dev->cow.bitmap_len);
1070 }
1071 else {
1072 int w;
1073
1074 if((dev->cow.file != NULL) && (fd == dev->cow.fd))
1075 w = 0;
1076 else w = dev->openflags.w;
1077
1078 if((dev->cow.file != NULL) && (fd == dev->fd))
1079 offset += dev->cow.data_offset;
1080
1081 err = physmem_subst_mapping(req->buffer, fd, offset, w);
1082 if(err){
1083 printk("physmem_subst_mapping failed, err = %d\n",
1084 -err);
1085 return(1);
1086 }
1087 dev->map_reads++;
1088 }
1089 io_req->op = UBD_MMAP;
1090 io_req->buffer = req->buffer;
1091 return(0);
1092}
1093
1094/* Called with ubd_io_lock held */ 972/* Called with ubd_io_lock held */
1095static int prepare_request(struct request *req, struct io_thread_req *io_req) 973static int prepare_request(struct request *req, struct io_thread_req *io_req)
1096{ 974{
1097 struct gendisk *disk = req->rq_disk; 975 struct gendisk *disk = req->rq_disk;
1098 struct ubd *dev = disk->private_data; 976 struct ubd *dev = disk->private_data;
1099 __u64 offset; 977 __u64 offset;
1100 int len, fd; 978 int len;
1101 979
1102 if(req->rq_status == RQ_INACTIVE) return(1); 980 if(req->rq_status == RQ_INACTIVE) return(1);
1103 981
@@ -1114,34 +992,12 @@ static int prepare_request(struct request *req, struct io_thread_req *io_req)
1114 992
1115 io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd; 993 io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
1116 io_req->fds[1] = dev->fd; 994 io_req->fds[1] = dev->fd;
1117 io_req->map_fd = -1;
1118 io_req->cow_offset = -1; 995 io_req->cow_offset = -1;
1119 io_req->offset = offset; 996 io_req->offset = offset;
1120 io_req->length = len; 997 io_req->length = len;
1121 io_req->error = 0; 998 io_req->error = 0;
1122 io_req->sector_mask = 0; 999 io_req->sector_mask = 0;
1123 1000
1124 fd = mmap_fd(req, dev, io_req->offset);
1125 if(fd > 0){
1126 /* If mmapping is otherwise OK, but the first access to the
1127 * page is a write, then it's not mapped in yet. So we have
1128 * to write the data to disk first, then we can map the disk
1129 * page in and continue normally from there.
1130 */
1131 if((rq_data_dir(req) == WRITE) && !is_remapped(req->buffer)){
1132 io_req->map_fd = dev->fd;
1133 io_req->map_offset = io_req->offset +
1134 dev->cow.data_offset;
1135 dev->write_maps++;
1136 }
1137 else return(prepare_mmap_request(dev, fd, io_req->offset, req,
1138 io_req));
1139 }
1140
1141 if(rq_data_dir(req) == READ)
1142 dev->nomap_reads++;
1143 else dev->nomap_writes++;
1144
1145 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; 1001 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1146 io_req->offsets[0] = 0; 1002 io_req->offsets[0] = 0;
1147 io_req->offsets[1] = dev->cow.data_offset; 1003 io_req->offsets[1] = dev->cow.data_offset;
@@ -1229,143 +1085,6 @@ static int ubd_ioctl(struct inode * inode, struct file * file,
1229 return(-EINVAL); 1085 return(-EINVAL);
1230} 1086}
1231 1087
1232static int ubd_check_remapped(int fd, unsigned long address, int is_write,
1233 __u64 offset)
1234{
1235 __u64 bitmap_offset;
1236 unsigned long new_bitmap[2];
1237 int i, err, n;
1238
1239 /* If it's not a write access, we can't do anything about it */
1240 if(!is_write)
1241 return(0);
1242
1243 /* We have a write */
1244 for(i = 0; i < sizeof(ubd_dev) / sizeof(ubd_dev[0]); i++){
1245 struct ubd *dev = &ubd_dev[i];
1246
1247 if((dev->fd != fd) && (dev->cow.fd != fd))
1248 continue;
1249
1250 /* It's a write to a ubd device */
1251
1252 /* This should be impossible now */
1253 if(!dev->openflags.w){
1254 /* It's a write access on a read-only device - probably
1255 * shouldn't happen. If the kernel is trying to change
1256 * something with no intention of writing it back out,
1257 * then this message will clue us in that this needs
1258 * fixing
1259 */
1260 printk("Write access to mapped page from readonly ubd "
1261 "device %d\n", i);
1262 return(0);
1263 }
1264
1265 /* It's a write to a writeable ubd device - it must be COWed
1266 * because, otherwise, the page would have been mapped in
1267 * writeable
1268 */
1269
1270 if(!dev->cow.file)
1271 panic("Write fault on writeable non-COW ubd device %d",
1272 i);
1273
1274 /* It should also be an access to the backing file since the
1275 * COW pages should be mapped in read-write
1276 */
1277
1278 if(fd == dev->fd)
1279 panic("Write fault on a backing page of ubd "
1280 "device %d\n", i);
1281
1282 /* So, we do the write, copying the backing data to the COW
1283 * file...
1284 */
1285
1286 err = os_seek_file(dev->fd, offset + dev->cow.data_offset);
1287 if(err < 0)
1288 panic("Couldn't seek to %lld in COW file of ubd "
1289 "device %d, err = %d",
1290 offset + dev->cow.data_offset, i, -err);
1291
1292 n = os_write_file(dev->fd, (void *) address, PAGE_SIZE);
1293 if(n != PAGE_SIZE)
1294 panic("Couldn't copy data to COW file of ubd "
1295 "device %d, err = %d", i, -n);
1296
1297 /* ... updating the COW bitmap... */
1298
1299 cowify_bitmap(offset, PAGE_SIZE, NULL, &bitmap_offset,
1300 dev->cow.bitmap, dev->cow.bitmap_offset,
1301 new_bitmap, dev->cow.bitmap_len);
1302
1303 err = os_seek_file(dev->fd, bitmap_offset);
1304 if(err < 0)
1305 panic("Couldn't seek to %lld in COW file of ubd "
1306 "device %d, err = %d", bitmap_offset, i, -err);
1307
1308 n = os_write_file(dev->fd, new_bitmap, sizeof(new_bitmap));
1309 if(n != sizeof(new_bitmap))
1310 panic("Couldn't update bitmap of ubd device %d, "
1311 "err = %d", i, -n);
1312
1313 /* Maybe we can map the COW page in, and maybe we can't. If
1314 * it is a pre-V3 COW file, we can't, since the alignment will
1315 * be wrong. If it is a V3 or later COW file which has been
1316 * moved to a system with a larger page size, then maybe we
1317 * can't, depending on the exact location of the page.
1318 */
1319
1320 offset += dev->cow.data_offset;
1321
1322 /* Remove the remapping, putting the original anonymous page
1323 * back. If the COW file can be mapped in, that is done.
1324 * Otherwise, the COW page is read in.
1325 */
1326
1327 if(!physmem_remove_mapping((void *) address))
1328 panic("Address 0x%lx not remapped by ubd device %d",
1329 address, i);
1330 if((offset % UBD_MMAP_BLOCK_SIZE) == 0)
1331 physmem_subst_mapping((void *) address, dev->fd,
1332 offset, 1);
1333 else {
1334 err = os_seek_file(dev->fd, offset);
1335 if(err < 0)
1336 panic("Couldn't seek to %lld in COW file of "
1337 "ubd device %d, err = %d", offset, i,
1338 -err);
1339
1340 n = os_read_file(dev->fd, (void *) address, PAGE_SIZE);
1341 if(n != PAGE_SIZE)
1342 panic("Failed to read page from offset %llx of "
1343 "COW file of ubd device %d, err = %d",
1344 offset, i, -n);
1345 }
1346
1347 return(1);
1348 }
1349
1350 /* It's not a write on a ubd device */
1351 return(0);
1352}
1353
1354static struct remapper ubd_remapper = {
1355 .list = LIST_HEAD_INIT(ubd_remapper.list),
1356 .proc = ubd_check_remapped,
1357};
1358
1359static int ubd_remapper_setup(void)
1360{
1361 if(ubd_do_mmap)
1362 register_remapper(&ubd_remapper);
1363
1364 return(0);
1365}
1366
1367__initcall(ubd_remapper_setup);
1368
1369static int same_backing_files(char *from_cmdline, char *from_cow, char *cow) 1088static int same_backing_files(char *from_cmdline, char *from_cow, char *cow)
1370{ 1089{
1371 struct uml_stat buf1, buf2; 1090 struct uml_stat buf1, buf2;
@@ -1568,15 +1287,6 @@ void do_io(struct io_thread_req *req)
1568 int err; 1287 int err;
1569 __u64 off; 1288 __u64 off;
1570 1289
1571 if(req->op == UBD_MMAP){
1572 /* Touch the page to force the host to do any necessary IO to
1573 * get it into memory
1574 */
1575 n = *((volatile int *) req->buffer);
1576 req->error = update_bitmap(req);
1577 return;
1578 }
1579
1580 nsectors = req->length / req->sectorsize; 1290 nsectors = req->length / req->sectorsize;
1581 start = 0; 1291 start = 0;
1582 do { 1292 do {
diff --git a/arch/um/include/sysdep-i386/ptrace.h b/arch/um/include/sysdep-i386/ptrace.h
index 84ec7ff5cf8c..6eaeb9919983 100644
--- a/arch/um/include/sysdep-i386/ptrace.h
+++ b/arch/um/include/sysdep-i386/ptrace.h
@@ -31,7 +31,6 @@ extern int sysemu_supported;
31#ifdef UML_CONFIG_MODE_SKAS 31#ifdef UML_CONFIG_MODE_SKAS
32 32
33#include "skas_ptregs.h" 33#include "skas_ptregs.h"
34#include "sysdep/faultinfo.h"
35 34
36#define REGS_IP(r) ((r)[HOST_IP]) 35#define REGS_IP(r) ((r)[HOST_IP])
37#define REGS_SP(r) ((r)[HOST_SP]) 36#define REGS_SP(r) ((r)[HOST_SP])
@@ -59,6 +58,7 @@ extern int sysemu_supported;
59#define PTRACE_SYSEMU_SINGLESTEP 32 58#define PTRACE_SYSEMU_SINGLESTEP 32
60#endif 59#endif
61 60
61#include "sysdep/faultinfo.h"
62#include "choose-mode.h" 62#include "choose-mode.h"
63 63
64union uml_pt_regs { 64union uml_pt_regs {
diff --git a/arch/um/include/sysdep-x86_64/checksum.h b/arch/um/include/sysdep-x86_64/checksum.h
index 572c6c19be33..ea97005af694 100644
--- a/arch/um/include/sysdep-x86_64/checksum.h
+++ b/arch/um/include/sysdep-x86_64/checksum.h
@@ -9,8 +9,6 @@
9#include "linux/in6.h" 9#include "linux/in6.h"
10#include "asm/uaccess.h" 10#include "asm/uaccess.h"
11 11
12extern unsigned int csum_partial_copy_from(const unsigned char *src, unsigned char *dst, int len,
13 int sum, int *err_ptr);
14extern unsigned csum_partial(const unsigned char *buff, unsigned len, 12extern unsigned csum_partial(const unsigned char *buff, unsigned len,
15 unsigned sum); 13 unsigned sum);
16 14
@@ -31,10 +29,15 @@ unsigned int csum_partial_copy_nocheck(const unsigned char *src, unsigned char *
31} 29}
32 30
33static __inline__ 31static __inline__
34unsigned int csum_partial_copy_from_user(const unsigned char *src, unsigned char *dst, 32unsigned int csum_partial_copy_from_user(const unsigned char *src,
35 int len, int sum, int *err_ptr) 33 unsigned char *dst, int len, int sum,
34 int *err_ptr)
36{ 35{
37 return csum_partial_copy_from(src, dst, len, sum, err_ptr); 36 if(copy_from_user(dst, src, len)){
37 *err_ptr = -EFAULT;
38 return(-1);
39 }
40 return csum_partial(dst, len, sum);
38} 41}
39 42
40/** 43/**
@@ -137,15 +140,6 @@ static inline unsigned add32_with_carry(unsigned a, unsigned b)
137 return a; 140 return a;
138} 141}
139 142
140#endif 143extern unsigned short ip_compute_csum(unsigned char * buff, int len);
141 144
142/* 145#endif
143 * Overrides for Emacs so that we follow Linus's tabbing style.
144 * Emacs will notice this stuff at the end of the file and automatically
145 * adjust the settings for this buffer only. This must remain at the end
146 * of the file.
147 * ---------------------------------------------------------------------------
148 * Local variables:
149 * c-file-style: "linux"
150 * End:
151 */
diff --git a/arch/um/include/sysdep-x86_64/ptrace.h b/arch/um/include/sysdep-x86_64/ptrace.h
index 348e8fcd513f..be8acd5efd97 100644
--- a/arch/um/include/sysdep-x86_64/ptrace.h
+++ b/arch/um/include/sysdep-x86_64/ptrace.h
@@ -135,6 +135,7 @@ extern int mode_tt;
135 __CHOOSE_MODE(SC_EFLAGS(UPT_SC(r)), REGS_EFLAGS((r)->skas.regs)) 135 __CHOOSE_MODE(SC_EFLAGS(UPT_SC(r)), REGS_EFLAGS((r)->skas.regs))
136#define UPT_SC(r) ((r)->tt.sc) 136#define UPT_SC(r) ((r)->tt.sc)
137#define UPT_SYSCALL_NR(r) __CHOOSE_MODE((r)->tt.syscall, (r)->skas.syscall) 137#define UPT_SYSCALL_NR(r) __CHOOSE_MODE((r)->tt.syscall, (r)->skas.syscall)
138#define UPT_SYSCALL_RET(r) UPT_RAX(r)
138 139
139extern int user_context(unsigned long sp); 140extern int user_context(unsigned long sp);
140 141
@@ -196,32 +197,32 @@ struct syscall_args {
196 197
197 198
198#define UPT_SET(regs, reg, val) \ 199#define UPT_SET(regs, reg, val) \
199 ({ unsigned long val; \ 200 ({ unsigned long __upt_val = val; \
200 switch(reg){ \ 201 switch(reg){ \
201 case R8: UPT_R8(regs) = val; break; \ 202 case R8: UPT_R8(regs) = __upt_val; break; \
202 case R9: UPT_R9(regs) = val; break; \ 203 case R9: UPT_R9(regs) = __upt_val; break; \
203 case R10: UPT_R10(regs) = val; break; \ 204 case R10: UPT_R10(regs) = __upt_val; break; \
204 case R11: UPT_R11(regs) = val; break; \ 205 case R11: UPT_R11(regs) = __upt_val; break; \
205 case R12: UPT_R12(regs) = val; break; \ 206 case R12: UPT_R12(regs) = __upt_val; break; \
206 case R13: UPT_R13(regs) = val; break; \ 207 case R13: UPT_R13(regs) = __upt_val; break; \
207 case R14: UPT_R14(regs) = val; break; \ 208 case R14: UPT_R14(regs) = __upt_val; break; \
208 case R15: UPT_R15(regs) = val; break; \ 209 case R15: UPT_R15(regs) = __upt_val; break; \
209 case RIP: UPT_IP(regs) = val; break; \ 210 case RIP: UPT_IP(regs) = __upt_val; break; \
210 case RSP: UPT_SP(regs) = val; break; \ 211 case RSP: UPT_SP(regs) = __upt_val; break; \
211 case RAX: UPT_RAX(regs) = val; break; \ 212 case RAX: UPT_RAX(regs) = __upt_val; break; \
212 case RBX: UPT_RBX(regs) = val; break; \ 213 case RBX: UPT_RBX(regs) = __upt_val; break; \
213 case RCX: UPT_RCX(regs) = val; break; \ 214 case RCX: UPT_RCX(regs) = __upt_val; break; \
214 case RDX: UPT_RDX(regs) = val; break; \ 215 case RDX: UPT_RDX(regs) = __upt_val; break; \
215 case RSI: UPT_RSI(regs) = val; break; \ 216 case RSI: UPT_RSI(regs) = __upt_val; break; \
216 case RDI: UPT_RDI(regs) = val; break; \ 217 case RDI: UPT_RDI(regs) = __upt_val; break; \
217 case RBP: UPT_RBP(regs) = val; break; \ 218 case RBP: UPT_RBP(regs) = __upt_val; break; \
218 case ORIG_RAX: UPT_ORIG_RAX(regs) = val; break; \ 219 case ORIG_RAX: UPT_ORIG_RAX(regs) = __upt_val; break; \
219 case CS: UPT_CS(regs) = val; break; \ 220 case CS: UPT_CS(regs) = __upt_val; break; \
220 case DS: UPT_DS(regs) = val; break; \ 221 case DS: UPT_DS(regs) = __upt_val; break; \
221 case ES: UPT_ES(regs) = val; break; \ 222 case ES: UPT_ES(regs) = __upt_val; break; \
222 case FS: UPT_FS(regs) = val; break; \ 223 case FS: UPT_FS(regs) = __upt_val; break; \
223 case GS: UPT_GS(regs) = val; break; \ 224 case GS: UPT_GS(regs) = __upt_val; break; \
224 case EFLAGS: UPT_EFLAGS(regs) = val; break; \ 225 case EFLAGS: UPT_EFLAGS(regs) = __upt_val; break; \
225 default : \ 226 default : \
226 panic("Bad register in UPT_SET : %d\n", reg); \ 227 panic("Bad register in UPT_SET : %d\n", reg); \
227 break; \ 228 break; \
@@ -245,14 +246,3 @@ struct syscall_args {
245 CHOOSE_MODE((&(r)->tt.faultinfo), (&(r)->skas.faultinfo)) 246 CHOOSE_MODE((&(r)->tt.faultinfo), (&(r)->skas.faultinfo))
246 247
247#endif 248#endif
248
249/*
250 * Overrides for Emacs so that we follow Linus's tabbing style.
251 * Emacs will notice this stuff at the end of the file and automatically
252 * adjust the settings for this buffer only. This must remain at the end
253 * of the file.
254 * ---------------------------------------------------------------------------
255 * Local variables:
256 * c-file-style: "linux"
257 * End:
258 */
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index 9736ca27c5f0..a8918e80df96 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -14,7 +14,7 @@ obj-y = config.o exec_kern.o exitcode.o \
14 tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o umid.o \ 14 tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o umid.o \
15 user_util.o 15 user_util.o
16 16
17obj-$(CONFIG_BLK_DEV_INITRD) += initrd_kern.o initrd_user.o 17obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
18obj-$(CONFIG_GPROF) += gprof_syms.o 18obj-$(CONFIG_GPROF) += gprof_syms.o
19obj-$(CONFIG_GCOV) += gmon_syms.o 19obj-$(CONFIG_GCOV) += gmon_syms.o
20obj-$(CONFIG_TTY_LOG) += tty_log.o 20obj-$(CONFIG_TTY_LOG) += tty_log.o
diff --git a/arch/um/kernel/checksum.c b/arch/um/kernel/checksum.c
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/arch/um/kernel/checksum.c
+++ /dev/null
diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c
new file mode 100644
index 000000000000..82ecf904b09c
--- /dev/null
+++ b/arch/um/kernel/initrd.c
@@ -0,0 +1,78 @@
1/*
2 * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6#include "linux/init.h"
7#include "linux/bootmem.h"
8#include "linux/initrd.h"
9#include "asm/types.h"
10#include "user_util.h"
11#include "kern_util.h"
12#include "initrd.h"
13#include "init.h"
14#include "os.h"
15
16/* Changed by uml_initrd_setup, which is a setup */
17static char *initrd __initdata = NULL;
18
19static int __init read_initrd(void)
20{
21 void *area;
22 long long size;
23 int err;
24
25 if(initrd == NULL) return 0;
26 err = os_file_size(initrd, &size);
27 if(err) return 0;
28 area = alloc_bootmem(size);
29 if(area == NULL) return 0;
30 if(load_initrd(initrd, area, size) == -1) return 0;
31 initrd_start = (unsigned long) area;
32 initrd_end = initrd_start + size;
33 return 0;
34}
35
36__uml_postsetup(read_initrd);
37
38static int __init uml_initrd_setup(char *line, int *add)
39{
40 initrd = line;
41 return 0;
42}
43
44__uml_setup("initrd=", uml_initrd_setup,
45"initrd=<initrd image>\n"
46" This is used to boot UML from an initrd image. The argument is the\n"
47" name of the file containing the image.\n\n"
48);
49
50int load_initrd(char *filename, void *buf, int size)
51{
52 int fd, n;
53
54 fd = os_open_file(filename, of_read(OPENFLAGS()), 0);
55 if(fd < 0){
56 printk("Opening '%s' failed - err = %d\n", filename, -fd);
57 return(-1);
58 }
59 n = os_read_file(fd, buf, size);
60 if(n != size){
61 printk("Read of %d bytes from '%s' failed, err = %d\n", size,
62 filename, -n);
63 return(-1);
64 }
65
66 os_close_file(fd);
67 return(0);
68}
69/*
70 * Overrides for Emacs so that we follow Linus's tabbing style.
71 * Emacs will notice this stuff at the end of the file and automatically
72 * adjust the settings for this buffer only. This must remain at the end
73 * of the file.
74 * ---------------------------------------------------------------------------
75 * Local variables:
76 * c-file-style: "linux"
77 * End:
78 */
diff --git a/arch/um/kernel/irq_user.c b/arch/um/kernel/irq_user.c
index 6d6f9484b884..b3074cbaa479 100644
--- a/arch/um/kernel/irq_user.c
+++ b/arch/um/kernel/irq_user.c
@@ -236,9 +236,15 @@ static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
236 (*prev)->fd, pollfds[i].fd); 236 (*prev)->fd, pollfds[i].fd);
237 goto out; 237 goto out;
238 } 238 }
239 memcpy(&pollfds[i], &pollfds[i + 1], 239
240 (pollfds_num - i - 1) * sizeof(pollfds[0]));
241 pollfds_num--; 240 pollfds_num--;
241
242 /* This moves the *whole* array after pollfds[i] (though
243 * it doesn't spot as such)! */
244
245 memmove(&pollfds[i], &pollfds[i + 1],
246 (pollfds_num - i) * sizeof(pollfds[0]));
247
242 if(last_irq_ptr == &old_fd->next) 248 if(last_irq_ptr == &old_fd->next)
243 last_irq_ptr = prev; 249 last_irq_ptr = prev;
244 *prev = (*prev)->next; 250 *prev = (*prev)->next;
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index 78d69dc74b26..99439fa15ef4 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -57,6 +57,7 @@ EXPORT_SYMBOL(copy_to_user_tt);
57EXPORT_SYMBOL(strncpy_from_user_skas); 57EXPORT_SYMBOL(strncpy_from_user_skas);
58EXPORT_SYMBOL(copy_to_user_skas); 58EXPORT_SYMBOL(copy_to_user_skas);
59EXPORT_SYMBOL(copy_from_user_skas); 59EXPORT_SYMBOL(copy_from_user_skas);
60EXPORT_SYMBOL(clear_user_skas);
60#endif 61#endif
61EXPORT_SYMBOL(uml_strdup); 62EXPORT_SYMBOL(uml_strdup);
62 63
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index f156661781cb..c22825f13e40 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -100,12 +100,37 @@ void mem_init(void)
100#endif 100#endif
101} 101}
102 102
103/*
104 * Create a page table and place a pointer to it in a middle page
105 * directory entry.
106 */
107static void __init one_page_table_init(pmd_t *pmd)
108{
109 if (pmd_none(*pmd)) {
110 pte_t *pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
111 set_pmd(pmd, __pmd(_KERNPG_TABLE +
112 (unsigned long) __pa(pte)));
113 if (pte != pte_offset_kernel(pmd, 0))
114 BUG();
115 }
116}
117
118static void __init one_md_table_init(pud_t *pud)
119{
120#ifdef CONFIG_3_LEVEL_PGTABLES
121 pmd_t *pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
122 set_pud(pud, __pud(_KERNPG_TABLE + (unsigned long) __pa(pmd_table)));
123 if (pmd_table != pmd_offset(pud, 0))
124 BUG();
125#endif
126}
127
103static void __init fixrange_init(unsigned long start, unsigned long end, 128static void __init fixrange_init(unsigned long start, unsigned long end,
104 pgd_t *pgd_base) 129 pgd_t *pgd_base)
105{ 130{
106 pgd_t *pgd; 131 pgd_t *pgd;
132 pud_t *pud;
107 pmd_t *pmd; 133 pmd_t *pmd;
108 pte_t *pte;
109 int i, j; 134 int i, j;
110 unsigned long vaddr; 135 unsigned long vaddr;
111 136
@@ -115,15 +140,12 @@ static void __init fixrange_init(unsigned long start, unsigned long end,
115 pgd = pgd_base + i; 140 pgd = pgd_base + i;
116 141
117 for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { 142 for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
118 pmd = (pmd_t *)pgd; 143 pud = pud_offset(pgd, vaddr);
144 if (pud_none(*pud))
145 one_md_table_init(pud);
146 pmd = pmd_offset(pud, vaddr);
119 for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { 147 for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
120 if (pmd_none(*pmd)) { 148 one_page_table_init(pmd);
121 pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
122 set_pmd(pmd, __pmd(_KERNPG_TABLE +
123 (unsigned long) __pa(pte)));
124 if (pte != pte_offset_kernel(pmd, 0))
125 BUG();
126 }
127 vaddr += PMD_SIZE; 149 vaddr += PMD_SIZE;
128 } 150 }
129 j = 0; 151 j = 0;
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 2b75d8d9ba73..2925e15324de 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -28,9 +28,9 @@ static inline void set_singlestepping(struct task_struct *child, int on)
28 child->thread.singlestep_syscall = 0; 28 child->thread.singlestep_syscall = 0;
29 29
30#ifdef SUBARCH_SET_SINGLESTEPPING 30#ifdef SUBARCH_SET_SINGLESTEPPING
31 SUBARCH_SET_SINGLESTEPPING(child, on) 31 SUBARCH_SET_SINGLESTEPPING(child, on);
32#endif 32#endif
33 } 33}
34 34
35/* 35/*
36 * Called by kernel/ptrace.c when detaching.. 36 * Called by kernel/ptrace.c when detaching..
@@ -83,7 +83,7 @@ long sys_ptrace(long request, long pid, long addr, long data)
83 } 83 }
84 84
85#ifdef SUBACH_PTRACE_SPECIAL 85#ifdef SUBACH_PTRACE_SPECIAL
86 SUBARCH_PTRACE_SPECIAL(child,request,addr,data) 86 SUBARCH_PTRACE_SPECIAL(child,request,addr,data);
87#endif 87#endif
88 88
89 ret = ptrace_check_attach(child, request == PTRACE_KILL); 89 ret = ptrace_check_attach(child, request == PTRACE_KILL);
diff --git a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c
index 5fca2c61eb98..1de22d8a313a 100644
--- a/arch/um/kernel/trap_kern.c
+++ b/arch/um/kernel/trap_kern.c
@@ -57,10 +57,11 @@ int handle_page_fault(unsigned long address, unsigned long ip,
57 *code_out = SEGV_ACCERR; 57 *code_out = SEGV_ACCERR;
58 if(is_write && !(vma->vm_flags & VM_WRITE)) 58 if(is_write && !(vma->vm_flags & VM_WRITE))
59 goto out; 59 goto out;
60
61 if(!(vma->vm_flags & (VM_READ | VM_EXEC)))
62 goto out;
63
60 page = address & PAGE_MASK; 64 page = address & PAGE_MASK;
61 pgd = pgd_offset(mm, page);
62 pud = pud_offset(pgd, page);
63 pmd = pmd_offset(pud, page);
64 do { 65 do {
65 survive: 66 survive:
66 switch (handle_mm_fault(mm, vma, address, is_write)){ 67 switch (handle_mm_fault(mm, vma, address, is_write)){
@@ -106,33 +107,6 @@ out_of_memory:
106 goto out; 107 goto out;
107} 108}
108 109
109LIST_HEAD(physmem_remappers);
110
111void register_remapper(struct remapper *info)
112{
113 list_add(&info->list, &physmem_remappers);
114}
115
116static int check_remapped_addr(unsigned long address, int is_write)
117{
118 struct remapper *remapper;
119 struct list_head *ele;
120 __u64 offset;
121 int fd;
122
123 fd = phys_mapping(__pa(address), &offset);
124 if(fd == -1)
125 return(0);
126
127 list_for_each(ele, &physmem_remappers){
128 remapper = list_entry(ele, struct remapper, list);
129 if((*remapper->proc)(fd, address, is_write, offset))
130 return(1);
131 }
132
133 return(0);
134}
135
136/* 110/*
137 * We give a *copy* of the faultinfo in the regs to segv. 111 * We give a *copy* of the faultinfo in the regs to segv.
138 * This must be done, since nesting SEGVs could overwrite 112 * This must be done, since nesting SEGVs could overwrite
@@ -151,8 +125,6 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, void *sc)
151 flush_tlb_kernel_vm(); 125 flush_tlb_kernel_vm();
152 return(0); 126 return(0);
153 } 127 }
154 else if(check_remapped_addr(address & PAGE_MASK, is_write))
155 return(0);
156 else if(current->mm == NULL) 128 else if(current->mm == NULL)
157 panic("Segfault with no mm"); 129 panic("Segfault with no mm");
158 err = handle_page_fault(address, ip, is_write, is_user, &si.si_code); 130 err = handle_page_fault(address, ip, is_write, is_user, &si.si_code);
diff --git a/arch/um/kernel/tt/ksyms.c b/arch/um/kernel/tt/ksyms.c
index 92ec85d67c7c..84a9385a8fef 100644
--- a/arch/um/kernel/tt/ksyms.c
+++ b/arch/um/kernel/tt/ksyms.c
@@ -12,6 +12,7 @@ EXPORT_SYMBOL(__do_copy_to_user);
12EXPORT_SYMBOL(__do_strncpy_from_user); 12EXPORT_SYMBOL(__do_strncpy_from_user);
13EXPORT_SYMBOL(__do_strnlen_user); 13EXPORT_SYMBOL(__do_strnlen_user);
14EXPORT_SYMBOL(__do_clear_user); 14EXPORT_SYMBOL(__do_clear_user);
15EXPORT_SYMBOL(clear_user_tt);
15 16
16EXPORT_SYMBOL(tracing_pid); 17EXPORT_SYMBOL(tracing_pid);
17EXPORT_SYMBOL(honeypot); 18EXPORT_SYMBOL(honeypot);
diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S
index 76eadb309189..dd5355500bdc 100644
--- a/arch/um/kernel/uml.lds.S
+++ b/arch/um/kernel/uml.lds.S
@@ -73,6 +73,8 @@ SECTIONS
73 73
74 .got : { *(.got.plt) *(.got) } 74 .got : { *(.got.plt) *(.got) }
75 .dynamic : { *(.dynamic) } 75 .dynamic : { *(.dynamic) }
76 .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
77 .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
76 /* We want the small data sections together, so single-instruction offsets 78 /* We want the small data sections together, so single-instruction offsets
77 can access them all, and initialized data all before uninitialized, so 79 can access them all, and initialized data all before uninitialized, so
78 we can shorten the on-disk segment size. */ 80 we can shorten the on-disk segment size. */
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index fcd67c3414e4..4351e5605506 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -9,11 +9,11 @@ USER_OBJS := bugs.o ptrace_user.o sigcontext.o fault.o
9 9
10SYMLINKS = bitops.c semaphore.c highmem.c module.c 10SYMLINKS = bitops.c semaphore.c highmem.c module.c
11 11
12include arch/um/scripts/Makefile.rules
13
12bitops.c-dir = lib 14bitops.c-dir = lib
13semaphore.c-dir = kernel 15semaphore.c-dir = kernel
14highmem.c-dir = mm 16highmem.c-dir = mm
15module.c-dir = kernel 17module.c-dir = kernel
16 18
17subdir- := util 19subdir- := util
18
19include arch/um/scripts/Makefile.rules
diff --git a/arch/um/sys-i386/delay.c b/arch/um/sys-i386/delay.c
index e9892eef51ce..2c11b9770e8b 100644
--- a/arch/um/sys-i386/delay.c
+++ b/arch/um/sys-i386/delay.c
@@ -1,5 +1,7 @@
1#include "linux/delay.h" 1#include <linux/module.h>
2#include "asm/param.h" 2#include <linux/kernel.h>
3#include <linux/delay.h>
4#include <asm/param.h>
3 5
4void __delay(unsigned long time) 6void __delay(unsigned long time)
5{ 7{
@@ -20,13 +22,19 @@ void __udelay(unsigned long usecs)
20 int i, n; 22 int i, n;
21 23
22 n = (loops_per_jiffy * HZ * usecs) / MILLION; 24 n = (loops_per_jiffy * HZ * usecs) / MILLION;
23 for(i=0;i<n;i++) ; 25 for(i=0;i<n;i++)
26 cpu_relax();
24} 27}
25 28
29EXPORT_SYMBOL(__udelay);
30
26void __const_udelay(unsigned long usecs) 31void __const_udelay(unsigned long usecs)
27{ 32{
28 int i, n; 33 int i, n;
29 34
30 n = (loops_per_jiffy * HZ * usecs) / MILLION; 35 n = (loops_per_jiffy * HZ * usecs) / MILLION;
31 for(i=0;i<n;i++) ; 36 for(i=0;i<n;i++)
37 cpu_relax();
32} 38}
39
40EXPORT_SYMBOL(__const_udelay);
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
index 3d7da911cc8c..608466ad6b22 100644
--- a/arch/um/sys-x86_64/Makefile
+++ b/arch/um/sys-x86_64/Makefile
@@ -14,11 +14,11 @@ obj-$(CONFIG_MODULES) += module.o um_module.o
14 14
15USER_OBJS := ptrace_user.o sigcontext.o 15USER_OBJS := ptrace_user.o sigcontext.o
16 16
17include arch/um/scripts/Makefile.rules
18
19SYMLINKS = bitops.c csum-copy.S csum-partial.c csum-wrappers.c memcpy.S \ 17SYMLINKS = bitops.c csum-copy.S csum-partial.c csum-wrappers.c memcpy.S \
20 semaphore.c thunk.S module.c 18 semaphore.c thunk.S module.c
21 19
20include arch/um/scripts/Makefile.rules
21
22bitops.c-dir = lib 22bitops.c-dir = lib
23csum-copy.S-dir = lib 23csum-copy.S-dir = lib
24csum-partial.c-dir = lib 24csum-partial.c-dir = lib
@@ -28,6 +28,4 @@ semaphore.c-dir = kernel
28thunk.S-dir = lib 28thunk.S-dir = lib
29module.c-dir = kernel 29module.c-dir = kernel
30 30
31CFLAGS_csum-partial.o := -Dcsum_partial=arch_csum_partial
32
33subdir- := util 31subdir- := util
diff --git a/arch/um/sys-x86_64/delay.c b/arch/um/sys-x86_64/delay.c
index 651332aeec22..137f4446b439 100644
--- a/arch/um/sys-x86_64/delay.c
+++ b/arch/um/sys-x86_64/delay.c
@@ -5,40 +5,37 @@
5 * Licensed under the GPL 5 * Licensed under the GPL
6 */ 6 */
7 7
8#include "linux/delay.h" 8#include <linux/module.h>
9#include "asm/processor.h" 9#include <linux/delay.h>
10#include "asm/param.h" 10#include <asm/processor.h>
11#include <asm/param.h>
11 12
12void __delay(unsigned long loops) 13void __delay(unsigned long loops)
13{ 14{
14 unsigned long i; 15 unsigned long i;
15 16
16 for(i = 0; i < loops; i++) ; 17 for(i = 0; i < loops; i++)
18 cpu_relax();
17} 19}
18 20
19void __udelay(unsigned long usecs) 21void __udelay(unsigned long usecs)
20{ 22{
21 int i, n; 23 unsigned long i, n;
22 24
23 n = (loops_per_jiffy * HZ * usecs) / MILLION; 25 n = (loops_per_jiffy * HZ * usecs) / MILLION;
24 for(i=0;i<n;i++) ; 26 for(i=0;i<n;i++)
27 cpu_relax();
25} 28}
26 29
30EXPORT_SYMBOL(__udelay);
31
27void __const_udelay(unsigned long usecs) 32void __const_udelay(unsigned long usecs)
28{ 33{
29 int i, n; 34 unsigned long i, n;
30 35
31 n = (loops_per_jiffy * HZ * usecs) / MILLION; 36 n = (loops_per_jiffy * HZ * usecs) / MILLION;
32 for(i=0;i<n;i++) ; 37 for(i=0;i<n;i++)
38 cpu_relax();
33} 39}
34 40
35/* 41EXPORT_SYMBOL(__const_udelay);
36 * Overrides for Emacs so that we follow Linus's tabbing style.
37 * Emacs will notice this stuff at the end of the file and automatically
38 * adjust the settings for this buffer only. This must remain at the end
39 * of the file.
40 * ---------------------------------------------------------------------------
41 * Local variables:
42 * c-file-style: "linux"
43 * End:
44 */
diff --git a/arch/um/sys-x86_64/ksyms.c b/arch/um/sys-x86_64/ksyms.c
index a27f0ee6a4f6..859273808203 100644
--- a/arch/um/sys-x86_64/ksyms.c
+++ b/arch/um/sys-x86_64/ksyms.c
@@ -16,5 +16,4 @@ EXPORT_SYMBOL(__up_wakeup);
16EXPORT_SYMBOL(__memcpy); 16EXPORT_SYMBOL(__memcpy);
17 17
18/* Networking helper routines. */ 18/* Networking helper routines. */
19/*EXPORT_SYMBOL(csum_partial_copy_from); 19EXPORT_SYMBOL(ip_compute_csum);
20EXPORT_SYMBOL(csum_partial_copy_to);*/
diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/um/sys-x86_64/ptrace.c
index b593bb256f2c..74eee5c7c6dd 100644
--- a/arch/um/sys-x86_64/ptrace.c
+++ b/arch/um/sys-x86_64/ptrace.c
@@ -5,10 +5,11 @@
5 */ 5 */
6 6
7#define __FRAME_OFFSETS 7#define __FRAME_OFFSETS
8#include "asm/ptrace.h" 8#include <asm/ptrace.h>
9#include "linux/sched.h" 9#include <linux/sched.h>
10#include "linux/errno.h" 10#include <linux/errno.h>
11#include "asm/elf.h" 11#include <asm/uaccess.h>
12#include <asm/elf.h>
12 13
13/* XXX x86_64 */ 14/* XXX x86_64 */
14unsigned long not_ss; 15unsigned long not_ss;
diff --git a/arch/um/sys-x86_64/syscalls.c b/arch/um/sys-x86_64/syscalls.c
index dd9914642b8e..d4a59657fb99 100644
--- a/arch/um/sys-x86_64/syscalls.c
+++ b/arch/um/sys-x86_64/syscalls.c
@@ -15,6 +15,7 @@
15#include "asm/unistd.h" 15#include "asm/unistd.h"
16#include "asm/prctl.h" /* XXX This should get the constants from libc */ 16#include "asm/prctl.h" /* XXX This should get the constants from libc */
17#include "choose-mode.h" 17#include "choose-mode.h"
18#include "kern.h"
18 19
19asmlinkage long sys_uname64(struct new_utsname __user * name) 20asmlinkage long sys_uname64(struct new_utsname __user * name)
20{ 21{
diff --git a/arch/um/sys-x86_64/user-offsets.c b/arch/um/sys-x86_64/user-offsets.c
index 5e14792e4838..513d17ceafd4 100644
--- a/arch/um/sys-x86_64/user-offsets.c
+++ b/arch/um/sys-x86_64/user-offsets.c
@@ -3,6 +3,14 @@
3#include <signal.h> 3#include <signal.h>
4#define __FRAME_OFFSETS 4#define __FRAME_OFFSETS
5#include <asm/ptrace.h> 5#include <asm/ptrace.h>
6#include <asm/types.h>
7/* For some reason, x86_64 defines u64 and u32 only in <pci/types.h>, which I
8 * refuse to include here, even though they're used throughout the headers.
9 * These are used in asm/user.h, and that include can't be avoided because of
10 * the sizeof(struct user_regs_struct) below.
11 */
12typedef __u64 u64;
13typedef __u32 u32;
6#include <asm/user.h> 14#include <asm/user.h>
7 15
8#define DEFINE(sym, val) \ 16#define DEFINE(sym, val) \
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 44ee7f6acf7b..0f430d9d3632 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -303,6 +303,20 @@ config HPET_TIMER
303 as it is off-chip. You can find the HPET spec at 303 as it is off-chip. You can find the HPET spec at
304 <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>. 304 <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>.
305 305
306config X86_PM_TIMER
307 bool "PM timer"
308 default y
309 help
310 Support the ACPI PM timer for time keeping. This is slow,
311 but is useful on some chipsets without HPET on systems with more
312 than one CPU. On a single processor or single socket multi core
313 system it is normally not required.
314 When the PM timer is active 64bit vsyscalls are disabled
315 and should not be enabled (/proc/sys/kernel/vsyscall64 should
316 not be changed).
317 The kernel selects the PM timer only as a last resort, so it is
318 useful to enable just in case.
319
306config HPET_EMULATE_RTC 320config HPET_EMULATE_RTC
307 bool "Provide RTC interrupt" 321 bool "Provide RTC interrupt"
308 depends on HPET_TIMER && RTC=y 322 depends on HPET_TIMER && RTC=y
@@ -407,7 +421,7 @@ config PCI_DIRECT
407 421
408config PCI_MMCONFIG 422config PCI_MMCONFIG
409 bool "Support mmconfig PCI config space access" 423 bool "Support mmconfig PCI config space access"
410 depends on PCI 424 depends on PCI && ACPI
411 select ACPI_BOOT 425 select ACPI_BOOT
412 426
413config UNORDERED_IO 427config UNORDERED_IO
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 9ce51dee30b3..569595b74c7c 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.11-bk7 3# Linux kernel version: 2.6.12-rc4
4# Sat Mar 12 23:43:44 2005 4# Fri May 13 06:39:11 2005
5# 5#
6CONFIG_X86_64=y 6CONFIG_X86_64=y
7CONFIG_64BIT=y 7CONFIG_64BIT=y
@@ -11,8 +11,6 @@ CONFIG_RWSEM_GENERIC_SPINLOCK=y
11CONFIG_GENERIC_CALIBRATE_DELAY=y 11CONFIG_GENERIC_CALIBRATE_DELAY=y
12CONFIG_X86_CMPXCHG=y 12CONFIG_X86_CMPXCHG=y
13CONFIG_EARLY_PRINTK=y 13CONFIG_EARLY_PRINTK=y
14CONFIG_HPET_TIMER=y
15CONFIG_HPET_EMULATE_RTC=y
16CONFIG_GENERIC_ISA_DMA=y 14CONFIG_GENERIC_ISA_DMA=y
17CONFIG_GENERIC_IOMAP=y 15CONFIG_GENERIC_IOMAP=y
18 16
@@ -22,6 +20,7 @@ CONFIG_GENERIC_IOMAP=y
22CONFIG_EXPERIMENTAL=y 20CONFIG_EXPERIMENTAL=y
23CONFIG_CLEAN_COMPILE=y 21CONFIG_CLEAN_COMPILE=y
24CONFIG_LOCK_KERNEL=y 22CONFIG_LOCK_KERNEL=y
23CONFIG_INIT_ENV_ARG_LIMIT=32
25 24
26# 25#
27# General setup 26# General setup
@@ -33,7 +32,6 @@ CONFIG_POSIX_MQUEUE=y
33# CONFIG_BSD_PROCESS_ACCT is not set 32# CONFIG_BSD_PROCESS_ACCT is not set
34CONFIG_SYSCTL=y 33CONFIG_SYSCTL=y
35# CONFIG_AUDIT is not set 34# CONFIG_AUDIT is not set
36CONFIG_LOG_BUF_SHIFT=18
37# CONFIG_HOTPLUG is not set 35# CONFIG_HOTPLUG is not set
38CONFIG_KOBJECT_UEVENT=y 36CONFIG_KOBJECT_UEVENT=y
39CONFIG_IKCONFIG=y 37CONFIG_IKCONFIG=y
@@ -43,10 +41,11 @@ CONFIG_IKCONFIG_PROC=y
43CONFIG_KALLSYMS=y 41CONFIG_KALLSYMS=y
44CONFIG_KALLSYMS_ALL=y 42CONFIG_KALLSYMS_ALL=y
45# CONFIG_KALLSYMS_EXTRA_PASS is not set 43# CONFIG_KALLSYMS_EXTRA_PASS is not set
44CONFIG_PRINTK=y
45CONFIG_BUG=y
46CONFIG_BASE_FULL=y 46CONFIG_BASE_FULL=y
47CONFIG_FUTEX=y 47CONFIG_FUTEX=y
48CONFIG_EPOLL=y 48CONFIG_EPOLL=y
49# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
50CONFIG_SHMEM=y 49CONFIG_SHMEM=y
51CONFIG_CC_ALIGN_FUNCTIONS=0 50CONFIG_CC_ALIGN_FUNCTIONS=0
52CONFIG_CC_ALIGN_LABELS=0 51CONFIG_CC_ALIGN_LABELS=0
@@ -93,6 +92,9 @@ CONFIG_DISCONTIGMEM=y
93CONFIG_NUMA=y 92CONFIG_NUMA=y
94CONFIG_HAVE_DEC_LOCK=y 93CONFIG_HAVE_DEC_LOCK=y
95CONFIG_NR_CPUS=8 94CONFIG_NR_CPUS=8
95CONFIG_HPET_TIMER=y
96CONFIG_X86_PM_TIMER=y
97CONFIG_HPET_EMULATE_RTC=y
96CONFIG_GART_IOMMU=y 98CONFIG_GART_IOMMU=y
97CONFIG_SWIOTLB=y 99CONFIG_SWIOTLB=y
98CONFIG_X86_MCE=y 100CONFIG_X86_MCE=y
@@ -100,6 +102,7 @@ CONFIG_X86_MCE_INTEL=y
100CONFIG_SECCOMP=y 102CONFIG_SECCOMP=y
101CONFIG_GENERIC_HARDIRQS=y 103CONFIG_GENERIC_HARDIRQS=y
102CONFIG_GENERIC_IRQ_PROBE=y 104CONFIG_GENERIC_IRQ_PROBE=y
105CONFIG_ISA_DMA_API=y
103 106
104# 107#
105# Power management options 108# Power management options
@@ -129,7 +132,7 @@ CONFIG_ACPI_NUMA=y
129# CONFIG_ACPI_IBM is not set 132# CONFIG_ACPI_IBM is not set
130CONFIG_ACPI_TOSHIBA=y 133CONFIG_ACPI_TOSHIBA=y
131CONFIG_ACPI_BLACKLIST_YEAR=2001 134CONFIG_ACPI_BLACKLIST_YEAR=2001
132CONFIG_ACPI_DEBUG=y 135# CONFIG_ACPI_DEBUG is not set
133CONFIG_ACPI_BUS=y 136CONFIG_ACPI_BUS=y
134CONFIG_ACPI_EC=y 137CONFIG_ACPI_EC=y
135CONFIG_ACPI_POWER=y 138CONFIG_ACPI_POWER=y
@@ -141,6 +144,7 @@ CONFIG_ACPI_SYSTEM=y
141# CPU Frequency scaling 144# CPU Frequency scaling
142# 145#
143CONFIG_CPU_FREQ=y 146CONFIG_CPU_FREQ=y
147CONFIG_CPU_FREQ_TABLE=y
144# CONFIG_CPU_FREQ_DEBUG is not set 148# CONFIG_CPU_FREQ_DEBUG is not set
145CONFIG_CPU_FREQ_STAT=y 149CONFIG_CPU_FREQ_STAT=y
146# CONFIG_CPU_FREQ_STAT_DETAILS is not set 150# CONFIG_CPU_FREQ_STAT_DETAILS is not set
@@ -150,7 +154,6 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
150# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set 154# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
151CONFIG_CPU_FREQ_GOV_USERSPACE=y 155CONFIG_CPU_FREQ_GOV_USERSPACE=y
152CONFIG_CPU_FREQ_GOV_ONDEMAND=y 156CONFIG_CPU_FREQ_GOV_ONDEMAND=y
153CONFIG_CPU_FREQ_TABLE=y
154 157
155# 158#
156# CPUFreq processor drivers 159# CPUFreq processor drivers
@@ -164,6 +167,7 @@ CONFIG_X86_ACPI_CPUFREQ=y
164# shared options 167# shared options
165# 168#
166CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y 169CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y
170# CONFIG_X86_SPEEDSTEP_LIB is not set
167 171
168# 172#
169# Bus options (PCI etc.) 173# Bus options (PCI etc.)
@@ -172,9 +176,11 @@ CONFIG_PCI=y
172CONFIG_PCI_DIRECT=y 176CONFIG_PCI_DIRECT=y
173CONFIG_PCI_MMCONFIG=y 177CONFIG_PCI_MMCONFIG=y
174CONFIG_UNORDERED_IO=y 178CONFIG_UNORDERED_IO=y
179# CONFIG_PCIEPORTBUS is not set
175CONFIG_PCI_MSI=y 180CONFIG_PCI_MSI=y
176# CONFIG_PCI_LEGACY_PROC is not set 181# CONFIG_PCI_LEGACY_PROC is not set
177# CONFIG_PCI_NAMES is not set 182# CONFIG_PCI_NAMES is not set
183# CONFIG_PCI_DEBUG is not set
178 184
179# 185#
180# PCCARD (PCMCIA/CardBus) support 186# PCCARD (PCMCIA/CardBus) support
@@ -182,10 +188,6 @@ CONFIG_PCI_MSI=y
182# CONFIG_PCCARD is not set 188# CONFIG_PCCARD is not set
183 189
184# 190#
185# PC-card bridges
186#
187
188#
189# PCI Hotplug Support 191# PCI Hotplug Support
190# 192#
191# CONFIG_HOTPLUG_PCI is not set 193# CONFIG_HOTPLUG_PCI is not set
@@ -254,7 +256,7 @@ CONFIG_LBD=y
254# IO Schedulers 256# IO Schedulers
255# 257#
256CONFIG_IOSCHED_NOOP=y 258CONFIG_IOSCHED_NOOP=y
257CONFIG_IOSCHED_AS=y 259# CONFIG_IOSCHED_AS is not set
258CONFIG_IOSCHED_DEADLINE=y 260CONFIG_IOSCHED_DEADLINE=y
259CONFIG_IOSCHED_CFQ=y 261CONFIG_IOSCHED_CFQ=y
260# CONFIG_ATA_OVER_ETH is not set 262# CONFIG_ATA_OVER_ETH is not set
@@ -308,7 +310,8 @@ CONFIG_BLK_DEV_AMD74XX=y
308CONFIG_BLK_DEV_PIIX=y 310CONFIG_BLK_DEV_PIIX=y
309# CONFIG_BLK_DEV_NS87415 is not set 311# CONFIG_BLK_DEV_NS87415 is not set
310# CONFIG_BLK_DEV_PDC202XX_OLD is not set 312# CONFIG_BLK_DEV_PDC202XX_OLD is not set
311# CONFIG_BLK_DEV_PDC202XX_NEW is not set 313CONFIG_BLK_DEV_PDC202XX_NEW=y
314# CONFIG_PDC202XX_FORCE is not set
312# CONFIG_BLK_DEV_SVWKS is not set 315# CONFIG_BLK_DEV_SVWKS is not set
313# CONFIG_BLK_DEV_SIIMAGE is not set 316# CONFIG_BLK_DEV_SIIMAGE is not set
314# CONFIG_BLK_DEV_SIS5513 is not set 317# CONFIG_BLK_DEV_SIS5513 is not set
@@ -353,7 +356,7 @@ CONFIG_BLK_DEV_SD=y
353# 356#
354# SCSI low-level drivers 357# SCSI low-level drivers
355# 358#
356CONFIG_BLK_DEV_3W_XXXX_RAID=y 359# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
357# CONFIG_SCSI_3W_9XXX is not set 360# CONFIG_SCSI_3W_9XXX is not set
358# CONFIG_SCSI_ACARD is not set 361# CONFIG_SCSI_ACARD is not set
359# CONFIG_SCSI_AACRAID is not set 362# CONFIG_SCSI_AACRAID is not set
@@ -384,7 +387,6 @@ CONFIG_SCSI_SATA_VIA=y
384# CONFIG_SCSI_BUSLOGIC is not set 387# CONFIG_SCSI_BUSLOGIC is not set
385# CONFIG_SCSI_DMX3191D is not set 388# CONFIG_SCSI_DMX3191D is not set
386# CONFIG_SCSI_EATA is not set 389# CONFIG_SCSI_EATA is not set
387# CONFIG_SCSI_EATA_PIO is not set
388# CONFIG_SCSI_FUTURE_DOMAIN is not set 390# CONFIG_SCSI_FUTURE_DOMAIN is not set
389# CONFIG_SCSI_GDTH is not set 391# CONFIG_SCSI_GDTH is not set
390# CONFIG_SCSI_IPS is not set 392# CONFIG_SCSI_IPS is not set
@@ -392,7 +394,6 @@ CONFIG_SCSI_SATA_VIA=y
392# CONFIG_SCSI_INIA100 is not set 394# CONFIG_SCSI_INIA100 is not set
393# CONFIG_SCSI_SYM53C8XX_2 is not set 395# CONFIG_SCSI_SYM53C8XX_2 is not set
394# CONFIG_SCSI_IPR is not set 396# CONFIG_SCSI_IPR is not set
395# CONFIG_SCSI_QLOGIC_ISP is not set
396# CONFIG_SCSI_QLOGIC_FC is not set 397# CONFIG_SCSI_QLOGIC_FC is not set
397# CONFIG_SCSI_QLOGIC_1280 is not set 398# CONFIG_SCSI_QLOGIC_1280 is not set
398CONFIG_SCSI_QLA2XXX=y 399CONFIG_SCSI_QLA2XXX=y
@@ -401,6 +402,7 @@ CONFIG_SCSI_QLA2XXX=y
401# CONFIG_SCSI_QLA2300 is not set 402# CONFIG_SCSI_QLA2300 is not set
402# CONFIG_SCSI_QLA2322 is not set 403# CONFIG_SCSI_QLA2322 is not set
403# CONFIG_SCSI_QLA6312 is not set 404# CONFIG_SCSI_QLA6312 is not set
405# CONFIG_SCSI_LPFC is not set
404# CONFIG_SCSI_DC395x is not set 406# CONFIG_SCSI_DC395x is not set
405# CONFIG_SCSI_DC390T is not set 407# CONFIG_SCSI_DC390T is not set
406# CONFIG_SCSI_DEBUG is not set 408# CONFIG_SCSI_DEBUG is not set
@@ -437,7 +439,6 @@ CONFIG_NET=y
437# 439#
438CONFIG_PACKET=y 440CONFIG_PACKET=y
439# CONFIG_PACKET_MMAP is not set 441# CONFIG_PACKET_MMAP is not set
440# CONFIG_NETLINK_DEV is not set
441CONFIG_UNIX=y 442CONFIG_UNIX=y
442# CONFIG_NET_KEY is not set 443# CONFIG_NET_KEY is not set
443CONFIG_INET=y 444CONFIG_INET=y
@@ -502,7 +503,7 @@ CONFIG_NETDEVICES=y
502# CONFIG_DUMMY is not set 503# CONFIG_DUMMY is not set
503# CONFIG_BONDING is not set 504# CONFIG_BONDING is not set
504# CONFIG_EQUALIZER is not set 505# CONFIG_EQUALIZER is not set
505# CONFIG_TUN is not set 506CONFIG_TUN=y
506 507
507# 508#
508# ARCnet devices 509# ARCnet devices
@@ -525,8 +526,7 @@ CONFIG_MII=y
525# CONFIG_HP100 is not set 526# CONFIG_HP100 is not set
526CONFIG_NET_PCI=y 527CONFIG_NET_PCI=y
527# CONFIG_PCNET32 is not set 528# CONFIG_PCNET32 is not set
528CONFIG_AMD8111_ETH=y 529# CONFIG_AMD8111_ETH is not set
529# CONFIG_AMD8111E_NAPI is not set
530# CONFIG_ADAPTEC_STARFIRE is not set 530# CONFIG_ADAPTEC_STARFIRE is not set
531# CONFIG_B44 is not set 531# CONFIG_B44 is not set
532CONFIG_FORCEDETH=y 532CONFIG_FORCEDETH=y
@@ -536,7 +536,7 @@ CONFIG_FORCEDETH=y
536# CONFIG_FEALNX is not set 536# CONFIG_FEALNX is not set
537# CONFIG_NATSEMI is not set 537# CONFIG_NATSEMI is not set
538# CONFIG_NE2K_PCI is not set 538# CONFIG_NE2K_PCI is not set
539CONFIG_8139CP=m 539CONFIG_8139CP=y
540CONFIG_8139TOO=y 540CONFIG_8139TOO=y
541# CONFIG_8139TOO_PIO is not set 541# CONFIG_8139TOO_PIO is not set
542# CONFIG_8139TOO_TUNE_TWISTER is not set 542# CONFIG_8139TOO_TUNE_TWISTER is not set
@@ -671,6 +671,7 @@ CONFIG_SERIAL_8250_NR_UARTS=4
671# 671#
672CONFIG_SERIAL_CORE=y 672CONFIG_SERIAL_CORE=y
673CONFIG_SERIAL_CORE_CONSOLE=y 673CONFIG_SERIAL_CORE_CONSOLE=y
674# CONFIG_SERIAL_JSM is not set
674CONFIG_UNIX98_PTYS=y 675CONFIG_UNIX98_PTYS=y
675CONFIG_LEGACY_PTYS=y 676CONFIG_LEGACY_PTYS=y
676CONFIG_LEGACY_PTY_COUNT=256 677CONFIG_LEGACY_PTY_COUNT=256
@@ -696,6 +697,7 @@ CONFIG_RTC=y
696# 697#
697CONFIG_AGP=y 698CONFIG_AGP=y
698CONFIG_AGP_AMD64=y 699CONFIG_AGP_AMD64=y
700CONFIG_AGP_INTEL=y
699# CONFIG_DRM is not set 701# CONFIG_DRM is not set
700# CONFIG_MWAVE is not set 702# CONFIG_MWAVE is not set
701CONFIG_RAW_DRIVER=y 703CONFIG_RAW_DRIVER=y
@@ -703,7 +705,7 @@ CONFIG_HPET=y
703# CONFIG_HPET_RTC_IRQ is not set 705# CONFIG_HPET_RTC_IRQ is not set
704CONFIG_HPET_MMAP=y 706CONFIG_HPET_MMAP=y
705CONFIG_MAX_RAW_DEVS=256 707CONFIG_MAX_RAW_DEVS=256
706CONFIG_HANGCHECK_TIMER=y 708# CONFIG_HANGCHECK_TIMER is not set
707 709
708# 710#
709# TPM devices 711# TPM devices
@@ -786,6 +788,8 @@ CONFIG_SOUND_ICH=y
786# 788#
787# USB support 789# USB support
788# 790#
791CONFIG_USB_ARCH_HAS_HCD=y
792CONFIG_USB_ARCH_HAS_OHCI=y
789CONFIG_USB=y 793CONFIG_USB=y
790# CONFIG_USB_DEBUG is not set 794# CONFIG_USB_DEBUG is not set
791 795
@@ -797,8 +801,6 @@ CONFIG_USB_DEVICEFS=y
797# CONFIG_USB_DYNAMIC_MINORS is not set 801# CONFIG_USB_DYNAMIC_MINORS is not set
798# CONFIG_USB_SUSPEND is not set 802# CONFIG_USB_SUSPEND is not set
799# CONFIG_USB_OTG is not set 803# CONFIG_USB_OTG is not set
800CONFIG_USB_ARCH_HAS_HCD=y
801CONFIG_USB_ARCH_HAS_OHCI=y
802 804
803# 805#
804# USB Host Controller Drivers 806# USB Host Controller Drivers
@@ -826,7 +828,6 @@ CONFIG_USB_PRINTER=y
826# 828#
827CONFIG_USB_STORAGE=y 829CONFIG_USB_STORAGE=y
828# CONFIG_USB_STORAGE_DEBUG is not set 830# CONFIG_USB_STORAGE_DEBUG is not set
829# CONFIG_USB_STORAGE_RW_DETECT is not set
830# CONFIG_USB_STORAGE_DATAFAB is not set 831# CONFIG_USB_STORAGE_DATAFAB is not set
831# CONFIG_USB_STORAGE_FREECOM is not set 832# CONFIG_USB_STORAGE_FREECOM is not set
832# CONFIG_USB_STORAGE_ISD200 is not set 833# CONFIG_USB_STORAGE_ISD200 is not set
@@ -965,7 +966,7 @@ CONFIG_AUTOFS_FS=y
965# CD-ROM/DVD Filesystems 966# CD-ROM/DVD Filesystems
966# 967#
967CONFIG_ISO9660_FS=y 968CONFIG_ISO9660_FS=y
968# CONFIG_JOLIET is not set 969CONFIG_JOLIET=y
969# CONFIG_ZISOFS is not set 970# CONFIG_ZISOFS is not set
970# CONFIG_UDF_FS is not set 971# CONFIG_UDF_FS is not set
971 972
@@ -1092,9 +1093,10 @@ CONFIG_OPROFILE=y
1092# 1093#
1093# Kernel hacking 1094# Kernel hacking
1094# 1095#
1096# CONFIG_PRINTK_TIME is not set
1095CONFIG_DEBUG_KERNEL=y 1097CONFIG_DEBUG_KERNEL=y
1096CONFIG_MAGIC_SYSRQ=y 1098CONFIG_MAGIC_SYSRQ=y
1097# CONFIG_PRINTK_TIME is not set 1099CONFIG_LOG_BUF_SHIFT=18
1098# CONFIG_SCHEDSTATS is not set 1100# CONFIG_SCHEDSTATS is not set
1099# CONFIG_DEBUG_SLAB is not set 1101# CONFIG_DEBUG_SLAB is not set
1100# CONFIG_DEBUG_SPINLOCK is not set 1102# CONFIG_DEBUG_SPINLOCK is not set
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 0a3318e08ab6..5ca4a4598fda 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
28obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o 28obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
29obj-$(CONFIG_SWIOTLB) += swiotlb.o 29obj-$(CONFIG_SWIOTLB) += swiotlb.o
30obj-$(CONFIG_KPROBES) += kprobes.o 30obj-$(CONFIG_KPROBES) += kprobes.o
31obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
31 32
32obj-$(CONFIG_MODULES) += module.o 33obj-$(CONFIG_MODULES) += module.o
33 34
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 7e13545748e0..f8e6cc4fecd4 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -33,6 +33,7 @@
33#include <asm/mpspec.h> 33#include <asm/mpspec.h>
34#include <asm/pgalloc.h> 34#include <asm/pgalloc.h>
35#include <asm/mach_apic.h> 35#include <asm/mach_apic.h>
36#include <asm/nmi.h>
36 37
37int apic_verbosity; 38int apic_verbosity;
38 39
@@ -925,7 +926,7 @@ __init int oem_force_hpet_timer(void)
925 unsigned id; 926 unsigned id;
926 DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); 927 DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
927 928
928 bitmap_empty(clustermap, NUM_APIC_CLUSTERS); 929 bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
929 930
930 for (i = 0; i < NR_CPUS; i++) { 931 for (i = 0; i < NR_CPUS; i++) {
931 id = bios_cpu_apicid[i]; 932 id = bios_cpu_apicid[i];
@@ -1056,7 +1057,7 @@ int __init APIC_init_uniprocessor (void)
1056 nr_ioapics = 0; 1057 nr_ioapics = 0;
1057#endif 1058#endif
1058 setup_boot_APIC_clock(); 1059 setup_boot_APIC_clock();
1059 1060 check_nmi_watchdog();
1060 return 0; 1061 return 0;
1061} 1062}
1062 1063
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 1086b5fcac21..28817490fdc6 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -220,13 +220,18 @@ sysret_careful:
220 jmp sysret_check 220 jmp sysret_check
221 221
222 /* Handle a signal */ 222 /* Handle a signal */
223 /* edx: work flags (arg3) */
224sysret_signal: 223sysret_signal:
225 sti 224 sti
225 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
226 jz 1f
227
228 /* Really a signal */
229 /* edx: work flags (arg3) */
226 leaq do_notify_resume(%rip),%rax 230 leaq do_notify_resume(%rip),%rax
227 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 231 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
228 xorl %esi,%esi # oldset -> arg2 232 xorl %esi,%esi # oldset -> arg2
229 call ptregscall_common 233 call ptregscall_common
2341: movl $_TIF_NEED_RESCHED,%edi
230 jmp sysret_check 235 jmp sysret_check
231 236
232 /* Do syscall tracing */ 237 /* Do syscall tracing */
@@ -484,6 +489,8 @@ retint_careful:
484 jmp retint_check 489 jmp retint_check
485 490
486retint_signal: 491retint_signal:
492 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
493 jz retint_swapgs
487 sti 494 sti
488 SAVE_REST 495 SAVE_REST
489 movq $-1,ORIG_RAX(%rsp) 496 movq $-1,ORIG_RAX(%rsp)
@@ -492,8 +499,8 @@ retint_signal:
492 call do_notify_resume 499 call do_notify_resume
493 RESTORE_REST 500 RESTORE_REST
494 cli 501 cli
502 movl $_TIF_NEED_RESCHED,%edi
495 GET_THREAD_INFO(%rcx) 503 GET_THREAD_INFO(%rcx)
496 movl $_TIF_WORK_MASK,%edi
497 jmp retint_check 504 jmp retint_check
498 505
499#ifdef CONFIG_PREEMPT 506#ifdef CONFIG_PREEMPT
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 60be58617eb9..80e9b498c443 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -42,6 +42,8 @@
42 42
43int sis_apic_bug; /* not actually supported, dummy for compile */ 43int sis_apic_bug; /* not actually supported, dummy for compile */
44 44
45static int no_timer_check;
46
45static DEFINE_SPINLOCK(ioapic_lock); 47static DEFINE_SPINLOCK(ioapic_lock);
46 48
47/* 49/*
@@ -1601,7 +1603,7 @@ static inline void check_timer(void)
1601 * Ok, does IRQ0 through the IOAPIC work? 1603 * Ok, does IRQ0 through the IOAPIC work?
1602 */ 1604 */
1603 unmask_IO_APIC_irq(0); 1605 unmask_IO_APIC_irq(0);
1604 if (timer_irq_works()) { 1606 if (!no_timer_check && timer_irq_works()) {
1605 nmi_watchdog_default(); 1607 nmi_watchdog_default();
1606 if (nmi_watchdog == NMI_IO_APIC) { 1608 if (nmi_watchdog == NMI_IO_APIC) {
1607 disable_8259A_irq(0); 1609 disable_8259A_irq(0);
@@ -1671,6 +1673,13 @@ static inline void check_timer(void)
1671 panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n"); 1673 panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n");
1672} 1674}
1673 1675
1676static int __init notimercheck(char *s)
1677{
1678 no_timer_check = 1;
1679 return 1;
1680}
1681__setup("no_timer_check", notimercheck);
1682
1674/* 1683/*
1675 * 1684 *
1676 * IRQ's that are handled by the PIC in the MPS IOAPIC case. 1685 * IRQ's that are handled by the PIC in the MPS IOAPIC case.
@@ -1804,76 +1813,6 @@ device_initcall(ioapic_init_sysfs);
1804 1813
1805#define IO_APIC_MAX_ID 0xFE 1814#define IO_APIC_MAX_ID 0xFE
1806 1815
1807int __init io_apic_get_unique_id (int ioapic, int apic_id)
1808{
1809 union IO_APIC_reg_00 reg_00;
1810 static physid_mask_t apic_id_map;
1811 unsigned long flags;
1812 int i = 0;
1813
1814 /*
1815 * The P4 platform supports up to 256 APIC IDs on two separate APIC
1816 * buses (one for LAPICs, one for IOAPICs), where predecessors only
1817 * supports up to 16 on one shared APIC bus.
1818 *
1819 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
1820 * advantage of new APIC bus architecture.
1821 */
1822
1823 if (physids_empty(apic_id_map))
1824 apic_id_map = phys_cpu_present_map;
1825
1826 spin_lock_irqsave(&ioapic_lock, flags);
1827 reg_00.raw = io_apic_read(ioapic, 0);
1828 spin_unlock_irqrestore(&ioapic_lock, flags);
1829
1830 if (apic_id >= IO_APIC_MAX_ID) {
1831 apic_printk(APIC_QUIET, KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
1832 "%d\n", ioapic, apic_id, reg_00.bits.ID);
1833 apic_id = reg_00.bits.ID;
1834 }
1835
1836 /*
1837 * Every APIC in a system must have a unique ID or we get lots of nice
1838 * 'stuck on smp_invalidate_needed IPI wait' messages.
1839 */
1840 if (physid_isset(apic_id, apic_id_map)) {
1841
1842 for (i = 0; i < IO_APIC_MAX_ID; i++) {
1843 if (!physid_isset(i, apic_id_map))
1844 break;
1845 }
1846
1847 if (i == IO_APIC_MAX_ID)
1848 panic("Max apic_id exceeded!\n");
1849
1850 apic_printk(APIC_VERBOSE, KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
1851 "trying %d\n", ioapic, apic_id, i);
1852
1853 apic_id = i;
1854 }
1855
1856 physid_set(apic_id, apic_id_map);
1857
1858 if (reg_00.bits.ID != apic_id) {
1859 reg_00.bits.ID = apic_id;
1860
1861 spin_lock_irqsave(&ioapic_lock, flags);
1862 io_apic_write(ioapic, 0, reg_00.raw);
1863 reg_00.raw = io_apic_read(ioapic, 0);
1864 spin_unlock_irqrestore(&ioapic_lock, flags);
1865
1866 /* Sanity check */
1867 if (reg_00.bits.ID != apic_id)
1868 panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
1869 }
1870
1871 apic_printk(APIC_VERBOSE,KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
1872
1873 return apic_id;
1874}
1875
1876
1877int __init io_apic_get_version (int ioapic) 1816int __init io_apic_get_version (int ioapic)
1878{ 1817{
1879 union IO_APIC_reg_01 reg_01; 1818 union IO_APIC_reg_01 reg_01;
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 7ec031c6ca10..f86d9db94bfc 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -107,6 +107,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
107static void __init MP_processor_info (struct mpc_config_processor *m) 107static void __init MP_processor_info (struct mpc_config_processor *m)
108{ 108{
109 int ver; 109 int ver;
110 static int found_bsp=0;
110 111
111 if (!(m->mpc_cpuflag & CPU_ENABLED)) 112 if (!(m->mpc_cpuflag & CPU_ENABLED))
112 return; 113 return;
@@ -126,11 +127,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
126 " Processor ignored.\n", NR_CPUS); 127 " Processor ignored.\n", NR_CPUS);
127 return; 128 return;
128 } 129 }
129 if (num_processors >= maxcpus) {
130 printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
131 " Processor ignored.\n", maxcpus);
132 return;
133 }
134 130
135 num_processors++; 131 num_processors++;
136 132
@@ -150,7 +146,19 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
150 ver = 0x10; 146 ver = 0x10;
151 } 147 }
152 apic_version[m->mpc_apicid] = ver; 148 apic_version[m->mpc_apicid] = ver;
153 bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; 149 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
150 /*
151 * bios_cpu_apicid is required to have processors listed
152 * in same order as logical cpu numbers. Hence the first
153 * entry is BSP, and so on.
154 */
155 bios_cpu_apicid[0] = m->mpc_apicid;
156 x86_cpu_to_apicid[0] = m->mpc_apicid;
157 found_bsp = 1;
158 } else {
159 bios_cpu_apicid[num_processors - found_bsp] = m->mpc_apicid;
160 x86_cpu_to_apicid[num_processors - found_bsp] = m->mpc_apicid;
161 }
154} 162}
155 163
156static void __init MP_bus_info (struct mpc_config_bus *m) 164static void __init MP_bus_info (struct mpc_config_bus *m)
@@ -759,7 +767,7 @@ void __init mp_register_ioapic (
759 mp_ioapics[idx].mpc_apicaddr = address; 767 mp_ioapics[idx].mpc_apicaddr = address;
760 768
761 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 769 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
762 mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); 770 mp_ioapics[idx].mpc_apicid = id;
763 mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); 771 mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
764 772
765 /* 773 /*
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 61de0b34a01e..31c0f2e6ac91 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -33,6 +33,7 @@
33#include <asm/msr.h> 33#include <asm/msr.h>
34#include <asm/proto.h> 34#include <asm/proto.h>
35#include <asm/kdebug.h> 35#include <asm/kdebug.h>
36#include <asm/local.h>
36 37
37/* 38/*
38 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: 39 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -59,7 +60,8 @@ int panic_on_timeout;
59 60
60unsigned int nmi_watchdog = NMI_DEFAULT; 61unsigned int nmi_watchdog = NMI_DEFAULT;
61static unsigned int nmi_hz = HZ; 62static unsigned int nmi_hz = HZ;
62unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ 63static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
64static unsigned int nmi_p4_cccr_val;
63 65
64/* Note that these events don't tick when the CPU idles. This means 66/* Note that these events don't tick when the CPU idles. This means
65 the frequency varies with CPU load. */ 67 the frequency varies with CPU load. */
@@ -71,61 +73,87 @@ unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
71#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 73#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
72#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 74#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
73 75
74#define P6_EVNTSEL0_ENABLE (1 << 22) 76#define MSR_P4_MISC_ENABLE 0x1A0
75#define P6_EVNTSEL_INT (1 << 20) 77#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
76#define P6_EVNTSEL_OS (1 << 17) 78#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
77#define P6_EVNTSEL_USR (1 << 16) 79#define MSR_P4_PERFCTR0 0x300
78#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 80#define MSR_P4_CCCR0 0x360
79#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED 81#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
82#define P4_ESCR_OS (1<<3)
83#define P4_ESCR_USR (1<<2)
84#define P4_CCCR_OVF_PMI0 (1<<26)
85#define P4_CCCR_OVF_PMI1 (1<<27)
86#define P4_CCCR_THRESHOLD(N) ((N)<<20)
87#define P4_CCCR_COMPLEMENT (1<<19)
88#define P4_CCCR_COMPARE (1<<18)
89#define P4_CCCR_REQUIRED (3<<16)
90#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
91#define P4_CCCR_ENABLE (1<<12)
92/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
93 CRU_ESCR0 (with any non-null event selector) through a complemented
94 max threshold. [IA32-Vol3, Section 14.9.9] */
95#define MSR_P4_IQ_COUNTER0 0x30C
96#define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
97#define P4_NMI_IQ_CCCR0 \
98 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
99 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
100
101static __init inline int nmi_known_cpu(void)
102{
103 switch (boot_cpu_data.x86_vendor) {
104 case X86_VENDOR_AMD:
105 return boot_cpu_data.x86 == 15;
106 case X86_VENDOR_INTEL:
107 return boot_cpu_data.x86 == 15;
108 }
109 return 0;
110}
80 111
81/* Run after command line and cpu_init init, but before all other checks */ 112/* Run after command line and cpu_init init, but before all other checks */
82void __init nmi_watchdog_default(void) 113void __init nmi_watchdog_default(void)
83{ 114{
84 if (nmi_watchdog != NMI_DEFAULT) 115 if (nmi_watchdog != NMI_DEFAULT)
85 return; 116 return;
86 117 if (nmi_known_cpu())
87 /* For some reason the IO APIC watchdog doesn't work on the AMD 118 nmi_watchdog = NMI_LOCAL_APIC;
88 8111 chipset. For now switch to local APIC mode using 119 else
89 perfctr0 there. On Intel CPUs we don't have code to handle
90 the perfctr and the IO-APIC seems to work, so use that. */
91
92 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
93 nmi_watchdog = NMI_LOCAL_APIC;
94 printk(KERN_INFO
95 "Using local APIC NMI watchdog using perfctr0\n");
96 } else {
97 printk(KERN_INFO "Using IO APIC NMI watchdog\n");
98 nmi_watchdog = NMI_IO_APIC; 120 nmi_watchdog = NMI_IO_APIC;
99 }
100} 121}
101 122
102/* Why is there no CPUID flag for this? */ 123#ifdef CONFIG_SMP
103static __init int cpu_has_lapic(void) 124/* The performance counters used by NMI_LOCAL_APIC don't trigger when
125 * the CPU is idle. To make sure the NMI watchdog really ticks on all
126 * CPUs during the test make them busy.
127 */
128static __init void nmi_cpu_busy(void *data)
104{ 129{
105 switch (boot_cpu_data.x86_vendor) { 130 volatile int *endflag = data;
106 case X86_VENDOR_INTEL: 131 local_irq_enable();
107 case X86_VENDOR_AMD: 132 /* Intentionally don't use cpu_relax here. This is
108 return boot_cpu_data.x86 >= 6; 133 to make sure that the performance counter really ticks,
109 /* .... add more cpus here or find a different way to figure this out. */ 134 even if there is a simulator or similar that catches the
110 default: 135 pause instruction. On a real HT machine this is fine because
111 return 0; 136 all other CPUs are busy with "useless" delay loops and don't
112 } 137 care if they get somewhat less cycles. */
138 while (*endflag == 0)
139 barrier();
113} 140}
141#endif
114 142
115static int __init check_nmi_watchdog (void) 143int __init check_nmi_watchdog (void)
116{ 144{
117 int counts[NR_CPUS]; 145 volatile int endflag = 0;
146 int *counts;
118 int cpu; 147 int cpu;
119 148
120 if (nmi_watchdog == NMI_NONE) 149 counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
121 return 0; 150 if (!counts)
151 return -1;
122 152
123 if (nmi_watchdog == NMI_LOCAL_APIC && !cpu_has_lapic()) { 153 printk(KERN_INFO "testing NMI watchdog ... ");
124 nmi_watchdog = NMI_NONE;
125 return -1;
126 }
127 154
128 printk(KERN_INFO "Testing NMI watchdog ... "); 155 if (nmi_watchdog == NMI_LOCAL_APIC)
156 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
129 157
130 for (cpu = 0; cpu < NR_CPUS; cpu++) 158 for (cpu = 0; cpu < NR_CPUS; cpu++)
131 counts[cpu] = cpu_pda[cpu].__nmi_count; 159 counts[cpu] = cpu_pda[cpu].__nmi_count;
@@ -133,15 +161,22 @@ static int __init check_nmi_watchdog (void)
133 mdelay((10*1000)/nmi_hz); // wait 10 ticks 161 mdelay((10*1000)/nmi_hz); // wait 10 ticks
134 162
135 for (cpu = 0; cpu < NR_CPUS; cpu++) { 163 for (cpu = 0; cpu < NR_CPUS; cpu++) {
164 if (!cpu_online(cpu))
165 continue;
136 if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) { 166 if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) {
137 printk("CPU#%d: NMI appears to be stuck (%d)!\n", 167 endflag = 1;
168 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
138 cpu, 169 cpu,
170 counts[cpu],
139 cpu_pda[cpu].__nmi_count); 171 cpu_pda[cpu].__nmi_count);
140 nmi_active = 0; 172 nmi_active = 0;
141 lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; 173 lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
174 nmi_perfctr_msr = 0;
175 kfree(counts);
142 return -1; 176 return -1;
143 } 177 }
144 } 178 }
179 endflag = 1;
145 printk("OK.\n"); 180 printk("OK.\n");
146 181
147 /* now that we know it works we can reduce NMI frequency to 182 /* now that we know it works we can reduce NMI frequency to
@@ -149,10 +184,9 @@ static int __init check_nmi_watchdog (void)
149 if (nmi_watchdog == NMI_LOCAL_APIC) 184 if (nmi_watchdog == NMI_LOCAL_APIC)
150 nmi_hz = 1; 185 nmi_hz = 1;
151 186
187 kfree(counts);
152 return 0; 188 return 0;
153} 189}
154/* Have this called later during boot so counters are updating */
155late_initcall(check_nmi_watchdog);
156 190
157int __init setup_nmi_watchdog(char *str) 191int __init setup_nmi_watchdog(char *str)
158{ 192{
@@ -170,7 +204,7 @@ int __init setup_nmi_watchdog(char *str)
170 204
171 if (nmi >= NMI_INVALID) 205 if (nmi >= NMI_INVALID)
172 return 0; 206 return 0;
173 nmi_watchdog = nmi; 207 nmi_watchdog = nmi;
174 return 1; 208 return 1;
175} 209}
176 210
@@ -185,7 +219,10 @@ static void disable_lapic_nmi_watchdog(void)
185 wrmsr(MSR_K7_EVNTSEL0, 0, 0); 219 wrmsr(MSR_K7_EVNTSEL0, 0, 0);
186 break; 220 break;
187 case X86_VENDOR_INTEL: 221 case X86_VENDOR_INTEL:
188 wrmsr(MSR_IA32_EVNTSEL0, 0, 0); 222 if (boot_cpu_data.x86 == 15) {
223 wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
224 wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
225 }
189 break; 226 break;
190 } 227 }
191 nmi_active = -1; 228 nmi_active = -1;
@@ -253,7 +290,7 @@ void enable_timer_nmi_watchdog(void)
253 290
254static int nmi_pm_active; /* nmi_active before suspend */ 291static int nmi_pm_active; /* nmi_active before suspend */
255 292
256static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) 293static int lapic_nmi_suspend(struct sys_device *dev, u32 state)
257{ 294{
258 nmi_pm_active = nmi_active; 295 nmi_pm_active = nmi_active;
259 disable_lapic_nmi_watchdog(); 296 disable_lapic_nmi_watchdog();
@@ -300,22 +337,27 @@ late_initcall(init_lapic_nmi_sysfs);
300 * Original code written by Keith Owens. 337 * Original code written by Keith Owens.
301 */ 338 */
302 339
340static void clear_msr_range(unsigned int base, unsigned int n)
341{
342 unsigned int i;
343
344 for(i = 0; i < n; ++i)
345 wrmsr(base+i, 0, 0);
346}
347
303static void setup_k7_watchdog(void) 348static void setup_k7_watchdog(void)
304{ 349{
305 int i; 350 int i;
306 unsigned int evntsel; 351 unsigned int evntsel;
307 352
308 /* No check, so can start with slow frequency */
309 nmi_hz = 1;
310
311 /* XXX should check these in EFER */
312
313 nmi_perfctr_msr = MSR_K7_PERFCTR0; 353 nmi_perfctr_msr = MSR_K7_PERFCTR0;
314 354
315 for(i = 0; i < 4; ++i) { 355 for(i = 0; i < 4; ++i) {
316 /* Simulator may not support it */ 356 /* Simulator may not support it */
317 if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) 357 if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) {
358 nmi_perfctr_msr = 0;
318 return; 359 return;
360 }
319 wrmsrl(MSR_K7_PERFCTR0+i, 0UL); 361 wrmsrl(MSR_K7_PERFCTR0+i, 0UL);
320 } 362 }
321 363
@@ -325,12 +367,54 @@ static void setup_k7_watchdog(void)
325 | K7_NMI_EVENT; 367 | K7_NMI_EVENT;
326 368
327 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); 369 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
328 wrmsrl(MSR_K7_PERFCTR0, -((u64)cpu_khz*1000) / nmi_hz); 370 wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
329 apic_write(APIC_LVTPC, APIC_DM_NMI); 371 apic_write(APIC_LVTPC, APIC_DM_NMI);
330 evntsel |= K7_EVNTSEL_ENABLE; 372 evntsel |= K7_EVNTSEL_ENABLE;
331 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); 373 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
332} 374}
333 375
376
377static int setup_p4_watchdog(void)
378{
379 unsigned int misc_enable, dummy;
380
381 rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
382 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
383 return 0;
384
385 nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
386 nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
387#ifdef CONFIG_SMP
388 if (smp_num_siblings == 2)
389 nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
390#endif
391
392 if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
393 clear_msr_range(0x3F1, 2);
394 /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
395 docs doesn't fully define it, so leave it alone for now. */
396 if (boot_cpu_data.x86_model >= 0x3) {
397 /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
398 clear_msr_range(0x3A0, 26);
399 clear_msr_range(0x3BC, 3);
400 } else {
401 clear_msr_range(0x3A0, 31);
402 }
403 clear_msr_range(0x3C0, 6);
404 clear_msr_range(0x3C8, 6);
405 clear_msr_range(0x3E0, 2);
406 clear_msr_range(MSR_P4_CCCR0, 18);
407 clear_msr_range(MSR_P4_PERFCTR0, 18);
408
409 wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
410 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
411 Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
412 wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
413 apic_write(APIC_LVTPC, APIC_DM_NMI);
414 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
415 return 1;
416}
417
334void setup_apic_nmi_watchdog(void) 418void setup_apic_nmi_watchdog(void)
335{ 419{
336 switch (boot_cpu_data.x86_vendor) { 420 switch (boot_cpu_data.x86_vendor) {
@@ -341,6 +425,13 @@ void setup_apic_nmi_watchdog(void)
341 return; 425 return;
342 setup_k7_watchdog(); 426 setup_k7_watchdog();
343 break; 427 break;
428 case X86_VENDOR_INTEL:
429 if (boot_cpu_data.x86 != 15)
430 return;
431 if (!setup_p4_watchdog())
432 return;
433 break;
434
344 default: 435 default:
345 return; 436 return;
346 } 437 }
@@ -355,56 +446,67 @@ void setup_apic_nmi_watchdog(void)
355 * 446 *
356 * as these watchdog NMI IRQs are generated on every CPU, we only 447 * as these watchdog NMI IRQs are generated on every CPU, we only
357 * have to check the current processor. 448 * have to check the current processor.
358 *
359 * since NMIs don't listen to _any_ locks, we have to be extremely
360 * careful not to rely on unsafe variables. The printk might lock
361 * up though, so we have to break up any console locks first ...
362 * [when there will be more tty-related locks, break them up
363 * here too!]
364 */ 449 */
365 450
366static unsigned int 451static DEFINE_PER_CPU(unsigned, last_irq_sum);
367 last_irq_sums [NR_CPUS], 452static DEFINE_PER_CPU(local_t, alert_counter);
368 alert_counter [NR_CPUS]; 453static DEFINE_PER_CPU(int, nmi_touch);
369 454
370void touch_nmi_watchdog (void) 455void touch_nmi_watchdog (void)
371{ 456{
372 int i; 457 int i;
373 458
374 /* 459 /*
375 * Just reset the alert counters, (other CPUs might be 460 * Tell other CPUs to reset their alert counters. We cannot
376 * spinning on locks we hold): 461 * do it ourselves because the alert count increase is not
462 * atomic.
377 */ 463 */
378 for (i = 0; i < NR_CPUS; i++) 464 for (i = 0; i < NR_CPUS; i++)
379 alert_counter[i] = 0; 465 per_cpu(nmi_touch, i) = 1;
380} 466}
381 467
382void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) 468void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
383{ 469{
384 int sum, cpu; 470 int sum;
471 int touched = 0;
385 472
386 cpu = safe_smp_processor_id();
387 sum = read_pda(apic_timer_irqs); 473 sum = read_pda(apic_timer_irqs);
388 if (last_irq_sums[cpu] == sum) { 474 if (__get_cpu_var(nmi_touch)) {
475 __get_cpu_var(nmi_touch) = 0;
476 touched = 1;
477 }
478 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
389 /* 479 /*
390 * Ayiee, looks like this CPU is stuck ... 480 * Ayiee, looks like this CPU is stuck ...
391 * wait a few IRQs (5 seconds) before doing the oops ... 481 * wait a few IRQs (5 seconds) before doing the oops ...
392 */ 482 */
393 alert_counter[cpu]++; 483 local_inc(&__get_cpu_var(alert_counter));
394 if (alert_counter[cpu] == 5*nmi_hz) { 484 if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) {
395 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) 485 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
396 == NOTIFY_STOP) { 486 == NOTIFY_STOP) {
397 alert_counter[cpu] = 0; 487 local_set(&__get_cpu_var(alert_counter), 0);
398 return; 488 return;
399 } 489 }
400 die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs); 490 die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs);
401 } 491 }
402 } else { 492 } else {
403 last_irq_sums[cpu] = sum; 493 __get_cpu_var(last_irq_sum) = sum;
404 alert_counter[cpu] = 0; 494 local_set(&__get_cpu_var(alert_counter), 0);
405 } 495 }
406 if (nmi_perfctr_msr) 496 if (nmi_perfctr_msr) {
497 if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
498 /*
499 * P4 quirks:
500 * - An overflown perfctr will assert its interrupt
501 * until the OVF flag in its CCCR is cleared.
502 * - LVTPC is masked on interrupt and must be
503 * unmasked by the LVTPC handler.
504 */
505 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
506 apic_write(APIC_LVTPC, APIC_DM_NMI);
507 }
407 wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); 508 wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
509 }
408} 510}
409 511
410static int dummy_nmi_callback(struct pt_regs * regs, int cpu) 512static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c
new file mode 100644
index 000000000000..feb5f108dd26
--- /dev/null
+++ b/arch/x86_64/kernel/pmtimer.c
@@ -0,0 +1,101 @@
1/* Ported over from i386 by AK, original copyright was:
2 *
3 * (C) Dominik Brodowski <linux@brodo.de> 2003
4 *
5 * Driver to use the Power Management Timer (PMTMR) available in some
6 * southbridges as primary timing source for the Linux kernel.
7 *
8 * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
9 * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
10 *
11 * This file is licensed under the GPL v2.
12 *
13 * Dropped all the hardware bug workarounds for now. Hopefully they
14 * are not needed on 64bit chipsets.
15 */
16
17#include <linux/jiffies.h>
18#include <linux/kernel.h>
19#include <linux/time.h>
20#include <linux/init.h>
21#include <linux/cpumask.h>
22#include <asm/io.h>
23#include <asm/proto.h>
24#include <asm/msr.h>
25#include <asm/vsyscall.h>
26
27/* The I/O port the PMTMR resides at.
28 * The location is detected during setup_arch(),
29 * in arch/i386/kernel/acpi/boot.c */
30u32 pmtmr_ioport;
31
32/* value of the Power timer at last timer interrupt */
33static u32 offset_delay;
34static u32 last_pmtmr_tick;
35
36#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
37
38static inline u32 cyc2us(u32 cycles)
39{
40 /* The Power Management Timer ticks at 3.579545 ticks per microsecond.
41 * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
42 *
43 * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
44 * easily be multiplied with 286 (=0x11E) without having to fear
45 * u32 overflows.
46 */
47 cycles *= 286;
48 return (cycles >> 10);
49}
50
51int pmtimer_mark_offset(void)
52{
53 static int first_run = 1;
54 unsigned long tsc;
55 u32 lost;
56
57 u32 tick = inl(pmtmr_ioport);
58 u32 delta;
59
60 delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK);
61
62 last_pmtmr_tick = tick;
63 monotonic_base += delta * NSEC_PER_USEC;
64
65 delta += offset_delay;
66
67 lost = delta / (USEC_PER_SEC / HZ);
68 offset_delay = delta % (USEC_PER_SEC / HZ);
69
70 rdtscll(tsc);
71 vxtime.last_tsc = tsc - offset_delay * cpu_khz;
72
73 /* don't calculate delay for first run,
74 or if we've got less then a tick */
75 if (first_run || (lost < 1)) {
76 first_run = 0;
77 offset_delay = 0;
78 }
79
80 return lost - 1;
81}
82
83unsigned int do_gettimeoffset_pm(void)
84{
85 u32 now, offset, delta = 0;
86
87 offset = last_pmtmr_tick;
88 now = inl(pmtmr_ioport);
89 delta = (now - offset) & ACPI_PM_MASK;
90
91 return offset_delay + cyc2us(delta);
92}
93
94
95static int __init nopmtimer_setup(char *s)
96{
97 pmtmr_ioport = 0;
98 return 0;
99}
100
101__setup("nopmtimer", nopmtimer_setup);
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c
index e26e86bb56fe..525f6a128a27 100644
--- a/arch/x86_64/kernel/ptrace.c
+++ b/arch/x86_64/kernel/ptrace.c
@@ -257,13 +257,13 @@ static int putreg(struct task_struct *child,
257 value &= 0xffff; 257 value &= 0xffff;
258 return 0; 258 return 0;
259 case offsetof(struct user_regs_struct,fs_base): 259 case offsetof(struct user_regs_struct,fs_base):
260 if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) 260 if (value >= TASK_SIZE)
261 return -EIO; 261 return -EIO;
262 child->thread.fs = value; 262 child->thread.fs = value;
263 return 0; 263 return 0;
264 case offsetof(struct user_regs_struct,gs_base): 264 case offsetof(struct user_regs_struct,gs_base):
265 if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) 265 if (value >= TASK_SIZE)
266 return -EIO; 266 return -EIO;
267 child->thread.gs = value; 267 child->thread.gs = value;
268 return 0; 268 return 0;
269 case offsetof(struct user_regs_struct, eflags): 269 case offsetof(struct user_regs_struct, eflags):
@@ -277,6 +277,11 @@ static int putreg(struct task_struct *child,
277 return -EIO; 277 return -EIO;
278 value &= 0xffff; 278 value &= 0xffff;
279 break; 279 break;
280 case offsetof(struct user_regs_struct, rip):
281 /* Check if the new RIP address is canonical */
282 if (value >= TASK_SIZE)
283 return -EIO;
284 break;
280 } 285 }
281 put_stack_long(child, regno - sizeof(struct pt_regs), value); 286 put_stack_long(child, regno - sizeof(struct pt_regs), value);
282 return 0; 287 return 0;
@@ -375,7 +380,7 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data
375 break; 380 break;
376 381
377 switch (addr) { 382 switch (addr) {
378 case 0 ... sizeof(struct user_regs_struct): 383 case 0 ... sizeof(struct user_regs_struct) - sizeof(long):
379 tmp = getreg(child, addr); 384 tmp = getreg(child, addr);
380 break; 385 break;
381 case offsetof(struct user, u_debugreg[0]): 386 case offsetof(struct user, u_debugreg[0]):
@@ -420,7 +425,7 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data
420 break; 425 break;
421 426
422 switch (addr) { 427 switch (addr) {
423 case 0 ... sizeof(struct user_regs_struct): 428 case 0 ... sizeof(struct user_regs_struct) - sizeof(long):
424 ret = putreg(child, addr, data); 429 ret = putreg(child, addr, data);
425 break; 430 break;
426 /* Disallows to set a breakpoint into the vsyscall */ 431 /* Disallows to set a breakpoint into the vsyscall */
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 2129cf9ba6b2..99f038ede23c 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -719,7 +719,6 @@ static void __init display_cacheinfo(struct cpuinfo_x86 *c)
719 } 719 }
720} 720}
721 721
722#ifdef CONFIG_SMP
723/* 722/*
724 * On a AMD dual core setup the lower bits of the APIC id distingush the cores. 723 * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
725 * Assumes number of cores is a power of two. 724 * Assumes number of cores is a power of two.
@@ -727,17 +726,26 @@ static void __init display_cacheinfo(struct cpuinfo_x86 *c)
727static void __init amd_detect_cmp(struct cpuinfo_x86 *c) 726static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
728{ 727{
729#ifdef CONFIG_SMP 728#ifdef CONFIG_SMP
730 int cpu = c->x86_apicid; 729 int cpu = smp_processor_id();
731 int node = 0; 730 int node = 0;
731 unsigned bits;
732 if (c->x86_num_cores == 1) 732 if (c->x86_num_cores == 1)
733 return; 733 return;
734 cpu_core_id[cpu] = cpu >> hweight32(c->x86_num_cores - 1); 734
735 bits = 0;
736 while ((1 << bits) < c->x86_num_cores)
737 bits++;
738
739 /* Low order bits define the core id (index of core in socket) */
740 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
741 /* Convert the APIC ID into the socket ID */
742 phys_proc_id[cpu] >>= bits;
735 743
736#ifdef CONFIG_NUMA 744#ifdef CONFIG_NUMA
737 /* When an ACPI SRAT table is available use the mappings from SRAT 745 /* When an ACPI SRAT table is available use the mappings from SRAT
738 instead. */ 746 instead. */
739 if (acpi_numa <= 0) { 747 if (acpi_numa <= 0) {
740 node = cpu_core_id[cpu]; 748 node = phys_proc_id[cpu];
741 if (!node_online(node)) 749 if (!node_online(node))
742 node = first_node(node_online_map); 750 node = first_node(node_online_map);
743 cpu_to_node[cpu] = node; 751 cpu_to_node[cpu] = node;
@@ -745,15 +753,11 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
745 node = cpu_to_node[cpu]; 753 node = cpu_to_node[cpu];
746 } 754 }
747#endif 755#endif
756
748 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", 757 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
749 cpu, c->x86_num_cores, node, cpu_core_id[cpu]); 758 cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
750#endif 759#endif
751} 760}
752#else
753static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
754{
755}
756#endif
757 761
758static int __init init_amd(struct cpuinfo_x86 *c) 762static int __init init_amd(struct cpuinfo_x86 *c)
759{ 763{
@@ -925,7 +929,6 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c)
925 c->x86_clflush_size = 64; 929 c->x86_clflush_size = 64;
926 c->x86_cache_alignment = c->x86_clflush_size; 930 c->x86_cache_alignment = c->x86_clflush_size;
927 c->x86_num_cores = 1; 931 c->x86_num_cores = 1;
928 c->x86_apicid = c == &boot_cpu_data ? 0 : c - cpu_data;
929 c->extended_cpuid_level = 0; 932 c->extended_cpuid_level = 0;
930 memset(&c->x86_capability, 0, sizeof c->x86_capability); 933 memset(&c->x86_capability, 0, sizeof c->x86_capability);
931 934
@@ -954,11 +957,14 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c)
954 } 957 }
955 if (c->x86_capability[0] & (1<<19)) 958 if (c->x86_capability[0] & (1<<19))
956 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; 959 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
957 c->x86_apicid = misc >> 24;
958 } else { 960 } else {
959 /* Have CPUID level 0 only - unheard of */ 961 /* Have CPUID level 0 only - unheard of */
960 c->x86 = 4; 962 c->x86 = 4;
961 } 963 }
964
965#ifdef CONFIG_SMP
966 phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
967#endif
962} 968}
963 969
964/* 970/*
@@ -1088,7 +1094,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1088 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1094 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1089 1095
1090 /* Other (Linux-defined) */ 1096 /* Other (Linux-defined) */
1091 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", "k8c+", 1097 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
1092 "constant_tsc", NULL, NULL, 1098 "constant_tsc", NULL, NULL,
1093 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1099 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1094 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1100 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index d439ced150c6..3fdcdba0fec5 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -452,7 +452,9 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
452 regs->rip -= 2; 452 regs->rip -= 2;
453 } 453 }
454 if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) { 454 if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) {
455 regs->rax = __NR_restart_syscall; 455 regs->rax = test_thread_flag(TIF_IA32) ?
456 __NR_ia32_restart_syscall :
457 __NR_restart_syscall;
456 regs->rip -= 2; 458 regs->rip -= 2;
457 } 459 }
458 } 460 }
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 73f7e8b9543a..f1ec0f345941 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -56,6 +56,7 @@
56#include <asm/kdebug.h> 56#include <asm/kdebug.h>
57#include <asm/tlbflush.h> 57#include <asm/tlbflush.h>
58#include <asm/proto.h> 58#include <asm/proto.h>
59#include <asm/nmi.h>
59 60
60/* Change for real CPU hotplug. Note other files need to be fixed 61/* Change for real CPU hotplug. Note other files need to be fixed
61 first too. */ 62 first too. */
@@ -93,6 +94,7 @@ int smp_threads_ready;
93 94
94cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; 95cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
95cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; 96cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
97EXPORT_SYMBOL(cpu_core_map);
96 98
97/* 99/*
98 * Trampoline 80x86 program as an array. 100 * Trampoline 80x86 program as an array.
@@ -125,96 +127,210 @@ static void __cpuinit smp_store_cpu_info(int id)
125 127
126 *c = boot_cpu_data; 128 *c = boot_cpu_data;
127 identify_cpu(c); 129 identify_cpu(c);
130 print_cpu_info(c);
128} 131}
129 132
130/* 133/*
131 * Synchronize TSCs of CPUs 134 * New Funky TSC sync algorithm borrowed from IA64.
135 * Main advantage is that it doesn't reset the TSCs fully and
136 * in general looks more robust and it works better than my earlier
137 * attempts. I believe it was written by David Mosberger. Some minor
138 * adjustments for x86-64 by me -AK
132 * 139 *
133 * This new algorithm is less accurate than the old "zero TSCs" 140 * Original comment reproduced below.
134 * one, but we cannot zero TSCs anymore in the new hotplug CPU 141 *
135 * model. 142 * Synchronize TSC of the current (slave) CPU with the TSC of the
143 * MASTER CPU (normally the time-keeper CPU). We use a closed loop to
144 * eliminate the possibility of unaccounted-for errors (such as
145 * getting a machine check in the middle of a calibration step). The
146 * basic idea is for the slave to ask the master what itc value it has
147 * and to read its own itc before and after the master responds. Each
148 * iteration gives us three timestamps:
149 *
150 * slave master
151 *
152 * t0 ---\
153 * ---\
154 * --->
155 * tm
156 * /---
157 * /---
158 * t1 <---
159 *
160 *
161 * The goal is to adjust the slave's TSC such that tm falls exactly
162 * half-way between t0 and t1. If we achieve this, the clocks are
163 * synchronized provided the interconnect between the slave and the
164 * master is symmetric. Even if the interconnect were asymmetric, we
165 * would still know that the synchronization error is smaller than the
166 * roundtrip latency (t0 - t1).
167 *
168 * When the interconnect is quiet and symmetric, this lets us
169 * synchronize the TSC to within one or two cycles. However, we can
170 * only *guarantee* that the synchronization is accurate to within a
171 * round-trip time, which is typically in the range of several hundred
172 * cycles (e.g., ~500 cycles). In practice, this means that the TSCs
173 * are usually almost perfectly synchronized, but we shouldn't assume
174 * that the accuracy is much better than half a micro second or so.
175 *
176 * [there are other errors like the latency of RDTSC and of the
177 * WRMSR. These can also account to hundreds of cycles. So it's
178 * probably worse. It claims 153 cycles error on a dual Opteron,
179 * but I suspect the numbers are actually somewhat worse -AK]
136 */ 180 */
137 181
138static atomic_t __cpuinitdata tsc_flag; 182#define MASTER 0
183#define SLAVE (SMP_CACHE_BYTES/8)
184
185/* Intentionally don't use cpu_relax() while TSC synchronization
186 because we don't want to go into funky power save modi or cause
187 hypervisors to schedule us away. Going to sleep would likely affect
188 latency and low latency is the primary objective here. -AK */
189#define no_cpu_relax() barrier()
190
139static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock); 191static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
140static unsigned long long __cpuinitdata bp_tsc, ap_tsc; 192static volatile __cpuinitdata unsigned long go[SLAVE + 1];
193static int notscsync __cpuinitdata;
194
195#undef DEBUG_TSC_SYNC
141 196
142#define NR_LOOPS 5 197#define NUM_ROUNDS 64 /* magic value */
198#define NUM_ITERS 5 /* likewise */
143 199
144static void __cpuinit sync_tsc_bp_init(int init) 200/* Callback on boot CPU */
201static __cpuinit void sync_master(void *arg)
145{ 202{
146 if (init) 203 unsigned long flags, i;
147 _raw_spin_lock(&tsc_sync_lock); 204
148 else 205 if (smp_processor_id() != boot_cpu_id)
149 _raw_spin_unlock(&tsc_sync_lock); 206 return;
150 atomic_set(&tsc_flag, 0); 207
208 go[MASTER] = 0;
209
210 local_irq_save(flags);
211 {
212 for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
213 while (!go[MASTER])
214 no_cpu_relax();
215 go[MASTER] = 0;
216 rdtscll(go[SLAVE]);
217 }
218 }
219 local_irq_restore(flags);
151} 220}
152 221
153/* 222/*
154 * Synchronize TSC on AP with BP. 223 * Return the number of cycles by which our tsc differs from the tsc
224 * on the master (time-keeper) CPU. A positive number indicates our
225 * tsc is ahead of the master, negative that it is behind.
155 */ 226 */
156static void __cpuinit __sync_tsc_ap(void) 227static inline long
228get_delta(long *rt, long *master)
157{ 229{
158 if (!cpu_has_tsc) 230 unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
159 return; 231 unsigned long tcenter, t0, t1, tm;
160 Dprintk("AP %d syncing TSC\n", smp_processor_id()); 232 int i;
161 233
162 while (atomic_read(&tsc_flag) != 0) 234 for (i = 0; i < NUM_ITERS; ++i) {
163 cpu_relax(); 235 rdtscll(t0);
164 atomic_inc(&tsc_flag); 236 go[MASTER] = 1;
165 mb(); 237 while (!(tm = go[SLAVE]))
166 _raw_spin_lock(&tsc_sync_lock); 238 no_cpu_relax();
167 wrmsrl(MSR_IA32_TSC, bp_tsc); 239 go[SLAVE] = 0;
168 _raw_spin_unlock(&tsc_sync_lock); 240 rdtscll(t1);
169 rdtscll(ap_tsc); 241
170 mb(); 242 if (t1 - t0 < best_t1 - best_t0)
171 atomic_inc(&tsc_flag); 243 best_t0 = t0, best_t1 = t1, best_tm = tm;
172 mb(); 244 }
245
246 *rt = best_t1 - best_t0;
247 *master = best_tm - best_t0;
248
249 /* average best_t0 and best_t1 without overflow: */
250 tcenter = (best_t0/2 + best_t1/2);
251 if (best_t0 % 2 + best_t1 % 2 == 2)
252 ++tcenter;
253 return tcenter - best_tm;
173} 254}
174 255
175static void __cpuinit sync_tsc_ap(void) 256static __cpuinit void sync_tsc(void)
176{ 257{
177 int i; 258 int i, done = 0;
178 for (i = 0; i < NR_LOOPS; i++) 259 long delta, adj, adjust_latency = 0;
179 __sync_tsc_ap(); 260 unsigned long flags, rt, master_time_stamp, bound;
261#if DEBUG_TSC_SYNC
262 static struct syncdebug {
263 long rt; /* roundtrip time */
264 long master; /* master's timestamp */
265 long diff; /* difference between midpoint and master's timestamp */
266 long lat; /* estimate of tsc adjustment latency */
267 } t[NUM_ROUNDS] __cpuinitdata;
268#endif
269
270 go[MASTER] = 1;
271
272 smp_call_function(sync_master, NULL, 1, 0);
273
274 while (go[MASTER]) /* wait for master to be ready */
275 no_cpu_relax();
276
277 spin_lock_irqsave(&tsc_sync_lock, flags);
278 {
279 for (i = 0; i < NUM_ROUNDS; ++i) {
280 delta = get_delta(&rt, &master_time_stamp);
281 if (delta == 0) {
282 done = 1; /* let's lock on to this... */
283 bound = rt;
284 }
285
286 if (!done) {
287 unsigned long t;
288 if (i > 0) {
289 adjust_latency += -delta;
290 adj = -delta + adjust_latency/4;
291 } else
292 adj = -delta;
293
294 rdtscll(t);
295 wrmsrl(MSR_IA32_TSC, t + adj);
296 }
297#if DEBUG_TSC_SYNC
298 t[i].rt = rt;
299 t[i].master = master_time_stamp;
300 t[i].diff = delta;
301 t[i].lat = adjust_latency/4;
302#endif
303 }
304 }
305 spin_unlock_irqrestore(&tsc_sync_lock, flags);
306
307#if DEBUG_TSC_SYNC
308 for (i = 0; i < NUM_ROUNDS; ++i)
309 printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
310 t[i].rt, t[i].master, t[i].diff, t[i].lat);
311#endif
312
313 printk(KERN_INFO
314 "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
315 "maxerr %lu cycles)\n",
316 smp_processor_id(), boot_cpu_id, delta, rt);
180} 317}
181 318
182/* 319static void __cpuinit tsc_sync_wait(void)
183 * Synchronize TSC from BP to AP.
184 */
185static void __cpuinit __sync_tsc_bp(int cpu)
186{ 320{
187 if (!cpu_has_tsc) 321 if (notscsync || !cpu_has_tsc)
188 return; 322 return;
189 323 printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
190 /* Wait for AP */ 324 boot_cpu_id);
191 while (atomic_read(&tsc_flag) == 0) 325 sync_tsc();
192 cpu_relax();
193 /* Save BPs TSC */
194 sync_core();
195 rdtscll(bp_tsc);
196 /* Don't do the sync core here to avoid too much latency. */
197 mb();
198 /* Start the AP */
199 _raw_spin_unlock(&tsc_sync_lock);
200 /* Wait for AP again */
201 while (atomic_read(&tsc_flag) < 2)
202 cpu_relax();
203 rdtscl(bp_tsc);
204 barrier();
205} 326}
206 327
207static void __cpuinit sync_tsc_bp(int cpu) 328static __init int notscsync_setup(char *s)
208{ 329{
209 int i; 330 notscsync = 1;
210 for (i = 0; i < NR_LOOPS - 1; i++) { 331 return 0;
211 __sync_tsc_bp(cpu);
212 sync_tsc_bp_init(1);
213 }
214 __sync_tsc_bp(cpu);
215 printk(KERN_INFO "Synced TSC of CPU %d difference %Ld\n",
216 cpu, ap_tsc - bp_tsc);
217} 332}
333__setup("notscsync", notscsync_setup);
218 334
219static atomic_t init_deasserted __cpuinitdata; 335static atomic_t init_deasserted __cpuinitdata;
220 336
@@ -315,11 +431,6 @@ void __cpuinit start_secondary(void)
315 cpu_init(); 431 cpu_init();
316 smp_callin(); 432 smp_callin();
317 433
318 /*
319 * Synchronize the TSC with the BP
320 */
321 sync_tsc_ap();
322
323 /* otherwise gcc will move up the smp_processor_id before the cpu_init */ 434 /* otherwise gcc will move up the smp_processor_id before the cpu_init */
324 barrier(); 435 barrier();
325 436
@@ -334,7 +445,6 @@ void __cpuinit start_secondary(void)
334 enable_8259A_irq(0); 445 enable_8259A_irq(0);
335 } 446 }
336 447
337
338 enable_APIC_timer(); 448 enable_APIC_timer();
339 449
340 /* 450 /*
@@ -343,6 +453,11 @@ void __cpuinit start_secondary(void)
343 cpu_set(smp_processor_id(), cpu_online_map); 453 cpu_set(smp_processor_id(), cpu_online_map);
344 mb(); 454 mb();
345 455
456 /* Wait for TSC sync to not schedule things before.
457 We still process interrupts, which could see an inconsistent
458 time in that window unfortunately. */
459 tsc_sync_wait();
460
346 cpu_idle(); 461 cpu_idle();
347} 462}
348 463
@@ -531,7 +646,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
531 printk("failed fork for CPU %d\n", cpu); 646 printk("failed fork for CPU %d\n", cpu);
532 return PTR_ERR(idle); 647 return PTR_ERR(idle);
533 } 648 }
534 x86_cpu_to_apicid[cpu] = apicid;
535 649
536 cpu_pda[cpu].pcurrent = idle; 650 cpu_pda[cpu].pcurrent = idle;
537 651
@@ -600,8 +714,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
600 714
601 if (cpu_isset(cpu, cpu_callin_map)) { 715 if (cpu_isset(cpu, cpu_callin_map)) {
602 /* number CPUs logically, starting from 1 (BSP is 0) */ 716 /* number CPUs logically, starting from 1 (BSP is 0) */
603 Dprintk("OK.\n");
604 print_cpu_info(&cpu_data[cpu]);
605 Dprintk("CPU has booted.\n"); 717 Dprintk("CPU has booted.\n");
606 } else { 718 } else {
607 boot_error = 1; 719 boot_error = 1;
@@ -842,7 +954,6 @@ void __cpuinit smp_prepare_cpus(unsigned int max_cpus)
842 GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id); 954 GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
843 /* Or can we switch back to PIC here? */ 955 /* Or can we switch back to PIC here? */
844 } 956 }
845 x86_cpu_to_apicid[0] = boot_cpu_id;
846 957
847 /* 958 /*
848 * Now start the IO-APICs 959 * Now start the IO-APICs
@@ -889,18 +1000,14 @@ int __cpuinit __cpu_up(unsigned int cpu)
889 printk("__cpu_up: bad cpu %d\n", cpu); 1000 printk("__cpu_up: bad cpu %d\n", cpu);
890 return -EINVAL; 1001 return -EINVAL;
891 } 1002 }
892 sync_tsc_bp_init(1);
893 1003
894 /* Boot it! */ 1004 /* Boot it! */
895 err = do_boot_cpu(cpu, apicid); 1005 err = do_boot_cpu(cpu, apicid);
896 if (err < 0) { 1006 if (err < 0) {
897 sync_tsc_bp_init(0);
898 Dprintk("do_boot_cpu failed %d\n", err); 1007 Dprintk("do_boot_cpu failed %d\n", err);
899 return err; 1008 return err;
900 } 1009 }
901 1010
902 sync_tsc_bp(cpu);
903
904 /* Unleash the CPU! */ 1011 /* Unleash the CPU! */
905 Dprintk("waiting for cpu %d\n", cpu); 1012 Dprintk("waiting for cpu %d\n", cpu);
906 1013
@@ -923,4 +1030,6 @@ void __cpuinit smp_cpus_done(unsigned int max_cpus)
923 1030
924 detect_siblings(); 1031 detect_siblings();
925 time_init_gtod(); 1032 time_init_gtod();
1033
1034 check_nmi_watchdog();
926} 1035}
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 0e3b60e1ac5b..735b6767c8ed 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -26,6 +26,8 @@
26#include <linux/sysdev.h> 26#include <linux/sysdev.h>
27#include <linux/bcd.h> 27#include <linux/bcd.h>
28#include <linux/kallsyms.h> 28#include <linux/kallsyms.h>
29#include <linux/acpi.h>
30#include <acpi/achware.h> /* for PM timer frequency */
29#include <asm/8253pit.h> 31#include <asm/8253pit.h>
30#include <asm/pgtable.h> 32#include <asm/pgtable.h>
31#include <asm/vsyscall.h> 33#include <asm/vsyscall.h>
@@ -396,6 +398,10 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
396 (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; 398 (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick;
397 399
398 vxtime.last = offset; 400 vxtime.last = offset;
401#ifdef CONFIG_X86_PM_TIMER
402 } else if (vxtime.mode == VXTIME_PMTMR) {
403 lost = pmtimer_mark_offset();
404#endif
399 } else { 405 } else {
400 offset = (((tsc - vxtime.last_tsc) * 406 offset = (((tsc - vxtime.last_tsc) *
401 vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); 407 vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ);
@@ -898,6 +904,13 @@ void __init time_init(void)
898 hpet_period; 904 hpet_period;
899 cpu_khz = hpet_calibrate_tsc(); 905 cpu_khz = hpet_calibrate_tsc();
900 timename = "HPET"; 906 timename = "HPET";
907#ifdef CONFIG_X86_PM_TIMER
908 } else if (pmtmr_ioport) {
909 vxtime_hz = PM_TIMER_FREQUENCY;
910 timename = "PM";
911 pit_init();
912 cpu_khz = pit_calibrate_tsc();
913#endif
901 } else { 914 } else {
902 pit_init(); 915 pit_init();
903 cpu_khz = pit_calibrate_tsc(); 916 cpu_khz = pit_calibrate_tsc();
@@ -923,35 +936,50 @@ void __init time_init(void)
923} 936}
924 937
925/* 938/*
939 * Make an educated guess if the TSC is trustworthy and synchronized
940 * over all CPUs.
941 */
942static __init int unsynchronized_tsc(void)
943{
944#ifdef CONFIG_SMP
945 if (oem_force_hpet_timer())
946 return 1;
947 /* Intel systems are normally all synchronized. Exceptions
948 are handled in the OEM check above. */
949 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
950 return 0;
951 /* All in a single socket - should be synchronized */
952 if (cpus_weight(cpu_core_map[0]) == num_online_cpus())
953 return 0;
954#endif
955 /* Assume multi socket systems are not synchronized */
956 return num_online_cpus() > 1;
957}
958
959/*
926 * Decide after all CPUs are booted what mode gettimeofday should use. 960 * Decide after all CPUs are booted what mode gettimeofday should use.
927 */ 961 */
928void __init time_init_gtod(void) 962void __init time_init_gtod(void)
929{ 963{
930 char *timetype; 964 char *timetype;
931 965
932 /* 966 if (unsynchronized_tsc())
933 * AMD systems with more than one CPU don't have fully synchronized
934 * TSCs. Always use HPET gettimeofday for these, although it is slower.
935 * Intel SMP systems usually have synchronized TSCs, so use always
936 * the TSC.
937 *
938 * Exceptions:
939 * IBM Summit2 checked by oem_force_hpet_timer().
940 * AMD dual core may also not need HPET. Check me.
941 *
942 * Can be turned off with "notsc".
943 */
944 if (num_online_cpus() > 1 &&
945 boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
946 notsc = 1;
947 /* Some systems will want to disable TSC and use HPET. */
948 if (oem_force_hpet_timer())
949 notsc = 1; 967 notsc = 1;
950 if (vxtime.hpet_address && notsc) { 968 if (vxtime.hpet_address && notsc) {
951 timetype = "HPET"; 969 timetype = "HPET";
952 vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; 970 vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
953 vxtime.mode = VXTIME_HPET; 971 vxtime.mode = VXTIME_HPET;
954 do_gettimeoffset = do_gettimeoffset_hpet; 972 do_gettimeoffset = do_gettimeoffset_hpet;
973#ifdef CONFIG_X86_PM_TIMER
974 /* Using PM for gettimeofday is quite slow, but we have no other
975 choice because the TSC is too unreliable on some systems. */
976 } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) {
977 timetype = "PM";
978 do_gettimeoffset = do_gettimeoffset_pm;
979 vxtime.mode = VXTIME_PMTMR;
980 sysctl_vsyscall = 0;
981 printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n");
982#endif
955 } else { 983 } else {
956 timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC"; 984 timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC";
957 vxtime.mode = VXTIME_TSC; 985 vxtime.mode = VXTIME_TSC;
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 65a37f52c56e..3dfec8fdabcd 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -332,10 +332,12 @@ void handle_BUG(struct pt_regs *regs)
332 printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", f.filename, f.line); 332 printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", f.filename, f.line);
333} 333}
334 334
335#ifdef CONFIG_BUG
335void out_of_line_bug(void) 336void out_of_line_bug(void)
336{ 337{
337 BUG(); 338 BUG();
338} 339}
340#endif
339 341
340static DEFINE_SPINLOCK(die_lock); 342static DEFINE_SPINLOCK(die_lock);
341static int die_owner = -1; 343static int die_owner = -1;
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index b4b8dc59663a..2e5734425949 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -65,7 +65,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv)
65 usec = (__xtime.tv_nsec / 1000) + 65 usec = (__xtime.tv_nsec / 1000) +
66 (__jiffies - __wall_jiffies) * (1000000 / HZ); 66 (__jiffies - __wall_jiffies) * (1000000 / HZ);
67 67
68 if (__vxtime.mode == VXTIME_TSC) { 68 if (__vxtime.mode != VXTIME_HPET) {
69 sync_core(); 69 sync_core();
70 rdtscll(t); 70 rdtscll(t);
71 if (t < __vxtime.last_tsc) 71 if (t < __vxtime.last_tsc)
@@ -217,8 +217,9 @@ static int __init vsyscall_init(void)
217 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); 217 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
218 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); 218 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
219 map_vsyscall(); 219 map_vsyscall();
220 sysctl_vsyscall = 1; 220#ifdef CONFIG_SYSCTL
221 register_sysctl_table(kernel_root_table2, 0); 221 register_sysctl_table(kernel_root_table2, 0);
222#endif
222 return 0; 223 return 0;
223} 224}
224 225
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
index a43dedb58fa2..68ec03070e5a 100644
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ b/arch/x86_64/kernel/x8664_ksyms.c
@@ -193,8 +193,9 @@ EXPORT_SYMBOL(smp_num_siblings);
193extern void do_softirq_thunk(void); 193extern void do_softirq_thunk(void);
194EXPORT_SYMBOL(do_softirq_thunk); 194EXPORT_SYMBOL(do_softirq_thunk);
195 195
196void out_of_line_bug(void); 196#ifdef CONFIG_BUG
197EXPORT_SYMBOL(out_of_line_bug); 197EXPORT_SYMBOL(out_of_line_bug);
198#endif
198 199
199EXPORT_SYMBOL(init_level4_pgt); 200EXPORT_SYMBOL(init_level4_pgt);
200 201
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index e03309216764..5d6b2114cc9a 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -234,6 +234,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
234 234
235/* 235/*
236 * Handle a fault on the vmalloc or module mapping area 236 * Handle a fault on the vmalloc or module mapping area
237 *
238 * This assumes no large pages in there.
237 */ 239 */
238static int vmalloc_fault(unsigned long address) 240static int vmalloc_fault(unsigned long address)
239{ 241{
@@ -272,7 +274,10 @@ static int vmalloc_fault(unsigned long address)
272 if (!pte_present(*pte_ref)) 274 if (!pte_present(*pte_ref))
273 return -1; 275 return -1;
274 pte = pte_offset_kernel(pmd, address); 276 pte = pte_offset_kernel(pmd, address);
275 if (!pte_present(*pte) || pte_page(*pte) != pte_page(*pte_ref)) 277 /* Don't use pte_page here, because the mappings can point
278 outside mem_map, and the NUMA hash lookup cannot handle
279 that. */
280 if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
276 BUG(); 281 BUG();
277 __flush_tlb_all(); 282 __flush_tlb_all();
278 return 0; 283 return 0;
@@ -346,7 +351,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
346 * protection error (error_code & 1) == 0. 351 * protection error (error_code & 1) == 0.
347 */ 352 */
348 if (unlikely(address >= TASK_SIZE)) { 353 if (unlikely(address >= TASK_SIZE)) {
349 if (!(error_code & 5)) { 354 if (!(error_code & 5) &&
355 ((address >= VMALLOC_START && address < VMALLOC_END) ||
356 (address >= MODULES_VADDR && address < MODULES_END))) {
350 if (vmalloc_fault(address) < 0) 357 if (vmalloc_fault(address) < 0)
351 goto bad_area_nosemaphore; 358 goto bad_area_nosemaphore;
352 return; 359 return;
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
index 74ec8554b195..58aac23760ef 100644
--- a/arch/x86_64/mm/ioremap.c
+++ b/arch/x86_64/mm/ioremap.c
@@ -133,7 +133,7 @@ ioremap_change_attr(unsigned long phys_addr, unsigned long size,
133 unsigned long flags) 133 unsigned long flags)
134{ 134{
135 int err = 0; 135 int err = 0;
136 if (flags && phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) { 136 if (phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) {
137 unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 137 unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
138 unsigned long vaddr = (unsigned long) __va(phys_addr); 138 unsigned long vaddr = (unsigned long) __va(phys_addr);
139 139
@@ -214,7 +214,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
214 remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr)); 214 remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
215 return NULL; 215 return NULL;
216 } 216 }
217 if (ioremap_change_attr(phys_addr, size, flags) < 0) { 217 if (flags && ioremap_change_attr(phys_addr, size, flags) < 0) {
218 area->flags &= 0xffffff; 218 area->flags &= 0xffffff;
219 vunmap(addr); 219 vunmap(addr);
220 return NULL; 220 return NULL;
@@ -251,7 +251,7 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
251 251
252void iounmap(volatile void __iomem *addr) 252void iounmap(volatile void __iomem *addr)
253{ 253{
254 struct vm_struct *p, **pprev; 254 struct vm_struct *p;
255 255
256 if (addr <= high_memory) 256 if (addr <= high_memory)
257 return; 257 return;
@@ -260,24 +260,11 @@ void iounmap(volatile void __iomem *addr)
260 return; 260 return;
261 261
262 write_lock(&vmlist_lock); 262 write_lock(&vmlist_lock);
263 for (p = vmlist, pprev = &vmlist; p != NULL; pprev = &p->next, p = *pprev) 263 p = __remove_vm_area((void *)((unsigned long)addr & PAGE_MASK));
264 if (p->addr == (void *)(PAGE_MASK & (unsigned long)addr)) 264 if (!p)
265 break; 265 printk("iounmap: bad address %p\n", addr);
266 if (!p) { 266 else if (p->flags >> 20)
267 printk("__iounmap: bad address %p\n", addr); 267 ioremap_change_attr(p->phys_addr, p->size, 0);
268 goto out_unlock;
269 }
270 *pprev = p->next;
271 unmap_vm_area(p);
272 if ((p->flags >> 20) &&
273 p->phys_addr + p->size - 1 < virt_to_phys(high_memory)) {
274 /* p->size includes the guard page, but cpa doesn't like that */
275 change_page_attr(virt_to_page(__va(p->phys_addr)),
276 p->size >> PAGE_SHIFT,
277 PAGE_KERNEL);
278 global_flush_tlb();
279 }
280out_unlock:
281 write_unlock(&vmlist_lock); 268 write_unlock(&vmlist_lock);
282 kfree(p); 269 kfree(p);
283} 270}