aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CREDITS5
-rw-r--r--Documentation/dontdiff4
-rw-r--r--Documentation/i386/boot.txt23
-rw-r--r--Documentation/kernel-parameters.txt21
-rw-r--r--Documentation/x86_64/boot-options.txt14
-rw-r--r--Documentation/x86_64/fake-numa-for-cpusets66
-rw-r--r--Documentation/x86_64/machinecheck7
-rw-r--r--MAINTAINERS24
-rw-r--r--Makefile2
-rw-r--r--arch/alpha/boot/misc.c2
-rw-r--r--arch/alpha/kernel/vmlinux.lds.S2
-rw-r--r--arch/arm/boot/compressed/misc.c2
-rw-r--r--arch/arm/kernel/vmlinux.lds.S2
-rw-r--r--arch/arm26/boot/compressed/misc.c2
-rw-r--r--arch/cris/arch-v32/vmlinux.lds.S1
-rw-r--r--arch/frv/kernel/vmlinux.lds.S1
-rw-r--r--arch/i386/Kconfig38
-rw-r--r--arch/i386/Kconfig.cpu35
-rw-r--r--arch/i386/Kconfig.debug10
-rw-r--r--arch/i386/Makefile2
-rw-r--r--arch/i386/Makefile.cpu9
-rw-r--r--arch/i386/boot/Makefile4
-rw-r--r--arch/i386/boot/compressed/misc.c2
-rw-r--r--arch/i386/boot/setup.S24
-rw-r--r--arch/i386/defconfig73
-rw-r--r--arch/i386/kernel/Makefile4
-rw-r--r--arch/i386/kernel/acpi/boot.c2
-rw-r--r--arch/i386/kernel/acpi/earlyquirk.c21
-rw-r--r--arch/i386/kernel/alternative.c102
-rw-r--r--arch/i386/kernel/apic.c22
-rw-r--r--arch/i386/kernel/apm.c10
-rw-r--r--arch/i386/kernel/asm-offsets.c18
-rw-r--r--arch/i386/kernel/cpu/Makefile4
-rw-r--r--arch/i386/kernel/cpu/amd.c15
-rw-r--r--arch/i386/kernel/cpu/bugs.c191
-rw-r--r--arch/i386/kernel/cpu/centaur.c10
-rw-r--r--arch/i386/kernel/cpu/common.c217
-rw-r--r--arch/i386/kernel/cpu/cyrix.c21
-rw-r--r--arch/i386/kernel/cpu/intel.c4
-rw-r--r--arch/i386/kernel/cpu/mcheck/k7.c13
-rw-r--r--arch/i386/kernel/cpu/mcheck/mce.c3
-rw-r--r--arch/i386/kernel/cpu/mcheck/p4.c16
-rw-r--r--arch/i386/kernel/cpu/mtrr/generic.c101
-rw-r--r--arch/i386/kernel/cpu/mtrr/main.c11
-rw-r--r--arch/i386/kernel/cpu/nexgen.c10
-rw-r--r--arch/i386/kernel/cpu/perfctr-watchdog.c658
-rw-r--r--arch/i386/kernel/cpu/proc.c3
-rw-r--r--arch/i386/kernel/cpu/rise.c9
-rw-r--r--arch/i386/kernel/cpu/transmeta.c10
-rw-r--r--arch/i386/kernel/cpu/umc.c10
-rw-r--r--arch/i386/kernel/doublefault.c29
-rw-r--r--arch/i386/kernel/e820.c64
-rw-r--r--arch/i386/kernel/efi.c16
-rw-r--r--arch/i386/kernel/entry.S23
-rw-r--r--arch/i386/kernel/head.S118
-rw-r--r--arch/i386/kernel/i386_ksyms.c2
-rw-r--r--arch/i386/kernel/io_apic.c38
-rw-r--r--arch/i386/kernel/ioport.c3
-rw-r--r--arch/i386/kernel/irq.c3
-rw-r--r--arch/i386/kernel/mpparse.c2
-rw-r--r--arch/i386/kernel/nmi.c832
-rw-r--r--arch/i386/kernel/paravirt.c522
-rw-r--r--arch/i386/kernel/process.c37
-rw-r--r--arch/i386/kernel/quirks.c69
-rw-r--r--arch/i386/kernel/reboot.c55
-rw-r--r--arch/i386/kernel/reboot_fixups.c2
-rw-r--r--arch/i386/kernel/smp.c304
-rw-r--r--arch/i386/kernel/smpboot.c146
-rw-r--r--arch/i386/kernel/sysenter.c269
-rw-r--r--arch/i386/kernel/time.c2
-rw-r--r--arch/i386/kernel/trampoline.S12
-rw-r--r--arch/i386/kernel/traps.c32
-rw-r--r--arch/i386/kernel/tsc.c13
-rw-r--r--arch/i386/kernel/verify_cpu.S65
-rw-r--r--arch/i386/kernel/vmi.c131
-rw-r--r--arch/i386/kernel/vmiclock.c318
-rw-r--r--arch/i386/kernel/vmitime.c482
-rw-r--r--arch/i386/kernel/vmlinux.lds.S24
-rw-r--r--arch/i386/kernel/vsyscall.lds.S4
-rw-r--r--arch/i386/lib/bitops.c4
-rw-r--r--arch/i386/lib/checksum.S69
-rw-r--r--arch/i386/lib/getuser.S26
-rw-r--r--arch/i386/lib/putuser.S39
-rw-r--r--arch/i386/lib/usercopy.c7
-rw-r--r--arch/i386/mach-generic/bigsmp.c2
-rw-r--r--arch/i386/mach-generic/es7000.c41
-rw-r--r--arch/i386/mach-voyager/voyager_smp.c16
-rw-r--r--arch/i386/mm/fault.c60
-rw-r--r--arch/i386/mm/highmem.c10
-rw-r--r--arch/i386/mm/init.c196
-rw-r--r--arch/i386/mm/pageattr.c6
-rw-r--r--arch/i386/mm/pgtable.c94
-rw-r--r--arch/i386/oprofile/nmi_int.c4
-rw-r--r--arch/i386/pci/init.c2
-rw-r--r--arch/i386/pci/mmconfig-shared.c25
-rw-r--r--arch/i386/power/cpu.c1
-rw-r--r--arch/i386/power/suspend.c14
-rw-r--r--arch/m32r/kernel/vmlinux.lds.S2
-rw-r--r--arch/mips/kernel/vmlinux.lds.S2
-rw-r--r--arch/parisc/kernel/vmlinux.lds.S2
-rw-r--r--arch/powerpc/kernel/Makefile1
-rw-r--r--arch/powerpc/kernel/setup_64.c4
-rw-r--r--arch/powerpc/kernel/suspend.c24
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S6
-rw-r--r--arch/ppc/kernel/vmlinux.lds.S2
-rw-r--r--arch/s390/kernel/vmlinux.lds.S2
-rw-r--r--arch/sh/kernel/vmlinux.lds.S2
-rw-r--r--arch/sh64/kernel/vmlinux.lds.S2
-rw-r--r--arch/sparc/kernel/vmlinux.lds.S2
-rw-r--r--arch/sparc64/kernel/smp.c6
-rw-r--r--arch/um/defconfig1
-rw-r--r--arch/x86_64/Kconfig61
-rw-r--r--arch/x86_64/Makefile4
-rw-r--r--arch/x86_64/boot/Makefile2
-rw-r--r--arch/x86_64/boot/compressed/Makefile12
-rw-r--r--arch/x86_64/boot/compressed/head.S339
-rw-r--r--arch/x86_64/boot/compressed/misc.c247
-rw-r--r--arch/x86_64/boot/compressed/vmlinux.lds44
-rw-r--r--arch/x86_64/boot/compressed/vmlinux.scr9
-rw-r--r--arch/x86_64/boot/setup.S85
-rw-r--r--arch/x86_64/boot/video.S2043
-rw-r--r--arch/x86_64/defconfig183
-rw-r--r--arch/x86_64/ia32/ia32_binfmt.c10
-rw-r--r--arch/x86_64/ia32/ia32entry.S4
-rw-r--r--arch/x86_64/ia32/syscall32.c1
-rw-r--r--arch/x86_64/kernel/Makefile7
-rw-r--r--arch/x86_64/kernel/acpi/sleep.c24
-rw-r--r--arch/x86_64/kernel/acpi/wakeup.S286
-rw-r--r--arch/x86_64/kernel/aperture.c5
-rw-r--r--arch/x86_64/kernel/apic.c35
-rw-r--r--arch/x86_64/kernel/asm-offsets.c10
-rw-r--r--arch/x86_64/kernel/bugs.c21
-rw-r--r--arch/x86_64/kernel/e820.c5
-rw-r--r--arch/x86_64/kernel/early-quirks.c13
-rw-r--r--arch/x86_64/kernel/early_printk.c5
-rw-r--r--arch/x86_64/kernel/entry.S5
-rw-r--r--arch/x86_64/kernel/functionlist1284
-rw-r--r--arch/x86_64/kernel/genapic.c104
-rw-r--r--arch/x86_64/kernel/genapic_cluster.c137
-rw-r--r--arch/x86_64/kernel/genapic_flat.c25
-rw-r--r--arch/x86_64/kernel/head.S340
-rw-r--r--arch/x86_64/kernel/head64.c41
-rw-r--r--arch/x86_64/kernel/io_apic.c31
-rw-r--r--arch/x86_64/kernel/ioport.c1
-rw-r--r--arch/x86_64/kernel/machine_kexec.c14
-rw-r--r--arch/x86_64/kernel/mce.c32
-rw-r--r--arch/x86_64/kernel/mpparse.c2
-rw-r--r--arch/x86_64/kernel/nmi.c678
-rw-r--r--arch/x86_64/kernel/pci-calgary.c2
-rw-r--r--arch/x86_64/kernel/pci-gart.c2
-rw-r--r--arch/x86_64/kernel/pci-nommu.c2
-rw-r--r--arch/x86_64/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86_64/kernel/process.c12
-rw-r--r--arch/x86_64/kernel/setup.c35
-rw-r--r--arch/x86_64/kernel/setup64.c5
-rw-r--r--arch/x86_64/kernel/signal.c6
-rw-r--r--arch/x86_64/kernel/smp.c30
-rw-r--r--arch/x86_64/kernel/smpboot.c47
-rw-r--r--arch/x86_64/kernel/suspend.c19
-rw-r--r--arch/x86_64/kernel/suspend_asm.S7
-rw-r--r--arch/x86_64/kernel/syscall.c1
-rw-r--r--arch/x86_64/kernel/time.c71
-rw-r--r--arch/x86_64/kernel/trampoline.S123
-rw-r--r--arch/x86_64/kernel/traps.c34
-rw-r--r--arch/x86_64/kernel/tsc.c17
-rw-r--r--arch/x86_64/kernel/tsc_sync.c4
-rw-r--r--arch/x86_64/kernel/verify_cpu.S119
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S20
-rw-r--r--arch/x86_64/kernel/vsyscall.c68
-rw-r--r--arch/x86_64/mm/fault.c5
-rw-r--r--arch/x86_64/mm/init.c172
-rw-r--r--arch/x86_64/mm/k8topology.c9
-rw-r--r--arch/x86_64/mm/numa.c306
-rw-r--r--arch/x86_64/mm/pageattr.c32
-rw-r--r--arch/x86_64/mm/srat.c8
-rw-r--r--arch/xtensa/kernel/vmlinux.lds.S2
-rw-r--r--drivers/acpi/processor_idle.c4
-rw-r--r--drivers/char/agp/amd64-agp.c13
-rw-r--r--drivers/video/console/vgacon.c3
-rw-r--r--fs/compat.c5
-rw-r--r--fs/compat_ioctl.c19
-rw-r--r--fs/proc/vmcore.c2
-rw-r--r--include/asm-alpha/mmu_context.h1
-rw-r--r--include/asm-alpha/percpu.h14
-rw-r--r--include/asm-arm/mmu_context.h1
-rw-r--r--include/asm-arm26/mmu_context.h2
-rw-r--r--include/asm-avr32/mmu_context.h1
-rw-r--r--include/asm-cris/mmu_context.h2
-rw-r--r--include/asm-frv/mmu_context.h1
-rw-r--r--include/asm-generic/mm_hooks.h18
-rw-r--r--include/asm-generic/percpu.h1
-rw-r--r--include/asm-generic/vmlinux.lds.h2
-rw-r--r--include/asm-h8300/mmu_context.h1
-rw-r--r--include/asm-i386/Kbuild2
-rw-r--r--include/asm-i386/alternative.h34
-rw-r--r--include/asm-i386/apic.h9
-rw-r--r--include/asm-i386/bugs.h194
-rw-r--r--include/asm-i386/cpufeature.h13
-rw-r--r--include/asm-i386/current.h5
-rw-r--r--include/asm-i386/desc.h95
-rw-r--r--include/asm-i386/e820.h1
-rw-r--r--include/asm-i386/elf.h28
-rw-r--r--include/asm-i386/fixmap.h11
-rw-r--r--include/asm-i386/genapic.h6
-rw-r--r--include/asm-i386/highmem.h6
-rw-r--r--include/asm-i386/hpet.h2
-rw-r--r--include/asm-i386/i387.h17
-rw-r--r--include/asm-i386/io.h15
-rw-r--r--include/asm-i386/irq.h2
-rw-r--r--include/asm-i386/irq_regs.h12
-rw-r--r--include/asm-i386/irqflags.h64
-rw-r--r--include/asm-i386/kexec.h8
-rw-r--r--include/asm-i386/mach-bigsmp/mach_apic.h2
-rw-r--r--include/asm-i386/mach-default/mach_apic.h2
-rw-r--r--include/asm-i386/mach-es7000/mach_apic.h9
-rw-r--r--include/asm-i386/mach-es7000/mach_mpparse.h32
-rw-r--r--include/asm-i386/mach-generic/mach_apic.h2
-rw-r--r--include/asm-i386/mach-numaq/mach_apic.h2
-rw-r--r--include/asm-i386/mach-summit/mach_apic.h2
-rw-r--r--include/asm-i386/mach-summit/mach_mpparse.h4
-rw-r--r--include/asm-i386/mach-visws/mach_apic.h2
-rw-r--r--include/asm-i386/mmu_context.h17
-rw-r--r--include/asm-i386/module.h2
-rw-r--r--include/asm-i386/msr-index.h278
-rw-r--r--include/asm-i386/msr.h400
-rw-r--r--include/asm-i386/mtrr.h4
-rw-r--r--include/asm-i386/nmi.h8
-rw-r--r--include/asm-i386/page.h81
-rw-r--r--include/asm-i386/paravirt.h957
-rw-r--r--include/asm-i386/pda.h100
-rw-r--r--include/asm-i386/percpu.h136
-rw-r--r--include/asm-i386/pgalloc.h1
-rw-r--r--include/asm-i386/pgtable-2level-defs.h2
-rw-r--r--include/asm-i386/pgtable-2level.h37
-rw-r--r--include/asm-i386/pgtable-3level-defs.h6
-rw-r--r--include/asm-i386/pgtable-3level.h69
-rw-r--r--include/asm-i386/pgtable.h62
-rw-r--r--include/asm-i386/processor-flags.h91
-rw-r--r--include/asm-i386/processor.h187
-rw-r--r--include/asm-i386/reboot.h20
-rw-r--r--include/asm-i386/reboot_fixups.h (renamed from include/linux/reboot_fixups.h)4
-rw-r--r--include/asm-i386/required-features.h34
-rw-r--r--include/asm-i386/segment.h10
-rw-r--r--include/asm-i386/smp.h64
-rw-r--r--include/asm-i386/system.h139
-rw-r--r--include/asm-i386/timer.h2
-rw-r--r--include/asm-i386/tlbflush.h19
-rw-r--r--include/asm-i386/tsc.h15
-rw-r--r--include/asm-i386/uaccess.h14
-rw-r--r--include/asm-i386/vmi_time.h18
-rw-r--r--include/asm-ia64/mmu_context.h1
-rw-r--r--include/asm-m32r/mmu_context.h1
-rw-r--r--include/asm-m68k/mmu_context.h1
-rw-r--r--include/asm-m68knommu/mmu_context.h1
-rw-r--r--include/asm-mips/mmu_context.h1
-rw-r--r--include/asm-parisc/mmu_context.h1
-rw-r--r--include/asm-powerpc/mmu_context.h1
-rw-r--r--include/asm-ppc/mmu_context.h1
-rw-r--r--include/asm-s390/mmu_context.h2
-rw-r--r--include/asm-sh/mmu_context.h1
-rw-r--r--include/asm-sh64/mmu_context.h2
-rw-r--r--include/asm-sparc/mmu_context.h2
-rw-r--r--include/asm-sparc64/mmu_context.h1
-rw-r--r--include/asm-sparc64/percpu.h10
-rw-r--r--include/asm-um/mmu_context.h2
-rw-r--r--include/asm-v850/mmu_context.h2
-rw-r--r--include/asm-x86_64/Kbuild4
-rw-r--r--include/asm-x86_64/alternative.h5
-rw-r--r--include/asm-x86_64/apic.h10
-rw-r--r--include/asm-x86_64/bugs.h30
-rw-r--r--include/asm-x86_64/const.h20
-rw-r--r--include/asm-x86_64/desc.h21
-rw-r--r--include/asm-x86_64/dma-mapping.h2
-rw-r--r--include/asm-x86_64/fixmap.h1
-rw-r--r--include/asm-x86_64/genapic.h4
-rw-r--r--include/asm-x86_64/ipi.h61
-rw-r--r--include/asm-x86_64/irqflags.h9
-rw-r--r--include/asm-x86_64/mmu_context.h1
-rw-r--r--include/asm-x86_64/mmzone.h2
-rw-r--r--include/asm-x86_64/msr-index.h1
-rw-r--r--include/asm-x86_64/msr.h274
-rw-r--r--include/asm-x86_64/mtrr.h12
-rw-r--r--include/asm-x86_64/nmi.h9
-rw-r--r--include/asm-x86_64/page.h39
-rw-r--r--include/asm-x86_64/percpu.h10
-rw-r--r--include/asm-x86_64/pgalloc.h15
-rw-r--r--include/asm-x86_64/pgtable.h42
-rw-r--r--include/asm-x86_64/processor-flags.h1
-rw-r--r--include/asm-x86_64/processor.h55
-rw-r--r--include/asm-x86_64/proto.h15
-rw-r--r--include/asm-x86_64/segment.h2
-rw-r--r--include/asm-x86_64/smp.h4
-rw-r--r--include/asm-x86_64/suspend.h13
-rw-r--r--include/asm-x86_64/system.h7
-rw-r--r--include/asm-x86_64/timex.h2
-rw-r--r--include/asm-x86_64/tlbflush.h33
-rw-r--r--include/asm-x86_64/unistd.h3
-rw-r--r--include/asm-xtensa/mmu_context.h1
-rw-r--r--include/linux/bootmem.h4
-rw-r--r--include/linux/cpufreq.h19
-rw-r--r--include/linux/crash_dump.h8
-rw-r--r--include/linux/elf.h17
-rw-r--r--include/linux/elfnote.h4
-rw-r--r--include/linux/highmem.h5
-rw-r--r--include/linux/init.h7
-rw-r--r--include/linux/percpu.h9
-rw-r--r--include/linux/poison.h3
-rw-r--r--init/main.c8
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/module.c10
-rw-r--r--kernel/power/power.h5
-rw-r--r--kernel/power/snapshot.c11
-rw-r--r--kernel/power/swap.c42
-rw-r--r--lib/inflate.c129
-rw-r--r--mm/highmem.c9
-rw-r--r--mm/mmap.c4
-rw-r--r--mm/slab.c7
-rw-r--r--mm/vmalloc.c14
-rw-r--r--scripts/mod/modpost.c1
319 files changed, 7564 insertions, 10400 deletions
diff --git a/CREDITS b/CREDITS
index d7140309e06d..c5f819bacda3 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1745,8 +1745,9 @@ S: D-64295
1745S: Germany 1745S: Germany
1746 1746
1747N: Andi Kleen 1747N: Andi Kleen
1748E: ak@muc.de 1748E: andi@firstfloor.org
1749D: network hacker, syncookies 1749U: http://www.halobates.de
1750D: network, x86, NUMA, various hacks
1750S: Schwalbenstr. 96 1751S: Schwalbenstr. 96
1751S: 85551 Ottobrunn 1752S: 85551 Ottobrunn
1752S: Germany 1753S: Germany
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 63c2d0c55aa2..64e9f6c4826b 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -55,8 +55,8 @@ aic7*seq.h*
55aicasm 55aicasm
56aicdb.h* 56aicdb.h*
57asm 57asm
58asm-offsets.* 58asm-offsets.h
59asm_offsets.* 59asm_offsets.h
60autoconf.h* 60autoconf.h*
61bbootsect 61bbootsect
62bin2c 62bin2c
diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt
index 38fe1f03fb14..6498666ea330 100644
--- a/Documentation/i386/boot.txt
+++ b/Documentation/i386/boot.txt
@@ -2,7 +2,7 @@
2 ---------------------------- 2 ----------------------------
3 3
4 H. Peter Anvin <hpa@zytor.com> 4 H. Peter Anvin <hpa@zytor.com>
5 Last update 2007-01-26 5 Last update 2007-03-06
6 6
7On the i386 platform, the Linux kernel uses a rather complicated boot 7On the i386 platform, the Linux kernel uses a rather complicated boot
8convention. This has evolved partially due to historical aspects, as 8convention. This has evolved partially due to historical aspects, as
@@ -35,9 +35,13 @@ Protocol 2.03: (Kernel 2.4.18-pre1) Explicitly makes the highest possible
35 initrd address available to the bootloader. 35 initrd address available to the bootloader.
36 36
37Protocol 2.04: (Kernel 2.6.14) Extend the syssize field to four bytes. 37Protocol 2.04: (Kernel 2.6.14) Extend the syssize field to four bytes.
38
38Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable. 39Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable.
39 Introduce relocatable_kernel and kernel_alignment fields. 40 Introduce relocatable_kernel and kernel_alignment fields.
40 41
42Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of
43 the boot command line
44
41 45
42**** MEMORY LAYOUT 46**** MEMORY LAYOUT
43 47
@@ -133,6 +137,8 @@ Offset Proto Name Meaning
133022C/4 2.03+ initrd_addr_max Highest legal initrd address 137022C/4 2.03+ initrd_addr_max Highest legal initrd address
1340230/4 2.05+ kernel_alignment Physical addr alignment required for kernel 1380230/4 2.05+ kernel_alignment Physical addr alignment required for kernel
1350234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not 1390234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not
1400235/3 N/A pad2 Unused
1410238/4 2.06+ cmdline_size Maximum size of the kernel command line
136 142
137(1) For backwards compatibility, if the setup_sects field contains 0, the 143(1) For backwards compatibility, if the setup_sects field contains 0, the
138 real value is 4. 144 real value is 4.
@@ -233,6 +239,12 @@ filled out, however:
233 if your ramdisk is exactly 131072 bytes long and this field is 239 if your ramdisk is exactly 131072 bytes long and this field is
234 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.) 240 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.)
235 241
242 cmdline_size:
243 The maximum size of the command line without the terminating
244 zero. This means that the command line can contain at most
245 cmdline_size characters. With protocol version 2.05 and
246 earlier, the maximum size was 255.
247
236 248
237**** THE KERNEL COMMAND LINE 249**** THE KERNEL COMMAND LINE
238 250
@@ -241,11 +253,10 @@ loader to communicate with the kernel. Some of its options are also
241relevant to the boot loader itself, see "special command line options" 253relevant to the boot loader itself, see "special command line options"
242below. 254below.
243 255
244The kernel command line is a null-terminated string currently up to 256The kernel command line is a null-terminated string. The maximum
245255 characters long, plus the final null. A string that is too long 257length can be retrieved from the field cmdline_size. Before protocol
246will be automatically truncated by the kernel, a boot loader may allow 258version 2.06, the maximum was 255 characters. A string that is too
247a longer command line to be passed to permit future kernels to extend 259long will be automatically truncated by the kernel.
248this limit.
249 260
250If the boot protocol version is 2.02 or later, the address of the 261If the boot protocol version is 2.02 or later, the address of the
251kernel command line is given by the header field cmd_line_ptr (see 262kernel command line is given by the header field cmd_line_ptr (see
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 84c3bd05c639..38d7db3262c7 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -64,6 +64,7 @@ parameter is applicable:
64 GENERIC_TIME The generic timeofday code is enabled. 64 GENERIC_TIME The generic timeofday code is enabled.
65 NFS Appropriate NFS support is enabled. 65 NFS Appropriate NFS support is enabled.
66 OSS OSS sound support is enabled. 66 OSS OSS sound support is enabled.
67 PV_OPS A paravirtualized kernel
67 PARIDE The ParIDE subsystem is enabled. 68 PARIDE The ParIDE subsystem is enabled.
68 PARISC The PA-RISC architecture is enabled. 69 PARISC The PA-RISC architecture is enabled.
69 PCI PCI bus support is enabled. 70 PCI PCI bus support is enabled.
@@ -695,8 +696,15 @@ and is between 256 and 4096 characters. It is defined in the file
695 idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed 696 idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed
696 See Documentation/ide.txt. 697 See Documentation/ide.txt.
697 698
698 idle= [HW] 699 idle= [X86]
699 Format: idle=poll or idle=halt 700 Format: idle=poll or idle=mwait
701 Poll forces a polling idle loop that can slightly improves the performance
702 of waking up a idle CPU, but will use a lot of power and make the system
703 run hot. Not recommended.
704 idle=mwait. On systems which support MONITOR/MWAIT but the kernel chose
705 to not use it because it doesn't save as much power as a normal idle
706 loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same
707 as idle=poll.
700 708
701 ignore_loglevel [KNL] 709 ignore_loglevel [KNL]
702 Ignore loglevel setting - this will print /all/ 710 Ignore loglevel setting - this will print /all/
@@ -1157,6 +1165,11 @@ and is between 256 and 4096 characters. It is defined in the file
1157 1165
1158 nomce [IA-32] Machine Check Exception 1166 nomce [IA-32] Machine Check Exception
1159 1167
1168 noreplace-paravirt [IA-32,PV_OPS] Don't patch paravirt_ops
1169
1170 noreplace-smp [IA-32,SMP] Don't replace SMP instructions
1171 with UP alternatives
1172
1160 noresidual [PPC] Don't use residual data on PReP machines. 1173 noresidual [PPC] Don't use residual data on PReP machines.
1161 1174
1162 noresume [SWSUSP] Disables resume and restores original swap 1175 noresume [SWSUSP] Disables resume and restores original swap
@@ -1562,6 +1575,9 @@ and is between 256 and 4096 characters. It is defined in the file
1562 smart2= [HW] 1575 smart2= [HW]
1563 Format: <io1>[,<io2>[,...,<io8>]] 1576 Format: <io1>[,<io2>[,...,<io8>]]
1564 1577
1578 smp-alt-once [IA-32,SMP] On a hotplug CPU system, only
1579 attempt to substitute SMP alternatives once at boot.
1580
1565 snd-ad1816a= [HW,ALSA] 1581 snd-ad1816a= [HW,ALSA]
1566 1582
1567 snd-ad1848= [HW,ALSA] 1583 snd-ad1848= [HW,ALSA]
@@ -1820,6 +1836,7 @@ and is between 256 and 4096 characters. It is defined in the file
1820 [USBHID] The interval which mice are to be polled at. 1836 [USBHID] The interval which mice are to be polled at.
1821 1837
1822 vdso= [IA-32,SH] 1838 vdso= [IA-32,SH]
1839 vdso=2: enable compat VDSO (default with COMPAT_VDSO)
1823 vdso=1: enable VDSO (default) 1840 vdso=1: enable VDSO (default)
1824 vdso=0: disable VDSO mapping 1841 vdso=0: disable VDSO mapping
1825 1842
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index 85f51e5a749f..6177d881983f 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -149,7 +149,19 @@ NUMA
149 149
150 numa=noacpi Don't parse the SRAT table for NUMA setup 150 numa=noacpi Don't parse the SRAT table for NUMA setup
151 151
152 numa=fake=X Fake X nodes and ignore NUMA setup of the actual machine. 152 numa=fake=CMDLINE
153 If a number, fakes CMDLINE nodes and ignores NUMA setup of the
154 actual machine. Otherwise, system memory is configured
155 depending on the sizes and coefficients listed. For example:
156 numa=fake=2*512,1024,4*256,*128
157 gives two 512M nodes, a 1024M node, four 256M nodes, and the
158 rest split into 128M chunks. If the last character of CMDLINE
159 is a *, the remaining memory is divided up equally among its
160 coefficient:
161 numa=fake=2*512,2*
162 gives two 512M nodes and the rest split into two nodes.
163 Otherwise, the remaining system RAM is allocated to an
164 additional node.
153 165
154 numa=hotadd=percent 166 numa=hotadd=percent
155 Only allow hotadd memory to preallocate page structures upto 167 Only allow hotadd memory to preallocate page structures upto
diff --git a/Documentation/x86_64/fake-numa-for-cpusets b/Documentation/x86_64/fake-numa-for-cpusets
new file mode 100644
index 000000000000..d1a985c5b00a
--- /dev/null
+++ b/Documentation/x86_64/fake-numa-for-cpusets
@@ -0,0 +1,66 @@
1Using numa=fake and CPUSets for Resource Management
2Written by David Rientjes <rientjes@cs.washington.edu>
3
4This document describes how the numa=fake x86_64 command-line option can be used
5in conjunction with cpusets for coarse memory management. Using this feature,
6you can create fake NUMA nodes that represent contiguous chunks of memory and
7assign them to cpusets and their attached tasks. This is a way of limiting the
8amount of system memory that are available to a certain class of tasks.
9
10For more information on the features of cpusets, see Documentation/cpusets.txt.
11There are a number of different configurations you can use for your needs. For
12more information on the numa=fake command line option and its various ways of
13configuring fake nodes, see Documentation/x86_64/boot-options.txt.
14
15For the purposes of this introduction, we'll assume a very primitive NUMA
16emulation setup of "numa=fake=4*512,". This will split our system memory into
17four equal chunks of 512M each that we can now use to assign to cpusets. As
18you become more familiar with using this combination for resource control,
19you'll determine a better setup to minimize the number of nodes you have to deal
20with.
21
22A machine may be split as follows with "numa=fake=4*512," as reported by dmesg:
23
24 Faking node 0 at 0000000000000000-0000000020000000 (512MB)
25 Faking node 1 at 0000000020000000-0000000040000000 (512MB)
26 Faking node 2 at 0000000040000000-0000000060000000 (512MB)
27 Faking node 3 at 0000000060000000-0000000080000000 (512MB)
28 ...
29 On node 0 totalpages: 130975
30 On node 1 totalpages: 131072
31 On node 2 totalpages: 131072
32 On node 3 totalpages: 131072
33
34Now following the instructions for mounting the cpusets filesystem from
35Documentation/cpusets.txt, you can assign fake nodes (i.e. contiguous memory
36address spaces) to individual cpusets:
37
38 [root@xroads /]# mkdir exampleset
39 [root@xroads /]# mount -t cpuset none exampleset
40 [root@xroads /]# mkdir exampleset/ddset
41 [root@xroads /]# cd exampleset/ddset
42 [root@xroads /exampleset/ddset]# echo 0-1 > cpus
43 [root@xroads /exampleset/ddset]# echo 0-1 > mems
44
45Now this cpuset, 'ddset', will only allowed access to fake nodes 0 and 1 for
46memory allocations (1G).
47
48You can now assign tasks to these cpusets to limit the memory resources
49available to them according to the fake nodes assigned as mems:
50
51 [root@xroads /exampleset/ddset]# echo $$ > tasks
52 [root@xroads /exampleset/ddset]# dd if=/dev/zero of=tmp bs=1024 count=1G
53 [1] 13425
54
55Notice the difference between the system memory usage as reported by
56/proc/meminfo between the restricted cpuset case above and the unrestricted
57case (i.e. running the same 'dd' command without assigning it to a fake NUMA
58cpuset):
59 Unrestricted Restricted
60 MemTotal: 3091900 kB 3091900 kB
61 MemFree: 42113 kB 1513236 kB
62
63This allows for coarse memory management for the tasks you assign to particular
64cpusets. Since cpusets can form a hierarchy, you can create some pretty
65interesting combinations of use-cases for various classes of tasks for your
66memory management needs.
diff --git a/Documentation/x86_64/machinecheck b/Documentation/x86_64/machinecheck
index 068a6d9904b9..feaeaf6f6e4d 100644
--- a/Documentation/x86_64/machinecheck
+++ b/Documentation/x86_64/machinecheck
@@ -36,7 +36,12 @@ between all CPUs.
36 36
37check_interval 37check_interval
38 How often to poll for corrected machine check errors, in seconds 38 How often to poll for corrected machine check errors, in seconds
39 (Note output is hexademical). Default 5 minutes. 39 (Note output is hexademical). Default 5 minutes. When the poller
40 finds MCEs it triggers an exponential speedup (poll more often) on
41 the polling interval. When the poller stops finding MCEs, it
42 triggers an exponential backoff (poll less often) on the polling
43 interval. The check_interval variable is both the initial and
44 maximum polling interval.
40 45
41tolerant 46tolerant
42 Tolerance level. When a machine check exception occurs for a non 47 Tolerance level. When a machine check exception occurs for a non
diff --git a/MAINTAINERS b/MAINTAINERS
index b36923e72ce8..0492dd88e12a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1617,7 +1617,7 @@ S: Maintained
1617 1617
1618HPET: x86_64 1618HPET: x86_64
1619P: Andi Kleen and Vojtech Pavlik 1619P: Andi Kleen and Vojtech Pavlik
1620M: ak@muc.de and vojtech@suse.cz 1620M: andi@firstfloor.org and vojtech@suse.cz
1621S: Maintained 1621S: Maintained
1622 1622
1623HPET: ACPI hpet.c 1623HPET: ACPI hpet.c
@@ -2652,6 +2652,19 @@ T: git kernel.org:/pub/scm/linux/kernel/git/kyle/parisc-2.6.git
2652T: cvs cvs.parisc-linux.org:/var/cvs/linux-2.6 2652T: cvs cvs.parisc-linux.org:/var/cvs/linux-2.6
2653S: Maintained 2653S: Maintained
2654 2654
2655PARAVIRT_OPS INTERFACE
2656P: Jeremy Fitzhardinge
2657M: jeremy@xensource.com
2658P: Chris Wright
2659M: chrisw@sous-sol.org
2660P: Zachary Amsden
2661M: zach@vmware.com
2662P: Rusty Russell
2663M: rusty@rustcorp.com.au
2664L: virtualization@lists.osdl.org
2665L: linux-kernel@vger.kernel.org
2666S: Supported
2667
2655PC87360 HARDWARE MONITORING DRIVER 2668PC87360 HARDWARE MONITORING DRIVER
2656P: Jim Cromie 2669P: Jim Cromie
2657M: jim.cromie@gmail.com 2670M: jim.cromie@gmail.com
@@ -3876,6 +3889,15 @@ M: eis@baty.hanse.de
3876L: linux-x25@vger.kernel.org 3889L: linux-x25@vger.kernel.org
3877S: Maintained 3890S: Maintained
3878 3891
3892XEN HYPERVISOR INTERFACE
3893P: Jeremy Fitzhardinge
3894M: jeremy@xensource.com
3895P: Chris Wright
3896M: chrisw@sous-sol.org
3897L: virtualization@lists.osdl.org
3898L: xen-devel@lists.xensource.com
3899S: Supported
3900
3879XFS FILESYSTEM 3901XFS FILESYSTEM
3880P: Silicon Graphics Inc 3902P: Silicon Graphics Inc
3881P: Tim Shimmin, David Chatterton 3903P: Tim Shimmin, David Chatterton
diff --git a/Makefile b/Makefile
index d970cb16545a..387526b69d4f 100644
--- a/Makefile
+++ b/Makefile
@@ -491,7 +491,7 @@ endif
491include $(srctree)/arch/$(ARCH)/Makefile 491include $(srctree)/arch/$(ARCH)/Makefile
492 492
493ifdef CONFIG_FRAME_POINTER 493ifdef CONFIG_FRAME_POINTER
494CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) 494CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
495else 495else
496CFLAGS += -fomit-frame-pointer 496CFLAGS += -fomit-frame-pointer
497endif 497endif
diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c
index 1d65adf5691e..c00646b25f6e 100644
--- a/arch/alpha/boot/misc.c
+++ b/arch/alpha/boot/misc.c
@@ -98,7 +98,7 @@ extern int end;
98static ulg free_mem_ptr; 98static ulg free_mem_ptr;
99static ulg free_mem_ptr_end; 99static ulg free_mem_ptr_end;
100 100
101#define HEAP_SIZE 0x2000 101#define HEAP_SIZE 0x3000
102 102
103#include "../../../lib/inflate.c" 103#include "../../../lib/inflate.c"
104 104
diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S
index 4cc44bd33d33..cf1e6fc6c686 100644
--- a/arch/alpha/kernel/vmlinux.lds.S
+++ b/arch/alpha/kernel/vmlinux.lds.S
@@ -69,7 +69,7 @@ SECTIONS
69 . = ALIGN(8); 69 . = ALIGN(8);
70 SECURITY_INIT 70 SECURITY_INIT
71 71
72 . = ALIGN(64); 72 . = ALIGN(8192);
73 __per_cpu_start = .; 73 __per_cpu_start = .;
74 .data.percpu : { *(.data.percpu) } 74 .data.percpu : { *(.data.percpu) }
75 __per_cpu_end = .; 75 __per_cpu_end = .;
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 283891c736c4..9b444022cb9b 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -239,7 +239,7 @@ extern int end;
239static ulg free_mem_ptr; 239static ulg free_mem_ptr;
240static ulg free_mem_ptr_end; 240static ulg free_mem_ptr_end;
241 241
242#define HEAP_SIZE 0x2000 242#define HEAP_SIZE 0x3000
243 243
244#include "../../../../lib/inflate.c" 244#include "../../../../lib/inflate.c"
245 245
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index ddbdad48f5b2..d1a6a597ed9a 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -59,7 +59,7 @@ SECTIONS
59 usr/built-in.o(.init.ramfs) 59 usr/built-in.o(.init.ramfs)
60 __initramfs_end = .; 60 __initramfs_end = .;
61#endif 61#endif
62 . = ALIGN(64); 62 . = ALIGN(4096);
63 __per_cpu_start = .; 63 __per_cpu_start = .;
64 *(.data.percpu) 64 *(.data.percpu)
65 __per_cpu_end = .; 65 __per_cpu_end = .;
diff --git a/arch/arm26/boot/compressed/misc.c b/arch/arm26/boot/compressed/misc.c
index f17f50e5516f..0714d19c5776 100644
--- a/arch/arm26/boot/compressed/misc.c
+++ b/arch/arm26/boot/compressed/misc.c
@@ -182,7 +182,7 @@ extern int end;
182static ulg free_mem_ptr; 182static ulg free_mem_ptr;
183static ulg free_mem_ptr_end; 183static ulg free_mem_ptr_end;
184 184
185#define HEAP_SIZE 0x2000 185#define HEAP_SIZE 0x3000
186 186
187#include "../../../../lib/inflate.c" 187#include "../../../../lib/inflate.c"
188 188
diff --git a/arch/cris/arch-v32/vmlinux.lds.S b/arch/cris/arch-v32/vmlinux.lds.S
index e124fcd766d5..dfa25e1542b9 100644
--- a/arch/cris/arch-v32/vmlinux.lds.S
+++ b/arch/cris/arch-v32/vmlinux.lds.S
@@ -91,6 +91,7 @@ SECTIONS
91 } 91 }
92 SECURITY_INIT 92 SECURITY_INIT
93 93
94 . = ALIGN (8192);
94 __per_cpu_start = .; 95 __per_cpu_start = .;
95 .data.percpu : { *(.data.percpu) } 96 .data.percpu : { *(.data.percpu) }
96 __per_cpu_end = .; 97 __per_cpu_end = .;
diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S
index 97910e016825..28eae9735ad6 100644
--- a/arch/frv/kernel/vmlinux.lds.S
+++ b/arch/frv/kernel/vmlinux.lds.S
@@ -57,6 +57,7 @@ SECTIONS
57 __alt_instructions_end = .; 57 __alt_instructions_end = .;
58 .altinstr_replacement : { *(.altinstr_replacement) } 58 .altinstr_replacement : { *(.altinstr_replacement) }
59 59
60 . = ALIGN(4096);
60 __per_cpu_start = .; 61 __per_cpu_start = .;
61 .data.percpu : { *(.data.percpu) } 62 .data.percpu : { *(.data.percpu) }
62 __per_cpu_end = .; 63 __per_cpu_end = .;
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index bcf2fc408a1a..a9af760c7e5f 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -220,7 +220,7 @@ config PARAVIRT
220 220
221config VMI 221config VMI
222 bool "VMI Paravirt-ops support" 222 bool "VMI Paravirt-ops support"
223 depends on PARAVIRT && !COMPAT_VDSO 223 depends on PARAVIRT
224 help 224 help
225 VMI provides a paravirtualized interface to the VMware ESX server 225 VMI provides a paravirtualized interface to the VMware ESX server
226 (it could be used by other hypervisors in theory too, but is not 226 (it could be used by other hypervisors in theory too, but is not
@@ -571,6 +571,9 @@ choice
571 bool "3G/1G user/kernel split (for full 1G low memory)" 571 bool "3G/1G user/kernel split (for full 1G low memory)"
572 config VMSPLIT_2G 572 config VMSPLIT_2G
573 bool "2G/2G user/kernel split" 573 bool "2G/2G user/kernel split"
574 config VMSPLIT_2G_OPT
575 depends on !HIGHMEM
576 bool "2G/2G user/kernel split (for full 2G low memory)"
574 config VMSPLIT_1G 577 config VMSPLIT_1G
575 bool "1G/3G user/kernel split" 578 bool "1G/3G user/kernel split"
576endchoice 579endchoice
@@ -578,7 +581,8 @@ endchoice
578config PAGE_OFFSET 581config PAGE_OFFSET
579 hex 582 hex
580 default 0xB0000000 if VMSPLIT_3G_OPT 583 default 0xB0000000 if VMSPLIT_3G_OPT
581 default 0x78000000 if VMSPLIT_2G 584 default 0x80000000 if VMSPLIT_2G
585 default 0x78000000 if VMSPLIT_2G_OPT
582 default 0x40000000 if VMSPLIT_1G 586 default 0x40000000 if VMSPLIT_1G
583 default 0xC0000000 587 default 0xC0000000
584 588
@@ -915,12 +919,9 @@ source kernel/power/Kconfig
915 919
916source "drivers/acpi/Kconfig" 920source "drivers/acpi/Kconfig"
917 921
918menu "APM (Advanced Power Management) BIOS Support" 922menuconfig APM
919depends on PM && !X86_VISWS
920
921config APM
922 tristate "APM (Advanced Power Management) BIOS support" 923 tristate "APM (Advanced Power Management) BIOS support"
923 depends on PM 924 depends on PM && !X86_VISWS
924 ---help--- 925 ---help---
925 APM is a BIOS specification for saving power using several different 926 APM is a BIOS specification for saving power using several different
926 techniques. This is mostly useful for battery powered laptops with 927 techniques. This is mostly useful for battery powered laptops with
@@ -977,9 +978,10 @@ config APM
977 To compile this driver as a module, choose M here: the 978 To compile this driver as a module, choose M here: the
978 module will be called apm. 979 module will be called apm.
979 980
981if APM
982
980config APM_IGNORE_USER_SUSPEND 983config APM_IGNORE_USER_SUSPEND
981 bool "Ignore USER SUSPEND" 984 bool "Ignore USER SUSPEND"
982 depends on APM
983 help 985 help
984 This option will ignore USER SUSPEND requests. On machines with a 986 This option will ignore USER SUSPEND requests. On machines with a
985 compliant APM BIOS, you want to say N. However, on the NEC Versa M 987 compliant APM BIOS, you want to say N. However, on the NEC Versa M
@@ -987,7 +989,6 @@ config APM_IGNORE_USER_SUSPEND
987 989
988config APM_DO_ENABLE 990config APM_DO_ENABLE
989 bool "Enable PM at boot time" 991 bool "Enable PM at boot time"
990 depends on APM
991 ---help--- 992 ---help---
992 Enable APM features at boot time. From page 36 of the APM BIOS 993 Enable APM features at boot time. From page 36 of the APM BIOS
993 specification: "When disabled, the APM BIOS does not automatically 994 specification: "When disabled, the APM BIOS does not automatically
@@ -1005,7 +1006,6 @@ config APM_DO_ENABLE
1005 1006
1006config APM_CPU_IDLE 1007config APM_CPU_IDLE
1007 bool "Make CPU Idle calls when idle" 1008 bool "Make CPU Idle calls when idle"
1008 depends on APM
1009 help 1009 help
1010 Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. 1010 Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop.
1011 On some machines, this can activate improved power savings, such as 1011 On some machines, this can activate improved power savings, such as
@@ -1017,7 +1017,6 @@ config APM_CPU_IDLE
1017 1017
1018config APM_DISPLAY_BLANK 1018config APM_DISPLAY_BLANK
1019 bool "Enable console blanking using APM" 1019 bool "Enable console blanking using APM"
1020 depends on APM
1021 help 1020 help
1022 Enable console blanking using the APM. Some laptops can use this to 1021 Enable console blanking using the APM. Some laptops can use this to
1023 turn off the LCD backlight when the screen blanker of the Linux 1022 turn off the LCD backlight when the screen blanker of the Linux
@@ -1029,22 +1028,8 @@ config APM_DISPLAY_BLANK
1029 backlight at all, or it might print a lot of errors to the console, 1028 backlight at all, or it might print a lot of errors to the console,
1030 especially if you are using gpm. 1029 especially if you are using gpm.
1031 1030
1032config APM_RTC_IS_GMT
1033 bool "RTC stores time in GMT"
1034 depends on APM
1035 help
1036 Say Y here if your RTC (Real Time Clock a.k.a. hardware clock)
1037 stores the time in GMT (Greenwich Mean Time). Say N if your RTC
1038 stores localtime.
1039
1040 It is in fact recommended to store GMT in your RTC, because then you
1041 don't have to worry about daylight savings time changes. The only
1042 reason not to use GMT in your RTC is if you also run a broken OS
1043 that doesn't understand GMT.
1044
1045config APM_ALLOW_INTS 1031config APM_ALLOW_INTS
1046 bool "Allow interrupts during APM BIOS calls" 1032 bool "Allow interrupts during APM BIOS calls"
1047 depends on APM
1048 help 1033 help
1049 Normally we disable external interrupts while we are making calls to 1034 Normally we disable external interrupts while we are making calls to
1050 the APM BIOS as a measure to lessen the effects of a badly behaving 1035 the APM BIOS as a measure to lessen the effects of a badly behaving
@@ -1055,13 +1040,12 @@ config APM_ALLOW_INTS
1055 1040
1056config APM_REAL_MODE_POWER_OFF 1041config APM_REAL_MODE_POWER_OFF
1057 bool "Use real mode APM BIOS call to power off" 1042 bool "Use real mode APM BIOS call to power off"
1058 depends on APM
1059 help 1043 help
1060 Use real mode APM BIOS calls to switch off the computer. This is 1044 Use real mode APM BIOS calls to switch off the computer. This is
1061 a work-around for a number of buggy BIOSes. Switch this option on if 1045 a work-around for a number of buggy BIOSes. Switch this option on if
1062 your computer crashes instead of powering off properly. 1046 your computer crashes instead of powering off properly.
1063 1047
1064endmenu 1048endif # APM
1065 1049
1066source "arch/i386/kernel/cpu/cpufreq/Kconfig" 1050source "arch/i386/kernel/cpu/cpufreq/Kconfig"
1067 1051
diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu
index b99c0e2a4e63..dce6124cb842 100644
--- a/arch/i386/Kconfig.cpu
+++ b/arch/i386/Kconfig.cpu
@@ -43,6 +43,7 @@ config M386
43 - "Geode GX/LX" For AMD Geode GX and LX processors. 43 - "Geode GX/LX" For AMD Geode GX and LX processors.
44 - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. 44 - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
45 - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above). 45 - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above).
46 - "VIA C7" for VIA C7.
46 47
47 If you don't know what to do, choose "386". 48 If you don't know what to do, choose "386".
48 49
@@ -203,6 +204,12 @@ config MVIAC3_2
203 of SSE and tells gcc to treat the CPU as a 686. 204 of SSE and tells gcc to treat the CPU as a 686.
204 Note, this kernel will not boot on older (pre model 9) C3s. 205 Note, this kernel will not boot on older (pre model 9) C3s.
205 206
207config MVIAC7
208 bool "VIA C7"
209 help
210 Select this for a VIA C7. Selecting this uses the correct cache
211 shift and tells gcc to treat the CPU as a 686.
212
206endchoice 213endchoice
207 214
208config X86_GENERIC 215config X86_GENERIC
@@ -231,16 +238,21 @@ config X86_L1_CACHE_SHIFT
231 default "7" if MPENTIUM4 || X86_GENERIC 238 default "7" if MPENTIUM4 || X86_GENERIC
232 default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 239 default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
233 default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX 240 default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
234 default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 241 default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7
242
243config X86_XADD
244 bool
245 depends on !M386
246 default y
235 247
236config RWSEM_GENERIC_SPINLOCK 248config RWSEM_GENERIC_SPINLOCK
237 bool 249 bool
238 depends on M386 250 depends on !X86_XADD
239 default y 251 default y
240 252
241config RWSEM_XCHGADD_ALGORITHM 253config RWSEM_XCHGADD_ALGORITHM
242 bool 254 bool
243 depends on !M386 255 depends on X86_XADD
244 default y 256 default y
245 257
246config ARCH_HAS_ILOG2_U32 258config ARCH_HAS_ILOG2_U32
@@ -297,7 +309,7 @@ config X86_ALIGNMENT_16
297 309
298config X86_GOOD_APIC 310config X86_GOOD_APIC
299 bool 311 bool
300 depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 312 depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 || MVIAC7
301 default y 313 default y
302 314
303config X86_INTEL_USERCOPY 315config X86_INTEL_USERCOPY
@@ -322,5 +334,18 @@ config X86_OOSTORE
322 334
323config X86_TSC 335config X86_TSC
324 bool 336 bool
325 depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ 337 depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ
326 default y 338 default y
339
340# this should be set for all -march=.. options where the compiler
341# generates cmov.
342config X86_CMOV
343 bool
344 depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7)
345 default y
346
347config X86_MINIMUM_CPU_MODEL
348 int
349 default "4" if X86_XADD || X86_CMPXCHG || X86_BSWAP
350 default "0"
351
diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
index 458bc1611933..b31c0802e1cc 100644
--- a/arch/i386/Kconfig.debug
+++ b/arch/i386/Kconfig.debug
@@ -85,14 +85,4 @@ config DOUBLEFAULT
85 option saves about 4k and might cause you much additional grey 85 option saves about 4k and might cause you much additional grey
86 hair. 86 hair.
87 87
88config DEBUG_PARAVIRT
89 bool "Enable some paravirtualization debugging"
90 default n
91 depends on PARAVIRT && DEBUG_KERNEL
92 help
93 Currently deliberately clobbers regs which are allowed to be
94 clobbered in inlined paravirt hooks, even in native mode.
95 If turning this off solves a problem, then DISABLE_INTERRUPTS() or
96 ENABLE_INTERRUPTS() is lying about what registers can be clobbered.
97
98endmenu 88endmenu
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index bd28f9f9b4b7..6dc5e5d90fec 100644
--- a/arch/i386/Makefile
+++ b/arch/i386/Makefile
@@ -34,7 +34,7 @@ CHECKFLAGS += -D__i386__
34CFLAGS += -pipe -msoft-float -mregparm=3 -freg-struct-return 34CFLAGS += -pipe -msoft-float -mregparm=3 -freg-struct-return
35 35
36# prevent gcc from keeping the stack 16 byte aligned 36# prevent gcc from keeping the stack 16 byte aligned
37CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) 37CFLAGS += -mpreferred-stack-boundary=4
38 38
39# CPU-specific tuning. Anything which can be shared with UML should go here. 39# CPU-specific tuning. Anything which can be shared with UML should go here.
40include $(srctree)/arch/i386/Makefile.cpu 40include $(srctree)/arch/i386/Makefile.cpu
diff --git a/arch/i386/Makefile.cpu b/arch/i386/Makefile.cpu
index a32c031c90d7..e372b584e919 100644
--- a/arch/i386/Makefile.cpu
+++ b/arch/i386/Makefile.cpu
@@ -4,9 +4,9 @@
4#-mtune exists since gcc 3.4 4#-mtune exists since gcc 3.4
5HAS_MTUNE := $(call cc-option-yn, -mtune=i386) 5HAS_MTUNE := $(call cc-option-yn, -mtune=i386)
6ifeq ($(HAS_MTUNE),y) 6ifeq ($(HAS_MTUNE),y)
7tune = $(call cc-option,-mtune=$(1),) 7tune = $(call cc-option,-mtune=$(1),$(2))
8else 8else
9tune = $(call cc-option,-mcpu=$(1),) 9tune = $(call cc-option,-mcpu=$(1),$(2))
10endif 10endif
11 11
12align := $(cc-option-align) 12align := $(cc-option-align)
@@ -32,7 +32,8 @@ cflags-$(CONFIG_MWINCHIP2) += $(call cc-option,-march=winchip2,-march=i586)
32cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) 32cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586)
33cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 33cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
34cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) 34cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
35cflags-$(CONFIG_MCORE2) += -march=i686 $(call cc-option,-mtune=core2,$(call cc-option,-mtune=generic,-mtune=i686)) 35cflags-$(CONFIG_MVIAC7) += -march=i686
36cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
36 37
37# AMD Elan support 38# AMD Elan support
38cflags-$(CONFIG_X86_ELAN) += -march=i486 39cflags-$(CONFIG_X86_ELAN) += -march=i486
@@ -42,5 +43,5 @@ cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx
42 43
43# add at the end to overwrite eventual tuning options from earlier 44# add at the end to overwrite eventual tuning options from earlier
44# cpu entries 45# cpu entries
45cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic) 46cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686))
46 47
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
index e97946626064..bfbc32098a4a 100644
--- a/arch/i386/boot/Makefile
+++ b/arch/i386/boot/Makefile
@@ -36,9 +36,9 @@ HOSTCFLAGS_build.o := $(LINUXINCLUDE)
36# --------------------------------------------------------------------------- 36# ---------------------------------------------------------------------------
37 37
38$(obj)/zImage: IMAGE_OFFSET := 0x1000 38$(obj)/zImage: IMAGE_OFFSET := 0x1000
39$(obj)/zImage: EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK) 39$(obj)/zImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK)
40$(obj)/bzImage: IMAGE_OFFSET := 0x100000 40$(obj)/bzImage: IMAGE_OFFSET := 0x100000
41$(obj)/bzImage: EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__ 41$(obj)/bzImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
42$(obj)/bzImage: BUILDFLAGS := -b 42$(obj)/bzImage: BUILDFLAGS := -b
43 43
44quiet_cmd_image = BUILD $@ 44quiet_cmd_image = BUILD $@
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
index 1ce7017fd627..b28505c544c9 100644
--- a/arch/i386/boot/compressed/misc.c
+++ b/arch/i386/boot/compressed/misc.c
@@ -189,7 +189,7 @@ static void putstr(const char *);
189static unsigned long free_mem_ptr; 189static unsigned long free_mem_ptr;
190static unsigned long free_mem_end_ptr; 190static unsigned long free_mem_end_ptr;
191 191
192#define HEAP_SIZE 0x3000 192#define HEAP_SIZE 0x4000
193 193
194static char *vidmem = (char *)0xb8000; 194static char *vidmem = (char *)0xb8000;
195static int vidport; 195static int vidport;
diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S
index 06edf1c66242..f8b3b9cda2b1 100644
--- a/arch/i386/boot/setup.S
+++ b/arch/i386/boot/setup.S
@@ -52,6 +52,7 @@
52#include <asm/boot.h> 52#include <asm/boot.h>
53#include <asm/e820.h> 53#include <asm/e820.h>
54#include <asm/page.h> 54#include <asm/page.h>
55#include <asm/setup.h>
55 56
56/* Signature words to ensure LILO loaded us right */ 57/* Signature words to ensure LILO loaded us right */
57#define SIG1 0xAA55 58#define SIG1 0xAA55
@@ -81,7 +82,7 @@ start:
81# This is the setup header, and it must start at %cs:2 (old 0x9020:2) 82# This is the setup header, and it must start at %cs:2 (old 0x9020:2)
82 83
83 .ascii "HdrS" # header signature 84 .ascii "HdrS" # header signature
84 .word 0x0205 # header version number (>= 0x0105) 85 .word 0x0206 # header version number (>= 0x0105)
85 # or else old loadlin-1.5 will fail) 86 # or else old loadlin-1.5 will fail)
86realmode_swtch: .word 0, 0 # default_switch, SETUPSEG 87realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
87start_sys_seg: .word SYSSEG 88start_sys_seg: .word SYSSEG
@@ -171,6 +172,10 @@ relocatable_kernel: .byte 0
171pad2: .byte 0 172pad2: .byte 0
172pad3: .word 0 173pad3: .word 0
173 174
175cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
176 #added with boot protocol
177 #version 2.06
178
174trampoline: call start_of_setup 179trampoline: call start_of_setup
175 .align 16 180 .align 16
176 # The offset at this point is 0x240 181 # The offset at this point is 0x240
@@ -297,7 +302,24 @@ good_sig:
297 302
298loader_panic_mess: .string "Wrong loader, giving up..." 303loader_panic_mess: .string "Wrong loader, giving up..."
299 304
305# check minimum cpuid
306# we do this here because it is the last place we can actually
307# show a user visible error message. Later the video modus
308# might be already messed up.
300loader_ok: 309loader_ok:
310 call verify_cpu
311 testl %eax,%eax
312 jz cpu_ok
313 lea cpu_panic_mess,%si
314 call prtstr
3151: jmp 1b
316
317cpu_panic_mess:
318 .asciz "PANIC: CPU too old for this kernel."
319
320#include "../kernel/verify_cpu.S"
321
322cpu_ok:
301# Get memory size (extended mem, kB) 323# Get memory size (extended mem, kB)
302 324
303 xorl %eax, %eax 325 xorl %eax, %eax
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
index c96911c37aea..9da84412a831 100644
--- a/arch/i386/defconfig
+++ b/arch/i386/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.21-rc3 3# Linux kernel version: 2.6.21-git3
4# Wed Mar 7 15:29:47 2007 4# Tue May 1 07:30:51 2007
5# 5#
6CONFIG_X86_32=y 6CONFIG_X86_32=y
7CONFIG_GENERIC_TIME=y 7CONFIG_GENERIC_TIME=y
@@ -108,9 +108,9 @@ CONFIG_DEFAULT_IOSCHED="anticipatory"
108# 108#
109# Processor type and features 109# Processor type and features
110# 110#
111# CONFIG_TICK_ONESHOT is not set 111CONFIG_TICK_ONESHOT=y
112# CONFIG_NO_HZ is not set 112CONFIG_NO_HZ=y
113# CONFIG_HIGH_RES_TIMERS is not set 113CONFIG_HIGH_RES_TIMERS=y
114CONFIG_SMP=y 114CONFIG_SMP=y
115# CONFIG_X86_PC is not set 115# CONFIG_X86_PC is not set
116# CONFIG_X86_ELAN is not set 116# CONFIG_X86_ELAN is not set
@@ -146,9 +146,11 @@ CONFIG_MPENTIUMIII=y
146# CONFIG_MGEODE_LX is not set 146# CONFIG_MGEODE_LX is not set
147# CONFIG_MCYRIXIII is not set 147# CONFIG_MCYRIXIII is not set
148# CONFIG_MVIAC3_2 is not set 148# CONFIG_MVIAC3_2 is not set
149# CONFIG_MVIAC7 is not set
149CONFIG_X86_GENERIC=y 150CONFIG_X86_GENERIC=y
150CONFIG_X86_CMPXCHG=y 151CONFIG_X86_CMPXCHG=y
151CONFIG_X86_L1_CACHE_SHIFT=7 152CONFIG_X86_L1_CACHE_SHIFT=7
153CONFIG_X86_XADD=y
152CONFIG_RWSEM_XCHGADD_ALGORITHM=y 154CONFIG_RWSEM_XCHGADD_ALGORITHM=y
153# CONFIG_ARCH_HAS_ILOG2_U32 is not set 155# CONFIG_ARCH_HAS_ILOG2_U32 is not set
154# CONFIG_ARCH_HAS_ILOG2_U64 is not set 156# CONFIG_ARCH_HAS_ILOG2_U64 is not set
@@ -162,6 +164,8 @@ CONFIG_X86_GOOD_APIC=y
162CONFIG_X86_INTEL_USERCOPY=y 164CONFIG_X86_INTEL_USERCOPY=y
163CONFIG_X86_USE_PPRO_CHECKSUM=y 165CONFIG_X86_USE_PPRO_CHECKSUM=y
164CONFIG_X86_TSC=y 166CONFIG_X86_TSC=y
167CONFIG_X86_CMOV=y
168CONFIG_X86_MINIMUM_CPU_MODEL=4
165CONFIG_HPET_TIMER=y 169CONFIG_HPET_TIMER=y
166CONFIG_HPET_EMULATE_RTC=y 170CONFIG_HPET_EMULATE_RTC=y
167CONFIG_NR_CPUS=32 171CONFIG_NR_CPUS=32
@@ -248,7 +252,6 @@ CONFIG_ACPI_FAN=y
248CONFIG_ACPI_PROCESSOR=y 252CONFIG_ACPI_PROCESSOR=y
249CONFIG_ACPI_THERMAL=y 253CONFIG_ACPI_THERMAL=y
250# CONFIG_ACPI_ASUS is not set 254# CONFIG_ACPI_ASUS is not set
251# CONFIG_ACPI_IBM is not set
252# CONFIG_ACPI_TOSHIBA is not set 255# CONFIG_ACPI_TOSHIBA is not set
253CONFIG_ACPI_BLACKLIST_YEAR=2001 256CONFIG_ACPI_BLACKLIST_YEAR=2001
254CONFIG_ACPI_DEBUG=y 257CONFIG_ACPI_DEBUG=y
@@ -257,10 +260,7 @@ CONFIG_ACPI_POWER=y
257CONFIG_ACPI_SYSTEM=y 260CONFIG_ACPI_SYSTEM=y
258CONFIG_X86_PM_TIMER=y 261CONFIG_X86_PM_TIMER=y
259# CONFIG_ACPI_CONTAINER is not set 262# CONFIG_ACPI_CONTAINER is not set
260 263# CONFIG_ACPI_SBS is not set
261#
262# APM (Advanced Power Management) BIOS Support
263#
264# CONFIG_APM is not set 264# CONFIG_APM is not set
265 265
266# 266#
@@ -277,7 +277,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
277# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set 277# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
278CONFIG_CPU_FREQ_GOV_USERSPACE=y 278CONFIG_CPU_FREQ_GOV_USERSPACE=y
279CONFIG_CPU_FREQ_GOV_ONDEMAND=y 279CONFIG_CPU_FREQ_GOV_ONDEMAND=y
280# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set 280CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
281 281
282# 282#
283# CPUFreq processor drivers 283# CPUFreq processor drivers
@@ -349,7 +349,6 @@ CONFIG_NET=y
349# 349#
350# Networking options 350# Networking options
351# 351#
352# CONFIG_NETDEBUG is not set
353CONFIG_PACKET=y 352CONFIG_PACKET=y
354# CONFIG_PACKET_MMAP is not set 353# CONFIG_PACKET_MMAP is not set
355CONFIG_UNIX=y 354CONFIG_UNIX=y
@@ -388,6 +387,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
388CONFIG_IPV6=y 387CONFIG_IPV6=y
389# CONFIG_IPV6_PRIVACY is not set 388# CONFIG_IPV6_PRIVACY is not set
390# CONFIG_IPV6_ROUTER_PREF is not set 389# CONFIG_IPV6_ROUTER_PREF is not set
390# CONFIG_IPV6_OPTIMISTIC_DAD is not set
391# CONFIG_INET6_AH is not set 391# CONFIG_INET6_AH is not set
392# CONFIG_INET6_ESP is not set 392# CONFIG_INET6_ESP is not set
393# CONFIG_INET6_IPCOMP is not set 393# CONFIG_INET6_IPCOMP is not set
@@ -443,6 +443,13 @@ CONFIG_IPV6_SIT=y
443# CONFIG_HAMRADIO is not set 443# CONFIG_HAMRADIO is not set
444# CONFIG_IRDA is not set 444# CONFIG_IRDA is not set
445# CONFIG_BT is not set 445# CONFIG_BT is not set
446# CONFIG_AF_RXRPC is not set
447
448#
449# Wireless
450#
451# CONFIG_CFG80211 is not set
452# CONFIG_WIRELESS_EXT is not set
446# CONFIG_IEEE80211 is not set 453# CONFIG_IEEE80211 is not set
447 454
448# 455#
@@ -463,10 +470,6 @@ CONFIG_FW_LOADER=y
463# Connector - unified userspace <-> kernelspace linker 470# Connector - unified userspace <-> kernelspace linker
464# 471#
465# CONFIG_CONNECTOR is not set 472# CONFIG_CONNECTOR is not set
466
467#
468# Memory Technology Devices (MTD)
469#
470# CONFIG_MTD is not set 473# CONFIG_MTD is not set
471 474
472# 475#
@@ -513,6 +516,7 @@ CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
513# CONFIG_SGI_IOC4 is not set 516# CONFIG_SGI_IOC4 is not set
514# CONFIG_TIFM_CORE is not set 517# CONFIG_TIFM_CORE is not set
515# CONFIG_SONY_LAPTOP is not set 518# CONFIG_SONY_LAPTOP is not set
519# CONFIG_THINKPAD_ACPI is not set
516 520
517# 521#
518# ATA/ATAPI/MFM/RLL support 522# ATA/ATAPI/MFM/RLL support
@@ -548,7 +552,6 @@ CONFIG_BLK_DEV_IDEPCI=y
548# CONFIG_BLK_DEV_RZ1000 is not set 552# CONFIG_BLK_DEV_RZ1000 is not set
549CONFIG_BLK_DEV_IDEDMA_PCI=y 553CONFIG_BLK_DEV_IDEDMA_PCI=y
550# CONFIG_BLK_DEV_IDEDMA_FORCED is not set 554# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
551CONFIG_IDEDMA_PCI_AUTO=y
552# CONFIG_IDEDMA_ONLYDISK is not set 555# CONFIG_IDEDMA_ONLYDISK is not set
553# CONFIG_BLK_DEV_AEC62XX is not set 556# CONFIG_BLK_DEV_AEC62XX is not set
554# CONFIG_BLK_DEV_ALI15X3 is not set 557# CONFIG_BLK_DEV_ALI15X3 is not set
@@ -580,7 +583,6 @@ CONFIG_BLK_DEV_PIIX=y
580# CONFIG_IDE_ARM is not set 583# CONFIG_IDE_ARM is not set
581CONFIG_BLK_DEV_IDEDMA=y 584CONFIG_BLK_DEV_IDEDMA=y
582# CONFIG_IDEDMA_IVB is not set 585# CONFIG_IDEDMA_IVB is not set
583CONFIG_IDEDMA_AUTO=y
584# CONFIG_BLK_DEV_HD is not set 586# CONFIG_BLK_DEV_HD is not set
585 587
586# 588#
@@ -669,6 +671,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0
669# CONFIG_SCSI_DC390T is not set 671# CONFIG_SCSI_DC390T is not set
670# CONFIG_SCSI_NSP32 is not set 672# CONFIG_SCSI_NSP32 is not set
671# CONFIG_SCSI_DEBUG is not set 673# CONFIG_SCSI_DEBUG is not set
674# CONFIG_SCSI_ESP_CORE is not set
672# CONFIG_SCSI_SRP is not set 675# CONFIG_SCSI_SRP is not set
673 676
674# 677#
@@ -697,6 +700,7 @@ CONFIG_SATA_ACPI=y
697# CONFIG_PATA_AMD is not set 700# CONFIG_PATA_AMD is not set
698# CONFIG_PATA_ARTOP is not set 701# CONFIG_PATA_ARTOP is not set
699# CONFIG_PATA_ATIIXP is not set 702# CONFIG_PATA_ATIIXP is not set
703# CONFIG_PATA_CMD640_PCI is not set
700# CONFIG_PATA_CMD64X is not set 704# CONFIG_PATA_CMD64X is not set
701# CONFIG_PATA_CS5520 is not set 705# CONFIG_PATA_CS5520 is not set
702# CONFIG_PATA_CS5530 is not set 706# CONFIG_PATA_CS5530 is not set
@@ -762,10 +766,9 @@ CONFIG_IEEE1394=y
762# Subsystem Options 766# Subsystem Options
763# 767#
764# CONFIG_IEEE1394_VERBOSEDEBUG is not set 768# CONFIG_IEEE1394_VERBOSEDEBUG is not set
765# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
766 769
767# 770#
768# Device Drivers 771# Controllers
769# 772#
770 773
771# 774#
@@ -774,10 +777,11 @@ CONFIG_IEEE1394=y
774CONFIG_IEEE1394_OHCI1394=y 777CONFIG_IEEE1394_OHCI1394=y
775 778
776# 779#
777# Protocol Drivers 780# Protocols
778# 781#
779# CONFIG_IEEE1394_VIDEO1394 is not set 782# CONFIG_IEEE1394_VIDEO1394 is not set
780# CONFIG_IEEE1394_SBP2 is not set 783# CONFIG_IEEE1394_SBP2 is not set
784# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set
781# CONFIG_IEEE1394_ETH1394 is not set 785# CONFIG_IEEE1394_ETH1394 is not set
782# CONFIG_IEEE1394_DV1394 is not set 786# CONFIG_IEEE1394_DV1394 is not set
783CONFIG_IEEE1394_RAWIO=y 787CONFIG_IEEE1394_RAWIO=y
@@ -820,7 +824,9 @@ CONFIG_MII=y
820# CONFIG_HAPPYMEAL is not set 824# CONFIG_HAPPYMEAL is not set
821# CONFIG_SUNGEM is not set 825# CONFIG_SUNGEM is not set
822# CONFIG_CASSINI is not set 826# CONFIG_CASSINI is not set
823# CONFIG_NET_VENDOR_3COM is not set 827CONFIG_NET_VENDOR_3COM=y
828CONFIG_VORTEX=y
829# CONFIG_TYPHOON is not set
824 830
825# 831#
826# Tulip family network device support 832# Tulip family network device support
@@ -901,9 +907,10 @@ CONFIG_BNX2=y
901# CONFIG_TR is not set 907# CONFIG_TR is not set
902 908
903# 909#
904# Wireless LAN (non-hamradio) 910# Wireless LAN
905# 911#
906# CONFIG_NET_RADIO is not set 912# CONFIG_WLAN_PRE80211 is not set
913# CONFIG_WLAN_80211 is not set
907 914
908# 915#
909# Wan interfaces 916# Wan interfaces
@@ -917,7 +924,6 @@ CONFIG_BNX2=y
917# CONFIG_SHAPER is not set 924# CONFIG_SHAPER is not set
918CONFIG_NETCONSOLE=y 925CONFIG_NETCONSOLE=y
919CONFIG_NETPOLL=y 926CONFIG_NETPOLL=y
920# CONFIG_NETPOLL_RX is not set
921# CONFIG_NETPOLL_TRAP is not set 927# CONFIG_NETPOLL_TRAP is not set
922CONFIG_NET_POLL_CONTROLLER=y 928CONFIG_NET_POLL_CONTROLLER=y
923 929
@@ -1050,7 +1056,7 @@ CONFIG_MAX_RAW_DEVS=256
1050CONFIG_HPET=y 1056CONFIG_HPET=y
1051# CONFIG_HPET_RTC_IRQ is not set 1057# CONFIG_HPET_RTC_IRQ is not set
1052CONFIG_HPET_MMAP=y 1058CONFIG_HPET_MMAP=y
1053CONFIG_HANGCHECK_TIMER=y 1059# CONFIG_HANGCHECK_TIMER is not set
1054 1060
1055# 1061#
1056# TPM devices 1062# TPM devices
@@ -1142,6 +1148,14 @@ CONFIG_HID=y
1142# CONFIG_HID_DEBUG is not set 1148# CONFIG_HID_DEBUG is not set
1143 1149
1144# 1150#
1151# USB Input Devices
1152#
1153CONFIG_USB_HID=y
1154# CONFIG_USB_HIDINPUT_POWERBOOK is not set
1155# CONFIG_HID_FF is not set
1156# CONFIG_USB_HIDDEV is not set
1157
1158#
1145# USB support 1159# USB support
1146# 1160#
1147CONFIG_USB_ARCH_HAS_HCD=y 1161CONFIG_USB_ARCH_HAS_HCD=y
@@ -1154,6 +1168,7 @@ CONFIG_USB=y
1154# Miscellaneous USB options 1168# Miscellaneous USB options
1155# 1169#
1156CONFIG_USB_DEVICEFS=y 1170CONFIG_USB_DEVICEFS=y
1171# CONFIG_USB_DEVICE_CLASS is not set
1157# CONFIG_USB_DYNAMIC_MINORS is not set 1172# CONFIG_USB_DYNAMIC_MINORS is not set
1158# CONFIG_USB_SUSPEND is not set 1173# CONFIG_USB_SUSPEND is not set
1159# CONFIG_USB_OTG is not set 1174# CONFIG_USB_OTG is not set
@@ -1204,10 +1219,6 @@ CONFIG_USB_STORAGE=y
1204# 1219#
1205# USB Input Devices 1220# USB Input Devices
1206# 1221#
1207CONFIG_USB_HID=y
1208# CONFIG_USB_HIDINPUT_POWERBOOK is not set
1209# CONFIG_HID_FF is not set
1210# CONFIG_USB_HIDDEV is not set
1211# CONFIG_USB_AIPTEK is not set 1222# CONFIG_USB_AIPTEK is not set
1212# CONFIG_USB_WACOM is not set 1223# CONFIG_USB_WACOM is not set
1213# CONFIG_USB_ACECAD is not set 1224# CONFIG_USB_ACECAD is not set
@@ -1528,7 +1539,7 @@ CONFIG_DEBUG_KERNEL=y
1528CONFIG_LOG_BUF_SHIFT=18 1539CONFIG_LOG_BUF_SHIFT=18
1529CONFIG_DETECT_SOFTLOCKUP=y 1540CONFIG_DETECT_SOFTLOCKUP=y
1530# CONFIG_SCHEDSTATS is not set 1541# CONFIG_SCHEDSTATS is not set
1531# CONFIG_TIMER_STATS is not set 1542CONFIG_TIMER_STATS=y
1532# CONFIG_DEBUG_SLAB is not set 1543# CONFIG_DEBUG_SLAB is not set
1533# CONFIG_DEBUG_RT_MUTEXES is not set 1544# CONFIG_DEBUG_RT_MUTEXES is not set
1534# CONFIG_RT_MUTEX_TESTER is not set 1545# CONFIG_RT_MUTEX_TESTER is not set
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 4ae3dcf1d2f0..4f98516b9f94 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -39,12 +39,10 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
39obj-$(CONFIG_HPET_TIMER) += hpet.o 39obj-$(CONFIG_HPET_TIMER) += hpet.o
40obj-$(CONFIG_K8_NB) += k8.o 40obj-$(CONFIG_K8_NB) += k8.o
41 41
42obj-$(CONFIG_VMI) += vmi.o vmitime.o 42obj-$(CONFIG_VMI) += vmi.o vmiclock.o
43obj-$(CONFIG_PARAVIRT) += paravirt.o 43obj-$(CONFIG_PARAVIRT) += paravirt.o
44obj-y += pcspeaker.o 44obj-y += pcspeaker.o
45 45
46EXTRA_AFLAGS := -traditional
47
48obj-$(CONFIG_SCx200) += scx200.o 46obj-$(CONFIG_SCx200) += scx200.o
49 47
50# vsyscall.o contains the vsyscall DSO images as __initdata. 48# vsyscall.o contains the vsyscall DSO images as __initdata.
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index 9ea5b8ecc7e1..280898b045b2 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -874,7 +874,7 @@ static void __init acpi_process_madt(void)
874 acpi_ioapic = 1; 874 acpi_ioapic = 1;
875 875
876 smp_found_config = 1; 876 smp_found_config = 1;
877 clustered_apic_check(); 877 setup_apic_routing();
878 } 878 }
879 } 879 }
880 if (error == -EINVAL) { 880 if (error == -EINVAL) {
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c
index 8f7efd38254d..23f78efc577d 100644
--- a/arch/i386/kernel/acpi/earlyquirk.c
+++ b/arch/i386/kernel/acpi/earlyquirk.c
@@ -10,7 +10,6 @@
10#include <asm/pci-direct.h> 10#include <asm/pci-direct.h>
11#include <asm/acpi.h> 11#include <asm/acpi.h>
12#include <asm/apic.h> 12#include <asm/apic.h>
13#include <asm/irq.h>
14 13
15#ifdef CONFIG_ACPI 14#ifdef CONFIG_ACPI
16 15
@@ -48,24 +47,6 @@ static int __init check_bridge(int vendor, int device)
48 return 0; 47 return 0;
49} 48}
50 49
51static void check_intel(void)
52{
53 u16 vendor, device;
54
55 vendor = read_pci_config_16(0, 0, 0, PCI_VENDOR_ID);
56
57 if (vendor != PCI_VENDOR_ID_INTEL)
58 return;
59
60 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
61#ifdef CONFIG_SMP
62 if (device == PCI_DEVICE_ID_INTEL_E7320_MCH ||
63 device == PCI_DEVICE_ID_INTEL_E7520_MCH ||
64 device == PCI_DEVICE_ID_INTEL_E7525_MCH)
65 quirk_intel_irqbalance();
66#endif
67}
68
69void __init check_acpi_pci(void) 50void __init check_acpi_pci(void)
70{ 51{
71 int num, slot, func; 52 int num, slot, func;
@@ -77,8 +58,6 @@ void __init check_acpi_pci(void)
77 if (!early_pci_allowed()) 58 if (!early_pci_allowed())
78 return; 59 return;
79 60
80 check_intel();
81
82 /* Poor man's PCI discovery */ 61 /* Poor man's PCI discovery */
83 for (num = 0; num < 32; num++) { 62 for (num = 0; num < 32; num++) {
84 for (slot = 0; slot < 32; slot++) { 63 for (slot = 0; slot < 32; slot++) {
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 426f59b0106b..e5cec6685cc5 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -5,6 +5,7 @@
5#include <asm/alternative.h> 5#include <asm/alternative.h>
6#include <asm/sections.h> 6#include <asm/sections.h>
7 7
8static int noreplace_smp = 0;
8static int smp_alt_once = 0; 9static int smp_alt_once = 0;
9static int debug_alternative = 0; 10static int debug_alternative = 0;
10 11
@@ -13,15 +14,33 @@ static int __init bootonly(char *str)
13 smp_alt_once = 1; 14 smp_alt_once = 1;
14 return 1; 15 return 1;
15} 16}
17__setup("smp-alt-boot", bootonly);
18
16static int __init debug_alt(char *str) 19static int __init debug_alt(char *str)
17{ 20{
18 debug_alternative = 1; 21 debug_alternative = 1;
19 return 1; 22 return 1;
20} 23}
21
22__setup("smp-alt-boot", bootonly);
23__setup("debug-alternative", debug_alt); 24__setup("debug-alternative", debug_alt);
24 25
26static int __init setup_noreplace_smp(char *str)
27{
28 noreplace_smp = 1;
29 return 1;
30}
31__setup("noreplace-smp", setup_noreplace_smp);
32
33#ifdef CONFIG_PARAVIRT
34static int noreplace_paravirt = 0;
35
36static int __init setup_noreplace_paravirt(char *str)
37{
38 noreplace_paravirt = 1;
39 return 1;
40}
41__setup("noreplace-paravirt", setup_noreplace_paravirt);
42#endif
43
25#define DPRINTK(fmt, args...) if (debug_alternative) \ 44#define DPRINTK(fmt, args...) if (debug_alternative) \
26 printk(KERN_DEBUG fmt, args) 45 printk(KERN_DEBUG fmt, args)
27 46
@@ -132,11 +151,8 @@ static void nop_out(void *insns, unsigned int len)
132} 151}
133 152
134extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 153extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
135extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
136extern u8 *__smp_locks[], *__smp_locks_end[]; 154extern u8 *__smp_locks[], *__smp_locks_end[];
137 155
138extern u8 __smp_alt_begin[], __smp_alt_end[];
139
140/* Replace instructions with better alternatives for this CPU type. 156/* Replace instructions with better alternatives for this CPU type.
141 This runs before SMP is initialized to avoid SMP problems with 157 This runs before SMP is initialized to avoid SMP problems with
142 self modifying code. This implies that assymetric systems where 158 self modifying code. This implies that assymetric systems where
@@ -171,29 +187,6 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
171 187
172#ifdef CONFIG_SMP 188#ifdef CONFIG_SMP
173 189
174static void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end)
175{
176 struct alt_instr *a;
177
178 DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end);
179 for (a = start; a < end; a++) {
180 memcpy(a->replacement + a->replacementlen,
181 a->instr,
182 a->instrlen);
183 }
184}
185
186static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end)
187{
188 struct alt_instr *a;
189
190 for (a = start; a < end; a++) {
191 memcpy(a->instr,
192 a->replacement + a->replacementlen,
193 a->instrlen);
194 }
195}
196
197static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) 190static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
198{ 191{
199 u8 **ptr; 192 u8 **ptr;
@@ -211,6 +204,9 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end
211{ 204{
212 u8 **ptr; 205 u8 **ptr;
213 206
207 if (noreplace_smp)
208 return;
209
214 for (ptr = start; ptr < end; ptr++) { 210 for (ptr = start; ptr < end; ptr++) {
215 if (*ptr < text) 211 if (*ptr < text)
216 continue; 212 continue;
@@ -245,6 +241,9 @@ void alternatives_smp_module_add(struct module *mod, char *name,
245 struct smp_alt_module *smp; 241 struct smp_alt_module *smp;
246 unsigned long flags; 242 unsigned long flags;
247 243
244 if (noreplace_smp)
245 return;
246
248 if (smp_alt_once) { 247 if (smp_alt_once) {
249 if (boot_cpu_has(X86_FEATURE_UP)) 248 if (boot_cpu_has(X86_FEATURE_UP))
250 alternatives_smp_unlock(locks, locks_end, 249 alternatives_smp_unlock(locks, locks_end,
@@ -279,7 +278,7 @@ void alternatives_smp_module_del(struct module *mod)
279 struct smp_alt_module *item; 278 struct smp_alt_module *item;
280 unsigned long flags; 279 unsigned long flags;
281 280
282 if (smp_alt_once) 281 if (smp_alt_once || noreplace_smp)
283 return; 282 return;
284 283
285 spin_lock_irqsave(&smp_alt, flags); 284 spin_lock_irqsave(&smp_alt, flags);
@@ -310,7 +309,7 @@ void alternatives_smp_switch(int smp)
310 return; 309 return;
311#endif 310#endif
312 311
313 if (smp_alt_once) 312 if (noreplace_smp || smp_alt_once)
314 return; 313 return;
315 BUG_ON(!smp && (num_online_cpus() > 1)); 314 BUG_ON(!smp && (num_online_cpus() > 1));
316 315
@@ -319,8 +318,6 @@ void alternatives_smp_switch(int smp)
319 printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); 318 printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
320 clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); 319 clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
321 clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); 320 clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
322 alternatives_smp_apply(__smp_alt_instructions,
323 __smp_alt_instructions_end);
324 list_for_each_entry(mod, &smp_alt_modules, next) 321 list_for_each_entry(mod, &smp_alt_modules, next)
325 alternatives_smp_lock(mod->locks, mod->locks_end, 322 alternatives_smp_lock(mod->locks, mod->locks_end,
326 mod->text, mod->text_end); 323 mod->text, mod->text_end);
@@ -328,8 +325,6 @@ void alternatives_smp_switch(int smp)
328 printk(KERN_INFO "SMP alternatives: switching to UP code\n"); 325 printk(KERN_INFO "SMP alternatives: switching to UP code\n");
329 set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); 326 set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
330 set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); 327 set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
331 apply_alternatives(__smp_alt_instructions,
332 __smp_alt_instructions_end);
333 list_for_each_entry(mod, &smp_alt_modules, next) 328 list_for_each_entry(mod, &smp_alt_modules, next)
334 alternatives_smp_unlock(mod->locks, mod->locks_end, 329 alternatives_smp_unlock(mod->locks, mod->locks_end,
335 mod->text, mod->text_end); 330 mod->text, mod->text_end);
@@ -340,36 +335,31 @@ void alternatives_smp_switch(int smp)
340#endif 335#endif
341 336
342#ifdef CONFIG_PARAVIRT 337#ifdef CONFIG_PARAVIRT
343void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) 338void apply_paravirt(struct paravirt_patch_site *start,
339 struct paravirt_patch_site *end)
344{ 340{
345 struct paravirt_patch *p; 341 struct paravirt_patch_site *p;
342
343 if (noreplace_paravirt)
344 return;
346 345
347 for (p = start; p < end; p++) { 346 for (p = start; p < end; p++) {
348 unsigned int used; 347 unsigned int used;
349 348
350 used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr, 349 used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr,
351 p->len); 350 p->len);
352#ifdef CONFIG_DEBUG_PARAVIRT 351
353 { 352 BUG_ON(used > p->len);
354 int i; 353
355 /* Deliberately clobber regs using "not %reg" to find bugs. */
356 for (i = 0; i < 3; i++) {
357 if (p->len - used >= 2 && (p->clobbers & (1 << i))) {
358 memcpy(p->instr + used, "\xf7\xd0", 2);
359 p->instr[used+1] |= i;
360 used += 2;
361 }
362 }
363 }
364#endif
365 /* Pad the rest with nops */ 354 /* Pad the rest with nops */
366 nop_out(p->instr + used, p->len - used); 355 nop_out(p->instr + used, p->len - used);
367 } 356 }
368 357
369 /* Sync to be conservative, in case we patched following instructions */ 358 /* Sync to be conservative, in case we patched following
359 * instructions */
370 sync_core(); 360 sync_core();
371} 361}
372extern struct paravirt_patch __start_parainstructions[], 362extern struct paravirt_patch_site __start_parainstructions[],
373 __stop_parainstructions[]; 363 __stop_parainstructions[];
374#endif /* CONFIG_PARAVIRT */ 364#endif /* CONFIG_PARAVIRT */
375 365
@@ -396,23 +386,19 @@ void __init alternative_instructions(void)
396 printk(KERN_INFO "SMP alternatives: switching to UP code\n"); 386 printk(KERN_INFO "SMP alternatives: switching to UP code\n");
397 set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); 387 set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
398 set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); 388 set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
399 apply_alternatives(__smp_alt_instructions,
400 __smp_alt_instructions_end);
401 alternatives_smp_unlock(__smp_locks, __smp_locks_end, 389 alternatives_smp_unlock(__smp_locks, __smp_locks_end,
402 _text, _etext); 390 _text, _etext);
403 } 391 }
404 free_init_pages("SMP alternatives", 392 free_init_pages("SMP alternatives",
405 (unsigned long)__smp_alt_begin, 393 __pa_symbol(&__smp_locks),
406 (unsigned long)__smp_alt_end); 394 __pa_symbol(&__smp_locks_end));
407 } else { 395 } else {
408 alternatives_smp_save(__smp_alt_instructions,
409 __smp_alt_instructions_end);
410 alternatives_smp_module_add(NULL, "core kernel", 396 alternatives_smp_module_add(NULL, "core kernel",
411 __smp_locks, __smp_locks_end, 397 __smp_locks, __smp_locks_end,
412 _text, _etext); 398 _text, _etext);
413 alternatives_smp_switch(0); 399 alternatives_smp_switch(0);
414 } 400 }
415#endif 401#endif
416 apply_paravirt(__start_parainstructions, __stop_parainstructions); 402 apply_paravirt(__parainstructions, __parainstructions_end);
417 local_irq_restore(flags); 403 local_irq_restore(flags);
418} 404}
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 93aa911646ad..aca054cc0552 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -129,6 +129,28 @@ static int modern_apic(void)
129 return lapic_get_version() >= 0x14; 129 return lapic_get_version() >= 0x14;
130} 130}
131 131
132void apic_wait_icr_idle(void)
133{
134 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
135 cpu_relax();
136}
137
138unsigned long safe_apic_wait_icr_idle(void)
139{
140 unsigned long send_status;
141 int timeout;
142
143 timeout = 0;
144 do {
145 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
146 if (!send_status)
147 break;
148 udelay(100);
149 } while (timeout++ < 1000);
150
151 return send_status;
152}
153
132/** 154/**
133 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 155 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
134 */ 156 */
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 064bbf2861f4..367ff1d930cb 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -233,11 +233,10 @@
233#include <asm/desc.h> 233#include <asm/desc.h>
234#include <asm/i8253.h> 234#include <asm/i8253.h>
235#include <asm/paravirt.h> 235#include <asm/paravirt.h>
236#include <asm/reboot.h>
236 237
237#include "io_ports.h" 238#include "io_ports.h"
238 239
239extern void machine_real_restart(unsigned char *, int);
240
241#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) 240#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
242extern int (*console_blank_hook)(int); 241extern int (*console_blank_hook)(int);
243#endif 242#endif
@@ -384,13 +383,6 @@ static int ignore_sys_suspend;
384static int ignore_normal_resume; 383static int ignore_normal_resume;
385static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; 384static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL;
386 385
387#ifdef CONFIG_APM_RTC_IS_GMT
388# define clock_cmos_diff 0
389# define got_clock_diff 1
390#else
391static long clock_cmos_diff;
392static int got_clock_diff;
393#endif
394static int debug __read_mostly; 386static int debug __read_mostly;
395static int smp __read_mostly; 387static int smp __read_mostly;
396static int apm_disabled = -1; 388static int apm_disabled = -1;
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index c37535163bfc..27a776c9044d 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -11,11 +11,11 @@
11#include <linux/suspend.h> 11#include <linux/suspend.h>
12#include <asm/ucontext.h> 12#include <asm/ucontext.h>
13#include "sigframe.h" 13#include "sigframe.h"
14#include <asm/pgtable.h>
14#include <asm/fixmap.h> 15#include <asm/fixmap.h>
15#include <asm/processor.h> 16#include <asm/processor.h>
16#include <asm/thread_info.h> 17#include <asm/thread_info.h>
17#include <asm/elf.h> 18#include <asm/elf.h>
18#include <asm/pda.h>
19 19
20#define DEFINE(sym, val) \ 20#define DEFINE(sym, val) \
21 asm volatile("\n->" #sym " %0 " #val : : "i" (val)) 21 asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -25,6 +25,9 @@
25#define OFFSET(sym, str, mem) \ 25#define OFFSET(sym, str, mem) \
26 DEFINE(sym, offsetof(struct str, mem)); 26 DEFINE(sym, offsetof(struct str, mem));
27 27
28/* workaround for a warning with -Wmissing-prototypes */
29void foo(void);
30
28void foo(void) 31void foo(void)
29{ 32{
30 OFFSET(SIGCONTEXT_eax, sigcontext, eax); 33 OFFSET(SIGCONTEXT_eax, sigcontext, eax);
@@ -90,17 +93,18 @@ void foo(void)
90 OFFSET(pbe_next, pbe, next); 93 OFFSET(pbe_next, pbe, next);
91 94
92 /* Offset from the sysenter stack to tss.esp0 */ 95 /* Offset from the sysenter stack to tss.esp0 */
93 DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) - 96 DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, x86_tss.esp0) -
94 sizeof(struct tss_struct)); 97 sizeof(struct tss_struct));
95 98
96 DEFINE(PAGE_SIZE_asm, PAGE_SIZE); 99 DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
97 DEFINE(VDSO_PRELINK, VDSO_PRELINK); 100 DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT);
101 DEFINE(PTRS_PER_PTE, PTRS_PER_PTE);
102 DEFINE(PTRS_PER_PMD, PTRS_PER_PMD);
103 DEFINE(PTRS_PER_PGD, PTRS_PER_PGD);
98 104
99 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); 105 DEFINE(VDSO_PRELINK_asm, VDSO_PRELINK);
100 106
101 BLANK(); 107 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
102 OFFSET(PDA_cpu, i386_pda, cpu_number);
103 OFFSET(PDA_pcurrent, i386_pda, pcurrent);
104 108
105#ifdef CONFIG_PARAVIRT 109#ifdef CONFIG_PARAVIRT
106 BLANK(); 110 BLANK();
diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile
index 010aecfffbc1..74f27a463db0 100644
--- a/arch/i386/kernel/cpu/Makefile
+++ b/arch/i386/kernel/cpu/Makefile
@@ -2,7 +2,7 @@
2# Makefile for x86-compatible CPU details and quirks 2# Makefile for x86-compatible CPU details and quirks
3# 3#
4 4
5obj-y := common.o proc.o 5obj-y := common.o proc.o bugs.o
6 6
7obj-y += amd.o 7obj-y += amd.o
8obj-y += cyrix.o 8obj-y += cyrix.o
@@ -17,3 +17,5 @@ obj-$(CONFIG_X86_MCE) += mcheck/
17 17
18obj-$(CONFIG_MTRR) += mtrr/ 18obj-$(CONFIG_MTRR) += mtrr/
19obj-$(CONFIG_CPU_FREQ) += cpufreq/ 19obj-$(CONFIG_CPU_FREQ) += cpufreq/
20
21obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 2d47db482972..4fec702afd7e 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -53,6 +53,8 @@ static __cpuinit int amd_apic_timer_broken(void)
53 return 0; 53 return 0;
54} 54}
55 55
56int force_mwait __cpuinitdata;
57
56static void __cpuinit init_amd(struct cpuinfo_x86 *c) 58static void __cpuinit init_amd(struct cpuinfo_x86 *c)
57{ 59{
58 u32 l, h; 60 u32 l, h;
@@ -275,6 +277,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
275 277
276 if (amd_apic_timer_broken()) 278 if (amd_apic_timer_broken())
277 set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability); 279 set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability);
280
281 if (c->x86 == 0x10 && !force_mwait)
282 clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
278} 283}
279 284
280static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) 285static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
@@ -314,13 +319,3 @@ int __init amd_init_cpu(void)
314 cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev; 319 cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
315 return 0; 320 return 0;
316} 321}
317
318//early_arch_initcall(amd_init_cpu);
319
320static int __init amd_exit_cpu(void)
321{
322 cpu_devs[X86_VENDOR_AMD] = NULL;
323 return 0;
324}
325
326late_initcall(amd_exit_cpu);
diff --git a/arch/i386/kernel/cpu/bugs.c b/arch/i386/kernel/cpu/bugs.c
new file mode 100644
index 000000000000..54428a2500f3
--- /dev/null
+++ b/arch/i386/kernel/cpu/bugs.c
@@ -0,0 +1,191 @@
1/*
2 * arch/i386/cpu/bugs.c
3 *
4 * Copyright (C) 1994 Linus Torvalds
5 *
6 * Cyrix stuff, June 1998 by:
7 * - Rafael R. Reilova (moved everything from head.S),
8 * <rreilova@ececs.uc.edu>
9 * - Channing Corn (tests & fixes),
10 * - Andrew D. Balsa (code cleanup).
11 */
12#include <linux/init.h>
13#include <linux/utsname.h>
14#include <asm/processor.h>
15#include <asm/i387.h>
16#include <asm/msr.h>
17#include <asm/paravirt.h>
18#include <asm/alternative.h>
19
20static int __init no_halt(char *s)
21{
22 boot_cpu_data.hlt_works_ok = 0;
23 return 1;
24}
25
26__setup("no-hlt", no_halt);
27
28static int __init mca_pentium(char *s)
29{
30 mca_pentium_flag = 1;
31 return 1;
32}
33
34__setup("mca-pentium", mca_pentium);
35
36static int __init no_387(char *s)
37{
38 boot_cpu_data.hard_math = 0;
39 write_cr0(0xE | read_cr0());
40 return 1;
41}
42
43__setup("no387", no_387);
44
45static double __initdata x = 4195835.0;
46static double __initdata y = 3145727.0;
47
48/*
49 * This used to check for exceptions..
50 * However, it turns out that to support that,
51 * the XMM trap handlers basically had to
52 * be buggy. So let's have a correct XMM trap
53 * handler, and forget about printing out
54 * some status at boot.
55 *
56 * We should really only care about bugs here
57 * anyway. Not features.
58 */
59static void __init check_fpu(void)
60{
61 if (!boot_cpu_data.hard_math) {
62#ifndef CONFIG_MATH_EMULATION
63 printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
64 printk(KERN_EMERG "Giving up.\n");
65 for (;;) ;
66#endif
67 return;
68 }
69
70/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */
71 /* Test for the divl bug.. */
72 __asm__("fninit\n\t"
73 "fldl %1\n\t"
74 "fdivl %2\n\t"
75 "fmull %2\n\t"
76 "fldl %1\n\t"
77 "fsubp %%st,%%st(1)\n\t"
78 "fistpl %0\n\t"
79 "fwait\n\t"
80 "fninit"
81 : "=m" (*&boot_cpu_data.fdiv_bug)
82 : "m" (*&x), "m" (*&y));
83 if (boot_cpu_data.fdiv_bug)
84 printk("Hmm, FPU with FDIV bug.\n");
85}
86
87static void __init check_hlt(void)
88{
89 if (paravirt_enabled())
90 return;
91
92 printk(KERN_INFO "Checking 'hlt' instruction... ");
93 if (!boot_cpu_data.hlt_works_ok) {
94 printk("disabled\n");
95 return;
96 }
97 halt();
98 halt();
99 halt();
100 halt();
101 printk("OK.\n");
102}
103
104/*
105 * Most 386 processors have a bug where a POPAD can lock the
106 * machine even from user space.
107 */
108
109static void __init check_popad(void)
110{
111#ifndef CONFIG_X86_POPAD_OK
112 int res, inp = (int) &res;
113
114 printk(KERN_INFO "Checking for popad bug... ");
115 __asm__ __volatile__(
116 "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
117 : "=&a" (res)
118 : "d" (inp)
119 : "ecx", "edi" );
120 /* If this fails, it means that any user program may lock the CPU hard. Too bad. */
121 if (res != 12345678) printk( "Buggy.\n" );
122 else printk( "OK.\n" );
123#endif
124}
125
126/*
127 * Check whether we are able to run this kernel safely on SMP.
128 *
129 * - In order to run on a i386, we need to be compiled for i386
130 * (for due to lack of "invlpg" and working WP on a i386)
131 * - In order to run on anything without a TSC, we need to be
132 * compiled for a i486.
133 * - In order to support the local APIC on a buggy Pentium machine,
134 * we need to be compiled with CONFIG_X86_GOOD_APIC disabled,
135 * which happens implicitly if compiled for a Pentium or lower
136 * (unless an advanced selection of CPU features is used) as an
137 * otherwise config implies a properly working local APIC without
138 * the need to do extra reads from the APIC.
139*/
140
141static void __init check_config(void)
142{
143/*
144 * We'd better not be a i386 if we're configured to use some
145 * i486+ only features! (WP works in supervisor mode and the
146 * new "invlpg" and "bswap" instructions)
147 */
148#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP)
149 if (boot_cpu_data.x86 == 3)
150 panic("Kernel requires i486+ for 'invlpg' and other features");
151#endif
152
153/*
154 * If we configured ourselves for a TSC, we'd better have one!
155 */
156#ifdef CONFIG_X86_TSC
157 if (!cpu_has_tsc && !tsc_disable)
158 panic("Kernel compiled for Pentium+, requires TSC feature!");
159#endif
160
161/*
162 * If we were told we had a good local APIC, check for buggy Pentia,
163 * i.e. all B steppings and the C2 stepping of P54C when using their
164 * integrated APIC (see 11AP erratum in "Pentium Processor
165 * Specification Update").
166 */
167#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC)
168 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
169 && cpu_has_apic
170 && boot_cpu_data.x86 == 5
171 && boot_cpu_data.x86_model == 2
172 && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11))
173 panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!");
174#endif
175}
176
177
178void __init check_bugs(void)
179{
180 identify_boot_cpu();
181#ifndef CONFIG_SMP
182 printk("CPU: ");
183 print_cpu_info(&boot_cpu_data);
184#endif
185 check_config();
186 check_fpu();
187 check_hlt();
188 check_popad();
189 init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
190 alternative_instructions();
191}
diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c
index 8c25047975c0..473eac883c7b 100644
--- a/arch/i386/kernel/cpu/centaur.c
+++ b/arch/i386/kernel/cpu/centaur.c
@@ -469,13 +469,3 @@ int __init centaur_init_cpu(void)
469 cpu_devs[X86_VENDOR_CENTAUR] = &centaur_cpu_dev; 469 cpu_devs[X86_VENDOR_CENTAUR] = &centaur_cpu_dev;
470 return 0; 470 return 0;
471} 471}
472
473//early_arch_initcall(centaur_init_cpu);
474
475static int __init centaur_exit_cpu(void)
476{
477 cpu_devs[X86_VENDOR_CENTAUR] = NULL;
478 return 0;
479}
480
481late_initcall(centaur_exit_cpu);
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index dcbbd0a8bfc2..794d593c47eb 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -18,15 +18,37 @@
18#include <asm/apic.h> 18#include <asm/apic.h>
19#include <mach_apic.h> 19#include <mach_apic.h>
20#endif 20#endif
21#include <asm/pda.h>
22 21
23#include "cpu.h" 22#include "cpu.h"
24 23
25DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); 24DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
26EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); 25 [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 },
26 [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 },
27 [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 },
28 [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 },
29 /*
30 * Segments used for calling PnP BIOS have byte granularity.
31 * They code segments and data segments have fixed 64k limits,
32 * the transfer segment sizes are set at run time.
33 */
34 [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
35 [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */
36 [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */
37 [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */
38 [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */
39 /*
40 * The APM segments have byte granularity and their bases
41 * are set at run time. All have 64k limits.
42 */
43 [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
44 /* 16-bit code */
45 [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 },
46 [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */
27 47
28struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly; 48 [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 },
29EXPORT_SYMBOL(_cpu_pda); 49 [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 },
50} };
51EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
30 52
31static int cachesize_override __cpuinitdata = -1; 53static int cachesize_override __cpuinitdata = -1;
32static int disable_x86_fxsr __cpuinitdata; 54static int disable_x86_fxsr __cpuinitdata;
@@ -368,7 +390,7 @@ __setup("serialnumber", x86_serial_nr_setup);
368/* 390/*
369 * This does the hard work of actually picking apart the CPU stuff... 391 * This does the hard work of actually picking apart the CPU stuff...
370 */ 392 */
371void __cpuinit identify_cpu(struct cpuinfo_x86 *c) 393static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
372{ 394{
373 int i; 395 int i;
374 396
@@ -479,15 +501,22 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
479 501
480 /* Init Machine Check Exception if available. */ 502 /* Init Machine Check Exception if available. */
481 mcheck_init(c); 503 mcheck_init(c);
504}
482 505
483 if (c == &boot_cpu_data) 506void __init identify_boot_cpu(void)
484 sysenter_setup(); 507{
508 identify_cpu(&boot_cpu_data);
509 sysenter_setup();
485 enable_sep_cpu(); 510 enable_sep_cpu();
511 mtrr_bp_init();
512}
486 513
487 if (c == &boot_cpu_data) 514void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
488 mtrr_bp_init(); 515{
489 else 516 BUG_ON(c == &boot_cpu_data);
490 mtrr_ap_init(); 517 identify_cpu(c);
518 enable_sep_cpu();
519 mtrr_ap_init();
491} 520}
492 521
493#ifdef CONFIG_X86_HT 522#ifdef CONFIG_X86_HT
@@ -601,129 +630,36 @@ void __init early_cpu_init(void)
601#endif 630#endif
602} 631}
603 632
604/* Make sure %gs is initialized properly in idle threads */ 633/* Make sure %fs is initialized properly in idle threads */
605struct pt_regs * __devinit idle_regs(struct pt_regs *regs) 634struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
606{ 635{
607 memset(regs, 0, sizeof(struct pt_regs)); 636 memset(regs, 0, sizeof(struct pt_regs));
608 regs->xfs = __KERNEL_PDA; 637 regs->xfs = __KERNEL_PERCPU;
609 return regs; 638 return regs;
610} 639}
611 640
612static __cpuinit int alloc_gdt(int cpu) 641/* Current gdt points %fs at the "master" per-cpu area: after this,
642 * it's on the real one. */
643void switch_to_new_gdt(void)
613{ 644{
614 struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); 645 struct Xgt_desc_struct gdt_descr;
615 struct desc_struct *gdt;
616 struct i386_pda *pda;
617
618 gdt = (struct desc_struct *)cpu_gdt_descr->address;
619 pda = cpu_pda(cpu);
620
621 /*
622 * This is a horrible hack to allocate the GDT. The problem
623 * is that cpu_init() is called really early for the boot CPU
624 * (and hence needs bootmem) but much later for the secondary
625 * CPUs, when bootmem will have gone away
626 */
627 if (NODE_DATA(0)->bdata->node_bootmem_map) {
628 BUG_ON(gdt != NULL || pda != NULL);
629
630 gdt = alloc_bootmem_pages(PAGE_SIZE);
631 pda = alloc_bootmem(sizeof(*pda));
632 /* alloc_bootmem(_pages) panics on failure, so no check */
633
634 memset(gdt, 0, PAGE_SIZE);
635 memset(pda, 0, sizeof(*pda));
636 } else {
637 /* GDT and PDA might already have been allocated if
638 this is a CPU hotplug re-insertion. */
639 if (gdt == NULL)
640 gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
641
642 if (pda == NULL)
643 pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu));
644
645 if (unlikely(!gdt || !pda)) {
646 free_pages((unsigned long)gdt, 0);
647 kfree(pda);
648 return 0;
649 }
650 }
651
652 cpu_gdt_descr->address = (unsigned long)gdt;
653 cpu_pda(cpu) = pda;
654
655 return 1;
656}
657 646
658/* Initial PDA used by boot CPU */ 647 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
659struct i386_pda boot_pda = { 648 gdt_descr.size = GDT_SIZE - 1;
660 ._pda = &boot_pda, 649 load_gdt(&gdt_descr);
661 .cpu_number = 0, 650 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
662 .pcurrent = &init_task,
663};
664
665static inline void set_kernel_fs(void)
666{
667 /* Set %fs for this CPU's PDA. Memory clobber is to create a
668 barrier with respect to any PDA operations, so the compiler
669 doesn't move any before here. */
670 asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory");
671} 651}
672 652
673/* Initialize the CPU's GDT and PDA. The boot CPU does this for 653/*
674 itself, but secondaries find this done for them. */ 654 * cpu_init() initializes state that is per-CPU. Some data is already
675__cpuinit int init_gdt(int cpu, struct task_struct *idle) 655 * initialized (naturally) in the bootstrap process, such as the GDT
676{ 656 * and IDT. We reload them nevertheless, this function acts as a
677 struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); 657 * 'CPU state barrier', nothing should get across.
678 struct desc_struct *gdt; 658 */
679 struct i386_pda *pda; 659void __cpuinit cpu_init(void)
680
681 /* For non-boot CPUs, the GDT and PDA should already have been
682 allocated. */
683 if (!alloc_gdt(cpu)) {
684 printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu);
685 return 0;
686 }
687
688 gdt = (struct desc_struct *)cpu_gdt_descr->address;
689 pda = cpu_pda(cpu);
690
691 BUG_ON(gdt == NULL || pda == NULL);
692
693 /*
694 * Initialize the per-CPU GDT with the boot GDT,
695 * and set up the GDT descriptor:
696 */
697 memcpy(gdt, cpu_gdt_table, GDT_SIZE);
698 cpu_gdt_descr->size = GDT_SIZE - 1;
699
700 pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a,
701 (u32 *)&gdt[GDT_ENTRY_PDA].b,
702 (unsigned long)pda, sizeof(*pda) - 1,
703 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */
704
705 memset(pda, 0, sizeof(*pda));
706 pda->_pda = pda;
707 pda->cpu_number = cpu;
708 pda->pcurrent = idle;
709
710 return 1;
711}
712
713void __cpuinit cpu_set_gdt(int cpu)
714{
715 struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
716
717 /* Reinit these anyway, even if they've already been done (on
718 the boot CPU, this will transition from the boot gdt+pda to
719 the real ones). */
720 load_gdt(cpu_gdt_descr);
721 set_kernel_fs();
722}
723
724/* Common CPU init for both boot and secondary CPUs */
725static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
726{ 660{
661 int cpu = smp_processor_id();
662 struct task_struct *curr = current;
727 struct tss_struct * t = &per_cpu(init_tss, cpu); 663 struct tss_struct * t = &per_cpu(init_tss, cpu);
728 struct thread_struct *thread = &curr->thread; 664 struct thread_struct *thread = &curr->thread;
729 665
@@ -744,6 +680,7 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
744 } 680 }
745 681
746 load_idt(&idt_descr); 682 load_idt(&idt_descr);
683 switch_to_new_gdt();
747 684
748 /* 685 /*
749 * Set up and load the per-CPU TSS and LDT 686 * Set up and load the per-CPU TSS and LDT
@@ -783,38 +720,6 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
783 mxcsr_feature_mask_init(); 720 mxcsr_feature_mask_init();
784} 721}
785 722
786/* Entrypoint to initialize secondary CPU */
787void __cpuinit secondary_cpu_init(void)
788{
789 int cpu = smp_processor_id();
790 struct task_struct *curr = current;
791
792 _cpu_init(cpu, curr);
793}
794
795/*
796 * cpu_init() initializes state that is per-CPU. Some data is already
797 * initialized (naturally) in the bootstrap process, such as the GDT
798 * and IDT. We reload them nevertheless, this function acts as a
799 * 'CPU state barrier', nothing should get across.
800 */
801void __cpuinit cpu_init(void)
802{
803 int cpu = smp_processor_id();
804 struct task_struct *curr = current;
805
806 /* Set up the real GDT and PDA, so we can transition from the
807 boot versions. */
808 if (!init_gdt(cpu, curr)) {
809 /* failed to allocate something; not much we can do... */
810 for (;;)
811 local_irq_enable();
812 }
813
814 cpu_set_gdt(cpu);
815 _cpu_init(cpu, curr);
816}
817
818#ifdef CONFIG_HOTPLUG_CPU 723#ifdef CONFIG_HOTPLUG_CPU
819void __cpuinit cpu_uninit(void) 724void __cpuinit cpu_uninit(void)
820{ 725{
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index de27bd07bc9c..0b8411a864fb 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -279,7 +279,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
279 */ 279 */
280 if (vendor == PCI_VENDOR_ID_CYRIX && 280 if (vendor == PCI_VENDOR_ID_CYRIX &&
281 (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) 281 (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520))
282 pit_latch_buggy = 1; 282 mark_tsc_unstable("cyrix 5510/5520 detected");
283 } 283 }
284#endif 284#endif
285 c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ 285 c->x86_cache_size=16; /* Yep 16K integrated cache thats it */
@@ -448,16 +448,6 @@ int __init cyrix_init_cpu(void)
448 return 0; 448 return 0;
449} 449}
450 450
451//early_arch_initcall(cyrix_init_cpu);
452
453static int __init cyrix_exit_cpu(void)
454{
455 cpu_devs[X86_VENDOR_CYRIX] = NULL;
456 return 0;
457}
458
459late_initcall(cyrix_exit_cpu);
460
461static struct cpu_dev nsc_cpu_dev __cpuinitdata = { 451static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
462 .c_vendor = "NSC", 452 .c_vendor = "NSC",
463 .c_ident = { "Geode by NSC" }, 453 .c_ident = { "Geode by NSC" },
@@ -470,12 +460,3 @@ int __init nsc_init_cpu(void)
470 return 0; 460 return 0;
471} 461}
472 462
473//early_arch_initcall(nsc_init_cpu);
474
475static int __init nsc_exit_cpu(void)
476{
477 cpu_devs[X86_VENDOR_NSC] = NULL;
478 return 0;
479}
480
481late_initcall(nsc_exit_cpu);
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 56fe26584957..dc4e08147b1f 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -188,8 +188,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
188 } 188 }
189#endif 189#endif
190 190
191 if (c->x86 == 15) 191 if (c->x86 == 15) {
192 set_bit(X86_FEATURE_P4, c->x86_capability); 192 set_bit(X86_FEATURE_P4, c->x86_capability);
193 set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability);
194 }
193 if (c->x86 == 6) 195 if (c->x86 == 6)
194 set_bit(X86_FEATURE_P3, c->x86_capability); 196 set_bit(X86_FEATURE_P3, c->x86_capability);
195 if ((c->x86 == 0xf && c->x86_model >= 0x03) || 197 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c
index b0862af595aa..f9fa4142551e 100644
--- a/arch/i386/kernel/cpu/mcheck/k7.c
+++ b/arch/i386/kernel/cpu/mcheck/k7.c
@@ -75,6 +75,9 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
75 machine_check_vector = k7_machine_check; 75 machine_check_vector = k7_machine_check;
76 wmb(); 76 wmb();
77 77
78 if (!cpu_has(c, X86_FEATURE_MCE))
79 return;
80
78 printk (KERN_INFO "Intel machine check architecture supported.\n"); 81 printk (KERN_INFO "Intel machine check architecture supported.\n");
79 rdmsr (MSR_IA32_MCG_CAP, l, h); 82 rdmsr (MSR_IA32_MCG_CAP, l, h);
80 if (l & (1<<8)) /* Control register present ? */ 83 if (l & (1<<8)) /* Control register present ? */
@@ -82,9 +85,13 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
82 nr_mce_banks = l & 0xff; 85 nr_mce_banks = l & 0xff;
83 86
84 /* Clear status for MC index 0 separately, we don't touch CTL, 87 /* Clear status for MC index 0 separately, we don't touch CTL,
85 * as some Athlons cause spurious MCEs when its enabled. */ 88 * as some K7 Athlons cause spurious MCEs when its enabled. */
86 wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); 89 if (boot_cpu_data.x86 == 6) {
87 for (i=1; i<nr_mce_banks; i++) { 90 wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
91 i = 1;
92 } else
93 i = 0;
94 for (; i<nr_mce_banks; i++) {
88 wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); 95 wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
89 wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); 96 wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
90 } 97 }
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c
index 4f10c62d180c..56cd485b127c 100644
--- a/arch/i386/kernel/cpu/mcheck/mce.c
+++ b/arch/i386/kernel/cpu/mcheck/mce.c
@@ -38,8 +38,7 @@ void mcheck_init(struct cpuinfo_x86 *c)
38 38
39 switch (c->x86_vendor) { 39 switch (c->x86_vendor) {
40 case X86_VENDOR_AMD: 40 case X86_VENDOR_AMD:
41 if (c->x86==6 || c->x86==15) 41 amd_mcheck_init(c);
42 amd_mcheck_init(c);
43 break; 42 break;
44 43
45 case X86_VENDOR_INTEL: 44 case X86_VENDOR_INTEL:
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index 504434a46011..1509edfb2313 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -124,13 +124,10 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
124 124
125 125
126/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ 126/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
127static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r) 127static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
128{ 128{
129 u32 h; 129 u32 h;
130 130
131 if (mce_num_extended_msrs == 0)
132 goto done;
133
134 rdmsr (MSR_IA32_MCG_EAX, r->eax, h); 131 rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
135 rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); 132 rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
136 rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); 133 rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
@@ -141,12 +138,6 @@ static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
141 rdmsr (MSR_IA32_MCG_ESP, r->esp, h); 138 rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
142 rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); 139 rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
143 rdmsr (MSR_IA32_MCG_EIP, r->eip, h); 140 rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
144
145 /* can we rely on kmalloc to do a dynamic
146 * allocation for the reserved registers?
147 */
148done:
149 return mce_num_extended_msrs;
150} 141}
151 142
152static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) 143static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
@@ -155,7 +146,6 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
155 u32 alow, ahigh, high, low; 146 u32 alow, ahigh, high, low;
156 u32 mcgstl, mcgsth; 147 u32 mcgstl, mcgsth;
157 int i; 148 int i;
158 struct intel_mce_extended_msrs dbg;
159 149
160 rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); 150 rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
161 if (mcgstl & (1<<0)) /* Recoverable ? */ 151 if (mcgstl & (1<<0)) /* Recoverable ? */
@@ -164,7 +154,9 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
164 printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", 154 printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
165 smp_processor_id(), mcgsth, mcgstl); 155 smp_processor_id(), mcgsth, mcgstl);
166 156
167 if (intel_get_extended_msrs(&dbg)) { 157 if (mce_num_extended_msrs > 0) {
158 struct intel_mce_extended_msrs dbg;
159 intel_get_extended_msrs(&dbg);
168 printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", 160 printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
169 smp_processor_id(), dbg.eip, dbg.eflags); 161 smp_processor_id(), dbg.eip, dbg.eflags);
170 printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", 162 printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index f77fc53db654..5367e32e0403 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -20,13 +20,25 @@ struct mtrr_state {
20 mtrr_type def_type; 20 mtrr_type def_type;
21}; 21};
22 22
23struct fixed_range_block {
24 int base_msr; /* start address of an MTRR block */
25 int ranges; /* number of MTRRs in this block */
26};
27
28static struct fixed_range_block fixed_range_blocks[] = {
29 { MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */
30 { MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */
31 { MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */
32 {}
33};
34
23static unsigned long smp_changes_mask; 35static unsigned long smp_changes_mask;
24static struct mtrr_state mtrr_state = {}; 36static struct mtrr_state mtrr_state = {};
25 37
26#undef MODULE_PARAM_PREFIX 38#undef MODULE_PARAM_PREFIX
27#define MODULE_PARAM_PREFIX "mtrr." 39#define MODULE_PARAM_PREFIX "mtrr."
28 40
29static __initdata int mtrr_show; 41static int mtrr_show;
30module_param_named(show, mtrr_show, bool, 0); 42module_param_named(show, mtrr_show, bool, 0);
31 43
32/* Get the MSR pair relating to a var range */ 44/* Get the MSR pair relating to a var range */
@@ -37,7 +49,7 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
37 rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); 49 rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
38} 50}
39 51
40static void __init 52static void
41get_fixed_ranges(mtrr_type * frs) 53get_fixed_ranges(mtrr_type * frs)
42{ 54{
43 unsigned int *p = (unsigned int *) frs; 55 unsigned int *p = (unsigned int *) frs;
@@ -51,12 +63,18 @@ get_fixed_ranges(mtrr_type * frs)
51 rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]); 63 rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
52} 64}
53 65
54static void __init print_fixed(unsigned base, unsigned step, const mtrr_type*types) 66void mtrr_save_fixed_ranges(void *info)
67{
68 get_fixed_ranges(mtrr_state.fixed_ranges);
69}
70
71static void __cpuinit print_fixed(unsigned base, unsigned step, const mtrr_type*types)
55{ 72{
56 unsigned i; 73 unsigned i;
57 74
58 for (i = 0; i < 8; ++i, ++types, base += step) 75 for (i = 0; i < 8; ++i, ++types, base += step)
59 printk(KERN_INFO "MTRR %05X-%05X %s\n", base, base + step - 1, mtrr_attrib_to_str(*types)); 76 printk(KERN_INFO "MTRR %05X-%05X %s\n",
77 base, base + step - 1, mtrr_attrib_to_str(*types));
60} 78}
61 79
62/* Grab all of the MTRR state for this CPU into *state */ 80/* Grab all of the MTRR state for this CPU into *state */
@@ -147,6 +165,44 @@ void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
147 smp_processor_id(), msr, a, b); 165 smp_processor_id(), msr, a, b);
148} 166}
149 167
168/**
169 * Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs
170 * see AMD publication no. 24593, chapter 3.2.1 for more information
171 */
172static inline void k8_enable_fixed_iorrs(void)
173{
174 unsigned lo, hi;
175
176 rdmsr(MSR_K8_SYSCFG, lo, hi);
177 mtrr_wrmsr(MSR_K8_SYSCFG, lo
178 | K8_MTRRFIXRANGE_DRAM_ENABLE
179 | K8_MTRRFIXRANGE_DRAM_MODIFY, hi);
180}
181
182/**
183 * Checks and updates an fixed-range MTRR if it differs from the value it
184 * should have. If K8 extenstions are wanted, update the K8 SYSCFG MSR also.
185 * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information
186 * \param msr MSR address of the MTTR which should be checked and updated
187 * \param changed pointer which indicates whether the MTRR needed to be changed
188 * \param msrwords pointer to the MSR values which the MSR should have
189 */
190static void set_fixed_range(int msr, int * changed, unsigned int * msrwords)
191{
192 unsigned lo, hi;
193
194 rdmsr(msr, lo, hi);
195
196 if (lo != msrwords[0] || hi != msrwords[1]) {
197 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
198 boot_cpu_data.x86 == 15 &&
199 ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
200 k8_enable_fixed_iorrs();
201 mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
202 *changed = TRUE;
203 }
204}
205
150int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) 206int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
151/* [SUMMARY] Get a free MTRR. 207/* [SUMMARY] Get a free MTRR.
152 <base> The starting (base) address of the region. 208 <base> The starting (base) address of the region.
@@ -196,36 +252,21 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
196 *type = base_lo & 0xff; 252 *type = base_lo & 0xff;
197} 253}
198 254
255/**
256 * Checks and updates the fixed-range MTRRs if they differ from the saved set
257 * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges()
258 */
199static int set_fixed_ranges(mtrr_type * frs) 259static int set_fixed_ranges(mtrr_type * frs)
200{ 260{
201 unsigned int *p = (unsigned int *) frs; 261 unsigned long long *saved = (unsigned long long *) frs;
202 int changed = FALSE; 262 int changed = FALSE;
203 int i; 263 int block=-1, range;
204 unsigned int lo, hi;
205 264
206 rdmsr(MTRRfix64K_00000_MSR, lo, hi); 265 while (fixed_range_blocks[++block].ranges)
207 if (p[0] != lo || p[1] != hi) { 266 for (range=0; range < fixed_range_blocks[block].ranges; range++)
208 mtrr_wrmsr(MTRRfix64K_00000_MSR, p[0], p[1]); 267 set_fixed_range(fixed_range_blocks[block].base_msr + range,
209 changed = TRUE; 268 &changed, (unsigned int *) saved++);
210 }
211 269
212 for (i = 0; i < 2; i++) {
213 rdmsr(MTRRfix16K_80000_MSR + i, lo, hi);
214 if (p[2 + i * 2] != lo || p[3 + i * 2] != hi) {
215 mtrr_wrmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2],
216 p[3 + i * 2]);
217 changed = TRUE;
218 }
219 }
220
221 for (i = 0; i < 8; i++) {
222 rdmsr(MTRRfix4K_C0000_MSR + i, lo, hi);
223 if (p[6 + i * 2] != lo || p[7 + i * 2] != hi) {
224 mtrr_wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2],
225 p[7 + i * 2]);
226 changed = TRUE;
227 }
228 }
229 return changed; 270 return changed;
230} 271}
231 272
@@ -428,7 +469,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i
428 } 469 }
429 } 470 }
430 471
431 if (base + size < 0x100) { 472 if (base < 0x100) {
432 printk(KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%lx000,0x%lx000)\n", 473 printk(KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%lx000,0x%lx000)\n",
433 base, size); 474 base, size);
434 return -EINVAL; 475 return -EINVAL;
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 0acfb6a5a220..02a2f39e5e0a 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -729,6 +729,17 @@ void mtrr_ap_init(void)
729 local_irq_restore(flags); 729 local_irq_restore(flags);
730} 730}
731 731
732/**
733 * Save current fixed-range MTRR state of the BSP
734 */
735void mtrr_save_state(void)
736{
737 if (smp_processor_id() == 0)
738 mtrr_save_fixed_ranges(NULL);
739 else
740 smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1);
741}
742
732static int __init mtrr_init_finialize(void) 743static int __init mtrr_init_finialize(void)
733{ 744{
734 if (!mtrr_if) 745 if (!mtrr_if)
diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c
index 8bf23cc80c63..961fbe1a748f 100644
--- a/arch/i386/kernel/cpu/nexgen.c
+++ b/arch/i386/kernel/cpu/nexgen.c
@@ -58,13 +58,3 @@ int __init nexgen_init_cpu(void)
58 cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev; 58 cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev;
59 return 0; 59 return 0;
60} 60}
61
62//early_arch_initcall(nexgen_init_cpu);
63
64static int __init nexgen_exit_cpu(void)
65{
66 cpu_devs[X86_VENDOR_NEXGEN] = NULL;
67 return 0;
68}
69
70late_initcall(nexgen_exit_cpu);
diff --git a/arch/i386/kernel/cpu/perfctr-watchdog.c b/arch/i386/kernel/cpu/perfctr-watchdog.c
new file mode 100644
index 000000000000..2b04c8f1db62
--- /dev/null
+++ b/arch/i386/kernel/cpu/perfctr-watchdog.c
@@ -0,0 +1,658 @@
1/* local apic based NMI watchdog for various CPUs.
2 This file also handles reservation of performance counters for coordination
3 with other users (like oprofile).
4
5 Note that these events normally don't tick when the CPU idles. This means
6 the frequency varies with CPU load.
7
8 Original code for K7/P6 written by Keith Owens */
9
10#include <linux/percpu.h>
11#include <linux/module.h>
12#include <linux/kernel.h>
13#include <linux/bitops.h>
14#include <linux/smp.h>
15#include <linux/nmi.h>
16#include <asm/apic.h>
17#include <asm/intel_arch_perfmon.h>
18
19struct nmi_watchdog_ctlblk {
20 unsigned int cccr_msr;
21 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
22 unsigned int evntsel_msr; /* the MSR to select the events to handle */
23};
24
25/* Interface defining a CPU specific perfctr watchdog */
26struct wd_ops {
27 int (*reserve)(void);
28 void (*unreserve)(void);
29 int (*setup)(unsigned nmi_hz);
30 void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
31 void (*stop)(void *);
32 unsigned perfctr;
33 unsigned evntsel;
34 u64 checkbit;
35};
36
37static struct wd_ops *wd_ops;
38
39/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
40 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
41 */
42#define NMI_MAX_COUNTER_BITS 66
43
44/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
45 * evtsel_nmi_owner tracks the ownership of the event selection
46 * - different performance counters/ event selection may be reserved for
47 * different subsystems this reservation system just tries to coordinate
48 * things a little
49 */
50static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
51static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
52
53static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
54
55/* converts an msr to an appropriate reservation bit */
56static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
57{
58 return wd_ops ? msr - wd_ops->perfctr : 0;
59}
60
61/* converts an msr to an appropriate reservation bit */
62/* returns the bit offset of the event selection register */
63static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
64{
65 return wd_ops ? msr - wd_ops->evntsel : 0;
66}
67
68/* checks for a bit availability (hack for oprofile) */
69int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
70{
71 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
72
73 return (!test_bit(counter, perfctr_nmi_owner));
74}
75
76/* checks the an msr for availability */
77int avail_to_resrv_perfctr_nmi(unsigned int msr)
78{
79 unsigned int counter;
80
81 counter = nmi_perfctr_msr_to_bit(msr);
82 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
83
84 return (!test_bit(counter, perfctr_nmi_owner));
85}
86
87int reserve_perfctr_nmi(unsigned int msr)
88{
89 unsigned int counter;
90
91 counter = nmi_perfctr_msr_to_bit(msr);
92 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
93
94 if (!test_and_set_bit(counter, perfctr_nmi_owner))
95 return 1;
96 return 0;
97}
98
99void release_perfctr_nmi(unsigned int msr)
100{
101 unsigned int counter;
102
103 counter = nmi_perfctr_msr_to_bit(msr);
104 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
105
106 clear_bit(counter, perfctr_nmi_owner);
107}
108
109int reserve_evntsel_nmi(unsigned int msr)
110{
111 unsigned int counter;
112
113 counter = nmi_evntsel_msr_to_bit(msr);
114 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
115
116 if (!test_and_set_bit(counter, evntsel_nmi_owner))
117 return 1;
118 return 0;
119}
120
121void release_evntsel_nmi(unsigned int msr)
122{
123 unsigned int counter;
124
125 counter = nmi_evntsel_msr_to_bit(msr);
126 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
127
128 clear_bit(counter, evntsel_nmi_owner);
129}
130
131EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
132EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
133EXPORT_SYMBOL(reserve_perfctr_nmi);
134EXPORT_SYMBOL(release_perfctr_nmi);
135EXPORT_SYMBOL(reserve_evntsel_nmi);
136EXPORT_SYMBOL(release_evntsel_nmi);
137
138void disable_lapic_nmi_watchdog(void)
139{
140 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
141
142 if (atomic_read(&nmi_active) <= 0)
143 return;
144
145 on_each_cpu(wd_ops->stop, NULL, 0, 1);
146 wd_ops->unreserve();
147
148 BUG_ON(atomic_read(&nmi_active) != 0);
149}
150
151void enable_lapic_nmi_watchdog(void)
152{
153 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
154
155 /* are we already enabled */
156 if (atomic_read(&nmi_active) != 0)
157 return;
158
159 /* are we lapic aware */
160 if (!wd_ops)
161 return;
162 if (!wd_ops->reserve()) {
163 printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
164 return;
165 }
166
167 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
168 touch_nmi_watchdog();
169}
170
171/*
172 * Activate the NMI watchdog via the local APIC.
173 */
174
175static unsigned int adjust_for_32bit_ctr(unsigned int hz)
176{
177 u64 counter_val;
178 unsigned int retval = hz;
179
180 /*
181 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
182 * are writable, with higher bits sign extending from bit 31.
183 * So, we can only program the counter with 31 bit values and
184 * 32nd bit should be 1, for 33.. to be 1.
185 * Find the appropriate nmi_hz
186 */
187 counter_val = (u64)cpu_khz * 1000;
188 do_div(counter_val, retval);
189 if (counter_val > 0x7fffffffULL) {
190 u64 count = (u64)cpu_khz * 1000;
191 do_div(count, 0x7fffffffUL);
192 retval = count + 1;
193 }
194 return retval;
195}
196
197static void
198write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz)
199{
200 u64 count = (u64)cpu_khz * 1000;
201
202 do_div(count, nmi_hz);
203 if(descr)
204 Dprintk("setting %s to -0x%08Lx\n", descr, count);
205 wrmsrl(perfctr_msr, 0 - count);
206}
207
208static void write_watchdog_counter32(unsigned int perfctr_msr,
209 const char *descr, unsigned nmi_hz)
210{
211 u64 count = (u64)cpu_khz * 1000;
212
213 do_div(count, nmi_hz);
214 if(descr)
215 Dprintk("setting %s to -0x%08Lx\n", descr, count);
216 wrmsr(perfctr_msr, (u32)(-count), 0);
217}
218
219/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface
220 nicely stable so there is not much variety */
221
222#define K7_EVNTSEL_ENABLE (1 << 22)
223#define K7_EVNTSEL_INT (1 << 20)
224#define K7_EVNTSEL_OS (1 << 17)
225#define K7_EVNTSEL_USR (1 << 16)
226#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
227#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
228
229static int setup_k7_watchdog(unsigned nmi_hz)
230{
231 unsigned int perfctr_msr, evntsel_msr;
232 unsigned int evntsel;
233 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
234
235 perfctr_msr = MSR_K7_PERFCTR0;
236 evntsel_msr = MSR_K7_EVNTSEL0;
237
238 wrmsrl(perfctr_msr, 0UL);
239
240 evntsel = K7_EVNTSEL_INT
241 | K7_EVNTSEL_OS
242 | K7_EVNTSEL_USR
243 | K7_NMI_EVENT;
244
245 /* setup the timer */
246 wrmsr(evntsel_msr, evntsel, 0);
247 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);
248 apic_write(APIC_LVTPC, APIC_DM_NMI);
249 evntsel |= K7_EVNTSEL_ENABLE;
250 wrmsr(evntsel_msr, evntsel, 0);
251
252 wd->perfctr_msr = perfctr_msr;
253 wd->evntsel_msr = evntsel_msr;
254 wd->cccr_msr = 0; //unused
255 return 1;
256}
257
258static void single_msr_stop_watchdog(void *arg)
259{
260 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
261
262 wrmsr(wd->evntsel_msr, 0, 0);
263}
264
265static int single_msr_reserve(void)
266{
267 if (!reserve_perfctr_nmi(wd_ops->perfctr))
268 return 0;
269
270 if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
271 release_perfctr_nmi(wd_ops->perfctr);
272 return 0;
273 }
274 return 1;
275}
276
277static void single_msr_unreserve(void)
278{
279 release_evntsel_nmi(wd_ops->perfctr);
280 release_perfctr_nmi(wd_ops->evntsel);
281}
282
283static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
284{
285 /* start the cycle over again */
286 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
287}
288
289static struct wd_ops k7_wd_ops = {
290 .reserve = single_msr_reserve,
291 .unreserve = single_msr_unreserve,
292 .setup = setup_k7_watchdog,
293 .rearm = single_msr_rearm,
294 .stop = single_msr_stop_watchdog,
295 .perfctr = MSR_K7_PERFCTR0,
296 .evntsel = MSR_K7_EVNTSEL0,
297 .checkbit = 1ULL<<63,
298};
299
300/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */
301
302#define P6_EVNTSEL0_ENABLE (1 << 22)
303#define P6_EVNTSEL_INT (1 << 20)
304#define P6_EVNTSEL_OS (1 << 17)
305#define P6_EVNTSEL_USR (1 << 16)
306#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
307#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
308
309static int setup_p6_watchdog(unsigned nmi_hz)
310{
311 unsigned int perfctr_msr, evntsel_msr;
312 unsigned int evntsel;
313 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
314
315 perfctr_msr = MSR_P6_PERFCTR0;
316 evntsel_msr = MSR_P6_EVNTSEL0;
317
318 wrmsrl(perfctr_msr, 0UL);
319
320 evntsel = P6_EVNTSEL_INT
321 | P6_EVNTSEL_OS
322 | P6_EVNTSEL_USR
323 | P6_NMI_EVENT;
324
325 /* setup the timer */
326 wrmsr(evntsel_msr, evntsel, 0);
327 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
328 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);
329 apic_write(APIC_LVTPC, APIC_DM_NMI);
330 evntsel |= P6_EVNTSEL0_ENABLE;
331 wrmsr(evntsel_msr, evntsel, 0);
332
333 wd->perfctr_msr = perfctr_msr;
334 wd->evntsel_msr = evntsel_msr;
335 wd->cccr_msr = 0; //unused
336 return 1;
337}
338
339static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
340{
341 /* P6 based Pentium M need to re-unmask
342 * the apic vector but it doesn't hurt
343 * other P6 variant.
344 * ArchPerfom/Core Duo also needs this */
345 apic_write(APIC_LVTPC, APIC_DM_NMI);
346 /* P6/ARCH_PERFMON has 32 bit counter write */
347 write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz);
348}
349
350static struct wd_ops p6_wd_ops = {
351 .reserve = single_msr_reserve,
352 .unreserve = single_msr_unreserve,
353 .setup = setup_p6_watchdog,
354 .rearm = p6_rearm,
355 .stop = single_msr_stop_watchdog,
356 .perfctr = MSR_P6_PERFCTR0,
357 .evntsel = MSR_P6_EVNTSEL0,
358 .checkbit = 1ULL<<39,
359};
360
361/* Intel P4 performance counters. By far the most complicated of all. */
362
363#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
364#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
365#define P4_ESCR_OS (1<<3)
366#define P4_ESCR_USR (1<<2)
367#define P4_CCCR_OVF_PMI0 (1<<26)
368#define P4_CCCR_OVF_PMI1 (1<<27)
369#define P4_CCCR_THRESHOLD(N) ((N)<<20)
370#define P4_CCCR_COMPLEMENT (1<<19)
371#define P4_CCCR_COMPARE (1<<18)
372#define P4_CCCR_REQUIRED (3<<16)
373#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
374#define P4_CCCR_ENABLE (1<<12)
375#define P4_CCCR_OVF (1<<31)
376
377/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
378 CRU_ESCR0 (with any non-null event selector) through a complemented
379 max threshold. [IA32-Vol3, Section 14.9.9] */
380
381static int setup_p4_watchdog(unsigned nmi_hz)
382{
383 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
384 unsigned int evntsel, cccr_val;
385 unsigned int misc_enable, dummy;
386 unsigned int ht_num;
387 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
388
389 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
390 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
391 return 0;
392
393#ifdef CONFIG_SMP
394 /* detect which hyperthread we are on */
395 if (smp_num_siblings == 2) {
396 unsigned int ebx, apicid;
397
398 ebx = cpuid_ebx(1);
399 apicid = (ebx >> 24) & 0xff;
400 ht_num = apicid & 1;
401 } else
402#endif
403 ht_num = 0;
404
405 /* performance counters are shared resources
406 * assign each hyperthread its own set
407 * (re-use the ESCR0 register, seems safe
408 * and keeps the cccr_val the same)
409 */
410 if (!ht_num) {
411 /* logical cpu 0 */
412 perfctr_msr = MSR_P4_IQ_PERFCTR0;
413 evntsel_msr = MSR_P4_CRU_ESCR0;
414 cccr_msr = MSR_P4_IQ_CCCR0;
415 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
416 } else {
417 /* logical cpu 1 */
418 perfctr_msr = MSR_P4_IQ_PERFCTR1;
419 evntsel_msr = MSR_P4_CRU_ESCR0;
420 cccr_msr = MSR_P4_IQ_CCCR1;
421 cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
422 }
423
424 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
425 | P4_ESCR_OS
426 | P4_ESCR_USR;
427
428 cccr_val |= P4_CCCR_THRESHOLD(15)
429 | P4_CCCR_COMPLEMENT
430 | P4_CCCR_COMPARE
431 | P4_CCCR_REQUIRED;
432
433 wrmsr(evntsel_msr, evntsel, 0);
434 wrmsr(cccr_msr, cccr_val, 0);
435 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
436 apic_write(APIC_LVTPC, APIC_DM_NMI);
437 cccr_val |= P4_CCCR_ENABLE;
438 wrmsr(cccr_msr, cccr_val, 0);
439 wd->perfctr_msr = perfctr_msr;
440 wd->evntsel_msr = evntsel_msr;
441 wd->cccr_msr = cccr_msr;
442 return 1;
443}
444
445static void stop_p4_watchdog(void *arg)
446{
447 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
448 wrmsr(wd->cccr_msr, 0, 0);
449 wrmsr(wd->evntsel_msr, 0, 0);
450}
451
452static int p4_reserve(void)
453{
454 if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
455 return 0;
456#ifdef CONFIG_SMP
457 if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
458 goto fail1;
459#endif
460 if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
461 goto fail2;
462 /* RED-PEN why is ESCR1 not reserved here? */
463 return 1;
464 fail2:
465#ifdef CONFIG_SMP
466 if (smp_num_siblings > 1)
467 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
468 fail1:
469#endif
470 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
471 return 0;
472}
473
474static void p4_unreserve(void)
475{
476#ifdef CONFIG_SMP
477 if (smp_num_siblings > 1)
478 release_evntsel_nmi(MSR_P4_IQ_PERFCTR1);
479#endif
480 release_evntsel_nmi(MSR_P4_IQ_PERFCTR0);
481 release_perfctr_nmi(MSR_P4_CRU_ESCR0);
482}
483
484static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
485{
486 unsigned dummy;
487 /*
488 * P4 quirks:
489 * - An overflown perfctr will assert its interrupt
490 * until the OVF flag in its CCCR is cleared.
491 * - LVTPC is masked on interrupt and must be
492 * unmasked by the LVTPC handler.
493 */
494 rdmsrl(wd->cccr_msr, dummy);
495 dummy &= ~P4_CCCR_OVF;
496 wrmsrl(wd->cccr_msr, dummy);
497 apic_write(APIC_LVTPC, APIC_DM_NMI);
498 /* start the cycle over again */
499 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
500}
501
502static struct wd_ops p4_wd_ops = {
503 .reserve = p4_reserve,
504 .unreserve = p4_unreserve,
505 .setup = setup_p4_watchdog,
506 .rearm = p4_rearm,
507 .stop = stop_p4_watchdog,
508 /* RED-PEN this is wrong for the other sibling */
509 .perfctr = MSR_P4_BPU_PERFCTR0,
510 .evntsel = MSR_P4_BSU_ESCR0,
511 .checkbit = 1ULL<<39,
512};
513
514/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully
515 all future Intel CPUs. */
516
517#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
518#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
519
520static int setup_intel_arch_watchdog(unsigned nmi_hz)
521{
522 unsigned int ebx;
523 union cpuid10_eax eax;
524 unsigned int unused;
525 unsigned int perfctr_msr, evntsel_msr;
526 unsigned int evntsel;
527 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
528
529 /*
530 * Check whether the Architectural PerfMon supports
531 * Unhalted Core Cycles Event or not.
532 * NOTE: Corresponding bit = 0 in ebx indicates event present.
533 */
534 cpuid(10, &(eax.full), &ebx, &unused, &unused);
535 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
536 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
537 return 0;
538
539 perfctr_msr = MSR_ARCH_PERFMON_PERFCTR1;
540 evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL1;
541
542 wrmsrl(perfctr_msr, 0UL);
543
544 evntsel = ARCH_PERFMON_EVENTSEL_INT
545 | ARCH_PERFMON_EVENTSEL_OS
546 | ARCH_PERFMON_EVENTSEL_USR
547 | ARCH_PERFMON_NMI_EVENT_SEL
548 | ARCH_PERFMON_NMI_EVENT_UMASK;
549
550 /* setup the timer */
551 wrmsr(evntsel_msr, evntsel, 0);
552 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
553 write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
554 apic_write(APIC_LVTPC, APIC_DM_NMI);
555 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
556 wrmsr(evntsel_msr, evntsel, 0);
557
558 wd->perfctr_msr = perfctr_msr;
559 wd->evntsel_msr = evntsel_msr;
560 wd->cccr_msr = 0; //unused
561 wd_ops->checkbit = 1ULL << (eax.split.bit_width - 1);
562 return 1;
563}
564
565static struct wd_ops intel_arch_wd_ops = {
566 .reserve = single_msr_reserve,
567 .unreserve = single_msr_unreserve,
568 .setup = setup_intel_arch_watchdog,
569 .rearm = p6_rearm,
570 .stop = single_msr_stop_watchdog,
571 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
572 .evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
573};
574
575static void probe_nmi_watchdog(void)
576{
577 switch (boot_cpu_data.x86_vendor) {
578 case X86_VENDOR_AMD:
579 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
580 boot_cpu_data.x86 != 16)
581 return;
582 wd_ops = &k7_wd_ops;
583 break;
584 case X86_VENDOR_INTEL:
585 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
586 wd_ops = &intel_arch_wd_ops;
587 break;
588 }
589 switch (boot_cpu_data.x86) {
590 case 6:
591 if (boot_cpu_data.x86_model > 0xd)
592 return;
593
594 wd_ops = &p6_wd_ops;
595 break;
596 case 15:
597 if (boot_cpu_data.x86_model > 0x4)
598 return;
599
600 wd_ops = &p4_wd_ops;
601 break;
602 default:
603 return;
604 }
605 break;
606 }
607}
608
609/* Interface to nmi.c */
610
611int lapic_watchdog_init(unsigned nmi_hz)
612{
613 if (!wd_ops) {
614 probe_nmi_watchdog();
615 if (!wd_ops)
616 return -1;
617 }
618
619 if (!(wd_ops->setup(nmi_hz))) {
620 printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
621 raw_smp_processor_id());
622 return -1;
623 }
624
625 return 0;
626}
627
628void lapic_watchdog_stop(void)
629{
630 if (wd_ops)
631 wd_ops->stop(NULL);
632}
633
634unsigned lapic_adjust_nmi_hz(unsigned hz)
635{
636 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
637 if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
638 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
639 hz = adjust_for_32bit_ctr(hz);
640 return hz;
641}
642
643int lapic_wd_event(unsigned nmi_hz)
644{
645 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
646 u64 ctr;
647 rdmsrl(wd->perfctr_msr, ctr);
648 if (ctr & wd_ops->checkbit) { /* perfctr still running? */
649 return 0;
650 }
651 wd_ops->rearm(wd, nmi_hz);
652 return 1;
653}
654
655int lapic_watchdog_ok(void)
656{
657 return wd_ops != NULL;
658}
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index 47e3ebbfb28d..89d91e6cc972 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -72,8 +72,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
72 "stc", 72 "stc",
73 "100mhzsteps", 73 "100mhzsteps",
74 "hwpstate", 74 "hwpstate",
75 NULL, 75 "", /* constant_tsc - moved to flags */
76 NULL, /* constant_tsc - moved to flags */
77 /* nothing */ 76 /* nothing */
78 }; 77 };
79 struct cpuinfo_x86 *c = v; 78 struct cpuinfo_x86 *c = v;
diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c
index 9317f7414989..50076f22e90f 100644
--- a/arch/i386/kernel/cpu/rise.c
+++ b/arch/i386/kernel/cpu/rise.c
@@ -50,12 +50,3 @@ int __init rise_init_cpu(void)
50 return 0; 50 return 0;
51} 51}
52 52
53//early_arch_initcall(rise_init_cpu);
54
55static int __init rise_exit_cpu(void)
56{
57 cpu_devs[X86_VENDOR_RISE] = NULL;
58 return 0;
59}
60
61late_initcall(rise_exit_cpu);
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c
index 5678d46863c6..6471a5a13202 100644
--- a/arch/i386/kernel/cpu/transmeta.c
+++ b/arch/i386/kernel/cpu/transmeta.c
@@ -112,13 +112,3 @@ int __init transmeta_init_cpu(void)
112 cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev; 112 cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev;
113 return 0; 113 return 0;
114} 114}
115
116//early_arch_initcall(transmeta_init_cpu);
117
118static int __init transmeta_exit_cpu(void)
119{
120 cpu_devs[X86_VENDOR_TRANSMETA] = NULL;
121 return 0;
122}
123
124late_initcall(transmeta_exit_cpu);
diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c
index 1bf3f87e9c5b..a7a4e75bdcd7 100644
--- a/arch/i386/kernel/cpu/umc.c
+++ b/arch/i386/kernel/cpu/umc.c
@@ -24,13 +24,3 @@ int __init umc_init_cpu(void)
24 cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev; 24 cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev;
25 return 0; 25 return 0;
26} 26}
27
28//early_arch_initcall(umc_init_cpu);
29
30static int __init umc_exit_cpu(void)
31{
32 cpu_devs[X86_VENDOR_UMC] = NULL;
33 return 0;
34}
35
36late_initcall(umc_exit_cpu);
diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c
index b4d14c2eb345..265c5597efb0 100644
--- a/arch/i386/kernel/doublefault.c
+++ b/arch/i386/kernel/doublefault.c
@@ -33,7 +33,7 @@ static void doublefault_fn(void)
33 printk("double fault, tss at %08lx\n", tss); 33 printk("double fault, tss at %08lx\n", tss);
34 34
35 if (ptr_ok(tss)) { 35 if (ptr_ok(tss)) {
36 struct tss_struct *t = (struct tss_struct *)tss; 36 struct i386_hw_tss *t = (struct i386_hw_tss *)tss;
37 37
38 printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp); 38 printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp);
39 39
@@ -49,18 +49,21 @@ static void doublefault_fn(void)
49} 49}
50 50
51struct tss_struct doublefault_tss __cacheline_aligned = { 51struct tss_struct doublefault_tss __cacheline_aligned = {
52 .esp0 = STACK_START, 52 .x86_tss = {
53 .ss0 = __KERNEL_DS, 53 .esp0 = STACK_START,
54 .ldt = 0, 54 .ss0 = __KERNEL_DS,
55 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, 55 .ldt = 0,
56 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
56 57
57 .eip = (unsigned long) doublefault_fn, 58 .eip = (unsigned long) doublefault_fn,
58 .eflags = X86_EFLAGS_SF | 0x2, /* 0x2 bit is always set */ 59 /* 0x2 bit is always set */
59 .esp = STACK_START, 60 .eflags = X86_EFLAGS_SF | 0x2,
60 .es = __USER_DS, 61 .esp = STACK_START,
61 .cs = __KERNEL_CS, 62 .es = __USER_DS,
62 .ss = __KERNEL_DS, 63 .cs = __KERNEL_CS,
63 .ds = __USER_DS, 64 .ss = __KERNEL_DS,
65 .ds = __USER_DS,
64 66
65 .__cr3 = __pa(swapper_pg_dir) 67 .__cr3 = __pa(swapper_pg_dir)
68 }
66}; 69};
diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c
index 70f39560846a..9645bb51f76a 100644
--- a/arch/i386/kernel/e820.c
+++ b/arch/i386/kernel/e820.c
@@ -161,26 +161,27 @@ static struct resource standard_io_resources[] = { {
161 161
162static int __init romsignature(const unsigned char *rom) 162static int __init romsignature(const unsigned char *rom)
163{ 163{
164 const unsigned short * const ptr = (const unsigned short *)rom;
164 unsigned short sig; 165 unsigned short sig;
165 166
166 return probe_kernel_address((const unsigned short *)rom, sig) == 0 && 167 return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
167 sig == ROMSIGNATURE;
168} 168}
169 169
170static int __init romchecksum(unsigned char *rom, unsigned long length) 170static int __init romchecksum(const unsigned char *rom, unsigned long length)
171{ 171{
172 unsigned char sum; 172 unsigned char sum, c;
173 173
174 for (sum = 0; length; length--) 174 for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
175 sum += *rom++; 175 sum += c;
176 return sum == 0; 176 return !length && !sum;
177} 177}
178 178
179static void __init probe_roms(void) 179static void __init probe_roms(void)
180{ 180{
181 const unsigned char *rom;
181 unsigned long start, length, upper; 182 unsigned long start, length, upper;
182 unsigned char *rom; 183 unsigned char c;
183 int i; 184 int i;
184 185
185 /* video rom */ 186 /* video rom */
186 upper = adapter_rom_resources[0].start; 187 upper = adapter_rom_resources[0].start;
@@ -191,8 +192,11 @@ static void __init probe_roms(void)
191 192
192 video_rom_resource.start = start; 193 video_rom_resource.start = start;
193 194
195 if (probe_kernel_address(rom + 2, c) != 0)
196 continue;
197
194 /* 0 < length <= 0x7f * 512, historically */ 198 /* 0 < length <= 0x7f * 512, historically */
195 length = rom[2] * 512; 199 length = c * 512;
196 200
197 /* if checksum okay, trust length byte */ 201 /* if checksum okay, trust length byte */
198 if (length && romchecksum(rom, length)) 202 if (length && romchecksum(rom, length))
@@ -226,8 +230,11 @@ static void __init probe_roms(void)
226 if (!romsignature(rom)) 230 if (!romsignature(rom))
227 continue; 231 continue;
228 232
233 if (probe_kernel_address(rom + 2, c) != 0)
234 continue;
235
229 /* 0 < length <= 0x7f * 512, historically */ 236 /* 0 < length <= 0x7f * 512, historically */
230 length = rom[2] * 512; 237 length = c * 512;
231 238
232 /* but accept any length that fits if checksum okay */ 239 /* but accept any length that fits if checksum okay */
233 if (!length || start + length > upper || !romchecksum(rom, length)) 240 if (!length || start + length > upper || !romchecksum(rom, length))
@@ -386,10 +393,8 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
386 ____________________33__ 393 ____________________33__
387 ______________________4_ 394 ______________________4_
388 */ 395 */
389 printk("sanitize start\n");
390 /* if there's only one memory region, don't bother */ 396 /* if there's only one memory region, don't bother */
391 if (*pnr_map < 2) { 397 if (*pnr_map < 2) {
392 printk("sanitize bail 0\n");
393 return -1; 398 return -1;
394 } 399 }
395 400
@@ -398,7 +403,6 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
398 /* bail out if we find any unreasonable addresses in bios map */ 403 /* bail out if we find any unreasonable addresses in bios map */
399 for (i=0; i<old_nr; i++) 404 for (i=0; i<old_nr; i++)
400 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) { 405 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
401 printk("sanitize bail 1\n");
402 return -1; 406 return -1;
403 } 407 }
404 408
@@ -494,7 +498,6 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
494 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); 498 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
495 *pnr_map = new_nr; 499 *pnr_map = new_nr;
496 500
497 printk("sanitize end\n");
498 return 0; 501 return 0;
499} 502}
500 503
@@ -525,7 +528,6 @@ int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
525 unsigned long long size = biosmap->size; 528 unsigned long long size = biosmap->size;
526 unsigned long long end = start + size; 529 unsigned long long end = start + size;
527 unsigned long type = biosmap->type; 530 unsigned long type = biosmap->type;
528 printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type);
529 531
530 /* Overflow in 64 bits? Ignore the memory map. */ 532 /* Overflow in 64 bits? Ignore the memory map. */
531 if (start > end) 533 if (start > end)
@@ -536,17 +538,11 @@ int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
536 * Not right. Fix it up. 538 * Not right. Fix it up.
537 */ 539 */
538 if (type == E820_RAM) { 540 if (type == E820_RAM) {
539 printk("copy_e820_map() type is E820_RAM\n");
540 if (start < 0x100000ULL && end > 0xA0000ULL) { 541 if (start < 0x100000ULL && end > 0xA0000ULL) {
541 printk("copy_e820_map() lies in range...\n"); 542 if (start < 0xA0000ULL)
542 if (start < 0xA0000ULL) {
543 printk("copy_e820_map() start < 0xA0000ULL\n");
544 add_memory_region(start, 0xA0000ULL-start, type); 543 add_memory_region(start, 0xA0000ULL-start, type);
545 } 544 if (end <= 0x100000ULL)
546 if (end <= 0x100000ULL) {
547 printk("copy_e820_map() end <= 0x100000ULL\n");
548 continue; 545 continue;
549 }
550 start = 0x100000ULL; 546 start = 0x100000ULL;
551 size = end - start; 547 size = end - start;
552 } 548 }
@@ -818,6 +814,26 @@ void __init limit_regions(unsigned long long size)
818 print_memory_map("limit_regions endfunc"); 814 print_memory_map("limit_regions endfunc");
819} 815}
820 816
817/*
818 * This function checks if any part of the range <start,end> is mapped
819 * with type.
820 */
821int
822e820_any_mapped(u64 start, u64 end, unsigned type)
823{
824 int i;
825 for (i = 0; i < e820.nr_map; i++) {
826 const struct e820entry *ei = &e820.map[i];
827 if (type && ei->type != type)
828 continue;
829 if (ei->addr >= end || ei->addr + ei->size <= start)
830 continue;
831 return 1;
832 }
833 return 0;
834}
835EXPORT_SYMBOL_GPL(e820_any_mapped);
836
821 /* 837 /*
822 * This function checks if the entire range <start,end> is mapped with type. 838 * This function checks if the entire range <start,end> is mapped with type.
823 * 839 *
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index 8f9c624ace6f..dd9e7faafa7c 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -69,13 +69,11 @@ static void efi_call_phys_prelog(void) __acquires(efi_rt_lock)
69{ 69{
70 unsigned long cr4; 70 unsigned long cr4;
71 unsigned long temp; 71 unsigned long temp;
72 struct Xgt_desc_struct *cpu_gdt_descr; 72 struct Xgt_desc_struct gdt_descr;
73 73
74 spin_lock(&efi_rt_lock); 74 spin_lock(&efi_rt_lock);
75 local_irq_save(efi_rt_eflags); 75 local_irq_save(efi_rt_eflags);
76 76
77 cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0);
78
79 /* 77 /*
80 * If I don't have PSE, I should just duplicate two entries in page 78 * If I don't have PSE, I should just duplicate two entries in page
81 * directory. If I have PSE, I just need to duplicate one entry in 79 * directory. If I have PSE, I just need to duplicate one entry in
@@ -105,17 +103,19 @@ static void efi_call_phys_prelog(void) __acquires(efi_rt_lock)
105 */ 103 */
106 local_flush_tlb(); 104 local_flush_tlb();
107 105
108 cpu_gdt_descr->address = __pa(cpu_gdt_descr->address); 106 gdt_descr.address = __pa(get_cpu_gdt_table(0));
109 load_gdt(cpu_gdt_descr); 107 gdt_descr.size = GDT_SIZE - 1;
108 load_gdt(&gdt_descr);
110} 109}
111 110
112static void efi_call_phys_epilog(void) __releases(efi_rt_lock) 111static void efi_call_phys_epilog(void) __releases(efi_rt_lock)
113{ 112{
114 unsigned long cr4; 113 unsigned long cr4;
115 struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); 114 struct Xgt_desc_struct gdt_descr;
116 115
117 cpu_gdt_descr->address = (unsigned long)__va(cpu_gdt_descr->address); 116 gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
118 load_gdt(cpu_gdt_descr); 117 gdt_descr.size = GDT_SIZE - 1;
118 load_gdt(&gdt_descr);
119 119
120 cr4 = read_cr4(); 120 cr4 = read_cr4();
121 121
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 18bddcb8e9e8..b1f16ee65e4d 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -15,7 +15,7 @@
15 * I changed all the .align's to 4 (16 byte alignment), as that's faster 15 * I changed all the .align's to 4 (16 byte alignment), as that's faster
16 * on a 486. 16 * on a 486.
17 * 17 *
18 * Stack layout in 'ret_from_system_call': 18 * Stack layout in 'syscall_exit':
19 * ptrace needs to have all regs on the stack. 19 * ptrace needs to have all regs on the stack.
20 * if the order here is changed, it needs to be 20 * if the order here is changed, it needs to be
21 * updated in fork.c:copy_process, signal.c:do_signal, 21 * updated in fork.c:copy_process, signal.c:do_signal,
@@ -132,7 +132,7 @@ VM_MASK = 0x00020000
132 movl $(__USER_DS), %edx; \ 132 movl $(__USER_DS), %edx; \
133 movl %edx, %ds; \ 133 movl %edx, %ds; \
134 movl %edx, %es; \ 134 movl %edx, %es; \
135 movl $(__KERNEL_PDA), %edx; \ 135 movl $(__KERNEL_PERCPU), %edx; \
136 movl %edx, %fs 136 movl %edx, %fs
137 137
138#define RESTORE_INT_REGS \ 138#define RESTORE_INT_REGS \
@@ -305,16 +305,12 @@ sysenter_past_esp:
305 pushl $(__USER_CS) 305 pushl $(__USER_CS)
306 CFI_ADJUST_CFA_OFFSET 4 306 CFI_ADJUST_CFA_OFFSET 4
307 /*CFI_REL_OFFSET cs, 0*/ 307 /*CFI_REL_OFFSET cs, 0*/
308#ifndef CONFIG_COMPAT_VDSO
309 /* 308 /*
310 * Push current_thread_info()->sysenter_return to the stack. 309 * Push current_thread_info()->sysenter_return to the stack.
311 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words 310 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
312 * pushed above; +8 corresponds to copy_thread's esp0 setting. 311 * pushed above; +8 corresponds to copy_thread's esp0 setting.
313 */ 312 */
314 pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) 313 pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
315#else
316 pushl $SYSENTER_RETURN
317#endif
318 CFI_ADJUST_CFA_OFFSET 4 314 CFI_ADJUST_CFA_OFFSET 4
319 CFI_REL_OFFSET eip, 0 315 CFI_REL_OFFSET eip, 0
320 316
@@ -342,7 +338,7 @@ sysenter_past_esp:
342 jae syscall_badsys 338 jae syscall_badsys
343 call *sys_call_table(,%eax,4) 339 call *sys_call_table(,%eax,4)
344 movl %eax,PT_EAX(%esp) 340 movl %eax,PT_EAX(%esp)
345 DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX) 341 DISABLE_INTERRUPTS(CLBR_ANY)
346 TRACE_IRQS_OFF 342 TRACE_IRQS_OFF
347 movl TI_flags(%ebp), %ecx 343 movl TI_flags(%ebp), %ecx
348 testw $_TIF_ALLWORK_MASK, %cx 344 testw $_TIF_ALLWORK_MASK, %cx
@@ -560,9 +556,7 @@ END(syscall_badsys)
560 556
561#define FIXUP_ESPFIX_STACK \ 557#define FIXUP_ESPFIX_STACK \
562 /* since we are on a wrong stack, we cant make it a C code :( */ \ 558 /* since we are on a wrong stack, we cant make it a C code :( */ \
563 movl %fs:PDA_cpu, %ebx; \ 559 PER_CPU(gdt_page, %ebx); \
564 PER_CPU(cpu_gdt_descr, %ebx); \
565 movl GDS_address(%ebx), %ebx; \
566 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ 560 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
567 addl %esp, %eax; \ 561 addl %esp, %eax; \
568 pushl $__KERNEL_DS; \ 562 pushl $__KERNEL_DS; \
@@ -635,7 +629,7 @@ ENTRY(name) \
635 SAVE_ALL; \ 629 SAVE_ALL; \
636 TRACE_IRQS_OFF \ 630 TRACE_IRQS_OFF \
637 movl %esp,%eax; \ 631 movl %esp,%eax; \
638 call smp_/**/name; \ 632 call smp_##name; \
639 jmp ret_from_intr; \ 633 jmp ret_from_intr; \
640 CFI_ENDPROC; \ 634 CFI_ENDPROC; \
641ENDPROC(name) 635ENDPROC(name)
@@ -643,11 +637,6 @@ ENDPROC(name)
643/* The include is where all of the SMP etc. interrupts come from */ 637/* The include is where all of the SMP etc. interrupts come from */
644#include "entry_arch.h" 638#include "entry_arch.h"
645 639
646/* This alternate entry is needed because we hijack the apic LVTT */
647#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
648BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
649#endif
650
651KPROBE_ENTRY(page_fault) 640KPROBE_ENTRY(page_fault)
652 RING0_EC_FRAME 641 RING0_EC_FRAME
653 pushl $do_page_fault 642 pushl $do_page_fault
@@ -686,7 +675,7 @@ error_code:
686 pushl %fs 675 pushl %fs
687 CFI_ADJUST_CFA_OFFSET 4 676 CFI_ADJUST_CFA_OFFSET 4
688 /*CFI_REL_OFFSET fs, 0*/ 677 /*CFI_REL_OFFSET fs, 0*/
689 movl $(__KERNEL_PDA), %ecx 678 movl $(__KERNEL_PERCPU), %ecx
690 movl %ecx, %fs 679 movl %ecx, %fs
691 UNWIND_ESPFIX_STACK 680 UNWIND_ESPFIX_STACK
692 popl %ecx 681 popl %ecx
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 3fa7f9389afe..9b10af65faaa 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -34,17 +34,32 @@
34 34
35/* 35/*
36 * This is how much memory *in addition to the memory covered up to 36 * This is how much memory *in addition to the memory covered up to
37 * and including _end* we need mapped initially. We need one bit for 37 * and including _end* we need mapped initially.
38 * each possible page, but only in low memory, which means 38 * We need:
39 * 2^32/4096/8 = 128K worst case (4G/4G split.) 39 * - one bit for each possible page, but only in low memory, which means
40 * 2^32/4096/8 = 128K worst case (4G/4G split.)
41 * - enough space to map all low memory, which means
42 * (2^32/4096) / 1024 pages (worst case, non PAE)
43 * (2^32/4096) / 512 + 4 pages (worst case for PAE)
44 * - a few pages for allocator use before the kernel pagetable has
45 * been set up
40 * 46 *
41 * Modulo rounding, each megabyte assigned here requires a kilobyte of 47 * Modulo rounding, each megabyte assigned here requires a kilobyte of
42 * memory, which is currently unreclaimed. 48 * memory, which is currently unreclaimed.
43 * 49 *
44 * This should be a multiple of a page. 50 * This should be a multiple of a page.
45 */ 51 */
46#define INIT_MAP_BEYOND_END (128*1024) 52LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
47 53
54#if PTRS_PER_PMD > 1
55PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
56#else
57PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
58#endif
59BOOTBITMAP_SIZE = LOW_PAGES / 8
60ALLOCATOR_SLOP = 4
61
62INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
48 63
49/* 64/*
50 * 32-bit kernel entrypoint; only used by the boot CPU. On entry, 65 * 32-bit kernel entrypoint; only used by the boot CPU. On entry,
@@ -147,8 +162,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
147/* 162/*
148 * Non-boot CPU entry point; entered from trampoline.S 163 * Non-boot CPU entry point; entered from trampoline.S
149 * We can't lgdt here, because lgdt itself uses a data segment, but 164 * We can't lgdt here, because lgdt itself uses a data segment, but
150 * we know the trampoline has already loaded the boot_gdt_table GDT 165 * we know the trampoline has already loaded the boot_gdt for us.
151 * for us.
152 * 166 *
153 * If cpu hotplug is not supported then this code can go in init section 167 * If cpu hotplug is not supported then this code can go in init section
154 * which will be freed later 168 * which will be freed later
@@ -318,12 +332,12 @@ is386: movl $2,%ecx # set MP
318 movl %eax,%cr0 332 movl %eax,%cr0
319 333
320 call check_x87 334 call check_x87
321 call setup_pda
322 lgdt early_gdt_descr 335 lgdt early_gdt_descr
323 lidt idt_descr 336 lidt idt_descr
324 ljmp $(__KERNEL_CS),$1f 337 ljmp $(__KERNEL_CS),$1f
3251: movl $(__KERNEL_DS),%eax # reload all the segment registers 3381: movl $(__KERNEL_DS),%eax # reload all the segment registers
326 movl %eax,%ss # after changing gdt. 339 movl %eax,%ss # after changing gdt.
340 movl %eax,%fs # gets reset once there's real percpu
327 341
328 movl $(__USER_DS),%eax # DS/ES contains default USER segment 342 movl $(__USER_DS),%eax # DS/ES contains default USER segment
329 movl %eax,%ds 343 movl %eax,%ds
@@ -333,16 +347,17 @@ is386: movl $2,%ecx # set MP
333 movl %eax,%gs 347 movl %eax,%gs
334 lldt %ax 348 lldt %ax
335 349
336 movl $(__KERNEL_PDA),%eax
337 mov %eax,%fs
338
339 cld # gcc2 wants the direction flag cleared at all times 350 cld # gcc2 wants the direction flag cleared at all times
340 pushl $0 # fake return address for unwinder 351 pushl $0 # fake return address for unwinder
341#ifdef CONFIG_SMP 352#ifdef CONFIG_SMP
342 movb ready, %cl 353 movb ready, %cl
343 movb $1, ready 354 movb $1, ready
344 cmpb $0,%cl # the first CPU calls start_kernel 355 cmpb $0,%cl # the first CPU calls start_kernel
345 jne initialize_secondary # all other CPUs call initialize_secondary 356 je 1f
357 movl $(__KERNEL_PERCPU), %eax
358 movl %eax,%fs # set this cpu's percpu
359 jmp initialize_secondary # all other CPUs call initialize_secondary
3601:
346#endif /* CONFIG_SMP */ 361#endif /* CONFIG_SMP */
347 jmp start_kernel 362 jmp start_kernel
348 363
@@ -366,23 +381,6 @@ check_x87:
366 ret 381 ret
367 382
368/* 383/*
369 * Point the GDT at this CPU's PDA. On boot this will be
370 * cpu_gdt_table and boot_pda; for secondary CPUs, these will be
371 * that CPU's GDT and PDA.
372 */
373ENTRY(setup_pda)
374 /* get the PDA pointer */
375 movl start_pda, %eax
376
377 /* slot the PDA address into the GDT */
378 mov early_gdt_descr+2, %ecx
379 mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */
380 shr $16, %eax
381 mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */
382 mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */
383 ret
384
385/*
386 * setup_idt 384 * setup_idt
387 * 385 *
388 * sets up a idt with 256 entries pointing to 386 * sets up a idt with 256 entries pointing to
@@ -554,9 +552,6 @@ ENTRY(empty_zero_page)
554 * This starts the data section. 552 * This starts the data section.
555 */ 553 */
556.data 554.data
557ENTRY(start_pda)
558 .long boot_pda
559
560ENTRY(stack_start) 555ENTRY(stack_start)
561 .long init_thread_union+THREAD_SIZE 556 .long init_thread_union+THREAD_SIZE
562 .long __BOOT_DS 557 .long __BOOT_DS
@@ -588,7 +583,7 @@ fault_msg:
588 .word 0 # 32 bit align gdt_desc.address 583 .word 0 # 32 bit align gdt_desc.address
589boot_gdt_descr: 584boot_gdt_descr:
590 .word __BOOT_DS+7 585 .word __BOOT_DS+7
591 .long boot_gdt_table - __PAGE_OFFSET 586 .long boot_gdt - __PAGE_OFFSET
592 587
593 .word 0 # 32-bit align idt_desc.address 588 .word 0 # 32-bit align idt_desc.address
594idt_descr: 589idt_descr:
@@ -599,67 +594,14 @@ idt_descr:
599 .word 0 # 32 bit align gdt_desc.address 594 .word 0 # 32 bit align gdt_desc.address
600ENTRY(early_gdt_descr) 595ENTRY(early_gdt_descr)
601 .word GDT_ENTRIES*8-1 596 .word GDT_ENTRIES*8-1
602 .long cpu_gdt_table 597 .long per_cpu__gdt_page /* Overwritten for secondary CPUs */
603 598
604/* 599/*
605 * The boot_gdt_table must mirror the equivalent in setup.S and is 600 * The boot_gdt must mirror the equivalent in setup.S and is
606 * used only for booting. 601 * used only for booting.
607 */ 602 */
608 .align L1_CACHE_BYTES 603 .align L1_CACHE_BYTES
609ENTRY(boot_gdt_table) 604ENTRY(boot_gdt)
610 .fill GDT_ENTRY_BOOT_CS,8,0 605 .fill GDT_ENTRY_BOOT_CS,8,0
611 .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ 606 .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */
612 .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ 607 .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */
613
614/*
615 * The Global Descriptor Table contains 28 quadwords, per-CPU.
616 */
617 .align L1_CACHE_BYTES
618ENTRY(cpu_gdt_table)
619 .quad 0x0000000000000000 /* NULL descriptor */
620 .quad 0x0000000000000000 /* 0x0b reserved */
621 .quad 0x0000000000000000 /* 0x13 reserved */
622 .quad 0x0000000000000000 /* 0x1b reserved */
623 .quad 0x0000000000000000 /* 0x20 unused */
624 .quad 0x0000000000000000 /* 0x28 unused */
625 .quad 0x0000000000000000 /* 0x33 TLS entry 1 */
626 .quad 0x0000000000000000 /* 0x3b TLS entry 2 */
627 .quad 0x0000000000000000 /* 0x43 TLS entry 3 */
628 .quad 0x0000000000000000 /* 0x4b reserved */
629 .quad 0x0000000000000000 /* 0x53 reserved */
630 .quad 0x0000000000000000 /* 0x5b reserved */
631
632 .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
633 .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
634 .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */
635 .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */
636
637 .quad 0x0000000000000000 /* 0x80 TSS descriptor */
638 .quad 0x0000000000000000 /* 0x88 LDT descriptor */
639
640 /*
641 * Segments used for calling PnP BIOS have byte granularity.
642 * They code segments and data segments have fixed 64k limits,
643 * the transfer segment sizes are set at run time.
644 */
645 .quad 0x00409a000000ffff /* 0x90 32-bit code */
646 .quad 0x00009a000000ffff /* 0x98 16-bit code */
647 .quad 0x000092000000ffff /* 0xa0 16-bit data */
648 .quad 0x0000920000000000 /* 0xa8 16-bit data */
649 .quad 0x0000920000000000 /* 0xb0 16-bit data */
650
651 /*
652 * The APM segments have byte granularity and their bases
653 * are set at run time. All have 64k limits.
654 */
655 .quad 0x00409a000000ffff /* 0xb8 APM CS code */
656 .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */
657 .quad 0x004092000000ffff /* 0xc8 APM DS data */
658
659 .quad 0x00c0920000000000 /* 0xd0 - ESPFIX SS */
660 .quad 0x00cf92000000ffff /* 0xd8 - PDA */
661 .quad 0x0000000000000000 /* 0xe0 - unused */
662 .quad 0x0000000000000000 /* 0xe8 - unused */
663 .quad 0x0000000000000000 /* 0xf0 - unused */
664 .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */
665
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
index 4afe26e86260..e3d4b73bfdb0 100644
--- a/arch/i386/kernel/i386_ksyms.c
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -28,5 +28,3 @@ EXPORT_SYMBOL(__read_lock_failed);
28#endif 28#endif
29 29
30EXPORT_SYMBOL(csum_partial); 30EXPORT_SYMBOL(csum_partial);
31
32EXPORT_SYMBOL(_proxy_pda);
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 89d85d244926..1b623cda3a64 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -35,6 +35,7 @@
35#include <linux/msi.h> 35#include <linux/msi.h>
36#include <linux/htirq.h> 36#include <linux/htirq.h>
37#include <linux/freezer.h> 37#include <linux/freezer.h>
38#include <linux/kthread.h>
38 39
39#include <asm/io.h> 40#include <asm/io.h>
40#include <asm/smp.h> 41#include <asm/smp.h>
@@ -661,8 +662,6 @@ static int balanced_irq(void *unused)
661 unsigned long prev_balance_time = jiffies; 662 unsigned long prev_balance_time = jiffies;
662 long time_remaining = balanced_irq_interval; 663 long time_remaining = balanced_irq_interval;
663 664
664 daemonize("kirqd");
665
666 /* push everything to CPU 0 to give us a starting point. */ 665 /* push everything to CPU 0 to give us a starting point. */
667 for (i = 0 ; i < NR_IRQS ; i++) { 666 for (i = 0 ; i < NR_IRQS ; i++) {
668 irq_desc[i].pending_mask = cpumask_of_cpu(0); 667 irq_desc[i].pending_mask = cpumask_of_cpu(0);
@@ -722,10 +721,9 @@ static int __init balanced_irq_init(void)
722 } 721 }
723 722
724 printk(KERN_INFO "Starting balanced_irq\n"); 723 printk(KERN_INFO "Starting balanced_irq\n");
725 if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 724 if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
726 return 0; 725 return 0;
727 else 726 printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
728 printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
729failed: 727failed:
730 for_each_possible_cpu(i) { 728 for_each_possible_cpu(i) {
731 kfree(irq_cpu_data[i].irq_delta); 729 kfree(irq_cpu_data[i].irq_delta);
@@ -1403,10 +1401,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
1403 enable_8259A_irq(0); 1401 enable_8259A_irq(0);
1404} 1402}
1405 1403
1406static inline void UNEXPECTED_IO_APIC(void)
1407{
1408}
1409
1410void __init print_IO_APIC(void) 1404void __init print_IO_APIC(void)
1411{ 1405{
1412 int apic, i; 1406 int apic, i;
@@ -1446,34 +1440,12 @@ void __init print_IO_APIC(void)
1446 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 1440 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1447 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); 1441 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
1448 printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); 1442 printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
1449 if (reg_00.bits.ID >= get_physical_broadcast())
1450 UNEXPECTED_IO_APIC();
1451 if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
1452 UNEXPECTED_IO_APIC();
1453 1443
1454 printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); 1444 printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
1455 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); 1445 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
1456 if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
1457 (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
1458 (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
1459 (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
1460 (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
1461 (reg_01.bits.entries != 0x2E) &&
1462 (reg_01.bits.entries != 0x3F)
1463 )
1464 UNEXPECTED_IO_APIC();
1465 1446
1466 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); 1447 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
1467 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); 1448 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
1468 if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
1469 (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
1470 (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
1471 (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
1472 (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
1473 )
1474 UNEXPECTED_IO_APIC();
1475 if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
1476 UNEXPECTED_IO_APIC();
1477 1449
1478 /* 1450 /*
1479 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, 1451 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
@@ -1483,8 +1455,6 @@ void __init print_IO_APIC(void)
1483 if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { 1455 if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
1484 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); 1456 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
1485 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); 1457 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
1486 if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
1487 UNEXPECTED_IO_APIC();
1488 } 1458 }
1489 1459
1490 /* 1460 /*
@@ -1496,8 +1466,6 @@ void __init print_IO_APIC(void)
1496 reg_03.raw != reg_01.raw) { 1466 reg_03.raw != reg_01.raw) {
1497 printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); 1467 printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
1498 printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); 1468 printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
1499 if (reg_03.bits.__reserved_1)
1500 UNEXPECTED_IO_APIC();
1501 } 1469 }
1502 1470
1503 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 1471 printk(KERN_DEBUG ".... IRQ redirection table:\n");
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
index 498e8bc197d5..d1e42e0dbe67 100644
--- a/arch/i386/kernel/ioport.c
+++ b/arch/i386/kernel/ioport.c
@@ -16,6 +16,7 @@
16#include <linux/stddef.h> 16#include <linux/stddef.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/thread_info.h> 18#include <linux/thread_info.h>
19#include <linux/syscalls.h>
19 20
20/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ 21/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
21static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) 22static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
@@ -113,7 +114,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
113 * Reset the owner so that a process switch will not set 114 * Reset the owner so that a process switch will not set
114 * tss->io_bitmap_base to IO_BITMAP_OFFSET. 115 * tss->io_bitmap_base to IO_BITMAP_OFFSET.
115 */ 116 */
116 tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; 117 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
117 tss->io_bitmap_owner = NULL; 118 tss->io_bitmap_owner = NULL;
118 119
119 put_cpu(); 120 put_cpu();
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 8db8d514c9c0..d2daf672f4a2 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -24,6 +24,9 @@
24DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; 24DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
25EXPORT_PER_CPU_SYMBOL(irq_stat); 25EXPORT_PER_CPU_SYMBOL(irq_stat);
26 26
27DEFINE_PER_CPU(struct pt_regs *, irq_regs);
28EXPORT_PER_CPU_SYMBOL(irq_regs);
29
27/* 30/*
28 * 'what should we do if we get a hw irq event on an illegal vector'. 31 * 'what should we do if we get a hw irq event on an illegal vector'.
29 * each architecture has to answer this themselves. 32 * each architecture has to answer this themselves.
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
index 4f5983c98669..0952eccd8f28 100644
--- a/arch/i386/kernel/mpparse.c
+++ b/arch/i386/kernel/mpparse.c
@@ -477,7 +477,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
477 } 477 }
478 ++mpc_record; 478 ++mpc_record;
479 } 479 }
480 clustered_apic_check(); 480 setup_apic_routing();
481 if (!num_processors) 481 if (!num_processors)
482 printk(KERN_ERR "SMP mptable: no processors registered!\n"); 482 printk(KERN_ERR "SMP mptable: no processors registered!\n");
483 return num_processors; 483 return num_processors;
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 84c3497efb60..33cf2f3c444f 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -20,7 +20,6 @@
20#include <linux/sysdev.h> 20#include <linux/sysdev.h>
21#include <linux/sysctl.h> 21#include <linux/sysctl.h>
22#include <linux/percpu.h> 22#include <linux/percpu.h>
23#include <linux/dmi.h>
24#include <linux/kprobes.h> 23#include <linux/kprobes.h>
25#include <linux/cpumask.h> 24#include <linux/cpumask.h>
26#include <linux/kernel_stat.h> 25#include <linux/kernel_stat.h>
@@ -28,30 +27,14 @@
28#include <asm/smp.h> 27#include <asm/smp.h>
29#include <asm/nmi.h> 28#include <asm/nmi.h>
30#include <asm/kdebug.h> 29#include <asm/kdebug.h>
31#include <asm/intel_arch_perfmon.h>
32 30
33#include "mach_traps.h" 31#include "mach_traps.h"
34 32
35int unknown_nmi_panic; 33int unknown_nmi_panic;
36int nmi_watchdog_enabled; 34int nmi_watchdog_enabled;
37 35
38/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
39 * evtsel_nmi_owner tracks the ownership of the event selection
40 * - different performance counters/ event selection may be reserved for
41 * different subsystems this reservation system just tries to coordinate
42 * things a little
43 */
44
45/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
46 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
47 */
48#define NMI_MAX_COUNTER_BITS 66
49#define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS)
50
51static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]);
52static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]);
53
54static cpumask_t backtrace_mask = CPU_MASK_NONE; 36static cpumask_t backtrace_mask = CPU_MASK_NONE;
37
55/* nmi_active: 38/* nmi_active:
56 * >0: the lapic NMI watchdog is active, but can be disabled 39 * >0: the lapic NMI watchdog is active, but can be disabled
57 * <0: the lapic NMI watchdog has not been set up, and cannot 40 * <0: the lapic NMI watchdog has not been set up, and cannot
@@ -63,206 +46,11 @@ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
63unsigned int nmi_watchdog = NMI_DEFAULT; 46unsigned int nmi_watchdog = NMI_DEFAULT;
64static unsigned int nmi_hz = HZ; 47static unsigned int nmi_hz = HZ;
65 48
66struct nmi_watchdog_ctlblk { 49static DEFINE_PER_CPU(short, wd_enabled);
67 int enabled;
68 u64 check_bit;
69 unsigned int cccr_msr;
70 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
71 unsigned int evntsel_msr; /* the MSR to select the events to handle */
72};
73static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
74 50
75/* local prototypes */ 51/* local prototypes */
76static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); 52static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
77 53
78extern void show_registers(struct pt_regs *regs);
79extern int unknown_nmi_panic;
80
81/* converts an msr to an appropriate reservation bit */
82static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
83{
84 /* returns the bit offset of the performance counter register */
85 switch (boot_cpu_data.x86_vendor) {
86 case X86_VENDOR_AMD:
87 return (msr - MSR_K7_PERFCTR0);
88 case X86_VENDOR_INTEL:
89 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
90 return (msr - MSR_ARCH_PERFMON_PERFCTR0);
91
92 switch (boot_cpu_data.x86) {
93 case 6:
94 return (msr - MSR_P6_PERFCTR0);
95 case 15:
96 return (msr - MSR_P4_BPU_PERFCTR0);
97 }
98 }
99 return 0;
100}
101
102/* converts an msr to an appropriate reservation bit */
103static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
104{
105 /* returns the bit offset of the event selection register */
106 switch (boot_cpu_data.x86_vendor) {
107 case X86_VENDOR_AMD:
108 return (msr - MSR_K7_EVNTSEL0);
109 case X86_VENDOR_INTEL:
110 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
111 return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
112
113 switch (boot_cpu_data.x86) {
114 case 6:
115 return (msr - MSR_P6_EVNTSEL0);
116 case 15:
117 return (msr - MSR_P4_BSU_ESCR0);
118 }
119 }
120 return 0;
121}
122
123/* checks for a bit availability (hack for oprofile) */
124int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
125{
126 int cpu;
127 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
128 for_each_possible_cpu (cpu) {
129 if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
130 return 0;
131 }
132 return 1;
133}
134
135/* checks the an msr for availability */
136int avail_to_resrv_perfctr_nmi(unsigned int msr)
137{
138 unsigned int counter;
139 int cpu;
140
141 counter = nmi_perfctr_msr_to_bit(msr);
142 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
143
144 for_each_possible_cpu (cpu) {
145 if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
146 return 0;
147 }
148 return 1;
149}
150
151static int __reserve_perfctr_nmi(int cpu, unsigned int msr)
152{
153 unsigned int counter;
154 if (cpu < 0)
155 cpu = smp_processor_id();
156
157 counter = nmi_perfctr_msr_to_bit(msr);
158 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
159
160 if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
161 return 1;
162 return 0;
163}
164
165static void __release_perfctr_nmi(int cpu, unsigned int msr)
166{
167 unsigned int counter;
168 if (cpu < 0)
169 cpu = smp_processor_id();
170
171 counter = nmi_perfctr_msr_to_bit(msr);
172 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
173
174 clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]);
175}
176
177int reserve_perfctr_nmi(unsigned int msr)
178{
179 int cpu, i;
180 for_each_possible_cpu (cpu) {
181 if (!__reserve_perfctr_nmi(cpu, msr)) {
182 for_each_possible_cpu (i) {
183 if (i >= cpu)
184 break;
185 __release_perfctr_nmi(i, msr);
186 }
187 return 0;
188 }
189 }
190 return 1;
191}
192
193void release_perfctr_nmi(unsigned int msr)
194{
195 int cpu;
196 for_each_possible_cpu (cpu) {
197 __release_perfctr_nmi(cpu, msr);
198 }
199}
200
201int __reserve_evntsel_nmi(int cpu, unsigned int msr)
202{
203 unsigned int counter;
204 if (cpu < 0)
205 cpu = smp_processor_id();
206
207 counter = nmi_evntsel_msr_to_bit(msr);
208 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
209
210 if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]))
211 return 1;
212 return 0;
213}
214
215static void __release_evntsel_nmi(int cpu, unsigned int msr)
216{
217 unsigned int counter;
218 if (cpu < 0)
219 cpu = smp_processor_id();
220
221 counter = nmi_evntsel_msr_to_bit(msr);
222 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
223
224 clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]);
225}
226
227int reserve_evntsel_nmi(unsigned int msr)
228{
229 int cpu, i;
230 for_each_possible_cpu (cpu) {
231 if (!__reserve_evntsel_nmi(cpu, msr)) {
232 for_each_possible_cpu (i) {
233 if (i >= cpu)
234 break;
235 __release_evntsel_nmi(i, msr);
236 }
237 return 0;
238 }
239 }
240 return 1;
241}
242
243void release_evntsel_nmi(unsigned int msr)
244{
245 int cpu;
246 for_each_possible_cpu (cpu) {
247 __release_evntsel_nmi(cpu, msr);
248 }
249}
250
251static __cpuinit inline int nmi_known_cpu(void)
252{
253 switch (boot_cpu_data.x86_vendor) {
254 case X86_VENDOR_AMD:
255 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)
256 || (boot_cpu_data.x86 == 16));
257 case X86_VENDOR_INTEL:
258 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
259 return 1;
260 else
261 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
262 }
263 return 0;
264}
265
266static int endflag __initdata = 0; 54static int endflag __initdata = 0;
267 55
268#ifdef CONFIG_SMP 56#ifdef CONFIG_SMP
@@ -284,28 +72,6 @@ static __init void nmi_cpu_busy(void *data)
284} 72}
285#endif 73#endif
286 74
287static unsigned int adjust_for_32bit_ctr(unsigned int hz)
288{
289 u64 counter_val;
290 unsigned int retval = hz;
291
292 /*
293 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
294 * are writable, with higher bits sign extending from bit 31.
295 * So, we can only program the counter with 31 bit values and
296 * 32nd bit should be 1, for 33.. to be 1.
297 * Find the appropriate nmi_hz
298 */
299 counter_val = (u64)cpu_khz * 1000;
300 do_div(counter_val, retval);
301 if (counter_val > 0x7fffffffULL) {
302 u64 count = (u64)cpu_khz * 1000;
303 do_div(count, 0x7fffffffUL);
304 retval = count + 1;
305 }
306 return retval;
307}
308
309static int __init check_nmi_watchdog(void) 75static int __init check_nmi_watchdog(void)
310{ 76{
311 unsigned int *prev_nmi_count; 77 unsigned int *prev_nmi_count;
@@ -338,14 +104,14 @@ static int __init check_nmi_watchdog(void)
338 if (!cpu_isset(cpu, cpu_callin_map)) 104 if (!cpu_isset(cpu, cpu_callin_map))
339 continue; 105 continue;
340#endif 106#endif
341 if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled) 107 if (!per_cpu(wd_enabled, cpu))
342 continue; 108 continue;
343 if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { 109 if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
344 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", 110 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
345 cpu, 111 cpu,
346 prev_nmi_count[cpu], 112 prev_nmi_count[cpu],
347 nmi_count(cpu)); 113 nmi_count(cpu));
348 per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0; 114 per_cpu(wd_enabled, cpu) = 0;
349 atomic_dec(&nmi_active); 115 atomic_dec(&nmi_active);
350 } 116 }
351 } 117 }
@@ -359,16 +125,8 @@ static int __init check_nmi_watchdog(void)
359 125
360 /* now that we know it works we can reduce NMI frequency to 126 /* now that we know it works we can reduce NMI frequency to
361 something more reasonable; makes a difference in some configs */ 127 something more reasonable; makes a difference in some configs */
362 if (nmi_watchdog == NMI_LOCAL_APIC) { 128 if (nmi_watchdog == NMI_LOCAL_APIC)
363 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 129 nmi_hz = lapic_adjust_nmi_hz(1);
364
365 nmi_hz = 1;
366
367 if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
368 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
369 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
370 }
371 }
372 130
373 kfree(prev_nmi_count); 131 kfree(prev_nmi_count);
374 return 0; 132 return 0;
@@ -391,85 +149,8 @@ static int __init setup_nmi_watchdog(char *str)
391 149
392__setup("nmi_watchdog=", setup_nmi_watchdog); 150__setup("nmi_watchdog=", setup_nmi_watchdog);
393 151
394static void disable_lapic_nmi_watchdog(void)
395{
396 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
397
398 if (atomic_read(&nmi_active) <= 0)
399 return;
400
401 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
402
403 BUG_ON(atomic_read(&nmi_active) != 0);
404}
405
406static void enable_lapic_nmi_watchdog(void)
407{
408 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
409
410 /* are we already enabled */
411 if (atomic_read(&nmi_active) != 0)
412 return;
413
414 /* are we lapic aware */
415 if (nmi_known_cpu() <= 0)
416 return;
417 152
418 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); 153/* Suspend/resume support */
419 touch_nmi_watchdog();
420}
421
422void disable_timer_nmi_watchdog(void)
423{
424 BUG_ON(nmi_watchdog != NMI_IO_APIC);
425
426 if (atomic_read(&nmi_active) <= 0)
427 return;
428
429 disable_irq(0);
430 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
431
432 BUG_ON(atomic_read(&nmi_active) != 0);
433}
434
435void enable_timer_nmi_watchdog(void)
436{
437 BUG_ON(nmi_watchdog != NMI_IO_APIC);
438
439 if (atomic_read(&nmi_active) == 0) {
440 touch_nmi_watchdog();
441 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
442 enable_irq(0);
443 }
444}
445
446static void __acpi_nmi_disable(void *__unused)
447{
448 apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
449}
450
451/*
452 * Disable timer based NMIs on all CPUs:
453 */
454void acpi_nmi_disable(void)
455{
456 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
457 on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
458}
459
460static void __acpi_nmi_enable(void *__unused)
461{
462 apic_write_around(APIC_LVT0, APIC_DM_NMI);
463}
464
465/*
466 * Enable timer based NMIs on all CPUs:
467 */
468void acpi_nmi_enable(void)
469{
470 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
471 on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
472}
473 154
474#ifdef CONFIG_PM 155#ifdef CONFIG_PM
475 156
@@ -516,7 +197,7 @@ static int __init init_lapic_nmi_sysfs(void)
516 if (nmi_watchdog != NMI_LOCAL_APIC) 197 if (nmi_watchdog != NMI_LOCAL_APIC)
517 return 0; 198 return 0;
518 199
519 if ( atomic_read(&nmi_active) < 0 ) 200 if (atomic_read(&nmi_active) < 0)
520 return 0; 201 return 0;
521 202
522 error = sysdev_class_register(&nmi_sysclass); 203 error = sysdev_class_register(&nmi_sysclass);
@@ -529,433 +210,69 @@ late_initcall(init_lapic_nmi_sysfs);
529 210
530#endif /* CONFIG_PM */ 211#endif /* CONFIG_PM */
531 212
532/* 213static void __acpi_nmi_enable(void *__unused)
533 * Activate the NMI watchdog via the local APIC.
534 * Original code written by Keith Owens.
535 */
536
537static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
538{
539 u64 count = (u64)cpu_khz * 1000;
540
541 do_div(count, nmi_hz);
542 if(descr)
543 Dprintk("setting %s to -0x%08Lx\n", descr, count);
544 wrmsrl(perfctr_msr, 0 - count);
545}
546
547static void write_watchdog_counter32(unsigned int perfctr_msr,
548 const char *descr)
549{
550 u64 count = (u64)cpu_khz * 1000;
551
552 do_div(count, nmi_hz);
553 if(descr)
554 Dprintk("setting %s to -0x%08Lx\n", descr, count);
555 wrmsr(perfctr_msr, (u32)(-count), 0);
556}
557
558/* Note that these events don't tick when the CPU idles. This means
559 the frequency varies with CPU load. */
560
561#define K7_EVNTSEL_ENABLE (1 << 22)
562#define K7_EVNTSEL_INT (1 << 20)
563#define K7_EVNTSEL_OS (1 << 17)
564#define K7_EVNTSEL_USR (1 << 16)
565#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
566#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
567
568static int setup_k7_watchdog(void)
569{
570 unsigned int perfctr_msr, evntsel_msr;
571 unsigned int evntsel;
572 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
573
574 perfctr_msr = MSR_K7_PERFCTR0;
575 evntsel_msr = MSR_K7_EVNTSEL0;
576 if (!__reserve_perfctr_nmi(-1, perfctr_msr))
577 goto fail;
578
579 if (!__reserve_evntsel_nmi(-1, evntsel_msr))
580 goto fail1;
581
582 wrmsrl(perfctr_msr, 0UL);
583
584 evntsel = K7_EVNTSEL_INT
585 | K7_EVNTSEL_OS
586 | K7_EVNTSEL_USR
587 | K7_NMI_EVENT;
588
589 /* setup the timer */
590 wrmsr(evntsel_msr, evntsel, 0);
591 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
592 apic_write(APIC_LVTPC, APIC_DM_NMI);
593 evntsel |= K7_EVNTSEL_ENABLE;
594 wrmsr(evntsel_msr, evntsel, 0);
595
596 wd->perfctr_msr = perfctr_msr;
597 wd->evntsel_msr = evntsel_msr;
598 wd->cccr_msr = 0; //unused
599 wd->check_bit = 1ULL<<63;
600 return 1;
601fail1:
602 __release_perfctr_nmi(-1, perfctr_msr);
603fail:
604 return 0;
605}
606
607static void stop_k7_watchdog(void)
608{
609 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
610
611 wrmsr(wd->evntsel_msr, 0, 0);
612
613 __release_evntsel_nmi(-1, wd->evntsel_msr);
614 __release_perfctr_nmi(-1, wd->perfctr_msr);
615}
616
617#define P6_EVNTSEL0_ENABLE (1 << 22)
618#define P6_EVNTSEL_INT (1 << 20)
619#define P6_EVNTSEL_OS (1 << 17)
620#define P6_EVNTSEL_USR (1 << 16)
621#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
622#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
623
624static int setup_p6_watchdog(void)
625{
626 unsigned int perfctr_msr, evntsel_msr;
627 unsigned int evntsel;
628 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
629
630 perfctr_msr = MSR_P6_PERFCTR0;
631 evntsel_msr = MSR_P6_EVNTSEL0;
632 if (!__reserve_perfctr_nmi(-1, perfctr_msr))
633 goto fail;
634
635 if (!__reserve_evntsel_nmi(-1, evntsel_msr))
636 goto fail1;
637
638 wrmsrl(perfctr_msr, 0UL);
639
640 evntsel = P6_EVNTSEL_INT
641 | P6_EVNTSEL_OS
642 | P6_EVNTSEL_USR
643 | P6_NMI_EVENT;
644
645 /* setup the timer */
646 wrmsr(evntsel_msr, evntsel, 0);
647 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
648 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0");
649 apic_write(APIC_LVTPC, APIC_DM_NMI);
650 evntsel |= P6_EVNTSEL0_ENABLE;
651 wrmsr(evntsel_msr, evntsel, 0);
652
653 wd->perfctr_msr = perfctr_msr;
654 wd->evntsel_msr = evntsel_msr;
655 wd->cccr_msr = 0; //unused
656 wd->check_bit = 1ULL<<39;
657 return 1;
658fail1:
659 __release_perfctr_nmi(-1, perfctr_msr);
660fail:
661 return 0;
662}
663
664static void stop_p6_watchdog(void)
665{
666 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
667
668 wrmsr(wd->evntsel_msr, 0, 0);
669
670 __release_evntsel_nmi(-1, wd->evntsel_msr);
671 __release_perfctr_nmi(-1, wd->perfctr_msr);
672}
673
674/* Note that these events don't tick when the CPU idles. This means
675 the frequency varies with CPU load. */
676
677#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
678#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
679#define P4_ESCR_OS (1<<3)
680#define P4_ESCR_USR (1<<2)
681#define P4_CCCR_OVF_PMI0 (1<<26)
682#define P4_CCCR_OVF_PMI1 (1<<27)
683#define P4_CCCR_THRESHOLD(N) ((N)<<20)
684#define P4_CCCR_COMPLEMENT (1<<19)
685#define P4_CCCR_COMPARE (1<<18)
686#define P4_CCCR_REQUIRED (3<<16)
687#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
688#define P4_CCCR_ENABLE (1<<12)
689#define P4_CCCR_OVF (1<<31)
690/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
691 CRU_ESCR0 (with any non-null event selector) through a complemented
692 max threshold. [IA32-Vol3, Section 14.9.9] */
693
694static int setup_p4_watchdog(void)
695{ 214{
696 unsigned int perfctr_msr, evntsel_msr, cccr_msr; 215 apic_write_around(APIC_LVT0, APIC_DM_NMI);
697 unsigned int evntsel, cccr_val;
698 unsigned int misc_enable, dummy;
699 unsigned int ht_num;
700 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
701
702 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
703 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
704 return 0;
705
706#ifdef CONFIG_SMP
707 /* detect which hyperthread we are on */
708 if (smp_num_siblings == 2) {
709 unsigned int ebx, apicid;
710
711 ebx = cpuid_ebx(1);
712 apicid = (ebx >> 24) & 0xff;
713 ht_num = apicid & 1;
714 } else
715#endif
716 ht_num = 0;
717
718 /* performance counters are shared resources
719 * assign each hyperthread its own set
720 * (re-use the ESCR0 register, seems safe
721 * and keeps the cccr_val the same)
722 */
723 if (!ht_num) {
724 /* logical cpu 0 */
725 perfctr_msr = MSR_P4_IQ_PERFCTR0;
726 evntsel_msr = MSR_P4_CRU_ESCR0;
727 cccr_msr = MSR_P4_IQ_CCCR0;
728 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
729 } else {
730 /* logical cpu 1 */
731 perfctr_msr = MSR_P4_IQ_PERFCTR1;
732 evntsel_msr = MSR_P4_CRU_ESCR0;
733 cccr_msr = MSR_P4_IQ_CCCR1;
734 cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
735 }
736
737 if (!__reserve_perfctr_nmi(-1, perfctr_msr))
738 goto fail;
739
740 if (!__reserve_evntsel_nmi(-1, evntsel_msr))
741 goto fail1;
742
743 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
744 | P4_ESCR_OS
745 | P4_ESCR_USR;
746
747 cccr_val |= P4_CCCR_THRESHOLD(15)
748 | P4_CCCR_COMPLEMENT
749 | P4_CCCR_COMPARE
750 | P4_CCCR_REQUIRED;
751
752 wrmsr(evntsel_msr, evntsel, 0);
753 wrmsr(cccr_msr, cccr_val, 0);
754 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
755 apic_write(APIC_LVTPC, APIC_DM_NMI);
756 cccr_val |= P4_CCCR_ENABLE;
757 wrmsr(cccr_msr, cccr_val, 0);
758 wd->perfctr_msr = perfctr_msr;
759 wd->evntsel_msr = evntsel_msr;
760 wd->cccr_msr = cccr_msr;
761 wd->check_bit = 1ULL<<39;
762 return 1;
763fail1:
764 __release_perfctr_nmi(-1, perfctr_msr);
765fail:
766 return 0;
767} 216}
768 217
769static void stop_p4_watchdog(void) 218/*
219 * Enable timer based NMIs on all CPUs:
220 */
221void acpi_nmi_enable(void)
770{ 222{
771 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 223 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
772 224 on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
773 wrmsr(wd->cccr_msr, 0, 0);
774 wrmsr(wd->evntsel_msr, 0, 0);
775
776 __release_evntsel_nmi(-1, wd->evntsel_msr);
777 __release_perfctr_nmi(-1, wd->perfctr_msr);
778} 225}
779 226
780#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 227static void __acpi_nmi_disable(void *__unused)
781#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
782
783static int setup_intel_arch_watchdog(void)
784{ 228{
785 unsigned int ebx; 229 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
786 union cpuid10_eax eax;
787 unsigned int unused;
788 unsigned int perfctr_msr, evntsel_msr;
789 unsigned int evntsel;
790 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
791
792 /*
793 * Check whether the Architectural PerfMon supports
794 * Unhalted Core Cycles Event or not.
795 * NOTE: Corresponding bit = 0 in ebx indicates event present.
796 */
797 cpuid(10, &(eax.full), &ebx, &unused, &unused);
798 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
799 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
800 goto fail;
801
802 perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
803 evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
804
805 if (!__reserve_perfctr_nmi(-1, perfctr_msr))
806 goto fail;
807
808 if (!__reserve_evntsel_nmi(-1, evntsel_msr))
809 goto fail1;
810
811 wrmsrl(perfctr_msr, 0UL);
812
813 evntsel = ARCH_PERFMON_EVENTSEL_INT
814 | ARCH_PERFMON_EVENTSEL_OS
815 | ARCH_PERFMON_EVENTSEL_USR
816 | ARCH_PERFMON_NMI_EVENT_SEL
817 | ARCH_PERFMON_NMI_EVENT_UMASK;
818
819 /* setup the timer */
820 wrmsr(evntsel_msr, evntsel, 0);
821 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
822 write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0");
823 apic_write(APIC_LVTPC, APIC_DM_NMI);
824 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
825 wrmsr(evntsel_msr, evntsel, 0);
826
827 wd->perfctr_msr = perfctr_msr;
828 wd->evntsel_msr = evntsel_msr;
829 wd->cccr_msr = 0; //unused
830 wd->check_bit = 1ULL << (eax.split.bit_width - 1);
831 return 1;
832fail1:
833 __release_perfctr_nmi(-1, perfctr_msr);
834fail:
835 return 0;
836} 230}
837 231
838static void stop_intel_arch_watchdog(void) 232/*
233 * Disable timer based NMIs on all CPUs:
234 */
235void acpi_nmi_disable(void)
839{ 236{
840 unsigned int ebx; 237 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
841 union cpuid10_eax eax; 238 on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
842 unsigned int unused;
843 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
844
845 /*
846 * Check whether the Architectural PerfMon supports
847 * Unhalted Core Cycles Event or not.
848 * NOTE: Corresponding bit = 0 in ebx indicates event present.
849 */
850 cpuid(10, &(eax.full), &ebx, &unused, &unused);
851 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
852 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
853 return;
854
855 wrmsr(wd->evntsel_msr, 0, 0);
856 __release_evntsel_nmi(-1, wd->evntsel_msr);
857 __release_perfctr_nmi(-1, wd->perfctr_msr);
858} 239}
859 240
860void setup_apic_nmi_watchdog (void *unused) 241void setup_apic_nmi_watchdog (void *unused)
861{ 242{
862 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 243 if (__get_cpu_var(wd_enabled))
863 244 return;
864 /* only support LOCAL and IO APICs for now */
865 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
866 (nmi_watchdog != NMI_IO_APIC))
867 return;
868
869 if (wd->enabled == 1)
870 return;
871 245
872 /* cheap hack to support suspend/resume */ 246 /* cheap hack to support suspend/resume */
873 /* if cpu0 is not active neither should the other cpus */ 247 /* if cpu0 is not active neither should the other cpus */
874 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) 248 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
875 return; 249 return;
876 250
877 if (nmi_watchdog == NMI_LOCAL_APIC) { 251 switch (nmi_watchdog) {
878 switch (boot_cpu_data.x86_vendor) { 252 case NMI_LOCAL_APIC:
879 case X86_VENDOR_AMD: 253 __get_cpu_var(wd_enabled) = 1; /* enable it before to avoid race with handler */
880 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && 254 if (lapic_watchdog_init(nmi_hz) < 0) {
881 boot_cpu_data.x86 != 16) 255 __get_cpu_var(wd_enabled) = 0;
882 return;
883 if (!setup_k7_watchdog())
884 return;
885 break;
886 case X86_VENDOR_INTEL:
887 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
888 if (!setup_intel_arch_watchdog())
889 return;
890 break;
891 }
892 switch (boot_cpu_data.x86) {
893 case 6:
894 if (boot_cpu_data.x86_model > 0xd)
895 return;
896
897 if (!setup_p6_watchdog())
898 return;
899 break;
900 case 15:
901 if (boot_cpu_data.x86_model > 0x4)
902 return;
903
904 if (!setup_p4_watchdog())
905 return;
906 break;
907 default:
908 return;
909 }
910 break;
911 default:
912 return; 256 return;
913 } 257 }
258 /* FALL THROUGH */
259 case NMI_IO_APIC:
260 __get_cpu_var(wd_enabled) = 1;
261 atomic_inc(&nmi_active);
914 } 262 }
915 wd->enabled = 1;
916 atomic_inc(&nmi_active);
917} 263}
918 264
919void stop_apic_nmi_watchdog(void *unused) 265void stop_apic_nmi_watchdog(void *unused)
920{ 266{
921 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
922
923 /* only support LOCAL and IO APICs for now */ 267 /* only support LOCAL and IO APICs for now */
924 if ((nmi_watchdog != NMI_LOCAL_APIC) && 268 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
925 (nmi_watchdog != NMI_IO_APIC)) 269 (nmi_watchdog != NMI_IO_APIC))
926 return; 270 return;
927 271 if (__get_cpu_var(wd_enabled) == 0)
928 if (wd->enabled == 0)
929 return; 272 return;
930 273 if (nmi_watchdog == NMI_LOCAL_APIC)
931 if (nmi_watchdog == NMI_LOCAL_APIC) { 274 lapic_watchdog_stop();
932 switch (boot_cpu_data.x86_vendor) { 275 __get_cpu_var(wd_enabled) = 0;
933 case X86_VENDOR_AMD:
934 stop_k7_watchdog();
935 break;
936 case X86_VENDOR_INTEL:
937 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
938 stop_intel_arch_watchdog();
939 break;
940 }
941 switch (boot_cpu_data.x86) {
942 case 6:
943 if (boot_cpu_data.x86_model > 0xd)
944 break;
945 stop_p6_watchdog();
946 break;
947 case 15:
948 if (boot_cpu_data.x86_model > 0x4)
949 break;
950 stop_p4_watchdog();
951 break;
952 }
953 break;
954 default:
955 return;
956 }
957 }
958 wd->enabled = 0;
959 atomic_dec(&nmi_active); 276 atomic_dec(&nmi_active);
960} 277}
961 278
@@ -1011,8 +328,6 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
1011 unsigned int sum; 328 unsigned int sum;
1012 int touched = 0; 329 int touched = 0;
1013 int cpu = smp_processor_id(); 330 int cpu = smp_processor_id();
1014 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
1015 u64 dummy;
1016 int rc=0; 331 int rc=0;
1017 332
1018 /* check for other users first */ 333 /* check for other users first */
@@ -1055,53 +370,20 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
1055 alert_counter[cpu] = 0; 370 alert_counter[cpu] = 0;
1056 } 371 }
1057 /* see if the nmi watchdog went off */ 372 /* see if the nmi watchdog went off */
1058 if (wd->enabled) { 373 if (!__get_cpu_var(wd_enabled))
1059 if (nmi_watchdog == NMI_LOCAL_APIC) { 374 return rc;
1060 rdmsrl(wd->perfctr_msr, dummy); 375 switch (nmi_watchdog) {
1061 if (dummy & wd->check_bit){ 376 case NMI_LOCAL_APIC:
1062 /* this wasn't a watchdog timer interrupt */ 377 rc |= lapic_wd_event(nmi_hz);
1063 goto done; 378 break;
1064 } 379 case NMI_IO_APIC:
1065 380 /* don't know how to accurately check for this.
1066 /* only Intel P4 uses the cccr msr */ 381 * just assume it was a watchdog timer interrupt
1067 if (wd->cccr_msr != 0) { 382 * This matches the old behaviour.
1068 /* 383 */
1069 * P4 quirks: 384 rc = 1;
1070 * - An overflown perfctr will assert its interrupt 385 break;
1071 * until the OVF flag in its CCCR is cleared.
1072 * - LVTPC is masked on interrupt and must be
1073 * unmasked by the LVTPC handler.
1074 */
1075 rdmsrl(wd->cccr_msr, dummy);
1076 dummy &= ~P4_CCCR_OVF;
1077 wrmsrl(wd->cccr_msr, dummy);
1078 apic_write(APIC_LVTPC, APIC_DM_NMI);
1079 /* start the cycle over again */
1080 write_watchdog_counter(wd->perfctr_msr, NULL);
1081 }
1082 else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
1083 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
1084 /* P6 based Pentium M need to re-unmask
1085 * the apic vector but it doesn't hurt
1086 * other P6 variant.
1087 * ArchPerfom/Core Duo also needs this */
1088 apic_write(APIC_LVTPC, APIC_DM_NMI);
1089 /* P6/ARCH_PERFMON has 32 bit counter write */
1090 write_watchdog_counter32(wd->perfctr_msr, NULL);
1091 } else {
1092 /* start the cycle over again */
1093 write_watchdog_counter(wd->perfctr_msr, NULL);
1094 }
1095 rc = 1;
1096 } else if (nmi_watchdog == NMI_IO_APIC) {
1097 /* don't know how to accurately check for this.
1098 * just assume it was a watchdog timer interrupt
1099 * This matches the old behaviour.
1100 */
1101 rc = 1;
1102 }
1103 } 386 }
1104done:
1105 return rc; 387 return rc;
1106} 388}
1107 389
@@ -1146,7 +428,7 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
1146 } 428 }
1147 429
1148 if (nmi_watchdog == NMI_DEFAULT) { 430 if (nmi_watchdog == NMI_DEFAULT) {
1149 if (nmi_known_cpu() > 0) 431 if (lapic_watchdog_ok())
1150 nmi_watchdog = NMI_LOCAL_APIC; 432 nmi_watchdog = NMI_LOCAL_APIC;
1151 else 433 else
1152 nmi_watchdog = NMI_IO_APIC; 434 nmi_watchdog = NMI_IO_APIC;
@@ -1182,11 +464,3 @@ void __trigger_all_cpu_backtrace(void)
1182 464
1183EXPORT_SYMBOL(nmi_active); 465EXPORT_SYMBOL(nmi_active);
1184EXPORT_SYMBOL(nmi_watchdog); 466EXPORT_SYMBOL(nmi_watchdog);
1185EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
1186EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
1187EXPORT_SYMBOL(reserve_perfctr_nmi);
1188EXPORT_SYMBOL(release_perfctr_nmi);
1189EXPORT_SYMBOL(reserve_evntsel_nmi);
1190EXPORT_SYMBOL(release_evntsel_nmi);
1191EXPORT_SYMBOL(disable_timer_nmi_watchdog);
1192EXPORT_SYMBOL(enable_timer_nmi_watchdog);
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index 2ec331e03fa9..5c10f376bce1 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -20,6 +20,7 @@
20#include <linux/efi.h> 20#include <linux/efi.h>
21#include <linux/bcd.h> 21#include <linux/bcd.h>
22#include <linux/start_kernel.h> 22#include <linux/start_kernel.h>
23#include <linux/highmem.h>
23 24
24#include <asm/bug.h> 25#include <asm/bug.h>
25#include <asm/paravirt.h> 26#include <asm/paravirt.h>
@@ -35,7 +36,7 @@
35#include <asm/timer.h> 36#include <asm/timer.h>
36 37
37/* nop stub */ 38/* nop stub */
38static void native_nop(void) 39void _paravirt_nop(void)
39{ 40{
40} 41}
41 42
@@ -54,331 +55,148 @@ char *memory_setup(void)
54#define DEF_NATIVE(name, code) \ 55#define DEF_NATIVE(name, code) \
55 extern const char start_##name[], end_##name[]; \ 56 extern const char start_##name[], end_##name[]; \
56 asm("start_" #name ": " code "; end_" #name ":") 57 asm("start_" #name ": " code "; end_" #name ":")
57DEF_NATIVE(cli, "cli"); 58
58DEF_NATIVE(sti, "sti"); 59DEF_NATIVE(irq_disable, "cli");
59DEF_NATIVE(popf, "push %eax; popf"); 60DEF_NATIVE(irq_enable, "sti");
60DEF_NATIVE(pushf, "pushf; pop %eax"); 61DEF_NATIVE(restore_fl, "push %eax; popf");
61DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli"); 62DEF_NATIVE(save_fl, "pushf; pop %eax");
62DEF_NATIVE(iret, "iret"); 63DEF_NATIVE(iret, "iret");
63DEF_NATIVE(sti_sysexit, "sti; sysexit"); 64DEF_NATIVE(irq_enable_sysexit, "sti; sysexit");
65DEF_NATIVE(read_cr2, "mov %cr2, %eax");
66DEF_NATIVE(write_cr3, "mov %eax, %cr3");
67DEF_NATIVE(read_cr3, "mov %cr3, %eax");
68DEF_NATIVE(clts, "clts");
69DEF_NATIVE(read_tsc, "rdtsc");
64 70
65static const struct native_insns 71DEF_NATIVE(ud2a, "ud2a");
66{
67 const char *start, *end;
68} native_insns[] = {
69 [PARAVIRT_IRQ_DISABLE] = { start_cli, end_cli },
70 [PARAVIRT_IRQ_ENABLE] = { start_sti, end_sti },
71 [PARAVIRT_RESTORE_FLAGS] = { start_popf, end_popf },
72 [PARAVIRT_SAVE_FLAGS] = { start_pushf, end_pushf },
73 [PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = { start_pushf_cli, end_pushf_cli },
74 [PARAVIRT_INTERRUPT_RETURN] = { start_iret, end_iret },
75 [PARAVIRT_STI_SYSEXIT] = { start_sti_sysexit, end_sti_sysexit },
76};
77 72
78static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) 73static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
79{ 74{
80 unsigned int insn_len; 75 const unsigned char *start, *end;
81 76 unsigned ret;
82 /* Don't touch it if we don't have a replacement */ 77
83 if (type >= ARRAY_SIZE(native_insns) || !native_insns[type].start) 78 switch(type) {
84 return len; 79#define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site
85 80 SITE(irq_disable);
86 insn_len = native_insns[type].end - native_insns[type].start; 81 SITE(irq_enable);
87 82 SITE(restore_fl);
88 /* Similarly if we can't fit replacement. */ 83 SITE(save_fl);
89 if (len < insn_len) 84 SITE(iret);
90 return len; 85 SITE(irq_enable_sysexit);
86 SITE(read_cr2);
87 SITE(read_cr3);
88 SITE(write_cr3);
89 SITE(clts);
90 SITE(read_tsc);
91#undef SITE
92
93 patch_site:
94 ret = paravirt_patch_insns(insns, len, start, end);
95 break;
91 96
92 memcpy(insns, native_insns[type].start, insn_len); 97 case PARAVIRT_PATCH(make_pgd):
93 return insn_len; 98 case PARAVIRT_PATCH(make_pte):
94} 99 case PARAVIRT_PATCH(pgd_val):
100 case PARAVIRT_PATCH(pte_val):
101#ifdef CONFIG_X86_PAE
102 case PARAVIRT_PATCH(make_pmd):
103 case PARAVIRT_PATCH(pmd_val):
104#endif
105 /* These functions end up returning exactly what
106 they're passed, in the same registers. */
107 ret = paravirt_patch_nop();
108 break;
95 109
96static unsigned long native_get_debugreg(int regno)
97{
98 unsigned long val = 0; /* Damn you, gcc! */
99
100 switch (regno) {
101 case 0:
102 asm("movl %%db0, %0" :"=r" (val)); break;
103 case 1:
104 asm("movl %%db1, %0" :"=r" (val)); break;
105 case 2:
106 asm("movl %%db2, %0" :"=r" (val)); break;
107 case 3:
108 asm("movl %%db3, %0" :"=r" (val)); break;
109 case 6:
110 asm("movl %%db6, %0" :"=r" (val)); break;
111 case 7:
112 asm("movl %%db7, %0" :"=r" (val)); break;
113 default: 110 default:
114 BUG(); 111 ret = paravirt_patch_default(type, clobbers, insns, len);
115 }
116 return val;
117}
118
119static void native_set_debugreg(int regno, unsigned long value)
120{
121 switch (regno) {
122 case 0:
123 asm("movl %0,%%db0" : /* no output */ :"r" (value));
124 break;
125 case 1:
126 asm("movl %0,%%db1" : /* no output */ :"r" (value));
127 break;
128 case 2:
129 asm("movl %0,%%db2" : /* no output */ :"r" (value));
130 break; 112 break;
131 case 3:
132 asm("movl %0,%%db3" : /* no output */ :"r" (value));
133 break;
134 case 6:
135 asm("movl %0,%%db6" : /* no output */ :"r" (value));
136 break;
137 case 7:
138 asm("movl %0,%%db7" : /* no output */ :"r" (value));
139 break;
140 default:
141 BUG();
142 } 113 }
143}
144
145void init_IRQ(void)
146{
147 paravirt_ops.init_IRQ();
148}
149
150static void native_clts(void)
151{
152 asm volatile ("clts");
153}
154
155static unsigned long native_read_cr0(void)
156{
157 unsigned long val;
158 asm volatile("movl %%cr0,%0\n\t" :"=r" (val));
159 return val;
160}
161
162static void native_write_cr0(unsigned long val)
163{
164 asm volatile("movl %0,%%cr0": :"r" (val));
165}
166
167static unsigned long native_read_cr2(void)
168{
169 unsigned long val;
170 asm volatile("movl %%cr2,%0\n\t" :"=r" (val));
171 return val;
172}
173
174static void native_write_cr2(unsigned long val)
175{
176 asm volatile("movl %0,%%cr2": :"r" (val));
177}
178
179static unsigned long native_read_cr3(void)
180{
181 unsigned long val;
182 asm volatile("movl %%cr3,%0\n\t" :"=r" (val));
183 return val;
184}
185
186static void native_write_cr3(unsigned long val)
187{
188 asm volatile("movl %0,%%cr3": :"r" (val));
189}
190
191static unsigned long native_read_cr4(void)
192{
193 unsigned long val;
194 asm volatile("movl %%cr4,%0\n\t" :"=r" (val));
195 return val;
196}
197
198static unsigned long native_read_cr4_safe(void)
199{
200 unsigned long val;
201 /* This could fault if %cr4 does not exist */
202 asm("1: movl %%cr4, %0 \n"
203 "2: \n"
204 ".section __ex_table,\"a\" \n"
205 ".long 1b,2b \n"
206 ".previous \n"
207 : "=r" (val): "0" (0));
208 return val;
209}
210
211static void native_write_cr4(unsigned long val)
212{
213 asm volatile("movl %0,%%cr4": :"r" (val));
214}
215
216static unsigned long native_save_fl(void)
217{
218 unsigned long f;
219 asm volatile("pushfl ; popl %0":"=g" (f): /* no input */);
220 return f;
221}
222
223static void native_restore_fl(unsigned long f)
224{
225 asm volatile("pushl %0 ; popfl": /* no output */
226 :"g" (f)
227 :"memory", "cc");
228}
229
230static void native_irq_disable(void)
231{
232 asm volatile("cli": : :"memory");
233}
234
235static void native_irq_enable(void)
236{
237 asm volatile("sti": : :"memory");
238}
239
240static void native_safe_halt(void)
241{
242 asm volatile("sti; hlt": : :"memory");
243}
244 114
245static void native_halt(void) 115 return ret;
246{
247 asm volatile("hlt": : :"memory");
248} 116}
249 117
250static void native_wbinvd(void) 118unsigned paravirt_patch_nop(void)
251{ 119{
252 asm volatile("wbinvd": : :"memory"); 120 return 0;
253} 121}
254 122
255static unsigned long long native_read_msr(unsigned int msr, int *err) 123unsigned paravirt_patch_ignore(unsigned len)
256{ 124{
257 unsigned long long val; 125 return len;
258
259 asm volatile("2: rdmsr ; xorl %0,%0\n"
260 "1:\n\t"
261 ".section .fixup,\"ax\"\n\t"
262 "3: movl %3,%0 ; jmp 1b\n\t"
263 ".previous\n\t"
264 ".section __ex_table,\"a\"\n"
265 " .align 4\n\t"
266 " .long 2b,3b\n\t"
267 ".previous"
268 : "=r" (*err), "=A" (val)
269 : "c" (msr), "i" (-EFAULT));
270
271 return val;
272} 126}
273 127
274static int native_write_msr(unsigned int msr, unsigned long long val) 128unsigned paravirt_patch_call(void *target, u16 tgt_clobbers,
129 void *site, u16 site_clobbers,
130 unsigned len)
275{ 131{
276 int err; 132 unsigned char *call = site;
277 asm volatile("2: wrmsr ; xorl %0,%0\n" 133 unsigned long delta = (unsigned long)target - (unsigned long)(call+5);
278 "1:\n\t"
279 ".section .fixup,\"ax\"\n\t"
280 "3: movl %4,%0 ; jmp 1b\n\t"
281 ".previous\n\t"
282 ".section __ex_table,\"a\"\n"
283 " .align 4\n\t"
284 " .long 2b,3b\n\t"
285 ".previous"
286 : "=a" (err)
287 : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)),
288 "i" (-EFAULT));
289 return err;
290}
291 134
292static unsigned long long native_read_tsc(void) 135 if (tgt_clobbers & ~site_clobbers)
293{ 136 return len; /* target would clobber too much for this site */
294 unsigned long long val; 137 if (len < 5)
295 asm volatile("rdtsc" : "=A" (val)); 138 return len; /* call too long for patch site */
296 return val;
297}
298 139
299static unsigned long long native_read_pmc(void) 140 *call++ = 0xe8; /* call */
300{ 141 *(unsigned long *)call = delta;
301 unsigned long long val;
302 asm volatile("rdpmc" : "=A" (val));
303 return val;
304}
305 142
306static void native_load_tr_desc(void) 143 return 5;
307{
308 asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
309} 144}
310 145
311static void native_load_gdt(const struct Xgt_desc_struct *dtr) 146unsigned paravirt_patch_jmp(void *target, void *site, unsigned len)
312{ 147{
313 asm volatile("lgdt %0"::"m" (*dtr)); 148 unsigned char *jmp = site;
314} 149 unsigned long delta = (unsigned long)target - (unsigned long)(jmp+5);
315 150
316static void native_load_idt(const struct Xgt_desc_struct *dtr) 151 if (len < 5)
317{ 152 return len; /* call too long for patch site */
318 asm volatile("lidt %0"::"m" (*dtr));
319}
320 153
321static void native_store_gdt(struct Xgt_desc_struct *dtr) 154 *jmp++ = 0xe9; /* jmp */
322{ 155 *(unsigned long *)jmp = delta;
323 asm ("sgdt %0":"=m" (*dtr));
324}
325 156
326static void native_store_idt(struct Xgt_desc_struct *dtr) 157 return 5;
327{
328 asm ("sidt %0":"=m" (*dtr));
329} 158}
330 159
331static unsigned long native_store_tr(void) 160unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len)
332{ 161{
333 unsigned long tr; 162 void *opfunc = *((void **)&paravirt_ops + type);
334 asm ("str %0":"=r" (tr)); 163 unsigned ret;
335 return tr;
336}
337 164
338static void native_load_tls(struct thread_struct *t, unsigned int cpu) 165 if (opfunc == NULL)
339{ 166 /* If there's no function, patch it with a ud2a (BUG) */
340#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] 167 ret = paravirt_patch_insns(site, len, start_ud2a, end_ud2a);
341 C(0); C(1); C(2); 168 else if (opfunc == paravirt_nop)
342#undef C 169 /* If the operation is a nop, then nop the callsite */
343} 170 ret = paravirt_patch_nop();
171 else if (type == PARAVIRT_PATCH(iret) ||
172 type == PARAVIRT_PATCH(irq_enable_sysexit))
173 /* If operation requires a jmp, then jmp */
174 ret = paravirt_patch_jmp(opfunc, site, len);
175 else
176 /* Otherwise call the function; assume target could
177 clobber any caller-save reg */
178 ret = paravirt_patch_call(opfunc, CLBR_ANY,
179 site, clobbers, len);
344 180
345static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high) 181 return ret;
346{
347 u32 *lp = (u32 *)((char *)dt + entry*8);
348 lp[0] = entry_low;
349 lp[1] = entry_high;
350} 182}
351 183
352static void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) 184unsigned paravirt_patch_insns(void *site, unsigned len,
185 const char *start, const char *end)
353{ 186{
354 native_write_dt_entry(dt, entrynum, low, high); 187 unsigned insn_len = end - start;
355}
356 188
357static void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high) 189 if (insn_len > len || start == NULL)
358{ 190 insn_len = len;
359 native_write_dt_entry(dt, entrynum, low, high); 191 else
360} 192 memcpy(site, start, insn_len);
361
362static void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high)
363{
364 native_write_dt_entry(dt, entrynum, low, high);
365}
366 193
367static void native_load_esp0(struct tss_struct *tss, 194 return insn_len;
368 struct thread_struct *thread)
369{
370 tss->esp0 = thread->esp0;
371
372 /* This can only happen when SEP is enabled, no need to test "SEP"arately */
373 if (unlikely(tss->ss1 != thread->sysenter_cs)) {
374 tss->ss1 = thread->sysenter_cs;
375 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
376 }
377} 195}
378 196
379static void native_io_delay(void) 197void init_IRQ(void)
380{ 198{
381 asm volatile("outb %al,$0x80"); 199 paravirt_ops.init_IRQ();
382} 200}
383 201
384static void native_flush_tlb(void) 202static void native_flush_tlb(void)
@@ -395,83 +213,11 @@ static void native_flush_tlb_global(void)
395 __native_flush_tlb_global(); 213 __native_flush_tlb_global();
396} 214}
397 215
398static void native_flush_tlb_single(u32 addr) 216static void native_flush_tlb_single(unsigned long addr)
399{ 217{
400 __native_flush_tlb_single(addr); 218 __native_flush_tlb_single(addr);
401} 219}
402 220
403#ifndef CONFIG_X86_PAE
404static void native_set_pte(pte_t *ptep, pte_t pteval)
405{
406 *ptep = pteval;
407}
408
409static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval)
410{
411 *ptep = pteval;
412}
413
414static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
415{
416 *pmdp = pmdval;
417}
418
419#else /* CONFIG_X86_PAE */
420
421static void native_set_pte(pte_t *ptep, pte_t pte)
422{
423 ptep->pte_high = pte.pte_high;
424 smp_wmb();
425 ptep->pte_low = pte.pte_low;
426}
427
428static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
429{
430 ptep->pte_high = pte.pte_high;
431 smp_wmb();
432 ptep->pte_low = pte.pte_low;
433}
434
435static void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
436{
437 ptep->pte_low = 0;
438 smp_wmb();
439 ptep->pte_high = pte.pte_high;
440 smp_wmb();
441 ptep->pte_low = pte.pte_low;
442}
443
444static void native_set_pte_atomic(pte_t *ptep, pte_t pteval)
445{
446 set_64bit((unsigned long long *)ptep,pte_val(pteval));
447}
448
449static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
450{
451 set_64bit((unsigned long long *)pmdp,pmd_val(pmdval));
452}
453
454static void native_set_pud(pud_t *pudp, pud_t pudval)
455{
456 *pudp = pudval;
457}
458
459static void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
460{
461 ptep->pte_low = 0;
462 smp_wmb();
463 ptep->pte_high = 0;
464}
465
466static void native_pmd_clear(pmd_t *pmd)
467{
468 u32 *tmp = (u32 *)pmd;
469 *tmp = 0;
470 smp_wmb();
471 *(tmp + 1) = 0;
472}
473#endif /* CONFIG_X86_PAE */
474
475/* These are in entry.S */ 221/* These are in entry.S */
476extern void native_iret(void); 222extern void native_iret(void);
477extern void native_irq_enable_sysexit(void); 223extern void native_irq_enable_sysexit(void);
@@ -487,10 +233,11 @@ struct paravirt_ops paravirt_ops = {
487 .name = "bare hardware", 233 .name = "bare hardware",
488 .paravirt_enabled = 0, 234 .paravirt_enabled = 0,
489 .kernel_rpl = 0, 235 .kernel_rpl = 0,
236 .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
490 237
491 .patch = native_patch, 238 .patch = native_patch,
492 .banner = default_banner, 239 .banner = default_banner,
493 .arch_setup = native_nop, 240 .arch_setup = paravirt_nop,
494 .memory_setup = machine_specific_memory_setup, 241 .memory_setup = machine_specific_memory_setup,
495 .get_wallclock = native_get_wallclock, 242 .get_wallclock = native_get_wallclock,
496 .set_wallclock = native_set_wallclock, 243 .set_wallclock = native_set_wallclock,
@@ -517,8 +264,8 @@ struct paravirt_ops paravirt_ops = {
517 .safe_halt = native_safe_halt, 264 .safe_halt = native_safe_halt,
518 .halt = native_halt, 265 .halt = native_halt,
519 .wbinvd = native_wbinvd, 266 .wbinvd = native_wbinvd,
520 .read_msr = native_read_msr, 267 .read_msr = native_read_msr_safe,
521 .write_msr = native_write_msr, 268 .write_msr = native_write_msr_safe,
522 .read_tsc = native_read_tsc, 269 .read_tsc = native_read_tsc,
523 .read_pmc = native_read_pmc, 270 .read_pmc = native_read_pmc,
524 .get_scheduled_cycles = native_read_tsc, 271 .get_scheduled_cycles = native_read_tsc,
@@ -531,9 +278,9 @@ struct paravirt_ops paravirt_ops = {
531 .store_idt = native_store_idt, 278 .store_idt = native_store_idt,
532 .store_tr = native_store_tr, 279 .store_tr = native_store_tr,
533 .load_tls = native_load_tls, 280 .load_tls = native_load_tls,
534 .write_ldt_entry = native_write_ldt_entry, 281 .write_ldt_entry = write_dt_entry,
535 .write_gdt_entry = native_write_gdt_entry, 282 .write_gdt_entry = write_dt_entry,
536 .write_idt_entry = native_write_idt_entry, 283 .write_idt_entry = write_dt_entry,
537 .load_esp0 = native_load_esp0, 284 .load_esp0 = native_load_esp0,
538 285
539 .set_iopl_mask = native_set_iopl_mask, 286 .set_iopl_mask = native_set_iopl_mask,
@@ -545,44 +292,57 @@ struct paravirt_ops paravirt_ops = {
545 .apic_read = native_apic_read, 292 .apic_read = native_apic_read,
546 .setup_boot_clock = setup_boot_APIC_clock, 293 .setup_boot_clock = setup_boot_APIC_clock,
547 .setup_secondary_clock = setup_secondary_APIC_clock, 294 .setup_secondary_clock = setup_secondary_APIC_clock,
295 .startup_ipi_hook = paravirt_nop,
548#endif 296#endif
549 .set_lazy_mode = (void *)native_nop, 297 .set_lazy_mode = paravirt_nop,
298
299 .pagetable_setup_start = native_pagetable_setup_start,
300 .pagetable_setup_done = native_pagetable_setup_done,
550 301
551 .flush_tlb_user = native_flush_tlb, 302 .flush_tlb_user = native_flush_tlb,
552 .flush_tlb_kernel = native_flush_tlb_global, 303 .flush_tlb_kernel = native_flush_tlb_global,
553 .flush_tlb_single = native_flush_tlb_single, 304 .flush_tlb_single = native_flush_tlb_single,
305 .flush_tlb_others = native_flush_tlb_others,
554 306
555 .map_pt_hook = (void *)native_nop, 307 .alloc_pt = paravirt_nop,
556 308 .alloc_pd = paravirt_nop,
557 .alloc_pt = (void *)native_nop, 309 .alloc_pd_clone = paravirt_nop,
558 .alloc_pd = (void *)native_nop, 310 .release_pt = paravirt_nop,
559 .alloc_pd_clone = (void *)native_nop, 311 .release_pd = paravirt_nop,
560 .release_pt = (void *)native_nop,
561 .release_pd = (void *)native_nop,
562 312
563 .set_pte = native_set_pte, 313 .set_pte = native_set_pte,
564 .set_pte_at = native_set_pte_at, 314 .set_pte_at = native_set_pte_at,
565 .set_pmd = native_set_pmd, 315 .set_pmd = native_set_pmd,
566 .pte_update = (void *)native_nop, 316 .pte_update = paravirt_nop,
567 .pte_update_defer = (void *)native_nop, 317 .pte_update_defer = paravirt_nop,
318
319#ifdef CONFIG_HIGHPTE
320 .kmap_atomic_pte = kmap_atomic,
321#endif
322
568#ifdef CONFIG_X86_PAE 323#ifdef CONFIG_X86_PAE
569 .set_pte_atomic = native_set_pte_atomic, 324 .set_pte_atomic = native_set_pte_atomic,
570 .set_pte_present = native_set_pte_present, 325 .set_pte_present = native_set_pte_present,
571 .set_pud = native_set_pud, 326 .set_pud = native_set_pud,
572 .pte_clear = native_pte_clear, 327 .pte_clear = native_pte_clear,
573 .pmd_clear = native_pmd_clear, 328 .pmd_clear = native_pmd_clear,
329
330 .pmd_val = native_pmd_val,
331 .make_pmd = native_make_pmd,
574#endif 332#endif
575 333
334 .pte_val = native_pte_val,
335 .pgd_val = native_pgd_val,
336
337 .make_pte = native_make_pte,
338 .make_pgd = native_make_pgd,
339
576 .irq_enable_sysexit = native_irq_enable_sysexit, 340 .irq_enable_sysexit = native_irq_enable_sysexit,
577 .iret = native_iret, 341 .iret = native_iret,
578 342
579 .startup_ipi_hook = (void *)native_nop, 343 .dup_mmap = paravirt_nop,
344 .exit_mmap = paravirt_nop,
345 .activate_mm = paravirt_nop,
580}; 346};
581 347
582/* 348EXPORT_SYMBOL(paravirt_ops);
583 * NOTE: CONFIG_PARAVIRT is experimental and the paravirt_ops
584 * semantics are subject to change. Hence we only do this
585 * internal-only export of this, until it gets sorted out and
586 * all lowlevel CPU ops used by modules are separately exported.
587 */
588EXPORT_SYMBOL_GPL(paravirt_ops);
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 393a67d5d943..61999479b7a4 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -39,6 +39,7 @@
39#include <linux/random.h> 39#include <linux/random.h>
40#include <linux/personality.h> 40#include <linux/personality.h>
41#include <linux/tick.h> 41#include <linux/tick.h>
42#include <linux/percpu.h>
42 43
43#include <asm/uaccess.h> 44#include <asm/uaccess.h>
44#include <asm/pgtable.h> 45#include <asm/pgtable.h>
@@ -57,7 +58,6 @@
57 58
58#include <asm/tlbflush.h> 59#include <asm/tlbflush.h>
59#include <asm/cpu.h> 60#include <asm/cpu.h>
60#include <asm/pda.h>
61 61
62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
63 63
@@ -66,6 +66,12 @@ static int hlt_counter;
66unsigned long boot_option_idle_override = 0; 66unsigned long boot_option_idle_override = 0;
67EXPORT_SYMBOL(boot_option_idle_override); 67EXPORT_SYMBOL(boot_option_idle_override);
68 68
69DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
70EXPORT_PER_CPU_SYMBOL(current_task);
71
72DEFINE_PER_CPU(int, cpu_number);
73EXPORT_PER_CPU_SYMBOL(cpu_number);
74
69/* 75/*
70 * Return saved PC of a blocked thread. 76 * Return saved PC of a blocked thread.
71 */ 77 */
@@ -272,25 +278,24 @@ void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
272 } 278 }
273} 279}
274 280
275static int __init idle_setup (char *str) 281static int __init idle_setup(char *str)
276{ 282{
277 if (!strncmp(str, "poll", 4)) { 283 if (!strcmp(str, "poll")) {
278 printk("using polling idle threads.\n"); 284 printk("using polling idle threads.\n");
279 pm_idle = poll_idle; 285 pm_idle = poll_idle;
280#ifdef CONFIG_X86_SMP 286#ifdef CONFIG_X86_SMP
281 if (smp_num_siblings > 1) 287 if (smp_num_siblings > 1)
282 printk("WARNING: polling idle and HT enabled, performance may degrade.\n"); 288 printk("WARNING: polling idle and HT enabled, performance may degrade.\n");
283#endif 289#endif
284 } else if (!strncmp(str, "halt", 4)) { 290 } else if (!strcmp(str, "mwait"))
285 printk("using halt in idle threads.\n"); 291 force_mwait = 1;
286 pm_idle = default_idle; 292 else
287 } 293 return -1;
288 294
289 boot_option_idle_override = 1; 295 boot_option_idle_override = 1;
290 return 1; 296 return 0;
291} 297}
292 298early_param("idle", idle_setup);
293__setup("idle=", idle_setup);
294 299
295void show_regs(struct pt_regs * regs) 300void show_regs(struct pt_regs * regs)
296{ 301{
@@ -343,7 +348,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
343 348
344 regs.xds = __USER_DS; 349 regs.xds = __USER_DS;
345 regs.xes = __USER_DS; 350 regs.xes = __USER_DS;
346 regs.xfs = __KERNEL_PDA; 351 regs.xfs = __KERNEL_PERCPU;
347 regs.orig_eax = -1; 352 regs.orig_eax = -1;
348 regs.eip = (unsigned long) kernel_thread_helper; 353 regs.eip = (unsigned long) kernel_thread_helper;
349 regs.xcs = __KERNEL_CS | get_kernel_rpl(); 354 regs.xcs = __KERNEL_CS | get_kernel_rpl();
@@ -376,7 +381,7 @@ void exit_thread(void)
376 t->io_bitmap_max = 0; 381 t->io_bitmap_max = 0;
377 tss->io_bitmap_owner = NULL; 382 tss->io_bitmap_owner = NULL;
378 tss->io_bitmap_max = 0; 383 tss->io_bitmap_max = 0;
379 tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; 384 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
380 put_cpu(); 385 put_cpu();
381 } 386 }
382} 387}
@@ -555,7 +560,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p,
555 * Disable the bitmap via an invalid offset. We still cache 560 * Disable the bitmap via an invalid offset. We still cache
556 * the previous bitmap owner and the IO bitmap contents: 561 * the previous bitmap owner and the IO bitmap contents:
557 */ 562 */
558 tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; 563 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
559 return; 564 return;
560 } 565 }
561 566
@@ -565,7 +570,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p,
565 * matches the next task, we dont have to do anything but 570 * matches the next task, we dont have to do anything but
566 * to set a valid offset in the TSS: 571 * to set a valid offset in the TSS:
567 */ 572 */
568 tss->io_bitmap_base = IO_BITMAP_OFFSET; 573 tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
569 return; 574 return;
570 } 575 }
571 /* 576 /*
@@ -577,7 +582,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p,
577 * redundant copies when the currently switched task does not 582 * redundant copies when the currently switched task does not
578 * perform any I/O during its timeslice. 583 * perform any I/O during its timeslice.
579 */ 584 */
580 tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; 585 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
581} 586}
582 587
583/* 588/*
@@ -712,7 +717,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
712 if (prev->gs | next->gs) 717 if (prev->gs | next->gs)
713 loadsegment(gs, next->gs); 718 loadsegment(gs, next->gs);
714 719
715 write_pda(pcurrent, next_p); 720 x86_write_percpu(current_task, next_p);
716 721
717 return prev_p; 722 return prev_p;
718} 723}
diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c
index 34874c398b44..9f6ab1789bb0 100644
--- a/arch/i386/kernel/quirks.c
+++ b/arch/i386/kernel/quirks.c
@@ -3,12 +3,10 @@
3 */ 3 */
4#include <linux/pci.h> 4#include <linux/pci.h>
5#include <linux/irq.h> 5#include <linux/irq.h>
6#include <asm/pci-direct.h>
7#include <asm/genapic.h>
8#include <asm/cpu.h>
9 6
10#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) 7#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
11static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) 8
9static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
12{ 10{
13 u8 config, rev; 11 u8 config, rev;
14 u32 word; 12 u32 word;
@@ -16,12 +14,14 @@ static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev)
16 /* BIOS may enable hardware IRQ balancing for 14 /* BIOS may enable hardware IRQ balancing for
17 * E7520/E7320/E7525(revision ID 0x9 and below) 15 * E7520/E7320/E7525(revision ID 0x9 and below)
18 * based platforms. 16 * based platforms.
19 * For those platforms, make sure that the genapic is set to 'flat' 17 * Disable SW irqbalance/affinity on those platforms.
20 */ 18 */
21 pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); 19 pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
22 if (rev > 0x9) 20 if (rev > 0x9)
23 return; 21 return;
24 22
23 printk(KERN_INFO "Intel E7520/7320/7525 detected.");
24
25 /* enable access to config space*/ 25 /* enable access to config space*/
26 pci_read_config_byte(dev, 0xf4, &config); 26 pci_read_config_byte(dev, 0xf4, &config);
27 pci_write_config_byte(dev, 0xf4, config|0x2); 27 pci_write_config_byte(dev, 0xf4, config|0x2);
@@ -30,44 +30,6 @@ static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev)
30 raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); 30 raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
31 31
32 if (!(word & (1 << 13))) { 32 if (!(word & (1 << 13))) {
33#ifdef CONFIG_X86_64
34 if (genapic != &apic_flat)
35 panic("APIC mode must be flat on this system\n");
36#elif defined(CONFIG_X86_GENERICARCH)
37 if (genapic != &apic_default)
38 panic("APIC mode must be default(flat) on this system. Use apic=default\n");
39#endif
40 }
41
42 /* put back the original value for config space*/
43 if (!(config & 0x2))
44 pci_write_config_byte(dev, 0xf4, config);
45}
46
47void __init quirk_intel_irqbalance(void)
48{
49 u8 config, rev;
50 u32 word;
51
52 /* BIOS may enable hardware IRQ balancing for
53 * E7520/E7320/E7525(revision ID 0x9 and below)
54 * based platforms.
55 * Disable SW irqbalance/affinity on those platforms.
56 */
57 rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION);
58 if (rev > 0x9)
59 return;
60
61 printk(KERN_INFO "Intel E7520/7320/7525 detected.");
62
63 /* enable access to config space */
64 config = read_pci_config_byte(0, 0, 0, 0xf4);
65 write_pci_config_byte(0, 0, 0, 0xf4, config|0x2);
66
67 /* read xTPR register */
68 word = read_pci_config_16(0, 0, 0x40, 0x4c);
69
70 if (!(word & (1 << 13))) {
71 printk(KERN_INFO "Disabling irq balancing and affinity\n"); 33 printk(KERN_INFO "Disabling irq balancing and affinity\n");
72#ifdef CONFIG_IRQBALANCE 34#ifdef CONFIG_IRQBALANCE
73 irqbalance_disable(""); 35 irqbalance_disable("");
@@ -76,24 +38,13 @@ void __init quirk_intel_irqbalance(void)
76#ifdef CONFIG_PROC_FS 38#ifdef CONFIG_PROC_FS
77 no_irq_affinity = 1; 39 no_irq_affinity = 1;
78#endif 40#endif
79#ifdef CONFIG_HOTPLUG_CPU
80 printk(KERN_INFO "Disabling cpu hotplug control\n");
81 enable_cpu_hotplug = 0;
82#endif
83#ifdef CONFIG_X86_64
84 /* force the genapic selection to flat mode so that
85 * interrupts can be redirected to more than one CPU.
86 */
87 genapic_force = &apic_flat;
88#endif
89 } 41 }
90 42
91 /* put back the original value for config space */ 43 /* put back the original value for config space*/
92 if (!(config & 0x2)) 44 if (!(config & 0x2))
93 write_pci_config_byte(0, 0, 0, 0xf4, config); 45 pci_write_config_byte(dev, 0xf4, config);
94} 46}
95DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance); 47DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance);
96DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance); 48DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance);
97DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance); 49DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance);
98
99#endif 50#endif
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index 3514b4153f7f..50dfc65319cd 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -17,7 +17,8 @@
17#include <asm/apic.h> 17#include <asm/apic.h>
18#include <asm/desc.h> 18#include <asm/desc.h>
19#include "mach_reboot.h" 19#include "mach_reboot.h"
20#include <linux/reboot_fixups.h> 20#include <asm/reboot_fixups.h>
21#include <asm/reboot.h>
21 22
22/* 23/*
23 * Power off function, if any 24 * Power off function, if any
@@ -197,8 +198,6 @@ static unsigned char jump_to_bios [] =
197 */ 198 */
198void machine_real_restart(unsigned char *code, int length) 199void machine_real_restart(unsigned char *code, int length)
199{ 200{
200 unsigned long flags;
201
202 local_irq_disable(); 201 local_irq_disable();
203 202
204 /* Write zero to CMOS register number 0x0f, which the BIOS POST 203 /* Write zero to CMOS register number 0x0f, which the BIOS POST
@@ -211,9 +210,9 @@ void machine_real_restart(unsigned char *code, int length)
211 safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) 210 safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.)
212 */ 211 */
213 212
214 spin_lock_irqsave(&rtc_lock, flags); 213 spin_lock(&rtc_lock);
215 CMOS_WRITE(0x00, 0x8f); 214 CMOS_WRITE(0x00, 0x8f);
216 spin_unlock_irqrestore(&rtc_lock, flags); 215 spin_unlock(&rtc_lock);
217 216
218 /* Remap the kernel at virtual address zero, as well as offset zero 217 /* Remap the kernel at virtual address zero, as well as offset zero
219 from the kernel segment. This assumes the kernel segment starts at 218 from the kernel segment. This assumes the kernel segment starts at
@@ -280,7 +279,7 @@ void machine_real_restart(unsigned char *code, int length)
280EXPORT_SYMBOL(machine_real_restart); 279EXPORT_SYMBOL(machine_real_restart);
281#endif 280#endif
282 281
283void machine_shutdown(void) 282static void native_machine_shutdown(void)
284{ 283{
285#ifdef CONFIG_SMP 284#ifdef CONFIG_SMP
286 int reboot_cpu_id; 285 int reboot_cpu_id;
@@ -316,7 +315,11 @@ void machine_shutdown(void)
316#endif 315#endif
317} 316}
318 317
319void machine_emergency_restart(void) 318void __attribute__((weak)) mach_reboot_fixups(void)
319{
320}
321
322static void native_machine_emergency_restart(void)
320{ 323{
321 if (!reboot_thru_bios) { 324 if (!reboot_thru_bios) {
322 if (efi_enabled) { 325 if (efi_enabled) {
@@ -340,17 +343,17 @@ void machine_emergency_restart(void)
340 machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); 343 machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
341} 344}
342 345
343void machine_restart(char * __unused) 346static void native_machine_restart(char * __unused)
344{ 347{
345 machine_shutdown(); 348 machine_shutdown();
346 machine_emergency_restart(); 349 machine_emergency_restart();
347} 350}
348 351
349void machine_halt(void) 352static void native_machine_halt(void)
350{ 353{
351} 354}
352 355
353void machine_power_off(void) 356static void native_machine_power_off(void)
354{ 357{
355 if (pm_power_off) { 358 if (pm_power_off) {
356 machine_shutdown(); 359 machine_shutdown();
@@ -359,3 +362,35 @@ void machine_power_off(void)
359} 362}
360 363
361 364
365struct machine_ops machine_ops = {
366 .power_off = native_machine_power_off,
367 .shutdown = native_machine_shutdown,
368 .emergency_restart = native_machine_emergency_restart,
369 .restart = native_machine_restart,
370 .halt = native_machine_halt,
371};
372
373void machine_power_off(void)
374{
375 machine_ops.power_off();
376}
377
378void machine_shutdown(void)
379{
380 machine_ops.shutdown();
381}
382
383void machine_emergency_restart(void)
384{
385 machine_ops.emergency_restart();
386}
387
388void machine_restart(char *cmd)
389{
390 machine_ops.restart(cmd);
391}
392
393void machine_halt(void)
394{
395 machine_ops.halt();
396}
diff --git a/arch/i386/kernel/reboot_fixups.c b/arch/i386/kernel/reboot_fixups.c
index 99aab41a05b0..2d78d918340f 100644
--- a/arch/i386/kernel/reboot_fixups.c
+++ b/arch/i386/kernel/reboot_fixups.c
@@ -10,7 +10,7 @@
10 10
11#include <asm/delay.h> 11#include <asm/delay.h>
12#include <linux/pci.h> 12#include <linux/pci.h>
13#include <linux/reboot_fixups.h> 13#include <asm/reboot_fixups.h>
14 14
15static void cs5530a_warm_reset(struct pci_dev *dev) 15static void cs5530a_warm_reset(struct pci_dev *dev)
16{ 16{
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 0e8977871b1f..89a45a9ddcd4 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -165,20 +165,20 @@ void fastcall send_IPI_self(int vector)
165} 165}
166 166
167/* 167/*
168 * This is only used on smaller machines. 168 * This is used to send an IPI with no shorthand notation (the destination is
169 * specified in bits 56 to 63 of the ICR).
169 */ 170 */
170void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) 171static inline void __send_IPI_dest_field(unsigned long mask, int vector)
171{ 172{
172 unsigned long mask = cpus_addr(cpumask)[0];
173 unsigned long cfg; 173 unsigned long cfg;
174 unsigned long flags;
175 174
176 local_irq_save(flags);
177 WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
178 /* 175 /*
179 * Wait for idle. 176 * Wait for idle.
180 */ 177 */
181 apic_wait_icr_idle(); 178 if (unlikely(vector == NMI_VECTOR))
179 safe_apic_wait_icr_idle();
180 else
181 apic_wait_icr_idle();
182 182
183 /* 183 /*
184 * prepare target chip field 184 * prepare target chip field
@@ -195,13 +195,25 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
195 * Send the IPI. The write to APIC_ICR fires this off. 195 * Send the IPI. The write to APIC_ICR fires this off.
196 */ 196 */
197 apic_write_around(APIC_ICR, cfg); 197 apic_write_around(APIC_ICR, cfg);
198}
199
200/*
201 * This is only used on smaller machines.
202 */
203void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
204{
205 unsigned long mask = cpus_addr(cpumask)[0];
206 unsigned long flags;
198 207
208 local_irq_save(flags);
209 WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
210 __send_IPI_dest_field(mask, vector);
199 local_irq_restore(flags); 211 local_irq_restore(flags);
200} 212}
201 213
202void send_IPI_mask_sequence(cpumask_t mask, int vector) 214void send_IPI_mask_sequence(cpumask_t mask, int vector)
203{ 215{
204 unsigned long cfg, flags; 216 unsigned long flags;
205 unsigned int query_cpu; 217 unsigned int query_cpu;
206 218
207 /* 219 /*
@@ -211,30 +223,10 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
211 */ 223 */
212 224
213 local_irq_save(flags); 225 local_irq_save(flags);
214
215 for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { 226 for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
216 if (cpu_isset(query_cpu, mask)) { 227 if (cpu_isset(query_cpu, mask)) {
217 228 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
218 /* 229 vector);
219 * Wait for idle.
220 */
221 apic_wait_icr_idle();
222
223 /*
224 * prepare target chip field
225 */
226 cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu));
227 apic_write_around(APIC_ICR2, cfg);
228
229 /*
230 * program the ICR
231 */
232 cfg = __prepare_ICR(0, vector);
233
234 /*
235 * Send the IPI. The write to APIC_ICR fires this off.
236 */
237 apic_write_around(APIC_ICR, cfg);
238 } 230 }
239 } 231 }
240 local_irq_restore(flags); 232 local_irq_restore(flags);
@@ -256,7 +248,6 @@ static cpumask_t flush_cpumask;
256static struct mm_struct * flush_mm; 248static struct mm_struct * flush_mm;
257static unsigned long flush_va; 249static unsigned long flush_va;
258static DEFINE_SPINLOCK(tlbstate_lock); 250static DEFINE_SPINLOCK(tlbstate_lock);
259#define FLUSH_ALL 0xffffffff
260 251
261/* 252/*
262 * We cannot call mmdrop() because we are in interrupt context, 253 * We cannot call mmdrop() because we are in interrupt context,
@@ -338,7 +329,7 @@ fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
338 329
339 if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { 330 if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
340 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { 331 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
341 if (flush_va == FLUSH_ALL) 332 if (flush_va == TLB_FLUSH_ALL)
342 local_flush_tlb(); 333 local_flush_tlb();
343 else 334 else
344 __flush_tlb_one(flush_va); 335 __flush_tlb_one(flush_va);
@@ -353,9 +344,11 @@ out:
353 put_cpu_no_resched(); 344 put_cpu_no_resched();
354} 345}
355 346
356static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, 347void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
357 unsigned long va) 348 unsigned long va)
358{ 349{
350 cpumask_t cpumask = *cpumaskp;
351
359 /* 352 /*
360 * A couple of (to be removed) sanity checks: 353 * A couple of (to be removed) sanity checks:
361 * 354 *
@@ -366,10 +359,12 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
366 BUG_ON(cpu_isset(smp_processor_id(), cpumask)); 359 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
367 BUG_ON(!mm); 360 BUG_ON(!mm);
368 361
362#ifdef CONFIG_HOTPLUG_CPU
369 /* If a CPU which we ran on has gone down, OK. */ 363 /* If a CPU which we ran on has gone down, OK. */
370 cpus_and(cpumask, cpumask, cpu_online_map); 364 cpus_and(cpumask, cpumask, cpu_online_map);
371 if (cpus_empty(cpumask)) 365 if (unlikely(cpus_empty(cpumask)))
372 return; 366 return;
367#endif
373 368
374 /* 369 /*
375 * i'm not happy about this global shared spinlock in the 370 * i'm not happy about this global shared spinlock in the
@@ -380,17 +375,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
380 375
381 flush_mm = mm; 376 flush_mm = mm;
382 flush_va = va; 377 flush_va = va;
383#if NR_CPUS <= BITS_PER_LONG 378 cpus_or(flush_cpumask, cpumask, flush_cpumask);
384 atomic_set_mask(cpumask, &flush_cpumask);
385#else
386 {
387 int k;
388 unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
389 unsigned long *cpu_mask = (unsigned long *)&cpumask;
390 for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
391 atomic_set_mask(cpu_mask[k], &flush_mask[k]);
392 }
393#endif
394 /* 379 /*
395 * We have to send the IPI only to 380 * We have to send the IPI only to
396 * CPUs affected. 381 * CPUs affected.
@@ -417,7 +402,7 @@ void flush_tlb_current_task(void)
417 402
418 local_flush_tlb(); 403 local_flush_tlb();
419 if (!cpus_empty(cpu_mask)) 404 if (!cpus_empty(cpu_mask))
420 flush_tlb_others(cpu_mask, mm, FLUSH_ALL); 405 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
421 preempt_enable(); 406 preempt_enable();
422} 407}
423 408
@@ -436,7 +421,7 @@ void flush_tlb_mm (struct mm_struct * mm)
436 leave_mm(smp_processor_id()); 421 leave_mm(smp_processor_id());
437 } 422 }
438 if (!cpus_empty(cpu_mask)) 423 if (!cpus_empty(cpu_mask))
439 flush_tlb_others(cpu_mask, mm, FLUSH_ALL); 424 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
440 425
441 preempt_enable(); 426 preempt_enable();
442} 427}
@@ -483,7 +468,7 @@ void flush_tlb_all(void)
483 * it goes straight through and wastes no time serializing 468 * it goes straight through and wastes no time serializing
484 * anything. Worst case is that we lose a reschedule ... 469 * anything. Worst case is that we lose a reschedule ...
485 */ 470 */
486void smp_send_reschedule(int cpu) 471void native_smp_send_reschedule(int cpu)
487{ 472{
488 WARN_ON(cpu_is_offline(cpu)); 473 WARN_ON(cpu_is_offline(cpu));
489 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); 474 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
@@ -515,36 +500,78 @@ void unlock_ipi_call_lock(void)
515 500
516static struct call_data_struct *call_data; 501static struct call_data_struct *call_data;
517 502
503static void __smp_call_function(void (*func) (void *info), void *info,
504 int nonatomic, int wait)
505{
506 struct call_data_struct data;
507 int cpus = num_online_cpus() - 1;
508
509 if (!cpus)
510 return;
511
512 data.func = func;
513 data.info = info;
514 atomic_set(&data.started, 0);
515 data.wait = wait;
516 if (wait)
517 atomic_set(&data.finished, 0);
518
519 call_data = &data;
520 mb();
521
522 /* Send a message to all other CPUs and wait for them to respond */
523 send_IPI_allbutself(CALL_FUNCTION_VECTOR);
524
525 /* Wait for response */
526 while (atomic_read(&data.started) != cpus)
527 cpu_relax();
528
529 if (wait)
530 while (atomic_read(&data.finished) != cpus)
531 cpu_relax();
532}
533
534
518/** 535/**
519 * smp_call_function(): Run a function on all other CPUs. 536 * smp_call_function_mask(): Run a function on a set of other CPUs.
537 * @mask: The set of cpus to run on. Must not include the current cpu.
520 * @func: The function to run. This must be fast and non-blocking. 538 * @func: The function to run. This must be fast and non-blocking.
521 * @info: An arbitrary pointer to pass to the function. 539 * @info: An arbitrary pointer to pass to the function.
522 * @nonatomic: currently unused.
523 * @wait: If true, wait (atomically) until function has completed on other CPUs. 540 * @wait: If true, wait (atomically) until function has completed on other CPUs.
524 * 541 *
525 * Returns 0 on success, else a negative status code. Does not return until 542 * Returns 0 on success, else a negative status code.
526 * remote CPUs are nearly ready to execute <<func>> or are or have executed. 543 *
544 * If @wait is true, then returns once @func has returned; otherwise
545 * it returns just before the target cpu calls @func.
527 * 546 *
528 * You must not call this function with disabled interrupts or from a 547 * You must not call this function with disabled interrupts or from a
529 * hardware interrupt handler or from a bottom half handler. 548 * hardware interrupt handler or from a bottom half handler.
530 */ 549 */
531int smp_call_function (void (*func) (void *info), void *info, int nonatomic, 550int native_smp_call_function_mask(cpumask_t mask,
532 int wait) 551 void (*func)(void *), void *info,
552 int wait)
533{ 553{
534 struct call_data_struct data; 554 struct call_data_struct data;
555 cpumask_t allbutself;
535 int cpus; 556 int cpus;
536 557
558 /* Can deadlock when called with interrupts disabled */
559 WARN_ON(irqs_disabled());
560
537 /* Holding any lock stops cpus from going down. */ 561 /* Holding any lock stops cpus from going down. */
538 spin_lock(&call_lock); 562 spin_lock(&call_lock);
539 cpus = num_online_cpus() - 1; 563
564 allbutself = cpu_online_map;
565 cpu_clear(smp_processor_id(), allbutself);
566
567 cpus_and(mask, mask, allbutself);
568 cpus = cpus_weight(mask);
569
540 if (!cpus) { 570 if (!cpus) {
541 spin_unlock(&call_lock); 571 spin_unlock(&call_lock);
542 return 0; 572 return 0;
543 } 573 }
544 574
545 /* Can deadlock when called with interrupts disabled */
546 WARN_ON(irqs_disabled());
547
548 data.func = func; 575 data.func = func;
549 data.info = info; 576 data.info = info;
550 atomic_set(&data.started, 0); 577 atomic_set(&data.started, 0);
@@ -554,9 +581,12 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
554 581
555 call_data = &data; 582 call_data = &data;
556 mb(); 583 mb();
557 584
558 /* Send a message to all other CPUs and wait for them to respond */ 585 /* Send a message to other CPUs */
559 send_IPI_allbutself(CALL_FUNCTION_VECTOR); 586 if (cpus_equal(mask, allbutself))
587 send_IPI_allbutself(CALL_FUNCTION_VECTOR);
588 else
589 send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
560 590
561 /* Wait for response */ 591 /* Wait for response */
562 while (atomic_read(&data.started) != cpus) 592 while (atomic_read(&data.started) != cpus)
@@ -569,15 +599,68 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
569 599
570 return 0; 600 return 0;
571} 601}
602
603/**
604 * smp_call_function(): Run a function on all other CPUs.
605 * @func: The function to run. This must be fast and non-blocking.
606 * @info: An arbitrary pointer to pass to the function.
607 * @nonatomic: Unused.
608 * @wait: If true, wait (atomically) until function has completed on other CPUs.
609 *
610 * Returns 0 on success, else a negative status code.
611 *
612 * If @wait is true, then returns once @func has returned; otherwise
613 * it returns just before the target cpu calls @func.
614 *
615 * You must not call this function with disabled interrupts or from a
616 * hardware interrupt handler or from a bottom half handler.
617 */
618int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
619 int wait)
620{
621 return smp_call_function_mask(cpu_online_map, func, info, wait);
622}
572EXPORT_SYMBOL(smp_call_function); 623EXPORT_SYMBOL(smp_call_function);
573 624
625/**
626 * smp_call_function_single - Run a function on another CPU
627 * @cpu: The target CPU. Cannot be the calling CPU.
628 * @func: The function to run. This must be fast and non-blocking.
629 * @info: An arbitrary pointer to pass to the function.
630 * @nonatomic: Unused.
631 * @wait: If true, wait until function has completed on other CPUs.
632 *
633 * Returns 0 on success, else a negative status code.
634 *
635 * If @wait is true, then returns once @func has returned; otherwise
636 * it returns just before the target cpu calls @func.
637 */
638int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
639 int nonatomic, int wait)
640{
641 /* prevent preemption and reschedule on another processor */
642 int ret;
643 int me = get_cpu();
644 if (cpu == me) {
645 WARN_ON(1);
646 put_cpu();
647 return -EBUSY;
648 }
649
650 ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
651
652 put_cpu();
653 return ret;
654}
655EXPORT_SYMBOL(smp_call_function_single);
656
574static void stop_this_cpu (void * dummy) 657static void stop_this_cpu (void * dummy)
575{ 658{
659 local_irq_disable();
576 /* 660 /*
577 * Remove this CPU: 661 * Remove this CPU:
578 */ 662 */
579 cpu_clear(smp_processor_id(), cpu_online_map); 663 cpu_clear(smp_processor_id(), cpu_online_map);
580 local_irq_disable();
581 disable_local_APIC(); 664 disable_local_APIC();
582 if (cpu_data[smp_processor_id()].hlt_works_ok) 665 if (cpu_data[smp_processor_id()].hlt_works_ok)
583 for(;;) halt(); 666 for(;;) halt();
@@ -588,13 +671,18 @@ static void stop_this_cpu (void * dummy)
588 * this function calls the 'stop' function on all other CPUs in the system. 671 * this function calls the 'stop' function on all other CPUs in the system.
589 */ 672 */
590 673
591void smp_send_stop(void) 674void native_smp_send_stop(void)
592{ 675{
593 smp_call_function(stop_this_cpu, NULL, 1, 0); 676 /* Don't deadlock on the call lock in panic */
677 int nolock = !spin_trylock(&call_lock);
678 unsigned long flags;
594 679
595 local_irq_disable(); 680 local_irq_save(flags);
681 __smp_call_function(stop_this_cpu, NULL, 0, 0);
682 if (!nolock)
683 spin_unlock(&call_lock);
596 disable_local_APIC(); 684 disable_local_APIC();
597 local_irq_enable(); 685 local_irq_restore(flags);
598} 686}
599 687
600/* 688/*
@@ -633,77 +721,6 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs)
633 } 721 }
634} 722}
635 723
636/*
637 * this function sends a 'generic call function' IPI to one other CPU
638 * in the system.
639 *
640 * cpu is a standard Linux logical CPU number.
641 */
642static void
643__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
644 int nonatomic, int wait)
645{
646 struct call_data_struct data;
647 int cpus = 1;
648
649 data.func = func;
650 data.info = info;
651 atomic_set(&data.started, 0);
652 data.wait = wait;
653 if (wait)
654 atomic_set(&data.finished, 0);
655
656 call_data = &data;
657 wmb();
658 /* Send a message to all other CPUs and wait for them to respond */
659 send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
660
661 /* Wait for response */
662 while (atomic_read(&data.started) != cpus)
663 cpu_relax();
664
665 if (!wait)
666 return;
667
668 while (atomic_read(&data.finished) != cpus)
669 cpu_relax();
670}
671
672/*
673 * smp_call_function_single - Run a function on another CPU
674 * @func: The function to run. This must be fast and non-blocking.
675 * @info: An arbitrary pointer to pass to the function.
676 * @nonatomic: Currently unused.
677 * @wait: If true, wait until function has completed on other CPUs.
678 *
679 * Retrurns 0 on success, else a negative status code.
680 *
681 * Does not return until the remote CPU is nearly ready to execute <func>
682 * or is or has executed.
683 */
684
685int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
686 int nonatomic, int wait)
687{
688 /* prevent preemption and reschedule on another processor */
689 int me = get_cpu();
690 if (cpu == me) {
691 WARN_ON(1);
692 put_cpu();
693 return -EBUSY;
694 }
695
696 /* Can deadlock when called with interrupts disabled */
697 WARN_ON(irqs_disabled());
698
699 spin_lock_bh(&call_lock);
700 __smp_call_function_single(cpu, func, info, nonatomic, wait);
701 spin_unlock_bh(&call_lock);
702 put_cpu();
703 return 0;
704}
705EXPORT_SYMBOL(smp_call_function_single);
706
707static int convert_apicid_to_cpu(int apic_id) 724static int convert_apicid_to_cpu(int apic_id)
708{ 725{
709 int i; 726 int i;
@@ -730,3 +747,14 @@ int safe_smp_processor_id(void)
730 747
731 return cpuid >= 0 ? cpuid : 0; 748 return cpuid >= 0 ? cpuid : 0;
732} 749}
750
751struct smp_ops smp_ops = {
752 .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
753 .smp_prepare_cpus = native_smp_prepare_cpus,
754 .cpu_up = native_cpu_up,
755 .smp_cpus_done = native_smp_cpus_done,
756
757 .smp_send_stop = native_smp_send_stop,
758 .smp_send_reschedule = native_smp_send_reschedule,
759 .smp_call_function_mask = native_smp_call_function_mask,
760};
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 4ff55e675576..a4b7ad283f49 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -53,13 +53,12 @@
53#include <asm/desc.h> 53#include <asm/desc.h>
54#include <asm/arch_hooks.h> 54#include <asm/arch_hooks.h>
55#include <asm/nmi.h> 55#include <asm/nmi.h>
56#include <asm/pda.h>
57#include <asm/genapic.h>
58 56
59#include <mach_apic.h> 57#include <mach_apic.h>
60#include <mach_wakecpu.h> 58#include <mach_wakecpu.h>
61#include <smpboot_hooks.h> 59#include <smpboot_hooks.h>
62#include <asm/vmi.h> 60#include <asm/vmi.h>
61#include <asm/mtrr.h>
63 62
64/* Set if we find a B stepping CPU */ 63/* Set if we find a B stepping CPU */
65static int __devinitdata smp_b_stepping; 64static int __devinitdata smp_b_stepping;
@@ -100,6 +99,9 @@ EXPORT_SYMBOL(x86_cpu_to_apicid);
100 99
101u8 apicid_2_node[MAX_APICID]; 100u8 apicid_2_node[MAX_APICID];
102 101
102DEFINE_PER_CPU(unsigned long, this_cpu_off);
103EXPORT_PER_CPU_SYMBOL(this_cpu_off);
104
103/* 105/*
104 * Trampoline 80x86 program as an array. 106 * Trampoline 80x86 program as an array.
105 */ 107 */
@@ -156,7 +158,7 @@ static void __cpuinit smp_store_cpu_info(int id)
156 158
157 *c = boot_cpu_data; 159 *c = boot_cpu_data;
158 if (id!=0) 160 if (id!=0)
159 identify_cpu(c); 161 identify_secondary_cpu(c);
160 /* 162 /*
161 * Mask B, Pentium, but not Pentium MMX 163 * Mask B, Pentium, but not Pentium MMX
162 */ 164 */
@@ -379,14 +381,14 @@ set_cpu_sibling_map(int cpu)
379static void __cpuinit start_secondary(void *unused) 381static void __cpuinit start_secondary(void *unused)
380{ 382{
381 /* 383 /*
382 * Don't put *anything* before secondary_cpu_init(), SMP 384 * Don't put *anything* before cpu_init(), SMP booting is too
383 * booting is too fragile that we want to limit the 385 * fragile that we want to limit the things done here to the
384 * things done here to the most necessary things. 386 * most necessary things.
385 */ 387 */
386#ifdef CONFIG_VMI 388#ifdef CONFIG_VMI
387 vmi_bringup(); 389 vmi_bringup();
388#endif 390#endif
389 secondary_cpu_init(); 391 cpu_init();
390 preempt_disable(); 392 preempt_disable();
391 smp_callin(); 393 smp_callin();
392 while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) 394 while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
@@ -441,12 +443,6 @@ static void __cpuinit start_secondary(void *unused)
441void __devinit initialize_secondary(void) 443void __devinit initialize_secondary(void)
442{ 444{
443 /* 445 /*
444 * switch to the per CPU GDT we already set up
445 * in do_boot_cpu()
446 */
447 cpu_set_gdt(current_thread_info()->cpu);
448
449 /*
450 * We don't actually need to load the full TSS, 446 * We don't actually need to load the full TSS,
451 * basically just the stack pointer and the eip. 447 * basically just the stack pointer and the eip.
452 */ 448 */
@@ -463,7 +459,6 @@ extern struct {
463 void * esp; 459 void * esp;
464 unsigned short ss; 460 unsigned short ss;
465} stack_start; 461} stack_start;
466extern struct i386_pda *start_pda;
467 462
468#ifdef CONFIG_NUMA 463#ifdef CONFIG_NUMA
469 464
@@ -521,12 +516,12 @@ static void unmap_cpu_to_logical_apicid(int cpu)
521 unmap_cpu_to_node(cpu); 516 unmap_cpu_to_node(cpu);
522} 517}
523 518
524#if APIC_DEBUG
525static inline void __inquire_remote_apic(int apicid) 519static inline void __inquire_remote_apic(int apicid)
526{ 520{
527 int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; 521 int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
528 char *names[] = { "ID", "VERSION", "SPIV" }; 522 char *names[] = { "ID", "VERSION", "SPIV" };
529 int timeout, status; 523 int timeout;
524 unsigned long status;
530 525
531 printk("Inquiring remote APIC #%d...\n", apicid); 526 printk("Inquiring remote APIC #%d...\n", apicid);
532 527
@@ -536,7 +531,9 @@ static inline void __inquire_remote_apic(int apicid)
536 /* 531 /*
537 * Wait for idle. 532 * Wait for idle.
538 */ 533 */
539 apic_wait_icr_idle(); 534 status = safe_apic_wait_icr_idle();
535 if (status)
536 printk("a previous APIC delivery may have failed\n");
540 537
541 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); 538 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
542 apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); 539 apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
@@ -550,14 +547,13 @@ static inline void __inquire_remote_apic(int apicid)
550 switch (status) { 547 switch (status) {
551 case APIC_ICR_RR_VALID: 548 case APIC_ICR_RR_VALID:
552 status = apic_read(APIC_RRR); 549 status = apic_read(APIC_RRR);
553 printk("%08x\n", status); 550 printk("%lx\n", status);
554 break; 551 break;
555 default: 552 default:
556 printk("failed\n"); 553 printk("failed\n");
557 } 554 }
558 } 555 }
559} 556}
560#endif
561 557
562#ifdef WAKE_SECONDARY_VIA_NMI 558#ifdef WAKE_SECONDARY_VIA_NMI
563/* 559/*
@@ -568,8 +564,8 @@ static inline void __inquire_remote_apic(int apicid)
568static int __devinit 564static int __devinit
569wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) 565wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
570{ 566{
571 unsigned long send_status = 0, accept_status = 0; 567 unsigned long send_status, accept_status = 0;
572 int timeout, maxlvt; 568 int maxlvt;
573 569
574 /* Target chip */ 570 /* Target chip */
575 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); 571 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
@@ -579,12 +575,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
579 apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); 575 apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
580 576
581 Dprintk("Waiting for send to finish...\n"); 577 Dprintk("Waiting for send to finish...\n");
582 timeout = 0; 578 send_status = safe_apic_wait_icr_idle();
583 do {
584 Dprintk("+");
585 udelay(100);
586 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
587 } while (send_status && (timeout++ < 1000));
588 579
589 /* 580 /*
590 * Give the other CPU some time to accept the IPI. 581 * Give the other CPU some time to accept the IPI.
@@ -614,8 +605,8 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
614static int __devinit 605static int __devinit
615wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) 606wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
616{ 607{
617 unsigned long send_status = 0, accept_status = 0; 608 unsigned long send_status, accept_status = 0;
618 int maxlvt, timeout, num_starts, j; 609 int maxlvt, num_starts, j;
619 610
620 /* 611 /*
621 * Be paranoid about clearing APIC errors. 612 * Be paranoid about clearing APIC errors.
@@ -640,12 +631,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
640 | APIC_DM_INIT); 631 | APIC_DM_INIT);
641 632
642 Dprintk("Waiting for send to finish...\n"); 633 Dprintk("Waiting for send to finish...\n");
643 timeout = 0; 634 send_status = safe_apic_wait_icr_idle();
644 do {
645 Dprintk("+");
646 udelay(100);
647 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
648 } while (send_status && (timeout++ < 1000));
649 635
650 mdelay(10); 636 mdelay(10);
651 637
@@ -658,12 +644,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
658 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); 644 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
659 645
660 Dprintk("Waiting for send to finish...\n"); 646 Dprintk("Waiting for send to finish...\n");
661 timeout = 0; 647 send_status = safe_apic_wait_icr_idle();
662 do {
663 Dprintk("+");
664 udelay(100);
665 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
666 } while (send_status && (timeout++ < 1000));
667 648
668 atomic_set(&init_deasserted, 1); 649 atomic_set(&init_deasserted, 1);
669 650
@@ -719,12 +700,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
719 Dprintk("Startup point 1.\n"); 700 Dprintk("Startup point 1.\n");
720 701
721 Dprintk("Waiting for send to finish...\n"); 702 Dprintk("Waiting for send to finish...\n");
722 timeout = 0; 703 send_status = safe_apic_wait_icr_idle();
723 do {
724 Dprintk("+");
725 udelay(100);
726 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
727 } while (send_status && (timeout++ < 1000));
728 704
729 /* 705 /*
730 * Give the other CPU some time to accept the IPI. 706 * Give the other CPU some time to accept the IPI.
@@ -788,6 +764,25 @@ static inline struct task_struct * alloc_idle_task(int cpu)
788#define alloc_idle_task(cpu) fork_idle(cpu) 764#define alloc_idle_task(cpu) fork_idle(cpu)
789#endif 765#endif
790 766
767/* Initialize the CPU's GDT. This is either the boot CPU doing itself
768 (still using the master per-cpu area), or a CPU doing it for a
769 secondary which will soon come up. */
770static __cpuinit void init_gdt(int cpu)
771{
772 struct desc_struct *gdt = get_cpu_gdt_table(cpu);
773
774 pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a,
775 (u32 *)&gdt[GDT_ENTRY_PERCPU].b,
776 __per_cpu_offset[cpu], 0xFFFFF,
777 0x80 | DESCTYPE_S | 0x2, 0x8);
778
779 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
780 per_cpu(cpu_number, cpu) = cpu;
781}
782
783/* Defined in head.S */
784extern struct Xgt_desc_struct early_gdt_descr;
785
791static int __cpuinit do_boot_cpu(int apicid, int cpu) 786static int __cpuinit do_boot_cpu(int apicid, int cpu)
792/* 787/*
793 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 788 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -802,6 +797,12 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
802 unsigned short nmi_high = 0, nmi_low = 0; 797 unsigned short nmi_high = 0, nmi_low = 0;
803 798
804 /* 799 /*
800 * Save current MTRR state in case it was changed since early boot
801 * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
802 */
803 mtrr_save_state();
804
805 /*
805 * We can't use kernel_thread since we must avoid to 806 * We can't use kernel_thread since we must avoid to
806 * reschedule the child. 807 * reschedule the child.
807 */ 808 */
@@ -809,13 +810,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
809 if (IS_ERR(idle)) 810 if (IS_ERR(idle))
810 panic("failed fork for CPU %d", cpu); 811 panic("failed fork for CPU %d", cpu);
811 812
812 /* Pre-allocate and initialize the CPU's GDT and PDA so it 813 init_gdt(cpu);
813 doesn't have to do any memory allocation during the 814 per_cpu(current_task, cpu) = idle;
814 delicate CPU-bringup phase. */ 815 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
815 if (!init_gdt(cpu, idle)) {
816 printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu);
817 return -1; /* ? */
818 }
819 816
820 idle->thread.eip = (unsigned long) start_secondary; 817 idle->thread.eip = (unsigned long) start_secondary;
821 /* start_eip had better be page-aligned! */ 818 /* start_eip had better be page-aligned! */
@@ -941,7 +938,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
941 DECLARE_COMPLETION_ONSTACK(done); 938 DECLARE_COMPLETION_ONSTACK(done);
942 struct warm_boot_cpu_info info; 939 struct warm_boot_cpu_info info;
943 int apicid, ret; 940 int apicid, ret;
944 struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
945 941
946 apicid = x86_cpu_to_apicid[cpu]; 942 apicid = x86_cpu_to_apicid[cpu];
947 if (apicid == BAD_APICID) { 943 if (apicid == BAD_APICID) {
@@ -949,18 +945,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
949 goto exit; 945 goto exit;
950 } 946 }
951 947
952 /*
953 * the CPU isn't initialized at boot time, allocate gdt table here.
954 * cpu_init will initialize it
955 */
956 if (!cpu_gdt_descr->address) {
957 cpu_gdt_descr->address = get_zeroed_page(GFP_KERNEL);
958 if (!cpu_gdt_descr->address)
959 printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
960 ret = -ENOMEM;
961 goto exit;
962 }
963
964 info.complete = &done; 948 info.complete = &done;
965 info.apicid = apicid; 949 info.apicid = apicid;
966 info.cpu = cpu; 950 info.cpu = cpu;
@@ -1173,7 +1157,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
1173 1157
1174/* These are wrappers to interface to the new boot process. Someone 1158/* These are wrappers to interface to the new boot process. Someone
1175 who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ 1159 who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
1176void __init smp_prepare_cpus(unsigned int max_cpus) 1160void __init native_smp_prepare_cpus(unsigned int max_cpus)
1177{ 1161{
1178 smp_commenced_mask = cpumask_of_cpu(0); 1162 smp_commenced_mask = cpumask_of_cpu(0);
1179 cpu_callin_map = cpumask_of_cpu(0); 1163 cpu_callin_map = cpumask_of_cpu(0);
@@ -1181,13 +1165,18 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
1181 smp_boot_cpus(max_cpus); 1165 smp_boot_cpus(max_cpus);
1182} 1166}
1183 1167
1184void __devinit smp_prepare_boot_cpu(void) 1168void __init native_smp_prepare_boot_cpu(void)
1185{ 1169{
1186 cpu_set(smp_processor_id(), cpu_online_map); 1170 unsigned int cpu = smp_processor_id();
1187 cpu_set(smp_processor_id(), cpu_callout_map); 1171
1188 cpu_set(smp_processor_id(), cpu_present_map); 1172 init_gdt(cpu);
1189 cpu_set(smp_processor_id(), cpu_possible_map); 1173 switch_to_new_gdt();
1190 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 1174
1175 cpu_set(cpu, cpu_online_map);
1176 cpu_set(cpu, cpu_callout_map);
1177 cpu_set(cpu, cpu_present_map);
1178 cpu_set(cpu, cpu_possible_map);
1179 __get_cpu_var(cpu_state) = CPU_ONLINE;
1191} 1180}
1192 1181
1193#ifdef CONFIG_HOTPLUG_CPU 1182#ifdef CONFIG_HOTPLUG_CPU
@@ -1277,7 +1266,7 @@ void __cpu_die(unsigned int cpu)
1277} 1266}
1278#endif /* CONFIG_HOTPLUG_CPU */ 1267#endif /* CONFIG_HOTPLUG_CPU */
1279 1268
1280int __cpuinit __cpu_up(unsigned int cpu) 1269int __cpuinit native_cpu_up(unsigned int cpu)
1281{ 1270{
1282 unsigned long flags; 1271 unsigned long flags;
1283#ifdef CONFIG_HOTPLUG_CPU 1272#ifdef CONFIG_HOTPLUG_CPU
@@ -1319,15 +1308,10 @@ int __cpuinit __cpu_up(unsigned int cpu)
1319 touch_nmi_watchdog(); 1308 touch_nmi_watchdog();
1320 } 1309 }
1321 1310
1322#ifdef CONFIG_X86_GENERICARCH
1323 if (num_online_cpus() > 8 && genapic == &apic_default)
1324 panic("Default flat APIC routing can't be used with > 8 cpus\n");
1325#endif
1326
1327 return 0; 1311 return 0;
1328} 1312}
1329 1313
1330void __init smp_cpus_done(unsigned int max_cpus) 1314void __init native_smp_cpus_done(unsigned int max_cpus)
1331{ 1315{
1332#ifdef CONFIG_X86_IO_APIC 1316#ifdef CONFIG_X86_IO_APIC
1333 setup_ioapic_dest(); 1317 setup_ioapic_dest();
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 13ca54a85a1c..ff4ee6f3326b 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -22,16 +22,26 @@
22#include <asm/msr.h> 22#include <asm/msr.h>
23#include <asm/pgtable.h> 23#include <asm/pgtable.h>
24#include <asm/unistd.h> 24#include <asm/unistd.h>
25#include <asm/elf.h>
26#include <asm/tlbflush.h>
27
28enum {
29 VDSO_DISABLED = 0,
30 VDSO_ENABLED = 1,
31 VDSO_COMPAT = 2,
32};
33
34#ifdef CONFIG_COMPAT_VDSO
35#define VDSO_DEFAULT VDSO_COMPAT
36#else
37#define VDSO_DEFAULT VDSO_ENABLED
38#endif
25 39
26/* 40/*
27 * Should the kernel map a VDSO page into processes and pass its 41 * Should the kernel map a VDSO page into processes and pass its
28 * address down to glibc upon exec()? 42 * address down to glibc upon exec()?
29 */ 43 */
30#ifdef CONFIG_PARAVIRT 44unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;
31unsigned int __read_mostly vdso_enabled = 0;
32#else
33unsigned int __read_mostly vdso_enabled = 1;
34#endif
35 45
36EXPORT_SYMBOL_GPL(vdso_enabled); 46EXPORT_SYMBOL_GPL(vdso_enabled);
37 47
@@ -46,6 +56,123 @@ __setup("vdso=", vdso_setup);
46 56
47extern asmlinkage void sysenter_entry(void); 57extern asmlinkage void sysenter_entry(void);
48 58
59static __init void reloc_symtab(Elf32_Ehdr *ehdr,
60 unsigned offset, unsigned size)
61{
62 Elf32_Sym *sym = (void *)ehdr + offset;
63 unsigned nsym = size / sizeof(*sym);
64 unsigned i;
65
66 for(i = 0; i < nsym; i++, sym++) {
67 if (sym->st_shndx == SHN_UNDEF ||
68 sym->st_shndx == SHN_ABS)
69 continue; /* skip */
70
71 if (sym->st_shndx > SHN_LORESERVE) {
72 printk(KERN_INFO "VDSO: unexpected st_shndx %x\n",
73 sym->st_shndx);
74 continue;
75 }
76
77 switch(ELF_ST_TYPE(sym->st_info)) {
78 case STT_OBJECT:
79 case STT_FUNC:
80 case STT_SECTION:
81 case STT_FILE:
82 sym->st_value += VDSO_HIGH_BASE;
83 }
84 }
85}
86
87static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
88{
89 Elf32_Dyn *dyn = (void *)ehdr + offset;
90
91 for(; dyn->d_tag != DT_NULL; dyn++)
92 switch(dyn->d_tag) {
93 case DT_PLTGOT:
94 case DT_HASH:
95 case DT_STRTAB:
96 case DT_SYMTAB:
97 case DT_RELA:
98 case DT_INIT:
99 case DT_FINI:
100 case DT_REL:
101 case DT_DEBUG:
102 case DT_JMPREL:
103 case DT_VERSYM:
104 case DT_VERDEF:
105 case DT_VERNEED:
106 case DT_ADDRRNGLO ... DT_ADDRRNGHI:
107 /* definitely pointers needing relocation */
108 dyn->d_un.d_ptr += VDSO_HIGH_BASE;
109 break;
110
111 case DT_ENCODING ... OLD_DT_LOOS-1:
112 case DT_LOOS ... DT_HIOS-1:
113 /* Tags above DT_ENCODING are pointers if
114 they're even */
115 if (dyn->d_tag >= DT_ENCODING &&
116 (dyn->d_tag & 1) == 0)
117 dyn->d_un.d_ptr += VDSO_HIGH_BASE;
118 break;
119
120 case DT_VERDEFNUM:
121 case DT_VERNEEDNUM:
122 case DT_FLAGS_1:
123 case DT_RELACOUNT:
124 case DT_RELCOUNT:
125 case DT_VALRNGLO ... DT_VALRNGHI:
126 /* definitely not pointers */
127 break;
128
129 case OLD_DT_LOOS ... DT_LOOS-1:
130 case DT_HIOS ... DT_VALRNGLO-1:
131 default:
132 if (dyn->d_tag > DT_ENCODING)
133 printk(KERN_INFO "VDSO: unexpected DT_tag %x\n",
134 dyn->d_tag);
135 break;
136 }
137}
138
139static __init void relocate_vdso(Elf32_Ehdr *ehdr)
140{
141 Elf32_Phdr *phdr;
142 Elf32_Shdr *shdr;
143 int i;
144
145 BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
146 !elf_check_arch(ehdr) ||
147 ehdr->e_type != ET_DYN);
148
149 ehdr->e_entry += VDSO_HIGH_BASE;
150
151 /* rebase phdrs */
152 phdr = (void *)ehdr + ehdr->e_phoff;
153 for (i = 0; i < ehdr->e_phnum; i++) {
154 phdr[i].p_vaddr += VDSO_HIGH_BASE;
155
156 /* relocate dynamic stuff */
157 if (phdr[i].p_type == PT_DYNAMIC)
158 reloc_dyn(ehdr, phdr[i].p_offset);
159 }
160
161 /* rebase sections */
162 shdr = (void *)ehdr + ehdr->e_shoff;
163 for(i = 0; i < ehdr->e_shnum; i++) {
164 if (!(shdr[i].sh_flags & SHF_ALLOC))
165 continue;
166
167 shdr[i].sh_addr += VDSO_HIGH_BASE;
168
169 if (shdr[i].sh_type == SHT_SYMTAB ||
170 shdr[i].sh_type == SHT_DYNSYM)
171 reloc_symtab(ehdr, shdr[i].sh_offset,
172 shdr[i].sh_size);
173 }
174}
175
49void enable_sep_cpu(void) 176void enable_sep_cpu(void)
50{ 177{
51 int cpu = get_cpu(); 178 int cpu = get_cpu();
@@ -56,14 +183,33 @@ void enable_sep_cpu(void)
56 return; 183 return;
57 } 184 }
58 185
59 tss->ss1 = __KERNEL_CS; 186 tss->x86_tss.ss1 = __KERNEL_CS;
60 tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; 187 tss->x86_tss.esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
61 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); 188 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
62 wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0); 189 wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.esp1, 0);
63 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); 190 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
64 put_cpu(); 191 put_cpu();
65} 192}
66 193
194static struct vm_area_struct gate_vma;
195
196static int __init gate_vma_init(void)
197{
198 gate_vma.vm_mm = NULL;
199 gate_vma.vm_start = FIXADDR_USER_START;
200 gate_vma.vm_end = FIXADDR_USER_END;
201 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
202 gate_vma.vm_page_prot = __P101;
203 /*
204 * Make sure the vDSO gets into every core dump.
205 * Dumping its contents makes post-mortem fully interpretable later
206 * without matching up the same kernel and hardware config to see
207 * what PC values meant.
208 */
209 gate_vma.vm_flags |= VM_ALWAYSDUMP;
210 return 0;
211}
212
67/* 213/*
68 * These symbols are defined by vsyscall.o to mark the bounds 214 * These symbols are defined by vsyscall.o to mark the bounds
69 * of the ELF DSO images included therein. 215 * of the ELF DSO images included therein.
@@ -72,31 +218,48 @@ extern const char vsyscall_int80_start, vsyscall_int80_end;
72extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; 218extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
73static struct page *syscall_pages[1]; 219static struct page *syscall_pages[1];
74 220
221static void map_compat_vdso(int map)
222{
223 static int vdso_mapped;
224
225 if (map == vdso_mapped)
226 return;
227
228 vdso_mapped = map;
229
230 __set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT,
231 map ? PAGE_READONLY_EXEC : PAGE_NONE);
232
233 /* flush stray tlbs */
234 flush_tlb_all();
235}
236
75int __init sysenter_setup(void) 237int __init sysenter_setup(void)
76{ 238{
77 void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); 239 void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
240 const void *vsyscall;
241 size_t vsyscall_len;
242
78 syscall_pages[0] = virt_to_page(syscall_page); 243 syscall_pages[0] = virt_to_page(syscall_page);
79 244
80#ifdef CONFIG_COMPAT_VDSO 245 gate_vma_init();
81 __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC); 246
82 printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); 247 printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
83#endif
84 248
85 if (!boot_cpu_has(X86_FEATURE_SEP)) { 249 if (!boot_cpu_has(X86_FEATURE_SEP)) {
86 memcpy(syscall_page, 250 vsyscall = &vsyscall_int80_start;
87 &vsyscall_int80_start, 251 vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start;
88 &vsyscall_int80_end - &vsyscall_int80_start); 252 } else {
89 return 0; 253 vsyscall = &vsyscall_sysenter_start;
254 vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start;
90 } 255 }
91 256
92 memcpy(syscall_page, 257 memcpy(syscall_page, vsyscall, vsyscall_len);
93 &vsyscall_sysenter_start, 258 relocate_vdso(syscall_page);
94 &vsyscall_sysenter_end - &vsyscall_sysenter_start);
95 259
96 return 0; 260 return 0;
97} 261}
98 262
99#ifndef CONFIG_COMPAT_VDSO
100/* Defined in vsyscall-sysenter.S */ 263/* Defined in vsyscall-sysenter.S */
101extern void SYSENTER_RETURN; 264extern void SYSENTER_RETURN;
102 265
@@ -105,36 +268,52 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
105{ 268{
106 struct mm_struct *mm = current->mm; 269 struct mm_struct *mm = current->mm;
107 unsigned long addr; 270 unsigned long addr;
108 int ret; 271 int ret = 0;
272 bool compat;
109 273
110 down_write(&mm->mmap_sem); 274 down_write(&mm->mmap_sem);
111 addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
112 if (IS_ERR_VALUE(addr)) {
113 ret = addr;
114 goto up_fail;
115 }
116 275
117 /* 276 /* Test compat mode once here, in case someone
118 * MAYWRITE to allow gdb to COW and set breakpoints 277 changes it via sysctl */
119 * 278 compat = (vdso_enabled == VDSO_COMPAT);
120 * Make sure the vDSO gets into every core dump. 279
121 * Dumping its contents makes post-mortem fully interpretable later 280 map_compat_vdso(compat);
122 * without matching up the same kernel and hardware config to see 281
123 * what PC values meant. 282 if (compat)
124 */ 283 addr = VDSO_HIGH_BASE;
125 ret = install_special_mapping(mm, addr, PAGE_SIZE, 284 else {
126 VM_READ|VM_EXEC| 285 addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
127 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 286 if (IS_ERR_VALUE(addr)) {
128 VM_ALWAYSDUMP, 287 ret = addr;
129 syscall_pages); 288 goto up_fail;
130 if (ret) 289 }
131 goto up_fail; 290
291 /*
292 * MAYWRITE to allow gdb to COW and set breakpoints
293 *
294 * Make sure the vDSO gets into every core dump.
295 * Dumping its contents makes post-mortem fully
296 * interpretable later without matching up the same
297 * kernel and hardware config to see what PC values
298 * meant.
299 */
300 ret = install_special_mapping(mm, addr, PAGE_SIZE,
301 VM_READ|VM_EXEC|
302 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
303 VM_ALWAYSDUMP,
304 syscall_pages);
305
306 if (ret)
307 goto up_fail;
308 }
132 309
133 current->mm->context.vdso = (void *)addr; 310 current->mm->context.vdso = (void *)addr;
134 current_thread_info()->sysenter_return = 311 current_thread_info()->sysenter_return =
135 (void *)VDSO_SYM(&SYSENTER_RETURN); 312 (void *)VDSO_SYM(&SYSENTER_RETURN);
136up_fail: 313
314 up_fail:
137 up_write(&mm->mmap_sem); 315 up_write(&mm->mmap_sem);
316
138 return ret; 317 return ret;
139} 318}
140 319
@@ -147,6 +326,11 @@ const char *arch_vma_name(struct vm_area_struct *vma)
147 326
148struct vm_area_struct *get_gate_vma(struct task_struct *tsk) 327struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
149{ 328{
329 struct mm_struct *mm = tsk->mm;
330
331 /* Check to see if this task was created in compat vdso mode */
332 if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
333 return &gate_vma;
150 return NULL; 334 return NULL;
151} 335}
152 336
@@ -159,4 +343,3 @@ int in_gate_area_no_task(unsigned long addr)
159{ 343{
160 return 0; 344 return 0;
161} 345}
162#endif
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 94e5cb091104..a665df61f08c 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -70,8 +70,6 @@
70 70
71#include <asm/i8259.h> 71#include <asm/i8259.h>
72 72
73int pit_latch_buggy; /* extern */
74
75#include "do_timer.h" 73#include "do_timer.h"
76 74
77unsigned int cpu_khz; /* Detected as we calibrate the TSC */ 75unsigned int cpu_khz; /* Detected as we calibrate the TSC */
diff --git a/arch/i386/kernel/trampoline.S b/arch/i386/kernel/trampoline.S
index 2f1814c5cfd7..f62815f8d06a 100644
--- a/arch/i386/kernel/trampoline.S
+++ b/arch/i386/kernel/trampoline.S
@@ -29,7 +29,7 @@
29 * 29 *
30 * TYPE VALUE 30 * TYPE VALUE
31 * R_386_32 startup_32_smp 31 * R_386_32 startup_32_smp
32 * R_386_32 boot_gdt_table 32 * R_386_32 boot_gdt
33 */ 33 */
34 34
35#include <linux/linkage.h> 35#include <linux/linkage.h>
@@ -62,8 +62,8 @@ r_base = .
62 * to 32 bit. 62 * to 32 bit.
63 */ 63 */
64 64
65 lidtl boot_idt - r_base # load idt with 0, 0 65 lidtl boot_idt_descr - r_base # load idt with 0, 0
66 lgdtl boot_gdt - r_base # load gdt with whatever is appropriate 66 lgdtl boot_gdt_descr - r_base # load gdt with whatever is appropriate
67 67
68 xor %ax, %ax 68 xor %ax, %ax
69 inc %ax # protected mode (PE) bit 69 inc %ax # protected mode (PE) bit
@@ -73,11 +73,11 @@ r_base = .
73 73
74 # These need to be in the same 64K segment as the above; 74 # These need to be in the same 64K segment as the above;
75 # hence we don't use the boot_gdt_descr defined in head.S 75 # hence we don't use the boot_gdt_descr defined in head.S
76boot_gdt: 76boot_gdt_descr:
77 .word __BOOT_DS + 7 # gdt limit 77 .word __BOOT_DS + 7 # gdt limit
78 .long boot_gdt_table-__PAGE_OFFSET # gdt base 78 .long boot_gdt - __PAGE_OFFSET # gdt base
79 79
80boot_idt: 80boot_idt_descr:
81 .word 0 # idt limit = 0 81 .word 0 # idt limit = 0
82 .long 0 # idt base = 0L 82 .long 0 # idt base = 0L
83 83
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index af0d3f70a817..f21b41e7770c 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -476,8 +476,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
476 siginfo_t *info) 476 siginfo_t *info)
477{ 477{
478 struct task_struct *tsk = current; 478 struct task_struct *tsk = current;
479 tsk->thread.error_code = error_code;
480 tsk->thread.trap_no = trapnr;
481 479
482 if (regs->eflags & VM_MASK) { 480 if (regs->eflags & VM_MASK) {
483 if (vm86) 481 if (vm86)
@@ -489,6 +487,18 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
489 goto kernel_trap; 487 goto kernel_trap;
490 488
491 trap_signal: { 489 trap_signal: {
490 /*
491 * We want error_code and trap_no set for userspace faults and
492 * kernelspace faults which result in die(), but not
493 * kernelspace faults which are fixed up. die() gives the
494 * process no chance to handle the signal and notice the
495 * kernel fault information, so that won't result in polluting
496 * the information about previously queued, but not yet
497 * delivered, faults. See also do_general_protection below.
498 */
499 tsk->thread.error_code = error_code;
500 tsk->thread.trap_no = trapnr;
501
492 if (info) 502 if (info)
493 force_sig_info(signr, info, tsk); 503 force_sig_info(signr, info, tsk);
494 else 504 else
@@ -497,8 +507,11 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
497 } 507 }
498 508
499 kernel_trap: { 509 kernel_trap: {
500 if (!fixup_exception(regs)) 510 if (!fixup_exception(regs)) {
511 tsk->thread.error_code = error_code;
512 tsk->thread.trap_no = trapnr;
501 die(str, regs, error_code); 513 die(str, regs, error_code);
514 }
502 return; 515 return;
503 } 516 }
504 517
@@ -583,7 +596,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
583 * and we set the offset field correctly. Then we let the CPU to 596 * and we set the offset field correctly. Then we let the CPU to
584 * restart the faulting instruction. 597 * restart the faulting instruction.
585 */ 598 */
586 if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && 599 if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
587 thread->io_bitmap_ptr) { 600 thread->io_bitmap_ptr) {
588 memcpy(tss->io_bitmap, thread->io_bitmap_ptr, 601 memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
589 thread->io_bitmap_max); 602 thread->io_bitmap_max);
@@ -596,16 +609,13 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
596 thread->io_bitmap_max, 0xff, 609 thread->io_bitmap_max, 0xff,
597 tss->io_bitmap_max - thread->io_bitmap_max); 610 tss->io_bitmap_max - thread->io_bitmap_max);
598 tss->io_bitmap_max = thread->io_bitmap_max; 611 tss->io_bitmap_max = thread->io_bitmap_max;
599 tss->io_bitmap_base = IO_BITMAP_OFFSET; 612 tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
600 tss->io_bitmap_owner = thread; 613 tss->io_bitmap_owner = thread;
601 put_cpu(); 614 put_cpu();
602 return; 615 return;
603 } 616 }
604 put_cpu(); 617 put_cpu();
605 618
606 current->thread.error_code = error_code;
607 current->thread.trap_no = 13;
608
609 if (regs->eflags & VM_MASK) 619 if (regs->eflags & VM_MASK)
610 goto gp_in_vm86; 620 goto gp_in_vm86;
611 621
@@ -624,6 +634,8 @@ gp_in_vm86:
624 634
625gp_in_kernel: 635gp_in_kernel:
626 if (!fixup_exception(regs)) { 636 if (!fixup_exception(regs)) {
637 current->thread.error_code = error_code;
638 current->thread.trap_no = 13;
627 if (notify_die(DIE_GPF, "general protection fault", regs, 639 if (notify_die(DIE_GPF, "general protection fault", regs,
628 error_code, 13, SIGSEGV) == NOTIFY_STOP) 640 error_code, 13, SIGSEGV) == NOTIFY_STOP)
629 return; 641 return;
@@ -1018,9 +1030,7 @@ fastcall void do_spurious_interrupt_bug(struct pt_regs * regs,
1018fastcall unsigned long patch_espfix_desc(unsigned long uesp, 1030fastcall unsigned long patch_espfix_desc(unsigned long uesp,
1019 unsigned long kesp) 1031 unsigned long kesp)
1020{ 1032{
1021 int cpu = smp_processor_id(); 1033 struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
1022 struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
1023 struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address;
1024 unsigned long base = (kesp - uesp) & -THREAD_SIZE; 1034 unsigned long base = (kesp - uesp) & -THREAD_SIZE;
1025 unsigned long new_kesp = kesp - base; 1035 unsigned long new_kesp = kesp - base;
1026 unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; 1036 unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 6cb8f5336732..f64b81f3033b 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -200,13 +200,10 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
200{ 200{
201 struct cpufreq_freqs *freq = data; 201 struct cpufreq_freqs *freq = data;
202 202
203 if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
204 write_seqlock_irq(&xtime_lock);
205
206 if (!ref_freq) { 203 if (!ref_freq) {
207 if (!freq->old){ 204 if (!freq->old){
208 ref_freq = freq->new; 205 ref_freq = freq->new;
209 goto end; 206 return 0;
210 } 207 }
211 ref_freq = freq->old; 208 ref_freq = freq->old;
212 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; 209 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
@@ -233,13 +230,10 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
233 * TSC based sched_clock turns 230 * TSC based sched_clock turns
234 * to junk w/ cpufreq 231 * to junk w/ cpufreq
235 */ 232 */
236 mark_tsc_unstable(); 233 mark_tsc_unstable("cpufreq changes");
237 } 234 }
238 } 235 }
239 } 236 }
240end:
241 if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
242 write_sequnlock_irq(&xtime_lock);
243 237
244 return 0; 238 return 0;
245} 239}
@@ -281,11 +275,12 @@ static struct clocksource clocksource_tsc = {
281 CLOCK_SOURCE_MUST_VERIFY, 275 CLOCK_SOURCE_MUST_VERIFY,
282}; 276};
283 277
284void mark_tsc_unstable(void) 278void mark_tsc_unstable(char *reason)
285{ 279{
286 if (!tsc_unstable) { 280 if (!tsc_unstable) {
287 tsc_unstable = 1; 281 tsc_unstable = 1;
288 tsc_enabled = 0; 282 tsc_enabled = 0;
283 printk("Marking TSC unstable due to: %s.\n", reason);
289 /* Can be called before registration */ 284 /* Can be called before registration */
290 if (clocksource_tsc.mult) 285 if (clocksource_tsc.mult)
291 clocksource_change_rating(&clocksource_tsc, 0); 286 clocksource_change_rating(&clocksource_tsc, 0);
diff --git a/arch/i386/kernel/verify_cpu.S b/arch/i386/kernel/verify_cpu.S
new file mode 100644
index 000000000000..e51a8695d54e
--- /dev/null
+++ b/arch/i386/kernel/verify_cpu.S
@@ -0,0 +1,65 @@
1/* Check if CPU has some minimum CPUID bits
2 This runs in 16bit mode so that the caller can still use the BIOS
3 to output errors on the screen */
4#include <asm/cpufeature.h>
5
6verify_cpu:
7 pushfl # Save caller passed flags
8 pushl $0 # Kill any dangerous flags
9 popfl
10
11#if CONFIG_X86_MINIMUM_CPU_MODEL >= 4
12 pushfl
13 orl $(1<<18),(%esp) # try setting AC
14 popfl
15 pushfl
16 popl %eax
17 testl $(1<<18),%eax
18 jz bad
19#endif
20#if REQUIRED_MASK1 != 0
21 pushfl # standard way to check for cpuid
22 popl %eax
23 movl %eax,%ebx
24 xorl $0x200000,%eax
25 pushl %eax
26 popfl
27 pushfl
28 popl %eax
29 cmpl %eax,%ebx
30 pushfl # standard way to check for cpuid
31 popl %eax
32 movl %eax,%ebx
33 xorl $0x200000,%eax
34 pushl %eax
35 popfl
36 pushfl
37 popl %eax
38 cmpl %eax,%ebx
39 jz bad # REQUIRED_MASK1 != 0 requires CPUID
40
41 movl $0x0,%eax # See if cpuid 1 is implemented
42 cpuid
43 cmpl $0x1,%eax
44 jb bad # no cpuid 1
45
46 movl $0x1,%eax # Does the cpu have what it takes
47 cpuid
48
49#if CONFIG_X86_MINIMUM_CPU_MODEL > 4
50#error add proper model checking here
51#endif
52
53 andl $REQUIRED_MASK1,%edx
54 xorl $REQUIRED_MASK1,%edx
55 jnz bad
56#endif /* REQUIRED_MASK1 */
57
58 popfl
59 xor %eax,%eax
60 ret
61
62bad:
63 popfl
64 movl $1,%eax
65 ret
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
index 697a70e8c0c9..c8726c424b35 100644
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -26,6 +26,7 @@
26#include <linux/cpu.h> 26#include <linux/cpu.h>
27#include <linux/bootmem.h> 27#include <linux/bootmem.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/highmem.h>
29#include <asm/vmi.h> 30#include <asm/vmi.h>
30#include <asm/io.h> 31#include <asm/io.h>
31#include <asm/fixmap.h> 32#include <asm/fixmap.h>
@@ -56,7 +57,7 @@ static int disable_noidle;
56static int disable_vmi_timer; 57static int disable_vmi_timer;
57 58
58/* Cached VMI operations */ 59/* Cached VMI operations */
59struct { 60static struct {
60 void (*cpuid)(void /* non-c */); 61 void (*cpuid)(void /* non-c */);
61 void (*_set_ldt)(u32 selector); 62 void (*_set_ldt)(u32 selector);
62 void (*set_tr)(u32 selector); 63 void (*set_tr)(u32 selector);
@@ -65,16 +66,15 @@ struct {
65 void (*release_page)(u32, u32); 66 void (*release_page)(u32, u32);
66 void (*set_pte)(pte_t, pte_t *, unsigned); 67 void (*set_pte)(pte_t, pte_t *, unsigned);
67 void (*update_pte)(pte_t *, unsigned); 68 void (*update_pte)(pte_t *, unsigned);
68 void (*set_linear_mapping)(int, u32, u32, u32); 69 void (*set_linear_mapping)(int, void *, u32, u32);
69 void (*flush_tlb)(int); 70 void (*_flush_tlb)(int);
70 void (*set_initial_ap_state)(int, int); 71 void (*set_initial_ap_state)(int, int);
71 void (*halt)(void); 72 void (*halt)(void);
72 void (*set_lazy_mode)(int mode); 73 void (*set_lazy_mode)(int mode);
73} vmi_ops; 74} vmi_ops;
74 75
75/* XXX move this to alternative.h */ 76/* Cached VMI operations */
76extern struct paravirt_patch __start_parainstructions[], 77struct vmi_timer_ops vmi_timer_ops;
77 __stop_parainstructions[];
78 78
79/* 79/*
80 * VMI patching routines. 80 * VMI patching routines.
@@ -83,11 +83,6 @@ extern struct paravirt_patch __start_parainstructions[],
83#define MNEM_JMP 0xe9 83#define MNEM_JMP 0xe9
84#define MNEM_RET 0xc3 84#define MNEM_RET 0xc3
85 85
86static char irq_save_disable_callout[] = {
87 MNEM_CALL, 0, 0, 0, 0,
88 MNEM_CALL, 0, 0, 0, 0,
89 MNEM_RET
90};
91#define IRQ_PATCH_INT_MASK 0 86#define IRQ_PATCH_INT_MASK 0
92#define IRQ_PATCH_DISABLE 5 87#define IRQ_PATCH_DISABLE 5
93 88
@@ -135,33 +130,17 @@ static unsigned patch_internal(int call, unsigned len, void *insns)
135static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len) 130static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len)
136{ 131{
137 switch (type) { 132 switch (type) {
138 case PARAVIRT_IRQ_DISABLE: 133 case PARAVIRT_PATCH(irq_disable):
139 return patch_internal(VMI_CALL_DisableInterrupts, len, insns); 134 return patch_internal(VMI_CALL_DisableInterrupts, len, insns);
140 case PARAVIRT_IRQ_ENABLE: 135 case PARAVIRT_PATCH(irq_enable):
141 return patch_internal(VMI_CALL_EnableInterrupts, len, insns); 136 return patch_internal(VMI_CALL_EnableInterrupts, len, insns);
142 case PARAVIRT_RESTORE_FLAGS: 137 case PARAVIRT_PATCH(restore_fl):
143 return patch_internal(VMI_CALL_SetInterruptMask, len, insns); 138 return patch_internal(VMI_CALL_SetInterruptMask, len, insns);
144 case PARAVIRT_SAVE_FLAGS: 139 case PARAVIRT_PATCH(save_fl):
145 return patch_internal(VMI_CALL_GetInterruptMask, len, insns); 140 return patch_internal(VMI_CALL_GetInterruptMask, len, insns);
146 case PARAVIRT_SAVE_FLAGS_IRQ_DISABLE: 141 case PARAVIRT_PATCH(iret):
147 if (len >= 10) {
148 patch_internal(VMI_CALL_GetInterruptMask, len, insns);
149 patch_internal(VMI_CALL_DisableInterrupts, len-5, insns+5);
150 return 10;
151 } else {
152 /*
153 * You bastards didn't leave enough room to
154 * patch save_flags_irq_disable inline. Patch
155 * to a helper
156 */
157 BUG_ON(len < 5);
158 *(char *)insns = MNEM_CALL;
159 patch_offset(insns, irq_save_disable_callout);
160 return 5;
161 }
162 case PARAVIRT_INTERRUPT_RETURN:
163 return patch_internal(VMI_CALL_IRET, len, insns); 142 return patch_internal(VMI_CALL_IRET, len, insns);
164 case PARAVIRT_STI_SYSEXIT: 143 case PARAVIRT_PATCH(irq_enable_sysexit):
165 return patch_internal(VMI_CALL_SYSEXIT, len, insns); 144 return patch_internal(VMI_CALL_SYSEXIT, len, insns);
166 default: 145 default:
167 break; 146 break;
@@ -230,24 +209,24 @@ static void vmi_set_tr(void)
230static void vmi_load_esp0(struct tss_struct *tss, 209static void vmi_load_esp0(struct tss_struct *tss,
231 struct thread_struct *thread) 210 struct thread_struct *thread)
232{ 211{
233 tss->esp0 = thread->esp0; 212 tss->x86_tss.esp0 = thread->esp0;
234 213
235 /* This can only happen when SEP is enabled, no need to test "SEP"arately */ 214 /* This can only happen when SEP is enabled, no need to test "SEP"arately */
236 if (unlikely(tss->ss1 != thread->sysenter_cs)) { 215 if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
237 tss->ss1 = thread->sysenter_cs; 216 tss->x86_tss.ss1 = thread->sysenter_cs;
238 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); 217 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
239 } 218 }
240 vmi_ops.set_kernel_stack(__KERNEL_DS, tss->esp0); 219 vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.esp0);
241} 220}
242 221
243static void vmi_flush_tlb_user(void) 222static void vmi_flush_tlb_user(void)
244{ 223{
245 vmi_ops.flush_tlb(VMI_FLUSH_TLB); 224 vmi_ops._flush_tlb(VMI_FLUSH_TLB);
246} 225}
247 226
248static void vmi_flush_tlb_kernel(void) 227static void vmi_flush_tlb_kernel(void)
249{ 228{
250 vmi_ops.flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL); 229 vmi_ops._flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL);
251} 230}
252 231
253/* Stub to do nothing at all; used for delays and unimplemented calls */ 232/* Stub to do nothing at all; used for delays and unimplemented calls */
@@ -255,18 +234,6 @@ static void vmi_nop(void)
255{ 234{
256} 235}
257 236
258/* For NO_IDLE_HZ, we stop the clock when halting the kernel */
259static fastcall void vmi_safe_halt(void)
260{
261 int idle = vmi_stop_hz_timer();
262 vmi_ops.halt();
263 if (idle) {
264 local_irq_disable();
265 vmi_account_time_restart_hz_timer();
266 local_irq_enable();
267 }
268}
269
270#ifdef CONFIG_DEBUG_PAGE_TYPE 237#ifdef CONFIG_DEBUG_PAGE_TYPE
271 238
272#ifdef CONFIG_X86_PAE 239#ifdef CONFIG_X86_PAE
@@ -370,8 +337,11 @@ static void vmi_check_page_type(u32 pfn, int type)
370#define vmi_check_page_type(p,t) do { } while (0) 337#define vmi_check_page_type(p,t) do { } while (0)
371#endif 338#endif
372 339
373static void vmi_map_pt_hook(int type, pte_t *va, u32 pfn) 340#ifdef CONFIG_HIGHPTE
341static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
374{ 342{
343 void *va = kmap_atomic(page, type);
344
375 /* 345 /*
376 * Internally, the VMI ROM must map virtual addresses to physical 346 * Internally, the VMI ROM must map virtual addresses to physical
377 * addresses for processing MMU updates. By the time MMU updates 347 * addresses for processing MMU updates. By the time MMU updates
@@ -385,8 +355,11 @@ static void vmi_map_pt_hook(int type, pte_t *va, u32 pfn)
385 * args: SLOT VA COUNT PFN 355 * args: SLOT VA COUNT PFN
386 */ 356 */
387 BUG_ON(type != KM_PTE0 && type != KM_PTE1); 357 BUG_ON(type != KM_PTE0 && type != KM_PTE1);
388 vmi_ops.set_linear_mapping((type - KM_PTE0)+1, (u32)va, 1, pfn); 358 vmi_ops.set_linear_mapping((type - KM_PTE0)+1, va, 1, page_to_pfn(page));
359
360 return va;
389} 361}
362#endif
390 363
391static void vmi_allocate_pt(u32 pfn) 364static void vmi_allocate_pt(u32 pfn)
392{ 365{
@@ -443,13 +416,13 @@ static void vmi_release_pd(u32 pfn)
443 ((level) | (is_current_as(mm, user) ? \ 416 ((level) | (is_current_as(mm, user) ? \
444 (VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0)) 417 (VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0))
445 418
446static void vmi_update_pte(struct mm_struct *mm, u32 addr, pte_t *ptep) 419static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
447{ 420{
448 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); 421 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
449 vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); 422 vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
450} 423}
451 424
452static void vmi_update_pte_defer(struct mm_struct *mm, u32 addr, pte_t *ptep) 425static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
453{ 426{
454 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); 427 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
455 vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); 428 vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0));
@@ -462,7 +435,7 @@ static void vmi_set_pte(pte_t *ptep, pte_t pte)
462 vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); 435 vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT);
463} 436}
464 437
465static void vmi_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) 438static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
466{ 439{
467 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); 440 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
468 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); 441 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
@@ -516,7 +489,7 @@ static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
516 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); 489 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
517} 490}
518 491
519void vmi_pmd_clear(pmd_t *pmd) 492static void vmi_pmd_clear(pmd_t *pmd)
520{ 493{
521 const pte_t pte = { 0 }; 494 const pte_t pte = { 0 };
522 vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); 495 vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD);
@@ -525,8 +498,6 @@ void vmi_pmd_clear(pmd_t *pmd)
525#endif 498#endif
526 499
527#ifdef CONFIG_SMP 500#ifdef CONFIG_SMP
528extern void setup_pda(void);
529
530static void __devinit 501static void __devinit
531vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, 502vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
532 unsigned long start_esp) 503 unsigned long start_esp)
@@ -551,13 +522,11 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
551 522
552 ap.ds = __USER_DS; 523 ap.ds = __USER_DS;
553 ap.es = __USER_DS; 524 ap.es = __USER_DS;
554 ap.fs = __KERNEL_PDA; 525 ap.fs = __KERNEL_PERCPU;
555 ap.gs = 0; 526 ap.gs = 0;
556 527
557 ap.eflags = 0; 528 ap.eflags = 0;
558 529
559 setup_pda();
560
561#ifdef CONFIG_X86_PAE 530#ifdef CONFIG_X86_PAE
562 /* efer should match BSP efer. */ 531 /* efer should match BSP efer. */
563 if (cpu_has_nx) { 532 if (cpu_has_nx) {
@@ -575,9 +544,9 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
575} 544}
576#endif 545#endif
577 546
578static void vmi_set_lazy_mode(int mode) 547static void vmi_set_lazy_mode(enum paravirt_lazy_mode mode)
579{ 548{
580 static DEFINE_PER_CPU(int, lazy_mode); 549 static DEFINE_PER_CPU(enum paravirt_lazy_mode, lazy_mode);
581 550
582 if (!vmi_ops.set_lazy_mode) 551 if (!vmi_ops.set_lazy_mode)
583 return; 552 return;
@@ -685,7 +654,7 @@ void vmi_bringup(void)
685{ 654{
686 /* We must establish the lowmem mapping for MMU ops to work */ 655 /* We must establish the lowmem mapping for MMU ops to work */
687 if (vmi_ops.set_linear_mapping) 656 if (vmi_ops.set_linear_mapping)
688 vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0); 657 vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0);
689} 658}
690 659
691/* 660/*
@@ -740,7 +709,6 @@ do { \
740 } \ 709 } \
741} while (0) 710} while (0)
742 711
743
744/* 712/*
745 * Activate the VMI interface and switch into paravirtualized mode 713 * Activate the VMI interface and switch into paravirtualized mode
746 */ 714 */
@@ -796,12 +764,6 @@ static inline int __init activate_vmi(void)
796 para_fill(irq_disable, DisableInterrupts); 764 para_fill(irq_disable, DisableInterrupts);
797 para_fill(irq_enable, EnableInterrupts); 765 para_fill(irq_enable, EnableInterrupts);
798 766
799 /* irq_save_disable !!! sheer pain */
800 patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK],
801 (char *)paravirt_ops.save_fl);
802 patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE],
803 (char *)paravirt_ops.irq_disable);
804
805 para_fill(wbinvd, WBINVD); 767 para_fill(wbinvd, WBINVD);
806 para_fill(read_tsc, RDTSC); 768 para_fill(read_tsc, RDTSC);
807 769
@@ -831,8 +793,8 @@ static inline int __init activate_vmi(void)
831 para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode); 793 para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode);
832 794
833 /* user and kernel flush are just handled with different flags to FlushTLB */ 795 /* user and kernel flush are just handled with different flags to FlushTLB */
834 para_wrap(flush_tlb_user, vmi_flush_tlb_user, flush_tlb, FlushTLB); 796 para_wrap(flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB);
835 para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, flush_tlb, FlushTLB); 797 para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB);
836 para_fill(flush_tlb_single, InvalPage); 798 para_fill(flush_tlb_single, InvalPage);
837 799
838 /* 800 /*
@@ -878,8 +840,13 @@ static inline int __init activate_vmi(void)
878 paravirt_ops.release_pt = vmi_release_pt; 840 paravirt_ops.release_pt = vmi_release_pt;
879 paravirt_ops.release_pd = vmi_release_pd; 841 paravirt_ops.release_pd = vmi_release_pd;
880 } 842 }
881 para_wrap(map_pt_hook, vmi_map_pt_hook, set_linear_mapping, 843
882 SetLinearMapping); 844 /* Set linear is needed in all cases */
845 vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
846#ifdef CONFIG_HIGHPTE
847 if (vmi_ops.set_linear_mapping)
848 paravirt_ops.kmap_atomic_pte = vmi_kmap_atomic_pte;
849#endif
883 850
884 /* 851 /*
885 * These MUST always be patched. Don't support indirect jumps 852 * These MUST always be patched. Don't support indirect jumps
@@ -920,8 +887,8 @@ static inline int __init activate_vmi(void)
920 paravirt_ops.get_wallclock = vmi_get_wallclock; 887 paravirt_ops.get_wallclock = vmi_get_wallclock;
921 paravirt_ops.set_wallclock = vmi_set_wallclock; 888 paravirt_ops.set_wallclock = vmi_set_wallclock;
922#ifdef CONFIG_X86_LOCAL_APIC 889#ifdef CONFIG_X86_LOCAL_APIC
923 paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm; 890 paravirt_ops.setup_boot_clock = vmi_time_bsp_init;
924 paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm; 891 paravirt_ops.setup_secondary_clock = vmi_time_ap_init;
925#endif 892#endif
926 paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; 893 paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
927 paravirt_ops.get_cpu_khz = vmi_cpu_khz; 894 paravirt_ops.get_cpu_khz = vmi_cpu_khz;
@@ -933,11 +900,7 @@ static inline int __init activate_vmi(void)
933 disable_vmi_timer = 1; 900 disable_vmi_timer = 1;
934 } 901 }
935 902
936 /* No idle HZ mode only works if VMI timer and no idle is enabled */ 903 para_fill(safe_halt, Halt);
937 if (disable_noidle || disable_vmi_timer)
938 para_fill(safe_halt, Halt);
939 else
940 para_wrap(safe_halt, vmi_safe_halt, halt, Halt);
941 904
942 /* 905 /*
943 * Alternative instruction rewriting doesn't happen soon enough 906 * Alternative instruction rewriting doesn't happen soon enough
@@ -945,7 +908,7 @@ static inline int __init activate_vmi(void)
945 * to do this before IRQs get reenabled. Fortunately, it is 908 * to do this before IRQs get reenabled. Fortunately, it is
946 * idempotent. 909 * idempotent.
947 */ 910 */
948 apply_paravirt(__start_parainstructions, __stop_parainstructions); 911 apply_paravirt(__parainstructions, __parainstructions_end);
949 912
950 vmi_bringup(); 913 vmi_bringup();
951 914
diff --git a/arch/i386/kernel/vmiclock.c b/arch/i386/kernel/vmiclock.c
new file mode 100644
index 000000000000..26a37f8a8762
--- /dev/null
+++ b/arch/i386/kernel/vmiclock.c
@@ -0,0 +1,318 @@
1/*
2 * VMI paravirtual timer support routines.
3 *
4 * Copyright (C) 2007, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 */
22
23#include <linux/smp.h>
24#include <linux/interrupt.h>
25#include <linux/cpumask.h>
26#include <linux/clocksource.h>
27#include <linux/clockchips.h>
28
29#include <asm/vmi.h>
30#include <asm/vmi_time.h>
31#include <asm/arch_hooks.h>
32#include <asm/apicdef.h>
33#include <asm/apic.h>
34#include <asm/timer.h>
35
36#include <irq_vectors.h>
37#include "io_ports.h"
38
39#define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
40#define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
41
42static DEFINE_PER_CPU(struct clock_event_device, local_events);
43
44static inline u32 vmi_counter(u32 flags)
45{
46 /* Given VMI_ONESHOT or VMI_PERIODIC, return the corresponding
47 * cycle counter. */
48 return flags & VMI_ALARM_COUNTER_MASK;
49}
50
51/* paravirt_ops.get_wallclock = vmi_get_wallclock */
52unsigned long vmi_get_wallclock(void)
53{
54 unsigned long long wallclock;
55 wallclock = vmi_timer_ops.get_wallclock(); // nsec
56 (void)do_div(wallclock, 1000000000); // sec
57
58 return wallclock;
59}
60
61/* paravirt_ops.set_wallclock = vmi_set_wallclock */
62int vmi_set_wallclock(unsigned long now)
63{
64 return 0;
65}
66
67/* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */
68unsigned long long vmi_get_sched_cycles(void)
69{
70 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
71}
72
73/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */
74unsigned long vmi_cpu_khz(void)
75{
76 unsigned long long khz;
77 khz = vmi_timer_ops.get_cycle_frequency();
78 (void)do_div(khz, 1000);
79 return khz;
80}
81
82static inline unsigned int vmi_get_timer_vector(void)
83{
84#ifdef CONFIG_X86_IO_APIC
85 return FIRST_DEVICE_VECTOR;
86#else
87 return FIRST_EXTERNAL_VECTOR;
88#endif
89}
90
91/** vmi clockchip */
92#ifdef CONFIG_X86_LOCAL_APIC
93static unsigned int startup_timer_irq(unsigned int irq)
94{
95 unsigned long val = apic_read(APIC_LVTT);
96 apic_write(APIC_LVTT, vmi_get_timer_vector());
97
98 return (val & APIC_SEND_PENDING);
99}
100
101static void mask_timer_irq(unsigned int irq)
102{
103 unsigned long val = apic_read(APIC_LVTT);
104 apic_write(APIC_LVTT, val | APIC_LVT_MASKED);
105}
106
107static void unmask_timer_irq(unsigned int irq)
108{
109 unsigned long val = apic_read(APIC_LVTT);
110 apic_write(APIC_LVTT, val & ~APIC_LVT_MASKED);
111}
112
113static void ack_timer_irq(unsigned int irq)
114{
115 ack_APIC_irq();
116}
117
118static struct irq_chip vmi_chip __read_mostly = {
119 .name = "VMI-LOCAL",
120 .startup = startup_timer_irq,
121 .mask = mask_timer_irq,
122 .unmask = unmask_timer_irq,
123 .ack = ack_timer_irq
124};
125#endif
126
127/** vmi clockevent */
128#define VMI_ALARM_WIRED_IRQ0 0x00000000
129#define VMI_ALARM_WIRED_LVTT 0x00010000
130static int vmi_wiring = VMI_ALARM_WIRED_IRQ0;
131
132static inline int vmi_get_alarm_wiring(void)
133{
134 return vmi_wiring;
135}
136
137static void vmi_timer_set_mode(enum clock_event_mode mode,
138 struct clock_event_device *evt)
139{
140 cycle_t now, cycles_per_hz;
141 BUG_ON(!irqs_disabled());
142
143 switch (mode) {
144 case CLOCK_EVT_MODE_ONESHOT:
145 break;
146 case CLOCK_EVT_MODE_PERIODIC:
147 cycles_per_hz = vmi_timer_ops.get_cycle_frequency();
148 (void)do_div(cycles_per_hz, HZ);
149 now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_PERIODIC));
150 vmi_timer_ops.set_alarm(VMI_PERIODIC, now, cycles_per_hz);
151 break;
152 case CLOCK_EVT_MODE_UNUSED:
153 case CLOCK_EVT_MODE_SHUTDOWN:
154 switch (evt->mode) {
155 case CLOCK_EVT_MODE_ONESHOT:
156 vmi_timer_ops.cancel_alarm(VMI_ONESHOT);
157 break;
158 case CLOCK_EVT_MODE_PERIODIC:
159 vmi_timer_ops.cancel_alarm(VMI_PERIODIC);
160 break;
161 default:
162 break;
163 }
164 break;
165 default:
166 break;
167 }
168}
169
170static int vmi_timer_next_event(unsigned long delta,
171 struct clock_event_device *evt)
172{
173 /* Unfortunately, set_next_event interface only passes relative
174 * expiry, but we want absolute expiry. It'd be better if were
175 * were passed an aboslute expiry, since a bunch of time may
176 * have been stolen between the time the delta is computed and
177 * when we set the alarm below. */
178 cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT));
179
180 BUG_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
181 vmi_timer_ops.set_alarm(VMI_ONESHOT, now + delta, 0);
182 return 0;
183}
184
185static struct clock_event_device vmi_clockevent = {
186 .name = "vmi-timer",
187 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
188 .shift = 22,
189 .set_mode = vmi_timer_set_mode,
190 .set_next_event = vmi_timer_next_event,
191 .rating = 1000,
192 .irq = 0,
193};
194
195static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
196{
197 struct clock_event_device *evt = &__get_cpu_var(local_events);
198 evt->event_handler(evt);
199 return IRQ_HANDLED;
200}
201
202static struct irqaction vmi_clock_action = {
203 .name = "vmi-timer",
204 .handler = vmi_timer_interrupt,
205 .flags = IRQF_DISABLED | IRQF_NOBALANCING,
206 .mask = CPU_MASK_ALL,
207};
208
209static void __devinit vmi_time_init_clockevent(void)
210{
211 cycle_t cycles_per_msec;
212 struct clock_event_device *evt;
213
214 int cpu = smp_processor_id();
215 evt = &__get_cpu_var(local_events);
216
217 /* Use cycles_per_msec since div_sc params are 32-bits. */
218 cycles_per_msec = vmi_timer_ops.get_cycle_frequency();
219 (void)do_div(cycles_per_msec, 1000);
220
221 memcpy(evt, &vmi_clockevent, sizeof(*evt));
222 /* Must pick .shift such that .mult fits in 32-bits. Choosing
223 * .shift to be 22 allows 2^(32-22) cycles per nano-seconds
224 * before overflow. */
225 evt->mult = div_sc(cycles_per_msec, NSEC_PER_MSEC, evt->shift);
226 /* Upper bound is clockevent's use of ulong for cycle deltas. */
227 evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
228 evt->min_delta_ns = clockevent_delta2ns(1, evt);
229 evt->cpumask = cpumask_of_cpu(cpu);
230
231 printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
232 evt->name, evt->mult, evt->shift);
233 clockevents_register_device(evt);
234}
235
236void __init vmi_time_init(void)
237{
238 /* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */
239 outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
240
241 vmi_time_init_clockevent();
242 setup_irq(0, &vmi_clock_action);
243}
244
245#ifdef CONFIG_X86_LOCAL_APIC
246void __devinit vmi_time_bsp_init(void)
247{
248 /*
249 * On APIC systems, we want local timers to fire on each cpu. We do
250 * this by programming LVTT to deliver timer events to the IRQ handler
251 * for IRQ-0, since we can't re-use the APIC local timer handler
252 * without interfering with that code.
253 */
254 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
255 local_irq_disable();
256#ifdef CONFIG_X86_SMP
257 /*
258 * XXX handle_percpu_irq only defined for SMP; we need to switch over
259 * to using it, since this is a local interrupt, which each CPU must
260 * handle individually without locking out or dropping simultaneous
261 * local timers on other CPUs. We also don't want to trigger the
262 * quirk workaround code for interrupts which gets invoked from
263 * handle_percpu_irq via eoi, so we use our own IRQ chip.
264 */
265 set_irq_chip_and_handler_name(0, &vmi_chip, handle_percpu_irq, "lvtt");
266#else
267 set_irq_chip_and_handler_name(0, &vmi_chip, handle_edge_irq, "lvtt");
268#endif
269 vmi_wiring = VMI_ALARM_WIRED_LVTT;
270 apic_write(APIC_LVTT, vmi_get_timer_vector());
271 local_irq_enable();
272 clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
273}
274
275void __devinit vmi_time_ap_init(void)
276{
277 vmi_time_init_clockevent();
278 apic_write(APIC_LVTT, vmi_get_timer_vector());
279}
280#endif
281
282/** vmi clocksource */
283
284static cycle_t read_real_cycles(void)
285{
286 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
287}
288
289static struct clocksource clocksource_vmi = {
290 .name = "vmi-timer",
291 .rating = 450,
292 .read = read_real_cycles,
293 .mask = CLOCKSOURCE_MASK(64),
294 .mult = 0, /* to be set */
295 .shift = 22,
296 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
297};
298
299static int __init init_vmi_clocksource(void)
300{
301 cycle_t cycles_per_msec;
302
303 if (!vmi_timer_ops.get_cycle_frequency)
304 return 0;
305 /* Use khz2mult rather than hz2mult since hz arg is only 32-bits. */
306 cycles_per_msec = vmi_timer_ops.get_cycle_frequency();
307 (void)do_div(cycles_per_msec, 1000);
308
309 /* Note that clocksource.{mult, shift} converts in the opposite direction
310 * as clockevents. */
311 clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
312 clocksource_vmi.shift);
313
314 printk(KERN_WARNING "vmi: registering clock source khz=%lld\n", cycles_per_msec);
315 return clocksource_register(&clocksource_vmi);
316
317}
318module_init(init_vmi_clocksource);
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
deleted file mode 100644
index 9dfb17739b67..000000000000
--- a/arch/i386/kernel/vmitime.c
+++ /dev/null
@@ -1,482 +0,0 @@
1/*
2 * VMI paravirtual timer support routines.
3 *
4 * Copyright (C) 2005, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * Send feedback to dhecht@vmware.com
22 *
23 */
24
25/*
26 * Portions of this code from arch/i386/kernel/timers/timer_tsc.c.
27 * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c.
28 * See comments there for proper credits.
29 */
30
31#include <linux/spinlock.h>
32#include <linux/init.h>
33#include <linux/errno.h>
34#include <linux/jiffies.h>
35#include <linux/interrupt.h>
36#include <linux/kernel_stat.h>
37#include <linux/rcupdate.h>
38#include <linux/clocksource.h>
39
40#include <asm/timer.h>
41#include <asm/io.h>
42#include <asm/apic.h>
43#include <asm/div64.h>
44#include <asm/timer.h>
45#include <asm/desc.h>
46
47#include <asm/vmi.h>
48#include <asm/vmi_time.h>
49
50#include <mach_timer.h>
51#include <io_ports.h>
52
53#ifdef CONFIG_X86_LOCAL_APIC
54#define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT
55#else
56#define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0
57#endif
58
59/* Cached VMI operations */
60struct vmi_timer_ops vmi_timer_ops;
61
62#ifdef CONFIG_NO_IDLE_HZ
63
64/* /proc/sys/kernel/hz_timer state. */
65int sysctl_hz_timer;
66
67/* Some stats */
68static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
69static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
70static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);
71
72#endif /* CONFIG_NO_IDLE_HZ */
73
74/* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */
75static int alarm_hz = CONFIG_VMI_ALARM_HZ;
76
77/* Cache of the value get_cycle_frequency / HZ. */
78static signed long long cycles_per_jiffy;
79
80/* Cache of the value get_cycle_frequency / alarm_hz. */
81static signed long long cycles_per_alarm;
82
83/* The number of cycles accounted for by the 'jiffies'/'xtime' count.
84 * Protected by xtime_lock. */
85static unsigned long long real_cycles_accounted_system;
86
87/* The number of cycles accounted for by update_process_times(), per cpu. */
88static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu);
89
90/* The number of stolen cycles accounted, per cpu. */
91static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu);
92
93/* Clock source. */
94static cycle_t read_real_cycles(void)
95{
96 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
97}
98
99static cycle_t read_available_cycles(void)
100{
101 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
102}
103
104#if 0
105static cycle_t read_stolen_cycles(void)
106{
107 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);
108}
109#endif /* 0 */
110
111static struct clocksource clocksource_vmi = {
112 .name = "vmi-timer",
113 .rating = 450,
114 .read = read_real_cycles,
115 .mask = CLOCKSOURCE_MASK(64),
116 .mult = 0, /* to be set */
117 .shift = 22,
118 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
119};
120
121
122/* Timer interrupt handler. */
123static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
124
125static struct irqaction vmi_timer_irq = {
126 .handler = vmi_timer_interrupt,
127 .flags = IRQF_DISABLED,
128 .mask = CPU_MASK_NONE,
129 .name = "VMI-alarm",
130};
131
132/* Alarm rate */
133static int __init vmi_timer_alarm_rate_setup(char* str)
134{
135 int alarm_rate;
136 if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) {
137 alarm_hz = alarm_rate;
138 printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz);
139 }
140 return 1;
141}
142__setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup);
143
144
145/* Initialization */
146static void vmi_get_wallclock_ts(struct timespec *ts)
147{
148 unsigned long long wallclock;
149 wallclock = vmi_timer_ops.get_wallclock(); // nsec units
150 ts->tv_nsec = do_div(wallclock, 1000000000);
151 ts->tv_sec = wallclock;
152}
153
154unsigned long vmi_get_wallclock(void)
155{
156 struct timespec ts;
157 vmi_get_wallclock_ts(&ts);
158 return ts.tv_sec;
159}
160
161int vmi_set_wallclock(unsigned long now)
162{
163 return -1;
164}
165
166unsigned long long vmi_get_sched_cycles(void)
167{
168 return read_available_cycles();
169}
170
171unsigned long vmi_cpu_khz(void)
172{
173 unsigned long long khz;
174
175 khz = vmi_timer_ops.get_cycle_frequency();
176 (void)do_div(khz, 1000);
177 return khz;
178}
179
180void __init vmi_time_init(void)
181{
182 unsigned long long cycles_per_sec, cycles_per_msec;
183 unsigned long flags;
184
185 local_irq_save(flags);
186 setup_irq(0, &vmi_timer_irq);
187#ifdef CONFIG_X86_LOCAL_APIC
188 set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
189#endif
190
191 real_cycles_accounted_system = read_real_cycles();
192 per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();
193
194 cycles_per_sec = vmi_timer_ops.get_cycle_frequency();
195 cycles_per_jiffy = cycles_per_sec;
196 (void)do_div(cycles_per_jiffy, HZ);
197 cycles_per_alarm = cycles_per_sec;
198 (void)do_div(cycles_per_alarm, alarm_hz);
199 cycles_per_msec = cycles_per_sec;
200 (void)do_div(cycles_per_msec, 1000);
201
202 printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
203 "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
204 cycles_per_alarm);
205
206 clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
207 clocksource_vmi.shift);
208 if (clocksource_register(&clocksource_vmi))
209 printk(KERN_WARNING "Error registering VMITIME clocksource.");
210
211 /* Disable PIT. */
212 outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
213
214 /* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu
215 * reduce the latency calling update_process_times. */
216 vmi_timer_ops.set_alarm(
217 VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
218 per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
219 cycles_per_alarm);
220
221 local_irq_restore(flags);
222}
223
224#ifdef CONFIG_X86_LOCAL_APIC
225
226void __init vmi_timer_setup_boot_alarm(void)
227{
228 local_irq_disable();
229
230 /* Route the interrupt to the correct vector. */
231 apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
232
233 /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */
234 vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
235 vmi_timer_ops.set_alarm(
236 VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
237 per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
238 cycles_per_alarm);
239 local_irq_enable();
240}
241
242/* Initialize the time accounting variables for an AP on an SMP system.
243 * Also, set the local alarm for the AP. */
244void __devinit vmi_timer_setup_secondary_alarm(void)
245{
246 int cpu = smp_processor_id();
247
248 /* Route the interrupt to the correct vector. */
249 apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
250
251 per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles();
252
253 vmi_timer_ops.set_alarm(
254 VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
255 per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
256 cycles_per_alarm);
257}
258
259#endif
260
261/* Update system wide (real) time accounting (e.g. jiffies, xtime). */
262static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
263{
264 long long cycles_not_accounted;
265
266 write_seqlock(&xtime_lock);
267
268 cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
269 while (cycles_not_accounted >= cycles_per_jiffy) {
270 /* systems wide jiffies. */
271 do_timer(1);
272
273 cycles_not_accounted -= cycles_per_jiffy;
274 real_cycles_accounted_system += cycles_per_jiffy;
275 }
276
277 write_sequnlock(&xtime_lock);
278}
279
280/* Update per-cpu process times. */
281static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu,
282 unsigned long long cur_process_times_cycles)
283{
284 long long cycles_not_accounted;
285 cycles_not_accounted = cur_process_times_cycles -
286 per_cpu(process_times_cycles_accounted_cpu, cpu);
287
288 while (cycles_not_accounted >= cycles_per_jiffy) {
289 /* Account time to the current process. This includes
290 * calling into the scheduler to decrement the timeslice
291 * and possibly reschedule.*/
292 update_process_times(user_mode(regs));
293 /* XXX handle /proc/profile multiplier. */
294 profile_tick(CPU_PROFILING);
295
296 cycles_not_accounted -= cycles_per_jiffy;
297 per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
298 }
299}
300
301#ifdef CONFIG_NO_IDLE_HZ
302/* Update per-cpu idle times. Used when a no-hz halt is ended. */
303static void vmi_account_no_hz_idle_cycles(int cpu,
304 unsigned long long cur_process_times_cycles)
305{
306 long long cycles_not_accounted;
307 unsigned long no_idle_hz_jiffies = 0;
308
309 cycles_not_accounted = cur_process_times_cycles -
310 per_cpu(process_times_cycles_accounted_cpu, cpu);
311
312 while (cycles_not_accounted >= cycles_per_jiffy) {
313 no_idle_hz_jiffies++;
314 cycles_not_accounted -= cycles_per_jiffy;
315 per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
316 }
317 /* Account time to the idle process. */
318 account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies));
319}
320#endif
321
322/* Update per-cpu stolen time. */
323static void vmi_account_stolen_cycles(int cpu,
324 unsigned long long cur_real_cycles,
325 unsigned long long cur_avail_cycles)
326{
327 long long stolen_cycles_not_accounted;
328 unsigned long stolen_jiffies = 0;
329
330 if (cur_real_cycles < cur_avail_cycles)
331 return;
332
333 stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles -
334 per_cpu(stolen_cycles_accounted_cpu, cpu);
335
336 while (stolen_cycles_not_accounted >= cycles_per_jiffy) {
337 stolen_jiffies++;
338 stolen_cycles_not_accounted -= cycles_per_jiffy;
339 per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
340 }
341 /* HACK: pass NULL to force time onto cpustat->steal. */
342 account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies));
343}
344
345/* Body of either IRQ0 interrupt handler (UP no local-APIC) or
346 * local-APIC LVTT interrupt handler (UP & local-APIC or SMP). */
347static void vmi_local_timer_interrupt(int cpu)
348{
349 unsigned long long cur_real_cycles, cur_process_times_cycles;
350
351 cur_real_cycles = read_real_cycles();
352 cur_process_times_cycles = read_available_cycles();
353 /* Update system wide (real) time state (xtime, jiffies). */
354 vmi_account_real_cycles(cur_real_cycles);
355 /* Update per-cpu process times. */
356 vmi_account_process_times_cycles(get_irq_regs(), cpu, cur_process_times_cycles);
357 /* Update time stolen from this cpu by the hypervisor. */
358 vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
359}
360
361#ifdef CONFIG_NO_IDLE_HZ
362
363/* Must be called only from idle loop, with interrupts disabled. */
364int vmi_stop_hz_timer(void)
365{
366 /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */
367
368 unsigned long seq, next;
369 unsigned long long real_cycles_expiry;
370 int cpu = smp_processor_id();
371
372 BUG_ON(!irqs_disabled());
373 if (sysctl_hz_timer != 0)
374 return 0;
375
376 cpu_set(cpu, nohz_cpu_mask);
377 smp_mb();
378
379 if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
380 (next = next_timer_interrupt(),
381 time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) {
382 cpu_clear(cpu, nohz_cpu_mask);
383 return 0;
384 }
385
386 /* Convert jiffies to the real cycle counter. */
387 do {
388 seq = read_seqbegin(&xtime_lock);
389 real_cycles_expiry = real_cycles_accounted_system +
390 (long)(next - jiffies) * cycles_per_jiffy;
391 } while (read_seqretry(&xtime_lock, seq));
392
393 /* This cpu is going idle. Disable the periodic alarm. */
394 vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
395 per_cpu(idle_start_jiffies, cpu) = jiffies;
396 /* Set the real time alarm to expire at the next event. */
397 vmi_timer_ops.set_alarm(
398 VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
399 real_cycles_expiry, 0);
400 return 1;
401}
402
403static void vmi_reenable_hz_timer(int cpu)
404{
405 /* For /proc/vmi/info idle_hz stat. */
406 per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu);
407 per_cpu(vmi_idle_no_hz_irqs, cpu)++;
408
409 /* Don't bother explicitly cancelling the one-shot alarm -- at
410 * worse we will receive a spurious timer interrupt. */
411 vmi_timer_ops.set_alarm(
412 VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
413 per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
414 cycles_per_alarm);
415 /* Indicate this cpu is no longer nohz idle. */
416 cpu_clear(cpu, nohz_cpu_mask);
417}
418
419/* Called from interrupt handlers when (local) HZ timer is disabled. */
420void vmi_account_time_restart_hz_timer(void)
421{
422 unsigned long long cur_real_cycles, cur_process_times_cycles;
423 int cpu = smp_processor_id();
424
425 BUG_ON(!irqs_disabled());
426 /* Account the time during which the HZ timer was disabled. */
427 cur_real_cycles = read_real_cycles();
428 cur_process_times_cycles = read_available_cycles();
429 /* Update system wide (real) time state (xtime, jiffies). */
430 vmi_account_real_cycles(cur_real_cycles);
431 /* Update per-cpu idle times. */
432 vmi_account_no_hz_idle_cycles(cpu, cur_process_times_cycles);
433 /* Update time stolen from this cpu by the hypervisor. */
434 vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
435 /* Reenable the hz timer. */
436 vmi_reenable_hz_timer(cpu);
437}
438
439#endif /* CONFIG_NO_IDLE_HZ */
440
441/* UP (and no local-APIC) VMI-timer alarm interrupt handler.
442 * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after
443 * APIC setup and setup_boot_vmi_alarm() is called. */
444static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
445{
446 vmi_local_timer_interrupt(smp_processor_id());
447 return IRQ_HANDLED;
448}
449
450#ifdef CONFIG_X86_LOCAL_APIC
451
452/* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector.
453 * Also used in UP when CONFIG_X86_LOCAL_APIC.
454 * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */
455void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
456{
457 struct pt_regs *old_regs = set_irq_regs(regs);
458 int cpu = smp_processor_id();
459
460 /*
461 * the NMI deadlock-detector uses this.
462 */
463 per_cpu(irq_stat,cpu).apic_timer_irqs++;
464
465 /*
466 * NOTE! We'd better ACK the irq immediately,
467 * because timer handling can be slow.
468 */
469 ack_APIC_irq();
470
471 /*
472 * update_process_times() expects us to have done irq_enter().
473 * Besides, if we don't timer interrupts ignore the global
474 * interrupt lock, which is the WrongThing (tm) to do.
475 */
476 irq_enter();
477 vmi_local_timer_interrupt(cpu);
478 irq_exit();
479 set_irq_regs(old_regs);
480}
481
482#endif /* CONFIG_X86_LOCAL_APIC */
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 6f38f818380b..23e8614edeee 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -26,12 +26,11 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
26OUTPUT_ARCH(i386) 26OUTPUT_ARCH(i386)
27ENTRY(phys_startup_32) 27ENTRY(phys_startup_32)
28jiffies = jiffies_64; 28jiffies = jiffies_64;
29_proxy_pda = 1;
30 29
31PHDRS { 30PHDRS {
32 text PT_LOAD FLAGS(5); /* R_E */ 31 text PT_LOAD FLAGS(5); /* R_E */
33 data PT_LOAD FLAGS(7); /* RWE */ 32 data PT_LOAD FLAGS(7); /* RWE */
34 note PT_NOTE FLAGS(4); /* R__ */ 33 note PT_NOTE FLAGS(0); /* ___ */
35} 34}
36SECTIONS 35SECTIONS
37{ 36{
@@ -61,8 +60,6 @@ SECTIONS
61 __stop___ex_table = .; 60 __stop___ex_table = .;
62 } 61 }
63 62
64 RODATA
65
66 BUG_TABLE 63 BUG_TABLE
67 64
68 . = ALIGN(4); 65 . = ALIGN(4);
@@ -72,6 +69,8 @@ SECTIONS
72 __tracedata_end = .; 69 __tracedata_end = .;
73 } 70 }
74 71
72 RODATA
73
75 /* writeable */ 74 /* writeable */
76 . = ALIGN(4096); 75 . = ALIGN(4096);
77 .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ 76 .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
@@ -117,22 +116,11 @@ SECTIONS
117 116
118 /* might get freed after init */ 117 /* might get freed after init */
119 . = ALIGN(4096); 118 . = ALIGN(4096);
120 .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
121 __smp_alt_begin = .;
122 __smp_alt_instructions = .;
123 *(.smp_altinstructions)
124 __smp_alt_instructions_end = .;
125 }
126 . = ALIGN(4);
127 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { 119 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
128 __smp_locks = .; 120 __smp_locks = .;
129 *(.smp_locks) 121 *(.smp_locks)
130 __smp_locks_end = .; 122 __smp_locks_end = .;
131 } 123 }
132 .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
133 *(.smp_altinstr_replacement)
134 __smp_alt_end = .;
135 }
136 /* will be freed after init 124 /* will be freed after init
137 * Following ALIGN() is required to make sure no other data falls on the 125 * Following ALIGN() is required to make sure no other data falls on the
138 * same page where __smp_alt_end is pointing as that page might be freed 126 * same page where __smp_alt_end is pointing as that page might be freed
@@ -178,9 +166,9 @@ SECTIONS
178 } 166 }
179 . = ALIGN(4); 167 . = ALIGN(4);
180 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { 168 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
181 __start_parainstructions = .; 169 __parainstructions = .;
182 *(.parainstructions) 170 *(.parainstructions)
183 __stop_parainstructions = .; 171 __parainstructions_end = .;
184 } 172 }
185 /* .exit.text is discard at runtime, not link time, to deal with references 173 /* .exit.text is discard at runtime, not link time, to deal with references
186 from .altinstructions and .eh_frame */ 174 from .altinstructions and .eh_frame */
@@ -194,7 +182,7 @@ SECTIONS
194 __initramfs_end = .; 182 __initramfs_end = .;
195 } 183 }
196#endif 184#endif
197 . = ALIGN(L1_CACHE_BYTES); 185 . = ALIGN(4096);
198 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { 186 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
199 __per_cpu_start = .; 187 __per_cpu_start = .;
200 *(.data.percpu) 188 *(.data.percpu)
diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S
index f66cd11adb72..4a8b0ed9b8fb 100644
--- a/arch/i386/kernel/vsyscall.lds.S
+++ b/arch/i386/kernel/vsyscall.lds.S
@@ -7,7 +7,7 @@
7 7
8SECTIONS 8SECTIONS
9{ 9{
10 . = VDSO_PRELINK + SIZEOF_HEADERS; 10 . = VDSO_PRELINK_asm + SIZEOF_HEADERS;
11 11
12 .hash : { *(.hash) } :text 12 .hash : { *(.hash) } :text
13 .gnu.hash : { *(.gnu.hash) } 13 .gnu.hash : { *(.gnu.hash) }
@@ -21,7 +21,7 @@ SECTIONS
21 For the layouts to match, we need to skip more than enough 21 For the layouts to match, we need to skip more than enough
22 space for the dynamic symbol table et al. If this amount 22 space for the dynamic symbol table et al. If this amount
23 is insufficient, ld -shared will barf. Just increase it here. */ 23 is insufficient, ld -shared will barf. Just increase it here. */
24 . = VDSO_PRELINK + 0x400; 24 . = VDSO_PRELINK_asm + 0x400;
25 25
26 .text : { *(.text) } :text =0x90909090 26 .text : { *(.text) } :text =0x90909090
27 .note : { *(.note.*) } :text :note 27 .note : { *(.note.*) } :text :note
diff --git a/arch/i386/lib/bitops.c b/arch/i386/lib/bitops.c
index 97db3853dc82..afd0045595d4 100644
--- a/arch/i386/lib/bitops.c
+++ b/arch/i386/lib/bitops.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL(find_next_bit);
43 */ 43 */
44int find_next_zero_bit(const unsigned long *addr, int size, int offset) 44int find_next_zero_bit(const unsigned long *addr, int size, int offset)
45{ 45{
46 unsigned long * p = ((unsigned long *) addr) + (offset >> 5); 46 const unsigned long *p = addr + (offset >> 5);
47 int set = 0, bit = offset & 31, res; 47 int set = 0, bit = offset & 31, res;
48 48
49 if (bit) { 49 if (bit) {
@@ -64,7 +64,7 @@ int find_next_zero_bit(const unsigned long *addr, int size, int offset)
64 /* 64 /*
65 * No zero yet, search remaining full bytes for a zero 65 * No zero yet, search remaining full bytes for a zero
66 */ 66 */
67 res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr)); 67 res = find_first_zero_bit(p, size - 32 * (p - addr));
68 return (offset + set + res); 68 return (offset + set + res);
69} 69}
70EXPORT_SYMBOL(find_next_zero_bit); 70EXPORT_SYMBOL(find_next_zero_bit);
diff --git a/arch/i386/lib/checksum.S b/arch/i386/lib/checksum.S
index 75ffd02654fc..adbccd0bbb78 100644
--- a/arch/i386/lib/checksum.S
+++ b/arch/i386/lib/checksum.S
@@ -25,6 +25,8 @@
25 * 2 of the License, or (at your option) any later version. 25 * 2 of the License, or (at your option) any later version.
26 */ 26 */
27 27
28#include <linux/linkage.h>
29#include <asm/dwarf2.h>
28#include <asm/errno.h> 30#include <asm/errno.h>
29 31
30/* 32/*
@@ -36,8 +38,6 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
36 */ 38 */
37 39
38.text 40.text
39.align 4
40.globl csum_partial
41 41
42#ifndef CONFIG_X86_USE_PPRO_CHECKSUM 42#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
43 43
@@ -48,9 +48,14 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
48 * Fortunately, it is easy to convert 2-byte alignment to 4-byte 48 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
49 * alignment for the unrolled loop. 49 * alignment for the unrolled loop.
50 */ 50 */
51csum_partial: 51ENTRY(csum_partial)
52 CFI_STARTPROC
52 pushl %esi 53 pushl %esi
54 CFI_ADJUST_CFA_OFFSET 4
55 CFI_REL_OFFSET esi, 0
53 pushl %ebx 56 pushl %ebx
57 CFI_ADJUST_CFA_OFFSET 4
58 CFI_REL_OFFSET ebx, 0
54 movl 20(%esp),%eax # Function arg: unsigned int sum 59 movl 20(%esp),%eax # Function arg: unsigned int sum
55 movl 16(%esp),%ecx # Function arg: int len 60 movl 16(%esp),%ecx # Function arg: int len
56 movl 12(%esp),%esi # Function arg: unsigned char *buff 61 movl 12(%esp),%esi # Function arg: unsigned char *buff
@@ -128,16 +133,27 @@ csum_partial:
128 roll $8, %eax 133 roll $8, %eax
1298: 1348:
130 popl %ebx 135 popl %ebx
136 CFI_ADJUST_CFA_OFFSET -4
137 CFI_RESTORE ebx
131 popl %esi 138 popl %esi
139 CFI_ADJUST_CFA_OFFSET -4
140 CFI_RESTORE esi
132 ret 141 ret
142 CFI_ENDPROC
143ENDPROC(csum_partial)
133 144
134#else 145#else
135 146
136/* Version for PentiumII/PPro */ 147/* Version for PentiumII/PPro */
137 148
138csum_partial: 149ENTRY(csum_partial)
150 CFI_STARTPROC
139 pushl %esi 151 pushl %esi
152 CFI_ADJUST_CFA_OFFSET 4
153 CFI_REL_OFFSET esi, 0
140 pushl %ebx 154 pushl %ebx
155 CFI_ADJUST_CFA_OFFSET 4
156 CFI_REL_OFFSET ebx, 0
141 movl 20(%esp),%eax # Function arg: unsigned int sum 157 movl 20(%esp),%eax # Function arg: unsigned int sum
142 movl 16(%esp),%ecx # Function arg: int len 158 movl 16(%esp),%ecx # Function arg: int len
143 movl 12(%esp),%esi # Function arg: const unsigned char *buf 159 movl 12(%esp),%esi # Function arg: const unsigned char *buf
@@ -245,8 +261,14 @@ csum_partial:
245 roll $8, %eax 261 roll $8, %eax
24690: 26290:
247 popl %ebx 263 popl %ebx
264 CFI_ADJUST_CFA_OFFSET -4
265 CFI_RESTORE ebx
248 popl %esi 266 popl %esi
267 CFI_ADJUST_CFA_OFFSET -4
268 CFI_RESTORE esi
249 ret 269 ret
270 CFI_ENDPROC
271ENDPROC(csum_partial)
250 272
251#endif 273#endif
252 274
@@ -278,19 +300,24 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
278 .long 9999b, 6002f ; \ 300 .long 9999b, 6002f ; \
279 .previous 301 .previous
280 302
281.align 4
282.globl csum_partial_copy_generic
283
284#ifndef CONFIG_X86_USE_PPRO_CHECKSUM 303#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
285 304
286#define ARGBASE 16 305#define ARGBASE 16
287#define FP 12 306#define FP 12
288 307
289csum_partial_copy_generic: 308ENTRY(csum_partial_copy_generic)
309 CFI_STARTPROC
290 subl $4,%esp 310 subl $4,%esp
311 CFI_ADJUST_CFA_OFFSET 4
291 pushl %edi 312 pushl %edi
313 CFI_ADJUST_CFA_OFFSET 4
314 CFI_REL_OFFSET edi, 0
292 pushl %esi 315 pushl %esi
316 CFI_ADJUST_CFA_OFFSET 4
317 CFI_REL_OFFSET esi, 0
293 pushl %ebx 318 pushl %ebx
319 CFI_ADJUST_CFA_OFFSET 4
320 CFI_REL_OFFSET ebx, 0
294 movl ARGBASE+16(%esp),%eax # sum 321 movl ARGBASE+16(%esp),%eax # sum
295 movl ARGBASE+12(%esp),%ecx # len 322 movl ARGBASE+12(%esp),%ecx # len
296 movl ARGBASE+4(%esp),%esi # src 323 movl ARGBASE+4(%esp),%esi # src
@@ -400,10 +427,19 @@ DST( movb %cl, (%edi) )
400.previous 427.previous
401 428
402 popl %ebx 429 popl %ebx
430 CFI_ADJUST_CFA_OFFSET -4
431 CFI_RESTORE ebx
403 popl %esi 432 popl %esi
433 CFI_ADJUST_CFA_OFFSET -4
434 CFI_RESTORE esi
404 popl %edi 435 popl %edi
436 CFI_ADJUST_CFA_OFFSET -4
437 CFI_RESTORE edi
405 popl %ecx # equivalent to addl $4,%esp 438 popl %ecx # equivalent to addl $4,%esp
439 CFI_ADJUST_CFA_OFFSET -4
406 ret 440 ret
441 CFI_ENDPROC
442ENDPROC(csum_partial_copy_generic)
407 443
408#else 444#else
409 445
@@ -421,10 +457,17 @@ DST( movb %cl, (%edi) )
421 457
422#define ARGBASE 12 458#define ARGBASE 12
423 459
424csum_partial_copy_generic: 460ENTRY(csum_partial_copy_generic)
461 CFI_STARTPROC
425 pushl %ebx 462 pushl %ebx
463 CFI_ADJUST_CFA_OFFSET 4
464 CFI_REL_OFFSET ebx, 0
426 pushl %edi 465 pushl %edi
466 CFI_ADJUST_CFA_OFFSET 4
467 CFI_REL_OFFSET edi, 0
427 pushl %esi 468 pushl %esi
469 CFI_ADJUST_CFA_OFFSET 4
470 CFI_REL_OFFSET esi, 0
428 movl ARGBASE+4(%esp),%esi #src 471 movl ARGBASE+4(%esp),%esi #src
429 movl ARGBASE+8(%esp),%edi #dst 472 movl ARGBASE+8(%esp),%edi #dst
430 movl ARGBASE+12(%esp),%ecx #len 473 movl ARGBASE+12(%esp),%ecx #len
@@ -485,9 +528,17 @@ DST( movb %dl, (%edi) )
485.previous 528.previous
486 529
487 popl %esi 530 popl %esi
531 CFI_ADJUST_CFA_OFFSET -4
532 CFI_RESTORE esi
488 popl %edi 533 popl %edi
534 CFI_ADJUST_CFA_OFFSET -4
535 CFI_RESTORE edi
489 popl %ebx 536 popl %ebx
537 CFI_ADJUST_CFA_OFFSET -4
538 CFI_RESTORE ebx
490 ret 539 ret
540 CFI_ENDPROC
541ENDPROC(csum_partial_copy_generic)
491 542
492#undef ROUND 543#undef ROUND
493#undef ROUND1 544#undef ROUND1
diff --git a/arch/i386/lib/getuser.S b/arch/i386/lib/getuser.S
index 62d7f178a326..6d84b53f12a2 100644
--- a/arch/i386/lib/getuser.S
+++ b/arch/i386/lib/getuser.S
@@ -8,6 +8,8 @@
8 * return an error value in addition to the "real" 8 * return an error value in addition to the "real"
9 * return value. 9 * return value.
10 */ 10 */
11#include <linux/linkage.h>
12#include <asm/dwarf2.h>
11#include <asm/thread_info.h> 13#include <asm/thread_info.h>
12 14
13 15
@@ -24,19 +26,19 @@
24 */ 26 */
25 27
26.text 28.text
27.align 4 29ENTRY(__get_user_1)
28.globl __get_user_1 30 CFI_STARTPROC
29__get_user_1:
30 GET_THREAD_INFO(%edx) 31 GET_THREAD_INFO(%edx)
31 cmpl TI_addr_limit(%edx),%eax 32 cmpl TI_addr_limit(%edx),%eax
32 jae bad_get_user 33 jae bad_get_user
331: movzbl (%eax),%edx 341: movzbl (%eax),%edx
34 xorl %eax,%eax 35 xorl %eax,%eax
35 ret 36 ret
37 CFI_ENDPROC
38ENDPROC(__get_user_1)
36 39
37.align 4 40ENTRY(__get_user_2)
38.globl __get_user_2 41 CFI_STARTPROC
39__get_user_2:
40 addl $1,%eax 42 addl $1,%eax
41 jc bad_get_user 43 jc bad_get_user
42 GET_THREAD_INFO(%edx) 44 GET_THREAD_INFO(%edx)
@@ -45,10 +47,11 @@ __get_user_2:
452: movzwl -1(%eax),%edx 472: movzwl -1(%eax),%edx
46 xorl %eax,%eax 48 xorl %eax,%eax
47 ret 49 ret
50 CFI_ENDPROC
51ENDPROC(__get_user_2)
48 52
49.align 4 53ENTRY(__get_user_4)
50.globl __get_user_4 54 CFI_STARTPROC
51__get_user_4:
52 addl $3,%eax 55 addl $3,%eax
53 jc bad_get_user 56 jc bad_get_user
54 GET_THREAD_INFO(%edx) 57 GET_THREAD_INFO(%edx)
@@ -57,11 +60,16 @@ __get_user_4:
573: movl -3(%eax),%edx 603: movl -3(%eax),%edx
58 xorl %eax,%eax 61 xorl %eax,%eax
59 ret 62 ret
63 CFI_ENDPROC
64ENDPROC(__get_user_4)
60 65
61bad_get_user: 66bad_get_user:
67 CFI_STARTPROC
62 xorl %edx,%edx 68 xorl %edx,%edx
63 movl $-14,%eax 69 movl $-14,%eax
64 ret 70 ret
71 CFI_ENDPROC
72END(bad_get_user)
65 73
66.section __ex_table,"a" 74.section __ex_table,"a"
67 .long 1b,bad_get_user 75 .long 1b,bad_get_user
diff --git a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
index a32d9f570f48..f58fba109d18 100644
--- a/arch/i386/lib/putuser.S
+++ b/arch/i386/lib/putuser.S
@@ -8,6 +8,8 @@
8 * return an error value in addition to the "real" 8 * return an error value in addition to the "real"
9 * return value. 9 * return value.
10 */ 10 */
11#include <linux/linkage.h>
12#include <asm/dwarf2.h>
11#include <asm/thread_info.h> 13#include <asm/thread_info.h>
12 14
13 15
@@ -23,23 +25,28 @@
23 * as they get called from within inline assembly. 25 * as they get called from within inline assembly.
24 */ 26 */
25 27
26#define ENTER pushl %ebx ; GET_THREAD_INFO(%ebx) 28#define ENTER CFI_STARTPROC ; \
27#define EXIT popl %ebx ; ret 29 pushl %ebx ; \
30 CFI_ADJUST_CFA_OFFSET 4 ; \
31 CFI_REL_OFFSET ebx, 0 ; \
32 GET_THREAD_INFO(%ebx)
33#define EXIT popl %ebx ; \
34 CFI_ADJUST_CFA_OFFSET -4 ; \
35 CFI_RESTORE ebx ; \
36 ret ; \
37 CFI_ENDPROC
28 38
29.text 39.text
30.align 4 40ENTRY(__put_user_1)
31.globl __put_user_1
32__put_user_1:
33 ENTER 41 ENTER
34 cmpl TI_addr_limit(%ebx),%ecx 42 cmpl TI_addr_limit(%ebx),%ecx
35 jae bad_put_user 43 jae bad_put_user
361: movb %al,(%ecx) 441: movb %al,(%ecx)
37 xorl %eax,%eax 45 xorl %eax,%eax
38 EXIT 46 EXIT
47ENDPROC(__put_user_1)
39 48
40.align 4 49ENTRY(__put_user_2)
41.globl __put_user_2
42__put_user_2:
43 ENTER 50 ENTER
44 movl TI_addr_limit(%ebx),%ebx 51 movl TI_addr_limit(%ebx),%ebx
45 subl $1,%ebx 52 subl $1,%ebx
@@ -48,10 +55,9 @@ __put_user_2:
482: movw %ax,(%ecx) 552: movw %ax,(%ecx)
49 xorl %eax,%eax 56 xorl %eax,%eax
50 EXIT 57 EXIT
58ENDPROC(__put_user_2)
51 59
52.align 4 60ENTRY(__put_user_4)
53.globl __put_user_4
54__put_user_4:
55 ENTER 61 ENTER
56 movl TI_addr_limit(%ebx),%ebx 62 movl TI_addr_limit(%ebx),%ebx
57 subl $3,%ebx 63 subl $3,%ebx
@@ -60,10 +66,9 @@ __put_user_4:
603: movl %eax,(%ecx) 663: movl %eax,(%ecx)
61 xorl %eax,%eax 67 xorl %eax,%eax
62 EXIT 68 EXIT
69ENDPROC(__put_user_4)
63 70
64.align 4 71ENTRY(__put_user_8)
65.globl __put_user_8
66__put_user_8:
67 ENTER 72 ENTER
68 movl TI_addr_limit(%ebx),%ebx 73 movl TI_addr_limit(%ebx),%ebx
69 subl $7,%ebx 74 subl $7,%ebx
@@ -73,10 +78,16 @@ __put_user_8:
735: movl %edx,4(%ecx) 785: movl %edx,4(%ecx)
74 xorl %eax,%eax 79 xorl %eax,%eax
75 EXIT 80 EXIT
81ENDPROC(__put_user_8)
76 82
77bad_put_user: 83bad_put_user:
84 CFI_STARTPROC simple
85 CFI_DEF_CFA esp, 2*4
86 CFI_OFFSET eip, -1*4
87 CFI_OFFSET ebx, -2*4
78 movl $-14,%eax 88 movl $-14,%eax
79 EXIT 89 EXIT
90END(bad_put_user)
80 91
81.section __ex_table,"a" 92.section __ex_table,"a"
82 .long 1b,bad_put_user 93 .long 1b,bad_put_user
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
index 086b3726862a..9f38b12b4af1 100644
--- a/arch/i386/lib/usercopy.c
+++ b/arch/i386/lib/usercopy.c
@@ -716,7 +716,6 @@ do { \
716unsigned long __copy_to_user_ll(void __user *to, const void *from, 716unsigned long __copy_to_user_ll(void __user *to, const void *from,
717 unsigned long n) 717 unsigned long n)
718{ 718{
719 BUG_ON((long) n < 0);
720#ifndef CONFIG_X86_WP_WORKS_OK 719#ifndef CONFIG_X86_WP_WORKS_OK
721 if (unlikely(boot_cpu_data.wp_works_ok == 0) && 720 if (unlikely(boot_cpu_data.wp_works_ok == 0) &&
722 ((unsigned long )to) < TASK_SIZE) { 721 ((unsigned long )to) < TASK_SIZE) {
@@ -785,7 +784,6 @@ EXPORT_SYMBOL(__copy_to_user_ll);
785unsigned long __copy_from_user_ll(void *to, const void __user *from, 784unsigned long __copy_from_user_ll(void *to, const void __user *from,
786 unsigned long n) 785 unsigned long n)
787{ 786{
788 BUG_ON((long)n < 0);
789 if (movsl_is_ok(to, from, n)) 787 if (movsl_is_ok(to, from, n))
790 __copy_user_zeroing(to, from, n); 788 __copy_user_zeroing(to, from, n);
791 else 789 else
@@ -797,7 +795,6 @@ EXPORT_SYMBOL(__copy_from_user_ll);
797unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, 795unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from,
798 unsigned long n) 796 unsigned long n)
799{ 797{
800 BUG_ON((long)n < 0);
801 if (movsl_is_ok(to, from, n)) 798 if (movsl_is_ok(to, from, n))
802 __copy_user(to, from, n); 799 __copy_user(to, from, n);
803 else 800 else
@@ -810,7 +807,6 @@ EXPORT_SYMBOL(__copy_from_user_ll_nozero);
810unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, 807unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
811 unsigned long n) 808 unsigned long n)
812{ 809{
813 BUG_ON((long)n < 0);
814#ifdef CONFIG_X86_INTEL_USERCOPY 810#ifdef CONFIG_X86_INTEL_USERCOPY
815 if ( n > 64 && cpu_has_xmm2) 811 if ( n > 64 && cpu_has_xmm2)
816 n = __copy_user_zeroing_intel_nocache(to, from, n); 812 n = __copy_user_zeroing_intel_nocache(to, from, n);
@@ -825,7 +821,6 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
825unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, 821unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
826 unsigned long n) 822 unsigned long n)
827{ 823{
828 BUG_ON((long)n < 0);
829#ifdef CONFIG_X86_INTEL_USERCOPY 824#ifdef CONFIG_X86_INTEL_USERCOPY
830 if ( n > 64 && cpu_has_xmm2) 825 if ( n > 64 && cpu_has_xmm2)
831 n = __copy_user_intel_nocache(to, from, n); 826 n = __copy_user_intel_nocache(to, from, n);
@@ -853,7 +848,6 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
853unsigned long 848unsigned long
854copy_to_user(void __user *to, const void *from, unsigned long n) 849copy_to_user(void __user *to, const void *from, unsigned long n)
855{ 850{
856 BUG_ON((long) n < 0);
857 if (access_ok(VERIFY_WRITE, to, n)) 851 if (access_ok(VERIFY_WRITE, to, n))
858 n = __copy_to_user(to, from, n); 852 n = __copy_to_user(to, from, n);
859 return n; 853 return n;
@@ -879,7 +873,6 @@ EXPORT_SYMBOL(copy_to_user);
879unsigned long 873unsigned long
880copy_from_user(void *to, const void __user *from, unsigned long n) 874copy_from_user(void *to, const void __user *from, unsigned long n)
881{ 875{
882 BUG_ON((long) n < 0);
883 if (access_ok(VERIFY_READ, from, n)) 876 if (access_ok(VERIFY_READ, from, n))
884 n = __copy_from_user(to, from, n); 877 n = __copy_from_user(to, from, n);
885 else 878 else
diff --git a/arch/i386/mach-generic/bigsmp.c b/arch/i386/mach-generic/bigsmp.c
index 8a210fa915b5..e932d3485ae2 100644
--- a/arch/i386/mach-generic/bigsmp.c
+++ b/arch/i386/mach-generic/bigsmp.c
@@ -45,7 +45,7 @@ static struct dmi_system_id __initdata bigsmp_dmi_table[] = {
45}; 45};
46 46
47 47
48static int probe_bigsmp(void) 48static int __init probe_bigsmp(void)
49{ 49{
50 if (def_to_bigsmp) 50 if (def_to_bigsmp)
51 dmi_bigsmp = 1; 51 dmi_bigsmp = 1;
diff --git a/arch/i386/mach-generic/es7000.c b/arch/i386/mach-generic/es7000.c
index b8963a5a3b25..b47f951c0ec2 100644
--- a/arch/i386/mach-generic/es7000.c
+++ b/arch/i386/mach-generic/es7000.c
@@ -25,4 +25,45 @@ static int probe_es7000(void)
25 return 0; 25 return 0;
26} 26}
27 27
28extern void es7000_sw_apic(void);
29static void __init enable_apic_mode(void)
30{
31 es7000_sw_apic();
32 return;
33}
34
35static __init int mps_oem_check(struct mp_config_table *mpc, char *oem,
36 char *productid)
37{
38 if (mpc->mpc_oemptr) {
39 struct mp_config_oemtable *oem_table =
40 (struct mp_config_oemtable *)mpc->mpc_oemptr;
41 if (!strncmp(oem, "UNISYS", 6))
42 return parse_unisys_oem((char *)oem_table);
43 }
44 return 0;
45}
46
47#ifdef CONFIG_ACPI
48/* Hook from generic ACPI tables.c */
49static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
50{
51 unsigned long oem_addr;
52 if (!find_unisys_acpi_oem_table(&oem_addr)) {
53 if (es7000_check_dsdt())
54 return parse_unisys_oem((char *)oem_addr);
55 else {
56 setup_unisys();
57 return 1;
58 }
59 }
60 return 0;
61}
62#else
63static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
64{
65 return 0;
66}
67#endif
68
28struct genapic apic_es7000 = APIC_INIT("es7000", probe_es7000); 69struct genapic apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c
index fe0ed393294c..1a5e448a29c7 100644
--- a/arch/i386/mach-voyager/voyager_smp.c
+++ b/arch/i386/mach-voyager/voyager_smp.c
@@ -573,15 +573,7 @@ do_boot_cpu(__u8 cpu)
573 /* init_tasks (in sched.c) is indexed logically */ 573 /* init_tasks (in sched.c) is indexed logically */
574 stack_start.esp = (void *) idle->thread.esp; 574 stack_start.esp = (void *) idle->thread.esp;
575 575
576 /* Pre-allocate and initialize the CPU's GDT and PDA so it 576 init_gdt(cpu, idle);
577 doesn't have to do any memory allocation during the
578 delicate CPU-bringup phase. */
579 if (!init_gdt(cpu, idle)) {
580 printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu);
581 cpucount--;
582 return;
583 }
584
585 irq_ctx_init(cpu); 577 irq_ctx_init(cpu);
586 578
587 /* Note: Don't modify initial ss override */ 579 /* Note: Don't modify initial ss override */
@@ -749,12 +741,6 @@ initialize_secondary(void)
749#endif 741#endif
750 742
751 /* 743 /*
752 * switch to the per CPU GDT we already set up
753 * in do_boot_cpu()
754 */
755 cpu_set_gdt(current_thread_info()->cpu);
756
757 /*
758 * We don't actually need to load the full TSS, 744 * We don't actually need to load the full TSS,
759 * basically just the stack pointer and the eip. 745 * basically just the stack pointer and the eip.
760 */ 746 */
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index b8c4e259fc8b..f534c29e80b2 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -20,6 +20,7 @@
20#include <linux/tty.h> 20#include <linux/tty.h>
21#include <linux/vt_kern.h> /* For unblank_screen() */ 21#include <linux/vt_kern.h> /* For unblank_screen() */
22#include <linux/highmem.h> 22#include <linux/highmem.h>
23#include <linux/bootmem.h> /* for max_low_pfn */
23#include <linux/module.h> 24#include <linux/module.h>
24#include <linux/kprobes.h> 25#include <linux/kprobes.h>
25#include <linux/uaccess.h> 26#include <linux/uaccess.h>
@@ -301,7 +302,6 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
301 struct mm_struct *mm; 302 struct mm_struct *mm;
302 struct vm_area_struct * vma; 303 struct vm_area_struct * vma;
303 unsigned long address; 304 unsigned long address;
304 unsigned long page;
305 int write, si_code; 305 int write, si_code;
306 306
307 /* get the address */ 307 /* get the address */
@@ -510,7 +510,9 @@ no_context:
510 bust_spinlocks(1); 510 bust_spinlocks(1);
511 511
512 if (oops_may_print()) { 512 if (oops_may_print()) {
513 #ifdef CONFIG_X86_PAE 513 __typeof__(pte_val(__pte(0))) page;
514
515#ifdef CONFIG_X86_PAE
514 if (error_code & 16) { 516 if (error_code & 16) {
515 pte_t *pte = lookup_address(address); 517 pte_t *pte = lookup_address(address);
516 518
@@ -519,7 +521,7 @@ no_context:
519 "NX-protected page - exploit attempt? " 521 "NX-protected page - exploit attempt? "
520 "(uid: %d)\n", current->uid); 522 "(uid: %d)\n", current->uid);
521 } 523 }
522 #endif 524#endif
523 if (address < PAGE_SIZE) 525 if (address < PAGE_SIZE)
524 printk(KERN_ALERT "BUG: unable to handle kernel NULL " 526 printk(KERN_ALERT "BUG: unable to handle kernel NULL "
525 "pointer dereference"); 527 "pointer dereference");
@@ -529,25 +531,38 @@ no_context:
529 printk(" at virtual address %08lx\n",address); 531 printk(" at virtual address %08lx\n",address);
530 printk(KERN_ALERT " printing eip:\n"); 532 printk(KERN_ALERT " printing eip:\n");
531 printk("%08lx\n", regs->eip); 533 printk("%08lx\n", regs->eip);
532 } 534
533 page = read_cr3(); 535 page = read_cr3();
534 page = ((unsigned long *) __va(page))[address >> 22]; 536 page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
535 if (oops_may_print()) 537#ifdef CONFIG_X86_PAE
538 printk(KERN_ALERT "*pdpt = %016Lx\n", page);
539 if ((page >> PAGE_SHIFT) < max_low_pfn
540 && page & _PAGE_PRESENT) {
541 page &= PAGE_MASK;
542 page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
543 & (PTRS_PER_PMD - 1)];
544 printk(KERN_ALERT "*pde = %016Lx\n", page);
545 page &= ~_PAGE_NX;
546 }
547#else
536 printk(KERN_ALERT "*pde = %08lx\n", page); 548 printk(KERN_ALERT "*pde = %08lx\n", page);
537 /*
538 * We must not directly access the pte in the highpte
539 * case, the page table might be allocated in highmem.
540 * And lets rather not kmap-atomic the pte, just in case
541 * it's allocated already.
542 */
543#ifndef CONFIG_HIGHPTE
544 if ((page & 1) && oops_may_print()) {
545 page &= PAGE_MASK;
546 address &= 0x003ff000;
547 page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
548 printk(KERN_ALERT "*pte = %08lx\n", page);
549 }
550#endif 549#endif
550
551 /*
552 * We must not directly access the pte in the highpte
553 * case if the page table is located in highmem.
554 * And let's rather not kmap-atomic the pte, just in case
555 * it's allocated already.
556 */
557 if ((page >> PAGE_SHIFT) < max_low_pfn
558 && (page & _PAGE_PRESENT)) {
559 page &= PAGE_MASK;
560 page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
561 & (PTRS_PER_PTE - 1)];
562 printk(KERN_ALERT "*pte = %0*Lx\n", sizeof(page)*2, (u64)page);
563 }
564 }
565
551 tsk->thread.cr2 = address; 566 tsk->thread.cr2 = address;
552 tsk->thread.trap_no = 14; 567 tsk->thread.trap_no = 14;
553 tsk->thread.error_code = error_code; 568 tsk->thread.error_code = error_code;
@@ -588,7 +603,6 @@ do_sigbus:
588 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); 603 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
589} 604}
590 605
591#ifndef CONFIG_X86_PAE
592void vmalloc_sync_all(void) 606void vmalloc_sync_all(void)
593{ 607{
594 /* 608 /*
@@ -601,6 +615,9 @@ void vmalloc_sync_all(void)
601 static unsigned long start = TASK_SIZE; 615 static unsigned long start = TASK_SIZE;
602 unsigned long address; 616 unsigned long address;
603 617
618 if (SHARED_KERNEL_PMD)
619 return;
620
604 BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); 621 BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
605 for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) { 622 for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
606 if (!test_bit(pgd_index(address), insync)) { 623 if (!test_bit(pgd_index(address), insync)) {
@@ -623,4 +640,3 @@ void vmalloc_sync_all(void)
623 start = address + PGDIR_SIZE; 640 start = address + PGDIR_SIZE;
624 } 641 }
625} 642}
626#endif
diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c
index ac70d09df7ee..ad8d86cc683e 100644
--- a/arch/i386/mm/highmem.c
+++ b/arch/i386/mm/highmem.c
@@ -26,7 +26,7 @@ void kunmap(struct page *page)
26 * However when holding an atomic kmap is is not legal to sleep, so atomic 26 * However when holding an atomic kmap is is not legal to sleep, so atomic
27 * kmaps are appropriate for short, tight code paths only. 27 * kmaps are appropriate for short, tight code paths only.
28 */ 28 */
29void *kmap_atomic(struct page *page, enum km_type type) 29void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
30{ 30{
31 enum fixed_addresses idx; 31 enum fixed_addresses idx;
32 unsigned long vaddr; 32 unsigned long vaddr;
@@ -41,12 +41,17 @@ void *kmap_atomic(struct page *page, enum km_type type)
41 return page_address(page); 41 return page_address(page);
42 42
43 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); 43 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
44 set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); 44 set_pte(kmap_pte-idx, mk_pte(page, prot));
45 arch_flush_lazy_mmu_mode(); 45 arch_flush_lazy_mmu_mode();
46 46
47 return (void*) vaddr; 47 return (void*) vaddr;
48} 48}
49 49
50void *kmap_atomic(struct page *page, enum km_type type)
51{
52 return kmap_atomic_prot(page, type, kmap_prot);
53}
54
50void kunmap_atomic(void *kvaddr, enum km_type type) 55void kunmap_atomic(void *kvaddr, enum km_type type)
51{ 56{
52 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; 57 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
@@ -67,6 +72,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
67#endif 72#endif
68 } 73 }
69 74
75 arch_flush_lazy_mmu_mode();
70 pagefault_enable(); 76 pagefault_enable();
71} 77}
72 78
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index ae436882af7a..dbe16f63a566 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -22,6 +22,7 @@
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/highmem.h> 23#include <linux/highmem.h>
24#include <linux/pagemap.h> 24#include <linux/pagemap.h>
25#include <linux/pfn.h>
25#include <linux/poison.h> 26#include <linux/poison.h>
26#include <linux/bootmem.h> 27#include <linux/bootmem.h>
27#include <linux/slab.h> 28#include <linux/slab.h>
@@ -42,6 +43,7 @@
42#include <asm/tlb.h> 43#include <asm/tlb.h>
43#include <asm/tlbflush.h> 44#include <asm/tlbflush.h>
44#include <asm/sections.h> 45#include <asm/sections.h>
46#include <asm/paravirt.h>
45 47
46unsigned int __VMALLOC_RESERVE = 128 << 20; 48unsigned int __VMALLOC_RESERVE = 128 << 20;
47 49
@@ -61,17 +63,18 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
61 pmd_t *pmd_table; 63 pmd_t *pmd_table;
62 64
63#ifdef CONFIG_X86_PAE 65#ifdef CONFIG_X86_PAE
64 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); 66 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
65 paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); 67 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
66 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); 68
67 pud = pud_offset(pgd, 0); 69 paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
68 if (pmd_table != pmd_offset(pud, 0)) 70 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
69 BUG(); 71 pud = pud_offset(pgd, 0);
70#else 72 if (pmd_table != pmd_offset(pud, 0))
73 BUG();
74 }
75#endif
71 pud = pud_offset(pgd, 0); 76 pud = pud_offset(pgd, 0);
72 pmd_table = pmd_offset(pud, 0); 77 pmd_table = pmd_offset(pud, 0);
73#endif
74
75 return pmd_table; 78 return pmd_table;
76} 79}
77 80
@@ -81,14 +84,12 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
81 */ 84 */
82static pte_t * __init one_page_table_init(pmd_t *pmd) 85static pte_t * __init one_page_table_init(pmd_t *pmd)
83{ 86{
84 if (pmd_none(*pmd)) { 87 if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
85 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); 88 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
89
86 paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT); 90 paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
87 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); 91 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
88 if (page_table != pte_offset_kernel(pmd, 0)) 92 BUG_ON(page_table != pte_offset_kernel(pmd, 0));
89 BUG();
90
91 return page_table;
92 } 93 }
93 94
94 return pte_offset_kernel(pmd, 0); 95 return pte_offset_kernel(pmd, 0);
@@ -108,7 +109,6 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
108static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) 109static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
109{ 110{
110 pgd_t *pgd; 111 pgd_t *pgd;
111 pud_t *pud;
112 pmd_t *pmd; 112 pmd_t *pmd;
113 int pgd_idx, pmd_idx; 113 int pgd_idx, pmd_idx;
114 unsigned long vaddr; 114 unsigned long vaddr;
@@ -119,13 +119,10 @@ static void __init page_table_range_init (unsigned long start, unsigned long end
119 pgd = pgd_base + pgd_idx; 119 pgd = pgd_base + pgd_idx;
120 120
121 for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { 121 for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
122 if (pgd_none(*pgd)) 122 pmd = one_md_table_init(pgd);
123 one_md_table_init(pgd); 123 pmd = pmd + pmd_index(vaddr);
124 pud = pud_offset(pgd, vaddr);
125 pmd = pmd_offset(pud, vaddr);
126 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { 124 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
127 if (pmd_none(*pmd)) 125 one_page_table_init(pmd);
128 one_page_table_init(pmd);
129 126
130 vaddr += PMD_SIZE; 127 vaddr += PMD_SIZE;
131 } 128 }
@@ -167,20 +164,22 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
167 /* Map with big pages if possible, otherwise create normal page tables. */ 164 /* Map with big pages if possible, otherwise create normal page tables. */
168 if (cpu_has_pse) { 165 if (cpu_has_pse) {
169 unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; 166 unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
170
171 if (is_kernel_text(address) || is_kernel_text(address2)) 167 if (is_kernel_text(address) || is_kernel_text(address2))
172 set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); 168 set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
173 else 169 else
174 set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); 170 set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
171
175 pfn += PTRS_PER_PTE; 172 pfn += PTRS_PER_PTE;
176 } else { 173 } else {
177 pte = one_page_table_init(pmd); 174 pte = one_page_table_init(pmd);
178 175
179 for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) { 176 for (pte_ofs = 0;
180 if (is_kernel_text(address)) 177 pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
181 set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); 178 pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
182 else 179 if (is_kernel_text(address))
183 set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); 180 set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
181 else
182 set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
184 } 183 }
185 } 184 }
186 } 185 }
@@ -337,24 +336,78 @@ extern void __init remap_numa_kva(void);
337#define remap_numa_kva() do {} while (0) 336#define remap_numa_kva() do {} while (0)
338#endif 337#endif
339 338
340static void __init pagetable_init (void) 339void __init native_pagetable_setup_start(pgd_t *base)
341{ 340{
342 unsigned long vaddr;
343 pgd_t *pgd_base = swapper_pg_dir;
344
345#ifdef CONFIG_X86_PAE 341#ifdef CONFIG_X86_PAE
346 int i; 342 int i;
347 /* Init entries of the first-level page table to the zero page */ 343
348 for (i = 0; i < PTRS_PER_PGD; i++) 344 /*
349 set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); 345 * Init entries of the first-level page table to the
346 * zero page, if they haven't already been set up.
347 *
348 * In a normal native boot, we'll be running on a
349 * pagetable rooted in swapper_pg_dir, but not in PAE
350 * mode, so this will end up clobbering the mappings
351 * for the lower 24Mbytes of the address space,
352 * without affecting the kernel address space.
353 */
354 for (i = 0; i < USER_PTRS_PER_PGD; i++)
355 set_pgd(&base[i],
356 __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
357
358 /* Make sure kernel address space is empty so that a pagetable
359 will be allocated for it. */
360 memset(&base[USER_PTRS_PER_PGD], 0,
361 KERNEL_PGD_PTRS * sizeof(pgd_t));
350#else 362#else
351 paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT); 363 paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
352#endif 364#endif
365}
366
367void __init native_pagetable_setup_done(pgd_t *base)
368{
369#ifdef CONFIG_X86_PAE
370 /*
371 * Add low memory identity-mappings - SMP needs it when
372 * starting up on an AP from real-mode. In the non-PAE
373 * case we already have these mappings through head.S.
374 * All user-space mappings are explicitly cleared after
375 * SMP startup.
376 */
377 set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
378#endif
379}
380
381/*
382 * Build a proper pagetable for the kernel mappings. Up until this
383 * point, we've been running on some set of pagetables constructed by
384 * the boot process.
385 *
386 * If we're booting on native hardware, this will be a pagetable
387 * constructed in arch/i386/kernel/head.S, and not running in PAE mode
388 * (even if we'll end up running in PAE). The root of the pagetable
389 * will be swapper_pg_dir.
390 *
391 * If we're booting paravirtualized under a hypervisor, then there are
392 * more options: we may already be running PAE, and the pagetable may
393 * or may not be based in swapper_pg_dir. In any case,
394 * paravirt_pagetable_setup_start() will set up swapper_pg_dir
395 * appropriately for the rest of the initialization to work.
396 *
397 * In general, pagetable_init() assumes that the pagetable may already
398 * be partially populated, and so it avoids stomping on any existing
399 * mappings.
400 */
401static void __init pagetable_init (void)
402{
403 unsigned long vaddr, end;
404 pgd_t *pgd_base = swapper_pg_dir;
405
406 paravirt_pagetable_setup_start(pgd_base);
353 407
354 /* Enable PSE if available */ 408 /* Enable PSE if available */
355 if (cpu_has_pse) { 409 if (cpu_has_pse)
356 set_in_cr4(X86_CR4_PSE); 410 set_in_cr4(X86_CR4_PSE);
357 }
358 411
359 /* Enable PGE if available */ 412 /* Enable PGE if available */
360 if (cpu_has_pge) { 413 if (cpu_has_pge) {
@@ -371,20 +424,12 @@ static void __init pagetable_init (void)
371 * created - mappings will be set by set_fixmap(): 424 * created - mappings will be set by set_fixmap():
372 */ 425 */
373 vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; 426 vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
374 page_table_range_init(vaddr, 0, pgd_base); 427 end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
428 page_table_range_init(vaddr, end, pgd_base);
375 429
376 permanent_kmaps_init(pgd_base); 430 permanent_kmaps_init(pgd_base);
377 431
378#ifdef CONFIG_X86_PAE 432 paravirt_pagetable_setup_done(pgd_base);
379 /*
380 * Add low memory identity-mappings - SMP needs it when
381 * starting up on an AP from real-mode. In the non-PAE
382 * case we already have these mappings through head.S.
383 * All user-space mappings are explicitly cleared after
384 * SMP startup.
385 */
386 set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
387#endif
388} 433}
389 434
390#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP) 435#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
@@ -700,6 +745,8 @@ struct kmem_cache *pmd_cache;
700 745
701void __init pgtable_cache_init(void) 746void __init pgtable_cache_init(void)
702{ 747{
748 size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);
749
703 if (PTRS_PER_PMD > 1) { 750 if (PTRS_PER_PMD > 1) {
704 pmd_cache = kmem_cache_create("pmd", 751 pmd_cache = kmem_cache_create("pmd",
705 PTRS_PER_PMD*sizeof(pmd_t), 752 PTRS_PER_PMD*sizeof(pmd_t),
@@ -709,13 +756,23 @@ void __init pgtable_cache_init(void)
709 NULL); 756 NULL);
710 if (!pmd_cache) 757 if (!pmd_cache)
711 panic("pgtable_cache_init(): cannot create pmd cache"); 758 panic("pgtable_cache_init(): cannot create pmd cache");
759
760 if (!SHARED_KERNEL_PMD) {
761 /* If we're in PAE mode and have a non-shared
762 kernel pmd, then the pgd size must be a
763 page size. This is because the pgd_list
764 links through the page structure, so there
765 can only be one pgd per page for this to
766 work. */
767 pgd_size = PAGE_SIZE;
768 }
712 } 769 }
713 pgd_cache = kmem_cache_create("pgd", 770 pgd_cache = kmem_cache_create("pgd",
714 PTRS_PER_PGD*sizeof(pgd_t), 771 pgd_size,
715 PTRS_PER_PGD*sizeof(pgd_t), 772 pgd_size,
716 0, 773 0,
717 pgd_ctor, 774 pgd_ctor,
718 PTRS_PER_PMD == 1 ? pgd_dtor : NULL); 775 (!SHARED_KERNEL_PMD) ? pgd_dtor : NULL);
719 if (!pgd_cache) 776 if (!pgd_cache)
720 panic("pgtable_cache_init(): Cannot create pgd cache"); 777 panic("pgtable_cache_init(): Cannot create pgd cache");
721} 778}
@@ -751,13 +808,25 @@ static int noinline do_test_wp_bit(void)
751 808
752void mark_rodata_ro(void) 809void mark_rodata_ro(void)
753{ 810{
754 unsigned long addr = (unsigned long)__start_rodata; 811 unsigned long start = PFN_ALIGN(_text);
812 unsigned long size = PFN_ALIGN(_etext) - start;
755 813
756 for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) 814#ifdef CONFIG_HOTPLUG_CPU
757 change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO); 815 /* It must still be possible to apply SMP alternatives. */
816 if (num_possible_cpus() <= 1)
817#endif
818 {
819 change_page_attr(virt_to_page(start),
820 size >> PAGE_SHIFT, PAGE_KERNEL_RX);
821 printk("Write protecting the kernel text: %luk\n", size >> 10);
822 }
758 823
759 printk("Write protecting the kernel read-only data: %uk\n", 824 start += size;
760 (__end_rodata - __start_rodata) >> 10); 825 size = (unsigned long)__end_rodata - start;
826 change_page_attr(virt_to_page(start),
827 size >> PAGE_SHIFT, PAGE_KERNEL_RO);
828 printk("Write protecting the kernel read-only data: %luk\n",
829 size >> 10);
761 830
762 /* 831 /*
763 * change_page_attr() requires a global_flush_tlb() call after it. 832 * change_page_attr() requires a global_flush_tlb() call after it.
@@ -774,26 +843,27 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
774 unsigned long addr; 843 unsigned long addr;
775 844
776 for (addr = begin; addr < end; addr += PAGE_SIZE) { 845 for (addr = begin; addr < end; addr += PAGE_SIZE) {
777 ClearPageReserved(virt_to_page(addr)); 846 struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
778 init_page_count(virt_to_page(addr)); 847 ClearPageReserved(page);
779 memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); 848 init_page_count(page);
780 free_page(addr); 849 memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE);
850 __free_page(page);
781 totalram_pages++; 851 totalram_pages++;
782 } 852 }
783 printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); 853 printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
784} 854}
785 855
786void free_initmem(void) 856void free_initmem(void)
787{ 857{
788 free_init_pages("unused kernel memory", 858 free_init_pages("unused kernel memory",
789 (unsigned long)(&__init_begin), 859 __pa_symbol(&__init_begin),
790 (unsigned long)(&__init_end)); 860 __pa_symbol(&__init_end));
791} 861}
792 862
793#ifdef CONFIG_BLK_DEV_INITRD 863#ifdef CONFIG_BLK_DEV_INITRD
794void free_initrd_mem(unsigned long start, unsigned long end) 864void free_initrd_mem(unsigned long start, unsigned long end)
795{ 865{
796 free_init_pages("initrd memory", start, end); 866 free_init_pages("initrd memory", __pa(start), __pa(end));
797} 867}
798#endif 868#endif
799 869
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index 412ebbd8adb0..47bd477c8ecc 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -91,7 +91,7 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
91 unsigned long flags; 91 unsigned long flags;
92 92
93 set_pte_atomic(kpte, pte); /* change init_mm */ 93 set_pte_atomic(kpte, pte); /* change init_mm */
94 if (PTRS_PER_PMD > 1) 94 if (SHARED_KERNEL_PMD)
95 return; 95 return;
96 96
97 spin_lock_irqsave(&pgd_lock, flags); 97 spin_lock_irqsave(&pgd_lock, flags);
@@ -142,7 +142,7 @@ __change_page_attr(struct page *page, pgprot_t prot)
142 return -EINVAL; 142 return -EINVAL;
143 kpte_page = virt_to_page(kpte); 143 kpte_page = virt_to_page(kpte);
144 if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { 144 if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
145 if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 145 if (!pte_huge(*kpte)) {
146 set_pte_atomic(kpte, mk_pte(page, prot)); 146 set_pte_atomic(kpte, mk_pte(page, prot));
147 } else { 147 } else {
148 pgprot_t ref_prot; 148 pgprot_t ref_prot;
@@ -158,7 +158,7 @@ __change_page_attr(struct page *page, pgprot_t prot)
158 kpte_page = split; 158 kpte_page = split;
159 } 159 }
160 page_private(kpte_page)++; 160 page_private(kpte_page)++;
161 } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 161 } else if (!pte_huge(*kpte)) {
162 set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL)); 162 set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
163 BUG_ON(page_private(kpte_page) == 0); 163 BUG_ON(page_private(kpte_page) == 0);
164 page_private(kpte_page)--; 164 page_private(kpte_page)--;
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index fa0cfbd551e1..9a96c1647428 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -144,10 +144,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
144} 144}
145 145
146static int fixmaps; 146static int fixmaps;
147#ifndef CONFIG_COMPAT_VDSO
148unsigned long __FIXADDR_TOP = 0xfffff000; 147unsigned long __FIXADDR_TOP = 0xfffff000;
149EXPORT_SYMBOL(__FIXADDR_TOP); 148EXPORT_SYMBOL(__FIXADDR_TOP);
150#endif
151 149
152void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) 150void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
153{ 151{
@@ -173,12 +171,8 @@ void reserve_top_address(unsigned long reserve)
173 BUG_ON(fixmaps > 0); 171 BUG_ON(fixmaps > 0);
174 printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", 172 printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
175 (int)-reserve); 173 (int)-reserve);
176#ifdef CONFIG_COMPAT_VDSO
177 BUG_ON(reserve != 0);
178#else
179 __FIXADDR_TOP = -reserve - PAGE_SIZE; 174 __FIXADDR_TOP = -reserve - PAGE_SIZE;
180 __VMALLOC_RESERVE += reserve; 175 __VMALLOC_RESERVE += reserve;
181#endif
182} 176}
183 177
184pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) 178pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
@@ -238,42 +232,92 @@ static inline void pgd_list_del(pgd_t *pgd)
238 set_page_private(next, (unsigned long)pprev); 232 set_page_private(next, (unsigned long)pprev);
239} 233}
240 234
235#if (PTRS_PER_PMD == 1)
236/* Non-PAE pgd constructor */
241void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) 237void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
242{ 238{
243 unsigned long flags; 239 unsigned long flags;
244 240
245 if (PTRS_PER_PMD == 1) { 241 /* !PAE, no pagetable sharing */
246 memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); 242 memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
247 spin_lock_irqsave(&pgd_lock, flags); 243
248 } 244 spin_lock_irqsave(&pgd_lock, flags);
249 245
246 /* must happen under lock */
250 clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, 247 clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
251 swapper_pg_dir + USER_PTRS_PER_PGD, 248 swapper_pg_dir + USER_PTRS_PER_PGD,
252 KERNEL_PGD_PTRS); 249 KERNEL_PGD_PTRS);
253
254 if (PTRS_PER_PMD > 1)
255 return;
256
257 /* must happen under lock */
258 paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, 250 paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
259 __pa(swapper_pg_dir) >> PAGE_SHIFT, 251 __pa(swapper_pg_dir) >> PAGE_SHIFT,
260 USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD); 252 USER_PTRS_PER_PGD,
261 253 KERNEL_PGD_PTRS);
262 pgd_list_add(pgd); 254 pgd_list_add(pgd);
263 spin_unlock_irqrestore(&pgd_lock, flags); 255 spin_unlock_irqrestore(&pgd_lock, flags);
264} 256}
257#else /* PTRS_PER_PMD > 1 */
258/* PAE pgd constructor */
259void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
260{
261 /* PAE, kernel PMD may be shared */
262
263 if (SHARED_KERNEL_PMD) {
264 clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
265 swapper_pg_dir + USER_PTRS_PER_PGD,
266 KERNEL_PGD_PTRS);
267 } else {
268 unsigned long flags;
269
270 memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
271 spin_lock_irqsave(&pgd_lock, flags);
272 pgd_list_add(pgd);
273 spin_unlock_irqrestore(&pgd_lock, flags);
274 }
275}
276#endif /* PTRS_PER_PMD */
265 277
266/* never called when PTRS_PER_PMD > 1 */
267void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) 278void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
268{ 279{
269 unsigned long flags; /* can be called from interrupt context */ 280 unsigned long flags; /* can be called from interrupt context */
270 281
282 BUG_ON(SHARED_KERNEL_PMD);
283
271 paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); 284 paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
272 spin_lock_irqsave(&pgd_lock, flags); 285 spin_lock_irqsave(&pgd_lock, flags);
273 pgd_list_del(pgd); 286 pgd_list_del(pgd);
274 spin_unlock_irqrestore(&pgd_lock, flags); 287 spin_unlock_irqrestore(&pgd_lock, flags);
275} 288}
276 289
290#define UNSHARED_PTRS_PER_PGD \
291 (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
292
293/* If we allocate a pmd for part of the kernel address space, then
294 make sure its initialized with the appropriate kernel mappings.
295 Otherwise use a cached zeroed pmd. */
296static pmd_t *pmd_cache_alloc(int idx)
297{
298 pmd_t *pmd;
299
300 if (idx >= USER_PTRS_PER_PGD) {
301 pmd = (pmd_t *)__get_free_page(GFP_KERNEL);
302
303 if (pmd)
304 memcpy(pmd,
305 (void *)pgd_page_vaddr(swapper_pg_dir[idx]),
306 sizeof(pmd_t) * PTRS_PER_PMD);
307 } else
308 pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
309
310 return pmd;
311}
312
313static void pmd_cache_free(pmd_t *pmd, int idx)
314{
315 if (idx >= USER_PTRS_PER_PGD)
316 free_page((unsigned long)pmd);
317 else
318 kmem_cache_free(pmd_cache, pmd);
319}
320
277pgd_t *pgd_alloc(struct mm_struct *mm) 321pgd_t *pgd_alloc(struct mm_struct *mm)
278{ 322{
279 int i; 323 int i;
@@ -282,10 +326,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
282 if (PTRS_PER_PMD == 1 || !pgd) 326 if (PTRS_PER_PMD == 1 || !pgd)
283 return pgd; 327 return pgd;
284 328
285 for (i = 0; i < USER_PTRS_PER_PGD; ++i) { 329 for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
286 pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); 330 pmd_t *pmd = pmd_cache_alloc(i);
331
287 if (!pmd) 332 if (!pmd)
288 goto out_oom; 333 goto out_oom;
334
289 paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); 335 paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
290 set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); 336 set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
291 } 337 }
@@ -296,7 +342,7 @@ out_oom:
296 pgd_t pgdent = pgd[i]; 342 pgd_t pgdent = pgd[i];
297 void* pmd = (void *)__va(pgd_val(pgdent)-1); 343 void* pmd = (void *)__va(pgd_val(pgdent)-1);
298 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); 344 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
299 kmem_cache_free(pmd_cache, pmd); 345 pmd_cache_free(pmd, i);
300 } 346 }
301 kmem_cache_free(pgd_cache, pgd); 347 kmem_cache_free(pgd_cache, pgd);
302 return NULL; 348 return NULL;
@@ -308,11 +354,11 @@ void pgd_free(pgd_t *pgd)
308 354
309 /* in the PAE case user pgd entries are overwritten before usage */ 355 /* in the PAE case user pgd entries are overwritten before usage */
310 if (PTRS_PER_PMD > 1) 356 if (PTRS_PER_PMD > 1)
311 for (i = 0; i < USER_PTRS_PER_PGD; ++i) { 357 for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
312 pgd_t pgdent = pgd[i]; 358 pgd_t pgdent = pgd[i];
313 void* pmd = (void *)__va(pgd_val(pgdent)-1); 359 void* pmd = (void *)__va(pgd_val(pgdent)-1);
314 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); 360 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
315 kmem_cache_free(pmd_cache, pmd); 361 pmd_cache_free(pmd, i);
316 } 362 }
317 /* in the non-PAE case, free_pgtables() clears user pgd entries */ 363 /* in the non-PAE case, free_pgtables() clears user pgd entries */
318 kmem_cache_free(pgd_cache, pgd); 364 kmem_cache_free(pgd_cache, pgd);
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
index 8fda7be9dd4d..695f737516ae 100644
--- a/arch/i386/oprofile/nmi_int.c
+++ b/arch/i386/oprofile/nmi_int.c
@@ -414,6 +414,10 @@ int __init op_nmi_init(struct oprofile_operations *ops)
414 user space an consistent name. */ 414 user space an consistent name. */
415 cpu_type = "x86-64/hammer"; 415 cpu_type = "x86-64/hammer";
416 break; 416 break;
417 case 0x10:
418 model = &op_athlon_spec;
419 cpu_type = "x86-64/family10";
420 break;
417 } 421 }
418 break; 422 break;
419 423
diff --git a/arch/i386/pci/init.c b/arch/i386/pci/init.c
index b21b6da8ab1d..1cf11af96de2 100644
--- a/arch/i386/pci/init.c
+++ b/arch/i386/pci/init.c
@@ -6,7 +6,7 @@
6 in the right sequence from here. */ 6 in the right sequence from here. */
7static __init int pci_access_init(void) 7static __init int pci_access_init(void)
8{ 8{
9 int type = 0; 9 int type __attribute__((unused)) = 0;
10 10
11#ifdef CONFIG_PCI_DIRECT 11#ifdef CONFIG_PCI_DIRECT
12 type = pci_direct_probe(); 12 type = pci_direct_probe();
diff --git a/arch/i386/pci/mmconfig-shared.c b/arch/i386/pci/mmconfig-shared.c
index 747d8c63b0c4..c7cabeed4d7b 100644
--- a/arch/i386/pci/mmconfig-shared.c
+++ b/arch/i386/pci/mmconfig-shared.c
@@ -60,14 +60,19 @@ static const char __init *pci_mmcfg_e7520(void)
60 u32 win; 60 u32 win;
61 pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win); 61 pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win);
62 62
63 pci_mmcfg_config_num = 1; 63 win = win & 0xf000;
64 pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); 64 if(win == 0x0000 || win == 0xf000)
65 if (!pci_mmcfg_config) 65 pci_mmcfg_config_num = 0;
66 return NULL; 66 else {
67 pci_mmcfg_config[0].address = (win & 0xf000) << 16; 67 pci_mmcfg_config_num = 1;
68 pci_mmcfg_config[0].pci_segment = 0; 68 pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
69 pci_mmcfg_config[0].start_bus_number = 0; 69 if (!pci_mmcfg_config)
70 pci_mmcfg_config[0].end_bus_number = 255; 70 return NULL;
71 pci_mmcfg_config[0].address = win << 16;
72 pci_mmcfg_config[0].pci_segment = 0;
73 pci_mmcfg_config[0].start_bus_number = 0;
74 pci_mmcfg_config[0].end_bus_number = 255;
75 }
71 76
72 return "Intel Corporation E7520 Memory Controller Hub"; 77 return "Intel Corporation E7520 Memory Controller Hub";
73} 78}
@@ -108,6 +113,10 @@ static const char __init *pci_mmcfg_intel_945(void)
108 if ((pciexbar & mask) & 0x0fffffffU) 113 if ((pciexbar & mask) & 0x0fffffffU)
109 pci_mmcfg_config_num = 0; 114 pci_mmcfg_config_num = 0;
110 115
116 /* Don't hit the APIC registers and their friends */
117 if ((pciexbar & mask) >= 0xf0000000U)
118 pci_mmcfg_config_num = 0;
119
111 if (pci_mmcfg_config_num) { 120 if (pci_mmcfg_config_num) {
112 pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); 121 pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
113 if (!pci_mmcfg_config) 122 if (!pci_mmcfg_config)
diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c
index 2c15500f8713..998fd3ec0d68 100644
--- a/arch/i386/power/cpu.c
+++ b/arch/i386/power/cpu.c
@@ -21,6 +21,7 @@ unsigned long saved_context_eflags;
21 21
22void __save_processor_state(struct saved_context *ctxt) 22void __save_processor_state(struct saved_context *ctxt)
23{ 23{
24 mtrr_save_fixed_ranges(NULL);
24 kernel_fpu_begin(); 25 kernel_fpu_begin();
25 26
26 /* 27 /*
diff --git a/arch/i386/power/suspend.c b/arch/i386/power/suspend.c
index db5e98d2eb73..a0020b913f31 100644
--- a/arch/i386/power/suspend.c
+++ b/arch/i386/power/suspend.c
@@ -16,6 +16,9 @@
16/* Defined in arch/i386/power/swsusp.S */ 16/* Defined in arch/i386/power/swsusp.S */
17extern int restore_image(void); 17extern int restore_image(void);
18 18
19/* References to section boundaries */
20extern const void __nosave_begin, __nosave_end;
21
19/* Pointer to the temporary resume page tables */ 22/* Pointer to the temporary resume page tables */
20pgd_t *resume_pg_dir; 23pgd_t *resume_pg_dir;
21 24
@@ -156,3 +159,14 @@ int swsusp_arch_resume(void)
156 restore_image(); 159 restore_image();
157 return 0; 160 return 0;
158} 161}
162
163/*
164 * pfn_is_nosave - check if given pfn is in the 'nosave' section
165 */
166
167int pfn_is_nosave(unsigned long pfn)
168{
169 unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
170 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
171 return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
172}
diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S
index 439cc257cd1d..6c73bca3f478 100644
--- a/arch/m32r/kernel/vmlinux.lds.S
+++ b/arch/m32r/kernel/vmlinux.lds.S
@@ -110,7 +110,7 @@ SECTIONS
110 __initramfs_end = .; 110 __initramfs_end = .;
111#endif 111#endif
112 112
113 . = ALIGN(32); 113 . = ALIGN(4096);
114 __per_cpu_start = .; 114 __per_cpu_start = .;
115 .data.percpu : { *(.data.percpu) } 115 .data.percpu : { *(.data.percpu) }
116 __per_cpu_end = .; 116 __per_cpu_end = .;
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index c76b793310c2..043f637e3d10 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -119,7 +119,7 @@ SECTIONS
119 .init.ramfs : { *(.init.ramfs) } 119 .init.ramfs : { *(.init.ramfs) }
120 __initramfs_end = .; 120 __initramfs_end = .;
121#endif 121#endif
122 . = ALIGN(32); 122 . = ALIGN(_PAGE_SIZE);
123 __per_cpu_start = .; 123 __per_cpu_start = .;
124 .data.percpu : { *(.data.percpu) } 124 .data.percpu : { *(.data.percpu) }
125 __per_cpu_end = .; 125 __per_cpu_end = .;
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 2a8253358c6c..c74585990598 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -181,7 +181,7 @@ SECTIONS
181 .init.ramfs : { *(.init.ramfs) } 181 .init.ramfs : { *(.init.ramfs) }
182 __initramfs_end = .; 182 __initramfs_end = .;
183#endif 183#endif
184 . = ALIGN(32); 184 . = ALIGN(ASM_PAGE_SIZE);
185 __per_cpu_start = .; 185 __per_cpu_start = .;
186 .data.percpu : { *(.data.percpu) } 186 .data.percpu : { *(.data.percpu) }
187 __per_cpu_end = .; 187 __per_cpu_end = .;
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index e0fa80eca366..aa693d0f151a 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
37obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o 37obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o
38obj-$(CONFIG_TAU) += tau_6xx.o 38obj-$(CONFIG_TAU) += tau_6xx.o
39obj32-$(CONFIG_SOFTWARE_SUSPEND) += swsusp_32.o 39obj32-$(CONFIG_SOFTWARE_SUSPEND) += swsusp_32.o
40obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o
40obj32-$(CONFIG_MODULES) += module_32.o 41obj32-$(CONFIG_MODULES) += module_32.o
41 42
42ifeq ($(CONFIG_PPC_MERGE),y) 43ifeq ($(CONFIG_PPC_MERGE),y)
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 22083ce3cc30..6018178708a5 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -582,14 +582,14 @@ void __init setup_per_cpu_areas(void)
582 char *ptr; 582 char *ptr;
583 583
584 /* Copy section for each CPU (we discard the original) */ 584 /* Copy section for each CPU (we discard the original) */
585 size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); 585 size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE);
586#ifdef CONFIG_MODULES 586#ifdef CONFIG_MODULES
587 if (size < PERCPU_ENOUGH_ROOM) 587 if (size < PERCPU_ENOUGH_ROOM)
588 size = PERCPU_ENOUGH_ROOM; 588 size = PERCPU_ENOUGH_ROOM;
589#endif 589#endif
590 590
591 for_each_possible_cpu(i) { 591 for_each_possible_cpu(i) {
592 ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); 592 ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size);
593 if (!ptr) 593 if (!ptr)
594 panic("Cannot allocate cpu data for CPU %d\n", i); 594 panic("Cannot allocate cpu data for CPU %d\n", i);
595 595
diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c
new file mode 100644
index 000000000000..8cee57107541
--- /dev/null
+++ b/arch/powerpc/kernel/suspend.c
@@ -0,0 +1,24 @@
1/*
2 * Suspend support specific for power.
3 *
4 * Distribute under GPLv2
5 *
6 * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
7 * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
8 */
9
10#include <asm/page.h>
11
12/* References to section boundaries */
13extern const void __nosave_begin, __nosave_end;
14
15/*
16 * pfn_is_nosave - check if given pfn is in the 'nosave' section
17 */
18
19int pfn_is_nosave(unsigned long pfn)
20{
21 unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
22 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
23 return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
24}
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 7eefeb4a30e7..132067313147 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -139,11 +139,7 @@ SECTIONS
139 __initramfs_end = .; 139 __initramfs_end = .;
140 } 140 }
141#endif 141#endif
142#ifdef CONFIG_PPC32 142 . = ALIGN(PAGE_SIZE);
143 . = ALIGN(32);
144#else
145 . = ALIGN(128);
146#endif
147 .data.percpu : { 143 .data.percpu : {
148 __per_cpu_start = .; 144 __per_cpu_start = .;
149 *(.data.percpu) 145 *(.data.percpu)
diff --git a/arch/ppc/kernel/vmlinux.lds.S b/arch/ppc/kernel/vmlinux.lds.S
index a0625562a44b..44cd128fb719 100644
--- a/arch/ppc/kernel/vmlinux.lds.S
+++ b/arch/ppc/kernel/vmlinux.lds.S
@@ -130,7 +130,7 @@ SECTIONS
130 __ftr_fixup : { *(__ftr_fixup) } 130 __ftr_fixup : { *(__ftr_fixup) }
131 __stop___ftr_fixup = .; 131 __stop___ftr_fixup = .;
132 132
133 . = ALIGN(32); 133 . = ALIGN(4096);
134 __per_cpu_start = .; 134 __per_cpu_start = .;
135 .data.percpu : { *(.data.percpu) } 135 .data.percpu : { *(.data.percpu) }
136 __per_cpu_end = .; 136 __per_cpu_end = .;
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 418f6426a949..e9d3432aba60 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -107,7 +107,7 @@ SECTIONS
107 . = ALIGN(2); 107 . = ALIGN(2);
108 __initramfs_end = .; 108 __initramfs_end = .;
109#endif 109#endif
110 . = ALIGN(256); 110 . = ALIGN(4096);
111 __per_cpu_start = .; 111 __per_cpu_start = .;
112 .data.percpu : { *(.data.percpu) } 112 .data.percpu : { *(.data.percpu) }
113 __per_cpu_end = .; 113 __per_cpu_end = .;
diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S
index 78a6c09875b2..2f606d0ce1f6 100644
--- a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S
@@ -54,7 +54,7 @@ SECTIONS
54 . = ALIGN(PAGE_SIZE); 54 . = ALIGN(PAGE_SIZE);
55 .data.page_aligned : { *(.data.page_aligned) } 55 .data.page_aligned : { *(.data.page_aligned) }
56 56
57 . = ALIGN(L1_CACHE_BYTES); 57 . = ALIGN(PAGE_SIZE);
58 __per_cpu_start = .; 58 __per_cpu_start = .;
59 .data.percpu : { *(.data.percpu) } 59 .data.percpu : { *(.data.percpu) }
60 __per_cpu_end = .; 60 __per_cpu_end = .;
diff --git a/arch/sh64/kernel/vmlinux.lds.S b/arch/sh64/kernel/vmlinux.lds.S
index a59c5e998131..4f9616f39830 100644
--- a/arch/sh64/kernel/vmlinux.lds.S
+++ b/arch/sh64/kernel/vmlinux.lds.S
@@ -85,7 +85,7 @@ SECTIONS
85 . = ALIGN(PAGE_SIZE); 85 . = ALIGN(PAGE_SIZE);
86 .data.page_aligned : C_PHYS(.data.page_aligned) { *(.data.page_aligned) } 86 .data.page_aligned : C_PHYS(.data.page_aligned) { *(.data.page_aligned) }
87 87
88 . = ALIGN(L1_CACHE_BYTES); 88 . = ALIGN(PAGE_SIZE);
89 __per_cpu_start = .; 89 __per_cpu_start = .;
90 .data.percpu : C_PHYS(.data.percpu) { *(.data.percpu) } 90 .data.percpu : C_PHYS(.data.percpu) { *(.data.percpu) }
91 __per_cpu_end = . ; 91 __per_cpu_end = . ;
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index e5c24e0521de..f0bb6e60e620 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -65,7 +65,7 @@ SECTIONS
65 __initramfs_end = .; 65 __initramfs_end = .;
66#endif 66#endif
67 67
68 . = ALIGN(32); 68 . = ALIGN(4096);
69 __per_cpu_start = .; 69 __per_cpu_start = .;
70 .data.percpu : { *(.data.percpu) } 70 .data.percpu : { *(.data.percpu) }
71 __per_cpu_end = .; 71 __per_cpu_end = .;
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index d4f0a70f4845..1fac215252e4 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -1343,11 +1343,11 @@ void __init setup_per_cpu_areas(void)
1343 /* Copy section for each CPU (we discard the original) */ 1343 /* Copy section for each CPU (we discard the original) */
1344 goal = PERCPU_ENOUGH_ROOM; 1344 goal = PERCPU_ENOUGH_ROOM;
1345 1345
1346 __per_cpu_shift = 0; 1346 __per_cpu_shift = PAGE_SHIFT;
1347 for (size = 1UL; size < goal; size <<= 1UL) 1347 for (size = PAGE_SIZE; size < goal; size <<= 1UL)
1348 __per_cpu_shift++; 1348 __per_cpu_shift++;
1349 1349
1350 ptr = alloc_bootmem(size * NR_CPUS); 1350 ptr = alloc_bootmem_pages(size * NR_CPUS);
1351 1351
1352 __per_cpu_base = ptr - __per_cpu_start; 1352 __per_cpu_base = ptr - __per_cpu_start;
1353 1353
diff --git a/arch/um/defconfig b/arch/um/defconfig
index 780cc0a4a128..f938fa822146 100644
--- a/arch/um/defconfig
+++ b/arch/um/defconfig
@@ -41,6 +41,7 @@ CONFIG_M686=y
41# CONFIG_MGEODE_LX is not set 41# CONFIG_MGEODE_LX is not set
42# CONFIG_MCYRIXIII is not set 42# CONFIG_MCYRIXIII is not set
43# CONFIG_MVIAC3_2 is not set 43# CONFIG_MVIAC3_2 is not set
44# CONFIG_MVIAC7 is not set
44# CONFIG_X86_GENERIC is not set 45# CONFIG_X86_GENERIC is not set
45CONFIG_X86_CMPXCHG=y 46CONFIG_X86_CMPXCHG=y
46CONFIG_X86_XADD=y 47CONFIG_X86_XADD=y
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index e9b4f058a49a..145bb824b2a8 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -415,13 +415,13 @@ config OUT_OF_LINE_PFN_TO_PAGE
415 depends on DISCONTIGMEM 415 depends on DISCONTIGMEM
416 416
417config NR_CPUS 417config NR_CPUS
418 int "Maximum number of CPUs (2-256)" 418 int "Maximum number of CPUs (2-255)"
419 range 2 255 419 range 2 255
420 depends on SMP 420 depends on SMP
421 default "8" 421 default "8"
422 help 422 help
423 This allows you to specify the maximum number of CPUs which this 423 This allows you to specify the maximum number of CPUs which this
424 kernel will support. Current maximum is 256 CPUs due to 424 kernel will support. Current maximum is 255 CPUs due to
425 APIC addressing limits. Less depending on the hardware. 425 APIC addressing limits. Less depending on the hardware.
426 426
427 This is purely to save memory - each supported CPU requires 427 This is purely to save memory - each supported CPU requires
@@ -565,23 +565,56 @@ config CRASH_DUMP
565 PHYSICAL_START. 565 PHYSICAL_START.
566 For more details see Documentation/kdump/kdump.txt 566 For more details see Documentation/kdump/kdump.txt
567 567
568config RELOCATABLE
569 bool "Build a relocatable kernel(EXPERIMENTAL)"
570 depends on EXPERIMENTAL
571 help
572 Builds a relocatable kernel. This enables loading and running
573 a kernel binary from a different physical address than it has
574 been compiled for.
575
576 One use is for the kexec on panic case where the recovery kernel
577 must live at a different physical address than the primary
578 kernel.
579
580 Note: If CONFIG_RELOCATABLE=y, then kernel run from the address
581 it has been loaded at and compile time physical address
582 (CONFIG_PHYSICAL_START) is ignored.
583
568config PHYSICAL_START 584config PHYSICAL_START
569 hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) 585 hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
570 default "0x1000000" if CRASH_DUMP
571 default "0x200000" 586 default "0x200000"
572 help 587 help
573 This gives the physical address where the kernel is loaded. Normally 588 This gives the physical address where the kernel is loaded. It
574 for regular kernels this value is 0x200000 (2MB). But in the case 589 should be aligned to 2MB boundary.
575 of kexec on panic the fail safe kernel needs to run at a different 590
576 address than the panic-ed kernel. This option is used to set the load 591 If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then
577 address for kernels used to capture crash dump on being kexec'ed 592 bzImage will decompress itself to above physical address and
578 after panic. The default value for crash dump kernels is 593 run from there. Otherwise, bzImage will run from the address where
579 0x1000000 (16MB). This can also be set based on the "X" value as 594 it has been loaded by the boot loader and will ignore above physical
595 address.
596
597 In normal kdump cases one does not have to set/change this option
598 as now bzImage can be compiled as a completely relocatable image
599 (CONFIG_RELOCATABLE=y) and be used to load and run from a different
600 address. This option is mainly useful for the folks who don't want
601 to use a bzImage for capturing the crash dump and want to use a
602 vmlinux instead.
603
604 So if you are using bzImage for capturing the crash dump, leave
605 the value here unchanged to 0x200000 and set CONFIG_RELOCATABLE=y.
606 Otherwise if you plan to use vmlinux for capturing the crash dump
607 change this value to start of the reserved region (Typically 16MB
608 0x1000000). In other words, it can be set based on the "X" value as
580 specified in the "crashkernel=YM@XM" command line boot parameter 609 specified in the "crashkernel=YM@XM" command line boot parameter
581 passed to the panic-ed kernel. Typically this parameter is set as 610 passed to the panic-ed kernel. Typically this parameter is set as
582 crashkernel=64M@16M. Please take a look at 611 crashkernel=64M@16M. Please take a look at
583 Documentation/kdump/kdump.txt for more details about crash dumps. 612 Documentation/kdump/kdump.txt for more details about crash dumps.
584 613
614 Usage of bzImage for capturing the crash dump is advantageous as
615 one does not have to build two kernels. Same kernel can be used
616 as production kernel and capture kernel.
617
585 Don't change this unless you know what you are doing. 618 Don't change this unless you know what you are doing.
586 619
587config SECCOMP 620config SECCOMP
@@ -627,14 +660,6 @@ config CC_STACKPROTECTOR_ALL
627 660
628source kernel/Kconfig.hz 661source kernel/Kconfig.hz
629 662
630config REORDER
631 bool "Function reordering"
632 default n
633 help
634 This option enables the toolchain to reorder functions for a more
635 optimal TLB usage. If you have pretty much any version of binutils,
636 this can increase your kernel build time by roughly one minute.
637
638config K8_NB 663config K8_NB
639 def_bool y 664 def_bool y
640 depends on AGP_AMD64 || IOMMU || (PCI && NUMA) 665 depends on AGP_AMD64 || IOMMU || (PCI && NUMA)
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 2941a915d4ef..29617ae3926d 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -40,10 +40,6 @@ cflags-y += -m64
40cflags-y += -mno-red-zone 40cflags-y += -mno-red-zone
41cflags-y += -mcmodel=kernel 41cflags-y += -mcmodel=kernel
42cflags-y += -pipe 42cflags-y += -pipe
43cflags-kernel-$(CONFIG_REORDER) += -ffunction-sections
44# this makes reading assembly source easier, but produces worse code
45# actually it makes the kernel smaller too.
46cflags-y += -fno-reorder-blocks
47cflags-y += -Wno-sign-compare 43cflags-y += -Wno-sign-compare
48cflags-y += -fno-asynchronous-unwind-tables 44cflags-y += -fno-asynchronous-unwind-tables
49ifneq ($(CONFIG_DEBUG_INFO),y) 45ifneq ($(CONFIG_DEBUG_INFO),y)
diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile
index deb063e7762d..ee6f6505f95f 100644
--- a/arch/x86_64/boot/Makefile
+++ b/arch/x86_64/boot/Makefile
@@ -36,7 +36,7 @@ subdir- := compressed/ #Let make clean descend in compressed/
36# --------------------------------------------------------------------------- 36# ---------------------------------------------------------------------------
37 37
38$(obj)/bzImage: IMAGE_OFFSET := 0x100000 38$(obj)/bzImage: IMAGE_OFFSET := 0x100000
39$(obj)/bzImage: EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__ 39$(obj)/bzImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
40$(obj)/bzImage: BUILDFLAGS := -b 40$(obj)/bzImage: BUILDFLAGS := -b
41 41
42quiet_cmd_image = BUILD $@ 42quiet_cmd_image = BUILD $@
diff --git a/arch/x86_64/boot/compressed/Makefile b/arch/x86_64/boot/compressed/Makefile
index e70fa6e1da08..705a3e33d7e1 100644
--- a/arch/x86_64/boot/compressed/Makefile
+++ b/arch/x86_64/boot/compressed/Makefile
@@ -8,16 +8,14 @@
8 8
9targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o 9targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o
10EXTRA_AFLAGS := -traditional 10EXTRA_AFLAGS := -traditional
11AFLAGS := $(subst -m64,-m32,$(AFLAGS))
12 11
13# cannot use EXTRA_CFLAGS because base CFLAGS contains -mkernel which conflicts with 12# cannot use EXTRA_CFLAGS because base CFLAGS contains -mkernel which conflicts with
14# -m32 13# -m32
15CFLAGS := -m32 -D__KERNEL__ -Iinclude -O2 -fno-strict-aliasing 14CFLAGS := -m64 -D__KERNEL__ -Iinclude -O2 -fno-strict-aliasing -fPIC -mcmodel=small -fno-builtin
16LDFLAGS := -m elf_i386 15LDFLAGS := -m elf_x86_64
17 16
18LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32 -m elf_i386 17LDFLAGS_vmlinux := -T
19 18$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
20$(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
21 $(call if_changed,ld) 19 $(call if_changed,ld)
22 @: 20 @:
23 21
@@ -27,7 +25,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE
27$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE 25$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
28 $(call if_changed,gzip) 26 $(call if_changed,gzip)
29 27
30LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T 28LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
31 29
32$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE 30$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
33 $(call if_changed,ld) 31 $(call if_changed,ld)
diff --git a/arch/x86_64/boot/compressed/head.S b/arch/x86_64/boot/compressed/head.S
index 6f55565e4d42..f9d5692a0106 100644
--- a/arch/x86_64/boot/compressed/head.S
+++ b/arch/x86_64/boot/compressed/head.S
@@ -26,116 +26,279 @@
26 26
27#include <linux/linkage.h> 27#include <linux/linkage.h>
28#include <asm/segment.h> 28#include <asm/segment.h>
29#include <asm/pgtable.h>
29#include <asm/page.h> 30#include <asm/page.h>
31#include <asm/msr.h>
30 32
33.section ".text.head"
31 .code32 34 .code32
32 .globl startup_32 35 .globl startup_32
33 36
34startup_32: 37startup_32:
35 cld 38 cld
36 cli 39 cli
37 movl $(__KERNEL_DS),%eax 40 movl $(__KERNEL_DS), %eax
38 movl %eax,%ds 41 movl %eax, %ds
39 movl %eax,%es 42 movl %eax, %es
40 movl %eax,%fs 43 movl %eax, %ss
41 movl %eax,%gs 44
42 45/* Calculate the delta between where we were compiled to run
43 lss stack_start,%esp 46 * at and where we were actually loaded at. This can only be done
44 xorl %eax,%eax 47 * with a short local call on x86. Nothing else will tell us what
451: incl %eax # check that A20 really IS enabled 48 * address we are running at. The reserved chunk of the real-mode
46 movl %eax,0x000000 # loop forever if it isn't 49 * data at 0x34-0x3f are used as the stack for this calculation.
47 cmpl %eax,0x100000 50 * Only 4 bytes are needed.
48 je 1b 51 */
52 leal 0x40(%esi), %esp
53 call 1f
541: popl %ebp
55 subl $1b, %ebp
56
57/* setup a stack and make sure cpu supports long mode. */
58 movl $user_stack_end, %eax
59 addl %ebp, %eax
60 movl %eax, %esp
61
62 call verify_cpu
63 testl %eax, %eax
64 jnz no_longmode
65
66/* Compute the delta between where we were compiled to run at
67 * and where the code will actually run at.
68 */
69/* %ebp contains the address we are loaded at by the boot loader and %ebx
70 * contains the address where we should move the kernel image temporarily
71 * for safe in-place decompression.
72 */
73
74#ifdef CONFIG_RELOCATABLE
75 movl %ebp, %ebx
76 addl $(LARGE_PAGE_SIZE -1), %ebx
77 andl $LARGE_PAGE_MASK, %ebx
78#else
79 movl $CONFIG_PHYSICAL_START, %ebx
80#endif
81
82 /* Replace the compressed data size with the uncompressed size */
83 subl input_len(%ebp), %ebx
84 movl output_len(%ebp), %eax
85 addl %eax, %ebx
86 /* Add 8 bytes for every 32K input block */
87 shrl $12, %eax
88 addl %eax, %ebx
89 /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
90 addl $(32768 + 18 + 4095), %ebx
91 andl $~4095, %ebx
49 92
50/* 93/*
51 * Initialize eflags. Some BIOS's leave bits like NT set. This would 94 * Prepare for entering 64 bit mode
52 * confuse the debugger if this code is traced.
53 * XXX - best to initialize before switching to protected mode.
54 */ 95 */
55 pushl $0 96
56 popfl 97 /* Load new GDT with the 64bit segments using 32bit descriptor */
98 leal gdt(%ebp), %eax
99 movl %eax, gdt+2(%ebp)
100 lgdt gdt(%ebp)
101
102 /* Enable PAE mode */
103 xorl %eax, %eax
104 orl $(1 << 5), %eax
105 movl %eax, %cr4
106
107 /*
108 * Build early 4G boot pagetable
109 */
110 /* Initialize Page tables to 0*/
111 leal pgtable(%ebx), %edi
112 xorl %eax, %eax
113 movl $((4096*6)/4), %ecx
114 rep stosl
115
116 /* Build Level 4 */
117 leal pgtable + 0(%ebx), %edi
118 leal 0x1007 (%edi), %eax
119 movl %eax, 0(%edi)
120
121 /* Build Level 3 */
122 leal pgtable + 0x1000(%ebx), %edi
123 leal 0x1007(%edi), %eax
124 movl $4, %ecx
1251: movl %eax, 0x00(%edi)
126 addl $0x00001000, %eax
127 addl $8, %edi
128 decl %ecx
129 jnz 1b
130
131 /* Build Level 2 */
132 leal pgtable + 0x2000(%ebx), %edi
133 movl $0x00000183, %eax
134 movl $2048, %ecx
1351: movl %eax, 0(%edi)
136 addl $0x00200000, %eax
137 addl $8, %edi
138 decl %ecx
139 jnz 1b
140
141 /* Enable the boot page tables */
142 leal pgtable(%ebx), %eax
143 movl %eax, %cr3
144
145 /* Enable Long mode in EFER (Extended Feature Enable Register) */
146 movl $MSR_EFER, %ecx
147 rdmsr
148 btsl $_EFER_LME, %eax
149 wrmsr
150
151 /* Setup for the jump to 64bit mode
152 *
153 * When the jump is performend we will be in long mode but
154 * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
155 * (and in turn EFER.LMA = 1). To jump into 64bit mode we use
156 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
157 * We place all of the values on our mini stack so lret can
158 * used to perform that far jump.
159 */
160 pushl $__KERNEL_CS
161 leal startup_64(%ebp), %eax
162 pushl %eax
163
164 /* Enter paged protected Mode, activating Long Mode */
165 movl $0x80000001, %eax /* Enable Paging and Protected mode */
166 movl %eax, %cr0
167
168 /* Jump from 32bit compatibility mode into 64bit mode. */
169 lret
170
171no_longmode:
172 /* This isn't an x86-64 CPU so hang */
1731:
174 hlt
175 jmp 1b
176
177#include "../../kernel/verify_cpu.S"
178
179 /* Be careful here startup_64 needs to be at a predictable
180 * address so I can export it in an ELF header. Bootloaders
181 * should look at the ELF header to find this address, as
182 * it may change in the future.
183 */
184 .code64
185 .org 0x200
186ENTRY(startup_64)
187 /* We come here either from startup_32 or directly from a
188 * 64bit bootloader. If we come here from a bootloader we depend on
189 * an identity mapped page table being provied that maps our
190 * entire text+data+bss and hopefully all of memory.
191 */
192
193 /* Setup data segments. */
194 xorl %eax, %eax
195 movl %eax, %ds
196 movl %eax, %es
197 movl %eax, %ss
198
199 /* Compute the decompressed kernel start address. It is where
200 * we were loaded at aligned to a 2M boundary. %rbp contains the
201 * decompressed kernel start address.
202 *
203 * If it is a relocatable kernel then decompress and run the kernel
204 * from load address aligned to 2MB addr, otherwise decompress and
205 * run the kernel from CONFIG_PHYSICAL_START
206 */
207
208 /* Start with the delta to where the kernel will run at. */
209#ifdef CONFIG_RELOCATABLE
210 leaq startup_32(%rip) /* - $startup_32 */, %rbp
211 addq $(LARGE_PAGE_SIZE - 1), %rbp
212 andq $LARGE_PAGE_MASK, %rbp
213 movq %rbp, %rbx
214#else
215 movq $CONFIG_PHYSICAL_START, %rbp
216 movq %rbp, %rbx
217#endif
218
219 /* Replace the compressed data size with the uncompressed size */
220 movl input_len(%rip), %eax
221 subq %rax, %rbx
222 movl output_len(%rip), %eax
223 addq %rax, %rbx
224 /* Add 8 bytes for every 32K input block */
225 shrq $12, %rax
226 addq %rax, %rbx
227 /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
228 addq $(32768 + 18 + 4095), %rbx
229 andq $~4095, %rbx
230
231/* Copy the compressed kernel to the end of our buffer
232 * where decompression in place becomes safe.
233 */
234 leaq _end(%rip), %r8
235 leaq _end(%rbx), %r9
236 movq $_end /* - $startup_32 */, %rcx
2371: subq $8, %r8
238 subq $8, %r9
239 movq 0(%r8), %rax
240 movq %rax, 0(%r9)
241 subq $8, %rcx
242 jnz 1b
243
244/*
245 * Jump to the relocated address.
246 */
247 leaq relocated(%rbx), %rax
248 jmp *%rax
249
250.section ".text"
251relocated:
252
57/* 253/*
58 * Clear BSS 254 * Clear BSS
59 */ 255 */
60 xorl %eax,%eax 256 xorq %rax, %rax
61 movl $_edata,%edi 257 leaq _edata(%rbx), %rdi
62 movl $_end,%ecx 258 leaq _end(%rbx), %rcx
63 subl %edi,%ecx 259 subq %rdi, %rcx
64 cld 260 cld
65 rep 261 rep
66 stosb 262 stosb
263
264 /* Setup the stack */
265 leaq user_stack_end(%rip), %rsp
266
267 /* zero EFLAGS after setting rsp */
268 pushq $0
269 popfq
270
67/* 271/*
68 * Do the decompression, and jump to the new kernel.. 272 * Do the decompression, and jump to the new kernel..
69 */ 273 */
70 subl $16,%esp # place for structure on the stack 274 pushq %rsi # Save the real mode argument
71 movl %esp,%eax 275 movq %rsi, %rdi # real mode address
72 pushl %esi # real mode pointer as second arg 276 leaq _heap(%rip), %rsi # _heap
73 pushl %eax # address of structure as first arg 277 leaq input_data(%rip), %rdx # input_data
74 call decompress_kernel 278 movl input_len(%rip), %eax
75 orl %eax,%eax 279 movq %rax, %rcx # input_len
76 jnz 3f 280 movq %rbp, %r8 # output
77 addl $8,%esp 281 call decompress_kernel
78 xorl %ebx,%ebx 282 popq %rsi
79 ljmp $(__KERNEL_CS), $__PHYSICAL_START
80 283
81/*
82 * We come here, if we were loaded high.
83 * We need to move the move-in-place routine down to 0x1000
84 * and then start it with the buffer addresses in registers,
85 * which we got from the stack.
86 */
873:
88 movl %esi,%ebx
89 movl $move_routine_start,%esi
90 movl $0x1000,%edi
91 movl $move_routine_end,%ecx
92 subl %esi,%ecx
93 addl $3,%ecx
94 shrl $2,%ecx
95 cld
96 rep
97 movsl
98
99 popl %esi # discard the address
100 addl $4,%esp # real mode pointer
101 popl %esi # low_buffer_start
102 popl %ecx # lcount
103 popl %edx # high_buffer_start
104 popl %eax # hcount
105 movl $__PHYSICAL_START,%edi
106 cli # make sure we don't get interrupted
107 ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
108 284
109/* 285/*
110 * Routine (template) for moving the decompressed kernel in place, 286 * Jump to the decompressed kernel.
111 * if we were high loaded. This _must_ PIC-code !
112 */ 287 */
113move_routine_start: 288 jmp *%rbp
114 movl %ecx,%ebp
115 shrl $2,%ecx
116 rep
117 movsl
118 movl %ebp,%ecx
119 andl $3,%ecx
120 rep
121 movsb
122 movl %edx,%esi
123 movl %eax,%ecx # NOTE: rep movsb won't move if %ecx == 0
124 addl $3,%ecx
125 shrl $2,%ecx
126 rep
127 movsl
128 movl %ebx,%esi # Restore setup pointer
129 xorl %ebx,%ebx
130 ljmp $(__KERNEL_CS), $__PHYSICAL_START
131move_routine_end:
132 289
133 290 .data
134/* Stack for uncompression */ 291gdt:
135 .align 32 292 .word gdt_end - gdt
136user_stack: 293 .long gdt
294 .word 0
295 .quad 0x0000000000000000 /* NULL descriptor */
296 .quad 0x00af9a000000ffff /* __KERNEL_CS */
297 .quad 0x00cf92000000ffff /* __KERNEL_DS */
298gdt_end:
299 .bss
300/* Stack for uncompression */
301 .balign 4
302user_stack:
137 .fill 4096,4,0 303 .fill 4096,4,0
138stack_start: 304user_stack_end:
139 .long user_stack+4096
140 .word __KERNEL_DS
141
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c
index 3755b2e394d0..f932b0e89096 100644
--- a/arch/x86_64/boot/compressed/misc.c
+++ b/arch/x86_64/boot/compressed/misc.c
@@ -9,10 +9,95 @@
9 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 9 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
10 */ 10 */
11 11
12#define _LINUX_STRING_H_ 1
13#define __LINUX_BITMAP_H 1
14
15#include <linux/linkage.h>
12#include <linux/screen_info.h> 16#include <linux/screen_info.h>
13#include <asm/io.h> 17#include <asm/io.h>
14#include <asm/page.h> 18#include <asm/page.h>
15 19
20/* WARNING!!
21 * This code is compiled with -fPIC and it is relocated dynamically
22 * at run time, but no relocation processing is performed.
23 * This means that it is not safe to place pointers in static structures.
24 */
25
26/*
27 * Getting to provable safe in place decompression is hard.
28 * Worst case behaviours need to be analized.
29 * Background information:
30 *
31 * The file layout is:
32 * magic[2]
33 * method[1]
34 * flags[1]
35 * timestamp[4]
36 * extraflags[1]
37 * os[1]
38 * compressed data blocks[N]
39 * crc[4] orig_len[4]
40 *
41 * resulting in 18 bytes of non compressed data overhead.
42 *
43 * Files divided into blocks
44 * 1 bit (last block flag)
45 * 2 bits (block type)
46 *
47 * 1 block occurs every 32K -1 bytes or when there 50% compression has been achieved.
48 * The smallest block type encoding is always used.
49 *
50 * stored:
51 * 32 bits length in bytes.
52 *
53 * fixed:
54 * magic fixed tree.
55 * symbols.
56 *
57 * dynamic:
58 * dynamic tree encoding.
59 * symbols.
60 *
61 *
62 * The buffer for decompression in place is the length of the
63 * uncompressed data, plus a small amount extra to keep the algorithm safe.
64 * The compressed data is placed at the end of the buffer. The output
65 * pointer is placed at the start of the buffer and the input pointer
66 * is placed where the compressed data starts. Problems will occur
67 * when the output pointer overruns the input pointer.
68 *
69 * The output pointer can only overrun the input pointer if the input
70 * pointer is moving faster than the output pointer. A condition only
71 * triggered by data whose compressed form is larger than the uncompressed
72 * form.
73 *
74 * The worst case at the block level is a growth of the compressed data
75 * of 5 bytes per 32767 bytes.
76 *
77 * The worst case internal to a compressed block is very hard to figure.
78 * The worst case can at least be boundined by having one bit that represents
79 * 32764 bytes and then all of the rest of the bytes representing the very
80 * very last byte.
81 *
82 * All of which is enough to compute an amount of extra data that is required
83 * to be safe. To avoid problems at the block level allocating 5 extra bytes
84 * per 32767 bytes of data is sufficient. To avoind problems internal to a block
85 * adding an extra 32767 bytes (the worst case uncompressed block size) is
86 * sufficient, to ensure that in the worst case the decompressed data for
87 * block will stop the byte before the compressed data for a block begins.
88 * To avoid problems with the compressed data's meta information an extra 18
89 * bytes are needed. Leading to the formula:
90 *
91 * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
92 *
93 * Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
94 * Adding 32768 instead of 32767 just makes for round numbers.
95 * Adding the decompressor_size is necessary as it musht live after all
96 * of the data as well. Last I measured the decompressor is about 14K.
97 * 10K of actuall data and 4K of bss.
98 *
99 */
100
16/* 101/*
17 * gzip declarations 102 * gzip declarations
18 */ 103 */
@@ -28,15 +113,20 @@ typedef unsigned char uch;
28typedef unsigned short ush; 113typedef unsigned short ush;
29typedef unsigned long ulg; 114typedef unsigned long ulg;
30 115
31#define WSIZE 0x8000 /* Window size must be at least 32k, */ 116#define WSIZE 0x80000000 /* Window size must be at least 32k,
32 /* and a power of two */ 117 * and a power of two
118 * We don't actually have a window just
119 * a huge output buffer so I report
120 * a 2G windows size, as that should
121 * always be larger than our output buffer.
122 */
33 123
34static uch *inbuf; /* input buffer */ 124static uch *inbuf; /* input buffer */
35static uch window[WSIZE]; /* Sliding window buffer */ 125static uch *window; /* Sliding window buffer, (and final output buffer) */
36 126
37static unsigned insize = 0; /* valid bytes in inbuf */ 127static unsigned insize; /* valid bytes in inbuf */
38static unsigned inptr = 0; /* index of next byte to be processed in inbuf */ 128static unsigned inptr; /* index of next byte to be processed in inbuf */
39static unsigned outcnt = 0; /* bytes in output buffer */ 129static unsigned outcnt; /* bytes in output buffer */
40 130
41/* gzip flag byte */ 131/* gzip flag byte */
42#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */ 132#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */
@@ -87,8 +177,6 @@ extern unsigned char input_data[];
87extern int input_len; 177extern int input_len;
88 178
89static long bytes_out = 0; 179static long bytes_out = 0;
90static uch *output_data;
91static unsigned long output_ptr = 0;
92 180
93static void *malloc(int size); 181static void *malloc(int size);
94static void free(void *where); 182static void free(void *where);
@@ -98,17 +186,10 @@ static void *memcpy(void *dest, const void *src, unsigned n);
98 186
99static void putstr(const char *); 187static void putstr(const char *);
100 188
101extern int end; 189static long free_mem_ptr;
102static long free_mem_ptr = (long)&end;
103static long free_mem_end_ptr; 190static long free_mem_end_ptr;
104 191
105#define INPLACE_MOVE_ROUTINE 0x1000 192#define HEAP_SIZE 0x7000
106#define LOW_BUFFER_START 0x2000
107#define LOW_BUFFER_MAX 0x90000
108#define HEAP_SIZE 0x3000
109static unsigned int low_buffer_end, low_buffer_size;
110static int high_loaded =0;
111static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
112 193
113static char *vidmem = (char *)0xb8000; 194static char *vidmem = (char *)0xb8000;
114static int vidport; 195static int vidport;
@@ -218,58 +299,31 @@ static void* memcpy(void* dest, const void* src, unsigned n)
218 */ 299 */
219static int fill_inbuf(void) 300static int fill_inbuf(void)
220{ 301{
221 if (insize != 0) { 302 error("ran out of input data");
222 error("ran out of input data"); 303 return 0;
223 }
224
225 inbuf = input_data;
226 insize = input_len;
227 inptr = 1;
228 return inbuf[0];
229} 304}
230 305
231/* =========================================================================== 306/* ===========================================================================
232 * Write the output window window[0..outcnt-1] and update crc and bytes_out. 307 * Write the output window window[0..outcnt-1] and update crc and bytes_out.
233 * (Used for the decompressed data only.) 308 * (Used for the decompressed data only.)
234 */ 309 */
235static void flush_window_low(void)
236{
237 ulg c = crc; /* temporary variable */
238 unsigned n;
239 uch *in, *out, ch;
240
241 in = window;
242 out = &output_data[output_ptr];
243 for (n = 0; n < outcnt; n++) {
244 ch = *out++ = *in++;
245 c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
246 }
247 crc = c;
248 bytes_out += (ulg)outcnt;
249 output_ptr += (ulg)outcnt;
250 outcnt = 0;
251}
252
253static void flush_window_high(void)
254{
255 ulg c = crc; /* temporary variable */
256 unsigned n;
257 uch *in, ch;
258 in = window;
259 for (n = 0; n < outcnt; n++) {
260 ch = *output_data++ = *in++;
261 if ((ulg)output_data == low_buffer_end) output_data=high_buffer_start;
262 c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
263 }
264 crc = c;
265 bytes_out += (ulg)outcnt;
266 outcnt = 0;
267}
268
269static void flush_window(void) 310static void flush_window(void)
270{ 311{
271 if (high_loaded) flush_window_high(); 312 /* With my window equal to my output buffer
272 else flush_window_low(); 313 * I only need to compute the crc here.
314 */
315 ulg c = crc; /* temporary variable */
316 unsigned n;
317 uch *in, ch;
318
319 in = window;
320 for (n = 0; n < outcnt; n++) {
321 ch = *in++;
322 c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
323 }
324 crc = c;
325 bytes_out += (ulg)outcnt;
326 outcnt = 0;
273} 327}
274 328
275static void error(char *x) 329static void error(char *x)
@@ -281,57 +335,8 @@ static void error(char *x)
281 while(1); /* Halt */ 335 while(1); /* Halt */
282} 336}
283 337
284static void setup_normal_output_buffer(void) 338asmlinkage void decompress_kernel(void *rmode, unsigned long heap,
285{ 339 uch *input_data, unsigned long input_len, uch *output)
286#ifdef STANDARD_MEMORY_BIOS_CALL
287 if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory");
288#else
289 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
290#endif
291 output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
292 free_mem_end_ptr = (long)real_mode;
293}
294
295struct moveparams {
296 uch *low_buffer_start; int lcount;
297 uch *high_buffer_start; int hcount;
298};
299
300static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
301{
302 high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
303#ifdef STANDARD_MEMORY_BIOS_CALL
304 if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
305#else
306 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
307#endif
308 mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
309 low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
310 ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
311 low_buffer_size = low_buffer_end - LOW_BUFFER_START;
312 high_loaded = 1;
313 free_mem_end_ptr = (long)high_buffer_start;
314 if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) {
315 high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size);
316 mv->hcount = 0; /* say: we need not to move high_buffer */
317 }
318 else mv->hcount = -1;
319 mv->high_buffer_start = high_buffer_start;
320}
321
322static void close_output_buffer_if_we_run_high(struct moveparams *mv)
323{
324 if (bytes_out > low_buffer_size) {
325 mv->lcount = low_buffer_size;
326 if (mv->hcount)
327 mv->hcount = bytes_out - low_buffer_size;
328 } else {
329 mv->lcount = bytes_out;
330 mv->hcount = 0;
331 }
332}
333
334int decompress_kernel(struct moveparams *mv, void *rmode)
335{ 340{
336 real_mode = rmode; 341 real_mode = rmode;
337 342
@@ -346,13 +351,21 @@ int decompress_kernel(struct moveparams *mv, void *rmode)
346 lines = RM_SCREEN_INFO.orig_video_lines; 351 lines = RM_SCREEN_INFO.orig_video_lines;
347 cols = RM_SCREEN_INFO.orig_video_cols; 352 cols = RM_SCREEN_INFO.orig_video_cols;
348 353
349 if (free_mem_ptr < 0x100000) setup_normal_output_buffer(); 354 window = output; /* Output buffer (Normally at 1M) */
350 else setup_output_buffer_if_we_run_high(mv); 355 free_mem_ptr = heap; /* Heap */
356 free_mem_end_ptr = heap + HEAP_SIZE;
357 inbuf = input_data; /* Input buffer */
358 insize = input_len;
359 inptr = 0;
360
361 if ((ulg)output & (__KERNEL_ALIGN - 1))
362 error("Destination address not 2M aligned");
363 if ((ulg)output >= 0xffffffffffUL)
364 error("Destination address too large");
351 365
352 makecrc(); 366 makecrc();
353 putstr(".\nDecompressing Linux..."); 367 putstr(".\nDecompressing Linux...");
354 gunzip(); 368 gunzip();
355 putstr("done.\nBooting the kernel.\n"); 369 putstr("done.\nBooting the kernel.\n");
356 if (high_loaded) close_output_buffer_if_we_run_high(mv); 370 return;
357 return high_loaded;
358} 371}
diff --git a/arch/x86_64/boot/compressed/vmlinux.lds b/arch/x86_64/boot/compressed/vmlinux.lds
new file mode 100644
index 000000000000..94c13e557fb4
--- /dev/null
+++ b/arch/x86_64/boot/compressed/vmlinux.lds
@@ -0,0 +1,44 @@
1OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
2OUTPUT_ARCH(i386:x86-64)
3ENTRY(startup_64)
4SECTIONS
5{
6 /* Be careful parts of head.S assume startup_32 is at
7 * address 0.
8 */
9 . = 0;
10 .text : {
11 _head = . ;
12 *(.text.head)
13 _ehead = . ;
14 *(.text.compressed)
15 _text = .; /* Text */
16 *(.text)
17 *(.text.*)
18 _etext = . ;
19 }
20 .rodata : {
21 _rodata = . ;
22 *(.rodata) /* read-only data */
23 *(.rodata.*)
24 _erodata = . ;
25 }
26 .data : {
27 _data = . ;
28 *(.data)
29 *(.data.*)
30 _edata = . ;
31 }
32 .bss : {
33 _bss = . ;
34 *(.bss)
35 *(.bss.*)
36 *(COMMON)
37 . = ALIGN(8);
38 _end = . ;
39 . = ALIGN(4096);
40 pgtable = . ;
41 . = . + 4096 * 6;
42 _heap = .;
43 }
44}
diff --git a/arch/x86_64/boot/compressed/vmlinux.scr b/arch/x86_64/boot/compressed/vmlinux.scr
index 1ed9d791f863..bd1429ce193e 100644
--- a/arch/x86_64/boot/compressed/vmlinux.scr
+++ b/arch/x86_64/boot/compressed/vmlinux.scr
@@ -1,9 +1,10 @@
1SECTIONS 1SECTIONS
2{ 2{
3 .data : { 3 .text.compressed : {
4 input_len = .; 4 input_len = .;
5 LONG(input_data_end - input_data) input_data = .; 5 LONG(input_data_end - input_data) input_data = .;
6 *(.data) 6 *(.data)
7 input_data_end = .; 7 output_len = . - 4;
8 input_data_end = .;
8 } 9 }
9} 10}
diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S
index 770940cc0108..e9e33f949697 100644
--- a/arch/x86_64/boot/setup.S
+++ b/arch/x86_64/boot/setup.S
@@ -51,6 +51,7 @@
51#include <asm/boot.h> 51#include <asm/boot.h>
52#include <asm/e820.h> 52#include <asm/e820.h>
53#include <asm/page.h> 53#include <asm/page.h>
54#include <asm/setup.h>
54 55
55/* Signature words to ensure LILO loaded us right */ 56/* Signature words to ensure LILO loaded us right */
56#define SIG1 0xAA55 57#define SIG1 0xAA55
@@ -80,7 +81,7 @@ start:
80# This is the setup header, and it must start at %cs:2 (old 0x9020:2) 81# This is the setup header, and it must start at %cs:2 (old 0x9020:2)
81 82
82 .ascii "HdrS" # header signature 83 .ascii "HdrS" # header signature
83 .word 0x0204 # header version number (>= 0x0105) 84 .word 0x0206 # header version number (>= 0x0105)
84 # or else old loadlin-1.5 will fail) 85 # or else old loadlin-1.5 will fail)
85realmode_swtch: .word 0, 0 # default_switch, SETUPSEG 86realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
86start_sys_seg: .word SYSSEG 87start_sys_seg: .word SYSSEG
@@ -155,7 +156,20 @@ cmd_line_ptr: .long 0 # (Header version 0x0202 or later)
155 # low memory 0x10000 or higher. 156 # low memory 0x10000 or higher.
156 157
157ramdisk_max: .long 0xffffffff 158ramdisk_max: .long 0xffffffff
158 159kernel_alignment: .long 0x200000 # physical addr alignment required for
160 # protected mode relocatable kernel
161#ifdef CONFIG_RELOCATABLE
162relocatable_kernel: .byte 1
163#else
164relocatable_kernel: .byte 0
165#endif
166pad2: .byte 0
167pad3: .word 0
168
169cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
170 #added with boot protocol
171 #version 2.06
172
159trampoline: call start_of_setup 173trampoline: call start_of_setup
160 .align 16 174 .align 16
161 # The offset at this point is 0x240 175 # The offset at this point is 0x240
@@ -290,64 +304,10 @@ loader_ok:
290 movw %cs,%ax 304 movw %cs,%ax
291 movw %ax,%ds 305 movw %ax,%ds
292 306
293 /* minimum CPUID flags for x86-64 */ 307 call verify_cpu
294 /* see http://www.x86-64.org/lists/discuss/msg02971.html */ 308 testl %eax,%eax
295#define SSE_MASK ((1<<25)|(1<<26)) 309 jz sse_ok
296#define REQUIRED_MASK1 ((1<<0)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<8)|\ 310
297 (1<<13)|(1<<15)|(1<<24))
298#define REQUIRED_MASK2 (1<<29)
299
300 pushfl /* standard way to check for cpuid */
301 popl %eax
302 movl %eax,%ebx
303 xorl $0x200000,%eax
304 pushl %eax
305 popfl
306 pushfl
307 popl %eax
308 cmpl %eax,%ebx
309 jz no_longmode /* cpu has no cpuid */
310 movl $0x0,%eax
311 cpuid
312 cmpl $0x1,%eax
313 jb no_longmode /* no cpuid 1 */
314 xor %di,%di
315 cmpl $0x68747541,%ebx /* AuthenticAMD */
316 jnz noamd
317 cmpl $0x69746e65,%edx
318 jnz noamd
319 cmpl $0x444d4163,%ecx
320 jnz noamd
321 mov $1,%di /* cpu is from AMD */
322noamd:
323 movl $0x1,%eax
324 cpuid
325 andl $REQUIRED_MASK1,%edx
326 xorl $REQUIRED_MASK1,%edx
327 jnz no_longmode
328 movl $0x80000000,%eax
329 cpuid
330 cmpl $0x80000001,%eax
331 jb no_longmode /* no extended cpuid */
332 movl $0x80000001,%eax
333 cpuid
334 andl $REQUIRED_MASK2,%edx
335 xorl $REQUIRED_MASK2,%edx
336 jnz no_longmode
337sse_test:
338 movl $1,%eax
339 cpuid
340 andl $SSE_MASK,%edx
341 cmpl $SSE_MASK,%edx
342 je sse_ok
343 test %di,%di
344 jz no_longmode /* only try to force SSE on AMD */
345 movl $0xc0010015,%ecx /* HWCR */
346 rdmsr
347 btr $15,%eax /* enable SSE */
348 wrmsr
349 xor %di,%di /* don't loop */
350 jmp sse_test /* try again */
351no_longmode: 311no_longmode:
352 call beep 312 call beep
353 lea long_mode_panic,%si 313 lea long_mode_panic,%si
@@ -357,7 +317,8 @@ no_longmode_loop:
357long_mode_panic: 317long_mode_panic:
358 .string "Your CPU does not support long mode. Use a 32bit distribution." 318 .string "Your CPU does not support long mode. Use a 32bit distribution."
359 .byte 0 319 .byte 0
360 320
321#include "../kernel/verify_cpu.S"
361sse_ok: 322sse_ok:
362 popw %ds 323 popw %ds
363 324
@@ -846,7 +807,7 @@ gdt_48:
846 807
847# Include video setup & detection code 808# Include video setup & detection code
848 809
849#include "video.S" 810#include "../../i386/boot/video.S"
850 811
851# Setup signature -- must be last 812# Setup signature -- must be last
852setup_sig1: .word SIG1 813setup_sig1: .word SIG1
diff --git a/arch/x86_64/boot/video.S b/arch/x86_64/boot/video.S
deleted file mode 100644
index 6090516c9c7f..000000000000
--- a/arch/x86_64/boot/video.S
+++ /dev/null
@@ -1,2043 +0,0 @@
1/* video.S
2 *
3 * Display adapter & video mode setup, version 2.13 (14-May-99)
4 *
5 * Copyright (C) 1995 -- 1998 Martin Mares <mj@ucw.cz>
6 * Based on the original setup.S code (C) Linus Torvalds and Mats Anderson
7 *
8 * Rewritten to use GNU 'as' by Chris Noe <stiker@northlink.com> May 1999
9 *
10 * For further information, look at Documentation/svga.txt.
11 *
12 */
13
14/* Enable autodetection of SVGA adapters and modes. */
15#undef CONFIG_VIDEO_SVGA
16
17/* Enable autodetection of VESA modes */
18#define CONFIG_VIDEO_VESA
19
20/* Enable compacting of mode table */
21#define CONFIG_VIDEO_COMPACT
22
23/* Retain screen contents when switching modes */
24#define CONFIG_VIDEO_RETAIN
25
26/* Enable local mode list */
27#undef CONFIG_VIDEO_LOCAL
28
29/* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour */
30#undef CONFIG_VIDEO_400_HACK
31
32/* Hack that lets you force specific BIOS mode ID and specific dimensions */
33#undef CONFIG_VIDEO_GFX_HACK
34#define VIDEO_GFX_BIOS_AX 0x4f02 /* 800x600 on ThinkPad */
35#define VIDEO_GFX_BIOS_BX 0x0102
36#define VIDEO_GFX_DUMMY_RESOLUTION 0x6425 /* 100x37 */
37
38/* This code uses an extended set of video mode numbers. These include:
39 * Aliases for standard modes
40 * NORMAL_VGA (-1)
41 * EXTENDED_VGA (-2)
42 * ASK_VGA (-3)
43 * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
44 * of compatibility when extending the table. These are between 0x00 and 0xff.
45 */
46#define VIDEO_FIRST_MENU 0x0000
47
48/* Standard BIOS video modes (BIOS number + 0x0100) */
49#define VIDEO_FIRST_BIOS 0x0100
50
51/* VESA BIOS video modes (VESA number + 0x0200) */
52#define VIDEO_FIRST_VESA 0x0200
53
54/* Video7 special modes (BIOS number + 0x0900) */
55#define VIDEO_FIRST_V7 0x0900
56
57/* Special video modes */
58#define VIDEO_FIRST_SPECIAL 0x0f00
59#define VIDEO_80x25 0x0f00
60#define VIDEO_8POINT 0x0f01
61#define VIDEO_80x43 0x0f02
62#define VIDEO_80x28 0x0f03
63#define VIDEO_CURRENT_MODE 0x0f04
64#define VIDEO_80x30 0x0f05
65#define VIDEO_80x34 0x0f06
66#define VIDEO_80x60 0x0f07
67#define VIDEO_GFX_HACK 0x0f08
68#define VIDEO_LAST_SPECIAL 0x0f09
69
70/* Video modes given by resolution */
71#define VIDEO_FIRST_RESOLUTION 0x1000
72
73/* The "recalculate timings" flag */
74#define VIDEO_RECALC 0x8000
75
76/* Positions of various video parameters passed to the kernel */
77/* (see also include/linux/tty.h) */
78#define PARAM_CURSOR_POS 0x00
79#define PARAM_VIDEO_PAGE 0x04
80#define PARAM_VIDEO_MODE 0x06
81#define PARAM_VIDEO_COLS 0x07
82#define PARAM_VIDEO_EGA_BX 0x0a
83#define PARAM_VIDEO_LINES 0x0e
84#define PARAM_HAVE_VGA 0x0f
85#define PARAM_FONT_POINTS 0x10
86
87#define PARAM_LFB_WIDTH 0x12
88#define PARAM_LFB_HEIGHT 0x14
89#define PARAM_LFB_DEPTH 0x16
90#define PARAM_LFB_BASE 0x18
91#define PARAM_LFB_SIZE 0x1c
92#define PARAM_LFB_LINELENGTH 0x24
93#define PARAM_LFB_COLORS 0x26
94#define PARAM_VESAPM_SEG 0x2e
95#define PARAM_VESAPM_OFF 0x30
96#define PARAM_LFB_PAGES 0x32
97#define PARAM_VESA_ATTRIB 0x34
98#define PARAM_CAPABILITIES 0x36
99
100/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */
101#ifdef CONFIG_VIDEO_RETAIN
102#define DO_STORE call store_screen
103#else
104#define DO_STORE
105#endif /* CONFIG_VIDEO_RETAIN */
106
107# This is the main entry point called by setup.S
108# %ds *must* be pointing to the bootsector
109video: pushw %ds # We use different segments
110 pushw %ds # FS contains original DS
111 popw %fs
112 pushw %cs # DS is equal to CS
113 popw %ds
114 pushw %cs # ES is equal to CS
115 popw %es
116 xorw %ax, %ax
117 movw %ax, %gs # GS is zero
118 cld
119 call basic_detect # Basic adapter type testing (EGA/VGA/MDA/CGA)
120#ifdef CONFIG_VIDEO_SELECT
121 movw %fs:(0x01fa), %ax # User selected video mode
122 cmpw $ASK_VGA, %ax # Bring up the menu
123 jz vid2
124
125 call mode_set # Set the mode
126 jc vid1
127
128 leaw badmdt, %si # Invalid mode ID
129 call prtstr
130vid2: call mode_menu
131vid1:
132#ifdef CONFIG_VIDEO_RETAIN
133 call restore_screen # Restore screen contents
134#endif /* CONFIG_VIDEO_RETAIN */
135 call store_edid
136#endif /* CONFIG_VIDEO_SELECT */
137 call mode_params # Store mode parameters
138 popw %ds # Restore original DS
139 ret
140
141# Detect if we have CGA, MDA, EGA or VGA and pass it to the kernel.
142basic_detect:
143 movb $0, %fs:(PARAM_HAVE_VGA)
144 movb $0x12, %ah # Check EGA/VGA
145 movb $0x10, %bl
146 int $0x10
147 movw %bx, %fs:(PARAM_VIDEO_EGA_BX) # Identifies EGA to the kernel
148 cmpb $0x10, %bl # No, it's a CGA/MDA/HGA card.
149 je basret
150
151 incb adapter
152 movw $0x1a00, %ax # Check EGA or VGA?
153 int $0x10
154 cmpb $0x1a, %al # 1a means VGA...
155 jne basret # anything else is EGA.
156
157 incb %fs:(PARAM_HAVE_VGA) # We've detected a VGA
158 incb adapter
159basret: ret
160
161# Store the video mode parameters for later usage by the kernel.
162# This is done by asking the BIOS except for the rows/columns
163# parameters in the default 80x25 mode -- these are set directly,
164# because some very obscure BIOSes supply insane values.
165mode_params:
166#ifdef CONFIG_VIDEO_SELECT
167 cmpb $0, graphic_mode
168 jnz mopar_gr
169#endif
170 movb $0x03, %ah # Read cursor position
171 xorb %bh, %bh
172 int $0x10
173 movw %dx, %fs:(PARAM_CURSOR_POS)
174 movb $0x0f, %ah # Read page/mode/width
175 int $0x10
176 movw %bx, %fs:(PARAM_VIDEO_PAGE)
177 movw %ax, %fs:(PARAM_VIDEO_MODE) # Video mode and screen width
178 cmpb $0x7, %al # MDA/HGA => segment differs
179 jnz mopar0
180
181 movw $0xb000, video_segment
182mopar0: movw %gs:(0x485), %ax # Font size
183 movw %ax, %fs:(PARAM_FONT_POINTS) # (valid only on EGA/VGA)
184 movw force_size, %ax # Forced size?
185 orw %ax, %ax
186 jz mopar1
187
188 movb %ah, %fs:(PARAM_VIDEO_COLS)
189 movb %al, %fs:(PARAM_VIDEO_LINES)
190 ret
191
192mopar1: movb $25, %al
193 cmpb $0, adapter # If we are on CGA/MDA/HGA, the
194 jz mopar2 # screen must have 25 lines.
195
196 movb %gs:(0x484), %al # On EGA/VGA, use the EGA+ BIOS
197 incb %al # location of max lines.
198mopar2: movb %al, %fs:(PARAM_VIDEO_LINES)
199 ret
200
201#ifdef CONFIG_VIDEO_SELECT
202# Fetching of VESA frame buffer parameters
203mopar_gr:
204 leaw modelist+1024, %di
205 movb $0x23, %fs:(PARAM_HAVE_VGA)
206 movw 16(%di), %ax
207 movw %ax, %fs:(PARAM_LFB_LINELENGTH)
208 movw 18(%di), %ax
209 movw %ax, %fs:(PARAM_LFB_WIDTH)
210 movw 20(%di), %ax
211 movw %ax, %fs:(PARAM_LFB_HEIGHT)
212 movb 25(%di), %al
213 movb $0, %ah
214 movw %ax, %fs:(PARAM_LFB_DEPTH)
215 movb 29(%di), %al
216 movb $0, %ah
217 movw %ax, %fs:(PARAM_LFB_PAGES)
218 movl 40(%di), %eax
219 movl %eax, %fs:(PARAM_LFB_BASE)
220 movl 31(%di), %eax
221 movl %eax, %fs:(PARAM_LFB_COLORS)
222 movl 35(%di), %eax
223 movl %eax, %fs:(PARAM_LFB_COLORS+4)
224 movw 0(%di), %ax
225 movw %ax, %fs:(PARAM_VESA_ATTRIB)
226
227# get video mem size
228 leaw modelist+1024, %di
229 movw $0x4f00, %ax
230 int $0x10
231 xorl %eax, %eax
232 movw 18(%di), %ax
233 movl %eax, %fs:(PARAM_LFB_SIZE)
234
235# store mode capabilities
236 movl 10(%di), %eax
237 movl %eax, %fs:(PARAM_CAPABILITIES)
238
239# switching the DAC to 8-bit is for <= 8 bpp only
240 movw %fs:(PARAM_LFB_DEPTH), %ax
241 cmpw $8, %ax
242 jg dac_done
243
244# get DAC switching capability
245 xorl %eax, %eax
246 movb 10(%di), %al
247 testb $1, %al
248 jz dac_set
249
250# attempt to switch DAC to 8-bit
251 movw $0x4f08, %ax
252 movw $0x0800, %bx
253 int $0x10
254 cmpw $0x004f, %ax
255 jne dac_set
256 movb %bh, dac_size # store actual DAC size
257
258dac_set:
259# set color size to DAC size
260 movb dac_size, %al
261 movb %al, %fs:(PARAM_LFB_COLORS+0)
262 movb %al, %fs:(PARAM_LFB_COLORS+2)
263 movb %al, %fs:(PARAM_LFB_COLORS+4)
264 movb %al, %fs:(PARAM_LFB_COLORS+6)
265
266# set color offsets to 0
267 movb $0, %fs:(PARAM_LFB_COLORS+1)
268 movb $0, %fs:(PARAM_LFB_COLORS+3)
269 movb $0, %fs:(PARAM_LFB_COLORS+5)
270 movb $0, %fs:(PARAM_LFB_COLORS+7)
271
272dac_done:
273# get protected mode interface informations
274 movw $0x4f0a, %ax
275 xorw %bx, %bx
276 xorw %di, %di
277 int $0x10
278 cmp $0x004f, %ax
279 jnz no_pm
280
281 movw %es, %fs:(PARAM_VESAPM_SEG)
282 movw %di, %fs:(PARAM_VESAPM_OFF)
283no_pm: ret
284
285# The video mode menu
286mode_menu:
287 leaw keymsg, %si # "Return/Space/Timeout" message
288 call prtstr
289 call flush
290nokey: call getkt
291
292 cmpb $0x0d, %al # ENTER ?
293 je listm # yes - manual mode selection
294
295 cmpb $0x20, %al # SPACE ?
296 je defmd1 # no - repeat
297
298 call beep
299 jmp nokey
300
301defmd1: ret # No mode chosen? Default 80x25
302
303listm: call mode_table # List mode table
304listm0: leaw name_bann, %si # Print adapter name
305 call prtstr
306 movw card_name, %si
307 orw %si, %si
308 jnz an2
309
310 movb adapter, %al
311 leaw old_name, %si
312 orb %al, %al
313 jz an1
314
315 leaw ega_name, %si
316 decb %al
317 jz an1
318
319 leaw vga_name, %si
320 jmp an1
321
322an2: call prtstr
323 leaw svga_name, %si
324an1: call prtstr
325 leaw listhdr, %si # Table header
326 call prtstr
327 movb $0x30, %dl # DL holds mode number
328 leaw modelist, %si
329lm1: cmpw $ASK_VGA, (%si) # End?
330 jz lm2
331
332 movb %dl, %al # Menu selection number
333 call prtchr
334 call prtsp2
335 lodsw
336 call prthw # Mode ID
337 call prtsp2
338 movb 0x1(%si), %al
339 call prtdec # Rows
340 movb $0x78, %al # the letter 'x'
341 call prtchr
342 lodsw
343 call prtdec # Columns
344 movb $0x0d, %al # New line
345 call prtchr
346 movb $0x0a, %al
347 call prtchr
348 incb %dl # Next character
349 cmpb $0x3a, %dl
350 jnz lm1
351
352 movb $0x61, %dl
353 jmp lm1
354
355lm2: leaw prompt, %si # Mode prompt
356 call prtstr
357 leaw edit_buf, %di # Editor buffer
358lm3: call getkey
359 cmpb $0x0d, %al # Enter?
360 jz lment
361
362 cmpb $0x08, %al # Backspace?
363 jz lmbs
364
365 cmpb $0x20, %al # Printable?
366 jc lm3
367
368 cmpw $edit_buf+4, %di # Enough space?
369 jz lm3
370
371 stosb
372 call prtchr
373 jmp lm3
374
375lmbs: cmpw $edit_buf, %di # Backspace
376 jz lm3
377
378 decw %di
379 movb $0x08, %al
380 call prtchr
381 call prtspc
382 movb $0x08, %al
383 call prtchr
384 jmp lm3
385
386lment: movb $0, (%di)
387 leaw crlft, %si
388 call prtstr
389 leaw edit_buf, %si
390 cmpb $0, (%si) # Empty string = default mode
391 jz lmdef
392
393 cmpb $0, 1(%si) # One character = menu selection
394 jz mnusel
395
396 cmpw $0x6373, (%si) # "scan" => mode scanning
397 jnz lmhx
398
399 cmpw $0x6e61, 2(%si)
400 jz lmscan
401
402lmhx: xorw %bx, %bx # Else => mode ID in hex
403lmhex: lodsb
404 orb %al, %al
405 jz lmuse1
406
407 subb $0x30, %al
408 jc lmbad
409
410 cmpb $10, %al
411 jc lmhx1
412
413 subb $7, %al
414 andb $0xdf, %al
415 cmpb $10, %al
416 jc lmbad
417
418 cmpb $16, %al
419 jnc lmbad
420
421lmhx1: shlw $4, %bx
422 orb %al, %bl
423 jmp lmhex
424
425lmuse1: movw %bx, %ax
426 jmp lmuse
427
428mnusel: lodsb # Menu selection
429 xorb %ah, %ah
430 subb $0x30, %al
431 jc lmbad
432
433 cmpb $10, %al
434 jc lmuse
435
436 cmpb $0x61-0x30, %al
437 jc lmbad
438
439 subb $0x61-0x30-10, %al
440 cmpb $36, %al
441 jnc lmbad
442
443lmuse: call mode_set
444 jc lmdef
445
446lmbad: leaw unknt, %si
447 call prtstr
448 jmp lm2
449lmscan: cmpb $0, adapter # Scanning only on EGA/VGA
450 jz lmbad
451
452 movw $0, mt_end # Scanning of modes is
453 movb $1, scanning # done as new autodetection.
454 call mode_table
455 jmp listm0
456lmdef: ret
457
458# Additional parts of mode_set... (relative jumps, you know)
459setv7: # Video7 extended modes
460 DO_STORE
461 subb $VIDEO_FIRST_V7>>8, %bh
462 movw $0x6f05, %ax
463 int $0x10
464 stc
465 ret
466
467_setrec: jmp setrec # Ugly...
468_set_80x25: jmp set_80x25
469
470# Aliases for backward compatibility.
471setalias:
472 movw $VIDEO_80x25, %ax
473 incw %bx
474 jz mode_set
475
476 movb $VIDEO_8POINT-VIDEO_FIRST_SPECIAL, %al
477 incw %bx
478 jnz setbad # Fall-through!
479
480# Setting of user mode (AX=mode ID) => CF=success
481mode_set:
482 movw %ax, %fs:(0x01fa) # Store mode for use in acpi_wakeup.S
483 movw %ax, %bx
484 cmpb $0xff, %ah
485 jz setalias
486
487 testb $VIDEO_RECALC>>8, %ah
488 jnz _setrec
489
490 cmpb $VIDEO_FIRST_RESOLUTION>>8, %ah
491 jnc setres
492
493 cmpb $VIDEO_FIRST_SPECIAL>>8, %ah
494 jz setspc
495
496 cmpb $VIDEO_FIRST_V7>>8, %ah
497 jz setv7
498
499 cmpb $VIDEO_FIRST_VESA>>8, %ah
500 jnc check_vesa
501
502 orb %ah, %ah
503 jz setmenu
504
505 decb %ah
506 jz setbios
507
508setbad: clc
509 movb $0, do_restore # The screen needn't be restored
510 ret
511
512setvesa:
513 DO_STORE
514 subb $VIDEO_FIRST_VESA>>8, %bh
515 movw $0x4f02, %ax # VESA BIOS mode set call
516 int $0x10
517 cmpw $0x004f, %ax # AL=4f if implemented
518 jnz setbad # AH=0 if OK
519
520 stc
521 ret
522
523setbios:
524 DO_STORE
525 int $0x10 # Standard BIOS mode set call
526 pushw %bx
527 movb $0x0f, %ah # Check if really set
528 int $0x10
529 popw %bx
530 cmpb %bl, %al
531 jnz setbad
532
533 stc
534 ret
535
536setspc: xorb %bh, %bh # Set special mode
537 cmpb $VIDEO_LAST_SPECIAL-VIDEO_FIRST_SPECIAL, %bl
538 jnc setbad
539
540 addw %bx, %bx
541 jmp *spec_inits(%bx)
542
543setmenu:
544 orb %al, %al # 80x25 is an exception
545 jz _set_80x25
546
547 pushw %bx # Set mode chosen from menu
548 call mode_table # Build the mode table
549 popw %ax
550 shlw $2, %ax
551 addw %ax, %si
552 cmpw %di, %si
553 jnc setbad
554
555 movw (%si), %ax # Fetch mode ID
556_m_s: jmp mode_set
557
558setres: pushw %bx # Set mode chosen by resolution
559 call mode_table
560 popw %bx
561 xchgb %bl, %bh
562setr1: lodsw
563 cmpw $ASK_VGA, %ax # End of the list?
564 jz setbad
565
566 lodsw
567 cmpw %bx, %ax
568 jnz setr1
569
570 movw -4(%si), %ax # Fetch mode ID
571 jmp _m_s
572
573check_vesa:
574#ifdef CONFIG_FIRMWARE_EDID
575 leaw modelist+1024, %di
576 movw $0x4f00, %ax
577 int $0x10
578 cmpw $0x004f, %ax
579 jnz setbad
580
581 movw 4(%di), %ax
582 movw %ax, vbe_version
583#endif
584 leaw modelist+1024, %di
585 subb $VIDEO_FIRST_VESA>>8, %bh
586 movw %bx, %cx # Get mode information structure
587 movw $0x4f01, %ax
588 int $0x10
589 addb $VIDEO_FIRST_VESA>>8, %bh
590 cmpw $0x004f, %ax
591 jnz setbad
592
593 movb (%di), %al # Check capabilities.
594 andb $0x19, %al
595 cmpb $0x09, %al
596 jz setvesa # This is a text mode
597
598 movb (%di), %al # Check capabilities.
599 andb $0x99, %al
600 cmpb $0x99, %al
601 jnz _setbad # Doh! No linear frame buffer.
602
603 subb $VIDEO_FIRST_VESA>>8, %bh
604 orw $0x4000, %bx # Use linear frame buffer
605 movw $0x4f02, %ax # VESA BIOS mode set call
606 int $0x10
607 cmpw $0x004f, %ax # AL=4f if implemented
608 jnz _setbad # AH=0 if OK
609
610 movb $1, graphic_mode # flag graphic mode
611 movb $0, do_restore # no screen restore
612 stc
613 ret
614
615_setbad: jmp setbad # Ugly...
616
617# Recalculate vertical display end registers -- this fixes various
618# inconsistencies of extended modes on many adapters. Called when
619# the VIDEO_RECALC flag is set in the mode ID.
620
621setrec: subb $VIDEO_RECALC>>8, %ah # Set the base mode
622 call mode_set
623 jnc rct3
624
625 movw %gs:(0x485), %ax # Font size in pixels
626 movb %gs:(0x484), %bl # Number of rows
627 incb %bl
628 mulb %bl # Number of visible
629 decw %ax # scan lines - 1
630 movw $0x3d4, %dx
631 movw %ax, %bx
632 movb $0x12, %al # Lower 8 bits
633 movb %bl, %ah
634 outw %ax, %dx
635 movb $0x07, %al # Bits 8 and 9 in the overflow register
636 call inidx
637 xchgb %al, %ah
638 andb $0xbd, %ah
639 shrb %bh
640 jnc rct1
641 orb $0x02, %ah
642rct1: shrb %bh
643 jnc rct2
644 orb $0x40, %ah
645rct2: movb $0x07, %al
646 outw %ax, %dx
647 stc
648rct3: ret
649
650# Table of routines for setting of the special modes.
651spec_inits:
652 .word set_80x25
653 .word set_8pixel
654 .word set_80x43
655 .word set_80x28
656 .word set_current
657 .word set_80x30
658 .word set_80x34
659 .word set_80x60
660 .word set_gfx
661
662# Set the 80x25 mode. If already set, do nothing.
663set_80x25:
664 movw $0x5019, force_size # Override possibly broken BIOS
665use_80x25:
666#ifdef CONFIG_VIDEO_400_HACK
667 movw $0x1202, %ax # Force 400 scan lines
668 movb $0x30, %bl
669 int $0x10
670#else
671 movb $0x0f, %ah # Get current mode ID
672 int $0x10
673 cmpw $0x5007, %ax # Mode 7 (80x25 mono) is the only one available
674 jz st80 # on CGA/MDA/HGA and is also available on EGAM
675
676 cmpw $0x5003, %ax # Unknown mode, force 80x25 color
677 jnz force3
678
679st80: cmpb $0, adapter # CGA/MDA/HGA => mode 3/7 is always 80x25
680 jz set80
681
682 movb %gs:(0x0484), %al # This is EGA+ -- beware of 80x50 etc.
683 orb %al, %al # Some buggy BIOS'es set 0 rows
684 jz set80
685
686 cmpb $24, %al # It's hopefully correct
687 jz set80
688#endif /* CONFIG_VIDEO_400_HACK */
689force3: DO_STORE
690 movw $0x0003, %ax # Forced set
691 int $0x10
692set80: stc
693 ret
694
695# Set the 80x50/80x43 8-pixel mode. Simple BIOS calls.
696set_8pixel:
697 DO_STORE
698 call use_80x25 # The base is 80x25
699set_8pt:
700 movw $0x1112, %ax # Use 8x8 font
701 xorb %bl, %bl
702 int $0x10
703 movw $0x1200, %ax # Use alternate print screen
704 movb $0x20, %bl
705 int $0x10
706 movw $0x1201, %ax # Turn off cursor emulation
707 movb $0x34, %bl
708 int $0x10
709 movb $0x01, %ah # Define cursor scan lines 6-7
710 movw $0x0607, %cx
711 int $0x10
712set_current:
713 stc
714 ret
715
716# Set the 80x28 mode. This mode works on all VGA's, because it's a standard
717# 80x25 mode with 14-point fonts instead of 16-point.
718set_80x28:
719 DO_STORE
720 call use_80x25 # The base is 80x25
721set14: movw $0x1111, %ax # Use 9x14 font
722 xorb %bl, %bl
723 int $0x10
724 movb $0x01, %ah # Define cursor scan lines 11-12
725 movw $0x0b0c, %cx
726 int $0x10
727 stc
728 ret
729
730# Set the 80x43 mode. This mode is works on all VGA's.
731# It's a 350-scanline mode with 8-pixel font.
732set_80x43:
733 DO_STORE
734 movw $0x1201, %ax # Set 350 scans
735 movb $0x30, %bl
736 int $0x10
737 movw $0x0003, %ax # Reset video mode
738 int $0x10
739 jmp set_8pt # Use 8-pixel font
740
741# Set the 80x30 mode (all VGA's). 480 scanlines, 16-pixel font.
742set_80x30:
743 call use_80x25 # Start with real 80x25
744 DO_STORE
745 movw $0x3cc, %dx # Get CRTC port
746 inb %dx, %al
747 movb $0xd4, %dl
748 rorb %al # Mono or color?
749 jc set48a
750
751 movb $0xb4, %dl
752set48a: movw $0x0c11, %ax # Vertical sync end (also unlocks CR0-7)
753 call outidx
754 movw $0x0b06, %ax # Vertical total
755 call outidx
756 movw $0x3e07, %ax # (Vertical) overflow
757 call outidx
758 movw $0xea10, %ax # Vertical sync start
759 call outidx
760 movw $0xdf12, %ax # Vertical display end
761 call outidx
762 movw $0xe715, %ax # Vertical blank start
763 call outidx
764 movw $0x0416, %ax # Vertical blank end
765 call outidx
766 pushw %dx
767 movb $0xcc, %dl # Misc output register (read)
768 inb %dx, %al
769 movb $0xc2, %dl # (write)
770 andb $0x0d, %al # Preserve clock select bits and color bit
771 orb $0xe2, %al # Set correct sync polarity
772 outb %al, %dx
773 popw %dx
774 movw $0x501e, force_size
775 stc # That's all.
776 ret
777
778# Set the 80x34 mode (all VGA's). 480 scans, 14-pixel font.
779set_80x34:
780 call set_80x30 # Set 480 scans
781 call set14 # And 14-pt font
782 movw $0xdb12, %ax # VGA vertical display end
783 movw $0x5022, force_size
784setvde: call outidx
785 stc
786 ret
787
788# Set the 80x60 mode (all VGA's). 480 scans, 8-pixel font.
789set_80x60:
790 call set_80x30 # Set 480 scans
791 call set_8pt # And 8-pt font
792 movw $0xdf12, %ax # VGA vertical display end
793 movw $0x503c, force_size
794 jmp setvde
795
796# Special hack for ThinkPad graphics
797set_gfx:
798#ifdef CONFIG_VIDEO_GFX_HACK
799 movw $VIDEO_GFX_BIOS_AX, %ax
800 movw $VIDEO_GFX_BIOS_BX, %bx
801 int $0x10
802 movw $VIDEO_GFX_DUMMY_RESOLUTION, force_size
803 stc
804#endif
805 ret
806
807#ifdef CONFIG_VIDEO_RETAIN
808
809# Store screen contents to temporary buffer.
810store_screen:
811 cmpb $0, do_restore # Already stored?
812 jnz stsr
813
814 testb $CAN_USE_HEAP, loadflags # Have we space for storing?
815 jz stsr
816
817 pushw %ax
818 pushw %bx
819 pushw force_size # Don't force specific size
820 movw $0, force_size
821 call mode_params # Obtain params of current mode
822 popw force_size
823 movb %fs:(PARAM_VIDEO_LINES), %ah
824 movb %fs:(PARAM_VIDEO_COLS), %al
825 movw %ax, %bx # BX=dimensions
826 mulb %ah
827 movw %ax, %cx # CX=number of characters
828 addw %ax, %ax # Calculate image size
829 addw $modelist+1024+4, %ax
830 cmpw heap_end_ptr, %ax
831 jnc sts1 # Unfortunately, out of memory
832
833 movw %fs:(PARAM_CURSOR_POS), %ax # Store mode params
834 leaw modelist+1024, %di
835 stosw
836 movw %bx, %ax
837 stosw
838 pushw %ds # Store the screen
839 movw video_segment, %ds
840 xorw %si, %si
841 rep
842 movsw
843 popw %ds
844 incb do_restore # Screen will be restored later
845sts1: popw %bx
846 popw %ax
847stsr: ret
848
849# Restore screen contents from temporary buffer.
850restore_screen:
851 cmpb $0, do_restore # Has the screen been stored?
852 jz res1
853
854 call mode_params # Get parameters of current mode
855 movb %fs:(PARAM_VIDEO_LINES), %cl
856 movb %fs:(PARAM_VIDEO_COLS), %ch
857 leaw modelist+1024, %si # Screen buffer
858 lodsw # Set cursor position
859 movw %ax, %dx
860 cmpb %cl, %dh
861 jc res2
862
863 movb %cl, %dh
864 decb %dh
865res2: cmpb %ch, %dl
866 jc res3
867
868 movb %ch, %dl
869 decb %dl
870res3: movb $0x02, %ah
871 movb $0x00, %bh
872 int $0x10
873 lodsw # Display size
874 movb %ah, %dl # DL=number of lines
875 movb $0, %ah # BX=phys. length of orig. line
876 movw %ax, %bx
877 cmpb %cl, %dl # Too many?
878 jc res4
879
880 pushw %ax
881 movb %dl, %al
882 subb %cl, %al
883 mulb %bl
884 addw %ax, %si
885 addw %ax, %si
886 popw %ax
887 movb %cl, %dl
888res4: cmpb %ch, %al # Too wide?
889 jc res5
890
891 movb %ch, %al # AX=width of src. line
892res5: movb $0, %cl
893 xchgb %ch, %cl
894 movw %cx, %bp # BP=width of dest. line
895 pushw %es
896 movw video_segment, %es
897 xorw %di, %di # Move the data
898 addw %bx, %bx # Convert BX and BP to _bytes_
899 addw %bp, %bp
900res6: pushw %si
901 pushw %di
902 movw %ax, %cx
903 rep
904 movsw
905 popw %di
906 popw %si
907 addw %bp, %di
908 addw %bx, %si
909 decb %dl
910 jnz res6
911
912 popw %es # Done
913res1: ret
914#endif /* CONFIG_VIDEO_RETAIN */
915
916# Write to indexed VGA register (AL=index, AH=data, DX=index reg. port)
917outidx: outb %al, %dx
918 pushw %ax
919 movb %ah, %al
920 incw %dx
921 outb %al, %dx
922 decw %dx
923 popw %ax
924 ret
925
926# Build the table of video modes (stored after the setup.S code at the
927# `modelist' label. Each video mode record looks like:
928# .word MODE-ID (our special mode ID (see above))
929# .byte rows (number of rows)
930# .byte columns (number of columns)
931# Returns address of the end of the table in DI, the end is marked
932# with a ASK_VGA ID.
933mode_table:
934 movw mt_end, %di # Already filled?
935 orw %di, %di
936 jnz mtab1x
937
938 leaw modelist, %di # Store standard modes:
939 movl $VIDEO_80x25 + 0x50190000, %eax # The 80x25 mode (ALL)
940 stosl
941 movb adapter, %al # CGA/MDA/HGA -- no more modes
942 orb %al, %al
943 jz mtabe
944
945 decb %al
946 jnz mtabv
947
948 movl $VIDEO_8POINT + 0x502b0000, %eax # The 80x43 EGA mode
949 stosl
950 jmp mtabe
951
952mtab1x: jmp mtab1
953
954mtabv: leaw vga_modes, %si # All modes for std VGA
955 movw $vga_modes_end-vga_modes, %cx
956 rep # I'm unable to use movsw as I don't know how to store a half
957 movsb # of the expression above to cx without using explicit shr.
958
959 cmpb $0, scanning # Mode scan requested?
960 jz mscan1
961
962 call mode_scan
963mscan1:
964
965#ifdef CONFIG_VIDEO_LOCAL
966 call local_modes
967#endif /* CONFIG_VIDEO_LOCAL */
968
969#ifdef CONFIG_VIDEO_VESA
970 call vesa_modes # Detect VESA VGA modes
971#endif /* CONFIG_VIDEO_VESA */
972
973#ifdef CONFIG_VIDEO_SVGA
974 cmpb $0, scanning # Bypass when scanning
975 jnz mscan2
976
977 call svga_modes # Detect SVGA cards & modes
978mscan2:
979#endif /* CONFIG_VIDEO_SVGA */
980
981mtabe:
982
983#ifdef CONFIG_VIDEO_COMPACT
984 leaw modelist, %si
985 movw %di, %dx
986 movw %si, %di
987cmt1: cmpw %dx, %si # Scan all modes
988 jz cmt2
989
990 leaw modelist, %bx # Find in previous entries
991 movw 2(%si), %cx
992cmt3: cmpw %bx, %si
993 jz cmt4
994
995 cmpw 2(%bx), %cx # Found => don't copy this entry
996 jz cmt5
997
998 addw $4, %bx
999 jmp cmt3
1000
1001cmt4: movsl # Copy entry
1002 jmp cmt1
1003
1004cmt5: addw $4, %si # Skip entry
1005 jmp cmt1
1006
1007cmt2:
1008#endif /* CONFIG_VIDEO_COMPACT */
1009
1010 movw $ASK_VGA, (%di) # End marker
1011 movw %di, mt_end
1012mtab1: leaw modelist, %si # SI=mode list, DI=list end
1013ret0: ret
1014
1015# Modes usable on all standard VGAs
1016vga_modes:
1017 .word VIDEO_8POINT
1018 .word 0x5032 # 80x50
1019 .word VIDEO_80x43
1020 .word 0x502b # 80x43
1021 .word VIDEO_80x28
1022 .word 0x501c # 80x28
1023 .word VIDEO_80x30
1024 .word 0x501e # 80x30
1025 .word VIDEO_80x34
1026 .word 0x5022 # 80x34
1027 .word VIDEO_80x60
1028 .word 0x503c # 80x60
1029#ifdef CONFIG_VIDEO_GFX_HACK
1030 .word VIDEO_GFX_HACK
1031 .word VIDEO_GFX_DUMMY_RESOLUTION
1032#endif
1033
1034vga_modes_end:
1035# Detect VESA modes.
1036
1037#ifdef CONFIG_VIDEO_VESA
1038vesa_modes:
1039 cmpb $2, adapter # VGA only
1040 jnz ret0
1041
1042 movw %di, %bp # BP=original mode table end
1043 addw $0x200, %di # Buffer space
1044 movw $0x4f00, %ax # VESA Get card info call
1045 int $0x10
1046 movw %bp, %di
1047 cmpw $0x004f, %ax # Successful?
1048 jnz ret0
1049
1050 cmpw $0x4556, 0x200(%di)
1051 jnz ret0
1052
1053 cmpw $0x4153, 0x202(%di)
1054 jnz ret0
1055
1056 movw $vesa_name, card_name # Set name to "VESA VGA"
1057 pushw %gs
1058 lgsw 0x20e(%di), %si # GS:SI=mode list
1059 movw $128, %cx # Iteration limit
1060vesa1:
1061# gas version 2.9.1, using BFD version 2.9.1.0.23 buggers the next inst.
1062# XXX: lodsw %gs:(%si), %ax # Get next mode in the list
1063 gs; lodsw
1064 cmpw $0xffff, %ax # End of the table?
1065 jz vesar
1066
1067 cmpw $0x0080, %ax # Check validity of mode ID
1068 jc vesa2
1069
1070 orb %ah, %ah # Valid IDs: 0x0000-0x007f/0x0100-0x07ff
1071 jz vesan # Certain BIOSes report 0x80-0xff!
1072
1073 cmpw $0x0800, %ax
1074 jnc vesae
1075
1076vesa2: pushw %cx
1077 movw %ax, %cx # Get mode information structure
1078 movw $0x4f01, %ax
1079 int $0x10
1080 movw %cx, %bx # BX=mode number
1081 addb $VIDEO_FIRST_VESA>>8, %bh
1082 popw %cx
1083 cmpw $0x004f, %ax
1084 jnz vesan # Don't report errors (buggy BIOSES)
1085
1086 movb (%di), %al # Check capabilities. We require
1087 andb $0x19, %al # a color text mode.
1088 cmpb $0x09, %al
1089 jnz vesan
1090
1091 cmpw $0xb800, 8(%di) # Standard video memory address required
1092 jnz vesan
1093
1094 testb $2, (%di) # Mode characteristics supplied?
1095 movw %bx, (%di) # Store mode number
1096 jz vesa3
1097
1098 xorw %dx, %dx
1099 movw 0x12(%di), %bx # Width
1100 orb %bh, %bh
1101 jnz vesan
1102
1103 movb %bl, 0x3(%di)
1104 movw 0x14(%di), %ax # Height
1105 orb %ah, %ah
1106 jnz vesan
1107
1108 movb %al, 2(%di)
1109 mulb %bl
1110 cmpw $8193, %ax # Small enough for Linux console driver?
1111 jnc vesan
1112
1113 jmp vesaok
1114
1115vesa3: subw $0x8108, %bx # This mode has no detailed info specified,
1116 jc vesan # so it must be a standard VESA mode.
1117
1118 cmpw $5, %bx
1119 jnc vesan
1120
1121 movw vesa_text_mode_table(%bx), %ax
1122 movw %ax, 2(%di)
1123vesaok: addw $4, %di # The mode is valid. Store it.
1124vesan: loop vesa1 # Next mode. Limit exceeded => error
1125vesae: leaw vesaer, %si
1126 call prtstr
1127 movw %bp, %di # Discard already found modes.
1128vesar: popw %gs
1129 ret
1130
1131# Dimensions of standard VESA text modes
1132vesa_text_mode_table:
1133 .byte 60, 80 # 0108
1134 .byte 25, 132 # 0109
1135 .byte 43, 132 # 010A
1136 .byte 50, 132 # 010B
1137 .byte 60, 132 # 010C
1138#endif /* CONFIG_VIDEO_VESA */
1139
1140# Scan for video modes. A bit dirty, but should work.
1141mode_scan:
1142 movw $0x0100, %cx # Start with mode 0
1143scm1: movb $0, %ah # Test the mode
1144 movb %cl, %al
1145 int $0x10
1146 movb $0x0f, %ah
1147 int $0x10
1148 cmpb %cl, %al
1149 jnz scm2 # Mode not set
1150
1151 movw $0x3c0, %dx # Test if it's a text mode
1152 movb $0x10, %al # Mode bits
1153 call inidx
1154 andb $0x03, %al
1155 jnz scm2
1156
1157 movb $0xce, %dl # Another set of mode bits
1158 movb $0x06, %al
1159 call inidx
1160 shrb %al
1161 jc scm2
1162
1163 movb $0xd4, %dl # Cursor location
1164 movb $0x0f, %al
1165 call inidx
1166 orb %al, %al
1167 jnz scm2
1168
1169 movw %cx, %ax # Ok, store the mode
1170 stosw
1171 movb %gs:(0x484), %al # Number of rows
1172 incb %al
1173 stosb
1174 movw %gs:(0x44a), %ax # Number of columns
1175 stosb
1176scm2: incb %cl
1177 jns scm1
1178
1179 movw $0x0003, %ax # Return back to mode 3
1180 int $0x10
1181 ret
1182
1183tstidx: outw %ax, %dx # OUT DX,AX and inidx
1184inidx: outb %al, %dx # Read from indexed VGA register
1185 incw %dx # AL=index, DX=index reg port -> AL=data
1186 inb %dx, %al
1187 decw %dx
1188 ret
1189
1190# Try to detect type of SVGA card and supply (usually approximate) video
1191# mode table for it.
1192
1193#ifdef CONFIG_VIDEO_SVGA
1194svga_modes:
1195 leaw svga_table, %si # Test all known SVGA adapters
1196dosvga: lodsw
1197 movw %ax, %bp # Default mode table
1198 orw %ax, %ax
1199 jz didsv1
1200
1201 lodsw # Pointer to test routine
1202 pushw %si
1203 pushw %di
1204 pushw %es
1205 movw $0xc000, %bx
1206 movw %bx, %es
1207 call *%ax # Call test routine
1208 popw %es
1209 popw %di
1210 popw %si
1211 orw %bp, %bp
1212 jz dosvga
1213
1214 movw %bp, %si # Found, copy the modes
1215 movb svga_prefix, %ah
1216cpsvga: lodsb
1217 orb %al, %al
1218 jz didsv
1219
1220 stosw
1221 movsw
1222 jmp cpsvga
1223
1224didsv: movw %si, card_name # Store pointer to card name
1225didsv1: ret
1226
1227# Table of all known SVGA cards. For each card, we store a pointer to
1228# a table of video modes supported by the card and a pointer to a routine
1229# used for testing of presence of the card. The video mode table is always
1230# followed by the name of the card or the chipset.
1231svga_table:
1232 .word ati_md, ati_test
1233 .word oak_md, oak_test
1234 .word paradise_md, paradise_test
1235 .word realtek_md, realtek_test
1236 .word s3_md, s3_test
1237 .word chips_md, chips_test
1238 .word video7_md, video7_test
1239 .word cirrus5_md, cirrus5_test
1240 .word cirrus6_md, cirrus6_test
1241 .word cirrus1_md, cirrus1_test
1242 .word ahead_md, ahead_test
1243 .word everex_md, everex_test
1244 .word genoa_md, genoa_test
1245 .word trident_md, trident_test
1246 .word tseng_md, tseng_test
1247 .word 0
1248
1249# Test routines and mode tables:
1250
1251# S3 - The test algorithm was taken from the SuperProbe package
1252# for XFree86 1.2.1. Report bugs to Christoph.Niemann@linux.org
1253s3_test:
1254 movw $0x0f35, %cx # we store some constants in cl/ch
1255 movw $0x03d4, %dx
1256 movb $0x38, %al
1257 call inidx
1258 movb %al, %bh # store current CRT-register 0x38
1259 movw $0x0038, %ax
1260 call outidx # disable writing to special regs
1261 movb %cl, %al # check whether we can write special reg 0x35
1262 call inidx
1263 movb %al, %bl # save the current value of CRT reg 0x35
1264 andb $0xf0, %al # clear bits 0-3
1265 movb %al, %ah
1266 movb %cl, %al # and write it to CRT reg 0x35
1267 call outidx
1268 call inidx # now read it back
1269 andb %ch, %al # clear the upper 4 bits
1270 jz s3_2 # the first test failed. But we have a
1271
1272 movb %bl, %ah # second chance
1273 movb %cl, %al
1274 call outidx
1275 jmp s3_1 # do the other tests
1276
1277s3_2: movw %cx, %ax # load ah with 0xf and al with 0x35
1278 orb %bl, %ah # set the upper 4 bits of ah with the orig value
1279 call outidx # write ...
1280 call inidx # ... and reread
1281 andb %cl, %al # turn off the upper 4 bits
1282 pushw %ax
1283 movb %bl, %ah # restore old value in register 0x35
1284 movb %cl, %al
1285 call outidx
1286 popw %ax
1287 cmpb %ch, %al # setting lower 4 bits was successful => bad
1288 je no_s3 # writing is allowed => this is not an S3
1289
1290s3_1: movw $0x4838, %ax # allow writing to special regs by putting
1291 call outidx # magic number into CRT-register 0x38
1292 movb %cl, %al # check whether we can write special reg 0x35
1293 call inidx
1294 movb %al, %bl
1295 andb $0xf0, %al
1296 movb %al, %ah
1297 movb %cl, %al
1298 call outidx
1299 call inidx
1300 andb %ch, %al
1301 jnz no_s3 # no, we can't write => no S3
1302
1303 movw %cx, %ax
1304 orb %bl, %ah
1305 call outidx
1306 call inidx
1307 andb %ch, %al
1308 pushw %ax
1309 movb %bl, %ah # restore old value in register 0x35
1310 movb %cl, %al
1311 call outidx
1312 popw %ax
1313 cmpb %ch, %al
1314 jne no_s31 # writing not possible => no S3
1315 movb $0x30, %al
1316 call inidx # now get the S3 id ...
1317 leaw idS3, %di
1318 movw $0x10, %cx
1319 repne
1320 scasb
1321 je no_s31
1322
1323 movb %bh, %ah
1324 movb $0x38, %al
1325 jmp s3rest
1326
1327no_s3: movb $0x35, %al # restore CRT register 0x35
1328 movb %bl, %ah
1329 call outidx
1330no_s31: xorw %bp, %bp # Detection failed
1331s3rest: movb %bh, %ah
1332 movb $0x38, %al # restore old value of CRT register 0x38
1333 jmp outidx
1334
1335idS3: .byte 0x81, 0x82, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95
1336 .byte 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa8, 0xb0
1337
1338s3_md: .byte 0x54, 0x2b, 0x84
1339 .byte 0x55, 0x19, 0x84
1340 .byte 0
1341 .ascii "S3"
1342 .byte 0
1343
1344# ATI cards.
1345ati_test:
1346 leaw idati, %si
1347 movw $0x31, %di
1348 movw $0x09, %cx
1349 repe
1350 cmpsb
1351 je atiok
1352
1353 xorw %bp, %bp
1354atiok: ret
1355
1356idati: .ascii "761295520"
1357
1358ati_md: .byte 0x23, 0x19, 0x84
1359 .byte 0x33, 0x2c, 0x84
1360 .byte 0x22, 0x1e, 0x64
1361 .byte 0x21, 0x19, 0x64
1362 .byte 0x58, 0x21, 0x50
1363 .byte 0x5b, 0x1e, 0x50
1364 .byte 0
1365 .ascii "ATI"
1366 .byte 0
1367
1368# AHEAD
1369ahead_test:
1370 movw $0x200f, %ax
1371 movw $0x3ce, %dx
1372 outw %ax, %dx
1373 incw %dx
1374 inb %dx, %al
1375 cmpb $0x20, %al
1376 je isahed
1377
1378 cmpb $0x21, %al
1379 je isahed
1380
1381 xorw %bp, %bp
1382isahed: ret
1383
1384ahead_md:
1385 .byte 0x22, 0x2c, 0x84
1386 .byte 0x23, 0x19, 0x84
1387 .byte 0x24, 0x1c, 0x84
1388 .byte 0x2f, 0x32, 0xa0
1389 .byte 0x32, 0x22, 0x50
1390 .byte 0x34, 0x42, 0x50
1391 .byte 0
1392 .ascii "Ahead"
1393 .byte 0
1394
1395# Chips & Tech.
1396chips_test:
1397 movw $0x3c3, %dx
1398 inb %dx, %al
1399 orb $0x10, %al
1400 outb %al, %dx
1401 movw $0x104, %dx
1402 inb %dx, %al
1403 movb %al, %bl
1404 movw $0x3c3, %dx
1405 inb %dx, %al
1406 andb $0xef, %al
1407 outb %al, %dx
1408 cmpb $0xa5, %bl
1409 je cantok
1410
1411 xorw %bp, %bp
1412cantok: ret
1413
1414chips_md:
1415 .byte 0x60, 0x19, 0x84
1416 .byte 0x61, 0x32, 0x84
1417 .byte 0
1418 .ascii "Chips & Technologies"
1419 .byte 0
1420
1421# Cirrus Logic 5X0
1422cirrus1_test:
1423 movw $0x3d4, %dx
1424 movb $0x0c, %al
1425 outb %al, %dx
1426 incw %dx
1427 inb %dx, %al
1428 movb %al, %bl
1429 xorb %al, %al
1430 outb %al, %dx
1431 decw %dx
1432 movb $0x1f, %al
1433 outb %al, %dx
1434 incw %dx
1435 inb %dx, %al
1436 movb %al, %bh
1437 xorb %ah, %ah
1438 shlb $4, %al
1439 movw %ax, %cx
1440 movb %bh, %al
1441 shrb $4, %al
1442 addw %ax, %cx
1443 shlw $8, %cx
1444 addw $6, %cx
1445 movw %cx, %ax
1446 movw $0x3c4, %dx
1447 outw %ax, %dx
1448 incw %dx
1449 inb %dx, %al
1450 andb %al, %al
1451 jnz nocirr
1452
1453 movb %bh, %al
1454 outb %al, %dx
1455 inb %dx, %al
1456 cmpb $0x01, %al
1457 je iscirr
1458
1459nocirr: xorw %bp, %bp
1460iscirr: movw $0x3d4, %dx
1461 movb %bl, %al
1462 xorb %ah, %ah
1463 shlw $8, %ax
1464 addw $0x0c, %ax
1465 outw %ax, %dx
1466 ret
1467
1468cirrus1_md:
1469 .byte 0x1f, 0x19, 0x84
1470 .byte 0x20, 0x2c, 0x84
1471 .byte 0x22, 0x1e, 0x84
1472 .byte 0x31, 0x25, 0x64
1473 .byte 0
1474 .ascii "Cirrus Logic 5X0"
1475 .byte 0
1476
1477# Cirrus Logic 54XX
1478cirrus5_test:
1479 movw $0x3c4, %dx
1480 movb $6, %al
1481 call inidx
1482 movb %al, %bl # BL=backup
1483 movw $6, %ax
1484 call tstidx
1485 cmpb $0x0f, %al
1486 jne c5fail
1487
1488 movw $0x1206, %ax
1489 call tstidx
1490 cmpb $0x12, %al
1491 jne c5fail
1492
1493 movb $0x1e, %al
1494 call inidx
1495 movb %al, %bh
1496 movb %bh, %ah
1497 andb $0xc0, %ah
1498 movb $0x1e, %al
1499 call tstidx
1500 andb $0x3f, %al
1501 jne c5xx
1502
1503 movb $0x1e, %al
1504 movb %bh, %ah
1505 orb $0x3f, %ah
1506 call tstidx
1507 xorb $0x3f, %al
1508 andb $0x3f, %al
1509c5xx: pushf
1510 movb $0x1e, %al
1511 movb %bh, %ah
1512 outw %ax, %dx
1513 popf
1514 je c5done
1515
1516c5fail: xorw %bp, %bp
1517c5done: movb $6, %al
1518 movb %bl, %ah
1519 outw %ax, %dx
1520 ret
1521
1522cirrus5_md:
1523 .byte 0x14, 0x19, 0x84
1524 .byte 0x54, 0x2b, 0x84
1525 .byte 0
1526 .ascii "Cirrus Logic 54XX"
1527 .byte 0
1528
1529# Cirrus Logic 64XX -- no known extra modes, but must be identified, because
1530# it's misidentified by the Ahead test.
1531cirrus6_test:
1532 movw $0x3ce, %dx
1533 movb $0x0a, %al
1534 call inidx
1535 movb %al, %bl # BL=backup
1536 movw $0xce0a, %ax
1537 call tstidx
1538 orb %al, %al
1539 jne c2fail
1540
1541 movw $0xec0a, %ax
1542 call tstidx
1543 cmpb $0x01, %al
1544 jne c2fail
1545
1546 movb $0xaa, %al
1547 call inidx # 4X, 5X, 7X and 8X are valid 64XX chip ID's.
1548 shrb $4, %al
1549 subb $4, %al
1550 jz c6done
1551
1552 decb %al
1553 jz c6done
1554
1555 subb $2, %al
1556 jz c6done
1557
1558 decb %al
1559 jz c6done
1560
1561c2fail: xorw %bp, %bp
1562c6done: movb $0x0a, %al
1563 movb %bl, %ah
1564 outw %ax, %dx
1565 ret
1566
1567cirrus6_md:
1568 .byte 0
1569 .ascii "Cirrus Logic 64XX"
1570 .byte 0
1571
1572# Everex / Trident
1573everex_test:
1574 movw $0x7000, %ax
1575 xorw %bx, %bx
1576 int $0x10
1577 cmpb $0x70, %al
1578 jne noevrx
1579
1580 shrw $4, %dx
1581 cmpw $0x678, %dx
1582 je evtrid
1583
1584 cmpw $0x236, %dx
1585 jne evrxok
1586
1587evtrid: leaw trident_md, %bp
1588evrxok: ret
1589
1590noevrx: xorw %bp, %bp
1591 ret
1592
1593everex_md:
1594 .byte 0x03, 0x22, 0x50
1595 .byte 0x04, 0x3c, 0x50
1596 .byte 0x07, 0x2b, 0x64
1597 .byte 0x08, 0x4b, 0x64
1598 .byte 0x0a, 0x19, 0x84
1599 .byte 0x0b, 0x2c, 0x84
1600 .byte 0x16, 0x1e, 0x50
1601 .byte 0x18, 0x1b, 0x64
1602 .byte 0x21, 0x40, 0xa0
1603 .byte 0x40, 0x1e, 0x84
1604 .byte 0
1605 .ascii "Everex/Trident"
1606 .byte 0
1607
1608# Genoa.
1609genoa_test:
1610 leaw idgenoa, %si # Check Genoa 'clues'
1611 xorw %ax, %ax
1612 movb %es:(0x37), %al
1613 movw %ax, %di
1614 movw $0x04, %cx
1615 decw %si
1616 decw %di
1617l1: incw %si
1618 incw %di
1619 movb (%si), %al
1620 testb %al, %al
1621 jz l2
1622
1623 cmpb %es:(%di), %al
1624l2: loope l1
1625 orw %cx, %cx
1626 je isgen
1627
1628 xorw %bp, %bp
1629isgen: ret
1630
1631idgenoa: .byte 0x77, 0x00, 0x99, 0x66
1632
1633genoa_md:
1634 .byte 0x58, 0x20, 0x50
1635 .byte 0x5a, 0x2a, 0x64
1636 .byte 0x60, 0x19, 0x84
1637 .byte 0x61, 0x1d, 0x84
1638 .byte 0x62, 0x20, 0x84
1639 .byte 0x63, 0x2c, 0x84
1640 .byte 0x64, 0x3c, 0x84
1641 .byte 0x6b, 0x4f, 0x64
1642 .byte 0x72, 0x3c, 0x50
1643 .byte 0x74, 0x42, 0x50
1644 .byte 0x78, 0x4b, 0x64
1645 .byte 0
1646 .ascii "Genoa"
1647 .byte 0
1648
1649# OAK
1650oak_test:
1651 leaw idoakvga, %si
1652 movw $0x08, %di
1653 movw $0x08, %cx
1654 repe
1655 cmpsb
1656 je isoak
1657
1658 xorw %bp, %bp
1659isoak: ret
1660
1661idoakvga: .ascii "OAK VGA "
1662
1663oak_md: .byte 0x4e, 0x3c, 0x50
1664 .byte 0x4f, 0x3c, 0x84
1665 .byte 0x50, 0x19, 0x84
1666 .byte 0x51, 0x2b, 0x84
1667 .byte 0
1668 .ascii "OAK"
1669 .byte 0
1670
1671# WD Paradise.
1672paradise_test:
1673 leaw idparadise, %si
1674 movw $0x7d, %di
1675 movw $0x04, %cx
1676 repe
1677 cmpsb
1678 je ispara
1679
1680 xorw %bp, %bp
1681ispara: ret
1682
1683idparadise: .ascii "VGA="
1684
1685paradise_md:
1686 .byte 0x41, 0x22, 0x50
1687 .byte 0x47, 0x1c, 0x84
1688 .byte 0x55, 0x19, 0x84
1689 .byte 0x54, 0x2c, 0x84
1690 .byte 0
1691 .ascii "Paradise"
1692 .byte 0
1693
1694# Trident.
1695trident_test:
1696 movw $0x3c4, %dx
1697 movb $0x0e, %al
1698 outb %al, %dx
1699 incw %dx
1700 inb %dx, %al
1701 xchgb %al, %ah
1702 xorb %al, %al
1703 outb %al, %dx
1704 inb %dx, %al
1705 xchgb %ah, %al
1706 movb %al, %bl # Strange thing ... in the book this wasn't
1707 andb $0x02, %bl # necessary but it worked on my card which
1708 jz setb2 # is a trident. Without it the screen goes
1709 # blurred ...
1710 andb $0xfd, %al
1711 jmp clrb2
1712
1713setb2: orb $0x02, %al
1714clrb2: outb %al, %dx
1715 andb $0x0f, %ah
1716 cmpb $0x02, %ah
1717 je istrid
1718
1719 xorw %bp, %bp
1720istrid: ret
1721
1722trident_md:
1723 .byte 0x50, 0x1e, 0x50
1724 .byte 0x51, 0x2b, 0x50
1725 .byte 0x52, 0x3c, 0x50
1726 .byte 0x57, 0x19, 0x84
1727 .byte 0x58, 0x1e, 0x84
1728 .byte 0x59, 0x2b, 0x84
1729 .byte 0x5a, 0x3c, 0x84
1730 .byte 0
1731 .ascii "Trident"
1732 .byte 0
1733
1734# Tseng.
1735tseng_test:
1736 movw $0x3cd, %dx
1737 inb %dx, %al # Could things be this simple ! :-)
1738 movb %al, %bl
1739 movb $0x55, %al
1740 outb %al, %dx
1741 inb %dx, %al
1742 movb %al, %ah
1743 movb %bl, %al
1744 outb %al, %dx
1745 cmpb $0x55, %ah
1746 je istsen
1747
1748isnot: xorw %bp, %bp
1749istsen: ret
1750
1751tseng_md:
1752 .byte 0x26, 0x3c, 0x50
1753 .byte 0x2a, 0x28, 0x64
1754 .byte 0x23, 0x19, 0x84
1755 .byte 0x24, 0x1c, 0x84
1756 .byte 0x22, 0x2c, 0x84
1757 .byte 0x21, 0x3c, 0x84
1758 .byte 0
1759 .ascii "Tseng"
1760 .byte 0
1761
1762# Video7.
1763video7_test:
1764 movw $0x3cc, %dx
1765 inb %dx, %al
1766 movw $0x3b4, %dx
1767 andb $0x01, %al
1768 jz even7
1769
1770 movw $0x3d4, %dx
1771even7: movb $0x0c, %al
1772 outb %al, %dx
1773 incw %dx
1774 inb %dx, %al
1775 movb %al, %bl
1776 movb $0x55, %al
1777 outb %al, %dx
1778 inb %dx, %al
1779 decw %dx
1780 movb $0x1f, %al
1781 outb %al, %dx
1782 incw %dx
1783 inb %dx, %al
1784 movb %al, %bh
1785 decw %dx
1786 movb $0x0c, %al
1787 outb %al, %dx
1788 incw %dx
1789 movb %bl, %al
1790 outb %al, %dx
1791 movb $0x55, %al
1792 xorb $0xea, %al
1793 cmpb %bh, %al
1794 jne isnot
1795
1796 movb $VIDEO_FIRST_V7>>8, svga_prefix # Use special mode switching
1797 ret
1798
1799video7_md:
1800 .byte 0x40, 0x2b, 0x50
1801 .byte 0x43, 0x3c, 0x50
1802 .byte 0x44, 0x3c, 0x64
1803 .byte 0x41, 0x19, 0x84
1804 .byte 0x42, 0x2c, 0x84
1805 .byte 0x45, 0x1c, 0x84
1806 .byte 0
1807 .ascii "Video 7"
1808 .byte 0
1809
1810# Realtek VGA
1811realtek_test:
1812 leaw idrtvga, %si
1813 movw $0x45, %di
1814 movw $0x0b, %cx
1815 repe
1816 cmpsb
1817 je isrt
1818
1819 xorw %bp, %bp
1820isrt: ret
1821
1822idrtvga: .ascii "REALTEK VGA"
1823
1824realtek_md:
1825 .byte 0x1a, 0x3c, 0x50
1826 .byte 0x1b, 0x19, 0x84
1827 .byte 0x1c, 0x1e, 0x84
1828 .byte 0x1d, 0x2b, 0x84
1829 .byte 0x1e, 0x3c, 0x84
1830 .byte 0
1831 .ascii "REALTEK"
1832 .byte 0
1833
1834#endif /* CONFIG_VIDEO_SVGA */
1835
1836# User-defined local mode table (VGA only)
1837#ifdef CONFIG_VIDEO_LOCAL
1838local_modes:
1839 leaw local_mode_table, %si
1840locm1: lodsw
1841 orw %ax, %ax
1842 jz locm2
1843
1844 stosw
1845 movsw
1846 jmp locm1
1847
1848locm2: ret
1849
1850# This is the table of local video modes which can be supplied manually
1851# by the user. Each entry consists of mode ID (word) and dimensions
1852# (byte for column count and another byte for row count). These modes
1853# are placed before all SVGA and VESA modes and override them if table
1854# compacting is enabled. The table must end with a zero word followed
1855# by NUL-terminated video adapter name.
1856local_mode_table:
1857 .word 0x0100 # Example: 40x25
1858 .byte 25,40
1859 .word 0
1860 .ascii "Local"
1861 .byte 0
1862#endif /* CONFIG_VIDEO_LOCAL */
1863
1864# Read a key and return the ASCII code in al, scan code in ah
1865getkey: xorb %ah, %ah
1866 int $0x16
1867 ret
1868
1869# Read a key with a timeout of 30 seconds.
1870# The hardware clock is used to get the time.
1871getkt: call gettime
1872 addb $30, %al # Wait 30 seconds
1873 cmpb $60, %al
1874 jl lminute
1875
1876 subb $60, %al
1877lminute:
1878 movb %al, %cl
1879again: movb $0x01, %ah
1880 int $0x16
1881 jnz getkey # key pressed, so get it
1882
1883 call gettime
1884 cmpb %cl, %al
1885 jne again
1886
1887 movb $0x20, %al # timeout, return `space'
1888 ret
1889
1890# Flush the keyboard buffer
1891flush: movb $0x01, %ah
1892 int $0x16
1893 jz empty
1894
1895 xorb %ah, %ah
1896 int $0x16
1897 jmp flush
1898
1899empty: ret
1900
1901# Print hexadecimal number.
1902prthw: pushw %ax
1903 movb %ah, %al
1904 call prthb
1905 popw %ax
1906prthb: pushw %ax
1907 shrb $4, %al
1908 call prthn
1909 popw %ax
1910 andb $0x0f, %al
1911prthn: cmpb $0x0a, %al
1912 jc prth1
1913
1914 addb $0x07, %al
1915prth1: addb $0x30, %al
1916 jmp prtchr
1917
1918# Print decimal number in al
1919prtdec: pushw %ax
1920 pushw %cx
1921 xorb %ah, %ah
1922 movb $0x0a, %cl
1923 idivb %cl
1924 cmpb $0x09, %al
1925 jbe lt100
1926
1927 call prtdec
1928 jmp skip10
1929
1930lt100: addb $0x30, %al
1931 call prtchr
1932skip10: movb %ah, %al
1933 addb $0x30, %al
1934 call prtchr
1935 popw %cx
1936 popw %ax
1937 ret
1938
1939store_edid:
1940#ifdef CONFIG_FIRMWARE_EDID
1941 pushw %es # just save all registers
1942 pushw %ax
1943 pushw %bx
1944 pushw %cx
1945 pushw %dx
1946 pushw %di
1947
1948 pushw %fs
1949 popw %es
1950
1951 movl $0x13131313, %eax # memset block with 0x13
1952 movw $32, %cx
1953 movw $0x140, %di
1954 cld
1955 rep
1956 stosl
1957
1958 cmpw $0x0200, vbe_version # only do EDID on >= VBE2.0
1959 jl no_edid
1960
1961 pushw %es # save ES
1962 xorw %di, %di # Report Capability
1963 pushw %di
1964 popw %es # ES:DI must be 0:0
1965 movw $0x4f15, %ax
1966 xorw %bx, %bx
1967 xorw %cx, %cx
1968 int $0x10
1969 popw %es # restore ES
1970
1971 cmpb $0x00, %ah # call successful
1972 jne no_edid
1973
1974 cmpb $0x4f, %al # function supported
1975 jne no_edid
1976
1977 movw $0x4f15, %ax # do VBE/DDC
1978 movw $0x01, %bx
1979 movw $0x00, %cx
1980 movw $0x01, %dx
1981 movw $0x140, %di
1982 int $0x10
1983
1984no_edid:
1985 popw %di # restore all registers
1986 popw %dx
1987 popw %cx
1988 popw %bx
1989 popw %ax
1990 popw %es
1991#endif
1992 ret
1993
1994# VIDEO_SELECT-only variables
1995mt_end: .word 0 # End of video mode table if built
1996edit_buf: .space 6 # Line editor buffer
1997card_name: .word 0 # Pointer to adapter name
1998scanning: .byte 0 # Performing mode scan
1999do_restore: .byte 0 # Screen contents altered during mode change
2000svga_prefix: .byte VIDEO_FIRST_BIOS>>8 # Default prefix for BIOS modes
2001graphic_mode: .byte 0 # Graphic mode with a linear frame buffer
2002dac_size: .byte 6 # DAC bit depth
2003vbe_version: .word 0 # VBE bios version
2004
2005# Status messages
2006keymsg: .ascii "Press <RETURN> to see video modes available, "
2007 .ascii "<SPACE> to continue or wait 30 secs"
2008 .byte 0x0d, 0x0a, 0
2009
2010listhdr: .byte 0x0d, 0x0a
2011 .ascii "Mode: COLSxROWS:"
2012
2013crlft: .byte 0x0d, 0x0a, 0
2014
2015prompt: .byte 0x0d, 0x0a
2016 .asciz "Enter mode number or `scan': "
2017
2018unknt: .asciz "Unknown mode ID. Try again."
2019
2020badmdt: .ascii "You passed an undefined mode number."
2021 .byte 0x0d, 0x0a, 0
2022
2023vesaer: .ascii "Error: Scanning of VESA modes failed. Please "
2024 .ascii "report to <mj@ucw.cz>."
2025 .byte 0x0d, 0x0a, 0
2026
2027old_name: .asciz "CGA/MDA/HGA"
2028
2029ega_name: .asciz "EGA"
2030
2031svga_name: .ascii " "
2032
2033vga_name: .asciz "VGA"
2034
2035vesa_name: .asciz "VESA"
2036
2037name_bann: .asciz "Video adapter: "
2038#endif /* CONFIG_VIDEO_SELECT */
2039
2040# Other variables:
2041adapter: .byte 0 # Video adapter: 0=CGA/MDA/HGA,1=EGA,2=VGA
2042video_segment: .word 0xb800 # Video memory segment
2043force_size: .word 0 # Use this size instead of the one in BIOS vars
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index b26378815b91..941a7e3aa5fb 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.21-rc3 3# Linux kernel version: 2.6.21-git3
4# Wed Mar 7 15:29:47 2007 4# Tue May 1 07:30:48 2007
5# 5#
6CONFIG_X86_64=y 6CONFIG_X86_64=y
7CONFIG_64BIT=y 7CONFIG_64BIT=y
@@ -118,11 +118,11 @@ CONFIG_X86_PC=y
118# CONFIG_X86_VSMP is not set 118# CONFIG_X86_VSMP is not set
119# CONFIG_MK8 is not set 119# CONFIG_MK8 is not set
120# CONFIG_MPSC is not set 120# CONFIG_MPSC is not set
121# CONFIG_MCORE2 is not set 121CONFIG_MCORE2=y
122CONFIG_GENERIC_CPU=y 122# CONFIG_GENERIC_CPU is not set
123CONFIG_X86_L1_CACHE_BYTES=128 123CONFIG_X86_L1_CACHE_BYTES=64
124CONFIG_X86_L1_CACHE_SHIFT=7 124CONFIG_X86_L1_CACHE_SHIFT=6
125CONFIG_X86_INTERNODE_CACHE_BYTES=128 125CONFIG_X86_INTERNODE_CACHE_BYTES=64
126CONFIG_X86_TSC=y 126CONFIG_X86_TSC=y
127CONFIG_X86_GOOD_APIC=y 127CONFIG_X86_GOOD_APIC=y
128# CONFIG_MICROCODE is not set 128# CONFIG_MICROCODE is not set
@@ -174,6 +174,7 @@ CONFIG_X86_MCE_INTEL=y
174CONFIG_X86_MCE_AMD=y 174CONFIG_X86_MCE_AMD=y
175# CONFIG_KEXEC is not set 175# CONFIG_KEXEC is not set
176# CONFIG_CRASH_DUMP is not set 176# CONFIG_CRASH_DUMP is not set
177# CONFIG_RELOCATABLE is not set
177CONFIG_PHYSICAL_START=0x200000 178CONFIG_PHYSICAL_START=0x200000
178CONFIG_SECCOMP=y 179CONFIG_SECCOMP=y
179# CONFIG_CC_STACKPROTECTOR is not set 180# CONFIG_CC_STACKPROTECTOR is not set
@@ -182,7 +183,6 @@ CONFIG_HZ_250=y
182# CONFIG_HZ_300 is not set 183# CONFIG_HZ_300 is not set
183# CONFIG_HZ_1000 is not set 184# CONFIG_HZ_1000 is not set
184CONFIG_HZ=250 185CONFIG_HZ=250
185# CONFIG_REORDER is not set
186CONFIG_K8_NB=y 186CONFIG_K8_NB=y
187CONFIG_GENERIC_HARDIRQS=y 187CONFIG_GENERIC_HARDIRQS=y
188CONFIG_GENERIC_IRQ_PROBE=y 188CONFIG_GENERIC_IRQ_PROBE=y
@@ -218,7 +218,6 @@ CONFIG_ACPI_HOTPLUG_CPU=y
218CONFIG_ACPI_THERMAL=y 218CONFIG_ACPI_THERMAL=y
219CONFIG_ACPI_NUMA=y 219CONFIG_ACPI_NUMA=y
220# CONFIG_ACPI_ASUS is not set 220# CONFIG_ACPI_ASUS is not set
221# CONFIG_ACPI_IBM is not set
222# CONFIG_ACPI_TOSHIBA is not set 221# CONFIG_ACPI_TOSHIBA is not set
223CONFIG_ACPI_BLACKLIST_YEAR=0 222CONFIG_ACPI_BLACKLIST_YEAR=0
224# CONFIG_ACPI_DEBUG is not set 223# CONFIG_ACPI_DEBUG is not set
@@ -243,7 +242,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
243# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set 242# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
244CONFIG_CPU_FREQ_GOV_USERSPACE=y 243CONFIG_CPU_FREQ_GOV_USERSPACE=y
245CONFIG_CPU_FREQ_GOV_ONDEMAND=y 244CONFIG_CPU_FREQ_GOV_ONDEMAND=y
246# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set 245CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
247 246
248# 247#
249# CPUFreq processor drivers 248# CPUFreq processor drivers
@@ -299,7 +298,6 @@ CONFIG_NET=y
299# 298#
300# Networking options 299# Networking options
301# 300#
302# CONFIG_NETDEBUG is not set
303CONFIG_PACKET=y 301CONFIG_PACKET=y
304# CONFIG_PACKET_MMAP is not set 302# CONFIG_PACKET_MMAP is not set
305CONFIG_UNIX=y 303CONFIG_UNIX=y
@@ -334,6 +332,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
334CONFIG_IPV6=y 332CONFIG_IPV6=y
335# CONFIG_IPV6_PRIVACY is not set 333# CONFIG_IPV6_PRIVACY is not set
336# CONFIG_IPV6_ROUTER_PREF is not set 334# CONFIG_IPV6_ROUTER_PREF is not set
335# CONFIG_IPV6_OPTIMISTIC_DAD is not set
337# CONFIG_INET6_AH is not set 336# CONFIG_INET6_AH is not set
338# CONFIG_INET6_ESP is not set 337# CONFIG_INET6_ESP is not set
339# CONFIG_INET6_IPCOMP is not set 338# CONFIG_INET6_IPCOMP is not set
@@ -389,6 +388,13 @@ CONFIG_IPV6_SIT=y
389# CONFIG_HAMRADIO is not set 388# CONFIG_HAMRADIO is not set
390# CONFIG_IRDA is not set 389# CONFIG_IRDA is not set
391# CONFIG_BT is not set 390# CONFIG_BT is not set
391# CONFIG_AF_RXRPC is not set
392
393#
394# Wireless
395#
396# CONFIG_CFG80211 is not set
397# CONFIG_WIRELESS_EXT is not set
392# CONFIG_IEEE80211 is not set 398# CONFIG_IEEE80211 is not set
393 399
394# 400#
@@ -409,10 +415,6 @@ CONFIG_FW_LOADER=y
409# Connector - unified userspace <-> kernelspace linker 415# Connector - unified userspace <-> kernelspace linker
410# 416#
411# CONFIG_CONNECTOR is not set 417# CONFIG_CONNECTOR is not set
412
413#
414# Memory Technology Devices (MTD)
415#
416# CONFIG_MTD is not set 418# CONFIG_MTD is not set
417 419
418# 420#
@@ -459,6 +461,7 @@ CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
459# CONFIG_SGI_IOC4 is not set 461# CONFIG_SGI_IOC4 is not set
460# CONFIG_TIFM_CORE is not set 462# CONFIG_TIFM_CORE is not set
461# CONFIG_SONY_LAPTOP is not set 463# CONFIG_SONY_LAPTOP is not set
464# CONFIG_THINKPAD_ACPI is not set
462 465
463# 466#
464# ATA/ATAPI/MFM/RLL support 467# ATA/ATAPI/MFM/RLL support
@@ -494,7 +497,6 @@ CONFIG_BLK_DEV_IDEPCI=y
494# CONFIG_BLK_DEV_RZ1000 is not set 497# CONFIG_BLK_DEV_RZ1000 is not set
495CONFIG_BLK_DEV_IDEDMA_PCI=y 498CONFIG_BLK_DEV_IDEDMA_PCI=y
496# CONFIG_BLK_DEV_IDEDMA_FORCED is not set 499# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
497CONFIG_IDEDMA_PCI_AUTO=y
498# CONFIG_IDEDMA_ONLYDISK is not set 500# CONFIG_IDEDMA_ONLYDISK is not set
499# CONFIG_BLK_DEV_AEC62XX is not set 501# CONFIG_BLK_DEV_AEC62XX is not set
500# CONFIG_BLK_DEV_ALI15X3 is not set 502# CONFIG_BLK_DEV_ALI15X3 is not set
@@ -525,7 +527,6 @@ CONFIG_BLK_DEV_PDC202XX_NEW=y
525# CONFIG_IDE_ARM is not set 527# CONFIG_IDE_ARM is not set
526CONFIG_BLK_DEV_IDEDMA=y 528CONFIG_BLK_DEV_IDEDMA=y
527# CONFIG_IDEDMA_IVB is not set 529# CONFIG_IDEDMA_IVB is not set
528CONFIG_IDEDMA_AUTO=y
529# CONFIG_BLK_DEV_HD is not set 530# CONFIG_BLK_DEV_HD is not set
530 531
531# 532#
@@ -584,11 +585,9 @@ CONFIG_AIC79XX_DEBUG_MASK=0
584# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set 585# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
585# CONFIG_SCSI_AIC94XX is not set 586# CONFIG_SCSI_AIC94XX is not set
586# CONFIG_SCSI_ARCMSR is not set 587# CONFIG_SCSI_ARCMSR is not set
587CONFIG_MEGARAID_NEWGEN=y 588# CONFIG_MEGARAID_NEWGEN is not set
588CONFIG_MEGARAID_MM=y
589CONFIG_MEGARAID_MAILBOX=y
590# CONFIG_MEGARAID_LEGACY is not set 589# CONFIG_MEGARAID_LEGACY is not set
591CONFIG_MEGARAID_SAS=y 590# CONFIG_MEGARAID_SAS is not set
592# CONFIG_SCSI_HPTIOP is not set 591# CONFIG_SCSI_HPTIOP is not set
593# CONFIG_SCSI_BUSLOGIC is not set 592# CONFIG_SCSI_BUSLOGIC is not set
594# CONFIG_SCSI_DMX3191D is not set 593# CONFIG_SCSI_DMX3191D is not set
@@ -608,6 +607,7 @@ CONFIG_MEGARAID_SAS=y
608# CONFIG_SCSI_DC395x is not set 607# CONFIG_SCSI_DC395x is not set
609# CONFIG_SCSI_DC390T is not set 608# CONFIG_SCSI_DC390T is not set
610# CONFIG_SCSI_DEBUG is not set 609# CONFIG_SCSI_DEBUG is not set
610# CONFIG_SCSI_ESP_CORE is not set
611# CONFIG_SCSI_SRP is not set 611# CONFIG_SCSI_SRP is not set
612 612
613# 613#
@@ -636,6 +636,7 @@ CONFIG_SATA_ACPI=y
636# CONFIG_PATA_AMD is not set 636# CONFIG_PATA_AMD is not set
637# CONFIG_PATA_ARTOP is not set 637# CONFIG_PATA_ARTOP is not set
638# CONFIG_PATA_ATIIXP is not set 638# CONFIG_PATA_ATIIXP is not set
639# CONFIG_PATA_CMD640_PCI is not set
639# CONFIG_PATA_CMD64X is not set 640# CONFIG_PATA_CMD64X is not set
640# CONFIG_PATA_CS5520 is not set 641# CONFIG_PATA_CS5520 is not set
641# CONFIG_PATA_CS5530 is not set 642# CONFIG_PATA_CS5530 is not set
@@ -687,7 +688,7 @@ CONFIG_BLK_DEV_DM=y
687CONFIG_FUSION=y 688CONFIG_FUSION=y
688CONFIG_FUSION_SPI=y 689CONFIG_FUSION_SPI=y
689# CONFIG_FUSION_FC is not set 690# CONFIG_FUSION_FC is not set
690CONFIG_FUSION_SAS=y 691# CONFIG_FUSION_SAS is not set
691CONFIG_FUSION_MAX_SGE=128 692CONFIG_FUSION_MAX_SGE=128
692# CONFIG_FUSION_CTL is not set 693# CONFIG_FUSION_CTL is not set
693 694
@@ -700,19 +701,22 @@ CONFIG_IEEE1394=y
700# Subsystem Options 701# Subsystem Options
701# 702#
702# CONFIG_IEEE1394_VERBOSEDEBUG is not set 703# CONFIG_IEEE1394_VERBOSEDEBUG is not set
703# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
704 704
705# 705#
706# Device Drivers 706# Controllers
707#
708
709#
710# Texas Instruments PCILynx requires I2C
707# 711#
708# CONFIG_IEEE1394_PCILYNX is not set
709CONFIG_IEEE1394_OHCI1394=y 712CONFIG_IEEE1394_OHCI1394=y
710 713
711# 714#
712# Protocol Drivers 715# Protocols
713# 716#
714# CONFIG_IEEE1394_VIDEO1394 is not set 717# CONFIG_IEEE1394_VIDEO1394 is not set
715# CONFIG_IEEE1394_SBP2 is not set 718# CONFIG_IEEE1394_SBP2 is not set
719# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set
716# CONFIG_IEEE1394_ETH1394 is not set 720# CONFIG_IEEE1394_ETH1394 is not set
717# CONFIG_IEEE1394_DV1394 is not set 721# CONFIG_IEEE1394_DV1394 is not set
718CONFIG_IEEE1394_RAWIO=y 722CONFIG_IEEE1394_RAWIO=y
@@ -775,7 +779,8 @@ CONFIG_TULIP=y
775# CONFIG_HP100 is not set 779# CONFIG_HP100 is not set
776CONFIG_NET_PCI=y 780CONFIG_NET_PCI=y
777# CONFIG_PCNET32 is not set 781# CONFIG_PCNET32 is not set
778# CONFIG_AMD8111_ETH is not set 782CONFIG_AMD8111_ETH=y
783# CONFIG_AMD8111E_NAPI is not set
779# CONFIG_ADAPTEC_STARFIRE is not set 784# CONFIG_ADAPTEC_STARFIRE is not set
780CONFIG_B44=y 785CONFIG_B44=y
781CONFIG_FORCEDETH=y 786CONFIG_FORCEDETH=y
@@ -837,9 +842,10 @@ CONFIG_S2IO=m
837# CONFIG_TR is not set 842# CONFIG_TR is not set
838 843
839# 844#
840# Wireless LAN (non-hamradio) 845# Wireless LAN
841# 846#
842# CONFIG_NET_RADIO is not set 847# CONFIG_WLAN_PRE80211 is not set
848# CONFIG_WLAN_80211 is not set
843 849
844# 850#
845# Wan interfaces 851# Wan interfaces
@@ -853,7 +859,6 @@ CONFIG_S2IO=m
853# CONFIG_SHAPER is not set 859# CONFIG_SHAPER is not set
854CONFIG_NETCONSOLE=y 860CONFIG_NETCONSOLE=y
855CONFIG_NETPOLL=y 861CONFIG_NETPOLL=y
856# CONFIG_NETPOLL_RX is not set
857# CONFIG_NETPOLL_TRAP is not set 862# CONFIG_NETPOLL_TRAP is not set
858CONFIG_NET_POLL_CONTROLLER=y 863CONFIG_NET_POLL_CONTROLLER=y
859 864
@@ -987,57 +992,7 @@ CONFIG_HPET_MMAP=y
987# 992#
988# I2C support 993# I2C support
989# 994#
990CONFIG_I2C=m 995# CONFIG_I2C is not set
991CONFIG_I2C_CHARDEV=m
992
993#
994# I2C Algorithms
995#
996# CONFIG_I2C_ALGOBIT is not set
997# CONFIG_I2C_ALGOPCF is not set
998# CONFIG_I2C_ALGOPCA is not set
999
1000#
1001# I2C Hardware Bus support
1002#
1003# CONFIG_I2C_ALI1535 is not set
1004# CONFIG_I2C_ALI1563 is not set
1005# CONFIG_I2C_ALI15X3 is not set
1006# CONFIG_I2C_AMD756 is not set
1007# CONFIG_I2C_AMD8111 is not set
1008# CONFIG_I2C_I801 is not set
1009# CONFIG_I2C_I810 is not set
1010# CONFIG_I2C_PIIX4 is not set
1011CONFIG_I2C_ISA=m
1012# CONFIG_I2C_NFORCE2 is not set
1013# CONFIG_I2C_OCORES is not set
1014# CONFIG_I2C_PARPORT_LIGHT is not set
1015# CONFIG_I2C_PASEMI is not set
1016# CONFIG_I2C_PROSAVAGE is not set
1017# CONFIG_I2C_SAVAGE4 is not set
1018# CONFIG_I2C_SIS5595 is not set
1019# CONFIG_I2C_SIS630 is not set
1020# CONFIG_I2C_SIS96X is not set
1021# CONFIG_I2C_STUB is not set
1022# CONFIG_I2C_VIA is not set
1023# CONFIG_I2C_VIAPRO is not set
1024# CONFIG_I2C_VOODOO3 is not set
1025# CONFIG_I2C_PCA_ISA is not set
1026
1027#
1028# Miscellaneous I2C Chip support
1029#
1030# CONFIG_SENSORS_DS1337 is not set
1031# CONFIG_SENSORS_DS1374 is not set
1032# CONFIG_SENSORS_EEPROM is not set
1033# CONFIG_SENSORS_PCF8574 is not set
1034# CONFIG_SENSORS_PCA9539 is not set
1035# CONFIG_SENSORS_PCF8591 is not set
1036# CONFIG_SENSORS_MAX6875 is not set
1037# CONFIG_I2C_DEBUG_CORE is not set
1038# CONFIG_I2C_DEBUG_ALGO is not set
1039# CONFIG_I2C_DEBUG_BUS is not set
1040# CONFIG_I2C_DEBUG_CHIP is not set
1041 996
1042# 997#
1043# SPI support 998# SPI support
@@ -1053,54 +1008,8 @@ CONFIG_I2C_ISA=m
1053# 1008#
1054# Hardware Monitoring support 1009# Hardware Monitoring support
1055# 1010#
1056CONFIG_HWMON=y 1011# CONFIG_HWMON is not set
1057# CONFIG_HWMON_VID is not set 1012# CONFIG_HWMON_VID is not set
1058# CONFIG_SENSORS_ABITUGURU is not set
1059# CONFIG_SENSORS_ADM1021 is not set
1060# CONFIG_SENSORS_ADM1025 is not set
1061# CONFIG_SENSORS_ADM1026 is not set
1062# CONFIG_SENSORS_ADM1029 is not set
1063# CONFIG_SENSORS_ADM1031 is not set
1064# CONFIG_SENSORS_ADM9240 is not set
1065# CONFIG_SENSORS_K8TEMP is not set
1066# CONFIG_SENSORS_ASB100 is not set
1067# CONFIG_SENSORS_ATXP1 is not set
1068# CONFIG_SENSORS_DS1621 is not set
1069# CONFIG_SENSORS_F71805F is not set
1070# CONFIG_SENSORS_FSCHER is not set
1071# CONFIG_SENSORS_FSCPOS is not set
1072# CONFIG_SENSORS_GL518SM is not set
1073# CONFIG_SENSORS_GL520SM is not set
1074# CONFIG_SENSORS_IT87 is not set
1075# CONFIG_SENSORS_LM63 is not set
1076# CONFIG_SENSORS_LM75 is not set
1077# CONFIG_SENSORS_LM77 is not set
1078# CONFIG_SENSORS_LM78 is not set
1079# CONFIG_SENSORS_LM80 is not set
1080# CONFIG_SENSORS_LM83 is not set
1081# CONFIG_SENSORS_LM85 is not set
1082# CONFIG_SENSORS_LM87 is not set
1083# CONFIG_SENSORS_LM90 is not set
1084# CONFIG_SENSORS_LM92 is not set
1085# CONFIG_SENSORS_MAX1619 is not set
1086# CONFIG_SENSORS_PC87360 is not set
1087# CONFIG_SENSORS_PC87427 is not set
1088# CONFIG_SENSORS_SIS5595 is not set
1089# CONFIG_SENSORS_SMSC47M1 is not set
1090# CONFIG_SENSORS_SMSC47M192 is not set
1091CONFIG_SENSORS_SMSC47B397=m
1092# CONFIG_SENSORS_VIA686A is not set
1093# CONFIG_SENSORS_VT1211 is not set
1094# CONFIG_SENSORS_VT8231 is not set
1095# CONFIG_SENSORS_W83781D is not set
1096# CONFIG_SENSORS_W83791D is not set
1097# CONFIG_SENSORS_W83792D is not set
1098# CONFIG_SENSORS_W83793 is not set
1099# CONFIG_SENSORS_W83L785TS is not set
1100# CONFIG_SENSORS_W83627HF is not set
1101# CONFIG_SENSORS_W83627EHF is not set
1102# CONFIG_SENSORS_HDAPS is not set
1103# CONFIG_HWMON_DEBUG_CHIP is not set
1104 1013
1105# 1014#
1106# Multifunction device drivers 1015# Multifunction device drivers
@@ -1147,8 +1056,9 @@ CONFIG_SOUND=y
1147# Open Sound System 1056# Open Sound System
1148# 1057#
1149CONFIG_SOUND_PRIME=y 1058CONFIG_SOUND_PRIME=y
1150# CONFIG_OBSOLETE_OSS is not set 1059CONFIG_OBSOLETE_OSS=y
1151# CONFIG_SOUND_BT878 is not set 1060# CONFIG_SOUND_BT878 is not set
1061# CONFIG_SOUND_ES1371 is not set
1152CONFIG_SOUND_ICH=y 1062CONFIG_SOUND_ICH=y
1153# CONFIG_SOUND_TRIDENT is not set 1063# CONFIG_SOUND_TRIDENT is not set
1154# CONFIG_SOUND_MSNDCLAS is not set 1064# CONFIG_SOUND_MSNDCLAS is not set
@@ -1163,6 +1073,14 @@ CONFIG_HID=y
1163# CONFIG_HID_DEBUG is not set 1073# CONFIG_HID_DEBUG is not set
1164 1074
1165# 1075#
1076# USB Input Devices
1077#
1078CONFIG_USB_HID=y
1079# CONFIG_USB_HIDINPUT_POWERBOOK is not set
1080# CONFIG_HID_FF is not set
1081# CONFIG_USB_HIDDEV is not set
1082
1083#
1166# USB support 1084# USB support
1167# 1085#
1168CONFIG_USB_ARCH_HAS_HCD=y 1086CONFIG_USB_ARCH_HAS_HCD=y
@@ -1175,6 +1093,7 @@ CONFIG_USB=y
1175# Miscellaneous USB options 1093# Miscellaneous USB options
1176# 1094#
1177CONFIG_USB_DEVICEFS=y 1095CONFIG_USB_DEVICEFS=y
1096# CONFIG_USB_DEVICE_CLASS is not set
1178# CONFIG_USB_DYNAMIC_MINORS is not set 1097# CONFIG_USB_DYNAMIC_MINORS is not set
1179# CONFIG_USB_SUSPEND is not set 1098# CONFIG_USB_SUSPEND is not set
1180# CONFIG_USB_OTG is not set 1099# CONFIG_USB_OTG is not set
@@ -1225,10 +1144,6 @@ CONFIG_USB_STORAGE=y
1225# 1144#
1226# USB Input Devices 1145# USB Input Devices
1227# 1146#
1228CONFIG_USB_HID=y
1229# CONFIG_USB_HIDINPUT_POWERBOOK is not set
1230# CONFIG_HID_FF is not set
1231# CONFIG_USB_HIDDEV is not set
1232# CONFIG_USB_AIPTEK is not set 1147# CONFIG_USB_AIPTEK is not set
1233# CONFIG_USB_WACOM is not set 1148# CONFIG_USB_WACOM is not set
1234# CONFIG_USB_ACECAD is not set 1149# CONFIG_USB_ACECAD is not set
@@ -1556,7 +1471,7 @@ CONFIG_DEBUG_KERNEL=y
1556CONFIG_LOG_BUF_SHIFT=18 1471CONFIG_LOG_BUF_SHIFT=18
1557CONFIG_DETECT_SOFTLOCKUP=y 1472CONFIG_DETECT_SOFTLOCKUP=y
1558# CONFIG_SCHEDSTATS is not set 1473# CONFIG_SCHEDSTATS is not set
1559# CONFIG_TIMER_STATS is not set 1474CONFIG_TIMER_STATS=y
1560# CONFIG_DEBUG_SLAB is not set 1475# CONFIG_DEBUG_SLAB is not set
1561# CONFIG_DEBUG_RT_MUTEXES is not set 1476# CONFIG_DEBUG_RT_MUTEXES is not set
1562# CONFIG_RT_MUTEX_TESTER is not set 1477# CONFIG_RT_MUTEX_TESTER is not set
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 071100ea1251..185399baaf6d 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -5,6 +5,11 @@
5 * This tricks binfmt_elf.c into loading 32bit binaries using lots 5 * This tricks binfmt_elf.c into loading 32bit binaries using lots
6 * of ugly preprocessor tricks. Talk about very very poor man's inheritance. 6 * of ugly preprocessor tricks. Talk about very very poor man's inheritance.
7 */ 7 */
8#define __ASM_X86_64_ELF_H 1
9
10#undef ELF_CLASS
11#define ELF_CLASS ELFCLASS32
12
8#include <linux/types.h> 13#include <linux/types.h>
9#include <linux/stddef.h> 14#include <linux/stddef.h>
10#include <linux/rwsem.h> 15#include <linux/rwsem.h>
@@ -50,9 +55,6 @@ struct elf_phdr;
50#undef ELF_ARCH 55#undef ELF_ARCH
51#define ELF_ARCH EM_386 56#define ELF_ARCH EM_386
52 57
53#undef ELF_CLASS
54#define ELF_CLASS ELFCLASS32
55
56#define ELF_DATA ELFDATA2LSB 58#define ELF_DATA ELFDATA2LSB
57 59
58#define USE_ELF_CORE_DUMP 1 60#define USE_ELF_CORE_DUMP 1
@@ -136,7 +138,7 @@ struct elf_prpsinfo
136 138
137#define user user32 139#define user user32
138 140
139#define __ASM_X86_64_ELF_H 1 141#undef elf_read_implies_exec
140#define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X) 142#define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X)
141//#include <asm/ia32.h> 143//#include <asm/ia32.h>
142#include <linux/elf.h> 144#include <linux/elf.h>
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index 796df6992f62..c48087db6f75 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -481,11 +481,7 @@ ia32_sys_call_table:
481 .quad sys_symlink 481 .quad sys_symlink
482 .quad sys_lstat 482 .quad sys_lstat
483 .quad sys_readlink /* 85 */ 483 .quad sys_readlink /* 85 */
484#ifdef CONFIG_IA32_AOUT
485 .quad sys_uselib 484 .quad sys_uselib
486#else
487 .quad quiet_ni_syscall
488#endif
489 .quad sys_swapon 485 .quad sys_swapon
490 .quad sys_reboot 486 .quad sys_reboot
491 .quad compat_sys_old_readdir 487 .quad compat_sys_old_readdir
diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index 568ff0df89e7..fc4419ff0355 100644
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -13,6 +13,7 @@
13#include <asm/proto.h> 13#include <asm/proto.h>
14#include <asm/tlbflush.h> 14#include <asm/tlbflush.h>
15#include <asm/ia32_unistd.h> 15#include <asm/ia32_unistd.h>
16#include <asm/vsyscall32.h>
16 17
17extern unsigned char syscall32_syscall[], syscall32_syscall_end[]; 18extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
18extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[]; 19extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index bb47e86f3d02..4d94c51803d8 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -8,7 +8,8 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ 8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
9 x8664_ksyms.o i387.o syscall.o vsyscall.o \ 9 x8664_ksyms.o i387.o syscall.o vsyscall.o \
10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ 10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
11 pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o 11 pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o bugs.o \
12 perfctr-watchdog.o
12 13
13obj-$(CONFIG_STACKTRACE) += stacktrace.o 14obj-$(CONFIG_STACKTRACE) += stacktrace.o
14obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o 15obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o
@@ -21,8 +22,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
21obj-$(CONFIG_X86_CPUID) += cpuid.o 22obj-$(CONFIG_X86_CPUID) += cpuid.o
22obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o tsc_sync.o 23obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o tsc_sync.o
23obj-y += apic.o nmi.o 24obj-y += apic.o nmi.o
24obj-y += io_apic.o mpparse.o \ 25obj-y += io_apic.o mpparse.o genapic.o genapic_flat.o
25 genapic.o genapic_cluster.o genapic_flat.o
26obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o 26obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
27obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 27obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
28obj-$(CONFIG_PM) += suspend.o 28obj-$(CONFIG_PM) += suspend.o
@@ -58,3 +58,4 @@ i8237-y += ../../i386/kernel/i8237.o
58msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o 58msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
59alternative-y += ../../i386/kernel/alternative.o 59alternative-y += ../../i386/kernel/alternative.o
60pcspeaker-y += ../../i386/kernel/pcspeaker.o 60pcspeaker-y += ../../i386/kernel/pcspeaker.o
61perfctr-watchdog-y += ../../i386/kernel/cpu/perfctr-watchdog.o
diff --git a/arch/x86_64/kernel/acpi/sleep.c b/arch/x86_64/kernel/acpi/sleep.c
index e1548fbe95ae..195b7034a148 100644
--- a/arch/x86_64/kernel/acpi/sleep.c
+++ b/arch/x86_64/kernel/acpi/sleep.c
@@ -60,19 +60,6 @@ extern char wakeup_start, wakeup_end;
60 60
61extern unsigned long acpi_copy_wakeup_routine(unsigned long); 61extern unsigned long acpi_copy_wakeup_routine(unsigned long);
62 62
63static pgd_t low_ptr;
64
65static void init_low_mapping(void)
66{
67 pgd_t *slot0 = pgd_offset(current->mm, 0UL);
68 low_ptr = *slot0;
69 /* FIXME: We're playing with the current task's page tables here, which
70 * is potentially dangerous on SMP systems.
71 */
72 set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET));
73 local_flush_tlb();
74}
75
76/** 63/**
77 * acpi_save_state_mem - save kernel state 64 * acpi_save_state_mem - save kernel state
78 * 65 *
@@ -81,8 +68,6 @@ static void init_low_mapping(void)
81 */ 68 */
82int acpi_save_state_mem(void) 69int acpi_save_state_mem(void)
83{ 70{
84 init_low_mapping();
85
86 memcpy((void *)acpi_wakeup_address, &wakeup_start, 71 memcpy((void *)acpi_wakeup_address, &wakeup_start,
87 &wakeup_end - &wakeup_start); 72 &wakeup_end - &wakeup_start);
88 acpi_copy_wakeup_routine(acpi_wakeup_address); 73 acpi_copy_wakeup_routine(acpi_wakeup_address);
@@ -95,8 +80,6 @@ int acpi_save_state_mem(void)
95 */ 80 */
96void acpi_restore_state_mem(void) 81void acpi_restore_state_mem(void)
97{ 82{
98 set_pgd(pgd_offset(current->mm, 0UL), low_ptr);
99 local_flush_tlb();
100} 83}
101 84
102/** 85/**
@@ -109,10 +92,11 @@ void acpi_restore_state_mem(void)
109 */ 92 */
110void __init acpi_reserve_bootmem(void) 93void __init acpi_reserve_bootmem(void)
111{ 94{
112 acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); 95 acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
113 if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) 96 if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2))
114 printk(KERN_CRIT 97 printk(KERN_CRIT
115 "ACPI: Wakeup code way too big, will crash on attempt to suspend\n"); 98 "ACPI: Wakeup code way too big, will crash on attempt"
99 " to suspend\n");
116} 100}
117 101
118static int __init acpi_sleep_setup(char *str) 102static int __init acpi_sleep_setup(char *str)
diff --git a/arch/x86_64/kernel/acpi/wakeup.S b/arch/x86_64/kernel/acpi/wakeup.S
index 185faa911db5..8550a6ffa275 100644
--- a/arch/x86_64/kernel/acpi/wakeup.S
+++ b/arch/x86_64/kernel/acpi/wakeup.S
@@ -1,6 +1,7 @@
1.text 1.text
2#include <linux/linkage.h> 2#include <linux/linkage.h>
3#include <asm/segment.h> 3#include <asm/segment.h>
4#include <asm/pgtable.h>
4#include <asm/page.h> 5#include <asm/page.h>
5#include <asm/msr.h> 6#include <asm/msr.h>
6 7
@@ -30,22 +31,28 @@ wakeup_code:
30 cld 31 cld
31 # setup data segment 32 # setup data segment
32 movw %cs, %ax 33 movw %cs, %ax
33 movw %ax, %ds # Make ds:0 point to wakeup_start 34 movw %ax, %ds # Make ds:0 point to wakeup_start
34 movw %ax, %ss 35 movw %ax, %ss
35 mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board 36 # Private stack is needed for ASUS board
37 mov $(wakeup_stack - wakeup_code), %sp
36 38
37 pushl $0 # Kill any dangerous flags 39 pushl $0 # Kill any dangerous flags
38 popfl 40 popfl
39 41
40 movl real_magic - wakeup_code, %eax 42 movl real_magic - wakeup_code, %eax
41 cmpl $0x12345678, %eax 43 cmpl $0x12345678, %eax
42 jne bogus_real_magic 44 jne bogus_real_magic
43 45
46 call verify_cpu # Verify the cpu supports long
47 # mode
48 testl %eax, %eax
49 jnz no_longmode
50
44 testl $1, video_flags - wakeup_code 51 testl $1, video_flags - wakeup_code
45 jz 1f 52 jz 1f
46 lcall $0xc000,$3 53 lcall $0xc000,$3
47 movw %cs, %ax 54 movw %cs, %ax
48 movw %ax, %ds # Bios might have played with that 55 movw %ax, %ds # Bios might have played with that
49 movw %ax, %ss 56 movw %ax, %ss
501: 571:
51 58
@@ -61,12 +68,15 @@ wakeup_code:
61 68
62 movb $0xa2, %al ; outb %al, $0x80 69 movb $0xa2, %al ; outb %al, $0x80
63 70
64 lidt %ds:idt_48a - wakeup_code 71 mov %ds, %ax # Find 32bit wakeup_code addr
65 xorl %eax, %eax 72 movzx %ax, %esi # (Convert %ds:gdt to a liner ptr)
66 movw %ds, %ax # (Convert %ds:gdt to a linear ptr) 73 shll $4, %esi
67 shll $4, %eax 74 # Fix up the vectors
68 addl $(gdta - wakeup_code), %eax 75 addl %esi, wakeup_32_vector - wakeup_code
69 movl %eax, gdt_48a +2 - wakeup_code 76 addl %esi, wakeup_long64_vector - wakeup_code
77 addl %esi, gdt_48a + 2 - wakeup_code # Fixup the gdt pointer
78
79 lidtl %ds:idt_48a - wakeup_code
70 lgdtl %ds:gdt_48a - wakeup_code # load gdt with whatever is 80 lgdtl %ds:gdt_48a - wakeup_code # load gdt with whatever is
71 # appropriate 81 # appropriate
72 82
@@ -75,86 +85,63 @@ wakeup_code:
75 jmp 1f 85 jmp 1f
761: 861:
77 87
78 .byte 0x66, 0xea # prefix + jmpi-opcode 88 ljmpl *(wakeup_32_vector - wakeup_code)
79 .long wakeup_32 - __START_KERNEL_map 89
80 .word __KERNEL_CS 90 .balign 4
91wakeup_32_vector:
92 .long wakeup_32 - wakeup_code
93 .word __KERNEL32_CS, 0
81 94
82 .code32 95 .code32
83wakeup_32: 96wakeup_32:
84# Running in this code, but at low address; paging is not yet turned on. 97# Running in this code, but at low address; paging is not yet turned on.
85 movb $0xa5, %al ; outb %al, $0x80 98 movb $0xa5, %al ; outb %al, $0x80
86 99
87 /* Check if extended functions are implemented */ 100 movl $__KERNEL_DS, %eax
88 movl $0x80000000, %eax 101 movl %eax, %ds
89 cpuid
90 cmpl $0x80000000, %eax
91 jbe bogus_cpu
92 wbinvd
93 mov $0x80000001, %eax
94 cpuid
95 btl $29, %edx
96 jnc bogus_cpu
97 movl %edx,%edi
98
99 movw $__KERNEL_DS, %ax
100 movw %ax, %ds
101 movw %ax, %es
102 movw %ax, %fs
103 movw %ax, %gs
104
105 movw $__KERNEL_DS, %ax
106 movw %ax, %ss
107 102
108 mov $(wakeup_stack - __START_KERNEL_map), %esp 103 movw $0x0e00 + 'i', %ds:(0xb8012)
109 movl saved_magic - __START_KERNEL_map, %eax 104 movb $0xa8, %al ; outb %al, $0x80;
110 cmpl $0x9abcdef0, %eax
111 jne bogus_32_magic
112 105
113 /* 106 /*
114 * Prepare for entering 64bits mode 107 * Prepare for entering 64bits mode
115 */ 108 */
116 109
117 /* Enable PAE mode and PGE */ 110 /* Enable PAE */
118 xorl %eax, %eax 111 xorl %eax, %eax
119 btsl $5, %eax 112 btsl $5, %eax
120 btsl $7, %eax
121 movl %eax, %cr4 113 movl %eax, %cr4
122 114
123 /* Setup early boot stage 4 level pagetables */ 115 /* Setup early boot stage 4 level pagetables */
124 movl $(wakeup_level4_pgt - __START_KERNEL_map), %eax 116 leal (wakeup_level4_pgt - wakeup_code)(%esi), %eax
125 movl %eax, %cr3 117 movl %eax, %cr3
126 118
127 /* Setup EFER (Extended Feature Enable Register) */ 119 /* Check if nx is implemented */
128 movl $MSR_EFER, %ecx 120 movl $0x80000001, %eax
129 rdmsr 121 cpuid
130 /* Fool rdmsr and reset %eax to avoid dependences */ 122 movl %edx,%edi
131 xorl %eax, %eax 123
132 /* Enable Long Mode */ 124 /* Enable Long Mode */
125 xorl %eax, %eax
133 btsl $_EFER_LME, %eax 126 btsl $_EFER_LME, %eax
134 /* Enable System Call */
135 btsl $_EFER_SCE, %eax
136 127
137 /* No Execute supported? */ 128 /* No Execute supported? */
138 btl $20,%edi 129 btl $20,%edi
139 jnc 1f 130 jnc 1f
140 btsl $_EFER_NX, %eax 131 btsl $_EFER_NX, %eax
1411:
142 132
143 /* Make changes effective */ 133 /* Make changes effective */
1341: movl $MSR_EFER, %ecx
135 xorl %edx, %edx
144 wrmsr 136 wrmsr
145 wbinvd
146 137
147 xorl %eax, %eax 138 xorl %eax, %eax
148 btsl $31, %eax /* Enable paging and in turn activate Long Mode */ 139 btsl $31, %eax /* Enable paging and in turn activate Long Mode */
149 btsl $0, %eax /* Enable protected mode */ 140 btsl $0, %eax /* Enable protected mode */
150 btsl $1, %eax /* Enable MP */
151 btsl $4, %eax /* Enable ET */
152 btsl $5, %eax /* Enable NE */
153 btsl $16, %eax /* Enable WP */
154 btsl $18, %eax /* Enable AM */
155 141
156 /* Make changes effective */ 142 /* Make changes effective */
157 movl %eax, %cr0 143 movl %eax, %cr0
144
158 /* At this point: 145 /* At this point:
159 CR4.PAE must be 1 146 CR4.PAE must be 1
160 CS.L must be 0 147 CS.L must be 0
@@ -162,11 +149,6 @@ wakeup_32:
162 Next instruction must be a branch 149 Next instruction must be a branch
163 This must be on identity-mapped page 150 This must be on identity-mapped page
164 */ 151 */
165 jmp reach_compatibility_mode
166reach_compatibility_mode:
167 movw $0x0e00 + 'i', %ds:(0xb8012)
168 movb $0xa8, %al ; outb %al, $0x80;
169
170 /* 152 /*
171 * At this point we're in long mode but in 32bit compatibility mode 153 * At this point we're in long mode but in 32bit compatibility mode
172 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn 154 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
@@ -174,24 +156,19 @@ reach_compatibility_mode:
174 * the new gdt/idt that has __KERNEL_CS with CS.L = 1. 156 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
175 */ 157 */
176 158
177 movw $0x0e00 + 'n', %ds:(0xb8014)
178 movb $0xa9, %al ; outb %al, $0x80
179
180 /* Load new GDT with the 64bit segment using 32bit descriptor */
181 movl $(pGDT32 - __START_KERNEL_map), %eax
182 lgdt (%eax)
183
184 movl $(wakeup_jumpvector - __START_KERNEL_map), %eax
185 /* Finally jump in 64bit mode */ 159 /* Finally jump in 64bit mode */
186 ljmp *(%eax) 160 ljmp *(wakeup_long64_vector - wakeup_code)(%esi)
187 161
188wakeup_jumpvector: 162 .balign 4
189 .long wakeup_long64 - __START_KERNEL_map 163wakeup_long64_vector:
190 .word __KERNEL_CS 164 .long wakeup_long64 - wakeup_code
165 .word __KERNEL_CS, 0
191 166
192.code64 167.code64
193 168
194 /* Hooray, we are in Long 64-bit mode (but still running in low memory) */ 169 /* Hooray, we are in Long 64-bit mode (but still running in
170 * low memory)
171 */
195wakeup_long64: 172wakeup_long64:
196 /* 173 /*
197 * We must switch to a new descriptor in kernel space for the GDT 174 * We must switch to a new descriptor in kernel space for the GDT
@@ -199,7 +176,15 @@ wakeup_long64:
199 * addresses where we're currently running on. We have to do that here 176 * addresses where we're currently running on. We have to do that here
200 * because in 32bit we couldn't load a 64bit linear address. 177 * because in 32bit we couldn't load a 64bit linear address.
201 */ 178 */
202 lgdt cpu_gdt_descr - __START_KERNEL_map 179 lgdt cpu_gdt_descr
180
181 movw $0x0e00 + 'n', %ds:(0xb8014)
182 movb $0xa9, %al ; outb %al, $0x80
183
184 movq saved_magic, %rax
185 movq $0x123456789abcdef0, %rdx
186 cmpq %rdx, %rax
187 jne bogus_64_magic
203 188
204 movw $0x0e00 + 'u', %ds:(0xb8016) 189 movw $0x0e00 + 'u', %ds:(0xb8016)
205 190
@@ -211,75 +196,58 @@ wakeup_long64:
211 movw %ax, %es 196 movw %ax, %es
212 movw %ax, %fs 197 movw %ax, %fs
213 movw %ax, %gs 198 movw %ax, %gs
214 movq saved_esp, %rsp 199 movq saved_rsp, %rsp
215 200
216 movw $0x0e00 + 'x', %ds:(0xb8018) 201 movw $0x0e00 + 'x', %ds:(0xb8018)
217 movq saved_ebx, %rbx 202 movq saved_rbx, %rbx
218 movq saved_edi, %rdi 203 movq saved_rdi, %rdi
219 movq saved_esi, %rsi 204 movq saved_rsi, %rsi
220 movq saved_ebp, %rbp 205 movq saved_rbp, %rbp
221 206
222 movw $0x0e00 + '!', %ds:(0xb801a) 207 movw $0x0e00 + '!', %ds:(0xb801a)
223 movq saved_eip, %rax 208 movq saved_rip, %rax
224 jmp *%rax 209 jmp *%rax
225 210
226.code32 211.code32
227 212
228 .align 64 213 .align 64
229gdta: 214gdta:
215 /* Its good to keep gdt in sync with one in trampoline.S */
230 .word 0, 0, 0, 0 # dummy 216 .word 0, 0, 0, 0 # dummy
231 217 /* ??? Why I need the accessed bit set in order for this to work? */
232 .word 0, 0, 0, 0 # unused 218 .quad 0x00cf9b000000ffff # __KERNEL32_CS
233 219 .quad 0x00af9b000000ffff # __KERNEL_CS
234 .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb) 220 .quad 0x00cf93000000ffff # __KERNEL_DS
235 .word 0 # base address = 0
236 .word 0x9B00 # code read/exec. ??? Why I need 0x9B00 (as opposed to 0x9A00 in order for this to work?)
237 .word 0x00CF # granularity = 4096, 386
238 # (+5th nibble of limit)
239
240 .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
241 .word 0 # base address = 0
242 .word 0x9200 # data read/write
243 .word 0x00CF # granularity = 4096, 386
244 # (+5th nibble of limit)
245# this is 64bit descriptor for code
246 .word 0xFFFF
247 .word 0
248 .word 0x9A00 # code read/exec
249 .word 0x00AF # as above, but it is long mode and with D=0
250 221
251idt_48a: 222idt_48a:
252 .word 0 # idt limit = 0 223 .word 0 # idt limit = 0
253 .word 0, 0 # idt base = 0L 224 .word 0, 0 # idt base = 0L
254 225
255gdt_48a: 226gdt_48a:
256 .word 0x8000 # gdt limit=2048, 227 .word 0x800 # gdt limit=2048,
257 # 256 GDT entries 228 # 256 GDT entries
258 .word 0, 0 # gdt base (filled in later) 229 .long gdta - wakeup_code # gdt base (relocated in later)
259
260 230
261real_save_gdt: .word 0
262 .quad 0
263real_magic: .quad 0 231real_magic: .quad 0
264video_mode: .quad 0 232video_mode: .quad 0
265video_flags: .quad 0 233video_flags: .quad 0
266 234
235.code16
267bogus_real_magic: 236bogus_real_magic:
268 movb $0xba,%al ; outb %al,$0x80 237 movb $0xba,%al ; outb %al,$0x80
269 jmp bogus_real_magic 238 jmp bogus_real_magic
270 239
271bogus_32_magic: 240.code64
241bogus_64_magic:
272 movb $0xb3,%al ; outb %al,$0x80 242 movb $0xb3,%al ; outb %al,$0x80
273 jmp bogus_32_magic 243 jmp bogus_64_magic
274 244
275bogus_31_magic: 245.code16
276 movb $0xb1,%al ; outb %al,$0x80 246no_longmode:
277 jmp bogus_31_magic 247 movb $0xbc,%al ; outb %al,$0x80
278 248 jmp no_longmode
279bogus_cpu:
280 movb $0xbc,%al ; outb %al,$0x80
281 jmp bogus_cpu
282 249
250#include "../verify_cpu.S"
283 251
284/* This code uses an extended set of video mode numbers. These include: 252/* This code uses an extended set of video mode numbers. These include:
285 * Aliases for standard modes 253 * Aliases for standard modes
@@ -301,6 +269,7 @@ bogus_cpu:
301#define VIDEO_FIRST_V7 0x0900 269#define VIDEO_FIRST_V7 0x0900
302 270
303# Setting of user mode (AX=mode ID) => CF=success 271# Setting of user mode (AX=mode ID) => CF=success
272.code16
304mode_seta: 273mode_seta:
305 movw %ax, %bx 274 movw %ax, %bx
306#if 0 275#if 0
@@ -346,21 +315,18 @@ check_vesaa:
346 315
347_setbada: jmp setbada 316_setbada: jmp setbada
348 317
349 .code64
350bogus_magic:
351 movw $0x0e00 + 'B', %ds:(0xb8018)
352 jmp bogus_magic
353
354bogus_magic2:
355 movw $0x0e00 + '2', %ds:(0xb8018)
356 jmp bogus_magic2
357
358
359wakeup_stack_begin: # Stack grows down 318wakeup_stack_begin: # Stack grows down
360 319
361.org 0xff0 320.org 0xff0
362wakeup_stack: # Just below end of page 321wakeup_stack: # Just below end of page
363 322
323.org 0x1000
324ENTRY(wakeup_level4_pgt)
325 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
326 .fill 510,8,0
327 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
328 .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
329
364ENTRY(wakeup_end) 330ENTRY(wakeup_end)
365 331
366## 332##
@@ -373,28 +339,11 @@ ENTRY(wakeup_end)
373# 339#
374# Returned address is location of code in low memory (past data and stack) 340# Returned address is location of code in low memory (past data and stack)
375# 341#
342 .code64
376ENTRY(acpi_copy_wakeup_routine) 343ENTRY(acpi_copy_wakeup_routine)
377 pushq %rax 344 pushq %rax
378 pushq %rcx
379 pushq %rdx 345 pushq %rdx
380 346
381 sgdt saved_gdt
382 sidt saved_idt
383 sldt saved_ldt
384 str saved_tss
385
386 movq %cr3, %rdx
387 movq %rdx, saved_cr3
388 movq %cr4, %rdx
389 movq %rdx, saved_cr4
390 movq %cr0, %rdx
391 movq %rdx, saved_cr0
392 sgdt real_save_gdt - wakeup_start (,%rdi)
393 movl $MSR_EFER, %ecx
394 rdmsr
395 movl %eax, saved_efer
396 movl %edx, saved_efer2
397
398 movl saved_video_mode, %edx 347 movl saved_video_mode, %edx
399 movl %edx, video_mode - wakeup_start (,%rdi) 348 movl %edx, video_mode - wakeup_start (,%rdi)
400 movl acpi_video_flags, %edx 349 movl acpi_video_flags, %edx
@@ -403,21 +352,13 @@ ENTRY(acpi_copy_wakeup_routine)
403 movq $0x123456789abcdef0, %rdx 352 movq $0x123456789abcdef0, %rdx
404 movq %rdx, saved_magic 353 movq %rdx, saved_magic
405 354
406 movl saved_magic - __START_KERNEL_map, %eax 355 movq saved_magic, %rax
407 cmpl $0x9abcdef0, %eax 356 movq $0x123456789abcdef0, %rdx
408 jne bogus_32_magic 357 cmpq %rdx, %rax
409 358 jne bogus_64_magic
410 # make sure %cr4 is set correctly (features, etc)
411 movl saved_cr4 - __START_KERNEL_map, %eax
412 movq %rax, %cr4
413 359
414 movl saved_cr0 - __START_KERNEL_map, %eax
415 movq %rax, %cr0
416 jmp 1f # Flush pipelines
4171:
418 # restore the regs we used 360 # restore the regs we used
419 popq %rdx 361 popq %rdx
420 popq %rcx
421 popq %rax 362 popq %rax
422ENTRY(do_suspend_lowlevel_s4bios) 363ENTRY(do_suspend_lowlevel_s4bios)
423 ret 364 ret
@@ -450,13 +391,13 @@ do_suspend_lowlevel:
450 movq %r15, saved_context_r15(%rip) 391 movq %r15, saved_context_r15(%rip)
451 pushfq ; popq saved_context_eflags(%rip) 392 pushfq ; popq saved_context_eflags(%rip)
452 393
453 movq $.L97, saved_eip(%rip) 394 movq $.L97, saved_rip(%rip)
454 395
455 movq %rsp,saved_esp 396 movq %rsp,saved_rsp
456 movq %rbp,saved_ebp 397 movq %rbp,saved_rbp
457 movq %rbx,saved_ebx 398 movq %rbx,saved_rbx
458 movq %rdi,saved_edi 399 movq %rdi,saved_rdi
459 movq %rsi,saved_esi 400 movq %rsi,saved_rsi
460 401
461 addq $8, %rsp 402 addq $8, %rsp
462 movl $3, %edi 403 movl $3, %edi
@@ -503,25 +444,12 @@ do_suspend_lowlevel:
503 444
504.data 445.data
505ALIGN 446ALIGN
506ENTRY(saved_ebp) .quad 0 447ENTRY(saved_rbp) .quad 0
507ENTRY(saved_esi) .quad 0 448ENTRY(saved_rsi) .quad 0
508ENTRY(saved_edi) .quad 0 449ENTRY(saved_rdi) .quad 0
509ENTRY(saved_ebx) .quad 0 450ENTRY(saved_rbx) .quad 0
510 451
511ENTRY(saved_eip) .quad 0 452ENTRY(saved_rip) .quad 0
512ENTRY(saved_esp) .quad 0 453ENTRY(saved_rsp) .quad 0
513 454
514ENTRY(saved_magic) .quad 0 455ENTRY(saved_magic) .quad 0
515
516ALIGN
517# saved registers
518saved_gdt: .quad 0,0
519saved_idt: .quad 0,0
520saved_ldt: .quad 0
521saved_tss: .quad 0
522
523saved_cr0: .quad 0
524saved_cr3: .quad 0
525saved_cr4: .quad 0
526saved_efer: .quad 0
527saved_efer2: .quad 0
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index b487396c4c5b..a52af5820592 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -51,7 +51,6 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
51 51
52static u32 __init allocate_aperture(void) 52static u32 __init allocate_aperture(void)
53{ 53{
54 pg_data_t *nd0 = NODE_DATA(0);
55 u32 aper_size; 54 u32 aper_size;
56 void *p; 55 void *p;
57 56
@@ -65,12 +64,12 @@ static u32 __init allocate_aperture(void)
65 * Unfortunately we cannot move it up because that would make the 64 * Unfortunately we cannot move it up because that would make the
66 * IOMMU useless. 65 * IOMMU useless.
67 */ 66 */
68 p = __alloc_bootmem_node(nd0, aper_size, aper_size, 0); 67 p = __alloc_bootmem_nopanic(aper_size, aper_size, 0);
69 if (!p || __pa(p)+aper_size > 0xffffffff) { 68 if (!p || __pa(p)+aper_size > 0xffffffff) {
70 printk("Cannot allocate aperture memory hole (%p,%uK)\n", 69 printk("Cannot allocate aperture memory hole (%p,%uK)\n",
71 p, aper_size>>10); 70 p, aper_size>>10);
72 if (p) 71 if (p)
73 free_bootmem_node(nd0, __pa(p), aper_size); 72 free_bootmem(__pa(p), aper_size);
74 return 0; 73 return 0;
75 } 74 }
76 printk("Mapping aperture over %d KB of RAM @ %lx\n", 75 printk("Mapping aperture over %d KB of RAM @ %lx\n",
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index bd3e45d47c37..d198f7d82e5a 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -68,6 +68,28 @@ int using_apic_timer __read_mostly = 0;
68 68
69static void apic_pm_activate(void); 69static void apic_pm_activate(void);
70 70
71void apic_wait_icr_idle(void)
72{
73 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
74 cpu_relax();
75}
76
77unsigned int safe_apic_wait_icr_idle(void)
78{
79 unsigned int send_status;
80 int timeout;
81
82 timeout = 0;
83 do {
84 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
85 if (!send_status)
86 break;
87 udelay(100);
88 } while (timeout++ < 1000);
89
90 return send_status;
91}
92
71void enable_NMI_through_LVT0 (void * dummy) 93void enable_NMI_through_LVT0 (void * dummy)
72{ 94{
73 unsigned int v; 95 unsigned int v;
@@ -817,14 +839,15 @@ static void setup_APIC_timer(unsigned int clocks)
817 839
818static int __init calibrate_APIC_clock(void) 840static int __init calibrate_APIC_clock(void)
819{ 841{
820 int apic, apic_start, tsc, tsc_start; 842 unsigned apic, apic_start;
843 unsigned long tsc, tsc_start;
821 int result; 844 int result;
822 /* 845 /*
823 * Put whatever arbitrary (but long enough) timeout 846 * Put whatever arbitrary (but long enough) timeout
824 * value into the APIC clock, we just want to get the 847 * value into the APIC clock, we just want to get the
825 * counter running for calibration. 848 * counter running for calibration.
826 */ 849 */
827 __setup_APIC_LVTT(1000000000); 850 __setup_APIC_LVTT(4000000000);
828 851
829 apic_start = apic_read(APIC_TMCCT); 852 apic_start = apic_read(APIC_TMCCT);
830#ifdef CONFIG_X86_PM_TIMER 853#ifdef CONFIG_X86_PM_TIMER
@@ -835,15 +858,15 @@ static int __init calibrate_APIC_clock(void)
835 } else 858 } else
836#endif 859#endif
837 { 860 {
838 rdtscl(tsc_start); 861 rdtscll(tsc_start);
839 862
840 do { 863 do {
841 apic = apic_read(APIC_TMCCT); 864 apic = apic_read(APIC_TMCCT);
842 rdtscl(tsc); 865 rdtscll(tsc);
843 } while ((tsc - tsc_start) < TICK_COUNT && 866 } while ((tsc - tsc_start) < TICK_COUNT &&
844 (apic - apic_start) < TICK_COUNT); 867 (apic_start - apic) < TICK_COUNT);
845 868
846 result = (apic_start - apic) * 1000L * cpu_khz / 869 result = (apic_start - apic) * 1000L * tsc_khz /
847 (tsc - tsc_start); 870 (tsc - tsc_start);
848 } 871 }
849 printk("result %d\n", result); 872 printk("result %d\n", result);
diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c
index 96687e2beb2c..778953bc636c 100644
--- a/arch/x86_64/kernel/asm-offsets.c
+++ b/arch/x86_64/kernel/asm-offsets.c
@@ -21,6 +21,14 @@
21 21
22#define BLANK() asm volatile("\n->" : : ) 22#define BLANK() asm volatile("\n->" : : )
23 23
24#define __NO_STUBS 1
25#undef __SYSCALL
26#undef _ASM_X86_64_UNISTD_H_
27#define __SYSCALL(nr, sym) [nr] = 1,
28static char syscalls[] = {
29#include <asm/unistd.h>
30};
31
24int main(void) 32int main(void)
25{ 33{
26#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry)) 34#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
@@ -71,5 +79,7 @@ int main(void)
71 DEFINE(TSS_ist, offsetof(struct tss_struct, ist)); 79 DEFINE(TSS_ist, offsetof(struct tss_struct, ist));
72 BLANK(); 80 BLANK();
73 DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); 81 DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
82 BLANK();
83 DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
74 return 0; 84 return 0;
75} 85}
diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
new file mode 100644
index 000000000000..12b585b5345d
--- /dev/null
+++ b/arch/x86_64/kernel/bugs.c
@@ -0,0 +1,21 @@
1/*
2 * arch/x86_64/kernel/bugs.c
3 *
4 * Copyright (C) 1994 Linus Torvalds
5 * Copyright (C) 2000 SuSE
6 */
7
8#include <linux/kernel.h>
9#include <linux/init.h>
10#include <asm/alternative.h>
11#include <asm/processor.h>
12
13void __init check_bugs(void)
14{
15 identify_cpu(&boot_cpu_data);
16#if !defined(CONFIG_SMP)
17 printk("CPU: ");
18 print_cpu_info(&boot_cpu_data);
19#endif
20 alternative_instructions();
21}
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index a490fabfcf47..be8965427a93 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -25,7 +25,7 @@
25#include <asm/bootsetup.h> 25#include <asm/bootsetup.h>
26#include <asm/sections.h> 26#include <asm/sections.h>
27 27
28struct e820map e820 __initdata; 28struct e820map e820;
29 29
30/* 30/*
31 * PFN of last memory page. 31 * PFN of last memory page.
@@ -98,7 +98,7 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
98 * This function checks if any part of the range <start,end> is mapped 98 * This function checks if any part of the range <start,end> is mapped
99 * with type. 99 * with type.
100 */ 100 */
101int __meminit 101int
102e820_any_mapped(unsigned long start, unsigned long end, unsigned type) 102e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
103{ 103{
104 int i; 104 int i;
@@ -112,6 +112,7 @@ e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
112 } 112 }
113 return 0; 113 return 0;
114} 114}
115EXPORT_SYMBOL_GPL(e820_any_mapped);
115 116
116/* 117/*
117 * This function checks if the entire range <start,end> is mapped with type. 118 * This function checks if the entire range <start,end> is mapped with type.
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index fede55a53995..990d9c218a5d 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -71,18 +71,6 @@ static void __init ati_bugs(void)
71 } 71 }
72} 72}
73 73
74static void intel_bugs(void)
75{
76 u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
77
78#ifdef CONFIG_SMP
79 if (device == PCI_DEVICE_ID_INTEL_E7320_MCH ||
80 device == PCI_DEVICE_ID_INTEL_E7520_MCH ||
81 device == PCI_DEVICE_ID_INTEL_E7525_MCH)
82 quirk_intel_irqbalance();
83#endif
84}
85
86struct chipset { 74struct chipset {
87 u16 vendor; 75 u16 vendor;
88 void (*f)(void); 76 void (*f)(void);
@@ -92,7 +80,6 @@ static struct chipset early_qrk[] __initdata = {
92 { PCI_VENDOR_ID_NVIDIA, nvidia_bugs }, 80 { PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
93 { PCI_VENDOR_ID_VIA, via_bugs }, 81 { PCI_VENDOR_ID_VIA, via_bugs },
94 { PCI_VENDOR_ID_ATI, ati_bugs }, 82 { PCI_VENDOR_ID_ATI, ati_bugs },
95 { PCI_VENDOR_ID_INTEL, intel_bugs},
96 {} 83 {}
97}; 84};
98 85
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c
index 47b6d90349da..92213d2b7c11 100644
--- a/arch/x86_64/kernel/early_printk.c
+++ b/arch/x86_64/kernel/early_printk.c
@@ -11,11 +11,10 @@
11 11
12#ifdef __i386__ 12#ifdef __i386__
13#include <asm/setup.h> 13#include <asm/setup.h>
14#define VGABASE (__ISA_IO_base + 0xb8000)
15#else 14#else
16#include <asm/bootsetup.h> 15#include <asm/bootsetup.h>
17#define VGABASE ((void __iomem *)0xffffffff800b8000UL)
18#endif 16#endif
17#define VGABASE (__ISA_IO_base + 0xb8000)
19 18
20static int max_ypos = 25, max_xpos = 80; 19static int max_ypos = 25, max_xpos = 80;
21static int current_ypos = 25, current_xpos = 0; 20static int current_ypos = 25, current_xpos = 0;
@@ -176,7 +175,7 @@ static noinline long simnow(long cmd, long a, long b, long c)
176 return ret; 175 return ret;
177} 176}
178 177
179void __init simnow_init(char *str) 178static void __init simnow_init(char *str)
180{ 179{
181 char *fn = "klog"; 180 char *fn = "klog";
182 if (*str == '=') 181 if (*str == '=')
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index ed4350ced3d0..fa984b53e7e6 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -701,6 +701,7 @@ END(spurious_interrupt)
701 CFI_ADJUST_CFA_OFFSET 8 701 CFI_ADJUST_CFA_OFFSET 8
702 pushq %rax /* push real oldrax to the rdi slot */ 702 pushq %rax /* push real oldrax to the rdi slot */
703 CFI_ADJUST_CFA_OFFSET 8 703 CFI_ADJUST_CFA_OFFSET 8
704 CFI_REL_OFFSET rax,0
704 leaq \sym(%rip),%rax 705 leaq \sym(%rip),%rax
705 jmp error_entry 706 jmp error_entry
706 CFI_ENDPROC 707 CFI_ENDPROC
@@ -710,6 +711,7 @@ END(spurious_interrupt)
710 XCPT_FRAME 711 XCPT_FRAME
711 pushq %rax 712 pushq %rax
712 CFI_ADJUST_CFA_OFFSET 8 713 CFI_ADJUST_CFA_OFFSET 8
714 CFI_REL_OFFSET rax,0
713 leaq \sym(%rip),%rax 715 leaq \sym(%rip),%rax
714 jmp error_entry 716 jmp error_entry
715 CFI_ENDPROC 717 CFI_ENDPROC
@@ -817,6 +819,7 @@ paranoid_schedule\trace:
817 */ 819 */
818KPROBE_ENTRY(error_entry) 820KPROBE_ENTRY(error_entry)
819 _frame RDI 821 _frame RDI
822 CFI_REL_OFFSET rax,0
820 /* rdi slot contains rax, oldrax contains error code */ 823 /* rdi slot contains rax, oldrax contains error code */
821 cld 824 cld
822 subq $14*8,%rsp 825 subq $14*8,%rsp
@@ -824,6 +827,7 @@ KPROBE_ENTRY(error_entry)
824 movq %rsi,13*8(%rsp) 827 movq %rsi,13*8(%rsp)
825 CFI_REL_OFFSET rsi,RSI 828 CFI_REL_OFFSET rsi,RSI
826 movq 14*8(%rsp),%rsi /* load rax from rdi slot */ 829 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
830 CFI_REGISTER rax,rsi
827 movq %rdx,12*8(%rsp) 831 movq %rdx,12*8(%rsp)
828 CFI_REL_OFFSET rdx,RDX 832 CFI_REL_OFFSET rdx,RDX
829 movq %rcx,11*8(%rsp) 833 movq %rcx,11*8(%rsp)
@@ -857,6 +861,7 @@ error_swapgs:
857 swapgs 861 swapgs
858error_sti: 862error_sti:
859 movq %rdi,RDI(%rsp) 863 movq %rdi,RDI(%rsp)
864 CFI_REL_OFFSET rdi,RDI
860 movq %rsp,%rdi 865 movq %rsp,%rdi
861 movq ORIG_RAX(%rsp),%rsi /* get error code */ 866 movq ORIG_RAX(%rsp),%rsi /* get error code */
862 movq $-1,ORIG_RAX(%rsp) 867 movq $-1,ORIG_RAX(%rsp)
diff --git a/arch/x86_64/kernel/functionlist b/arch/x86_64/kernel/functionlist
deleted file mode 100644
index 7ae18ec12454..000000000000
--- a/arch/x86_64/kernel/functionlist
+++ /dev/null
@@ -1,1284 +0,0 @@
1*(.text.flush_thread)
2*(.text.check_poison_obj)
3*(.text.copy_page)
4*(.text.__set_personality)
5*(.text.gart_map_sg)
6*(.text.kmem_cache_free)
7*(.text.find_get_page)
8*(.text._raw_spin_lock)
9*(.text.ide_outb)
10*(.text.unmap_vmas)
11*(.text.copy_page_range)
12*(.text.kprobe_handler)
13*(.text.__handle_mm_fault)
14*(.text.__d_lookup)
15*(.text.copy_user_generic)
16*(.text.__link_path_walk)
17*(.text.get_page_from_freelist)
18*(.text.kmem_cache_alloc)
19*(.text.drive_cmd_intr)
20*(.text.ia32_setup_sigcontext)
21*(.text.huge_pte_offset)
22*(.text.do_page_fault)
23*(.text.page_remove_rmap)
24*(.text.release_pages)
25*(.text.ide_end_request)
26*(.text.__mutex_lock_slowpath)
27*(.text.__find_get_block)
28*(.text.kfree)
29*(.text.vfs_read)
30*(.text._raw_spin_unlock)
31*(.text.free_hot_cold_page)
32*(.text.fget_light)
33*(.text.schedule)
34*(.text.memcmp)
35*(.text.touch_atime)
36*(.text.__might_sleep)
37*(.text.__down_read_trylock)
38*(.text.arch_pick_mmap_layout)
39*(.text.find_vma)
40*(.text.__make_request)
41*(.text.do_generic_mapping_read)
42*(.text.mutex_lock_interruptible)
43*(.text.__generic_file_aio_read)
44*(.text._atomic_dec_and_lock)
45*(.text.__wake_up_bit)
46*(.text.add_to_page_cache)
47*(.text.cache_alloc_debugcheck_after)
48*(.text.vm_normal_page)
49*(.text.mutex_debug_check_no_locks_freed)
50*(.text.net_rx_action)
51*(.text.__find_first_zero_bit)
52*(.text.put_page)
53*(.text._raw_read_lock)
54*(.text.__delay)
55*(.text.dnotify_parent)
56*(.text.do_path_lookup)
57*(.text.do_sync_read)
58*(.text.do_lookup)
59*(.text.bit_waitqueue)
60*(.text.file_read_actor)
61*(.text.strncpy_from_user)
62*(.text.__pagevec_lru_add_active)
63*(.text.fget)
64*(.text.dput)
65*(.text.__strnlen_user)
66*(.text.inotify_inode_queue_event)
67*(.text.rw_verify_area)
68*(.text.ide_intr)
69*(.text.inotify_dentry_parent_queue_event)
70*(.text.permission)
71*(.text.memscan)
72*(.text.hpet_rtc_interrupt)
73*(.text.do_mmap_pgoff)
74*(.text.current_fs_time)
75*(.text.vfs_getattr)
76*(.text.kmem_flagcheck)
77*(.text.mark_page_accessed)
78*(.text.free_pages_and_swap_cache)
79*(.text.generic_fillattr)
80*(.text.__block_prepare_write)
81*(.text.__set_page_dirty_nobuffers)
82*(.text.link_path_walk)
83*(.text.find_get_pages_tag)
84*(.text.ide_do_request)
85*(.text.__alloc_pages)
86*(.text.generic_permission)
87*(.text.mod_page_state_offset)
88*(.text.free_pgd_range)
89*(.text.generic_file_buffered_write)
90*(.text.number)
91*(.text.ide_do_rw_disk)
92*(.text.__brelse)
93*(.text.__mod_page_state_offset)
94*(.text.rotate_reclaimable_page)
95*(.text.find_vma_prepare)
96*(.text.find_vma_prev)
97*(.text.lru_cache_add_active)
98*(.text.__kmalloc_track_caller)
99*(.text.smp_invalidate_interrupt)
100*(.text.handle_IRQ_event)
101*(.text.__find_get_block_slow)
102*(.text.do_wp_page)
103*(.text.do_select)
104*(.text.set_user_nice)
105*(.text.sys_read)
106*(.text.do_munmap)
107*(.text.csum_partial)
108*(.text.__do_softirq)
109*(.text.may_open)
110*(.text.getname)
111*(.text.get_empty_filp)
112*(.text.__fput)
113*(.text.remove_mapping)
114*(.text.filp_ctor)
115*(.text.poison_obj)
116*(.text.unmap_region)
117*(.text.test_set_page_writeback)
118*(.text.__do_page_cache_readahead)
119*(.text.sock_def_readable)
120*(.text.ide_outl)
121*(.text.shrink_zone)
122*(.text.rb_insert_color)
123*(.text.get_request)
124*(.text.sys_pread64)
125*(.text.spin_bug)
126*(.text.ide_outsl)
127*(.text.mask_and_ack_8259A)
128*(.text.filemap_nopage)
129*(.text.page_add_file_rmap)
130*(.text.find_lock_page)
131*(.text.tcp_poll)
132*(.text.__mark_inode_dirty)
133*(.text.file_ra_state_init)
134*(.text.generic_file_llseek)
135*(.text.__pagevec_lru_add)
136*(.text.page_cache_readahead)
137*(.text.n_tty_receive_buf)
138*(.text.zonelist_policy)
139*(.text.vma_adjust)
140*(.text.test_clear_page_dirty)
141*(.text.sync_buffer)
142*(.text.do_exit)
143*(.text.__bitmap_weight)
144*(.text.alloc_pages_current)
145*(.text.get_unused_fd)
146*(.text.zone_watermark_ok)
147*(.text.cpuset_update_task_memory_state)
148*(.text.__bitmap_empty)
149*(.text.sys_munmap)
150*(.text.__inode_dir_notify)
151*(.text.__generic_file_aio_write_nolock)
152*(.text.__pte_alloc)
153*(.text.sys_select)
154*(.text.vm_acct_memory)
155*(.text.vfs_write)
156*(.text.__lru_add_drain)
157*(.text.prio_tree_insert)
158*(.text.generic_file_aio_read)
159*(.text.vma_merge)
160*(.text.block_write_full_page)
161*(.text.__page_set_anon_rmap)
162*(.text.apic_timer_interrupt)
163*(.text.release_console_sem)
164*(.text.sys_write)
165*(.text.sys_brk)
166*(.text.dup_mm)
167*(.text.read_current_timer)
168*(.text.ll_rw_block)
169*(.text.blk_rq_map_sg)
170*(.text.dbg_userword)
171*(.text.__block_commit_write)
172*(.text.cache_grow)
173*(.text.copy_strings)
174*(.text.release_task)
175*(.text.do_sync_write)
176*(.text.unlock_page)
177*(.text.load_elf_binary)
178*(.text.__follow_mount)
179*(.text.__getblk)
180*(.text.do_sys_open)
181*(.text.current_kernel_time)
182*(.text.call_rcu)
183*(.text.write_chan)
184*(.text.vsnprintf)
185*(.text.dummy_inode_setsecurity)
186*(.text.submit_bh)
187*(.text.poll_freewait)
188*(.text.bio_alloc_bioset)
189*(.text.skb_clone)
190*(.text.page_waitqueue)
191*(.text.__mutex_lock_interruptible_slowpath)
192*(.text.get_index)
193*(.text.csum_partial_copy_generic)
194*(.text.bad_range)
195*(.text.remove_vma)
196*(.text.cp_new_stat)
197*(.text.alloc_arraycache)
198*(.text.test_clear_page_writeback)
199*(.text.strsep)
200*(.text.open_namei)
201*(.text._raw_read_unlock)
202*(.text.get_vma_policy)
203*(.text.__down_write_trylock)
204*(.text.find_get_pages)
205*(.text.tcp_rcv_established)
206*(.text.generic_make_request)
207*(.text.__block_write_full_page)
208*(.text.cfq_set_request)
209*(.text.sys_inotify_init)
210*(.text.split_vma)
211*(.text.__mod_timer)
212*(.text.get_options)
213*(.text.vma_link)
214*(.text.mpage_writepages)
215*(.text.truncate_complete_page)
216*(.text.tcp_recvmsg)
217*(.text.sigprocmask)
218*(.text.filemap_populate)
219*(.text.sys_close)
220*(.text.inotify_dev_queue_event)
221*(.text.do_task_stat)
222*(.text.__dentry_open)
223*(.text.unlink_file_vma)
224*(.text.__pollwait)
225*(.text.packet_rcv_spkt)
226*(.text.drop_buffers)
227*(.text.free_pgtables)
228*(.text.generic_file_direct_write)
229*(.text.copy_process)
230*(.text.netif_receive_skb)
231*(.text.dnotify_flush)
232*(.text.print_bad_pte)
233*(.text.anon_vma_unlink)
234*(.text.sys_mprotect)
235*(.text.sync_sb_inodes)
236*(.text.find_inode_fast)
237*(.text.dummy_inode_readlink)
238*(.text.putname)
239*(.text.init_smp_flush)
240*(.text.dbg_redzone2)
241*(.text.sk_run_filter)
242*(.text.may_expand_vm)
243*(.text.generic_file_aio_write)
244*(.text.find_next_zero_bit)
245*(.text.file_kill)
246*(.text.audit_getname)
247*(.text.arch_unmap_area_topdown)
248*(.text.alloc_page_vma)
249*(.text.tcp_transmit_skb)
250*(.text.rb_next)
251*(.text.dbg_redzone1)
252*(.text.generic_file_mmap)
253*(.text.vfs_fstat)
254*(.text.sys_time)
255*(.text.page_lock_anon_vma)
256*(.text.get_unmapped_area)
257*(.text.remote_llseek)
258*(.text.__up_read)
259*(.text.fd_install)
260*(.text.eventpoll_init_file)
261*(.text.dma_alloc_coherent)
262*(.text.create_empty_buffers)
263*(.text.__mutex_unlock_slowpath)
264*(.text.dup_fd)
265*(.text.d_alloc)
266*(.text.tty_ldisc_try)
267*(.text.sys_stime)
268*(.text.__rb_rotate_right)
269*(.text.d_validate)
270*(.text.rb_erase)
271*(.text.path_release)
272*(.text.memmove)
273*(.text.invalidate_complete_page)
274*(.text.clear_inode)
275*(.text.cache_estimate)
276*(.text.alloc_buffer_head)
277*(.text.smp_call_function_interrupt)
278*(.text.flush_tlb_others)
279*(.text.file_move)
280*(.text.balance_dirty_pages_ratelimited)
281*(.text.vma_prio_tree_add)
282*(.text.timespec_trunc)
283*(.text.mempool_alloc)
284*(.text.iget_locked)
285*(.text.d_alloc_root)
286*(.text.cpuset_populate_dir)
287*(.text.anon_vma_prepare)
288*(.text.sys_newstat)
289*(.text.alloc_page_interleave)
290*(.text.__path_lookup_intent_open)
291*(.text.__pagevec_free)
292*(.text.inode_init_once)
293*(.text.free_vfsmnt)
294*(.text.__user_walk_fd)
295*(.text.cfq_idle_slice_timer)
296*(.text.sys_mmap)
297*(.text.sys_llseek)
298*(.text.prio_tree_remove)
299*(.text.filp_close)
300*(.text.file_permission)
301*(.text.vma_prio_tree_remove)
302*(.text.tcp_ack)
303*(.text.nameidata_to_filp)
304*(.text.sys_lseek)
305*(.text.percpu_counter_mod)
306*(.text.igrab)
307*(.text.__bread)
308*(.text.alloc_inode)
309*(.text.filldir)
310*(.text.__rb_rotate_left)
311*(.text.irq_affinity_write_proc)
312*(.text.init_request_from_bio)
313*(.text.find_or_create_page)
314*(.text.tty_poll)
315*(.text.tcp_sendmsg)
316*(.text.ide_wait_stat)
317*(.text.free_buffer_head)
318*(.text.flush_signal_handlers)
319*(.text.tcp_v4_rcv)
320*(.text.nr_blockdev_pages)
321*(.text.locks_remove_flock)
322*(.text.__iowrite32_copy)
323*(.text.do_filp_open)
324*(.text.try_to_release_page)
325*(.text.page_add_new_anon_rmap)
326*(.text.kmem_cache_size)
327*(.text.eth_type_trans)
328*(.text.try_to_free_buffers)
329*(.text.schedule_tail)
330*(.text.proc_lookup)
331*(.text.no_llseek)
332*(.text.kfree_skbmem)
333*(.text.do_wait)
334*(.text.do_mpage_readpage)
335*(.text.vfs_stat_fd)
336*(.text.tty_write)
337*(.text.705)
338*(.text.sync_page)
339*(.text.__remove_shared_vm_struct)
340*(.text.__kfree_skb)
341*(.text.sock_poll)
342*(.text.get_request_wait)
343*(.text.do_sigaction)
344*(.text.do_brk)
345*(.text.tcp_event_data_recv)
346*(.text.read_chan)
347*(.text.pipe_writev)
348*(.text.__emul_lookup_dentry)
349*(.text.rtc_get_rtc_time)
350*(.text.print_objinfo)
351*(.text.file_update_time)
352*(.text.do_signal)
353*(.text.disable_8259A_irq)
354*(.text.blk_queue_bounce)
355*(.text.__anon_vma_link)
356*(.text.__vma_link)
357*(.text.vfs_rename)
358*(.text.sys_newlstat)
359*(.text.sys_newfstat)
360*(.text.sys_mknod)
361*(.text.__show_regs)
362*(.text.iput)
363*(.text.get_signal_to_deliver)
364*(.text.flush_tlb_page)
365*(.text.debug_mutex_wake_waiter)
366*(.text.copy_thread)
367*(.text.clear_page_dirty_for_io)
368*(.text.buffer_io_error)
369*(.text.vfs_permission)
370*(.text.truncate_inode_pages_range)
371*(.text.sys_recvfrom)
372*(.text.remove_suid)
373*(.text.mark_buffer_dirty)
374*(.text.local_bh_enable)
375*(.text.get_zeroed_page)
376*(.text.get_vmalloc_info)
377*(.text.flush_old_exec)
378*(.text.dummy_inode_permission)
379*(.text.__bio_add_page)
380*(.text.prio_tree_replace)
381*(.text.notify_change)
382*(.text.mntput_no_expire)
383*(.text.fput)
384*(.text.__end_that_request_first)
385*(.text.wake_up_bit)
386*(.text.unuse_mm)
387*(.text.shrink_icache_memory)
388*(.text.sched_balance_self)
389*(.text.__pmd_alloc)
390*(.text.pipe_poll)
391*(.text.normal_poll)
392*(.text.__free_pages)
393*(.text.follow_mount)
394*(.text.cdrom_start_packet_command)
395*(.text.blk_recount_segments)
396*(.text.bio_put)
397*(.text.__alloc_skb)
398*(.text.__wake_up)
399*(.text.vm_stat_account)
400*(.text.sys_fcntl)
401*(.text.sys_fadvise64)
402*(.text._raw_write_unlock)
403*(.text.__pud_alloc)
404*(.text.alloc_page_buffers)
405*(.text.vfs_llseek)
406*(.text.sockfd_lookup)
407*(.text._raw_write_lock)
408*(.text.put_compound_page)
409*(.text.prune_dcache)
410*(.text.pipe_readv)
411*(.text.mempool_free)
412*(.text.make_ahead_window)
413*(.text.lru_add_drain)
414*(.text.constant_test_bit)
415*(.text.__clear_user)
416*(.text.arch_unmap_area)
417*(.text.anon_vma_link)
418*(.text.sys_chroot)
419*(.text.setup_arg_pages)
420*(.text.radix_tree_preload)
421*(.text.init_rwsem)
422*(.text.generic_osync_inode)
423*(.text.generic_delete_inode)
424*(.text.do_sys_poll)
425*(.text.dev_queue_xmit)
426*(.text.default_llseek)
427*(.text.__writeback_single_inode)
428*(.text.vfs_ioctl)
429*(.text.__up_write)
430*(.text.unix_poll)
431*(.text.sys_rt_sigprocmask)
432*(.text.sock_recvmsg)
433*(.text.recalc_bh_state)
434*(.text.__put_unused_fd)
435*(.text.process_backlog)
436*(.text.locks_remove_posix)
437*(.text.lease_modify)
438*(.text.expand_files)
439*(.text.end_buffer_read_nobh)
440*(.text.d_splice_alias)
441*(.text.debug_mutex_init_waiter)
442*(.text.copy_from_user)
443*(.text.cap_vm_enough_memory)
444*(.text.show_vfsmnt)
445*(.text.release_sock)
446*(.text.pfifo_fast_enqueue)
447*(.text.half_md4_transform)
448*(.text.fs_may_remount_ro)
449*(.text.do_fork)
450*(.text.copy_hugetlb_page_range)
451*(.text.cache_free_debugcheck)
452*(.text.__tcp_select_window)
453*(.text.task_handoff_register)
454*(.text.sys_open)
455*(.text.strlcpy)
456*(.text.skb_copy_datagram_iovec)
457*(.text.set_up_list3s)
458*(.text.release_open_intent)
459*(.text.qdisc_restart)
460*(.text.n_tty_chars_in_buffer)
461*(.text.inode_change_ok)
462*(.text.__downgrade_write)
463*(.text.debug_mutex_unlock)
464*(.text.add_timer_randomness)
465*(.text.sock_common_recvmsg)
466*(.text.set_bh_page)
467*(.text.printk_lock)
468*(.text.path_release_on_umount)
469*(.text.ip_output)
470*(.text.ide_build_dmatable)
471*(.text.__get_user_8)
472*(.text.end_buffer_read_sync)
473*(.text.__d_path)
474*(.text.d_move)
475*(.text.del_timer)
476*(.text.constant_test_bit)
477*(.text.blockable_page_cache_readahead)
478*(.text.tty_read)
479*(.text.sys_readlink)
480*(.text.sys_faccessat)
481*(.text.read_swap_cache_async)
482*(.text.pty_write_room)
483*(.text.page_address_in_vma)
484*(.text.kthread)
485*(.text.cfq_exit_io_context)
486*(.text.__tcp_push_pending_frames)
487*(.text.sys_pipe)
488*(.text.submit_bio)
489*(.text.pid_revalidate)
490*(.text.page_referenced_file)
491*(.text.lock_sock)
492*(.text.get_page_state_node)
493*(.text.generic_block_bmap)
494*(.text.do_setitimer)
495*(.text.dev_queue_xmit_nit)
496*(.text.copy_from_read_buf)
497*(.text.__const_udelay)
498*(.text.console_conditional_schedule)
499*(.text.wake_up_new_task)
500*(.text.wait_for_completion_interruptible)
501*(.text.tcp_rcv_rtt_update)
502*(.text.sys_mlockall)
503*(.text.set_fs_altroot)
504*(.text.schedule_timeout)
505*(.text.nr_free_pagecache_pages)
506*(.text.nf_iterate)
507*(.text.mapping_tagged)
508*(.text.ip_queue_xmit)
509*(.text.ip_local_deliver)
510*(.text.follow_page)
511*(.text.elf_map)
512*(.text.dummy_file_permission)
513*(.text.dispose_list)
514*(.text.dentry_open)
515*(.text.dentry_iput)
516*(.text.bio_alloc)
517*(.text.wait_on_page_bit)
518*(.text.vfs_readdir)
519*(.text.vfs_lstat)
520*(.text.seq_escape)
521*(.text.__posix_lock_file)
522*(.text.mm_release)
523*(.text.kref_put)
524*(.text.ip_rcv)
525*(.text.__iget)
526*(.text.free_pages)
527*(.text.find_mergeable_anon_vma)
528*(.text.find_extend_vma)
529*(.text.dummy_inode_listsecurity)
530*(.text.bio_add_page)
531*(.text.__vm_enough_memory)
532*(.text.vfs_stat)
533*(.text.tty_paranoia_check)
534*(.text.tcp_read_sock)
535*(.text.tcp_data_queue)
536*(.text.sys_uname)
537*(.text.sys_renameat)
538*(.text.__strncpy_from_user)
539*(.text.__mutex_init)
540*(.text.__lookup_hash)
541*(.text.kref_get)
542*(.text.ip_route_input)
543*(.text.__insert_inode_hash)
544*(.text.do_sock_write)
545*(.text.blk_done_softirq)
546*(.text.__wake_up_sync)
547*(.text.__vma_link_rb)
548*(.text.tty_ioctl)
549*(.text.tracesys)
550*(.text.sys_getdents)
551*(.text.sys_dup)
552*(.text.stub_execve)
553*(.text.sha_transform)
554*(.text.radix_tree_tag_clear)
555*(.text.put_unused_fd)
556*(.text.put_files_struct)
557*(.text.mpage_readpages)
558*(.text.may_delete)
559*(.text.kmem_cache_create)
560*(.text.ip_mc_output)
561*(.text.interleave_nodes)
562*(.text.groups_search)
563*(.text.generic_drop_inode)
564*(.text.generic_commit_write)
565*(.text.fcntl_setlk)
566*(.text.exit_mmap)
567*(.text.end_page_writeback)
568*(.text.__d_rehash)
569*(.text.debug_mutex_free_waiter)
570*(.text.csum_ipv6_magic)
571*(.text.count)
572*(.text.cleanup_rbuf)
573*(.text.check_spinlock_acquired_node)
574*(.text.can_vma_merge_after)
575*(.text.bio_endio)
576*(.text.alloc_pidmap)
577*(.text.write_ldt)
578*(.text.vmtruncate_range)
579*(.text.vfs_create)
580*(.text.__user_walk)
581*(.text.update_send_head)
582*(.text.unmap_underlying_metadata)
583*(.text.tty_ldisc_deref)
584*(.text.tcp_setsockopt)
585*(.text.tcp_send_ack)
586*(.text.sys_pause)
587*(.text.sys_gettimeofday)
588*(.text.sync_dirty_buffer)
589*(.text.strncmp)
590*(.text.release_posix_timer)
591*(.text.proc_file_read)
592*(.text.prepare_to_wait)
593*(.text.locks_mandatory_locked)
594*(.text.interruptible_sleep_on_timeout)
595*(.text.inode_sub_bytes)
596*(.text.in_group_p)
597*(.text.hrtimer_try_to_cancel)
598*(.text.filldir64)
599*(.text.fasync_helper)
600*(.text.dummy_sb_pivotroot)
601*(.text.d_lookup)
602*(.text.d_instantiate)
603*(.text.__d_find_alias)
604*(.text.cpu_idle_wait)
605*(.text.cond_resched_lock)
606*(.text.chown_common)
607*(.text.blk_congestion_wait)
608*(.text.activate_page)
609*(.text.unlock_buffer)
610*(.text.tty_wakeup)
611*(.text.tcp_v4_do_rcv)
612*(.text.tcp_current_mss)
613*(.text.sys_openat)
614*(.text.sys_fchdir)
615*(.text.strnlen_user)
616*(.text.strnlen)
617*(.text.strchr)
618*(.text.sock_common_getsockopt)
619*(.text.skb_checksum)
620*(.text.remove_wait_queue)
621*(.text.rb_replace_node)
622*(.text.radix_tree_node_ctor)
623*(.text.pty_chars_in_buffer)
624*(.text.profile_hit)
625*(.text.prio_tree_left)
626*(.text.pgd_clear_bad)
627*(.text.pfifo_fast_dequeue)
628*(.text.page_referenced)
629*(.text.open_exec)
630*(.text.mmput)
631*(.text.mm_init)
632*(.text.__ide_dma_off_quietly)
633*(.text.ide_dma_intr)
634*(.text.hrtimer_start)
635*(.text.get_io_context)
636*(.text.__get_free_pages)
637*(.text.find_first_zero_bit)
638*(.text.file_free_rcu)
639*(.text.dummy_socket_sendmsg)
640*(.text.do_unlinkat)
641*(.text.do_arch_prctl)
642*(.text.destroy_inode)
643*(.text.can_vma_merge_before)
644*(.text.block_sync_page)
645*(.text.block_prepare_write)
646*(.text.bio_init)
647*(.text.arch_ptrace)
648*(.text.wake_up_inode)
649*(.text.wait_on_retry_sync_kiocb)
650*(.text.vma_prio_tree_next)
651*(.text.tcp_rcv_space_adjust)
652*(.text.__tcp_ack_snd_check)
653*(.text.sys_utime)
654*(.text.sys_recvmsg)
655*(.text.sys_mremap)
656*(.text.sys_bdflush)
657*(.text.sleep_on)
658*(.text.set_page_dirty_lock)
659*(.text.seq_path)
660*(.text.schedule_timeout_interruptible)
661*(.text.sched_fork)
662*(.text.rt_run_flush)
663*(.text.profile_munmap)
664*(.text.prepare_binprm)
665*(.text.__pagevec_release_nonlru)
666*(.text.m_show)
667*(.text.lookup_mnt)
668*(.text.__lookup_mnt)
669*(.text.lock_timer_base)
670*(.text.is_subdir)
671*(.text.invalidate_bh_lru)
672*(.text.init_buffer_head)
673*(.text.ifind_fast)
674*(.text.ide_dma_start)
675*(.text.__get_page_state)
676*(.text.flock_to_posix_lock)
677*(.text.__find_symbol)
678*(.text.do_futex)
679*(.text.do_execve)
680*(.text.dirty_writeback_centisecs_handler)
681*(.text.dev_watchdog)
682*(.text.can_share_swap_page)
683*(.text.blkdev_put)
684*(.text.bio_get_nr_vecs)
685*(.text.xfrm_compile_policy)
686*(.text.vma_prio_tree_insert)
687*(.text.vfs_lstat_fd)
688*(.text.__user_path_lookup_open)
689*(.text.thread_return)
690*(.text.tcp_send_delayed_ack)
691*(.text.sock_def_error_report)
692*(.text.shrink_slab)
693*(.text.serial_out)
694*(.text.seq_read)
695*(.text.secure_ip_id)
696*(.text.search_binary_handler)
697*(.text.proc_pid_unhash)
698*(.text.pagevec_lookup)
699*(.text.new_inode)
700*(.text.memcpy_toiovec)
701*(.text.locks_free_lock)
702*(.text.__lock_page)
703*(.text.__lock_buffer)
704*(.text.load_module)
705*(.text.is_bad_inode)
706*(.text.invalidate_inode_buffers)
707*(.text.insert_vm_struct)
708*(.text.inode_setattr)
709*(.text.inode_add_bytes)
710*(.text.ide_read_24)
711*(.text.ide_get_error_location)
712*(.text.ide_do_drive_cmd)
713*(.text.get_locked_pte)
714*(.text.get_filesystem_list)
715*(.text.generic_file_open)
716*(.text.follow_down)
717*(.text.find_next_bit)
718*(.text.__find_first_bit)
719*(.text.exit_mm)
720*(.text.exec_keys)
721*(.text.end_buffer_write_sync)
722*(.text.end_bio_bh_io_sync)
723*(.text.dummy_socket_shutdown)
724*(.text.d_rehash)
725*(.text.d_path)
726*(.text.do_ioctl)
727*(.text.dget_locked)
728*(.text.copy_thread_group_keys)
729*(.text.cdrom_end_request)
730*(.text.cap_bprm_apply_creds)
731*(.text.blk_rq_bio_prep)
732*(.text.__bitmap_intersects)
733*(.text.bio_phys_segments)
734*(.text.bio_free)
735*(.text.arch_get_unmapped_area_topdown)
736*(.text.writeback_in_progress)
737*(.text.vfs_follow_link)
738*(.text.tcp_rcv_state_process)
739*(.text.tcp_check_space)
740*(.text.sys_stat)
741*(.text.sys_rt_sigreturn)
742*(.text.sys_rt_sigaction)
743*(.text.sys_remap_file_pages)
744*(.text.sys_pwrite64)
745*(.text.sys_fchownat)
746*(.text.sys_fchmodat)
747*(.text.strncat)
748*(.text.strlcat)
749*(.text.strcmp)
750*(.text.steal_locks)
751*(.text.sock_create)
752*(.text.sk_stream_rfree)
753*(.text.sk_stream_mem_schedule)
754*(.text.skip_atoi)
755*(.text.sk_alloc)
756*(.text.show_stat)
757*(.text.set_fs_pwd)
758*(.text.set_binfmt)
759*(.text.pty_unthrottle)
760*(.text.proc_symlink)
761*(.text.pipe_release)
762*(.text.pageout)
763*(.text.n_tty_write_wakeup)
764*(.text.n_tty_ioctl)
765*(.text.nr_free_zone_pages)
766*(.text.migration_thread)
767*(.text.mempool_free_slab)
768*(.text.meminfo_read_proc)
769*(.text.max_sane_readahead)
770*(.text.lru_cache_add)
771*(.text.kill_fasync)
772*(.text.kernel_read)
773*(.text.invalidate_mapping_pages)
774*(.text.inode_has_buffers)
775*(.text.init_once)
776*(.text.inet_sendmsg)
777*(.text.idedisk_issue_flush)
778*(.text.generic_file_write)
779*(.text.free_more_memory)
780*(.text.__free_fdtable)
781*(.text.filp_dtor)
782*(.text.exit_sem)
783*(.text.exit_itimers)
784*(.text.error_interrupt)
785*(.text.end_buffer_async_write)
786*(.text.eligible_child)
787*(.text.elf_map)
788*(.text.dump_task_regs)
789*(.text.dummy_task_setscheduler)
790*(.text.dummy_socket_accept)
791*(.text.dummy_file_free_security)
792*(.text.__down_read)
793*(.text.do_sock_read)
794*(.text.do_sigaltstack)
795*(.text.do_mremap)
796*(.text.current_io_context)
797*(.text.cpu_swap_callback)
798*(.text.copy_vma)
799*(.text.cap_bprm_set_security)
800*(.text.blk_insert_request)
801*(.text.bio_map_kern_endio)
802*(.text.bio_hw_segments)
803*(.text.bictcp_cong_avoid)
804*(.text.add_interrupt_randomness)
805*(.text.wait_for_completion)
806*(.text.version_read_proc)
807*(.text.unix_write_space)
808*(.text.tty_ldisc_ref_wait)
809*(.text.tty_ldisc_put)
810*(.text.try_to_wake_up)
811*(.text.tcp_v4_tw_remember_stamp)
812*(.text.tcp_try_undo_dsack)
813*(.text.tcp_may_send_now)
814*(.text.sys_waitid)
815*(.text.sys_sched_getparam)
816*(.text.sys_getppid)
817*(.text.sys_getcwd)
818*(.text.sys_dup2)
819*(.text.sys_chmod)
820*(.text.sys_chdir)
821*(.text.sprintf)
822*(.text.sock_wfree)
823*(.text.sock_aio_write)
824*(.text.skb_drop_fraglist)
825*(.text.skb_dequeue)
826*(.text.set_close_on_exec)
827*(.text.set_brk)
828*(.text.seq_puts)
829*(.text.SELECT_DRIVE)
830*(.text.sched_exec)
831*(.text.return_EIO)
832*(.text.remove_from_page_cache)
833*(.text.rcu_start_batch)
834*(.text.__put_task_struct)
835*(.text.proc_pid_readdir)
836*(.text.proc_get_inode)
837*(.text.prepare_to_wait_exclusive)
838*(.text.pipe_wait)
839*(.text.pipe_new)
840*(.text.pdflush_operation)
841*(.text.__pagevec_release)
842*(.text.pagevec_lookup_tag)
843*(.text.packet_rcv)
844*(.text.n_tty_set_room)
845*(.text.nr_free_pages)
846*(.text.__net_timestamp)
847*(.text.mpage_end_io_read)
848*(.text.mod_timer)
849*(.text.__memcpy)
850*(.text.mb_cache_shrink_fn)
851*(.text.lock_rename)
852*(.text.kstrdup)
853*(.text.is_ignored)
854*(.text.int_very_careful)
855*(.text.inotify_inode_is_dead)
856*(.text.inotify_get_cookie)
857*(.text.inode_get_bytes)
858*(.text.init_timer)
859*(.text.init_dev)
860*(.text.inet_getname)
861*(.text.ide_map_sg)
862*(.text.__ide_dma_end)
863*(.text.hrtimer_get_remaining)
864*(.text.get_task_mm)
865*(.text.get_random_int)
866*(.text.free_pipe_info)
867*(.text.filemap_write_and_wait_range)
868*(.text.exit_thread)
869*(.text.enter_idle)
870*(.text.end_that_request_first)
871*(.text.end_8259A_irq)
872*(.text.dummy_file_alloc_security)
873*(.text.do_group_exit)
874*(.text.debug_mutex_init)
875*(.text.cpuset_exit)
876*(.text.cpu_idle)
877*(.text.copy_semundo)
878*(.text.copy_files)
879*(.text.chrdev_open)
880*(.text.cdrom_transfer_packet_command)
881*(.text.cdrom_mode_sense)
882*(.text.blk_phys_contig_segment)
883*(.text.blk_get_queue)
884*(.text.bio_split)
885*(.text.audit_alloc)
886*(.text.anon_pipe_buf_release)
887*(.text.add_wait_queue_exclusive)
888*(.text.add_wait_queue)
889*(.text.acct_process)
890*(.text.account)
891*(.text.zeromap_page_range)
892*(.text.yield)
893*(.text.writeback_acquire)
894*(.text.worker_thread)
895*(.text.wait_on_page_writeback_range)
896*(.text.__wait_on_buffer)
897*(.text.vscnprintf)
898*(.text.vmalloc_to_pfn)
899*(.text.vgacon_save_screen)
900*(.text.vfs_unlink)
901*(.text.vfs_rmdir)
902*(.text.unregister_md_personality)
903*(.text.unlock_new_inode)
904*(.text.unix_stream_sendmsg)
905*(.text.unix_stream_recvmsg)
906*(.text.unhash_process)
907*(.text.udp_v4_lookup_longway)
908*(.text.tty_ldisc_flush)
909*(.text.tty_ldisc_enable)
910*(.text.tty_hung_up_p)
911*(.text.tty_buffer_free_all)
912*(.text.tso_fragment)
913*(.text.try_to_del_timer_sync)
914*(.text.tcp_v4_err)
915*(.text.tcp_unhash)
916*(.text.tcp_seq_next)
917*(.text.tcp_select_initial_window)
918*(.text.tcp_sacktag_write_queue)
919*(.text.tcp_cwnd_validate)
920*(.text.sys_vhangup)
921*(.text.sys_uselib)
922*(.text.sys_symlink)
923*(.text.sys_signal)
924*(.text.sys_poll)
925*(.text.sys_mount)
926*(.text.sys_kill)
927*(.text.sys_ioctl)
928*(.text.sys_inotify_add_watch)
929*(.text.sys_getuid)
930*(.text.sys_getrlimit)
931*(.text.sys_getitimer)
932*(.text.sys_getgroups)
933*(.text.sys_ftruncate)
934*(.text.sysfs_lookup)
935*(.text.sys_exit_group)
936*(.text.stub_fork)
937*(.text.sscanf)
938*(.text.sock_map_fd)
939*(.text.sock_get_timestamp)
940*(.text.__sock_create)
941*(.text.smp_call_function_single)
942*(.text.sk_stop_timer)
943*(.text.skb_copy_and_csum_datagram)
944*(.text.__skb_checksum_complete)
945*(.text.single_next)
946*(.text.sigqueue_alloc)
947*(.text.shrink_dcache_parent)
948*(.text.select_idle_routine)
949*(.text.run_workqueue)
950*(.text.run_local_timers)
951*(.text.remove_inode_hash)
952*(.text.remove_dquot_ref)
953*(.text.register_binfmt)
954*(.text.read_cache_pages)
955*(.text.rb_last)
956*(.text.pty_open)
957*(.text.proc_root_readdir)
958*(.text.proc_pid_flush)
959*(.text.proc_pident_lookup)
960*(.text.proc_fill_super)
961*(.text.proc_exe_link)
962*(.text.posix_locks_deadlock)
963*(.text.pipe_iov_copy_from_user)
964*(.text.opost)
965*(.text.nf_register_hook)
966*(.text.netif_rx_ni)
967*(.text.m_start)
968*(.text.mpage_writepage)
969*(.text.mm_alloc)
970*(.text.memory_open)
971*(.text.mark_buffer_async_write)
972*(.text.lru_add_drain_all)
973*(.text.locks_init_lock)
974*(.text.locks_delete_lock)
975*(.text.lock_hrtimer_base)
976*(.text.load_script)
977*(.text.__kill_fasync)
978*(.text.ip_mc_sf_allow)
979*(.text.__ioremap)
980*(.text.int_with_check)
981*(.text.int_sqrt)
982*(.text.install_thread_keyring)
983*(.text.init_page_buffers)
984*(.text.inet_sock_destruct)
985*(.text.idle_notifier_register)
986*(.text.ide_execute_command)
987*(.text.ide_end_drive_cmd)
988*(.text.__ide_dma_host_on)
989*(.text.hrtimer_run_queues)
990*(.text.hpet_mask_rtc_irq_bit)
991*(.text.__get_zone_counts)
992*(.text.get_zone_counts)
993*(.text.get_write_access)
994*(.text.get_fs_struct)
995*(.text.get_dirty_limits)
996*(.text.generic_readlink)
997*(.text.free_hot_page)
998*(.text.finish_wait)
999*(.text.find_inode)
1000*(.text.find_first_bit)
1001*(.text.__filemap_fdatawrite_range)
1002*(.text.__filemap_copy_from_user_iovec)
1003*(.text.exit_aio)
1004*(.text.elv_set_request)
1005*(.text.elv_former_request)
1006*(.text.dup_namespace)
1007*(.text.dupfd)
1008*(.text.dummy_socket_getsockopt)
1009*(.text.dummy_sb_post_mountroot)
1010*(.text.dummy_quotactl)
1011*(.text.dummy_inode_rename)
1012*(.text.__do_SAK)
1013*(.text.do_pipe)
1014*(.text.do_fsync)
1015*(.text.d_instantiate_unique)
1016*(.text.d_find_alias)
1017*(.text.deny_write_access)
1018*(.text.dentry_unhash)
1019*(.text.d_delete)
1020*(.text.datagram_poll)
1021*(.text.cpuset_fork)
1022*(.text.cpuid_read)
1023*(.text.copy_namespace)
1024*(.text.cond_resched)
1025*(.text.check_version)
1026*(.text.__change_page_attr)
1027*(.text.cfq_slab_kill)
1028*(.text.cfq_completed_request)
1029*(.text.cdrom_pc_intr)
1030*(.text.cdrom_decode_status)
1031*(.text.cap_capset_check)
1032*(.text.blk_put_request)
1033*(.text.bio_fs_destructor)
1034*(.text.bictcp_min_cwnd)
1035*(.text.alloc_chrdev_region)
1036*(.text.add_element)
1037*(.text.acct_update_integrals)
1038*(.text.write_boundary_block)
1039*(.text.writeback_release)
1040*(.text.writeback_inodes)
1041*(.text.wake_up_state)
1042*(.text.__wake_up_locked)
1043*(.text.wake_futex)
1044*(.text.wait_task_inactive)
1045*(.text.__wait_on_freeing_inode)
1046*(.text.wait_noreap_copyout)
1047*(.text.vmstat_start)
1048*(.text.vgacon_do_font_op)
1049*(.text.vfs_readv)
1050*(.text.vfs_quota_sync)
1051*(.text.update_queue)
1052*(.text.unshare_files)
1053*(.text.unmap_vm_area)
1054*(.text.unix_socketpair)
1055*(.text.unix_release_sock)
1056*(.text.unix_detach_fds)
1057*(.text.unix_create1)
1058*(.text.unix_bind)
1059*(.text.udp_sendmsg)
1060*(.text.udp_rcv)
1061*(.text.udp_queue_rcv_skb)
1062*(.text.uart_write)
1063*(.text.uart_startup)
1064*(.text.uart_open)
1065*(.text.tty_vhangup)
1066*(.text.tty_termios_baud_rate)
1067*(.text.tty_release)
1068*(.text.tty_ldisc_ref)
1069*(.text.throttle_vm_writeout)
1070*(.text.058)
1071*(.text.tcp_xmit_probe_skb)
1072*(.text.tcp_v4_send_check)
1073*(.text.tcp_v4_destroy_sock)
1074*(.text.tcp_sync_mss)
1075*(.text.tcp_snd_test)
1076*(.text.tcp_slow_start)
1077*(.text.tcp_send_fin)
1078*(.text.tcp_rtt_estimator)
1079*(.text.tcp_parse_options)
1080*(.text.tcp_ioctl)
1081*(.text.tcp_init_tso_segs)
1082*(.text.tcp_init_cwnd)
1083*(.text.tcp_getsockopt)
1084*(.text.tcp_fin)
1085*(.text.tcp_connect)
1086*(.text.tcp_cong_avoid)
1087*(.text.__tcp_checksum_complete_user)
1088*(.text.task_dumpable)
1089*(.text.sys_wait4)
1090*(.text.sys_utimes)
1091*(.text.sys_symlinkat)
1092*(.text.sys_socketpair)
1093*(.text.sys_rmdir)
1094*(.text.sys_readahead)
1095*(.text.sys_nanosleep)
1096*(.text.sys_linkat)
1097*(.text.sys_fstat)
1098*(.text.sysfs_readdir)
1099*(.text.sys_execve)
1100*(.text.sysenter_tracesys)
1101*(.text.sys_chown)
1102*(.text.stub_clone)
1103*(.text.strrchr)
1104*(.text.strncpy)
1105*(.text.stopmachine_set_state)
1106*(.text.sock_sendmsg)
1107*(.text.sock_release)
1108*(.text.sock_fasync)
1109*(.text.sock_close)
1110*(.text.sk_stream_write_space)
1111*(.text.sk_reset_timer)
1112*(.text.skb_split)
1113*(.text.skb_recv_datagram)
1114*(.text.skb_queue_tail)
1115*(.text.sk_attach_filter)
1116*(.text.si_swapinfo)
1117*(.text.simple_strtoll)
1118*(.text.set_termios)
1119*(.text.set_task_comm)
1120*(.text.set_shrinker)
1121*(.text.set_normalized_timespec)
1122*(.text.set_brk)
1123*(.text.serial_in)
1124*(.text.seq_printf)
1125*(.text.secure_dccp_sequence_number)
1126*(.text.rwlock_bug)
1127*(.text.rt_hash_code)
1128*(.text.__rta_fill)
1129*(.text.__request_resource)
1130*(.text.relocate_new_kernel)
1131*(.text.release_thread)
1132*(.text.release_mem)
1133*(.text.rb_prev)
1134*(.text.rb_first)
1135*(.text.random_poll)
1136*(.text.__put_super_and_need_restart)
1137*(.text.pty_write)
1138*(.text.ptrace_stop)
1139*(.text.proc_self_readlink)
1140*(.text.proc_root_lookup)
1141*(.text.proc_root_link)
1142*(.text.proc_pid_make_inode)
1143*(.text.proc_pid_attr_write)
1144*(.text.proc_lookupfd)
1145*(.text.proc_delete_inode)
1146*(.text.posix_same_owner)
1147*(.text.posix_block_lock)
1148*(.text.poll_initwait)
1149*(.text.pipe_write)
1150*(.text.pipe_read_fasync)
1151*(.text.pipe_ioctl)
1152*(.text.pdflush)
1153*(.text.pci_user_read_config_dword)
1154*(.text.page_readlink)
1155*(.text.null_lseek)
1156*(.text.nf_hook_slow)
1157*(.text.netlink_sock_destruct)
1158*(.text.netlink_broadcast)
1159*(.text.neigh_resolve_output)
1160*(.text.name_to_int)
1161*(.text.mwait_idle)
1162*(.text.mutex_trylock)
1163*(.text.mutex_debug_check_no_locks_held)
1164*(.text.m_stop)
1165*(.text.mpage_end_io_write)
1166*(.text.mpage_alloc)
1167*(.text.move_page_tables)
1168*(.text.mounts_open)
1169*(.text.__memset)
1170*(.text.memcpy_fromiovec)
1171*(.text.make_8259A_irq)
1172*(.text.lookup_user_key_possessed)
1173*(.text.lookup_create)
1174*(.text.locks_insert_lock)
1175*(.text.locks_alloc_lock)
1176*(.text.kthread_should_stop)
1177*(.text.kswapd)
1178*(.text.kobject_uevent)
1179*(.text.kobject_get_path)
1180*(.text.kobject_get)
1181*(.text.klist_children_put)
1182*(.text.__ip_route_output_key)
1183*(.text.ip_flush_pending_frames)
1184*(.text.ip_compute_csum)
1185*(.text.ip_append_data)
1186*(.text.ioc_set_batching)
1187*(.text.invalidate_inode_pages)
1188*(.text.__invalidate_device)
1189*(.text.install_arg_page)
1190*(.text.in_sched_functions)
1191*(.text.inotify_unmount_inodes)
1192*(.text.init_once)
1193*(.text.init_cdrom_command)
1194*(.text.inet_stream_connect)
1195*(.text.inet_sk_rebuild_header)
1196*(.text.inet_csk_addr2sockaddr)
1197*(.text.inet_create)
1198*(.text.ifind)
1199*(.text.ide_setup_dma)
1200*(.text.ide_outsw)
1201*(.text.ide_fixstring)
1202*(.text.ide_dma_setup)
1203*(.text.ide_cdrom_packet)
1204*(.text.ide_cd_put)
1205*(.text.ide_build_sglist)
1206*(.text.i8259A_shutdown)
1207*(.text.hung_up_tty_ioctl)
1208*(.text.hrtimer_nanosleep)
1209*(.text.hrtimer_init)
1210*(.text.hrtimer_cancel)
1211*(.text.hash_futex)
1212*(.text.group_send_sig_info)
1213*(.text.grab_cache_page_nowait)
1214*(.text.get_wchan)
1215*(.text.get_stack)
1216*(.text.get_page_state)
1217*(.text.getnstimeofday)
1218*(.text.get_node)
1219*(.text.get_kprobe)
1220*(.text.generic_unplug_device)
1221*(.text.free_task)
1222*(.text.frag_show)
1223*(.text.find_next_zero_string)
1224*(.text.filp_open)
1225*(.text.fillonedir)
1226*(.text.exit_io_context)
1227*(.text.exit_idle)
1228*(.text.exact_lock)
1229*(.text.eth_header)
1230*(.text.dummy_unregister_security)
1231*(.text.dummy_socket_post_create)
1232*(.text.dummy_socket_listen)
1233*(.text.dummy_quota_on)
1234*(.text.dummy_inode_follow_link)
1235*(.text.dummy_file_receive)
1236*(.text.dummy_file_mprotect)
1237*(.text.dummy_file_lock)
1238*(.text.dummy_file_ioctl)
1239*(.text.dummy_bprm_post_apply_creds)
1240*(.text.do_writepages)
1241*(.text.__down_interruptible)
1242*(.text.do_notify_resume)
1243*(.text.do_acct_process)
1244*(.text.del_timer_sync)
1245*(.text.default_rebuild_header)
1246*(.text.d_callback)
1247*(.text.dcache_readdir)
1248*(.text.ctrl_dumpfamily)
1249*(.text.cpuset_rmdir)
1250*(.text.copy_strings_kernel)
1251*(.text.con_write_room)
1252*(.text.complete_all)
1253*(.text.collect_sigign_sigcatch)
1254*(.text.clear_user)
1255*(.text.check_unthrottle)
1256*(.text.cdrom_release)
1257*(.text.cdrom_newpc_intr)
1258*(.text.cdrom_ioctl)
1259*(.text.cdrom_check_status)
1260*(.text.cdev_put)
1261*(.text.cdev_add)
1262*(.text.cap_ptrace)
1263*(.text.cap_bprm_secureexec)
1264*(.text.cache_alloc_refill)
1265*(.text.bmap)
1266*(.text.blk_run_queue)
1267*(.text.blk_queue_dma_alignment)
1268*(.text.blk_ordered_req_seq)
1269*(.text.blk_backing_dev_unplug)
1270*(.text.__bitmap_subset)
1271*(.text.__bitmap_and)
1272*(.text.bio_unmap_user)
1273*(.text.__bforget)
1274*(.text.bd_forget)
1275*(.text.bad_pipe_w)
1276*(.text.bad_get_user)
1277*(.text.audit_free)
1278*(.text.anon_vma_ctor)
1279*(.text.anon_pipe_buf_map)
1280*(.text.alloc_sock_iocb)
1281*(.text.alloc_fdset)
1282*(.text.aio_kick_handler)
1283*(.text.__add_entropy_words)
1284*(.text.add_disk_randomness)
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 0b3603adf56d..47496a40e84f 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -11,120 +11,54 @@
11#include <linux/threads.h> 11#include <linux/threads.h>
12#include <linux/cpumask.h> 12#include <linux/cpumask.h>
13#include <linux/string.h> 13#include <linux/string.h>
14#include <linux/module.h>
14#include <linux/kernel.h> 15#include <linux/kernel.h>
15#include <linux/ctype.h> 16#include <linux/ctype.h>
16#include <linux/init.h> 17#include <linux/init.h>
17#include <linux/module.h>
18 18
19#include <asm/smp.h> 19#include <asm/smp.h>
20#include <asm/ipi.h> 20#include <asm/ipi.h>
21#include <asm/genapic.h>
21 22
22#if defined(CONFIG_ACPI) 23#ifdef CONFIG_ACPI
23#include <acpi/acpi_bus.h> 24#include <acpi/acpi_bus.h>
24#endif 25#endif
25 26
26/* which logical CPU number maps to which CPU (physical APIC ID) */ 27/* which logical CPU number maps to which CPU (physical APIC ID) */
27u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; 28u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly
29 = { [0 ... NR_CPUS-1] = BAD_APICID };
28EXPORT_SYMBOL(x86_cpu_to_apicid); 30EXPORT_SYMBOL(x86_cpu_to_apicid);
29u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
30 31
31extern struct genapic apic_cluster; 32u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
32extern struct genapic apic_flat;
33extern struct genapic apic_physflat;
34 33
35struct genapic *genapic = &apic_flat; 34struct genapic __read_mostly *genapic = &apic_flat;
36struct genapic *genapic_force;
37 35
38/* 36/*
39 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 37 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
40 */ 38 */
41void __init clustered_apic_check(void) 39void __init setup_apic_routing(void)
42{ 40{
43 long i; 41#ifdef CONFIG_ACPI
44 u8 clusters, max_cluster;
45 u8 id;
46 u8 cluster_cnt[NUM_APIC_CLUSTERS];
47 int max_apic = 0;
48
49 /* genapic selection can be forced because of certain quirks.
50 */
51 if (genapic_force) {
52 genapic = genapic_force;
53 goto print;
54 }
55
56#if defined(CONFIG_ACPI)
57 /* 42 /*
58 * Some x86_64 machines use physical APIC mode regardless of how many 43 * Quirk: some x86_64 machines can only use physical APIC mode
59 * procs/clusters are present (x86_64 ES7000 is an example). 44 * regardless of how many processors are present (x86_64 ES7000
45 * is an example).
60 */ 46 */
61 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID) 47 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
62 if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) { 48 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
63 genapic = &apic_cluster;
64 goto print;
65 }
66#endif
67
68 memset(cluster_cnt, 0, sizeof(cluster_cnt));
69 for (i = 0; i < NR_CPUS; i++) {
70 id = bios_cpu_apicid[i];
71 if (id == BAD_APICID)
72 continue;
73 if (id > max_apic)
74 max_apic = id;
75 cluster_cnt[APIC_CLUSTERID(id)]++;
76 }
77
78 /* Don't use clustered mode on AMD platforms. */
79 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
80 genapic = &apic_physflat; 49 genapic = &apic_physflat;
81#ifndef CONFIG_HOTPLUG_CPU 50 else
82 /* In the CPU hotplug case we cannot use broadcast mode
83 because that opens a race when a CPU is removed.
84 Stay at physflat mode in this case.
85 It is bad to do this unconditionally though. Once
86 we have ACPI platform support for CPU hotplug
87 we should detect hotplug capablity from ACPI tables and
88 only do this when really needed. -AK */
89 if (max_apic <= 8)
90 genapic = &apic_flat;
91#endif 51#endif
92 goto print;
93 }
94 52
95 clusters = 0; 53 if (cpus_weight(cpu_possible_map) <= 8)
96 max_cluster = 0;
97
98 for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
99 if (cluster_cnt[i] > 0) {
100 ++clusters;
101 if (cluster_cnt[i] > max_cluster)
102 max_cluster = cluster_cnt[i];
103 }
104 }
105
106 /*
107 * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode,
108 * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical
109 * else physical mode.
110 * (We don't use lowest priority delivery + HW APIC IRQ steering, so
111 * can ignore the clustered logical case and go straight to physical.)
112 */
113 if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) {
114#ifdef CONFIG_HOTPLUG_CPU
115 /* Don't use APIC shortcuts in CPU hotplug to avoid races */
116 genapic = &apic_physflat;
117#else
118 genapic = &apic_flat; 54 genapic = &apic_flat;
119#endif 55 else
120 } else 56 genapic = &apic_physflat;
121 genapic = &apic_cluster;
122 57
123print:
124 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); 58 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
125} 59}
126 60
127/* Same for both flat and clustered. */ 61/* Same for both flat and physical. */
128 62
129void send_IPI_self(int vector) 63void send_IPI_self(int vector)
130{ 64{
diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c
deleted file mode 100644
index 73d76308b955..000000000000
--- a/arch/x86_64/kernel/genapic_cluster.c
+++ /dev/null
@@ -1,137 +0,0 @@
1/*
2 * Copyright 2004 James Cleverdon, IBM.
3 * Subject to the GNU Public License, v.2
4 *
5 * Clustered APIC subarch code. Up to 255 CPUs, physical delivery.
6 * (A more realistic maximum is around 230 CPUs.)
7 *
8 * Hacked for x86-64 by James Cleverdon from i386 architecture code by
9 * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
10 * James Cleverdon.
11 */
12#include <linux/threads.h>
13#include <linux/cpumask.h>
14#include <linux/string.h>
15#include <linux/kernel.h>
16#include <linux/ctype.h>
17#include <linux/init.h>
18#include <asm/smp.h>
19#include <asm/ipi.h>
20
21
22/*
23 * Set up the logical destination ID.
24 *
25 * Intel recommends to set DFR, LDR and TPR before enabling
26 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
27 * document number 292116). So here it goes...
28 */
29static void cluster_init_apic_ldr(void)
30{
31 unsigned long val, id;
32 long i, count;
33 u8 lid;
34 u8 my_id = hard_smp_processor_id();
35 u8 my_cluster = APIC_CLUSTER(my_id);
36
37 /* Create logical APIC IDs by counting CPUs already in cluster. */
38 for (count = 0, i = NR_CPUS; --i >= 0; ) {
39 lid = x86_cpu_to_log_apicid[i];
40 if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
41 ++count;
42 }
43 /*
44 * We only have a 4 wide bitmap in cluster mode. There's no way
45 * to get above 60 CPUs and still give each one it's own bit.
46 * But, we're using physical IRQ delivery, so we don't care.
47 * Use bit 3 for the 4th through Nth CPU in each cluster.
48 */
49 if (count >= XAPIC_DEST_CPUS_SHIFT)
50 count = 3;
51 id = my_cluster | (1UL << count);
52 x86_cpu_to_log_apicid[smp_processor_id()] = id;
53 apic_write(APIC_DFR, APIC_DFR_CLUSTER);
54 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
55 val |= SET_APIC_LOGICAL_ID(id);
56 apic_write(APIC_LDR, val);
57}
58
59/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
60
61static cpumask_t cluster_target_cpus(void)
62{
63 return cpumask_of_cpu(0);
64}
65
66static cpumask_t cluster_vector_allocation_domain(int cpu)
67{
68 cpumask_t domain = CPU_MASK_NONE;
69 cpu_set(cpu, domain);
70 return domain;
71}
72
73static void cluster_send_IPI_mask(cpumask_t mask, int vector)
74{
75 send_IPI_mask_sequence(mask, vector);
76}
77
78static void cluster_send_IPI_allbutself(int vector)
79{
80 cpumask_t mask = cpu_online_map;
81
82 cpu_clear(smp_processor_id(), mask);
83
84 if (!cpus_empty(mask))
85 cluster_send_IPI_mask(mask, vector);
86}
87
88static void cluster_send_IPI_all(int vector)
89{
90 cluster_send_IPI_mask(cpu_online_map, vector);
91}
92
93static int cluster_apic_id_registered(void)
94{
95 return 1;
96}
97
98static unsigned int cluster_cpu_mask_to_apicid(cpumask_t cpumask)
99{
100 int cpu;
101
102 /*
103 * We're using fixed IRQ delivery, can only return one phys APIC ID.
104 * May as well be the first.
105 */
106 cpu = first_cpu(cpumask);
107 if ((unsigned)cpu < NR_CPUS)
108 return x86_cpu_to_apicid[cpu];
109 else
110 return BAD_APICID;
111}
112
113/* cpuid returns the value latched in the HW at reset, not the APIC ID
114 * register's value. For any box whose BIOS changes APIC IDs, like
115 * clustered APIC systems, we must use hard_smp_processor_id.
116 *
117 * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
118 */
119static unsigned int phys_pkg_id(int index_msb)
120{
121 return hard_smp_processor_id() >> index_msb;
122}
123
124struct genapic apic_cluster = {
125 .name = "clustered",
126 .int_delivery_mode = dest_Fixed,
127 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
128 .target_cpus = cluster_target_cpus,
129 .vector_allocation_domain = cluster_vector_allocation_domain,
130 .apic_id_registered = cluster_apic_id_registered,
131 .init_apic_ldr = cluster_init_apic_ldr,
132 .send_IPI_all = cluster_send_IPI_all,
133 .send_IPI_allbutself = cluster_send_IPI_allbutself,
134 .send_IPI_mask = cluster_send_IPI_mask,
135 .cpu_mask_to_apicid = cluster_cpu_mask_to_apicid,
136 .phys_pkg_id = phys_pkg_id,
137};
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 7c01db8fa9d1..ecb01eefdd27 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -8,6 +8,7 @@
8 * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and 8 * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
9 * James Cleverdon. 9 * James Cleverdon.
10 */ 10 */
11#include <linux/errno.h>
11#include <linux/threads.h> 12#include <linux/threads.h>
12#include <linux/cpumask.h> 13#include <linux/cpumask.h>
13#include <linux/string.h> 14#include <linux/string.h>
@@ -16,6 +17,7 @@
16#include <linux/init.h> 17#include <linux/init.h>
17#include <asm/smp.h> 18#include <asm/smp.h>
18#include <asm/ipi.h> 19#include <asm/ipi.h>
20#include <asm/genapic.h>
19 21
20static cpumask_t flat_target_cpus(void) 22static cpumask_t flat_target_cpus(void)
21{ 23{
@@ -60,31 +62,10 @@ static void flat_init_apic_ldr(void)
60static void flat_send_IPI_mask(cpumask_t cpumask, int vector) 62static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
61{ 63{
62 unsigned long mask = cpus_addr(cpumask)[0]; 64 unsigned long mask = cpus_addr(cpumask)[0];
63 unsigned long cfg;
64 unsigned long flags; 65 unsigned long flags;
65 66
66 local_irq_save(flags); 67 local_irq_save(flags);
67 68 __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);
68 /*
69 * Wait for idle.
70 */
71 apic_wait_icr_idle();
72
73 /*
74 * prepare target chip field
75 */
76 cfg = __prepare_ICR2(mask);
77 apic_write(APIC_ICR2, cfg);
78
79 /*
80 * program the ICR
81 */
82 cfg = __prepare_ICR(0, vector, APIC_DEST_LOGICAL);
83
84 /*
85 * Send the IPI. The write to APIC_ICR fires this off.
86 */
87 apic_write(APIC_ICR, cfg);
88 local_irq_restore(flags); 69 local_irq_restore(flags);
89} 70}
90 71
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 598a4d0351fc..1fab487dee86 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -5,6 +5,7 @@
5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> 5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
6 * Copyright (C) 2000 Karsten Keil <kkeil@suse.de> 6 * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
7 * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de> 7 * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
8 * Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
8 */ 9 */
9 10
10 11
@@ -13,97 +14,131 @@
13#include <linux/init.h> 14#include <linux/init.h>
14#include <asm/desc.h> 15#include <asm/desc.h>
15#include <asm/segment.h> 16#include <asm/segment.h>
17#include <asm/pgtable.h>
16#include <asm/page.h> 18#include <asm/page.h>
17#include <asm/msr.h> 19#include <asm/msr.h>
18#include <asm/cache.h> 20#include <asm/cache.h>
19 21
20/* we are not able to switch in one step to the final KERNEL ADRESS SPACE 22/* we are not able to switch in one step to the final KERNEL ADRESS SPACE
21 * because we need identity-mapped pages on setup so define __START_KERNEL to 23 * because we need identity-mapped pages.
22 * 0x100000 for this stage 24 *
23 *
24 */ 25 */
25 26
26 .text 27 .text
27 .section .bootstrap.text 28 .section .bootstrap.text
28 .code32 29 .code64
29 .globl startup_32 30 .globl startup_64
30/* %bx: 1 if coming from smp trampoline on secondary cpu */ 31startup_64:
31startup_32: 32
32
33 /* 33 /*
34 * At this point the CPU runs in 32bit protected mode (CS.D = 1) with 34 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
35 * paging disabled and the point of this file is to switch to 64bit 35 * and someone has loaded an identity mapped page table
36 * long mode with a kernel mapping for kerneland to jump into the 36 * for us. These identity mapped page tables map all of the
37 * kernel virtual addresses. 37 * kernel pages and possibly all of memory.
38 * There is no stack until we set one up. 38 *
39 * %esi holds a physical pointer to real_mode_data.
40 *
41 * We come here either directly from a 64bit bootloader, or from
42 * arch/x86_64/boot/compressed/head.S.
43 *
44 * We only come here initially at boot nothing else comes here.
45 *
46 * Since we may be loaded at an address different from what we were
47 * compiled to run at we first fixup the physical addresses in our page
48 * tables and then reload them.
39 */ 49 */
40 50
41 /* Initialize the %ds segment register */ 51 /* Compute the delta between the address I am compiled to run at and the
42 movl $__KERNEL_DS,%eax 52 * address I am actually running at.
43 movl %eax,%ds
44
45 /* Load new GDT with the 64bit segments using 32bit descriptor */
46 lgdt pGDT32 - __START_KERNEL_map
47
48 /* If the CPU doesn't support CPUID this will double fault.
49 * Unfortunately it is hard to check for CPUID without a stack.
50 */ 53 */
51 54 leaq _text(%rip), %rbp
52 /* Check if extended functions are implemented */ 55 subq $_text - __START_KERNEL_map, %rbp
53 movl $0x80000000, %eax 56
54 cpuid 57 /* Is the address not 2M aligned? */
55 cmpl $0x80000000, %eax 58 movq %rbp, %rax
56 jbe no_long_mode 59 andl $~LARGE_PAGE_MASK, %eax
57 /* Check if long mode is implemented */ 60 testl %eax, %eax
58 mov $0x80000001, %eax 61 jnz bad_address
59 cpuid 62
60 btl $29, %edx 63 /* Is the address too large? */
61 jnc no_long_mode 64 leaq _text(%rip), %rdx
62 65 movq $PGDIR_SIZE, %rax
63 /* 66 cmpq %rax, %rdx
64 * Prepare for entering 64bits mode 67 jae bad_address
68
69 /* Fixup the physical addresses in the page table
65 */ 70 */
71 addq %rbp, init_level4_pgt + 0(%rip)
72 addq %rbp, init_level4_pgt + (258*8)(%rip)
73 addq %rbp, init_level4_pgt + (511*8)(%rip)
74
75 addq %rbp, level3_ident_pgt + 0(%rip)
76 addq %rbp, level3_kernel_pgt + (510*8)(%rip)
77
78 /* Add an Identity mapping if I am above 1G */
79 leaq _text(%rip), %rdi
80 andq $LARGE_PAGE_MASK, %rdi
81
82 movq %rdi, %rax
83 shrq $PUD_SHIFT, %rax
84 andq $(PTRS_PER_PUD - 1), %rax
85 jz ident_complete
86
87 leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
88 leaq level3_ident_pgt(%rip), %rbx
89 movq %rdx, 0(%rbx, %rax, 8)
90
91 movq %rdi, %rax
92 shrq $PMD_SHIFT, %rax
93 andq $(PTRS_PER_PMD - 1), %rax
94 leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx
95 leaq level2_spare_pgt(%rip), %rbx
96 movq %rdx, 0(%rbx, %rax, 8)
97ident_complete:
98
99 /* Fixup the kernel text+data virtual addresses
100 */
101 leaq level2_kernel_pgt(%rip), %rdi
102 leaq 4096(%rdi), %r8
103 /* See if it is a valid page table entry */
1041: testq $1, 0(%rdi)
105 jz 2f
106 addq %rbp, 0(%rdi)
107 /* Go to the next page */
1082: addq $8, %rdi
109 cmp %r8, %rdi
110 jne 1b
111
112 /* Fixup phys_base */
113 addq %rbp, phys_base(%rip)
66 114
67 /* Enable PAE mode */ 115#ifdef CONFIG_SMP
68 xorl %eax, %eax 116 addq %rbp, trampoline_level4_pgt + 0(%rip)
69 btsl $5, %eax 117 addq %rbp, trampoline_level4_pgt + (511*8)(%rip)
70 movl %eax, %cr4 118#endif
71 119#ifdef CONFIG_ACPI_SLEEP
72 /* Setup early boot stage 4 level pagetables */ 120 addq %rbp, wakeup_level4_pgt + 0(%rip)
73 movl $(boot_level4_pgt - __START_KERNEL_map), %eax 121 addq %rbp, wakeup_level4_pgt + (511*8)(%rip)
74 movl %eax, %cr3 122#endif
75
76 /* Setup EFER (Extended Feature Enable Register) */
77 movl $MSR_EFER, %ecx
78 rdmsr
79
80 /* Enable Long Mode */
81 btsl $_EFER_LME, %eax
82
83 /* Make changes effective */
84 wrmsr
85 123
86 xorl %eax, %eax 124 /* Due to ENTRY(), sometimes the empty space gets filled with
87 btsl $31, %eax /* Enable paging and in turn activate Long Mode */ 125 * zeros. Better take a jmp than relying on empty space being
88 btsl $0, %eax /* Enable protected mode */ 126 * filled with 0x90 (nop)
89 /* Make changes effective */
90 movl %eax, %cr0
91 /*
92 * At this point we're in long mode but in 32bit compatibility mode
93 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
94 * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
95 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
96 */ 127 */
97 ljmp $__KERNEL_CS, $(startup_64 - __START_KERNEL_map) 128 jmp secondary_startup_64
98 129ENTRY(secondary_startup_64)
99 .code64 130 /*
100 .org 0x100 131 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
101 .globl startup_64 132 * and someone has loaded a mapped page table.
102startup_64: 133 *
103 /* We come here either from startup_32 134 * %esi holds a physical pointer to real_mode_data.
104 * or directly from a 64bit bootloader. 135 *
105 * Since we may have come directly from a bootloader we 136 * We come here either from startup_64 (using physical addresses)
106 * reload the page tables here. 137 * or from trampoline.S (using virtual addresses).
138 *
139 * Using virtual addresses from trampoline.S removes the need
140 * to have any identity mapped pages in the kernel page table
141 * after the boot processor executes this code.
107 */ 142 */
108 143
109 /* Enable PAE mode and PGE */ 144 /* Enable PAE mode and PGE */
@@ -113,9 +148,15 @@ startup_64:
113 movq %rax, %cr4 148 movq %rax, %cr4
114 149
115 /* Setup early boot stage 4 level pagetables. */ 150 /* Setup early boot stage 4 level pagetables. */
116 movq $(boot_level4_pgt - __START_KERNEL_map), %rax 151 movq $(init_level4_pgt - __START_KERNEL_map), %rax
152 addq phys_base(%rip), %rax
117 movq %rax, %cr3 153 movq %rax, %cr3
118 154
155 /* Ensure I am executing from virtual addresses */
156 movq $1f, %rax
157 jmp *%rax
1581:
159
119 /* Check if nx is implemented */ 160 /* Check if nx is implemented */
120 movl $0x80000001, %eax 161 movl $0x80000001, %eax
121 cpuid 162 cpuid
@@ -124,17 +165,11 @@ startup_64:
124 /* Setup EFER (Extended Feature Enable Register) */ 165 /* Setup EFER (Extended Feature Enable Register) */
125 movl $MSR_EFER, %ecx 166 movl $MSR_EFER, %ecx
126 rdmsr 167 rdmsr
127 168 btsl $_EFER_SCE, %eax /* Enable System Call */
128 /* Enable System Call */ 169 btl $20,%edi /* No Execute supported? */
129 btsl $_EFER_SCE, %eax
130
131 /* No Execute supported? */
132 btl $20,%edi
133 jnc 1f 170 jnc 1f
134 btsl $_EFER_NX, %eax 171 btsl $_EFER_NX, %eax
1351: 1721: wrmsr /* Make changes effective */
136 /* Make changes effective */
137 wrmsr
138 173
139 /* Setup cr0 */ 174 /* Setup cr0 */
140#define CR0_PM 1 /* protected mode */ 175#define CR0_PM 1 /* protected mode */
@@ -161,7 +196,7 @@ startup_64:
161 * addresses where we're currently running on. We have to do that here 196 * addresses where we're currently running on. We have to do that here
162 * because in 32bit we couldn't load a 64bit linear address. 197 * because in 32bit we couldn't load a 64bit linear address.
163 */ 198 */
164 lgdt cpu_gdt_descr 199 lgdt cpu_gdt_descr(%rip)
165 200
166 /* set up data segments. actually 0 would do too */ 201 /* set up data segments. actually 0 would do too */
167 movl $__KERNEL_DS,%eax 202 movl $__KERNEL_DS,%eax
@@ -212,6 +247,9 @@ initial_code:
212init_rsp: 247init_rsp:
213 .quad init_thread_union+THREAD_SIZE-8 248 .quad init_thread_union+THREAD_SIZE-8
214 249
250bad_address:
251 jmp bad_address
252
215ENTRY(early_idt_handler) 253ENTRY(early_idt_handler)
216 cmpl $2,early_recursion_flag(%rip) 254 cmpl $2,early_recursion_flag(%rip)
217 jz 1f 255 jz 1f
@@ -240,110 +278,66 @@ early_idt_msg:
240early_idt_ripmsg: 278early_idt_ripmsg:
241 .asciz "RIP %s\n" 279 .asciz "RIP %s\n"
242 280
243.code32 281.balign PAGE_SIZE
244ENTRY(no_long_mode)
245 /* This isn't an x86-64 CPU so hang */
2461:
247 jmp 1b
248
249.org 0xf00
250 .globl pGDT32
251pGDT32:
252 .word gdt_end-cpu_gdt_table-1
253 .long cpu_gdt_table-__START_KERNEL_map
254
255.org 0xf10
256ljumpvector:
257 .long startup_64-__START_KERNEL_map
258 .word __KERNEL_CS
259 282
260ENTRY(stext)
261ENTRY(_stext)
262
263 $page = 0
264#define NEXT_PAGE(name) \ 283#define NEXT_PAGE(name) \
265 $page = $page + 1; \ 284 .balign PAGE_SIZE; \
266 .org $page * 0x1000; \
267 phys_/**/name = $page * 0x1000 + __PHYSICAL_START; \
268ENTRY(name) 285ENTRY(name)
269 286
287/* Automate the creation of 1 to 1 mapping pmd entries */
288#define PMDS(START, PERM, COUNT) \
289 i = 0 ; \
290 .rept (COUNT) ; \
291 .quad (START) + (i << 21) + (PERM) ; \
292 i = i + 1 ; \
293 .endr
294
295 /*
296 * This default setting generates an ident mapping at address 0x100000
297 * and a mapping for the kernel that precisely maps virtual address
298 * 0xffffffff80000000 to physical address 0x000000. (always using
299 * 2Mbyte large pages provided by PAE mode)
300 */
270NEXT_PAGE(init_level4_pgt) 301NEXT_PAGE(init_level4_pgt)
271 /* This gets initialized in x86_64_start_kernel */ 302 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
272 .fill 512,8,0 303 .fill 257,8,0
304 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
305 .fill 252,8,0
306 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
307 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
273 308
274NEXT_PAGE(level3_ident_pgt) 309NEXT_PAGE(level3_ident_pgt)
275 .quad phys_level2_ident_pgt | 0x007 310 .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
276 .fill 511,8,0 311 .fill 511,8,0
277 312
278NEXT_PAGE(level3_kernel_pgt) 313NEXT_PAGE(level3_kernel_pgt)
279 .fill 510,8,0 314 .fill 510,8,0
280 /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ 315 /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
281 .quad phys_level2_kernel_pgt | 0x007 316 .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
282 .fill 1,8,0 317 .fill 1,8,0
283 318
284NEXT_PAGE(level2_ident_pgt) 319NEXT_PAGE(level2_ident_pgt)
285 /* 40MB for bootup. */ 320 /* Since I easily can, map the first 1G.
286 i = 0 321 * Don't set NX because code runs from these pages.
287 .rept 20 322 */
288 .quad i << 21 | 0x083 323 PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
289 i = i + 1 324
290 .endr
291 /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */
292 .globl temp_boot_pmds
293temp_boot_pmds:
294 .fill 492,8,0
295
296NEXT_PAGE(level2_kernel_pgt) 325NEXT_PAGE(level2_kernel_pgt)
297 /* 40MB kernel mapping. The kernel code cannot be bigger than that. 326 /* 40MB kernel mapping. The kernel code cannot be bigger than that.
298 When you change this change KERNEL_TEXT_SIZE in page.h too. */ 327 When you change this change KERNEL_TEXT_SIZE in page.h too. */
299 /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */ 328 /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
300 i = 0 329 PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
301 .rept 20 330 KERNEL_TEXT_SIZE/PMD_SIZE)
302 .quad i << 21 | 0x183
303 i = i + 1
304 .endr
305 /* Module mapping starts here */ 331 /* Module mapping starts here */
306 .fill 492,8,0 332 .fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
307 333
308NEXT_PAGE(level3_physmem_pgt) 334NEXT_PAGE(level2_spare_pgt)
309 .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */ 335 .fill 512,8,0
310 .fill 511,8,0
311 336
337#undef PMDS
312#undef NEXT_PAGE 338#undef NEXT_PAGE
313 339
314 .data 340 .data
315
316#ifdef CONFIG_ACPI_SLEEP
317 .align PAGE_SIZE
318ENTRY(wakeup_level4_pgt)
319 .quad phys_level3_ident_pgt | 0x007
320 .fill 255,8,0
321 .quad phys_level3_physmem_pgt | 0x007
322 .fill 254,8,0
323 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
324 .quad phys_level3_kernel_pgt | 0x007
325#endif
326
327#ifndef CONFIG_HOTPLUG_CPU
328 __INITDATA
329#endif
330 /*
331 * This default setting generates an ident mapping at address 0x100000
332 * and a mapping for the kernel that precisely maps virtual address
333 * 0xffffffff80000000 to physical address 0x000000. (always using
334 * 2Mbyte large pages provided by PAE mode)
335 */
336 .align PAGE_SIZE
337ENTRY(boot_level4_pgt)
338 .quad phys_level3_ident_pgt | 0x007
339 .fill 255,8,0
340 .quad phys_level3_physmem_pgt | 0x007
341 .fill 254,8,0
342 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
343 .quad phys_level3_kernel_pgt | 0x007
344
345 .data
346
347 .align 16 341 .align 16
348 .globl cpu_gdt_descr 342 .globl cpu_gdt_descr
349cpu_gdt_descr: 343cpu_gdt_descr:
@@ -357,6 +351,10 @@ gdt:
357 .endr 351 .endr
358#endif 352#endif
359 353
354ENTRY(phys_base)
355 /* This must match the first entry in level2_kernel_pgt */
356 .quad 0x0000000000000000
357
360/* We need valid kernel segments for data and code in long mode too 358/* We need valid kernel segments for data and code in long mode too
361 * IRET will check the segment types kkeil 2000/10/28 359 * IRET will check the segment types kkeil 2000/10/28
362 * Also sysret mandates a special GDT layout 360 * Also sysret mandates a special GDT layout
@@ -370,13 +368,13 @@ gdt:
370 368
371ENTRY(cpu_gdt_table) 369ENTRY(cpu_gdt_table)
372 .quad 0x0000000000000000 /* NULL descriptor */ 370 .quad 0x0000000000000000 /* NULL descriptor */
371 .quad 0x00cf9b000000ffff /* __KERNEL32_CS */
372 .quad 0x00af9b000000ffff /* __KERNEL_CS */
373 .quad 0x00cf93000000ffff /* __KERNEL_DS */
374 .quad 0x00cffb000000ffff /* __USER32_CS */
375 .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */
376 .quad 0x00affb000000ffff /* __USER_CS */
373 .quad 0x0 /* unused */ 377 .quad 0x0 /* unused */
374 .quad 0x00af9a000000ffff /* __KERNEL_CS */
375 .quad 0x00cf92000000ffff /* __KERNEL_DS */
376 .quad 0x00cffa000000ffff /* __USER32_CS */
377 .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */
378 .quad 0x00affa000000ffff /* __USER_CS */
379 .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
380 .quad 0,0 /* TSS */ 378 .quad 0,0 /* TSS */
381 .quad 0,0 /* LDT */ 379 .quad 0,0 /* LDT */
382 .quad 0,0,0 /* three TLS descriptors */ 380 .quad 0,0,0 /* three TLS descriptors */
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 5f197b0a330a..213d90e04755 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -18,8 +18,16 @@
18#include <asm/setup.h> 18#include <asm/setup.h>
19#include <asm/desc.h> 19#include <asm/desc.h>
20#include <asm/pgtable.h> 20#include <asm/pgtable.h>
21#include <asm/tlbflush.h>
21#include <asm/sections.h> 22#include <asm/sections.h>
22 23
24static void __init zap_identity_mappings(void)
25{
26 pgd_t *pgd = pgd_offset_k(0UL);
27 pgd_clear(pgd);
28 __flush_tlb();
29}
30
23/* Don't add a printk in there. printk relies on the PDA which is not initialized 31/* Don't add a printk in there. printk relies on the PDA which is not initialized
24 yet. */ 32 yet. */
25static void __init clear_bss(void) 33static void __init clear_bss(void)
@@ -29,25 +37,24 @@ static void __init clear_bss(void)
29} 37}
30 38
31#define NEW_CL_POINTER 0x228 /* Relative to real mode data */ 39#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
32#define OLD_CL_MAGIC_ADDR 0x90020 40#define OLD_CL_MAGIC_ADDR 0x20
33#define OLD_CL_MAGIC 0xA33F 41#define OLD_CL_MAGIC 0xA33F
34#define OLD_CL_BASE_ADDR 0x90000 42#define OLD_CL_OFFSET 0x22
35#define OLD_CL_OFFSET 0x90022
36 43
37static void __init copy_bootdata(char *real_mode_data) 44static void __init copy_bootdata(char *real_mode_data)
38{ 45{
39 int new_data; 46 unsigned long new_data;
40 char * command_line; 47 char * command_line;
41 48
42 memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE); 49 memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
43 new_data = *(int *) (x86_boot_params + NEW_CL_POINTER); 50 new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER);
44 if (!new_data) { 51 if (!new_data) {
45 if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) { 52 if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) {
46 return; 53 return;
47 } 54 }
48 new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; 55 new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET);
49 } 56 }
50 command_line = (char *) ((u64)(new_data)); 57 command_line = __va(new_data);
51 memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); 58 memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
52} 59}
53 60
@@ -55,26 +62,30 @@ void __init x86_64_start_kernel(char * real_mode_data)
55{ 62{
56 int i; 63 int i;
57 64
65 /*
66 * Make sure kernel is aligned to 2MB address. Catching it at compile
67 * time is better. Change your config file and compile the kernel
68 * for a 2MB aligned address (CONFIG_PHYSICAL_START)
69 */
70 BUILD_BUG_ON(CONFIG_PHYSICAL_START & (__KERNEL_ALIGN - 1));
71
58 /* clear bss before set_intr_gate with early_idt_handler */ 72 /* clear bss before set_intr_gate with early_idt_handler */
59 clear_bss(); 73 clear_bss();
60 74
75 /* Make NULL pointers segfault */
76 zap_identity_mappings();
77
61 for (i = 0; i < IDT_ENTRIES; i++) 78 for (i = 0; i < IDT_ENTRIES; i++)
62 set_intr_gate(i, early_idt_handler); 79 set_intr_gate(i, early_idt_handler);
63 asm volatile("lidt %0" :: "m" (idt_descr)); 80 asm volatile("lidt %0" :: "m" (idt_descr));
64 81
65 early_printk("Kernel alive\n"); 82 early_printk("Kernel alive\n");
66 83
67 /*
68 * switch to init_level4_pgt from boot_level4_pgt
69 */
70 memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
71 asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
72
73 for (i = 0; i < NR_CPUS; i++) 84 for (i = 0; i < NR_CPUS; i++)
74 cpu_pda(i) = &boot_cpu_pda[i]; 85 cpu_pda(i) = &boot_cpu_pda[i];
75 86
76 pda_init(0); 87 pda_init(0);
77 copy_bootdata(real_mode_data); 88 copy_bootdata(__va(real_mode_data));
78#ifdef CONFIG_SMP 89#ifdef CONFIG_SMP
79 cpu_set(0, cpu_online_map); 90 cpu_set(0, cpu_online_map);
80#endif 91#endif
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index b7d2b76b92d4..2a2df14dab7e 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -907,10 +907,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
907 enable_8259A_irq(0); 907 enable_8259A_irq(0);
908} 908}
909 909
910void __init UNEXPECTED_IO_APIC(void)
911{
912}
913
914void __apicdebuginit print_IO_APIC(void) 910void __apicdebuginit print_IO_APIC(void)
915{ 911{
916 int apic, i; 912 int apic, i;
@@ -946,40 +942,16 @@ void __apicdebuginit print_IO_APIC(void)
946 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); 942 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
947 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 943 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
948 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 944 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
949 if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
950 UNEXPECTED_IO_APIC();
951 945
952 printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01); 946 printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
953 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); 947 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
954 if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
955 (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
956 (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
957 (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
958 (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
959 (reg_01.bits.entries != 0x2E) &&
960 (reg_01.bits.entries != 0x3F) &&
961 (reg_01.bits.entries != 0x03)
962 )
963 UNEXPECTED_IO_APIC();
964 948
965 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); 949 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
966 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); 950 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
967 if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
968 (reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */
969 (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
970 (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
971 (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
972 (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
973 )
974 UNEXPECTED_IO_APIC();
975 if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
976 UNEXPECTED_IO_APIC();
977 951
978 if (reg_01.bits.version >= 0x10) { 952 if (reg_01.bits.version >= 0x10) {
979 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); 953 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
980 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); 954 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
981 if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
982 UNEXPECTED_IO_APIC();
983 } 955 }
984 956
985 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 957 printk(KERN_DEBUG ".... IRQ redirection table:\n");
@@ -1407,8 +1379,7 @@ static void irq_complete_move(unsigned int irq)
1407 1379
1408 vector = ~get_irq_regs()->orig_rax; 1380 vector = ~get_irq_regs()->orig_rax;
1409 me = smp_processor_id(); 1381 me = smp_processor_id();
1410 if ((vector == cfg->vector) && 1382 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
1411 cpu_isset(smp_processor_id(), cfg->domain)) {
1412 cpumask_t cleanup_mask; 1383 cpumask_t cleanup_mask;
1413 1384
1414 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 1385 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
diff --git a/arch/x86_64/kernel/ioport.c b/arch/x86_64/kernel/ioport.c
index 745b1f0f494e..387d347b0e07 100644
--- a/arch/x86_64/kernel/ioport.c
+++ b/arch/x86_64/kernel/ioport.c
@@ -16,6 +16,7 @@
16#include <linux/stddef.h> 16#include <linux/stddef.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/thread_info.h> 18#include <linux/thread_info.h>
19#include <linux/syscalls.h>
19 20
20/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ 21/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
21static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) 22static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c
index 0497e3bd5bff..a8bb33c1a8f2 100644
--- a/arch/x86_64/kernel/machine_kexec.c
+++ b/arch/x86_64/kernel/machine_kexec.c
@@ -191,19 +191,19 @@ NORET_TYPE void machine_kexec(struct kimage *image)
191 191
192 page_list[PA_CONTROL_PAGE] = __pa(control_page); 192 page_list[PA_CONTROL_PAGE] = __pa(control_page);
193 page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; 193 page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
194 page_list[PA_PGD] = __pa(kexec_pgd); 194 page_list[PA_PGD] = __pa_symbol(&kexec_pgd);
195 page_list[VA_PGD] = (unsigned long)kexec_pgd; 195 page_list[VA_PGD] = (unsigned long)kexec_pgd;
196 page_list[PA_PUD_0] = __pa(kexec_pud0); 196 page_list[PA_PUD_0] = __pa_symbol(&kexec_pud0);
197 page_list[VA_PUD_0] = (unsigned long)kexec_pud0; 197 page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
198 page_list[PA_PMD_0] = __pa(kexec_pmd0); 198 page_list[PA_PMD_0] = __pa_symbol(&kexec_pmd0);
199 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; 199 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
200 page_list[PA_PTE_0] = __pa(kexec_pte0); 200 page_list[PA_PTE_0] = __pa_symbol(&kexec_pte0);
201 page_list[VA_PTE_0] = (unsigned long)kexec_pte0; 201 page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
202 page_list[PA_PUD_1] = __pa(kexec_pud1); 202 page_list[PA_PUD_1] = __pa_symbol(&kexec_pud1);
203 page_list[VA_PUD_1] = (unsigned long)kexec_pud1; 203 page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
204 page_list[PA_PMD_1] = __pa(kexec_pmd1); 204 page_list[PA_PMD_1] = __pa_symbol(&kexec_pmd1);
205 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; 205 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
206 page_list[PA_PTE_1] = __pa(kexec_pte1); 206 page_list[PA_PTE_1] = __pa_symbol(&kexec_pte1);
207 page_list[VA_PTE_1] = (unsigned long)kexec_pte1; 207 page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
208 208
209 page_list[PA_TABLE_PAGE] = 209 page_list[PA_TABLE_PAGE] =
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 8011a8e1c7d4..fa2672682477 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -323,10 +323,13 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
323#endif /* CONFIG_X86_MCE_INTEL */ 323#endif /* CONFIG_X86_MCE_INTEL */
324 324
325/* 325/*
326 * Periodic polling timer for "silent" machine check errors. 326 * Periodic polling timer for "silent" machine check errors. If the
327 * poller finds an MCE, poll 2x faster. When the poller finds no more
328 * errors, poll 2x slower (up to check_interval seconds).
327 */ 329 */
328 330
329static int check_interval = 5 * 60; /* 5 minutes */ 331static int check_interval = 5 * 60; /* 5 minutes */
332static int next_interval; /* in jiffies */
330static void mcheck_timer(struct work_struct *work); 333static void mcheck_timer(struct work_struct *work);
331static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); 334static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
332 335
@@ -339,7 +342,6 @@ static void mcheck_check_cpu(void *info)
339static void mcheck_timer(struct work_struct *work) 342static void mcheck_timer(struct work_struct *work)
340{ 343{
341 on_each_cpu(mcheck_check_cpu, NULL, 1, 1); 344 on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
342 schedule_delayed_work(&mcheck_work, check_interval * HZ);
343 345
344 /* 346 /*
345 * It's ok to read stale data here for notify_user and 347 * It's ok to read stale data here for notify_user and
@@ -349,17 +351,30 @@ static void mcheck_timer(struct work_struct *work)
349 * writes. 351 * writes.
350 */ 352 */
351 if (notify_user && console_logged) { 353 if (notify_user && console_logged) {
354 static unsigned long last_print;
355 unsigned long now = jiffies;
356
357 /* if we logged an MCE, reduce the polling interval */
358 next_interval = max(next_interval/2, HZ/100);
352 notify_user = 0; 359 notify_user = 0;
353 clear_bit(0, &console_logged); 360 clear_bit(0, &console_logged);
354 printk(KERN_INFO "Machine check events logged\n"); 361 if (time_after_eq(now, last_print + (check_interval*HZ))) {
362 last_print = now;
363 printk(KERN_INFO "Machine check events logged\n");
364 }
365 } else {
366 next_interval = min(next_interval*2, check_interval*HZ);
355 } 367 }
368
369 schedule_delayed_work(&mcheck_work, next_interval);
356} 370}
357 371
358 372
359static __init int periodic_mcheck_init(void) 373static __init int periodic_mcheck_init(void)
360{ 374{
361 if (check_interval) 375 next_interval = check_interval * HZ;
362 schedule_delayed_work(&mcheck_work, check_interval*HZ); 376 if (next_interval)
377 schedule_delayed_work(&mcheck_work, next_interval);
363 return 0; 378 return 0;
364} 379}
365__initcall(periodic_mcheck_init); 380__initcall(periodic_mcheck_init);
@@ -597,12 +612,13 @@ static int mce_resume(struct sys_device *dev)
597/* Reinit MCEs after user configuration changes */ 612/* Reinit MCEs after user configuration changes */
598static void mce_restart(void) 613static void mce_restart(void)
599{ 614{
600 if (check_interval) 615 if (next_interval)
601 cancel_delayed_work(&mcheck_work); 616 cancel_delayed_work(&mcheck_work);
602 /* Timer race is harmless here */ 617 /* Timer race is harmless here */
603 on_each_cpu(mce_init, NULL, 1, 1); 618 on_each_cpu(mce_init, NULL, 1, 1);
604 if (check_interval) 619 next_interval = check_interval * HZ;
605 schedule_delayed_work(&mcheck_work, check_interval*HZ); 620 if (next_interval)
621 schedule_delayed_work(&mcheck_work, next_interval);
606} 622}
607 623
608static struct sysdev_class mce_sysclass = { 624static struct sysdev_class mce_sysclass = {
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 455aa0b932f0..d0dc4891599b 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -300,7 +300,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
300 } 300 }
301 } 301 }
302 } 302 }
303 clustered_apic_check(); 303 setup_apic_routing();
304 if (!num_processors) 304 if (!num_processors)
305 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 305 printk(KERN_ERR "MPTABLE: no processors registered!\n");
306 return num_processors; 306 return num_processors;
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index dfab9f167366..6cd2b30e2ffc 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -27,28 +27,11 @@
27#include <asm/proto.h> 27#include <asm/proto.h>
28#include <asm/kdebug.h> 28#include <asm/kdebug.h>
29#include <asm/mce.h> 29#include <asm/mce.h>
30#include <asm/intel_arch_perfmon.h>
31 30
32int unknown_nmi_panic; 31int unknown_nmi_panic;
33int nmi_watchdog_enabled; 32int nmi_watchdog_enabled;
34int panic_on_unrecovered_nmi; 33int panic_on_unrecovered_nmi;
35 34
36/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
37 * evtsel_nmi_owner tracks the ownership of the event selection
38 * - different performance counters/ event selection may be reserved for
39 * different subsystems this reservation system just tries to coordinate
40 * things a little
41 */
42
43/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
44 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
45 */
46#define NMI_MAX_COUNTER_BITS 66
47#define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS)
48
49static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]);
50static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]);
51
52static cpumask_t backtrace_mask = CPU_MASK_NONE; 35static cpumask_t backtrace_mask = CPU_MASK_NONE;
53 36
54/* nmi_active: 37/* nmi_active:
@@ -63,191 +46,11 @@ int panic_on_timeout;
63unsigned int nmi_watchdog = NMI_DEFAULT; 46unsigned int nmi_watchdog = NMI_DEFAULT;
64static unsigned int nmi_hz = HZ; 47static unsigned int nmi_hz = HZ;
65 48
66struct nmi_watchdog_ctlblk { 49static DEFINE_PER_CPU(short, wd_enabled);
67 int enabled;
68 u64 check_bit;
69 unsigned int cccr_msr;
70 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
71 unsigned int evntsel_msr; /* the MSR to select the events to handle */
72};
73static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
74 50
75/* local prototypes */ 51/* local prototypes */
76static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); 52static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
77 53
78/* converts an msr to an appropriate reservation bit */
79static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
80{
81 /* returns the bit offset of the performance counter register */
82 switch (boot_cpu_data.x86_vendor) {
83 case X86_VENDOR_AMD:
84 return (msr - MSR_K7_PERFCTR0);
85 case X86_VENDOR_INTEL:
86 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
87 return (msr - MSR_ARCH_PERFMON_PERFCTR0);
88 else
89 return (msr - MSR_P4_BPU_PERFCTR0);
90 }
91 return 0;
92}
93
94/* converts an msr to an appropriate reservation bit */
95static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
96{
97 /* returns the bit offset of the event selection register */
98 switch (boot_cpu_data.x86_vendor) {
99 case X86_VENDOR_AMD:
100 return (msr - MSR_K7_EVNTSEL0);
101 case X86_VENDOR_INTEL:
102 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
103 return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
104 else
105 return (msr - MSR_P4_BSU_ESCR0);
106 }
107 return 0;
108}
109
110/* checks for a bit availability (hack for oprofile) */
111int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
112{
113 int cpu;
114 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
115 for_each_possible_cpu (cpu) {
116 if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
117 return 0;
118 }
119 return 1;
120}
121
122/* checks the an msr for availability */
123int avail_to_resrv_perfctr_nmi(unsigned int msr)
124{
125 unsigned int counter;
126 int cpu;
127
128 counter = nmi_perfctr_msr_to_bit(msr);
129 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
130
131 for_each_possible_cpu (cpu) {
132 if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
133 return 0;
134 }
135 return 1;
136}
137
138static int __reserve_perfctr_nmi(int cpu, unsigned int msr)
139{
140 unsigned int counter;
141 if (cpu < 0)
142 cpu = smp_processor_id();
143
144 counter = nmi_perfctr_msr_to_bit(msr);
145 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
146
147 if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
148 return 1;
149 return 0;
150}
151
152static void __release_perfctr_nmi(int cpu, unsigned int msr)
153{
154 unsigned int counter;
155 if (cpu < 0)
156 cpu = smp_processor_id();
157
158 counter = nmi_perfctr_msr_to_bit(msr);
159 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
160
161 clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu));
162}
163
164int reserve_perfctr_nmi(unsigned int msr)
165{
166 int cpu, i;
167 for_each_possible_cpu (cpu) {
168 if (!__reserve_perfctr_nmi(cpu, msr)) {
169 for_each_possible_cpu (i) {
170 if (i >= cpu)
171 break;
172 __release_perfctr_nmi(i, msr);
173 }
174 return 0;
175 }
176 }
177 return 1;
178}
179
180void release_perfctr_nmi(unsigned int msr)
181{
182 int cpu;
183 for_each_possible_cpu (cpu)
184 __release_perfctr_nmi(cpu, msr);
185}
186
187int __reserve_evntsel_nmi(int cpu, unsigned int msr)
188{
189 unsigned int counter;
190 if (cpu < 0)
191 cpu = smp_processor_id();
192
193 counter = nmi_evntsel_msr_to_bit(msr);
194 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
195
196 if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]))
197 return 1;
198 return 0;
199}
200
201static void __release_evntsel_nmi(int cpu, unsigned int msr)
202{
203 unsigned int counter;
204 if (cpu < 0)
205 cpu = smp_processor_id();
206
207 counter = nmi_evntsel_msr_to_bit(msr);
208 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
209
210 clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]);
211}
212
213int reserve_evntsel_nmi(unsigned int msr)
214{
215 int cpu, i;
216 for_each_possible_cpu (cpu) {
217 if (!__reserve_evntsel_nmi(cpu, msr)) {
218 for_each_possible_cpu (i) {
219 if (i >= cpu)
220 break;
221 __release_evntsel_nmi(i, msr);
222 }
223 return 0;
224 }
225 }
226 return 1;
227}
228
229void release_evntsel_nmi(unsigned int msr)
230{
231 int cpu;
232 for_each_possible_cpu (cpu) {
233 __release_evntsel_nmi(cpu, msr);
234 }
235}
236
237static __cpuinit inline int nmi_known_cpu(void)
238{
239 switch (boot_cpu_data.x86_vendor) {
240 case X86_VENDOR_AMD:
241 return boot_cpu_data.x86 == 15 || boot_cpu_data.x86 == 16;
242 case X86_VENDOR_INTEL:
243 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
244 return 1;
245 else
246 return (boot_cpu_data.x86 == 15);
247 }
248 return 0;
249}
250
251/* Run after command line and cpu_init init, but before all other checks */ 54/* Run after command line and cpu_init init, but before all other checks */
252void nmi_watchdog_default(void) 55void nmi_watchdog_default(void)
253{ 56{
@@ -277,23 +80,6 @@ static __init void nmi_cpu_busy(void *data)
277} 80}
278#endif 81#endif
279 82
280static unsigned int adjust_for_32bit_ctr(unsigned int hz)
281{
282 unsigned int retval = hz;
283
284 /*
285 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
286 * are writable, with higher bits sign extending from bit 31.
287 * So, we can only program the counter with 31 bit values and
288 * 32nd bit should be 1, for 33.. to be 1.
289 * Find the appropriate nmi_hz
290 */
291 if ((((u64)cpu_khz * 1000) / retval) > 0x7fffffffULL) {
292 retval = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1;
293 }
294 return retval;
295}
296
297int __init check_nmi_watchdog (void) 83int __init check_nmi_watchdog (void)
298{ 84{
299 int *counts; 85 int *counts;
@@ -322,14 +108,14 @@ int __init check_nmi_watchdog (void)
322 mdelay((20*1000)/nmi_hz); // wait 20 ticks 108 mdelay((20*1000)/nmi_hz); // wait 20 ticks
323 109
324 for_each_online_cpu(cpu) { 110 for_each_online_cpu(cpu) {
325 if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled) 111 if (!per_cpu(wd_enabled, cpu))
326 continue; 112 continue;
327 if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { 113 if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
328 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", 114 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
329 cpu, 115 cpu,
330 counts[cpu], 116 counts[cpu],
331 cpu_pda(cpu)->__nmi_count); 117 cpu_pda(cpu)->__nmi_count);
332 per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0; 118 per_cpu(wd_enabled, cpu) = 0;
333 atomic_dec(&nmi_active); 119 atomic_dec(&nmi_active);
334 } 120 }
335 } 121 }
@@ -344,13 +130,8 @@ int __init check_nmi_watchdog (void)
344 130
345 /* now that we know it works we can reduce NMI frequency to 131 /* now that we know it works we can reduce NMI frequency to
346 something more reasonable; makes a difference in some configs */ 132 something more reasonable; makes a difference in some configs */
347 if (nmi_watchdog == NMI_LOCAL_APIC) { 133 if (nmi_watchdog == NMI_LOCAL_APIC)
348 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 134 nmi_hz = lapic_adjust_nmi_hz(1);
349
350 nmi_hz = 1;
351 if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0)
352 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
353 }
354 135
355 kfree(counts); 136 kfree(counts);
356 return 0; 137 return 0;
@@ -379,57 +160,6 @@ int __init setup_nmi_watchdog(char *str)
379 160
380__setup("nmi_watchdog=", setup_nmi_watchdog); 161__setup("nmi_watchdog=", setup_nmi_watchdog);
381 162
382static void disable_lapic_nmi_watchdog(void)
383{
384 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
385
386 if (atomic_read(&nmi_active) <= 0)
387 return;
388
389 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
390
391 BUG_ON(atomic_read(&nmi_active) != 0);
392}
393
394static void enable_lapic_nmi_watchdog(void)
395{
396 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
397
398 /* are we already enabled */
399 if (atomic_read(&nmi_active) != 0)
400 return;
401
402 /* are we lapic aware */
403 if (nmi_known_cpu() <= 0)
404 return;
405
406 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
407 touch_nmi_watchdog();
408}
409
410void disable_timer_nmi_watchdog(void)
411{
412 BUG_ON(nmi_watchdog != NMI_IO_APIC);
413
414 if (atomic_read(&nmi_active) <= 0)
415 return;
416
417 disable_irq(0);
418 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
419
420 BUG_ON(atomic_read(&nmi_active) != 0);
421}
422
423void enable_timer_nmi_watchdog(void)
424{
425 BUG_ON(nmi_watchdog != NMI_IO_APIC);
426
427 if (atomic_read(&nmi_active) == 0) {
428 touch_nmi_watchdog();
429 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
430 enable_irq(0);
431 }
432}
433 163
434static void __acpi_nmi_disable(void *__unused) 164static void __acpi_nmi_disable(void *__unused)
435{ 165{
@@ -515,275 +245,9 @@ late_initcall(init_lapic_nmi_sysfs);
515 245
516#endif /* CONFIG_PM */ 246#endif /* CONFIG_PM */
517 247
518/*
519 * Activate the NMI watchdog via the local APIC.
520 * Original code written by Keith Owens.
521 */
522
523/* Note that these events don't tick when the CPU idles. This means
524 the frequency varies with CPU load. */
525
526#define K7_EVNTSEL_ENABLE (1 << 22)
527#define K7_EVNTSEL_INT (1 << 20)
528#define K7_EVNTSEL_OS (1 << 17)
529#define K7_EVNTSEL_USR (1 << 16)
530#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
531#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
532
533static int setup_k7_watchdog(void)
534{
535 unsigned int perfctr_msr, evntsel_msr;
536 unsigned int evntsel;
537 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
538
539 perfctr_msr = MSR_K7_PERFCTR0;
540 evntsel_msr = MSR_K7_EVNTSEL0;
541 if (!__reserve_perfctr_nmi(-1, perfctr_msr))
542 goto fail;
543
544 if (!__reserve_evntsel_nmi(-1, evntsel_msr))
545 goto fail1;
546
547 /* Simulator may not support it */
548 if (checking_wrmsrl(evntsel_msr, 0UL))
549 goto fail2;
550 wrmsrl(perfctr_msr, 0UL);
551
552 evntsel = K7_EVNTSEL_INT
553 | K7_EVNTSEL_OS
554 | K7_EVNTSEL_USR
555 | K7_NMI_EVENT;
556
557 /* setup the timer */
558 wrmsr(evntsel_msr, evntsel, 0);
559 wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
560 apic_write(APIC_LVTPC, APIC_DM_NMI);
561 evntsel |= K7_EVNTSEL_ENABLE;
562 wrmsr(evntsel_msr, evntsel, 0);
563
564 wd->perfctr_msr = perfctr_msr;
565 wd->evntsel_msr = evntsel_msr;
566 wd->cccr_msr = 0; //unused
567 wd->check_bit = 1ULL<<63;
568 return 1;
569fail2:
570 __release_evntsel_nmi(-1, evntsel_msr);
571fail1:
572 __release_perfctr_nmi(-1, perfctr_msr);
573fail:
574 return 0;
575}
576
577static void stop_k7_watchdog(void)
578{
579 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
580
581 wrmsr(wd->evntsel_msr, 0, 0);
582
583 __release_evntsel_nmi(-1, wd->evntsel_msr);
584 __release_perfctr_nmi(-1, wd->perfctr_msr);
585}
586
587/* Note that these events don't tick when the CPU idles. This means
588 the frequency varies with CPU load. */
589
590#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
591#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
592#define P4_ESCR_OS (1<<3)
593#define P4_ESCR_USR (1<<2)
594#define P4_CCCR_OVF_PMI0 (1<<26)
595#define P4_CCCR_OVF_PMI1 (1<<27)
596#define P4_CCCR_THRESHOLD(N) ((N)<<20)
597#define P4_CCCR_COMPLEMENT (1<<19)
598#define P4_CCCR_COMPARE (1<<18)
599#define P4_CCCR_REQUIRED (3<<16)
600#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
601#define P4_CCCR_ENABLE (1<<12)
602#define P4_CCCR_OVF (1<<31)
603/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
604 CRU_ESCR0 (with any non-null event selector) through a complemented
605 max threshold. [IA32-Vol3, Section 14.9.9] */
606
607static int setup_p4_watchdog(void)
608{
609 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
610 unsigned int evntsel, cccr_val;
611 unsigned int misc_enable, dummy;
612 unsigned int ht_num;
613 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
614
615 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
616 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
617 return 0;
618
619#ifdef CONFIG_SMP
620 /* detect which hyperthread we are on */
621 if (smp_num_siblings == 2) {
622 unsigned int ebx, apicid;
623
624 ebx = cpuid_ebx(1);
625 apicid = (ebx >> 24) & 0xff;
626 ht_num = apicid & 1;
627 } else
628#endif
629 ht_num = 0;
630
631 /* performance counters are shared resources
632 * assign each hyperthread its own set
633 * (re-use the ESCR0 register, seems safe
634 * and keeps the cccr_val the same)
635 */
636 if (!ht_num) {
637 /* logical cpu 0 */
638 perfctr_msr = MSR_P4_IQ_PERFCTR0;
639 evntsel_msr = MSR_P4_CRU_ESCR0;
640 cccr_msr = MSR_P4_IQ_CCCR0;
641 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
642 } else {
643 /* logical cpu 1 */
644 perfctr_msr = MSR_P4_IQ_PERFCTR1;
645 evntsel_msr = MSR_P4_CRU_ESCR0;
646 cccr_msr = MSR_P4_IQ_CCCR1;
647 cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
648 }
649
650 if (!__reserve_perfctr_nmi(-1, perfctr_msr))
651 goto fail;
652
653 if (!__reserve_evntsel_nmi(-1, evntsel_msr))
654 goto fail1;
655
656 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
657 | P4_ESCR_OS
658 | P4_ESCR_USR;
659
660 cccr_val |= P4_CCCR_THRESHOLD(15)
661 | P4_CCCR_COMPLEMENT
662 | P4_CCCR_COMPARE
663 | P4_CCCR_REQUIRED;
664
665 wrmsr(evntsel_msr, evntsel, 0);
666 wrmsr(cccr_msr, cccr_val, 0);
667 wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
668 apic_write(APIC_LVTPC, APIC_DM_NMI);
669 cccr_val |= P4_CCCR_ENABLE;
670 wrmsr(cccr_msr, cccr_val, 0);
671
672 wd->perfctr_msr = perfctr_msr;
673 wd->evntsel_msr = evntsel_msr;
674 wd->cccr_msr = cccr_msr;
675 wd->check_bit = 1ULL<<39;
676 return 1;
677fail1:
678 __release_perfctr_nmi(-1, perfctr_msr);
679fail:
680 return 0;
681}
682
683static void stop_p4_watchdog(void)
684{
685 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
686
687 wrmsr(wd->cccr_msr, 0, 0);
688 wrmsr(wd->evntsel_msr, 0, 0);
689
690 __release_evntsel_nmi(-1, wd->evntsel_msr);
691 __release_perfctr_nmi(-1, wd->perfctr_msr);
692}
693
694#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
695#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
696
697static int setup_intel_arch_watchdog(void)
698{
699 unsigned int ebx;
700 union cpuid10_eax eax;
701 unsigned int unused;
702 unsigned int perfctr_msr, evntsel_msr;
703 unsigned int evntsel;
704 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
705
706 /*
707 * Check whether the Architectural PerfMon supports
708 * Unhalted Core Cycles Event or not.
709 * NOTE: Corresponding bit = 0 in ebx indicates event present.
710 */
711 cpuid(10, &(eax.full), &ebx, &unused, &unused);
712 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
713 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
714 goto fail;
715
716 perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
717 evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
718
719 if (!__reserve_perfctr_nmi(-1, perfctr_msr))
720 goto fail;
721
722 if (!__reserve_evntsel_nmi(-1, evntsel_msr))
723 goto fail1;
724
725 wrmsrl(perfctr_msr, 0UL);
726
727 evntsel = ARCH_PERFMON_EVENTSEL_INT
728 | ARCH_PERFMON_EVENTSEL_OS
729 | ARCH_PERFMON_EVENTSEL_USR
730 | ARCH_PERFMON_NMI_EVENT_SEL
731 | ARCH_PERFMON_NMI_EVENT_UMASK;
732
733 /* setup the timer */
734 wrmsr(evntsel_msr, evntsel, 0);
735
736 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
737 wrmsr(perfctr_msr, (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);
738
739 apic_write(APIC_LVTPC, APIC_DM_NMI);
740 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
741 wrmsr(evntsel_msr, evntsel, 0);
742
743 wd->perfctr_msr = perfctr_msr;
744 wd->evntsel_msr = evntsel_msr;
745 wd->cccr_msr = 0; //unused
746 wd->check_bit = 1ULL << (eax.split.bit_width - 1);
747 return 1;
748fail1:
749 __release_perfctr_nmi(-1, perfctr_msr);
750fail:
751 return 0;
752}
753
754static void stop_intel_arch_watchdog(void)
755{
756 unsigned int ebx;
757 union cpuid10_eax eax;
758 unsigned int unused;
759 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
760
761 /*
762 * Check whether the Architectural PerfMon supports
763 * Unhalted Core Cycles Event or not.
764 * NOTE: Corresponding bit = 0 in ebx indicates event present.
765 */
766 cpuid(10, &(eax.full), &ebx, &unused, &unused);
767 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
768 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
769 return;
770
771 wrmsr(wd->evntsel_msr, 0, 0);
772
773 __release_evntsel_nmi(-1, wd->evntsel_msr);
774 __release_perfctr_nmi(-1, wd->perfctr_msr);
775}
776
777void setup_apic_nmi_watchdog(void *unused) 248void setup_apic_nmi_watchdog(void *unused)
778{ 249{
779 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 250 if (__get_cpu_var(wd_enabled) == 1)
780
781 /* only support LOCAL and IO APICs for now */
782 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
783 (nmi_watchdog != NMI_IO_APIC))
784 return;
785
786 if (wd->enabled == 1)
787 return; 251 return;
788 252
789 /* cheap hack to support suspend/resume */ 253 /* cheap hack to support suspend/resume */
@@ -791,62 +255,31 @@ void setup_apic_nmi_watchdog(void *unused)
791 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) 255 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
792 return; 256 return;
793 257
794 if (nmi_watchdog == NMI_LOCAL_APIC) { 258 switch (nmi_watchdog) {
795 switch (boot_cpu_data.x86_vendor) { 259 case NMI_LOCAL_APIC:
796 case X86_VENDOR_AMD: 260 __get_cpu_var(wd_enabled) = 1;
797 if (strstr(boot_cpu_data.x86_model_id, "Screwdriver")) 261 if (lapic_watchdog_init(nmi_hz) < 0) {
798 return; 262 __get_cpu_var(wd_enabled) = 0;
799 if (!setup_k7_watchdog())
800 return;
801 break;
802 case X86_VENDOR_INTEL:
803 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
804 if (!setup_intel_arch_watchdog())
805 return;
806 break;
807 }
808 if (!setup_p4_watchdog())
809 return;
810 break;
811 default:
812 return; 263 return;
813 } 264 }
265 /* FALL THROUGH */
266 case NMI_IO_APIC:
267 __get_cpu_var(wd_enabled) = 1;
268 atomic_inc(&nmi_active);
814 } 269 }
815 wd->enabled = 1;
816 atomic_inc(&nmi_active);
817} 270}
818 271
819void stop_apic_nmi_watchdog(void *unused) 272void stop_apic_nmi_watchdog(void *unused)
820{ 273{
821 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
822
823 /* only support LOCAL and IO APICs for now */ 274 /* only support LOCAL and IO APICs for now */
824 if ((nmi_watchdog != NMI_LOCAL_APIC) && 275 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
825 (nmi_watchdog != NMI_IO_APIC)) 276 (nmi_watchdog != NMI_IO_APIC))
826 return; 277 return;
827 278 if (__get_cpu_var(wd_enabled) == 0)
828 if (wd->enabled == 0)
829 return; 279 return;
830 280 if (nmi_watchdog == NMI_LOCAL_APIC)
831 if (nmi_watchdog == NMI_LOCAL_APIC) { 281 lapic_watchdog_stop();
832 switch (boot_cpu_data.x86_vendor) { 282 __get_cpu_var(wd_enabled) = 0;
833 case X86_VENDOR_AMD:
834 if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
835 return;
836 stop_k7_watchdog();
837 break;
838 case X86_VENDOR_INTEL:
839 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
840 stop_intel_arch_watchdog();
841 break;
842 }
843 stop_p4_watchdog();
844 break;
845 default:
846 return;
847 }
848 }
849 wd->enabled = 0;
850 atomic_dec(&nmi_active); 283 atomic_dec(&nmi_active);
851} 284}
852 285
@@ -885,9 +318,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
885 int sum; 318 int sum;
886 int touched = 0; 319 int touched = 0;
887 int cpu = smp_processor_id(); 320 int cpu = smp_processor_id();
888 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 321 int rc = 0;
889 u64 dummy;
890 int rc=0;
891 322
892 /* check for other users first */ 323 /* check for other users first */
893 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) 324 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
@@ -934,55 +365,20 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
934 } 365 }
935 366
936 /* see if the nmi watchdog went off */ 367 /* see if the nmi watchdog went off */
937 if (wd->enabled) { 368 if (!__get_cpu_var(wd_enabled))
938 if (nmi_watchdog == NMI_LOCAL_APIC) { 369 return rc;
939 rdmsrl(wd->perfctr_msr, dummy); 370 switch (nmi_watchdog) {
940 if (dummy & wd->check_bit){ 371 case NMI_LOCAL_APIC:
941 /* this wasn't a watchdog timer interrupt */ 372 rc |= lapic_wd_event(nmi_hz);
942 goto done; 373 break;
943 } 374 case NMI_IO_APIC:
944 375 /* don't know how to accurately check for this.
945 /* only Intel uses the cccr msr */ 376 * just assume it was a watchdog timer interrupt
946 if (wd->cccr_msr != 0) { 377 * This matches the old behaviour.
947 /* 378 */
948 * P4 quirks: 379 rc = 1;
949 * - An overflown perfctr will assert its interrupt 380 break;
950 * until the OVF flag in its CCCR is cleared.
951 * - LVTPC is masked on interrupt and must be
952 * unmasked by the LVTPC handler.
953 */
954 rdmsrl(wd->cccr_msr, dummy);
955 dummy &= ~P4_CCCR_OVF;
956 wrmsrl(wd->cccr_msr, dummy);
957 apic_write(APIC_LVTPC, APIC_DM_NMI);
958 /* start the cycle over again */
959 wrmsrl(wd->perfctr_msr,
960 -((u64)cpu_khz * 1000 / nmi_hz));
961 } else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
962 /*
963 * ArchPerfom/Core Duo needs to re-unmask
964 * the apic vector
965 */
966 apic_write(APIC_LVTPC, APIC_DM_NMI);
967 /* ARCH_PERFMON has 32 bit counter writes */
968 wrmsr(wd->perfctr_msr,
969 (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);
970 } else {
971 /* start the cycle over again */
972 wrmsrl(wd->perfctr_msr,
973 -((u64)cpu_khz * 1000 / nmi_hz));
974 }
975 rc = 1;
976 } else if (nmi_watchdog == NMI_IO_APIC) {
977 /* don't know how to accurately check for this.
978 * just assume it was a watchdog timer interrupt
979 * This matches the old behaviour.
980 */
981 rc = 1;
982 } else
983 printk(KERN_WARNING "Unknown enabled NMI hardware?!\n");
984 } 381 }
985done:
986 return rc; 382 return rc;
987} 383}
988 384
@@ -1067,12 +463,4 @@ void __trigger_all_cpu_backtrace(void)
1067 463
1068EXPORT_SYMBOL(nmi_active); 464EXPORT_SYMBOL(nmi_active);
1069EXPORT_SYMBOL(nmi_watchdog); 465EXPORT_SYMBOL(nmi_watchdog);
1070EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
1071EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
1072EXPORT_SYMBOL(reserve_perfctr_nmi);
1073EXPORT_SYMBOL(release_perfctr_nmi);
1074EXPORT_SYMBOL(reserve_evntsel_nmi);
1075EXPORT_SYMBOL(release_evntsel_nmi);
1076EXPORT_SYMBOL(disable_timer_nmi_watchdog);
1077EXPORT_SYMBOL(enable_timer_nmi_watchdog);
1078EXPORT_SYMBOL(touch_nmi_watchdog); 466EXPORT_SYMBOL(touch_nmi_watchdog);
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index 04480c3b68f5..5bd20b542c1e 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -507,7 +507,7 @@ error:
507 return ret; 507 return ret;
508} 508}
509 509
510static struct dma_mapping_ops calgary_dma_ops = { 510static const struct dma_mapping_ops calgary_dma_ops = {
511 .alloc_coherent = calgary_alloc_coherent, 511 .alloc_coherent = calgary_alloc_coherent,
512 .map_single = calgary_map_single, 512 .map_single = calgary_map_single,
513 .unmap_single = calgary_unmap_single, 513 .unmap_single = calgary_unmap_single,
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 0bae862e9a55..0a762e10f2be 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -556,7 +556,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
556 556
557extern int agp_amd64_init(void); 557extern int agp_amd64_init(void);
558 558
559static struct dma_mapping_ops gart_dma_ops = { 559static const struct dma_mapping_ops gart_dma_ops = {
560 .mapping_error = NULL, 560 .mapping_error = NULL,
561 .map_single = gart_map_single, 561 .map_single = gart_map_single,
562 .map_simple = gart_map_simple, 562 .map_simple = gart_map_simple,
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index df09ab05a1bd..6dade0c867cc 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -79,7 +79,7 @@ void nommu_unmap_sg(struct device *dev, struct scatterlist *sg,
79{ 79{
80} 80}
81 81
82struct dma_mapping_ops nommu_dma_ops = { 82const struct dma_mapping_ops nommu_dma_ops = {
83 .map_single = nommu_map_single, 83 .map_single = nommu_map_single,
84 .unmap_single = nommu_unmap_single, 84 .unmap_single = nommu_unmap_single,
85 .map_sg = nommu_map_sg, 85 .map_sg = nommu_map_sg,
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c
index eb18be5a6569..4b4569abc60c 100644
--- a/arch/x86_64/kernel/pci-swiotlb.c
+++ b/arch/x86_64/kernel/pci-swiotlb.c
@@ -12,7 +12,7 @@
12int swiotlb __read_mostly; 12int swiotlb __read_mostly;
13EXPORT_SYMBOL(swiotlb); 13EXPORT_SYMBOL(swiotlb);
14 14
15struct dma_mapping_ops swiotlb_dma_ops = { 15const struct dma_mapping_ops swiotlb_dma_ops = {
16 .mapping_error = swiotlb_dma_mapping_error, 16 .mapping_error = swiotlb_dma_mapping_error,
17 .alloc_coherent = swiotlb_alloc_coherent, 17 .alloc_coherent = swiotlb_alloc_coherent,
18 .free_coherent = swiotlb_free_coherent, 18 .free_coherent = swiotlb_free_coherent,
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index d8d5ccc245c8..4f21765078b7 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -288,16 +288,18 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
288 288
289static int __init idle_setup (char *str) 289static int __init idle_setup (char *str)
290{ 290{
291 if (!strncmp(str, "poll", 4)) { 291 if (!strcmp(str, "poll")) {
292 printk("using polling idle threads.\n"); 292 printk("using polling idle threads.\n");
293 pm_idle = poll_idle; 293 pm_idle = poll_idle;
294 } 294 } else if (!strcmp(str, "mwait"))
295 force_mwait = 1;
296 else
297 return -1;
295 298
296 boot_option_idle_override = 1; 299 boot_option_idle_override = 1;
297 return 1; 300 return 0;
298} 301}
299 302early_param("idle", idle_setup);
300__setup("idle=", idle_setup);
301 303
302/* Prints also some state that isn't saved in the pt_regs */ 304/* Prints also some state that isn't saved in the pt_regs */
303void __show_regs(struct pt_regs * regs) 305void __show_regs(struct pt_regs * regs)
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 3d98b696881d..db30b5bcef61 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -79,6 +79,8 @@ int bootloader_type;
79 79
80unsigned long saved_video_mode; 80unsigned long saved_video_mode;
81 81
82int force_mwait __cpuinitdata;
83
82/* 84/*
83 * Early DMI memory 85 * Early DMI memory
84 */ 86 */
@@ -205,10 +207,10 @@ static void discover_ebda(void)
205 * there is a real-mode segmented pointer pointing to the 207 * there is a real-mode segmented pointer pointing to the
206 * 4K EBDA area at 0x40E 208 * 4K EBDA area at 0x40E
207 */ 209 */
208 ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER; 210 ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
209 ebda_addr <<= 4; 211 ebda_addr <<= 4;
210 212
211 ebda_size = *(unsigned short *)(unsigned long)ebda_addr; 213 ebda_size = *(unsigned short *)__va(ebda_addr);
212 214
213 /* Round EBDA up to pages */ 215 /* Round EBDA up to pages */
214 if (ebda_size == 0) 216 if (ebda_size == 0)
@@ -243,11 +245,12 @@ void __init setup_arch(char **cmdline_p)
243 init_mm.end_code = (unsigned long) &_etext; 245 init_mm.end_code = (unsigned long) &_etext;
244 init_mm.end_data = (unsigned long) &_edata; 246 init_mm.end_data = (unsigned long) &_edata;
245 init_mm.brk = (unsigned long) &_end; 247 init_mm.brk = (unsigned long) &_end;
248 init_mm.pgd = __va(__pa_symbol(&init_level4_pgt));
246 249
247 code_resource.start = virt_to_phys(&_text); 250 code_resource.start = __pa_symbol(&_text);
248 code_resource.end = virt_to_phys(&_etext)-1; 251 code_resource.end = __pa_symbol(&_etext)-1;
249 data_resource.start = virt_to_phys(&_etext); 252 data_resource.start = __pa_symbol(&_etext);
250 data_resource.end = virt_to_phys(&_edata)-1; 253 data_resource.end = __pa_symbol(&_edata)-1;
251 254
252 early_identify_cpu(&boot_cpu_data); 255 early_identify_cpu(&boot_cpu_data);
253 256
@@ -274,8 +277,6 @@ void __init setup_arch(char **cmdline_p)
274 277
275 dmi_scan_machine(); 278 dmi_scan_machine();
276 279
277 zap_low_mappings(0);
278
279#ifdef CONFIG_ACPI 280#ifdef CONFIG_ACPI
280 /* 281 /*
281 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). 282 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
@@ -329,15 +330,8 @@ void __init setup_arch(char **cmdline_p)
329#endif 330#endif
330 331
331#ifdef CONFIG_SMP 332#ifdef CONFIG_SMP
332 /*
333 * But first pinch a few for the stack/trampoline stuff
334 * FIXME: Don't need the extra page at 4K, but need to fix
335 * trampoline before removing it. (see the GDT stuff)
336 */
337 reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
338
339 /* Reserve SMP trampoline */ 333 /* Reserve SMP trampoline */
340 reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE); 334 reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
341#endif 335#endif
342 336
343#ifdef CONFIG_ACPI_SLEEP 337#ifdef CONFIG_ACPI_SLEEP
@@ -612,6 +606,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
612 606
613 /* RDTSC can be speculated around */ 607 /* RDTSC can be speculated around */
614 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); 608 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
609
610 /* Family 10 doesn't support C states in MWAIT so don't use it */
611 if (c->x86 == 0x10 && !force_mwait)
612 clear_bit(X86_FEATURE_MWAIT, &c->x86_capability);
615} 613}
616 614
617static void __cpuinit detect_ht(struct cpuinfo_x86 *c) 615static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -987,9 +985,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
987 "stc", 985 "stc",
988 "100mhzsteps", 986 "100mhzsteps",
989 "hwpstate", 987 "hwpstate",
990 NULL, /* tsc invariant mapped to constant_tsc */ 988 "", /* tsc invariant mapped to constant_tsc */
991 NULL, 989 /* nothing */
992 /* nothing */ /* constant_tsc - moved to flags */
993 }; 990 };
994 991
995 992
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 6a70b55f719d..64379a80d763 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -103,9 +103,9 @@ void __init setup_per_cpu_areas(void)
103 if (!NODE_DATA(cpu_to_node(i))) { 103 if (!NODE_DATA(cpu_to_node(i))) {
104 printk("cpu with no node %d, num_online_nodes %d\n", 104 printk("cpu with no node %d, num_online_nodes %d\n",
105 i, num_online_nodes()); 105 i, num_online_nodes());
106 ptr = alloc_bootmem(size); 106 ptr = alloc_bootmem_pages(size);
107 } else { 107 } else {
108 ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); 108 ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size);
109 } 109 }
110 if (!ptr) 110 if (!ptr)
111 panic("Cannot allocate cpu data for CPU %d\n", i); 111 panic("Cannot allocate cpu data for CPU %d\n", i);
@@ -201,7 +201,6 @@ void __cpuinit cpu_init (void)
201 /* CPU 0 is initialised in head64.c */ 201 /* CPU 0 is initialised in head64.c */
202 if (cpu != 0) { 202 if (cpu != 0) {
203 pda_init(cpu); 203 pda_init(cpu);
204 zap_low_mappings(cpu);
205 } else 204 } else
206 estacks = boot_exception_stacks; 205 estacks = boot_exception_stacks;
207 206
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index 49ec324cd141..c819625f3316 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -141,7 +141,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
141 goto badframe; 141 goto badframe;
142 142
143#ifdef DEBUG_SIG 143#ifdef DEBUG_SIG
144 printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs.rip,regs.rsp,frame,eax); 144 printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs->rip,regs->rsp,frame,eax);
145#endif 145#endif
146 146
147 if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->rsp) == -EFAULT) 147 if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->rsp) == -EFAULT)
@@ -301,7 +301,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
301 if (test_thread_flag(TIF_SINGLESTEP)) 301 if (test_thread_flag(TIF_SINGLESTEP))
302 ptrace_notify(SIGTRAP); 302 ptrace_notify(SIGTRAP);
303#ifdef DEBUG_SIG 303#ifdef DEBUG_SIG
304 printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", 304 printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%p\n",
305 current->comm, current->pid, frame, regs->rip, frame->pretcode); 305 current->comm, current->pid, frame, regs->rip, frame->pretcode);
306#endif 306#endif
307 307
@@ -463,7 +463,7 @@ void
463do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) 463do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
464{ 464{
465#ifdef DEBUG_SIG 465#ifdef DEBUG_SIG
466 printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n", 466 printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%p pending:%x\n",
467 thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current)); 467 thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current));
468#endif 468#endif
469 469
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index af1ec4d23cf8..22abae4e9f39 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -76,7 +76,7 @@ static inline void leave_mm(int cpu)
76 if (read_pda(mmu_state) == TLBSTATE_OK) 76 if (read_pda(mmu_state) == TLBSTATE_OK)
77 BUG(); 77 BUG();
78 cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); 78 cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
79 load_cr3(swapper_pg_dir); 79 load_cr3(init_mm.pgd);
80} 80}
81 81
82/* 82/*
@@ -452,42 +452,34 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
452} 452}
453EXPORT_SYMBOL(smp_call_function); 453EXPORT_SYMBOL(smp_call_function);
454 454
455void smp_stop_cpu(void) 455static void stop_this_cpu(void *dummy)
456{ 456{
457 unsigned long flags; 457 local_irq_disable();
458 /* 458 /*
459 * Remove this CPU: 459 * Remove this CPU:
460 */ 460 */
461 cpu_clear(smp_processor_id(), cpu_online_map); 461 cpu_clear(smp_processor_id(), cpu_online_map);
462 local_irq_save(flags);
463 disable_local_APIC(); 462 disable_local_APIC();
464 local_irq_restore(flags);
465}
466
467static void smp_really_stop_cpu(void *dummy)
468{
469 smp_stop_cpu();
470 for (;;) 463 for (;;)
471 halt(); 464 halt();
472} 465}
473 466
474void smp_send_stop(void) 467void smp_send_stop(void)
475{ 468{
476 int nolock = 0; 469 int nolock;
470 unsigned long flags;
471
477 if (reboot_force) 472 if (reboot_force)
478 return; 473 return;
474
479 /* Don't deadlock on the call lock in panic */ 475 /* Don't deadlock on the call lock in panic */
480 if (!spin_trylock(&call_lock)) { 476 nolock = !spin_trylock(&call_lock);
481 /* ignore locking because we have panicked anyways */ 477 local_irq_save(flags);
482 nolock = 1; 478 __smp_call_function(stop_this_cpu, NULL, 0, 0);
483 }
484 __smp_call_function(smp_really_stop_cpu, NULL, 0, 0);
485 if (!nolock) 479 if (!nolock)
486 spin_unlock(&call_lock); 480 spin_unlock(&call_lock);
487
488 local_irq_disable();
489 disable_local_APIC(); 481 disable_local_APIC();
490 local_irq_enable(); 482 local_irq_restore(flags);
491} 483}
492 484
493/* 485/*
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index cd4643a37022..4d9dacfae575 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -60,7 +60,6 @@
60#include <asm/irq.h> 60#include <asm/irq.h>
61#include <asm/hw_irq.h> 61#include <asm/hw_irq.h>
62#include <asm/numa.h> 62#include <asm/numa.h>
63#include <asm/genapic.h>
64 63
65/* Number of siblings per CPU package */ 64/* Number of siblings per CPU package */
66int smp_num_siblings = 1; 65int smp_num_siblings = 1;
@@ -68,7 +67,6 @@ EXPORT_SYMBOL(smp_num_siblings);
68 67
69/* Last level cache ID of each logical CPU */ 68/* Last level cache ID of each logical CPU */
70u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; 69u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
71EXPORT_SYMBOL(cpu_llc_id);
72 70
73/* Bitmask of currently online CPUs */ 71/* Bitmask of currently online CPUs */
74cpumask_t cpu_online_map __read_mostly; 72cpumask_t cpu_online_map __read_mostly;
@@ -392,7 +390,8 @@ static void inquire_remote_apic(int apicid)
392{ 390{
393 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; 391 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
394 char *names[] = { "ID", "VERSION", "SPIV" }; 392 char *names[] = { "ID", "VERSION", "SPIV" };
395 int timeout, status; 393 int timeout;
394 unsigned int status;
396 395
397 printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); 396 printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
398 397
@@ -402,7 +401,9 @@ static void inquire_remote_apic(int apicid)
402 /* 401 /*
403 * Wait for idle. 402 * Wait for idle.
404 */ 403 */
405 apic_wait_icr_idle(); 404 status = safe_apic_wait_icr_idle();
405 if (status)
406 printk("a previous APIC delivery may have failed\n");
406 407
407 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); 408 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
408 apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); 409 apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
@@ -430,8 +431,8 @@ static void inquire_remote_apic(int apicid)
430 */ 431 */
431static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip) 432static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
432{ 433{
433 unsigned long send_status = 0, accept_status = 0; 434 unsigned long send_status, accept_status = 0;
434 int maxlvt, timeout, num_starts, j; 435 int maxlvt, num_starts, j;
435 436
436 Dprintk("Asserting INIT.\n"); 437 Dprintk("Asserting INIT.\n");
437 438
@@ -447,12 +448,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
447 | APIC_DM_INIT); 448 | APIC_DM_INIT);
448 449
449 Dprintk("Waiting for send to finish...\n"); 450 Dprintk("Waiting for send to finish...\n");
450 timeout = 0; 451 send_status = safe_apic_wait_icr_idle();
451 do {
452 Dprintk("+");
453 udelay(100);
454 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
455 } while (send_status && (timeout++ < 1000));
456 452
457 mdelay(10); 453 mdelay(10);
458 454
@@ -465,12 +461,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
465 apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); 461 apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
466 462
467 Dprintk("Waiting for send to finish...\n"); 463 Dprintk("Waiting for send to finish...\n");
468 timeout = 0; 464 send_status = safe_apic_wait_icr_idle();
469 do {
470 Dprintk("+");
471 udelay(100);
472 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
473 } while (send_status && (timeout++ < 1000));
474 465
475 mb(); 466 mb();
476 atomic_set(&init_deasserted, 1); 467 atomic_set(&init_deasserted, 1);
@@ -509,12 +500,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
509 Dprintk("Startup point 1.\n"); 500 Dprintk("Startup point 1.\n");
510 501
511 Dprintk("Waiting for send to finish...\n"); 502 Dprintk("Waiting for send to finish...\n");
512 timeout = 0; 503 send_status = safe_apic_wait_icr_idle();
513 do {
514 Dprintk("+");
515 udelay(100);
516 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
517 } while (send_status && (timeout++ < 1000));
518 504
519 /* 505 /*
520 * Give the other CPU some time to accept the IPI. 506 * Give the other CPU some time to accept the IPI.
@@ -945,6 +931,12 @@ int __cpuinit __cpu_up(unsigned int cpu)
945 return -ENOSYS; 931 return -ENOSYS;
946 } 932 }
947 933
934 /*
935 * Save current MTRR state in case it was changed since early boot
936 * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
937 */
938 mtrr_save_state();
939
948 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; 940 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
949 /* Boot it! */ 941 /* Boot it! */
950 err = do_boot_cpu(cpu, apicid); 942 err = do_boot_cpu(cpu, apicid);
@@ -965,13 +957,6 @@ int __cpuinit __cpu_up(unsigned int cpu)
965 957
966 while (!cpu_isset(cpu, cpu_online_map)) 958 while (!cpu_isset(cpu, cpu_online_map))
967 cpu_relax(); 959 cpu_relax();
968
969 if (num_online_cpus() > 8 && genapic == &apic_flat) {
970 printk(KERN_WARNING
971 "flat APIC routing can't be used with > 8 cpus\n");
972 BUG();
973 }
974
975 err = 0; 960 err = 0;
976 961
977 return err; 962 return err;
diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c
index 91f7e678bae7..6a5a98f2a75c 100644
--- a/arch/x86_64/kernel/suspend.c
+++ b/arch/x86_64/kernel/suspend.c
@@ -12,6 +12,10 @@
12#include <asm/proto.h> 12#include <asm/proto.h>
13#include <asm/page.h> 13#include <asm/page.h>
14#include <asm/pgtable.h> 14#include <asm/pgtable.h>
15#include <asm/mtrr.h>
16
17/* References to section boundaries */
18extern const void __nosave_begin, __nosave_end;
15 19
16struct saved_context saved_context; 20struct saved_context saved_context;
17 21
@@ -33,7 +37,6 @@ void __save_processor_state(struct saved_context *ctxt)
33 asm volatile ("str %0" : "=m" (ctxt->tr)); 37 asm volatile ("str %0" : "=m" (ctxt->tr));
34 38
35 /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */ 39 /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
36 /* EFER should be constant for kernel version, no need to handle it. */
37 /* 40 /*
38 * segment registers 41 * segment registers
39 */ 42 */
@@ -46,10 +49,12 @@ void __save_processor_state(struct saved_context *ctxt)
46 rdmsrl(MSR_FS_BASE, ctxt->fs_base); 49 rdmsrl(MSR_FS_BASE, ctxt->fs_base);
47 rdmsrl(MSR_GS_BASE, ctxt->gs_base); 50 rdmsrl(MSR_GS_BASE, ctxt->gs_base);
48 rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); 51 rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
52 mtrr_save_fixed_ranges(NULL);
49 53
50 /* 54 /*
51 * control registers 55 * control registers
52 */ 56 */
57 rdmsrl(MSR_EFER, ctxt->efer);
53 asm volatile ("movq %%cr0, %0" : "=r" (ctxt->cr0)); 58 asm volatile ("movq %%cr0, %0" : "=r" (ctxt->cr0));
54 asm volatile ("movq %%cr2, %0" : "=r" (ctxt->cr2)); 59 asm volatile ("movq %%cr2, %0" : "=r" (ctxt->cr2));
55 asm volatile ("movq %%cr3, %0" : "=r" (ctxt->cr3)); 60 asm volatile ("movq %%cr3, %0" : "=r" (ctxt->cr3));
@@ -75,6 +80,7 @@ void __restore_processor_state(struct saved_context *ctxt)
75 /* 80 /*
76 * control registers 81 * control registers
77 */ 82 */
83 wrmsrl(MSR_EFER, ctxt->efer);
78 asm volatile ("movq %0, %%cr8" :: "r" (ctxt->cr8)); 84 asm volatile ("movq %0, %%cr8" :: "r" (ctxt->cr8));
79 asm volatile ("movq %0, %%cr4" :: "r" (ctxt->cr4)); 85 asm volatile ("movq %0, %%cr4" :: "r" (ctxt->cr4));
80 asm volatile ("movq %0, %%cr3" :: "r" (ctxt->cr3)); 86 asm volatile ("movq %0, %%cr3" :: "r" (ctxt->cr3));
@@ -219,4 +225,15 @@ int swsusp_arch_resume(void)
219 restore_image(); 225 restore_image();
220 return 0; 226 return 0;
221} 227}
228
229/*
230 * pfn_is_nosave - check if given pfn is in the 'nosave' section
231 */
232
233int pfn_is_nosave(unsigned long pfn)
234{
235 unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
236 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
237 return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
238}
222#endif /* CONFIG_SOFTWARE_SUSPEND */ 239#endif /* CONFIG_SOFTWARE_SUSPEND */
diff --git a/arch/x86_64/kernel/suspend_asm.S b/arch/x86_64/kernel/suspend_asm.S
index bfbe00763c68..16d183f67bc1 100644
--- a/arch/x86_64/kernel/suspend_asm.S
+++ b/arch/x86_64/kernel/suspend_asm.S
@@ -71,9 +71,10 @@ loop:
71 jmp loop 71 jmp loop
72done: 72done:
73 /* go back to the original page tables */ 73 /* go back to the original page tables */
74 leaq init_level4_pgt(%rip), %rax 74 movq $(init_level4_pgt - __START_KERNEL_map), %rax
75 subq $__START_KERNEL_map, %rax 75 addq phys_base(%rip), %rax
76 movq %rax, %cr3 76 movq %rax, %cr3
77
77 /* Flush TLB, including "global" things (vmalloc) */ 78 /* Flush TLB, including "global" things (vmalloc) */
78 movq mmu_cr4_features(%rip), %rax 79 movq mmu_cr4_features(%rip), %rax
79 movq %rax, %rdx 80 movq %rax, %rdx
diff --git a/arch/x86_64/kernel/syscall.c b/arch/x86_64/kernel/syscall.c
index 213fd6ab789d..63d592c276cc 100644
--- a/arch/x86_64/kernel/syscall.c
+++ b/arch/x86_64/kernel/syscall.c
@@ -3,6 +3,7 @@
3#include <linux/linkage.h> 3#include <linux/linkage.h>
4#include <linux/sys.h> 4#include <linux/sys.h>
5#include <linux/cache.h> 5#include <linux/cache.h>
6#include <asm/asm-offsets.h>
6 7
7#define __NO_STUBS 8#define __NO_STUBS
8 9
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 75d73a9aa9ff..0652e173813b 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -39,13 +39,11 @@
39#include <asm/proto.h> 39#include <asm/proto.h>
40#include <asm/hpet.h> 40#include <asm/hpet.h>
41#include <asm/sections.h> 41#include <asm/sections.h>
42#include <linux/cpufreq.h>
43#include <linux/hpet.h> 42#include <linux/hpet.h>
44#include <asm/apic.h> 43#include <asm/apic.h>
45#include <asm/hpet.h> 44#include <asm/hpet.h>
46 45#include <asm/mpspec.h>
47extern void i8254_timer_resume(void); 46#include <asm/nmi.h>
48extern int using_apic_timer;
49 47
50static char *timename = NULL; 48static char *timename = NULL;
51 49
@@ -252,6 +250,51 @@ static unsigned long get_cmos_time(void)
252 return mktime(year, mon, day, hour, min, sec); 250 return mktime(year, mon, day, hour, min, sec);
253} 251}
254 252
253/* calibrate_cpu is used on systems with fixed rate TSCs to determine
254 * processor frequency */
255#define TICK_COUNT 100000000
256static unsigned int __init tsc_calibrate_cpu_khz(void)
257{
258 int tsc_start, tsc_now;
259 int i, no_ctr_free;
260 unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
261 unsigned long flags;
262
263 for (i = 0; i < 4; i++)
264 if (avail_to_resrv_perfctr_nmi_bit(i))
265 break;
266 no_ctr_free = (i == 4);
267 if (no_ctr_free) {
268 i = 3;
269 rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
270 wrmsrl(MSR_K7_EVNTSEL3, 0);
271 rdmsrl(MSR_K7_PERFCTR3, pmc3);
272 } else {
273 reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
274 reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
275 }
276 local_irq_save(flags);
277 /* start meauring cycles, incrementing from 0 */
278 wrmsrl(MSR_K7_PERFCTR0 + i, 0);
279 wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
280 rdtscl(tsc_start);
281 do {
282 rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
283 tsc_now = get_cycles_sync();
284 } while ((tsc_now - tsc_start) < TICK_COUNT);
285
286 local_irq_restore(flags);
287 if (no_ctr_free) {
288 wrmsrl(MSR_K7_EVNTSEL3, 0);
289 wrmsrl(MSR_K7_PERFCTR3, pmc3);
290 wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
291 } else {
292 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
293 release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
294 }
295
296 return pmc_now * tsc_khz / (tsc_now - tsc_start);
297}
255 298
256/* 299/*
257 * pit_calibrate_tsc() uses the speaker output (channel 2) of 300 * pit_calibrate_tsc() uses the speaker output (channel 2) of
@@ -285,7 +328,7 @@ static unsigned int __init pit_calibrate_tsc(void)
285#define PIT_MODE 0x43 328#define PIT_MODE 0x43
286#define PIT_CH0 0x40 329#define PIT_CH0 0x40
287 330
288static void __init __pit_init(int val, u8 mode) 331static void __pit_init(int val, u8 mode)
289{ 332{
290 unsigned long flags; 333 unsigned long flags;
291 334
@@ -301,12 +344,12 @@ void __init pit_init(void)
301 __pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */ 344 __pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */
302} 345}
303 346
304void __init pit_stop_interrupt(void) 347void pit_stop_interrupt(void)
305{ 348{
306 __pit_init(0, 0x30); /* mode 0 */ 349 __pit_init(0, 0x30); /* mode 0 */
307} 350}
308 351
309void __init stop_timer_interrupt(void) 352void stop_timer_interrupt(void)
310{ 353{
311 char *name; 354 char *name;
312 if (hpet_address) { 355 if (hpet_address) {
@@ -339,23 +382,29 @@ void __init time_init(void)
339 if (hpet_use_timer) { 382 if (hpet_use_timer) {
340 /* set tick_nsec to use the proper rate for HPET */ 383 /* set tick_nsec to use the proper rate for HPET */
341 tick_nsec = TICK_NSEC_HPET; 384 tick_nsec = TICK_NSEC_HPET;
342 cpu_khz = hpet_calibrate_tsc(); 385 tsc_khz = hpet_calibrate_tsc();
343 timename = "HPET"; 386 timename = "HPET";
344 } else { 387 } else {
345 pit_init(); 388 pit_init();
346 cpu_khz = pit_calibrate_tsc(); 389 tsc_khz = pit_calibrate_tsc();
347 timename = "PIT"; 390 timename = "PIT";
348 } 391 }
349 392
393 cpu_khz = tsc_khz;
394 if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
395 boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
396 boot_cpu_data.x86 == 16)
397 cpu_khz = tsc_calibrate_cpu_khz();
398
350 if (unsynchronized_tsc()) 399 if (unsynchronized_tsc())
351 mark_tsc_unstable(); 400 mark_tsc_unstable("TSCs unsynchronized");
352 401
353 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) 402 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
354 vgetcpu_mode = VGETCPU_RDTSCP; 403 vgetcpu_mode = VGETCPU_RDTSCP;
355 else 404 else
356 vgetcpu_mode = VGETCPU_LSL; 405 vgetcpu_mode = VGETCPU_LSL;
357 406
358 set_cyc2ns_scale(cpu_khz); 407 set_cyc2ns_scale(tsc_khz);
359 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", 408 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
360 cpu_khz / 1000, cpu_khz % 1000); 409 cpu_khz / 1000, cpu_khz % 1000);
361 init_tsc_clocksource(); 410 init_tsc_clocksource();
diff --git a/arch/x86_64/kernel/trampoline.S b/arch/x86_64/kernel/trampoline.S
index c79b99a9e2f6..e7e2764c461b 100644
--- a/arch/x86_64/kernel/trampoline.S
+++ b/arch/x86_64/kernel/trampoline.S
@@ -3,6 +3,7 @@
3 * Trampoline.S Derived from Setup.S by Linus Torvalds 3 * Trampoline.S Derived from Setup.S by Linus Torvalds
4 * 4 *
5 * 4 Jan 1997 Michael Chastain: changed to gnu as. 5 * 4 Jan 1997 Michael Chastain: changed to gnu as.
6 * 15 Sept 2005 Eric Biederman: 64bit PIC support
6 * 7 *
7 * Entry: CS:IP point to the start of our code, we are 8 * Entry: CS:IP point to the start of our code, we are
8 * in real mode with no stack, but the rest of the 9 * in real mode with no stack, but the rest of the
@@ -17,15 +18,20 @@
17 * and IP is zero. Thus, data addresses need to be absolute 18 * and IP is zero. Thus, data addresses need to be absolute
18 * (no relocation) and are taken with regard to r_base. 19 * (no relocation) and are taken with regard to r_base.
19 * 20 *
21 * With the addition of trampoline_level4_pgt this code can
22 * now enter a 64bit kernel that lives at arbitrary 64bit
23 * physical addresses.
24 *
20 * If you work on this file, check the object module with objdump 25 * If you work on this file, check the object module with objdump
21 * --full-contents --reloc to make sure there are no relocation 26 * --full-contents --reloc to make sure there are no relocation
22 * entries. For the GDT entry we do hand relocation in smpboot.c 27 * entries.
23 * because of 64bit linker limitations.
24 */ 28 */
25 29
26#include <linux/linkage.h> 30#include <linux/linkage.h>
27#include <asm/segment.h> 31#include <asm/pgtable.h>
28#include <asm/page.h> 32#include <asm/page.h>
33#include <asm/msr.h>
34#include <asm/segment.h>
29 35
30.data 36.data
31 37
@@ -33,15 +39,33 @@
33 39
34ENTRY(trampoline_data) 40ENTRY(trampoline_data)
35r_base = . 41r_base = .
42 cli # We should be safe anyway
36 wbinvd 43 wbinvd
37 mov %cs, %ax # Code and data in the same place 44 mov %cs, %ax # Code and data in the same place
38 mov %ax, %ds 45 mov %ax, %ds
46 mov %ax, %es
47 mov %ax, %ss
39 48
40 cli # We should be safe anyway
41 49
42 movl $0xA5A5A5A5, trampoline_data - r_base 50 movl $0xA5A5A5A5, trampoline_data - r_base
43 # write marker for master knows we're running 51 # write marker for master knows we're running
44 52
53 # Setup stack
54 movw $(trampoline_stack_end - r_base), %sp
55
56 call verify_cpu # Verify the cpu supports long mode
57 testl %eax, %eax # Check for return code
58 jnz no_longmode
59
60 mov %cs, %ax
61 movzx %ax, %esi # Find the 32bit trampoline location
62 shll $4, %esi
63
64 # Fixup the vectors
65 addl %esi, startup_32_vector - r_base
66 addl %esi, startup_64_vector - r_base
67 addl %esi, tgdt + 2 - r_base # Fixup the gdt pointer
68
45 /* 69 /*
46 * GDT tables in non default location kernel can be beyond 16MB and 70 * GDT tables in non default location kernel can be beyond 16MB and
47 * lgdt will not be able to load the address as in real mode default 71 * lgdt will not be able to load the address as in real mode default
@@ -49,23 +73,94 @@ r_base = .
49 * to 32 bit. 73 * to 32 bit.
50 */ 74 */
51 75
52 lidtl idt_48 - r_base # load idt with 0, 0 76 lidtl tidt - r_base # load idt with 0, 0
53 lgdtl gdt_48 - r_base # load gdt with whatever is appropriate 77 lgdtl tgdt - r_base # load gdt with whatever is appropriate
54 78
55 xor %ax, %ax 79 xor %ax, %ax
56 inc %ax # protected mode (PE) bit 80 inc %ax # protected mode (PE) bit
57 lmsw %ax # into protected mode 81 lmsw %ax # into protected mode
58 # flaush prefetch and jump to startup_32 in arch/x86_64/kernel/head.S 82
59 ljmpl $__KERNEL32_CS, $(startup_32-__START_KERNEL_map) 83 # flush prefetch and jump to startup_32
84 ljmpl *(startup_32_vector - r_base)
85
86 .code32
87 .balign 4
88startup_32:
89 movl $__KERNEL_DS, %eax # Initialize the %ds segment register
90 movl %eax, %ds
91
92 xorl %eax, %eax
93 btsl $5, %eax # Enable PAE mode
94 movl %eax, %cr4
95
96 # Setup trampoline 4 level pagetables
97 leal (trampoline_level4_pgt - r_base)(%esi), %eax
98 movl %eax, %cr3
99
100 movl $MSR_EFER, %ecx
101 movl $(1 << _EFER_LME), %eax # Enable Long Mode
102 xorl %edx, %edx
103 wrmsr
104
105 xorl %eax, %eax
106 btsl $31, %eax # Enable paging and in turn activate Long Mode
107 btsl $0, %eax # Enable protected mode
108 movl %eax, %cr0
109
110 /*
111 * At this point we're in long mode but in 32bit compatibility mode
112 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
113 * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
114 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
115 */
116 ljmp *(startup_64_vector - r_base)(%esi)
117
118 .code64
119 .balign 4
120startup_64:
121 # Now jump into the kernel using virtual addresses
122 movq $secondary_startup_64, %rax
123 jmp *%rax
124
125 .code16
126no_longmode:
127 hlt
128 jmp no_longmode
129#include "verify_cpu.S"
60 130
61 # Careful these need to be in the same 64K segment as the above; 131 # Careful these need to be in the same 64K segment as the above;
62idt_48: 132tidt:
63 .word 0 # idt limit = 0 133 .word 0 # idt limit = 0
64 .word 0, 0 # idt base = 0L 134 .word 0, 0 # idt base = 0L
65 135
66gdt_48: 136 # Duplicate the global descriptor table
67 .short GDT_ENTRIES*8 - 1 # gdt limit 137 # so the kernel can live anywhere
68 .long cpu_gdt_table-__START_KERNEL_map 138 .balign 4
139tgdt:
140 .short tgdt_end - tgdt # gdt limit
141 .long tgdt - r_base
142 .short 0
143 .quad 0x00cf9b000000ffff # __KERNEL32_CS
144 .quad 0x00af9b000000ffff # __KERNEL_CS
145 .quad 0x00cf93000000ffff # __KERNEL_DS
146tgdt_end:
147
148 .balign 4
149startup_32_vector:
150 .long startup_32 - r_base
151 .word __KERNEL32_CS, 0
152
153 .balign 4
154startup_64_vector:
155 .long startup_64 - r_base
156 .word __KERNEL_CS, 0
157
158trampoline_stack:
159 .org 0x1000
160trampoline_stack_end:
161ENTRY(trampoline_level4_pgt)
162 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
163 .fill 510,8,0
164 .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
69 165
70.globl trampoline_end 166ENTRY(trampoline_end)
71trampoline_end:
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 09d2e8a10a49..d76fc32d4599 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -426,8 +426,7 @@ void show_registers(struct pt_regs *regs)
426 const int cpu = smp_processor_id(); 426 const int cpu = smp_processor_id();
427 struct task_struct *cur = cpu_pda(cpu)->pcurrent; 427 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
428 428
429 rsp = regs->rsp; 429 rsp = regs->rsp;
430
431 printk("CPU %d ", cpu); 430 printk("CPU %d ", cpu);
432 __show_regs(regs); 431 __show_regs(regs);
433 printk("Process %s (pid: %d, threadinfo %p, task %p)\n", 432 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
@@ -438,7 +437,6 @@ void show_registers(struct pt_regs *regs)
438 * time of the fault.. 437 * time of the fault..
439 */ 438 */
440 if (in_kernel) { 439 if (in_kernel) {
441
442 printk("Stack: "); 440 printk("Stack: ");
443 _show_stack(NULL, regs, (unsigned long*)rsp); 441 _show_stack(NULL, regs, (unsigned long*)rsp);
444 442
@@ -581,10 +579,20 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
581{ 579{
582 struct task_struct *tsk = current; 580 struct task_struct *tsk = current;
583 581
584 tsk->thread.error_code = error_code;
585 tsk->thread.trap_no = trapnr;
586
587 if (user_mode(regs)) { 582 if (user_mode(regs)) {
583 /*
584 * We want error_code and trap_no set for userspace
585 * faults and kernelspace faults which result in
586 * die(), but not kernelspace faults which are fixed
587 * up. die() gives the process no chance to handle
588 * the signal and notice the kernel fault information,
589 * so that won't result in polluting the information
590 * about previously queued, but not yet delivered,
591 * faults. See also do_general_protection below.
592 */
593 tsk->thread.error_code = error_code;
594 tsk->thread.trap_no = trapnr;
595
588 if (exception_trace && unhandled_signal(tsk, signr)) 596 if (exception_trace && unhandled_signal(tsk, signr))
589 printk(KERN_INFO 597 printk(KERN_INFO
590 "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", 598 "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
@@ -605,8 +613,11 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
605 fixup = search_exception_tables(regs->rip); 613 fixup = search_exception_tables(regs->rip);
606 if (fixup) 614 if (fixup)
607 regs->rip = fixup->fixup; 615 regs->rip = fixup->fixup;
608 else 616 else {
617 tsk->thread.error_code = error_code;
618 tsk->thread.trap_no = trapnr;
609 die(str, regs, error_code); 619 die(str, regs, error_code);
620 }
610 return; 621 return;
611 } 622 }
612} 623}
@@ -682,10 +693,10 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
682 693
683 conditional_sti(regs); 694 conditional_sti(regs);
684 695
685 tsk->thread.error_code = error_code;
686 tsk->thread.trap_no = 13;
687
688 if (user_mode(regs)) { 696 if (user_mode(regs)) {
697 tsk->thread.error_code = error_code;
698 tsk->thread.trap_no = 13;
699
689 if (exception_trace && unhandled_signal(tsk, SIGSEGV)) 700 if (exception_trace && unhandled_signal(tsk, SIGSEGV))
690 printk(KERN_INFO 701 printk(KERN_INFO
691 "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", 702 "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
@@ -704,6 +715,9 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
704 regs->rip = fixup->fixup; 715 regs->rip = fixup->fixup;
705 return; 716 return;
706 } 717 }
718
719 tsk->thread.error_code = error_code;
720 tsk->thread.trap_no = 13;
707 if (notify_die(DIE_GPF, "general protection fault", regs, 721 if (notify_die(DIE_GPF, "general protection fault", regs,
708 error_code, 13, SIGSEGV) == NOTIFY_STOP) 722 error_code, 13, SIGSEGV) == NOTIFY_STOP)
709 return; 723 return;
diff --git a/arch/x86_64/kernel/tsc.c b/arch/x86_64/kernel/tsc.c
index 1a0edbbffaa0..48f9a8e6aa91 100644
--- a/arch/x86_64/kernel/tsc.c
+++ b/arch/x86_64/kernel/tsc.c
@@ -13,6 +13,8 @@ static int notsc __initdata = 0;
13 13
14unsigned int cpu_khz; /* TSC clocks / usec, not used here */ 14unsigned int cpu_khz; /* TSC clocks / usec, not used here */
15EXPORT_SYMBOL(cpu_khz); 15EXPORT_SYMBOL(cpu_khz);
16unsigned int tsc_khz;
17EXPORT_SYMBOL(tsc_khz);
16 18
17static unsigned int cyc2ns_scale __read_mostly; 19static unsigned int cyc2ns_scale __read_mostly;
18 20
@@ -77,7 +79,7 @@ static void handle_cpufreq_delayed_get(struct work_struct *v)
77static unsigned int ref_freq = 0; 79static unsigned int ref_freq = 0;
78static unsigned long loops_per_jiffy_ref = 0; 80static unsigned long loops_per_jiffy_ref = 0;
79 81
80static unsigned long cpu_khz_ref = 0; 82static unsigned long tsc_khz_ref = 0;
81 83
82static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 84static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
83 void *data) 85 void *data)
@@ -99,7 +101,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
99 if (!ref_freq) { 101 if (!ref_freq) {
100 ref_freq = freq->old; 102 ref_freq = freq->old;
101 loops_per_jiffy_ref = *lpj; 103 loops_per_jiffy_ref = *lpj;
102 cpu_khz_ref = cpu_khz; 104 tsc_khz_ref = tsc_khz;
103 } 105 }
104 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || 106 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
105 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || 107 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
@@ -107,12 +109,12 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
107 *lpj = 109 *lpj =
108 cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); 110 cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
109 111
110 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); 112 tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
111 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) 113 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
112 mark_tsc_unstable(); 114 mark_tsc_unstable("cpufreq changes");
113 } 115 }
114 116
115 set_cyc2ns_scale(cpu_khz_ref); 117 set_cyc2ns_scale(tsc_khz_ref);
116 118
117 return 0; 119 return 0;
118} 120}
@@ -197,10 +199,11 @@ static struct clocksource clocksource_tsc = {
197 .vread = vread_tsc, 199 .vread = vread_tsc,
198}; 200};
199 201
200void mark_tsc_unstable(void) 202void mark_tsc_unstable(char *reason)
201{ 203{
202 if (!tsc_unstable) { 204 if (!tsc_unstable) {
203 tsc_unstable = 1; 205 tsc_unstable = 1;
206 printk("Marking TSC unstable due to %s\n", reason);
204 /* Change only the rating, when not registered */ 207 /* Change only the rating, when not registered */
205 if (clocksource_tsc.mult) 208 if (clocksource_tsc.mult)
206 clocksource_change_rating(&clocksource_tsc, 0); 209 clocksource_change_rating(&clocksource_tsc, 0);
@@ -213,7 +216,7 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
213void __init init_tsc_clocksource(void) 216void __init init_tsc_clocksource(void)
214{ 217{
215 if (!notsc) { 218 if (!notsc) {
216 clocksource_tsc.mult = clocksource_khz2mult(cpu_khz, 219 clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
217 clocksource_tsc.shift); 220 clocksource_tsc.shift);
218 if (check_tsc_unstable()) 221 if (check_tsc_unstable())
219 clocksource_tsc.rating = 0; 222 clocksource_tsc.rating = 0;
diff --git a/arch/x86_64/kernel/tsc_sync.c b/arch/x86_64/kernel/tsc_sync.c
index 014f0db45dfa..355f5f506c81 100644
--- a/arch/x86_64/kernel/tsc_sync.c
+++ b/arch/x86_64/kernel/tsc_sync.c
@@ -50,7 +50,7 @@ static __cpuinit void check_tsc_warp(void)
50 /* 50 /*
51 * The measurement runs for 20 msecs: 51 * The measurement runs for 20 msecs:
52 */ 52 */
53 end = start + cpu_khz * 20ULL; 53 end = start + tsc_khz * 20ULL;
54 now = start; 54 now = start;
55 55
56 for (i = 0; ; i++) { 56 for (i = 0; ; i++) {
@@ -138,7 +138,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
138 printk("\n"); 138 printk("\n");
139 printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs," 139 printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs,"
140 " turning off TSC clock.\n", max_warp); 140 " turning off TSC clock.\n", max_warp);
141 mark_tsc_unstable(); 141 mark_tsc_unstable("check_tsc_sync_source failed");
142 nr_warps = 0; 142 nr_warps = 0;
143 max_warp = 0; 143 max_warp = 0;
144 last_tsc = 0; 144 last_tsc = 0;
diff --git a/arch/x86_64/kernel/verify_cpu.S b/arch/x86_64/kernel/verify_cpu.S
new file mode 100644
index 000000000000..e035f5948199
--- /dev/null
+++ b/arch/x86_64/kernel/verify_cpu.S
@@ -0,0 +1,119 @@
1/*
2 *
3 * verify_cpu.S - Code for cpu long mode and SSE verification. This
4 * code has been borrowed from boot/setup.S and was introduced by
5 * Andi Kleen.
6 *
7 * Copyright (c) 2007 Andi Kleen (ak@suse.de)
8 * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com)
9 * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com)
10 *
11 * This source code is licensed under the GNU General Public License,
12 * Version 2. See the file COPYING for more details.
13 *
14 * This is a common code for verification whether CPU supports
15 * long mode and SSE or not. It is not called directly instead this
16 * file is included at various places and compiled in that context.
17 * Following are the current usage.
18 *
19 * This file is included by both 16bit and 32bit code.
20 *
21 * arch/x86_64/boot/setup.S : Boot cpu verification (16bit)
22 * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit)
23 * arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit)
24 * arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit)
25 *
26 * verify_cpu, returns the status of cpu check in register %eax.
27 * 0: Success 1: Failure
28 *
29 * The caller needs to check for the error code and take the action
30 * appropriately. Either display a message or halt.
31 */
32
33#include <asm/cpufeature.h>
34
35verify_cpu:
36 pushfl # Save caller passed flags
37 pushl $0 # Kill any dangerous flags
38 popfl
39
40 /* minimum CPUID flags for x86-64 as defined by AMD */
41#define M(x) (1<<(x))
42#define M2(a,b) M(a)|M(b)
43#define M4(a,b,c,d) M(a)|M(b)|M(c)|M(d)
44
45#define SSE_MASK \
46 (M2(X86_FEATURE_XMM,X86_FEATURE_XMM2))
47#define REQUIRED_MASK1 \
48 (M4(X86_FEATURE_FPU,X86_FEATURE_PSE,X86_FEATURE_TSC,X86_FEATURE_MSR)|\
49 M4(X86_FEATURE_PAE,X86_FEATURE_CX8,X86_FEATURE_PGE,X86_FEATURE_CMOV)|\
50 M(X86_FEATURE_FXSR))
51#define REQUIRED_MASK2 \
52 (M(X86_FEATURE_LM - 32))
53
54 pushfl # standard way to check for cpuid
55 popl %eax
56 movl %eax,%ebx
57 xorl $0x200000,%eax
58 pushl %eax
59 popfl
60 pushfl
61 popl %eax
62 cmpl %eax,%ebx
63 jz verify_cpu_no_longmode # cpu has no cpuid
64
65 movl $0x0,%eax # See if cpuid 1 is implemented
66 cpuid
67 cmpl $0x1,%eax
68 jb verify_cpu_no_longmode # no cpuid 1
69
70 xor %di,%di
71 cmpl $0x68747541,%ebx # AuthenticAMD
72 jnz verify_cpu_noamd
73 cmpl $0x69746e65,%edx
74 jnz verify_cpu_noamd
75 cmpl $0x444d4163,%ecx
76 jnz verify_cpu_noamd
77 mov $1,%di # cpu is from AMD
78
79verify_cpu_noamd:
80 movl $0x1,%eax # Does the cpu have what it takes
81 cpuid
82 andl $REQUIRED_MASK1,%edx
83 xorl $REQUIRED_MASK1,%edx
84 jnz verify_cpu_no_longmode
85
86 movl $0x80000000,%eax # See if extended cpuid is implemented
87 cpuid
88 cmpl $0x80000001,%eax
89 jb verify_cpu_no_longmode # no extended cpuid
90
91 movl $0x80000001,%eax # Does the cpu have what it takes
92 cpuid
93 andl $REQUIRED_MASK2,%edx
94 xorl $REQUIRED_MASK2,%edx
95 jnz verify_cpu_no_longmode
96
97verify_cpu_sse_test:
98 movl $1,%eax
99 cpuid
100 andl $SSE_MASK,%edx
101 cmpl $SSE_MASK,%edx
102 je verify_cpu_sse_ok
103 test %di,%di
104 jz verify_cpu_no_longmode # only try to force SSE on AMD
105 movl $0xc0010015,%ecx # HWCR
106 rdmsr
107 btr $15,%eax # enable SSE
108 wrmsr
109 xor %di,%di # don't loop
110 jmp verify_cpu_sse_test # try again
111
112verify_cpu_no_longmode:
113 popfl # Restore caller passed flags
114 movl $1,%eax
115 ret
116verify_cpu_sse_ok:
117 popfl # Restore caller passed flags
118 xorl %eax, %eax
119 ret
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 5176ecf006ee..88cfa50b424d 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -29,9 +29,7 @@ SECTIONS
29 .text : AT(ADDR(.text) - LOAD_OFFSET) { 29 .text : AT(ADDR(.text) - LOAD_OFFSET) {
30 /* First the code that has to be first for bootstrapping */ 30 /* First the code that has to be first for bootstrapping */
31 *(.bootstrap.text) 31 *(.bootstrap.text)
32 /* Then all the functions that are "hot" in profiles, to group them 32 _stext = .;
33 onto the same hugetlb entry */
34 #include "functionlist"
35 /* Then the rest */ 33 /* Then the rest */
36 *(.text) 34 *(.text)
37 SCHED_TEXT 35 SCHED_TEXT
@@ -50,10 +48,10 @@ SECTIONS
50 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } 48 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
51 __stop___ex_table = .; 49 __stop___ex_table = .;
52 50
53 RODATA
54
55 BUG_TABLE 51 BUG_TABLE
56 52
53 RODATA
54
57 . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ 55 . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */
58 /* Data */ 56 /* Data */
59 .data : AT(ADDR(.data) - LOAD_OFFSET) { 57 .data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -94,6 +92,12 @@ SECTIONS
94 { *(.vsyscall_gtod_data) } 92 { *(.vsyscall_gtod_data) }
95 vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); 93 vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
96 94
95
96 .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1))
97 { *(.vsyscall_1) }
98 .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2))
99 { *(.vsyscall_2) }
100
97 .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } 101 .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
98 vgetcpu_mode = VVIRT(.vgetcpu_mode); 102 vgetcpu_mode = VVIRT(.vgetcpu_mode);
99 103
@@ -101,10 +105,6 @@ SECTIONS
101 .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } 105 .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) }
102 jiffies = VVIRT(.jiffies); 106 jiffies = VVIRT(.jiffies);
103 107
104 .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1))
105 { *(.vsyscall_1) }
106 .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2))
107 { *(.vsyscall_2) }
108 .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) 108 .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3))
109 { *(.vsyscall_3) } 109 { *(.vsyscall_3) }
110 110
@@ -194,7 +194,7 @@ SECTIONS
194 __initramfs_end = .; 194 __initramfs_end = .;
195#endif 195#endif
196 196
197 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); 197 . = ALIGN(4096);
198 __per_cpu_start = .; 198 __per_cpu_start = .;
199 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } 199 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
200 __per_cpu_end = .; 200 __per_cpu_end = .;
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index b43c698cf7d3..dc32cef96195 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -45,14 +45,34 @@
45 45
46#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) 46#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
47#define __syscall_clobber "r11","rcx","memory" 47#define __syscall_clobber "r11","rcx","memory"
48#define __pa_vsymbol(x) \
49 ({unsigned long v; \
50 extern char __vsyscall_0; \
51 asm("" : "=r" (v) : "0" (x)); \
52 ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); })
48 53
54/*
55 * vsyscall_gtod_data contains data that is :
56 * - readonly from vsyscalls
57 * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64)
58 * Try to keep this structure as small as possible to avoid cache line ping pongs
59 */
49struct vsyscall_gtod_data_t { 60struct vsyscall_gtod_data_t {
50 seqlock_t lock; 61 seqlock_t lock;
51 int sysctl_enabled; 62
52 struct timeval wall_time_tv; 63 /* open coded 'struct timespec' */
64 time_t wall_time_sec;
65 u32 wall_time_nsec;
66
67 int sysctl_enabled;
53 struct timezone sys_tz; 68 struct timezone sys_tz;
54 cycle_t offset_base; 69 struct { /* extract of a clocksource struct */
55 struct clocksource clock; 70 cycle_t (*vread)(void);
71 cycle_t cycle_last;
72 cycle_t mask;
73 u32 mult;
74 u32 shift;
75 } clock;
56}; 76};
57int __vgetcpu_mode __section_vgetcpu_mode; 77int __vgetcpu_mode __section_vgetcpu_mode;
58 78
@@ -68,9 +88,13 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
68 88
69 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); 89 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
70 /* copy vsyscall data */ 90 /* copy vsyscall data */
71 vsyscall_gtod_data.clock = *clock; 91 vsyscall_gtod_data.clock.vread = clock->vread;
72 vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec; 92 vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
73 vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000; 93 vsyscall_gtod_data.clock.mask = clock->mask;
94 vsyscall_gtod_data.clock.mult = clock->mult;
95 vsyscall_gtod_data.clock.shift = clock->shift;
96 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
97 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
74 vsyscall_gtod_data.sys_tz = sys_tz; 98 vsyscall_gtod_data.sys_tz = sys_tz;
75 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); 99 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
76} 100}
@@ -105,7 +129,8 @@ static __always_inline long time_syscall(long *t)
105static __always_inline void do_vgettimeofday(struct timeval * tv) 129static __always_inline void do_vgettimeofday(struct timeval * tv)
106{ 130{
107 cycle_t now, base, mask, cycle_delta; 131 cycle_t now, base, mask, cycle_delta;
108 unsigned long seq, mult, shift, nsec_delta; 132 unsigned seq;
133 unsigned long mult, shift, nsec;
109 cycle_t (*vread)(void); 134 cycle_t (*vread)(void);
110 do { 135 do {
111 seq = read_seqbegin(&__vsyscall_gtod_data.lock); 136 seq = read_seqbegin(&__vsyscall_gtod_data.lock);
@@ -121,21 +146,20 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
121 mult = __vsyscall_gtod_data.clock.mult; 146 mult = __vsyscall_gtod_data.clock.mult;
122 shift = __vsyscall_gtod_data.clock.shift; 147 shift = __vsyscall_gtod_data.clock.shift;
123 148
124 *tv = __vsyscall_gtod_data.wall_time_tv; 149 tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
125 150 nsec = __vsyscall_gtod_data.wall_time_nsec;
126 } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); 151 } while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
127 152
128 /* calculate interval: */ 153 /* calculate interval: */
129 cycle_delta = (now - base) & mask; 154 cycle_delta = (now - base) & mask;
130 /* convert to nsecs: */ 155 /* convert to nsecs: */
131 nsec_delta = (cycle_delta * mult) >> shift; 156 nsec += (cycle_delta * mult) >> shift;
132 157
133 /* convert to usecs and add to timespec: */ 158 while (nsec >= NSEC_PER_SEC) {
134 tv->tv_usec += nsec_delta / NSEC_PER_USEC;
135 while (tv->tv_usec > USEC_PER_SEC) {
136 tv->tv_sec += 1; 159 tv->tv_sec += 1;
137 tv->tv_usec -= USEC_PER_SEC; 160 nsec -= NSEC_PER_SEC;
138 } 161 }
162 tv->tv_usec = nsec / NSEC_PER_USEC;
139} 163}
140 164
141int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) 165int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
@@ -151,11 +175,13 @@ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
151 * unlikely */ 175 * unlikely */
152time_t __vsyscall(1) vtime(time_t *t) 176time_t __vsyscall(1) vtime(time_t *t)
153{ 177{
178 time_t result;
154 if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) 179 if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
155 return time_syscall(t); 180 return time_syscall(t);
156 else if (t) 181 result = __vsyscall_gtod_data.wall_time_sec;
157 *t = __vsyscall_gtod_data.wall_time_tv.tv_sec; 182 if (t)
158 return __vsyscall_gtod_data.wall_time_tv.tv_sec; 183 *t = result;
184 return result;
159} 185}
160 186
161/* Fast way to get current CPU and node. 187/* Fast way to get current CPU and node.
@@ -224,10 +250,10 @@ static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
224 return ret; 250 return ret;
225 /* gcc has some trouble with __va(__pa()), so just do it this 251 /* gcc has some trouble with __va(__pa()), so just do it this
226 way. */ 252 way. */
227 map1 = ioremap(__pa_symbol(&vsysc1), 2); 253 map1 = ioremap(__pa_vsymbol(&vsysc1), 2);
228 if (!map1) 254 if (!map1)
229 return -ENOMEM; 255 return -ENOMEM;
230 map2 = ioremap(__pa_symbol(&vsysc2), 2); 256 map2 = ioremap(__pa_vsymbol(&vsysc2), 2);
231 if (!map2) { 257 if (!map2) {
232 ret = -ENOMEM; 258 ret = -ENOMEM;
233 goto out; 259 goto out;
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 6ada7231f3ab..de99dba2c515 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -585,7 +585,7 @@ do_sigbus:
585} 585}
586 586
587DEFINE_SPINLOCK(pgd_lock); 587DEFINE_SPINLOCK(pgd_lock);
588struct page *pgd_list; 588LIST_HEAD(pgd_list);
589 589
590void vmalloc_sync_all(void) 590void vmalloc_sync_all(void)
591{ 591{
@@ -605,8 +605,7 @@ void vmalloc_sync_all(void)
605 if (pgd_none(*pgd_ref)) 605 if (pgd_none(*pgd_ref))
606 continue; 606 continue;
607 spin_lock(&pgd_lock); 607 spin_lock(&pgd_lock);
608 for (page = pgd_list; page; 608 list_for_each_entry(page, &pgd_list, lru) {
609 page = (struct page *)page->index) {
610 pgd_t *pgd; 609 pgd_t *pgd;
611 pgd = (pgd_t *)page_address(page) + pgd_index(address); 610 pgd = (pgd_t *)page_address(page) + pgd_index(address);
612 if (pgd_none(*pgd)) 611 if (pgd_none(*pgd))
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index ec31534eb104..282b0a8f00ad 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -22,10 +22,12 @@
22#include <linux/bootmem.h> 22#include <linux/bootmem.h>
23#include <linux/proc_fs.h> 23#include <linux/proc_fs.h>
24#include <linux/pci.h> 24#include <linux/pci.h>
25#include <linux/pfn.h>
25#include <linux/poison.h> 26#include <linux/poison.h>
26#include <linux/dma-mapping.h> 27#include <linux/dma-mapping.h>
27#include <linux/module.h> 28#include <linux/module.h>
28#include <linux/memory_hotplug.h> 29#include <linux/memory_hotplug.h>
30#include <linux/nmi.h>
29 31
30#include <asm/processor.h> 32#include <asm/processor.h>
31#include <asm/system.h> 33#include <asm/system.h>
@@ -46,7 +48,7 @@
46#define Dprintk(x...) 48#define Dprintk(x...)
47#endif 49#endif
48 50
49struct dma_mapping_ops* dma_ops; 51const struct dma_mapping_ops* dma_ops;
50EXPORT_SYMBOL(dma_ops); 52EXPORT_SYMBOL(dma_ops);
51 53
52static unsigned long dma_reserve __initdata; 54static unsigned long dma_reserve __initdata;
@@ -72,6 +74,11 @@ void show_mem(void)
72 74
73 for_each_online_pgdat(pgdat) { 75 for_each_online_pgdat(pgdat) {
74 for (i = 0; i < pgdat->node_spanned_pages; ++i) { 76 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
77 /* this loop can take a while with 256 GB and 4k pages
78 so update the NMI watchdog */
79 if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
80 touch_nmi_watchdog();
81 }
75 page = pfn_to_page(pgdat->node_start_pfn + i); 82 page = pfn_to_page(pgdat->node_start_pfn + i);
76 total++; 83 total++;
77 if (PageReserved(page)) 84 if (PageReserved(page))
@@ -167,23 +174,9 @@ __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
167 174
168unsigned long __initdata table_start, table_end; 175unsigned long __initdata table_start, table_end;
169 176
170extern pmd_t temp_boot_pmds[]; 177static __meminit void *alloc_low_page(unsigned long *phys)
171
172static struct temp_map {
173 pmd_t *pmd;
174 void *address;
175 int allocated;
176} temp_mappings[] __initdata = {
177 { &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
178 { &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
179 {}
180};
181
182static __meminit void *alloc_low_page(int *index, unsigned long *phys)
183{ 178{
184 struct temp_map *ti; 179 unsigned long pfn = table_end++;
185 int i;
186 unsigned long pfn = table_end++, paddr;
187 void *adr; 180 void *adr;
188 181
189 if (after_bootmem) { 182 if (after_bootmem) {
@@ -194,57 +187,63 @@ static __meminit void *alloc_low_page(int *index, unsigned long *phys)
194 187
195 if (pfn >= end_pfn) 188 if (pfn >= end_pfn)
196 panic("alloc_low_page: ran out of memory"); 189 panic("alloc_low_page: ran out of memory");
197 for (i = 0; temp_mappings[i].allocated; i++) { 190
198 if (!temp_mappings[i].pmd) 191 adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
199 panic("alloc_low_page: ran out of temp mappings");
200 }
201 ti = &temp_mappings[i];
202 paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
203 set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
204 ti->allocated = 1;
205 __flush_tlb();
206 adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
207 memset(adr, 0, PAGE_SIZE); 192 memset(adr, 0, PAGE_SIZE);
208 *index = i; 193 *phys = pfn * PAGE_SIZE;
209 *phys = pfn * PAGE_SIZE; 194 return adr;
210 return adr; 195}
211}
212 196
213static __meminit void unmap_low_page(int i) 197static __meminit void unmap_low_page(void *adr)
214{ 198{
215 struct temp_map *ti;
216 199
217 if (after_bootmem) 200 if (after_bootmem)
218 return; 201 return;
219 202
220 ti = &temp_mappings[i]; 203 early_iounmap(adr, PAGE_SIZE);
221 set_pmd(ti->pmd, __pmd(0));
222 ti->allocated = 0;
223} 204}
224 205
225/* Must run before zap_low_mappings */ 206/* Must run before zap_low_mappings */
226__init void *early_ioremap(unsigned long addr, unsigned long size) 207__init void *early_ioremap(unsigned long addr, unsigned long size)
227{ 208{
228 unsigned long map = round_down(addr, LARGE_PAGE_SIZE); 209 unsigned long vaddr;
229 210 pmd_t *pmd, *last_pmd;
230 /* actually usually some more */ 211 int i, pmds;
231 if (size >= LARGE_PAGE_SIZE) { 212
232 return NULL; 213 pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
214 vaddr = __START_KERNEL_map;
215 pmd = level2_kernel_pgt;
216 last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
217 for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
218 for (i = 0; i < pmds; i++) {
219 if (pmd_present(pmd[i]))
220 goto next;
221 }
222 vaddr += addr & ~PMD_MASK;
223 addr &= PMD_MASK;
224 for (i = 0; i < pmds; i++, addr += PMD_SIZE)
225 set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
226 __flush_tlb();
227 return (void *)vaddr;
228 next:
229 ;
233 } 230 }
234 set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); 231 printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
235 map += LARGE_PAGE_SIZE; 232 return NULL;
236 set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
237 __flush_tlb();
238 return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
239} 233}
240 234
241/* To avoid virtual aliases later */ 235/* To avoid virtual aliases later */
242__init void early_iounmap(void *addr, unsigned long size) 236__init void early_iounmap(void *addr, unsigned long size)
243{ 237{
244 if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address) 238 unsigned long vaddr;
245 printk("early_iounmap: bad address %p\n", addr); 239 pmd_t *pmd;
246 set_pmd(temp_mappings[0].pmd, __pmd(0)); 240 int i, pmds;
247 set_pmd(temp_mappings[1].pmd, __pmd(0)); 241
242 vaddr = (unsigned long)addr;
243 pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
244 pmd = level2_kernel_pgt + pmd_index(vaddr);
245 for (i = 0; i < pmds; i++)
246 pmd_clear(pmd + i);
248 __flush_tlb(); 247 __flush_tlb();
249} 248}
250 249
@@ -289,7 +288,6 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne
289 288
290 289
291 for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) { 290 for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
292 int map;
293 unsigned long pmd_phys; 291 unsigned long pmd_phys;
294 pud_t *pud = pud_page + pud_index(addr); 292 pud_t *pud = pud_page + pud_index(addr);
295 pmd_t *pmd; 293 pmd_t *pmd;
@@ -307,12 +305,12 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne
307 continue; 305 continue;
308 } 306 }
309 307
310 pmd = alloc_low_page(&map, &pmd_phys); 308 pmd = alloc_low_page(&pmd_phys);
311 spin_lock(&init_mm.page_table_lock); 309 spin_lock(&init_mm.page_table_lock);
312 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); 310 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
313 phys_pmd_init(pmd, addr, end); 311 phys_pmd_init(pmd, addr, end);
314 spin_unlock(&init_mm.page_table_lock); 312 spin_unlock(&init_mm.page_table_lock);
315 unmap_low_page(map); 313 unmap_low_page(pmd);
316 } 314 }
317 __flush_tlb(); 315 __flush_tlb();
318} 316}
@@ -364,7 +362,6 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
364 end = (unsigned long)__va(end); 362 end = (unsigned long)__va(end);
365 363
366 for (; start < end; start = next) { 364 for (; start < end; start = next) {
367 int map;
368 unsigned long pud_phys; 365 unsigned long pud_phys;
369 pgd_t *pgd = pgd_offset_k(start); 366 pgd_t *pgd = pgd_offset_k(start);
370 pud_t *pud; 367 pud_t *pud;
@@ -372,7 +369,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
372 if (after_bootmem) 369 if (after_bootmem)
373 pud = pud_offset(pgd, start & PGDIR_MASK); 370 pud = pud_offset(pgd, start & PGDIR_MASK);
374 else 371 else
375 pud = alloc_low_page(&map, &pud_phys); 372 pud = alloc_low_page(&pud_phys);
376 373
377 next = start + PGDIR_SIZE; 374 next = start + PGDIR_SIZE;
378 if (next > end) 375 if (next > end)
@@ -380,7 +377,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
380 phys_pud_init(pud, __pa(start), __pa(next)); 377 phys_pud_init(pud, __pa(start), __pa(next));
381 if (!after_bootmem) 378 if (!after_bootmem)
382 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); 379 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
383 unmap_low_page(map); 380 unmap_low_page(pud);
384 } 381 }
385 382
386 if (!after_bootmem) 383 if (!after_bootmem)
@@ -388,21 +385,6 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
388 __flush_tlb_all(); 385 __flush_tlb_all();
389} 386}
390 387
391void __cpuinit zap_low_mappings(int cpu)
392{
393 if (cpu == 0) {
394 pgd_t *pgd = pgd_offset_k(0UL);
395 pgd_clear(pgd);
396 } else {
397 /*
398 * For AP's, zap the low identity mappings by changing the cr3
399 * to init_level4_pgt and doing local flush tlb all
400 */
401 asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
402 }
403 __flush_tlb_all();
404}
405
406#ifndef CONFIG_NUMA 388#ifndef CONFIG_NUMA
407void __init paging_init(void) 389void __init paging_init(void)
408{ 390{
@@ -579,15 +561,6 @@ void __init mem_init(void)
579 reservedpages << (PAGE_SHIFT-10), 561 reservedpages << (PAGE_SHIFT-10),
580 datasize >> 10, 562 datasize >> 10,
581 initsize >> 10); 563 initsize >> 10);
582
583#ifdef CONFIG_SMP
584 /*
585 * Sync boot_level4_pgt mappings with the init_level4_pgt
586 * except for the low identity mappings which are already zapped
587 * in init_level4_pgt. This sync-up is essential for AP's bringup
588 */
589 memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
590#endif
591} 564}
592 565
593void free_init_pages(char *what, unsigned long begin, unsigned long end) 566void free_init_pages(char *what, unsigned long begin, unsigned long end)
@@ -597,37 +570,44 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
597 if (begin >= end) 570 if (begin >= end)
598 return; 571 return;
599 572
600 printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); 573 printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
601 for (addr = begin; addr < end; addr += PAGE_SIZE) { 574 for (addr = begin; addr < end; addr += PAGE_SIZE) {
602 ClearPageReserved(virt_to_page(addr)); 575 struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
603 init_page_count(virt_to_page(addr)); 576 ClearPageReserved(page);
604 memset((void *)(addr & ~(PAGE_SIZE-1)), 577 init_page_count(page);
605 POISON_FREE_INITMEM, PAGE_SIZE); 578 memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE);
606 free_page(addr); 579 if (addr >= __START_KERNEL_map)
580 change_page_attr_addr(addr, 1, __pgprot(0));
581 __free_page(page);
607 totalram_pages++; 582 totalram_pages++;
608 } 583 }
584 if (addr > __START_KERNEL_map)
585 global_flush_tlb();
609} 586}
610 587
611void free_initmem(void) 588void free_initmem(void)
612{ 589{
613 memset(__initdata_begin, POISON_FREE_INITDATA,
614 __initdata_end - __initdata_begin);
615 free_init_pages("unused kernel memory", 590 free_init_pages("unused kernel memory",
616 (unsigned long)(&__init_begin), 591 __pa_symbol(&__init_begin),
617 (unsigned long)(&__init_end)); 592 __pa_symbol(&__init_end));
618} 593}
619 594
620#ifdef CONFIG_DEBUG_RODATA 595#ifdef CONFIG_DEBUG_RODATA
621 596
622void mark_rodata_ro(void) 597void mark_rodata_ro(void)
623{ 598{
624 unsigned long addr = (unsigned long)__start_rodata; 599 unsigned long start = PFN_ALIGN(__va(__pa_symbol(&_stext))), size;
625 600
626 for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) 601#ifdef CONFIG_HOTPLUG_CPU
627 change_page_attr_addr(addr, 1, PAGE_KERNEL_RO); 602 /* It must still be possible to apply SMP alternatives. */
603 if (num_possible_cpus() > 1)
604 start = PFN_ALIGN(__va(__pa_symbol(&_etext)));
605#endif
606 size = (unsigned long)__va(__pa_symbol(&__end_rodata)) - start;
607 change_page_attr_addr(start, size >> PAGE_SHIFT, PAGE_KERNEL_RO);
628 608
629 printk ("Write protecting the kernel read-only data: %luk\n", 609 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
630 (__end_rodata - __start_rodata) >> 10); 610 size >> 10);
631 611
632 /* 612 /*
633 * change_page_attr_addr() requires a global_flush_tlb() call after it. 613 * change_page_attr_addr() requires a global_flush_tlb() call after it.
@@ -642,7 +622,7 @@ void mark_rodata_ro(void)
642#ifdef CONFIG_BLK_DEV_INITRD 622#ifdef CONFIG_BLK_DEV_INITRD
643void free_initrd_mem(unsigned long start, unsigned long end) 623void free_initrd_mem(unsigned long start, unsigned long end)
644{ 624{
645 free_init_pages("initrd memory", start, end); 625 free_init_pages("initrd memory", __pa(start), __pa(end));
646} 626}
647#endif 627#endif
648 628
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
index b5b8dba28b4e..f983c75825d0 100644
--- a/arch/x86_64/mm/k8topology.c
+++ b/arch/x86_64/mm/k8topology.c
@@ -49,11 +49,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
49 int found = 0; 49 int found = 0;
50 u32 reg; 50 u32 reg;
51 unsigned numnodes; 51 unsigned numnodes;
52 nodemask_t nodes_parsed;
53 unsigned dualcore = 0; 52 unsigned dualcore = 0;
54 53
55 nodes_clear(nodes_parsed);
56
57 if (!early_pci_allowed()) 54 if (!early_pci_allowed())
58 return -1; 55 return -1;
59 56
@@ -65,6 +62,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
65 62
66 reg = read_pci_config(0, nb, 0, 0x60); 63 reg = read_pci_config(0, nb, 0, 0x60);
67 numnodes = ((reg >> 4) & 0xF) + 1; 64 numnodes = ((reg >> 4) & 0xF) + 1;
65 if (numnodes <= 1)
66 return -1;
68 67
69 printk(KERN_INFO "Number of nodes %d\n", numnodes); 68 printk(KERN_INFO "Number of nodes %d\n", numnodes);
70 69
@@ -102,7 +101,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
102 nodeid, (base>>8)&3, (limit>>8) & 3); 101 nodeid, (base>>8)&3, (limit>>8) & 3);
103 return -1; 102 return -1;
104 } 103 }
105 if (node_isset(nodeid, nodes_parsed)) { 104 if (node_isset(nodeid, node_possible_map)) {
106 printk(KERN_INFO "Node %d already present. Skipping\n", 105 printk(KERN_INFO "Node %d already present. Skipping\n",
107 nodeid); 106 nodeid);
108 continue; 107 continue;
@@ -155,7 +154,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
155 154
156 prevbase = base; 155 prevbase = base;
157 156
158 node_set(nodeid, nodes_parsed); 157 node_set(nodeid, node_possible_map);
159 } 158 }
160 159
161 if (!found) 160 if (!found)
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 41b8fb069924..51548947ad3b 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -273,125 +273,213 @@ void __init numa_init_array(void)
273 273
274#ifdef CONFIG_NUMA_EMU 274#ifdef CONFIG_NUMA_EMU
275/* Numa emulation */ 275/* Numa emulation */
276int numa_fake __initdata = 0; 276#define E820_ADDR_HOLE_SIZE(start, end) \
277 (e820_hole_size((start) >> PAGE_SHIFT, (end) >> PAGE_SHIFT) << \
278 PAGE_SHIFT)
279char *cmdline __initdata;
277 280
278/* 281/*
279 * This function is used to find out if the start and end correspond to 282 * Setups up nid to range from addr to addr + size. If the end boundary is
280 * different zones. 283 * greater than max_addr, then max_addr is used instead. The return value is 0
284 * if there is additional memory left for allocation past addr and -1 otherwise.
285 * addr is adjusted to be at the end of the node.
281 */ 286 */
282int zone_cross_over(unsigned long start, unsigned long end) 287static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
288 u64 size, u64 max_addr)
283{ 289{
284 if ((start < (MAX_DMA32_PFN << PAGE_SHIFT)) && 290 int ret = 0;
285 (end >= (MAX_DMA32_PFN << PAGE_SHIFT))) 291 nodes[nid].start = *addr;
286 return 1; 292 *addr += size;
287 return 0; 293 if (*addr >= max_addr) {
294 *addr = max_addr;
295 ret = -1;
296 }
297 nodes[nid].end = *addr;
298 node_set(nid, node_possible_map);
299 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
300 nodes[nid].start, nodes[nid].end,
301 (nodes[nid].end - nodes[nid].start) >> 20);
302 return ret;
288} 303}
289 304
290static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) 305/*
306 * Splits num_nodes nodes up equally starting at node_start. The return value
307 * is the number of nodes split up and addr is adjusted to be at the end of the
308 * last node allocated.
309 */
310static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
311 u64 max_addr, int node_start,
312 int num_nodes)
291{ 313{
292 int i, big; 314 unsigned int big;
293 struct bootnode nodes[MAX_NUMNODES]; 315 u64 size;
294 unsigned long sz, old_sz; 316 int i;
295 unsigned long hole_size;
296 unsigned long start, end;
297 unsigned long max_addr = (end_pfn << PAGE_SHIFT);
298
299 start = (start_pfn << PAGE_SHIFT);
300 hole_size = e820_hole_size(start, max_addr);
301 sz = (max_addr - start - hole_size) / numa_fake;
302
303 /* Kludge needed for the hash function */
304
305 old_sz = sz;
306 /*
307 * Round down to the nearest FAKE_NODE_MIN_SIZE.
308 */
309 sz &= FAKE_NODE_MIN_HASH_MASK;
310 317
318 if (num_nodes <= 0)
319 return -1;
320 if (num_nodes > MAX_NUMNODES)
321 num_nodes = MAX_NUMNODES;
322 size = (max_addr - *addr - E820_ADDR_HOLE_SIZE(*addr, max_addr)) /
323 num_nodes;
311 /* 324 /*
312 * We ensure that each node is at least 64MB big. Smaller than this 325 * Calculate the number of big nodes that can be allocated as a result
313 * size can cause VM hiccups. 326 * of consolidating the leftovers.
314 */ 327 */
315 if (sz == 0) { 328 big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) /
316 printk(KERN_INFO "Not enough memory for %d nodes. Reducing " 329 FAKE_NODE_MIN_SIZE;
317 "the number of nodes\n", numa_fake); 330
318 numa_fake = (max_addr - start - hole_size) / FAKE_NODE_MIN_SIZE; 331 /* Round down to nearest FAKE_NODE_MIN_SIZE. */
319 printk(KERN_INFO "Number of fake nodes will be = %d\n", 332 size &= FAKE_NODE_MIN_HASH_MASK;
320 numa_fake); 333 if (!size) {
321 sz = FAKE_NODE_MIN_SIZE; 334 printk(KERN_ERR "Not enough memory for each node. "
335 "NUMA emulation disabled.\n");
336 return -1;
322 } 337 }
323 /* 338
324 * Find out how many nodes can get an extra NODE_MIN_SIZE granule. 339 for (i = node_start; i < num_nodes + node_start; i++) {
325 * This logic ensures the extra memory gets distributed among as many 340 u64 end = *addr + size;
326 * nodes as possible (as compared to one single node getting all that
327 * extra memory.
328 */
329 big = ((old_sz - sz) * numa_fake) / FAKE_NODE_MIN_SIZE;
330 printk(KERN_INFO "Fake node Size: %luMB hole_size: %luMB big nodes: "
331 "%d\n",
332 (sz >> 20), (hole_size >> 20), big);
333 memset(&nodes,0,sizeof(nodes));
334 end = start;
335 for (i = 0; i < numa_fake; i++) {
336 /*
337 * In case we are not able to allocate enough memory for all
338 * the nodes, we reduce the number of fake nodes.
339 */
340 if (end >= max_addr) {
341 numa_fake = i - 1;
342 break;
343 }
344 start = nodes[i].start = end;
345 /*
346 * Final node can have all the remaining memory.
347 */
348 if (i == numa_fake-1)
349 sz = max_addr - start;
350 end = nodes[i].start + sz;
351 /*
352 * Fir "big" number of nodes get extra granule.
353 */
354 if (i < big) 341 if (i < big)
355 end += FAKE_NODE_MIN_SIZE; 342 end += FAKE_NODE_MIN_SIZE;
356 /* 343 /*
357 * Iterate over the range to ensure that this node gets at 344 * The final node can have the remaining system RAM. Other
358 * least sz amount of RAM (excluding holes) 345 * nodes receive roughly the same amount of available pages.
359 */ 346 */
360 while ((end - start - e820_hole_size(start, end)) < sz) { 347 if (i == num_nodes + node_start - 1)
361 end += FAKE_NODE_MIN_SIZE; 348 end = max_addr;
362 if (end >= max_addr) 349 else
363 break; 350 while (end - *addr - E820_ADDR_HOLE_SIZE(*addr, end) <
351 size) {
352 end += FAKE_NODE_MIN_SIZE;
353 if (end > max_addr) {
354 end = max_addr;
355 break;
356 }
357 }
358 if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0)
359 break;
360 }
361 return i - node_start + 1;
362}
363
364/*
365 * Splits the remaining system RAM into chunks of size. The remaining memory is
366 * always assigned to a final node and can be asymmetric. Returns the number of
367 * nodes split.
368 */
369static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
370 u64 max_addr, int node_start, u64 size)
371{
372 int i = node_start;
373 size = (size << 20) & FAKE_NODE_MIN_HASH_MASK;
374 while (!setup_node_range(i++, nodes, addr, size, max_addr))
375 ;
376 return i - node_start;
377}
378
379/*
380 * Sets up the system RAM area from start_pfn to end_pfn according to the
381 * numa=fake command-line option.
382 */
383static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
384{
385 struct bootnode nodes[MAX_NUMNODES];
386 u64 addr = start_pfn << PAGE_SHIFT;
387 u64 max_addr = end_pfn << PAGE_SHIFT;
388 int num_nodes = 0;
389 int coeff_flag;
390 int coeff = -1;
391 int num = 0;
392 u64 size;
393 int i;
394
395 memset(&nodes, 0, sizeof(nodes));
396 /*
397 * If the numa=fake command-line is just a single number N, split the
398 * system RAM into N fake nodes.
399 */
400 if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {
401 num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0,
402 simple_strtol(cmdline, NULL, 0));
403 if (num_nodes < 0)
404 return num_nodes;
405 goto out;
406 }
407
408 /* Parse the command line. */
409 for (coeff_flag = 0; ; cmdline++) {
410 if (*cmdline && isdigit(*cmdline)) {
411 num = num * 10 + *cmdline - '0';
412 continue;
364 } 413 }
365 /* 414 if (*cmdline == '*') {
366 * Look at the next node to make sure there is some real memory 415 if (num > 0)
367 * to map. Bad things happen when the only memory present 416 coeff = num;
368 * in a zone on a fake node is IO hole. 417 coeff_flag = 1;
369 */ 418 }
370 while (e820_hole_size(end, end + FAKE_NODE_MIN_SIZE) > 0) { 419 if (!*cmdline || *cmdline == ',') {
371 if (zone_cross_over(start, end + sz)) { 420 if (!coeff_flag)
372 end = (MAX_DMA32_PFN << PAGE_SHIFT); 421 coeff = 1;
422 /*
423 * Round down to the nearest FAKE_NODE_MIN_SIZE.
424 * Command-line coefficients are in megabytes.
425 */
426 size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK;
427 if (size)
428 for (i = 0; i < coeff; i++, num_nodes++)
429 if (setup_node_range(num_nodes, nodes,
430 &addr, size, max_addr) < 0)
431 goto done;
432 if (!*cmdline)
373 break; 433 break;
374 } 434 coeff_flag = 0;
375 if (end >= max_addr) 435 coeff = -1;
436 }
437 num = 0;
438 }
439done:
440 if (!num_nodes)
441 return -1;
442 /* Fill remainder of system RAM, if appropriate. */
443 if (addr < max_addr) {
444 if (coeff_flag && coeff < 0) {
445 /* Split remaining nodes into num-sized chunks */
446 num_nodes += split_nodes_by_size(nodes, &addr, max_addr,
447 num_nodes, num);
448 goto out;
449 }
450 switch (*(cmdline - 1)) {
451 case '*':
452 /* Split remaining nodes into coeff chunks */
453 if (coeff <= 0)
376 break; 454 break;
377 end += FAKE_NODE_MIN_SIZE; 455 num_nodes += split_nodes_equally(nodes, &addr, max_addr,
456 num_nodes, coeff);
457 break;
458 case ',':
459 /* Do not allocate remaining system RAM */
460 break;
461 default:
462 /* Give one final node */
463 setup_node_range(num_nodes, nodes, &addr,
464 max_addr - addr, max_addr);
465 num_nodes++;
378 } 466 }
379 if (end > max_addr) 467 }
380 end = max_addr; 468out:
381 nodes[i].end = end; 469 memnode_shift = compute_hash_shift(nodes, num_nodes);
382 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", 470 if (memnode_shift < 0) {
383 i, 471 memnode_shift = 0;
384 nodes[i].start, nodes[i].end, 472 printk(KERN_ERR "No NUMA hash function found. NUMA emulation "
385 (nodes[i].end - nodes[i].start) >> 20); 473 "disabled.\n");
386 node_set_online(i); 474 return -1;
387 } 475 }
388 memnode_shift = compute_hash_shift(nodes, numa_fake); 476
389 if (memnode_shift < 0) { 477 /*
390 memnode_shift = 0; 478 * We need to vacate all active ranges that may have been registered by
391 printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n"); 479 * SRAT.
392 return -1; 480 */
393 } 481 remove_all_active_ranges();
394 for_each_online_node(i) { 482 for_each_node_mask(i, node_possible_map) {
395 e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, 483 e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
396 nodes[i].end >> PAGE_SHIFT); 484 nodes[i].end >> PAGE_SHIFT);
397 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 485 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
@@ -399,26 +487,32 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
399 numa_init_array(); 487 numa_init_array();
400 return 0; 488 return 0;
401} 489}
402#endif 490#undef E820_ADDR_HOLE_SIZE
491#endif /* CONFIG_NUMA_EMU */
403 492
404void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) 493void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
405{ 494{
406 int i; 495 int i;
407 496
497 nodes_clear(node_possible_map);
498
408#ifdef CONFIG_NUMA_EMU 499#ifdef CONFIG_NUMA_EMU
409 if (numa_fake && !numa_emulation(start_pfn, end_pfn)) 500 if (cmdline && !numa_emulation(start_pfn, end_pfn))
410 return; 501 return;
502 nodes_clear(node_possible_map);
411#endif 503#endif
412 504
413#ifdef CONFIG_ACPI_NUMA 505#ifdef CONFIG_ACPI_NUMA
414 if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, 506 if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
415 end_pfn << PAGE_SHIFT)) 507 end_pfn << PAGE_SHIFT))
416 return; 508 return;
509 nodes_clear(node_possible_map);
417#endif 510#endif
418 511
419#ifdef CONFIG_K8_NUMA 512#ifdef CONFIG_K8_NUMA
420 if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT)) 513 if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT))
421 return; 514 return;
515 nodes_clear(node_possible_map);
422#endif 516#endif
423 printk(KERN_INFO "%s\n", 517 printk(KERN_INFO "%s\n",
424 numa_off ? "NUMA turned off" : "No NUMA configuration found"); 518 numa_off ? "NUMA turned off" : "No NUMA configuration found");
@@ -432,6 +526,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
432 memnodemap[0] = 0; 526 memnodemap[0] = 0;
433 nodes_clear(node_online_map); 527 nodes_clear(node_online_map);
434 node_set_online(0); 528 node_set_online(0);
529 node_set(0, node_possible_map);
435 for (i = 0; i < NR_CPUS; i++) 530 for (i = 0; i < NR_CPUS; i++)
436 numa_set_node(i, 0); 531 numa_set_node(i, 0);
437 node_to_cpumask[0] = cpumask_of_cpu(0); 532 node_to_cpumask[0] = cpumask_of_cpu(0);
@@ -486,11 +581,8 @@ static __init int numa_setup(char *opt)
486 if (!strncmp(opt,"off",3)) 581 if (!strncmp(opt,"off",3))
487 numa_off = 1; 582 numa_off = 1;
488#ifdef CONFIG_NUMA_EMU 583#ifdef CONFIG_NUMA_EMU
489 if(!strncmp(opt, "fake=", 5)) { 584 if (!strncmp(opt, "fake=", 5))
490 numa_fake = simple_strtoul(opt+5,NULL,0); ; 585 cmdline = opt + 5;
491 if (numa_fake >= MAX_NUMNODES)
492 numa_fake = MAX_NUMNODES;
493 }
494#endif 586#endif
495#ifdef CONFIG_ACPI_NUMA 587#ifdef CONFIG_ACPI_NUMA
496 if (!strncmp(opt,"noacpi",6)) 588 if (!strncmp(opt,"noacpi",6))
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 081409aa3452..bf4aa8dd4254 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -51,7 +51,6 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
51 SetPagePrivate(base); 51 SetPagePrivate(base);
52 page_private(base) = 0; 52 page_private(base) = 0;
53 53
54 address = __pa(address);
55 addr = address & LARGE_PAGE_MASK; 54 addr = address & LARGE_PAGE_MASK;
56 pbase = (pte_t *)page_address(base); 55 pbase = (pte_t *)page_address(base);
57 for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { 56 for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
@@ -101,13 +100,12 @@ static inline void save_page(struct page *fpage)
101 * No more special protections in this 2/4MB area - revert to a 100 * No more special protections in this 2/4MB area - revert to a
102 * large page again. 101 * large page again.
103 */ 102 */
104static void revert_page(unsigned long address, pgprot_t ref_prot) 103static void revert_page(unsigned long address, unsigned long pfn, pgprot_t ref_prot)
105{ 104{
106 pgd_t *pgd; 105 pgd_t *pgd;
107 pud_t *pud; 106 pud_t *pud;
108 pmd_t *pmd; 107 pmd_t *pmd;
109 pte_t large_pte; 108 pte_t large_pte;
110 unsigned long pfn;
111 109
112 pgd = pgd_offset_k(address); 110 pgd = pgd_offset_k(address);
113 BUG_ON(pgd_none(*pgd)); 111 BUG_ON(pgd_none(*pgd));
@@ -115,7 +113,6 @@ static void revert_page(unsigned long address, pgprot_t ref_prot)
115 BUG_ON(pud_none(*pud)); 113 BUG_ON(pud_none(*pud));
116 pmd = pmd_offset(pud, address); 114 pmd = pmd_offset(pud, address);
117 BUG_ON(pmd_val(*pmd) & _PAGE_PSE); 115 BUG_ON(pmd_val(*pmd) & _PAGE_PSE);
118 pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT;
119 large_pte = pfn_pte(pfn, ref_prot); 116 large_pte = pfn_pte(pfn, ref_prot);
120 large_pte = pte_mkhuge(large_pte); 117 large_pte = pte_mkhuge(large_pte);
121 set_pte((pte_t *)pmd, large_pte); 118 set_pte((pte_t *)pmd, large_pte);
@@ -141,7 +138,8 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
141 */ 138 */
142 struct page *split; 139 struct page *split;
143 ref_prot2 = pte_pgprot(pte_clrhuge(*kpte)); 140 ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
144 split = split_large_page(address, prot, ref_prot2); 141 split = split_large_page(pfn << PAGE_SHIFT, prot,
142 ref_prot2);
145 if (!split) 143 if (!split)
146 return -ENOMEM; 144 return -ENOMEM;
147 set_pte(kpte, mk_pte(split, ref_prot2)); 145 set_pte(kpte, mk_pte(split, ref_prot2));
@@ -160,7 +158,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
160 158
161 if (page_private(kpte_page) == 0) { 159 if (page_private(kpte_page) == 0) {
162 save_page(kpte_page); 160 save_page(kpte_page);
163 revert_page(address, ref_prot); 161 revert_page(address, pfn, ref_prot);
164 } 162 }
165 return 0; 163 return 0;
166} 164}
@@ -180,22 +178,32 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
180 */ 178 */
181int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) 179int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
182{ 180{
183 int err = 0; 181 unsigned long phys_base_pfn = __pa_symbol(__START_KERNEL_map) >> PAGE_SHIFT;
182 int err = 0, kernel_map = 0;
184 int i; 183 int i;
185 184
185 if (address >= __START_KERNEL_map
186 && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
187 address = (unsigned long)__va(__pa(address));
188 kernel_map = 1;
189 }
190
186 down_write(&init_mm.mmap_sem); 191 down_write(&init_mm.mmap_sem);
187 for (i = 0; i < numpages; i++, address += PAGE_SIZE) { 192 for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
188 unsigned long pfn = __pa(address) >> PAGE_SHIFT; 193 unsigned long pfn = __pa(address) >> PAGE_SHIFT;
189 194
190 err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); 195 if (!kernel_map || pte_present(pfn_pte(0, prot))) {
191 if (err) 196 err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
192 break; 197 if (err)
198 break;
199 }
193 /* Handle kernel mapping too which aliases part of the 200 /* Handle kernel mapping too which aliases part of the
194 * lowmem */ 201 * lowmem */
195 if (__pa(address) < KERNEL_TEXT_SIZE) { 202 if ((pfn >= phys_base_pfn) &&
203 ((pfn - phys_base_pfn) < (KERNEL_TEXT_SIZE >> PAGE_SHIFT))) {
196 unsigned long addr2; 204 unsigned long addr2;
197 pgprot_t prot2; 205 pgprot_t prot2;
198 addr2 = __START_KERNEL_map + __pa(address); 206 addr2 = __START_KERNEL_map + ((pfn - phys_base_pfn) << PAGE_SHIFT);
199 /* Make sure the kernel mappings stay executable */ 207 /* Make sure the kernel mappings stay executable */
200 prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot))); 208 prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
201 err = __change_page_attr(addr2, pfn, prot2, 209 err = __change_page_attr(addr2, pfn, prot2,
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 2efe215fc76a..1e76bb0a7277 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -419,19 +419,21 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
419 return -1; 419 return -1;
420 } 420 }
421 421
422 node_possible_map = nodes_parsed;
423
422 /* Finally register nodes */ 424 /* Finally register nodes */
423 for_each_node_mask(i, nodes_parsed) 425 for_each_node_mask(i, node_possible_map)
424 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 426 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
425 /* Try again in case setup_node_bootmem missed one due 427 /* Try again in case setup_node_bootmem missed one due
426 to missing bootmem */ 428 to missing bootmem */
427 for_each_node_mask(i, nodes_parsed) 429 for_each_node_mask(i, node_possible_map)
428 if (!node_online(i)) 430 if (!node_online(i))
429 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 431 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
430 432
431 for (i = 0; i < NR_CPUS; i++) { 433 for (i = 0; i < NR_CPUS; i++) {
432 if (cpu_to_node[i] == NUMA_NO_NODE) 434 if (cpu_to_node[i] == NUMA_NO_NODE)
433 continue; 435 continue;
434 if (!node_isset(cpu_to_node[i], nodes_parsed)) 436 if (!node_isset(cpu_to_node[i], node_possible_map))
435 numa_set_node(i, NUMA_NO_NODE); 437 numa_set_node(i, NUMA_NO_NODE);
436 } 438 }
437 numa_init_array(); 439 numa_init_array();
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index ab6370054cee..4fbd66a52a88 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -198,7 +198,7 @@ SECTIONS
198 __ftr_fixup : { *(__ftr_fixup) } 198 __ftr_fixup : { *(__ftr_fixup) }
199 __stop___ftr_fixup = .; 199 __stop___ftr_fixup = .;
200 200
201 . = ALIGN(32); 201 . = ALIGN(4096);
202 __per_cpu_start = .; 202 __per_cpu_start = .;
203 .data.percpu : { *(.data.percpu) } 203 .data.percpu : { *(.data.percpu) }
204 __per_cpu_end = .; 204 __per_cpu_end = .;
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index ae0654cd11ea..ee5759bef945 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -475,7 +475,7 @@ static void acpi_processor_idle(void)
475 475
476#ifdef CONFIG_GENERIC_TIME 476#ifdef CONFIG_GENERIC_TIME
477 /* TSC halts in C2, so notify users */ 477 /* TSC halts in C2, so notify users */
478 mark_tsc_unstable(); 478 mark_tsc_unstable("possible TSC halt in C2");
479#endif 479#endif
480 /* Re-enable interrupts */ 480 /* Re-enable interrupts */
481 local_irq_enable(); 481 local_irq_enable();
@@ -517,7 +517,7 @@ static void acpi_processor_idle(void)
517 517
518#ifdef CONFIG_GENERIC_TIME 518#ifdef CONFIG_GENERIC_TIME
519 /* TSC halts in C3, so notify users */ 519 /* TSC halts in C3, so notify users */
520 mark_tsc_unstable(); 520 mark_tsc_unstable("TSC halts in C3");
521#endif 521#endif
522 /* Re-enable interrupts */ 522 /* Re-enable interrupts */
523 local_irq_enable(); 523 local_irq_enable();
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 485720486d60..c9f0f250d78f 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -14,6 +14,7 @@
14#include <linux/agp_backend.h> 14#include <linux/agp_backend.h>
15#include <linux/mmzone.h> 15#include <linux/mmzone.h>
16#include <asm/page.h> /* PAGE_SIZE */ 16#include <asm/page.h> /* PAGE_SIZE */
17#include <asm/e820.h>
17#include <asm/k8.h> 18#include <asm/k8.h>
18#include "agp.h" 19#include "agp.h"
19 20
@@ -259,7 +260,6 @@ static const struct agp_bridge_driver amd_8151_driver = {
259/* Some basic sanity checks for the aperture. */ 260/* Some basic sanity checks for the aperture. */
260static int __devinit aperture_valid(u64 aper, u32 size) 261static int __devinit aperture_valid(u64 aper, u32 size)
261{ 262{
262 u32 pfn, c;
263 if (aper == 0) { 263 if (aper == 0) {
264 printk(KERN_ERR PFX "No aperture\n"); 264 printk(KERN_ERR PFX "No aperture\n");
265 return 0; 265 return 0;
@@ -272,14 +272,9 @@ static int __devinit aperture_valid(u64 aper, u32 size)
272 printk(KERN_ERR PFX "Aperture out of bounds\n"); 272 printk(KERN_ERR PFX "Aperture out of bounds\n");
273 return 0; 273 return 0;
274 } 274 }
275 pfn = aper >> PAGE_SHIFT; 275 if (e820_any_mapped(aper, aper + size, E820_RAM)) {
276 for (c = 0; c < size/PAGE_SIZE; c++) { 276 printk(KERN_ERR PFX "Aperture pointing to RAM\n");
277 if (!pfn_valid(pfn + c)) 277 return 0;
278 break;
279 if (!PageReserved(pfn_to_page(pfn + c))) {
280 printk(KERN_ERR PFX "Aperture pointing to RAM\n");
281 return 0;
282 }
283 } 278 }
284 279
285 /* Request the Aperture. This catches cases when someone else 280 /* Request the Aperture. This catches cases when someone else
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 91a20785108a..3e67c34df9a5 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -371,7 +371,8 @@ static const char *vgacon_startup(void)
371 } 371 }
372 372
373 /* VGA16 modes are not handled by VGACON */ 373 /* VGA16 modes are not handled by VGACON */
374 if ((ORIG_VIDEO_MODE == 0x0D) || /* 320x200/4 */ 374 if ((ORIG_VIDEO_MODE == 0x00) || /* SCREEN_INFO not initialized */
375 (ORIG_VIDEO_MODE == 0x0D) || /* 320x200/4 */
375 (ORIG_VIDEO_MODE == 0x0E) || /* 640x200/4 */ 376 (ORIG_VIDEO_MODE == 0x0E) || /* 640x200/4 */
376 (ORIG_VIDEO_MODE == 0x10) || /* 640x350/4 */ 377 (ORIG_VIDEO_MODE == 0x10) || /* 640x350/4 */
377 (ORIG_VIDEO_MODE == 0x12) || /* 640x480/4 */ 378 (ORIG_VIDEO_MODE == 0x12) || /* 640x480/4 */
diff --git a/fs/compat.c b/fs/compat.c
index 040a8be38a48..72e5e6923828 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -371,13 +371,14 @@ static void compat_ioctl_error(struct file *filp, unsigned int fd,
371 fn = "?"; 371 fn = "?";
372 } 372 }
373 373
374 sprintf(buf,"'%c'", (cmd>>24) & 0x3f); 374 sprintf(buf,"'%c'", (cmd>>_IOC_TYPESHIFT) & _IOC_TYPEMASK);
375 if (!isprint(buf[1])) 375 if (!isprint(buf[1]))
376 sprintf(buf, "%02x", buf[1]); 376 sprintf(buf, "%02x", buf[1]);
377 compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) " 377 compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) "
378 "cmd(%08x){%s} arg(%08x) on %s\n", 378 "cmd(%08x){t:%s;sz:%u} arg(%08x) on %s\n",
379 current->comm, current->pid, 379 current->comm, current->pid,
380 (int)fd, (unsigned int)cmd, buf, 380 (int)fd, (unsigned int)cmd, buf,
381 (cmd >> _IOC_SIZESHIFT) & _IOC_SIZEMASK,
381 (unsigned int)arg, fn); 382 (unsigned int)arg, fn);
382 383
383 if (path) 384 if (path)
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c68b055fa26e..464c04a9541d 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2396,6 +2396,14 @@ lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
2396#define ULONG_IOCTL(cmd) \ 2396#define ULONG_IOCTL(cmd) \
2397 { (cmd), (ioctl_trans_handler_t)sys_ioctl }, 2397 { (cmd), (ioctl_trans_handler_t)sys_ioctl },
2398 2398
2399/* ioctl should not be warned about even if it's not implemented.
2400 Valid reasons to use this:
2401 - It is implemented with ->compat_ioctl on some device, but programs
2402 call it on others too.
2403 - The ioctl is not implemented in the native kernel, but programs
2404 call it commonly anyways.
2405 Most other reasons are not valid. */
2406#define IGNORE_IOCTL(cmd) COMPATIBLE_IOCTL(cmd)
2399 2407
2400struct ioctl_trans ioctl_start[] = { 2408struct ioctl_trans ioctl_start[] = {
2401#include <linux/compat_ioctl.h> 2409#include <linux/compat_ioctl.h>
@@ -2594,6 +2602,8 @@ HANDLE_IOCTL(SIOCGIWENCODEEXT, do_wireless_ioctl)
2594HANDLE_IOCTL(SIOCSIWPMKSA, do_wireless_ioctl) 2602HANDLE_IOCTL(SIOCSIWPMKSA, do_wireless_ioctl)
2595HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl) 2603HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl)
2596HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl) 2604HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl)
2605/* Not implemented in the native kernel */
2606IGNORE_IOCTL(SIOCGIFCOUNT)
2597HANDLE_IOCTL(RTC_IRQP_READ32, rtc_ioctl) 2607HANDLE_IOCTL(RTC_IRQP_READ32, rtc_ioctl)
2598HANDLE_IOCTL(RTC_IRQP_SET32, rtc_ioctl) 2608HANDLE_IOCTL(RTC_IRQP_SET32, rtc_ioctl)
2599HANDLE_IOCTL(RTC_EPOCH_READ32, rtc_ioctl) 2609HANDLE_IOCTL(RTC_EPOCH_READ32, rtc_ioctl)
@@ -2617,6 +2627,15 @@ COMPATIBLE_IOCTL(LPRESET)
2617/*LPGETSTATS not implemented, but no kernels seem to compile it in anyways*/ 2627/*LPGETSTATS not implemented, but no kernels seem to compile it in anyways*/
2618COMPATIBLE_IOCTL(LPGETFLAGS) 2628COMPATIBLE_IOCTL(LPGETFLAGS)
2619HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans) 2629HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans)
2630
2631/* fat 'r' ioctls. These are handled by fat with ->compat_ioctl,
2632 but we don't want warnings on other file systems. So declare
2633 them as compatible here. */
2634#define VFAT_IOCTL_READDIR_BOTH32 _IOR('r', 1, struct compat_dirent[2])
2635#define VFAT_IOCTL_READDIR_SHORT32 _IOR('r', 2, struct compat_dirent[2])
2636
2637IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32)
2638IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32)
2620}; 2639};
2621 2640
2622int ioctl_table_size = ARRAY_SIZE(ioctl_start); 2641int ioctl_table_size = ARRAY_SIZE(ioctl_start);
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index d96050728c43..523e1098ae88 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -514,7 +514,7 @@ static int __init parse_crash_elf64_headers(void)
514 /* Do some basic Verification. */ 514 /* Do some basic Verification. */
515 if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || 515 if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
516 (ehdr.e_type != ET_CORE) || 516 (ehdr.e_type != ET_CORE) ||
517 !elf_check_arch(&ehdr) || 517 !vmcore_elf_check_arch(&ehdr) ||
518 ehdr.e_ident[EI_CLASS] != ELFCLASS64 || 518 ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
519 ehdr.e_ident[EI_VERSION] != EV_CURRENT || 519 ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
520 ehdr.e_version != EV_CURRENT || 520 ehdr.e_version != EV_CURRENT ||
diff --git a/include/asm-alpha/mmu_context.h b/include/asm-alpha/mmu_context.h
index fe249e9d3360..0bd7bd2ccb90 100644
--- a/include/asm-alpha/mmu_context.h
+++ b/include/asm-alpha/mmu_context.h
@@ -10,6 +10,7 @@
10#include <asm/system.h> 10#include <asm/system.h>
11#include <asm/machvec.h> 11#include <asm/machvec.h>
12#include <asm/compiler.h> 12#include <asm/compiler.h>
13#include <asm-generic/mm_hooks.h>
13 14
14/* 15/*
15 * Force a context reload. This is needed when we change the page 16 * Force a context reload. This is needed when we change the page
diff --git a/include/asm-alpha/percpu.h b/include/asm-alpha/percpu.h
index 651ebb141b24..48348fe34c19 100644
--- a/include/asm-alpha/percpu.h
+++ b/include/asm-alpha/percpu.h
@@ -1,20 +1,6 @@
1#ifndef __ALPHA_PERCPU_H 1#ifndef __ALPHA_PERCPU_H
2#define __ALPHA_PERCPU_H 2#define __ALPHA_PERCPU_H
3 3
4/*
5 * Increase the per cpu area for Alpha so that
6 * modules using percpu area can load.
7 */
8#ifdef CONFIG_MODULES
9# define PERCPU_MODULE_RESERVE 8192
10#else
11# define PERCPU_MODULE_RESERVE 0
12#endif
13
14#define PERCPU_ENOUGH_ROOM \
15 (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \
16 PERCPU_MODULE_RESERVE)
17
18#include <asm-generic/percpu.h> 4#include <asm-generic/percpu.h>
19 5
20#endif /* __ALPHA_PERCPU_H */ 6#endif /* __ALPHA_PERCPU_H */
diff --git a/include/asm-arm/mmu_context.h b/include/asm-arm/mmu_context.h
index d1a65b1edcaa..f8755c818b54 100644
--- a/include/asm-arm/mmu_context.h
+++ b/include/asm-arm/mmu_context.h
@@ -16,6 +16,7 @@
16#include <linux/compiler.h> 16#include <linux/compiler.h>
17#include <asm/cacheflush.h> 17#include <asm/cacheflush.h>
18#include <asm/proc-fns.h> 18#include <asm/proc-fns.h>
19#include <asm-generic/mm_hooks.h>
19 20
20void __check_kvm_seq(struct mm_struct *mm); 21void __check_kvm_seq(struct mm_struct *mm);
21 22
diff --git a/include/asm-arm26/mmu_context.h b/include/asm-arm26/mmu_context.h
index 1a929bfe5c3a..16c821f81b8d 100644
--- a/include/asm-arm26/mmu_context.h
+++ b/include/asm-arm26/mmu_context.h
@@ -13,6 +13,8 @@
13#ifndef __ASM_ARM_MMU_CONTEXT_H 13#ifndef __ASM_ARM_MMU_CONTEXT_H
14#define __ASM_ARM_MMU_CONTEXT_H 14#define __ASM_ARM_MMU_CONTEXT_H
15 15
16#include <asm-generic/mm_hooks.h>
17
16#define init_new_context(tsk,mm) 0 18#define init_new_context(tsk,mm) 0
17#define destroy_context(mm) do { } while(0) 19#define destroy_context(mm) do { } while(0)
18 20
diff --git a/include/asm-avr32/mmu_context.h b/include/asm-avr32/mmu_context.h
index 31add1ae8089..c37c391faef6 100644
--- a/include/asm-avr32/mmu_context.h
+++ b/include/asm-avr32/mmu_context.h
@@ -15,6 +15,7 @@
15#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
16#include <asm/pgalloc.h> 16#include <asm/pgalloc.h>
17#include <asm/sysreg.h> 17#include <asm/sysreg.h>
18#include <asm-generic/mm_hooks.h>
18 19
19/* 20/*
20 * The MMU "context" consists of two things: 21 * The MMU "context" consists of two things:
diff --git a/include/asm-cris/mmu_context.h b/include/asm-cris/mmu_context.h
index e6e659dc757b..72ba08dcfd18 100644
--- a/include/asm-cris/mmu_context.h
+++ b/include/asm-cris/mmu_context.h
@@ -1,6 +1,8 @@
1#ifndef __CRIS_MMU_CONTEXT_H 1#ifndef __CRIS_MMU_CONTEXT_H
2#define __CRIS_MMU_CONTEXT_H 2#define __CRIS_MMU_CONTEXT_H
3 3
4#include <asm-generic/mm_hooks.h>
5
4extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm); 6extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
5extern void get_mmu_context(struct mm_struct *mm); 7extern void get_mmu_context(struct mm_struct *mm);
6extern void destroy_context(struct mm_struct *mm); 8extern void destroy_context(struct mm_struct *mm);
diff --git a/include/asm-frv/mmu_context.h b/include/asm-frv/mmu_context.h
index 72edcaaccd5d..c7daa395156a 100644
--- a/include/asm-frv/mmu_context.h
+++ b/include/asm-frv/mmu_context.h
@@ -15,6 +15,7 @@
15#include <asm/setup.h> 15#include <asm/setup.h>
16#include <asm/page.h> 16#include <asm/page.h>
17#include <asm/pgalloc.h> 17#include <asm/pgalloc.h>
18#include <asm-generic/mm_hooks.h>
18 19
19static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) 20static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
20{ 21{
diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h
new file mode 100644
index 000000000000..67dea8123683
--- /dev/null
+++ b/include/asm-generic/mm_hooks.h
@@ -0,0 +1,18 @@
1/*
2 * Define generic no-op hooks for arch_dup_mmap and arch_exit_mmap, to
3 * be included in asm-FOO/mmu_context.h for any arch FOO which doesn't
4 * need to hook these.
5 */
6#ifndef _ASM_GENERIC_MM_HOOKS_H
7#define _ASM_GENERIC_MM_HOOKS_H
8
9static inline void arch_dup_mmap(struct mm_struct *oldmm,
10 struct mm_struct *mm)
11{
12}
13
14static inline void arch_exit_mmap(struct mm_struct *mm)
15{
16}
17
18#endif /* _ASM_GENERIC_MM_HOOKS_H */
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 196376262240..d984a9041436 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -1,6 +1,7 @@
1#ifndef _ASM_GENERIC_PERCPU_H_ 1#ifndef _ASM_GENERIC_PERCPU_H_
2#define _ASM_GENERIC_PERCPU_H_ 2#define _ASM_GENERIC_PERCPU_H_
3#include <linux/compiler.h> 3#include <linux/compiler.h>
4#include <linux/threads.h>
4 5
5#define __GENERIC_PER_CPU 6#define __GENERIC_PER_CPU
6#ifdef CONFIG_SMP 7#ifdef CONFIG_SMP
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 9fcc8d9fbb14..f3806a74c478 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -208,7 +208,7 @@
208 } 208 }
209 209
210#define NOTES \ 210#define NOTES \
211 .notes : { *(.note.*) } :note 211 .notes : { *(.note.*) } :note
212 212
213#define INITCALLS \ 213#define INITCALLS \
214 *(.initcall0.init) \ 214 *(.initcall0.init) \
diff --git a/include/asm-h8300/mmu_context.h b/include/asm-h8300/mmu_context.h
index 5c165f7bee0e..f44b730da54d 100644
--- a/include/asm-h8300/mmu_context.h
+++ b/include/asm-h8300/mmu_context.h
@@ -4,6 +4,7 @@
4#include <asm/setup.h> 4#include <asm/setup.h>
5#include <asm/page.h> 5#include <asm/page.h>
6#include <asm/pgalloc.h> 6#include <asm/pgalloc.h>
7#include <asm-generic/mm_hooks.h>
7 8
8static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) 9static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
9{ 10{
diff --git a/include/asm-i386/Kbuild b/include/asm-i386/Kbuild
index 5ae93afc67e1..cbf6e8f1087b 100644
--- a/include/asm-i386/Kbuild
+++ b/include/asm-i386/Kbuild
@@ -3,8 +3,10 @@ include include/asm-generic/Kbuild.asm
3header-y += boot.h 3header-y += boot.h
4header-y += debugreg.h 4header-y += debugreg.h
5header-y += ldt.h 5header-y += ldt.h
6header-y += msr-index.h
6header-y += ptrace-abi.h 7header-y += ptrace-abi.h
7header-y += ucontext.h 8header-y += ucontext.h
8 9
10unifdef-y += msr.h
9unifdef-y += mtrr.h 11unifdef-y += mtrr.h
10unifdef-y += vm86.h 12unifdef-y += vm86.h
diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h
index b8fa9557c532..0f70b379b029 100644
--- a/include/asm-i386/alternative.h
+++ b/include/asm-i386/alternative.h
@@ -1,8 +1,6 @@
1#ifndef _I386_ALTERNATIVE_H 1#ifndef _I386_ALTERNATIVE_H
2#define _I386_ALTERNATIVE_H 2#define _I386_ALTERNATIVE_H
3 3
4#ifdef __KERNEL__
5
6#include <asm/types.h> 4#include <asm/types.h>
7#include <linux/stddef.h> 5#include <linux/stddef.h>
8#include <linux/types.h> 6#include <linux/types.h>
@@ -16,6 +14,7 @@ struct alt_instr {
16 u8 pad; 14 u8 pad;
17}; 15};
18 16
17extern void alternative_instructions(void);
19extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); 18extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
20 19
21struct module; 20struct module;
@@ -31,9 +30,7 @@ static inline void alternatives_smp_module_add(struct module *mod, char *name,
31 void *text, void *text_end) {} 30 void *text, void *text_end) {}
32static inline void alternatives_smp_module_del(struct module *mod) {} 31static inline void alternatives_smp_module_del(struct module *mod) {}
33static inline void alternatives_smp_switch(int smp) {} 32static inline void alternatives_smp_switch(int smp) {}
34#endif 33#endif /* CONFIG_SMP */
35
36#endif
37 34
38/* 35/*
39 * Alternative instructions for different CPU types or capabilities. 36 * Alternative instructions for different CPU types or capabilities.
@@ -85,6 +82,21 @@ static inline void alternatives_smp_switch(int smp) {}
85 "663:\n\t" newinstr "\n664:\n" /* replacement */\ 82 "663:\n\t" newinstr "\n664:\n" /* replacement */\
86 ".previous" :: "i" (feature), ##input) 83 ".previous" :: "i" (feature), ##input)
87 84
85/* Like alternative_input, but with a single output argument */
86#define alternative_io(oldinstr, newinstr, feature, output, input...) \
87 asm volatile ("661:\n\t" oldinstr "\n662:\n" \
88 ".section .altinstructions,\"a\"\n" \
89 " .align 4\n" \
90 " .long 661b\n" /* label */ \
91 " .long 663f\n" /* new instruction */ \
92 " .byte %c[feat]\n" /* feature bit */ \
93 " .byte 662b-661b\n" /* sourcelen */ \
94 " .byte 664f-663f\n" /* replacementlen */ \
95 ".previous\n" \
96 ".section .altinstr_replacement,\"ax\"\n" \
97 "663:\n\t" newinstr "\n664:\n" /* replacement */ \
98 ".previous" : output : [feat] "i" (feature), ##input)
99
88/* 100/*
89 * Alternative inline assembly for SMP. 101 * Alternative inline assembly for SMP.
90 * 102 *
@@ -118,15 +130,17 @@ static inline void alternatives_smp_switch(int smp) {}
118#define LOCK_PREFIX "" 130#define LOCK_PREFIX ""
119#endif 131#endif
120 132
121struct paravirt_patch; 133struct paravirt_patch_site;
122#ifdef CONFIG_PARAVIRT 134#ifdef CONFIG_PARAVIRT
123void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end); 135void apply_paravirt(struct paravirt_patch_site *start,
136 struct paravirt_patch_site *end);
124#else 137#else
125static inline void 138static inline void
126apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) 139apply_paravirt(struct paravirt_patch_site *start,
140 struct paravirt_patch_site *end)
127{} 141{}
128#define __start_parainstructions NULL 142#define __parainstructions NULL
129#define __stop_parainstructions NULL 143#define __parainstructions_end NULL
130#endif 144#endif
131 145
132#endif /* _I386_ALTERNATIVE_H */ 146#endif /* _I386_ALTERNATIVE_H */
diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h
index a19810a08ae9..1e8f6f252dd3 100644
--- a/include/asm-i386/apic.h
+++ b/include/asm-i386/apic.h
@@ -2,6 +2,7 @@
2#define __ASM_APIC_H 2#define __ASM_APIC_H
3 3
4#include <linux/pm.h> 4#include <linux/pm.h>
5#include <linux/delay.h>
5#include <asm/fixmap.h> 6#include <asm/fixmap.h>
6#include <asm/apicdef.h> 7#include <asm/apicdef.h>
7#include <asm/processor.h> 8#include <asm/processor.h>
@@ -64,12 +65,8 @@ static __inline fastcall unsigned long native_apic_read(unsigned long reg)
64 return *((volatile unsigned long *)(APIC_BASE+reg)); 65 return *((volatile unsigned long *)(APIC_BASE+reg));
65} 66}
66 67
67static __inline__ void apic_wait_icr_idle(void) 68void apic_wait_icr_idle(void);
68{ 69unsigned long safe_apic_wait_icr_idle(void);
69 while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY )
70 cpu_relax();
71}
72
73int get_physical_broadcast(void); 70int get_physical_broadcast(void);
74 71
75#ifdef CONFIG_X86_GOOD_APIC 72#ifdef CONFIG_X86_GOOD_APIC
diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h
index c90c7c499302..d28979ff73be 100644
--- a/include/asm-i386/bugs.h
+++ b/include/asm-i386/bugs.h
@@ -1,198 +1,12 @@
1/* 1/*
2 * include/asm-i386/bugs.h
3 *
4 * Copyright (C) 1994 Linus Torvalds
5 *
6 * Cyrix stuff, June 1998 by:
7 * - Rafael R. Reilova (moved everything from head.S),
8 * <rreilova@ececs.uc.edu>
9 * - Channing Corn (tests & fixes),
10 * - Andrew D. Balsa (code cleanup).
11 */
12
13/*
14 * This is included by init/main.c to check for architecture-dependent bugs. 2 * This is included by init/main.c to check for architecture-dependent bugs.
15 * 3 *
16 * Needs: 4 * Needs:
17 * void check_bugs(void); 5 * void check_bugs(void);
18 */ 6 */
7#ifndef _ASM_I386_BUG_H
8#define _ASM_I386_BUG_H
19 9
20#include <linux/init.h> 10void check_bugs(void);
21#include <asm/processor.h>
22#include <asm/i387.h>
23#include <asm/msr.h>
24#include <asm/paravirt.h>
25
26static int __init no_halt(char *s)
27{
28 boot_cpu_data.hlt_works_ok = 0;
29 return 1;
30}
31
32__setup("no-hlt", no_halt);
33
34static int __init mca_pentium(char *s)
35{
36 mca_pentium_flag = 1;
37 return 1;
38}
39
40__setup("mca-pentium", mca_pentium);
41
42static int __init no_387(char *s)
43{
44 boot_cpu_data.hard_math = 0;
45 write_cr0(0xE | read_cr0());
46 return 1;
47}
48
49__setup("no387", no_387);
50
51static double __initdata x = 4195835.0;
52static double __initdata y = 3145727.0;
53
54/*
55 * This used to check for exceptions..
56 * However, it turns out that to support that,
57 * the XMM trap handlers basically had to
58 * be buggy. So let's have a correct XMM trap
59 * handler, and forget about printing out
60 * some status at boot.
61 *
62 * We should really only care about bugs here
63 * anyway. Not features.
64 */
65static void __init check_fpu(void)
66{
67 if (!boot_cpu_data.hard_math) {
68#ifndef CONFIG_MATH_EMULATION
69 printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
70 printk(KERN_EMERG "Giving up.\n");
71 for (;;) ;
72#endif
73 return;
74 }
75
76/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */
77 /* Test for the divl bug.. */
78 __asm__("fninit\n\t"
79 "fldl %1\n\t"
80 "fdivl %2\n\t"
81 "fmull %2\n\t"
82 "fldl %1\n\t"
83 "fsubp %%st,%%st(1)\n\t"
84 "fistpl %0\n\t"
85 "fwait\n\t"
86 "fninit"
87 : "=m" (*&boot_cpu_data.fdiv_bug)
88 : "m" (*&x), "m" (*&y));
89 if (boot_cpu_data.fdiv_bug)
90 printk("Hmm, FPU with FDIV bug.\n");
91}
92
93static void __init check_hlt(void)
94{
95 if (paravirt_enabled())
96 return;
97
98 printk(KERN_INFO "Checking 'hlt' instruction... ");
99 if (!boot_cpu_data.hlt_works_ok) {
100 printk("disabled\n");
101 return;
102 }
103 halt();
104 halt();
105 halt();
106 halt();
107 printk("OK.\n");
108}
109
110/*
111 * Most 386 processors have a bug where a POPAD can lock the
112 * machine even from user space.
113 */
114
115static void __init check_popad(void)
116{
117#ifndef CONFIG_X86_POPAD_OK
118 int res, inp = (int) &res;
119
120 printk(KERN_INFO "Checking for popad bug... ");
121 __asm__ __volatile__(
122 "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
123 : "=&a" (res)
124 : "d" (inp)
125 : "ecx", "edi" );
126 /* If this fails, it means that any user program may lock the CPU hard. Too bad. */
127 if (res != 12345678) printk( "Buggy.\n" );
128 else printk( "OK.\n" );
129#endif
130}
131
132/*
133 * Check whether we are able to run this kernel safely on SMP.
134 *
135 * - In order to run on a i386, we need to be compiled for i386
136 * (for due to lack of "invlpg" and working WP on a i386)
137 * - In order to run on anything without a TSC, we need to be
138 * compiled for a i486.
139 * - In order to support the local APIC on a buggy Pentium machine,
140 * we need to be compiled with CONFIG_X86_GOOD_APIC disabled,
141 * which happens implicitly if compiled for a Pentium or lower
142 * (unless an advanced selection of CPU features is used) as an
143 * otherwise config implies a properly working local APIC without
144 * the need to do extra reads from the APIC.
145*/
146
147static void __init check_config(void)
148{
149/*
150 * We'd better not be a i386 if we're configured to use some
151 * i486+ only features! (WP works in supervisor mode and the
152 * new "invlpg" and "bswap" instructions)
153 */
154#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP)
155 if (boot_cpu_data.x86 == 3)
156 panic("Kernel requires i486+ for 'invlpg' and other features");
157#endif
158
159/*
160 * If we configured ourselves for a TSC, we'd better have one!
161 */
162#ifdef CONFIG_X86_TSC
163 if (!cpu_has_tsc && !tsc_disable)
164 panic("Kernel compiled for Pentium+, requires TSC feature!");
165#endif
166
167/*
168 * If we were told we had a good local APIC, check for buggy Pentia,
169 * i.e. all B steppings and the C2 stepping of P54C when using their
170 * integrated APIC (see 11AP erratum in "Pentium Processor
171 * Specification Update").
172 */
173#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC)
174 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
175 && cpu_has_apic
176 && boot_cpu_data.x86 == 5
177 && boot_cpu_data.x86_model == 2
178 && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11))
179 panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!");
180#endif
181}
182
183extern void alternative_instructions(void);
184 11
185static void __init check_bugs(void) 12#endif /* _ASM_I386_BUG_H */
186{
187 identify_cpu(&boot_cpu_data);
188#ifndef CONFIG_SMP
189 printk("CPU: ");
190 print_cpu_info(&boot_cpu_data);
191#endif
192 check_config();
193 check_fpu();
194 check_hlt();
195 check_popad();
196 init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
197 alternative_instructions();
198}
diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h
index d1b8e4ab6c1a..f514e906643a 100644
--- a/include/asm-i386/cpufeature.h
+++ b/include/asm-i386/cpufeature.h
@@ -7,7 +7,10 @@
7#ifndef __ASM_I386_CPUFEATURE_H 7#ifndef __ASM_I386_CPUFEATURE_H
8#define __ASM_I386_CPUFEATURE_H 8#define __ASM_I386_CPUFEATURE_H
9 9
10#ifndef __ASSEMBLY__
10#include <linux/bitops.h> 11#include <linux/bitops.h>
12#endif
13#include <asm/required-features.h>
11 14
12#define NCAPINTS 7 /* N 32-bit words worth of info */ 15#define NCAPINTS 7 /* N 32-bit words worth of info */
13 16
@@ -49,6 +52,7 @@
49#define X86_FEATURE_MP (1*32+19) /* MP Capable. */ 52#define X86_FEATURE_MP (1*32+19) /* MP Capable. */
50#define X86_FEATURE_NX (1*32+20) /* Execute Disable */ 53#define X86_FEATURE_NX (1*32+20) /* Execute Disable */
51#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ 54#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
55#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */
52#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ 56#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
53#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ 57#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
54#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */ 58#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */
@@ -76,6 +80,7 @@
76#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ 80#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */
77#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ 81#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */
78#define X86_FEATURE_LAPIC_TIMER_BROKEN (3*32+ 14) /* lapic timer broken in C1 */ 82#define X86_FEATURE_LAPIC_TIMER_BROKEN (3*32+ 14) /* lapic timer broken in C1 */
83#define X86_FEATURE_SYNC_RDTSC (3*32+15) /* RDTSC synchronizes the CPU */
79 84
80/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ 85/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
81#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ 86#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
@@ -103,8 +108,12 @@
103#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ 108#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */
104#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ 109#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */
105 110
106#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability) 111#define cpu_has(c, bit) \
107#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability) 112 ((__builtin_constant_p(bit) && (bit) < 32 && \
113 (1UL << (bit)) & REQUIRED_MASK1) ? \
114 1 : \
115 test_bit(bit, (c)->x86_capability))
116#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
108 117
109#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) 118#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU)
110#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME) 119#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME)
diff --git a/include/asm-i386/current.h b/include/asm-i386/current.h
index 5252ee0f6d7a..d35248539912 100644
--- a/include/asm-i386/current.h
+++ b/include/asm-i386/current.h
@@ -1,14 +1,15 @@
1#ifndef _I386_CURRENT_H 1#ifndef _I386_CURRENT_H
2#define _I386_CURRENT_H 2#define _I386_CURRENT_H
3 3
4#include <asm/pda.h>
5#include <linux/compiler.h> 4#include <linux/compiler.h>
5#include <asm/percpu.h>
6 6
7struct task_struct; 7struct task_struct;
8 8
9DECLARE_PER_CPU(struct task_struct *, current_task);
9static __always_inline struct task_struct *get_current(void) 10static __always_inline struct task_struct *get_current(void)
10{ 11{
11 return read_pda(pcurrent); 12 return x86_read_percpu(current_task);
12} 13}
13 14
14#define current get_current() 15#define current get_current()
diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h
index 050831f34f71..c547403f341d 100644
--- a/include/asm-i386/desc.h
+++ b/include/asm-i386/desc.h
@@ -12,23 +12,24 @@
12 12
13#include <asm/mmu.h> 13#include <asm/mmu.h>
14 14
15extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
16
17struct Xgt_desc_struct { 15struct Xgt_desc_struct {
18 unsigned short size; 16 unsigned short size;
19 unsigned long address __attribute__((packed)); 17 unsigned long address __attribute__((packed));
20 unsigned short pad; 18 unsigned short pad;
21} __attribute__ ((packed)); 19} __attribute__ ((packed));
22 20
23extern struct Xgt_desc_struct idt_descr; 21struct gdt_page
24DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); 22{
25extern struct Xgt_desc_struct early_gdt_descr; 23 struct desc_struct gdt[GDT_ENTRIES];
24} __attribute__((aligned(PAGE_SIZE)));
25DECLARE_PER_CPU(struct gdt_page, gdt_page);
26 26
27static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) 27static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
28{ 28{
29 return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; 29 return per_cpu(gdt_page, cpu).gdt;
30} 30}
31 31
32extern struct Xgt_desc_struct idt_descr;
32extern struct desc_struct idt_table[]; 33extern struct desc_struct idt_table[];
33extern void set_intr_gate(unsigned int irq, void * addr); 34extern void set_intr_gate(unsigned int irq, void * addr);
34 35
@@ -58,45 +59,33 @@ static inline void pack_gate(__u32 *a, __u32 *b,
58#ifdef CONFIG_PARAVIRT 59#ifdef CONFIG_PARAVIRT
59#include <asm/paravirt.h> 60#include <asm/paravirt.h>
60#else 61#else
61#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) 62#define load_TR_desc() native_load_tr_desc()
62 63#define load_gdt(dtr) native_load_gdt(dtr)
63#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) 64#define load_idt(dtr) native_load_idt(dtr)
64#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
65#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr)) 65#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
66#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt)) 66#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
67 67
68#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr)) 68#define store_gdt(dtr) native_store_gdt(dtr)
69#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr)) 69#define store_idt(dtr) native_store_idt(dtr)
70#define store_tr(tr) __asm__ ("str %0":"=m" (tr)) 70#define store_tr(tr) (tr = native_store_tr())
71#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) 71#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
72 72
73#if TLS_SIZE != 24 73#define load_TLS(t, cpu) native_load_tls(t, cpu)
74# error update this code. 74#define set_ldt native_set_ldt
75#endif
76
77static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
78{
79#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
80 C(0); C(1); C(2);
81#undef C
82}
83 75
84#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) 76#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
85#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) 77#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
86#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) 78#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
79#endif
87 80
88static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b) 81static inline void write_dt_entry(struct desc_struct *dt,
82 int entry, u32 entry_low, u32 entry_high)
89{ 83{
90 __u32 *lp = (__u32 *)((char *)dt + entry*8); 84 dt[entry].a = entry_low;
91 *lp = entry_a; 85 dt[entry].b = entry_high;
92 *(lp+1) = entry_b;
93} 86}
94 87
95#define set_ldt native_set_ldt 88static inline void native_set_ldt(const void *addr, unsigned int entries)
96#endif /* CONFIG_PARAVIRT */
97
98static inline fastcall void native_set_ldt(const void *addr,
99 unsigned int entries)
100{ 89{
101 if (likely(entries == 0)) 90 if (likely(entries == 0))
102 __asm__ __volatile__("lldt %w0"::"q" (0)); 91 __asm__ __volatile__("lldt %w0"::"q" (0));
@@ -112,6 +101,48 @@ static inline fastcall void native_set_ldt(const void *addr,
112 } 101 }
113} 102}
114 103
104
105static inline void native_load_tr_desc(void)
106{
107 asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
108}
109
110static inline void native_load_gdt(const struct Xgt_desc_struct *dtr)
111{
112 asm volatile("lgdt %0"::"m" (*dtr));
113}
114
115static inline void native_load_idt(const struct Xgt_desc_struct *dtr)
116{
117 asm volatile("lidt %0"::"m" (*dtr));
118}
119
120static inline void native_store_gdt(struct Xgt_desc_struct *dtr)
121{
122 asm ("sgdt %0":"=m" (*dtr));
123}
124
125static inline void native_store_idt(struct Xgt_desc_struct *dtr)
126{
127 asm ("sidt %0":"=m" (*dtr));
128}
129
130static inline unsigned long native_store_tr(void)
131{
132 unsigned long tr;
133 asm ("str %0":"=r" (tr));
134 return tr;
135}
136
137static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
138{
139 unsigned int i;
140 struct desc_struct *gdt = get_cpu_gdt_table(cpu);
141
142 for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
143 gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
144}
145
115static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg) 146static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
116{ 147{
117 __u32 a, b; 148 __u32 a, b;
diff --git a/include/asm-i386/e820.h b/include/asm-i386/e820.h
index c5b8fc6109d6..096a2a8eb1da 100644
--- a/include/asm-i386/e820.h
+++ b/include/asm-i386/e820.h
@@ -38,6 +38,7 @@ extern struct e820map e820;
38 38
39extern int e820_all_mapped(unsigned long start, unsigned long end, 39extern int e820_all_mapped(unsigned long start, unsigned long end,
40 unsigned type); 40 unsigned type);
41extern int e820_any_mapped(u64 start, u64 end, unsigned type);
41extern void find_max_pfn(void); 42extern void find_max_pfn(void);
42extern void register_bootmem_low_pages(unsigned long max_low_pfn); 43extern void register_bootmem_low_pages(unsigned long max_low_pfn);
43extern void e820_register_memory(void); 44extern void e820_register_memory(void);
diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h
index 952b3ee3c9bb..d304ab4161ff 100644
--- a/include/asm-i386/elf.h
+++ b/include/asm-i386/elf.h
@@ -133,39 +133,31 @@ extern int dump_task_extended_fpu (struct task_struct *, struct user_fxsr_struct
133#define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs) 133#define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs)
134 134
135#define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO)) 135#define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO))
136#define VDSO_BASE ((unsigned long)current->mm->context.vdso) 136#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso)
137 137#define VDSO_PRELINK 0
138#ifdef CONFIG_COMPAT_VDSO
139# define VDSO_COMPAT_BASE VDSO_HIGH_BASE
140# define VDSO_PRELINK VDSO_HIGH_BASE
141#else
142# define VDSO_COMPAT_BASE VDSO_BASE
143# define VDSO_PRELINK 0
144#endif
145 138
146#define VDSO_SYM(x) \ 139#define VDSO_SYM(x) \
147 (VDSO_COMPAT_BASE + (unsigned long)(x) - VDSO_PRELINK) 140 (VDSO_CURRENT_BASE + (unsigned long)(x) - VDSO_PRELINK)
148 141
149#define VDSO_HIGH_EHDR ((const struct elfhdr *) VDSO_HIGH_BASE) 142#define VDSO_HIGH_EHDR ((const struct elfhdr *) VDSO_HIGH_BASE)
150#define VDSO_EHDR ((const struct elfhdr *) VDSO_COMPAT_BASE) 143#define VDSO_EHDR ((const struct elfhdr *) VDSO_CURRENT_BASE)
151 144
152extern void __kernel_vsyscall; 145extern void __kernel_vsyscall;
153 146
154#define VDSO_ENTRY VDSO_SYM(&__kernel_vsyscall) 147#define VDSO_ENTRY VDSO_SYM(&__kernel_vsyscall)
155 148
156#ifndef CONFIG_COMPAT_VDSO
157#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
158struct linux_binprm; 149struct linux_binprm;
150
151#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
159extern int arch_setup_additional_pages(struct linux_binprm *bprm, 152extern int arch_setup_additional_pages(struct linux_binprm *bprm,
160 int executable_stack); 153 int executable_stack);
161#endif
162 154
163extern unsigned int vdso_enabled; 155extern unsigned int vdso_enabled;
164 156
165#define ARCH_DLINFO \ 157#define ARCH_DLINFO \
166do if (vdso_enabled) { \ 158do if (vdso_enabled) { \
167 NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ 159 NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \
168 NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_COMPAT_BASE); \ 160 NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \
169} while (0) 161} while (0)
170 162
171#endif 163#endif
diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h
index 3e9f610c35df..80ea052ee3a4 100644
--- a/include/asm-i386/fixmap.h
+++ b/include/asm-i386/fixmap.h
@@ -19,13 +19,9 @@
19 * Leave one empty page between vmalloc'ed areas and 19 * Leave one empty page between vmalloc'ed areas and
20 * the start of the fixmap. 20 * the start of the fixmap.
21 */ 21 */
22#ifndef CONFIG_COMPAT_VDSO
23extern unsigned long __FIXADDR_TOP; 22extern unsigned long __FIXADDR_TOP;
24#else 23#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
25#define __FIXADDR_TOP 0xfffff000 24#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
26#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
27#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
28#endif
29 25
30#ifndef __ASSEMBLY__ 26#ifndef __ASSEMBLY__
31#include <linux/kernel.h> 27#include <linux/kernel.h>
@@ -88,6 +84,9 @@ enum fixed_addresses {
88#ifdef CONFIG_PCI_MMCONFIG 84#ifdef CONFIG_PCI_MMCONFIG
89 FIX_PCIE_MCFG, 85 FIX_PCIE_MCFG,
90#endif 86#endif
87#ifdef CONFIG_PARAVIRT
88 FIX_PARAVIRT_BOOTMAP,
89#endif
91 __end_of_permanent_fixed_addresses, 90 __end_of_permanent_fixed_addresses,
92 /* temporary boot-time mappings, used before ioremap() is functional */ 91 /* temporary boot-time mappings, used before ioremap() is functional */
93#define NR_FIX_BTMAPS 16 92#define NR_FIX_BTMAPS 16
diff --git a/include/asm-i386/genapic.h b/include/asm-i386/genapic.h
index fd2be593b06e..33e3ffe1766c 100644
--- a/include/asm-i386/genapic.h
+++ b/include/asm-i386/genapic.h
@@ -36,7 +36,7 @@ struct genapic {
36 void (*init_apic_ldr)(void); 36 void (*init_apic_ldr)(void);
37 physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); 37 physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map);
38 38
39 void (*clustered_apic_check)(void); 39 void (*setup_apic_routing)(void);
40 int (*multi_timer_check)(int apic, int irq); 40 int (*multi_timer_check)(int apic, int irq);
41 int (*apicid_to_node)(int logical_apicid); 41 int (*apicid_to_node)(int logical_apicid);
42 int (*cpu_to_logical_apicid)(int cpu); 42 int (*cpu_to_logical_apicid)(int cpu);
@@ -99,7 +99,7 @@ struct genapic {
99 APICFUNC(check_apicid_present) \ 99 APICFUNC(check_apicid_present) \
100 APICFUNC(init_apic_ldr) \ 100 APICFUNC(init_apic_ldr) \
101 APICFUNC(ioapic_phys_id_map) \ 101 APICFUNC(ioapic_phys_id_map) \
102 APICFUNC(clustered_apic_check) \ 102 APICFUNC(setup_apic_routing) \
103 APICFUNC(multi_timer_check) \ 103 APICFUNC(multi_timer_check) \
104 APICFUNC(apicid_to_node) \ 104 APICFUNC(apicid_to_node) \
105 APICFUNC(cpu_to_logical_apicid) \ 105 APICFUNC(cpu_to_logical_apicid) \
@@ -122,6 +122,6 @@ struct genapic {
122 APICFUNC(phys_pkg_id) \ 122 APICFUNC(phys_pkg_id) \
123 } 123 }
124 124
125extern struct genapic *genapic, apic_default; 125extern struct genapic *genapic;
126 126
127#endif 127#endif
diff --git a/include/asm-i386/highmem.h b/include/asm-i386/highmem.h
index e9a34ebc25d5..13cdcd66fff2 100644
--- a/include/asm-i386/highmem.h
+++ b/include/asm-i386/highmem.h
@@ -24,6 +24,7 @@
24#include <linux/threads.h> 24#include <linux/threads.h>
25#include <asm/kmap_types.h> 25#include <asm/kmap_types.h>
26#include <asm/tlbflush.h> 26#include <asm/tlbflush.h>
27#include <asm/paravirt.h>
27 28
28/* declarations for highmem.c */ 29/* declarations for highmem.c */
29extern unsigned long highstart_pfn, highend_pfn; 30extern unsigned long highstart_pfn, highend_pfn;
@@ -67,11 +68,16 @@ extern void FASTCALL(kunmap_high(struct page *page));
67 68
68void *kmap(struct page *page); 69void *kmap(struct page *page);
69void kunmap(struct page *page); 70void kunmap(struct page *page);
71void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
70void *kmap_atomic(struct page *page, enum km_type type); 72void *kmap_atomic(struct page *page, enum km_type type);
71void kunmap_atomic(void *kvaddr, enum km_type type); 73void kunmap_atomic(void *kvaddr, enum km_type type);
72void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); 74void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
73struct page *kmap_atomic_to_page(void *ptr); 75struct page *kmap_atomic_to_page(void *ptr);
74 76
77#ifndef CONFIG_PARAVIRT
78#define kmap_atomic_pte(page, type) kmap_atomic(page, type)
79#endif
80
75#define flush_cache_kmaps() do { } while (0) 81#define flush_cache_kmaps() do { } while (0)
76 82
77#endif /* __KERNEL__ */ 83#endif /* __KERNEL__ */
diff --git a/include/asm-i386/hpet.h b/include/asm-i386/hpet.h
index fc03cf9de5c4..dddeedf504b7 100644
--- a/include/asm-i386/hpet.h
+++ b/include/asm-i386/hpet.h
@@ -28,8 +28,6 @@
28 28
29#include <linux/timex.h> 29#include <linux/timex.h>
30 30
31#include <asm/fixmap.h>
32
33/* 31/*
34 * Documentation on HPET can be found at: 32 * Documentation on HPET can be found at:
35 * http://www.intel.com/ial/home/sp/pcmmspec.htm 33 * http://www.intel.com/ial/home/sp/pcmmspec.htm
diff --git a/include/asm-i386/i387.h b/include/asm-i386/i387.h
index 434936c732d6..cdd1e248e3b4 100644
--- a/include/asm-i386/i387.h
+++ b/include/asm-i386/i387.h
@@ -74,17 +74,18 @@ static inline void __save_init_fpu( struct task_struct *tsk )
74 task_thread_info(tsk)->status &= ~TS_USEDFPU; 74 task_thread_info(tsk)->status &= ~TS_USEDFPU;
75} 75}
76 76
77#define __unlazy_fpu( tsk ) do { \ 77#define __unlazy_fpu( tsk ) do { \
78 if (task_thread_info(tsk)->status & TS_USEDFPU) \ 78 if (task_thread_info(tsk)->status & TS_USEDFPU) { \
79 save_init_fpu( tsk ); \ 79 __save_init_fpu(tsk); \
80 else \ 80 stts(); \
81 tsk->fpu_counter = 0; \ 81 } else \
82 tsk->fpu_counter = 0; \
82} while (0) 83} while (0)
83 84
84#define __clear_fpu( tsk ) \ 85#define __clear_fpu( tsk ) \
85do { \ 86do { \
86 if (task_thread_info(tsk)->status & TS_USEDFPU) { \ 87 if (task_thread_info(tsk)->status & TS_USEDFPU) { \
87 asm volatile("fnclex ; fwait"); \ 88 asm volatile("fnclex ; fwait"); \
88 task_thread_info(tsk)->status &= ~TS_USEDFPU; \ 89 task_thread_info(tsk)->status &= ~TS_USEDFPU; \
89 stts(); \ 90 stts(); \
90 } \ 91 } \
@@ -113,7 +114,7 @@ static inline void save_init_fpu( struct task_struct *tsk )
113 __clear_fpu( tsk ); \ 114 __clear_fpu( tsk ); \
114 preempt_enable(); \ 115 preempt_enable(); \
115} while (0) 116} while (0)
116 \ 117
117/* 118/*
118 * FPU state interaction... 119 * FPU state interaction...
119 */ 120 */
diff --git a/include/asm-i386/io.h b/include/asm-i386/io.h
index 59fe616933c4..e797586a5bfc 100644
--- a/include/asm-i386/io.h
+++ b/include/asm-i386/io.h
@@ -250,19 +250,22 @@ static inline void flush_write_buffers(void)
250 250
251#endif /* __KERNEL__ */ 251#endif /* __KERNEL__ */
252 252
253static inline void native_io_delay(void)
254{
255 asm volatile("outb %%al,$0x80" : : : "memory");
256}
257
253#if defined(CONFIG_PARAVIRT) 258#if defined(CONFIG_PARAVIRT)
254#include <asm/paravirt.h> 259#include <asm/paravirt.h>
255#else 260#else
256 261
257#define __SLOW_DOWN_IO "outb %%al,$0x80;"
258
259static inline void slow_down_io(void) { 262static inline void slow_down_io(void) {
260 __asm__ __volatile__( 263 native_io_delay();
261 __SLOW_DOWN_IO
262#ifdef REALLY_SLOW_IO 264#ifdef REALLY_SLOW_IO
263 __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO 265 native_io_delay();
266 native_io_delay();
267 native_io_delay();
264#endif 268#endif
265 : : );
266} 269}
267 270
268#endif 271#endif
diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h
index 11761cdaae19..9e15ce0006eb 100644
--- a/include/asm-i386/irq.h
+++ b/include/asm-i386/irq.h
@@ -37,8 +37,6 @@ static __inline__ int irq_canonicalize(int irq)
37extern int irqbalance_disable(char *str); 37extern int irqbalance_disable(char *str);
38#endif 38#endif
39 39
40extern void quirk_intel_irqbalance(void);
41
42#ifdef CONFIG_HOTPLUG_CPU 40#ifdef CONFIG_HOTPLUG_CPU
43extern void fixup_irqs(cpumask_t map); 41extern void fixup_irqs(cpumask_t map);
44#endif 42#endif
diff --git a/include/asm-i386/irq_regs.h b/include/asm-i386/irq_regs.h
index a1b3f7f594a2..3368b20c0b48 100644
--- a/include/asm-i386/irq_regs.h
+++ b/include/asm-i386/irq_regs.h
@@ -1,25 +1,27 @@
1/* 1/*
2 * Per-cpu current frame pointer - the location of the last exception frame on 2 * Per-cpu current frame pointer - the location of the last exception frame on
3 * the stack, stored in the PDA. 3 * the stack, stored in the per-cpu area.
4 * 4 *
5 * Jeremy Fitzhardinge <jeremy@goop.org> 5 * Jeremy Fitzhardinge <jeremy@goop.org>
6 */ 6 */
7#ifndef _ASM_I386_IRQ_REGS_H 7#ifndef _ASM_I386_IRQ_REGS_H
8#define _ASM_I386_IRQ_REGS_H 8#define _ASM_I386_IRQ_REGS_H
9 9
10#include <asm/pda.h> 10#include <asm/percpu.h>
11
12DECLARE_PER_CPU(struct pt_regs *, irq_regs);
11 13
12static inline struct pt_regs *get_irq_regs(void) 14static inline struct pt_regs *get_irq_regs(void)
13{ 15{
14 return read_pda(irq_regs); 16 return x86_read_percpu(irq_regs);
15} 17}
16 18
17static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) 19static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
18{ 20{
19 struct pt_regs *old_regs; 21 struct pt_regs *old_regs;
20 22
21 old_regs = read_pda(irq_regs); 23 old_regs = get_irq_regs();
22 write_pda(irq_regs, new_regs); 24 x86_write_percpu(irq_regs, new_regs);
23 25
24 return old_regs; 26 return old_regs;
25} 27}
diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h
index 17b18cf4fe9d..eff8585cb741 100644
--- a/include/asm-i386/irqflags.h
+++ b/include/asm-i386/irqflags.h
@@ -9,6 +9,43 @@
9 */ 9 */
10#ifndef _ASM_IRQFLAGS_H 10#ifndef _ASM_IRQFLAGS_H
11#define _ASM_IRQFLAGS_H 11#define _ASM_IRQFLAGS_H
12#include <asm/processor-flags.h>
13
14#ifndef __ASSEMBLY__
15static inline unsigned long native_save_fl(void)
16{
17 unsigned long f;
18 asm volatile("pushfl ; popl %0":"=g" (f): /* no input */);
19 return f;
20}
21
22static inline void native_restore_fl(unsigned long f)
23{
24 asm volatile("pushl %0 ; popfl": /* no output */
25 :"g" (f)
26 :"memory", "cc");
27}
28
29static inline void native_irq_disable(void)
30{
31 asm volatile("cli": : :"memory");
32}
33
34static inline void native_irq_enable(void)
35{
36 asm volatile("sti": : :"memory");
37}
38
39static inline void native_safe_halt(void)
40{
41 asm volatile("sti; hlt": : :"memory");
42}
43
44static inline void native_halt(void)
45{
46 asm volatile("hlt": : :"memory");
47}
48#endif /* __ASSEMBLY__ */
12 49
13#ifdef CONFIG_PARAVIRT 50#ifdef CONFIG_PARAVIRT
14#include <asm/paravirt.h> 51#include <asm/paravirt.h>
@@ -17,35 +54,22 @@
17 54
18static inline unsigned long __raw_local_save_flags(void) 55static inline unsigned long __raw_local_save_flags(void)
19{ 56{
20 unsigned long flags; 57 return native_save_fl();
21
22 __asm__ __volatile__(
23 "pushfl ; popl %0"
24 : "=g" (flags)
25 : /* no input */
26 );
27
28 return flags;
29} 58}
30 59
31static inline void raw_local_irq_restore(unsigned long flags) 60static inline void raw_local_irq_restore(unsigned long flags)
32{ 61{
33 __asm__ __volatile__( 62 native_restore_fl(flags);
34 "pushl %0 ; popfl"
35 : /* no output */
36 :"g" (flags)
37 :"memory", "cc"
38 );
39} 63}
40 64
41static inline void raw_local_irq_disable(void) 65static inline void raw_local_irq_disable(void)
42{ 66{
43 __asm__ __volatile__("cli" : : : "memory"); 67 native_irq_disable();
44} 68}
45 69
46static inline void raw_local_irq_enable(void) 70static inline void raw_local_irq_enable(void)
47{ 71{
48 __asm__ __volatile__("sti" : : : "memory"); 72 native_irq_enable();
49} 73}
50 74
51/* 75/*
@@ -54,7 +78,7 @@ static inline void raw_local_irq_enable(void)
54 */ 78 */
55static inline void raw_safe_halt(void) 79static inline void raw_safe_halt(void)
56{ 80{
57 __asm__ __volatile__("sti; hlt" : : : "memory"); 81 native_safe_halt();
58} 82}
59 83
60/* 84/*
@@ -63,7 +87,7 @@ static inline void raw_safe_halt(void)
63 */ 87 */
64static inline void halt(void) 88static inline void halt(void)
65{ 89{
66 __asm__ __volatile__("hlt": : :"memory"); 90 native_halt();
67} 91}
68 92
69/* 93/*
@@ -96,7 +120,7 @@ static inline unsigned long __raw_local_irq_save(void)
96 120
97static inline int raw_irqs_disabled_flags(unsigned long flags) 121static inline int raw_irqs_disabled_flags(unsigned long flags)
98{ 122{
99 return !(flags & (1 << 9)); 123 return !(flags & X86_EFLAGS_IF);
100} 124}
101 125
102static inline int raw_irqs_disabled(void) 126static inline int raw_irqs_disabled(void)
diff --git a/include/asm-i386/kexec.h b/include/asm-i386/kexec.h
index 4dfc9f5ed031..bcb5b21de2d2 100644
--- a/include/asm-i386/kexec.h
+++ b/include/asm-i386/kexec.h
@@ -21,7 +21,6 @@
21 21
22#ifndef __ASSEMBLY__ 22#ifndef __ASSEMBLY__
23 23
24#include <asm/fixmap.h>
25#include <asm/ptrace.h> 24#include <asm/ptrace.h>
26#include <asm/string.h> 25#include <asm/string.h>
27 26
@@ -29,10 +28,6 @@
29 * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. 28 * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
30 * I.e. Maximum page that is mapped directly into kernel memory, 29 * I.e. Maximum page that is mapped directly into kernel memory,
31 * and kmap is not required. 30 * and kmap is not required.
32 *
33 * Someone correct me if FIXADDR_START - PAGEOFFSET is not the correct
34 * calculation for the amount of memory directly mappable into the
35 * kernel memory space.
36 */ 31 */
37 32
38/* Maximum physical address we can use pages from */ 33/* Maximum physical address we can use pages from */
@@ -47,6 +42,9 @@
47/* The native architecture */ 42/* The native architecture */
48#define KEXEC_ARCH KEXEC_ARCH_386 43#define KEXEC_ARCH KEXEC_ARCH_386
49 44
45/* We can also handle crash dumps from 64 bit kernel. */
46#define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
47
50#define MAX_NOTE_BYTES 1024 48#define MAX_NOTE_BYTES 1024
51 49
52/* CPU does not save ss and esp on stack if execution is already 50/* CPU does not save ss and esp on stack if execution is already
diff --git a/include/asm-i386/mach-bigsmp/mach_apic.h b/include/asm-i386/mach-bigsmp/mach_apic.h
index 18b19a773440..ebd319f838ab 100644
--- a/include/asm-i386/mach-bigsmp/mach_apic.h
+++ b/include/asm-i386/mach-bigsmp/mach_apic.h
@@ -71,7 +71,7 @@ static inline void init_apic_ldr(void)
71 apic_write_around(APIC_LDR, val); 71 apic_write_around(APIC_LDR, val);
72} 72}
73 73
74static inline void clustered_apic_check(void) 74static inline void setup_apic_routing(void)
75{ 75{
76 printk("Enabling APIC mode: %s. Using %d I/O APICs\n", 76 printk("Enabling APIC mode: %s. Using %d I/O APICs\n",
77 "Physflat", nr_ioapics); 77 "Physflat", nr_ioapics);
diff --git a/include/asm-i386/mach-default/mach_apic.h b/include/asm-i386/mach-default/mach_apic.h
index 3ef6292db780..6db1c3babe9a 100644
--- a/include/asm-i386/mach-default/mach_apic.h
+++ b/include/asm-i386/mach-default/mach_apic.h
@@ -54,7 +54,7 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map)
54 return phys_map; 54 return phys_map;
55} 55}
56 56
57static inline void clustered_apic_check(void) 57static inline void setup_apic_routing(void)
58{ 58{
59 printk("Enabling APIC mode: %s. Using %d I/O APICs\n", 59 printk("Enabling APIC mode: %s. Using %d I/O APICs\n",
60 "Flat", nr_ioapics); 60 "Flat", nr_ioapics);
diff --git a/include/asm-i386/mach-es7000/mach_apic.h b/include/asm-i386/mach-es7000/mach_apic.h
index 26333685a7fb..2d978928a395 100644
--- a/include/asm-i386/mach-es7000/mach_apic.h
+++ b/include/asm-i386/mach-es7000/mach_apic.h
@@ -73,15 +73,8 @@ static inline void init_apic_ldr(void)
73 apic_write_around(APIC_LDR, val); 73 apic_write_around(APIC_LDR, val);
74} 74}
75 75
76extern void es7000_sw_apic(void);
77static inline void enable_apic_mode(void)
78{
79 es7000_sw_apic();
80 return;
81}
82
83extern int apic_version [MAX_APICS]; 76extern int apic_version [MAX_APICS];
84static inline void clustered_apic_check(void) 77static inline void setup_apic_routing(void)
85{ 78{
86 int apic = bios_cpu_apicid[smp_processor_id()]; 79 int apic = bios_cpu_apicid[smp_processor_id()];
87 printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", 80 printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
diff --git a/include/asm-i386/mach-es7000/mach_mpparse.h b/include/asm-i386/mach-es7000/mach_mpparse.h
index 24990e546da3..b9fb784e1fd5 100644
--- a/include/asm-i386/mach-es7000/mach_mpparse.h
+++ b/include/asm-i386/mach-es7000/mach_mpparse.h
@@ -18,18 +18,6 @@ extern int parse_unisys_oem (char *oemptr);
18extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); 18extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
19extern void setup_unisys(void); 19extern void setup_unisys(void);
20 20
21static inline int mps_oem_check(struct mp_config_table *mpc, char *oem,
22 char *productid)
23{
24 if (mpc->mpc_oemptr) {
25 struct mp_config_oemtable *oem_table =
26 (struct mp_config_oemtable *)mpc->mpc_oemptr;
27 if (!strncmp(oem, "UNISYS", 6))
28 return parse_unisys_oem((char *)oem_table);
29 }
30 return 0;
31}
32
33#ifdef CONFIG_ACPI 21#ifdef CONFIG_ACPI
34 22
35static inline int es7000_check_dsdt(void) 23static inline int es7000_check_dsdt(void)
@@ -41,26 +29,6 @@ static inline int es7000_check_dsdt(void)
41 return 1; 29 return 1;
42 return 0; 30 return 0;
43} 31}
44
45/* Hook from generic ACPI tables.c */
46static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
47{
48 unsigned long oem_addr;
49 if (!find_unisys_acpi_oem_table(&oem_addr)) {
50 if (es7000_check_dsdt())
51 return parse_unisys_oem((char *)oem_addr);
52 else {
53 setup_unisys();
54 return 1;
55 }
56 }
57 return 0;
58}
59#else
60static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
61{
62 return 0;
63}
64#endif 32#endif
65 33
66#endif /* __ASM_MACH_MPPARSE_H */ 34#endif /* __ASM_MACH_MPPARSE_H */
diff --git a/include/asm-i386/mach-generic/mach_apic.h b/include/asm-i386/mach-generic/mach_apic.h
index d9dc039da94a..a236e7021528 100644
--- a/include/asm-i386/mach-generic/mach_apic.h
+++ b/include/asm-i386/mach-generic/mach_apic.h
@@ -13,7 +13,7 @@
13#define apic_id_registered (genapic->apic_id_registered) 13#define apic_id_registered (genapic->apic_id_registered)
14#define init_apic_ldr (genapic->init_apic_ldr) 14#define init_apic_ldr (genapic->init_apic_ldr)
15#define ioapic_phys_id_map (genapic->ioapic_phys_id_map) 15#define ioapic_phys_id_map (genapic->ioapic_phys_id_map)
16#define clustered_apic_check (genapic->clustered_apic_check) 16#define setup_apic_routing (genapic->setup_apic_routing)
17#define multi_timer_check (genapic->multi_timer_check) 17#define multi_timer_check (genapic->multi_timer_check)
18#define apicid_to_node (genapic->apicid_to_node) 18#define apicid_to_node (genapic->apicid_to_node)
19#define cpu_to_logical_apicid (genapic->cpu_to_logical_apicid) 19#define cpu_to_logical_apicid (genapic->cpu_to_logical_apicid)
diff --git a/include/asm-i386/mach-numaq/mach_apic.h b/include/asm-i386/mach-numaq/mach_apic.h
index 9d158095da82..5e5e7dd2692e 100644
--- a/include/asm-i386/mach-numaq/mach_apic.h
+++ b/include/asm-i386/mach-numaq/mach_apic.h
@@ -34,7 +34,7 @@ static inline void init_apic_ldr(void)
34 /* Already done in NUMA-Q firmware */ 34 /* Already done in NUMA-Q firmware */
35} 35}
36 36
37static inline void clustered_apic_check(void) 37static inline void setup_apic_routing(void)
38{ 38{
39 printk("Enabling APIC mode: %s. Using %d I/O APICs\n", 39 printk("Enabling APIC mode: %s. Using %d I/O APICs\n",
40 "NUMA-Q", nr_ioapics); 40 "NUMA-Q", nr_ioapics);
diff --git a/include/asm-i386/mach-summit/mach_apic.h b/include/asm-i386/mach-summit/mach_apic.h
index 43e5bd8f4a19..732f776aab8e 100644
--- a/include/asm-i386/mach-summit/mach_apic.h
+++ b/include/asm-i386/mach-summit/mach_apic.h
@@ -80,7 +80,7 @@ static inline int apic_id_registered(void)
80 return 1; 80 return 1;
81} 81}
82 82
83static inline void clustered_apic_check(void) 83static inline void setup_apic_routing(void)
84{ 84{
85 printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", 85 printk("Enabling APIC mode: Summit. Using %d I/O APICs\n",
86 nr_ioapics); 86 nr_ioapics);
diff --git a/include/asm-i386/mach-summit/mach_mpparse.h b/include/asm-i386/mach-summit/mach_mpparse.h
index 94268399170d..c2520539d934 100644
--- a/include/asm-i386/mach-summit/mach_mpparse.h
+++ b/include/asm-i386/mach-summit/mach_mpparse.h
@@ -30,7 +30,7 @@ static inline int mps_oem_check(struct mp_config_table *mpc, char *oem,
30 (!strncmp(productid, "VIGIL SMP", 9) 30 (!strncmp(productid, "VIGIL SMP", 9)
31 || !strncmp(productid, "EXA", 3) 31 || !strncmp(productid, "EXA", 3)
32 || !strncmp(productid, "RUTHLESS SMP", 12))){ 32 || !strncmp(productid, "RUTHLESS SMP", 12))){
33 mark_tsc_unstable(); 33 mark_tsc_unstable("Summit based system");
34 use_cyclone = 1; /*enable cyclone-timer*/ 34 use_cyclone = 1; /*enable cyclone-timer*/
35 setup_summit(); 35 setup_summit();
36 return 1; 36 return 1;
@@ -44,7 +44,7 @@ static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
44 if (!strncmp(oem_id, "IBM", 3) && 44 if (!strncmp(oem_id, "IBM", 3) &&
45 (!strncmp(oem_table_id, "SERVIGIL", 8) 45 (!strncmp(oem_table_id, "SERVIGIL", 8)
46 || !strncmp(oem_table_id, "EXA", 3))){ 46 || !strncmp(oem_table_id, "EXA", 3))){
47 mark_tsc_unstable(); 47 mark_tsc_unstable("Summit based system");
48 use_cyclone = 1; /*enable cyclone-timer*/ 48 use_cyclone = 1; /*enable cyclone-timer*/
49 setup_summit(); 49 setup_summit();
50 return 1; 50 return 1;
diff --git a/include/asm-i386/mach-visws/mach_apic.h b/include/asm-i386/mach-visws/mach_apic.h
index 18afe6b6fc4d..efac6f0d139f 100644
--- a/include/asm-i386/mach-visws/mach_apic.h
+++ b/include/asm-i386/mach-visws/mach_apic.h
@@ -47,7 +47,7 @@ static inline void summit_check(char *oem, char *productid)
47{ 47{
48} 48}
49 49
50static inline void clustered_apic_check(void) 50static inline void setup_apic_routing(void)
51{ 51{
52} 52}
53 53
diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h
index e6aa30f8de5b..8198d1cca1f3 100644
--- a/include/asm-i386/mmu_context.h
+++ b/include/asm-i386/mmu_context.h
@@ -5,6 +5,16 @@
5#include <asm/atomic.h> 5#include <asm/atomic.h>
6#include <asm/pgalloc.h> 6#include <asm/pgalloc.h>
7#include <asm/tlbflush.h> 7#include <asm/tlbflush.h>
8#include <asm/paravirt.h>
9#ifndef CONFIG_PARAVIRT
10#include <asm-generic/mm_hooks.h>
11
12static inline void paravirt_activate_mm(struct mm_struct *prev,
13 struct mm_struct *next)
14{
15}
16#endif /* !CONFIG_PARAVIRT */
17
8 18
9/* 19/*
10 * Used for LDT copy/destruction. 20 * Used for LDT copy/destruction.
@@ -65,7 +75,10 @@ static inline void switch_mm(struct mm_struct *prev,
65#define deactivate_mm(tsk, mm) \ 75#define deactivate_mm(tsk, mm) \
66 asm("movl %0,%%gs": :"r" (0)); 76 asm("movl %0,%%gs": :"r" (0));
67 77
68#define activate_mm(prev, next) \ 78#define activate_mm(prev, next) \
69 switch_mm((prev),(next),NULL) 79 do { \
80 paravirt_activate_mm(prev, next); \
81 switch_mm((prev),(next),NULL); \
82 } while(0);
70 83
71#endif 84#endif
diff --git a/include/asm-i386/module.h b/include/asm-i386/module.h
index 02f8f541cbe0..7e5fda6c3976 100644
--- a/include/asm-i386/module.h
+++ b/include/asm-i386/module.h
@@ -54,6 +54,8 @@ struct mod_arch_specific
54#define MODULE_PROC_FAMILY "CYRIXIII " 54#define MODULE_PROC_FAMILY "CYRIXIII "
55#elif defined CONFIG_MVIAC3_2 55#elif defined CONFIG_MVIAC3_2
56#define MODULE_PROC_FAMILY "VIAC3-2 " 56#define MODULE_PROC_FAMILY "VIAC3-2 "
57#elif defined CONFIG_MVIAC7
58#define MODULE_PROC_FAMILY "VIAC7 "
57#elif defined CONFIG_MGEODEGX1 59#elif defined CONFIG_MGEODEGX1
58#define MODULE_PROC_FAMILY "GEODEGX1 " 60#define MODULE_PROC_FAMILY "GEODEGX1 "
59#elif defined CONFIG_MGEODE_LX 61#elif defined CONFIG_MGEODE_LX
diff --git a/include/asm-i386/msr-index.h b/include/asm-i386/msr-index.h
new file mode 100644
index 000000000000..a02eb2991349
--- /dev/null
+++ b/include/asm-i386/msr-index.h
@@ -0,0 +1,278 @@
1#ifndef __ASM_MSR_INDEX_H
2#define __ASM_MSR_INDEX_H
3
4/* CPU model specific register (MSR) numbers */
5
6/* x86-64 specific MSRs */
7#define MSR_EFER 0xc0000080 /* extended feature register */
8#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
9#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */
10#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */
11#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
12#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
13#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
14#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */
15
16/* EFER bits: */
17#define _EFER_SCE 0 /* SYSCALL/SYSRET */
18#define _EFER_LME 8 /* Long mode enable */
19#define _EFER_LMA 10 /* Long mode active (read-only) */
20#define _EFER_NX 11 /* No execute enable */
21
22#define EFER_SCE (1<<_EFER_SCE)
23#define EFER_LME (1<<_EFER_LME)
24#define EFER_LMA (1<<_EFER_LMA)
25#define EFER_NX (1<<_EFER_NX)
26
27/* Intel MSRs. Some also available on other CPUs */
28#define MSR_IA32_PERFCTR0 0x000000c1
29#define MSR_IA32_PERFCTR1 0x000000c2
30#define MSR_FSB_FREQ 0x000000cd
31
32#define MSR_MTRRcap 0x000000fe
33#define MSR_IA32_BBL_CR_CTL 0x00000119
34
35#define MSR_IA32_SYSENTER_CS 0x00000174
36#define MSR_IA32_SYSENTER_ESP 0x00000175
37#define MSR_IA32_SYSENTER_EIP 0x00000176
38
39#define MSR_IA32_MCG_CAP 0x00000179
40#define MSR_IA32_MCG_STATUS 0x0000017a
41#define MSR_IA32_MCG_CTL 0x0000017b
42
43#define MSR_IA32_PEBS_ENABLE 0x000003f1
44#define MSR_IA32_DS_AREA 0x00000600
45#define MSR_IA32_PERF_CAPABILITIES 0x00000345
46
47#define MSR_MTRRfix64K_00000 0x00000250
48#define MSR_MTRRfix16K_80000 0x00000258
49#define MSR_MTRRfix16K_A0000 0x00000259
50#define MSR_MTRRfix4K_C0000 0x00000268
51#define MSR_MTRRfix4K_C8000 0x00000269
52#define MSR_MTRRfix4K_D0000 0x0000026a
53#define MSR_MTRRfix4K_D8000 0x0000026b
54#define MSR_MTRRfix4K_E0000 0x0000026c
55#define MSR_MTRRfix4K_E8000 0x0000026d
56#define MSR_MTRRfix4K_F0000 0x0000026e
57#define MSR_MTRRfix4K_F8000 0x0000026f
58#define MSR_MTRRdefType 0x000002ff
59
60#define MSR_IA32_DEBUGCTLMSR 0x000001d9
61#define MSR_IA32_LASTBRANCHFROMIP 0x000001db
62#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
63#define MSR_IA32_LASTINTFROMIP 0x000001dd
64#define MSR_IA32_LASTINTTOIP 0x000001de
65
66#define MSR_IA32_MC0_CTL 0x00000400
67#define MSR_IA32_MC0_STATUS 0x00000401
68#define MSR_IA32_MC0_ADDR 0x00000402
69#define MSR_IA32_MC0_MISC 0x00000403
70
71#define MSR_P6_PERFCTR0 0x000000c1
72#define MSR_P6_PERFCTR1 0x000000c2
73#define MSR_P6_EVNTSEL0 0x00000186
74#define MSR_P6_EVNTSEL1 0x00000187
75
76/* K7/K8 MSRs. Not complete. See the architecture manual for a more
77 complete list. */
78#define MSR_K7_EVNTSEL0 0xc0010000
79#define MSR_K7_PERFCTR0 0xc0010004
80#define MSR_K7_EVNTSEL1 0xc0010001
81#define MSR_K7_PERFCTR1 0xc0010005
82#define MSR_K7_EVNTSEL2 0xc0010002
83#define MSR_K7_PERFCTR2 0xc0010006
84#define MSR_K7_EVNTSEL3 0xc0010003
85#define MSR_K7_PERFCTR3 0xc0010007
86#define MSR_K8_TOP_MEM1 0xc001001a
87#define MSR_K7_CLK_CTL 0xc001001b
88#define MSR_K8_TOP_MEM2 0xc001001d
89#define MSR_K8_SYSCFG 0xc0010010
90
91#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
92#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
93#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
94
95#define MSR_K7_HWCR 0xc0010015
96#define MSR_K8_HWCR 0xc0010015
97#define MSR_K7_FID_VID_CTL 0xc0010041
98#define MSR_K7_FID_VID_STATUS 0xc0010042
99#define MSR_K8_ENABLE_C1E 0xc0010055
100
101/* K6 MSRs */
102#define MSR_K6_EFER 0xc0000080
103#define MSR_K6_STAR 0xc0000081
104#define MSR_K6_WHCR 0xc0000082
105#define MSR_K6_UWCCR 0xc0000085
106#define MSR_K6_EPMR 0xc0000086
107#define MSR_K6_PSOR 0xc0000087
108#define MSR_K6_PFIR 0xc0000088
109
110/* Centaur-Hauls/IDT defined MSRs. */
111#define MSR_IDT_FCR1 0x00000107
112#define MSR_IDT_FCR2 0x00000108
113#define MSR_IDT_FCR3 0x00000109
114#define MSR_IDT_FCR4 0x0000010a
115
116#define MSR_IDT_MCR0 0x00000110
117#define MSR_IDT_MCR1 0x00000111
118#define MSR_IDT_MCR2 0x00000112
119#define MSR_IDT_MCR3 0x00000113
120#define MSR_IDT_MCR4 0x00000114
121#define MSR_IDT_MCR5 0x00000115
122#define MSR_IDT_MCR6 0x00000116
123#define MSR_IDT_MCR7 0x00000117
124#define MSR_IDT_MCR_CTRL 0x00000120
125
126/* VIA Cyrix defined MSRs*/
127#define MSR_VIA_FCR 0x00001107
128#define MSR_VIA_LONGHAUL 0x0000110a
129#define MSR_VIA_RNG 0x0000110b
130#define MSR_VIA_BCR2 0x00001147
131
132/* Transmeta defined MSRs */
133#define MSR_TMTA_LONGRUN_CTRL 0x80868010
134#define MSR_TMTA_LONGRUN_FLAGS 0x80868011
135#define MSR_TMTA_LRTI_READOUT 0x80868018
136#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a
137
138/* Intel defined MSRs. */
139#define MSR_IA32_P5_MC_ADDR 0x00000000
140#define MSR_IA32_P5_MC_TYPE 0x00000001
141#define MSR_IA32_TSC 0x00000010
142#define MSR_IA32_PLATFORM_ID 0x00000017
143#define MSR_IA32_EBL_CR_POWERON 0x0000002a
144
145#define MSR_IA32_APICBASE 0x0000001b
146#define MSR_IA32_APICBASE_BSP (1<<8)
147#define MSR_IA32_APICBASE_ENABLE (1<<11)
148#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
149
150#define MSR_IA32_UCODE_WRITE 0x00000079
151#define MSR_IA32_UCODE_REV 0x0000008b
152
153#define MSR_IA32_PERF_STATUS 0x00000198
154#define MSR_IA32_PERF_CTL 0x00000199
155
156#define MSR_IA32_MPERF 0x000000e7
157#define MSR_IA32_APERF 0x000000e8
158
159#define MSR_IA32_THERM_CONTROL 0x0000019a
160#define MSR_IA32_THERM_INTERRUPT 0x0000019b
161#define MSR_IA32_THERM_STATUS 0x0000019c
162#define MSR_IA32_MISC_ENABLE 0x000001a0
163
164/* Intel Model 6 */
165#define MSR_P6_EVNTSEL0 0x00000186
166#define MSR_P6_EVNTSEL1 0x00000187
167
168/* P4/Xeon+ specific */
169#define MSR_IA32_MCG_EAX 0x00000180
170#define MSR_IA32_MCG_EBX 0x00000181
171#define MSR_IA32_MCG_ECX 0x00000182
172#define MSR_IA32_MCG_EDX 0x00000183
173#define MSR_IA32_MCG_ESI 0x00000184
174#define MSR_IA32_MCG_EDI 0x00000185
175#define MSR_IA32_MCG_EBP 0x00000186
176#define MSR_IA32_MCG_ESP 0x00000187
177#define MSR_IA32_MCG_EFLAGS 0x00000188
178#define MSR_IA32_MCG_EIP 0x00000189
179#define MSR_IA32_MCG_RESERVED 0x0000018a
180
181/* Pentium IV performance counter MSRs */
182#define MSR_P4_BPU_PERFCTR0 0x00000300
183#define MSR_P4_BPU_PERFCTR1 0x00000301
184#define MSR_P4_BPU_PERFCTR2 0x00000302
185#define MSR_P4_BPU_PERFCTR3 0x00000303
186#define MSR_P4_MS_PERFCTR0 0x00000304
187#define MSR_P4_MS_PERFCTR1 0x00000305
188#define MSR_P4_MS_PERFCTR2 0x00000306
189#define MSR_P4_MS_PERFCTR3 0x00000307
190#define MSR_P4_FLAME_PERFCTR0 0x00000308
191#define MSR_P4_FLAME_PERFCTR1 0x00000309
192#define MSR_P4_FLAME_PERFCTR2 0x0000030a
193#define MSR_P4_FLAME_PERFCTR3 0x0000030b
194#define MSR_P4_IQ_PERFCTR0 0x0000030c
195#define MSR_P4_IQ_PERFCTR1 0x0000030d
196#define MSR_P4_IQ_PERFCTR2 0x0000030e
197#define MSR_P4_IQ_PERFCTR3 0x0000030f
198#define MSR_P4_IQ_PERFCTR4 0x00000310
199#define MSR_P4_IQ_PERFCTR5 0x00000311
200#define MSR_P4_BPU_CCCR0 0x00000360
201#define MSR_P4_BPU_CCCR1 0x00000361
202#define MSR_P4_BPU_CCCR2 0x00000362
203#define MSR_P4_BPU_CCCR3 0x00000363
204#define MSR_P4_MS_CCCR0 0x00000364
205#define MSR_P4_MS_CCCR1 0x00000365
206#define MSR_P4_MS_CCCR2 0x00000366
207#define MSR_P4_MS_CCCR3 0x00000367
208#define MSR_P4_FLAME_CCCR0 0x00000368
209#define MSR_P4_FLAME_CCCR1 0x00000369
210#define MSR_P4_FLAME_CCCR2 0x0000036a
211#define MSR_P4_FLAME_CCCR3 0x0000036b
212#define MSR_P4_IQ_CCCR0 0x0000036c
213#define MSR_P4_IQ_CCCR1 0x0000036d
214#define MSR_P4_IQ_CCCR2 0x0000036e
215#define MSR_P4_IQ_CCCR3 0x0000036f
216#define MSR_P4_IQ_CCCR4 0x00000370
217#define MSR_P4_IQ_CCCR5 0x00000371
218#define MSR_P4_ALF_ESCR0 0x000003ca
219#define MSR_P4_ALF_ESCR1 0x000003cb
220#define MSR_P4_BPU_ESCR0 0x000003b2
221#define MSR_P4_BPU_ESCR1 0x000003b3
222#define MSR_P4_BSU_ESCR0 0x000003a0
223#define MSR_P4_BSU_ESCR1 0x000003a1
224#define MSR_P4_CRU_ESCR0 0x000003b8
225#define MSR_P4_CRU_ESCR1 0x000003b9
226#define MSR_P4_CRU_ESCR2 0x000003cc
227#define MSR_P4_CRU_ESCR3 0x000003cd
228#define MSR_P4_CRU_ESCR4 0x000003e0
229#define MSR_P4_CRU_ESCR5 0x000003e1
230#define MSR_P4_DAC_ESCR0 0x000003a8
231#define MSR_P4_DAC_ESCR1 0x000003a9
232#define MSR_P4_FIRM_ESCR0 0x000003a4
233#define MSR_P4_FIRM_ESCR1 0x000003a5
234#define MSR_P4_FLAME_ESCR0 0x000003a6
235#define MSR_P4_FLAME_ESCR1 0x000003a7
236#define MSR_P4_FSB_ESCR0 0x000003a2
237#define MSR_P4_FSB_ESCR1 0x000003a3
238#define MSR_P4_IQ_ESCR0 0x000003ba
239#define MSR_P4_IQ_ESCR1 0x000003bb
240#define MSR_P4_IS_ESCR0 0x000003b4
241#define MSR_P4_IS_ESCR1 0x000003b5
242#define MSR_P4_ITLB_ESCR0 0x000003b6
243#define MSR_P4_ITLB_ESCR1 0x000003b7
244#define MSR_P4_IX_ESCR0 0x000003c8
245#define MSR_P4_IX_ESCR1 0x000003c9
246#define MSR_P4_MOB_ESCR0 0x000003aa
247#define MSR_P4_MOB_ESCR1 0x000003ab
248#define MSR_P4_MS_ESCR0 0x000003c0
249#define MSR_P4_MS_ESCR1 0x000003c1
250#define MSR_P4_PMH_ESCR0 0x000003ac
251#define MSR_P4_PMH_ESCR1 0x000003ad
252#define MSR_P4_RAT_ESCR0 0x000003bc
253#define MSR_P4_RAT_ESCR1 0x000003bd
254#define MSR_P4_SAAT_ESCR0 0x000003ae
255#define MSR_P4_SAAT_ESCR1 0x000003af
256#define MSR_P4_SSU_ESCR0 0x000003be
257#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */
258
259#define MSR_P4_TBPU_ESCR0 0x000003c2
260#define MSR_P4_TBPU_ESCR1 0x000003c3
261#define MSR_P4_TC_ESCR0 0x000003c4
262#define MSR_P4_TC_ESCR1 0x000003c5
263#define MSR_P4_U2L_ESCR0 0x000003b0
264#define MSR_P4_U2L_ESCR1 0x000003b1
265
266/* Intel Core-based CPU performance counters */
267#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
268#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
269#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b
270#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d
271#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e
272#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
273#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
274
275/* Geode defined MSRs */
276#define MSR_GEODE_BUSCONT_CONF0 0x00001900
277
278#endif /* __ASM_MSR_INDEX_H */
diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h
index 2ad3f30b1a68..9559894c7658 100644
--- a/include/asm-i386/msr.h
+++ b/include/asm-i386/msr.h
@@ -1,6 +1,79 @@
1#ifndef __ASM_MSR_H 1#ifndef __ASM_MSR_H
2#define __ASM_MSR_H 2#define __ASM_MSR_H
3 3
4#include <asm/msr-index.h>
5
6#ifdef __KERNEL__
7#ifndef __ASSEMBLY__
8
9#include <asm/errno.h>
10
11static inline unsigned long long native_read_msr(unsigned int msr)
12{
13 unsigned long long val;
14
15 asm volatile("rdmsr" : "=A" (val) : "c" (msr));
16 return val;
17}
18
19static inline unsigned long long native_read_msr_safe(unsigned int msr,
20 int *err)
21{
22 unsigned long long val;
23
24 asm volatile("2: rdmsr ; xorl %0,%0\n"
25 "1:\n\t"
26 ".section .fixup,\"ax\"\n\t"
27 "3: movl %3,%0 ; jmp 1b\n\t"
28 ".previous\n\t"
29 ".section __ex_table,\"a\"\n"
30 " .align 4\n\t"
31 " .long 2b,3b\n\t"
32 ".previous"
33 : "=r" (*err), "=A" (val)
34 : "c" (msr), "i" (-EFAULT));
35
36 return val;
37}
38
39static inline void native_write_msr(unsigned int msr, unsigned long long val)
40{
41 asm volatile("wrmsr" : : "c" (msr), "A"(val));
42}
43
44static inline int native_write_msr_safe(unsigned int msr,
45 unsigned long long val)
46{
47 int err;
48 asm volatile("2: wrmsr ; xorl %0,%0\n"
49 "1:\n\t"
50 ".section .fixup,\"ax\"\n\t"
51 "3: movl %4,%0 ; jmp 1b\n\t"
52 ".previous\n\t"
53 ".section __ex_table,\"a\"\n"
54 " .align 4\n\t"
55 " .long 2b,3b\n\t"
56 ".previous"
57 : "=a" (err)
58 : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)),
59 "i" (-EFAULT));
60 return err;
61}
62
63static inline unsigned long long native_read_tsc(void)
64{
65 unsigned long long val;
66 asm volatile("rdtsc" : "=A" (val));
67 return val;
68}
69
70static inline unsigned long long native_read_pmc(void)
71{
72 unsigned long long val;
73 asm volatile("rdpmc" : "=A" (val));
74 return val;
75}
76
4#ifdef CONFIG_PARAVIRT 77#ifdef CONFIG_PARAVIRT
5#include <asm/paravirt.h> 78#include <asm/paravirt.h>
6#else 79#else
@@ -11,22 +84,20 @@
11 * pointer indirection), this allows gcc to optimize better 84 * pointer indirection), this allows gcc to optimize better
12 */ 85 */
13 86
14#define rdmsr(msr,val1,val2) \ 87#define rdmsr(msr,val1,val2) \
15 __asm__ __volatile__("rdmsr" \ 88 do { \
16 : "=a" (val1), "=d" (val2) \ 89 unsigned long long __val = native_read_msr(msr); \
17 : "c" (msr)) 90 val1 = __val; \
91 val2 = __val >> 32; \
92 } while(0)
18 93
19#define wrmsr(msr,val1,val2) \ 94#define wrmsr(msr,val1,val2) \
20 __asm__ __volatile__("wrmsr" \ 95 native_write_msr(msr, ((unsigned long long)val2 << 32) | val1)
21 : /* no outputs */ \
22 : "c" (msr), "a" (val1), "d" (val2))
23 96
24#define rdmsrl(msr,val) do { \ 97#define rdmsrl(msr,val) \
25 unsigned long l__,h__; \ 98 do { \
26 rdmsr (msr, l__, h__); \ 99 (val) = native_read_msr(msr); \
27 val = l__; \ 100 } while(0)
28 val |= ((u64)h__<<32); \
29} while(0)
30 101
31static inline void wrmsrl (unsigned long msr, unsigned long long val) 102static inline void wrmsrl (unsigned long msr, unsigned long long val)
32{ 103{
@@ -37,50 +108,41 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val)
37} 108}
38 109
39/* wrmsr with exception handling */ 110/* wrmsr with exception handling */
40#define wrmsr_safe(msr,a,b) ({ int ret__; \ 111#define wrmsr_safe(msr,val1,val2) \
41 asm volatile("2: wrmsr ; xorl %0,%0\n" \ 112 (native_write_msr_safe(msr, ((unsigned long long)val2 << 32) | val1))
42 "1:\n\t" \
43 ".section .fixup,\"ax\"\n\t" \
44 "3: movl %4,%0 ; jmp 1b\n\t" \
45 ".previous\n\t" \
46 ".section __ex_table,\"a\"\n" \
47 " .align 4\n\t" \
48 " .long 2b,3b\n\t" \
49 ".previous" \
50 : "=a" (ret__) \
51 : "c" (msr), "0" (a), "d" (b), "i" (-EFAULT));\
52 ret__; })
53 113
54/* rdmsr with exception handling */ 114/* rdmsr with exception handling */
55#define rdmsr_safe(msr,a,b) ({ int ret__; \ 115#define rdmsr_safe(msr,p1,p2) \
56 asm volatile("2: rdmsr ; xorl %0,%0\n" \ 116 ({ \
57 "1:\n\t" \ 117 int __err; \
58 ".section .fixup,\"ax\"\n\t" \ 118 unsigned long long __val = native_read_msr_safe(msr, &__err);\
59 "3: movl %4,%0 ; jmp 1b\n\t" \ 119 (*p1) = __val; \
60 ".previous\n\t" \ 120 (*p2) = __val >> 32; \
61 ".section __ex_table,\"a\"\n" \ 121 __err; \
62 " .align 4\n\t" \ 122 })
63 " .long 2b,3b\n\t" \ 123
64 ".previous" \ 124#define rdtsc(low,high) \
65 : "=r" (ret__), "=a" (*(a)), "=d" (*(b)) \ 125 do { \
66 : "c" (msr), "i" (-EFAULT));\ 126 u64 _l = native_read_tsc(); \
67 ret__; }) 127 (low) = (u32)_l; \
68 128 (high) = _l >> 32; \
69#define rdtsc(low,high) \ 129 } while(0)
70 __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) 130
71 131#define rdtscl(low) \
72#define rdtscl(low) \ 132 do { \
73 __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx") 133 (low) = native_read_tsc(); \
74 134 } while(0)
75#define rdtscll(val) \ 135
76 __asm__ __volatile__("rdtsc" : "=A" (val)) 136#define rdtscll(val) ((val) = native_read_tsc())
77 137
78#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) 138#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
79 139
80#define rdpmc(counter,low,high) \ 140#define rdpmc(counter,low,high) \
81 __asm__ __volatile__("rdpmc" \ 141 do { \
82 : "=a" (low), "=d" (high) \ 142 u64 _l = native_read_pmc(); \
83 : "c" (counter)) 143 low = (u32)_l; \
144 high = _l >> 32; \
145 } while(0)
84#endif /* !CONFIG_PARAVIRT */ 146#endif /* !CONFIG_PARAVIRT */
85 147
86#ifdef CONFIG_SMP 148#ifdef CONFIG_SMP
@@ -96,234 +158,6 @@ static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
96 wrmsr(msr_no, l, h); 158 wrmsr(msr_no, l, h);
97} 159}
98#endif /* CONFIG_SMP */ 160#endif /* CONFIG_SMP */
99 161#endif
100/* symbolic names for some interesting MSRs */ 162#endif
101/* Intel defined MSRs. */
102#define MSR_IA32_P5_MC_ADDR 0
103#define MSR_IA32_P5_MC_TYPE 1
104#define MSR_IA32_PLATFORM_ID 0x17
105#define MSR_IA32_EBL_CR_POWERON 0x2a
106
107#define MSR_IA32_APICBASE 0x1b
108#define MSR_IA32_APICBASE_BSP (1<<8)
109#define MSR_IA32_APICBASE_ENABLE (1<<11)
110#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
111
112#define MSR_IA32_UCODE_WRITE 0x79
113#define MSR_IA32_UCODE_REV 0x8b
114
115#define MSR_P6_PERFCTR0 0xc1
116#define MSR_P6_PERFCTR1 0xc2
117#define MSR_FSB_FREQ 0xcd
118
119
120#define MSR_IA32_BBL_CR_CTL 0x119
121
122#define MSR_IA32_SYSENTER_CS 0x174
123#define MSR_IA32_SYSENTER_ESP 0x175
124#define MSR_IA32_SYSENTER_EIP 0x176
125
126#define MSR_IA32_MCG_CAP 0x179
127#define MSR_IA32_MCG_STATUS 0x17a
128#define MSR_IA32_MCG_CTL 0x17b
129
130/* P4/Xeon+ specific */
131#define MSR_IA32_MCG_EAX 0x180
132#define MSR_IA32_MCG_EBX 0x181
133#define MSR_IA32_MCG_ECX 0x182
134#define MSR_IA32_MCG_EDX 0x183
135#define MSR_IA32_MCG_ESI 0x184
136#define MSR_IA32_MCG_EDI 0x185
137#define MSR_IA32_MCG_EBP 0x186
138#define MSR_IA32_MCG_ESP 0x187
139#define MSR_IA32_MCG_EFLAGS 0x188
140#define MSR_IA32_MCG_EIP 0x189
141#define MSR_IA32_MCG_RESERVED 0x18A
142
143#define MSR_P6_EVNTSEL0 0x186
144#define MSR_P6_EVNTSEL1 0x187
145
146#define MSR_IA32_PERF_STATUS 0x198
147#define MSR_IA32_PERF_CTL 0x199
148
149#define MSR_IA32_MPERF 0xE7
150#define MSR_IA32_APERF 0xE8
151
152#define MSR_IA32_THERM_CONTROL 0x19a
153#define MSR_IA32_THERM_INTERRUPT 0x19b
154#define MSR_IA32_THERM_STATUS 0x19c
155#define MSR_IA32_MISC_ENABLE 0x1a0
156
157#define MSR_IA32_DEBUGCTLMSR 0x1d9
158#define MSR_IA32_LASTBRANCHFROMIP 0x1db
159#define MSR_IA32_LASTBRANCHTOIP 0x1dc
160#define MSR_IA32_LASTINTFROMIP 0x1dd
161#define MSR_IA32_LASTINTTOIP 0x1de
162
163#define MSR_IA32_MC0_CTL 0x400
164#define MSR_IA32_MC0_STATUS 0x401
165#define MSR_IA32_MC0_ADDR 0x402
166#define MSR_IA32_MC0_MISC 0x403
167
168#define MSR_IA32_PEBS_ENABLE 0x3f1
169#define MSR_IA32_DS_AREA 0x600
170#define MSR_IA32_PERF_CAPABILITIES 0x345
171
172/* Pentium IV performance counter MSRs */
173#define MSR_P4_BPU_PERFCTR0 0x300
174#define MSR_P4_BPU_PERFCTR1 0x301
175#define MSR_P4_BPU_PERFCTR2 0x302
176#define MSR_P4_BPU_PERFCTR3 0x303
177#define MSR_P4_MS_PERFCTR0 0x304
178#define MSR_P4_MS_PERFCTR1 0x305
179#define MSR_P4_MS_PERFCTR2 0x306
180#define MSR_P4_MS_PERFCTR3 0x307
181#define MSR_P4_FLAME_PERFCTR0 0x308
182#define MSR_P4_FLAME_PERFCTR1 0x309
183#define MSR_P4_FLAME_PERFCTR2 0x30a
184#define MSR_P4_FLAME_PERFCTR3 0x30b
185#define MSR_P4_IQ_PERFCTR0 0x30c
186#define MSR_P4_IQ_PERFCTR1 0x30d
187#define MSR_P4_IQ_PERFCTR2 0x30e
188#define MSR_P4_IQ_PERFCTR3 0x30f
189#define MSR_P4_IQ_PERFCTR4 0x310
190#define MSR_P4_IQ_PERFCTR5 0x311
191#define MSR_P4_BPU_CCCR0 0x360
192#define MSR_P4_BPU_CCCR1 0x361
193#define MSR_P4_BPU_CCCR2 0x362
194#define MSR_P4_BPU_CCCR3 0x363
195#define MSR_P4_MS_CCCR0 0x364
196#define MSR_P4_MS_CCCR1 0x365
197#define MSR_P4_MS_CCCR2 0x366
198#define MSR_P4_MS_CCCR3 0x367
199#define MSR_P4_FLAME_CCCR0 0x368
200#define MSR_P4_FLAME_CCCR1 0x369
201#define MSR_P4_FLAME_CCCR2 0x36a
202#define MSR_P4_FLAME_CCCR3 0x36b
203#define MSR_P4_IQ_CCCR0 0x36c
204#define MSR_P4_IQ_CCCR1 0x36d
205#define MSR_P4_IQ_CCCR2 0x36e
206#define MSR_P4_IQ_CCCR3 0x36f
207#define MSR_P4_IQ_CCCR4 0x370
208#define MSR_P4_IQ_CCCR5 0x371
209#define MSR_P4_ALF_ESCR0 0x3ca
210#define MSR_P4_ALF_ESCR1 0x3cb
211#define MSR_P4_BPU_ESCR0 0x3b2
212#define MSR_P4_BPU_ESCR1 0x3b3
213#define MSR_P4_BSU_ESCR0 0x3a0
214#define MSR_P4_BSU_ESCR1 0x3a1
215#define MSR_P4_CRU_ESCR0 0x3b8
216#define MSR_P4_CRU_ESCR1 0x3b9
217#define MSR_P4_CRU_ESCR2 0x3cc
218#define MSR_P4_CRU_ESCR3 0x3cd
219#define MSR_P4_CRU_ESCR4 0x3e0
220#define MSR_P4_CRU_ESCR5 0x3e1
221#define MSR_P4_DAC_ESCR0 0x3a8
222#define MSR_P4_DAC_ESCR1 0x3a9
223#define MSR_P4_FIRM_ESCR0 0x3a4
224#define MSR_P4_FIRM_ESCR1 0x3a5
225#define MSR_P4_FLAME_ESCR0 0x3a6
226#define MSR_P4_FLAME_ESCR1 0x3a7
227#define MSR_P4_FSB_ESCR0 0x3a2
228#define MSR_P4_FSB_ESCR1 0x3a3
229#define MSR_P4_IQ_ESCR0 0x3ba
230#define MSR_P4_IQ_ESCR1 0x3bb
231#define MSR_P4_IS_ESCR0 0x3b4
232#define MSR_P4_IS_ESCR1 0x3b5
233#define MSR_P4_ITLB_ESCR0 0x3b6
234#define MSR_P4_ITLB_ESCR1 0x3b7
235#define MSR_P4_IX_ESCR0 0x3c8
236#define MSR_P4_IX_ESCR1 0x3c9
237#define MSR_P4_MOB_ESCR0 0x3aa
238#define MSR_P4_MOB_ESCR1 0x3ab
239#define MSR_P4_MS_ESCR0 0x3c0
240#define MSR_P4_MS_ESCR1 0x3c1
241#define MSR_P4_PMH_ESCR0 0x3ac
242#define MSR_P4_PMH_ESCR1 0x3ad
243#define MSR_P4_RAT_ESCR0 0x3bc
244#define MSR_P4_RAT_ESCR1 0x3bd
245#define MSR_P4_SAAT_ESCR0 0x3ae
246#define MSR_P4_SAAT_ESCR1 0x3af
247#define MSR_P4_SSU_ESCR0 0x3be
248#define MSR_P4_SSU_ESCR1 0x3bf /* guess: not defined in manual */
249#define MSR_P4_TBPU_ESCR0 0x3c2
250#define MSR_P4_TBPU_ESCR1 0x3c3
251#define MSR_P4_TC_ESCR0 0x3c4
252#define MSR_P4_TC_ESCR1 0x3c5
253#define MSR_P4_U2L_ESCR0 0x3b0
254#define MSR_P4_U2L_ESCR1 0x3b1
255
256/* AMD Defined MSRs */
257#define MSR_K6_EFER 0xC0000080
258#define MSR_K6_STAR 0xC0000081
259#define MSR_K6_WHCR 0xC0000082
260#define MSR_K6_UWCCR 0xC0000085
261#define MSR_K6_EPMR 0xC0000086
262#define MSR_K6_PSOR 0xC0000087
263#define MSR_K6_PFIR 0xC0000088
264
265#define MSR_K7_EVNTSEL0 0xC0010000
266#define MSR_K7_EVNTSEL1 0xC0010001
267#define MSR_K7_EVNTSEL2 0xC0010002
268#define MSR_K7_EVNTSEL3 0xC0010003
269#define MSR_K7_PERFCTR0 0xC0010004
270#define MSR_K7_PERFCTR1 0xC0010005
271#define MSR_K7_PERFCTR2 0xC0010006
272#define MSR_K7_PERFCTR3 0xC0010007
273#define MSR_K7_HWCR 0xC0010015
274#define MSR_K7_CLK_CTL 0xC001001b
275#define MSR_K7_FID_VID_CTL 0xC0010041
276#define MSR_K7_FID_VID_STATUS 0xC0010042
277
278#define MSR_K8_ENABLE_C1E 0xC0010055
279
280/* extended feature register */
281#define MSR_EFER 0xc0000080
282
283/* EFER bits: */
284
285/* Execute Disable enable */
286#define _EFER_NX 11
287#define EFER_NX (1<<_EFER_NX)
288
289/* Centaur-Hauls/IDT defined MSRs. */
290#define MSR_IDT_FCR1 0x107
291#define MSR_IDT_FCR2 0x108
292#define MSR_IDT_FCR3 0x109
293#define MSR_IDT_FCR4 0x10a
294
295#define MSR_IDT_MCR0 0x110
296#define MSR_IDT_MCR1 0x111
297#define MSR_IDT_MCR2 0x112
298#define MSR_IDT_MCR3 0x113
299#define MSR_IDT_MCR4 0x114
300#define MSR_IDT_MCR5 0x115
301#define MSR_IDT_MCR6 0x116
302#define MSR_IDT_MCR7 0x117
303#define MSR_IDT_MCR_CTRL 0x120
304
305/* VIA Cyrix defined MSRs*/
306#define MSR_VIA_FCR 0x1107
307#define MSR_VIA_LONGHAUL 0x110a
308#define MSR_VIA_RNG 0x110b
309#define MSR_VIA_BCR2 0x1147
310
311/* Transmeta defined MSRs */
312#define MSR_TMTA_LONGRUN_CTRL 0x80868010
313#define MSR_TMTA_LONGRUN_FLAGS 0x80868011
314#define MSR_TMTA_LRTI_READOUT 0x80868018
315#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a
316
317/* Intel Core-based CPU performance counters */
318#define MSR_CORE_PERF_FIXED_CTR0 0x309
319#define MSR_CORE_PERF_FIXED_CTR1 0x30a
320#define MSR_CORE_PERF_FIXED_CTR2 0x30b
321#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x38d
322#define MSR_CORE_PERF_GLOBAL_STATUS 0x38e
323#define MSR_CORE_PERF_GLOBAL_CTRL 0x38f
324#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x390
325
326/* Geode defined MSRs */
327#define MSR_GEODE_BUSCONT_CONF0 0x1900
328
329#endif /* __ASM_MSR_H */ 163#endif /* __ASM_MSR_H */
diff --git a/include/asm-i386/mtrr.h b/include/asm-i386/mtrr.h
index 07f063ae26ea..7e9c7ccbdcfe 100644
--- a/include/asm-i386/mtrr.h
+++ b/include/asm-i386/mtrr.h
@@ -69,6 +69,8 @@ struct mtrr_gentry
69 69
70/* The following functions are for use by other drivers */ 70/* The following functions are for use by other drivers */
71# ifdef CONFIG_MTRR 71# ifdef CONFIG_MTRR
72extern void mtrr_save_fixed_ranges(void *);
73extern void mtrr_save_state(void);
72extern int mtrr_add (unsigned long base, unsigned long size, 74extern int mtrr_add (unsigned long base, unsigned long size,
73 unsigned int type, char increment); 75 unsigned int type, char increment);
74extern int mtrr_add_page (unsigned long base, unsigned long size, 76extern int mtrr_add_page (unsigned long base, unsigned long size,
@@ -79,6 +81,8 @@ extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
79extern void mtrr_ap_init(void); 81extern void mtrr_ap_init(void);
80extern void mtrr_bp_init(void); 82extern void mtrr_bp_init(void);
81# else 83# else
84#define mtrr_save_fixed_ranges(arg) do {} while (0)
85#define mtrr_save_state() do {} while (0)
82static __inline__ int mtrr_add (unsigned long base, unsigned long size, 86static __inline__ int mtrr_add (unsigned long base, unsigned long size,
83 unsigned int type, char increment) 87 unsigned int type, char increment)
84{ 88{
diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h
index b04333ea6f31..fb1e133efd9f 100644
--- a/include/asm-i386/nmi.h
+++ b/include/asm-i386/nmi.h
@@ -50,4 +50,12 @@ void __trigger_all_cpu_backtrace(void);
50 50
51#endif 51#endif
52 52
53void lapic_watchdog_stop(void);
54int lapic_watchdog_init(unsigned nmi_hz);
55int lapic_wd_event(unsigned nmi_hz);
56unsigned lapic_adjust_nmi_hz(unsigned hz);
57int lapic_watchdog_ok(void);
58void disable_lapic_nmi_watchdog(void);
59void enable_lapic_nmi_watchdog(void);
60
53#endif /* ASM_NMI_H */ 61#endif /* ASM_NMI_H */
diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h
index 7b19f454761d..818ac8bf01e2 100644
--- a/include/asm-i386/page.h
+++ b/include/asm-i386/page.h
@@ -12,7 +12,6 @@
12#ifdef __KERNEL__ 12#ifdef __KERNEL__
13#ifndef __ASSEMBLY__ 13#ifndef __ASSEMBLY__
14 14
15
16#ifdef CONFIG_X86_USE_3DNOW 15#ifdef CONFIG_X86_USE_3DNOW
17 16
18#include <asm/mmx.h> 17#include <asm/mmx.h>
@@ -42,26 +41,81 @@
42 * These are used to make use of C type-checking.. 41 * These are used to make use of C type-checking..
43 */ 42 */
44extern int nx_enabled; 43extern int nx_enabled;
44
45#ifdef CONFIG_X86_PAE 45#ifdef CONFIG_X86_PAE
46extern unsigned long long __supported_pte_mask; 46extern unsigned long long __supported_pte_mask;
47typedef struct { unsigned long pte_low, pte_high; } pte_t; 47typedef struct { unsigned long pte_low, pte_high; } pte_t;
48typedef struct { unsigned long long pmd; } pmd_t; 48typedef struct { unsigned long long pmd; } pmd_t;
49typedef struct { unsigned long long pgd; } pgd_t; 49typedef struct { unsigned long long pgd; } pgd_t;
50typedef struct { unsigned long long pgprot; } pgprot_t; 50typedef struct { unsigned long long pgprot; } pgprot_t;
51#define pmd_val(x) ((x).pmd) 51
52#define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) 52static inline unsigned long long native_pgd_val(pgd_t pgd)
53#define __pmd(x) ((pmd_t) { (x) } ) 53{
54 return pgd.pgd;
55}
56
57static inline unsigned long long native_pmd_val(pmd_t pmd)
58{
59 return pmd.pmd;
60}
61
62static inline unsigned long long native_pte_val(pte_t pte)
63{
64 return pte.pte_low | ((unsigned long long)pte.pte_high << 32);
65}
66
67static inline pgd_t native_make_pgd(unsigned long long val)
68{
69 return (pgd_t) { val };
70}
71
72static inline pmd_t native_make_pmd(unsigned long long val)
73{
74 return (pmd_t) { val };
75}
76
77static inline pte_t native_make_pte(unsigned long long val)
78{
79 return (pte_t) { .pte_low = val, .pte_high = (val >> 32) } ;
80}
81
82#ifndef CONFIG_PARAVIRT
83#define pmd_val(x) native_pmd_val(x)
84#define __pmd(x) native_make_pmd(x)
85#endif
86
54#define HPAGE_SHIFT 21 87#define HPAGE_SHIFT 21
55#include <asm-generic/pgtable-nopud.h> 88#include <asm-generic/pgtable-nopud.h>
56#else 89#else /* !CONFIG_X86_PAE */
57typedef struct { unsigned long pte_low; } pte_t; 90typedef struct { unsigned long pte_low; } pte_t;
58typedef struct { unsigned long pgd; } pgd_t; 91typedef struct { unsigned long pgd; } pgd_t;
59typedef struct { unsigned long pgprot; } pgprot_t; 92typedef struct { unsigned long pgprot; } pgprot_t;
60#define boot_pte_t pte_t /* or would you rather have a typedef */ 93#define boot_pte_t pte_t /* or would you rather have a typedef */
61#define pte_val(x) ((x).pte_low) 94
95static inline unsigned long native_pgd_val(pgd_t pgd)
96{
97 return pgd.pgd;
98}
99
100static inline unsigned long native_pte_val(pte_t pte)
101{
102 return pte.pte_low;
103}
104
105static inline pgd_t native_make_pgd(unsigned long val)
106{
107 return (pgd_t) { val };
108}
109
110static inline pte_t native_make_pte(unsigned long val)
111{
112 return (pte_t) { .pte_low = val };
113}
114
62#define HPAGE_SHIFT 22 115#define HPAGE_SHIFT 22
63#include <asm-generic/pgtable-nopmd.h> 116#include <asm-generic/pgtable-nopmd.h>
64#endif 117#endif /* CONFIG_X86_PAE */
118
65#define PTE_MASK PAGE_MASK 119#define PTE_MASK PAGE_MASK
66 120
67#ifdef CONFIG_HUGETLB_PAGE 121#ifdef CONFIG_HUGETLB_PAGE
@@ -71,13 +125,16 @@ typedef struct { unsigned long pgprot; } pgprot_t;
71#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA 125#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
72#endif 126#endif
73 127
74#define pgd_val(x) ((x).pgd)
75#define pgprot_val(x) ((x).pgprot) 128#define pgprot_val(x) ((x).pgprot)
76
77#define __pte(x) ((pte_t) { (x) } )
78#define __pgd(x) ((pgd_t) { (x) } )
79#define __pgprot(x) ((pgprot_t) { (x) } ) 129#define __pgprot(x) ((pgprot_t) { (x) } )
80 130
131#ifndef CONFIG_PARAVIRT
132#define pgd_val(x) native_pgd_val(x)
133#define __pgd(x) native_make_pgd(x)
134#define pte_val(x) native_pte_val(x)
135#define __pte(x) native_make_pte(x)
136#endif
137
81#endif /* !__ASSEMBLY__ */ 138#endif /* !__ASSEMBLY__ */
82 139
83/* to align the pointer to the (next) page boundary */ 140/* to align the pointer to the (next) page boundary */
@@ -143,9 +200,7 @@ extern int page_is_ram(unsigned long pagenr);
143#include <asm-generic/memory_model.h> 200#include <asm-generic/memory_model.h>
144#include <asm-generic/page.h> 201#include <asm-generic/page.h>
145 202
146#ifndef CONFIG_COMPAT_VDSO
147#define __HAVE_ARCH_GATE_AREA 1 203#define __HAVE_ARCH_GATE_AREA 1
148#endif
149#endif /* __KERNEL__ */ 204#endif /* __KERNEL__ */
150 205
151#endif /* _I386_PAGE_H */ 206#endif /* _I386_PAGE_H */
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
index e63f1e444fcf..e2e7f98723c5 100644
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -2,20 +2,9 @@
2#define __ASM_PARAVIRT_H 2#define __ASM_PARAVIRT_H
3/* Various instructions on x86 need to be replaced for 3/* Various instructions on x86 need to be replaced for
4 * para-virtualization: those hooks are defined here. */ 4 * para-virtualization: those hooks are defined here. */
5#include <linux/linkage.h>
6#include <linux/stringify.h>
7#include <asm/page.h>
8 5
9#ifdef CONFIG_PARAVIRT 6#ifdef CONFIG_PARAVIRT
10/* These are the most performance critical ops, so we want to be able to patch 7#include <asm/page.h>
11 * callers */
12#define PARAVIRT_IRQ_DISABLE 0
13#define PARAVIRT_IRQ_ENABLE 1
14#define PARAVIRT_RESTORE_FLAGS 2
15#define PARAVIRT_SAVE_FLAGS 3
16#define PARAVIRT_SAVE_FLAGS_IRQ_DISABLE 4
17#define PARAVIRT_INTERRUPT_RETURN 5
18#define PARAVIRT_STI_SYSEXIT 6
19 8
20/* Bitmask of what can be clobbered: usually at least eax. */ 9/* Bitmask of what can be clobbered: usually at least eax. */
21#define CLBR_NONE 0x0 10#define CLBR_NONE 0x0
@@ -25,13 +14,29 @@
25#define CLBR_ANY 0x7 14#define CLBR_ANY 0x7
26 15
27#ifndef __ASSEMBLY__ 16#ifndef __ASSEMBLY__
17#include <linux/types.h>
18#include <linux/cpumask.h>
19#include <asm/kmap_types.h>
20
21struct page;
28struct thread_struct; 22struct thread_struct;
29struct Xgt_desc_struct; 23struct Xgt_desc_struct;
30struct tss_struct; 24struct tss_struct;
31struct mm_struct; 25struct mm_struct;
26struct desc_struct;
27
28/* Lazy mode for batching updates / context switch */
29enum paravirt_lazy_mode {
30 PARAVIRT_LAZY_NONE = 0,
31 PARAVIRT_LAZY_MMU = 1,
32 PARAVIRT_LAZY_CPU = 2,
33 PARAVIRT_LAZY_FLUSH = 3,
34};
35
32struct paravirt_ops 36struct paravirt_ops
33{ 37{
34 unsigned int kernel_rpl; 38 unsigned int kernel_rpl;
39 int shared_kernel_pmd;
35 int paravirt_enabled; 40 int paravirt_enabled;
36 const char *name; 41 const char *name;
37 42
@@ -44,24 +49,33 @@ struct paravirt_ops
44 */ 49 */
45 unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len); 50 unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len);
46 51
52 /* Basic arch-specific setup */
47 void (*arch_setup)(void); 53 void (*arch_setup)(void);
48 char *(*memory_setup)(void); 54 char *(*memory_setup)(void);
49 void (*init_IRQ)(void); 55 void (*init_IRQ)(void);
56 void (*time_init)(void);
50 57
58 /*
59 * Called before/after init_mm pagetable setup. setup_start
60 * may reset %cr3, and may pre-install parts of the pagetable;
61 * pagetable setup is expected to preserve any existing
62 * mapping.
63 */
64 void (*pagetable_setup_start)(pgd_t *pgd_base);
65 void (*pagetable_setup_done)(pgd_t *pgd_base);
66
67 /* Print a banner to identify the environment */
51 void (*banner)(void); 68 void (*banner)(void);
52 69
70 /* Set and set time of day */
53 unsigned long (*get_wallclock)(void); 71 unsigned long (*get_wallclock)(void);
54 int (*set_wallclock)(unsigned long); 72 int (*set_wallclock)(unsigned long);
55 void (*time_init)(void);
56
57 /* All the function pointers here are declared as "fastcall"
58 so that we get a specific register-based calling
59 convention. This makes it easier to implement inline
60 assembler replacements. */
61 73
74 /* cpuid emulation, mostly so that caps bits can be disabled */
62 void (*cpuid)(unsigned int *eax, unsigned int *ebx, 75 void (*cpuid)(unsigned int *eax, unsigned int *ebx,
63 unsigned int *ecx, unsigned int *edx); 76 unsigned int *ecx, unsigned int *edx);
64 77
78 /* hooks for various privileged instructions */
65 unsigned long (*get_debugreg)(int regno); 79 unsigned long (*get_debugreg)(int regno);
66 void (*set_debugreg)(int regno, unsigned long value); 80 void (*set_debugreg)(int regno, unsigned long value);
67 81
@@ -80,15 +94,23 @@ struct paravirt_ops
80 unsigned long (*read_cr4)(void); 94 unsigned long (*read_cr4)(void);
81 void (*write_cr4)(unsigned long); 95 void (*write_cr4)(unsigned long);
82 96
97 /*
98 * Get/set interrupt state. save_fl and restore_fl are only
99 * expected to use X86_EFLAGS_IF; all other bits
100 * returned from save_fl are undefined, and may be ignored by
101 * restore_fl.
102 */
83 unsigned long (*save_fl)(void); 103 unsigned long (*save_fl)(void);
84 void (*restore_fl)(unsigned long); 104 void (*restore_fl)(unsigned long);
85 void (*irq_disable)(void); 105 void (*irq_disable)(void);
86 void (*irq_enable)(void); 106 void (*irq_enable)(void);
87 void (*safe_halt)(void); 107 void (*safe_halt)(void);
88 void (*halt)(void); 108 void (*halt)(void);
109
89 void (*wbinvd)(void); 110 void (*wbinvd)(void);
90 111
91 /* err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ 112 /* MSR, PMC and TSR operations.
113 err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
92 u64 (*read_msr)(unsigned int msr, int *err); 114 u64 (*read_msr)(unsigned int msr, int *err);
93 int (*write_msr)(unsigned int msr, u64 val); 115 int (*write_msr)(unsigned int msr, u64 val);
94 116
@@ -97,6 +119,7 @@ struct paravirt_ops
97 u64 (*get_scheduled_cycles)(void); 119 u64 (*get_scheduled_cycles)(void);
98 unsigned long (*get_cpu_khz)(void); 120 unsigned long (*get_cpu_khz)(void);
99 121
122 /* Segment descriptor handling */
100 void (*load_tr_desc)(void); 123 void (*load_tr_desc)(void);
101 void (*load_gdt)(const struct Xgt_desc_struct *); 124 void (*load_gdt)(const struct Xgt_desc_struct *);
102 void (*load_idt)(const struct Xgt_desc_struct *); 125 void (*load_idt)(const struct Xgt_desc_struct *);
@@ -105,59 +128,98 @@ struct paravirt_ops
105 void (*set_ldt)(const void *desc, unsigned entries); 128 void (*set_ldt)(const void *desc, unsigned entries);
106 unsigned long (*store_tr)(void); 129 unsigned long (*store_tr)(void);
107 void (*load_tls)(struct thread_struct *t, unsigned int cpu); 130 void (*load_tls)(struct thread_struct *t, unsigned int cpu);
108 void (*write_ldt_entry)(void *dt, int entrynum, 131 void (*write_ldt_entry)(struct desc_struct *,
109 u32 low, u32 high); 132 int entrynum, u32 low, u32 high);
110 void (*write_gdt_entry)(void *dt, int entrynum, 133 void (*write_gdt_entry)(struct desc_struct *,
111 u32 low, u32 high); 134 int entrynum, u32 low, u32 high);
112 void (*write_idt_entry)(void *dt, int entrynum, 135 void (*write_idt_entry)(struct desc_struct *,
113 u32 low, u32 high); 136 int entrynum, u32 low, u32 high);
114 void (*load_esp0)(struct tss_struct *tss, 137 void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t);
115 struct thread_struct *thread);
116 138
117 void (*set_iopl_mask)(unsigned mask); 139 void (*set_iopl_mask)(unsigned mask);
118
119 void (*io_delay)(void); 140 void (*io_delay)(void);
120 141
142 /*
143 * Hooks for intercepting the creation/use/destruction of an
144 * mm_struct.
145 */
146 void (*activate_mm)(struct mm_struct *prev,
147 struct mm_struct *next);
148 void (*dup_mmap)(struct mm_struct *oldmm,
149 struct mm_struct *mm);
150 void (*exit_mmap)(struct mm_struct *mm);
151
121#ifdef CONFIG_X86_LOCAL_APIC 152#ifdef CONFIG_X86_LOCAL_APIC
153 /*
154 * Direct APIC operations, principally for VMI. Ideally
155 * these shouldn't be in this interface.
156 */
122 void (*apic_write)(unsigned long reg, unsigned long v); 157 void (*apic_write)(unsigned long reg, unsigned long v);
123 void (*apic_write_atomic)(unsigned long reg, unsigned long v); 158 void (*apic_write_atomic)(unsigned long reg, unsigned long v);
124 unsigned long (*apic_read)(unsigned long reg); 159 unsigned long (*apic_read)(unsigned long reg);
125 void (*setup_boot_clock)(void); 160 void (*setup_boot_clock)(void);
126 void (*setup_secondary_clock)(void); 161 void (*setup_secondary_clock)(void);
162
163 void (*startup_ipi_hook)(int phys_apicid,
164 unsigned long start_eip,
165 unsigned long start_esp);
127#endif 166#endif
128 167
168 /* TLB operations */
129 void (*flush_tlb_user)(void); 169 void (*flush_tlb_user)(void);
130 void (*flush_tlb_kernel)(void); 170 void (*flush_tlb_kernel)(void);
131 void (*flush_tlb_single)(u32 addr); 171 void (*flush_tlb_single)(unsigned long addr);
132 172 void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
133 void (*map_pt_hook)(int type, pte_t *va, u32 pfn); 173 unsigned long va);
134 174
175 /* Hooks for allocating/releasing pagetable pages */
135 void (*alloc_pt)(u32 pfn); 176 void (*alloc_pt)(u32 pfn);
136 void (*alloc_pd)(u32 pfn); 177 void (*alloc_pd)(u32 pfn);
137 void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); 178 void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
138 void (*release_pt)(u32 pfn); 179 void (*release_pt)(u32 pfn);
139 void (*release_pd)(u32 pfn); 180 void (*release_pd)(u32 pfn);
140 181
182 /* Pagetable manipulation functions */
141 void (*set_pte)(pte_t *ptep, pte_t pteval); 183 void (*set_pte)(pte_t *ptep, pte_t pteval);
142 void (*set_pte_at)(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval); 184 void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
185 pte_t *ptep, pte_t pteval);
143 void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); 186 void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
144 void (*pte_update)(struct mm_struct *mm, u32 addr, pte_t *ptep); 187 void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
145 void (*pte_update_defer)(struct mm_struct *mm, u32 addr, pte_t *ptep); 188 void (*pte_update_defer)(struct mm_struct *mm,
189 unsigned long addr, pte_t *ptep);
190
191#ifdef CONFIG_HIGHPTE
192 void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
193#endif
194
146#ifdef CONFIG_X86_PAE 195#ifdef CONFIG_X86_PAE
147 void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); 196 void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
148 void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); 197 void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte);
149 void (*set_pud)(pud_t *pudp, pud_t pudval); 198 void (*set_pud)(pud_t *pudp, pud_t pudval);
150 void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); 199 void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
151 void (*pmd_clear)(pmd_t *pmdp); 200 void (*pmd_clear)(pmd_t *pmdp);
201
202 unsigned long long (*pte_val)(pte_t);
203 unsigned long long (*pmd_val)(pmd_t);
204 unsigned long long (*pgd_val)(pgd_t);
205
206 pte_t (*make_pte)(unsigned long long pte);
207 pmd_t (*make_pmd)(unsigned long long pmd);
208 pgd_t (*make_pgd)(unsigned long long pgd);
209#else
210 unsigned long (*pte_val)(pte_t);
211 unsigned long (*pgd_val)(pgd_t);
212
213 pte_t (*make_pte)(unsigned long pte);
214 pgd_t (*make_pgd)(unsigned long pgd);
152#endif 215#endif
153 216
154 void (*set_lazy_mode)(int mode); 217 /* Set deferred update mode, used for batching operations. */
218 void (*set_lazy_mode)(enum paravirt_lazy_mode mode);
155 219
156 /* These two are jmp to, not actually called. */ 220 /* These two are jmp to, not actually called. */
157 void (*irq_enable_sysexit)(void); 221 void (*irq_enable_sysexit)(void);
158 void (*iret)(void); 222 void (*iret)(void);
159
160 void (*startup_ipi_hook)(int phys_apicid, unsigned long start_eip, unsigned long start_esp);
161}; 223};
162 224
163/* Mark a paravirt probe function. */ 225/* Mark a paravirt probe function. */
@@ -167,23 +229,202 @@ struct paravirt_ops
167 229
168extern struct paravirt_ops paravirt_ops; 230extern struct paravirt_ops paravirt_ops;
169 231
170#define paravirt_enabled() (paravirt_ops.paravirt_enabled) 232#define PARAVIRT_PATCH(x) \
233 (offsetof(struct paravirt_ops, x) / sizeof(void *))
234
235#define paravirt_type(type) \
236 [paravirt_typenum] "i" (PARAVIRT_PATCH(type))
237#define paravirt_clobber(clobber) \
238 [paravirt_clobber] "i" (clobber)
239
240/*
241 * Generate some code, and mark it as patchable by the
242 * apply_paravirt() alternate instruction patcher.
243 */
244#define _paravirt_alt(insn_string, type, clobber) \
245 "771:\n\t" insn_string "\n" "772:\n" \
246 ".pushsection .parainstructions,\"a\"\n" \
247 " .long 771b\n" \
248 " .byte " type "\n" \
249 " .byte 772b-771b\n" \
250 " .short " clobber "\n" \
251 ".popsection\n"
252
253/* Generate patchable code, with the default asm parameters. */
254#define paravirt_alt(insn_string) \
255 _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
256
257unsigned paravirt_patch_nop(void);
258unsigned paravirt_patch_ignore(unsigned len);
259unsigned paravirt_patch_call(void *target, u16 tgt_clobbers,
260 void *site, u16 site_clobbers,
261 unsigned len);
262unsigned paravirt_patch_jmp(void *target, void *site, unsigned len);
263unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len);
264
265unsigned paravirt_patch_insns(void *site, unsigned len,
266 const char *start, const char *end);
267
268
269/*
270 * This generates an indirect call based on the operation type number.
271 * The type number, computed in PARAVIRT_PATCH, is derived from the
272 * offset into the paravirt_ops structure, and can therefore be freely
273 * converted back into a structure offset.
274 */
275#define PARAVIRT_CALL "call *(paravirt_ops+%c[paravirt_typenum]*4);"
276
277/*
278 * These macros are intended to wrap calls into a paravirt_ops
279 * operation, so that they can be later identified and patched at
280 * runtime.
281 *
282 * Normally, a call to a pv_op function is a simple indirect call:
283 * (paravirt_ops.operations)(args...).
284 *
285 * Unfortunately, this is a relatively slow operation for modern CPUs,
286 * because it cannot necessarily determine what the destination
287 * address is. In this case, the address is a runtime constant, so at
288 * the very least we can patch the call to e a simple direct call, or
289 * ideally, patch an inline implementation into the callsite. (Direct
290 * calls are essentially free, because the call and return addresses
291 * are completely predictable.)
292 *
293 * These macros rely on the standard gcc "regparm(3)" calling
294 * convention, in which the first three arguments are placed in %eax,
295 * %edx, %ecx (in that order), and the remaining arguments are placed
296 * on the stack. All caller-save registers (eax,edx,ecx) are expected
297 * to be modified (either clobbered or used for return values).
298 *
299 * The call instruction itself is marked by placing its start address
300 * and size into the .parainstructions section, so that
301 * apply_paravirt() in arch/i386/kernel/alternative.c can do the
302 * appropriate patching under the control of the backend paravirt_ops
303 * implementation.
304 *
305 * Unfortunately there's no way to get gcc to generate the args setup
306 * for the call, and then allow the call itself to be generated by an
307 * inline asm. Because of this, we must do the complete arg setup and
308 * return value handling from within these macros. This is fairly
309 * cumbersome.
310 *
311 * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments.
312 * It could be extended to more arguments, but there would be little
313 * to be gained from that. For each number of arguments, there are
314 * the two VCALL and CALL variants for void and non-void functions.
315 *
316 * When there is a return value, the invoker of the macro must specify
317 * the return type. The macro then uses sizeof() on that type to
318 * determine whether its a 32 or 64 bit value, and places the return
319 * in the right register(s) (just %eax for 32-bit, and %edx:%eax for
320 * 64-bit).
321 *
322 * 64-bit arguments are passed as a pair of adjacent 32-bit arguments
323 * in low,high order.
324 *
325 * Small structures are passed and returned in registers. The macro
326 * calling convention can't directly deal with this, so the wrapper
327 * functions must do this.
328 *
329 * These PVOP_* macros are only defined within this header. This
330 * means that all uses must be wrapped in inline functions. This also
331 * makes sure the incoming and outgoing types are always correct.
332 */
333#define __PVOP_CALL(rettype, op, pre, post, ...) \
334 ({ \
335 rettype __ret; \
336 unsigned long __eax, __edx, __ecx; \
337 if (sizeof(rettype) > sizeof(unsigned long)) { \
338 asm volatile(pre \
339 paravirt_alt(PARAVIRT_CALL) \
340 post \
341 : "=a" (__eax), "=d" (__edx), \
342 "=c" (__ecx) \
343 : paravirt_type(op), \
344 paravirt_clobber(CLBR_ANY), \
345 ##__VA_ARGS__ \
346 : "memory", "cc"); \
347 __ret = (rettype)((((u64)__edx) << 32) | __eax); \
348 } else { \
349 asm volatile(pre \
350 paravirt_alt(PARAVIRT_CALL) \
351 post \
352 : "=a" (__eax), "=d" (__edx), \
353 "=c" (__ecx) \
354 : paravirt_type(op), \
355 paravirt_clobber(CLBR_ANY), \
356 ##__VA_ARGS__ \
357 : "memory", "cc"); \
358 __ret = (rettype)__eax; \
359 } \
360 __ret; \
361 })
362#define __PVOP_VCALL(op, pre, post, ...) \
363 ({ \
364 unsigned long __eax, __edx, __ecx; \
365 asm volatile(pre \
366 paravirt_alt(PARAVIRT_CALL) \
367 post \
368 : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \
369 : paravirt_type(op), \
370 paravirt_clobber(CLBR_ANY), \
371 ##__VA_ARGS__ \
372 : "memory", "cc"); \
373 })
374
375#define PVOP_CALL0(rettype, op) \
376 __PVOP_CALL(rettype, op, "", "")
377#define PVOP_VCALL0(op) \
378 __PVOP_VCALL(op, "", "")
379
380#define PVOP_CALL1(rettype, op, arg1) \
381 __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)))
382#define PVOP_VCALL1(op, arg1) \
383 __PVOP_VCALL(op, "", "", "0" ((u32)(arg1)))
384
385#define PVOP_CALL2(rettype, op, arg1, arg2) \
386 __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2)))
387#define PVOP_VCALL2(op, arg1, arg2) \
388 __PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2)))
389
390#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \
391 __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)), \
392 "1"((u32)(arg2)), "2"((u32)(arg3)))
393#define PVOP_VCALL3(op, arg1, arg2, arg3) \
394 __PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1"((u32)(arg2)), \
395 "2"((u32)(arg3)))
396
397#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
398 __PVOP_CALL(rettype, op, \
399 "push %[_arg4];", "lea 4(%%esp),%%esp;", \
400 "0" ((u32)(arg1)), "1" ((u32)(arg2)), \
401 "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
402#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
403 __PVOP_VCALL(op, \
404 "push %[_arg4];", "lea 4(%%esp),%%esp;", \
405 "0" ((u32)(arg1)), "1" ((u32)(arg2)), \
406 "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
407
408static inline int paravirt_enabled(void)
409{
410 return paravirt_ops.paravirt_enabled;
411}
171 412
172static inline void load_esp0(struct tss_struct *tss, 413static inline void load_esp0(struct tss_struct *tss,
173 struct thread_struct *thread) 414 struct thread_struct *thread)
174{ 415{
175 paravirt_ops.load_esp0(tss, thread); 416 PVOP_VCALL2(load_esp0, tss, thread);
176} 417}
177 418
178#define ARCH_SETUP paravirt_ops.arch_setup(); 419#define ARCH_SETUP paravirt_ops.arch_setup();
179static inline unsigned long get_wallclock(void) 420static inline unsigned long get_wallclock(void)
180{ 421{
181 return paravirt_ops.get_wallclock(); 422 return PVOP_CALL0(unsigned long, get_wallclock);
182} 423}
183 424
184static inline int set_wallclock(unsigned long nowtime) 425static inline int set_wallclock(unsigned long nowtime)
185{ 426{
186 return paravirt_ops.set_wallclock(nowtime); 427 return PVOP_CALL1(int, set_wallclock, nowtime);
187} 428}
188 429
189static inline void (*choose_time_init(void))(void) 430static inline void (*choose_time_init(void))(void)
@@ -195,113 +436,208 @@ static inline void (*choose_time_init(void))(void)
195static inline void __cpuid(unsigned int *eax, unsigned int *ebx, 436static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
196 unsigned int *ecx, unsigned int *edx) 437 unsigned int *ecx, unsigned int *edx)
197{ 438{
198 paravirt_ops.cpuid(eax, ebx, ecx, edx); 439 PVOP_VCALL4(cpuid, eax, ebx, ecx, edx);
199} 440}
200 441
201/* 442/*
202 * These special macros can be used to get or set a debugging register 443 * These special macros can be used to get or set a debugging register
203 */ 444 */
204#define get_debugreg(var, reg) var = paravirt_ops.get_debugreg(reg) 445static inline unsigned long paravirt_get_debugreg(int reg)
205#define set_debugreg(val, reg) paravirt_ops.set_debugreg(reg, val) 446{
447 return PVOP_CALL1(unsigned long, get_debugreg, reg);
448}
449#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg)
450static inline void set_debugreg(unsigned long val, int reg)
451{
452 PVOP_VCALL2(set_debugreg, reg, val);
453}
454
455static inline void clts(void)
456{
457 PVOP_VCALL0(clts);
458}
206 459
207#define clts() paravirt_ops.clts() 460static inline unsigned long read_cr0(void)
461{
462 return PVOP_CALL0(unsigned long, read_cr0);
463}
208 464
209#define read_cr0() paravirt_ops.read_cr0() 465static inline void write_cr0(unsigned long x)
210#define write_cr0(x) paravirt_ops.write_cr0(x) 466{
467 PVOP_VCALL1(write_cr0, x);
468}
211 469
212#define read_cr2() paravirt_ops.read_cr2() 470static inline unsigned long read_cr2(void)
213#define write_cr2(x) paravirt_ops.write_cr2(x) 471{
472 return PVOP_CALL0(unsigned long, read_cr2);
473}
214 474
215#define read_cr3() paravirt_ops.read_cr3() 475static inline void write_cr2(unsigned long x)
216#define write_cr3(x) paravirt_ops.write_cr3(x) 476{
477 PVOP_VCALL1(write_cr2, x);
478}
217 479
218#define read_cr4() paravirt_ops.read_cr4() 480static inline unsigned long read_cr3(void)
219#define read_cr4_safe(x) paravirt_ops.read_cr4_safe() 481{
220#define write_cr4(x) paravirt_ops.write_cr4(x) 482 return PVOP_CALL0(unsigned long, read_cr3);
483}
484
485static inline void write_cr3(unsigned long x)
486{
487 PVOP_VCALL1(write_cr3, x);
488}
489
490static inline unsigned long read_cr4(void)
491{
492 return PVOP_CALL0(unsigned long, read_cr4);
493}
494static inline unsigned long read_cr4_safe(void)
495{
496 return PVOP_CALL0(unsigned long, read_cr4_safe);
497}
498
499static inline void write_cr4(unsigned long x)
500{
501 PVOP_VCALL1(write_cr4, x);
502}
221 503
222static inline void raw_safe_halt(void) 504static inline void raw_safe_halt(void)
223{ 505{
224 paravirt_ops.safe_halt(); 506 PVOP_VCALL0(safe_halt);
225} 507}
226 508
227static inline void halt(void) 509static inline void halt(void)
228{ 510{
229 paravirt_ops.safe_halt(); 511 PVOP_VCALL0(safe_halt);
512}
513
514static inline void wbinvd(void)
515{
516 PVOP_VCALL0(wbinvd);
230} 517}
231#define wbinvd() paravirt_ops.wbinvd()
232 518
233#define get_kernel_rpl() (paravirt_ops.kernel_rpl) 519#define get_kernel_rpl() (paravirt_ops.kernel_rpl)
234 520
235#define rdmsr(msr,val1,val2) do { \ 521static inline u64 paravirt_read_msr(unsigned msr, int *err)
236 int _err; \ 522{
237 u64 _l = paravirt_ops.read_msr(msr,&_err); \ 523 return PVOP_CALL2(u64, read_msr, msr, err);
238 val1 = (u32)_l; \ 524}
239 val2 = _l >> 32; \ 525static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
526{
527 return PVOP_CALL3(int, write_msr, msr, low, high);
528}
529
530/* These should all do BUG_ON(_err), but our headers are too tangled. */
531#define rdmsr(msr,val1,val2) do { \
532 int _err; \
533 u64 _l = paravirt_read_msr(msr, &_err); \
534 val1 = (u32)_l; \
535 val2 = _l >> 32; \
240} while(0) 536} while(0)
241 537
242#define wrmsr(msr,val1,val2) do { \ 538#define wrmsr(msr,val1,val2) do { \
243 u64 _l = ((u64)(val2) << 32) | (val1); \ 539 paravirt_write_msr(msr, val1, val2); \
244 paravirt_ops.write_msr((msr), _l); \
245} while(0) 540} while(0)
246 541
247#define rdmsrl(msr,val) do { \ 542#define rdmsrl(msr,val) do { \
248 int _err; \ 543 int _err; \
249 val = paravirt_ops.read_msr((msr),&_err); \ 544 val = paravirt_read_msr(msr, &_err); \
250} while(0) 545} while(0)
251 546
252#define wrmsrl(msr,val) (paravirt_ops.write_msr((msr),(val))) 547#define wrmsrl(msr,val) ((void)paravirt_write_msr(msr, val, 0))
253#define wrmsr_safe(msr,a,b) ({ \ 548#define wrmsr_safe(msr,a,b) paravirt_write_msr(msr, a, b)
254 u64 _l = ((u64)(b) << 32) | (a); \
255 paravirt_ops.write_msr((msr),_l); \
256})
257 549
258/* rdmsr with exception handling */ 550/* rdmsr with exception handling */
259#define rdmsr_safe(msr,a,b) ({ \ 551#define rdmsr_safe(msr,a,b) ({ \
260 int _err; \ 552 int _err; \
261 u64 _l = paravirt_ops.read_msr(msr,&_err); \ 553 u64 _l = paravirt_read_msr(msr, &_err); \
262 (*a) = (u32)_l; \ 554 (*a) = (u32)_l; \
263 (*b) = _l >> 32; \ 555 (*b) = _l >> 32; \
264 _err; }) 556 _err; })
265 557
266#define rdtsc(low,high) do { \ 558
267 u64 _l = paravirt_ops.read_tsc(); \ 559static inline u64 paravirt_read_tsc(void)
268 low = (u32)_l; \ 560{
269 high = _l >> 32; \ 561 return PVOP_CALL0(u64, read_tsc);
562}
563#define rdtsc(low,high) do { \
564 u64 _l = paravirt_read_tsc(); \
565 low = (u32)_l; \
566 high = _l >> 32; \
270} while(0) 567} while(0)
271 568
272#define rdtscl(low) do { \ 569#define rdtscl(low) do { \
273 u64 _l = paravirt_ops.read_tsc(); \ 570 u64 _l = paravirt_read_tsc(); \
274 low = (int)_l; \ 571 low = (int)_l; \
275} while(0) 572} while(0)
276 573
277#define rdtscll(val) (val = paravirt_ops.read_tsc()) 574#define rdtscll(val) (val = paravirt_read_tsc())
278 575
279#define get_scheduled_cycles(val) (val = paravirt_ops.get_scheduled_cycles()) 576#define get_scheduled_cycles(val) (val = paravirt_ops.get_scheduled_cycles())
280#define calculate_cpu_khz() (paravirt_ops.get_cpu_khz()) 577#define calculate_cpu_khz() (paravirt_ops.get_cpu_khz())
281 578
282#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) 579#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
283 580
284#define rdpmc(counter,low,high) do { \ 581static inline unsigned long long paravirt_read_pmc(int counter)
285 u64 _l = paravirt_ops.read_pmc(); \ 582{
286 low = (u32)_l; \ 583 return PVOP_CALL1(u64, read_pmc, counter);
287 high = _l >> 32; \ 584}
585
586#define rdpmc(counter,low,high) do { \
587 u64 _l = paravirt_read_pmc(counter); \
588 low = (u32)_l; \
589 high = _l >> 32; \
288} while(0) 590} while(0)
289 591
290#define load_TR_desc() (paravirt_ops.load_tr_desc()) 592static inline void load_TR_desc(void)
291#define load_gdt(dtr) (paravirt_ops.load_gdt(dtr)) 593{
292#define load_idt(dtr) (paravirt_ops.load_idt(dtr)) 594 PVOP_VCALL0(load_tr_desc);
293#define set_ldt(addr, entries) (paravirt_ops.set_ldt((addr), (entries))) 595}
294#define store_gdt(dtr) (paravirt_ops.store_gdt(dtr)) 596static inline void load_gdt(const struct Xgt_desc_struct *dtr)
295#define store_idt(dtr) (paravirt_ops.store_idt(dtr)) 597{
296#define store_tr(tr) ((tr) = paravirt_ops.store_tr()) 598 PVOP_VCALL1(load_gdt, dtr);
297#define load_TLS(t,cpu) (paravirt_ops.load_tls((t),(cpu))) 599}
298#define write_ldt_entry(dt, entry, low, high) \ 600static inline void load_idt(const struct Xgt_desc_struct *dtr)
299 (paravirt_ops.write_ldt_entry((dt), (entry), (low), (high))) 601{
300#define write_gdt_entry(dt, entry, low, high) \ 602 PVOP_VCALL1(load_idt, dtr);
301 (paravirt_ops.write_gdt_entry((dt), (entry), (low), (high))) 603}
302#define write_idt_entry(dt, entry, low, high) \ 604static inline void set_ldt(const void *addr, unsigned entries)
303 (paravirt_ops.write_idt_entry((dt), (entry), (low), (high))) 605{
304#define set_iopl_mask(mask) (paravirt_ops.set_iopl_mask(mask)) 606 PVOP_VCALL2(set_ldt, addr, entries);
607}
608static inline void store_gdt(struct Xgt_desc_struct *dtr)
609{
610 PVOP_VCALL1(store_gdt, dtr);
611}
612static inline void store_idt(struct Xgt_desc_struct *dtr)
613{
614 PVOP_VCALL1(store_idt, dtr);
615}
616static inline unsigned long paravirt_store_tr(void)
617{
618 return PVOP_CALL0(unsigned long, store_tr);
619}
620#define store_tr(tr) ((tr) = paravirt_store_tr())
621static inline void load_TLS(struct thread_struct *t, unsigned cpu)
622{
623 PVOP_VCALL2(load_tls, t, cpu);
624}
625static inline void write_ldt_entry(void *dt, int entry, u32 low, u32 high)
626{
627 PVOP_VCALL4(write_ldt_entry, dt, entry, low, high);
628}
629static inline void write_gdt_entry(void *dt, int entry, u32 low, u32 high)
630{
631 PVOP_VCALL4(write_gdt_entry, dt, entry, low, high);
632}
633static inline void write_idt_entry(void *dt, int entry, u32 low, u32 high)
634{
635 PVOP_VCALL4(write_idt_entry, dt, entry, low, high);
636}
637static inline void set_iopl_mask(unsigned mask)
638{
639 PVOP_VCALL1(set_iopl_mask, mask);
640}
305 641
306/* The paravirtualized I/O functions */ 642/* The paravirtualized I/O functions */
307static inline void slow_down_io(void) { 643static inline void slow_down_io(void) {
@@ -319,215 +655,390 @@ static inline void slow_down_io(void) {
319 */ 655 */
320static inline void apic_write(unsigned long reg, unsigned long v) 656static inline void apic_write(unsigned long reg, unsigned long v)
321{ 657{
322 paravirt_ops.apic_write(reg,v); 658 PVOP_VCALL2(apic_write, reg, v);
323} 659}
324 660
325static inline void apic_write_atomic(unsigned long reg, unsigned long v) 661static inline void apic_write_atomic(unsigned long reg, unsigned long v)
326{ 662{
327 paravirt_ops.apic_write_atomic(reg,v); 663 PVOP_VCALL2(apic_write_atomic, reg, v);
328} 664}
329 665
330static inline unsigned long apic_read(unsigned long reg) 666static inline unsigned long apic_read(unsigned long reg)
331{ 667{
332 return paravirt_ops.apic_read(reg); 668 return PVOP_CALL1(unsigned long, apic_read, reg);
333} 669}
334 670
335static inline void setup_boot_clock(void) 671static inline void setup_boot_clock(void)
336{ 672{
337 paravirt_ops.setup_boot_clock(); 673 PVOP_VCALL0(setup_boot_clock);
338} 674}
339 675
340static inline void setup_secondary_clock(void) 676static inline void setup_secondary_clock(void)
341{ 677{
342 paravirt_ops.setup_secondary_clock(); 678 PVOP_VCALL0(setup_secondary_clock);
343} 679}
344#endif 680#endif
345 681
682static inline void paravirt_pagetable_setup_start(pgd_t *base)
683{
684 if (paravirt_ops.pagetable_setup_start)
685 (*paravirt_ops.pagetable_setup_start)(base);
686}
687
688static inline void paravirt_pagetable_setup_done(pgd_t *base)
689{
690 if (paravirt_ops.pagetable_setup_done)
691 (*paravirt_ops.pagetable_setup_done)(base);
692}
693
346#ifdef CONFIG_SMP 694#ifdef CONFIG_SMP
347static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, 695static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
348 unsigned long start_esp) 696 unsigned long start_esp)
349{ 697{
350 return paravirt_ops.startup_ipi_hook(phys_apicid, start_eip, start_esp); 698 PVOP_VCALL3(startup_ipi_hook, phys_apicid, start_eip, start_esp);
351} 699}
352#endif 700#endif
353 701
354#define __flush_tlb() paravirt_ops.flush_tlb_user() 702static inline void paravirt_activate_mm(struct mm_struct *prev,
355#define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() 703 struct mm_struct *next)
356#define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr) 704{
705 PVOP_VCALL2(activate_mm, prev, next);
706}
357 707
358#define paravirt_map_pt_hook(type, va, pfn) paravirt_ops.map_pt_hook(type, va, pfn) 708static inline void arch_dup_mmap(struct mm_struct *oldmm,
709 struct mm_struct *mm)
710{
711 PVOP_VCALL2(dup_mmap, oldmm, mm);
712}
359 713
360#define paravirt_alloc_pt(pfn) paravirt_ops.alloc_pt(pfn) 714static inline void arch_exit_mmap(struct mm_struct *mm)
361#define paravirt_release_pt(pfn) paravirt_ops.release_pt(pfn) 715{
716 PVOP_VCALL1(exit_mmap, mm);
717}
362 718
363#define paravirt_alloc_pd(pfn) paravirt_ops.alloc_pd(pfn) 719static inline void __flush_tlb(void)
364#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) \ 720{
365 paravirt_ops.alloc_pd_clone(pfn, clonepfn, start, count) 721 PVOP_VCALL0(flush_tlb_user);
366#define paravirt_release_pd(pfn) paravirt_ops.release_pd(pfn) 722}
723static inline void __flush_tlb_global(void)
724{
725 PVOP_VCALL0(flush_tlb_kernel);
726}
727static inline void __flush_tlb_single(unsigned long addr)
728{
729 PVOP_VCALL1(flush_tlb_single, addr);
730}
367 731
368static inline void set_pte(pte_t *ptep, pte_t pteval) 732static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
733 unsigned long va)
369{ 734{
370 paravirt_ops.set_pte(ptep, pteval); 735 PVOP_VCALL3(flush_tlb_others, &cpumask, mm, va);
371} 736}
372 737
373static inline void set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) 738static inline void paravirt_alloc_pt(unsigned pfn)
374{ 739{
375 paravirt_ops.set_pte_at(mm, addr, ptep, pteval); 740 PVOP_VCALL1(alloc_pt, pfn);
741}
742static inline void paravirt_release_pt(unsigned pfn)
743{
744 PVOP_VCALL1(release_pt, pfn);
376} 745}
377 746
378static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) 747static inline void paravirt_alloc_pd(unsigned pfn)
379{ 748{
380 paravirt_ops.set_pmd(pmdp, pmdval); 749 PVOP_VCALL1(alloc_pd, pfn);
381} 750}
382 751
383static inline void pte_update(struct mm_struct *mm, u32 addr, pte_t *ptep) 752static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn,
753 unsigned start, unsigned count)
384{ 754{
385 paravirt_ops.pte_update(mm, addr, ptep); 755 PVOP_VCALL4(alloc_pd_clone, pfn, clonepfn, start, count);
756}
757static inline void paravirt_release_pd(unsigned pfn)
758{
759 PVOP_VCALL1(release_pd, pfn);
386} 760}
387 761
388static inline void pte_update_defer(struct mm_struct *mm, u32 addr, pte_t *ptep) 762#ifdef CONFIG_HIGHPTE
763static inline void *kmap_atomic_pte(struct page *page, enum km_type type)
389{ 764{
390 paravirt_ops.pte_update_defer(mm, addr, ptep); 765 unsigned long ret;
766 ret = PVOP_CALL2(unsigned long, kmap_atomic_pte, page, type);
767 return (void *)ret;
768}
769#endif
770
771static inline void pte_update(struct mm_struct *mm, unsigned long addr,
772 pte_t *ptep)
773{
774 PVOP_VCALL3(pte_update, mm, addr, ptep);
775}
776
777static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
778 pte_t *ptep)
779{
780 PVOP_VCALL3(pte_update_defer, mm, addr, ptep);
391} 781}
392 782
393#ifdef CONFIG_X86_PAE 783#ifdef CONFIG_X86_PAE
784static inline pte_t __pte(unsigned long long val)
785{
786 unsigned long long ret = PVOP_CALL2(unsigned long long, make_pte,
787 val, val >> 32);
788 return (pte_t) { ret, ret >> 32 };
789}
790
791static inline pmd_t __pmd(unsigned long long val)
792{
793 return (pmd_t) { PVOP_CALL2(unsigned long long, make_pmd, val, val >> 32) };
794}
795
796static inline pgd_t __pgd(unsigned long long val)
797{
798 return (pgd_t) { PVOP_CALL2(unsigned long long, make_pgd, val, val >> 32) };
799}
800
801static inline unsigned long long pte_val(pte_t x)
802{
803 return PVOP_CALL2(unsigned long long, pte_val, x.pte_low, x.pte_high);
804}
805
806static inline unsigned long long pmd_val(pmd_t x)
807{
808 return PVOP_CALL2(unsigned long long, pmd_val, x.pmd, x.pmd >> 32);
809}
810
811static inline unsigned long long pgd_val(pgd_t x)
812{
813 return PVOP_CALL2(unsigned long long, pgd_val, x.pgd, x.pgd >> 32);
814}
815
816static inline void set_pte(pte_t *ptep, pte_t pteval)
817{
818 PVOP_VCALL3(set_pte, ptep, pteval.pte_low, pteval.pte_high);
819}
820
821static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
822 pte_t *ptep, pte_t pteval)
823{
824 /* 5 arg words */
825 paravirt_ops.set_pte_at(mm, addr, ptep, pteval);
826}
827
394static inline void set_pte_atomic(pte_t *ptep, pte_t pteval) 828static inline void set_pte_atomic(pte_t *ptep, pte_t pteval)
395{ 829{
396 paravirt_ops.set_pte_atomic(ptep, pteval); 830 PVOP_VCALL3(set_pte_atomic, ptep, pteval.pte_low, pteval.pte_high);
397} 831}
398 832
399static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) 833static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
834 pte_t *ptep, pte_t pte)
400{ 835{
836 /* 5 arg words */
401 paravirt_ops.set_pte_present(mm, addr, ptep, pte); 837 paravirt_ops.set_pte_present(mm, addr, ptep, pte);
402} 838}
403 839
840static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
841{
842 PVOP_VCALL3(set_pmd, pmdp, pmdval.pmd, pmdval.pmd >> 32);
843}
844
404static inline void set_pud(pud_t *pudp, pud_t pudval) 845static inline void set_pud(pud_t *pudp, pud_t pudval)
405{ 846{
406 paravirt_ops.set_pud(pudp, pudval); 847 PVOP_VCALL3(set_pud, pudp, pudval.pgd.pgd, pudval.pgd.pgd >> 32);
407} 848}
408 849
409static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 850static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
410{ 851{
411 paravirt_ops.pte_clear(mm, addr, ptep); 852 PVOP_VCALL3(pte_clear, mm, addr, ptep);
412} 853}
413 854
414static inline void pmd_clear(pmd_t *pmdp) 855static inline void pmd_clear(pmd_t *pmdp)
415{ 856{
416 paravirt_ops.pmd_clear(pmdp); 857 PVOP_VCALL1(pmd_clear, pmdp);
417} 858}
418#endif
419 859
420/* Lazy mode for batching updates / context switch */ 860#else /* !CONFIG_X86_PAE */
421#define PARAVIRT_LAZY_NONE 0 861
422#define PARAVIRT_LAZY_MMU 1 862static inline pte_t __pte(unsigned long val)
423#define PARAVIRT_LAZY_CPU 2 863{
424#define PARAVIRT_LAZY_FLUSH 3 864 return (pte_t) { PVOP_CALL1(unsigned long, make_pte, val) };
865}
866
867static inline pgd_t __pgd(unsigned long val)
868{
869 return (pgd_t) { PVOP_CALL1(unsigned long, make_pgd, val) };
870}
871
872static inline unsigned long pte_val(pte_t x)
873{
874 return PVOP_CALL1(unsigned long, pte_val, x.pte_low);
875}
876
877static inline unsigned long pgd_val(pgd_t x)
878{
879 return PVOP_CALL1(unsigned long, pgd_val, x.pgd);
880}
881
882static inline void set_pte(pte_t *ptep, pte_t pteval)
883{
884 PVOP_VCALL2(set_pte, ptep, pteval.pte_low);
885}
886
887static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
888 pte_t *ptep, pte_t pteval)
889{
890 PVOP_VCALL4(set_pte_at, mm, addr, ptep, pteval.pte_low);
891}
892
893static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
894{
895 PVOP_VCALL2(set_pmd, pmdp, pmdval.pud.pgd.pgd);
896}
897#endif /* CONFIG_X86_PAE */
425 898
426#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE 899#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE
427#define arch_enter_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_CPU) 900static inline void arch_enter_lazy_cpu_mode(void)
428#define arch_leave_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE) 901{
429#define arch_flush_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_FLUSH) 902 PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_CPU);
903}
904
905static inline void arch_leave_lazy_cpu_mode(void)
906{
907 PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_NONE);
908}
909
910static inline void arch_flush_lazy_cpu_mode(void)
911{
912 PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_FLUSH);
913}
914
430 915
431#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE 916#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
432#define arch_enter_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_MMU) 917static inline void arch_enter_lazy_mmu_mode(void)
433#define arch_leave_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE) 918{
434#define arch_flush_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_FLUSH) 919 PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_MMU);
920}
921
922static inline void arch_leave_lazy_mmu_mode(void)
923{
924 PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_NONE);
925}
926
927static inline void arch_flush_lazy_mmu_mode(void)
928{
929 PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_FLUSH);
930}
931
932void _paravirt_nop(void);
933#define paravirt_nop ((void *)_paravirt_nop)
435 934
436/* These all sit in the .parainstructions section to tell us what to patch. */ 935/* These all sit in the .parainstructions section to tell us what to patch. */
437struct paravirt_patch { 936struct paravirt_patch_site {
438 u8 *instr; /* original instructions */ 937 u8 *instr; /* original instructions */
439 u8 instrtype; /* type of this instruction */ 938 u8 instrtype; /* type of this instruction */
440 u8 len; /* length of original instruction */ 939 u8 len; /* length of original instruction */
441 u16 clobbers; /* what registers you may clobber */ 940 u16 clobbers; /* what registers you may clobber */
442}; 941};
443 942
444#define paravirt_alt(insn_string, typenum, clobber) \ 943extern struct paravirt_patch_site __parainstructions[],
445 "771:\n\t" insn_string "\n" "772:\n" \ 944 __parainstructions_end[];
446 ".pushsection .parainstructions,\"a\"\n" \
447 " .long 771b\n" \
448 " .byte " __stringify(typenum) "\n" \
449 " .byte 772b-771b\n" \
450 " .short " __stringify(clobber) "\n" \
451 ".popsection"
452 945
453static inline unsigned long __raw_local_save_flags(void) 946static inline unsigned long __raw_local_save_flags(void)
454{ 947{
455 unsigned long f; 948 unsigned long f;
456 949
457 __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" 950 asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
458 "call *%1;" 951 PARAVIRT_CALL
459 "popl %%edx; popl %%ecx", 952 "popl %%edx; popl %%ecx")
460 PARAVIRT_SAVE_FLAGS, CLBR_NONE) 953 : "=a"(f)
461 : "=a"(f): "m"(paravirt_ops.save_fl) 954 : paravirt_type(save_fl),
462 : "memory", "cc"); 955 paravirt_clobber(CLBR_EAX)
956 : "memory", "cc");
463 return f; 957 return f;
464} 958}
465 959
466static inline void raw_local_irq_restore(unsigned long f) 960static inline void raw_local_irq_restore(unsigned long f)
467{ 961{
468 __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" 962 asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
469 "call *%1;" 963 PARAVIRT_CALL
470 "popl %%edx; popl %%ecx", 964 "popl %%edx; popl %%ecx")
471 PARAVIRT_RESTORE_FLAGS, CLBR_EAX) 965 : "=a"(f)
472 : "=a"(f) : "m" (paravirt_ops.restore_fl), "0"(f) 966 : "0"(f),
473 : "memory", "cc"); 967 paravirt_type(restore_fl),
968 paravirt_clobber(CLBR_EAX)
969 : "memory", "cc");
474} 970}
475 971
476static inline void raw_local_irq_disable(void) 972static inline void raw_local_irq_disable(void)
477{ 973{
478 __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" 974 asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
479 "call *%0;" 975 PARAVIRT_CALL
480 "popl %%edx; popl %%ecx", 976 "popl %%edx; popl %%ecx")
481 PARAVIRT_IRQ_DISABLE, CLBR_EAX) 977 :
482 : : "m" (paravirt_ops.irq_disable) 978 : paravirt_type(irq_disable),
483 : "memory", "eax", "cc"); 979 paravirt_clobber(CLBR_EAX)
980 : "memory", "eax", "cc");
484} 981}
485 982
486static inline void raw_local_irq_enable(void) 983static inline void raw_local_irq_enable(void)
487{ 984{
488 __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" 985 asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
489 "call *%0;" 986 PARAVIRT_CALL
490 "popl %%edx; popl %%ecx", 987 "popl %%edx; popl %%ecx")
491 PARAVIRT_IRQ_ENABLE, CLBR_EAX) 988 :
492 : : "m" (paravirt_ops.irq_enable) 989 : paravirt_type(irq_enable),
493 : "memory", "eax", "cc"); 990 paravirt_clobber(CLBR_EAX)
991 : "memory", "eax", "cc");
494} 992}
495 993
496static inline unsigned long __raw_local_irq_save(void) 994static inline unsigned long __raw_local_irq_save(void)
497{ 995{
498 unsigned long f; 996 unsigned long f;
499 997
500 __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" 998 f = __raw_local_save_flags();
501 "call *%1; pushl %%eax;" 999 raw_local_irq_disable();
502 "call *%2; popl %%eax;"
503 "popl %%edx; popl %%ecx",
504 PARAVIRT_SAVE_FLAGS_IRQ_DISABLE,
505 CLBR_NONE)
506 : "=a"(f)
507 : "m" (paravirt_ops.save_fl),
508 "m" (paravirt_ops.irq_disable)
509 : "memory", "cc");
510 return f; 1000 return f;
511} 1001}
512 1002
513#define CLI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \ 1003#define CLI_STRING \
514 "call *paravirt_ops+%c[irq_disable];" \ 1004 _paravirt_alt("pushl %%ecx; pushl %%edx;" \
515 "popl %%edx; popl %%ecx", \ 1005 "call *paravirt_ops+%c[paravirt_cli_type]*4;" \
516 PARAVIRT_IRQ_DISABLE, CLBR_EAX) 1006 "popl %%edx; popl %%ecx", \
1007 "%c[paravirt_cli_type]", "%c[paravirt_clobber]")
1008
1009#define STI_STRING \
1010 _paravirt_alt("pushl %%ecx; pushl %%edx;" \
1011 "call *paravirt_ops+%c[paravirt_sti_type]*4;" \
1012 "popl %%edx; popl %%ecx", \
1013 "%c[paravirt_sti_type]", "%c[paravirt_clobber]")
517 1014
518#define STI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \
519 "call *paravirt_ops+%c[irq_enable];" \
520 "popl %%edx; popl %%ecx", \
521 PARAVIRT_IRQ_ENABLE, CLBR_EAX)
522#define CLI_STI_CLOBBERS , "%eax" 1015#define CLI_STI_CLOBBERS , "%eax"
523#define CLI_STI_INPUT_ARGS \ 1016#define CLI_STI_INPUT_ARGS \
524 , \ 1017 , \
525 [irq_disable] "i" (offsetof(struct paravirt_ops, irq_disable)), \ 1018 [paravirt_cli_type] "i" (PARAVIRT_PATCH(irq_disable)), \
526 [irq_enable] "i" (offsetof(struct paravirt_ops, irq_enable)) 1019 [paravirt_sti_type] "i" (PARAVIRT_PATCH(irq_enable)), \
1020 paravirt_clobber(CLBR_EAX)
1021
1022/* Make sure as little as possible of this mess escapes. */
1023#undef PARAVIRT_CALL
1024#undef __PVOP_CALL
1025#undef __PVOP_VCALL
1026#undef PVOP_VCALL0
1027#undef PVOP_CALL0
1028#undef PVOP_VCALL1
1029#undef PVOP_CALL1
1030#undef PVOP_VCALL2
1031#undef PVOP_CALL2
1032#undef PVOP_VCALL3
1033#undef PVOP_CALL3
1034#undef PVOP_VCALL4
1035#undef PVOP_CALL4
527 1036
528#else /* __ASSEMBLY__ */ 1037#else /* __ASSEMBLY__ */
529 1038
530#define PARA_PATCH(ptype, clobbers, ops) \ 1039#define PARA_PATCH(off) ((off) / 4)
1040
1041#define PARA_SITE(ptype, clobbers, ops) \
531771:; \ 1042771:; \
532 ops; \ 1043 ops; \
533772:; \ 1044772:; \
@@ -538,28 +1049,30 @@ static inline unsigned long __raw_local_irq_save(void)
538 .short clobbers; \ 1049 .short clobbers; \
539 .popsection 1050 .popsection
540 1051
541#define INTERRUPT_RETURN \ 1052#define INTERRUPT_RETURN \
542 PARA_PATCH(PARAVIRT_INTERRUPT_RETURN, CLBR_ANY, \ 1053 PARA_SITE(PARA_PATCH(PARAVIRT_iret), CLBR_NONE, \
543 jmp *%cs:paravirt_ops+PARAVIRT_iret) 1054 jmp *%cs:paravirt_ops+PARAVIRT_iret)
544 1055
545#define DISABLE_INTERRUPTS(clobbers) \ 1056#define DISABLE_INTERRUPTS(clobbers) \
546 PARA_PATCH(PARAVIRT_IRQ_DISABLE, clobbers, \ 1057 PARA_SITE(PARA_PATCH(PARAVIRT_irq_disable), clobbers, \
547 pushl %ecx; pushl %edx; \ 1058 pushl %eax; pushl %ecx; pushl %edx; \
548 call *paravirt_ops+PARAVIRT_irq_disable; \ 1059 call *%cs:paravirt_ops+PARAVIRT_irq_disable; \
549 popl %edx; popl %ecx) \ 1060 popl %edx; popl %ecx; popl %eax) \
550 1061
551#define ENABLE_INTERRUPTS(clobbers) \ 1062#define ENABLE_INTERRUPTS(clobbers) \
552 PARA_PATCH(PARAVIRT_IRQ_ENABLE, clobbers, \ 1063 PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable), clobbers, \
553 pushl %ecx; pushl %edx; \ 1064 pushl %eax; pushl %ecx; pushl %edx; \
554 call *%cs:paravirt_ops+PARAVIRT_irq_enable; \ 1065 call *%cs:paravirt_ops+PARAVIRT_irq_enable; \
555 popl %edx; popl %ecx) 1066 popl %edx; popl %ecx; popl %eax)
556 1067
557#define ENABLE_INTERRUPTS_SYSEXIT \ 1068#define ENABLE_INTERRUPTS_SYSEXIT \
558 PARA_PATCH(PARAVIRT_STI_SYSEXIT, CLBR_ANY, \ 1069 PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable_sysexit), CLBR_NONE, \
559 jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit) 1070 jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit)
560 1071
561#define GET_CR0_INTO_EAX \ 1072#define GET_CR0_INTO_EAX \
562 call *paravirt_ops+PARAVIRT_read_cr0 1073 push %ecx; push %edx; \
1074 call *paravirt_ops+PARAVIRT_read_cr0; \
1075 pop %edx; pop %ecx
563 1076
564#endif /* __ASSEMBLY__ */ 1077#endif /* __ASSEMBLY__ */
565#endif /* CONFIG_PARAVIRT */ 1078#endif /* CONFIG_PARAVIRT */
diff --git a/include/asm-i386/pda.h b/include/asm-i386/pda.h
deleted file mode 100644
index b12d59a318b7..000000000000
--- a/include/asm-i386/pda.h
+++ /dev/null
@@ -1,100 +0,0 @@
1/*
2 Per-processor Data Areas
3 Jeremy Fitzhardinge <jeremy@goop.org> 2006
4 Based on asm-x86_64/pda.h by Andi Kleen.
5 */
6#ifndef _I386_PDA_H
7#define _I386_PDA_H
8
9#include <linux/stddef.h>
10#include <linux/types.h>
11
12struct i386_pda
13{
14 struct i386_pda *_pda; /* pointer to self */
15
16 int cpu_number;
17 struct task_struct *pcurrent; /* current process */
18 struct pt_regs *irq_regs;
19};
20
21extern struct i386_pda *_cpu_pda[];
22
23#define cpu_pda(i) (_cpu_pda[i])
24
25#define pda_offset(field) offsetof(struct i386_pda, field)
26
27extern void __bad_pda_field(void);
28
29/* This variable is never instantiated. It is only used as a stand-in
30 for the real per-cpu PDA memory, so that gcc can understand what
31 memory operations the inline asms() below are performing. This
32 eliminates the need to make the asms volatile or have memory
33 clobbers, so gcc can readily analyse them. */
34extern struct i386_pda _proxy_pda;
35
36#define pda_to_op(op,field,val) \
37 do { \
38 typedef typeof(_proxy_pda.field) T__; \
39 if (0) { T__ tmp__; tmp__ = (val); } \
40 switch (sizeof(_proxy_pda.field)) { \
41 case 1: \
42 asm(op "b %1,%%fs:%c2" \
43 : "+m" (_proxy_pda.field) \
44 :"ri" ((T__)val), \
45 "i"(pda_offset(field))); \
46 break; \
47 case 2: \
48 asm(op "w %1,%%fs:%c2" \
49 : "+m" (_proxy_pda.field) \
50 :"ri" ((T__)val), \
51 "i"(pda_offset(field))); \
52 break; \
53 case 4: \
54 asm(op "l %1,%%fs:%c2" \
55 : "+m" (_proxy_pda.field) \
56 :"ri" ((T__)val), \
57 "i"(pda_offset(field))); \
58 break; \
59 default: __bad_pda_field(); \
60 } \
61 } while (0)
62
63#define pda_from_op(op,field) \
64 ({ \
65 typeof(_proxy_pda.field) ret__; \
66 switch (sizeof(_proxy_pda.field)) { \
67 case 1: \
68 asm(op "b %%fs:%c1,%0" \
69 : "=r" (ret__) \
70 : "i" (pda_offset(field)), \
71 "m" (_proxy_pda.field)); \
72 break; \
73 case 2: \
74 asm(op "w %%fs:%c1,%0" \
75 : "=r" (ret__) \
76 : "i" (pda_offset(field)), \
77 "m" (_proxy_pda.field)); \
78 break; \
79 case 4: \
80 asm(op "l %%fs:%c1,%0" \
81 : "=r" (ret__) \
82 : "i" (pda_offset(field)), \
83 "m" (_proxy_pda.field)); \
84 break; \
85 default: __bad_pda_field(); \
86 } \
87 ret__; })
88
89/* Return a pointer to a pda field */
90#define pda_addr(field) \
91 ((typeof(_proxy_pda.field) *)((unsigned char *)read_pda(_pda) + \
92 pda_offset(field)))
93
94#define read_pda(field) pda_from_op("mov",field)
95#define write_pda(field,val) pda_to_op("mov",field,val)
96#define add_pda(field,val) pda_to_op("add",field,val)
97#define sub_pda(field,val) pda_to_op("sub",field,val)
98#define or_pda(field,val) pda_to_op("or",field,val)
99
100#endif /* _I386_PDA_H */
diff --git a/include/asm-i386/percpu.h b/include/asm-i386/percpu.h
index 510ae1d3486c..f54830b5d5ac 100644
--- a/include/asm-i386/percpu.h
+++ b/include/asm-i386/percpu.h
@@ -1,9 +1,32 @@
1#ifndef __ARCH_I386_PERCPU__ 1#ifndef __ARCH_I386_PERCPU__
2#define __ARCH_I386_PERCPU__ 2#define __ARCH_I386_PERCPU__
3 3
4#ifndef __ASSEMBLY__ 4#ifdef __ASSEMBLY__
5#include <asm-generic/percpu.h> 5
6#else 6/*
7 * PER_CPU finds an address of a per-cpu variable.
8 *
9 * Args:
10 * var - variable name
11 * reg - 32bit register
12 *
13 * The resulting address is stored in the "reg" argument.
14 *
15 * Example:
16 * PER_CPU(cpu_gdt_descr, %ebx)
17 */
18#ifdef CONFIG_SMP
19#define PER_CPU(var, reg) \
20 movl %fs:per_cpu__##this_cpu_off, reg; \
21 lea per_cpu__##var(reg), reg
22#define PER_CPU_VAR(var) %fs:per_cpu__##var
23#else /* ! SMP */
24#define PER_CPU(var, reg) \
25 movl $per_cpu__##var, reg
26#define PER_CPU_VAR(var) per_cpu__##var
27#endif /* SMP */
28
29#else /* ...!ASSEMBLY */
7 30
8/* 31/*
9 * PER_CPU finds an address of a per-cpu variable. 32 * PER_CPU finds an address of a per-cpu variable.
@@ -18,14 +41,109 @@
18 * PER_CPU(cpu_gdt_descr, %ebx) 41 * PER_CPU(cpu_gdt_descr, %ebx)
19 */ 42 */
20#ifdef CONFIG_SMP 43#ifdef CONFIG_SMP
21#define PER_CPU(var, cpu) \ 44/* Same as generic implementation except for optimized local access. */
22 movl __per_cpu_offset(,cpu,4), cpu; \ 45#define __GENERIC_PER_CPU
23 addl $per_cpu__/**/var, cpu; 46
24#else /* ! SMP */ 47/* This is used for other cpus to find our section. */
25#define PER_CPU(var, cpu) \ 48extern unsigned long __per_cpu_offset[];
26 movl $per_cpu__/**/var, cpu; 49
50#define per_cpu_offset(x) (__per_cpu_offset[x])
51
52/* Separate out the type, so (int[3], foo) works. */
53#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
54#define DEFINE_PER_CPU(type, name) \
55 __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
56
57/* We can use this directly for local CPU (faster). */
58DECLARE_PER_CPU(unsigned long, this_cpu_off);
59
60/* var is in discarded region: offset to particular copy we want */
61#define per_cpu(var, cpu) (*({ \
62 extern int simple_indentifier_##var(void); \
63 RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
64
65#define __raw_get_cpu_var(var) (*({ \
66 extern int simple_indentifier_##var(void); \
67 RELOC_HIDE(&per_cpu__##var, x86_read_percpu(this_cpu_off)); \
68}))
69
70#define __get_cpu_var(var) __raw_get_cpu_var(var)
71
72/* A macro to avoid #include hell... */
73#define percpu_modcopy(pcpudst, src, size) \
74do { \
75 unsigned int __i; \
76 for_each_possible_cpu(__i) \
77 memcpy((pcpudst)+__per_cpu_offset[__i], \
78 (src), (size)); \
79} while (0)
80
81#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
82#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
83
84/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
85#define __percpu_seg "%%fs:"
86#else /* !SMP */
87#include <asm-generic/percpu.h>
88#define __percpu_seg ""
27#endif /* SMP */ 89#endif /* SMP */
28 90
91/* For arch-specific code, we can use direct single-insn ops (they
92 * don't give an lvalue though). */
93extern void __bad_percpu_size(void);
94
95#define percpu_to_op(op,var,val) \
96 do { \
97 typedef typeof(var) T__; \
98 if (0) { T__ tmp__; tmp__ = (val); } \
99 switch (sizeof(var)) { \
100 case 1: \
101 asm(op "b %1,"__percpu_seg"%0" \
102 : "+m" (var) \
103 :"ri" ((T__)val)); \
104 break; \
105 case 2: \
106 asm(op "w %1,"__percpu_seg"%0" \
107 : "+m" (var) \
108 :"ri" ((T__)val)); \
109 break; \
110 case 4: \
111 asm(op "l %1,"__percpu_seg"%0" \
112 : "+m" (var) \
113 :"ri" ((T__)val)); \
114 break; \
115 default: __bad_percpu_size(); \
116 } \
117 } while (0)
118
119#define percpu_from_op(op,var) \
120 ({ \
121 typeof(var) ret__; \
122 switch (sizeof(var)) { \
123 case 1: \
124 asm(op "b "__percpu_seg"%1,%0" \
125 : "=r" (ret__) \
126 : "m" (var)); \
127 break; \
128 case 2: \
129 asm(op "w "__percpu_seg"%1,%0" \
130 : "=r" (ret__) \
131 : "m" (var)); \
132 break; \
133 case 4: \
134 asm(op "l "__percpu_seg"%1,%0" \
135 : "=r" (ret__) \
136 : "m" (var)); \
137 break; \
138 default: __bad_percpu_size(); \
139 } \
140 ret__; })
141
142#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
143#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val)
144#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val)
145#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val)
146#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val)
29#endif /* !__ASSEMBLY__ */ 147#endif /* !__ASSEMBLY__ */
30 148
31#endif /* __ARCH_I386_PERCPU__ */ 149#endif /* __ARCH_I386_PERCPU__ */
diff --git a/include/asm-i386/pgalloc.h b/include/asm-i386/pgalloc.h
index c8dc2d0141a7..47430175b75f 100644
--- a/include/asm-i386/pgalloc.h
+++ b/include/asm-i386/pgalloc.h
@@ -1,7 +1,6 @@
1#ifndef _I386_PGALLOC_H 1#ifndef _I386_PGALLOC_H
2#define _I386_PGALLOC_H 2#define _I386_PGALLOC_H
3 3
4#include <asm/fixmap.h>
5#include <linux/threads.h> 4#include <linux/threads.h>
6#include <linux/mm.h> /* for struct page */ 5#include <linux/mm.h> /* for struct page */
7 6
diff --git a/include/asm-i386/pgtable-2level-defs.h b/include/asm-i386/pgtable-2level-defs.h
index 02518079f816..0f71c9f13da4 100644
--- a/include/asm-i386/pgtable-2level-defs.h
+++ b/include/asm-i386/pgtable-2level-defs.h
@@ -1,6 +1,8 @@
1#ifndef _I386_PGTABLE_2LEVEL_DEFS_H 1#ifndef _I386_PGTABLE_2LEVEL_DEFS_H
2#define _I386_PGTABLE_2LEVEL_DEFS_H 2#define _I386_PGTABLE_2LEVEL_DEFS_H
3 3
4#define SHARED_KERNEL_PMD 0
5
4/* 6/*
5 * traditional i386 two-level paging structure: 7 * traditional i386 two-level paging structure:
6 */ 8 */
diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h
index 38c3fcc0676d..a50fd1773de8 100644
--- a/include/asm-i386/pgtable-2level.h
+++ b/include/asm-i386/pgtable-2level.h
@@ -11,10 +11,23 @@
11 * within a page table are directly modified. Thus, the following 11 * within a page table are directly modified. Thus, the following
12 * hook is made available. 12 * hook is made available.
13 */ 13 */
14static inline void native_set_pte(pte_t *ptep , pte_t pte)
15{
16 *ptep = pte;
17}
18static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
19 pte_t *ptep , pte_t pte)
20{
21 native_set_pte(ptep, pte);
22}
23static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
24{
25 *pmdp = pmd;
26}
14#ifndef CONFIG_PARAVIRT 27#ifndef CONFIG_PARAVIRT
15#define set_pte(pteptr, pteval) (*(pteptr) = pteval) 28#define set_pte(pteptr, pteval) native_set_pte(pteptr, pteval)
16#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) 29#define set_pte_at(mm,addr,ptep,pteval) native_set_pte_at(mm, addr, ptep, pteval)
17#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) 30#define set_pmd(pmdptr, pmdval) native_set_pmd(pmdptr, pmdval)
18#endif 31#endif
19 32
20#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) 33#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
@@ -23,11 +36,23 @@
23#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) 36#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
24#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) 37#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
25 38
26#define raw_ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte_low, 0)) 39static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp)
40{
41 *xp = __pte(0);
42}
43
44#ifdef CONFIG_SMP
45static inline pte_t native_ptep_get_and_clear(pte_t *xp)
46{
47 return __pte(xchg(&xp->pte_low, 0));
48}
49#else
50#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
51#endif
27 52
28#define pte_page(x) pfn_to_page(pte_pfn(x)) 53#define pte_page(x) pfn_to_page(pte_pfn(x))
29#define pte_none(x) (!(x).pte_low) 54#define pte_none(x) (!(x).pte_low)
30#define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT))) 55#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
31#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) 56#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
32#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) 57#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
33 58
@@ -66,6 +91,4 @@ static inline int pte_exec_kernel(pte_t pte)
66#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) 91#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
67#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) 92#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
68 93
69void vmalloc_sync_all(void);
70
71#endif /* _I386_PGTABLE_2LEVEL_H */ 94#endif /* _I386_PGTABLE_2LEVEL_H */
diff --git a/include/asm-i386/pgtable-3level-defs.h b/include/asm-i386/pgtable-3level-defs.h
index eb3a1ea88671..c0df89f66e8b 100644
--- a/include/asm-i386/pgtable-3level-defs.h
+++ b/include/asm-i386/pgtable-3level-defs.h
@@ -1,6 +1,12 @@
1#ifndef _I386_PGTABLE_3LEVEL_DEFS_H 1#ifndef _I386_PGTABLE_3LEVEL_DEFS_H
2#define _I386_PGTABLE_3LEVEL_DEFS_H 2#define _I386_PGTABLE_3LEVEL_DEFS_H
3 3
4#ifdef CONFIG_PARAVIRT
5#define SHARED_KERNEL_PMD (paravirt_ops.shared_kernel_pmd)
6#else
7#define SHARED_KERNEL_PMD 1
8#endif
9
4/* 10/*
5 * PGDIR_SHIFT determines what a top-level page table entry can map 11 * PGDIR_SHIFT determines what a top-level page table entry can map
6 */ 12 */
diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h
index 7a2318f38303..eb0f1d7e96a1 100644
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -42,20 +42,23 @@ static inline int pte_exec_kernel(pte_t pte)
42 return pte_x(pte); 42 return pte_x(pte);
43} 43}
44 44
45#ifndef CONFIG_PARAVIRT
46/* Rules for using set_pte: the pte being assigned *must* be 45/* Rules for using set_pte: the pte being assigned *must* be
47 * either not present or in a state where the hardware will 46 * either not present or in a state where the hardware will
48 * not attempt to update the pte. In places where this is 47 * not attempt to update the pte. In places where this is
49 * not possible, use pte_get_and_clear to obtain the old pte 48 * not possible, use pte_get_and_clear to obtain the old pte
50 * value and then use set_pte to update it. -ben 49 * value and then use set_pte to update it. -ben
51 */ 50 */
52static inline void set_pte(pte_t *ptep, pte_t pte) 51static inline void native_set_pte(pte_t *ptep, pte_t pte)
53{ 52{
54 ptep->pte_high = pte.pte_high; 53 ptep->pte_high = pte.pte_high;
55 smp_wmb(); 54 smp_wmb();
56 ptep->pte_low = pte.pte_low; 55 ptep->pte_low = pte.pte_low;
57} 56}
58#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) 57static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
58 pte_t *ptep , pte_t pte)
59{
60 native_set_pte(ptep, pte);
61}
59 62
60/* 63/*
61 * Since this is only called on user PTEs, and the page fault handler 64 * Since this is only called on user PTEs, and the page fault handler
@@ -63,7 +66,8 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
63 * we are justified in merely clearing the PTE present bit, followed 66 * we are justified in merely clearing the PTE present bit, followed
64 * by a set. The ordering here is important. 67 * by a set. The ordering here is important.
65 */ 68 */
66static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) 69static inline void native_set_pte_present(struct mm_struct *mm, unsigned long addr,
70 pte_t *ptep, pte_t pte)
67{ 71{
68 ptep->pte_low = 0; 72 ptep->pte_low = 0;
69 smp_wmb(); 73 smp_wmb();
@@ -72,32 +76,48 @@ static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte
72 ptep->pte_low = pte.pte_low; 76 ptep->pte_low = pte.pte_low;
73} 77}
74 78
75#define set_pte_atomic(pteptr,pteval) \ 79static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
76 set_64bit((unsigned long long *)(pteptr),pte_val(pteval)) 80{
77#define set_pmd(pmdptr,pmdval) \ 81 set_64bit((unsigned long long *)(ptep),native_pte_val(pte));
78 set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval)) 82}
79#define set_pud(pudptr,pudval) \ 83static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
80 (*(pudptr) = (pudval)) 84{
85 set_64bit((unsigned long long *)(pmdp),native_pmd_val(pmd));
86}
87static inline void native_set_pud(pud_t *pudp, pud_t pud)
88{
89 *pudp = pud;
90}
81 91
82/* 92/*
83 * For PTEs and PDEs, we must clear the P-bit first when clearing a page table 93 * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
84 * entry, so clear the bottom half first and enforce ordering with a compiler 94 * entry, so clear the bottom half first and enforce ordering with a compiler
85 * barrier. 95 * barrier.
86 */ 96 */
87static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 97static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
88{ 98{
89 ptep->pte_low = 0; 99 ptep->pte_low = 0;
90 smp_wmb(); 100 smp_wmb();
91 ptep->pte_high = 0; 101 ptep->pte_high = 0;
92} 102}
93 103
94static inline void pmd_clear(pmd_t *pmd) 104static inline void native_pmd_clear(pmd_t *pmd)
95{ 105{
96 u32 *tmp = (u32 *)pmd; 106 u32 *tmp = (u32 *)pmd;
97 *tmp = 0; 107 *tmp = 0;
98 smp_wmb(); 108 smp_wmb();
99 *(tmp + 1) = 0; 109 *(tmp + 1) = 0;
100} 110}
111
112#ifndef CONFIG_PARAVIRT
113#define set_pte(ptep, pte) native_set_pte(ptep, pte)
114#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
115#define set_pte_present(mm, addr, ptep, pte) native_set_pte_present(mm, addr, ptep, pte)
116#define set_pte_atomic(ptep, pte) native_set_pte_atomic(ptep, pte)
117#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd)
118#define set_pud(pudp, pud) native_set_pud(pudp, pud)
119#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep)
120#define pmd_clear(pmd) native_pmd_clear(pmd)
101#endif 121#endif
102 122
103/* 123/*
@@ -119,7 +139,8 @@ static inline void pud_clear (pud_t * pud) { }
119#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ 139#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
120 pmd_index(address)) 140 pmd_index(address))
121 141
122static inline pte_t raw_ptep_get_and_clear(pte_t *ptep) 142#ifdef CONFIG_SMP
143static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
123{ 144{
124 pte_t res; 145 pte_t res;
125 146
@@ -130,6 +151,9 @@ static inline pte_t raw_ptep_get_and_clear(pte_t *ptep)
130 151
131 return res; 152 return res;
132} 153}
154#else
155#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
156#endif
133 157
134#define __HAVE_ARCH_PTE_SAME 158#define __HAVE_ARCH_PTE_SAME
135static inline int pte_same(pte_t a, pte_t b) 159static inline int pte_same(pte_t a, pte_t b)
@@ -146,28 +170,21 @@ static inline int pte_none(pte_t pte)
146 170
147static inline unsigned long pte_pfn(pte_t pte) 171static inline unsigned long pte_pfn(pte_t pte)
148{ 172{
149 return (pte.pte_low >> PAGE_SHIFT) | 173 return pte_val(pte) >> PAGE_SHIFT;
150 (pte.pte_high << (32 - PAGE_SHIFT));
151} 174}
152 175
153extern unsigned long long __supported_pte_mask; 176extern unsigned long long __supported_pte_mask;
154 177
155static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) 178static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
156{ 179{
157 pte_t pte; 180 return __pte((((unsigned long long)page_nr << PAGE_SHIFT) |
158 181 pgprot_val(pgprot)) & __supported_pte_mask);
159 pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) | \
160 (pgprot_val(pgprot) >> 32);
161 pte.pte_high &= (__supported_pte_mask >> 32);
162 pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & \
163 __supported_pte_mask;
164 return pte;
165} 182}
166 183
167static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) 184static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
168{ 185{
169 return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | \ 186 return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) |
170 pgprot_val(pgprot)) & __supported_pte_mask); 187 pgprot_val(pgprot)) & __supported_pte_mask);
171} 188}
172 189
173/* 190/*
@@ -187,6 +204,4 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
187 204
188#define __pmd_free_tlb(tlb, x) do { } while (0) 205#define __pmd_free_tlb(tlb, x) do { } while (0)
189 206
190#define vmalloc_sync_all() ((void)0)
191
192#endif /* _I386_PGTABLE_3LEVEL_H */ 207#endif /* _I386_PGTABLE_3LEVEL_H */
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index c3b58d473a55..c6b8b944120c 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -159,6 +159,7 @@ void paging_init(void);
159 159
160extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC; 160extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
161#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) 161#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
162#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
162#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD) 163#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD)
163#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) 164#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
164#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) 165#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
@@ -166,6 +167,7 @@ extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
166#define PAGE_KERNEL __pgprot(__PAGE_KERNEL) 167#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
167#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) 168#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
168#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) 169#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
170#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX)
169#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) 171#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
170#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) 172#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
171#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) 173#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
@@ -241,6 +243,8 @@ static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; re
241static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } 243static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
242static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; } 244static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; }
243 245
246extern void vmalloc_sync_all(void);
247
244#ifdef CONFIG_X86_PAE 248#ifdef CONFIG_X86_PAE
245# include <asm/pgtable-3level.h> 249# include <asm/pgtable-3level.h>
246#else 250#else
@@ -263,9 +267,18 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p
263 */ 267 */
264#define pte_update(mm, addr, ptep) do { } while (0) 268#define pte_update(mm, addr, ptep) do { } while (0)
265#define pte_update_defer(mm, addr, ptep) do { } while (0) 269#define pte_update_defer(mm, addr, ptep) do { } while (0)
266#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0)
267#endif 270#endif
268 271
272/* local pte updates need not use xchg for locking */
273static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
274{
275 pte_t res = *ptep;
276
277 /* Pure native function needs no input for mm, addr */
278 native_pte_clear(NULL, 0, ptep);
279 return res;
280}
281
269/* 282/*
270 * We only update the dirty/accessed state if we set 283 * We only update the dirty/accessed state if we set
271 * the dirty bit by hand in the kernel, since the hardware 284 * the dirty bit by hand in the kernel, since the hardware
@@ -330,7 +343,7 @@ do { \
330#define __HAVE_ARCH_PTEP_GET_AND_CLEAR 343#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
331static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 344static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
332{ 345{
333 pte_t pte = raw_ptep_get_and_clear(ptep); 346 pte_t pte = native_ptep_get_and_clear(ptep);
334 pte_update(mm, addr, ptep); 347 pte_update(mm, addr, ptep);
335 return pte; 348 return pte;
336} 349}
@@ -340,8 +353,11 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
340{ 353{
341 pte_t pte; 354 pte_t pte;
342 if (full) { 355 if (full) {
343 pte = *ptep; 356 /*
344 pte_clear(mm, addr, ptep); 357 * Full address destruction in progress; paravirt does not
358 * care about updates and native needs no locking
359 */
360 pte = native_local_ptep_get_and_clear(ptep);
345 } else { 361 } else {
346 pte = ptep_get_and_clear(mm, addr, ptep); 362 pte = ptep_get_and_clear(mm, addr, ptep);
347 } 363 }
@@ -470,24 +486,10 @@ extern pte_t *lookup_address(unsigned long address);
470#endif 486#endif
471 487
472#if defined(CONFIG_HIGHPTE) 488#if defined(CONFIG_HIGHPTE)
473#define pte_offset_map(dir, address) \ 489#define pte_offset_map(dir, address) \
474({ \ 490 ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
475 pte_t *__ptep; \ 491#define pte_offset_map_nested(dir, address) \
476 unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ 492 ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
477 __ptep = (pte_t *)kmap_atomic(pfn_to_page(pfn),KM_PTE0);\
478 paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \
479 __ptep = __ptep + pte_index(address); \
480 __ptep; \
481})
482#define pte_offset_map_nested(dir, address) \
483({ \
484 pte_t *__ptep; \
485 unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \
486 __ptep = (pte_t *)kmap_atomic(pfn_to_page(pfn),KM_PTE1);\
487 paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \
488 __ptep = __ptep + pte_index(address); \
489 __ptep; \
490})
491#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) 493#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
492#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) 494#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
493#else 495#else
@@ -510,6 +512,22 @@ do { \
510 * tables contain all the necessary information. 512 * tables contain all the necessary information.
511 */ 513 */
512#define update_mmu_cache(vma,address,pte) do { } while (0) 514#define update_mmu_cache(vma,address,pte) do { } while (0)
515
516void native_pagetable_setup_start(pgd_t *base);
517void native_pagetable_setup_done(pgd_t *base);
518
519#ifndef CONFIG_PARAVIRT
520static inline void paravirt_pagetable_setup_start(pgd_t *base)
521{
522 native_pagetable_setup_start(base);
523}
524
525static inline void paravirt_pagetable_setup_done(pgd_t *base)
526{
527 native_pagetable_setup_done(base);
528}
529#endif /* !CONFIG_PARAVIRT */
530
513#endif /* !__ASSEMBLY__ */ 531#endif /* !__ASSEMBLY__ */
514 532
515#ifdef CONFIG_FLATMEM 533#ifdef CONFIG_FLATMEM
diff --git a/include/asm-i386/processor-flags.h b/include/asm-i386/processor-flags.h
new file mode 100644
index 000000000000..5404e90edd57
--- /dev/null
+++ b/include/asm-i386/processor-flags.h
@@ -0,0 +1,91 @@
1#ifndef __ASM_I386_PROCESSOR_FLAGS_H
2#define __ASM_I386_PROCESSOR_FLAGS_H
3/* Various flags defined: can be included from assembler. */
4
5/*
6 * EFLAGS bits
7 */
8#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
9#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
10#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */
11#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
12#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
13#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
14#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
15#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
16#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
17#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
18#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
19#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
20#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
21#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
22#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
23#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
24#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
25
26/*
27 * Basic CPU control in CR0
28 */
29#define X86_CR0_PE 0x00000001 /* Protection Enable */
30#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor */
31#define X86_CR0_EM 0x00000004 /* Emulation */
32#define X86_CR0_TS 0x00000008 /* Task Switched */
33#define X86_CR0_ET 0x00000010 /* Extension Type */
34#define X86_CR0_NE 0x00000020 /* Numeric Error */
35#define X86_CR0_WP 0x00010000 /* Write Protect */
36#define X86_CR0_AM 0x00040000 /* Alignment Mask */
37#define X86_CR0_NW 0x20000000 /* Not Write-through */
38#define X86_CR0_CD 0x40000000 /* Cache Disable */
39#define X86_CR0_PG 0x80000000 /* Paging */
40
41/*
42 * Paging options in CR3
43 */
44#define X86_CR3_PWT 0x00000008 /* Page Write Through */
45#define X86_CR3_PCD 0x00000010 /* Page Cache Disable */
46
47/*
48 * Intel CPU features in CR4
49 */
50#define X86_CR4_VME 0x00000001 /* enable vm86 extensions */
51#define X86_CR4_PVI 0x00000002 /* virtual interrupts flag enable */
52#define X86_CR4_TSD 0x00000004 /* disable time stamp at ipl 3 */
53#define X86_CR4_DE 0x00000008 /* enable debugging extensions */
54#define X86_CR4_PSE 0x00000010 /* enable page size extensions */
55#define X86_CR4_PAE 0x00000020 /* enable physical address extensions */
56#define X86_CR4_MCE 0x00000040 /* Machine check enable */
57#define X86_CR4_PGE 0x00000080 /* enable global pages */
58#define X86_CR4_PCE 0x00000100 /* enable performance counters at ipl 3 */
59#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */
60#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
61#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */
62
63/*
64 * x86-64 Task Priority Register, CR8
65 */
66#define X86_CR8_TPR 0x00000007 /* task priority register */
67
68/*
69 * AMD and Transmeta use MSRs for configuration; see <asm/msr-index.h>
70 */
71
72/*
73 * NSC/Cyrix CPU configuration register indexes
74 */
75#define CX86_PCR0 0x20
76#define CX86_GCR 0xb8
77#define CX86_CCR0 0xc0
78#define CX86_CCR1 0xc1
79#define CX86_CCR2 0xc2
80#define CX86_CCR3 0xc3
81#define CX86_CCR4 0xe8
82#define CX86_CCR5 0xe9
83#define CX86_CCR6 0xea
84#define CX86_CCR7 0xeb
85#define CX86_PCR1 0xf0
86#define CX86_DIR0 0xfe
87#define CX86_DIR1 0xff
88#define CX86_ARR_BASE 0xc4
89#define CX86_RCR_BASE 0xdc
90
91#endif /* __ASM_I386_PROCESSOR_FLAGS_H */
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index 11bf899de8aa..70f3515c3db0 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -21,6 +21,7 @@
21#include <asm/percpu.h> 21#include <asm/percpu.h>
22#include <linux/cpumask.h> 22#include <linux/cpumask.h>
23#include <linux/init.h> 23#include <linux/init.h>
24#include <asm/processor-flags.h>
24 25
25/* flag for disabling the tsc */ 26/* flag for disabling the tsc */
26extern int tsc_disable; 27extern int tsc_disable;
@@ -115,7 +116,8 @@ extern char ignore_fpu_irq;
115 116
116void __init cpu_detect(struct cpuinfo_x86 *c); 117void __init cpu_detect(struct cpuinfo_x86 *c);
117 118
118extern void identify_cpu(struct cpuinfo_x86 *); 119extern void identify_boot_cpu(void);
120extern void identify_secondary_cpu(struct cpuinfo_x86 *);
119extern void print_cpu_info(struct cpuinfo_x86 *); 121extern void print_cpu_info(struct cpuinfo_x86 *);
120extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); 122extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
121extern unsigned short num_cache_leaves; 123extern unsigned short num_cache_leaves;
@@ -126,28 +128,7 @@ extern void detect_ht(struct cpuinfo_x86 *c);
126static inline void detect_ht(struct cpuinfo_x86 *c) {} 128static inline void detect_ht(struct cpuinfo_x86 *c) {}
127#endif 129#endif
128 130
129/* 131static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
130 * EFLAGS bits
131 */
132#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
133#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
134#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */
135#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
136#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
137#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
138#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
139#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
140#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
141#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
142#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
143#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
144#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
145#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
146#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
147#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
148#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
149
150static inline fastcall void native_cpuid(unsigned int *eax, unsigned int *ebx,
151 unsigned int *ecx, unsigned int *edx) 132 unsigned int *ecx, unsigned int *edx)
152{ 133{
153 /* ecx is often an input as well as an output. */ 134 /* ecx is often an input as well as an output. */
@@ -162,21 +143,6 @@ static inline fastcall void native_cpuid(unsigned int *eax, unsigned int *ebx,
162#define load_cr3(pgdir) write_cr3(__pa(pgdir)) 143#define load_cr3(pgdir) write_cr3(__pa(pgdir))
163 144
164/* 145/*
165 * Intel CPU features in CR4
166 */
167#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
168#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
169#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
170#define X86_CR4_DE 0x0008 /* enable debugging extensions */
171#define X86_CR4_PSE 0x0010 /* enable page size extensions */
172#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
173#define X86_CR4_MCE 0x0040 /* Machine check enable */
174#define X86_CR4_PGE 0x0080 /* enable global pages */
175#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
176#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
177#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
178
179/*
180 * Save the cr4 feature set we're using (ie 146 * Save the cr4 feature set we're using (ie
181 * Pentium 4MB enable and PPro Global page 147 * Pentium 4MB enable and PPro Global page
182 * enable), so that any CPU's that boot up 148 * enable), so that any CPU's that boot up
@@ -203,26 +169,6 @@ static inline void clear_in_cr4 (unsigned long mask)
203} 169}
204 170
205/* 171/*
206 * NSC/Cyrix CPU configuration register indexes
207 */
208
209#define CX86_PCR0 0x20
210#define CX86_GCR 0xb8
211#define CX86_CCR0 0xc0
212#define CX86_CCR1 0xc1
213#define CX86_CCR2 0xc2
214#define CX86_CCR3 0xc3
215#define CX86_CCR4 0xe8
216#define CX86_CCR5 0xe9
217#define CX86_CCR6 0xea
218#define CX86_CCR7 0xeb
219#define CX86_PCR1 0xf0
220#define CX86_DIR0 0xfe
221#define CX86_DIR1 0xff
222#define CX86_ARR_BASE 0xc4
223#define CX86_RCR_BASE 0xdc
224
225/*
226 * NSC/Cyrix CPU indexed register access macros 172 * NSC/Cyrix CPU indexed register access macros
227 */ 173 */
228 174
@@ -345,7 +291,8 @@ typedef struct {
345 291
346struct thread_struct; 292struct thread_struct;
347 293
348struct tss_struct { 294/* This is the TSS defined by the hardware. */
295struct i386_hw_tss {
349 unsigned short back_link,__blh; 296 unsigned short back_link,__blh;
350 unsigned long esp0; 297 unsigned long esp0;
351 unsigned short ss0,__ss0h; 298 unsigned short ss0,__ss0h;
@@ -369,6 +316,11 @@ struct tss_struct {
369 unsigned short gs, __gsh; 316 unsigned short gs, __gsh;
370 unsigned short ldt, __ldth; 317 unsigned short ldt, __ldth;
371 unsigned short trace, io_bitmap_base; 318 unsigned short trace, io_bitmap_base;
319} __attribute__((packed));
320
321struct tss_struct {
322 struct i386_hw_tss x86_tss;
323
372 /* 324 /*
373 * The extra 1 is there because the CPU will access an 325 * The extra 1 is there because the CPU will access an
374 * additional byte beyond the end of the IO permission 326 * additional byte beyond the end of the IO permission
@@ -421,10 +373,11 @@ struct thread_struct {
421}; 373};
422 374
423#define INIT_THREAD { \ 375#define INIT_THREAD { \
376 .esp0 = sizeof(init_stack) + (long)&init_stack, \
424 .vm86_info = NULL, \ 377 .vm86_info = NULL, \
425 .sysenter_cs = __KERNEL_CS, \ 378 .sysenter_cs = __KERNEL_CS, \
426 .io_bitmap_ptr = NULL, \ 379 .io_bitmap_ptr = NULL, \
427 .fs = __KERNEL_PDA, \ 380 .fs = __KERNEL_PERCPU, \
428} 381}
429 382
430/* 383/*
@@ -434,10 +387,12 @@ struct thread_struct {
434 * be within the limit. 387 * be within the limit.
435 */ 388 */
436#define INIT_TSS { \ 389#define INIT_TSS { \
437 .esp0 = sizeof(init_stack) + (long)&init_stack, \ 390 .x86_tss = { \
438 .ss0 = __KERNEL_DS, \ 391 .esp0 = sizeof(init_stack) + (long)&init_stack, \
439 .ss1 = __KERNEL_CS, \ 392 .ss0 = __KERNEL_DS, \
440 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ 393 .ss1 = __KERNEL_CS, \
394 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
395 }, \
441 .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \ 396 .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \
442} 397}
443 398
@@ -544,40 +499,70 @@ static inline void rep_nop(void)
544 499
545#define cpu_relax() rep_nop() 500#define cpu_relax() rep_nop()
546 501
547#ifdef CONFIG_PARAVIRT 502static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread)
548#include <asm/paravirt.h>
549#else
550#define paravirt_enabled() 0
551#define __cpuid native_cpuid
552
553static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
554{ 503{
555 tss->esp0 = thread->esp0; 504 tss->x86_tss.esp0 = thread->esp0;
556 /* This can only happen when SEP is enabled, no need to test "SEP"arately */ 505 /* This can only happen when SEP is enabled, no need to test "SEP"arately */
557 if (unlikely(tss->ss1 != thread->sysenter_cs)) { 506 if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
558 tss->ss1 = thread->sysenter_cs; 507 tss->x86_tss.ss1 = thread->sysenter_cs;
559 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); 508 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
560 } 509 }
561} 510}
562 511
563/*
564 * These special macros can be used to get or set a debugging register
565 */
566#define get_debugreg(var, register) \
567 __asm__("movl %%db" #register ", %0" \
568 :"=r" (var))
569#define set_debugreg(value, register) \
570 __asm__("movl %0,%%db" #register \
571 : /* no output */ \
572 :"r" (value))
573 512
574#define set_iopl_mask native_set_iopl_mask 513static inline unsigned long native_get_debugreg(int regno)
575#endif /* CONFIG_PARAVIRT */ 514{
515 unsigned long val = 0; /* Damn you, gcc! */
516
517 switch (regno) {
518 case 0:
519 asm("movl %%db0, %0" :"=r" (val)); break;
520 case 1:
521 asm("movl %%db1, %0" :"=r" (val)); break;
522 case 2:
523 asm("movl %%db2, %0" :"=r" (val)); break;
524 case 3:
525 asm("movl %%db3, %0" :"=r" (val)); break;
526 case 6:
527 asm("movl %%db6, %0" :"=r" (val)); break;
528 case 7:
529 asm("movl %%db7, %0" :"=r" (val)); break;
530 default:
531 BUG();
532 }
533 return val;
534}
535
536static inline void native_set_debugreg(int regno, unsigned long value)
537{
538 switch (regno) {
539 case 0:
540 asm("movl %0,%%db0" : /* no output */ :"r" (value));
541 break;
542 case 1:
543 asm("movl %0,%%db1" : /* no output */ :"r" (value));
544 break;
545 case 2:
546 asm("movl %0,%%db2" : /* no output */ :"r" (value));
547 break;
548 case 3:
549 asm("movl %0,%%db3" : /* no output */ :"r" (value));
550 break;
551 case 6:
552 asm("movl %0,%%db6" : /* no output */ :"r" (value));
553 break;
554 case 7:
555 asm("movl %0,%%db7" : /* no output */ :"r" (value));
556 break;
557 default:
558 BUG();
559 }
560}
576 561
577/* 562/*
578 * Set IOPL bits in EFLAGS from given mask 563 * Set IOPL bits in EFLAGS from given mask
579 */ 564 */
580static fastcall inline void native_set_iopl_mask(unsigned mask) 565static inline void native_set_iopl_mask(unsigned mask)
581{ 566{
582 unsigned int reg; 567 unsigned int reg;
583 __asm__ __volatile__ ("pushfl;" 568 __asm__ __volatile__ ("pushfl;"
@@ -590,6 +575,28 @@ static fastcall inline void native_set_iopl_mask(unsigned mask)
590 : "i" (~X86_EFLAGS_IOPL), "r" (mask)); 575 : "i" (~X86_EFLAGS_IOPL), "r" (mask));
591} 576}
592 577
578#ifdef CONFIG_PARAVIRT
579#include <asm/paravirt.h>
580#else
581#define paravirt_enabled() 0
582#define __cpuid native_cpuid
583
584static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
585{
586 native_load_esp0(tss, thread);
587}
588
589/*
590 * These special macros can be used to get or set a debugging register
591 */
592#define get_debugreg(var, register) \
593 (var) = native_get_debugreg(register)
594#define set_debugreg(value, register) \
595 native_set_debugreg(register, value)
596
597#define set_iopl_mask native_set_iopl_mask
598#endif /* CONFIG_PARAVIRT */
599
593/* 600/*
594 * Generic CPUID function 601 * Generic CPUID function
595 * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx 602 * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
@@ -742,8 +749,10 @@ extern unsigned long boot_option_idle_override;
742extern void enable_sep_cpu(void); 749extern void enable_sep_cpu(void);
743extern int sysenter_setup(void); 750extern int sysenter_setup(void);
744 751
745extern int init_gdt(int cpu, struct task_struct *idle);
746extern void cpu_set_gdt(int); 752extern void cpu_set_gdt(int);
747extern void secondary_cpu_init(void); 753extern void switch_to_new_gdt(void);
754extern void cpu_init(void);
755
756extern int force_mwait;
748 757
749#endif /* __ASM_I386_PROCESSOR_H */ 758#endif /* __ASM_I386_PROCESSOR_H */
diff --git a/include/asm-i386/reboot.h b/include/asm-i386/reboot.h
new file mode 100644
index 000000000000..e9e3ffc22c07
--- /dev/null
+++ b/include/asm-i386/reboot.h
@@ -0,0 +1,20 @@
1#ifndef _ASM_REBOOT_H
2#define _ASM_REBOOT_H
3
4struct pt_regs;
5
6struct machine_ops
7{
8 void (*restart)(char *cmd);
9 void (*halt)(void);
10 void (*power_off)(void);
11 void (*shutdown)(void);
12 void (*crash_shutdown)(struct pt_regs *);
13 void (*emergency_restart)(void);
14};
15
16extern struct machine_ops machine_ops;
17
18void machine_real_restart(unsigned char *code, int length);
19
20#endif /* _ASM_REBOOT_H */
diff --git a/include/linux/reboot_fixups.h b/include/asm-i386/reboot_fixups.h
index 480ea2d489d8..0cb7d87c2b68 100644
--- a/include/linux/reboot_fixups.h
+++ b/include/asm-i386/reboot_fixups.h
@@ -1,10 +1,6 @@
1#ifndef _LINUX_REBOOT_FIXUPS_H 1#ifndef _LINUX_REBOOT_FIXUPS_H
2#define _LINUX_REBOOT_FIXUPS_H 2#define _LINUX_REBOOT_FIXUPS_H
3 3
4#ifdef CONFIG_X86_REBOOTFIXUPS
5extern void mach_reboot_fixups(void); 4extern void mach_reboot_fixups(void);
6#else
7#define mach_reboot_fixups() ((void)(0))
8#endif
9 5
10#endif /* _LINUX_REBOOT_FIXUPS_H */ 6#endif /* _LINUX_REBOOT_FIXUPS_H */
diff --git a/include/asm-i386/required-features.h b/include/asm-i386/required-features.h
new file mode 100644
index 000000000000..9db866c1e64c
--- /dev/null
+++ b/include/asm-i386/required-features.h
@@ -0,0 +1,34 @@
1#ifndef _ASM_REQUIRED_FEATURES_H
2#define _ASM_REQUIRED_FEATURES_H 1
3
4/* Define minimum CPUID feature set for kernel These bits are checked
5 really early to actually display a visible error message before the
6 kernel dies. Only add word 0 bits here
7
8 Some requirements that are not in CPUID yet are also in the
9 CONFIG_X86_MINIMUM_CPU mode which is checked too.
10
11 The real information is in arch/i386/Kconfig.cpu, this just converts
12 the CONFIGs into a bitmask */
13
14#ifdef CONFIG_X86_PAE
15#define NEED_PAE (1<<X86_FEATURE_PAE)
16#else
17#define NEED_PAE 0
18#endif
19
20#ifdef CONFIG_X86_CMOV
21#define NEED_CMOV (1<<X86_FEATURE_CMOV)
22#else
23#define NEED_CMOV 0
24#endif
25
26#ifdef CONFIG_X86_CMPXCHG64
27#define NEED_CMPXCHG64 (1<<X86_FEATURE_CX8)
28#else
29#define NEED_CMPXCHG64 0
30#endif
31
32#define REQUIRED_MASK1 (NEED_PAE|NEED_CMOV|NEED_CMPXCHG64)
33
34#endif
diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h
index 065f10bfa487..597a47c2515f 100644
--- a/include/asm-i386/segment.h
+++ b/include/asm-i386/segment.h
@@ -39,7 +39,7 @@
39 * 25 - APM BIOS support 39 * 25 - APM BIOS support
40 * 40 *
41 * 26 - ESPFIX small SS 41 * 26 - ESPFIX small SS
42 * 27 - PDA [ per-cpu private data area ] 42 * 27 - per-cpu [ offset to per-cpu data area ]
43 * 28 - unused 43 * 28 - unused
44 * 29 - unused 44 * 29 - unused
45 * 30 - unused 45 * 30 - unused
@@ -74,8 +74,12 @@
74#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) 74#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14)
75#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) 75#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
76 76
77#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15) 77#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15)
78#define __KERNEL_PDA (GDT_ENTRY_PDA * 8) 78#ifdef CONFIG_SMP
79#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
80#else
81#define __KERNEL_PERCPU 0
82#endif
79 83
80#define GDT_ENTRY_DOUBLEFAULT_TSS 31 84#define GDT_ENTRY_DOUBLEFAULT_TSS 31
81 85
diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h
index 6bf0033a301c..090abc1da32a 100644
--- a/include/asm-i386/smp.h
+++ b/include/asm-i386/smp.h
@@ -8,19 +8,15 @@
8#include <linux/kernel.h> 8#include <linux/kernel.h>
9#include <linux/threads.h> 9#include <linux/threads.h>
10#include <linux/cpumask.h> 10#include <linux/cpumask.h>
11#include <asm/pda.h>
12#endif 11#endif
13 12
14#ifdef CONFIG_X86_LOCAL_APIC 13#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__)
15#ifndef __ASSEMBLY__
16#include <asm/fixmap.h>
17#include <asm/bitops.h> 14#include <asm/bitops.h>
18#include <asm/mpspec.h> 15#include <asm/mpspec.h>
16#include <asm/apic.h>
19#ifdef CONFIG_X86_IO_APIC 17#ifdef CONFIG_X86_IO_APIC
20#include <asm/io_apic.h> 18#include <asm/io_apic.h>
21#endif 19#endif
22#include <asm/apic.h>
23#endif
24#endif 20#endif
25 21
26#define BAD_APICID 0xFFu 22#define BAD_APICID 0xFFu
@@ -52,6 +48,59 @@ extern void cpu_exit_clear(void);
52extern void cpu_uninit(void); 48extern void cpu_uninit(void);
53#endif 49#endif
54 50
51struct smp_ops
52{
53 void (*smp_prepare_boot_cpu)(void);
54 void (*smp_prepare_cpus)(unsigned max_cpus);
55 int (*cpu_up)(unsigned cpu);
56 void (*smp_cpus_done)(unsigned max_cpus);
57
58 void (*smp_send_stop)(void);
59 void (*smp_send_reschedule)(int cpu);
60 int (*smp_call_function_mask)(cpumask_t mask,
61 void (*func)(void *info), void *info,
62 int wait);
63};
64
65extern struct smp_ops smp_ops;
66
67static inline void smp_prepare_boot_cpu(void)
68{
69 smp_ops.smp_prepare_boot_cpu();
70}
71static inline void smp_prepare_cpus(unsigned int max_cpus)
72{
73 smp_ops.smp_prepare_cpus(max_cpus);
74}
75static inline int __cpu_up(unsigned int cpu)
76{
77 return smp_ops.cpu_up(cpu);
78}
79static inline void smp_cpus_done(unsigned int max_cpus)
80{
81 smp_ops.smp_cpus_done(max_cpus);
82}
83
84static inline void smp_send_stop(void)
85{
86 smp_ops.smp_send_stop();
87}
88static inline void smp_send_reschedule(int cpu)
89{
90 smp_ops.smp_send_reschedule(cpu);
91}
92static inline int smp_call_function_mask(cpumask_t mask,
93 void (*func) (void *info), void *info,
94 int wait)
95{
96 return smp_ops.smp_call_function_mask(mask, func, info, wait);
97}
98
99void native_smp_prepare_boot_cpu(void);
100void native_smp_prepare_cpus(unsigned int max_cpus);
101int native_cpu_up(unsigned int cpunum);
102void native_smp_cpus_done(unsigned int max_cpus);
103
55#ifndef CONFIG_PARAVIRT 104#ifndef CONFIG_PARAVIRT
56#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \ 105#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \
57do { } while (0) 106do { } while (0)
@@ -62,7 +111,8 @@ do { } while (0)
62 * from the initial startup. We map APIC_BASE very early in page_setup(), 111 * from the initial startup. We map APIC_BASE very early in page_setup(),
63 * so this is correct in the x86 case. 112 * so this is correct in the x86 case.
64 */ 113 */
65#define raw_smp_processor_id() (read_pda(cpu_number)) 114DECLARE_PER_CPU(int, cpu_number);
115#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
66 116
67extern cpumask_t cpu_callout_map; 117extern cpumask_t cpu_callout_map;
68extern cpumask_t cpu_callin_map; 118extern cpumask_t cpu_callin_map;
diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h
index a6d20d9a1a30..c3a58c08c495 100644
--- a/include/asm-i386/system.h
+++ b/include/asm-i386/system.h
@@ -88,65 +88,96 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" \
88#define savesegment(seg, value) \ 88#define savesegment(seg, value) \
89 asm volatile("mov %%" #seg ",%0":"=rm" (value)) 89 asm volatile("mov %%" #seg ",%0":"=rm" (value))
90 90
91
92static inline void native_clts(void)
93{
94 asm volatile ("clts");
95}
96
97static inline unsigned long native_read_cr0(void)
98{
99 unsigned long val;
100 asm volatile("movl %%cr0,%0\n\t" :"=r" (val));
101 return val;
102}
103
104static inline void native_write_cr0(unsigned long val)
105{
106 asm volatile("movl %0,%%cr0": :"r" (val));
107}
108
109static inline unsigned long native_read_cr2(void)
110{
111 unsigned long val;
112 asm volatile("movl %%cr2,%0\n\t" :"=r" (val));
113 return val;
114}
115
116static inline void native_write_cr2(unsigned long val)
117{
118 asm volatile("movl %0,%%cr2": :"r" (val));
119}
120
121static inline unsigned long native_read_cr3(void)
122{
123 unsigned long val;
124 asm volatile("movl %%cr3,%0\n\t" :"=r" (val));
125 return val;
126}
127
128static inline void native_write_cr3(unsigned long val)
129{
130 asm volatile("movl %0,%%cr3": :"r" (val));
131}
132
133static inline unsigned long native_read_cr4(void)
134{
135 unsigned long val;
136 asm volatile("movl %%cr4,%0\n\t" :"=r" (val));
137 return val;
138}
139
140static inline unsigned long native_read_cr4_safe(void)
141{
142 unsigned long val;
143 /* This could fault if %cr4 does not exist */
144 asm("1: movl %%cr4, %0 \n"
145 "2: \n"
146 ".section __ex_table,\"a\" \n"
147 ".long 1b,2b \n"
148 ".previous \n"
149 : "=r" (val): "0" (0));
150 return val;
151}
152
153static inline void native_write_cr4(unsigned long val)
154{
155 asm volatile("movl %0,%%cr4": :"r" (val));
156}
157
158static inline void native_wbinvd(void)
159{
160 asm volatile("wbinvd": : :"memory");
161}
162
163
91#ifdef CONFIG_PARAVIRT 164#ifdef CONFIG_PARAVIRT
92#include <asm/paravirt.h> 165#include <asm/paravirt.h>
93#else 166#else
94#define read_cr0() ({ \ 167#define read_cr0() (native_read_cr0())
95 unsigned int __dummy; \ 168#define write_cr0(x) (native_write_cr0(x))
96 __asm__ __volatile__( \ 169#define read_cr2() (native_read_cr2())
97 "movl %%cr0,%0\n\t" \ 170#define write_cr2(x) (native_write_cr2(x))
98 :"=r" (__dummy)); \ 171#define read_cr3() (native_read_cr3())
99 __dummy; \ 172#define write_cr3(x) (native_write_cr3(x))
100}) 173#define read_cr4() (native_read_cr4())
101#define write_cr0(x) \ 174#define read_cr4_safe() (native_read_cr4_safe())
102 __asm__ __volatile__("movl %0,%%cr0": :"r" (x)) 175#define write_cr4(x) (native_write_cr4(x))
103 176#define wbinvd() (native_wbinvd())
104#define read_cr2() ({ \
105 unsigned int __dummy; \
106 __asm__ __volatile__( \
107 "movl %%cr2,%0\n\t" \
108 :"=r" (__dummy)); \
109 __dummy; \
110})
111#define write_cr2(x) \
112 __asm__ __volatile__("movl %0,%%cr2": :"r" (x))
113
114#define read_cr3() ({ \
115 unsigned int __dummy; \
116 __asm__ ( \
117 "movl %%cr3,%0\n\t" \
118 :"=r" (__dummy)); \
119 __dummy; \
120})
121#define write_cr3(x) \
122 __asm__ __volatile__("movl %0,%%cr3": :"r" (x))
123
124#define read_cr4() ({ \
125 unsigned int __dummy; \
126 __asm__( \
127 "movl %%cr4,%0\n\t" \
128 :"=r" (__dummy)); \
129 __dummy; \
130})
131#define read_cr4_safe() ({ \
132 unsigned int __dummy; \
133 /* This could fault if %cr4 does not exist */ \
134 __asm__("1: movl %%cr4, %0 \n" \
135 "2: \n" \
136 ".section __ex_table,\"a\" \n" \
137 ".long 1b,2b \n" \
138 ".previous \n" \
139 : "=r" (__dummy): "0" (0)); \
140 __dummy; \
141})
142#define write_cr4(x) \
143 __asm__ __volatile__("movl %0,%%cr4": :"r" (x))
144
145#define wbinvd() \
146 __asm__ __volatile__ ("wbinvd": : :"memory")
147 177
148/* Clear the 'TS' bit */ 178/* Clear the 'TS' bit */
149#define clts() __asm__ __volatile__ ("clts") 179#define clts() (native_clts())
180
150#endif/* CONFIG_PARAVIRT */ 181#endif/* CONFIG_PARAVIRT */
151 182
152/* Set the 'TS' bit */ 183/* Set the 'TS' bit */
diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h
index 12dd67bf760f..153770e25faa 100644
--- a/include/asm-i386/timer.h
+++ b/include/asm-i386/timer.h
@@ -9,8 +9,6 @@ void setup_pit_timer(void);
9unsigned long long native_sched_clock(void); 9unsigned long long native_sched_clock(void);
10unsigned long native_calculate_cpu_khz(void); 10unsigned long native_calculate_cpu_khz(void);
11 11
12/* Modifiers for buggy PIT handling */
13extern int pit_latch_buggy;
14extern int timer_ack; 12extern int timer_ack;
15extern int no_timer_check; 13extern int no_timer_check;
16extern int no_sync_cmos_clock; 14extern int no_sync_cmos_clock;
diff --git a/include/asm-i386/tlbflush.h b/include/asm-i386/tlbflush.h
index 4dd82840d53b..db7f77eacfa0 100644
--- a/include/asm-i386/tlbflush.h
+++ b/include/asm-i386/tlbflush.h
@@ -79,11 +79,15 @@
79 * - flush_tlb_range(vma, start, end) flushes a range of pages 79 * - flush_tlb_range(vma, start, end) flushes a range of pages
80 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages 80 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
81 * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables 81 * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
82 * - flush_tlb_others(cpumask, mm, va) flushes a TLBs on other cpus
82 * 83 *
83 * ..but the i386 has somewhat limited tlb flushing capabilities, 84 * ..but the i386 has somewhat limited tlb flushing capabilities,
84 * and page-granular flushes are available only on i486 and up. 85 * and page-granular flushes are available only on i486 and up.
85 */ 86 */
86 87
88#define TLB_FLUSH_ALL 0xffffffff
89
90
87#ifndef CONFIG_SMP 91#ifndef CONFIG_SMP
88 92
89#define flush_tlb() __flush_tlb() 93#define flush_tlb() __flush_tlb()
@@ -110,7 +114,12 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
110 __flush_tlb(); 114 __flush_tlb();
111} 115}
112 116
113#else 117static inline void native_flush_tlb_others(const cpumask_t *cpumask,
118 struct mm_struct *mm, unsigned long va)
119{
120}
121
122#else /* SMP */
114 123
115#include <asm/smp.h> 124#include <asm/smp.h>
116 125
@@ -129,6 +138,9 @@ static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long st
129 flush_tlb_mm(vma->vm_mm); 138 flush_tlb_mm(vma->vm_mm);
130} 139}
131 140
141void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm,
142 unsigned long va);
143
132#define TLBSTATE_OK 1 144#define TLBSTATE_OK 1
133#define TLBSTATE_LAZY 2 145#define TLBSTATE_LAZY 2
134 146
@@ -139,8 +151,11 @@ struct tlb_state
139 char __cacheline_padding[L1_CACHE_BYTES-8]; 151 char __cacheline_padding[L1_CACHE_BYTES-8];
140}; 152};
141DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); 153DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
154#endif /* SMP */
142 155
143 156#ifndef CONFIG_PARAVIRT
157#define flush_tlb_others(mask, mm, va) \
158 native_flush_tlb_others(&mask, mm, va)
144#endif 159#endif
145 160
146#define flush_tlb_kernel_range(start, end) flush_tlb_all() 161#define flush_tlb_kernel_range(start, end) flush_tlb_all()
diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h
index 84016ff481b9..3f3c1fa000b4 100644
--- a/include/asm-i386/tsc.h
+++ b/include/asm-i386/tsc.h
@@ -35,25 +35,30 @@ static inline cycles_t get_cycles(void)
35static __always_inline cycles_t get_cycles_sync(void) 35static __always_inline cycles_t get_cycles_sync(void)
36{ 36{
37 unsigned long long ret; 37 unsigned long long ret;
38#ifdef X86_FEATURE_SYNC_RDTSC
39 unsigned eax; 38 unsigned eax;
40 39
41 /* 40 /*
41 * Use RDTSCP if possible; it is guaranteed to be synchronous
42 * and doesn't cause a VMEXIT on Hypervisors
43 */
44 alternative_io(ASM_NOP3, ".byte 0x0f,0x01,0xf9", X86_FEATURE_RDTSCP,
45 "=A" (ret), "0" (0ULL) : "ecx", "memory");
46 if (ret)
47 return ret;
48
49 /*
42 * Don't do an additional sync on CPUs where we know 50 * Don't do an additional sync on CPUs where we know
43 * RDTSC is already synchronous: 51 * RDTSC is already synchronous:
44 */ 52 */
45 alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC, 53 alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC,
46 "=a" (eax), "0" (1) : "ebx","ecx","edx","memory"); 54 "=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
47#else
48 sync_core();
49#endif
50 rdtscll(ret); 55 rdtscll(ret);
51 56
52 return ret; 57 return ret;
53} 58}
54 59
55extern void tsc_init(void); 60extern void tsc_init(void);
56extern void mark_tsc_unstable(void); 61extern void mark_tsc_unstable(char *reason);
57extern int unsynchronized_tsc(void); 62extern int unsynchronized_tsc(void);
58extern void init_tsc_clocksource(void); 63extern void init_tsc_clocksource(void);
59 64
diff --git a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
index 70829ae3ad52..e2aa5e0d0cc7 100644
--- a/include/asm-i386/uaccess.h
+++ b/include/asm-i386/uaccess.h
@@ -397,7 +397,19 @@ unsigned long __must_check __copy_from_user_ll_nocache(void *to,
397unsigned long __must_check __copy_from_user_ll_nocache_nozero(void *to, 397unsigned long __must_check __copy_from_user_ll_nocache_nozero(void *to,
398 const void __user *from, unsigned long n); 398 const void __user *from, unsigned long n);
399 399
400/* 400/**
401 * __copy_to_user_inatomic: - Copy a block of data into user space, with less checking.
402 * @to: Destination address, in user space.
403 * @from: Source address, in kernel space.
404 * @n: Number of bytes to copy.
405 *
406 * Context: User context only.
407 *
408 * Copy data from kernel space to user space. Caller must check
409 * the specified block with access_ok() before calling this function.
410 * The caller should also make sure he pins the user space address
411 * so that the we don't result in page fault and sleep.
412 *
401 * Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault 413 * Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault
402 * we return the initial request size (1, 2 or 4), as copy_*_user should do. 414 * we return the initial request size (1, 2 or 4), as copy_*_user should do.
403 * If a store crosses a page boundary and gets a fault, the x86 will not write 415 * If a store crosses a page boundary and gets a fault, the x86 will not write
diff --git a/include/asm-i386/vmi_time.h b/include/asm-i386/vmi_time.h
index c3a1fcf66c96..213930b995cb 100644
--- a/include/asm-i386/vmi_time.h
+++ b/include/asm-i386/vmi_time.h
@@ -53,22 +53,8 @@ extern unsigned long long vmi_get_sched_cycles(void);
53extern unsigned long vmi_cpu_khz(void); 53extern unsigned long vmi_cpu_khz(void);
54 54
55#ifdef CONFIG_X86_LOCAL_APIC 55#ifdef CONFIG_X86_LOCAL_APIC
56extern void __init vmi_timer_setup_boot_alarm(void); 56extern void __devinit vmi_time_bsp_init(void);
57extern void __devinit vmi_timer_setup_secondary_alarm(void); 57extern void __devinit vmi_time_ap_init(void);
58extern void apic_vmi_timer_interrupt(void);
59#endif
60
61#ifdef CONFIG_NO_IDLE_HZ
62extern int vmi_stop_hz_timer(void);
63extern void vmi_account_time_restart_hz_timer(void);
64#else
65static inline int vmi_stop_hz_timer(void)
66{
67 return 0;
68}
69static inline void vmi_account_time_restart_hz_timer(void)
70{
71}
72#endif 58#endif
73 59
74/* 60/*
diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h
index b5c65081a3aa..cef2400983fa 100644
--- a/include/asm-ia64/mmu_context.h
+++ b/include/asm-ia64/mmu_context.h
@@ -29,6 +29,7 @@
29#include <linux/spinlock.h> 29#include <linux/spinlock.h>
30 30
31#include <asm/processor.h> 31#include <asm/processor.h>
32#include <asm-generic/mm_hooks.h>
32 33
33struct ia64_ctx { 34struct ia64_ctx {
34 spinlock_t lock; 35 spinlock_t lock;
diff --git a/include/asm-m32r/mmu_context.h b/include/asm-m32r/mmu_context.h
index 1f40d4a0acf1..91909e5dd9d0 100644
--- a/include/asm-m32r/mmu_context.h
+++ b/include/asm-m32r/mmu_context.h
@@ -15,6 +15,7 @@
15#include <asm/pgalloc.h> 15#include <asm/pgalloc.h>
16#include <asm/mmu.h> 16#include <asm/mmu.h>
17#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
18#include <asm-generic/mm_hooks.h>
18 19
19/* 20/*
20 * Cache of MMU context last used. 21 * Cache of MMU context last used.
diff --git a/include/asm-m68k/mmu_context.h b/include/asm-m68k/mmu_context.h
index 231d11bd8e32..894dacbcee14 100644
--- a/include/asm-m68k/mmu_context.h
+++ b/include/asm-m68k/mmu_context.h
@@ -1,6 +1,7 @@
1#ifndef __M68K_MMU_CONTEXT_H 1#ifndef __M68K_MMU_CONTEXT_H
2#define __M68K_MMU_CONTEXT_H 2#define __M68K_MMU_CONTEXT_H
3 3
4#include <asm-generic/mm_hooks.h>
4 5
5static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) 6static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
6{ 7{
diff --git a/include/asm-m68knommu/mmu_context.h b/include/asm-m68knommu/mmu_context.h
index 6c077d3a2572..9ccee4278c97 100644
--- a/include/asm-m68knommu/mmu_context.h
+++ b/include/asm-m68knommu/mmu_context.h
@@ -4,6 +4,7 @@
4#include <asm/setup.h> 4#include <asm/setup.h>
5#include <asm/page.h> 5#include <asm/page.h>
6#include <asm/pgalloc.h> 6#include <asm/pgalloc.h>
7#include <asm-generic/mm_hooks.h>
7 8
8static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) 9static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
9{ 10{
diff --git a/include/asm-mips/mmu_context.h b/include/asm-mips/mmu_context.h
index fe065d6070ca..65024ffd7879 100644
--- a/include/asm-mips/mmu_context.h
+++ b/include/asm-mips/mmu_context.h
@@ -20,6 +20,7 @@
20#include <asm/mipsmtregs.h> 20#include <asm/mipsmtregs.h>
21#include <asm/smtc.h> 21#include <asm/smtc.h>
22#endif /* SMTC */ 22#endif /* SMTC */
23#include <asm-generic/mm_hooks.h>
23 24
24/* 25/*
25 * For the fast tlb miss handlers, we keep a per cpu array of pointers 26 * For the fast tlb miss handlers, we keep a per cpu array of pointers
diff --git a/include/asm-parisc/mmu_context.h b/include/asm-parisc/mmu_context.h
index 9c05836239a2..bad690298f0c 100644
--- a/include/asm-parisc/mmu_context.h
+++ b/include/asm-parisc/mmu_context.h
@@ -5,6 +5,7 @@
5#include <asm/atomic.h> 5#include <asm/atomic.h>
6#include <asm/pgalloc.h> 6#include <asm/pgalloc.h>
7#include <asm/pgtable.h> 7#include <asm/pgtable.h>
8#include <asm-generic/mm_hooks.h>
8 9
9static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) 10static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
10{ 11{
diff --git a/include/asm-powerpc/mmu_context.h b/include/asm-powerpc/mmu_context.h
index 083ac917bd29..c0d7795e3d25 100644
--- a/include/asm-powerpc/mmu_context.h
+++ b/include/asm-powerpc/mmu_context.h
@@ -10,6 +10,7 @@
10#include <linux/mm.h> 10#include <linux/mm.h>
11#include <asm/mmu.h> 11#include <asm/mmu.h>
12#include <asm/cputable.h> 12#include <asm/cputable.h>
13#include <asm-generic/mm_hooks.h>
13 14
14/* 15/*
15 * Copyright (C) 2001 PPC 64 Team, IBM Corp 16 * Copyright (C) 2001 PPC 64 Team, IBM Corp
diff --git a/include/asm-ppc/mmu_context.h b/include/asm-ppc/mmu_context.h
index 2bc8589cc451..a6441a063e5d 100644
--- a/include/asm-ppc/mmu_context.h
+++ b/include/asm-ppc/mmu_context.h
@@ -6,6 +6,7 @@
6#include <asm/bitops.h> 6#include <asm/bitops.h>
7#include <asm/mmu.h> 7#include <asm/mmu.h>
8#include <asm/cputable.h> 8#include <asm/cputable.h>
9#include <asm-generic/mm_hooks.h>
9 10
10/* 11/*
11 * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs 12 * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs
diff --git a/include/asm-s390/mmu_context.h b/include/asm-s390/mmu_context.h
index 1d21da220d49..501cb9b06314 100644
--- a/include/asm-s390/mmu_context.h
+++ b/include/asm-s390/mmu_context.h
@@ -10,6 +10,8 @@
10#define __S390_MMU_CONTEXT_H 10#define __S390_MMU_CONTEXT_H
11 11
12#include <asm/pgalloc.h> 12#include <asm/pgalloc.h>
13#include <asm-generic/mm_hooks.h>
14
13/* 15/*
14 * get a new mmu context.. S390 don't know about contexts. 16 * get a new mmu context.. S390 don't know about contexts.
15 */ 17 */
diff --git a/include/asm-sh/mmu_context.h b/include/asm-sh/mmu_context.h
index 342024425b7d..01acaaae9751 100644
--- a/include/asm-sh/mmu_context.h
+++ b/include/asm-sh/mmu_context.h
@@ -12,6 +12,7 @@
12#include <asm/tlbflush.h> 12#include <asm/tlbflush.h>
13#include <asm/uaccess.h> 13#include <asm/uaccess.h>
14#include <asm/io.h> 14#include <asm/io.h>
15#include <asm-generic/mm_hooks.h>
15 16
16/* 17/*
17 * The MMU "context" consists of two things: 18 * The MMU "context" consists of two things:
diff --git a/include/asm-sh64/mmu_context.h b/include/asm-sh64/mmu_context.h
index 8c860dab2d0e..507bf72bb8e1 100644
--- a/include/asm-sh64/mmu_context.h
+++ b/include/asm-sh64/mmu_context.h
@@ -27,7 +27,7 @@
27extern unsigned long mmu_context_cache; 27extern unsigned long mmu_context_cache;
28 28
29#include <asm/page.h> 29#include <asm/page.h>
30 30#include <asm-generic/mm_hooks.h>
31 31
32/* Current mm's pgd */ 32/* Current mm's pgd */
33extern pgd_t *mmu_pdtp_cache; 33extern pgd_t *mmu_pdtp_cache;
diff --git a/include/asm-sparc/mmu_context.h b/include/asm-sparc/mmu_context.h
index ed1e01d04d21..671a997b9e69 100644
--- a/include/asm-sparc/mmu_context.h
+++ b/include/asm-sparc/mmu_context.h
@@ -5,6 +5,8 @@
5 5
6#ifndef __ASSEMBLY__ 6#ifndef __ASSEMBLY__
7 7
8#include <asm-generic/mm_hooks.h>
9
8static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) 10static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
9{ 11{
10} 12}
diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
index 2337eb487719..8d129032013e 100644
--- a/include/asm-sparc64/mmu_context.h
+++ b/include/asm-sparc64/mmu_context.h
@@ -9,6 +9,7 @@
9#include <linux/spinlock.h> 9#include <linux/spinlock.h>
10#include <asm/system.h> 10#include <asm/system.h>
11#include <asm/spitfire.h> 11#include <asm/spitfire.h>
12#include <asm-generic/mm_hooks.h>
12 13
13static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) 14static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
14{ 15{
diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h
index 0d3df76aa47f..ced8cbde046d 100644
--- a/include/asm-sparc64/percpu.h
+++ b/include/asm-sparc64/percpu.h
@@ -5,16 +5,6 @@
5 5
6#ifdef CONFIG_SMP 6#ifdef CONFIG_SMP
7 7
8#ifdef CONFIG_MODULES
9# define PERCPU_MODULE_RESERVE 8192
10#else
11# define PERCPU_MODULE_RESERVE 0
12#endif
13
14#define PERCPU_ENOUGH_ROOM \
15 (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \
16 PERCPU_MODULE_RESERVE)
17
18extern void setup_per_cpu_areas(void); 8extern void setup_per_cpu_areas(void);
19 9
20extern unsigned long __per_cpu_base; 10extern unsigned long __per_cpu_base;
diff --git a/include/asm-um/mmu_context.h b/include/asm-um/mmu_context.h
index f709c784bf12..9aa4b44e8cc1 100644
--- a/include/asm-um/mmu_context.h
+++ b/include/asm-um/mmu_context.h
@@ -6,6 +6,8 @@
6#ifndef __UM_MMU_CONTEXT_H 6#ifndef __UM_MMU_CONTEXT_H
7#define __UM_MMU_CONTEXT_H 7#define __UM_MMU_CONTEXT_H
8 8
9#include <asm-generic/mm_hooks.h>
10
9#include "linux/sched.h" 11#include "linux/sched.h"
10#include "choose-mode.h" 12#include "choose-mode.h"
11#include "um_mmu.h" 13#include "um_mmu.h"
diff --git a/include/asm-v850/mmu_context.h b/include/asm-v850/mmu_context.h
index f521c8050d3c..01daacd5474e 100644
--- a/include/asm-v850/mmu_context.h
+++ b/include/asm-v850/mmu_context.h
@@ -1,6 +1,8 @@
1#ifndef __V850_MMU_CONTEXT_H__ 1#ifndef __V850_MMU_CONTEXT_H__
2#define __V850_MMU_CONTEXT_H__ 2#define __V850_MMU_CONTEXT_H__
3 3
4#include <asm-generic/mm_hooks.h>
5
4#define destroy_context(mm) ((void)0) 6#define destroy_context(mm) ((void)0)
5#define init_new_context(tsk,mm) 0 7#define init_new_context(tsk,mm) 0
6#define switch_mm(prev,next,tsk) ((void)0) 8#define switch_mm(prev,next,tsk) ((void)0)
diff --git a/include/asm-x86_64/Kbuild b/include/asm-x86_64/Kbuild
index ebd7117782a6..89ad1fc27c8b 100644
--- a/include/asm-x86_64/Kbuild
+++ b/include/asm-x86_64/Kbuild
@@ -8,7 +8,7 @@ header-y += boot.h
8header-y += bootsetup.h 8header-y += bootsetup.h
9header-y += debugreg.h 9header-y += debugreg.h
10header-y += ldt.h 10header-y += ldt.h
11header-y += msr.h 11header-y += msr-index.h
12header-y += prctl.h 12header-y += prctl.h
13header-y += ptrace-abi.h 13header-y += ptrace-abi.h
14header-y += sigcontext32.h 14header-y += sigcontext32.h
@@ -16,5 +16,7 @@ header-y += ucontext.h
16header-y += vsyscall32.h 16header-y += vsyscall32.h
17 17
18unifdef-y += mce.h 18unifdef-y += mce.h
19unifdef-y += msr.h
19unifdef-y += mtrr.h 20unifdef-y += mtrr.h
20unifdef-y += vsyscall.h 21unifdef-y += vsyscall.h
22unifdef-y += const.h
diff --git a/include/asm-x86_64/alternative.h b/include/asm-x86_64/alternative.h
index a6657b4f3e0e..a09fe85c268e 100644
--- a/include/asm-x86_64/alternative.h
+++ b/include/asm-x86_64/alternative.h
@@ -16,6 +16,7 @@ struct alt_instr {
16 u8 pad[5]; 16 u8 pad[5];
17}; 17};
18 18
19extern void alternative_instructions(void);
19extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); 20extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
20 21
21struct module; 22struct module;
@@ -141,8 +142,8 @@ void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end);
141static inline void 142static inline void
142apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) 143apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
143{} 144{}
144#define __start_parainstructions NULL 145#define __parainstructions NULL
145#define __stop_parainstructions NULL 146#define __parainstructions_end NULL
146#endif 147#endif
147 148
148#endif /* _X86_64_ALTERNATIVE_H */ 149#endif /* _X86_64_ALTERNATIVE_H */
diff --git a/include/asm-x86_64/apic.h b/include/asm-x86_64/apic.h
index 7cfb39cbd918..45e9fca1febc 100644
--- a/include/asm-x86_64/apic.h
+++ b/include/asm-x86_64/apic.h
@@ -2,6 +2,7 @@
2#define __ASM_APIC_H 2#define __ASM_APIC_H
3 3
4#include <linux/pm.h> 4#include <linux/pm.h>
5#include <linux/delay.h>
5#include <asm/fixmap.h> 6#include <asm/fixmap.h>
6#include <asm/apicdef.h> 7#include <asm/apicdef.h>
7#include <asm/system.h> 8#include <asm/system.h>
@@ -47,11 +48,8 @@ static __inline unsigned int apic_read(unsigned long reg)
47 return *((volatile unsigned int *)(APIC_BASE+reg)); 48 return *((volatile unsigned int *)(APIC_BASE+reg));
48} 49}
49 50
50static __inline__ void apic_wait_icr_idle(void) 51extern void apic_wait_icr_idle(void);
51{ 52extern unsigned int safe_apic_wait_icr_idle(void);
52 while (apic_read( APIC_ICR ) & APIC_ICR_BUSY)
53 cpu_relax();
54}
55 53
56static inline void ack_APIC_irq(void) 54static inline void ack_APIC_irq(void)
57{ 55{
@@ -83,7 +81,7 @@ extern void setup_secondary_APIC_clock (void);
83extern int APIC_init_uniprocessor (void); 81extern int APIC_init_uniprocessor (void);
84extern void disable_APIC_timer(void); 82extern void disable_APIC_timer(void);
85extern void enable_APIC_timer(void); 83extern void enable_APIC_timer(void);
86extern void clustered_apic_check(void); 84extern void setup_apic_routing(void);
87 85
88extern void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector, 86extern void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector,
89 unsigned char msg_type, unsigned char mask); 87 unsigned char msg_type, unsigned char mask);
diff --git a/include/asm-x86_64/bugs.h b/include/asm-x86_64/bugs.h
index d86c5dd689fa..b33dc04d8f42 100644
--- a/include/asm-x86_64/bugs.h
+++ b/include/asm-x86_64/bugs.h
@@ -1,28 +1,6 @@
1/* 1#ifndef _ASM_X86_64_BUGS_H
2 * include/asm-x86_64/bugs.h 2#define _ASM_X86_64_BUGS_H
3 *
4 * Copyright (C) 1994 Linus Torvalds
5 * Copyright (C) 2000 SuSE
6 *
7 * This is included by init/main.c to check for architecture-dependent bugs.
8 *
9 * Needs:
10 * void check_bugs(void);
11 */
12 3
13#include <asm/processor.h> 4void check_bugs(void);
14#include <asm/i387.h>
15#include <asm/msr.h>
16#include <asm/pda.h>
17 5
18extern void alternative_instructions(void); 6#endif /* _ASM_X86_64_BUGS_H */
19
20static void __init check_bugs(void)
21{
22 identify_cpu(&boot_cpu_data);
23#if !defined(CONFIG_SMP)
24 printk("CPU: ");
25 print_cpu_info(&boot_cpu_data);
26#endif
27 alternative_instructions();
28}
diff --git a/include/asm-x86_64/const.h b/include/asm-x86_64/const.h
new file mode 100644
index 000000000000..54fb08f3db9b
--- /dev/null
+++ b/include/asm-x86_64/const.h
@@ -0,0 +1,20 @@
1/* const.h: Macros for dealing with constants. */
2
3#ifndef _X86_64_CONST_H
4#define _X86_64_CONST_H
5
6/* Some constant macros are used in both assembler and
7 * C code. Therefore we cannot annotate them always with
8 * 'UL' and other type specificers unilaterally. We
9 * use the following macros to deal with this.
10 */
11
12#ifdef __ASSEMBLY__
13#define _AC(X,Y) X
14#else
15#define __AC(X,Y) (X##Y)
16#define _AC(X,Y) __AC(X,Y)
17#endif
18
19
20#endif /* !(_X86_64_CONST_H) */
diff --git a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h
index 913d6ac00033..ac991b5ca0fd 100644
--- a/include/asm-x86_64/desc.h
+++ b/include/asm-x86_64/desc.h
@@ -107,16 +107,6 @@ static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
107 DESC_LDT, size * 8 - 1); 107 DESC_LDT, size * 8 - 1);
108} 108}
109 109
110static inline void set_seg_base(unsigned cpu, int entry, void *base)
111{
112 struct desc_struct *d = &cpu_gdt(cpu)[entry];
113 u32 addr = (u32)(u64)base;
114 BUG_ON((u64)base >> 32);
115 d->base0 = addr & 0xffff;
116 d->base1 = (addr >> 16) & 0xff;
117 d->base2 = (addr >> 24) & 0xff;
118}
119
120#define LDT_entry_a(info) \ 110#define LDT_entry_a(info) \
121 ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) 111 ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
122/* Don't allow setting of the lm bit. It is useless anyways because 112/* Don't allow setting of the lm bit. It is useless anyways because
@@ -145,16 +135,13 @@ static inline void set_seg_base(unsigned cpu, int entry, void *base)
145 (info)->useable == 0 && \ 135 (info)->useable == 0 && \
146 (info)->lm == 0) 136 (info)->lm == 0)
147 137
148#if TLS_SIZE != 24
149# error update this code.
150#endif
151
152static inline void load_TLS(struct thread_struct *t, unsigned int cpu) 138static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
153{ 139{
140 unsigned int i;
154 u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN); 141 u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN);
155 gdt[0] = t->tls_array[0]; 142
156 gdt[1] = t->tls_array[1]; 143 for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
157 gdt[2] = t->tls_array[2]; 144 gdt[i] = t->tls_array[i];
158} 145}
159 146
160/* 147/*
diff --git a/include/asm-x86_64/dma-mapping.h b/include/asm-x86_64/dma-mapping.h
index d2af227f06d0..6897e2a436e5 100644
--- a/include/asm-x86_64/dma-mapping.h
+++ b/include/asm-x86_64/dma-mapping.h
@@ -52,7 +52,7 @@ struct dma_mapping_ops {
52}; 52};
53 53
54extern dma_addr_t bad_dma_address; 54extern dma_addr_t bad_dma_address;
55extern struct dma_mapping_ops* dma_ops; 55extern const struct dma_mapping_ops* dma_ops;
56extern int iommu_merge; 56extern int iommu_merge;
57 57
58static inline int dma_mapping_error(dma_addr_t dma_addr) 58static inline int dma_mapping_error(dma_addr_t dma_addr)
diff --git a/include/asm-x86_64/fixmap.h b/include/asm-x86_64/fixmap.h
index 1b620db5b9e3..e90e1677531b 100644
--- a/include/asm-x86_64/fixmap.h
+++ b/include/asm-x86_64/fixmap.h
@@ -15,7 +15,6 @@
15#include <asm/apicdef.h> 15#include <asm/apicdef.h>
16#include <asm/page.h> 16#include <asm/page.h>
17#include <asm/vsyscall.h> 17#include <asm/vsyscall.h>
18#include <asm/vsyscall32.h>
19 18
20/* 19/*
21 * Here we define all the compile-time 'special' virtual 20 * Here we define all the compile-time 'special' virtual
diff --git a/include/asm-x86_64/genapic.h b/include/asm-x86_64/genapic.h
index b80f4bb5f273..d7e516ccbaa4 100644
--- a/include/asm-x86_64/genapic.h
+++ b/include/asm-x86_64/genapic.h
@@ -29,7 +29,9 @@ struct genapic {
29 unsigned int (*phys_pkg_id)(int index_msb); 29 unsigned int (*phys_pkg_id)(int index_msb);
30}; 30};
31 31
32extern struct genapic *genapic;
32 33
33extern struct genapic *genapic, *genapic_force, apic_flat; 34extern struct genapic apic_flat;
35extern struct genapic apic_physflat;
34 36
35#endif 37#endif
diff --git a/include/asm-x86_64/ipi.h b/include/asm-x86_64/ipi.h
index 2a5c162b7d92..a7c75ea408a8 100644
--- a/include/asm-x86_64/ipi.h
+++ b/include/asm-x86_64/ipi.h
@@ -18,10 +18,8 @@
18 * Subject to the GNU Public License, v.2 18 * Subject to the GNU Public License, v.2
19 */ 19 */
20 20
21#include <asm/fixmap.h>
22#include <asm/hw_irq.h> 21#include <asm/hw_irq.h>
23#include <asm/apicdef.h> 22#include <asm/apic.h>
24#include <asm/genapic.h>
25 23
26/* 24/*
27 * the following functions deal with sending IPIs between CPUs. 25 * the following functions deal with sending IPIs between CPUs.
@@ -76,10 +74,42 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, unsign
76 apic_write(APIC_ICR, cfg); 74 apic_write(APIC_ICR, cfg);
77} 75}
78 76
77/*
78 * This is used to send an IPI with no shorthand notation (the destination is
79 * specified in bits 56 to 63 of the ICR).
80 */
81static inline void __send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest)
82{
83 unsigned long cfg;
84
85 /*
86 * Wait for idle.
87 */
88 if (unlikely(vector == NMI_VECTOR))
89 safe_apic_wait_icr_idle();
90 else
91 apic_wait_icr_idle();
92
93 /*
94 * prepare target chip field
95 */
96 cfg = __prepare_ICR2(mask);
97 apic_write(APIC_ICR2, cfg);
98
99 /*
100 * program the ICR
101 */
102 cfg = __prepare_ICR(0, vector, dest);
103
104 /*
105 * Send the IPI. The write to APIC_ICR fires this off.
106 */
107 apic_write(APIC_ICR, cfg);
108}
79 109
80static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) 110static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
81{ 111{
82 unsigned long cfg, flags; 112 unsigned long flags;
83 unsigned long query_cpu; 113 unsigned long query_cpu;
84 114
85 /* 115 /*
@@ -88,28 +118,9 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
88 * - mbligh 118 * - mbligh
89 */ 119 */
90 local_irq_save(flags); 120 local_irq_save(flags);
91
92 for_each_cpu_mask(query_cpu, mask) { 121 for_each_cpu_mask(query_cpu, mask) {
93 /* 122 __send_IPI_dest_field(x86_cpu_to_apicid[query_cpu],
94 * Wait for idle. 123 vector, APIC_DEST_PHYSICAL);
95 */
96 apic_wait_icr_idle();
97
98 /*
99 * prepare target chip field
100 */
101 cfg = __prepare_ICR2(x86_cpu_to_apicid[query_cpu]);
102 apic_write(APIC_ICR2, cfg);
103
104 /*
105 * program the ICR
106 */
107 cfg = __prepare_ICR(0, vector, APIC_DEST_PHYSICAL);
108
109 /*
110 * Send the IPI. The write to APIC_ICR fires this off.
111 */
112 apic_write(APIC_ICR, cfg);
113 } 124 }
114 local_irq_restore(flags); 125 local_irq_restore(flags);
115} 126}
diff --git a/include/asm-x86_64/irqflags.h b/include/asm-x86_64/irqflags.h
index cce6937e87c0..86e70fe23659 100644
--- a/include/asm-x86_64/irqflags.h
+++ b/include/asm-x86_64/irqflags.h
@@ -9,6 +9,7 @@
9 */ 9 */
10#ifndef _ASM_IRQFLAGS_H 10#ifndef _ASM_IRQFLAGS_H
11#define _ASM_IRQFLAGS_H 11#define _ASM_IRQFLAGS_H
12#include <asm/processor-flags.h>
12 13
13#ifndef __ASSEMBLY__ 14#ifndef __ASSEMBLY__
14/* 15/*
@@ -53,19 +54,19 @@ static inline void raw_local_irq_disable(void)
53{ 54{
54 unsigned long flags = __raw_local_save_flags(); 55 unsigned long flags = __raw_local_save_flags();
55 56
56 raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); 57 raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
57} 58}
58 59
59static inline void raw_local_irq_enable(void) 60static inline void raw_local_irq_enable(void)
60{ 61{
61 unsigned long flags = __raw_local_save_flags(); 62 unsigned long flags = __raw_local_save_flags();
62 63
63 raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); 64 raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
64} 65}
65 66
66static inline int raw_irqs_disabled_flags(unsigned long flags) 67static inline int raw_irqs_disabled_flags(unsigned long flags)
67{ 68{
68 return !(flags & (1<<9)) || (flags & (1 << 18)); 69 return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC);
69} 70}
70 71
71#else /* CONFIG_X86_VSMP */ 72#else /* CONFIG_X86_VSMP */
@@ -82,7 +83,7 @@ static inline void raw_local_irq_enable(void)
82 83
83static inline int raw_irqs_disabled_flags(unsigned long flags) 84static inline int raw_irqs_disabled_flags(unsigned long flags)
84{ 85{
85 return !(flags & (1 << 9)); 86 return !(flags & X86_EFLAGS_IF);
86} 87}
87 88
88#endif 89#endif
diff --git a/include/asm-x86_64/mmu_context.h b/include/asm-x86_64/mmu_context.h
index af03b9f852d6..0cce83a78378 100644
--- a/include/asm-x86_64/mmu_context.h
+++ b/include/asm-x86_64/mmu_context.h
@@ -7,6 +7,7 @@
7#include <asm/pda.h> 7#include <asm/pda.h>
8#include <asm/pgtable.h> 8#include <asm/pgtable.h>
9#include <asm/tlbflush.h> 9#include <asm/tlbflush.h>
10#include <asm-generic/mm_hooks.h>
10 11
11/* 12/*
12 * possibly do the LDT unload here? 13 * possibly do the LDT unload here?
diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h
index fb558fb1d211..19a89377b123 100644
--- a/include/asm-x86_64/mmzone.h
+++ b/include/asm-x86_64/mmzone.h
@@ -49,7 +49,7 @@ extern int pfn_valid(unsigned long pfn);
49 49
50#ifdef CONFIG_NUMA_EMU 50#ifdef CONFIG_NUMA_EMU
51#define FAKE_NODE_MIN_SIZE (64*1024*1024) 51#define FAKE_NODE_MIN_SIZE (64*1024*1024)
52#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1ul)) 52#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1uL))
53#endif 53#endif
54 54
55#endif 55#endif
diff --git a/include/asm-x86_64/msr-index.h b/include/asm-x86_64/msr-index.h
new file mode 100644
index 000000000000..d77a63f1ddf2
--- /dev/null
+++ b/include/asm-x86_64/msr-index.h
@@ -0,0 +1 @@
#include <asm-i386/msr-index.h>
diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h
index 902f9a58617e..a524f0325673 100644
--- a/include/asm-x86_64/msr.h
+++ b/include/asm-x86_64/msr.h
@@ -1,6 +1,8 @@
1#ifndef X86_64_MSR_H 1#ifndef X86_64_MSR_H
2#define X86_64_MSR_H 1 2#define X86_64_MSR_H 1
3 3
4#include <asm/msr-index.h>
5
4#ifndef __ASSEMBLY__ 6#ifndef __ASSEMBLY__
5/* 7/*
6 * Access to machine-specific registers (available on 586 and better only) 8 * Access to machine-specific registers (available on 586 and better only)
@@ -157,9 +159,6 @@ static inline unsigned int cpuid_edx(unsigned int op)
157 return edx; 159 return edx;
158} 160}
159 161
160#define MSR_IA32_UCODE_WRITE 0x79
161#define MSR_IA32_UCODE_REV 0x8b
162
163#ifdef CONFIG_SMP 162#ifdef CONFIG_SMP
164void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); 163void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
165void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); 164void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
@@ -172,269 +171,6 @@ static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
172{ 171{
173 wrmsr(msr_no, l, h); 172 wrmsr(msr_no, l, h);
174} 173}
175#endif /* CONFIG_SMP */ 174#endif /* CONFIG_SMP */
176 175#endif /* __ASSEMBLY__ */
177#endif 176#endif /* X86_64_MSR_H */
178
179/* AMD/K8 specific MSRs */
180#define MSR_EFER 0xc0000080 /* extended feature register */
181#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
182#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */
183#define MSR_CSTAR 0xc0000083 /* compatibility mode SYSCALL target */
184#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
185#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
186#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
187#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow (or USER_GS from kernel) */
188/* EFER bits: */
189#define _EFER_SCE 0 /* SYSCALL/SYSRET */
190#define _EFER_LME 8 /* Long mode enable */
191#define _EFER_LMA 10 /* Long mode active (read-only) */
192#define _EFER_NX 11 /* No execute enable */
193
194#define EFER_SCE (1<<_EFER_SCE)
195#define EFER_LME (1<<_EFER_LME)
196#define EFER_LMA (1<<_EFER_LMA)
197#define EFER_NX (1<<_EFER_NX)
198
199/* Intel MSRs. Some also available on other CPUs */
200#define MSR_IA32_TSC 0x10
201#define MSR_IA32_PLATFORM_ID 0x17
202
203#define MSR_IA32_PERFCTR0 0xc1
204#define MSR_IA32_PERFCTR1 0xc2
205#define MSR_FSB_FREQ 0xcd
206
207#define MSR_MTRRcap 0x0fe
208#define MSR_IA32_BBL_CR_CTL 0x119
209
210#define MSR_IA32_SYSENTER_CS 0x174
211#define MSR_IA32_SYSENTER_ESP 0x175
212#define MSR_IA32_SYSENTER_EIP 0x176
213
214#define MSR_IA32_MCG_CAP 0x179
215#define MSR_IA32_MCG_STATUS 0x17a
216#define MSR_IA32_MCG_CTL 0x17b
217
218#define MSR_IA32_EVNTSEL0 0x186
219#define MSR_IA32_EVNTSEL1 0x187
220
221#define MSR_IA32_DEBUGCTLMSR 0x1d9
222#define MSR_IA32_LASTBRANCHFROMIP 0x1db
223#define MSR_IA32_LASTBRANCHTOIP 0x1dc
224#define MSR_IA32_LASTINTFROMIP 0x1dd
225#define MSR_IA32_LASTINTTOIP 0x1de
226
227#define MSR_IA32_PEBS_ENABLE 0x3f1
228#define MSR_IA32_DS_AREA 0x600
229#define MSR_IA32_PERF_CAPABILITIES 0x345
230
231#define MSR_MTRRfix64K_00000 0x250
232#define MSR_MTRRfix16K_80000 0x258
233#define MSR_MTRRfix16K_A0000 0x259
234#define MSR_MTRRfix4K_C0000 0x268
235#define MSR_MTRRfix4K_C8000 0x269
236#define MSR_MTRRfix4K_D0000 0x26a
237#define MSR_MTRRfix4K_D8000 0x26b
238#define MSR_MTRRfix4K_E0000 0x26c
239#define MSR_MTRRfix4K_E8000 0x26d
240#define MSR_MTRRfix4K_F0000 0x26e
241#define MSR_MTRRfix4K_F8000 0x26f
242#define MSR_MTRRdefType 0x2ff
243
244#define MSR_IA32_MC0_CTL 0x400
245#define MSR_IA32_MC0_STATUS 0x401
246#define MSR_IA32_MC0_ADDR 0x402
247#define MSR_IA32_MC0_MISC 0x403
248
249#define MSR_P6_PERFCTR0 0xc1
250#define MSR_P6_PERFCTR1 0xc2
251#define MSR_P6_EVNTSEL0 0x186
252#define MSR_P6_EVNTSEL1 0x187
253
254/* K7/K8 MSRs. Not complete. See the architecture manual for a more complete list. */
255#define MSR_K7_EVNTSEL0 0xC0010000
256#define MSR_K7_PERFCTR0 0xC0010004
257#define MSR_K7_EVNTSEL1 0xC0010001
258#define MSR_K7_PERFCTR1 0xC0010005
259#define MSR_K7_EVNTSEL2 0xC0010002
260#define MSR_K7_PERFCTR2 0xC0010006
261#define MSR_K7_EVNTSEL3 0xC0010003
262#define MSR_K7_PERFCTR3 0xC0010007
263#define MSR_K8_TOP_MEM1 0xC001001A
264#define MSR_K8_TOP_MEM2 0xC001001D
265#define MSR_K8_SYSCFG 0xC0010010
266#define MSR_K8_HWCR 0xC0010015
267
268/* K6 MSRs */
269#define MSR_K6_EFER 0xC0000080
270#define MSR_K6_STAR 0xC0000081
271#define MSR_K6_WHCR 0xC0000082
272#define MSR_K6_UWCCR 0xC0000085
273#define MSR_K6_PSOR 0xC0000087
274#define MSR_K6_PFIR 0xC0000088
275
276/* Centaur-Hauls/IDT defined MSRs. */
277#define MSR_IDT_FCR1 0x107
278#define MSR_IDT_FCR2 0x108
279#define MSR_IDT_FCR3 0x109
280#define MSR_IDT_FCR4 0x10a
281
282#define MSR_IDT_MCR0 0x110
283#define MSR_IDT_MCR1 0x111
284#define MSR_IDT_MCR2 0x112
285#define MSR_IDT_MCR3 0x113
286#define MSR_IDT_MCR4 0x114
287#define MSR_IDT_MCR5 0x115
288#define MSR_IDT_MCR6 0x116
289#define MSR_IDT_MCR7 0x117
290#define MSR_IDT_MCR_CTRL 0x120
291
292/* VIA Cyrix defined MSRs*/
293#define MSR_VIA_FCR 0x1107
294#define MSR_VIA_LONGHAUL 0x110a
295#define MSR_VIA_RNG 0x110b
296#define MSR_VIA_BCR2 0x1147
297
298/* Intel defined MSRs. */
299#define MSR_IA32_P5_MC_ADDR 0
300#define MSR_IA32_P5_MC_TYPE 1
301#define MSR_IA32_PLATFORM_ID 0x17
302#define MSR_IA32_EBL_CR_POWERON 0x2a
303
304#define MSR_IA32_APICBASE 0x1b
305#define MSR_IA32_APICBASE_BSP (1<<8)
306#define MSR_IA32_APICBASE_ENABLE (1<<11)
307#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
308
309/* P4/Xeon+ specific */
310#define MSR_IA32_MCG_EAX 0x180
311#define MSR_IA32_MCG_EBX 0x181
312#define MSR_IA32_MCG_ECX 0x182
313#define MSR_IA32_MCG_EDX 0x183
314#define MSR_IA32_MCG_ESI 0x184
315#define MSR_IA32_MCG_EDI 0x185
316#define MSR_IA32_MCG_EBP 0x186
317#define MSR_IA32_MCG_ESP 0x187
318#define MSR_IA32_MCG_EFLAGS 0x188
319#define MSR_IA32_MCG_EIP 0x189
320#define MSR_IA32_MCG_RESERVED 0x18A
321
322#define MSR_P6_EVNTSEL0 0x186
323#define MSR_P6_EVNTSEL1 0x187
324
325#define MSR_IA32_PERF_STATUS 0x198
326#define MSR_IA32_PERF_CTL 0x199
327
328#define MSR_IA32_MPERF 0xE7
329#define MSR_IA32_APERF 0xE8
330
331#define MSR_IA32_THERM_CONTROL 0x19a
332#define MSR_IA32_THERM_INTERRUPT 0x19b
333#define MSR_IA32_THERM_STATUS 0x19c
334#define MSR_IA32_MISC_ENABLE 0x1a0
335
336#define MSR_IA32_DEBUGCTLMSR 0x1d9
337#define MSR_IA32_LASTBRANCHFROMIP 0x1db
338#define MSR_IA32_LASTBRANCHTOIP 0x1dc
339#define MSR_IA32_LASTINTFROMIP 0x1dd
340#define MSR_IA32_LASTINTTOIP 0x1de
341
342#define MSR_IA32_MC0_CTL 0x400
343#define MSR_IA32_MC0_STATUS 0x401
344#define MSR_IA32_MC0_ADDR 0x402
345#define MSR_IA32_MC0_MISC 0x403
346
347/* Pentium IV performance counter MSRs */
348#define MSR_P4_BPU_PERFCTR0 0x300
349#define MSR_P4_BPU_PERFCTR1 0x301
350#define MSR_P4_BPU_PERFCTR2 0x302
351#define MSR_P4_BPU_PERFCTR3 0x303
352#define MSR_P4_MS_PERFCTR0 0x304
353#define MSR_P4_MS_PERFCTR1 0x305
354#define MSR_P4_MS_PERFCTR2 0x306
355#define MSR_P4_MS_PERFCTR3 0x307
356#define MSR_P4_FLAME_PERFCTR0 0x308
357#define MSR_P4_FLAME_PERFCTR1 0x309
358#define MSR_P4_FLAME_PERFCTR2 0x30a
359#define MSR_P4_FLAME_PERFCTR3 0x30b
360#define MSR_P4_IQ_PERFCTR0 0x30c
361#define MSR_P4_IQ_PERFCTR1 0x30d
362#define MSR_P4_IQ_PERFCTR2 0x30e
363#define MSR_P4_IQ_PERFCTR3 0x30f
364#define MSR_P4_IQ_PERFCTR4 0x310
365#define MSR_P4_IQ_PERFCTR5 0x311
366#define MSR_P4_BPU_CCCR0 0x360
367#define MSR_P4_BPU_CCCR1 0x361
368#define MSR_P4_BPU_CCCR2 0x362
369#define MSR_P4_BPU_CCCR3 0x363
370#define MSR_P4_MS_CCCR0 0x364
371#define MSR_P4_MS_CCCR1 0x365
372#define MSR_P4_MS_CCCR2 0x366
373#define MSR_P4_MS_CCCR3 0x367
374#define MSR_P4_FLAME_CCCR0 0x368
375#define MSR_P4_FLAME_CCCR1 0x369
376#define MSR_P4_FLAME_CCCR2 0x36a
377#define MSR_P4_FLAME_CCCR3 0x36b
378#define MSR_P4_IQ_CCCR0 0x36c
379#define MSR_P4_IQ_CCCR1 0x36d
380#define MSR_P4_IQ_CCCR2 0x36e
381#define MSR_P4_IQ_CCCR3 0x36f
382#define MSR_P4_IQ_CCCR4 0x370
383#define MSR_P4_IQ_CCCR5 0x371
384#define MSR_P4_ALF_ESCR0 0x3ca
385#define MSR_P4_ALF_ESCR1 0x3cb
386#define MSR_P4_BPU_ESCR0 0x3b2
387#define MSR_P4_BPU_ESCR1 0x3b3
388#define MSR_P4_BSU_ESCR0 0x3a0
389#define MSR_P4_BSU_ESCR1 0x3a1
390#define MSR_P4_CRU_ESCR0 0x3b8
391#define MSR_P4_CRU_ESCR1 0x3b9
392#define MSR_P4_CRU_ESCR2 0x3cc
393#define MSR_P4_CRU_ESCR3 0x3cd
394#define MSR_P4_CRU_ESCR4 0x3e0
395#define MSR_P4_CRU_ESCR5 0x3e1
396#define MSR_P4_DAC_ESCR0 0x3a8
397#define MSR_P4_DAC_ESCR1 0x3a9
398#define MSR_P4_FIRM_ESCR0 0x3a4
399#define MSR_P4_FIRM_ESCR1 0x3a5
400#define MSR_P4_FLAME_ESCR0 0x3a6
401#define MSR_P4_FLAME_ESCR1 0x3a7
402#define MSR_P4_FSB_ESCR0 0x3a2
403#define MSR_P4_FSB_ESCR1 0x3a3
404#define MSR_P4_IQ_ESCR0 0x3ba
405#define MSR_P4_IQ_ESCR1 0x3bb
406#define MSR_P4_IS_ESCR0 0x3b4
407#define MSR_P4_IS_ESCR1 0x3b5
408#define MSR_P4_ITLB_ESCR0 0x3b6
409#define MSR_P4_ITLB_ESCR1 0x3b7
410#define MSR_P4_IX_ESCR0 0x3c8
411#define MSR_P4_IX_ESCR1 0x3c9
412#define MSR_P4_MOB_ESCR0 0x3aa
413#define MSR_P4_MOB_ESCR1 0x3ab
414#define MSR_P4_MS_ESCR0 0x3c0
415#define MSR_P4_MS_ESCR1 0x3c1
416#define MSR_P4_PMH_ESCR0 0x3ac
417#define MSR_P4_PMH_ESCR1 0x3ad
418#define MSR_P4_RAT_ESCR0 0x3bc
419#define MSR_P4_RAT_ESCR1 0x3bd
420#define MSR_P4_SAAT_ESCR0 0x3ae
421#define MSR_P4_SAAT_ESCR1 0x3af
422#define MSR_P4_SSU_ESCR0 0x3be
423#define MSR_P4_SSU_ESCR1 0x3bf /* guess: not defined in manual */
424#define MSR_P4_TBPU_ESCR0 0x3c2
425#define MSR_P4_TBPU_ESCR1 0x3c3
426#define MSR_P4_TC_ESCR0 0x3c4
427#define MSR_P4_TC_ESCR1 0x3c5
428#define MSR_P4_U2L_ESCR0 0x3b0
429#define MSR_P4_U2L_ESCR1 0x3b1
430
431/* Intel Core-based CPU performance counters */
432#define MSR_CORE_PERF_FIXED_CTR0 0x309
433#define MSR_CORE_PERF_FIXED_CTR1 0x30a
434#define MSR_CORE_PERF_FIXED_CTR2 0x30b
435#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x38d
436#define MSR_CORE_PERF_GLOBAL_STATUS 0x38e
437#define MSR_CORE_PERF_GLOBAL_CTRL 0x38f
438#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x390
439
440#endif
diff --git a/include/asm-x86_64/mtrr.h b/include/asm-x86_64/mtrr.h
index d6135b2549bf..b557c486bef8 100644
--- a/include/asm-x86_64/mtrr.h
+++ b/include/asm-x86_64/mtrr.h
@@ -135,6 +135,18 @@ struct mtrr_gentry32
135 135
136#endif /* CONFIG_COMPAT */ 136#endif /* CONFIG_COMPAT */
137 137
138#ifdef CONFIG_MTRR
139extern void mtrr_ap_init(void);
140extern void mtrr_bp_init(void);
141extern void mtrr_save_fixed_ranges(void *);
142extern void mtrr_save_state(void);
143#else
144#define mtrr_ap_init() do {} while (0)
145#define mtrr_bp_init() do {} while (0)
146#define mtrr_save_fixed_ranges(arg) do {} while (0)
147#define mtrr_save_state() do {} while (0)
148#endif
149
138#endif /* __KERNEL__ */ 150#endif /* __KERNEL__ */
139 151
140#endif /* _LINUX_MTRR_H */ 152#endif /* _LINUX_MTRR_H */
diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h
index 72375e7d32a8..d0a7f53b1497 100644
--- a/include/asm-x86_64/nmi.h
+++ b/include/asm-x86_64/nmi.h
@@ -80,4 +80,13 @@ extern int unknown_nmi_panic;
80void __trigger_all_cpu_backtrace(void); 80void __trigger_all_cpu_backtrace(void);
81#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() 81#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace()
82 82
83
84void lapic_watchdog_stop(void);
85int lapic_watchdog_init(unsigned nmi_hz);
86int lapic_wd_event(unsigned nmi_hz);
87unsigned lapic_adjust_nmi_hz(unsigned hz);
88int lapic_watchdog_ok(void);
89void disable_lapic_nmi_watchdog(void);
90void enable_lapic_nmi_watchdog(void);
91
83#endif /* ASM_NMI_H */ 92#endif /* ASM_NMI_H */
diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h
index 10f346165cab..b17fc16ec2eb 100644
--- a/include/asm-x86_64/page.h
+++ b/include/asm-x86_64/page.h
@@ -1,14 +1,11 @@
1#ifndef _X86_64_PAGE_H 1#ifndef _X86_64_PAGE_H
2#define _X86_64_PAGE_H 2#define _X86_64_PAGE_H
3 3
4#include <asm/const.h>
4 5
5/* PAGE_SHIFT determines the page size */ 6/* PAGE_SHIFT determines the page size */
6#define PAGE_SHIFT 12 7#define PAGE_SHIFT 12
7#ifdef __ASSEMBLY__ 8#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
8#define PAGE_SIZE (0x1 << PAGE_SHIFT)
9#else
10#define PAGE_SIZE (1UL << PAGE_SHIFT)
11#endif
12#define PAGE_MASK (~(PAGE_SIZE-1)) 9#define PAGE_MASK (~(PAGE_SIZE-1))
13#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK) 10#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK)
14 11
@@ -33,10 +30,10 @@
33#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ 30#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
34 31
35#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) 32#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
36#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) 33#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT)
37 34
38#define HPAGE_SHIFT PMD_SHIFT 35#define HPAGE_SHIFT PMD_SHIFT
39#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) 36#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
40#define HPAGE_MASK (~(HPAGE_SIZE - 1)) 37#define HPAGE_MASK (~(HPAGE_SIZE - 1))
41#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) 38#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
42 39
@@ -64,6 +61,8 @@ typedef struct { unsigned long pgd; } pgd_t;
64 61
65typedef struct { unsigned long pgprot; } pgprot_t; 62typedef struct { unsigned long pgprot; } pgprot_t;
66 63
64extern unsigned long phys_base;
65
67#define pte_val(x) ((x).pte) 66#define pte_val(x) ((x).pte)
68#define pmd_val(x) ((x).pmd) 67#define pmd_val(x) ((x).pmd)
69#define pud_val(x) ((x).pud) 68#define pud_val(x) ((x).pud)
@@ -76,29 +75,25 @@ typedef struct { unsigned long pgprot; } pgprot_t;
76#define __pgd(x) ((pgd_t) { (x) } ) 75#define __pgd(x) ((pgd_t) { (x) } )
77#define __pgprot(x) ((pgprot_t) { (x) } ) 76#define __pgprot(x) ((pgprot_t) { (x) } )
78 77
79#define __PHYSICAL_START ((unsigned long)CONFIG_PHYSICAL_START) 78#endif /* !__ASSEMBLY__ */
80#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
81#define __START_KERNEL_map 0xffffffff80000000UL
82#define __PAGE_OFFSET 0xffff810000000000UL
83 79
84#else
85#define __PHYSICAL_START CONFIG_PHYSICAL_START 80#define __PHYSICAL_START CONFIG_PHYSICAL_START
81#define __KERNEL_ALIGN 0x200000
86#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) 82#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
87#define __START_KERNEL_map 0xffffffff80000000 83#define __START_KERNEL_map 0xffffffff80000000
88#define __PAGE_OFFSET 0xffff810000000000 84#define __PAGE_OFFSET 0xffff810000000000
89#endif /* !__ASSEMBLY__ */
90 85
91/* to align the pointer to the (next) page boundary */ 86/* to align the pointer to the (next) page boundary */
92#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) 87#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
93 88
94/* See Documentation/x86_64/mm.txt for a description of the memory map. */ 89/* See Documentation/x86_64/mm.txt for a description of the memory map. */
95#define __PHYSICAL_MASK_SHIFT 46 90#define __PHYSICAL_MASK_SHIFT 46
96#define __PHYSICAL_MASK ((1UL << __PHYSICAL_MASK_SHIFT) - 1) 91#define __PHYSICAL_MASK ((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1)
97#define __VIRTUAL_MASK_SHIFT 48 92#define __VIRTUAL_MASK_SHIFT 48
98#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) 93#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
99 94
100#define KERNEL_TEXT_SIZE (40UL*1024*1024) 95#define KERNEL_TEXT_SIZE (40*1024*1024)
101#define KERNEL_TEXT_START 0xffffffff80000000UL 96#define KERNEL_TEXT_START 0xffffffff80000000
102 97
103#ifndef __ASSEMBLY__ 98#ifndef __ASSEMBLY__
104 99
@@ -106,21 +101,19 @@ typedef struct { unsigned long pgprot; } pgprot_t;
106 101
107#endif /* __ASSEMBLY__ */ 102#endif /* __ASSEMBLY__ */
108 103
109#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) 104#define PAGE_OFFSET __PAGE_OFFSET
110 105
111/* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol. 106/* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol.
112 Otherwise you risk miscompilation. */ 107 Otherwise you risk miscompilation. */
113#define __pa(x) (((unsigned long)(x)>=__START_KERNEL_map)?(unsigned long)(x) - (unsigned long)__START_KERNEL_map:(unsigned long)(x) - PAGE_OFFSET) 108#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET)
114/* __pa_symbol should be used for C visible symbols. 109/* __pa_symbol should be used for C visible symbols.
115 This seems to be the official gcc blessed way to do such arithmetic. */ 110 This seems to be the official gcc blessed way to do such arithmetic. */
116#define __pa_symbol(x) \ 111#define __pa_symbol(x) \
117 ({unsigned long v; \ 112 ({unsigned long v; \
118 asm("" : "=r" (v) : "0" (x)); \ 113 asm("" : "=r" (v) : "0" (x)); \
119 __pa(v); }) 114 ((v - __START_KERNEL_map) + phys_base); })
120 115
121#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) 116#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
122#define __boot_va(x) __va(x)
123#define __boot_pa(x) __pa(x)
124#ifdef CONFIG_FLATMEM 117#ifdef CONFIG_FLATMEM
125#define pfn_valid(pfn) ((pfn) < end_pfn) 118#define pfn_valid(pfn) ((pfn) < end_pfn)
126#endif 119#endif
diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h
index 5ed0ef340842..c6fbb67eac90 100644
--- a/include/asm-x86_64/percpu.h
+++ b/include/asm-x86_64/percpu.h
@@ -11,16 +11,6 @@
11 11
12#include <asm/pda.h> 12#include <asm/pda.h>
13 13
14#ifdef CONFIG_MODULES
15# define PERCPU_MODULE_RESERVE 8192
16#else
17# define PERCPU_MODULE_RESERVE 0
18#endif
19
20#define PERCPU_ENOUGH_ROOM \
21 (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \
22 PERCPU_MODULE_RESERVE)
23
24#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) 14#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
25#define __my_cpu_offset() read_pda(data_offset) 15#define __my_cpu_offset() read_pda(data_offset)
26 16
diff --git a/include/asm-x86_64/pgalloc.h b/include/asm-x86_64/pgalloc.h
index 4e28b6060a5e..8bb564687860 100644
--- a/include/asm-x86_64/pgalloc.h
+++ b/include/asm-x86_64/pgalloc.h
@@ -1,7 +1,6 @@
1#ifndef _X86_64_PGALLOC_H 1#ifndef _X86_64_PGALLOC_H
2#define _X86_64_PGALLOC_H 2#define _X86_64_PGALLOC_H
3 3
4#include <asm/fixmap.h>
5#include <asm/pda.h> 4#include <asm/pda.h>
6#include <linux/threads.h> 5#include <linux/threads.h>
7#include <linux/mm.h> 6#include <linux/mm.h>
@@ -45,24 +44,16 @@ static inline void pgd_list_add(pgd_t *pgd)
45 struct page *page = virt_to_page(pgd); 44 struct page *page = virt_to_page(pgd);
46 45
47 spin_lock(&pgd_lock); 46 spin_lock(&pgd_lock);
48 page->index = (pgoff_t)pgd_list; 47 list_add(&page->lru, &pgd_list);
49 if (pgd_list)
50 pgd_list->private = (unsigned long)&page->index;
51 pgd_list = page;
52 page->private = (unsigned long)&pgd_list;
53 spin_unlock(&pgd_lock); 48 spin_unlock(&pgd_lock);
54} 49}
55 50
56static inline void pgd_list_del(pgd_t *pgd) 51static inline void pgd_list_del(pgd_t *pgd)
57{ 52{
58 struct page *next, **pprev, *page = virt_to_page(pgd); 53 struct page *page = virt_to_page(pgd);
59 54
60 spin_lock(&pgd_lock); 55 spin_lock(&pgd_lock);
61 next = (struct page *)page->index; 56 list_del(&page->lru);
62 pprev = (struct page **)page->private;
63 *pprev = next;
64 if (next)
65 next->private = (unsigned long)pprev;
66 spin_unlock(&pgd_lock); 57 spin_unlock(&pgd_lock);
67} 58}
68 59
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index 730bd6028416..599993f6ba84 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -1,25 +1,25 @@
1#ifndef _X86_64_PGTABLE_H 1#ifndef _X86_64_PGTABLE_H
2#define _X86_64_PGTABLE_H 2#define _X86_64_PGTABLE_H
3 3
4#include <asm/const.h>
5#ifndef __ASSEMBLY__
6
4/* 7/*
5 * This file contains the functions and defines necessary to modify and use 8 * This file contains the functions and defines necessary to modify and use
6 * the x86-64 page table tree. 9 * the x86-64 page table tree.
7 */ 10 */
8#include <asm/processor.h> 11#include <asm/processor.h>
9#include <asm/fixmap.h>
10#include <asm/bitops.h> 12#include <asm/bitops.h>
11#include <linux/threads.h> 13#include <linux/threads.h>
12#include <asm/pda.h> 14#include <asm/pda.h>
13 15
14extern pud_t level3_kernel_pgt[512]; 16extern pud_t level3_kernel_pgt[512];
15extern pud_t level3_physmem_pgt[512];
16extern pud_t level3_ident_pgt[512]; 17extern pud_t level3_ident_pgt[512];
17extern pmd_t level2_kernel_pgt[512]; 18extern pmd_t level2_kernel_pgt[512];
18extern pgd_t init_level4_pgt[]; 19extern pgd_t init_level4_pgt[];
19extern pgd_t boot_level4_pgt[];
20extern unsigned long __supported_pte_mask; 20extern unsigned long __supported_pte_mask;
21 21
22#define swapper_pg_dir init_level4_pgt 22#define swapper_pg_dir ((pgd_t *)NULL)
23 23
24extern void paging_init(void); 24extern void paging_init(void);
25extern void clear_kernel_mapping(unsigned long addr, unsigned long size); 25extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
@@ -29,7 +29,9 @@ extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
29 * for zero-mapped memory areas etc.. 29 * for zero-mapped memory areas etc..
30 */ 30 */
31extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; 31extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
32#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) 32#define ZERO_PAGE(vaddr) (pfn_to_page(__pa_symbol(&empty_zero_page) >> PAGE_SHIFT))
33
34#endif /* !__ASSEMBLY__ */
33 35
34/* 36/*
35 * PGDIR_SHIFT determines what a top-level page table entry can map 37 * PGDIR_SHIFT determines what a top-level page table entry can map
@@ -55,6 +57,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
55 */ 57 */
56#define PTRS_PER_PTE 512 58#define PTRS_PER_PTE 512
57 59
60#ifndef __ASSEMBLY__
61
58#define pte_ERROR(e) \ 62#define pte_ERROR(e) \
59 printk("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), pte_val(e)) 63 printk("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), pte_val(e))
60#define pmd_ERROR(e) \ 64#define pmd_ERROR(e) \
@@ -118,22 +122,23 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
118 122
119#define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK)) 123#define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
120 124
121#define PMD_SIZE (1UL << PMD_SHIFT) 125#endif /* !__ASSEMBLY__ */
126
127#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT)
122#define PMD_MASK (~(PMD_SIZE-1)) 128#define PMD_MASK (~(PMD_SIZE-1))
123#define PUD_SIZE (1UL << PUD_SHIFT) 129#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT)
124#define PUD_MASK (~(PUD_SIZE-1)) 130#define PUD_MASK (~(PUD_SIZE-1))
125#define PGDIR_SIZE (1UL << PGDIR_SHIFT) 131#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
126#define PGDIR_MASK (~(PGDIR_SIZE-1)) 132#define PGDIR_MASK (~(PGDIR_SIZE-1))
127 133
128#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1) 134#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1)
129#define FIRST_USER_ADDRESS 0 135#define FIRST_USER_ADDRESS 0
130 136
131#ifndef __ASSEMBLY__ 137#define MAXMEM 0x3fffffffffff
132#define MAXMEM 0x3fffffffffffUL 138#define VMALLOC_START 0xffffc20000000000
133#define VMALLOC_START 0xffffc20000000000UL 139#define VMALLOC_END 0xffffe1ffffffffff
134#define VMALLOC_END 0xffffe1ffffffffffUL 140#define MODULES_VADDR 0xffffffff88000000
135#define MODULES_VADDR 0xffffffff88000000UL 141#define MODULES_END 0xfffffffffff00000
136#define MODULES_END 0xfffffffffff00000UL
137#define MODULES_LEN (MODULES_END - MODULES_VADDR) 142#define MODULES_LEN (MODULES_END - MODULES_VADDR)
138 143
139#define _PAGE_BIT_PRESENT 0 144#define _PAGE_BIT_PRESENT 0
@@ -159,7 +164,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
159#define _PAGE_GLOBAL 0x100 /* Global TLB entry */ 164#define _PAGE_GLOBAL 0x100 /* Global TLB entry */
160 165
161#define _PAGE_PROTNONE 0x080 /* If not present */ 166#define _PAGE_PROTNONE 0x080 /* If not present */
162#define _PAGE_NX (1UL<<_PAGE_BIT_NX) 167#define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX)
163 168
164#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) 169#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
165#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) 170#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
@@ -221,6 +226,8 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
221#define __S110 PAGE_SHARED_EXEC 226#define __S110 PAGE_SHARED_EXEC
222#define __S111 PAGE_SHARED_EXEC 227#define __S111 PAGE_SHARED_EXEC
223 228
229#ifndef __ASSEMBLY__
230
224static inline unsigned long pgd_bad(pgd_t pgd) 231static inline unsigned long pgd_bad(pgd_t pgd)
225{ 232{
226 return pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); 233 return pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
@@ -403,11 +410,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
403#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) 410#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
404 411
405extern spinlock_t pgd_lock; 412extern spinlock_t pgd_lock;
406extern struct page *pgd_list; 413extern struct list_head pgd_list;
407void vmalloc_sync_all(void); 414void vmalloc_sync_all(void);
408 415
409#endif /* !__ASSEMBLY__ */
410
411extern int kern_addr_valid(unsigned long addr); 416extern int kern_addr_valid(unsigned long addr);
412 417
413#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ 418#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
@@ -437,5 +442,6 @@ extern int kern_addr_valid(unsigned long addr);
437#define __HAVE_ARCH_PTEP_SET_WRPROTECT 442#define __HAVE_ARCH_PTEP_SET_WRPROTECT
438#define __HAVE_ARCH_PTE_SAME 443#define __HAVE_ARCH_PTE_SAME
439#include <asm-generic/pgtable.h> 444#include <asm-generic/pgtable.h>
445#endif /* !__ASSEMBLY__ */
440 446
441#endif /* _X86_64_PGTABLE_H */ 447#endif /* _X86_64_PGTABLE_H */
diff --git a/include/asm-x86_64/processor-flags.h b/include/asm-x86_64/processor-flags.h
new file mode 100644
index 000000000000..ec99a57b2c6a
--- /dev/null
+++ b/include/asm-x86_64/processor-flags.h
@@ -0,0 +1 @@
#include <asm-i386/processor-flags.h>
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h
index 76552d72804c..461ffe4c1fcc 100644
--- a/include/asm-x86_64/processor.h
+++ b/include/asm-x86_64/processor.h
@@ -20,6 +20,7 @@
20#include <asm/percpu.h> 20#include <asm/percpu.h>
21#include <linux/personality.h> 21#include <linux/personality.h>
22#include <linux/cpumask.h> 22#include <linux/cpumask.h>
23#include <asm/processor-flags.h>
23 24
24#define TF_MASK 0x00000100 25#define TF_MASK 0x00000100
25#define IF_MASK 0x00000200 26#define IF_MASK 0x00000200
@@ -103,42 +104,6 @@ extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
103extern unsigned short num_cache_leaves; 104extern unsigned short num_cache_leaves;
104 105
105/* 106/*
106 * EFLAGS bits
107 */
108#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
109#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
110#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */
111#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
112#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
113#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
114#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
115#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
116#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
117#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
118#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
119#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
120#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
121#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
122#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
123#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
124#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
125
126/*
127 * Intel CPU features in CR4
128 */
129#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
130#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
131#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
132#define X86_CR4_DE 0x0008 /* enable debugging extensions */
133#define X86_CR4_PSE 0x0010 /* enable page size extensions */
134#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
135#define X86_CR4_MCE 0x0040 /* Machine check enable */
136#define X86_CR4_PGE 0x0080 /* enable global pages */
137#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
138#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
139#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
140
141/*
142 * Save the cr4 feature set we're using (ie 107 * Save the cr4 feature set we're using (ie
143 * Pentium 4MB enable and PPro Global page 108 * Pentium 4MB enable and PPro Global page
144 * enable), so that any CPU's that boot up 109 * enable), so that any CPU's that boot up
@@ -201,7 +166,7 @@ struct i387_fxsave_struct {
201 u32 mxcsr; 166 u32 mxcsr;
202 u32 mxcsr_mask; 167 u32 mxcsr_mask;
203 u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ 168 u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
204 u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */ 169 u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
205 u32 padding[24]; 170 u32 padding[24];
206} __attribute__ ((aligned (16))); 171} __attribute__ ((aligned (16)));
207 172
@@ -427,22 +392,6 @@ static inline void prefetchw(void *x)
427#define cpu_relax() rep_nop() 392#define cpu_relax() rep_nop()
428 393
429/* 394/*
430 * NSC/Cyrix CPU configuration register indexes
431 */
432#define CX86_CCR0 0xc0
433#define CX86_CCR1 0xc1
434#define CX86_CCR2 0xc2
435#define CX86_CCR3 0xc3
436#define CX86_CCR4 0xe8
437#define CX86_CCR5 0xe9
438#define CX86_CCR6 0xea
439#define CX86_CCR7 0xeb
440#define CX86_DIR0 0xfe
441#define CX86_DIR1 0xff
442#define CX86_ARR_BASE 0xc4
443#define CX86_RCR_BASE 0xdc
444
445/*
446 * NSC/Cyrix CPU indexed register access macros 395 * NSC/Cyrix CPU indexed register access macros
447 */ 396 */
448 397
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index b6e65a699f2a..85255db1e82d 100644
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -11,18 +11,9 @@ struct pt_regs;
11extern void start_kernel(void); 11extern void start_kernel(void);
12extern void pda_init(int); 12extern void pda_init(int);
13 13
14extern void zap_low_mappings(int cpu);
15
16extern void early_idt_handler(void); 14extern void early_idt_handler(void);
17 15
18extern void mcheck_init(struct cpuinfo_x86 *c); 16extern void mcheck_init(struct cpuinfo_x86 *c);
19#ifdef CONFIG_MTRR
20extern void mtrr_ap_init(void);
21extern void mtrr_bp_init(void);
22#else
23#define mtrr_ap_init() do {} while (0)
24#define mtrr_bp_init() do {} while (0)
25#endif
26extern void init_memory_mapping(unsigned long start, unsigned long end); 17extern void init_memory_mapping(unsigned long start, unsigned long end);
27 18
28extern void system_call(void); 19extern void system_call(void);
@@ -82,7 +73,6 @@ extern void syscall32_cpu_init(void);
82extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end); 73extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end);
83 74
84extern void early_quirks(void); 75extern void early_quirks(void);
85extern void quirk_intel_irqbalance(void);
86extern void check_efer(void); 76extern void check_efer(void);
87 77
88extern int unhandled_signal(struct task_struct *tsk, int sig); 78extern int unhandled_signal(struct task_struct *tsk, int sig);
@@ -93,6 +83,7 @@ extern unsigned long table_start, table_end;
93 83
94extern int exception_trace; 84extern int exception_trace;
95extern unsigned cpu_khz; 85extern unsigned cpu_khz;
86extern unsigned tsc_khz;
96 87
97extern void no_iommu_init(void); 88extern void no_iommu_init(void);
98extern int force_iommu, no_iommu; 89extern int force_iommu, no_iommu;
@@ -121,8 +112,12 @@ extern int gsi_irq_sharing(int gsi);
121 112
122extern void smp_local_timer_interrupt(void); 113extern void smp_local_timer_interrupt(void);
123 114
115extern int force_mwait;
116
124long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); 117long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
125 118
119void i8254_timer_resume(void);
120
126#define round_up(x,y) (((x) + (y) - 1) & ~((y)-1)) 121#define round_up(x,y) (((x) + (y) - 1) & ~((y)-1))
127#define round_down(x,y) ((x) & ~((y)-1)) 122#define round_down(x,y) ((x) & ~((y)-1))
128 123
diff --git a/include/asm-x86_64/segment.h b/include/asm-x86_64/segment.h
index 334ddcdd8f92..adf2bf1e187c 100644
--- a/include/asm-x86_64/segment.h
+++ b/include/asm-x86_64/segment.h
@@ -6,7 +6,7 @@
6#define __KERNEL_CS 0x10 6#define __KERNEL_CS 0x10
7#define __KERNEL_DS 0x18 7#define __KERNEL_DS 0x18
8 8
9#define __KERNEL32_CS 0x38 9#define __KERNEL32_CS 0x08
10 10
11/* 11/*
12 * we cannot use the same code segment descriptor for user and kernel 12 * we cannot use the same code segment descriptor for user and kernel
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index de592a408c07..d5704421456b 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -10,10 +10,9 @@
10#include <linux/init.h> 10#include <linux/init.h>
11extern int disable_apic; 11extern int disable_apic;
12 12
13#include <asm/fixmap.h>
14#include <asm/mpspec.h> 13#include <asm/mpspec.h>
15#include <asm/io_apic.h>
16#include <asm/apic.h> 14#include <asm/apic.h>
15#include <asm/io_apic.h>
17#include <asm/thread_info.h> 16#include <asm/thread_info.h>
18 17
19#ifdef CONFIG_SMP 18#ifdef CONFIG_SMP
@@ -38,7 +37,6 @@ extern void lock_ipi_call_lock(void);
38extern void unlock_ipi_call_lock(void); 37extern void unlock_ipi_call_lock(void);
39extern int smp_num_siblings; 38extern int smp_num_siblings;
40extern void smp_send_reschedule(int cpu); 39extern void smp_send_reschedule(int cpu);
41void smp_stop_cpu(void);
42 40
43extern cpumask_t cpu_sibling_map[NR_CPUS]; 41extern cpumask_t cpu_sibling_map[NR_CPUS];
44extern cpumask_t cpu_core_map[NR_CPUS]; 42extern cpumask_t cpu_core_map[NR_CPUS];
diff --git a/include/asm-x86_64/suspend.h b/include/asm-x86_64/suspend.h
index bc7f81715e5e..9c3f8de90d2d 100644
--- a/include/asm-x86_64/suspend.h
+++ b/include/asm-x86_64/suspend.h
@@ -17,6 +17,7 @@ struct saved_context {
17 u16 ds, es, fs, gs, ss; 17 u16 ds, es, fs, gs, ss;
18 unsigned long gs_base, gs_kernel_base, fs_base; 18 unsigned long gs_base, gs_kernel_base, fs_base;
19 unsigned long cr0, cr2, cr3, cr4, cr8; 19 unsigned long cr0, cr2, cr3, cr4, cr8;
20 unsigned long efer;
20 u16 gdt_pad; 21 u16 gdt_pad;
21 u16 gdt_limit; 22 u16 gdt_limit;
22 unsigned long gdt_base; 23 unsigned long gdt_base;
@@ -44,12 +45,12 @@ extern unsigned long saved_context_eflags;
44extern void fix_processor_context(void); 45extern void fix_processor_context(void);
45 46
46#ifdef CONFIG_ACPI_SLEEP 47#ifdef CONFIG_ACPI_SLEEP
47extern unsigned long saved_eip; 48extern unsigned long saved_rip;
48extern unsigned long saved_esp; 49extern unsigned long saved_rsp;
49extern unsigned long saved_ebp; 50extern unsigned long saved_rbp;
50extern unsigned long saved_ebx; 51extern unsigned long saved_rbx;
51extern unsigned long saved_esi; 52extern unsigned long saved_rsi;
52extern unsigned long saved_edi; 53extern unsigned long saved_rdi;
53 54
54/* routines for saving/restoring kernel state */ 55/* routines for saving/restoring kernel state */
55extern int acpi_save_state_mem(void); 56extern int acpi_save_state_mem(void);
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
index bd376bc8c4ab..213b7fe5d998 100644
--- a/include/asm-x86_64/system.h
+++ b/include/asm-x86_64/system.h
@@ -89,6 +89,11 @@ static inline unsigned long read_cr3(void)
89 return cr3; 89 return cr3;
90} 90}
91 91
92static inline void write_cr3(unsigned long val)
93{
94 asm volatile("movq %0,%%cr3" :: "r" (val) : "memory");
95}
96
92static inline unsigned long read_cr4(void) 97static inline unsigned long read_cr4(void)
93{ 98{
94 unsigned long cr4; 99 unsigned long cr4;
@@ -98,7 +103,7 @@ static inline unsigned long read_cr4(void)
98 103
99static inline void write_cr4(unsigned long val) 104static inline void write_cr4(unsigned long val)
100{ 105{
101 asm volatile("movq %0,%%cr4" :: "r" (val)); 106 asm volatile("movq %0,%%cr4" :: "r" (val) : "memory");
102} 107}
103 108
104#define stts() write_cr0(8 | read_cr0()) 109#define stts() write_cr0(8 | read_cr0())
diff --git a/include/asm-x86_64/timex.h b/include/asm-x86_64/timex.h
index 8c6808a3fba4..f6527e1b6c1c 100644
--- a/include/asm-x86_64/timex.h
+++ b/include/asm-x86_64/timex.h
@@ -27,6 +27,6 @@ extern int read_current_timer(unsigned long *timer_value);
27#define NS_SCALE 10 /* 2^10, carefully chosen */ 27#define NS_SCALE 10 /* 2^10, carefully chosen */
28#define US_SCALE 32 /* 2^32, arbitralrily chosen */ 28#define US_SCALE 32 /* 2^32, arbitralrily chosen */
29 29
30extern void mark_tsc_unstable(void); 30extern void mark_tsc_unstable(char *msg);
31extern void set_cyc2ns_scale(unsigned long khz); 31extern void set_cyc2ns_scale(unsigned long khz);
32#endif 32#endif
diff --git a/include/asm-x86_64/tlbflush.h b/include/asm-x86_64/tlbflush.h
index 983bd296c81a..512401b8725f 100644
--- a/include/asm-x86_64/tlbflush.h
+++ b/include/asm-x86_64/tlbflush.h
@@ -3,41 +3,18 @@
3 3
4#include <linux/mm.h> 4#include <linux/mm.h>
5#include <asm/processor.h> 5#include <asm/processor.h>
6 6#include <asm/system.h>
7static inline unsigned long get_cr3(void)
8{
9 unsigned long cr3;
10 asm volatile("mov %%cr3,%0" : "=r" (cr3));
11 return cr3;
12}
13
14static inline void set_cr3(unsigned long cr3)
15{
16 asm volatile("mov %0,%%cr3" :: "r" (cr3) : "memory");
17}
18 7
19static inline void __flush_tlb(void) 8static inline void __flush_tlb(void)
20{ 9{
21 set_cr3(get_cr3()); 10 write_cr3(read_cr3());
22}
23
24static inline unsigned long get_cr4(void)
25{
26 unsigned long cr4;
27 asm volatile("mov %%cr4,%0" : "=r" (cr4));
28 return cr4;
29}
30
31static inline void set_cr4(unsigned long cr4)
32{
33 asm volatile("mov %0,%%cr4" :: "r" (cr4) : "memory");
34} 11}
35 12
36static inline void __flush_tlb_all(void) 13static inline void __flush_tlb_all(void)
37{ 14{
38 unsigned long cr4 = get_cr4(); 15 unsigned long cr4 = read_cr4();
39 set_cr4(cr4 & ~X86_CR4_PGE); /* clear PGE */ 16 write_cr4(cr4 & ~X86_CR4_PGE); /* clear PGE */
40 set_cr4(cr4); /* write old PGE again and flush TLBs */ 17 write_cr4(cr4); /* write old PGE again and flush TLBs */
41} 18}
42 19
43#define __flush_tlb_one(addr) \ 20#define __flush_tlb_one(addr) \
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index c5f596e71faa..26e23e01c54a 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -620,8 +620,6 @@ __SYSCALL(__NR_vmsplice, sys_vmsplice)
620#define __NR_move_pages 279 620#define __NR_move_pages 279
621__SYSCALL(__NR_move_pages, sys_move_pages) 621__SYSCALL(__NR_move_pages, sys_move_pages)
622 622
623#define __NR_syscall_max __NR_move_pages
624
625#ifndef __NO_STUBS 623#ifndef __NO_STUBS
626#define __ARCH_WANT_OLD_READDIR 624#define __ARCH_WANT_OLD_READDIR
627#define __ARCH_WANT_OLD_STAT 625#define __ARCH_WANT_OLD_STAT
@@ -655,7 +653,6 @@ __SYSCALL(__NR_move_pages, sys_move_pages)
655#include <asm/ptrace.h> 653#include <asm/ptrace.h>
656 654
657asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs); 655asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs);
658asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on);
659struct sigaction; 656struct sigaction;
660asmlinkage long sys_rt_sigaction(int sig, 657asmlinkage long sys_rt_sigaction(int sig,
661 const struct sigaction __user *act, 658 const struct sigaction __user *act,
diff --git a/include/asm-xtensa/mmu_context.h b/include/asm-xtensa/mmu_context.h
index f14851f086c3..92f948392ebc 100644
--- a/include/asm-xtensa/mmu_context.h
+++ b/include/asm-xtensa/mmu_context.h
@@ -18,6 +18,7 @@
18#include <asm/pgtable.h> 18#include <asm/pgtable.h>
19#include <asm/cacheflush.h> 19#include <asm/cacheflush.h>
20#include <asm/tlbflush.h> 20#include <asm/tlbflush.h>
21#include <asm-generic/mm_hooks.h>
21 22
22#define XCHAL_MMU_ASID_BITS 8 23#define XCHAL_MMU_ASID_BITS 8
23 24
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 81c07cd18643..0365ec9fc0c9 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -122,9 +122,9 @@ extern void *alloc_large_system_hash(const char *tablename,
122#define HASH_EARLY 0x00000001 /* Allocating during early boot? */ 122#define HASH_EARLY 0x00000001 /* Allocating during early boot? */
123 123
124/* Only NUMA needs hash distribution. 124/* Only NUMA needs hash distribution.
125 * IA64 is known to have sufficient vmalloc space. 125 * IA64 and x86_64 have sufficient vmalloc space.
126 */ 126 */
127#if defined(CONFIG_NUMA) && defined(CONFIG_IA64) 127#if defined(CONFIG_NUMA) && (defined(CONFIG_IA64) || defined(CONFIG_X86_64))
128#define HASHDIST_DEFAULT 1 128#define HASHDIST_DEFAULT 1
129#else 129#else
130#define HASHDIST_DEFAULT 0 130#define HASHDIST_DEFAULT 0
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 3ec6e7ff5fbd..963051a967d6 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -32,7 +32,15 @@
32 * CPUFREQ NOTIFIER INTERFACE * 32 * CPUFREQ NOTIFIER INTERFACE *
33 *********************************************************************/ 33 *********************************************************************/
34 34
35#ifdef CONFIG_CPU_FREQ
35int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list); 36int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list);
37#else
38static inline int cpufreq_register_notifier(struct notifier_block *nb,
39 unsigned int list)
40{
41 return 0;
42}
43#endif
36int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list); 44int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list);
37 45
38#define CPUFREQ_TRANSITION_NOTIFIER (0) 46#define CPUFREQ_TRANSITION_NOTIFIER (0)
@@ -260,17 +268,22 @@ struct freq_attr {
260int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); 268int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu);
261int cpufreq_update_policy(unsigned int cpu); 269int cpufreq_update_policy(unsigned int cpu);
262 270
263/* query the current CPU frequency (in kHz). If zero, cpufreq couldn't detect it */
264unsigned int cpufreq_get(unsigned int cpu);
265 271
266/* query the last known CPU freq (in kHz). If zero, cpufreq couldn't detect it */ 272/*
273 * query the last known CPU freq (in kHz). If zero, cpufreq couldn't detect it
274 */
267#ifdef CONFIG_CPU_FREQ 275#ifdef CONFIG_CPU_FREQ
268unsigned int cpufreq_quick_get(unsigned int cpu); 276unsigned int cpufreq_quick_get(unsigned int cpu);
277unsigned int cpufreq_get(unsigned int cpu);
269#else 278#else
270static inline unsigned int cpufreq_quick_get(unsigned int cpu) 279static inline unsigned int cpufreq_quick_get(unsigned int cpu)
271{ 280{
272 return 0; 281 return 0;
273} 282}
283static inline unsigned int cpufreq_get(unsigned int cpu)
284{
285 return 0;
286}
274#endif 287#endif
275 288
276 289
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 32503657f14f..22c7ac5cd80c 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -14,5 +14,13 @@ extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
14extern const struct file_operations proc_vmcore_operations; 14extern const struct file_operations proc_vmcore_operations;
15extern struct proc_dir_entry *proc_vmcore; 15extern struct proc_dir_entry *proc_vmcore;
16 16
17/* Architecture code defines this if there are other possible ELF
18 * machine types, e.g. on bi-arch capable hardware. */
19#ifndef vmcore_elf_check_arch_cross
20#define vmcore_elf_check_arch_cross(x) 0
21#endif
22
23#define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
24
17#endif /* CONFIG_CRASH_DUMP */ 25#endif /* CONFIG_CRASH_DUMP */
18#endif /* LINUX_CRASHDUMP_H */ 26#endif /* LINUX_CRASHDUMP_H */
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 60713e6ea297..8b17ffe222c4 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -83,6 +83,23 @@ typedef __s64 Elf64_Sxword;
83#define DT_DEBUG 21 83#define DT_DEBUG 21
84#define DT_TEXTREL 22 84#define DT_TEXTREL 22
85#define DT_JMPREL 23 85#define DT_JMPREL 23
86#define DT_ENCODING 32
87#define OLD_DT_LOOS 0x60000000
88#define DT_LOOS 0x6000000d
89#define DT_HIOS 0x6ffff000
90#define DT_VALRNGLO 0x6ffffd00
91#define DT_VALRNGHI 0x6ffffdff
92#define DT_ADDRRNGLO 0x6ffffe00
93#define DT_ADDRRNGHI 0x6ffffeff
94#define DT_VERSYM 0x6ffffff0
95#define DT_RELACOUNT 0x6ffffff9
96#define DT_RELCOUNT 0x6ffffffa
97#define DT_FLAGS_1 0x6ffffffb
98#define DT_VERDEF 0x6ffffffc
99#define DT_VERDEFNUM 0x6ffffffd
100#define DT_VERNEED 0x6ffffffe
101#define DT_VERNEEDNUM 0x6fffffff
102#define OLD_DT_HIOS 0x6fffffff
86#define DT_LOPROC 0x70000000 103#define DT_LOPROC 0x70000000
87#define DT_HIPROC 0x7fffffff 104#define DT_HIPROC 0x7fffffff
88 105
diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h
index 67396db141e8..9a1e0674e56c 100644
--- a/include/linux/elfnote.h
+++ b/include/linux/elfnote.h
@@ -39,12 +39,12 @@
39 * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef) 39 * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef)
40 */ 40 */
41#define ELFNOTE(name, type, desctype, descdata) \ 41#define ELFNOTE(name, type, desctype, descdata) \
42.pushsection .note.name ; \ 42.pushsection .note.name, "",@note ; \
43 .align 4 ; \ 43 .align 4 ; \
44 .long 2f - 1f /* namesz */ ; \ 44 .long 2f - 1f /* namesz */ ; \
45 .long 4f - 3f /* descsz */ ; \ 45 .long 4f - 3f /* descsz */ ; \
46 .long type ; \ 46 .long type ; \
471:.asciz "name" ; \ 471:.asciz #name ; \
482:.align 4 ; \ 482:.align 4 ; \
493:desctype descdata ; \ 493:desctype descdata ; \
504:.align 4 ; \ 504:.align 4 ; \
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 7bab8eae2341..a515eb0afdfb 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -27,6 +27,8 @@ static inline void flush_kernel_dcache_page(struct page *page)
27unsigned int nr_free_highpages(void); 27unsigned int nr_free_highpages(void);
28extern unsigned long totalhigh_pages; 28extern unsigned long totalhigh_pages;
29 29
30void kmap_flush_unused(void);
31
30#else /* CONFIG_HIGHMEM */ 32#else /* CONFIG_HIGHMEM */
31 33
32static inline unsigned int nr_free_highpages(void) { return 0; } 34static inline unsigned int nr_free_highpages(void) { return 0; }
@@ -49,10 +51,13 @@ static inline void *kmap_atomic(struct page *page, enum km_type idx)
49 pagefault_disable(); 51 pagefault_disable();
50 return page_address(page); 52 return page_address(page);
51} 53}
54#define kmap_atomic_prot(page, idx, prot) kmap_atomic(page, idx)
52 55
53#define kunmap_atomic(addr, idx) do { pagefault_enable(); } while (0) 56#define kunmap_atomic(addr, idx) do { pagefault_enable(); } while (0)
54#define kmap_atomic_pfn(pfn, idx) kmap_atomic(pfn_to_page(pfn), (idx)) 57#define kmap_atomic_pfn(pfn, idx) kmap_atomic(pfn_to_page(pfn), (idx))
55#define kmap_atomic_to_page(ptr) virt_to_page(ptr) 58#define kmap_atomic_to_page(ptr) virt_to_page(ptr)
59
60#define kmap_flush_unused() do {} while(0)
56#endif 61#endif
57 62
58#endif /* CONFIG_HIGHMEM */ 63#endif /* CONFIG_HIGHMEM */
diff --git a/include/linux/init.h b/include/linux/init.h
index e290a010e3f2..9abf120ec9f8 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -52,9 +52,14 @@
52#endif 52#endif
53 53
54/* For assembly routines */ 54/* For assembly routines */
55#ifdef CONFIG_HOTPLUG_CPU
56#define __INIT .section ".text","ax"
57#define __INITDATA .section ".data","aw"
58#else
55#define __INIT .section ".init.text","ax" 59#define __INIT .section ".init.text","ax"
56#define __FINIT .previous
57#define __INITDATA .section ".init.data","aw" 60#define __INITDATA .section ".init.data","aw"
61#endif
62#define __FINIT .previous
58 63
59#ifndef __ASSEMBLY__ 64#ifndef __ASSEMBLY__
60/* 65/*
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 600e3d387ffc..b72be2f79e6a 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -11,9 +11,16 @@
11 11
12/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ 12/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
13#ifndef PERCPU_ENOUGH_ROOM 13#ifndef PERCPU_ENOUGH_ROOM
14#define PERCPU_ENOUGH_ROOM 32768 14#ifdef CONFIG_MODULES
15#define PERCPU_MODULE_RESERVE 8192
16#else
17#define PERCPU_MODULE_RESERVE 0
15#endif 18#endif
16 19
20#define PERCPU_ENOUGH_ROOM \
21 (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE)
22#endif /* PERCPU_ENOUGH_ROOM */
23
17/* 24/*
18 * Must be an lvalue. Since @var must be a simple identifier, 25 * Must be an lvalue. Since @var must be a simple identifier,
19 * we force a syntax error here if it isn't. 26 * we force a syntax error here if it isn't.
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 3e628f990fdf..89580b764959 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -26,9 +26,6 @@
26/********** arch/$ARCH/mm/init.c **********/ 26/********** arch/$ARCH/mm/init.c **********/
27#define POISON_FREE_INITMEM 0xcc 27#define POISON_FREE_INITMEM 0xcc
28 28
29/********** arch/x86_64/mm/init.c **********/
30#define POISON_FREE_INITDATA 0xba
31
32/********** arch/ia64/hp/common/sba_iommu.c **********/ 29/********** arch/ia64/hp/common/sba_iommu.c **********/
33/* 30/*
34 * arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a 31 * arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a
diff --git a/init/main.c b/init/main.c
index a92989e7836a..80f09f31bfab 100644
--- a/init/main.c
+++ b/init/main.c
@@ -369,12 +369,8 @@ static void __init setup_per_cpu_areas(void)
369 unsigned long nr_possible_cpus = num_possible_cpus(); 369 unsigned long nr_possible_cpus = num_possible_cpus();
370 370
371 /* Copy section for each CPU (we discard the original) */ 371 /* Copy section for each CPU (we discard the original) */
372 size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); 372 size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
373#ifdef CONFIG_MODULES 373 ptr = alloc_bootmem_pages(size * nr_possible_cpus);
374 if (size < PERCPU_ENOUGH_ROOM)
375 size = PERCPU_ENOUGH_ROOM;
376#endif
377 ptr = alloc_bootmem(size * nr_possible_cpus);
378 374
379 for_each_possible_cpu(i) { 375 for_each_possible_cpu(i) {
380 __per_cpu_offset[i] = ptr - __per_cpu_start; 376 __per_cpu_offset[i] = ptr - __per_cpu_start;
diff --git a/kernel/fork.c b/kernel/fork.c
index 6af959c034d8..ffccefb28b6a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -286,6 +286,8 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
286 if (retval) 286 if (retval)
287 goto out; 287 goto out;
288 } 288 }
289 /* a new mm has just been created */
290 arch_dup_mmap(oldmm, mm);
289 retval = 0; 291 retval = 0;
290out: 292out:
291 up_write(&mm->mmap_sem); 293 up_write(&mm->mmap_sem);
diff --git a/kernel/module.c b/kernel/module.c
index ff982ec435fc..1eb8ca565ba0 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -348,10 +348,10 @@ static void *percpu_modalloc(unsigned long size, unsigned long align,
348 unsigned int i; 348 unsigned int i;
349 void *ptr; 349 void *ptr;
350 350
351 if (align > SMP_CACHE_BYTES) { 351 if (align > PAGE_SIZE) {
352 printk(KERN_WARNING "%s: per-cpu alignment %li > %i\n", 352 printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
353 name, align, SMP_CACHE_BYTES); 353 name, align, PAGE_SIZE);
354 align = SMP_CACHE_BYTES; 354 align = PAGE_SIZE;
355 } 355 }
356 356
357 ptr = __per_cpu_start; 357 ptr = __per_cpu_start;
@@ -432,7 +432,7 @@ static int percpu_modinit(void)
432 pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated, 432 pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
433 GFP_KERNEL); 433 GFP_KERNEL);
434 /* Static in-kernel percpu data (used). */ 434 /* Static in-kernel percpu data (used). */
435 pcpu_size[0] = -ALIGN(__per_cpu_end-__per_cpu_start, SMP_CACHE_BYTES); 435 pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
436 /* Free room. */ 436 /* Free room. */
437 pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0]; 437 pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
438 if (pcpu_size[1] < 0) { 438 if (pcpu_size[1] < 0) {
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 5f842c3efc4b..33bd94ceba32 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -23,6 +23,8 @@ static inline int pm_suspend_disk(void)
23} 23}
24#endif 24#endif
25 25
26extern int pfn_is_nosave(unsigned long);
27
26extern struct mutex pm_mutex; 28extern struct mutex pm_mutex;
27 29
28#define power_attr(_name) \ 30#define power_attr(_name) \
@@ -37,9 +39,6 @@ static struct subsys_attribute _name##_attr = { \
37 39
38extern struct kset power_subsys; 40extern struct kset power_subsys;
39 41
40/* References to section boundaries */
41extern const void __nosave_begin, __nosave_end;
42
43/* Preferred image size in bytes (default 500 MB) */ 42/* Preferred image size in bytes (default 500 MB) */
44extern unsigned long image_size; 43extern unsigned long image_size;
45extern int in_suspend; 44extern int in_suspend;
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index fc53ad068128..704c25a3ffec 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -651,17 +651,6 @@ static inline unsigned int count_highmem_pages(void) { return 0; }
651#endif /* CONFIG_HIGHMEM */ 651#endif /* CONFIG_HIGHMEM */
652 652
653/** 653/**
654 * pfn_is_nosave - check if given pfn is in the 'nosave' section
655 */
656
657static inline int pfn_is_nosave(unsigned long pfn)
658{
659 unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
660 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
661 return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
662}
663
664/**
665 * saveable - Determine whether a non-highmem page should be included in 654 * saveable - Determine whether a non-highmem page should be included in
666 * the suspend image. 655 * the suspend image.
667 * 656 *
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 3581f8f86acd..b18c155cbb60 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -33,12 +33,14 @@ extern char resume_file[];
33 33
34#define SWSUSP_SIG "S1SUSPEND" 34#define SWSUSP_SIG "S1SUSPEND"
35 35
36static struct swsusp_header { 36struct swsusp_header {
37 char reserved[PAGE_SIZE - 20 - sizeof(sector_t)]; 37 char reserved[PAGE_SIZE - 20 - sizeof(sector_t)];
38 sector_t image; 38 sector_t image;
39 char orig_sig[10]; 39 char orig_sig[10];
40 char sig[10]; 40 char sig[10];
41} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header; 41} __attribute__((packed));
42
43static struct swsusp_header *swsusp_header;
42 44
43/* 45/*
44 * General things 46 * General things
@@ -141,14 +143,14 @@ static int mark_swapfiles(sector_t start)
141{ 143{
142 int error; 144 int error;
143 145
144 bio_read_page(swsusp_resume_block, &swsusp_header, NULL); 146 bio_read_page(swsusp_resume_block, swsusp_header, NULL);
145 if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || 147 if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
146 !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { 148 !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
147 memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); 149 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
148 memcpy(swsusp_header.sig,SWSUSP_SIG, 10); 150 memcpy(swsusp_header->sig,SWSUSP_SIG, 10);
149 swsusp_header.image = start; 151 swsusp_header->image = start;
150 error = bio_write_page(swsusp_resume_block, 152 error = bio_write_page(swsusp_resume_block,
151 &swsusp_header, NULL); 153 swsusp_header, NULL);
152 } else { 154 } else {
153 printk(KERN_ERR "swsusp: Swap header not found!\n"); 155 printk(KERN_ERR "swsusp: Swap header not found!\n");
154 error = -ENODEV; 156 error = -ENODEV;
@@ -564,7 +566,7 @@ int swsusp_read(void)
564 if (error < PAGE_SIZE) 566 if (error < PAGE_SIZE)
565 return error < 0 ? error : -EFAULT; 567 return error < 0 ? error : -EFAULT;
566 header = (struct swsusp_info *)data_of(snapshot); 568 header = (struct swsusp_info *)data_of(snapshot);
567 error = get_swap_reader(&handle, swsusp_header.image); 569 error = get_swap_reader(&handle, swsusp_header->image);
568 if (!error) 570 if (!error)
569 error = swap_read_page(&handle, header, NULL); 571 error = swap_read_page(&handle, header, NULL);
570 if (!error) 572 if (!error)
@@ -591,17 +593,17 @@ int swsusp_check(void)
591 resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); 593 resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
592 if (!IS_ERR(resume_bdev)) { 594 if (!IS_ERR(resume_bdev)) {
593 set_blocksize(resume_bdev, PAGE_SIZE); 595 set_blocksize(resume_bdev, PAGE_SIZE);
594 memset(&swsusp_header, 0, sizeof(swsusp_header)); 596 memset(swsusp_header, 0, sizeof(PAGE_SIZE));
595 error = bio_read_page(swsusp_resume_block, 597 error = bio_read_page(swsusp_resume_block,
596 &swsusp_header, NULL); 598 swsusp_header, NULL);
597 if (error) 599 if (error)
598 return error; 600 return error;
599 601
600 if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { 602 if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) {
601 memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); 603 memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
602 /* Reset swap signature now */ 604 /* Reset swap signature now */
603 error = bio_write_page(swsusp_resume_block, 605 error = bio_write_page(swsusp_resume_block,
604 &swsusp_header, NULL); 606 swsusp_header, NULL);
605 } else { 607 } else {
606 return -EINVAL; 608 return -EINVAL;
607 } 609 }
@@ -632,3 +634,13 @@ void swsusp_close(void)
632 634
633 blkdev_put(resume_bdev); 635 blkdev_put(resume_bdev);
634} 636}
637
638static int swsusp_header_init(void)
639{
640 swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL);
641 if (!swsusp_header)
642 panic("Could not allocate memory for swsusp_header\n");
643 return 0;
644}
645
646core_initcall(swsusp_header_init);
diff --git a/lib/inflate.c b/lib/inflate.c
index 6db6e98d1637..845f91d3ac12 100644
--- a/lib/inflate.c
+++ b/lib/inflate.c
@@ -292,7 +292,6 @@ STATIC int INIT huft_build(
292 oversubscribed set of lengths), and three if not enough memory. */ 292 oversubscribed set of lengths), and three if not enough memory. */
293{ 293{
294 unsigned a; /* counter for codes of length k */ 294 unsigned a; /* counter for codes of length k */
295 unsigned c[BMAX+1]; /* bit length count table */
296 unsigned f; /* i repeats in table every f entries */ 295 unsigned f; /* i repeats in table every f entries */
297 int g; /* maximum code length */ 296 int g; /* maximum code length */
298 int h; /* table level */ 297 int h; /* table level */
@@ -303,18 +302,33 @@ STATIC int INIT huft_build(
303 register unsigned *p; /* pointer into c[], b[], or v[] */ 302 register unsigned *p; /* pointer into c[], b[], or v[] */
304 register struct huft *q; /* points to current table */ 303 register struct huft *q; /* points to current table */
305 struct huft r; /* table entry for structure assignment */ 304 struct huft r; /* table entry for structure assignment */
306 struct huft *u[BMAX]; /* table stack */
307 unsigned v[N_MAX]; /* values in order of bit length */
308 register int w; /* bits before this table == (l * h) */ 305 register int w; /* bits before this table == (l * h) */
309 unsigned x[BMAX+1]; /* bit offsets, then code stack */
310 unsigned *xp; /* pointer into x */ 306 unsigned *xp; /* pointer into x */
311 int y; /* number of dummy codes added */ 307 int y; /* number of dummy codes added */
312 unsigned z; /* number of entries in current table */ 308 unsigned z; /* number of entries in current table */
309 struct {
310 unsigned c[BMAX+1]; /* bit length count table */
311 struct huft *u[BMAX]; /* table stack */
312 unsigned v[N_MAX]; /* values in order of bit length */
313 unsigned x[BMAX+1]; /* bit offsets, then code stack */
314 } *stk;
315 unsigned *c, *v, *x;
316 struct huft **u;
317 int ret;
313 318
314DEBG("huft1 "); 319DEBG("huft1 ");
315 320
321 stk = malloc(sizeof(*stk));
322 if (stk == NULL)
323 return 3; /* out of memory */
324
325 c = stk->c;
326 v = stk->v;
327 x = stk->x;
328 u = stk->u;
329
316 /* Generate counts for each bit length */ 330 /* Generate counts for each bit length */
317 memzero(c, sizeof(c)); 331 memzero(stk->c, sizeof(stk->c));
318 p = b; i = n; 332 p = b; i = n;
319 do { 333 do {
320 Tracecv(*p, (stderr, (n-i >= ' ' && n-i <= '~' ? "%c %d\n" : "0x%x %d\n"), 334 Tracecv(*p, (stderr, (n-i >= ' ' && n-i <= '~' ? "%c %d\n" : "0x%x %d\n"),
@@ -326,7 +340,8 @@ DEBG("huft1 ");
326 { 340 {
327 *t = (struct huft *)NULL; 341 *t = (struct huft *)NULL;
328 *m = 0; 342 *m = 0;
329 return 2; 343 ret = 2;
344 goto out;
330 } 345 }
331 346
332DEBG("huft2 "); 347DEBG("huft2 ");
@@ -351,10 +366,14 @@ DEBG("huft3 ");
351 366
352 /* Adjust last length count to fill out codes, if needed */ 367 /* Adjust last length count to fill out codes, if needed */
353 for (y = 1 << j; j < i; j++, y <<= 1) 368 for (y = 1 << j; j < i; j++, y <<= 1)
354 if ((y -= c[j]) < 0) 369 if ((y -= c[j]) < 0) {
355 return 2; /* bad input: more codes than bits */ 370 ret = 2; /* bad input: more codes than bits */
356 if ((y -= c[i]) < 0) 371 goto out;
357 return 2; 372 }
373 if ((y -= c[i]) < 0) {
374 ret = 2;
375 goto out;
376 }
358 c[i] += y; 377 c[i] += y;
359 378
360DEBG("huft4 "); 379DEBG("huft4 ");
@@ -428,7 +447,8 @@ DEBG1("3 ");
428 { 447 {
429 if (h) 448 if (h)
430 huft_free(u[0]); 449 huft_free(u[0]);
431 return 3; /* not enough memory */ 450 ret = 3; /* not enough memory */
451 goto out;
432 } 452 }
433DEBG1("4 "); 453DEBG1("4 ");
434 hufts += z + 1; /* track memory usage */ 454 hufts += z + 1; /* track memory usage */
@@ -492,7 +512,11 @@ DEBG("h6f ");
492DEBG("huft7 "); 512DEBG("huft7 ");
493 513
494 /* Return true (1) if we were given an incomplete table */ 514 /* Return true (1) if we were given an incomplete table */
495 return y != 0 && g != 1; 515 ret = y != 0 && g != 1;
516
517 out:
518 free(stk);
519 return ret;
496} 520}
497 521
498 522
@@ -705,10 +729,14 @@ STATIC int noinline INIT inflate_fixed(void)
705 struct huft *td; /* distance code table */ 729 struct huft *td; /* distance code table */
706 int bl; /* lookup bits for tl */ 730 int bl; /* lookup bits for tl */
707 int bd; /* lookup bits for td */ 731 int bd; /* lookup bits for td */
708 unsigned l[288]; /* length list for huft_build */ 732 unsigned *l; /* length list for huft_build */
709 733
710DEBG("<fix"); 734DEBG("<fix");
711 735
736 l = malloc(sizeof(*l) * 288);
737 if (l == NULL)
738 return 3; /* out of memory */
739
712 /* set up literal table */ 740 /* set up literal table */
713 for (i = 0; i < 144; i++) 741 for (i = 0; i < 144; i++)
714 l[i] = 8; 742 l[i] = 8;
@@ -719,9 +747,10 @@ DEBG("<fix");
719 for (; i < 288; i++) /* make a complete, but wrong code set */ 747 for (; i < 288; i++) /* make a complete, but wrong code set */
720 l[i] = 8; 748 l[i] = 8;
721 bl = 7; 749 bl = 7;
722 if ((i = huft_build(l, 288, 257, cplens, cplext, &tl, &bl)) != 0) 750 if ((i = huft_build(l, 288, 257, cplens, cplext, &tl, &bl)) != 0) {
751 free(l);
723 return i; 752 return i;
724 753 }
725 754
726 /* set up distance table */ 755 /* set up distance table */
727 for (i = 0; i < 30; i++) /* make an incomplete code set */ 756 for (i = 0; i < 30; i++) /* make an incomplete code set */
@@ -730,6 +759,7 @@ DEBG("<fix");
730 if ((i = huft_build(l, 30, 0, cpdist, cpdext, &td, &bd)) > 1) 759 if ((i = huft_build(l, 30, 0, cpdist, cpdext, &td, &bd)) > 1)
731 { 760 {
732 huft_free(tl); 761 huft_free(tl);
762 free(l);
733 763
734 DEBG(">"); 764 DEBG(">");
735 return i; 765 return i;
@@ -737,11 +767,13 @@ DEBG("<fix");
737 767
738 768
739 /* decompress until an end-of-block code */ 769 /* decompress until an end-of-block code */
740 if (inflate_codes(tl, td, bl, bd)) 770 if (inflate_codes(tl, td, bl, bd)) {
771 free(l);
741 return 1; 772 return 1;
742 773 }
743 774
744 /* free the decoding tables, return */ 775 /* free the decoding tables, return */
776 free(l);
745 huft_free(tl); 777 huft_free(tl);
746 huft_free(td); 778 huft_free(td);
747 return 0; 779 return 0;
@@ -766,16 +798,19 @@ STATIC int noinline INIT inflate_dynamic(void)
766 unsigned nb; /* number of bit length codes */ 798 unsigned nb; /* number of bit length codes */
767 unsigned nl; /* number of literal/length codes */ 799 unsigned nl; /* number of literal/length codes */
768 unsigned nd; /* number of distance codes */ 800 unsigned nd; /* number of distance codes */
769#ifdef PKZIP_BUG_WORKAROUND 801 unsigned *ll; /* literal/length and distance code lengths */
770 unsigned ll[288+32]; /* literal/length and distance code lengths */
771#else
772 unsigned ll[286+30]; /* literal/length and distance code lengths */
773#endif
774 register ulg b; /* bit buffer */ 802 register ulg b; /* bit buffer */
775 register unsigned k; /* number of bits in bit buffer */ 803 register unsigned k; /* number of bits in bit buffer */
804 int ret;
776 805
777DEBG("<dyn"); 806DEBG("<dyn");
778 807
808#ifdef PKZIP_BUG_WORKAROUND
809 ll = malloc(sizeof(*ll) * (288+32)); /* literal/length and distance code lengths */
810#else
811 ll = malloc(sizeof(*ll) * (286+30)); /* literal/length and distance code lengths */
812#endif
813
779 /* make local bit buffer */ 814 /* make local bit buffer */
780 b = bb; 815 b = bb;
781 k = bk; 816 k = bk;
@@ -796,7 +831,10 @@ DEBG("<dyn");
796#else 831#else
797 if (nl > 286 || nd > 30) 832 if (nl > 286 || nd > 30)
798#endif 833#endif
799 return 1; /* bad lengths */ 834 {
835 ret = 1; /* bad lengths */
836 goto out;
837 }
800 838
801DEBG("dyn1 "); 839DEBG("dyn1 ");
802 840
@@ -818,7 +856,8 @@ DEBG("dyn2 ");
818 { 856 {
819 if (i == 1) 857 if (i == 1)
820 huft_free(tl); 858 huft_free(tl);
821 return i; /* incomplete code set */ 859 ret = i; /* incomplete code set */
860 goto out;
822 } 861 }
823 862
824DEBG("dyn3 "); 863DEBG("dyn3 ");
@@ -840,8 +879,10 @@ DEBG("dyn3 ");
840 NEEDBITS(2) 879 NEEDBITS(2)
841 j = 3 + ((unsigned)b & 3); 880 j = 3 + ((unsigned)b & 3);
842 DUMPBITS(2) 881 DUMPBITS(2)
843 if ((unsigned)i + j > n) 882 if ((unsigned)i + j > n) {
844 return 1; 883 ret = 1;
884 goto out;
885 }
845 while (j--) 886 while (j--)
846 ll[i++] = l; 887 ll[i++] = l;
847 } 888 }
@@ -850,8 +891,10 @@ DEBG("dyn3 ");
850 NEEDBITS(3) 891 NEEDBITS(3)
851 j = 3 + ((unsigned)b & 7); 892 j = 3 + ((unsigned)b & 7);
852 DUMPBITS(3) 893 DUMPBITS(3)
853 if ((unsigned)i + j > n) 894 if ((unsigned)i + j > n) {
854 return 1; 895 ret = 1;
896 goto out;
897 }
855 while (j--) 898 while (j--)
856 ll[i++] = 0; 899 ll[i++] = 0;
857 l = 0; 900 l = 0;
@@ -861,8 +904,10 @@ DEBG("dyn3 ");
861 NEEDBITS(7) 904 NEEDBITS(7)
862 j = 11 + ((unsigned)b & 0x7f); 905 j = 11 + ((unsigned)b & 0x7f);
863 DUMPBITS(7) 906 DUMPBITS(7)
864 if ((unsigned)i + j > n) 907 if ((unsigned)i + j > n) {
865 return 1; 908 ret = 1;
909 goto out;
910 }
866 while (j--) 911 while (j--)
867 ll[i++] = 0; 912 ll[i++] = 0;
868 l = 0; 913 l = 0;
@@ -891,7 +936,8 @@ DEBG("dyn5b ");
891 error("incomplete literal tree"); 936 error("incomplete literal tree");
892 huft_free(tl); 937 huft_free(tl);
893 } 938 }
894 return i; /* incomplete code set */ 939 ret = i; /* incomplete code set */
940 goto out;
895 } 941 }
896DEBG("dyn5c "); 942DEBG("dyn5c ");
897 bd = dbits; 943 bd = dbits;
@@ -907,15 +953,18 @@ DEBG("dyn5d ");
907 huft_free(td); 953 huft_free(td);
908 } 954 }
909 huft_free(tl); 955 huft_free(tl);
910 return i; /* incomplete code set */ 956 ret = i; /* incomplete code set */
957 goto out;
911#endif 958#endif
912 } 959 }
913 960
914DEBG("dyn6 "); 961DEBG("dyn6 ");
915 962
916 /* decompress until an end-of-block code */ 963 /* decompress until an end-of-block code */
917 if (inflate_codes(tl, td, bl, bd)) 964 if (inflate_codes(tl, td, bl, bd)) {
918 return 1; 965 ret = 1;
966 goto out;
967 }
919 968
920DEBG("dyn7 "); 969DEBG("dyn7 ");
921 970
@@ -924,10 +973,14 @@ DEBG("dyn7 ");
924 huft_free(td); 973 huft_free(td);
925 974
926 DEBG(">"); 975 DEBG(">");
927 return 0; 976 ret = 0;
928 977out:
929 underrun: 978 free(ll);
930 return 4; /* Input underrun */ 979 return ret;
980
981underrun:
982 ret = 4; /* Input underrun */
983 goto out;
931} 984}
932 985
933 986
diff --git a/mm/highmem.c b/mm/highmem.c
index 51e1c1995fec..be8f8d36a8b9 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -99,6 +99,15 @@ static void flush_all_zero_pkmaps(void)
99 flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP)); 99 flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
100} 100}
101 101
102/* Flush all unused kmap mappings in order to remove stray
103 mappings. */
104void kmap_flush_unused(void)
105{
106 spin_lock(&kmap_lock);
107 flush_all_zero_pkmaps();
108 spin_unlock(&kmap_lock);
109}
110
102static inline unsigned long map_new_virtual(struct page *page) 111static inline unsigned long map_new_virtual(struct page *page)
103{ 112{
104 unsigned long vaddr; 113 unsigned long vaddr;
diff --git a/mm/mmap.c b/mm/mmap.c
index 84f997da78d7..88da687bde89 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -29,6 +29,7 @@
29#include <asm/uaccess.h> 29#include <asm/uaccess.h>
30#include <asm/cacheflush.h> 30#include <asm/cacheflush.h>
31#include <asm/tlb.h> 31#include <asm/tlb.h>
32#include <asm/mmu_context.h>
32 33
33#ifndef arch_mmap_check 34#ifndef arch_mmap_check
34#define arch_mmap_check(addr, len, flags) (0) 35#define arch_mmap_check(addr, len, flags) (0)
@@ -1979,6 +1980,9 @@ void exit_mmap(struct mm_struct *mm)
1979 unsigned long nr_accounted = 0; 1980 unsigned long nr_accounted = 0;
1980 unsigned long end; 1981 unsigned long end;
1981 1982
1983 /* mm's last user has gone, and its about to be pulled down */
1984 arch_exit_mmap(mm);
1985
1982 lru_add_drain(); 1986 lru_add_drain();
1983 flush_cache_mm(mm); 1987 flush_cache_mm(mm);
1984 tlb = tlb_gather_mmu(mm, 1); 1988 tlb = tlb_gather_mmu(mm, 1);
diff --git a/mm/slab.c b/mm/slab.c
index 4cbac24ae2f1..168bfe9d8ffe 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1146,7 +1146,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1146 * Make sure we are not freeing a object from another node to the array 1146 * Make sure we are not freeing a object from another node to the array
1147 * cache on this cpu. 1147 * cache on this cpu.
1148 */ 1148 */
1149 if (likely(slabp->nodeid == node) || unlikely(!use_alien_caches)) 1149 if (likely(slabp->nodeid == node))
1150 return 0; 1150 return 0;
1151 1151
1152 l3 = cachep->nodelists[node]; 1152 l3 = cachep->nodelists[node];
@@ -1394,6 +1394,9 @@ void __init kmem_cache_init(void)
1394 int order; 1394 int order;
1395 int node; 1395 int node;
1396 1396
1397 if (num_possible_nodes() == 1)
1398 use_alien_caches = 0;
1399
1397 for (i = 0; i < NUM_INIT_LISTS; i++) { 1400 for (i = 0; i < NUM_INIT_LISTS; i++) {
1398 kmem_list3_init(&initkmem_list3[i]); 1401 kmem_list3_init(&initkmem_list3[i]);
1399 if (i < MAX_NUMNODES) 1402 if (i < MAX_NUMNODES)
@@ -3563,7 +3566,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
3563 check_irq_off(); 3566 check_irq_off();
3564 objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); 3567 objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
3565 3568
3566 if (cache_free_alien(cachep, objp)) 3569 if (use_alien_caches && cache_free_alien(cachep, objp))
3567 return; 3570 return;
3568 3571
3569 if (likely(ac->avail < ac->limit)) { 3572 if (likely(ac->avail < ac->limit)) {
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 9eef486da909..cb5aabda7046 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -431,7 +431,7 @@ void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
431 area->flags |= VM_VPAGES; 431 area->flags |= VM_VPAGES;
432 } else { 432 } else {
433 pages = kmalloc_node(array_size, 433 pages = kmalloc_node(array_size,
434 (gfp_mask & ~(__GFP_HIGHMEM | __GFP_ZERO)), 434 (gfp_mask & GFP_LEVEL_MASK),
435 node); 435 node);
436 } 436 }
437 area->pages = pages; 437 area->pages = pages;
@@ -577,6 +577,14 @@ void *vmalloc_exec(unsigned long size)
577 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC); 577 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
578} 578}
579 579
580#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
581#define GFP_VMALLOC32 GFP_DMA32
582#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
583#define GFP_VMALLOC32 GFP_DMA
584#else
585#define GFP_VMALLOC32 GFP_KERNEL
586#endif
587
580/** 588/**
581 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) 589 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
582 * @size: allocation size 590 * @size: allocation size
@@ -586,7 +594,7 @@ void *vmalloc_exec(unsigned long size)
586 */ 594 */
587void *vmalloc_32(unsigned long size) 595void *vmalloc_32(unsigned long size)
588{ 596{
589 return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL); 597 return __vmalloc(size, GFP_VMALLOC32, PAGE_KERNEL);
590} 598}
591EXPORT_SYMBOL(vmalloc_32); 599EXPORT_SYMBOL(vmalloc_32);
592 600
@@ -602,7 +610,7 @@ void *vmalloc_32_user(unsigned long size)
602 struct vm_struct *area; 610 struct vm_struct *area;
603 void *ret; 611 void *ret;
604 612
605 ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); 613 ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL);
606 if (ret) { 614 if (ret) {
607 write_lock(&vmlist_lock); 615 write_lock(&vmlist_lock);
608 area = __find_vm_area(ret); 616 area = __find_vm_area(ret);
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 65bdfdb56877..78d659cbb36a 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -606,6 +606,7 @@ static int secref_whitelist(const char *modname, const char *tosec,
606 "_probe", 606 "_probe",
607 "_probe_one", 607 "_probe_one",
608 "_console", 608 "_console",
609 "apic_es7000",
609 NULL 610 NULL
610 }; 611 };
611 612