aboutsummaryrefslogtreecommitdiffstats
path: root/arch/i386
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386')
-rw-r--r--arch/i386/Kconfig42
-rw-r--r--arch/i386/Kconfig.cpu11
-rw-r--r--arch/i386/Makefile4
-rw-r--r--arch/i386/boot/.gitignore2
-rw-r--r--arch/i386/boot/Makefile48
-rw-r--r--arch/i386/boot/a20.c161
-rw-r--r--arch/i386/boot/apm.c97
-rw-r--r--arch/i386/boot/bitops.h45
-rw-r--r--arch/i386/boot/boot.h296
-rw-r--r--arch/i386/boot/bootsect.S98
-rw-r--r--arch/i386/boot/cmdline.c97
-rw-r--r--arch/i386/boot/code16gcc.h15
-rw-r--r--arch/i386/boot/compressed/Makefile7
-rw-r--r--arch/i386/boot/compressed/head.S6
-rw-r--r--arch/i386/boot/compressed/relocs.c3
-rw-r--r--arch/i386/boot/copy.S101
-rw-r--r--arch/i386/boot/cpu.c69
-rw-r--r--arch/i386/boot/cpucheck.c267
-rw-r--r--arch/i386/boot/edd.S231
-rw-r--r--arch/i386/boot/edd.c196
-rw-r--r--arch/i386/boot/header.S283
-rw-r--r--arch/i386/boot/main.c161
-rw-r--r--arch/i386/boot/mca.c43
-rw-r--r--arch/i386/boot/memory.c99
-rw-r--r--arch/i386/boot/pm.c170
-rw-r--r--arch/i386/boot/pmjump.S54
-rw-r--r--arch/i386/boot/printf.c307
-rw-r--r--arch/i386/boot/setup.S1075
-rw-r--r--arch/i386/boot/setup.ld54
-rw-r--r--arch/i386/boot/string.c52
-rw-r--r--arch/i386/boot/tools/build.c162
-rw-r--r--arch/i386/boot/tty.c112
-rw-r--r--arch/i386/boot/version.c23
-rw-r--r--arch/i386/boot/vesa.h79
-rw-r--r--arch/i386/boot/video-bios.c125
-rw-r--r--arch/i386/boot/video-vesa.c284
-rw-r--r--arch/i386/boot/video-vga.c260
-rw-r--r--arch/i386/boot/video.S2043
-rw-r--r--arch/i386/boot/video.c461
-rw-r--r--arch/i386/boot/video.h152
-rw-r--r--arch/i386/boot/voyager.c46
-rw-r--r--arch/i386/defconfig265
-rw-r--r--arch/i386/kernel/Makefile1
-rw-r--r--arch/i386/kernel/acpi/boot.c53
-rw-r--r--arch/i386/kernel/acpi/sleep.c12
-rw-r--r--arch/i386/kernel/acpi/wakeup.S39
-rw-r--r--arch/i386/kernel/alternative.c14
-rw-r--r--arch/i386/kernel/apic.c10
-rw-r--r--arch/i386/kernel/apm.c2
-rw-r--r--arch/i386/kernel/asm-offsets.c29
-rw-r--r--arch/i386/kernel/cpu/Makefile3
-rw-r--r--arch/i386/kernel/cpu/addon_cpuid_features.c50
-rw-r--r--arch/i386/kernel/cpu/amd.c8
-rw-r--r--arch/i386/kernel/cpu/common.c4
-rw-r--r--arch/i386/kernel/cpu/cpufreq/Kconfig31
-rw-r--r--arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c17
-rw-r--r--arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c6
-rw-r--r--arch/i386/kernel/cpu/cpufreq/gx-suspmod.c4
-rw-r--r--arch/i386/kernel/cpu/cpufreq/longhaul.c216
-rw-r--r--arch/i386/kernel/cpu/cpufreq/longhaul.h12
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k8.c29
-rw-r--r--arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c276
-rw-r--r--arch/i386/kernel/cpu/cpufreq/speedstep-ich.c4
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c79
-rw-r--r--arch/i386/kernel/cpu/mcheck/non-fatal.c4
-rw-r--r--arch/i386/kernel/cpu/mcheck/therm_throt.c6
-rw-r--r--arch/i386/kernel/cpu/mtrr/cyrix.c4
-rw-r--r--arch/i386/kernel/cpu/mtrr/generic.c14
-rw-r--r--arch/i386/kernel/cpu/mtrr/main.c11
-rw-r--r--arch/i386/kernel/cpu/perfctr-watchdog.c65
-rw-r--r--arch/i386/kernel/cpu/proc.c21
-rw-r--r--arch/i386/kernel/cpu/rise.c52
-rw-r--r--arch/i386/kernel/e820.c34
-rw-r--r--arch/i386/kernel/efi.c2
-rw-r--r--arch/i386/kernel/entry.S95
-rw-r--r--arch/i386/kernel/geode.c155
-rw-r--r--arch/i386/kernel/head.S13
-rw-r--r--arch/i386/kernel/hpet.c98
-rw-r--r--arch/i386/kernel/i8253.c32
-rw-r--r--arch/i386/kernel/init_task.c2
-rw-r--r--arch/i386/kernel/io_apic.c27
-rw-r--r--arch/i386/kernel/irq.c10
-rw-r--r--arch/i386/kernel/nmi.c8
-rw-r--r--arch/i386/kernel/paravirt.c37
-rw-r--r--arch/i386/kernel/pci-dma.c27
-rw-r--r--arch/i386/kernel/process.c85
-rw-r--r--arch/i386/kernel/ptrace.c39
-rw-r--r--arch/i386/kernel/quirks.c5
-rw-r--r--arch/i386/kernel/reboot.c9
-rw-r--r--arch/i386/kernel/reboot_fixups.c13
-rw-r--r--arch/i386/kernel/setup.c25
-rw-r--r--arch/i386/kernel/smp.c5
-rw-r--r--arch/i386/kernel/smpboot.c20
-rw-r--r--arch/i386/kernel/smpcommon.c8
-rw-r--r--arch/i386/kernel/syscall_table.S1
-rw-r--r--arch/i386/kernel/sysenter.c4
-rw-r--r--arch/i386/kernel/time.c50
-rw-r--r--arch/i386/kernel/traps.c29
-rw-r--r--arch/i386/kernel/tsc.c36
-rw-r--r--arch/i386/kernel/verify_cpu.S94
-rw-r--r--arch/i386/kernel/vmi.c4
-rw-r--r--arch/i386/kernel/vmiclock.c8
-rw-r--r--arch/i386/kernel/vmlinux.lds.S8
-rw-r--r--arch/i386/kernel/vsyscall-note.S52
-rw-r--r--arch/i386/lib/Makefile2
-rw-r--r--arch/i386/lib/string.c257
-rw-r--r--arch/i386/mach-es7000/es7000plat.c48
-rw-r--r--arch/i386/mach-generic/es7000.c2
-rw-r--r--arch/i386/mach-visws/traps.c4
-rw-r--r--arch/i386/mach-voyager/voyager_thread.c2
-rw-r--r--arch/i386/math-emu/fpu_entry.c2
-rw-r--r--arch/i386/mm/fault.c28
-rw-r--r--arch/i386/mm/init.c16
-rw-r--r--arch/i386/mm/ioremap.c2
-rw-r--r--arch/i386/mm/pageattr.c50
-rw-r--r--arch/i386/mm/pgtable.c6
-rw-r--r--arch/i386/pci/acpi.c32
-rw-r--r--arch/i386/pci/common.c13
-rw-r--r--arch/i386/pci/fixup.c9
-rw-r--r--arch/i386/pci/mmconfig-shared.c48
-rw-r--r--arch/i386/video/Makefile1
-rw-r--r--arch/i386/video/fbdev.c32
-rw-r--r--arch/i386/xen/Kconfig11
-rw-r--r--arch/i386/xen/Makefile4
-rw-r--r--arch/i386/xen/enlighten.c1144
-rw-r--r--arch/i386/xen/events.c591
-rw-r--r--arch/i386/xen/features.c29
-rw-r--r--arch/i386/xen/manage.c143
-rw-r--r--arch/i386/xen/mmu.c564
-rw-r--r--arch/i386/xen/mmu.h60
-rw-r--r--arch/i386/xen/multicalls.c90
-rw-r--r--arch/i386/xen/multicalls.h45
-rw-r--r--arch/i386/xen/setup.c111
-rw-r--r--arch/i386/xen/smp.c404
-rw-r--r--arch/i386/xen/time.c593
-rw-r--r--arch/i386/xen/vdso.h4
-rw-r--r--arch/i386/xen/xen-asm.S291
-rw-r--r--arch/i386/xen/xen-head.S36
-rw-r--r--arch/i386/xen/xen-ops.h71
139 files changed, 10240 insertions, 4722 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 8770a5d0b143..abb582bc218f 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -18,6 +18,10 @@ config GENERIC_TIME
18 bool 18 bool
19 default y 19 default y
20 20
21config GENERIC_CMOS_UPDATE
22 bool
23 default y
24
21config CLOCKSOURCE_WATCHDOG 25config CLOCKSOURCE_WATCHDOG
22 bool 26 bool
23 default y 27 default y
@@ -222,6 +226,8 @@ config PARAVIRT
222 However, when run without a hypervisor the kernel is 226 However, when run without a hypervisor the kernel is
223 theoretically slower. If in doubt, say N. 227 theoretically slower. If in doubt, say N.
224 228
229source "arch/i386/xen/Kconfig"
230
225config VMI 231config VMI
226 bool "VMI Paravirt-ops support" 232 bool "VMI Paravirt-ops support"
227 depends on PARAVIRT 233 depends on PARAVIRT
@@ -441,8 +447,8 @@ config X86_REBOOTFIXUPS
441 this config is intended, is when reboot ends with a stalled/hung 447 this config is intended, is when reboot ends with a stalled/hung
442 system. 448 system.
443 449
444 Currently, the only fixup is for the Geode GX1/CS5530A/TROM2.1. 450 Currently, the only fixup is for the Geode machines using
445 combination. 451 CS5530A and CS5536 chipsets.
446 452
447 Say Y if you want to enable the fixup. Currently, it's safe to 453 Say Y if you want to enable the fixup. Currently, it's safe to
448 enable this option even if you don't need it. 454 enable this option even if you don't need it.
@@ -541,7 +547,8 @@ config HIGHMEM4G
541 547
542config HIGHMEM64G 548config HIGHMEM64G
543 bool "64GB" 549 bool "64GB"
544 depends on X86_CMPXCHG64 550 depends on !M386 && !M486
551 select X86_PAE
545 help 552 help
546 Select this if you have a 32-bit processor and more than 4 553 Select this if you have a 32-bit processor and more than 4
547 gigabytes of physical RAM. 554 gigabytes of physical RAM.
@@ -571,12 +578,12 @@ choice
571 config VMSPLIT_3G 578 config VMSPLIT_3G
572 bool "3G/1G user/kernel split" 579 bool "3G/1G user/kernel split"
573 config VMSPLIT_3G_OPT 580 config VMSPLIT_3G_OPT
574 depends on !HIGHMEM 581 depends on !X86_PAE
575 bool "3G/1G user/kernel split (for full 1G low memory)" 582 bool "3G/1G user/kernel split (for full 1G low memory)"
576 config VMSPLIT_2G 583 config VMSPLIT_2G
577 bool "2G/2G user/kernel split" 584 bool "2G/2G user/kernel split"
578 config VMSPLIT_2G_OPT 585 config VMSPLIT_2G_OPT
579 depends on !HIGHMEM 586 depends on !X86_PAE
580 bool "2G/2G user/kernel split (for full 2G low memory)" 587 bool "2G/2G user/kernel split (for full 2G low memory)"
581 config VMSPLIT_1G 588 config VMSPLIT_1G
582 bool "1G/3G user/kernel split" 589 bool "1G/3G user/kernel split"
@@ -596,10 +603,15 @@ config HIGHMEM
596 default y 603 default y
597 604
598config X86_PAE 605config X86_PAE
599 bool 606 bool "PAE (Physical Address Extension) Support"
600 depends on HIGHMEM64G 607 default n
601 default y 608 depends on !HIGHMEM4G
602 select RESOURCES_64BIT 609 select RESOURCES_64BIT
610 help
611 PAE is required for NX support, and furthermore enables
612 larger swapspace support for non-overcommit purposes. It
613 has the cost of more pagetable lookup overhead, and also
614 consumes more pagetable space per process.
603 615
604# Common NUMA Features 616# Common NUMA Features
605config NUMA 617config NUMA
@@ -815,6 +827,7 @@ config CRASH_DUMP
815 827
816config PHYSICAL_START 828config PHYSICAL_START
817 hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) 829 hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
830 default "0x1000000" if X86_NUMAQ
818 default "0x100000" 831 default "0x100000"
819 help 832 help
820 This gives the physical address where the kernel is loaded. 833 This gives the physical address where the kernel is loaded.
@@ -1212,21 +1225,26 @@ source "drivers/Kconfig"
1212 1225
1213source "fs/Kconfig" 1226source "fs/Kconfig"
1214 1227
1215menu "Instrumentation Support" 1228menuconfig INSTRUMENTATION
1229 bool "Instrumentation Support"
1216 depends on EXPERIMENTAL 1230 depends on EXPERIMENTAL
1231 default y
1232
1233if INSTRUMENTATION
1217 1234
1218source "arch/i386/oprofile/Kconfig" 1235source "arch/i386/oprofile/Kconfig"
1219 1236
1220config KPROBES 1237config KPROBES
1221 bool "Kprobes (EXPERIMENTAL)" 1238 bool "Kprobes"
1222 depends on KALLSYMS && EXPERIMENTAL && MODULES 1239 depends on KALLSYMS && MODULES
1223 help 1240 help
1224 Kprobes allows you to trap at almost any kernel address and 1241 Kprobes allows you to trap at almost any kernel address and
1225 execute a callback function. register_kprobe() establishes 1242 execute a callback function. register_kprobe() establishes
1226 a probepoint and specifies the callback. Kprobes is useful 1243 a probepoint and specifies the callback. Kprobes is useful
1227 for kernel debugging, non-intrusive instrumentation and testing. 1244 for kernel debugging, non-intrusive instrumentation and testing.
1228 If in doubt, say "N". 1245 If in doubt, say "N".
1229endmenu 1246
1247endif # INSTRUMENTATION
1230 1248
1231source "arch/i386/Kconfig.debug" 1249source "arch/i386/Kconfig.debug"
1232 1250
diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu
index d7f6fb0b30f2..11a24d54f27b 100644
--- a/arch/i386/Kconfig.cpu
+++ b/arch/i386/Kconfig.cpu
@@ -297,11 +297,6 @@ config X86_POPAD_OK
297 depends on !M386 297 depends on !M386
298 default y 298 default y
299 299
300config X86_CMPXCHG64
301 bool
302 depends on !M386 && !M486
303 default y
304
305config X86_ALIGNMENT_16 300config X86_ALIGNMENT_16
306 bool 301 bool
307 depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 302 depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
@@ -344,8 +339,8 @@ config X86_CMOV
344 depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7) 339 depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7)
345 default y 340 default y
346 341
347config X86_MINIMUM_CPU_MODEL 342config X86_MINIMUM_CPU_FAMILY
348 int 343 int
349 default "4" if X86_XADD || X86_CMPXCHG || X86_BSWAP 344 default "4" if X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK
350 default "0" 345 default "3"
351 346
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index bd28f9f9b4b7..01f0ff0daaf4 100644
--- a/arch/i386/Makefile
+++ b/arch/i386/Makefile
@@ -93,6 +93,9 @@ mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-i386/mach-es7000
93mcore-$(CONFIG_X86_ES7000) := mach-default 93mcore-$(CONFIG_X86_ES7000) := mach-default
94core-$(CONFIG_X86_ES7000) := arch/i386/mach-es7000/ 94core-$(CONFIG_X86_ES7000) := arch/i386/mach-es7000/
95 95
96# Xen paravirtualization support
97core-$(CONFIG_XEN) += arch/i386/xen/
98
96# default subarch .h files 99# default subarch .h files
97mflags-y += -Iinclude/asm-i386/mach-default 100mflags-y += -Iinclude/asm-i386/mach-default
98 101
@@ -108,6 +111,7 @@ drivers-$(CONFIG_PCI) += arch/i386/pci/
108# must be linked after kernel/ 111# must be linked after kernel/
109drivers-$(CONFIG_OPROFILE) += arch/i386/oprofile/ 112drivers-$(CONFIG_OPROFILE) += arch/i386/oprofile/
110drivers-$(CONFIG_PM) += arch/i386/power/ 113drivers-$(CONFIG_PM) += arch/i386/power/
114drivers-$(CONFIG_FB) += arch/i386/video/
111 115
112CFLAGS += $(mflags-y) 116CFLAGS += $(mflags-y)
113AFLAGS += $(mflags-y) 117AFLAGS += $(mflags-y)
diff --git a/arch/i386/boot/.gitignore b/arch/i386/boot/.gitignore
index 495f20c085de..18465143cfa2 100644
--- a/arch/i386/boot/.gitignore
+++ b/arch/i386/boot/.gitignore
@@ -1,3 +1,5 @@
1bootsect 1bootsect
2bzImage 2bzImage
3setup 3setup
4setup.bin
5setup.elf
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
index bfbc32098a4a..93386a4e40b4 100644
--- a/arch/i386/boot/Makefile
+++ b/arch/i386/boot/Makefile
@@ -25,27 +25,56 @@ SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
25 25
26#RAMDISK := -DRAMDISK=512 26#RAMDISK := -DRAMDISK=512
27 27
28targets := vmlinux.bin bootsect bootsect.o \ 28targets := vmlinux.bin setup.bin setup.elf zImage bzImage
29 setup setup.o zImage bzImage
30subdir- := compressed 29subdir- := compressed
31 30
31setup-y += a20.o apm.o cmdline.o copy.o cpu.o cpucheck.o edd.o
32setup-y += header.o main.o mca.o memory.o pm.o pmjump.o
33setup-y += printf.o string.o tty.o video.o version.o voyager.o
34
35# The link order of the video-*.o modules can matter. In particular,
36# video-vga.o *must* be listed first, followed by video-vesa.o.
37# Hardware-specific drivers should follow in the order they should be
38# probed, and video-bios.o should typically be last.
39setup-y += video-vga.o
40setup-y += video-vesa.o
41setup-y += video-bios.o
42targets += $(setup-y)
32hostprogs-y := tools/build 43hostprogs-y := tools/build
33 44
34HOSTCFLAGS_build.o := $(LINUXINCLUDE) 45HOSTCFLAGS_build.o := $(LINUXINCLUDE)
35 46
36# --------------------------------------------------------------------------- 47# ---------------------------------------------------------------------------
37 48
49# How to compile the 16-bit code. Note we always compile for -march=i386,
50# that way we can complain to the user if the CPU is insufficient.
51cflags-i386 :=
52cflags-x86_64 := -m32
53CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
54 $(cflags-$(ARCH)) \
55 -Wall -Wstrict-prototypes \
56 -march=i386 -mregparm=3 \
57 -include $(srctree)/$(src)/code16gcc.h \
58 -fno-strict-aliasing -fomit-frame-pointer \
59 $(call cc-option, -ffreestanding) \
60 $(call cc-option, -fno-toplevel-reorder,\
61 $(call cc-option, -fno-unit-at-a-time)) \
62 $(call cc-option, -fno-stack-protector) \
63 $(call cc-option, -mpreferred-stack-boundary=2)
64AFLAGS := $(CFLAGS) -D__ASSEMBLY__
65
38$(obj)/zImage: IMAGE_OFFSET := 0x1000 66$(obj)/zImage: IMAGE_OFFSET := 0x1000
39$(obj)/zImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) 67$(obj)/zImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK)
40$(obj)/bzImage: IMAGE_OFFSET := 0x100000 68$(obj)/bzImage: IMAGE_OFFSET := 0x100000
69$(obj)/bzImage: EXTRA_CFLAGS := -D__BIG_KERNEL__
41$(obj)/bzImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__ 70$(obj)/bzImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
42$(obj)/bzImage: BUILDFLAGS := -b 71$(obj)/bzImage: BUILDFLAGS := -b
43 72
44quiet_cmd_image = BUILD $@ 73quiet_cmd_image = BUILD $@
45cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/bootsect $(obj)/setup \ 74cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/setup.bin \
46 $(obj)/vmlinux.bin $(ROOT_DEV) > $@ 75 $(obj)/vmlinux.bin $(ROOT_DEV) > $@
47 76
48$(obj)/zImage $(obj)/bzImage: $(obj)/bootsect $(obj)/setup \ 77$(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \
49 $(obj)/vmlinux.bin $(obj)/tools/build FORCE 78 $(obj)/vmlinux.bin $(obj)/tools/build FORCE
50 $(call if_changed,image) 79 $(call if_changed,image)
51 @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' 80 @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
@@ -53,12 +82,17 @@ $(obj)/zImage $(obj)/bzImage: $(obj)/bootsect $(obj)/setup \
53$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE 82$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
54 $(call if_changed,objcopy) 83 $(call if_changed,objcopy)
55 84
56LDFLAGS_bootsect := -Ttext 0x0 -s --oformat binary 85SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
57LDFLAGS_setup := -Ttext 0x0 -s --oformat binary -e begtext
58 86
59$(obj)/setup $(obj)/bootsect: %: %.o FORCE 87LDFLAGS_setup.elf := -T
88$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
60 $(call if_changed,ld) 89 $(call if_changed,ld)
61 90
91OBJCOPYFLAGS_setup.bin := -O binary
92
93$(obj)/setup.bin: $(obj)/setup.elf FORCE
94 $(call if_changed,objcopy)
95
62$(obj)/compressed/vmlinux: FORCE 96$(obj)/compressed/vmlinux: FORCE
63 $(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@ 97 $(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@
64 98
diff --git a/arch/i386/boot/a20.c b/arch/i386/boot/a20.c
new file mode 100644
index 000000000000..31348d054fca
--- /dev/null
+++ b/arch/i386/boot/a20.c
@@ -0,0 +1,161 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/a20.c
13 *
14 * Enable A20 gate (return -1 on failure)
15 */
16
17#include "boot.h"
18
19#define MAX_8042_LOOPS 100000
20
21static int empty_8042(void)
22{
23 u8 status;
24 int loops = MAX_8042_LOOPS;
25
26 while (loops--) {
27 io_delay();
28
29 status = inb(0x64);
30 if (status & 1) {
31 /* Read and discard input data */
32 io_delay();
33 (void)inb(0x60);
34 } else if (!(status & 2)) {
35 /* Buffers empty, finished! */
36 return 0;
37 }
38 }
39
40 return -1;
41}
42
43/* Returns nonzero if the A20 line is enabled. The memory address
44 used as a test is the int $0x80 vector, which should be safe. */
45
46#define A20_TEST_ADDR (4*0x80)
47#define A20_TEST_SHORT 32
48#define A20_TEST_LONG 2097152 /* 2^21 */
49
50static int a20_test(int loops)
51{
52 int ok = 0;
53 int saved, ctr;
54
55 set_fs(0x0000);
56 set_gs(0xffff);
57
58 saved = ctr = rdfs32(A20_TEST_ADDR);
59
60 while (loops--) {
61 wrfs32(++ctr, A20_TEST_ADDR);
62 io_delay(); /* Serialize and make delay constant */
63 ok = rdgs32(A20_TEST_ADDR+0x10) ^ ctr;
64 if (ok)
65 break;
66 }
67
68 wrfs32(saved, A20_TEST_ADDR);
69 return ok;
70}
71
72/* Quick test to see if A20 is already enabled */
73static int a20_test_short(void)
74{
75 return a20_test(A20_TEST_SHORT);
76}
77
78/* Longer test that actually waits for A20 to come on line; this
79 is useful when dealing with the KBC or other slow external circuitry. */
80static int a20_test_long(void)
81{
82 return a20_test(A20_TEST_LONG);
83}
84
85static void enable_a20_bios(void)
86{
87 asm volatile("pushfl; int $0x15; popfl"
88 : : "a" ((u16)0x2401));
89}
90
/*
 * Enable A20 through the keyboard controller: send command 0xd1 to
 * port 0x64, then data byte 0xdf to port 0x60.  The controller is
 * drained before and after each write; drain failures are ignored.
 */
static void enable_a20_kbc(void)
{
	/* Make sure the controller is ready to accept a command */
	empty_8042();

	/* Command write */
	outb(0xd1, 0x64);
	empty_8042();

	/* A20 on */
	outb(0xdf, 0x60);
	empty_8042();
}
101
102static void enable_a20_fast(void)
103{
104 u8 port_a;
105
106 port_a = inb(0x92); /* Configuration port A */
107 port_a |= 0x02; /* Enable A20 */
108 port_a &= ~0x01; /* Do not reset machine */
109 outb(port_a, 0x92);
110}
111
112/*
113 * Actual routine to enable A20; return 0 on ok, -1 on failure
114 */
115
116#define A20_ENABLE_LOOPS 255 /* Number of times to try */
117
118int enable_a20(void)
119{
120 int loops = A20_ENABLE_LOOPS;
121
122#if defined(CONFIG_X86_ELAN)
123 /* Elan croaks if we try to touch the KBC */
124 enable_a20_fast();
125 while (!a20_test_long())
126 ;
127 return 0;
128#elif defined(CONFIG_X86_VOYAGER)
129 /* On Voyager, a20_test() is unsafe? */
130 enable_a20_kbc();
131 return 0;
132#else
133 while (loops--) {
134 /* First, check to see if A20 is already enabled
135 (legacy free, etc.) */
136 if (a20_test_short())
137 return 0;
138
139 /* Next, try the BIOS (INT 0x15, AX=0x2401) */
140 enable_a20_bios();
141 if (a20_test_short())
142 return 0;
143
144 /* Try enabling A20 through the keyboard controller */
145 empty_8042();
146 if (a20_test_short())
147 return 0; /* BIOS worked, but with delayed reaction */
148
149 enable_a20_kbc();
150 if (a20_test_long())
151 return 0;
152
153 /* Finally, try enabling the "fast A20 gate" */
154 enable_a20_fast();
155 if (a20_test_long())
156 return 0;
157 }
158
159 return -1;
160#endif
161}
diff --git a/arch/i386/boot/apm.c b/arch/i386/boot/apm.c
new file mode 100644
index 000000000000..a34087c370c0
--- /dev/null
+++ b/arch/i386/boot/apm.c
@@ -0,0 +1,97 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * Original APM BIOS checking by Stephen Rothwell, May 1994
7 * (sfr@canb.auug.org.au)
8 *
9 * This file is part of the Linux kernel, and is made available under
10 * the terms of the GNU General Public License version 2.
11 *
12 * ----------------------------------------------------------------------- */
13
14/*
15 * arch/i386/boot/apm.c
16 *
17 * Get APM BIOS information
18 */
19
20#include "boot.h"
21
22#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
23
24int query_apm_bios(void)
25{
26 u16 ax, bx, cx, dx, di;
27 u32 ebx, esi;
28 u8 err;
29
30 /* APM BIOS installation check */
31 ax = 0x5300;
32 bx = cx = 0;
33 asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0"
34 : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx)
35 : : "esi", "edi");
36
37 if (err)
38 return -1; /* No APM BIOS */
39
40 if (bx != 0x504d) /* "PM" signature */
41 return -1;
42
43 if (cx & 0x02) /* 32 bits supported? */
44 return -1;
45
46 /* Disconnect first, just in case */
47 ax = 0x5304;
48 asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp"
49 : "+a" (ax)
50 : : "ebx", "ecx", "edx", "esi", "edi");
51
52 /* Paranoia */
53 ebx = esi = 0;
54 cx = dx = di = 0;
55
56 /* 32-bit connect */
57 asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %6"
58 : "=a" (ax), "+b" (ebx), "+c" (cx), "+d" (dx),
59 "+S" (esi), "+D" (di), "=m" (err)
60 : "a" (0x5303));
61
62 boot_params.apm_bios_info.cseg = ax;
63 boot_params.apm_bios_info.offset = ebx;
64 boot_params.apm_bios_info.cseg_16 = cx;
65 boot_params.apm_bios_info.dseg = dx;
66 boot_params.apm_bios_info.cseg_len = (u16)esi;
67 boot_params.apm_bios_info.cseg_16_len = esi >> 16;
68 boot_params.apm_bios_info.dseg_len = di;
69
70 if (err)
71 return -1;
72
73 /* Redo the installation check as the 32-bit connect;
74 some BIOSes return different flags this way... */
75
76 ax = 0x5300;
77 bx = cx = 0;
78 asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0"
79 : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx)
80 : : "esi", "edi");
81
82 if (err || bx != 0x504d) {
83 /* Failure with 32-bit connect, try to disconect and ignore */
84 ax = 0x5304;
85 bx = 0;
86 asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp"
87 : "+a" (ax), "+b" (bx)
88 : : "ecx", "edx", "esi", "edi");
89 return -1;
90 }
91
92 boot_params.apm_bios_info.version = ax;
93 boot_params.apm_bios_info.flags = cx;
94 return 0;
95}
96
97#endif
diff --git a/arch/i386/boot/bitops.h b/arch/i386/boot/bitops.h
new file mode 100644
index 000000000000..8dcc8dc7db88
--- /dev/null
+++ b/arch/i386/boot/bitops.h
@@ -0,0 +1,45 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/bitops.h
13 *
14 * Very simple bitops for the boot code.
15 */
16
17#ifndef BOOT_BITOPS_H
18#define BOOT_BITOPS_H
19#define _LINUX_BITOPS_H /* Inhibit inclusion of <linux/bitops.h> */
20
21static inline int constant_test_bit(int nr, const void *addr)
22{
23 const u32 *p = (const u32 *)addr;
24 return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0;
25}
26static inline int variable_test_bit(int nr, const void *addr)
27{
28 u8 v;
29 const u32 *p = (const u32 *)addr;
30
31 asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr));
32 return v;
33}
34
35#define test_bit(nr,addr) \
36(__builtin_constant_p(nr) ? \
37 constant_test_bit((nr),(addr)) : \
38 variable_test_bit((nr),(addr)))
39
40static inline void set_bit(int nr, void *addr)
41{
42 asm("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr));
43}
44
45#endif /* BOOT_BITOPS_H */
diff --git a/arch/i386/boot/boot.h b/arch/i386/boot/boot.h
new file mode 100644
index 000000000000..dec70c9b6050
--- /dev/null
+++ b/arch/i386/boot/boot.h
@@ -0,0 +1,296 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/boot.h
13 *
14 * Header file for the real-mode kernel code
15 */
16
17#ifndef BOOT_BOOT_H
18#define BOOT_BOOT_H
19
20#ifndef __ASSEMBLY__
21
22#include <stdarg.h>
23#include <linux/types.h>
24#include <linux/edd.h>
25#include <asm/boot.h>
26#include <asm/bootparam.h>
27
28/* Useful macros */
29#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
30
31extern struct setup_header hdr;
32extern struct boot_params boot_params;
33
34/* Basic port I/O */
35static inline void outb(u8 v, u16 port)
36{
37 asm volatile("outb %0,%1" : : "a" (v), "dN" (port));
38}
39static inline u8 inb(u16 port)
40{
41 u8 v;
42 asm volatile("inb %1,%0" : "=a" (v) : "dN" (port));
43 return v;
44}
45
46static inline void outw(u16 v, u16 port)
47{
48 asm volatile("outw %0,%1" : : "a" (v), "dN" (port));
49}
50static inline u16 inw(u16 port)
51{
52 u16 v;
53 asm volatile("inw %1,%0" : "=a" (v) : "dN" (port));
54 return v;
55}
56
57static inline void outl(u32 v, u16 port)
58{
59 asm volatile("outl %0,%1" : : "a" (v), "dN" (port));
60}
61static inline u32 inl(u32 port)
62{
63 u32 v;
64 asm volatile("inl %1,%0" : "=a" (v) : "dN" (port));
65 return v;
66}
67
68static inline void io_delay(void)
69{
70 const u16 DELAY_PORT = 0x80;
71 asm volatile("outb %%al,%0" : : "dN" (DELAY_PORT));
72}
73
74/* These functions are used to reference data in other segments. */
75
76static inline u16 ds(void)
77{
78 u16 seg;
79 asm("movw %%ds,%0" : "=rm" (seg));
80 return seg;
81}
82
83static inline void set_fs(u16 seg)
84{
85 asm volatile("movw %0,%%fs" : : "rm" (seg));
86}
87static inline u16 fs(void)
88{
89 u16 seg;
90 asm("movw %%fs,%0" : "=rm" (seg));
91 return seg;
92}
93
94static inline void set_gs(u16 seg)
95{
96 asm volatile("movw %0,%%gs" : : "rm" (seg));
97}
98static inline u16 gs(void)
99{
100 u16 seg;
101 asm("movw %%gs,%0" : "=rm" (seg));
102 return seg;
103}
104
105typedef unsigned int addr_t;
106
107static inline u8 rdfs8(addr_t addr)
108{
109 u8 v;
110 asm("movb %%fs:%1,%0" : "=r" (v) : "m" (*(u8 *)addr));
111 return v;
112}
113static inline u16 rdfs16(addr_t addr)
114{
115 u16 v;
116 asm("movw %%fs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
117 return v;
118}
119static inline u32 rdfs32(addr_t addr)
120{
121 u32 v;
122 asm("movl %%fs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
123 return v;
124}
125
126static inline void wrfs8(u8 v, addr_t addr)
127{
128 asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "r" (v));
129}
130static inline void wrfs16(u16 v, addr_t addr)
131{
132 asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "r" (v));
133}
134static inline void wrfs32(u32 v, addr_t addr)
135{
136 asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "r" (v));
137}
138
139static inline u8 rdgs8(addr_t addr)
140{
141 u8 v;
142 asm("movb %%gs:%1,%0" : "=r" (v) : "m" (*(u8 *)addr));
143 return v;
144}
145static inline u16 rdgs16(addr_t addr)
146{
147 u16 v;
148 asm("movw %%gs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
149 return v;
150}
151static inline u32 rdgs32(addr_t addr)
152{
153 u32 v;
154 asm("movl %%gs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
155 return v;
156}
157
158static inline void wrgs8(u8 v, addr_t addr)
159{
160 asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "r" (v));
161}
162static inline void wrgs16(u16 v, addr_t addr)
163{
164 asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "r" (v));
165}
166static inline void wrgs32(u32 v, addr_t addr)
167{
168 asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "r" (v));
169}
170
171/* Note: these only return true/false, not a signed return value! */
172static inline int memcmp(const void *s1, const void *s2, size_t len)
173{
174 u8 diff;
175 asm("repe; cmpsb; setnz %0"
176 : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
177 return diff;
178}
179
180static inline int memcmp_fs(const void *s1, addr_t s2, size_t len)
181{
182 u8 diff;
183 asm("fs; repe; cmpsb; setnz %0"
184 : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
185 return diff;
186}
187static inline int memcmp_gs(const void *s1, addr_t s2, size_t len)
188{
189 u8 diff;
190 asm("gs; repe; cmpsb; setnz %0"
191 : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
192 return diff;
193}
194
/* Return 1 if @ch is an ASCII decimal digit ('0'..'9'), 0 otherwise. */
static inline int isdigit(int ch)
{
	if (ch < '0')
		return 0;

	return ch <= '9';
}
199
200/* Heap -- available for dynamic lists. */
201#define STACK_SIZE 512 /* Minimum number of bytes for stack */
202
203extern char _end[];
204extern char *HEAP;
205extern char *heap_end;
206#define RESET_HEAP() ((void *)( HEAP = _end ))
207static inline char *__get_heap(size_t s, size_t a, size_t n)
208{
209 char *tmp;
210
211 HEAP = (char *)(((size_t)HEAP+(a-1)) & ~(a-1));
212 tmp = HEAP;
213 HEAP += s*n;
214 return tmp;
215}
216#define GET_HEAP(type, n) \
217 ((type *)__get_heap(sizeof(type),__alignof__(type),(n)))
218
219static inline int heap_free(void)
220{
221 return heap_end-HEAP;
222}
223
224/* copy.S */
225
226void copy_to_fs(addr_t dst, void *src, size_t len);
227void *copy_from_fs(void *dst, addr_t src, size_t len);
228void copy_to_gs(addr_t dst, void *src, size_t len);
229void *copy_from_gs(void *dst, addr_t src, size_t len);
230void *memcpy(void *dst, void *src, size_t len);
231void *memset(void *dst, int c, size_t len);
232
233#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
234#define memset(d,c,l) __builtin_memset(d,c,l)
235
236/* a20.c */
237int enable_a20(void);
238
239/* apm.c */
240int query_apm_bios(void);
241
242/* cmdline.c */
243int cmdline_find_option(const char *option, char *buffer, int bufsize);
244
245/* cpu.c, cpucheck.c */
246int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
247int validate_cpu(void);
248
249/* edd.c */
250void query_edd(void);
251
252/* header.S */
253void __attribute__((noreturn)) die(void);
254
255/* mca.c */
256int query_mca(void);
257
258/* memory.c */
259int detect_memory(void);
260
261/* pm.c */
262void __attribute__((noreturn)) go_to_protected_mode(void);
263
264/* pmjump.S */
265void __attribute__((noreturn))
266 protected_mode_jump(u32 entrypoint, u32 bootparams);
267
268/* printf.c */
269int sprintf(char *buf, const char *fmt, ...);
270int vsprintf(char *buf, const char *fmt, va_list args);
271int printf(const char *fmt, ...);
272
273/* string.c */
274int strcmp(const char *str1, const char *str2);
275size_t strnlen(const char *s, size_t maxlen);
276unsigned int atou(const char *s);
277
278/* tty.c */
279void puts(const char *);
280void putchar(int);
281int getchar(void);
282void kbd_flush(void);
283int getchar_timeout(void);
284
285/* video.c */
286void set_video(void);
287
288/* video-vesa.c */
289void vesa_store_edid(void);
290
291/* voyager.c */
292int query_voyager(void);
293
294#endif /* __ASSEMBLY__ */
295
296#endif /* BOOT_BOOT_H */
diff --git a/arch/i386/boot/bootsect.S b/arch/i386/boot/bootsect.S
deleted file mode 100644
index 011b7a4993d4..000000000000
--- a/arch/i386/boot/bootsect.S
+++ /dev/null
@@ -1,98 +0,0 @@
1/*
2 * bootsect.S Copyright (C) 1991, 1992 Linus Torvalds
3 *
4 * modified by Drew Eckhardt
5 * modified by Bruce Evans (bde)
6 * modified by Chris Noe (May 1999) (as86 -> gas)
7 * gutted by H. Peter Anvin (Jan 2003)
8 *
9 * BIG FAT NOTE: We're in real mode using 64k segments. Therefore segment
10 * addresses must be multiplied by 16 to obtain their respective linear
11 * addresses. To avoid confusion, linear addresses are written using leading
12 * hex while segment addresses are written as segment:offset.
13 *
14 */
15
16#include <asm/boot.h>
17
18SETUPSECTS = 4 /* default nr of setup-sectors */
19BOOTSEG = 0x07C0 /* original address of boot-sector */
20INITSEG = DEF_INITSEG /* we move boot here - out of the way */
21SETUPSEG = DEF_SETUPSEG /* setup starts here */
22SYSSEG = DEF_SYSSEG /* system loaded at 0x10000 (65536) */
23SYSSIZE = DEF_SYSSIZE /* system size: # of 16-byte clicks */
24 /* to be loaded */
25ROOT_DEV = 0 /* ROOT_DEV is now written by "build" */
26SWAP_DEV = 0 /* SWAP_DEV is now written by "build" */
27
28#ifndef SVGA_MODE
29#define SVGA_MODE ASK_VGA
30#endif
31
32#ifndef RAMDISK
33#define RAMDISK 0
34#endif
35
36#ifndef ROOT_RDONLY
37#define ROOT_RDONLY 1
38#endif
39
40.code16
41.text
42
43.global _start
44_start:
45
46 # Normalize the start address
47 jmpl $BOOTSEG, $start2
48
49start2:
50 movw %cs, %ax
51 movw %ax, %ds
52 movw %ax, %es
53 movw %ax, %ss
54 movw $0x7c00, %sp
55 sti
56 cld
57
58 movw $bugger_off_msg, %si
59
60msg_loop:
61 lodsb
62 andb %al, %al
63 jz die
64 movb $0xe, %ah
65 movw $7, %bx
66 int $0x10
67 jmp msg_loop
68
69die:
70 # Allow the user to press a key, then reboot
71 xorw %ax, %ax
72 int $0x16
73 int $0x19
74
75 # int 0x19 should never return. In case it does anyway,
76 # invoke the BIOS reset code...
77 ljmp $0xf000,$0xfff0
78
79
80bugger_off_msg:
81 .ascii "Direct booting from floppy is no longer supported.\r\n"
82 .ascii "Please use a boot loader program instead.\r\n"
83 .ascii "\n"
84 .ascii "Remove disk and press any key to reboot . . .\r\n"
85 .byte 0
86
87
88 # Kernel attributes; used by setup
89
90 .org 497
91setup_sects: .byte SETUPSECTS
92root_flags: .word ROOT_RDONLY
93syssize: .word SYSSIZE
94swap_dev: .word SWAP_DEV
95ram_size: .word RAMDISK
96vid_mode: .word SVGA_MODE
97root_dev: .word ROOT_DEV
98boot_flag: .word 0xAA55
diff --git a/arch/i386/boot/cmdline.c b/arch/i386/boot/cmdline.c
new file mode 100644
index 000000000000..34bb778c4357
--- /dev/null
+++ b/arch/i386/boot/cmdline.c
@@ -0,0 +1,97 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/cmdline.c
13 *
14 * Simple command-line parser for early boot.
15 */
16
17#include "boot.h"
18
19static inline int myisspace(u8 c)
20{
21 return c <= ' '; /* Close enough approximation */
22}
23
24/*
25 * Find a non-boolean option, that is, "option=argument". In accordance
26 * with standard Linux practice, if this option is repeated, this returns
27 * the last instance on the command line.
28 *
29 * Returns the length of the argument (regardless of if it was
30 * truncated to fit in the buffer), or -1 on not found.
31 */
32int cmdline_find_option(const char *option, char *buffer, int bufsize)
33{
34 u32 cmdline_ptr = boot_params.hdr.cmd_line_ptr;
35 addr_t cptr;
36 char c;
37 int len = -1;
38 const char *opptr = NULL;
39 char *bufptr = buffer;
40 enum {
41 st_wordstart, /* Start of word/after whitespace */
42 st_wordcmp, /* Comparing this word */
43 st_wordskip, /* Miscompare, skip */
44 st_bufcpy /* Copying this to buffer */
45 } state = st_wordstart;
46
47 if (!cmdline_ptr || cmdline_ptr >= 0x100000)
48 return -1; /* No command line, or inaccessible */
49
50 cptr = cmdline_ptr & 0xf;
51 set_fs(cmdline_ptr >> 4);
52
53 while (cptr < 0x10000 && (c = rdfs8(cptr++))) {
54 switch (state) {
55 case st_wordstart:
56 if (myisspace(c))
57 break;
58
59 /* else */
60 state = st_wordcmp;
61 opptr = option;
62 /* fall through */
63
64 case st_wordcmp:
65 if (c == '=' && !*opptr) {
66 len = 0;
67 bufptr = buffer;
68 state = st_bufcpy;
69 } else if (myisspace(c)) {
70 state = st_wordstart;
71 } else if (c != *opptr++) {
72 state = st_wordskip;
73 }
74 break;
75
76 case st_wordskip:
77 if (myisspace(c))
78 state = st_wordstart;
79 break;
80
81 case st_bufcpy:
82 if (myisspace(c)) {
83 state = st_wordstart;
84 } else {
85 if (len < bufsize-1)
86 *bufptr++ = c;
87 len++;
88 }
89 break;
90 }
91 }
92
93 if (bufsize)
94 *bufptr = '\0';
95
96 return len;
97}
diff --git a/arch/i386/boot/code16gcc.h b/arch/i386/boot/code16gcc.h
new file mode 100644
index 000000000000..3bd848093b9d
--- /dev/null
+++ b/arch/i386/boot/code16gcc.h
@@ -0,0 +1,15 @@
1/*
2 * code16gcc.h
3 *
4 * This file is -include'd when compiling 16-bit C code.
5 * Note: this asm() needs to be emitted before gcc emits any code.
6 * Depending on gcc version, this requires -fno-unit-at-a-time or
7 * -fno-toplevel-reorder.
8 *
9 * Hopefully gcc will eventually have a real -m16 option so we can
10 * drop this hack long term.
11 */
12
13#ifndef __ASSEMBLY__
14asm(".code16gcc");
15#endif
diff --git a/arch/i386/boot/compressed/Makefile b/arch/i386/boot/compressed/Makefile
index a661217f33ec..189fa1dbefcc 100644
--- a/arch/i386/boot/compressed/Makefile
+++ b/arch/i386/boot/compressed/Makefile
@@ -9,9 +9,14 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o \
9EXTRA_AFLAGS := -traditional 9EXTRA_AFLAGS := -traditional
10 10
11LDFLAGS_vmlinux := -T 11LDFLAGS_vmlinux := -T
12CFLAGS_misc.o += -fPIC
13hostprogs-y := relocs 12hostprogs-y := relocs
14 13
14CFLAGS := -m32 -D__KERNEL__ $(LINUX_INCLUDE) -O2 \
15 -fno-strict-aliasing -fPIC \
16 $(call cc-option,-ffreestanding) \
17 $(call cc-option,-fno-stack-protector)
18LDFLAGS := -m elf_i386
19
15$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE 20$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
16 $(call if_changed,ld) 21 $(call if_changed,ld)
17 @: 22 @:
diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S
index 3517a32aaf41..f35ea2237522 100644
--- a/arch/i386/boot/compressed/head.S
+++ b/arch/i386/boot/compressed/head.S
@@ -45,10 +45,10 @@ startup_32:
45 * at and where we were actually loaded at. This can only be done 45 * at and where we were actually loaded at. This can only be done
46 * with a short local call on x86. Nothing else will tell us what 46 * with a short local call on x86. Nothing else will tell us what
47 * address we are running at. The reserved chunk of the real-mode 47 * address we are running at. The reserved chunk of the real-mode
48 * data at 0x34-0x3f are used as the stack for this calculation. 48 * data at 0x1e4 (defined as a scratch field) are used as the stack
49 * Only 4 bytes are needed. 49 * for this calculation. Only 4 bytes are needed.
50 */ 50 */
51 leal 0x40(%esi), %esp 51 leal (0x1e4+4)(%esi), %esp
52 call 1f 52 call 1f
531: popl %ebp 531: popl %ebp
54 subl $1b, %ebp 54 subl $1b, %ebp
diff --git a/arch/i386/boot/compressed/relocs.c b/arch/i386/boot/compressed/relocs.c
index ce4fda261aaf..2d77ee728f92 100644
--- a/arch/i386/boot/compressed/relocs.c
+++ b/arch/i386/boot/compressed/relocs.c
@@ -31,6 +31,9 @@ static const char* safe_abs_relocs[] = {
31 "__kernel_rt_sigreturn", 31 "__kernel_rt_sigreturn",
32 "__kernel_sigreturn", 32 "__kernel_sigreturn",
33 "SYSENTER_RETURN", 33 "SYSENTER_RETURN",
34 "VDSO_NOTE_MASK",
35 "xen_irq_disable_direct_reloc",
36 "xen_save_fl_direct_reloc",
34}; 37};
35 38
36static int is_safe_abs_reloc(const char* sym_name) 39static int is_safe_abs_reloc(const char* sym_name)
diff --git a/arch/i386/boot/copy.S b/arch/i386/boot/copy.S
new file mode 100644
index 000000000000..ef127e56a3cf
--- /dev/null
+++ b/arch/i386/boot/copy.S
@@ -0,0 +1,101 @@
1/* ----------------------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/copy.S
13 *
14 * Memory copy routines
15 */
16
17 .code16gcc
18 .text
19
/*
 * memcpy: copy %cx bytes from ds:%dx to es:%ax.
 *
 * The count is split into %cx/4 dword moves followed by %cx&3 byte
 * moves.  %si and %di are saved and restored; %ax is never written, so
 * it still holds the destination offset on return.
 *
 * NOTE(review): presumably %ax/%dx/%cx are the register-passed
 * dst/src/len arguments of the C prototype, and the direction flag is
 * clear on entry -- confirm against the build flags and callers.
 */
20 .globl memcpy
21 .type memcpy, @function
22memcpy:
23 pushw %si
24 pushw %di
25 movw %ax, %di
26 movw %dx, %si
/* Keep the full byte count for the tail copy below */
27 pushw %cx
28 shrw $2, %cx
29 rep; movsl
30 popw %cx
/* Copy the 0..3 bytes left over after the dword moves */
31 andw $3, %cx
32 rep; movsb
33 popw %di
34 popw %si
35 ret
36 .size memcpy, .-memcpy
37
/*
 * memset: fill %cx bytes at es:%ax with the byte in %dl.
 *
 * The fill byte is replicated into all four bytes of %eax so that the
 * bulk of the area can be written with dword stores; the remaining
 * %cx&3 bytes are written one at a time.  %di is saved and restored.
 *
 * NOTE(review): %eax is overwritten with the fill pattern, so on
 * return it does not hold the destination even though the C prototype
 * is declared as returning void * -- callers should not rely on the
 * return value.
 */
38 .globl memset
39 .type memset, @function
40memset:
41 pushw %di
42 movw %ax, %di
/* Zero-extend the fill byte, then replicate it into every byte of %eax */
43 movzbl %dl, %eax
44 imull $0x01010101,%eax
45 pushw %cx
46 shrw $2, %cx
47 rep; stosl
48 popw %cx
49 andw $3, %cx
50 rep; stosb
51 popw %di
52 ret
53 .size memset, .-memset
54
/*
 * copy_from_fs: memcpy whose source lives in the %fs segment.
 *
 * %ds is temporarily replaced by %fs around the call to memcpy and
 * restored afterwards; all other registers are passed through to
 * memcpy untouched.
 */
55 .globl copy_from_fs
56 .type copy_from_fs, @function
57copy_from_fs:
58 pushw %ds
/* %ds = %fs for the duration of the copy */
59 pushw %fs
60 popw %ds
61 call memcpy
62 popw %ds
63 ret
64 .size copy_from_fs, .-copy_from_fs
65
/*
 * copy_to_fs: memcpy whose destination lives in the %fs segment.
 *
 * %es is temporarily replaced by %fs around the call to memcpy and
 * restored afterwards; all other registers are passed through to
 * memcpy untouched.
 */
66 .globl copy_to_fs
67 .type copy_to_fs, @function
68copy_to_fs:
69 pushw %es
/* %es = %fs for the duration of the copy */
70 pushw %fs
71 popw %es
72 call memcpy
73 popw %es
74 ret
75 .size copy_to_fs, .-copy_to_fs
76
77#if 0 /* Not currently used, but can be enabled as needed */
78
/*
 * copy_from_gs: same as copy_from_fs, but the source segment is %gs
 * (%ds is swapped for %gs around the memcpy call).
 */
79 .globl copy_from_gs
80 .type copy_from_gs, @function
81copy_from_gs:
82 pushw %ds
83 pushw %gs
84 popw %ds
85 call memcpy
86 popw %ds
87 ret
88 .size copy_from_gs, .-copy_from_gs
/*
 * copy_to_gs: same as copy_to_fs, but the destination segment is %gs
 * (%es is swapped for %gs around the memcpy call).
 */
89 .globl copy_to_gs
90
91 .type copy_to_gs, @function
92copy_to_gs:
93 pushw %es
94 pushw %gs
95 popw %es
96 call memcpy
97 popw %es
98 ret
99 .size copy_to_gs, .-copy_to_gs
100
101#endif
diff --git a/arch/i386/boot/cpu.c b/arch/i386/boot/cpu.c
new file mode 100644
index 000000000000..2a5c32da5852
--- /dev/null
+++ b/arch/i386/boot/cpu.c
@@ -0,0 +1,69 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/cpu.c
13 *
14 * Check for obligatory CPU features and abort if the features are not
15 * present.
16 */
17
18#include "boot.h"
19#include "bitops.h"
20#include <asm/cpufeature.h>
21
/*
 * Turn a CPU level into a printable name: "x86-64" for level 64,
 * otherwise "i<level>86" (e.g. 3 -> "i386").
 *
 * The "i<level>86" form is formatted into a static buffer, so the
 * returned string is overwritten by the next call.
 */
static char *cpu_name(int level)
{
	static char buf[6];

	if (level == 64)
		return "x86-64";

	sprintf(buf, "i%d86", level);
	return buf;
}
33
34int validate_cpu(void)
35{
36 u32 *err_flags;
37 int cpu_level, req_level;
38
39 check_cpu(&cpu_level, &req_level, &err_flags);
40
41 if (cpu_level < req_level) {
42 printf("This kernel requires an %s CPU, ",
43 cpu_name(req_level));
44 printf("but only detected an %s CPU.\n",
45 cpu_name(cpu_level));
46 return -1;
47 }
48
49 if (err_flags) {
50 int i, j;
51 puts("This kernel requires the following features "
52 "not present on the CPU:\n");
53
54 for (i = 0; i < NCAPINTS; i++) {
55 u32 e = err_flags[i];
56
57 for (j = 0; j < 32; j++) {
58 if (e & 1)
59 printf("%d:%d ", i, j);
60
61 e >>= 1;
62 }
63 }
64 putchar('\n');
65 return -1;
66 } else {
67 return 0;
68 }
69}
diff --git a/arch/i386/boot/cpucheck.c b/arch/i386/boot/cpucheck.c
new file mode 100644
index 000000000000..991e8ceae1de
--- /dev/null
+++ b/arch/i386/boot/cpucheck.c
@@ -0,0 +1,267 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/cpucheck.c
13 *
14 * Check for obligatory CPU features and abort if the features are not
15 * present. This code should be compilable as 16-, 32- or 64-bit
16 * code, so be very careful with types and inline assembly.
17 *
18 * This code should not contain any messages; that requires an
19 * additional wrapper.
20 *
21 * As written, this code is not safe for inclusion into the kernel
22 * proper (after FPU initialization, in particular).
23 */
24
25#ifdef _SETUP
26# include "boot.h"
27# include "bitops.h"
28#endif
29#include <linux/types.h>
30#include <asm/cpufeature.h>
31#include <asm/processor-flags.h>
32#include <asm/required-features.h>
33#include <asm/msr-index.h>
34
/* What get_flags() learned about the CPU we are running on */
35struct cpu_features {
36 int level; /* Family, or 64 for x86-64 */
37 int model;
/* Feature words, indexed the same way as the X86_FEATURE_* bit numbers */
38 u32 flags[NCAPINTS];
39};
40
41static struct cpu_features cpu;
/* CPUID leaf 0 vendor string, stored in EBX, EDX, ECX order */
42static u32 cpu_vendor[3];
/* Required feature bits that the CPU lacks; filled in by check_flags() */
43static u32 err_flags[NCAPINTS];
44
/* Minimum CPU level this kernel was configured for (64 means x86-64) */
45#ifdef CONFIG_X86_64
46static const int req_level = 64;
47#elif defined(CONFIG_X86_MINIMUM_CPU_FAMILY)
48static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY;
49#else
50static const int req_level = 3;
51#endif
52
/*
 * Feature bits the kernel cannot run without, one mask per feature word.
 * NOTE(review): exactly eight masks are listed, which presumably matches
 * NCAPINTS -- confirm when NCAPINTS changes.
 */
53static const u32 req_flags[NCAPINTS] =
54{
55 REQUIRED_MASK0,
56 REQUIRED_MASK1,
57 REQUIRED_MASK2,
58 REQUIRED_MASK3,
59 REQUIRED_MASK4,
60 REQUIRED_MASK5,
61 REQUIRED_MASK6,
62 REQUIRED_MASK7,
63};
64
/* Pack four characters into a 32-bit word, with (a) in the low byte */
65#define A32(a,b,c,d) (((d) << 24)+((c) << 16)+((b) << 8)+(a))
66
67static int is_amd(void)
68{
69 return cpu_vendor[0] == A32('A','u','t','h') &&
70 cpu_vendor[1] == A32('e','n','t','i') &&
71 cpu_vendor[2] == A32('c','A','M','D');
72}
73
74static int is_centaur(void)
75{
76 return cpu_vendor[0] == A32('C','e','n','t') &&
77 cpu_vendor[1] == A32('a','u','r','H') &&
78 cpu_vendor[2] == A32('a','u','l','s');
79}
80
81static int is_transmeta(void)
82{
83 return cpu_vendor[0] == A32('G','e','n','u') &&
84 cpu_vendor[1] == A32('i','n','e','T') &&
85 cpu_vendor[2] == A32('M','x','8','6');
86}
87
88static int has_fpu(void)
89{
90 u16 fcw = -1, fsw = -1;
91 u32 cr0;
92
93 asm("movl %%cr0,%0" : "=r" (cr0));
94 if (cr0 & (X86_CR0_EM|X86_CR0_TS)) {
95 cr0 &= ~(X86_CR0_EM|X86_CR0_TS);
96 asm volatile("movl %0,%%cr0" : : "r" (cr0));
97 }
98
99 asm("fninit ; fnstsw %0 ; fnstcw %1" : "+m" (fsw), "+m" (fcw));
100
101 return fsw == 0 && (fcw & 0x103f) == 0x003f;
102}
103
/*
 * Test whether the EFLAGS bit(s) in mask can be toggled.
 *
 * Returns 1 if any bit in mask changed after an attempt to flip it,
 * 0 otherwise.  The original EFLAGS value is restored before returning.
 */
104static int has_eflag(u32 mask)
105{
106 u32 f0, f1;
107
/*
 * Save EFLAGS on the stack, read a second copy into f0, write back
 * f0 ^ mask, re-read the resulting EFLAGS into f1, and finally pop the
 * saved copy to restore the original flags.
 */
108 asm("pushfl ; "
109 "pushfl ; "
110 "popl %0 ; "
111 "movl %0,%1 ; "
112 "xorl %2,%1 ; "
113 "pushl %1 ; "
114 "popfl ; "
115 "pushfl ; "
116 "popl %1 ; "
117 "popfl"
118 : "=&r" (f0), "=&r" (f1)
119 : "ri" (mask));
120
/* The bit is implemented only if the flipped value actually stuck */
121 return !!((f0^f1) & mask);
122}
123
/*
 * Fill in cpu.flags, cpu.level, cpu.model and cpu_vendor from the
 * hardware.
 *
 * The FPU bit is set from has_fpu().  If the EFLAGS ID bit can be
 * toggled, CPUID is used: leaf 0 supplies the vendor words, leaf 1
 * supplies family/model plus feature words 0 (EDX) and 4 (ECX), and
 * leaf 0x80000001 supplies feature words 1 (EDX) and 6 (ECX).
 */
124static void get_flags(void)
125{
126 u32 max_intel_level, max_amd_level;
127 u32 tfms;
128
129 if (has_fpu())
130 set_bit(X86_FEATURE_FPU, cpu.flags);
131
/* CPUID is only issued when the ID flag is implemented */
132 if (has_eflag(X86_EFLAGS_ID)) {
133 asm("cpuid"
134 : "=a" (max_intel_level),
135 "=b" (cpu_vendor[0]),
136 "=d" (cpu_vendor[1]),
137 "=c" (cpu_vendor[2])
138 : "a" (0));
139
/* Ignore implausible maximum-leaf values */
140 if (max_intel_level >= 0x00000001 &&
141 max_intel_level <= 0x0000ffff) {
142 asm("cpuid"
143 : "=a" (tfms),
144 "=c" (cpu.flags[4]),
145 "=d" (cpu.flags[0])
146 : "a" (0x00000001)
147 : "ebx");
/* Family is bits 11:8 and model bits 7:4 of the signature */
148 cpu.level = (tfms >> 8) & 15;
149 cpu.model = (tfms >> 4) & 15;
/* For family >= 6, bits 19:16 extend the model number */
150 if (cpu.level >= 6)
151 cpu.model += ((tfms >> 16) & 0xf) << 4;
152 }
153
154 asm("cpuid"
155 : "=a" (max_amd_level)
156 : "a" (0x80000000)
157 : "ebx", "ecx", "edx");
158
/* Same sanity check for the extended leaf range */
159 if (max_amd_level >= 0x80000001 &&
160 max_amd_level <= 0x8000ffff) {
161 u32 eax = 0x80000001;
162 asm("cpuid"
163 : "+a" (eax),
164 "=c" (cpu.flags[6]),
165 "=d" (cpu.flags[1])
166 : : "ebx");
167 }
168 }
169}
170
171/* Returns a bitmask of which words we have error bits in */
172static int check_flags(void)
173{
174 u32 err;
175 int i;
176
177 err = 0;
178 for (i = 0; i < NCAPINTS; i++) {
179 err_flags[i] = req_flags[i] & ~cpu.flags[i];
180 if (err_flags[i])
181 err |= 1 << i;
182 }
183
184 return err;
185}
186
187/*
188 * Returns -1 on error.
189 *
190 * *cpu_level is set to the current CPU level; *req_level to the required
191 * level. x86-64 is considered level 64 for this purpose.
192 *
193 * *err_flags_ptr is set to the flags error array if there are flags missing.
194 */
/*
 * Any of the three output pointers may be NULL.  Returns 0 when the CPU
 * level is sufficient and no required feature is missing.
 */
195int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
196{
197 int err;
198
199 memset(&cpu.flags, 0, sizeof cpu.flags);
/* Start from level 3; a toggleable EFLAGS.AC bit raises it to 4 */
200 cpu.level = 3;
201
202 if (has_eflag(X86_EFLAGS_AC))
203 cpu.level = 4;
204
205 get_flags();
206 err = check_flags();
207
208 if (test_bit(X86_FEATURE_LM, cpu.flags))
209 cpu.level = 64;
210
/*
 * Vendor-specific second chances.  Each one applies only when feature
 * word 0 is the sole word with missing bits (err == 0x01).
 */
211 if (err == 0x01 &&
212 !(err_flags[0] &
213 ~((1 << X86_FEATURE_XMM)|(1 << X86_FEATURE_XMM2))) &&
214 is_amd()) {
215 /* If this is an AMD and we're only missing SSE+SSE2, try to
216 turn them on */
217
218 u32 ecx = MSR_K7_HWCR;
219 u32 eax, edx;
220
/* Clear bit 15 of the low half of the MSR and write it back */
221 asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
222 eax &= ~(1 << 15);
223 asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
224
225 get_flags(); /* Make sure it really did something */
226 err = check_flags();
227 } else if (err == 0x01 &&
228 !(err_flags[0] & ~(1 << X86_FEATURE_CX8)) &&
229 is_centaur() && cpu.model >= 6) {
230 /* If this is a VIA C3, we might have to enable CX8
231 explicitly */
232
233 u32 ecx = MSR_VIA_FCR;
234 u32 eax, edx;
235
/* Set bits 1 and 7 of the low half of the MSR and write it back */
236 asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
237 eax |= (1<<1)|(1<<7);
238 asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
239
/* CPUID is not re-read here; the feature bit is set by hand */
240 set_bit(X86_FEATURE_CX8, cpu.flags);
241 err = check_flags();
242 } else if (err == 0x01 && is_transmeta()) {
243 /* Transmeta might have masked feature bits in word 0 */
244
245 u32 ecx = 0x80860004;
246 u32 eax, edx;
247 u32 level = 1;
248
/*
 * Write all-ones to the low half of the MSR, re-read feature word 0
 * via CPUID leaf 1, then restore the original MSR value.
 */
249 asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
250 asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx));
251 asm("cpuid"
252 : "+a" (level), "=d" (cpu.flags[0])
253 : : "ecx", "ebx");
254 asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
255
256 err = check_flags();
257 }
258
259 if (err_flags_ptr)
260 *err_flags_ptr = err ? err_flags : NULL;
261 if (cpu_level_ptr)
262 *cpu_level_ptr = cpu.level;
263 if (req_level_ptr)
264 *req_level_ptr = req_level;
265
266 return (cpu.level < req_level || err) ? -1 : 0;
267}
diff --git a/arch/i386/boot/edd.S b/arch/i386/boot/edd.S
deleted file mode 100644
index 34321368011a..000000000000
--- a/arch/i386/boot/edd.S
+++ /dev/null
@@ -1,231 +0,0 @@
1/*
2 * BIOS Enhanced Disk Drive support
3 * Copyright (C) 2002, 2003, 2004 Dell, Inc.
4 * by Matt Domsch <Matt_Domsch@dell.com> October 2002
5 * conformant to T13 Committee www.t13.org
6 * projects 1572D, 1484D, 1386D, 1226DT
7 * disk signature read by Matt Domsch <Matt_Domsch@dell.com>
8 * and Andrew Wilks <Andrew_Wilks@dell.com> September 2003, June 2004
9 * legacy CHS retrieval by Patrick J. LoPresti <patl@users.sourceforge.net>
10 * March 2004
11 * Command line option parsing, Matt Domsch, November 2004
12 */
13
14#include <linux/edd.h>
15#include <asm/setup.h>
16
17#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
18
19# It is assumed that %ds == INITSEG here
20
21 movb $0, (EDD_MBR_SIG_NR_BUF)
22 movb $0, (EDDNR)
23
24# Check the command line for options:
25# edd=of disables EDD completely (edd=off)
26# edd=sk skips the MBR test (edd=skipmbr)
27# edd=on re-enables EDD (edd=on)
28
29 pushl %esi
30 movw $edd_mbr_sig_start, %di # Default to edd=on
31
32 movl %cs:(cmd_line_ptr), %esi
33 andl %esi, %esi
34 jz old_cl # Old boot protocol?
35
36# Convert to a real-mode pointer in fs:si
37 movl %esi, %eax
38 shrl $4, %eax
39 movw %ax, %fs
40 andw $0xf, %si
41 jmp have_cl_pointer
42
43# Old-style boot protocol?
44old_cl:
45 push %ds # aka INITSEG
46 pop %fs
47
48 cmpw $0xa33f, (0x20)
49 jne done_cl # No command line at all?
50 movw (0x22), %si # Pointer relative to INITSEG
51
52# fs:si has the pointer to the command line now
53have_cl_pointer:
54
55# Loop through kernel command line one byte at a time. Just in
56# case the loader is buggy and failed to null-terminate the command line
57# terminate if we get close enough to the end of the segment that we
58# cannot fit "edd=XX"...
59cl_atspace:
60 cmpw $-5, %si # Watch for segment wraparound
61 jae done_cl
62 movl %fs:(%si), %eax
63 andb %al, %al # End of line?
64 jz done_cl
65 cmpl $EDD_CL_EQUALS, %eax
66 jz found_edd_equals
67 cmpb $0x20, %al # <= space consider whitespace
68 ja cl_skipword
69 incw %si
70 jmp cl_atspace
71
72cl_skipword:
73 cmpw $-5, %si # Watch for segment wraparound
74 jae done_cl
75 movb %fs:(%si), %al # End of string?
76 andb %al, %al
77 jz done_cl
78 cmpb $0x20, %al
79 jbe cl_atspace
80 incw %si
81 jmp cl_skipword
82
83found_edd_equals:
84# only looking at first two characters after equals
85# late overrides early on the command line, so keep going after finding something
86 movw %fs:4(%si), %ax
87 cmpw $EDD_CL_OFF, %ax # edd=of
88 je do_edd_off
89 cmpw $EDD_CL_SKIP, %ax # edd=sk
90 je do_edd_skipmbr
91 cmpw $EDD_CL_ON, %ax # edd=on
92 je do_edd_on
93 jmp cl_skipword
94do_edd_skipmbr:
95 movw $edd_start, %di
96 jmp cl_skipword
97do_edd_off:
98 movw $edd_done, %di
99 jmp cl_skipword
100do_edd_on:
101 movw $edd_mbr_sig_start, %di
102 jmp cl_skipword
103
104done_cl:
105 popl %esi
106 jmpw *%di
107
108# Read the first sector of each BIOS disk device and store the 4-byte signature
109edd_mbr_sig_start:
110 movb $0x80, %dl # from device 80
111 movw $EDD_MBR_SIG_BUF, %bx # store buffer ptr in bx
112edd_mbr_sig_read:
113 movl $0xFFFFFFFF, %eax
114 movl %eax, (%bx) # assume failure
115 pushw %bx
116 movb $READ_SECTORS, %ah
117 movb $1, %al # read 1 sector
118 movb $0, %dh # at head 0
119 movw $1, %cx # cylinder 0, sector 0
120 pushw %es
121 pushw %ds
122 popw %es
123 movw $EDDBUF, %bx # disk's data goes into EDDBUF
124 pushw %dx # work around buggy BIOSes
125 stc # work around buggy BIOSes
126 int $0x13
127 sti # work around buggy BIOSes
128 popw %dx
129 popw %es
130 popw %bx
131 jc edd_mbr_sig_done # on failure, we're done.
132 cmpb $0, %ah # some BIOSes do not set CF
133 jne edd_mbr_sig_done # on failure, we're done.
134 movl (EDDBUF+EDD_MBR_SIG_OFFSET), %eax # read sig out of the MBR
135 movl %eax, (%bx) # store success
136 incb (EDD_MBR_SIG_NR_BUF) # note that we stored something
137 incb %dl # increment to next device
138 addw $4, %bx # increment sig buffer ptr
139 cmpb $EDD_MBR_SIG_MAX, (EDD_MBR_SIG_NR_BUF) # Out of space?
140 jb edd_mbr_sig_read # keep looping
141edd_mbr_sig_done:
142
143# Do the BIOS Enhanced Disk Drive calls
144# This consists of three calls:
145# int 13h ah=41h "Check Extensions Present"
146# int 13h ah=48h "Get Device Parameters"
147# int 13h ah=08h "Legacy Get Device Parameters"
148#
149# A buffer of size EDDMAXNR*(EDDEXTSIZE+EDDPARMSIZE) is reserved for our use
150# in the boot_params at EDDBUF. The first four bytes of which are
151# used to store the device number, interface support map and version
152# results from fn41. The next four bytes are used to store the legacy
153# cylinders, heads, and sectors from fn08. The following 74 bytes are used to
154# store the results from fn48. Starting from device 80h, fn41, then fn48
155# are called and their results stored in EDDBUF+n*(EDDEXTSIZE+EDDPARMSIZE).
156# Then the pointer is incremented to store the data for the next call.
157# This repeats until either a device doesn't exist, or until EDDMAXNR
158# devices have been stored.
159# The one tricky part is that ds:si always points EDDEXTSIZE bytes into
160# the structure, and the fn41 and fn08 results are stored at offsets
161# from there. This removes the need to increment the pointer for
162# every store, and leaves it ready for the fn48 call.
163# A second one-byte buffer, EDDNR, in the boot_params stores
164# the number of BIOS devices which exist, up to EDDMAXNR.
165# In setup.c, copy_edd() stores both boot_params buffers away
166# for later use, as they would get overwritten otherwise.
167# This code is sensitive to the size of the structs in edd.h
168edd_start:
169 # %ds points to the bootsector
170 # result buffer for fn48
171 movw $EDDBUF+EDDEXTSIZE, %si # in ds:si, fn41 results
172 # kept just before that
173 movb $0x80, %dl # BIOS device 0x80
174
175edd_check_ext:
176 movb $CHECKEXTENSIONSPRESENT, %ah # Function 41
177 movw $EDDMAGIC1, %bx # magic
178 int $0x13 # make the call
179 jc edd_done # no more BIOS devices
180
181 cmpw $EDDMAGIC2, %bx # is magic right?
182 jne edd_next # nope, next...
183
184 movb %dl, %ds:-8(%si) # store device number
185 movb %ah, %ds:-7(%si) # store version
186 movw %cx, %ds:-6(%si) # store extensions
187 incb (EDDNR) # note that we stored something
188
189edd_get_device_params:
190 movw $EDDPARMSIZE, %ds:(%si) # put size
191 movw $0x0, %ds:2(%si) # work around buggy BIOSes
192 movb $GETDEVICEPARAMETERS, %ah # Function 48
193 int $0x13 # make the call
194 # Don't check for fail return
195 # it doesn't matter.
196edd_get_legacy_chs:
197 xorw %ax, %ax
198 movw %ax, %ds:-4(%si)
199 movw %ax, %ds:-2(%si)
200 # Ralf Brown's Interrupt List says to set ES:DI to
201 # 0000h:0000h "to guard against BIOS bugs"
202 pushw %es
203 movw %ax, %es
204 movw %ax, %di
205 pushw %dx # legacy call clobbers %dl
206 movb $LEGACYGETDEVICEPARAMETERS, %ah # Function 08
207 int $0x13 # make the call
208 jc edd_legacy_done # failed
209 movb %cl, %al # Low 6 bits are max
210 andb $0x3F, %al # sector number
211 movb %al, %ds:-1(%si) # Record max sect
212 movb %dh, %ds:-2(%si) # Record max head number
213 movb %ch, %al # Low 8 bits of max cyl
214 shr $6, %cl
215 movb %cl, %ah # High 2 bits of max cyl
216 movw %ax, %ds:-4(%si)
217
218edd_legacy_done:
219 popw %dx
220 popw %es
221 movw %si, %ax # increment si
222 addw $EDDPARMSIZE+EDDEXTSIZE, %ax
223 movw %ax, %si
224
225edd_next:
226 incb %dl # increment to next device
227 cmpb $EDDMAXNR, (EDDNR) # Out of space?
228 jb edd_check_ext # keep looping
229
230edd_done:
231#endif
diff --git a/arch/i386/boot/edd.c b/arch/i386/boot/edd.c
new file mode 100644
index 000000000000..25a282494f4c
--- /dev/null
+++ b/arch/i386/boot/edd.c
@@ -0,0 +1,196 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/edd.c
13 *
14 * Get EDD BIOS disk information
15 */
16
17#include "boot.h"
18#include <linux/edd.h>
19
20#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
21
/*
 * Request packet handed to the BIOS in ds:si for the extended read
 * (int 0x13, ah=0x42) issued by read_mbr().
 */
22struct edd_dapa {
/* Size of this packet in bytes; read_mbr() sets it to sizeof(dapa) */
23 u8 pkt_size;
24 u8 rsvd;
/* Number of sectors to transfer */
25 u16 sector_cnt;
/* Real-mode offset and segment of the transfer buffer */
26 u16 buf_off, buf_seg;
/* Starting sector; read_mbr() leaves it 0 to fetch the first sector */
27 u64 lba;
/* Left zero by read_mbr() */
28 u64 buf_lin_addr;
29};
30
31/*
32 * Read the MBR (first sector) from a specific device.
33 */
/*
 * Tries the extended read (ah=0x42) first and falls back to the legacy
 * CHS read (ah=0x02) if that fails.  buf is an offset in the current
 * data segment.  Returns 0 on success, -1 if both reads fail.
 */
34static int read_mbr(u8 devno, void *buf)
35{
36 struct edd_dapa dapa;
37 u16 ax, bx, cx, dx, si;
38
39 memset(&dapa, 0, sizeof dapa);
40 dapa.pkt_size = sizeof(dapa);
41 dapa.sector_cnt = 1;
42 dapa.buf_off = (size_t)buf;
43 dapa.buf_seg = ds();
44 /* dapa.lba = 0; */
45
46 ax = 0x4200; /* Extended Read */
47 si = (size_t)&dapa;
48 dx = devno;
/* CF is preset with stc; setc then leaves the BIOS's CF result in %al */
49 asm("pushfl; stc; int $0x13; setc %%al; popfl"
50 : "+a" (ax), "+S" (si), "+d" (dx)
51 : "m" (dapa)
52 : "ebx", "ecx", "edi", "memory");
53
54 if (!(u8)ax)
55 return 0; /* OK */
56
57 ax = 0x0201; /* Legacy Read, one sector */
58 cx = 0x0001; /* Sector 0-0-1 */
59 dx = devno;
60 bx = (size_t)buf;
/* NOTE(review): the legacy call takes its buffer in es:bx, so this
   presumably relies on %es == %ds -- confirm */
61 asm("pushfl; stc; int $0x13; setc %%al; popfl"
62 : "+a" (ax), "+c" (cx), "+d" (dx), "+b" (bx)
63 : : "esi", "edi", "memory");
64
65 return -(u8)ax; /* 0 or -1 */
66}
67
68static u32 read_mbr_sig(u8 devno, struct edd_info *ei)
69{
70 int sector_size;
71 char *mbrbuf_ptr, *mbrbuf_end;
72 u32 mbrsig;
73 u32 buf_base, mbr_base;
74 extern char _end[];
75 static char mbr_buf[1024];
76
77 sector_size = ei->params.bytes_per_sector;
78 if (!sector_size)
79 sector_size = 512; /* Best available guess */
80
81 buf_base = (ds() << 4) + (u32)&_end;
82 mbr_base = (buf_base+sector_size-1) & ~(sector_size-1);
83 mbrbuf_ptr = mbr_buf + (mbr_base-buf_base);
84 mbrbuf_end = mbrbuf_ptr + sector_size;
85
86 if (!(boot_params.hdr.loadflags & CAN_USE_HEAP))
87 return 0;
88 if (mbrbuf_end > (char *)(size_t)boot_params.hdr.heap_end_ptr)
89 return 0;
90
91 if (read_mbr(devno, mbrbuf_ptr))
92 return 0;
93
94 mbrsig = *(u32 *)&mbrbuf_ptr[EDD_MBR_SIG_OFFSET];
95 return mbrsig;
96}
97
98static int get_edd_info(u8 devno, struct edd_info *ei)
99{
100 u16 ax, bx, cx, dx, di;
101
102 memset(ei, 0, sizeof *ei);
103
104 /* Check Extensions Present */
105
106 ax = 0x4100;
107 bx = EDDMAGIC1;
108 dx = devno;
109 asm("pushfl; stc; int $0x13; setc %%al; popfl"
110 : "+a" (ax), "+b" (bx), "=c" (cx), "+d" (dx)
111 : : "esi", "edi");
112
113 if ((u8)ax)
114 return -1; /* No extended information */
115
116 if (bx != EDDMAGIC2)
117 return -1;
118
119 ei->device = devno;
120 ei->version = ax >> 8; /* EDD version number */
121 ei->interface_support = cx; /* EDD functionality subsets */
122
123 /* Extended Get Device Parameters */
124
125 ei->params.length = sizeof(ei->params);
126 ax = 0x4800;
127 dx = devno;
128 asm("pushfl; int $0x13; popfl"
129 : "+a" (ax), "+d" (dx)
130 : "S" (&ei->params)
131 : "ebx", "ecx", "edi");
132
133 /* Get legacy CHS parameters */
134
135 /* Ralf Brown recommends setting ES:DI to 0:0 */
136 ax = 0x0800;
137 dx = devno;
138 di = 0;
139 asm("pushw %%es; "
140 "movw %%di,%%es; "
141 "pushfl; stc; int $0x13; setc %%al; popfl; "
142 "popw %%es"
143 : "+a" (ax), "=b" (bx), "=c" (cx), "+d" (dx), "+D" (di)
144 : : "esi");
145
146 if ((u8)ax == 0) {
147 ei->legacy_max_cylinder = (cx >> 8) + ((cx & 0xc0) << 2);
148 ei->legacy_max_head = dx >> 8;
149 ei->legacy_sectors_per_track = cx & 0x3f;
150 }
151
152 return 0;
153}
154
155void query_edd(void)
156{
157 char eddarg[8];
158 int do_mbr = 1;
159 int do_edd = 1;
160 int devno;
161 struct edd_info ei, *edp;
162
163 if (cmdline_find_option("edd", eddarg, sizeof eddarg) > 0) {
164 if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip"))
165 do_mbr = 0;
166 else if (!strcmp(eddarg, "off"))
167 do_edd = 0;
168 }
169
170 edp = (struct edd_info *)boot_params.eddbuf;
171
172 if (!do_edd)
173 return;
174
175 for (devno = 0x80; devno < 0x80+EDD_MBR_SIG_MAX; devno++) {
176 /*
177 * Scan the BIOS-supported hard disks and query EDD
178 * information...
179 */
180 get_edd_info(devno, &ei);
181
182 if (boot_params.eddbuf_entries < EDDMAXNR) {
183 memcpy(edp, &ei, sizeof ei);
184 edp++;
185 boot_params.eddbuf_entries++;
186 }
187
188 if (do_mbr) {
189 u32 mbr_sig;
190 mbr_sig = read_mbr_sig(devno, &ei);
191 boot_params.edd_mbr_sig_buffer[devno-0x80] = mbr_sig;
192 }
193 }
194}
195
196#endif
diff --git a/arch/i386/boot/header.S b/arch/i386/boot/header.S
new file mode 100644
index 000000000000..6b9923fb6eae
--- /dev/null
+++ b/arch/i386/boot/header.S
@@ -0,0 +1,283 @@
1/*
2 * header.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 *
6 * Based on bootsect.S and setup.S
7 * modified by more people than can be counted
8 *
9 * Rewritten as a common file by H. Peter Anvin (Apr 2007)
10 *
11 * BIG FAT NOTE: We're in real mode using 64k segments. Therefore segment
12 * addresses must be multiplied by 16 to obtain their respective linear
13 * addresses. To avoid confusion, linear addresses are written using leading
14 * hex while segment addresses are written as segment:offset.
15 *
16 */
17
18#include <asm/segment.h>
19#include <linux/utsrelease.h>
20#include <asm/boot.h>
21#include <asm/e820.h>
22#include <asm/page.h>
23#include <asm/setup.h>
24#include "boot.h"
25
26SETUPSECTS = 4 /* default nr of setup-sectors */
27BOOTSEG = 0x07C0 /* original address of boot-sector */
28SYSSEG = DEF_SYSSEG /* system loaded at 0x10000 (65536) */
29SYSSIZE = DEF_SYSSIZE /* system size: # of 16-byte clicks */
30 /* to be loaded */
31ROOT_DEV = 0 /* ROOT_DEV is now written by "build" */
32SWAP_DEV = 0 /* SWAP_DEV is now written by "build" */
33
34#ifndef SVGA_MODE
35#define SVGA_MODE ASK_VGA
36#endif
37
38#ifndef RAMDISK
39#define RAMDISK 0
40#endif
41
42#ifndef ROOT_RDONLY
43#define ROOT_RDONLY 1
44#endif
45
	.code16
	.section ".bstext", "ax"

	# Legacy boot-sector stub.  It does not load anything: it prints
	# bugger_off_msg one character at a time, waits for a key press
	# and then asks the BIOS to reboot.

	.global bootsect_start
bootsect_start:

	# Normalize the start address
	ljmp	$BOOTSEG, $start2

start2:
	# Make %ds, %es and %ss equal to %cs; stack starts at offset 0
	# of that segment (first push wraps to the top of the segment).
	movw	%cs, %ax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %ss
	xorw	%sp, %sp
	sti
	cld

	movw	$bugger_off_msg, %si

msg_loop:
	lodsb				# next message byte -> %al
	andb	%al, %al		# terminating NUL?
	jz	bs_die
	movb	$0xe, %ah		# int 0x10 function 0x0e with %bx = 7
	movw	$7, %bx			# (presumably BIOS teletype output)
	int	$0x10
	jmp	msg_loop

bs_die:
	# Allow the user to press a key, then reboot
	xorw	%ax, %ax
	int	$0x16
	int	$0x19

	# int 0x19 should never return.  In case it does anyway,
	# invoke the BIOS reset code...
	ljmp	$0xf000,$0xfff0
84
85 .section ".bsdata", "a"
86bugger_off_msg:
87 .ascii "Direct booting from floppy is no longer supported.\r\n"
88 .ascii "Please use a boot loader program instead.\r\n"
89 .ascii "\n"
90 .ascii "Remove disk and press any key to reboot . . .\r\n"
91 .byte 0
92
93
94 # Kernel attributes; used by setup. This is part 1 of the
95 # header, from the old boot sector.
96
97 .section ".header", "a"
98 .globl hdr
99hdr:
100setup_sects: .byte SETUPSECTS
101root_flags: .word ROOT_RDONLY
102syssize: .long SYSSIZE
103ram_size: .word RAMDISK
104vid_mode: .word SVGA_MODE
105root_dev: .word ROOT_DEV
106boot_flag: .word 0xAA55
107
108 # offset 512, entry point
109
110 .globl _start
111_start:
112 # Explicitly enter this as bytes, or the assembler
113 # tries to generate a 3-byte jump here, which causes
114 # everything else to push off to the wrong offset.
115 .byte 0xeb # short (2-byte) jump
116 .byte start_of_setup-1f
1171:
118
119 # Part 2 of the header, from the old setup.S
120
121 .ascii "HdrS" # header signature
122 .word 0x0206 # header version number (>= 0x0105)
123 # or else old loadlin-1.5 will fail)
124 .globl realmode_swtch
125realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
126start_sys_seg: .word SYSSEG
127 .word kernel_version-512 # pointing to kernel version string
128 # above section of header is compatible
129 # with loadlin-1.5 (header v1.5). Don't
130 # change it.
131
132type_of_loader: .byte 0 # = 0, old one (LILO, Loadlin,
133 # Bootlin, SYSLX, bootsect...)
134 # See Documentation/i386/boot.txt for
135 # assigned ids
136
137# flags, unused bits must be zero (RFU) bit within loadflags
138loadflags:
139LOADED_HIGH = 1 # If set, the kernel is loaded high
140CAN_USE_HEAP = 0x80 # If set, the loader also has set
141 # heap_end_ptr to tell how much
142 # space behind setup.S can be used for
143 # heap purposes.
144 # Only the loader knows what is free
145#ifndef __BIG_KERNEL__
146 .byte 0
147#else
148 .byte LOADED_HIGH
149#endif
150
151setup_move_size: .word 0x8000 # size to move, when setup is not
152 # loaded at 0x90000. We will move setup
153 # to 0x90000 then just before jumping
154 # into the kernel. However, only the
155 # loader knows how much data behind
156 # us also needs to be loaded.
157
158code32_start: # here loaders can put a different
159 # start address for 32-bit code.
160#ifndef __BIG_KERNEL__
161 .long 0x1000 # 0x1000 = default for zImage
162#else
163 .long 0x100000 # 0x100000 = default for big kernel
164#endif
165
166ramdisk_image: .long 0 # address of loaded ramdisk image
167 # Here the loader puts the 32-bit
168 # address where it loaded the image.
169 # This only will be read by the kernel.
170
171ramdisk_size: .long 0 # its size in bytes
172
173bootsect_kludge:
174 .long 0 # obsolete
175
176heap_end_ptr: .word _end+1024 # (Header version 0x0201 or later)
177 # space from here (exclusive) down to
178 # end of setup code can be used by setup
179 # for local heap purposes.
180
181pad1: .word 0
182cmd_line_ptr: .long 0 # (Header version 0x0202 or later)
183 # If nonzero, a 32-bit pointer
184 # to the kernel command line.
185 # The command line should be
186 # located between the start of
187 # setup and the end of low
188 # memory (0xa0000), or it may
189 # get overwritten before it
190 # gets read. If this field is
191 # used, there is no longer
192 # anything magical about the
193 # 0x90000 segment; the setup
194 # can be located anywhere in
195 # low memory 0x10000 or higher.
196
197ramdisk_max: .long (-__PAGE_OFFSET-(512 << 20)-1) & 0x7fffffff
198 # (Header version 0x0203 or later)
199 # The highest safe address for
200 # the contents of an initrd
201
202kernel_alignment: .long CONFIG_PHYSICAL_ALIGN #physical addr alignment
203 #required for protected mode
204 #kernel
205#ifdef CONFIG_RELOCATABLE
206relocatable_kernel: .byte 1
207#else
208relocatable_kernel: .byte 0
209#endif
210pad2: .byte 0
211pad3: .word 0
212
213cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
214 #added with boot protocol
215 #version 2.06
216
217# End of setup header #####################################################
218
	.section ".inittext", "ax"
	# Real-mode entry code reached from the short jump at _start.
	# It normalizes %cs/%es and the stack, verifies the setup
	# signature, clears the bss and calls main().
start_of_setup:
#ifdef SAFE_RESET_DISK_CONTROLLER
# Reset the disk controller.
	movw	$0x0000, %ax		# Reset disk controller
	movb	$0x80, %dl		# All disks
	int	$0x13
#endif

# We will have entered with %cs = %ds+0x20, normalize %cs so
# it is on par with the other segments.
	pushw	%ds
	pushw	$setup2
	lretw				# far return: %cs = %ds, %ip = setup2

setup2:
# Force %es = %ds
	movw	%ds, %ax
	movw	%ax, %es
	cld

# Stack paranoia: align the stack and make sure it is good
# for both 16- and 32-bit references.  In particular, if we
# were meant to have been using the full 16-bit segment, the
# caller might have set %sp to zero, which breaks %esp-based
# references.
	andw	$~3, %sp	# dword align (might as well...)
	jnz	1f
	movw	$0xfffc, %sp	# Make sure we're not zero
1:	movzwl	%sp, %esp	# Clear upper half of %esp
	sti

# Check signature at end of setup
	cmpl	$0x5a5aaa55, setup_sig
	jne	setup_bad

# Zero the bss
# %cx = (_end + 3 - __bss_start) / 4, i.e. the bss size in dwords,
# rounded up; %es:%di is the destination of the stosl.
	movw	$__bss_start, %di
	movw	$_end+3, %cx
	xorl	%eax, %eax
	subw	%di, %cx
	shrw	$2, %cx
	rep; stosl

# Jump to C code (should not return)
	calll	main

# Setup corrupt somehow...
setup_bad:
	movl	$setup_corrupt, %eax	# argument for puts is passed in %eax
	calll	puts
	# Fall through...
271
272 .globl die
273 .type die, @function
274die:
275 hlt
276 jmp die
277
278 .size die, .-due
279
280 .section ".initdata", "a"
281setup_corrupt:
282 .byte 7
283 .string "No setup signature found..."
diff --git a/arch/i386/boot/main.c b/arch/i386/boot/main.c
new file mode 100644
index 000000000000..7f01f96c4fb8
--- /dev/null
+++ b/arch/i386/boot/main.c
@@ -0,0 +1,161 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/main.c
13 *
14 * Main module for the real-mode kernel code
15 */
16
17#include "boot.h"
18
19struct boot_params boot_params __attribute__((aligned(16)));
20
21char *HEAP = _end;
22char *heap_end = _end; /* Default end of heap = no heap */
23
24/*
25 * Copy the header into the boot parameter block. Since this
26 * screws up the old-style command line protocol, adjust by
27 * filling in the new-style command line pointer instead.
28 */
29#define OLD_CL_MAGIC 0xA33F
30#define OLD_CL_ADDRESS 0x20
31
32static void copy_boot_params(void)
33{
34 struct old_cmdline {
35 u16 cl_magic;
36 u16 cl_offset;
37 };
38 const struct old_cmdline * const oldcmd =
39 (const struct old_cmdline *)OLD_CL_ADDRESS;
40
41 BUILD_BUG_ON(sizeof boot_params != 4096);
42 memcpy(&boot_params.hdr, &hdr, sizeof hdr);
43
44 if (!boot_params.hdr.cmd_line_ptr &&
45 oldcmd->cl_magic == OLD_CL_MAGIC) {
46 /* Old-style command line protocol. */
47 u16 cmdline_seg;
48
49 /* Figure out if the command line falls in the region
50 of memory that an old kernel would have copied up
51 to 0x90000... */
52 if (oldcmd->cl_offset < boot_params.hdr.setup_move_size)
53 cmdline_seg = ds();
54 else
55 cmdline_seg = 0x9000;
56
57 boot_params.hdr.cmd_line_ptr =
58 (cmdline_seg << 4) + oldcmd->cl_offset;
59 }
60}
61
62/*
63 * Set the keyboard repeat rate to maximum. Unclear why this
64 * is done here; this might be possible to kill off as stale code.
65 */
static void keyboard_set_repeat(void)
{
	u16 ax = 0x0305;	/* AH = 0x03, AL = 0x05 for int 0x16 */
	u16 bx = 0;		/* presumably delay/rate selector 0 = fastest; confirm in a BIOS reference */
	/*
	 * volatile: the call is made for its side effect only, the
	 * register outputs are not used.  ecx/edx/esi/edi are declared
	 * clobbered in case the BIOS modifies them.
	 */
	asm volatile("int $0x16"
		     : "+a" (ax), "+b" (bx)
		     : : "ecx", "edx", "esi", "edi");
}
74
75/*
76 * Get Intel SpeedStep IST information.
77 */
static void query_speedstep_ist(void)
{
	/*
	 * Issue int 0x15 with EAX = 0xe980 and EDX = 0x47534943 and
	 * store the resulting EAX, EBX, ECX and EDX, in that order, in
	 * boot_params.speedstep_info[0..3].  The result is stored
	 * unconditionally; no error indication is examined here.
	 */
	asm("int $0x15"
	    : "=a" (boot_params.speedstep_info[0]),
	      "=b" (boot_params.speedstep_info[1]),
	      "=c" (boot_params.speedstep_info[2]),
	      "=d" (boot_params.speedstep_info[3])
	    : "a" (0x0000e980),	 /* IST Support */
	      "d" (0x47534943)); /* Request value */
}
88
89/*
90 * Tell the BIOS what CPU mode we intend to run in.
91 */
static void set_bios_mode(void)
{
	/* Only done for CONFIG_X86_64 builds; otherwise this is a no-op. */
#ifdef CONFIG_X86_64
	u32 eax, ebx;

	eax = 0xec00;	/* int 0x15 function number */
	ebx = 2;	/* mode argument; presumably "long mode only" - confirm in BIOS documentation */
	/* The results in eax/ebx are ignored; the call is advisory. */
	asm volatile("int $0x15"
		     : "+a" (eax), "+b" (ebx)
		     : : "ecx", "edx", "esi", "edi");
#endif
}
104
105void main(void)
106{
107 /* First, copy the boot header into the "zeropage" */
108 copy_boot_params();
109
110 /* End of heap check */
111 if (boot_params.hdr.loadflags & CAN_USE_HEAP) {
112 heap_end = (char *)(boot_params.hdr.heap_end_ptr
113 +0x200-STACK_SIZE);
114 } else {
115 /* Boot protocol 2.00 only, no heap available */
116 puts("WARNING: Ancient bootloader, some functionality "
117 "may be limited!\n");
118 }
119
120 /* Make sure we have all the proper CPU support */
121 if (validate_cpu()) {
122 puts("Unable to boot - please use a kernel appropriate "
123 "for your CPU.\n");
124 die();
125 }
126
127 /* Tell the BIOS what CPU mode we intend to run in. */
128 set_bios_mode();
129
130 /* Detect memory layout */
131 detect_memory();
132
133 /* Set keyboard repeat rate (why?) */
134 keyboard_set_repeat();
135
136 /* Set the video mode */
137 set_video();
138
139 /* Query MCA information */
140 query_mca();
141
142 /* Voyager */
143#ifdef CONFIG_X86_VOYAGER
144 query_voyager();
145#endif
146
147 /* Query SpeedStep IST information */
148 query_speedstep_ist();
149
150 /* Query APM information */
151#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
152 query_apm_bios();
153#endif
154
155 /* Query EDD information */
156#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
157 query_edd();
158#endif
159 /* Do the last things and invoke protected mode */
160 go_to_protected_mode();
161}
diff --git a/arch/i386/boot/mca.c b/arch/i386/boot/mca.c
new file mode 100644
index 000000000000..68222f2d4b67
--- /dev/null
+++ b/arch/i386/boot/mca.c
@@ -0,0 +1,43 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/mca.c
13 *
14 * Get the MCA system description table
15 */
16
17#include "boot.h"
18
19int query_mca(void)
20{
21 u8 err;
22 u16 es, bx, len;
23
24 asm("pushw %%es ; "
25 "int $0x15 ; "
26 "setc %0 ; "
27 "movw %%es, %1 ; "
28 "popw %%es"
29 : "=acd" (err), "=acdSD" (es), "=b" (bx)
30 : "a" (0xc000));
31
32 if (err)
33 return -1; /* No MCA present */
34
35 set_fs(es);
36 len = rdfs16(bx);
37
38 if (len > sizeof(boot_params.sys_desc_table))
39 len = sizeof(boot_params.sys_desc_table);
40
41 copy_from_fs(&boot_params.sys_desc_table, bx, len);
42 return 0;
43}
diff --git a/arch/i386/boot/memory.c b/arch/i386/boot/memory.c
new file mode 100644
index 000000000000..1a2e62db8bed
--- /dev/null
+++ b/arch/i386/boot/memory.c
@@ -0,0 +1,99 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/memory.c
13 *
14 * Memory detection code
15 */
16
17#include "boot.h"
18
19#define SMAP 0x534d4150 /* ASCII "SMAP" */
20
21static int detect_memory_e820(void)
22{
23 u32 next = 0;
24 u32 size, id;
25 u8 err;
26 struct e820entry *desc = boot_params.e820_map;
27
28 do {
29 size = sizeof(struct e820entry);
30 id = SMAP;
31 asm("int $0x15; setc %0"
32 : "=am" (err), "+b" (next), "+d" (id), "+c" (size),
33 "=m" (*desc)
34 : "D" (desc), "a" (0xe820));
35
36 if (err || id != SMAP)
37 break;
38
39 boot_params.e820_entries++;
40 desc++;
41 } while (next && boot_params.e820_entries < E820MAX);
42
43 return boot_params.e820_entries;
44}
45
46static int detect_memory_e801(void)
47{
48 u16 ax, bx, cx, dx;
49 u8 err;
50
51 bx = cx = dx = 0;
52 ax = 0xe801;
53 asm("stc; int $0x15; setc %0"
54 : "=m" (err), "+a" (ax), "+b" (bx), "+c" (cx), "+d" (dx));
55
56 if (err)
57 return -1;
58
59 /* Do we really need to do this? */
60 if (cx || dx) {
61 ax = cx;
62 bx = dx;
63 }
64
65 if (ax > 15*1024)
66 return -1; /* Bogus! */
67
68 /* This ignores memory above 16MB if we have a memory hole
69 there. If someone actually finds a machine with a memory
70 hole at 16MB and no support for 0E820h they should probably
71 generate a fake e820 map. */
72 boot_params.alt_mem_k = (ax == 15*1024) ? (dx << 6)+ax : ax;
73
74 return 0;
75}
76
77static int detect_memory_88(void)
78{
79 u16 ax;
80 u8 err;
81
82 ax = 0x8800;
83 asm("stc; int $0x15; setc %0" : "=bcdm" (err), "+a" (ax));
84
85 boot_params.screen_info.ext_mem_k = ax;
86
87 return -err;
88}
89
/*
 * Detect the memory layout, trying the e820, e801 and 88 BIOS methods
 * in that order and stopping at the first one that succeeds.
 *
 * Returns 0 if e820 produced at least one entry or e801 succeeded;
 * otherwise returns the result of the 88 method.
 */
int detect_memory(void)
{
	/* || short-circuits, so e801 is only tried when e820 found nothing */
	if (detect_memory_e820() > 0 || detect_memory_e801() == 0)
		return 0;

	return detect_memory_88();
}
diff --git a/arch/i386/boot/pm.c b/arch/i386/boot/pm.c
new file mode 100644
index 000000000000..1df025c73261
--- /dev/null
+++ b/arch/i386/boot/pm.c
@@ -0,0 +1,170 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/pm.c
13 *
14 * Prepare the machine for transition to protected mode.
15 */
16
17#include "boot.h"
18#include <asm/segment.h>
19
20/*
21 * Invoke the realmode switch hook if present; otherwise
22 * disable all interrupts.
23 */
24static void realmode_switch_hook(void)
25{
26 if (boot_params.hdr.realmode_swtch) {
27 asm volatile("lcallw *%0"
28 : : "m" (boot_params.hdr.realmode_swtch)
29 : "eax", "ebx", "ecx", "edx");
30 } else {
31 asm volatile("cli");
32 outb(0x80, 0x70); /* Disable NMI */
33 io_delay();
34 }
35}
36
37/*
38 * A zImage kernel is loaded at 0x10000 but wants to run at 0x1000.
39 * A bzImage kernel is loaded and runs at 0x100000.
40 */
41static void move_kernel_around(void)
42{
43 /* Note: rely on the compile-time option here rather than
44 the LOADED_HIGH flag. The Qemu kernel loader unconditionally
45 sets the loadflags to zero. */
46#ifndef __BIG_KERNEL__
47 u16 dst_seg, src_seg;
48 u32 syssize;
49
50 dst_seg = 0x1000 >> 4;
51 src_seg = 0x10000 >> 4;
52 syssize = boot_params.hdr.syssize; /* Size in 16-byte paragraphs */
53
54 while (syssize) {
55 int paras = (syssize >= 0x1000) ? 0x1000 : syssize;
56 int dwords = paras << 2;
57
58 asm volatile("pushw %%es ; "
59 "pushw %%ds ; "
60 "movw %1,%%es ; "
61 "movw %2,%%ds ; "
62 "xorw %%di,%%di ; "
63 "xorw %%si,%%si ; "
64 "rep;movsl ; "
65 "popw %%ds ; "
66 "popw %%es"
67 : "+c" (dwords)
68 : "r" (dst_seg), "r" (src_seg)
69 : "esi", "edi");
70
71 syssize -= paras;
72 dst_seg += paras;
73 src_seg += paras;
74 }
75#endif
76}
77
78/*
79 * Disable all interrupts at the legacy PIC.
80 */
static void mask_all_interrupts(void)
{
	/* Mask register ports of the two legacy PICs */
	enum { PRIMARY_PIC_MASK = 0x21, SECONDARY_PIC_MASK = 0xa1 };

	/* Secondary PIC: mask every interrupt */
	outb(0xff, SECONDARY_PIC_MASK);
	io_delay();

	/* Primary PIC: mask everything except the cascade line */
	outb(0xfb, PRIMARY_PIC_MASK);
	io_delay();
}
88
89/*
90 * Reset IGNNE# if asserted in the FPU.
91 */
92static void reset_coprocessor(void)
93{
94 outb(0, 0xf0);
95 io_delay();
96 outb(0, 0xf1);
97 io_delay();
98}
99
100/*
101 * Set up the GDT
102 */
/*
 * Build an 8-byte segment descriptor as a u64.
 *
 *   flags  16-bit value placed at descriptor bits 40-55; its bits 8-11
 *          overlap the limit field and should be zero
 *   base   32-bit segment base: bits 0-23 go to descriptor bits 16-39,
 *          bits 24-31 to descriptor bits 56-63
 *   limit  20-bit segment limit: bits 0-15 go to descriptor bits 0-15,
 *          bits 16-19 to descriptor bits 48-51
 *
 * All arguments are parenthesised so that expressions may be passed.
 */
#define GDT_ENTRY(flags,base,limit)			\
	(((u64)((base)  & 0xff000000) << 32) |		\
	 ((u64)(flags) << 40) |				\
	 ((u64)((limit) & 0x000f0000) << 32) |		\
	 ((u64)((base)  & 0x00ffffff) << 16) |		\
	 ((u64)((limit) & 0x0000ffff)))
109
/*
 * Memory operand for the lgdtl/lidtl instructions used in this file:
 * a 16-bit length field immediately followed by a 32-bit address.
 * Packed so that no padding is inserted between the two fields.
 */
struct gdt_ptr {
	u16 len;	/* set to the table size in bytes minus one by setup_gdt() */
	u32 ptr;	/* linear address of the table */
} __attribute__((packed));
114
115static void setup_gdt(void)
116{
117 /* There are machines which are known to not boot with the GDT
118 being 8-byte unaligned. Intel recommends 16 byte alignment. */
119 static const u64 boot_gdt[] __attribute__((aligned(16))) = {
120 /* CS: code, read/execute, 4 GB, base 0 */
121 [GDT_ENTRY_BOOT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff),
122 /* DS: data, read/write, 4 GB, base 0 */
123 [GDT_ENTRY_BOOT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
124 };
125 struct gdt_ptr gdt;
126
127 gdt.len = sizeof(boot_gdt)-1;
128 gdt.ptr = (u32)&boot_gdt + (ds() << 4);
129
130 asm volatile("lgdtl %0" : : "m" (gdt));
131}
132
133/*
134 * Set up the IDT
135 */
136static void setup_idt(void)
137{
138 static const struct gdt_ptr null_idt = {0, 0};
139 asm volatile("lidtl %0" : : "m" (null_idt));
140}
141
142/*
143 * Actual invocation sequence
144 */
145void go_to_protected_mode(void)
146{
147 /* Hook before leaving real mode, also disables interrupts */
148 realmode_switch_hook();
149
150 /* Move the kernel/setup to their final resting places */
151 move_kernel_around();
152
153 /* Enable the A20 gate */
154 if (enable_a20()) {
155 puts("A20 gate not responding, unable to boot...\n");
156 die();
157 }
158
159 /* Reset coprocessor (IGNNE#) */
160 reset_coprocessor();
161
162 /* Mask all interrupts in the PIC */
163 mask_all_interrupts();
164
165 /* Actual transition to protected mode... */
166 setup_idt();
167 setup_gdt();
168 protected_mode_jump(boot_params.hdr.code32_start,
169 (u32)&boot_params + (ds() << 4));
170}
diff --git a/arch/i386/boot/pmjump.S b/arch/i386/boot/pmjump.S
new file mode 100644
index 000000000000..2e559233725a
--- /dev/null
+++ b/arch/i386/boot/pmjump.S
@@ -0,0 +1,54 @@
1/* ----------------------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/pmjump.S
13 *
14 * The actual transition into protected mode
15 */
16
17#include <asm/boot.h>
18#include <asm/segment.h>
19
20 .text
21
	.globl	protected_mode_jump
	.type	protected_mode_jump, @function

	.code16

/*
 * void protected_mode_jump(u32 entrypoint, u32 bootparams);
 *
 * The code below takes entrypoint from %eax and bootparams from %edx.
 * It sets the PE bit in %cr0, reloads all data segment registers with
 * __BOOT_DS and far-jumps to __BOOT_CS:entrypoint.  On arrival at the
 * entry point %esi holds bootparams and %ebx is zero.  Never returns.
 */
protected_mode_jump:
	xorl	%ebx, %ebx		# Flag to indicate this is a boot
	movl	%edx, %esi		# Pointer to boot_params table
	movl	%eax, 2f		# Patch ljmpl instruction
	jmp	1f			# Short jump to flush instruction q.

1:
	movw	$__BOOT_DS, %cx		# selector for the data segments

	movl	%cr0, %edx
	orb	$1, %dl			# Protected mode (PE) bit
	movl	%edx, %cr0

	movw	%cx, %ds
	movw	%cx, %es
	movw	%cx, %fs
	movw	%cx, %gs
	movw	%cx, %ss

	# Jump to the 32-bit entrypoint
	# The instruction is hand-assembled so that its offset field
	# (label 2) can be patched by the store above.
	.byte	0x66, 0xea		# ljmpl opcode
2:	.long	0			# offset
	.word	__BOOT_CS		# segment

	.size	protected_mode_jump, .-protected_mode_jump
diff --git a/arch/i386/boot/printf.c b/arch/i386/boot/printf.c
new file mode 100644
index 000000000000..1a09f9309d3c
--- /dev/null
+++ b/arch/i386/boot/printf.c
@@ -0,0 +1,307 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/printf.c
13 *
14 * Oh, it's a waste of space, but oh-so-yummy for debugging. This
15 * version of printf() does not include 64-bit support. "Live with
16 * it."
17 *
18 */
19
20#include "boot.h"
21
/*
 * Parse the run of decimal digits at *s and return its value, leaving
 * *s pointing at the first non-digit character.  Returns 0 (and does
 * not move *s) if there is no digit at *s.
 */
static int skip_atoi(const char **s)
{
	int value = 0;

	for (; isdigit(**s); ++*s)
		value = value * 10 + **s - '0';

	return value;
}
30
31#define ZEROPAD 1 /* pad with zero */
32#define SIGN 2 /* unsigned/signed long */
33#define PLUS 4 /* show plus */
34#define SPACE 8 /* space if plus */
35#define LEFT 16 /* left justified */
36#define SPECIAL 32 /* 0x */
37#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
38
/*
 * Divide n in place by base and evaluate to the remainder.  n must be
 * a modifiable lvalue; it is treated as unsigned long and base as
 * unsigned.  Both arguments are parenthesised so that compound
 * expressions such as do_div(x, a + b) divide by the whole expression.
 * Note that n and base are each evaluated twice.
 */
#define do_div(n,base) ({ \
int __res; \
__res = ((unsigned long) (n)) % (unsigned) (base); \
(n) = ((unsigned long) (n)) / (unsigned) (base); \
__res; })
44
45static char *number(char *str, long num, int base, int size, int precision,
46 int type)
47{
48 char c, sign, tmp[66];
49 const char *digits = "0123456789abcdefghijklmnopqrstuvwxyz";
50 int i;
51
52 if (type & LARGE)
53 digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
54 if (type & LEFT)
55 type &= ~ZEROPAD;
56 if (base < 2 || base > 36)
57 return 0;
58 c = (type & ZEROPAD) ? '0' : ' ';
59 sign = 0;
60 if (type & SIGN) {
61 if (num < 0) {
62 sign = '-';
63 num = -num;
64 size--;
65 } else if (type & PLUS) {
66 sign = '+';
67 size--;
68 } else if (type & SPACE) {
69 sign = ' ';
70 size--;
71 }
72 }
73 if (type & SPECIAL) {
74 if (base == 16)
75 size -= 2;
76 else if (base == 8)
77 size--;
78 }
79 i = 0;
80 if (num == 0)
81 tmp[i++] = '0';
82 else
83 while (num != 0)
84 tmp[i++] = digits[do_div(num, base)];
85 if (i > precision)
86 precision = i;
87 size -= precision;
88 if (!(type & (ZEROPAD + LEFT)))
89 while (size-- > 0)
90 *str++ = ' ';
91 if (sign)
92 *str++ = sign;
93 if (type & SPECIAL) {
94 if (base == 8)
95 *str++ = '0';
96 else if (base == 16) {
97 *str++ = '0';
98 *str++ = digits[33];
99 }
100 }
101 if (!(type & LEFT))
102 while (size-- > 0)
103 *str++ = c;
104 while (i < precision--)
105 *str++ = '0';
106 while (i-- > 0)
107 *str++ = tmp[i];
108 while (size-- > 0)
109 *str++ = ' ';
110 return str;
111}
112
/*
 * Format fmt with the arguments in args into buf and NUL-terminate it.
 * Returns the number of characters written, not counting the NUL.
 *
 * Supported conversions: c s p n % o x X d i u, with the flags
 * - + space # 0, an optional field width and precision (either may be
 * '*'), and the qualifiers h and l.  The qualifier L is parsed but has
 * no effect on how the argument is fetched.
 *
 * There is no bounds checking: buf must be large enough for the result.
 */
int vsprintf(char *buf, const char *fmt, va_list args)
{
	int len;
	unsigned long num;
	int i, base;
	char *str;
	const char *s;

	int flags;		/* flags to number() */

	int field_width;	/* width of output field */
	int precision;		/* min. # of digits for integers; max
				   number of chars for from string */
	int qualifier;		/* 'h', 'l', or 'L' for integer fields */

	for (str = buf; *fmt; ++fmt) {
		/* Ordinary characters are copied through unchanged */
		if (*fmt != '%') {
			*str++ = *fmt;
			continue;
		}

		/* process flags */
		flags = 0;
	      repeat:
		++fmt;		/* this also skips first '%' */
		switch (*fmt) {
		case '-':
			flags |= LEFT;
			goto repeat;
		case '+':
			flags |= PLUS;
			goto repeat;
		case ' ':
			flags |= SPACE;
			goto repeat;
		case '#':
			flags |= SPECIAL;
			goto repeat;
		case '0':
			flags |= ZEROPAD;
			goto repeat;
		}

		/* get field width */
		field_width = -1;
		if (isdigit(*fmt))
			field_width = skip_atoi(&fmt);
		else if (*fmt == '*') {
			++fmt;
			/* it's the next argument */
			field_width = va_arg(args, int);
			/* a negative width means left justification */
			if (field_width < 0) {
				field_width = -field_width;
				flags |= LEFT;
			}
		}

		/* get the precision */
		precision = -1;
		if (*fmt == '.') {
			++fmt;
			if (isdigit(*fmt))
				precision = skip_atoi(&fmt);
			else if (*fmt == '*') {
				++fmt;
				/* it's the next argument */
				precision = va_arg(args, int);
			}
			/* a bare '.' or a negative '*' value means 0 */
			if (precision < 0)
				precision = 0;
		}

		/* get the conversion qualifier */
		qualifier = -1;
		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') {
			qualifier = *fmt;
			++fmt;
		}

		/* default base */
		base = 10;

		switch (*fmt) {
		case 'c':
			if (!(flags & LEFT))
				while (--field_width > 0)
					*str++ = ' ';
			*str++ = (unsigned char)va_arg(args, int);
			while (--field_width > 0)
				*str++ = ' ';
			continue;

		case 's':
			s = va_arg(args, char *);
			/* precision is still -1 here when none was given;
			   presumably strnlen() then treats it as "no
			   limit" - confirm against its prototype */
			len = strnlen(s, precision);

			if (!(flags & LEFT))
				while (len < field_width--)
					*str++ = ' ';
			for (i = 0; i < len; ++i)
				*str++ = *s++;
			while (len < field_width--)
				*str++ = ' ';
			continue;

		case 'p':
			/* default: zero-padded, two hex digits per byte */
			if (field_width == -1) {
				field_width = 2 * sizeof(void *);
				flags |= ZEROPAD;
			}
			str = number(str,
				     (unsigned long)va_arg(args, void *), 16,
				     field_width, precision, flags);
			continue;

		case 'n':
			/* store the number of characters written so far */
			if (qualifier == 'l') {
				long *ip = va_arg(args, long *);
				*ip = (str - buf);
			} else {
				int *ip = va_arg(args, int *);
				*ip = (str - buf);
			}
			continue;

		case '%':
			*str++ = '%';
			continue;

			/* integer number formats - set up the flags and "break" */
		case 'o':
			base = 8;
			break;

		case 'X':
			flags |= LARGE;
			/* fall through */
		case 'x':
			base = 16;
			break;

		case 'd':
		case 'i':
			flags |= SIGN;
			/* fall through */
		case 'u':
			break;

		default:
			/* unknown conversion: emit it literally; if the
			   format ended after '%', step back so the outer
			   loop sees the terminating NUL */
			*str++ = '%';
			if (*fmt)
				*str++ = *fmt;
			else
				--fmt;
			continue;
		}
		/* fetch the integer argument according to the qualifier */
		if (qualifier == 'l')
			num = va_arg(args, unsigned long);
		else if (qualifier == 'h') {
			num = (unsigned short)va_arg(args, int);
			if (flags & SIGN)
				num = (short)num;
		} else if (flags & SIGN)
			num = va_arg(args, int);
		else
			num = va_arg(args, unsigned int);
		str = number(str, num, base, field_width, precision, flags);
	}
	*str = '\0';
	return str - buf;
}
282
/*
 * Format into buf according to fmt; thin variadic wrapper around
 * vsprintf().  Returns whatever vsprintf() returns.
 */
int sprintf(char *buf, const char *fmt, ...)
{
	va_list ap;
	int count;

	va_start(ap, fmt);
	count = vsprintf(buf, fmt, ap);
	va_end(ap);

	return count;
}
293
/*
 * Format according to fmt into a 1024-byte stack buffer and print the
 * result with puts().  Returns the value returned by vsprintf().
 * The formatted text, including its NUL, must fit in the buffer.
 */
int printf(const char *fmt, ...)
{
	char printf_buf[1024];
	va_list ap;
	int count;

	va_start(ap, fmt);
	count = vsprintf(printf_buf, fmt, ap);
	va_end(ap);

	puts(printf_buf);

	return count;
}
diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S
deleted file mode 100644
index 6dbcc95b2120..000000000000
--- a/arch/i386/boot/setup.S
+++ /dev/null
@@ -1,1075 +0,0 @@
1/*
2 * setup.S Copyright (C) 1991, 1992 Linus Torvalds
3 *
4 * setup.s is responsible for getting the system data from the BIOS,
5 * and putting them into the appropriate places in system memory.
6 * both setup.s and system has been loaded by the bootblock.
7 *
8 * This code asks the bios for memory/disk/other parameters, and
9 * puts them in a "safe" place: 0x90000-0x901FF, ie where the
10 * boot-block used to be. It is then up to the protected mode
11 * system to read them from there before the area is overwritten
12 * for buffer-blocks.
13 *
14 * Move PS/2 aux init code to psaux.c
15 * (troyer@saifr00.cfsat.Honeywell.COM) 03Oct92
16 *
17 * some changes and additional features by Christoph Niemann,
18 * March 1993/June 1994 (Christoph.Niemann@linux.org)
19 *
20 * add APM BIOS checking by Stephen Rothwell, May 1994
21 * (sfr@canb.auug.org.au)
22 *
23 * High load stuff, initrd support and position independency
24 * by Hans Lermen & Werner Almesberger, February 1996
25 * <lermen@elserv.ffm.fgan.de>, <almesber@lrc.epfl.ch>
26 *
27 * Video handling moved to video.S by Martin Mares, March 1996
28 * <mj@k332.feld.cvut.cz>
29 *
30 * Extended memory detection scheme retwiddled by orc@pell.chi.il.us (david
31 * parsons) to avoid loadlin confusion, July 1997
32 *
33 * Transcribed from Intel (as86) -> AT&T (gas) by Chris Noe, May 1999.
34 * <stiker@northlink.com>
35 *
36 * Fix to work around buggy BIOSes which don't use carry bit correctly
37 * and/or report extended memory in CX/DX for e801h memory size detection
38 * call. As a result the kernel got wrong figures. The int15/e801h docs
39 * from Ralf Brown interrupt list seem to indicate AX/BX should be used
40 * anyway. So to avoid breaking many machines (presumably there was a reason
41 * to originally use CX/DX instead of AX/BX), we do a kludge to see
42 * if CX/DX have been changed in the e801 call and if so use AX/BX .
43 * Michael Miller, April 2001 <michaelm@mjmm.org>
44 *
45 * New A20 code ported from SYSLINUX by H. Peter Anvin. AMD Elan bugfixes
46 * by Robert Schwebel, December 2001 <robert@schwebel.de>
47 */
48
49#include <asm/segment.h>
50#include <linux/utsrelease.h>
51#include <linux/compile.h>
52#include <asm/boot.h>
53#include <asm/e820.h>
54#include <asm/page.h>
55#include <asm/setup.h>
56
57/* Signature words to ensure LILO loaded us right */
58#define SIG1 0xAA55
59#define SIG2 0x5A5A
60
61INITSEG = DEF_INITSEG # 0x9000, we move boot here, out of the way
62SYSSEG = DEF_SYSSEG # 0x1000, system loaded at 0x10000 (65536).
63SETUPSEG = DEF_SETUPSEG # 0x9020, this is the current segment
64 # ... and the former contents of CS
65
66DELTA_INITSEG = SETUPSEG - INITSEG # 0x0020
67
68.code16
69.globl begtext, begdata, begbss, endtext, enddata, endbss
70
71.text
72begtext:
73.data
74begdata:
75.bss
76begbss:
77.text
78
79start:
80 jmp trampoline
81
82# This is the setup header, and it must start at %cs:2 (old 0x9020:2)
83
84 .ascii "HdrS" # header signature
85 .word 0x0206 # header version number (>= 0x0105)
86 # or else old loadlin-1.5 will fail)
87realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
88start_sys_seg: .word SYSSEG
89 .word kernel_version # pointing to kernel version string
90 # above section of header is compatible
91 # with loadlin-1.5 (header v1.5). Don't
92 # change it.
93
94type_of_loader: .byte 0 # = 0, old one (LILO, Loadlin,
95 # Bootlin, SYSLX, bootsect...)
96 # See Documentation/i386/boot.txt for
97 # assigned ids
98
99# flags, unused bits must be zero (RFU) bit within loadflags
100loadflags:
101LOADED_HIGH = 1 # If set, the kernel is loaded high
102CAN_USE_HEAP = 0x80 # If set, the loader also has set
103 # heap_end_ptr to tell how much
104 # space behind setup.S can be used for
105 # heap purposes.
106 # Only the loader knows what is free
107#ifndef __BIG_KERNEL__
108 .byte 0
109#else
110 .byte LOADED_HIGH
111#endif
112
113setup_move_size: .word 0x8000 # size to move, when setup is not
114 # loaded at 0x90000. We will move setup
115 # to 0x90000 then just before jumping
116 # into the kernel. However, only the
117 # loader knows how much data behind
118 # us also needs to be loaded.
119
120code32_start: # here loaders can put a different
121 # start address for 32-bit code.
122#ifndef __BIG_KERNEL__
123 .long 0x1000 # 0x1000 = default for zImage
124#else
125 .long 0x100000 # 0x100000 = default for big kernel
126#endif
127
128ramdisk_image: .long 0 # address of loaded ramdisk image
129 # Here the loader puts the 32-bit
130 # address where it loaded the image.
131 # This only will be read by the kernel.
132
133ramdisk_size: .long 0 # its size in bytes
134
135bootsect_kludge:
136 .long 0 # obsolete
137
138heap_end_ptr: .word modelist+1024 # (Header version 0x0201 or later)
139 # space from here (exclusive) down to
140 # end of setup code can be used by setup
141 # for local heap purposes.
142
143pad1: .word 0
144cmd_line_ptr: .long 0 # (Header version 0x0202 or later)
145 # If nonzero, a 32-bit pointer
146 # to the kernel command line.
147 # The command line should be
148 # located between the start of
149 # setup and the end of low
150 # memory (0xa0000), or it may
151 # get overwritten before it
152 # gets read. If this field is
153 # used, there is no longer
154 # anything magical about the
155 # 0x90000 segment; the setup
156 # can be located anywhere in
157 # low memory 0x10000 or higher.
158
159ramdisk_max: .long (-__PAGE_OFFSET-(512 << 20)-1) & 0x7fffffff
160 # (Header version 0x0203 or later)
161 # The highest safe address for
162 # the contents of an initrd
163
164kernel_alignment: .long CONFIG_PHYSICAL_ALIGN #physical addr alignment
165 #required for protected mode
166 #kernel
167#ifdef CONFIG_RELOCATABLE
168relocatable_kernel: .byte 1
169#else
170relocatable_kernel: .byte 0
171#endif
172pad2: .byte 0
173pad3: .word 0
174
175cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
176 #added with boot protocol
177 #version 2.06
178
179trampoline: call start_of_setup
180 .align 16
181 # The offset at this point is 0x240
182 .space (0xeff-0x240+1) # E820 & EDD space (ending at 0xeff)
183# End of setup header #####################################################
184
185start_of_setup:
186# Bootlin depends on this being done early
187 movw $0x01500, %ax
188 movb $0x81, %dl
189 int $0x13
190
191#ifdef SAFE_RESET_DISK_CONTROLLER
192# Reset the disk controller.
193 movw $0x0000, %ax
194 movb $0x80, %dl
195 int $0x13
196#endif
197
198# Set %ds = %cs, we know that SETUPSEG = %cs at this point
199 movw %cs, %ax # aka SETUPSEG
200 movw %ax, %ds
201# Check signature at end of setup
202 cmpw $SIG1, setup_sig1
203 jne bad_sig
204
205 cmpw $SIG2, setup_sig2
206 jne bad_sig
207
208 jmp good_sig1
209
210# Routine to print asciiz string at ds:si
# Bytes are fetched one at a time with lodsb and handed to prtchr until a
# zero byte is read.  On return %al is zero and %si has been moved past the
# terminator (assumes the direction flag is clear -- TODO confirm at call
# sites).  prtchr overwrites %bx, so %bx is not preserved either.
211prtstr:
212 lodsb # %al = byte at %ds:(%si), then step %si
213 andb %al, %al # sets ZF when the byte is the NUL terminator
214 jz fin
215
216 call prtchr
217 jmp prtstr
218
219fin: ret
220
221# Space printing
# prtsp2, prtspc and prtchr form a fall-through chain: prtsp2 calls prtspc
# once and then falls into it again, and prtspc loads a blank into %al and
# falls into prtchr.
222prtsp2: call prtspc # Print double space
223prtspc: movb $0x20, %al # Print single space (note: fall-thru)
224
225# Part of above routine, this one just prints ascii al
# Uses BIOS int 0x10 with %ah = 0x0e (teletype output).  %ax and %cx are
# saved and restored around the call; %bx is overwritten and NOT restored.
226prtchr: pushw %ax
227 pushw %cx
228 movw $7,%bx # %bh = 0 (page), %bl = 7 (attribute/colour)
229 movw $0x01, %cx # NOTE(review): count of 1; confirm int 0x10/0x0e needs %cx
230 movb $0x0e, %ah # BIOS function: teletype output of %al
231 int $0x10
232 popw %cx
233 popw %ax
234 ret
235
# beep: print the BEL character (0x07) through prtchr; returns via prtchr's ret.
236beep: movb $0x07, %al
237 jmp prtchr
238
239no_sig_mess: .string "No setup signature found ..."
240
241good_sig1:
242 jmp good_sig
243
244# We now have to find the rest of the setup code/data
245bad_sig:
246 movw %cs, %ax # SETUPSEG
247 subw $DELTA_INITSEG, %ax # INITSEG
248 movw %ax, %ds
249 xorb %bh, %bh
250 movb (497), %bl # get setup sect from bootsect
251 subw $4, %bx # LILO loads 4 sectors of setup
252 shlw $8, %bx # convert to words (1sect=2^8 words)
253 movw %bx, %cx
254 shrw $3, %bx # convert to segment
255 addw $SYSSEG, %bx
256 movw %bx, %cs:start_sys_seg
257# Move rest of setup code/data to here
258 movw $2048, %di # four sectors loaded by LILO
259 subw %si, %si
260 pushw %cs
261 popw %es
262 movw $SYSSEG, %ax
263 movw %ax, %ds
264 rep
265 movsw
266 movw %cs, %ax # aka SETUPSEG
267 movw %ax, %ds
268 cmpw $SIG1, setup_sig1
269 jne no_sig
270
271 cmpw $SIG2, setup_sig2
272 jne no_sig
273
274 jmp good_sig
275
276no_sig:
277 lea no_sig_mess, %si
278 call prtstr
279
280no_sig_loop:
281 hlt
282 jmp no_sig_loop
283
284good_sig:
285 movw %cs, %ax # aka SETUPSEG
286 subw $DELTA_INITSEG, %ax # aka INITSEG
287 movw %ax, %ds
288# Check if an old loader tries to load a big-kernel
289 testb $LOADED_HIGH, %cs:loadflags # Do we have a big kernel?
290 jz loader_ok # No, no danger for old loaders.
291
292 cmpb $0, %cs:type_of_loader # Do we have a loader that
293 # can deal with us?
294 jnz loader_ok # Yes, continue.
295
296 pushw %cs # No, we have an old loader,
297 popw %ds # die.
298 lea loader_panic_mess, %si
299 call prtstr
300
301 jmp no_sig_loop
302
303loader_panic_mess: .string "Wrong loader, giving up..."
304
305# check minimum cpuid
306# we do this here because it is the last place we can actually
307# show a user visible error message. Later the video mode
308# might be already messed up.
309loader_ok:
310 call verify_cpu
311 testl %eax,%eax
312 jz cpu_ok
313 movw %cs,%ax # aka SETUPSEG
314 movw %ax,%ds
315 lea cpu_panic_mess,%si
316 call prtstr
3171: jmp 1b
318
319cpu_panic_mess:
320 .asciz "PANIC: CPU too old for this kernel."
321
322#include "../kernel/verify_cpu.S"
323
324cpu_ok:
325# Get memory size (extended mem, kB)
326
327 xorl %eax, %eax
328 movl %eax, (0x1e0)
329#ifndef STANDARD_MEMORY_BIOS_CALL
330 movb %al, (E820NR)
331# Try three different memory detection schemes. First, try
332# e820h, which lets us assemble a memory map, then try e801h,
333# which returns a 32-bit memory size, and finally 88h, which
334# returns 0-64m
335
336# method E820H:
337# the memory map from hell. e820h returns memory classified into
338# a whole bunch of different types, and allows memory holes and
339# everything. We scan through this memory map and build a list
340# of the first 32 memory areas, which we return at [E820MAP].
341# This is documented at http://www.acpi.info/, in the ACPI 2.0 specification.
342
343#define SMAP 0x534d4150
344
345meme820:
346 xorl %ebx, %ebx # continuation counter
347 movw $E820MAP, %di # point into the whitelist
348 # so we can have the bios
349 # directly write into it.
350
351jmpe820:
352 movl $0x0000e820, %eax # e820, upper word zeroed
353 movl $SMAP, %edx # ascii 'SMAP'
354 movl $20, %ecx # size of the e820rec
355 pushw %ds # data record.
356 popw %es
357 int $0x15 # make the call
358 jc bail820 # fall to e801 if it fails
359
360 cmpl $SMAP, %eax # check the return is `SMAP'
361 jne bail820 # fall to e801 if it fails
362
363# cmpl $1, 16(%di) # is this usable memory?
364# jne again820
365
366 # If this is usable memory, we save it by simply advancing %di by
367 # sizeof(e820rec).
368 #
369good820:
370 movb (E820NR), %al # up to 128 entries
371 cmpb $E820MAX, %al
372 jae bail820
373
374 incb (E820NR)
375 movw %di, %ax
376 addw $20, %ax
377 movw %ax, %di
378again820:
379 cmpl $0, %ebx # check to see if
380 jne jmpe820 # %ebx is set to EOF
381bail820:
382
383
384# method E801H:
385# memory size is in 1k chunksizes, to avoid confusing loadlin.
386# we store the 0xe801 memory size in a completely different place,
387# because it will most likely be longer than 16 bits.
388# (use 1e0 because that's what Larry Augustine uses in his
389# alternative new memory detection scheme, and it's sensible
390# to write everything into the same place.)
391
392meme801:
393 stc # fix to work around buggy
394 xorw %cx,%cx # BIOSes which don't clear/set
395 xorw %dx,%dx # carry on pass/error of
396 # e801h memory size call
397 # or merely pass cx,dx through
398 # without changing them.
399 movw $0xe801, %ax
400 int $0x15
401 jc mem88
402
403 cmpw $0x0, %cx # Kludge to handle BIOSes
404 jne e801usecxdx # which report their extended
405 cmpw $0x0, %dx # memory in AX/BX rather than
406 jne e801usecxdx # CX/DX. The spec I have read
407 movw %ax, %cx # seems to indicate AX/BX
408 movw %bx, %dx # are more reasonable anyway...
409
410e801usecxdx:
411 andl $0xffff, %edx # clear sign extend
412 shll $6, %edx # and go from 64k to 1k chunks
413 movl %edx, (0x1e0) # store extended memory size
414 andl $0xffff, %ecx # clear sign extend
415 addl %ecx, (0x1e0) # and add lower memory into
416 # total size.
417
418# Ye Olde Traditional Methode. Returns the memory size (up to 16mb or
419# 64mb, depending on the bios) in ax.
420mem88:
421
422#endif
423 movb $0x88, %ah
424 int $0x15
425 movw %ax, (2)
426
427# Set the keyboard repeat rate to the max
428 movw $0x0305, %ax
429 xorw %bx, %bx
430 int $0x16
431
432# Check for video adapter and its parameters and allow the
433# user to browse video modes.
434 call video # NOTE: we need %ds pointing
435 # to bootsector
436
437# Get hd0 data...
438 xorw %ax, %ax
439 movw %ax, %ds
440 ldsw (4 * 0x41), %si
441 movw %cs, %ax # aka SETUPSEG
442 subw $DELTA_INITSEG, %ax # aka INITSEG
443 pushw %ax
444 movw %ax, %es
445 movw $0x0080, %di
446 movw $0x10, %cx
447 pushw %cx
448 cld
449 rep
450 movsb
451# Get hd1 data...
452 xorw %ax, %ax
453 movw %ax, %ds
454 ldsw (4 * 0x46), %si
455 popw %cx
456 popw %es
457 movw $0x0090, %di
458 rep
459 movsb
460# Check that there IS a hd1 :-)
461 movw $0x01500, %ax
462 movb $0x81, %dl
463 int $0x13
464 jc no_disk1
465
466 cmpb $3, %ah
467 je is_disk1
468
469no_disk1:
470 movw %cs, %ax # aka SETUPSEG
471 subw $DELTA_INITSEG, %ax # aka INITSEG
472 movw %ax, %es
473 movw $0x0090, %di
474 movw $0x10, %cx
475 xorw %ax, %ax
476 cld
477 rep
478 stosb
479is_disk1:
480# check for Micro Channel (MCA) bus
481 movw %cs, %ax # aka SETUPSEG
482 subw $DELTA_INITSEG, %ax # aka INITSEG
483 movw %ax, %ds
484 xorw %ax, %ax
485 movw %ax, (0xa0) # set table length to 0
486 movb $0xc0, %ah
487 stc
488 int $0x15 # moves feature table to es:bx
489 jc no_mca
490
491 pushw %ds
492 movw %es, %ax
493 movw %ax, %ds
494 movw %cs, %ax # aka SETUPSEG
495 subw $DELTA_INITSEG, %ax # aka INITSEG
496 movw %ax, %es
497 movw %bx, %si
498 movw $0xa0, %di
499 movw (%si), %cx
500 addw $2, %cx # table length is a short
501 cmpw $0x10, %cx
502 jc sysdesc_ok
503
504 movw $0x10, %cx # we keep only first 16 bytes
505sysdesc_ok:
506 rep
507 movsb
508 popw %ds
509no_mca:
510#ifdef CONFIG_X86_VOYAGER
511 movb $0xff, 0x40 # flag on config found
512 movb $0xc0, %al
513 mov $0xff, %ah
514 int $0x15 # put voyager config info at es:di
515 jc no_voyager
516 movw $0x40, %si # place voyager info in apm table
517 cld
518 movw $7, %cx
519voyager_rep:
520 movb %es:(%di), %al
521 movb %al,(%si)
522 incw %di
523 incw %si
524 decw %cx
525 jnz voyager_rep
526no_voyager:
527#endif
528# Check for PS/2 pointing device
529 movw %cs, %ax # aka SETUPSEG
530 subw $DELTA_INITSEG, %ax # aka INITSEG
531 movw %ax, %ds
532 movb $0, (0x1ff) # default is no pointing device
533 int $0x11 # int 0x11: equipment list
534 testb $0x04, %al # check if mouse installed
535 jz no_psmouse
536
537 movb $0xAA, (0x1ff) # device present
538no_psmouse:
539
540#if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
541 movl $0x0000E980, %eax # IST Support
542 movl $0x47534943, %edx # Request value
543 int $0x15
544
545 movl %eax, (96)
546 movl %ebx, (100)
547 movl %ecx, (104)
548 movl %edx, (108)
549#endif
550
551#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
552# Then check for an APM BIOS...
553 # %ds points to the bootsector
554 movw $0, 0x40 # version = 0 means no APM BIOS
555 movw $0x05300, %ax # APM BIOS installation check
556 xorw %bx, %bx
557 int $0x15
558 jc done_apm_bios # Nope, no APM BIOS
559
560 cmpw $0x0504d, %bx # Check for "PM" signature
561 jne done_apm_bios # No signature, no APM BIOS
562
563 andw $0x02, %cx # Is 32 bit supported?
564 je done_apm_bios # No 32-bit, no (good) APM BIOS
565
566 movw $0x05304, %ax # Disconnect first just in case
567 xorw %bx, %bx
568 int $0x15 # ignore return code
569 movw $0x05303, %ax # 32 bit connect
570 xorl %ebx, %ebx
571 xorw %cx, %cx # paranoia :-)
572 xorw %dx, %dx # ...
573 xorl %esi, %esi # ...
574 xorw %di, %di # ...
575 int $0x15
576 jc no_32_apm_bios # Ack, error.
577
578 movw %ax, (66) # BIOS code segment
579 movl %ebx, (68) # BIOS entry point offset
580 movw %cx, (72) # BIOS 16 bit code segment
581 movw %dx, (74) # BIOS data segment
582 movl %esi, (78) # BIOS code segment lengths
583 movw %di, (82) # BIOS data segment length
584# Redo the installation check as the 32 bit connect
585# modifies the flags returned on some BIOSs
586 movw $0x05300, %ax # APM BIOS installation check
587 xorw %bx, %bx
588 xorw %cx, %cx # paranoia
589 int $0x15
590 jc apm_disconnect # error -> shouldn't happen
591
592 cmpw $0x0504d, %bx # check for "PM" signature
593 jne apm_disconnect # no sig -> shouldn't happen
594
595 movw %ax, (64) # record the APM BIOS version
596 movw %cx, (76) # and flags
597 jmp done_apm_bios
598
599apm_disconnect: # Tidy up
600 movw $0x05304, %ax # Disconnect
601 xorw %bx, %bx
602 int $0x15 # ignore return code
603
604 jmp done_apm_bios
605
606no_32_apm_bios:
607 andw $0xfffd, (76) # remove 32 bit support bit
608done_apm_bios:
609#endif
610
611#include "edd.S"
612
613# Now we want to move to protected mode ...
614 cmpw $0, %cs:realmode_swtch
615 jz rmodeswtch_normal
616
617 lcall *%cs:realmode_swtch
618
619 jmp rmodeswtch_end
620
621rmodeswtch_normal:
622 pushw %cs
623 call default_switch
624
625rmodeswtch_end:
626# Now we move the system to its rightful place ... but we check if we have a
627# big-kernel. In that case we *must* not move it ...
628 testb $LOADED_HIGH, %cs:loadflags
629 jz do_move0 # .. then we have a normal low
630 # loaded zImage
631 # .. or else we have a high
632 # loaded bzImage
633 jmp end_move # ... and we skip moving
634
635do_move0:
636 movw $0x100, %ax # start of destination segment
637 movw %cs, %bp # aka SETUPSEG
638 subw $DELTA_INITSEG, %bp # aka INITSEG
639 movw %cs:start_sys_seg, %bx # start of source segment
640 cld
641do_move:
642 movw %ax, %es # destination segment
643 incb %ah # instead of add ax,#0x100
644 movw %bx, %ds # source segment
645 addw $0x100, %bx
646 subw %di, %di
647 subw %si, %si
648 movw $0x800, %cx
649 rep
650 movsw
651 cmpw %bp, %bx # assume start_sys_seg > 0x200,
652 # so we will perhaps read one
653 # page more than needed, but
654 # never overwrite INITSEG
655 # because destination is a
656 # minimum one page below source
657 jb do_move
658
659end_move:
660# then we load the segment descriptors
661 movw %cs, %ax # aka SETUPSEG
662 movw %ax, %ds
663
664# Check whether we need to be downward compatible with version <=201
665 cmpl $0, cmd_line_ptr
666 jne end_move_self # loader uses version >=202 features
667 cmpb $0x20, type_of_loader
668 je end_move_self # bootsect loader, we know of it
669
670# Boot loader doesn't support boot protocol version 2.02.
671# If we have our code not at 0x90000, we need to move it there now.
672# We also then need to move the params behind it (commandline)
673# Because we would overwrite the code on the current IP, we move
674# it in two steps, jumping high after the first one.
675 movw %cs, %ax
676 cmpw $SETUPSEG, %ax
677 je end_move_self
678
679 cli # make sure we really have
680 # interrupts disabled !
681 # because after this the stack
682 # should not be used
683 subw $DELTA_INITSEG, %ax # aka INITSEG
684 movw %ss, %dx
685 cmpw %ax, %dx
686 jb move_self_1
687
688 addw $INITSEG, %dx
689 subw %ax, %dx # this will go into %ss after
690 # the move
691move_self_1:
692 movw %ax, %ds
693 movw $INITSEG, %ax # real INITSEG
694 movw %ax, %es
695 movw %cs:setup_move_size, %cx
696 std # we have to move up, so we use
697 # direction down because the
698 # areas may overlap
699 movw %cx, %di
700 decw %di
701 movw %di, %si
702 subw $move_self_here+0x200, %cx
703 rep
704 movsb
705 ljmp $SETUPSEG, $move_self_here
706
707move_self_here:
708 movw $move_self_here+0x200, %cx
709 rep
710 movsb
711 movw $SETUPSEG, %ax
712 movw %ax, %ds
713 movw %dx, %ss
714end_move_self: # now we are at the right place
715
716#
717# Enable A20. This is at the very best an annoying procedure.
718# A20 code ported from SYSLINUX 1.52-1.63 by H. Peter Anvin.
719# AMD Elan bug fix by Robert Schwebel.
720#
721
722#if defined(CONFIG_X86_ELAN)
723 movb $0x02, %al # alternate A20 gate
724 outb %al, $0x92 # this works on SC410/SC520
725a20_elan_wait:
726 call a20_test
727 jz a20_elan_wait
728 jmp a20_done
729#endif
730
731
732A20_TEST_LOOPS = 32 # Iterations per wait
733A20_ENABLE_LOOPS = 255 # Total loops to try
734
735
736#ifndef CONFIG_X86_VOYAGER
737a20_try_loop:
738
739 # First, see if we are on a system with no A20 gate.
740a20_none:
741 call a20_test
742 jnz a20_done
743
744 # Next, try the BIOS (INT 0x15, AX=0x2401)
745a20_bios:
746 movw $0x2401, %ax
747 pushfl # Be paranoid about flags
748 int $0x15
749 popfl
750
751 call a20_test
752 jnz a20_done
753
754 # Try enabling A20 through the keyboard controller
755#endif /* CONFIG_X86_VOYAGER */
756a20_kbc:
757 call empty_8042
758
759#ifndef CONFIG_X86_VOYAGER
760 call a20_test # Just in case the BIOS worked
761 jnz a20_done # but had a delayed reaction.
762#endif
763
764 movb $0xD1, %al # command write
765 outb %al, $0x64
766 call empty_8042
767
768 movb $0xDF, %al # A20 on
769 outb %al, $0x60
770 call empty_8042
771
772#ifndef CONFIG_X86_VOYAGER
773 # Wait until a20 really *is* enabled; it can take a fair amount of
774 # time on certain systems; Toshiba Tecras are known to have this
775 # problem.
776a20_kbc_wait:
777 xorw %cx, %cx
778a20_kbc_wait_loop:
779 call a20_test
780 jnz a20_done
781 loop a20_kbc_wait_loop
782
783 # Final attempt: use "configuration port A"
784a20_fast:
785 inb $0x92, %al # Configuration Port A
786 orb $0x02, %al # "fast A20" version
787 andb $0xFE, %al # don't accidentally reset
788 outb %al, $0x92
789
790 # Wait for configuration port A to take effect
791a20_fast_wait:
792 xorw %cx, %cx
793a20_fast_wait_loop:
794 call a20_test
795 jnz a20_done
796 loop a20_fast_wait_loop
797
798 # A20 is still not responding. Try frobbing it again.
799 #
800 decb (a20_tries)
801 jnz a20_try_loop
802
803 movw $a20_err_msg, %si
804 call prtstr
805
806a20_die:
807 hlt
808 jmp a20_die
809
810a20_tries:
811 .byte A20_ENABLE_LOOPS
812
813a20_err_msg:
814 .ascii "linux: fatal error: A20 gate not responding!"
815 .byte 13, 10, 0
816
817 # If we get here, all is good
818a20_done:
819
820#endif /* CONFIG_X86_VOYAGER */
821# set up gdt and idt and 32bit start address
822 lidt idt_48 # load idt with 0,0
823 xorl %eax, %eax # Compute gdt_base
824 movw %ds, %ax # (Convert %ds:gdt to a linear ptr)
825 shll $4, %eax
826 addl %eax, code32
827 addl $gdt, %eax
828 movl %eax, (gdt_48+2)
829 lgdt gdt_48 # load gdt with whatever is
830 # appropriate
831
832# make sure any possible coprocessor is properly reset..
833 xorw %ax, %ax
834 outb %al, $0xf0
835 call delay
836
837 outb %al, $0xf1
838 call delay
839
840# well, that went ok, I hope. Now we mask all interrupts - the rest
841# is done in init_IRQ().
842 movb $0xFF, %al # mask all interrupts for now
843 outb %al, $0xA1
844 call delay
845
846 movb $0xFB, %al # mask all irq's but irq2 which
847 outb %al, $0x21 # is cascaded
848
849# Well, that certainly wasn't fun :-(. Hopefully it works, and we don't
850# need no steenking BIOS anyway (except for the initial loading :-).
851# The BIOS-routine wants lots of unnecessary data, and it's less
852# "interesting" anyway. This is how REAL programmers do it.
853#
854# Well, now's the time to actually move into protected mode. To make
855# things as simple as possible, we do no register set-up or anything,
856# we let the gnu-compiled 32-bit programs do that. We just jump to
857# absolute address 0x1000 (or the loader supplied one),
858# in 32-bit protected mode.
859#
860# Note that the short jump isn't strictly needed, although there are
861# reasons why it might be a good idea. It won't hurt in any case.
862 movw $1, %ax # protected mode (PE) bit
863 lmsw %ax # This is it!
864 jmp flush_instr
865
866flush_instr:
867 xorw %bx, %bx # Flag to indicate a boot
868 xorl %esi, %esi # Pointer to real-mode code
869 movw %cs, %si
870 subw $DELTA_INITSEG, %si
871 shll $4, %esi # Convert to 32-bit pointer
872
873# jump to startup_32 in arch/i386/boot/compressed/head.S
874#
875# NOTE: For high loaded big kernels we need a
876# jmpi 0x100000,__BOOT_CS
877#
878# but we yet haven't reloaded the CS register, so the default size
879# of the target offset still is 16 bit.
880# However, using an operand prefix (0x66), the CPU will properly
881# take our 48 bit far pointer. (INTeL 80386 Programmer's Reference
882# Manual, Mixing 16-bit and 32-bit code, page 16-6)
883
884 .byte 0x66, 0xea # prefix + jmpi-opcode
885code32: .long startup_32 # will be set to %cs+startup_32
886 .word __BOOT_CS
887.code32
888startup_32:
889 movl $(__BOOT_DS), %eax
890 movl %eax, %ds
891 movl %eax, %es
892 movl %eax, %fs
893 movl %eax, %gs
894 movl %eax, %ss
895
896 xorl %eax, %eax
8971: incl %eax # check that A20 really IS enabled
898 movl %eax, 0x00000000 # loop forever if it isn't
899 cmpl %eax, 0x00100000
900 je 1b
901
902 # Jump to the 32bit entry point
903 jmpl *(code32_start - start + (DELTA_INITSEG << 4))(%esi)
904.code16
905
906# Here's a bunch of information about your current kernel..
907kernel_version: .ascii UTS_RELEASE
908 .ascii " ("
909 .ascii LINUX_COMPILE_BY
910 .ascii "@"
911 .ascii LINUX_COMPILE_HOST
912 .ascii ") "
913 .ascii UTS_VERSION
914 .byte 0
915
916# This is the default real mode switch routine.
917# to be called just before protected mode transition
# It returns with lret, so it must be entered with a far-call frame on the
# stack (the caller in this file pushes %cs before a near call to get one).
# Clobbers %al.
918default_switch:
919 cli # no interrupts allowed !
920 movb $0x80, %al # disable NMI for bootup
921 # sequence
922 outb %al, $0x70 # write 0x80 to port 0x70 (bit 7 set)
923 lret # far return: pops %ip and %cs
924
925
926#ifndef CONFIG_X86_VOYAGER
927# This routine tests whether or not A20 is enabled. If so, it
928# exits with zf = 0.
929#
930# The memory address used, 0x200, is the int $0x80 vector, which
931# should be safe.
#
# Method: with %fs = 0x0000 and %gs = 0xffff, %fs:0x200 is linear 0x000200
# and %gs:0x210 is linear 0x100200.  If address line 20 is masked, both
# refer to the same word.  A changing value is written through %fs and read
# back through %gs; the two only keep matching when the addresses alias.
# The test is repeated up to A20_TEST_LOOPS times.  The original word at
# %fs:0x200 is restored, and %ax/%cx are preserved; %fs and %gs are changed.
932
933A20_TEST_ADDR = 4*0x80
934
935a20_test:
936 pushw %cx
937 pushw %ax
938 xorw %cx, %cx
939 movw %cx, %fs # Low memory
940 decw %cx # %cx = 0xffff
941 movw %cx, %gs # High memory area
942 movw $A20_TEST_LOOPS, %cx
943 movw %fs:(A20_TEST_ADDR), %ax
944 pushw %ax # keep the original word for restoring below
945a20_test_wait:
946 incw %ax # new test value on every iteration
947 movw %ax, %fs:(A20_TEST_ADDR)
948 call delay # Serialize and make delay constant
949 cmpw %gs:(A20_TEST_ADDR+0x10), %ax
950 loope a20_test_wait # retry while equal (ZF=1) and %cx not exhausted
951
# The pops and ret below do not modify flags, so ZF from the last cmpw
# is what the caller sees.
952 popw %fs:(A20_TEST_ADDR)
953 popw %ax
954 popw %cx
955 ret
956
957#endif /* CONFIG_X86_VOYAGER */
958
959# This routine checks that the keyboard command queue is empty
960# (after emptying the output buffers)
961#
962# Some machines have delusions that the keyboard buffer is always full
963# with no keyboard attached...
964#
965# If there is no keyboard controller, we will usually get 0xff
966# to all the reads. With each IO taking a microsecond and
967# a timeout of 100,000 iterations, this can take about half a
968# second ("delay" == outb to port 0x80). That should be ok,
969# and should also be plenty of time for a real keyboard controller
970# to empty.
971#
# Returns when both status bits 0 and 1 read as clear, or when the
# iteration counter runs out; the caller is not told which happened.
# %ecx is preserved, %al is clobbered.
972
973empty_8042:
974 pushl %ecx
975 movl $100000, %ecx # iteration budget (timeout)
976
977empty_8042_loop:
978 decl %ecx
979 jz empty_8042_end_loop # budget exhausted: give up
980
981 call delay
982
983 inb $0x64, %al # 8042 status port
984 testb $1, %al # output buffer?
985 jz no_output
986
987 call delay
988 inb $0x60, %al # read it
989 jmp empty_8042_loop # value is discarded; poll the status again
990
991no_output:
992 testb $2, %al # is input buffer full?
993 jnz empty_8042_loop # yes - loop
994empty_8042_end_loop:
995 popl %ecx
996 ret
997
998# Read the cmos clock. Return the seconds in al
# The seconds arrive from the BIOS in %dh as two nibbles (tens in the high
# nibble, units in the low nibble); they are split into %ah/%al and merged
# by aad, so %al ends up as tens*10 + units and %ah as 0.
# %cx is preserved; %dx is not saved here.
999gettime:
1000 pushw %cx
1001 movb $0x02, %ah # int 0x1a function 0x02: read real-time clock
1002 int $0x1a
1003 movb %dh, %al # %dh contains the seconds
1004 andb $0x0f, %al # %al = units digit
1005 movb %dh, %ah
1006 movb $0x04, %cl
1007 shrb %cl, %ah # %ah = tens digit
1008 aad # %al = %ah * 10 + %al, %ah = 0
1009 popw %cx
1010 ret
1011
1012# Delay is needed after doing I/O
# The delay is a single byte write of whatever %al currently holds to
# port 0x80; no register is modified.
1013delay:
1014 outb %al,$0x80
1015 ret
1016
1017# Descriptor tables
1018#
1019# NOTE: The intel manual says gdt should be sixteen bytes aligned for
1020# efficiency reasons. However, there are machines which are known not
1021# to boot with misaligned GDTs, so alter this at your peril! If you alter
1022# GDT_ENTRY_BOOT_CS (in asm/segment.h) remember to leave at least two
1023# empty GDT entries (one for NULL and one reserved).
1024#
1025# NOTE: On some CPUs, the GDT must be 8 byte aligned. This is
1026# true for the Voyager Quad CPU card which will not boot without
1027# This directive. 16 byte alignment is recommended by intel.
1028#
1029 .align 16
1030gdt:
1031 .fill GDT_ENTRY_BOOT_CS,8,0
1032
1033 .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
1034 .word 0 # base address = 0
1035 .word 0x9A00 # code read/exec
1036 .word 0x00CF # granularity = 4096, 386
1037 # (+5th nibble of limit)
1038
1039 .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
1040 .word 0 # base address = 0
1041 .word 0x9200 # data read/write
1042 .word 0x00CF # granularity = 4096, 386
1043 # (+5th nibble of limit)
1044gdt_end:
1045 .align 4
1046
1047 .word 0 # alignment byte
1048idt_48:
1049 .word 0 # idt limit = 0
1050 .word 0, 0 # idt base = 0L
1051
1052 .word 0 # alignment byte
1053gdt_48:
1054 .word gdt_end - gdt - 1 # gdt limit
1055 .word 0, 0 # gdt base (filled in later)
1056
1057# Include video setup & detection code
1058
1059#include "video.S"
1060
1061# Setup signature -- must be last
1062setup_sig1: .word SIG1
1063setup_sig2: .word SIG2
1064
1065# After this point, there is some free space which is used by the video mode
1066# handling code to store the temporary mode table (not used by the kernel).
1067
1068modelist:
1069
1070.text
1071endtext:
1072.data
1073enddata:
1074.bss
1075endbss:
diff --git a/arch/i386/boot/setup.ld b/arch/i386/boot/setup.ld
new file mode 100644
index 000000000000..df9234b3a5e0
--- /dev/null
+++ b/arch/i386/boot/setup.ld
@@ -0,0 +1,54 @@
1/*
2 * setup.ld
3 *
4 * Linker script for the i386 setup code
5 */
6OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
7OUTPUT_ARCH(i386)
8ENTRY(_start)
9
10SECTIONS
11{
/* Output starts at offset 0 with the .bstext/.bsdata sections. */
12 . = 0;
13 .bstext : { *(.bstext) }
14 .bsdata : { *(.bsdata) }
15
/* 497 == 0x1f1, the offset the ASSERT on hdr at the bottom insists on. */
16 . = 497;
17 .header : { *(.header) }
18 .inittext : { *(.inittext) }
19 .initdata : { *(.initdata) }
20 .text : { *(.text*) }
21
22 . = ALIGN(16);
23 .rodata : { *(.rodata*) }
24
/* video_cards / video_cards_end bracket everything placed in .videocards. */
25 .videocards : {
26 video_cards = .;
27 *(.videocards)
28 video_cards_end = .;
29 }
30
31 . = ALIGN(16);
32 .data : { *(.data*) }
33
/* 32-bit marker placed after .data; setup_sig is its address.  On a
   little-endian target the bytes are 55 aa 5a 5a, i.e. the 16-bit
   words 0xAA55 and 0x5A5A. */
34 .signature : {
35 setup_sig = .;
36 LONG(0x5a5aaa55)
37 }
38
39
40 . = ALIGN(16);
41 .bss :
42 {
43 __bss_start = .;
44 *(.bss)
45 __bss_end = .;
46 }
47 . = ALIGN(16);
48 _end = .;
49
/* .note* input sections are dropped from the output. */
50 /DISCARD/ : { *(.note*) }
51
/* Link-time checks: total size is capped at 0x8000 bytes, and hdr
   (defined outside this script -- presumably in .header, TODO confirm)
   must land at 0x1f1. */
52 . = ASSERT(_end <= 0x8000, "Setup too big!");
53 . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
54}
diff --git a/arch/i386/boot/string.c b/arch/i386/boot/string.c
new file mode 100644
index 000000000000..481a22097781
--- /dev/null
+++ b/arch/i386/boot/string.c
@@ -0,0 +1,52 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/string.c
13 *
14 * Very basic string functions
15 */
16
17#include "boot.h"
18
/*
 * strcmp - compare two NUL-terminated strings
 * @str1: first string
 * @str2: second string
 *
 * Bytes are compared as unsigned char.  Returns zero when the strings are
 * equal, a negative value when @str1 sorts before @str2 and a positive
 * value when it sorts after, as the standard C library contract requires.
 */
int strcmp(const char *str1, const char *str2)
{
	const unsigned char *s1 = (const unsigned char *)str1;
	const unsigned char *s2 = (const unsigned char *)str2;
	int delta;

	while (*s1 || *s2) {
		/* first minus second, so the sign follows the standard order */
		delta = *s1 - *s2;
		if (delta)
			return delta;
		s1++;
		s2++;
	}
	return 0;
}
34
/*
 * strnlen - length of a string, looking at no more than @maxlen bytes
 * @s:      string to measure
 * @maxlen: maximum number of bytes to examine
 *
 * Returns the number of bytes before the first NUL, or @maxlen if no NUL
 * is found within the first @maxlen bytes.
 */
size_t strnlen(const char *s, size_t maxlen)
{
	const char *es = s;

	/* Test the bound before dereferencing so s[maxlen] is never read. */
	while (maxlen && *es) {
		es++;
		maxlen--;
	}

	return (es - s);
}
45
/*
 * atou - convert a leading run of decimal digits to an unsigned int
 * @s: string to parse
 *
 * Parsing stops at the first character that isdigit() rejects; a string
 * with no leading digit yields 0.  No overflow detection is performed.
 */
unsigned int atou(const char *s)
{
	unsigned int value = 0;

	for (; isdigit(*s); s++)
		value = value * 10 + (*s - '0');

	return value;
}
diff --git a/arch/i386/boot/tools/build.c b/arch/i386/boot/tools/build.c
index 05798419a6a9..b4248740ff0d 100644
--- a/arch/i386/boot/tools/build.c
+++ b/arch/i386/boot/tools/build.c
@@ -1,13 +1,12 @@
1/* 1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds 2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 1997 Martin Mares 3 * Copyright (C) 1997 Martin Mares
4 * Copyright (C) 2007 H. Peter Anvin
4 */ 5 */
5 6
6/* 7/*
7 * This file builds a disk-image from three different files: 8 * This file builds a disk-image from two different files:
8 * 9 *
9 * - bootsect: compatibility mbr which prints an error message if
10 * someone tries to boot the kernel directly.
11 * - setup: 8086 machine code, sets up system parm 10 * - setup: 8086 machine code, sets up system parm
12 * - system: 80386 code for actual system 11 * - system: 80386 code for actual system
13 * 12 *
@@ -21,6 +20,7 @@
21 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 20 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
22 * Cross compiling fixes by Gertjan van Wingerde, July 1996 21 * Cross compiling fixes by Gertjan van Wingerde, July 1996
23 * Rewritten by Martin Mares, April 1997 22 * Rewritten by Martin Mares, April 1997
23 * Substantially overhauled by H. Peter Anvin, April 2007
24 */ 24 */
25 25
26#include <stdio.h> 26#include <stdio.h>
@@ -32,23 +32,25 @@
32#include <sys/sysmacros.h> 32#include <sys/sysmacros.h>
33#include <unistd.h> 33#include <unistd.h>
34#include <fcntl.h> 34#include <fcntl.h>
35#include <sys/mman.h>
35#include <asm/boot.h> 36#include <asm/boot.h>
36 37
37typedef unsigned char byte; 38typedef unsigned char u8;
38typedef unsigned short word; 39typedef unsigned short u16;
39typedef unsigned long u32; 40typedef unsigned long u32;
40 41
41#define DEFAULT_MAJOR_ROOT 0 42#define DEFAULT_MAJOR_ROOT 0
42#define DEFAULT_MINOR_ROOT 0 43#define DEFAULT_MINOR_ROOT 0
43 44
44/* Minimal number of setup sectors (see also bootsect.S) */ 45/* Minimal number of setup sectors */
45#define SETUP_SECTS 4 46#define SETUP_SECT_MIN 5
47#define SETUP_SECT_MAX 64
46 48
47byte buf[1024]; 49/* This must be large enough to hold the entire setup */
48int fd; 50u8 buf[SETUP_SECT_MAX*512];
49int is_big_kernel; 51int is_big_kernel;
50 52
51void die(const char * str, ...) 53static void die(const char * str, ...)
52{ 54{
53 va_list args; 55 va_list args;
54 va_start(args, str); 56 va_start(args, str);
@@ -57,15 +59,9 @@ void die(const char * str, ...)
57 exit(1); 59 exit(1);
58} 60}
59 61
60void file_open(const char *name) 62static void usage(void)
61{ 63{
62 if ((fd = open(name, O_RDONLY, 0)) < 0) 64 die("Usage: build [-b] setup system [rootdev] [> image]");
63 die("Unable to open `%s': %m", name);
64}
65
66void usage(void)
67{
68 die("Usage: build [-b] bootsect setup system [rootdev] [> image]");
69} 65}
70 66
71int main(int argc, char ** argv) 67int main(int argc, char ** argv)
@@ -73,27 +69,30 @@ int main(int argc, char ** argv)
73 unsigned int i, sz, setup_sectors; 69 unsigned int i, sz, setup_sectors;
74 int c; 70 int c;
75 u32 sys_size; 71 u32 sys_size;
76 byte major_root, minor_root; 72 u8 major_root, minor_root;
77 struct stat sb; 73 struct stat sb;
74 FILE *file;
75 int fd;
76 void *kernel;
78 77
79 if (argc > 2 && !strcmp(argv[1], "-b")) 78 if (argc > 2 && !strcmp(argv[1], "-b"))
80 { 79 {
81 is_big_kernel = 1; 80 is_big_kernel = 1;
82 argc--, argv++; 81 argc--, argv++;
83 } 82 }
84 if ((argc < 4) || (argc > 5)) 83 if ((argc < 3) || (argc > 4))
85 usage(); 84 usage();
86 if (argc > 4) { 85 if (argc > 3) {
87 if (!strcmp(argv[4], "CURRENT")) { 86 if (!strcmp(argv[3], "CURRENT")) {
88 if (stat("/", &sb)) { 87 if (stat("/", &sb)) {
89 perror("/"); 88 perror("/");
90 die("Couldn't stat /"); 89 die("Couldn't stat /");
91 } 90 }
92 major_root = major(sb.st_dev); 91 major_root = major(sb.st_dev);
93 minor_root = minor(sb.st_dev); 92 minor_root = minor(sb.st_dev);
94 } else if (strcmp(argv[4], "FLOPPY")) { 93 } else if (strcmp(argv[3], "FLOPPY")) {
95 if (stat(argv[4], &sb)) { 94 if (stat(argv[3], &sb)) {
96 perror(argv[4]); 95 perror(argv[3]);
97 die("Couldn't stat root device."); 96 die("Couldn't stat root device.");
98 } 97 }
99 major_root = major(sb.st_rdev); 98 major_root = major(sb.st_rdev);
@@ -108,79 +107,62 @@ int main(int argc, char ** argv)
108 } 107 }
109 fprintf(stderr, "Root device is (%d, %d)\n", major_root, minor_root); 108 fprintf(stderr, "Root device is (%d, %d)\n", major_root, minor_root);
110 109
111 file_open(argv[1]); 110 /* Copy the setup code */
112 i = read(fd, buf, sizeof(buf)); 111 file = fopen(argv[1], "r");
113 fprintf(stderr,"Boot sector %d bytes.\n",i); 112 if (!file)
114 if (i != 512) 113 die("Unable to open `%s': %m", argv[1]);
115 die("Boot block must be exactly 512 bytes"); 114 c = fread(buf, 1, sizeof(buf), file);
115 if (ferror(file))
116 die("read-error on `setup'");
117 if (c < 1024)
118 die("The setup must be at least 1024 bytes");
116 if (buf[510] != 0x55 || buf[511] != 0xaa) 119 if (buf[510] != 0x55 || buf[511] != 0xaa)
117 die("Boot block hasn't got boot flag (0xAA55)"); 120 die("Boot block hasn't got boot flag (0xAA55)");
121 fclose(file);
122
123 /* Pad unused space with zeros */
124 setup_sectors = (c + 511) / 512;
125 if (setup_sectors < SETUP_SECT_MIN)
126 setup_sectors = SETUP_SECT_MIN;
127 i = setup_sectors*512;
128 memset(buf+c, 0, i-c);
129
130 /* Set the default root device */
118 buf[508] = minor_root; 131 buf[508] = minor_root;
119 buf[509] = major_root; 132 buf[509] = major_root;
120 if (write(1, buf, 512) != 512)
121 die("Write call failed");
122 close (fd);
123
124 file_open(argv[2]); /* Copy the setup code */
125 for (i=0 ; (c=read(fd, buf, sizeof(buf)))>0 ; i+=c )
126 if (write(1, buf, c) != c)
127 die("Write call failed");
128 if (c != 0)
129 die("read-error on `setup'");
130 close (fd);
131
132 setup_sectors = (i + 511) / 512; /* Pad unused space with zeros */
133 /* for compatibility with ancient versions of LILO. */
134 if (setup_sectors < SETUP_SECTS)
135 setup_sectors = SETUP_SECTS;
136 fprintf(stderr, "Setup is %d bytes.\n", i);
137 memset(buf, 0, sizeof(buf));
138 while (i < setup_sectors * 512) {
139 c = setup_sectors * 512 - i;
140 if (c > sizeof(buf))
141 c = sizeof(buf);
142 if (write(1, buf, c) != c)
143 die("Write call failed");
144 i += c;
145 }
146 133
147 file_open(argv[3]); 134 fprintf(stderr, "Setup is %d bytes (padded to %d bytes).\n", c, i);
148 if (fstat (fd, &sb)) 135
149 die("Unable to stat `%s': %m", argv[3]); 136 /* Open and stat the kernel file */
137 fd = open(argv[2], O_RDONLY);
138 if (fd < 0)
139 die("Unable to open `%s': %m", argv[2]);
140 if (fstat(fd, &sb))
141 die("Unable to stat `%s': %m", argv[2]);
150 sz = sb.st_size; 142 sz = sb.st_size;
151 fprintf (stderr, "System is %d kB\n", sz/1024); 143 fprintf (stderr, "System is %d kB\n", (sz+1023)/1024);
144 kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
145 if (kernel == MAP_FAILED)
146 die("Unable to mmap '%s': %m", argv[2]);
152 sys_size = (sz + 15) / 16; 147 sys_size = (sz + 15) / 16;
153 if (!is_big_kernel && sys_size > DEF_SYSSIZE) 148 if (!is_big_kernel && sys_size > DEF_SYSSIZE)
154 die("System is too big. Try using bzImage or modules."); 149 die("System is too big. Try using bzImage or modules.");
155 while (sz > 0) { 150
156 int l, n; 151 /* Patch the setup code with the appropriate size parameters */
157 152 buf[0x1f1] = setup_sectors-1;
158 l = (sz > sizeof(buf)) ? sizeof(buf) : sz; 153 buf[0x1f4] = sys_size;
159 if ((n=read(fd, buf, l)) != l) { 154 buf[0x1f5] = sys_size >> 8;
160 if (n < 0) 155 buf[0x1f6] = sys_size >> 16;
161 die("Error reading %s: %m", argv[3]); 156 buf[0x1f7] = sys_size >> 24;
162 else 157
163 die("%s: Unexpected EOF", argv[3]); 158 if (fwrite(buf, 1, i, stdout) != i)
164 } 159 die("Writing setup failed");
165 if (write(1, buf, l) != l) 160
166 die("Write failed"); 161 /* Copy the kernel code */
167 sz -= l; 162 if (fwrite(kernel, 1, sz, stdout) != sz)
168 } 163 die("Writing kernel failed");
169 close(fd); 164 close(fd);
170 165
171 if (lseek(1, 497, SEEK_SET) != 497) /* Write sizes to the bootsector */ 166 /* Everything is OK */
172 die("Output: seek failed"); 167 return 0;
173 buf[0] = setup_sectors;
174 if (write(1, buf, 1) != 1)
175 die("Write of setup sector count failed");
176 if (lseek(1, 500, SEEK_SET) != 500)
177 die("Output: seek failed");
178 buf[0] = (sys_size & 0xff);
179 buf[1] = ((sys_size >> 8) & 0xff);
180 buf[2] = ((sys_size >> 16) & 0xff);
181 buf[3] = ((sys_size >> 24) & 0xff);
182 if (write(1, buf, 4) != 4)
183 die("Write of image length failed");
184
185 return 0; /* Everything is OK */
186} 168}
diff --git a/arch/i386/boot/tty.c b/arch/i386/boot/tty.c
new file mode 100644
index 000000000000..9c668aad3515
--- /dev/null
+++ b/arch/i386/boot/tty.c
@@ -0,0 +1,112 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/tty.c
13 *
14 * Very simple screen I/O
15 * XXX: Probably should add very simple serial I/O?
16 */
17
18#include "boot.h"
19
20/*
21 * These functions are in .inittext so they can be used to signal
22 * error during initialization.
23 */
24
25void __attribute__((section(".inittext"))) putchar(int ch)
26{
27 unsigned char c = ch;
28
29 if (c == '\n')
30 putchar('\r'); /* \n -> \r\n */
31
32 /* int $0x10 is known to have bugs involving touching registers
33 it shouldn't. Be extra conservative... */
34 asm volatile("pushal; pushw %%ds; int $0x10; popw %%ds; popal"
35 : : "b" (0x0007), "c" (0x0001), "a" (0x0e00|ch));
36}
37
/*
 * Write the NUL-terminated string str to the screen, one character at
 * a time via putchar().  No trailing newline is appended.
 */
void __attribute__((section(".inittext"))) puts(const char *str)
{
	while (*str)
		putchar(*str++);
}
46
47/*
48 * Read the CMOS clock through the BIOS, and return the
49 * seconds in BCD.
50 */
51
52static u8 gettime(void)
53{
54 u16 ax = 0x0200;
55 u16 cx, dx;
56
57 asm("int $0x1a"
58 : "+a" (ax), "=c" (cx), "=d" (dx)
59 : : "ebx", "esi", "edi");
60
61 return dx >> 8;
62}
63
64/*
65 * Read from the keyboard
66 */
67int getchar(void)
68{
69 u16 ax = 0;
70 asm("int $0x16" : "+a" (ax));
71
72 return ax & 0xff;
73}
74
75static int kbd_pending(void)
76{
77 u8 pending;
78 asm("int $0x16; setnz %0"
79 : "=rm" (pending)
80 : "a" (0x0100));
81 return pending;
82}
83
/*
 * Drain the keyboard: keep reading and discarding characters for as
 * long as kbd_pending() reports one waiting.
 */
void kbd_flush(void)
{
	while (kbd_pending())
		getchar();
}
92
/*
 * Wait for a key press, giving up after the value returned by gettime()
 * has changed 30 times.
 *
 * Returns the character from getchar() if a key arrived in time,
 * or 0 on timeout.
 */
int getchar_timeout(void)
{
	int remaining;
	int prev, now;

	prev = gettime();

	for (remaining = 30; remaining; ) {
		if (kbd_pending())
			return getchar();

		now = gettime();
		if (now == prev)
			continue;	/* clock has not moved yet */

		remaining--;
		prev = now;
	}

	return 0;		/* Timeout! */
}
diff --git a/arch/i386/boot/version.c b/arch/i386/boot/version.c
new file mode 100644
index 000000000000..c61462f7d9a7
--- /dev/null
+++ b/arch/i386/boot/version.c
@@ -0,0 +1,23 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/version.c
13 *
14 * Kernel version string
15 */
16
17#include "boot.h"
18#include <linux/utsrelease.h>
19#include <linux/compile.h>
20
/*
 * Version banner built at compile time by string-literal concatenation:
 * UTS_RELEASE, then " (", LINUX_COMPILE_BY, "@", LINUX_COMPILE_HOST, ") ",
 * then UTS_VERSION.
 */
const char kernel_version[] =
	UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") "
	UTS_VERSION;
diff --git a/arch/i386/boot/vesa.h b/arch/i386/boot/vesa.h
new file mode 100644
index 000000000000..ff5b73cd406f
--- /dev/null
+++ b/arch/i386/boot/vesa.h
@@ -0,0 +1,79 @@
1/* ----------------------------------------------------------------------- *
2 *
3 * Copyright 1999-2007 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13#ifndef BOOT_VESA_H
14#define BOOT_VESA_H
15
/* Segment:offset pointer pair; the offset is stored first, then the segment */
typedef struct {
	u16 off, seg;
} far_ptr;
19
/*
 * VESA General Information table
 *
 * Packed layout; the number in each field comment is the byte offset
 * of that field.  The fields add up to 512 bytes.
 */
struct vesa_general_info {
	u32 signature;		/* 0 Magic number = "VESA" */
	u16 version;		/* 4 */
	far_ptr vendor_string;	/* 6 */
	u32 capabilities;	/* 10 */
	far_ptr video_mode_ptr;	/* 14 */
	u16 total_memory;	/* 18 */

	u16 oem_software_rev;	/* 20 */
	far_ptr oem_vendor_name_ptr;	/* 22 */
	far_ptr oem_product_name_ptr;	/* 26 */
	far_ptr oem_product_rev_ptr;	/* 30 */

	u8 reserved[222];	/* 34 */
	u8 oem_data[256];	/* 256 */
} __attribute__ ((packed));
37
/*
 * Four-character signatures packed into a 32-bit value with the first
 * character in the least significant byte.
 */
#define VESA_MAGIC ('V' + ('E' << 8) + ('S' << 16) + ('A' << 24))
#define VBE2_MAGIC ('V' + ('B' << 8) + ('E' << 16) + ('2' << 24))
40
/*
 * Per-mode information block.
 *
 * Packed layout; the number in each field comment is the byte offset
 * of that field.  The fields add up to 256 bytes.
 */
struct vesa_mode_info {
	u16 mode_attr;		/* 0 */
	u8 win_attr[2];		/* 2 */
	u16 win_grain;		/* 4 */
	u16 win_size;		/* 6 */
	u16 win_seg[2];		/* 8 */
	far_ptr win_scheme;	/* 12 */
	u16 logical_scan;	/* 16 */

	u16 h_res;		/* 18 */
	u16 v_res;		/* 20 */
	u8 char_width;		/* 22 */
	u8 char_height;		/* 23 */
	u8 memory_planes;	/* 24 */
	u8 bpp;			/* 25 */
	u8 banks;		/* 26 */
	u8 memory_layout;	/* 27 */
	u8 bank_size;		/* 28 */
	u8 image_planes;	/* 29 */
	u8 page_function;	/* 30 */

	u8 rmask;		/* 31 */
	u8 rpos;		/* 32 */
	u8 gmask;		/* 33 */
	u8 gpos;		/* 34 */
	u8 bmask;		/* 35 */
	u8 bpos;		/* 36 */
	u8 resv_mask;		/* 37 */
	u8 resv_pos;		/* 38 */
	u8 dcm_info;		/* 39 */

	u32 lfb_ptr;		/* 40 Linear frame buffer address */
	u32 offscreen_ptr;	/* 44 Offscreen memory address */
	u16 offscreen_size;	/* 48 */

	u8 reserved[206];	/* 50 */
} __attribute__ ((packed));
78
#endif /* BOOT_VESA_H */
diff --git a/arch/i386/boot/video-bios.c b/arch/i386/boot/video-bios.c
new file mode 100644
index 000000000000..afea46c500cc
--- /dev/null
+++ b/arch/i386/boot/video-bios.c
@@ -0,0 +1,125 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/video-bios.c
13 *
14 * Standard video BIOS modes
15 *
16 * We have two options for this; silent and scanned.
17 */
18
19#include "boot.h"
20#include "video.h"
21
22__videocard video_bios;
23
24/* Set a conventional BIOS mode */
25static int set_bios_mode(u8 mode);
26
27static int bios_set_mode(struct mode_info *mi)
28{
29 return set_bios_mode(mi->mode - VIDEO_FIRST_BIOS);
30}
31
/*
 * Ask the BIOS to switch to video mode `mode`, then read the current
 * mode back to see whether the switch took effect.
 *
 * Returns 0 if the mode read back (with the top bit masked off) equals
 * the requested one, -1 otherwise.  On failure, if the BIOS ended up in
 * a mode other than the recorded original one, the original mode is
 * requested again.  do_restore is set to 1 in every case.
 */
static int set_bios_mode(u8 mode)
{
	u16 ax;
	u8 new_mode;

	ax = mode;		/* AH=0x00 Set Video Mode */
	asm volatile(INT10
		     : "+a" (ax)
		     : : "ebx", "ecx", "edx", "esi", "edi");

	ax = 0x0f00;		/* Get Current Video Mode */
	asm volatile(INT10
		     : "+a" (ax)
		     : : "ebx", "ecx", "edx", "esi", "edi");

	do_restore = 1;		/* Assume video contents was lost */
	new_mode = ax & 0x7f;	/* Not all BIOSes are clean with the top bit */

	if (new_mode == mode)
		return 0;	/* Mode change OK */

	if (new_mode != boot_params.screen_info.orig_video_mode) {
		/* Mode setting failed, but we didn't end up where we
		   started.  That's bad.  Try to revert to the original
		   video mode. */
		ax = boot_params.screen_info.orig_video_mode;
		asm volatile(INT10
			     : "+a" (ax)
			     : : "ebx", "ecx", "edx", "esi", "edi");
	}
	return -1;
}
64
/*
 * Probe hook for the BIOS driver: try every BIOS mode from 0x14 to
 * 0x7f and record the ones that can be set and look like text modes.
 *
 * Does nothing (returns 0) unless the adapter is EGA or VGA.  Modes
 * that are already defined are skipped.  The original video mode is
 * set again before returning.
 *
 * Returns the number of mode_info entries recorded.
 */
static int bios_probe(void)
{
	u8 mode;
	u8 saved_mode = boot_params.screen_info.orig_video_mode;
	u16 crtc;
	struct mode_info *mi;
	int nmodes = 0;

	if (adapter != ADAPTER_EGA && adapter != ADAPTER_VGA)
		return 0;

	set_fs(0);
	crtc = vga_crtc();

	/* NOTE(review): presumably a zero-count GET_HEAP only yields the
	   address where the entries allocated below will start -- confirm
	   against boot.h */
	video_bios.modes = GET_HEAP(struct mode_info, 0);

	for (mode = 0x14; mode <= 0x7f; mode++) {
		if (heap_free() < sizeof(struct mode_info))
			break;

		if (mode_defined(VIDEO_FIRST_BIOS+mode))
			continue;

		if (set_bios_mode(mode))
			continue;

		/* Try to verify that it's a text mode. */

		/* Attribute Controller: make sure graphics mode is disabled */
		if (in_idx(0x3c0, 0x10) & 0x01)
			continue;

		/* Graphics Controller: verify Alpha addressing enabled */
		if (in_idx(0x3ce, 0x06) & 0x01)
			continue;

		/* CRTC cursor location low should be zero(?) */
		if (in_idx(crtc, 0x0f))
			continue;

		mi = GET_HEAP(struct mode_info, 1);
		mi->mode = VIDEO_FIRST_BIOS+mode;
		/* Dimensions are read through FS (set to 0 above) from
		   offsets 0x44a and 0x484; the latter holds rows minus one */
		mi->x = rdfs16(0x44a);
		mi->y = rdfs8(0x484)+1;
		nmodes++;
	}

	set_bios_mode(saved_mode);

	return nmodes;
}
116
/*
 * Driver descriptor for scanned BIOS modes.  It is flagged unsafe and
 * claims 0x80 extended mode numbers starting at VIDEO_FIRST_BIOS.
 */
__videocard video_bios =
{
	.card_name	= "BIOS (scanned)",
	.probe		= bios_probe,
	.set_mode	= bios_set_mode,
	.unsafe		= 1,
	.xmode_first	= VIDEO_FIRST_BIOS,
	.xmode_n	= 0x80,
};
diff --git a/arch/i386/boot/video-vesa.c b/arch/i386/boot/video-vesa.c
new file mode 100644
index 000000000000..e6aa9eb8d93a
--- /dev/null
+++ b/arch/i386/boot/video-vesa.c
@@ -0,0 +1,284 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/video-vesa.c
13 *
14 * VESA text modes
15 */
16
17#include "boot.h"
18#include "video.h"
19#include "vesa.h"
20
21/* VESA information */
22static struct vesa_general_info vginfo;
23static struct vesa_mode_info vminfo;
24
25__videocard video_vesa;
26
27static void vesa_store_mode_params_graphics(void);
28
/*
 * Probe hook for the VESA driver.
 *
 * Queries the general information block (AX=0x4f00) into vginfo and,
 * when CONFIG_VIDEO_VESA is set, walks the mode list it points to,
 * querying each mode (AX=0x4f01) and recording:
 *   - text modes, with their dimensions;
 *   - with CONFIG_FB, linear-frame-buffer graphics modes, recorded
 *     with x = y = 0.
 *
 * Returns the number of mode_info entries recorded; 0 if no usable
 * VESA BIOS answers or CONFIG_VIDEO_VESA is not set.
 */
static int vesa_probe(void)
{
#if defined(CONFIG_VIDEO_VESA) || defined(CONFIG_FIRMWARE_EDID)
	u16 ax;
	u16 mode;
	addr_t mode_ptr;
	struct mode_info *mi;
	int nmodes = 0;

	video_vesa.modes = GET_HEAP(struct mode_info, 0);

	vginfo.signature = VBE2_MAGIC;

	/* Optimistically assume a VESA BIOS is register-clean... */
	ax = 0x4f00;
	/* vginfo is "+m", not "=m": the signature stored above is an
	   input to the call, so the store must not be treated as dead */
	asm("int $0x10" : "+a" (ax), "+m" (vginfo) : "D" (&vginfo));

	if (ax != 0x004f ||
	    vginfo.signature != VESA_MAGIC ||
	    vginfo.version < 0x0102)
		return 0;	/* Not present */
#endif /* CONFIG_VIDEO_VESA || CONFIG_FIRMWARE_EDID */
#ifdef CONFIG_VIDEO_VESA
	set_fs(vginfo.video_mode_ptr.seg);
	mode_ptr = vginfo.video_mode_ptr.off;

	while ((mode = rdfs16(mode_ptr)) != 0xffff) {
		mode_ptr += 2;

		if (heap_free() < sizeof(struct mode_info))
			break;	/* Heap full, can't save mode info */

		if (mode & ~0x1ff)
			continue;

		memset(&vminfo, 0, sizeof vminfo); /* Just in case... */

		ax = 0x4f01;
		/* "+m" keeps the memset above alive in case the BIOS
		   fills in only part of the buffer */
		asm("int $0x10"
		    : "+a" (ax), "+m" (vminfo)
		    : "c" (mode), "D" (&vminfo));

		if (ax != 0x004f)
			continue;

		if ((vminfo.mode_attr & 0x15) == 0x05) {
			/* Text Mode, TTY BIOS supported,
			   supported by hardware */
			mi = GET_HEAP(struct mode_info, 1);
			mi->mode = mode + VIDEO_FIRST_VESA;
			mi->x    = vminfo.h_res;
			mi->y    = vminfo.v_res;
			nmodes++;
		} else if ((vminfo.mode_attr & 0x99) == 0x99) {
#ifdef CONFIG_FB
			/* Graphics mode, color, linear frame buffer
			   supported -- register the mode but hide from
			   the menu.  Only do this if framebuffer is
			   configured, however, otherwise the user will
			   be left without a screen. */
			mi = GET_HEAP(struct mode_info, 1);
			mi->mode = mode + VIDEO_FIRST_VESA;
			mi->x = mi->y = 0;
			nmodes++;
#endif
		}
	}

	return nmodes;
#else
	return 0;
#endif /* CONFIG_VIDEO_VESA */
}
102
103static int vesa_set_mode(struct mode_info *mode)
104{
105 u16 ax;
106 int is_graphic;
107 u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA;
108
109 memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
110
111 ax = 0x4f01;
112 asm("int $0x10"
113 : "+a" (ax), "=m" (vminfo)
114 : "c" (vesa_mode), "D" (&vminfo));
115
116 if (ax != 0x004f)
117 return -1;
118
119 if ((vminfo.mode_attr & 0x15) == 0x05) {
120 /* It's a supported text mode */
121 is_graphic = 0;
122 } else if ((vminfo.mode_attr & 0x99) == 0x99) {
123 /* It's a graphics mode with linear frame buffer */
124 is_graphic = 1;
125 vesa_mode |= 0x4000; /* Request linear frame buffer */
126 } else {
127 return -1; /* Invalid mode */
128 }
129
130
131 ax = 0x4f02;
132 asm volatile("int $0x10"
133 : "+a" (ax)
134 : "b" (vesa_mode), "D" (0));
135
136 if (ax != 0x004f)
137 return -1;
138
139 graphic_mode = is_graphic;
140 if (!is_graphic) {
141 /* Text mode */
142 force_x = mode->x;
143 force_y = mode->y;
144 do_restore = 1;
145 } else {
146 /* Graphics mode */
147 vesa_store_mode_params_graphics();
148 }
149
150 return 0;
151}
152
153
154/* Switch DAC to 8-bit mode */
155static void vesa_dac_set_8bits(void)
156{
157 u8 dac_size = 6;
158
159 /* If possible, switch the DAC to 8-bit mode */
160 if (vginfo.capabilities & 1) {
161 u16 ax, bx;
162
163 ax = 0x4f08;
164 bx = 0x0800;
165 asm volatile(INT10
166 : "+a" (ax), "+b" (bx)
167 : : "ecx", "edx", "esi", "edi");
168
169 if (ax == 0x004f)
170 dac_size = bx >> 8;
171 }
172
173 /* Set the color sizes to the DAC size, and offsets to 0 */
174 boot_params.screen_info.red_size = dac_size;
175 boot_params.screen_info.green_size = dac_size;
176 boot_params.screen_info.blue_size = dac_size;
177 boot_params.screen_info.rsvd_size = dac_size;
178
179 boot_params.screen_info.red_pos = 0;
180 boot_params.screen_info.green_pos = 0;
181 boot_params.screen_info.blue_pos = 0;
182 boot_params.screen_info.rsvd_pos = 0;
183}
184
/*
 * Save the VESA protected mode info
 *
 * Calls int $0x10 with AX=0x4f0a and BX=DI=0.  If the call returns
 * AX=0x004f, the ES:DI pair it returns is stored in screen_info as
 * vesapm_seg:vesapm_off; otherwise nothing is stored.
 */
static void vesa_store_pm_info(void)
{
	u16 ax, bx, di, es;

	ax = 0x4f0a;
	bx = di = 0;
	/* ES is saved and restored around the call; the value the BIOS
	   leaves in ES is copied out through the DX operand */
	asm("pushw %%es; "INT10"; movw %%es,%0; popw %%es"
	    : "=d" (es), "+a" (ax), "+b" (bx), "+D" (di)
	    : : "ecx", "esi");

	if (ax != 0x004f)
		return;

	boot_params.screen_info.vesapm_seg = es;
	boot_params.screen_info.vesapm_off = di;
}
202
/*
 * Save video mode parameters for graphics mode
 *
 * Copies the relevant fields of vminfo (and the total memory figure
 * from vginfo) into boot_params.screen_info, switches the DAC width
 * for modes of 8 bpp or less, and records the protected mode
 * interface info.
 */
static void vesa_store_mode_params_graphics(void)
{
	/* Tell the kernel we're in VESA graphics mode */
	boot_params.screen_info.orig_video_isVGA = 0x23;

	/* Mode parameters */
	boot_params.screen_info.vesa_attributes = vminfo.mode_attr;
	boot_params.screen_info.lfb_linelength = vminfo.logical_scan;
	boot_params.screen_info.lfb_width = vminfo.h_res;
	boot_params.screen_info.lfb_height = vminfo.v_res;
	boot_params.screen_info.lfb_depth = vminfo.bpp;
	boot_params.screen_info.pages = vminfo.image_planes;
	boot_params.screen_info.lfb_base = vminfo.lfb_ptr;
	/* Copies the 8 bytes rmask..resv_pos in one go.
	   NOTE(review): assumes screen_info has the eight matching
	   size/pos bytes laid out consecutively from red_size -- confirm */
	memcpy(&boot_params.screen_info.red_size,
	       &vminfo.rmask, 8);

	/* General parameters */
	boot_params.screen_info.lfb_size = vginfo.total_memory;

	if (vminfo.bpp <= 8)
		vesa_dac_set_8bits();

	vesa_store_pm_info();
}
230
/*
 * Save EDID information for the kernel; this is invoked, separately,
 * after mode-setting.
 *
 * With CONFIG_FIRMWARE_EDID: fills boot_params.edid_info with 0x13
 * bytes, then, if the VBE version is at least 2.0 and the DDC
 * capability call (AX=0x4f15, BX=0) succeeds, asks the BIOS to read
 * EDID block 0 into boot_params.edid_info (AX=0x4f15, BX=1).
 * Without CONFIG_FIRMWARE_EDID this function does nothing.
 */
void vesa_store_edid(void)
{
#ifdef CONFIG_FIRMWARE_EDID
	u16 ax, bx, cx, dx, di;

	/* Apparently used as a nonsense token... */
	memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info);

	if (vginfo.version < 0x0200)
		return;		/* EDID requires VBE 2.0+ */

	ax = 0x4f15;		/* VBE DDC */
	bx = 0x0000;		/* Report DDC capabilities */
	cx = 0;			/* Controller 0 */
	di = 0;			/* ES:DI must be 0 by spec */

	/* Note: The VBE DDC spec is different from the main VESA spec;
	   we genuinely have to assume all registers are destroyed here. */

	/* Hence CX and DI are in/out operands and EDX is clobbered.
	   %2 is CX (zero), which is what gets loaded into ES. */
	asm volatile("pushw %%es; movw %2,%%es; "INT10"; popw %%es"
		     : "+a" (ax), "+b" (bx), "+c" (cx), "+D" (di)
		     : : "esi", "edx");

	if (ax != 0x004f)
		return;		/* No EDID */

	/* BH = time in seconds to transfer EDID information */
	/* BL = DDC level supported */

	ax = 0x4f15;		/* VBE DDC */
	bx = 0x0001;		/* Read EDID */
	cx = 0;			/* Controller 0 */
	dx = 0;			/* EDID block number */
	di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */
	/*
	 * volatile plus the "+m" operand tell the compiler that this
	 * call writes edid_info.  None of the register results are used
	 * afterwards, so without them the whole statement could be
	 * dropped.  "+m" rather than "=m" also keeps the memset above
	 * in case the BIOS writes only part of the block.
	 */
	asm volatile(INT10
		     : "+a" (ax), "+b" (bx), "+d" (dx), "+c" (cx), "+D" (di),
		       "+m" (boot_params.edid_info)
		     : : "esi");
#endif /* CONFIG_FIRMWARE_EDID */
}
276
/*
 * Driver descriptor for VESA modes.  It claims 0x200 extended mode
 * numbers starting at VIDEO_FIRST_VESA.
 */
__videocard video_vesa =
{
	.card_name	= "VESA",
	.probe		= vesa_probe,
	.set_mode	= vesa_set_mode,
	.xmode_first	= VIDEO_FIRST_VESA,
	.xmode_n	= 0x200,
};
diff --git a/arch/i386/boot/video-vga.c b/arch/i386/boot/video-vga.c
new file mode 100644
index 000000000000..700d09a9c9b3
--- /dev/null
+++ b/arch/i386/boot/video-vga.c
@@ -0,0 +1,260 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/video-vga.c
13 *
14 * Common all-VGA modes
15 */
16
17#include "boot.h"
18#include "video.h"
19
/*
 * Fixed mode tables, one per adapter class.  Each entry is
 * { mode id, columns, rows }.  vga_probe() picks one of these
 * tables according to the detected adapter.
 */
static struct mode_info vga_modes[] = {
	{ VIDEO_80x25,  80, 25 },
	{ VIDEO_8POINT, 80, 50 },
	{ VIDEO_80x43,  80, 43 },
	{ VIDEO_80x28,  80, 28 },
	{ VIDEO_80x30,  80, 30 },
	{ VIDEO_80x34,  80, 34 },
	{ VIDEO_80x60,  80, 60 },
};

/* On EGA the 8-point font gives 43 rows rather than 50 */
static struct mode_info ega_modes[] = {
	{ VIDEO_80x25,  80, 25 },
	{ VIDEO_8POINT, 80, 43 },
};

static struct mode_info cga_modes[] = {
	{ VIDEO_80x25,  80, 25 },
};
38
39__videocard video_vga;
40
41/* Set basic 80x25 mode */
42static u8 vga_set_basic_mode(void)
43{
44 u16 ax;
45 u8 rows;
46 u8 mode;
47
48#ifdef CONFIG_VIDEO_400_HACK
49 if (adapter >= ADAPTER_VGA) {
50 asm(INT10
51 : : "a" (0x1202), "b" (0x0030)
52 : "ecx", "edx", "esi", "edi");
53 }
54#endif
55
56 ax = 0x0f00;
57 asm(INT10
58 : "+a" (ax)
59 : : "ebx", "ecx", "edx", "esi", "edi");
60
61 mode = (u8)ax;
62
63 set_fs(0);
64 rows = rdfs8(0x484); /* rows minus one */
65
66#ifndef CONFIG_VIDEO_400_HACK
67 if ((ax == 0x5003 || ax == 0x5007) &&
68 (rows == 0 || rows == 24))
69 return mode;
70#endif
71
72 if (mode != 3 && mode != 7)
73 mode = 3;
74
75 /* Set the mode */
76 asm volatile(INT10
77 : : "a" (mode)
78 : "ebx", "ecx", "edx", "esi", "edi");
79 do_restore = 1;
80 return mode;
81}
82
/*
 * Load the 8x8 font and adjust the print-screen routine, cursor
 * emulation and cursor shape to match, using four int $0x10 calls.
 */
static void vga_set_8font(void)
{
	/* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */

	/* Set 8x8 font */
	asm volatile(INT10 : : "a" (0x1112), "b" (0));

	/* Use alternate print screen */
	asm volatile(INT10 : : "a" (0x1200), "b" (0x20));

	/* Turn off cursor emulation */
	asm volatile(INT10 : : "a" (0x1201), "b" (0x34));

	/* Cursor is scan lines 6-7 */
	asm volatile(INT10 : : "a" (0x0100), "c" (0x0607));
}
99
/*
 * Load the 9x14 font and adjust cursor emulation and cursor shape to
 * match, using three int $0x10 calls.
 */
static void vga_set_14font(void)
{
	/* Set 9x14 font - 80x28 on VGA */

	/* Set 9x14 font */
	asm volatile(INT10 : : "a" (0x1111), "b" (0));

	/* Turn off cursor emulation */
	asm volatile(INT10 : : "a" (0x1201), "b" (0x34));

	/* Cursor is scan lines 11-12 */
	asm volatile(INT10 : : "a" (0x0100), "c" (0x0b0c));
}
113
/*
 * Select 350 scan lines, set video mode 3 again so the change takes
 * effect, then load the 8x8 font.
 */
static void vga_set_80x43(void)
{
	/* Set 80x43 mode on VGA (not EGA) */

	/* Set 350 scans */
	asm volatile(INT10 : : "a" (0x1201), "b" (0x30));

	/* Reset video mode */
	asm volatile(INT10 : : "a" (0x0003));

	vga_set_8font();
}
126
127/* I/O address of the VGA CRTC */
128u16 vga_crtc(void)
129{
130 return (inb(0x3cc) & 1) ? 0x3d4 : 0x3b4;
131}
132
133static void vga_set_480_scanlines(int end)
134{
135 u16 crtc;
136 u8 csel;
137
138 crtc = vga_crtc();
139
140 out_idx(0x0c, crtc, 0x11); /* Vertical sync end, unlock CR0-7 */
141 out_idx(0x0b, crtc, 0x06); /* Vertical total */
142 out_idx(0x3e, crtc, 0x07); /* Vertical overflow */
143 out_idx(0xea, crtc, 0x10); /* Vertical sync start */
144 out_idx(end, crtc, 0x12); /* Vertical display end */
145 out_idx(0xe7, crtc, 0x15); /* Vertical blank start */
146 out_idx(0x04, crtc, 0x16); /* Vertical blank end */
147 csel = inb(0x3cc);
148 csel &= 0x0d;
149 csel |= 0xe2;
150 outb(csel, 0x3cc);
151}
152
/* 80x30: 480 scan lines with vertical display end 0xdf, font unchanged */
static void vga_set_80x30(void)
{
	vga_set_480_scanlines(0xdf);
}
157
/* 80x34: 9x14 font, then 480 scan lines with vertical display end 0xdb */
static void vga_set_80x34(void)
{
	vga_set_14font();
	vga_set_480_scanlines(0xdb);
}
163
/* 80x60: 8x8 font, then 480 scan lines with vertical display end 0xdf */
static void vga_set_80x60(void)
{
	vga_set_8font();
	vga_set_480_scanlines(0xdf);
}
169
170static int vga_set_mode(struct mode_info *mode)
171{
172 /* Set the basic mode */
173 vga_set_basic_mode();
174
175 /* Override a possibly broken BIOS */
176 force_x = mode->x;
177 force_y = mode->y;
178
179 switch (mode->mode) {
180 case VIDEO_80x25:
181 break;
182 case VIDEO_8POINT:
183 vga_set_8font();
184 break;
185 case VIDEO_80x43:
186 vga_set_80x43();
187 break;
188 case VIDEO_80x28:
189 vga_set_14font();
190 break;
191 case VIDEO_80x30:
192 vga_set_80x30();
193 break;
194 case VIDEO_80x34:
195 vga_set_80x34();
196 break;
197 case VIDEO_80x60:
198 vga_set_80x60();
199 break;
200 }
201
202 return 0;
203}
204
/*
 * Note: this probe includes basic information required by all
 * systems.  It should be executed first, by making sure
 * video-vga.c is listed first in the Makefile.
 *
 * Detects the adapter class with two int $0x10 calls, records it in
 * `adapter`, stores the BX result of the EGA/VGA check in
 * screen_info.orig_video_ega_bx, sets orig_video_isVGA for VGA, and
 * points video_vga at the matching mode table and card name.
 *
 * Returns the number of entries in the selected mode table.
 */
static int vga_probe(void)
{
	/* The three tables below are indexed by `adapter`.
	   NOTE(review): assumes ADAPTER_CGA, ADAPTER_EGA and ADAPTER_VGA
	   are 0, 1 and 2 in that order -- confirm against video.h */
	static const char *card_name[] = {
		"CGA/MDA/HGC", "EGA", "VGA"
	};
	static struct mode_info *mode_lists[] = {
		cga_modes,
		ega_modes,
		vga_modes,
	};
	static int mode_count[] = {
		sizeof(cga_modes)/sizeof(struct mode_info),
		sizeof(ega_modes)/sizeof(struct mode_info),
		sizeof(vga_modes)/sizeof(struct mode_info),
	};
	u8 vga_flag;

	asm(INT10
	    : "=b" (boot_params.screen_info.orig_video_ega_bx)
	    : "a" (0x1200), "b" (0x10) /* Check EGA/VGA */
	    : "ecx", "edx", "esi", "edi");

	/* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */
	if ((u8)boot_params.screen_info.orig_video_ega_bx != 0x10) {
		/* EGA/VGA */
		asm(INT10
		    : "=a" (vga_flag)
		    : "a" (0x1a00)
		    : "ebx", "ecx", "edx", "esi", "edi");

		/* AL echoed back as 0x1a is taken to mean VGA */
		if (vga_flag == 0x1a) {
			adapter = ADAPTER_VGA;
			boot_params.screen_info.orig_video_isVGA = 1;
		} else {
			adapter = ADAPTER_EGA;
		}
	} else {
		adapter = ADAPTER_CGA;
	}

	video_vga.modes = mode_lists[adapter];
	video_vga.card_name = card_name[adapter];
	return mode_count[adapter];
}
254
/*
 * Driver descriptor for the fixed VGA/EGA/CGA modes.  card_name here is
 * only the initial value; vga_probe() overwrites it (and sets .modes)
 * once the adapter has been identified.
 */
__videocard video_vga =
{
	.card_name	= "VGA",
	.probe		= vga_probe,
	.set_mode	= vga_set_mode,
};
diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S
deleted file mode 100644
index 8143c9516cb4..000000000000
--- a/arch/i386/boot/video.S
+++ /dev/null
@@ -1,2043 +0,0 @@
1/* video.S
2 *
3 * Display adapter & video mode setup, version 2.13 (14-May-99)
4 *
5 * Copyright (C) 1995 -- 1998 Martin Mares <mj@ucw.cz>
6 * Based on the original setup.S code (C) Linus Torvalds and Mats Anderson
7 *
8 * Rewritten to use GNU 'as' by Chris Noe <stiker@northlink.com> May 1999
9 *
10 * For further information, look at Documentation/svga.txt.
11 *
12 */
13
14/* Enable autodetection of SVGA adapters and modes. */
15#undef CONFIG_VIDEO_SVGA
16
17/* Enable autodetection of VESA modes */
18#define CONFIG_VIDEO_VESA
19
20/* Enable compacting of mode table */
21#define CONFIG_VIDEO_COMPACT
22
23/* Retain screen contents when switching modes */
24#define CONFIG_VIDEO_RETAIN
25
26/* Enable local mode list */
27#undef CONFIG_VIDEO_LOCAL
28
29/* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour) */
30#undef CONFIG_VIDEO_400_HACK
31
32/* Hack that lets you force specific BIOS mode ID and specific dimensions */
33#undef CONFIG_VIDEO_GFX_HACK
34#define VIDEO_GFX_BIOS_AX 0x4f02 /* 800x600 on ThinkPad */
35#define VIDEO_GFX_BIOS_BX 0x0102
36#define VIDEO_GFX_DUMMY_RESOLUTION 0x6425 /* 100x37 */
37
38/* This code uses an extended set of video mode numbers. These include:
39 * Aliases for standard modes
40 * NORMAL_VGA (-1)
41 * EXTENDED_VGA (-2)
42 * ASK_VGA (-3)
43 * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
44 * of compatibility when extending the table. These are between 0x00 and 0xff.
45 */
46#define VIDEO_FIRST_MENU 0x0000
47
48/* Standard BIOS video modes (BIOS number + 0x0100) */
49#define VIDEO_FIRST_BIOS 0x0100
50
51/* VESA BIOS video modes (VESA number + 0x0200) */
52#define VIDEO_FIRST_VESA 0x0200
53
54/* Video7 special modes (BIOS number + 0x0900) */
55#define VIDEO_FIRST_V7 0x0900
56
57/* Special video modes */
58#define VIDEO_FIRST_SPECIAL 0x0f00
59#define VIDEO_80x25 0x0f00
60#define VIDEO_8POINT 0x0f01
61#define VIDEO_80x43 0x0f02
62#define VIDEO_80x28 0x0f03
63#define VIDEO_CURRENT_MODE 0x0f04
64#define VIDEO_80x30 0x0f05
65#define VIDEO_80x34 0x0f06
66#define VIDEO_80x60 0x0f07
67#define VIDEO_GFX_HACK 0x0f08
68#define VIDEO_LAST_SPECIAL 0x0f09
69
70/* Video modes given by resolution */
71#define VIDEO_FIRST_RESOLUTION 0x1000
72
73/* The "recalculate timings" flag */
74#define VIDEO_RECALC 0x8000
75
76/* Positions of various video parameters passed to the kernel */
77/* (see also include/linux/tty.h) */
78#define PARAM_CURSOR_POS 0x00
79#define PARAM_VIDEO_PAGE 0x04
80#define PARAM_VIDEO_MODE 0x06
81#define PARAM_VIDEO_COLS 0x07
82#define PARAM_VIDEO_EGA_BX 0x0a
83#define PARAM_VIDEO_LINES 0x0e
84#define PARAM_HAVE_VGA 0x0f
85#define PARAM_FONT_POINTS 0x10
86
87#define PARAM_LFB_WIDTH 0x12
88#define PARAM_LFB_HEIGHT 0x14
89#define PARAM_LFB_DEPTH 0x16
90#define PARAM_LFB_BASE 0x18
91#define PARAM_LFB_SIZE 0x1c
92#define PARAM_LFB_LINELENGTH 0x24
93#define PARAM_LFB_COLORS 0x26
94#define PARAM_VESAPM_SEG 0x2e
95#define PARAM_VESAPM_OFF 0x30
96#define PARAM_LFB_PAGES 0x32
97#define PARAM_VESA_ATTRIB 0x34
98#define PARAM_CAPABILITIES 0x36
99
100/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */
101#ifdef CONFIG_VIDEO_RETAIN
102#define DO_STORE call store_screen
103#else
104#define DO_STORE
105#endif /* CONFIG_VIDEO_RETAIN */
106
107# This is the main entry point called by setup.S
108# %ds *must* be pointing to the bootsector
109video: pushw %ds # We use different segments
110 pushw %ds # FS contains original DS
111 popw %fs
112 pushw %cs # DS is equal to CS
113 popw %ds
114 pushw %cs # ES is equal to CS
115 popw %es
116 xorw %ax, %ax
117 movw %ax, %gs # GS is zero
118 cld # String instructions count upwards from here on
119 call basic_detect # Basic adapter type testing (EGA/VGA/MDA/CGA)
120#ifdef CONFIG_VIDEO_SELECT
121 movw %fs:(0x01fa), %ax # User selected video mode
122 cmpw $ASK_VGA, %ax # Bring up the menu
123 jz vid2 # ASK_VGA => go straight to the menu
124
125 call mode_set # Set the mode
126 jc vid1 # CF set => mode_set succeeded, skip the menu
127
128 leaw badmdt, %si # Invalid mode ID
129 call prtstr # Print the message, then fall into the menu
130vid2: call mode_menu
131vid1:
132#ifdef CONFIG_VIDEO_RETAIN
133 call restore_screen # Restore screen contents
134#endif /* CONFIG_VIDEO_RETAIN */
135 call store_edid
136#endif /* CONFIG_VIDEO_SELECT */
137 call mode_params # Store mode parameters
138 popw %ds # Restore original DS
139 ret
140
141# Detect if we have CGA, MDA, EGA or VGA and pass it to the kernel.
142basic_detect:
143 movb $0, %fs:(PARAM_HAVE_VGA) # Assume no VGA until detected below
144 movb $0x12, %ah # Check EGA/VGA
145 movb $0x10, %bl
146 int $0x10
147 movw %bx, %fs:(PARAM_VIDEO_EGA_BX) # Identifies EGA to the kernel
148 cmpb $0x10, %bl # BL still 0x10 => no EGA/VGA, it is a CGA/MDA/HGA card.
149 je basret # Leave adapter untouched in that case
150
151 incb adapter # At least EGA (adapter 0 is treated as CGA/MDA/HGA elsewhere in this file)
152 movw $0x1a00, %ax # Check EGA or VGA?
153 int $0x10
154 cmpb $0x1a, %al # 1a means VGA...
155 jne basret # anything else is EGA.
156
157 incb %fs:(PARAM_HAVE_VGA) # We've detected a VGA
158 incb adapter # Second increment marks VGA
159basret: ret
160
161# Store the video mode parameters for later usage by the kernel.
162# This is done by asking the BIOS except for the rows/columns
163# parameters in the default 80x25 mode -- these are set directly,
164# because some very obscure BIOSes supply insane values.
165mode_params:
166#ifdef CONFIG_VIDEO_SELECT
167 cmpb $0, graphic_mode # Set by check_vesa after a graphics mode switch
168 jnz mopar_gr # => store frame buffer parameters instead
169#endif
170 movb $0x03, %ah # Read cursor position
171 xorb %bh, %bh # BH = 0 (page number for the call)
172 int $0x10
173 movw %dx, %fs:(PARAM_CURSOR_POS)
174 movb $0x0f, %ah # Read page/mode/width
175 int $0x10
176 movw %bx, %fs:(PARAM_VIDEO_PAGE)
177 movw %ax, %fs:(PARAM_VIDEO_MODE) # Video mode and screen width
178 cmpb $0x7, %al # MDA/HGA => segment differs
179 jnz mopar0
180
181 movw $0xb000, video_segment # Mode 7: text buffer segment is 0xb000
182mopar0: movw %gs:(0x485), %ax # Font size
183 movw %ax, %fs:(PARAM_FONT_POINTS) # (valid only on EGA/VGA)
184 movw force_size, %ax # Forced size?
185 orw %ax, %ax
186 jz mopar1 # Zero => nothing forced, ask the BIOS data area
187
188 movb %ah, %fs:(PARAM_VIDEO_COLS) # High byte of force_size = columns
189 movb %al, %fs:(PARAM_VIDEO_LINES) # Low byte of force_size = lines
190 ret
191
192mopar1: movb $25, %al # Default line count
193 cmpb $0, adapter # If we are on CGA/MDA/HGA, the
194 jz mopar2 # screen must have 25 lines.
195
196 movb %gs:(0x484), %al # On EGA/VGA, use the EGA+ BIOS
197 incb %al # location of max lines.
198mopar2: movb %al, %fs:(PARAM_VIDEO_LINES)
199 ret
200
201#ifdef CONFIG_VIDEO_SELECT
202# Fetching of VESA frame buffer parameters
203mopar_gr:
204 leaw modelist+1024, %di
205 movb $0x23, %fs:(PARAM_HAVE_VGA)
206 movw 16(%di), %ax
207 movw %ax, %fs:(PARAM_LFB_LINELENGTH)
208 movw 18(%di), %ax
209 movw %ax, %fs:(PARAM_LFB_WIDTH)
210 movw 20(%di), %ax
211 movw %ax, %fs:(PARAM_LFB_HEIGHT)
212 movb 25(%di), %al
213 movb $0, %ah
214 movw %ax, %fs:(PARAM_LFB_DEPTH)
215 movb 29(%di), %al
216 movb $0, %ah
217 movw %ax, %fs:(PARAM_LFB_PAGES)
218 movl 40(%di), %eax
219 movl %eax, %fs:(PARAM_LFB_BASE)
220 movl 31(%di), %eax
221 movl %eax, %fs:(PARAM_LFB_COLORS)
222 movl 35(%di), %eax
223 movl %eax, %fs:(PARAM_LFB_COLORS+4)
224 movw 0(%di), %ax
225 movw %ax, %fs:(PARAM_VESA_ATTRIB)
226
227# get video mem size
228 leaw modelist+1024, %di
229 movw $0x4f00, %ax
230 int $0x10
231 xorl %eax, %eax
232 movw 18(%di), %ax
233 movl %eax, %fs:(PARAM_LFB_SIZE)
234
235# store mode capabilities
236 movl 10(%di), %eax
237 movl %eax, %fs:(PARAM_CAPABILITIES)
238
239# switching the DAC to 8-bit is for <= 8 bpp only
240 movw %fs:(PARAM_LFB_DEPTH), %ax
241 cmpw $8, %ax
242 jg dac_done
243
244# get DAC switching capability
245 xorl %eax, %eax
246 movb 10(%di), %al
247 testb $1, %al
248 jz dac_set
249
250# attempt to switch DAC to 8-bit
251 movw $0x4f08, %ax
252 movw $0x0800, %bx
253 int $0x10
254 cmpw $0x004f, %ax
255 jne dac_set
256 movb %bh, dac_size # store actual DAC size
257
258dac_set:
259# set color size to DAC size
260 movb dac_size, %al
261 movb %al, %fs:(PARAM_LFB_COLORS+0)
262 movb %al, %fs:(PARAM_LFB_COLORS+2)
263 movb %al, %fs:(PARAM_LFB_COLORS+4)
264 movb %al, %fs:(PARAM_LFB_COLORS+6)
265
266# set color offsets to 0
267 movb $0, %fs:(PARAM_LFB_COLORS+1)
268 movb $0, %fs:(PARAM_LFB_COLORS+3)
269 movb $0, %fs:(PARAM_LFB_COLORS+5)
270 movb $0, %fs:(PARAM_LFB_COLORS+7)
271
272dac_done:
273# get protected mode interface information
274 movw $0x4f0a, %ax
275 xorw %bx, %bx
276 xorw %di, %di
277 int $0x10
278 cmp $0x004f, %ax
279 jnz no_pm
280
281 movw %es, %fs:(PARAM_VESAPM_SEG)
282 movw %di, %fs:(PARAM_VESAPM_OFF)
283no_pm: ret
284
285# The video mode menu
286mode_menu:
287 leaw keymsg, %si # "Return/Space/Timeout" message
288 call prtstr
289 call flush
290nokey: call getkt # Key (or timeout result) comes back in AL
291
292 cmpb $0x0d, %al # ENTER ?
293 je listm # yes - manual mode selection
294
295 cmpb $0x20, %al # SPACE ?
296 je defmd1 # yes - return without choosing a mode
297
298 call beep # any other key - beep and ask again
299 jmp nokey
300
301defmd1: ret # No mode chosen? Default 80x25
302
303listm: call mode_table # List mode table
304listm0: leaw name_bann, %si # Print adapter name
305 call prtstr
306 movw card_name, %si # Non-zero when a probe stored a specific card name
307 orw %si, %si
308 jnz an2
309
310 movb adapter, %al # No specific name: pick one from the adapter class
311 leaw old_name, %si
312 orb %al, %al
313 jz an1 # adapter == 0
314
315 leaw ega_name, %si
316 decb %al
317 jz an1 # adapter == 1
318
319 leaw vga_name, %si
320 jmp an1
321
322an2: call prtstr # Print the card name, then the svga_name string
323 leaw svga_name, %si
324an1: call prtstr
325 leaw listhdr, %si # Table header
326 call prtstr
327 movb $0x30, %dl # DL holds mode number
328 leaw modelist, %si
329lm1: cmpw $ASK_VGA, (%si) # End?
330 jz lm2
331
332 movb %dl, %al # Menu selection number
333 call prtchr
334 call prtsp2
335 lodsw
336 call prthw # Mode ID
337 call prtsp2
338 movb 0x1(%si), %al # Second byte of the size word
339 call prtdec # Columns
340 movb $0x78, %al # the letter 'x'
341 call prtchr
342 lodsw # AL = first byte of the size word
343 call prtdec # Rows
344 movb $0x0d, %al # New line
345 call prtchr
346 movb $0x0a, %al
347 call prtchr
348 incb %dl # Next character
349 cmpb $0x3a, %dl # Stepped past the digit 9?
350 jnz lm1
351
352 movb $0x61, %dl # Continue the selection characters with lowercase a
353 jmp lm1
354
355lm2: leaw prompt, %si # Mode prompt
356 call prtstr
357 leaw edit_buf, %di # Editor buffer
358lm3: call getkey
359 cmpb $0x0d, %al # Enter?
360 jz lment
361
362 cmpb $0x08, %al # Backspace?
363 jz lmbs
364
365 cmpb $0x20, %al # Printable?
366 jc lm3
367
368 cmpw $edit_buf+4, %di # Enough space?
369 jz lm3
370
371 stosb
372 call prtchr
373 jmp lm3
374
375lmbs: cmpw $edit_buf, %di # Backspace
376 jz lm3
377
378 decw %di
379 movb $0x08, %al
380 call prtchr
381 call prtspc
382 movb $0x08, %al
383 call prtchr
384 jmp lm3
385
386lment: movb $0, (%di)
387 leaw crlft, %si
388 call prtstr
389 leaw edit_buf, %si
390 cmpb $0, (%si) # Empty string = default mode
391 jz lmdef
392
393 cmpb $0, 1(%si) # One character = menu selection
394 jz mnusel
395
396 cmpw $0x6373, (%si) # "scan" => mode scanning
397 jnz lmhx
398
399 cmpw $0x6e61, 2(%si)
400 jz lmscan
401
402lmhx: xorw %bx, %bx # Else => mode ID in hex
403lmhex: lodsb
404 orb %al, %al
405 jz lmuse1
406
407 subb $0x30, %al
408 jc lmbad
409
410 cmpb $10, %al
411 jc lmhx1
412
413 subb $7, %al
414 andb $0xdf, %al
415 cmpb $10, %al
416 jc lmbad
417
418 cmpb $16, %al
419 jnc lmbad
420
421lmhx1: shlw $4, %bx
422 orb %al, %bl
423 jmp lmhex
424
425lmuse1: movw %bx, %ax
426 jmp lmuse
427
428mnusel: lodsb # Menu selection
429 xorb %ah, %ah
430 subb $0x30, %al
431 jc lmbad
432
433 cmpb $10, %al
434 jc lmuse
435
436 cmpb $0x61-0x30, %al
437 jc lmbad
438
439 subb $0x61-0x30-10, %al
440 cmpb $36, %al
441 jnc lmbad
442
443lmuse: call mode_set
444 jc lmdef
445
446lmbad: leaw unknt, %si
447 call prtstr
448 jmp lm2
449lmscan: cmpb $0, adapter # Scanning only on EGA/VGA
450 jz lmbad
451
452 movw $0, mt_end # Scanning of modes is
453 movb $1, scanning # done as new autodetection.
454 call mode_table
455 jmp listm0
456lmdef: ret
457
458# Additional parts of mode_set... (relative jumps, you know)
459setv7: # Video7 extended modes
460 DO_STORE
461 subb $VIDEO_FIRST_V7>>8, %bh
462 movw $0x6f05, %ax
463 int $0x10
464 stc
465 ret
466
467_setrec: jmp setrec # Ugly...
468_set_80x25: jmp set_80x25
469
470# Aliases for backward compatibility.
471setalias:
472 movw $VIDEO_80x25, %ax
473 incw %bx
474 jz mode_set
475
476 movb $VIDEO_8POINT-VIDEO_FIRST_SPECIAL, %al
477 incw %bx
478 jnz setbad # Fall-through!
479
480# Setting of user mode (AX=mode ID) => CF=success
481mode_set:
482 movw %ax, %fs:(0x01fa) # Store mode for use in acpi_wakeup.S
483 movw %ax, %bx
484 cmpb $0xff, %ah
485 jz setalias
486
487 testb $VIDEO_RECALC>>8, %ah
488 jnz _setrec
489
490 cmpb $VIDEO_FIRST_RESOLUTION>>8, %ah
491 jnc setres
492
493 cmpb $VIDEO_FIRST_SPECIAL>>8, %ah
494 jz setspc
495
496 cmpb $VIDEO_FIRST_V7>>8, %ah
497 jz setv7
498
499 cmpb $VIDEO_FIRST_VESA>>8, %ah
500 jnc check_vesa
501
502 orb %ah, %ah
503 jz setmenu
504
505 decb %ah
506 jz setbios
507
508setbad: clc
509 movb $0, do_restore # The screen needn't be restored
510 ret
511
512setvesa:
513 DO_STORE
514 subb $VIDEO_FIRST_VESA>>8, %bh
515 movw $0x4f02, %ax # VESA BIOS mode set call
516 int $0x10
517 cmpw $0x004f, %ax # AL=4f if implemented
518 jnz setbad # AH=0 if OK
519
520 stc
521 ret
522
523setbios:
524 DO_STORE
525 int $0x10 # Standard BIOS mode set call
526 pushw %bx
527 movb $0x0f, %ah # Check if really set
528 int $0x10
529 popw %bx
530 cmpb %bl, %al
531 jnz setbad
532
533 stc
534 ret
535
536setspc: xorb %bh, %bh # Set special mode
537 cmpb $VIDEO_LAST_SPECIAL-VIDEO_FIRST_SPECIAL, %bl
538 jnc setbad
539
540 addw %bx, %bx
541 jmp *spec_inits(%bx)
542
543setmenu:
544 orb %al, %al # 80x25 is an exception
545 jz _set_80x25
546
547 pushw %bx # Set mode chosen from menu
548 call mode_table # Build the mode table
549 popw %ax
550 shlw $2, %ax
551 addw %ax, %si
552 cmpw %di, %si
553 jnc setbad
554
555 movw (%si), %ax # Fetch mode ID
556_m_s: jmp mode_set
557
558setres: pushw %bx # Set mode chosen by resolution
559 call mode_table
560 popw %bx
561 xchgb %bl, %bh
562setr1: lodsw
563 cmpw $ASK_VGA, %ax # End of the list?
564 jz setbad
565
566 lodsw
567 cmpw %bx, %ax
568 jnz setr1
569
570 movw -4(%si), %ax # Fetch mode ID
571 jmp _m_s
572
573check_vesa:
574#ifdef CONFIG_FIRMWARE_EDID
575 leaw modelist+1024, %di
576 movw $0x4f00, %ax
577 int $0x10
578 cmpw $0x004f, %ax
579 jnz setbad
580
581 movw 4(%di), %ax
582 movw %ax, vbe_version
583#endif
584 leaw modelist+1024, %di
585 subb $VIDEO_FIRST_VESA>>8, %bh
586 movw %bx, %cx # Get mode information structure
587 movw $0x4f01, %ax
588 int $0x10
589 addb $VIDEO_FIRST_VESA>>8, %bh
590 cmpw $0x004f, %ax
591 jnz setbad
592
593 movb (%di), %al # Check capabilities.
594 andb $0x19, %al
595 cmpb $0x09, %al
596 jz setvesa # This is a text mode
597
598 movb (%di), %al # Check capabilities.
599 andb $0x99, %al
600 cmpb $0x99, %al
601 jnz _setbad # Doh! No linear frame buffer.
602
603 subb $VIDEO_FIRST_VESA>>8, %bh
604 orw $0x4000, %bx # Use linear frame buffer
605 movw $0x4f02, %ax # VESA BIOS mode set call
606 int $0x10
607 cmpw $0x004f, %ax # AL=4f if implemented
608 jnz _setbad # AH=0 if OK
609
610 movb $1, graphic_mode # flag graphic mode
611 movb $0, do_restore # no screen restore
612 stc
613 ret
614
615_setbad: jmp setbad # Ugly...
616
617# Recalculate vertical display end registers -- this fixes various
618# inconsistencies of extended modes on many adapters. Called when
619# the VIDEO_RECALC flag is set in the mode ID.
620
621setrec: subb $VIDEO_RECALC>>8, %ah # Set the base mode
622 call mode_set
623 jnc rct3 # CF clear => base mode failed; return with CF still clear
624
625 movw %gs:(0x485), %ax # Font size in pixels
626 movb %gs:(0x484), %bl # Number of rows
627 incb %bl # Stored value is one less than the row count (mode_params adds 1 as well)
628 mulb %bl # Number of visible
629 decw %ax # scan lines - 1
630 movw $0x3d4, %dx # CRTC index port
631 movw %ax, %bx # Keep the full count in BX
632 movb $0x12, %al # Lower 8 bits
633 movb %bl, %ah
634 outw %ax, %dx
635 movb $0x07, %al # Bits 8 and 9 in the overflow register
636 call inidx # AL = current overflow register value
637 xchgb %al, %ah # Move it to AH, the data half of the outw below
638 andb $0xbd, %ah # Clear bits 1 and 6
639 shrb %bh # Count bit 8 -> CF
640 jnc rct1
641 orb $0x02, %ah # Count bit 8 goes to overflow bit 1
642rct1: shrb %bh # Count bit 9 -> CF
643 jnc rct2
644 orb $0x40, %ah # Count bit 9 goes to overflow bit 6
645rct2: movb $0x07, %al
646 outw %ax, %dx
647 stc # CF set => success
648rct3: ret
649
650# Table of routines for setting of the special modes.
651spec_inits:
652 .word set_80x25 # VIDEO_80x25 (0x0f00)
653 .word set_8pixel # VIDEO_8POINT (0x0f01)
654 .word set_80x43 # VIDEO_80x43 (0x0f02)
655 .word set_80x28 # VIDEO_80x28 (0x0f03)
656 .word set_current # VIDEO_CURRENT_MODE (0x0f04)
657 .word set_80x30 # VIDEO_80x30 (0x0f05)
658 .word set_80x34 # VIDEO_80x34 (0x0f06)
659 .word set_80x60 # VIDEO_80x60 (0x0f07)
660 .word set_gfx # VIDEO_GFX_HACK (0x0f08)
661
662# Set the 80x25 mode. If already set, do nothing.
663set_80x25:
664 movw $0x5019, force_size # Override possibly broken BIOS
665use_80x25:
666#ifdef CONFIG_VIDEO_400_HACK
667 movw $0x1202, %ax # Force 400 scan lines
668 movb $0x30, %bl
669 int $0x10
670#else
671 movb $0x0f, %ah # Get current mode ID
672 int $0x10
673 cmpw $0x5007, %ax # Mode 7 (80x25 mono) is the only one available
674 jz st80 # on CGA/MDA/HGA and is also available on EGAM
675
676 cmpw $0x5003, %ax # Unknown mode, force 80x25 color
677 jnz force3
678
679st80: cmpb $0, adapter # CGA/MDA/HGA => mode 3/7 is always 80x25
680 jz set80
681
682 movb %gs:(0x0484), %al # This is EGA+ -- beware of 80x50 etc.
683 orb %al, %al # Some buggy BIOS'es set 0 rows
684 jz set80
685
686 cmpb $24, %al # It's hopefully correct
687 jz set80 # 24 here means 25 rows: already 80x25, nothing to do
688#endif /* CONFIG_VIDEO_400_HACK */
689force3: DO_STORE
690 movw $0x0003, %ax # Forced set
691 int $0x10
692set80: stc # CF set => success
693 ret
694
695# Set the 80x50/80x43 8-pixel mode. Simple BIOS calls.
696set_8pixel:
697 DO_STORE
698 call use_80x25 # The base is 80x25
699set_8pt:
700 movw $0x1112, %ax # Use 8x8 font
701 xorb %bl, %bl
702 int $0x10
703 movw $0x1200, %ax # Use alternate print screen
704 movb $0x20, %bl
705 int $0x10
706 movw $0x1201, %ax # Turn off cursor emulation
707 movb $0x34, %bl
708 int $0x10
709 movb $0x01, %ah # Define cursor scan lines 6-7
710 movw $0x0607, %cx # CH = 6 (start line), CL = 7 (end line)
711 int $0x10
712set_current:
713 stc # CF set => success (set_current changes nothing else)
714 ret
715
716# Set the 80x28 mode. This mode works on all VGA's, because it's a standard
717# 80x25 mode with 14-point fonts instead of 16-point.
718set_80x28:
719 DO_STORE
720 call use_80x25 # The base is 80x25
721set14: movw $0x1111, %ax # Use 9x14 font
722 xorb %bl, %bl
723 int $0x10
724 movb $0x01, %ah # Define cursor scan lines 11-12
725 movw $0x0b0c, %cx # CH = 11 (start line), CL = 12 (end line)
726 int $0x10
727 stc # CF set => success
728 ret
729
730# Set the 80x43 mode. This mode works on all VGA's.
731# It's a 350-scanline mode with 8-pixel font.
732set_80x43:
733 DO_STORE
734 movw $0x1201, %ax # Set 350 scans
735 movb $0x30, %bl
736 int $0x10
737 movw $0x0003, %ax # Reset video mode
738 int $0x10
739 jmp set_8pt # Use 8-pixel font; set_8pt returns with CF set
740
741# Set the 80x30 mode (all VGA's). 480 scanlines, 16-pixel font.
742set_80x30:
743 call use_80x25 # Start with real 80x25
744 DO_STORE
745 movw $0x3cc, %dx # Get CRTC port
746 inb %dx, %al
747 movb $0xd4, %dl
748 rorb %al # Mono or color?
749 jc set48a
750
751 movb $0xb4, %dl
752set48a: movw $0x0c11, %ax # Vertical sync end (also unlocks CR0-7)
753 call outidx
754 movw $0x0b06, %ax # Vertical total
755 call outidx
756 movw $0x3e07, %ax # (Vertical) overflow
757 call outidx
758 movw $0xea10, %ax # Vertical sync start
759 call outidx
760 movw $0xdf12, %ax # Vertical display end
761 call outidx
762 movw $0xe715, %ax # Vertical blank start
763 call outidx
764 movw $0x0416, %ax # Vertical blank end
765 call outidx
766 pushw %dx
767 movb $0xcc, %dl # Misc output register (read)
768 inb %dx, %al
769 movb $0xc2, %dl # (write)
770 andb $0x0d, %al # Preserve clock select bits and color bit
771 orb $0xe2, %al # Set correct sync polarity
772 outb %al, %dx
773 popw %dx
774 movw $0x501e, force_size
775 stc # That's all.
776 ret
777
778# Set the 80x34 mode (all VGA's). 480 scans, 14-pixel font.
779set_80x34:
780 call set_80x30 # Set 480 scans
781 call set14 # And 14-pt font
782 movw $0xdb12, %ax # VGA vertical display end
783 movw $0x5022, force_size # Report 80 columns x 34 lines
784setvde: call outidx # Write AH to indexed register AL; DX must hold the index port
785 stc # CF set => success
786 ret
787
788# Set the 80x60 mode (all VGA's). 480 scans, 8-pixel font.
789set_80x60:
790 call set_80x30 # Set 480 scans
791 call set_8pt # And 8-pt font
792 movw $0xdf12, %ax # VGA vertical display end
793 movw $0x503c, force_size # Report 80 columns x 60 lines
794 jmp setvde # Shared tail in set_80x34: write the register, set CF, return
795
796# Special hack for ThinkPad graphics
797set_gfx:
798#ifdef CONFIG_VIDEO_GFX_HACK
799 movw $VIDEO_GFX_BIOS_AX, %ax
800 movw $VIDEO_GFX_BIOS_BX, %bx
801 int $0x10
802 movw $VIDEO_GFX_DUMMY_RESOLUTION, force_size
803 stc
804#endif
805 ret
806
807#ifdef CONFIG_VIDEO_RETAIN
808
809# Store screen contents to temporary buffer.
810store_screen:
811 cmpb $0, do_restore # Already stored?
812 jnz stsr
813
814 testb $CAN_USE_HEAP, loadflags # Have we space for storing?
815 jz stsr
816
817 pushw %ax
818 pushw %bx
819 pushw force_size # Don't force specific size
820 movw $0, force_size
821 call mode_params # Obtain params of current mode
822 popw force_size
823 movb %fs:(PARAM_VIDEO_LINES), %ah
824 movb %fs:(PARAM_VIDEO_COLS), %al
825 movw %ax, %bx # BX=dimensions
826 mulb %ah
827 movw %ax, %cx # CX=number of characters
828 addw %ax, %ax # Calculate image size
829 addw $modelist+1024+4, %ax
830 cmpw heap_end_ptr, %ax
831 jnc sts1 # Unfortunately, out of memory
832
833 movw %fs:(PARAM_CURSOR_POS), %ax # Store mode params
834 leaw modelist+1024, %di
835 stosw
836 movw %bx, %ax
837 stosw
838 pushw %ds # Store the screen
839 movw video_segment, %ds
840 xorw %si, %si
841 rep
842 movsw
843 popw %ds
844 incb do_restore # Screen will be restored later
845sts1: popw %bx
846 popw %ax
847stsr: ret
848
849# Restore screen contents from temporary buffer.
850restore_screen:
851 cmpb $0, do_restore # Has the screen been stored?
852 jz res1
853
854 call mode_params # Get parameters of current mode
855 movb %fs:(PARAM_VIDEO_LINES), %cl
856 movb %fs:(PARAM_VIDEO_COLS), %ch
857 leaw modelist+1024, %si # Screen buffer
858 lodsw # Set cursor position
859 movw %ax, %dx
860 cmpb %cl, %dh
861 jc res2
862
863 movb %cl, %dh
864 decb %dh
865res2: cmpb %ch, %dl
866 jc res3
867
868 movb %ch, %dl
869 decb %dl
870res3: movb $0x02, %ah
871 movb $0x00, %bh
872 int $0x10
873 lodsw # Display size
874 movb %ah, %dl # DL=number of lines
875 movb $0, %ah # BX=phys. length of orig. line
876 movw %ax, %bx
877 cmpb %cl, %dl # Too many?
878 jc res4
879
880 pushw %ax
881 movb %dl, %al
882 subb %cl, %al
883 mulb %bl
884 addw %ax, %si
885 addw %ax, %si
886 popw %ax
887 movb %cl, %dl
888res4: cmpb %ch, %al # Too wide?
889 jc res5
890
891 movb %ch, %al # AX=width of src. line
892res5: movb $0, %cl
893 xchgb %ch, %cl
894 movw %cx, %bp # BP=width of dest. line
895 pushw %es
896 movw video_segment, %es
897 xorw %di, %di # Move the data
898 addw %bx, %bx # Convert BX and BP to _bytes_
899 addw %bp, %bp
900res6: pushw %si
901 pushw %di
902 movw %ax, %cx
903 rep
904 movsw
905 popw %di
906 popw %si
907 addw %bp, %di
908 addw %bx, %si
909 decb %dl
910 jnz res6
911
912 popw %es # Done
913res1: ret
914#endif /* CONFIG_VIDEO_RETAIN */
915
916# Write to indexed VGA register (AL=index, AH=data, DX=index reg. port)
917outidx: outb %al, %dx # Select the register: index goes to the index port
918 pushw %ax # Preserve AX for the caller
919 movb %ah, %al # Data byte must be in AL for outb
920 incw %dx # Data port is index port + 1
921 outb %al, %dx
922 decw %dx # Leave DX pointing at the index port again
923 popw %ax
924 ret
925
926# Build the table of video modes (stored after the setup.S code at the
927# `modelist' label). Each video mode record looks like:
928# .word MODE-ID (our special mode ID (see above))
929# .byte rows (number of rows)
930# .byte columns (number of columns)
931# Returns address of the end of the table in DI, the end is marked
932# with an ASK_VGA ID.
933mode_table:
934 movw mt_end, %di # Already filled?
935 orw %di, %di
936 jnz mtab1x
937
938 leaw modelist, %di # Store standard modes:
939 movl $VIDEO_80x25 + 0x50190000, %eax # The 80x25 mode (ALL)
940 stosl
941 movb adapter, %al # CGA/MDA/HGA -- no more modes
942 orb %al, %al
943 jz mtabe
944
945 decb %al
946 jnz mtabv
947
948 movl $VIDEO_8POINT + 0x502b0000, %eax # The 80x43 EGA mode
949 stosl
950 jmp mtabe
951
952mtab1x: jmp mtab1
953
954mtabv: leaw vga_modes, %si # All modes for std VGA
955 movw $vga_modes_end-vga_modes, %cx
956 rep # I'm unable to use movsw as I don't know how to store a half
957 movsb # of the expression above to cx without using explicit shr.
958
959 cmpb $0, scanning # Mode scan requested?
960 jz mscan1
961
962 call mode_scan
963mscan1:
964
965#ifdef CONFIG_VIDEO_LOCAL
966 call local_modes
967#endif /* CONFIG_VIDEO_LOCAL */
968
969#ifdef CONFIG_VIDEO_VESA
970 call vesa_modes # Detect VESA VGA modes
971#endif /* CONFIG_VIDEO_VESA */
972
973#ifdef CONFIG_VIDEO_SVGA
974 cmpb $0, scanning # Bypass when scanning
975 jnz mscan2
976
977 call svga_modes # Detect SVGA cards & modes
978mscan2:
979#endif /* CONFIG_VIDEO_SVGA */
980
981mtabe:
982
983#ifdef CONFIG_VIDEO_COMPACT
984 leaw modelist, %si
985 movw %di, %dx
986 movw %si, %di
987cmt1: cmpw %dx, %si # Scan all modes
988 jz cmt2
989
990 leaw modelist, %bx # Find in previous entries
991 movw 2(%si), %cx
992cmt3: cmpw %bx, %si
993 jz cmt4
994
995 cmpw 2(%bx), %cx # Found => don't copy this entry
996 jz cmt5
997
998 addw $4, %bx
999 jmp cmt3
1000
1001cmt4: movsl # Copy entry
1002 jmp cmt1
1003
1004cmt5: addw $4, %si # Skip entry
1005 jmp cmt1
1006
1007cmt2:
1008#endif /* CONFIG_VIDEO_COMPACT */
1009
1010 movw $ASK_VGA, (%di) # End marker
1011 movw %di, mt_end
1012mtab1: leaw modelist, %si # SI=mode list, DI=list end
1013ret0: ret
1014
1015# Modes usable on all standard VGAs
1016vga_modes:
1017 .word VIDEO_8POINT
1018 .word 0x5032 # 80x50
1019 .word VIDEO_80x43
1020 .word 0x502b # 80x43
1021 .word VIDEO_80x28
1022 .word 0x501c # 80x28
1023 .word VIDEO_80x30
1024 .word 0x501e # 80x30
1025 .word VIDEO_80x34
1026 .word 0x5022 # 80x34
1027 .word VIDEO_80x60
1028 .word 0x503c # 80x60
1029#ifdef CONFIG_VIDEO_GFX_HACK
1030 .word VIDEO_GFX_HACK
1031 .word VIDEO_GFX_DUMMY_RESOLUTION
1032#endif
1033
1034vga_modes_end:
1035# Detect VESA modes.
1036
1037#ifdef CONFIG_VIDEO_VESA
1038vesa_modes:
1039 cmpb $2, adapter # VGA only
1040 jnz ret0
1041
1042 movw %di, %bp # BP=original mode table end
1043 addw $0x200, %di # Buffer space
1044 movw $0x4f00, %ax # VESA Get card info call
1045 int $0x10
1046 movw %bp, %di
1047 cmpw $0x004f, %ax # Successful?
1048 jnz ret0
1049
1050 cmpw $0x4556, 0x200(%di)
1051 jnz ret0
1052
1053 cmpw $0x4153, 0x202(%di)
1054 jnz ret0
1055
1056 movw $vesa_name, card_name # Set name to "VESA VGA"
1057 pushw %gs
1058 lgsw 0x20e(%di), %si # GS:SI=mode list
1059 movw $128, %cx # Iteration limit
1060vesa1:
1061# gas version 2.9.1, using BFD version 2.9.1.0.23 buggers the next inst.
1062# XXX: lodsw %gs:(%si), %ax # Get next mode in the list
1063 gs; lodsw
1064 cmpw $0xffff, %ax # End of the table?
1065 jz vesar
1066
1067 cmpw $0x0080, %ax # Check validity of mode ID
1068 jc vesa2
1069
1070 orb %ah, %ah # Valid IDs: 0x0000-0x007f/0x0100-0x07ff
1071 jz vesan # Certain BIOSes report 0x80-0xff!
1072
1073 cmpw $0x0800, %ax
1074 jnc vesae
1075
1076vesa2: pushw %cx
1077 movw %ax, %cx # Get mode information structure
1078 movw $0x4f01, %ax
1079 int $0x10
1080 movw %cx, %bx # BX=mode number
1081 addb $VIDEO_FIRST_VESA>>8, %bh
1082 popw %cx
1083 cmpw $0x004f, %ax
1084 jnz vesan # Don't report errors (buggy BIOSES)
1085
1086 movb (%di), %al # Check capabilities. We require
1087 andb $0x19, %al # a color text mode.
1088 cmpb $0x09, %al
1089 jnz vesan
1090
1091 cmpw $0xb800, 8(%di) # Standard video memory address required
1092 jnz vesan
1093
1094 testb $2, (%di) # Mode characteristics supplied?
1095 movw %bx, (%di) # Store mode number
1096 jz vesa3
1097
1098 xorw %dx, %dx
1099 movw 0x12(%di), %bx # Width
1100 orb %bh, %bh
1101 jnz vesan
1102
1103 movb %bl, 0x3(%di)
1104 movw 0x14(%di), %ax # Height
1105 orb %ah, %ah
1106 jnz vesan
1107
1108 movb %al, 2(%di)
1109 mulb %bl
1110 cmpw $8193, %ax # Small enough for Linux console driver?
1111 jnc vesan
1112
1113 jmp vesaok
1114
1115vesa3: subw $0x8108, %bx # This mode has no detailed info specified,
1116 jc vesan # so it must be a standard VESA mode.
1117
1118 cmpw $5, %bx
1119 jnc vesan
1120
1121 movw vesa_text_mode_table(%bx), %ax
1122 movw %ax, 2(%di)
1123vesaok: addw $4, %di # The mode is valid. Store it.
1124vesan: loop vesa1 # Next mode. Limit exceeded => error
1125vesae: leaw vesaer, %si
1126 call prtstr
1127 movw %bp, %di # Discard already found modes.
1128vesar: popw %gs
1129 ret
1130
1131# Dimensions of standard VESA text modes
1132vesa_text_mode_table:
1133 .byte 60, 80 # 0108
1134 .byte 25, 132 # 0109
1135 .byte 43, 132 # 010A
1136 .byte 50, 132 # 010B
1137 .byte 60, 132 # 010C
1138#endif /* CONFIG_VIDEO_VESA */
1139
1140# Scan for video modes. A bit dirty, but should work.
1141mode_scan:
1142 movw $0x0100, %cx # Start with mode 0
1143scm1: movb $0, %ah # Test the mode
1144 movb %cl, %al
1145 int $0x10
1146 movb $0x0f, %ah
1147 int $0x10
1148 cmpb %cl, %al
1149 jnz scm2 # Mode not set
1150
1151 movw $0x3c0, %dx # Test if it's a text mode
1152 movb $0x10, %al # Mode bits
1153 call inidx
1154 andb $0x03, %al
1155 jnz scm2
1156
1157 movb $0xce, %dl # Another set of mode bits
1158 movb $0x06, %al
1159 call inidx
1160 shrb %al
1161 jc scm2
1162
1163 movb $0xd4, %dl # Cursor location
1164 movb $0x0f, %al
1165 call inidx
1166 orb %al, %al
1167 jnz scm2
1168
1169 movw %cx, %ax # Ok, store the mode
1170 stosw
1171 movb %gs:(0x484), %al # Number of rows
1172 incb %al
1173 stosb
1174 movw %gs:(0x44a), %ax # Number of columns
1175 stosb
1176scm2: incb %cl
1177 jns scm1
1178
1179 movw $0x0003, %ax # Return back to mode 3
1180 int $0x10
1181 ret
1182
1183tstidx: outw %ax, %dx # OUT DX,AX and inidx
1184inidx: outb %al, %dx # Read from indexed VGA register
1185 incw %dx # AL=index, DX=index reg port -> AL=data
1186 inb %dx, %al # Data port is index port + 1
1187 decw %dx # Leave DX pointing at the index port again
1188 ret
1189
1190# Try to detect type of SVGA card and supply (usually approximate) video
1191# mode table for it.
1192
1193#ifdef CONFIG_VIDEO_SVGA
1194svga_modes:
1195 leaw svga_table, %si # Test all known SVGA adapters
1196dosvga: lodsw
1197 movw %ax, %bp # Default mode table
1198 orw %ax, %ax
1199 jz didsv1
1200
1201 lodsw # Pointer to test routine
1202 pushw %si
1203 pushw %di
1204 pushw %es
1205 movw $0xc000, %bx
1206 movw %bx, %es
1207 call *%ax # Call test routine
1208 popw %es
1209 popw %di
1210 popw %si
1211 orw %bp, %bp
1212 jz dosvga
1213
1214 movw %bp, %si # Found, copy the modes
1215 movb svga_prefix, %ah
1216cpsvga: lodsb
1217 orb %al, %al
1218 jz didsv
1219
1220 stosw
1221 movsw
1222 jmp cpsvga
1223
1224didsv: movw %si, card_name # Store pointer to card name
1225didsv1: ret
1226
1227# Table of all known SVGA cards. For each card, we store a pointer to
1228# a table of video modes supported by the card and a pointer to a routine
1229# used for testing of presence of the card. The video mode table is always
1230# followed by the name of the card or the chipset.
1231svga_table:
1232 .word ati_md, ati_test
1233 .word oak_md, oak_test
1234 .word paradise_md, paradise_test
1235 .word realtek_md, realtek_test
1236 .word s3_md, s3_test
1237 .word chips_md, chips_test
1238 .word video7_md, video7_test
1239 .word cirrus5_md, cirrus5_test
1240 .word cirrus6_md, cirrus6_test
1241 .word cirrus1_md, cirrus1_test
1242 .word ahead_md, ahead_test
1243 .word everex_md, everex_test
1244 .word genoa_md, genoa_test
1245 .word trident_md, trident_test
1246 .word tseng_md, tseng_test
1247 .word 0
1248
1249# Test routines and mode tables:
1250
# S3 - The test algorithm was taken from the SuperProbe package
# for XFree86 1.2.1. Report bugs to Christoph.Niemann@linux.org
#
# Register usage inside this routine:
#   cl = 0x35 (index of the CRT register being probed)
#   ch = 0x0f (mask for the low nibble)
#   dx = 0x3d4 (port handed to inidx/outidx)
#   bh = saved value of CRT register 0x38
#   bl = saved value of CRT register 0x35
# On failure %bp is cleared.  Every path ends by writing the saved value
# of CRT register 0x38 back through a tail-jump to outidx.
# inidx/outidx are defined elsewhere in this file; judging from their use
# here they read index %al into %al, resp. write %ah to index %al, at the
# port in %dx -- confirm against their definitions.
s3_test:
	movw	$0x0f35, %cx	# we store some constants in cl/ch
	movw	$0x03d4, %dx
	movb	$0x38, %al
	call	inidx
	movb	%al, %bh	# store current CRT-register 0x38
	movw	$0x0038, %ax
	call	outidx		# disable writing to special regs
	movb	%cl, %al	# check whether we can write special reg 0x35
	call	inidx
	movb	%al, %bl	# save the current value of CRT reg 0x35
	andb	$0xf0, %al	# clear bits 0-3
	movb	%al, %ah
	movb	%cl, %al	# and write it to CRT reg 0x35
	call	outidx
	call	inidx		# now read it back
	andb	%ch, %al	# clear the upper 4 bits
	jz	s3_2		# the first test failed. But we have a

	movb	%bl, %ah	# second chance
	movb	%cl, %al
	call	outidx
	jmp	s3_1		# do the other tests

s3_2:	movw	%cx, %ax	# load ah with 0xf and al with 0x35
	orb	%bl, %ah	# set the upper 4 bits of ah with the orig value
	call	outidx		# write ...
	call	inidx		# ... and reread
	andb	%ch, %al	# turn off the upper 4 bits (mask is ch=0x0f;
				# masking with cl=0x35 could never yield 0x0f,
				# which made the check below unreachable)
	pushw	%ax
	movb	%bl, %ah	# restore old value in register 0x35
	movb	%cl, %al
	call	outidx
	popw	%ax
	cmpb	%ch, %al	# setting lower 4 bits was successful => bad
	je	no_s3		# writing is allowed => this is not an S3

s3_1:	movw	$0x4838, %ax	# allow writing to special regs by putting
	call	outidx		# magic number into CRT-register 0x38
	movb	%cl, %al	# check whether we can write special reg 0x35
	call	inidx
	movb	%al, %bl
	andb	$0xf0, %al
	movb	%al, %ah
	movb	%cl, %al
	call	outidx
	call	inidx
	andb	%ch, %al
	jnz	no_s3		# no, we can't write => no S3

	movw	%cx, %ax
	orb	%bl, %ah
	call	outidx
	call	inidx
	andb	%ch, %al
	pushw	%ax
	movb	%bl, %ah	# restore old value in register 0x35
	movb	%cl, %al
	call	outidx
	popw	%ax
	cmpb	%ch, %al
	jne	no_s31		# writing not possible => no S3
	movb	$0x30, %al
	call	inidx		# now get the S3 id ...
	leaw	idS3, %di
	movw	$0x10, %cx
	# NOTE(review): scasb scans %es:(%di), and a hit (ZF set) takes the
	# failure path below.  Both the segment in use and the sense of the
	# branch look questionable; left unchanged pending confirmation.
	repne
	scasb
	je	no_s31

	movb	%bh, %ah
	movb	$0x38, %al
	jmp	s3rest

no_s3:	movb	$0x35, %al	# restore CRT register 0x35
	movb	%bl, %ah
	call	outidx
no_s31:	xorw	%bp, %bp	# Detection failed
s3rest:	movb	%bh, %ah
	movb	$0x38, %al	# restore old value of CRT register 0x38
	jmp	outidx
1334
1335idS3: .byte 0x81, 0x82, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95
1336 .byte 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa8, 0xb0
1337
1338s3_md: .byte 0x54, 0x2b, 0x84
1339 .byte 0x55, 0x19, 0x84
1340 .byte 0
1341 .ascii "S3"
1342 .byte 0
1343
1344# ATI cards.
# Compares the 9-byte signature at idati (via %si) with the 9 bytes at
# offset 0x31 (via %di) using repe cmpsb.  On a mismatch %bp is cleared,
# which s3_test documents as "Detection failed".
# NOTE(review): the segment in %es is set up by the caller, outside this
# view -- presumably the video BIOS segment; confirm.
1345ati_test:
1346 leaw idati, %si
1347 movw $0x31, %di
1348 movw $0x09, %cx
1349 repe
1350 cmpsb
1351 je atiok
1352
1353 xorw %bp, %bp
1354atiok: ret
1355
# Signature compared by ati_test (9 bytes, no terminator).
1356idati: .ascii "761295520"
1357
1358ati_md: .byte 0x23, 0x19, 0x84
1359 .byte 0x33, 0x2c, 0x84
1360 .byte 0x22, 0x1e, 0x64
1361 .byte 0x21, 0x19, 0x64
1362 .byte 0x58, 0x21, 0x50
1363 .byte 0x5b, 0x1e, 0x50
1364 .byte 0
1365 .ascii "ATI"
1366 .byte 0
1367
1368# AHEAD
# Writes the word 0x200f to port 0x3ce (index 0x0f, data 0x20), then reads
# the byte at port 0x3cf.  A read-back of 0x20 or 0x21 is accepted;
# anything else clears %bp (detection failed).
1369ahead_test:
1370 movw $0x200f, %ax
1371 movw $0x3ce, %dx
1372 outw %ax, %dx
1373 incw %dx
1374 inb %dx, %al
1375 cmpb $0x20, %al
1376 je isahed
1377
1378 cmpb $0x21, %al
1379 je isahed
1380
1381 xorw %bp, %bp
1382isahed: ret
1383
1384ahead_md:
1385 .byte 0x22, 0x2c, 0x84
1386 .byte 0x23, 0x19, 0x84
1387 .byte 0x24, 0x1c, 0x84
1388 .byte 0x2f, 0x32, 0xa0
1389 .byte 0x32, 0x22, 0x50
1390 .byte 0x34, 0x42, 0x50
1391 .byte 0
1392 .ascii "Ahead"
1393 .byte 0
1394
1395# Chips & Tech.
# Sets bit 4 of port 0x3c3, reads port 0x104 into %bl, then clears bit 4
# of port 0x3c3 again.  The card is accepted only if the byte read from
# port 0x104 was 0xa5; otherwise %bp is cleared (detection failed).
1396chips_test:
1397 movw $0x3c3, %dx
1398 inb %dx, %al
1399 orb $0x10, %al
1400 outb %al, %dx
1401 movw $0x104, %dx
1402 inb %dx, %al
1403 movb %al, %bl
1404 movw $0x3c3, %dx
1405 inb %dx, %al
1406 andb $0xef, %al
1407 outb %al, %dx
1408 cmpb $0xa5, %bl
1409 je cantok
1410
1411 xorw %bp, %bp
1412cantok: ret
1413
1414chips_md:
1415 .byte 0x60, 0x19, 0x84
1416 .byte 0x61, 0x32, 0x84
1417 .byte 0
1418 .ascii "Chips & Technologies"
1419 .byte 0
1420
1421# Cirrus Logic 5X0
# Steps, as visible below:
#  1. Save index 0x0c of port 0x3d4 in %bl and write 0 to it.
#  2. Read index 0x1f of port 0x3d4 into %bh.
#  3. Write the nibble-swapped value of %bh to index 6 of port 0x3c4 and
#     require the data port (0x3c5) to read back 0.
#  4. Write %bh unswapped to port 0x3c5 and require a read-back of 1.
# Any failed requirement clears %bp (detection failed).  In all cases the
# saved value in %bl is written back to index 0x0c of port 0x3d4.
1422cirrus1_test:
1423 movw $0x3d4, %dx
1424 movb $0x0c, %al
1425 outb %al, %dx
1426 incw %dx
1427 inb %dx, %al
1428 movb %al, %bl
1429 xorb %al, %al
1430 outb %al, %dx
1431 decw %dx
1432 movb $0x1f, %al
1433 outb %al, %dx
1434 incw %dx
1435 inb %dx, %al
1436 movb %al, %bh
# Build cx = (nibble-swapped bh) << 8 | 6
1437 xorb %ah, %ah
1438 shlb $4, %al
1439 movw %ax, %cx
1440 movb %bh, %al
1441 shrb $4, %al
1442 addw %ax, %cx
1443 shlw $8, %cx
1444 addw $6, %cx
1445 movw %cx, %ax
1446 movw $0x3c4, %dx
1447 outw %ax, %dx
1448 incw %dx
1449 inb %dx, %al
1450 andb %al, %al
1451 jnz nocirr
1452
1453 movb %bh, %al
1454 outb %al, %dx
1455 inb %dx, %al
1456 cmpb $0x01, %al
1457 je iscirr
1458
1459nocirr: xorw %bp, %bp
# Common exit: restore index 0x0c of port 0x3d4 from bl
1460iscirr: movw $0x3d4, %dx
1461 movb %bl, %al
1462 xorb %ah, %ah
1463 shlw $8, %ax
1464 addw $0x0c, %ax
1465 outw %ax, %dx
1466 ret
1467
1468cirrus1_md:
1469 .byte 0x1f, 0x19, 0x84
1470 .byte 0x20, 0x2c, 0x84
1471 .byte 0x22, 0x1e, 0x84
1472 .byte 0x31, 0x25, 0x64
1473 .byte 0
1474 .ascii "Cirrus Logic 5X0"
1475 .byte 0
1476
1477# Cirrus Logic 54XX
# Works on the indexed port 0x3c4.  Index 6 is saved in %bl and restored
# on exit; index 0x1e is saved in %bh and restored at c5xx.  Any failed
# check clears %bp (detection failed).
# NOTE(review): tstidx is defined outside this view; from its use here it
# appears to write %ah to index %al and return the read-back in %al --
# confirm against its definition.
1478cirrus5_test:
1479 movw $0x3c4, %dx
1480 movb $6, %al
1481 call inidx
1482 movb %al, %bl # BL=backup
# Writing 0 to index 6 must read back as 0x0f
1483 movw $6, %ax
1484 call tstidx
1485 cmpb $0x0f, %al
1486 jne c5fail
1487
# Writing 0x12 to index 6 must read back as 0x12
1488 movw $0x1206, %ax
1489 call tstidx
1490 cmpb $0x12, %al
1491 jne c5fail
1492
# The low six bits of index 0x1e must be clearable ...
1493 movb $0x1e, %al
1494 call inidx
1495 movb %al, %bh
1496 movb %bh, %ah
1497 andb $0xc0, %ah
1498 movb $0x1e, %al
1499 call tstidx
1500 andb $0x3f, %al
1501 jne c5xx
1502
# ... and settable
1503 movb $0x1e, %al
1504 movb %bh, %ah
1505 orb $0x3f, %ah
1506 call tstidx
1507 xorb $0x3f, %al
1508 andb $0x3f, %al
# Flags are preserved across the restore of index 0x1e so that the
# je below still sees the result of the last andb
1509c5xx: pushf
1510 movb $0x1e, %al
1511 movb %bh, %ah
1512 outw %ax, %dx
1513 popf
1514 je c5done
1515
1516c5fail: xorw %bp, %bp
1517c5done: movb $6, %al
1518 movb %bl, %ah
1519 outw %ax, %dx
1520 ret
1521
1522cirrus5_md:
1523 .byte 0x14, 0x19, 0x84
1524 .byte 0x54, 0x2b, 0x84
1525 .byte 0
1526 .ascii "Cirrus Logic 54XX"
1527 .byte 0
1528
1529# Cirrus Logic 64XX -- no known extra modes, but must be identified, because
1530# it's misidentified by the Ahead test.
# Works on the indexed port 0x3ce.  Index 0x0a is saved in %bl and
# restored on exit.  Writing 0xce to index 0x0a must read back 0, and
# writing 0xec must read back 1.  The high nibble of index 0xaa must
# then be 4, 5, 7 or 8.  Any failed check clears %bp (detection failed).
# NOTE(review): inidx/tstidx are defined outside this view.
1531cirrus6_test:
1532 movw $0x3ce, %dx
1533 movb $0x0a, %al
1534 call inidx
1535 movb %al, %bl # BL=backup
1536 movw $0xce0a, %ax
1537 call tstidx
1538 orb %al, %al
1539 jne c2fail
1540
1541 movw $0xec0a, %ax
1542 call tstidx
1543 cmpb $0x01, %al
1544 jne c2fail
1545
1546 movb $0xaa, %al
1547 call inidx # 4X, 5X, 7X and 8X are valid 64XX chip ID's.
1548 shrb $4, %al
# al = high nibble; the subtract/decrement chain hits zero for 4, 5, 7, 8
1549 subb $4, %al
1550 jz c6done
1551
1552 decb %al
1553 jz c6done
1554
1555 subb $2, %al
1556 jz c6done
1557
1558 decb %al
1559 jz c6done
1560
1561c2fail: xorw %bp, %bp
1562c6done: movb $0x0a, %al
1563 movb %bl, %ah
1564 outw %ax, %dx
1565 ret
1566
1567cirrus6_md:
1568 .byte 0
1569 .ascii "Cirrus Logic 64XX"
1570 .byte 0
1571
1572# Everex / Trident
# Issues int 0x10 with ax=0x7000, bx=0 and requires al == 0x70 on
# return; otherwise %bp is cleared (detection failed).  On success
# dx >> 4 is compared with 0x678 and 0x236: for either value %bp is
# pointed at trident_md instead, otherwise %bp is left as the caller
# set it.
1573everex_test:
1574 movw $0x7000, %ax
1575 xorw %bx, %bx
1576 int $0x10
1577 cmpb $0x70, %al
1578 jne noevrx
1579
1580 shrw $4, %dx
1581 cmpw $0x678, %dx
1582 je evtrid
1583
1584 cmpw $0x236, %dx
1585 jne evrxok
1586
1587evtrid: leaw trident_md, %bp
1588evrxok: ret
1589
1590noevrx: xorw %bp, %bp
1591 ret
1592
1593everex_md:
1594 .byte 0x03, 0x22, 0x50
1595 .byte 0x04, 0x3c, 0x50
1596 .byte 0x07, 0x2b, 0x64
1597 .byte 0x08, 0x4b, 0x64
1598 .byte 0x0a, 0x19, 0x84
1599 .byte 0x0b, 0x2c, 0x84
1600 .byte 0x16, 0x1e, 0x50
1601 .byte 0x18, 0x1b, 0x64
1602 .byte 0x21, 0x40, 0xa0
1603 .byte 0x40, 0x1e, 0x84
1604 .byte 0
1605 .ascii "Everex/Trident"
1606 .byte 0
1607
1608# Genoa.
# Reads the byte at %es:0x37 and uses it as an offset (%di) into the same
# segment.  Up to four bytes there are compared with idgenoa; a zero
# byte in idgenoa skips the comparison for that position (testb leaves
# ZF set, so loope continues).  The card is accepted when the loop
# counter %cx reaches zero; otherwise %bp is cleared (detection failed).
# NOTE(review): loope exits with cx == 0 after the fourth pass whether or
# not the fourth byte matched, so the final byte (0x66) appears not to
# influence the result -- confirm whether that is intended.
1609genoa_test:
1610 leaw idgenoa, %si # Check Genoa 'clues'
1611 xorw %ax, %ax
1612 movb %es:(0x37), %al
1613 movw %ax, %di
1614 movw $0x04, %cx
# Pre-decrement so the increments at l1 land on the first byte
1615 decw %si
1616 decw %di
1617l1: incw %si
1618 incw %di
1619 movb (%si), %al
1620 testb %al, %al
1621 jz l2
1622
1623 cmpb %es:(%di), %al
1624l2: loope l1
1625 orw %cx, %cx
1626 je isgen
1627
1628 xorw %bp, %bp
1629isgen: ret
1630
# Signature for genoa_test; 0x00 marks a position that is not compared.
1631idgenoa: .byte 0x77, 0x00, 0x99, 0x66
1632
1633genoa_md:
1634 .byte 0x58, 0x20, 0x50
1635 .byte 0x5a, 0x2a, 0x64
1636 .byte 0x60, 0x19, 0x84
1637 .byte 0x61, 0x1d, 0x84
1638 .byte 0x62, 0x20, 0x84
1639 .byte 0x63, 0x2c, 0x84
1640 .byte 0x64, 0x3c, 0x84
1641 .byte 0x6b, 0x4f, 0x64
1642 .byte 0x72, 0x3c, 0x50
1643 .byte 0x74, 0x42, 0x50
1644 .byte 0x78, 0x4b, 0x64
1645 .byte 0
1646 .ascii "Genoa"
1647 .byte 0
1648
1649# OAK
# Compares the 8-byte signature at idoakvga (via %si) with the 8 bytes at
# offset 0x08 (via %di) using repe cmpsb.  On a mismatch %bp is cleared
# (detection failed).
# NOTE(review): the segment in %es is set up by the caller, outside this
# view -- presumably the video BIOS segment; confirm.
1650oak_test:
1651 leaw idoakvga, %si
1652 movw $0x08, %di
1653 movw $0x08, %cx
1654 repe
1655 cmpsb
1656 je isoak
1657
1658 xorw %bp, %bp
1659isoak: ret
1660
# Signature compared by oak_test (8 bytes, trailing space included).
1661idoakvga: .ascii "OAK VGA "
1662
1663oak_md: .byte 0x4e, 0x3c, 0x50
1664 .byte 0x4f, 0x3c, 0x84
1665 .byte 0x50, 0x19, 0x84
1666 .byte 0x51, 0x2b, 0x84
1667 .byte 0
1668 .ascii "OAK"
1669 .byte 0
1670
1671# WD Paradise.
# Compares the 4-byte signature at idparadise (via %si) with the 4 bytes
# at offset 0x7d (via %di) using repe cmpsb.  On a mismatch %bp is
# cleared (detection failed).
# NOTE(review): the segment in %es is set up by the caller, outside this
# view -- presumably the video BIOS segment; confirm.
1672paradise_test:
1673 leaw idparadise, %si
1674 movw $0x7d, %di
1675 movw $0x04, %cx
1676 repe
1677 cmpsb
1678 je ispara
1679
1680 xorw %bp, %bp
1681ispara: ret
1682
# Signature compared by paradise_test (4 bytes).
1683idparadise: .ascii "VGA="
1684
1685paradise_md:
1686 .byte 0x41, 0x22, 0x50
1687 .byte 0x47, 0x1c, 0x84
1688 .byte 0x55, 0x19, 0x84
1689 .byte 0x54, 0x2c, 0x84
1690 .byte 0
1691 .ascii "Paradise"
1692 .byte 0
1693
1694# Trident.
# Selects index 0x0e on port 0x3c4 and works on the data port 0x3c5:
# the current value is kept in %ah, 0 is written, and the value read
# back afterwards ends up in %ah while the original returns to %al.
# The original value is then written back with bit 1 inverted (see the
# author's remark below).  The card is accepted when the low nibble of
# the read-back equals 2; otherwise %bp is cleared (detection failed).
1695trident_test:
1696 movw $0x3c4, %dx
1697 movb $0x0e, %al
1698 outb %al, %dx
1699 incw %dx
1700 inb %dx, %al
1701 xchgb %al, %ah
1702 xorb %al, %al
1703 outb %al, %dx
1704 inb %dx, %al
1705 xchgb %ah, %al
1706 movb %al, %bl # Strange thing ... in the book this wasn't
1707 andb $0x02, %bl # necessary but it worked on my card which
1708 jz setb2 # is a trident. Without it the screen goes
1709 # blurred ...
1710 andb $0xfd, %al
1711 jmp clrb2
1712
1713setb2: orb $0x02, %al
1714clrb2: outb %al, %dx
1715 andb $0x0f, %ah
1716 cmpb $0x02, %ah
1717 je istrid
1718
1719 xorw %bp, %bp
1720istrid: ret
1721
1722trident_md:
1723 .byte 0x50, 0x1e, 0x50
1724 .byte 0x51, 0x2b, 0x50
1725 .byte 0x52, 0x3c, 0x50
1726 .byte 0x57, 0x19, 0x84
1727 .byte 0x58, 0x1e, 0x84
1728 .byte 0x59, 0x2b, 0x84
1729 .byte 0x5a, 0x3c, 0x84
1730 .byte 0
1731 .ascii "Trident"
1732 .byte 0
1733
1734# Tseng.
# Saves the byte at port 0x3cd in %bl, writes 0x55, reads it back into
# %ah and restores the saved byte.  The card is accepted when 0x55 was
# read back; otherwise %bp is cleared (detection failed).
# The isnot label is also the failure exit of video7_test.
1735tseng_test:
1736 movw $0x3cd, %dx
1737 inb %dx, %al # Could things be this simple ! :-)
1738 movb %al, %bl
1739 movb $0x55, %al
1740 outb %al, %dx
1741 inb %dx, %al
1742 movb %al, %ah
1743 movb %bl, %al
1744 outb %al, %dx
1745 cmpb $0x55, %ah
1746 je istsen
1747
1748isnot: xorw %bp, %bp
1749istsen: ret
1750
1751tseng_md:
1752 .byte 0x26, 0x3c, 0x50
1753 .byte 0x2a, 0x28, 0x64
1754 .byte 0x23, 0x19, 0x84
1755 .byte 0x24, 0x1c, 0x84
1756 .byte 0x22, 0x2c, 0x84
1757 .byte 0x21, 0x3c, 0x84
1758 .byte 0
1759 .ascii "Tseng"
1760 .byte 0
1761
1762# Video7.
# Picks the index port from bit 0 of the byte at port 0x3cc: 0x3b4 when
# the bit is clear, 0x3d4 when it is set.  Index 0x0c is saved in %bl,
# 0x55 is written to it, index 0x1f is read into %bh, and index 0x0c is
# restored.  The card is accepted when %bh equals 0x55 ^ 0xea; in that
# case svga_prefix is set to VIDEO_FIRST_V7>>8.  On failure control
# jumps to isnot (in tseng_test), which clears %bp.
1763video7_test:
1764 movw $0x3cc, %dx
1765 inb %dx, %al
1766 movw $0x3b4, %dx
1767 andb $0x01, %al
1768 jz even7
1769
1770 movw $0x3d4, %dx
1771even7: movb $0x0c, %al
1772 outb %al, %dx
1773 incw %dx
1774 inb %dx, %al
1775 movb %al, %bl
1776 movb $0x55, %al
1777 outb %al, %dx
1778 inb %dx, %al
1779 decw %dx
1780 movb $0x1f, %al
1781 outb %al, %dx
1782 incw %dx
1783 inb %dx, %al
1784 movb %al, %bh
# Restore index 0x0c from bl before evaluating the result
1785 decw %dx
1786 movb $0x0c, %al
1787 outb %al, %dx
1788 incw %dx
1789 movb %bl, %al
1790 outb %al, %dx
1791 movb $0x55, %al
1792 xorb $0xea, %al
1793 cmpb %bh, %al
1794 jne isnot
1795
1796 movb $VIDEO_FIRST_V7>>8, svga_prefix # Use special mode switching
1797 ret
1798
1799video7_md:
1800 .byte 0x40, 0x2b, 0x50
1801 .byte 0x43, 0x3c, 0x50
1802 .byte 0x44, 0x3c, 0x64
1803 .byte 0x41, 0x19, 0x84
1804 .byte 0x42, 0x2c, 0x84
1805 .byte 0x45, 0x1c, 0x84
1806 .byte 0
1807 .ascii "Video 7"
1808 .byte 0
1809
1810# Realtek VGA
# Compares the 11-byte signature at idrtvga (via %si) with the 11 bytes
# at offset 0x45 (via %di) using repe cmpsb.  On a mismatch %bp is
# cleared (detection failed).
# NOTE(review): the segment in %es is set up by the caller, outside this
# view -- presumably the video BIOS segment; confirm.
1811realtek_test:
1812 leaw idrtvga, %si
1813 movw $0x45, %di
1814 movw $0x0b, %cx
1815 repe
1816 cmpsb
1817 je isrt
1818
1819 xorw %bp, %bp
1820isrt: ret
1821
# Signature compared by realtek_test (11 bytes, no terminator).
1822idrtvga: .ascii "REALTEK VGA"
1823
1824realtek_md:
1825 .byte 0x1a, 0x3c, 0x50
1826 .byte 0x1b, 0x19, 0x84
1827 .byte 0x1c, 0x1e, 0x84
1828 .byte 0x1d, 0x2b, 0x84
1829 .byte 0x1e, 0x3c, 0x84
1830 .byte 0
1831 .ascii "REALTEK"
1832 .byte 0
1833
1834#endif /* CONFIG_VIDEO_SVGA */
1835
1836# User-defined local mode table (VGA only)
1837#ifdef CONFIG_VIDEO_LOCAL
# Copy the entries of local_mode_table to the location addressed by %di.
# Each pass loads one word (the mode ID); a zero word ends the copy.
# Otherwise the ID is stored with stosw and the following word (the
# dimensions) is copied with movsw, i.e. four bytes per entry.
# NOTE(review): %di and %es are set up by the caller, outside this view.
1838local_modes:
1839 leaw local_mode_table, %si
1840locm1: lodsw
1841 orw %ax, %ax
1842 jz locm2
1843
1844 stosw
1845 movsw
1846 jmp locm1
1847
1848locm2: ret
1849
1850# This is the table of local video modes which can be supplied manually
1851# by the user. Each entry consists of mode ID (word) and dimensions
1852# (byte for column count and another byte for row count). These modes
1853# are placed before all SVGA and VESA modes and override them if table
1854# compacting is enabled. The table must end with a zero word followed
1855# by NUL-terminated video adapter name.
1856local_mode_table:
1857 .word 0x0100 # Example: 40x25
1858 .byte 25,40
1859 .word 0
1860 .ascii "Local"
1861 .byte 0
1862#endif /* CONFIG_VIDEO_LOCAL */
1863
1864# Read a key and return the ASCII code in al, scan code in ah
# Implemented as int 0x16 with ah = 0.  getkt jumps here once a key is
# pending.
1865getkey: xorb %ah, %ah
1866 int $0x16
1867 ret
1868
1869# Read a key with a timeout of 30 seconds.
1870# The hardware clock is used to get the time.
# The deadline is (gettime result + 30) modulo 60, kept in %cl.  The loop
# polls int 0x16 with ah = 1 and jumps to getkey as soon as it returns
# with ZF clear.  Otherwise it re-reads the time until it equals the
# deadline, then returns 0x20 (space) in %al.
# NOTE(review): gettime is defined outside this view; presumably it
# returns the current seconds value in %al -- confirm.
1871getkt: call gettime
1872 addb $30, %al # Wait 30 seconds
1873 cmpb $60, %al
1874 jl lminute
1875
1876 subb $60, %al
1877lminute:
1878 movb %al, %cl
1879again: movb $0x01, %ah
1880 int $0x16
1881 jnz getkey # key pressed, so get it
1882
1883 call gettime
1884 cmpb %cl, %al
1885 jne again
1886
1887 movb $0x20, %al # timeout, return `space'
1888 ret
1889
1890# Flush the keyboard buffer
# Repeats int 0x16 with ah = 1 (check for a pending key); while one is
# pending (ZF clear) it is read with ah = 0 and discarded.
1891flush: movb $0x01, %ah
1892 int $0x16
1893 jz empty
1894
1895 xorb %ah, %ah
1896 int $0x16
1897 jmp flush
1898
1899empty: ret
1900
1901# Print hexadecimal number.
# Three entry points that fall through into each other:
#   prthw - print %ax as four hex digits (high byte first)
#   prthb - print %al as two hex digits (high nibble first)
#   prthn - print the value in %al (0..15) as one hex digit
# prthw and prthb preserve %ax around the call for the upper half and
# then fall through for the lower half.  prthn converts to ASCII
# ('0'-'9', 'A'-'F') and tail-jumps to prtchr, which is defined outside
# this view.
1902prthw: pushw %ax
1903 movb %ah, %al
1904 call prthb
1905 popw %ax
1906prthb: pushw %ax
1907 shrb $4, %al
1908 call prthn
1909 popw %ax
1910 andb $0x0f, %al
1911prthn: cmpb $0x0a, %al
1912 jc prth1
1913
1914 addb $0x07, %al
1915prth1: addb $0x30, %al
1916 jmp prtchr
1917
1918# Print decimal number in al
# Divides %al by 10 (quotient in %al, remainder in %ah).  A quotient
# above 9 is printed by a recursive call, otherwise as a single digit;
# the remainder digit is printed last.  %ax and %cx are preserved.
# Note that a leading '0' is printed for values below 10.
1919prtdec: pushw %ax
1920 pushw %cx
1921 xorb %ah, %ah
1922 movb $0x0a, %cl
1923 idivb %cl
1924 cmpb $0x09, %al
1925 jbe lt100
1926
1927 call prtdec
1928 jmp skip10
1929
1930lt100: addb $0x30, %al
1931 call prtchr
1932skip10: movb %ah, %al
1933 addb $0x30, %al
1934 call prtchr
1935 popw %cx
1936 popw %ax
1937 ret
1938
# Fetch the monitor's EDID block through the VBE/DDC BIOS service and
# store it at offset 0x140 of the segment in %fs.  Without
# CONFIG_FIRMWARE_EDID the routine is just a ret.
#
# The 128-byte destination is first filled with 0x13.  Nothing more is
# done unless vbe_version >= 0x0200 and the "report capability" call
# (int 0x10, ax=0x4f15, bl=0) returns ah == 0 and al == 0x4f.  All
# registers pushed on entry are restored before returning.
1939store_edid:
1940#ifdef CONFIG_FIRMWARE_EDID
1941 pushw %es # just save all registers
1942 pushw %ax
1943 pushw %bx
1944 pushw %cx
1945 pushw %dx
1946 pushw %di
1947
# es = fs, so the string stores below target %fs:0x140
1948 pushw %fs
1949 popw %es
1950
1951 movl $0x13131313, %eax # memset block with 0x13
1952 movw $32, %cx
1953 movw $0x140, %di
1954 cld
1955 rep
1956 stosl
1957
1958 cmpw $0x0200, vbe_version # only do EDID on >= VBE2.0
1959 jl no_edid
1960
1961 pushw %es # save ES
1962 xorw %di, %di # Report Capability
1963 pushw %di
1964 popw %es # ES:DI must be 0:0
1965 movw $0x4f15, %ax
1966 xorw %bx, %bx
1967 xorw %cx, %cx
1968 int $0x10
1969 popw %es # restore ES
1970
1971 cmpb $0x00, %ah # call successful
1972 jne no_edid
1973
1974 cmpb $0x4f, %al # function supported
1975 jne no_edid
1976
# Second call with bx = 1; es:di = fs:0x140 is the destination buffer
1977 movw $0x4f15, %ax # do VBE/DDC
1978 movw $0x01, %bx
1979 movw $0x00, %cx
1980 movw $0x00, %dx
1981 movw $0x140, %di
1982 int $0x10
1983
1984no_edid:
1985 popw %di # restore all registers
1986 popw %dx
1987 popw %cx
1988 popw %bx
1989 popw %ax
1990 popw %es
1991#endif
1992 ret
1993
1994# VIDEO_SELECT-only variables
1995mt_end: .word 0 # End of video mode table if built
1996edit_buf: .space 6 # Line editor buffer
1997card_name: .word 0 # Pointer to adapter name
1998scanning: .byte 0 # Performing mode scan
1999do_restore: .byte 0 # Screen contents altered during mode change
2000svga_prefix: .byte VIDEO_FIRST_BIOS>>8 # Default prefix for BIOS modes
2001graphic_mode: .byte 0 # Graphic mode with a linear frame buffer
2002dac_size: .byte 6 # DAC bit depth
2003vbe_version: .word 0 # VBE bios version
2004
2005# Status messages
2006keymsg: .ascii "Press <RETURN> to see video modes available, "
2007 .ascii "<SPACE> to continue or wait 30 secs"
2008 .byte 0x0d, 0x0a, 0
2009
2010listhdr: .byte 0x0d, 0x0a
2011 .ascii "Mode: COLSxROWS:"
2012
2013crlft: .byte 0x0d, 0x0a, 0
2014
2015prompt: .byte 0x0d, 0x0a
2016 .asciz "Enter mode number or `scan': "
2017
2018unknt: .asciz "Unknown mode ID. Try again."
2019
2020badmdt: .ascii "You passed an undefined mode number."
2021 .byte 0x0d, 0x0a, 0
2022
2023vesaer: .ascii "Error: Scanning of VESA modes failed. Please "
2024 .ascii "report to <mj@ucw.cz>."
2025 .byte 0x0d, 0x0a, 0
2026
2027old_name: .asciz "CGA/MDA/HGA"
2028
2029ega_name: .asciz "EGA"
2030
2031svga_name: .ascii " "
2032
2033vga_name: .asciz "VGA"
2034
2035vesa_name: .asciz "VESA"
2036
2037name_bann: .asciz "Video adapter: "
2038#endif /* CONFIG_VIDEO_SELECT */
2039
2040# Other variables:
2041adapter: .byte 0 # Video adapter: 0=CGA/MDA/HGA,1=EGA,2=VGA
2042video_segment: .word 0xb800 # Video memory segment
2043force_size: .word 0 # Use this size instead of the one in BIOS vars
diff --git a/arch/i386/boot/video.c b/arch/i386/boot/video.c
new file mode 100644
index 000000000000..958130ef0042
--- /dev/null
+++ b/arch/i386/boot/video.c
@@ -0,0 +1,461 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/video.c
13 *
14 * Select video mode
15 */
16
17#include "boot.h"
18#include "video.h"
19#include "vesa.h"
20
21/*
22 * Mode list variables
23 */
24static struct card_info cards[]; /* List of cards to probe for */
25
26/*
27 * Common variables
28 */
29int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
30u16 video_segment;
31int force_x, force_y; /* Don't query the BIOS for cols/rows */
32
33int do_restore = 0; /* Screen contents changed during mode flip */
34int graphic_mode; /* Graphic mode with linear frame buffer */
35
36static void store_cursor_position(void)
37{
38 u16 curpos;
39 u16 ax, bx;
40
41 ax = 0x0300;
42 bx = 0;
43 asm(INT10
44 : "=d" (curpos), "+a" (ax), "+b" (bx)
45 : : "ecx", "esi", "edi");
46
47 boot_params.screen_info.orig_x = curpos;
48 boot_params.screen_info.orig_y = curpos >> 8;
49}
50
51static void store_video_mode(void)
52{
53 u16 ax, page;
54
55 /* N.B.: the saving of the video page here is a bit silly,
56 since we pretty much assume page 0 everywhere. */
57 ax = 0x0f00;
58 asm(INT10
59 : "+a" (ax), "=b" (page)
60 : : "ecx", "edx", "esi", "edi");
61
62 /* Not all BIOSes are clean with respect to the top bit */
63 boot_params.screen_info.orig_video_mode = ax & 0x7f;
64 boot_params.screen_info.orig_video_page = page;
65}
66
67/*
68 * Store the video mode parameters for later usage by the kernel.
69 * This is done by asking the BIOS except for the rows/columns
70 * parameters in the default 80x25 mode -- these are set directly,
71 * because some very obscure BIOSes supply insane values.
72 */
73static void store_mode_params(void)
74{
75 u16 font_size;
76 int x, y;
77
78 /* For graphics mode, it is up to the mode-setting driver
79 (currently only video-vesa.c) to store the parameters */
80 if (graphic_mode)
81 return;
82
83 store_cursor_position();
84 store_video_mode();
85
86 if (boot_params.screen_info.orig_video_mode == 0x07) {
87 /* MDA, HGC, or VGA in monochrome mode */
88 video_segment = 0xb000;
89 } else {
90 /* CGA, EGA, VGA and so forth */
91 video_segment = 0xb800;
92 }
93
94 set_fs(0);
95 font_size = rdfs16(0x485); /* Font size, BIOS area */
96 boot_params.screen_info.orig_video_points = font_size;
97
98 x = rdfs16(0x44a);
99 y = (adapter == ADAPTER_CGA) ? 25 : rdfs8(0x484)+1;
100
101 if (force_x)
102 x = force_x;
103 if (force_y)
104 y = force_y;
105
106 boot_params.screen_info.orig_video_cols = x;
107 boot_params.screen_info.orig_video_lines = y;
108}
109
110/* Probe the video drivers and have them generate their mode lists. */
111static void probe_cards(int unsafe)
112{
113 struct card_info *card;
114 static u8 probed[2];
115
116 if (probed[unsafe])
117 return;
118
119 probed[unsafe] = 1;
120
121 for (card = video_cards; card < video_cards_end; card++) {
122 if (card->unsafe == unsafe) {
123 if (card->probe)
124 card->nmodes = card->probe();
125 else
126 card->nmodes = 0;
127 }
128 }
129}
130
131/* Test if a mode is defined */
132int mode_defined(u16 mode)
133{
134 struct card_info *card;
135 struct mode_info *mi;
136 int i;
137
138 for (card = video_cards; card < video_cards_end; card++) {
139 mi = card->modes;
140 for (i = 0; i < card->nmodes; i++, mi++) {
141 if (mi->mode == mode)
142 return 1;
143 }
144 }
145
146 return 0;
147}
148
149/* Set mode (without recalc) */
150static int raw_set_mode(u16 mode)
151{
152 int nmode, i;
153 struct card_info *card;
154 struct mode_info *mi;
155
156 /* Drop the recalc bit if set */
157 mode &= ~VIDEO_RECALC;
158
159 /* Scan for mode based on fixed ID, position, or resolution */
160 nmode = 0;
161 for (card = video_cards; card < video_cards_end; card++) {
162 mi = card->modes;
163 for (i = 0; i < card->nmodes; i++, mi++) {
164 int visible = mi->x || mi->y;
165
166 if ((mode == nmode && visible) ||
167 mode == mi->mode ||
168 mode == (mi->y << 8)+mi->x)
169 return card->set_mode(mi);
170
171 if (visible)
172 nmode++;
173 }
174 }
175
176 /* Nothing found? Is it an "exceptional" (unprobed) mode? */
177 for (card = video_cards; card < video_cards_end; card++) {
178 if (mode >= card->xmode_first &&
179 mode < card->xmode_first+card->xmode_n) {
180 struct mode_info mix;
181 mix.mode = mode;
182 mix.x = mix.y = 0;
183 return card->set_mode(&mix);
184 }
185 }
186
187 /* Otherwise, failure... */
188 return -1;
189}
190
191/*
192 * Recalculate the vertical video cutoff (hack!)
193 */
194static void vga_recalc_vertical(void)
195{
196 unsigned int font_size, rows;
197 u16 crtc;
198 u8 pt, ov;
199
200 set_fs(0);
201 font_size = rdfs8(0x485); /* BIOS: font size (pixels) */
202 rows = force_y ? force_y : rdfs8(0x484)+1; /* Text rows */
203
204 rows *= font_size; /* Visible scan lines */
205 rows--; /* ... minus one */
206
207 crtc = vga_crtc();
208
209 pt = in_idx(crtc, 0x11);
210 pt &= ~0x80; /* Unlock CR0-7 */
211 out_idx(pt, crtc, 0x11);
212
213 out_idx((u8)rows, crtc, 0x12); /* Lower height register */
214
215 ov = in_idx(crtc, 0x07); /* Overflow register */
216 ov &= 0xbd;
217 ov |= (rows >> (8-1)) & 0x02;
218 ov |= (rows >> (9-6)) & 0x40;
219 out_idx(ov, crtc, 0x07);
220}
221
222/* Set mode (with recalc if specified) */
223static int set_mode(u16 mode)
224{
225 int rv;
226
227 /* Very special mode numbers... */
228 if (mode == VIDEO_CURRENT_MODE)
229 return 0; /* Nothing to do... */
230 else if (mode == NORMAL_VGA)
231 mode = VIDEO_80x25;
232 else if (mode == EXTENDED_VGA)
233 mode = VIDEO_8POINT;
234
235 rv = raw_set_mode(mode);
236 if (rv)
237 return rv;
238
239 if (mode & VIDEO_RECALC)
240 vga_recalc_vertical();
241
242 return 0;
243}
244
245static unsigned int get_entry(void)
246{
247 char entry_buf[4];
248 int i, len = 0;
249 int key;
250 unsigned int v;
251
252 do {
253 key = getchar();
254
255 if (key == '\b') {
256 if (len > 0) {
257 puts("\b \b");
258 len--;
259 }
260 } else if ((key >= '0' && key <= '9') ||
261 (key >= 'A' && key <= 'Z') ||
262 (key >= 'a' && key <= 'z')) {
263 if (len < sizeof entry_buf) {
264 entry_buf[len++] = key;
265 putchar(key);
266 }
267 }
268 } while (key != '\r');
269 putchar('\n');
270
271 if (len == 0)
272 return VIDEO_CURRENT_MODE; /* Default */
273
274 v = 0;
275 for (i = 0; i < len; i++) {
276 v <<= 4;
277 key = entry_buf[i] | 0x20;
278 v += (key > '9') ? key-'a'+10 : key-'0';
279 }
280
281 return v;
282}
283
284static void display_menu(void)
285{
286 struct card_info *card;
287 struct mode_info *mi;
288 char ch;
289 int i;
290
291 puts("Mode: COLSxROWS:\n");
292
293 ch = '0';
294 for (card = video_cards; card < video_cards_end; card++) {
295 mi = card->modes;
296 for (i = 0; i < card->nmodes; i++, mi++) {
297 int visible = mi->x && mi->y;
298 u16 mode_id = mi->mode ? mi->mode :
299 (mi->y << 8)+mi->x;
300
301 if (!visible)
302 continue; /* Hidden mode */
303
304 printf("%c %04X %3dx%-3d %s\n",
305 ch, mode_id, mi->x, mi->y, card->card_name);
306
307 if (ch == '9')
308 ch = 'a';
309 else if (ch == 'z' || ch == ' ')
310 ch = ' '; /* Out of keys... */
311 else
312 ch++;
313 }
314 }
315}
316
317#define H(x) ((x)-'a'+10)
318#define SCAN ((H('s')<<12)+(H('c')<<8)+(H('a')<<4)+H('n'))
319
320static unsigned int mode_menu(void)
321{
322 int key;
323 unsigned int sel;
324
325 puts("Press <ENTER> to see video modes available, "
326 "<SPACE> to continue, or wait 30 sec\n");
327
328 kbd_flush();
329 while (1) {
330 key = getchar_timeout();
331 if (key == ' ' || key == 0)
332 return VIDEO_CURRENT_MODE; /* Default */
333 if (key == '\r')
334 break;
335 putchar('\a'); /* Beep! */
336 }
337
338
339 for (;;) {
340 display_menu();
341
342 puts("Enter a video mode or \"scan\" to scan for "
343 "additional modes: ");
344 sel = get_entry();
345 if (sel != SCAN)
346 return sel;
347
348 probe_cards(1);
349 }
350}
351
352#ifdef CONFIG_VIDEO_RETAIN
353/* Save screen content to the heap */
354struct saved_screen {
355 int x, y;
356 int curx, cury;
357 u16 *data;
358} saved;
359
360static void save_screen(void)
361{
362 /* Should be called after store_mode_params() */
363 saved.x = boot_params.screen_info.orig_video_cols;
364 saved.y = boot_params.screen_info.orig_video_lines;
365 saved.curx = boot_params.screen_info.orig_x;
366 saved.cury = boot_params.screen_info.orig_y;
367
368 if (heap_free() < saved.x*saved.y*sizeof(u16)+512)
369 return; /* Not enough heap to save the screen */
370
371 saved.data = GET_HEAP(u16, saved.x*saved.y);
372
373 set_fs(video_segment);
374 copy_from_fs(saved.data, 0, saved.x*saved.y*sizeof(u16));
375}
376
377static void restore_screen(void)
378{
379 /* Should be called after store_mode_params() */
380 int xs = boot_params.screen_info.orig_video_cols;
381 int ys = boot_params.screen_info.orig_video_lines;
382 int y;
383 addr_t dst = 0;
384 u16 *src = saved.data;
385 u16 ax, bx, dx;
386
387 if (graphic_mode)
388 return; /* Can't restore onto a graphic mode */
389
390 if (!src)
391 return; /* No saved screen contents */
392
393 /* Restore screen contents */
394
395 set_fs(video_segment);
396 for (y = 0; y < ys; y++) {
397 int npad;
398
399 if (y < saved.y) {
400 int copy = (xs < saved.x) ? xs : saved.x;
401 copy_to_fs(dst, src, copy*sizeof(u16));
402 dst += copy*sizeof(u16);
403 src += saved.x;
404 npad = (xs < saved.x) ? 0 : xs-saved.x;
405 } else {
406 npad = xs;
407 }
408
409 /* Writes "npad" blank characters to
410 video_segment:dst and advances dst */
411 asm volatile("pushw %%es ; "
412 "movw %2,%%es ; "
413 "shrw %%cx ; "
414 "jnc 1f ; "
415 "stosw \n\t"
416 "1: rep;stosl ; "
417 "popw %%es"
418 : "+D" (dst), "+c" (npad)
419 : "bdS" (video_segment),
420 "a" (0x07200720));
421 }
422
423 /* Restore cursor position */
424 ax = 0x0200; /* Set cursor position */
425 bx = 0; /* Page number (<< 8) */
426 dx = (saved.cury << 8)+saved.curx;
427 asm volatile(INT10
428 : "+a" (ax), "+b" (bx), "+d" (dx)
429 : : "ecx", "esi", "edi");
430}
431#else
432#define save_screen() ((void)0)
433#define restore_screen() ((void)0)
434#endif
435
436void set_video(void)
437{
438 u16 mode = boot_params.hdr.vid_mode;
439
440 RESET_HEAP();
441
442 store_mode_params();
443 save_screen();
444 probe_cards(0);
445
446 for (;;) {
447 if (mode == ASK_VGA)
448 mode = mode_menu();
449
450 if (!set_mode(mode))
451 break;
452
453 printf("Undefined video mode number: %x\n", mode);
454 mode = ASK_VGA;
455 }
456 vesa_store_edid();
457 store_mode_params();
458
459 if (do_restore)
460 restore_screen();
461}
diff --git a/arch/i386/boot/video.h b/arch/i386/boot/video.h
new file mode 100644
index 000000000000..b92447d51213
--- /dev/null
+++ b/arch/i386/boot/video.h
@@ -0,0 +1,152 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/video.h
13 *
14 * Header file for the real-mode video probing code
15 */
16
17#ifndef BOOT_VIDEO_H
18#define BOOT_VIDEO_H
19
20#include <linux/types.h>
21
22/* Enable autodetection of SVGA adapters and modes. */
23#undef CONFIG_VIDEO_SVGA
24
25/* Enable autodetection of VESA modes */
26#define CONFIG_VIDEO_VESA
27
28/* Retain screen contents when switching modes */
29#define CONFIG_VIDEO_RETAIN
30
31/* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour) */
32#undef CONFIG_VIDEO_400_HACK
33
34/* This code uses an extended set of video mode numbers. These include:
35 * Aliases for standard modes
36 * NORMAL_VGA (-1)
37 * EXTENDED_VGA (-2)
38 * ASK_VGA (-3)
39 * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
40 * of compatibility when extending the table. These are between 0x00 and 0xff.
41 */
42#define VIDEO_FIRST_MENU 0x0000
43
44/* Standard BIOS video modes (BIOS number + 0x0100) */
45#define VIDEO_FIRST_BIOS 0x0100
46
47/* VESA BIOS video modes (VESA number + 0x0200) */
48#define VIDEO_FIRST_VESA 0x0200
49
50/* Video7 special modes (BIOS number + 0x0900) */
51#define VIDEO_FIRST_V7 0x0900
52
53/* Special video modes */
54#define VIDEO_FIRST_SPECIAL 0x0f00
55#define VIDEO_80x25 0x0f00
56#define VIDEO_8POINT 0x0f01
57#define VIDEO_80x43 0x0f02
58#define VIDEO_80x28 0x0f03
59#define VIDEO_CURRENT_MODE 0x0f04
60#define VIDEO_80x30 0x0f05
61#define VIDEO_80x34 0x0f06
62#define VIDEO_80x60 0x0f07
63#define VIDEO_GFX_HACK 0x0f08
64#define VIDEO_LAST_SPECIAL 0x0f09
65
66/* Video modes given by resolution */
67#define VIDEO_FIRST_RESOLUTION 0x1000
68
69/* The "recalculate timings" flag */
70#define VIDEO_RECALC 0x8000
71
72/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */
73#ifdef CONFIG_VIDEO_RETAIN
74void store_screen(void);
75#define DO_STORE() store_screen()
76#else
77#define DO_STORE() ((void)0)
78#endif /* CONFIG_VIDEO_RETAIN */
79
80/*
81 * Mode table structures
82 */
83
/*
 * One entry of a card's mode list.  video.c matches a requested mode
 * either against "mode" or against (y << 8) + x, and treats entries
 * whose dimensions are zero as hidden (not shown in the menu).
 */
84struct mode_info {
85 u16 mode; /* Mode number (vga= style) */
86 u8 x, y; /* Width, height */
87};
88
/*
 * Description of one video driver ("card").  Instances are collected
 * between video_cards[] and video_cards_end[]; video.c walks that range
 * to probe the cards and to look up modes.
 *
 * set_mode  - hook called with the chosen mode entry; video.c passes
 *             its return value on, with non-zero meaning failure
 * probe     - optional hook; its return value is stored in nmodes
 * modes     - array of nmodes entries
 */
89struct card_info {
90 const char *card_name;
91 int (*set_mode)(struct mode_info *mode);
92 int (*probe)(void);
93 struct mode_info *modes;
94 int nmodes; /* Number of probed modes so far */
95 int unsafe; /* Probing is unsafe, only do after "scan" */
96 u16 xmode_first; /* Unprobed modes to try to call anyway */
97 u16 xmode_n; /* Size of unprobed mode range */
98};
99
100#define __videocard struct card_info __attribute__((section(".videocards")))
101extern struct card_info video_cards[], video_cards_end[];
102
103int mode_defined(u16 mode); /* video.c */
104
105/* Basic video information */
106#define ADAPTER_CGA 0 /* CGA/MDA/HGC */
107#define ADAPTER_EGA 1
108#define ADAPTER_VGA 2
109
110extern int adapter;
111extern u16 video_segment;
112extern int force_x, force_y; /* Don't query the BIOS for cols/rows */
113extern int do_restore; /* Restore screen contents */
114extern int graphic_mode; /* Graphics mode with linear frame buffer */
115
116/*
117 * int $0x10 is notorious for touching registers it shouldn't.
118 * gcc doesn't like %ebp being clobbered, so define it as a push/pop
119 * sequence here.
120 *
121 * A number of systems, including the original PC can clobber %bp in
122 * certain circumstances, like when scrolling. There exists at least
123 * one Trident video card which could clobber DS under a set of
124 * circumstances that we are unlikely to encounter (scrolling when
125 * using an extended graphics mode of more than 800x600 pixels), but
126 * it's cheap insurance to deal with that here.
127 */
128#define INT10 "pushl %%ebp; pushw %%ds; int $0x10; popw %%ds; popl %%ebp"
129
130/* Accessing VGA indexed registers */
131static inline u8 in_idx(u16 port, u8 index)
132{
133 outb(index, port);
134 return inb(port+1);
135}
136
137static inline void out_idx(u8 v, u16 port, u8 index)
138{
139 outw(index+(v << 8), port);
140}
141
142/* Writes a value to an indexed port and then reads the port again */
143static inline u8 tst_idx(u8 v, u16 port, u8 index)
144{
145 out_idx(port, index, v);
146 return in_idx(port, index);
147}
148
149/* Get the I/O port of the VGA CRTC */
150u16 vga_crtc(void); /* video-vga.c */
151
152#endif /* BOOT_VIDEO_H */
diff --git a/arch/i386/boot/voyager.c b/arch/i386/boot/voyager.c
new file mode 100644
index 000000000000..61c8fe0453be
--- /dev/null
+++ b/arch/i386/boot/voyager.c
@@ -0,0 +1,46 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * arch/i386/boot/voyager.c
13 *
14 * Get the Voyager config information
15 */
16
17#include "boot.h"
18
19#ifdef CONFIG_X86_VOYAGER
20
21int query_voyager(void)
22{
23 u8 err;
24 u16 es, di;
25 /* Abuse the apm_bios_info area for this */
26 u8 *data_ptr = (u8 *)&boot_params.apm_bios_info;
27
28 data_ptr[0] = 0xff; /* Flag on config not found(?) */
29
30 asm("pushw %%es ; "
31 "int $0x15 ; "
32 "setc %0 ; "
33 "movw %%es, %1 ; "
34 "popw %%es"
35 : "=q" (err), "=r" (es), "=D" (di)
36 : "a" (0xffc0));
37
38 if (err)
39 return -1; /* Not Voyager */
40
41 set_fs(es);
42 copy_from_fs(data_ptr, di, 7); /* Table is 7 bytes apparently */
43 return 0;
44}
45
46#endif /* CONFIG_X86_VOYAGER */
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
index 1a3a2217b7c2..54ee1764fdae 100644
--- a/arch/i386/defconfig
+++ b/arch/i386/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.22-rc2 3# Linux kernel version: 2.6.22-git14
4# Mon May 21 13:23:44 2007 4# Fri Jul 20 09:53:15 2007
5# 5#
6CONFIG_X86_32=y 6CONFIG_X86_32=y
7CONFIG_GENERIC_TIME=y 7CONFIG_GENERIC_TIME=y
@@ -37,19 +37,18 @@ CONFIG_LOCALVERSION=""
37CONFIG_LOCALVERSION_AUTO=y 37CONFIG_LOCALVERSION_AUTO=y
38CONFIG_SWAP=y 38CONFIG_SWAP=y
39CONFIG_SYSVIPC=y 39CONFIG_SYSVIPC=y
40# CONFIG_IPC_NS is not set
41CONFIG_SYSVIPC_SYSCTL=y 40CONFIG_SYSVIPC_SYSCTL=y
42CONFIG_POSIX_MQUEUE=y 41CONFIG_POSIX_MQUEUE=y
43# CONFIG_BSD_PROCESS_ACCT is not set 42# CONFIG_BSD_PROCESS_ACCT is not set
44# CONFIG_TASKSTATS is not set 43# CONFIG_TASKSTATS is not set
45# CONFIG_UTS_NS is not set 44# CONFIG_USER_NS is not set
46# CONFIG_AUDIT is not set 45# CONFIG_AUDIT is not set
47CONFIG_IKCONFIG=y 46CONFIG_IKCONFIG=y
48CONFIG_IKCONFIG_PROC=y 47CONFIG_IKCONFIG_PROC=y
49CONFIG_LOG_BUF_SHIFT=18 48CONFIG_LOG_BUF_SHIFT=18
50# CONFIG_CPUSETS is not set 49# CONFIG_CPUSETS is not set
51CONFIG_SYSFS_DEPRECATED=y 50CONFIG_SYSFS_DEPRECATED=y
52# CONFIG_RELAY is not set 51CONFIG_RELAY=y
53CONFIG_BLK_DEV_INITRD=y 52CONFIG_BLK_DEV_INITRD=y
54CONFIG_INITRAMFS_SOURCE="" 53CONFIG_INITRAMFS_SOURCE=""
55CONFIG_CC_OPTIMIZE_FOR_SIZE=y 54CONFIG_CC_OPTIMIZE_FOR_SIZE=y
@@ -73,16 +72,13 @@ CONFIG_TIMERFD=y
73CONFIG_EVENTFD=y 72CONFIG_EVENTFD=y
74CONFIG_SHMEM=y 73CONFIG_SHMEM=y
75CONFIG_VM_EVENT_COUNTERS=y 74CONFIG_VM_EVENT_COUNTERS=y
76CONFIG_SLAB=y 75CONFIG_SLUB_DEBUG=y
77# CONFIG_SLUB is not set 76# CONFIG_SLAB is not set
77CONFIG_SLUB=y
78# CONFIG_SLOB is not set 78# CONFIG_SLOB is not set
79CONFIG_RT_MUTEXES=y 79CONFIG_RT_MUTEXES=y
80# CONFIG_TINY_SHMEM is not set 80# CONFIG_TINY_SHMEM is not set
81CONFIG_BASE_SMALL=0 81CONFIG_BASE_SMALL=0
82
83#
84# Loadable module support
85#
86CONFIG_MODULES=y 82CONFIG_MODULES=y
87CONFIG_MODULE_UNLOAD=y 83CONFIG_MODULE_UNLOAD=y
88CONFIG_MODULE_FORCE_UNLOAD=y 84CONFIG_MODULE_FORCE_UNLOAD=y
@@ -90,14 +86,11 @@ CONFIG_MODULE_FORCE_UNLOAD=y
90# CONFIG_MODULE_SRCVERSION_ALL is not set 86# CONFIG_MODULE_SRCVERSION_ALL is not set
91# CONFIG_KMOD is not set 87# CONFIG_KMOD is not set
92CONFIG_STOP_MACHINE=y 88CONFIG_STOP_MACHINE=y
93
94#
95# Block layer
96#
97CONFIG_BLOCK=y 89CONFIG_BLOCK=y
98CONFIG_LBD=y 90CONFIG_LBD=y
99# CONFIG_BLK_DEV_IO_TRACE is not set 91# CONFIG_BLK_DEV_IO_TRACE is not set
100# CONFIG_LSF is not set 92# CONFIG_LSF is not set
93# CONFIG_BLK_DEV_BSG is not set
101 94
102# 95#
103# IO Schedulers 96# IO Schedulers
@@ -166,7 +159,6 @@ CONFIG_X86_WP_WORKS_OK=y
166CONFIG_X86_INVLPG=y 159CONFIG_X86_INVLPG=y
167CONFIG_X86_BSWAP=y 160CONFIG_X86_BSWAP=y
168CONFIG_X86_POPAD_OK=y 161CONFIG_X86_POPAD_OK=y
169CONFIG_X86_CMPXCHG64=y
170CONFIG_X86_GOOD_APIC=y 162CONFIG_X86_GOOD_APIC=y
171CONFIG_X86_INTEL_USERCOPY=y 163CONFIG_X86_INTEL_USERCOPY=y
172CONFIG_X86_USE_PPRO_CHECKSUM=y 164CONFIG_X86_USE_PPRO_CHECKSUM=y
@@ -202,6 +194,7 @@ CONFIG_X86_CPUID=y
202# CONFIG_EDD is not set 194# CONFIG_EDD is not set
203# CONFIG_DELL_RBU is not set 195# CONFIG_DELL_RBU is not set
204# CONFIG_DCDBAS is not set 196# CONFIG_DCDBAS is not set
197CONFIG_DMIID=y
205# CONFIG_NOHIGHMEM is not set 198# CONFIG_NOHIGHMEM is not set
206CONFIG_HIGHMEM4G=y 199CONFIG_HIGHMEM4G=y
207# CONFIG_HIGHMEM64G is not set 200# CONFIG_HIGHMEM64G is not set
@@ -218,7 +211,9 @@ CONFIG_FLAT_NODE_MEM_MAP=y
218CONFIG_SPLIT_PTLOCK_CPUS=4 211CONFIG_SPLIT_PTLOCK_CPUS=4
219CONFIG_RESOURCES_64BIT=y 212CONFIG_RESOURCES_64BIT=y
220CONFIG_ZONE_DMA_FLAG=1 213CONFIG_ZONE_DMA_FLAG=1
214CONFIG_BOUNCE=y
221CONFIG_NR_QUICK=1 215CONFIG_NR_QUICK=1
216CONFIG_VIRT_TO_BUS=y
222# CONFIG_HIGHPTE is not set 217# CONFIG_HIGHPTE is not set
223# CONFIG_MATH_EMULATION is not set 218# CONFIG_MATH_EMULATION is not set
224CONFIG_MTRR=y 219CONFIG_MTRR=y
@@ -245,7 +240,6 @@ CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
245CONFIG_PM=y 240CONFIG_PM=y
246CONFIG_PM_LEGACY=y 241CONFIG_PM_LEGACY=y
247# CONFIG_PM_DEBUG is not set 242# CONFIG_PM_DEBUG is not set
248# CONFIG_PM_SYSFS_DEPRECATED is not set
249 243
250# 244#
251# ACPI (Advanced Configuration and Power Interface) Support 245# ACPI (Advanced Configuration and Power Interface) Support
@@ -285,7 +279,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
285# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set 279# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
286CONFIG_CPU_FREQ_GOV_USERSPACE=y 280CONFIG_CPU_FREQ_GOV_USERSPACE=y
287CONFIG_CPU_FREQ_GOV_ONDEMAND=y 281CONFIG_CPU_FREQ_GOV_ONDEMAND=y
288# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set 282CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
289 283
290# 284#
291# CPUFreq processor drivers 285# CPUFreq processor drivers
@@ -326,7 +320,7 @@ CONFIG_PCI_MMCONFIG=y
326CONFIG_ARCH_SUPPORTS_MSI=y 320CONFIG_ARCH_SUPPORTS_MSI=y
327CONFIG_PCI_MSI=y 321CONFIG_PCI_MSI=y
328# CONFIG_PCI_DEBUG is not set 322# CONFIG_PCI_DEBUG is not set
329CONFIG_HT_IRQ=y 323# CONFIG_HT_IRQ is not set
330CONFIG_ISA_DMA_API=y 324CONFIG_ISA_DMA_API=y
331# CONFIG_ISA is not set 325# CONFIG_ISA is not set
332# CONFIG_MCA is not set 326# CONFIG_MCA is not set
@@ -382,7 +376,7 @@ CONFIG_IP_PNP_DHCP=y
382CONFIG_INET_TUNNEL=y 376CONFIG_INET_TUNNEL=y
383CONFIG_INET_XFRM_MODE_TRANSPORT=y 377CONFIG_INET_XFRM_MODE_TRANSPORT=y
384CONFIG_INET_XFRM_MODE_TUNNEL=y 378CONFIG_INET_XFRM_MODE_TUNNEL=y
385CONFIG_INET_XFRM_MODE_BEET=y 379# CONFIG_INET_XFRM_MODE_BEET is not set
386CONFIG_INET_DIAG=y 380CONFIG_INET_DIAG=y
387CONFIG_INET_TCP_DIAG=y 381CONFIG_INET_TCP_DIAG=y
388# CONFIG_TCP_CONG_ADVANCED is not set 382# CONFIG_TCP_CONG_ADVANCED is not set
@@ -401,27 +395,15 @@ CONFIG_IPV6=y
401# CONFIG_INET6_TUNNEL is not set 395# CONFIG_INET6_TUNNEL is not set
402CONFIG_INET6_XFRM_MODE_TRANSPORT=y 396CONFIG_INET6_XFRM_MODE_TRANSPORT=y
403CONFIG_INET6_XFRM_MODE_TUNNEL=y 397CONFIG_INET6_XFRM_MODE_TUNNEL=y
404CONFIG_INET6_XFRM_MODE_BEET=y 398# CONFIG_INET6_XFRM_MODE_BEET is not set
405# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set 399# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
406CONFIG_IPV6_SIT=y 400CONFIG_IPV6_SIT=y
407# CONFIG_IPV6_TUNNEL is not set 401# CONFIG_IPV6_TUNNEL is not set
408# CONFIG_IPV6_MULTIPLE_TABLES is not set 402# CONFIG_IPV6_MULTIPLE_TABLES is not set
409# CONFIG_NETWORK_SECMARK is not set 403# CONFIG_NETWORK_SECMARK is not set
410# CONFIG_NETFILTER is not set 404# CONFIG_NETFILTER is not set
411
412#
413# DCCP Configuration (EXPERIMENTAL)
414#
415# CONFIG_IP_DCCP is not set 405# CONFIG_IP_DCCP is not set
416
417#
418# SCTP Configuration (EXPERIMENTAL)
419#
420# CONFIG_IP_SCTP is not set 406# CONFIG_IP_SCTP is not set
421
422#
423# TIPC Configuration (EXPERIMENTAL)
424#
425# CONFIG_TIPC is not set 407# CONFIG_TIPC is not set
426# CONFIG_ATM is not set 408# CONFIG_ATM is not set
427# CONFIG_BRIDGE is not set 409# CONFIG_BRIDGE is not set
@@ -458,6 +440,7 @@ CONFIG_IPV6_SIT=y
458# CONFIG_MAC80211 is not set 440# CONFIG_MAC80211 is not set
459# CONFIG_IEEE80211 is not set 441# CONFIG_IEEE80211 is not set
460# CONFIG_RFKILL is not set 442# CONFIG_RFKILL is not set
443# CONFIG_NET_9P is not set
461 444
462# 445#
463# Device Drivers 446# Device Drivers
@@ -472,21 +455,9 @@ CONFIG_FW_LOADER=y
472# CONFIG_DEBUG_DRIVER is not set 455# CONFIG_DEBUG_DRIVER is not set
473# CONFIG_DEBUG_DEVRES is not set 456# CONFIG_DEBUG_DEVRES is not set
474# CONFIG_SYS_HYPERVISOR is not set 457# CONFIG_SYS_HYPERVISOR is not set
475
476#
477# Connector - unified userspace <-> kernelspace linker
478#
479# CONFIG_CONNECTOR is not set 458# CONFIG_CONNECTOR is not set
480# CONFIG_MTD is not set 459# CONFIG_MTD is not set
481
482#
483# Parallel port support
484#
485# CONFIG_PARPORT is not set 460# CONFIG_PARPORT is not set
486
487#
488# Plug and Play support
489#
490CONFIG_PNP=y 461CONFIG_PNP=y
491# CONFIG_PNP_DEBUG is not set 462# CONFIG_PNP_DEBUG is not set
492 463
@@ -494,10 +465,7 @@ CONFIG_PNP=y
494# Protocols 465# Protocols
495# 466#
496CONFIG_PNPACPI=y 467CONFIG_PNPACPI=y
497 468CONFIG_BLK_DEV=y
498#
499# Block devices
500#
501CONFIG_BLK_DEV_FD=y 469CONFIG_BLK_DEV_FD=y
502# CONFIG_BLK_CPQ_DA is not set 470# CONFIG_BLK_CPQ_DA is not set
503# CONFIG_BLK_CPQ_CISS_DA is not set 471# CONFIG_BLK_CPQ_CISS_DA is not set
@@ -515,17 +483,14 @@ CONFIG_BLK_DEV_RAM_SIZE=4096
515CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 483CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
516# CONFIG_CDROM_PKTCDVD is not set 484# CONFIG_CDROM_PKTCDVD is not set
517# CONFIG_ATA_OVER_ETH is not set 485# CONFIG_ATA_OVER_ETH is not set
518 486CONFIG_MISC_DEVICES=y
519#
520# Misc devices
521#
522# CONFIG_IBM_ASM is not set 487# CONFIG_IBM_ASM is not set
523# CONFIG_PHANTOM is not set 488# CONFIG_PHANTOM is not set
489# CONFIG_EEPROM_93CX6 is not set
524# CONFIG_SGI_IOC4 is not set 490# CONFIG_SGI_IOC4 is not set
525# CONFIG_TIFM_CORE is not set 491# CONFIG_TIFM_CORE is not set
526# CONFIG_SONY_LAPTOP is not set 492# CONFIG_SONY_LAPTOP is not set
527# CONFIG_THINKPAD_ACPI is not set 493# CONFIG_THINKPAD_ACPI is not set
528# CONFIG_BLINK is not set
529CONFIG_IDE=y 494CONFIG_IDE=y
530CONFIG_BLK_DEV_IDE=y 495CONFIG_BLK_DEV_IDE=y
531 496
@@ -597,6 +562,7 @@ CONFIG_BLK_DEV_IDEDMA=y
597# 562#
598# CONFIG_RAID_ATTRS is not set 563# CONFIG_RAID_ATTRS is not set
599CONFIG_SCSI=y 564CONFIG_SCSI=y
565CONFIG_SCSI_DMA=y
600# CONFIG_SCSI_TGT is not set 566# CONFIG_SCSI_TGT is not set
601CONFIG_SCSI_NETLINK=y 567CONFIG_SCSI_NETLINK=y
602# CONFIG_SCSI_PROC_FS is not set 568# CONFIG_SCSI_PROC_FS is not set
@@ -607,8 +573,9 @@ CONFIG_SCSI_NETLINK=y
607CONFIG_BLK_DEV_SD=y 573CONFIG_BLK_DEV_SD=y
608# CONFIG_CHR_DEV_ST is not set 574# CONFIG_CHR_DEV_ST is not set
609# CONFIG_CHR_DEV_OSST is not set 575# CONFIG_CHR_DEV_OSST is not set
610# CONFIG_BLK_DEV_SR is not set 576CONFIG_BLK_DEV_SR=y
611# CONFIG_CHR_DEV_SG is not set 577# CONFIG_BLK_DEV_SR_VENDOR is not set
578CONFIG_CHR_DEV_SG=y
612# CONFIG_CHR_DEV_SCH is not set 579# CONFIG_CHR_DEV_SCH is not set
613 580
614# 581#
@@ -668,6 +635,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0
668# CONFIG_SCSI_INIA100 is not set 635# CONFIG_SCSI_INIA100 is not set
669# CONFIG_SCSI_STEX is not set 636# CONFIG_SCSI_STEX is not set
670# CONFIG_SCSI_SYM53C8XX_2 is not set 637# CONFIG_SCSI_SYM53C8XX_2 is not set
638# CONFIG_SCSI_IPR is not set
671# CONFIG_SCSI_QLOGIC_1280 is not set 639# CONFIG_SCSI_QLOGIC_1280 is not set
672# CONFIG_SCSI_QLA_FC is not set 640# CONFIG_SCSI_QLA_FC is not set
673# CONFIG_SCSI_QLA_ISCSI is not set 641# CONFIG_SCSI_QLA_ISCSI is not set
@@ -676,14 +644,73 @@ CONFIG_AIC79XX_DEBUG_MASK=0
676# CONFIG_SCSI_DC390T is not set 644# CONFIG_SCSI_DC390T is not set
677# CONFIG_SCSI_NSP32 is not set 645# CONFIG_SCSI_NSP32 is not set
678# CONFIG_SCSI_DEBUG is not set 646# CONFIG_SCSI_DEBUG is not set
679# CONFIG_SCSI_ESP_CORE is not set
680# CONFIG_SCSI_SRP is not set 647# CONFIG_SCSI_SRP is not set
681# CONFIG_ATA is not set 648CONFIG_ATA=y
682 649# CONFIG_ATA_NONSTANDARD is not set
683# 650CONFIG_ATA_ACPI=y
684# Multi-device support (RAID and LVM) 651CONFIG_SATA_AHCI=y
685# 652CONFIG_SATA_SVW=y
686# CONFIG_MD is not set 653CONFIG_ATA_PIIX=y
654# CONFIG_SATA_MV is not set
655CONFIG_SATA_NV=y
656# CONFIG_PDC_ADMA is not set
657# CONFIG_SATA_QSTOR is not set
658# CONFIG_SATA_PROMISE is not set
659# CONFIG_SATA_SX4 is not set
660CONFIG_SATA_SIL=y
661# CONFIG_SATA_SIL24 is not set
662# CONFIG_SATA_SIS is not set
663# CONFIG_SATA_ULI is not set
664CONFIG_SATA_VIA=y
665# CONFIG_SATA_VITESSE is not set
666# CONFIG_SATA_INIC162X is not set
667# CONFIG_PATA_ALI is not set
668# CONFIG_PATA_AMD is not set
669# CONFIG_PATA_ARTOP is not set
670# CONFIG_PATA_ATIIXP is not set
671# CONFIG_PATA_CMD640_PCI is not set
672# CONFIG_PATA_CMD64X is not set
673# CONFIG_PATA_CS5520 is not set
674# CONFIG_PATA_CS5530 is not set
675# CONFIG_PATA_CS5535 is not set
676# CONFIG_PATA_CYPRESS is not set
677# CONFIG_PATA_EFAR is not set
678# CONFIG_ATA_GENERIC is not set
679# CONFIG_PATA_HPT366 is not set
680# CONFIG_PATA_HPT37X is not set
681# CONFIG_PATA_HPT3X2N is not set
682# CONFIG_PATA_HPT3X3 is not set
683# CONFIG_PATA_IT821X is not set
684# CONFIG_PATA_IT8213 is not set
685# CONFIG_PATA_JMICRON is not set
686# CONFIG_PATA_TRIFLEX is not set
687# CONFIG_PATA_MARVELL is not set
688# CONFIG_PATA_MPIIX is not set
689# CONFIG_PATA_OLDPIIX is not set
690# CONFIG_PATA_NETCELL is not set
691# CONFIG_PATA_NS87410 is not set
692# CONFIG_PATA_OPTI is not set
693# CONFIG_PATA_OPTIDMA is not set
694# CONFIG_PATA_PDC_OLD is not set
695# CONFIG_PATA_RADISYS is not set
696# CONFIG_PATA_RZ1000 is not set
697# CONFIG_PATA_SC1200 is not set
698# CONFIG_PATA_SERVERWORKS is not set
699# CONFIG_PATA_PDC2027X is not set
700# CONFIG_PATA_SIL680 is not set
701# CONFIG_PATA_SIS is not set
702# CONFIG_PATA_VIA is not set
703# CONFIG_PATA_WINBOND is not set
704CONFIG_MD=y
705# CONFIG_BLK_DEV_MD is not set
706CONFIG_BLK_DEV_DM=y
707# CONFIG_DM_DEBUG is not set
708# CONFIG_DM_CRYPT is not set
709# CONFIG_DM_SNAPSHOT is not set
710# CONFIG_DM_MIRROR is not set
711# CONFIG_DM_ZERO is not set
712# CONFIG_DM_MULTIPATH is not set
713# CONFIG_DM_DELAY is not set
687 714
688# 715#
689# Fusion MPT device support 716# Fusion MPT device support
@@ -724,42 +751,27 @@ CONFIG_IEEE1394_OHCI1394=y
724# CONFIG_IEEE1394_ETH1394 is not set 751# CONFIG_IEEE1394_ETH1394 is not set
725# CONFIG_IEEE1394_DV1394 is not set 752# CONFIG_IEEE1394_DV1394 is not set
726CONFIG_IEEE1394_RAWIO=y 753CONFIG_IEEE1394_RAWIO=y
727
728#
729# I2O device support
730#
731# CONFIG_I2O is not set 754# CONFIG_I2O is not set
732# CONFIG_MACINTOSH_DRIVERS is not set 755CONFIG_MACINTOSH_DRIVERS=y
733 756# CONFIG_MAC_EMUMOUSEBTN is not set
734#
735# Network device support
736#
737CONFIG_NETDEVICES=y 757CONFIG_NETDEVICES=y
758CONFIG_NETDEVICES_MULTIQUEUE=y
738# CONFIG_DUMMY is not set 759# CONFIG_DUMMY is not set
739# CONFIG_BONDING is not set 760# CONFIG_BONDING is not set
761# CONFIG_MACVLAN is not set
740# CONFIG_EQUALIZER is not set 762# CONFIG_EQUALIZER is not set
741# CONFIG_TUN is not set 763# CONFIG_TUN is not set
742# CONFIG_NET_SB1000 is not set 764# CONFIG_NET_SB1000 is not set
743
744#
745# ARCnet devices
746#
747# CONFIG_ARCNET is not set 765# CONFIG_ARCNET is not set
748# CONFIG_PHYLIB is not set 766# CONFIG_PHYLIB is not set
749
750#
751# Ethernet (10 or 100Mbit)
752#
753CONFIG_NET_ETHERNET=y 767CONFIG_NET_ETHERNET=y
754CONFIG_MII=y 768CONFIG_MII=y
755# CONFIG_HAPPYMEAL is not set 769# CONFIG_HAPPYMEAL is not set
756# CONFIG_SUNGEM is not set 770# CONFIG_SUNGEM is not set
757# CONFIG_CASSINI is not set 771# CONFIG_CASSINI is not set
758# CONFIG_NET_VENDOR_3COM is not set 772CONFIG_NET_VENDOR_3COM=y
759 773CONFIG_VORTEX=y
760# 774# CONFIG_TYPHOON is not set
761# Tulip family network device support
762#
763CONFIG_NET_TULIP=y 775CONFIG_NET_TULIP=y
764# CONFIG_DE2104X is not set 776# CONFIG_DE2104X is not set
765CONFIG_TULIP=y 777CONFIG_TULIP=y
@@ -810,7 +822,6 @@ CONFIG_R8169=y
810# CONFIG_SIS190 is not set 822# CONFIG_SIS190 is not set
811# CONFIG_SKGE is not set 823# CONFIG_SKGE is not set
812CONFIG_SKY2=y 824CONFIG_SKY2=y
813# CONFIG_SK98LIN is not set
814# CONFIG_VIA_VELOCITY is not set 825# CONFIG_VIA_VELOCITY is not set
815CONFIG_TIGON3=y 826CONFIG_TIGON3=y
816CONFIG_BNX2=y 827CONFIG_BNX2=y
@@ -824,10 +835,6 @@ CONFIG_NETDEV_10000=y
824# CONFIG_MYRI10GE is not set 835# CONFIG_MYRI10GE is not set
825# CONFIG_NETXEN_NIC is not set 836# CONFIG_NETXEN_NIC is not set
826# CONFIG_MLX4_CORE is not set 837# CONFIG_MLX4_CORE is not set
827
828#
829# Token Ring devices
830#
831# CONFIG_TR is not set 838# CONFIG_TR is not set
832 839
833# 840#
@@ -856,15 +863,7 @@ CONFIG_NETCONSOLE=y
856CONFIG_NETPOLL=y 863CONFIG_NETPOLL=y
857# CONFIG_NETPOLL_TRAP is not set 864# CONFIG_NETPOLL_TRAP is not set
858CONFIG_NET_POLL_CONTROLLER=y 865CONFIG_NET_POLL_CONTROLLER=y
859
860#
861# ISDN subsystem
862#
863# CONFIG_ISDN is not set 866# CONFIG_ISDN is not set
864
865#
866# Telephony Support
867#
868# CONFIG_PHONE is not set 867# CONFIG_PHONE is not set
869 868
870# 869#
@@ -872,6 +871,7 @@ CONFIG_NET_POLL_CONTROLLER=y
872# 871#
873CONFIG_INPUT=y 872CONFIG_INPUT=y
874# CONFIG_INPUT_FF_MEMLESS is not set 873# CONFIG_INPUT_FF_MEMLESS is not set
874# CONFIG_INPUT_POLLDEV is not set
875 875
876# 876#
877# Userland interfaces 877# Userland interfaces
@@ -937,6 +937,7 @@ CONFIG_HW_CONSOLE=y
937# 937#
938CONFIG_SERIAL_8250=y 938CONFIG_SERIAL_8250=y
939CONFIG_SERIAL_8250_CONSOLE=y 939CONFIG_SERIAL_8250_CONSOLE=y
940CONFIG_FIX_EARLYCON_MEM=y
940CONFIG_SERIAL_8250_PCI=y 941CONFIG_SERIAL_8250_PCI=y
941CONFIG_SERIAL_8250_PNP=y 942CONFIG_SERIAL_8250_PNP=y
942CONFIG_SERIAL_8250_NR_UARTS=4 943CONFIG_SERIAL_8250_NR_UARTS=4
@@ -952,10 +953,6 @@ CONFIG_SERIAL_CORE_CONSOLE=y
952CONFIG_UNIX98_PTYS=y 953CONFIG_UNIX98_PTYS=y
953CONFIG_LEGACY_PTYS=y 954CONFIG_LEGACY_PTYS=y
954CONFIG_LEGACY_PTY_COUNT=256 955CONFIG_LEGACY_PTY_COUNT=256
955
956#
957# IPMI
958#
959# CONFIG_IPMI_HANDLER is not set 956# CONFIG_IPMI_HANDLER is not set
960# CONFIG_WATCHDOG is not set 957# CONFIG_WATCHDOG is not set
961CONFIG_HW_RANDOM=y 958CONFIG_HW_RANDOM=y
@@ -989,11 +986,7 @@ CONFIG_MAX_RAW_DEVS=256
989CONFIG_HPET=y 986CONFIG_HPET=y
990# CONFIG_HPET_RTC_IRQ is not set 987# CONFIG_HPET_RTC_IRQ is not set
991CONFIG_HPET_MMAP=y 988CONFIG_HPET_MMAP=y
992CONFIG_HANGCHECK_TIMER=y 989# CONFIG_HANGCHECK_TIMER is not set
993
994#
995# TPM devices
996#
997# CONFIG_TCG_TPM is not set 990# CONFIG_TCG_TPM is not set
998# CONFIG_TELCLOCK is not set 991# CONFIG_TELCLOCK is not set
999CONFIG_DEVPORT=y 992CONFIG_DEVPORT=y
@@ -1004,11 +997,8 @@ CONFIG_DEVPORT=y
1004# 997#
1005# CONFIG_SPI is not set 998# CONFIG_SPI is not set
1006# CONFIG_SPI_MASTER is not set 999# CONFIG_SPI_MASTER is not set
1007
1008#
1009# Dallas's 1-wire bus
1010#
1011# CONFIG_W1 is not set 1000# CONFIG_W1 is not set
1001# CONFIG_POWER_SUPPLY is not set
1012# CONFIG_HWMON is not set 1002# CONFIG_HWMON is not set
1013 1003
1014# 1004#
@@ -1042,7 +1032,7 @@ CONFIG_DAB=y
1042CONFIG_VGA_CONSOLE=y 1032CONFIG_VGA_CONSOLE=y
1043CONFIG_VGACON_SOFT_SCROLLBACK=y 1033CONFIG_VGACON_SOFT_SCROLLBACK=y
1044CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=128 1034CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=128
1045# CONFIG_VIDEO_SELECT is not set 1035CONFIG_VIDEO_SELECT=y
1046CONFIG_DUMMY_CONSOLE=y 1036CONFIG_DUMMY_CONSOLE=y
1047 1037
1048# 1038#
@@ -1059,15 +1049,11 @@ CONFIG_SOUND=y
1059# Open Sound System 1049# Open Sound System
1060# 1050#
1061CONFIG_SOUND_PRIME=y 1051CONFIG_SOUND_PRIME=y
1062# CONFIG_OSS_OBSOLETE is not set
1063# CONFIG_SOUND_TRIDENT is not set 1052# CONFIG_SOUND_TRIDENT is not set
1064# CONFIG_SOUND_MSNDCLAS is not set 1053# CONFIG_SOUND_MSNDCLAS is not set
1065# CONFIG_SOUND_MSNDPIN is not set 1054# CONFIG_SOUND_MSNDPIN is not set
1066# CONFIG_SOUND_OSS is not set 1055# CONFIG_SOUND_OSS is not set
1067 1056CONFIG_HID_SUPPORT=y
1068#
1069# HID Devices
1070#
1071CONFIG_HID=y 1057CONFIG_HID=y
1072# CONFIG_HID_DEBUG is not set 1058# CONFIG_HID_DEBUG is not set
1073 1059
@@ -1078,10 +1064,7 @@ CONFIG_USB_HID=y
1078# CONFIG_USB_HIDINPUT_POWERBOOK is not set 1064# CONFIG_USB_HIDINPUT_POWERBOOK is not set
1079# CONFIG_HID_FF is not set 1065# CONFIG_HID_FF is not set
1080# CONFIG_USB_HIDDEV is not set 1066# CONFIG_USB_HIDDEV is not set
1081 1067CONFIG_USB_SUPPORT=y
1082#
1083# USB support
1084#
1085CONFIG_USB_ARCH_HAS_HCD=y 1068CONFIG_USB_ARCH_HAS_HCD=y
1086CONFIG_USB_ARCH_HAS_OHCI=y 1069CONFIG_USB_ARCH_HAS_OHCI=y
1087CONFIG_USB_ARCH_HAS_EHCI=y 1070CONFIG_USB_ARCH_HAS_EHCI=y
@@ -1095,6 +1078,7 @@ CONFIG_USB_DEVICEFS=y
1095# CONFIG_USB_DEVICE_CLASS is not set 1078# CONFIG_USB_DEVICE_CLASS is not set
1096# CONFIG_USB_DYNAMIC_MINORS is not set 1079# CONFIG_USB_DYNAMIC_MINORS is not set
1097# CONFIG_USB_SUSPEND is not set 1080# CONFIG_USB_SUSPEND is not set
1081# CONFIG_USB_PERSIST is not set
1098# CONFIG_USB_OTG is not set 1082# CONFIG_USB_OTG is not set
1099 1083
1100# 1084#
@@ -1104,7 +1088,6 @@ CONFIG_USB_EHCI_HCD=y
1104# CONFIG_USB_EHCI_SPLIT_ISO is not set 1088# CONFIG_USB_EHCI_SPLIT_ISO is not set
1105# CONFIG_USB_EHCI_ROOT_HUB_TT is not set 1089# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
1106# CONFIG_USB_EHCI_TT_NEWSCHED is not set 1090# CONFIG_USB_EHCI_TT_NEWSCHED is not set
1107# CONFIG_USB_EHCI_BIG_ENDIAN_MMIO is not set
1108# CONFIG_USB_ISP116X_HCD is not set 1091# CONFIG_USB_ISP116X_HCD is not set
1109CONFIG_USB_OHCI_HCD=y 1092CONFIG_USB_OHCI_HCD=y
1110# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set 1093# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set
@@ -1112,6 +1095,7 @@ CONFIG_USB_OHCI_HCD=y
1112CONFIG_USB_OHCI_LITTLE_ENDIAN=y 1095CONFIG_USB_OHCI_LITTLE_ENDIAN=y
1113CONFIG_USB_UHCI_HCD=y 1096CONFIG_USB_UHCI_HCD=y
1114# CONFIG_USB_SL811_HCD is not set 1097# CONFIG_USB_SL811_HCD is not set
1098# CONFIG_USB_R8A66597_HCD is not set
1115 1099
1116# 1100#
1117# USB Device Class drivers 1101# USB Device Class drivers
@@ -1202,15 +1186,7 @@ CONFIG_USB_MON=y
1202# 1186#
1203# LED Triggers 1187# LED Triggers
1204# 1188#
1205
1206#
1207# InfiniBand support
1208#
1209# CONFIG_INFINIBAND is not set 1189# CONFIG_INFINIBAND is not set
1210
1211#
1212# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
1213#
1214# CONFIG_EDAC is not set 1190# CONFIG_EDAC is not set
1215 1191
1216# 1192#
@@ -1230,11 +1206,13 @@ CONFIG_USB_MON=y
1230# 1206#
1231# DMA Devices 1207# DMA Devices
1232# 1208#
1209CONFIG_VIRTUALIZATION=y
1210# CONFIG_KVM is not set
1233 1211
1234# 1212#
1235# Virtualization 1213# Userspace I/O
1236# 1214#
1237# CONFIG_KVM is not set 1215# CONFIG_UIO is not set
1238 1216
1239# 1217#
1240# File systems 1218# File systems
@@ -1272,6 +1250,7 @@ CONFIG_DNOTIFY=y
1272# CONFIG_AUTOFS_FS is not set 1250# CONFIG_AUTOFS_FS is not set
1273CONFIG_AUTOFS4_FS=y 1251CONFIG_AUTOFS4_FS=y
1274# CONFIG_FUSE_FS is not set 1252# CONFIG_FUSE_FS is not set
1253CONFIG_GENERIC_ACL=y
1275 1254
1276# 1255#
1277# CD-ROM/DVD Filesystems 1256# CD-ROM/DVD Filesystems
@@ -1299,7 +1278,7 @@ CONFIG_PROC_KCORE=y
1299CONFIG_PROC_SYSCTL=y 1278CONFIG_PROC_SYSCTL=y
1300CONFIG_SYSFS=y 1279CONFIG_SYSFS=y
1301CONFIG_TMPFS=y 1280CONFIG_TMPFS=y
1302# CONFIG_TMPFS_POSIX_ACL is not set 1281CONFIG_TMPFS_POSIX_ACL=y
1303CONFIG_HUGETLBFS=y 1282CONFIG_HUGETLBFS=y
1304CONFIG_HUGETLB_PAGE=y 1283CONFIG_HUGETLB_PAGE=y
1305CONFIG_RAMFS=y 1284CONFIG_RAMFS=y
@@ -1349,7 +1328,6 @@ CONFIG_SUNRPC=y
1349# CONFIG_NCP_FS is not set 1328# CONFIG_NCP_FS is not set
1350# CONFIG_CODA_FS is not set 1329# CONFIG_CODA_FS is not set
1351# CONFIG_AFS_FS is not set 1330# CONFIG_AFS_FS is not set
1352# CONFIG_9P_FS is not set
1353 1331
1354# 1332#
1355# Partition Types 1333# Partition Types
@@ -1405,10 +1383,7 @@ CONFIG_NLS_UTF8=y
1405# Distributed Lock Manager 1383# Distributed Lock Manager
1406# 1384#
1407# CONFIG_DLM is not set 1385# CONFIG_DLM is not set
1408 1386CONFIG_INSTRUMENTATION=y
1409#
1410# Instrumentation Support
1411#
1412CONFIG_PROFILING=y 1387CONFIG_PROFILING=y
1413CONFIG_OPROFILE=y 1388CONFIG_OPROFILE=y
1414CONFIG_KPROBES=y 1389CONFIG_KPROBES=y
@@ -1418,7 +1393,7 @@ CONFIG_KPROBES=y
1418# 1393#
1419CONFIG_TRACE_IRQFLAGS_SUPPORT=y 1394CONFIG_TRACE_IRQFLAGS_SUPPORT=y
1420# CONFIG_PRINTK_TIME is not set 1395# CONFIG_PRINTK_TIME is not set
1421CONFIG_ENABLE_MUST_CHECK=y 1396# CONFIG_ENABLE_MUST_CHECK is not set
1422CONFIG_MAGIC_SYSRQ=y 1397CONFIG_MAGIC_SYSRQ=y
1423CONFIG_UNUSED_SYMBOLS=y 1398CONFIG_UNUSED_SYMBOLS=y
1424# CONFIG_DEBUG_FS is not set 1399# CONFIG_DEBUG_FS is not set
@@ -1426,15 +1401,17 @@ CONFIG_UNUSED_SYMBOLS=y
1426CONFIG_DEBUG_KERNEL=y 1401CONFIG_DEBUG_KERNEL=y
1427# CONFIG_DEBUG_SHIRQ is not set 1402# CONFIG_DEBUG_SHIRQ is not set
1428CONFIG_DETECT_SOFTLOCKUP=y 1403CONFIG_DETECT_SOFTLOCKUP=y
1404# CONFIG_SCHED_DEBUG is not set
1429# CONFIG_SCHEDSTATS is not set 1405# CONFIG_SCHEDSTATS is not set
1430# CONFIG_TIMER_STATS is not set 1406CONFIG_TIMER_STATS=y
1431# CONFIG_DEBUG_SLAB is not set 1407# CONFIG_SLUB_DEBUG_ON is not set
1432# CONFIG_DEBUG_RT_MUTEXES is not set 1408# CONFIG_DEBUG_RT_MUTEXES is not set
1433# CONFIG_RT_MUTEX_TESTER is not set 1409# CONFIG_RT_MUTEX_TESTER is not set
1434# CONFIG_DEBUG_SPINLOCK is not set 1410# CONFIG_DEBUG_SPINLOCK is not set
1435# CONFIG_DEBUG_MUTEXES is not set 1411# CONFIG_DEBUG_MUTEXES is not set
1436# CONFIG_DEBUG_LOCK_ALLOC is not set 1412# CONFIG_DEBUG_LOCK_ALLOC is not set
1437# CONFIG_PROVE_LOCKING is not set 1413# CONFIG_PROVE_LOCKING is not set
1414# CONFIG_LOCK_STAT is not set
1438# CONFIG_DEBUG_SPINLOCK_SLEEP is not set 1415# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
1439# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set 1416# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
1440# CONFIG_DEBUG_KOBJECT is not set 1417# CONFIG_DEBUG_KOBJECT is not set
@@ -1444,7 +1421,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
1444# CONFIG_DEBUG_VM is not set 1421# CONFIG_DEBUG_VM is not set
1445# CONFIG_DEBUG_LIST is not set 1422# CONFIG_DEBUG_LIST is not set
1446# CONFIG_FRAME_POINTER is not set 1423# CONFIG_FRAME_POINTER is not set
1447# CONFIG_UNWIND_INFO is not set
1448# CONFIG_FORCED_INLINING is not set 1424# CONFIG_FORCED_INLINING is not set
1449# CONFIG_RCU_TORTURE_TEST is not set 1425# CONFIG_RCU_TORTURE_TEST is not set
1450# CONFIG_LKDTM is not set 1426# CONFIG_LKDTM is not set
@@ -1463,10 +1439,6 @@ CONFIG_DOUBLEFAULT=y
1463# 1439#
1464# CONFIG_KEYS is not set 1440# CONFIG_KEYS is not set
1465# CONFIG_SECURITY is not set 1441# CONFIG_SECURITY is not set
1466
1467#
1468# Cryptographic options
1469#
1470# CONFIG_CRYPTO is not set 1442# CONFIG_CRYPTO is not set
1471 1443
1472# 1444#
@@ -1477,6 +1449,7 @@ CONFIG_BITREVERSE=y
1477# CONFIG_CRC16 is not set 1449# CONFIG_CRC16 is not set
1478# CONFIG_CRC_ITU_T is not set 1450# CONFIG_CRC_ITU_T is not set
1479CONFIG_CRC32=y 1451CONFIG_CRC32=y
1452# CONFIG_CRC7 is not set
1480# CONFIG_LIBCRC32C is not set 1453# CONFIG_LIBCRC32C is not set
1481CONFIG_ZLIB_INFLATE=y 1454CONFIG_ZLIB_INFLATE=y
1482CONFIG_PLIST=y 1455CONFIG_PLIST=y
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 06da59f6f837..dbe5e87e0d66 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_VM86) += vm86.o
40obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 40obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
41obj-$(CONFIG_HPET_TIMER) += hpet.o 41obj-$(CONFIG_HPET_TIMER) += hpet.o
42obj-$(CONFIG_K8_NB) += k8.o 42obj-$(CONFIG_K8_NB) += k8.o
43obj-$(CONFIG_MGEODE_LX) += geode.o
43 44
44obj-$(CONFIG_VMI) += vmi.o vmiclock.o 45obj-$(CONFIG_VMI) += vmi.o vmiclock.o
45obj-$(CONFIG_PARAVIRT) += paravirt.o 46obj-$(CONFIG_PARAVIRT) += paravirt.o
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index a2c8b9efd700..cacdd883bf2b 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -618,11 +618,11 @@ static int __init acpi_parse_sbf(struct acpi_table_header *table)
618#ifdef CONFIG_HPET_TIMER 618#ifdef CONFIG_HPET_TIMER
619#include <asm/hpet.h> 619#include <asm/hpet.h>
620 620
621static struct __initdata resource *hpet_res;
622
621static int __init acpi_parse_hpet(struct acpi_table_header *table) 623static int __init acpi_parse_hpet(struct acpi_table_header *table)
622{ 624{
623 struct acpi_table_hpet *hpet_tbl; 625 struct acpi_table_hpet *hpet_tbl;
624 struct resource *hpet_res;
625 resource_size_t res_start;
626 626
627 hpet_tbl = (struct acpi_table_hpet *)table; 627 hpet_tbl = (struct acpi_table_hpet *)table;
628 if (!hpet_tbl) { 628 if (!hpet_tbl) {
@@ -636,31 +636,46 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
636 return -1; 636 return -1;
637 } 637 }
638 638
639#define HPET_RESOURCE_NAME_SIZE 9
640 hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
641 if (hpet_res) {
642 memset(hpet_res, 0, sizeof(*hpet_res));
643 hpet_res->name = (void *)&hpet_res[1];
644 hpet_res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
645 snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE,
646 "HPET %u", hpet_tbl->sequence);
647 hpet_res->end = (1 * 1024) - 1;
648 }
649
650 hpet_address = hpet_tbl->address.address; 639 hpet_address = hpet_tbl->address.address;
651 printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", 640 printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
652 hpet_tbl->id, hpet_address); 641 hpet_tbl->id, hpet_address);
653 642
654 res_start = hpet_address; 643 /*
644 * Allocate and initialize the HPET firmware resource for adding into
645 * the resource tree during the lateinit timeframe.
646 */
647#define HPET_RESOURCE_NAME_SIZE 9
648 hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
655 649
656 if (hpet_res) { 650 if (!hpet_res)
657 hpet_res->start = res_start; 651 return 0;
658 hpet_res->end += res_start; 652
659 insert_resource(&iomem_resource, hpet_res); 653 memset(hpet_res, 0, sizeof(*hpet_res));
660 } 654 hpet_res->name = (void *)&hpet_res[1];
655 hpet_res->flags = IORESOURCE_MEM;
656 snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u",
657 hpet_tbl->sequence);
658
659 hpet_res->start = hpet_address;
660 hpet_res->end = hpet_address + (1 * 1024) - 1;
661 661
662 return 0; 662 return 0;
663} 663}
664
665/*
666 * hpet_insert_resource inserts the HPET resources used into the resource
667 * tree.
668 */
669static __init int hpet_insert_resource(void)
670{
671 if (!hpet_res)
672 return 1;
673
674 return insert_resource(&iomem_resource, hpet_res);
675}
676
677late_initcall(hpet_insert_resource);
678
664#else 679#else
665#define acpi_parse_hpet NULL 680#define acpi_parse_hpet NULL
666#endif 681#endif
diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c
index 4ee83577bf61..c42b5ab49deb 100644
--- a/arch/i386/kernel/acpi/sleep.c
+++ b/arch/i386/kernel/acpi/sleep.c
@@ -14,7 +14,7 @@
14 14
15/* address in low memory of the wakeup routine. */ 15/* address in low memory of the wakeup routine. */
16unsigned long acpi_wakeup_address = 0; 16unsigned long acpi_wakeup_address = 0;
17unsigned long acpi_video_flags; 17unsigned long acpi_realmode_flags;
18extern char wakeup_start, wakeup_end; 18extern char wakeup_start, wakeup_end;
19 19
20extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); 20extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
@@ -68,9 +68,11 @@ static int __init acpi_sleep_setup(char *str)
68{ 68{
69 while ((str != NULL) && (*str != '\0')) { 69 while ((str != NULL) && (*str != '\0')) {
70 if (strncmp(str, "s3_bios", 7) == 0) 70 if (strncmp(str, "s3_bios", 7) == 0)
71 acpi_video_flags = 1; 71 acpi_realmode_flags |= 1;
72 if (strncmp(str, "s3_mode", 7) == 0) 72 if (strncmp(str, "s3_mode", 7) == 0)
73 acpi_video_flags |= 2; 73 acpi_realmode_flags |= 2;
74 if (strncmp(str, "s3_beep", 7) == 0)
75 acpi_realmode_flags |= 4;
74 str = strchr(str, ','); 76 str = strchr(str, ',');
75 if (str != NULL) 77 if (str != NULL)
76 str += strspn(str, ", \t"); 78 str += strspn(str, ", \t");
@@ -80,9 +82,11 @@ static int __init acpi_sleep_setup(char *str)
80 82
81__setup("acpi_sleep=", acpi_sleep_setup); 83__setup("acpi_sleep=", acpi_sleep_setup);
82 84
85/* Ouch, we want to delete this. We already have better version in userspace, in
86 s2ram from suspend.sf.net project */
83static __init int reset_videomode_after_s3(struct dmi_system_id *d) 87static __init int reset_videomode_after_s3(struct dmi_system_id *d)
84{ 88{
85 acpi_video_flags |= 2; 89 acpi_realmode_flags |= 2;
86 return 0; 90 return 0;
87} 91}
88 92
diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S
index b781b38131c0..ed0a0f2c1597 100644
--- a/arch/i386/kernel/acpi/wakeup.S
+++ b/arch/i386/kernel/acpi/wakeup.S
@@ -13,6 +13,21 @@
13# cs = 0x1234, eip = 0x05 13# cs = 0x1234, eip = 0x05
14# 14#
15 15
/*
 * BEEP: fixed sequence of port writes used as an audible wakeup marker.
 * Clobbers %al.  Every access except the last is followed by a write of
 * %al to port 0x80.
 * NOTE(review): ports 0x61/0x43/0x42 look like the PC speaker gate and
 * PIT channel 2 programming, with port 0x80 as an I/O delay -- confirm
 * against the PC/AT port map.
 */
#define BEEP				\
	inb	$0x61, %al;		\
	outb	%al, $0x80;		\
	movb	$0x03, %al;		\
	outb	%al, $0x61;		\
	outb	%al, $0x80;		\
	movb	$0xb6, %al;		\
	outb	%al, $0x43;		\
	outb	%al, $0x80;		\
	movb	$0x89, %al;		\
	outb	%al, $0x42;		\
	outb	%al, $0x80;		\
	movb	$0x0f, %al;		\
	outb	%al, $0x42;
30
16ALIGN 31ALIGN
17 .align 4096 32 .align 4096
18ENTRY(wakeup_start) 33ENTRY(wakeup_start)
@@ -31,6 +46,11 @@ wakeup_code:
31 movw %cs, %ax 46 movw %cs, %ax
32 movw %ax, %ds # Make ds:0 point to wakeup_start 47 movw %ax, %ds # Make ds:0 point to wakeup_start
33 movw %ax, %ss 48 movw %ax, %ss
49
50 testl $4, realmode_flags - wakeup_code
51 jz 1f
52 BEEP
531:
34 mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board 54 mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board
35 movw $0x0e00 + 'S', %fs:(0x12) 55 movw $0x0e00 + 'S', %fs:(0x12)
36 56
@@ -41,7 +61,7 @@ wakeup_code:
41 cmpl $0x12345678, %eax 61 cmpl $0x12345678, %eax
42 jne bogus_real_magic 62 jne bogus_real_magic
43 63
44 testl $1, video_flags - wakeup_code 64 testl $1, realmode_flags - wakeup_code
45 jz 1f 65 jz 1f
46 lcall $0xc000,$3 66 lcall $0xc000,$3
47 movw %cs, %ax 67 movw %cs, %ax
@@ -49,7 +69,7 @@ wakeup_code:
49 movw %ax, %ss 69 movw %ax, %ss
501: 701:
51 71
52 testl $2, video_flags - wakeup_code 72 testl $2, realmode_flags - wakeup_code
53 jz 1f 73 jz 1f
54 mov video_mode - wakeup_code, %ax 74 mov video_mode - wakeup_code, %ax
55 call mode_set 75 call mode_set
@@ -88,7 +108,11 @@ wakeup_code:
88 cmpl $0x12345678, %eax 108 cmpl $0x12345678, %eax
89 jne bogus_real_magic 109 jne bogus_real_magic
90 110
91 ljmpl $__KERNEL_CS,$wakeup_pmode_return 111 testl $8, realmode_flags - wakeup_code
112 jz 1f
113 BEEP
1141:
115 ljmpl $__KERNEL_CS, $wakeup_pmode_return
92 116
93real_save_gdt: .word 0 117real_save_gdt: .word 0
94 .long 0 118 .long 0
@@ -97,7 +121,8 @@ real_save_cr3: .long 0
97real_save_cr4: .long 0 121real_save_cr4: .long 0
98real_magic: .long 0 122real_magic: .long 0
99video_mode: .long 0 123video_mode: .long 0
100video_flags: .long 0 124realmode_flags: .long 0
125beep_flags: .long 0
101real_efer_save_restore: .long 0 126real_efer_save_restore: .long 0
102real_save_efer_edx: .long 0 127real_save_efer_edx: .long 0
103real_save_efer_eax: .long 0 128real_save_efer_eax: .long 0
@@ -230,6 +255,7 @@ bogus_magic:
230# 255#
231ENTRY(acpi_copy_wakeup_routine) 256ENTRY(acpi_copy_wakeup_routine)
232 257
258 pushl %ebx
233 sgdt saved_gdt 259 sgdt saved_gdt
234 sidt saved_idt 260 sidt saved_idt
235 sldt saved_ldt 261 sldt saved_ldt
@@ -259,10 +285,11 @@ ENTRY(acpi_copy_wakeup_routine)
259 285
260 movl saved_videomode, %edx 286 movl saved_videomode, %edx
261 movl %edx, video_mode - wakeup_start (%eax) 287 movl %edx, video_mode - wakeup_start (%eax)
262 movl acpi_video_flags, %edx 288 movl acpi_realmode_flags, %edx
263 movl %edx, video_flags - wakeup_start (%eax) 289 movl %edx, realmode_flags - wakeup_start (%eax)
264 movl $0x12345678, real_magic - wakeup_start (%eax) 290 movl $0x12345678, real_magic - wakeup_start (%eax)
265 movl $0x12345678, saved_magic 291 movl $0x12345678, saved_magic
292 popl %ebx
266 ret 293 ret
267 294
268save_registers: 295save_registers:
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index d8cda14fff8b..0695be538de5 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -5,9 +5,8 @@
5#include <asm/alternative.h> 5#include <asm/alternative.h>
6#include <asm/sections.h> 6#include <asm/sections.h>
7 7
8static int noreplace_smp = 0; 8#ifdef CONFIG_HOTPLUG_CPU
9static int smp_alt_once = 0; 9static int smp_alt_once;
10static int debug_alternative = 0;
11 10
12static int __init bootonly(char *str) 11static int __init bootonly(char *str)
13{ 12{
@@ -15,6 +14,11 @@ static int __init bootonly(char *str)
15 return 1; 14 return 1;
16} 15}
17__setup("smp-alt-boot", bootonly); 16__setup("smp-alt-boot", bootonly);
17#else
18#define smp_alt_once 1
19#endif
20
21static int debug_alternative;
18 22
19static int __init debug_alt(char *str) 23static int __init debug_alt(char *str)
20{ 24{
@@ -23,6 +27,8 @@ static int __init debug_alt(char *str)
23} 27}
24__setup("debug-alternative", debug_alt); 28__setup("debug-alternative", debug_alt);
25 29
30static int noreplace_smp;
31
26static int __init setup_noreplace_smp(char *str) 32static int __init setup_noreplace_smp(char *str)
27{ 33{
28 noreplace_smp = 1; 34 noreplace_smp = 1;
@@ -376,8 +382,6 @@ void __init alternative_instructions(void)
376#ifdef CONFIG_HOTPLUG_CPU 382#ifdef CONFIG_HOTPLUG_CPU
377 if (num_possible_cpus() < 2) 383 if (num_possible_cpus() < 2)
378 smp_alt_once = 1; 384 smp_alt_once = 1;
379#else
380 smp_alt_once = 1;
381#endif 385#endif
382 386
383#ifdef CONFIG_SMP 387#ifdef CONFIG_SMP
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 67824f3bb974..bfc6cb7df7e7 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -263,6 +263,9 @@ static void lapic_timer_setup(enum clock_event_mode mode,
263 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); 263 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
264 apic_write_around(APIC_LVTT, v); 264 apic_write_around(APIC_LVTT, v);
265 break; 265 break;
266 case CLOCK_EVT_MODE_RESUME:
267 /* Nothing to do here */
268 break;
266 } 269 }
267 270
268 local_irq_restore(flags); 271 local_irq_restore(flags);
@@ -315,7 +318,7 @@ static void __devinit setup_APIC_timer(void)
315 318
316#define LAPIC_CAL_LOOPS (HZ/10) 319#define LAPIC_CAL_LOOPS (HZ/10)
317 320
318static __initdata volatile int lapic_cal_loops = -1; 321static __initdata int lapic_cal_loops = -1;
319static __initdata long lapic_cal_t1, lapic_cal_t2; 322static __initdata long lapic_cal_t1, lapic_cal_t2;
320static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; 323static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
321static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; 324static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
@@ -485,7 +488,7 @@ void __init setup_boot_APIC_clock(void)
485 /* Let the interrupts run */ 488 /* Let the interrupts run */
486 local_irq_enable(); 489 local_irq_enable();
487 490
488 while(lapic_cal_loops <= LAPIC_CAL_LOOPS) 491 while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
489 cpu_relax(); 492 cpu_relax();
490 493
491 local_irq_disable(); 494 local_irq_disable();
@@ -521,6 +524,9 @@ void __init setup_boot_APIC_clock(void)
521 */ 524 */
522 if (nmi_watchdog != NMI_IO_APIC) 525 if (nmi_watchdog != NMI_IO_APIC)
523 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; 526 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
527 else
528 printk(KERN_WARNING "APIC timer registered as dummy,"
529 " due to nmi_watchdog=1!\n");
524 } 530 }
525 531
526 /* Setup the lapic or request the broadcast */ 532 /* Setup the lapic or request the broadcast */
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 4112afe712b9..47001d50a083 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -222,6 +222,7 @@
222#include <linux/capability.h> 222#include <linux/capability.h>
223#include <linux/device.h> 223#include <linux/device.h>
224#include <linux/kernel.h> 224#include <linux/kernel.h>
225#include <linux/freezer.h>
225#include <linux/smp.h> 226#include <linux/smp.h>
226#include <linux/dmi.h> 227#include <linux/dmi.h>
227#include <linux/suspend.h> 228#include <linux/suspend.h>
@@ -2311,7 +2312,6 @@ static int __init apm_init(void)
2311 remove_proc_entry("apm", NULL); 2312 remove_proc_entry("apm", NULL);
2312 return err; 2313 return err;
2313 } 2314 }
2314 kapmd_task->flags |= PF_NOFREEZE;
2315 wake_up_process(kapmd_task); 2315 wake_up_process(kapmd_task);
2316 2316
2317 if (num_online_cpus() > 1 && !smp ) { 2317 if (num_online_cpus() > 1 && !smp ) {
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index 27a776c9044d..7288ac88d746 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -17,6 +17,13 @@
17#include <asm/thread_info.h> 17#include <asm/thread_info.h>
18#include <asm/elf.h> 18#include <asm/elf.h>
19 19
20#include <xen/interface/xen.h>
21
22#ifdef CONFIG_LGUEST_GUEST
23#include <linux/lguest.h>
24#include "../../../drivers/lguest/lg.h"
25#endif
26
20#define DEFINE(sym, val) \ 27#define DEFINE(sym, val) \
21 asm volatile("\n->" #sym " %0 " #val : : "i" (val)) 28 asm volatile("\n->" #sym " %0 " #val : : "i" (val))
22 29
@@ -59,6 +66,7 @@ void foo(void)
59 OFFSET(TI_addr_limit, thread_info, addr_limit); 66 OFFSET(TI_addr_limit, thread_info, addr_limit);
60 OFFSET(TI_restart_block, thread_info, restart_block); 67 OFFSET(TI_restart_block, thread_info, restart_block);
61 OFFSET(TI_sysenter_return, thread_info, sysenter_return); 68 OFFSET(TI_sysenter_return, thread_info, sysenter_return);
69 OFFSET(TI_cpu, thread_info, cpu);
62 BLANK(); 70 BLANK();
63 71
64 OFFSET(GDS_size, Xgt_desc_struct, size); 72 OFFSET(GDS_size, Xgt_desc_struct, size);
@@ -115,4 +123,25 @@ void foo(void)
115 OFFSET(PARAVIRT_iret, paravirt_ops, iret); 123 OFFSET(PARAVIRT_iret, paravirt_ops, iret);
116 OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0); 124 OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0);
117#endif 125#endif
126
127#ifdef CONFIG_XEN
128 BLANK();
129 OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
130 OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
131#endif
132
133#ifdef CONFIG_LGUEST_GUEST
134 BLANK();
135 OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
136 OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc);
137 OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc);
138 OFFSET(LGUEST_PAGES_host_cr3, lguest_pages, state.host_cr3);
139 OFFSET(LGUEST_PAGES_host_sp, lguest_pages, state.host_sp);
140 OFFSET(LGUEST_PAGES_guest_gdt_desc, lguest_pages,state.guest_gdt_desc);
141 OFFSET(LGUEST_PAGES_guest_idt_desc, lguest_pages,state.guest_idt_desc);
142 OFFSET(LGUEST_PAGES_guest_gdt, lguest_pages, state.guest_gdt);
143 OFFSET(LGUEST_PAGES_regs_trapnum, lguest_pages, regs.trapnum);
144 OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode);
145 OFFSET(LGUEST_PAGES_regs, lguest_pages, regs);
146#endif
118} 147}
diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile
index 74f27a463db0..778396c78d65 100644
--- a/arch/i386/kernel/cpu/Makefile
+++ b/arch/i386/kernel/cpu/Makefile
@@ -8,8 +8,7 @@ obj-y += amd.o
8obj-y += cyrix.o 8obj-y += cyrix.o
9obj-y += centaur.o 9obj-y += centaur.o
10obj-y += transmeta.o 10obj-y += transmeta.o
11obj-y += intel.o intel_cacheinfo.o 11obj-y += intel.o intel_cacheinfo.o addon_cpuid_features.o
12obj-y += rise.o
13obj-y += nexgen.o 12obj-y += nexgen.o
14obj-y += umc.o 13obj-y += umc.o
15 14
diff --git a/arch/i386/kernel/cpu/addon_cpuid_features.c b/arch/i386/kernel/cpu/addon_cpuid_features.c
new file mode 100644
index 000000000000..3e91d3ee26ec
--- /dev/null
+++ b/arch/i386/kernel/cpu/addon_cpuid_features.c
@@ -0,0 +1,50 @@
1
2/*
3 * Routines to identify additional cpu features that are scattered in
4 * cpuid space.
5 */
6
7#include <linux/cpu.h>
8
9#include <asm/processor.h>
10
11struct cpuid_bit {
12 u16 feature;
13 u8 reg;
14 u8 bit;
15 u32 level;
16};
17
18enum cpuid_regs {
19 CR_EAX = 0,
20 CR_ECX,
21 CR_EDX,
22 CR_EBX
23};
24
25void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
26{
27 u32 max_level;
28 u32 regs[4];
29 const struct cpuid_bit *cb;
30
31 static const struct cpuid_bit cpuid_bits[] = {
32 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
33 { 0, 0, 0, 0 }
34 };
35
36 for (cb = cpuid_bits; cb->feature; cb++) {
37
38 /* Verify that the level is valid */
39 max_level = cpuid_eax(cb->level & 0xffff0000);
40 if (max_level < cb->level ||
41 max_level > (cb->level | 0xffff))
42 continue;
43
44 cpuid(cb->level, &regs[CR_EAX], &regs[CR_EBX],
45 &regs[CR_ECX], &regs[CR_EDX]);
46
47 if (regs[cb->reg] & (1 << cb->bit))
48 set_bit(cb->feature, c->x86_capability);
49 }
50}
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 6f47eeeb93ea..815a5f0aa474 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -272,8 +272,12 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
272 } 272 }
273#endif 273#endif
274 274
275 if (cpuid_eax(0x80000000) >= 0x80000006) 275 if (cpuid_eax(0x80000000) >= 0x80000006) {
276 num_cache_leaves = 3; 276 if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000))
277 num_cache_leaves = 4;
278 else
279 num_cache_leaves = 3;
280 }
277 281
278 if (amd_apic_timer_broken()) 282 if (amd_apic_timer_broken())
279 set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability); 283 set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability);
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 794d593c47eb..d506201d397c 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -353,6 +353,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
353 if ( xlvl >= 0x80000004 ) 353 if ( xlvl >= 0x80000004 )
354 get_model_name(c); /* Default name */ 354 get_model_name(c); /* Default name */
355 } 355 }
356
357 init_scattered_cpuid_features(c);
356 } 358 }
357 359
358 early_intel_workaround(c); 360 early_intel_workaround(c);
@@ -604,7 +606,6 @@ extern int nsc_init_cpu(void);
604extern int amd_init_cpu(void); 606extern int amd_init_cpu(void);
605extern int centaur_init_cpu(void); 607extern int centaur_init_cpu(void);
606extern int transmeta_init_cpu(void); 608extern int transmeta_init_cpu(void);
607extern int rise_init_cpu(void);
608extern int nexgen_init_cpu(void); 609extern int nexgen_init_cpu(void);
609extern int umc_init_cpu(void); 610extern int umc_init_cpu(void);
610 611
@@ -616,7 +617,6 @@ void __init early_cpu_init(void)
616 amd_init_cpu(); 617 amd_init_cpu();
617 centaur_init_cpu(); 618 centaur_init_cpu();
618 transmeta_init_cpu(); 619 transmeta_init_cpu();
619 rise_init_cpu();
620 nexgen_init_cpu(); 620 nexgen_init_cpu();
621 umc_init_cpu(); 621 umc_init_cpu();
622 early_cpu_detect(); 622 early_cpu_detect();
diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig
index e912aae9473c..094118ba00da 100644
--- a/arch/i386/kernel/cpu/cpufreq/Kconfig
+++ b/arch/i386/kernel/cpu/cpufreq/Kconfig
@@ -90,10 +90,17 @@ config X86_POWERNOW_K8
90 If in doubt, say N. 90 If in doubt, say N.
91 91
92config X86_POWERNOW_K8_ACPI 92config X86_POWERNOW_K8_ACPI
93 bool 93 bool "ACPI Support"
94 depends on X86_POWERNOW_K8 && ACPI_PROCESSOR 94 select ACPI_PROCESSOR
95 depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m) 95 depends on X86_POWERNOW_K8
96 default y 96 default y
97 help
98 This provides access to the K8s Processor Performance States via ACPI.
99 This driver is probably required for CPUFreq to work with multi-socket and
100 SMP systems. It is not required on at least some single-socket yet
101 multi-core systems, even if SMP is enabled.
102
103 It is safe to say Y here.
97 104
98config X86_GX_SUSPMOD 105config X86_GX_SUSPMOD
99 tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" 106 tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
@@ -109,7 +116,7 @@ config X86_GX_SUSPMOD
109config X86_SPEEDSTEP_CENTRINO 116config X86_SPEEDSTEP_CENTRINO
110 tristate "Intel Enhanced SpeedStep" 117 tristate "Intel Enhanced SpeedStep"
111 select CPU_FREQ_TABLE 118 select CPU_FREQ_TABLE
112 select X86_SPEEDSTEP_CENTRINO_TABLE if (!X86_SPEEDSTEP_CENTRINO_ACPI) 119 select X86_SPEEDSTEP_CENTRINO_TABLE
113 help 120 help
114 This adds the CPUFreq driver for Enhanced SpeedStep enabled 121 This adds the CPUFreq driver for Enhanced SpeedStep enabled
115 mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However, 122 mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However,
@@ -121,20 +128,6 @@ config X86_SPEEDSTEP_CENTRINO
121 128
122 If in doubt, say N. 129 If in doubt, say N.
123 130
124config X86_SPEEDSTEP_CENTRINO_ACPI
125 bool "Use ACPI tables to decode valid frequency/voltage (deprecated)"
126 depends on X86_SPEEDSTEP_CENTRINO && ACPI_PROCESSOR
127 depends on !(X86_SPEEDSTEP_CENTRINO = y && ACPI_PROCESSOR = m)
128 help
129 This is deprecated and this functionality is now merged into
130 acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of
131 speedstep_centrino.
132 Use primarily the information provided in the BIOS ACPI tables
133 to determine valid CPU frequency and voltage pairings. It is
134 required for the driver to work on non-Banias CPUs.
135
136 If in doubt, say Y.
137
138config X86_SPEEDSTEP_CENTRINO_TABLE 131config X86_SPEEDSTEP_CENTRINO_TABLE
139 bool "Built-in tables for Banias CPUs" 132 bool "Built-in tables for Banias CPUs"
140 depends on X86_SPEEDSTEP_CENTRINO 133 depends on X86_SPEEDSTEP_CENTRINO
@@ -230,7 +223,7 @@ comment "shared options"
230config X86_ACPI_CPUFREQ_PROC_INTF 223config X86_ACPI_CPUFREQ_PROC_INTF
231 bool "/proc/acpi/processor/../performance interface (deprecated)" 224 bool "/proc/acpi/processor/../performance interface (deprecated)"
232 depends on PROC_FS 225 depends on PROC_FS
233 depends on X86_ACPI_CPUFREQ || X86_SPEEDSTEP_CENTRINO_ACPI || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI 226 depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI
234 help 227 help
235 This enables the deprecated /proc/acpi/processor/../performance 228 This enables the deprecated /proc/acpi/processor/../performance
236 interface. While it is helpful for debugging, the generic, 229 interface. While it is helpful for debugging, the generic,
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index 10baa3501ed3..6f846bee2103 100644
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -167,11 +167,13 @@ static void do_drv_read(struct drv_cmd *cmd)
167 167
168static void do_drv_write(struct drv_cmd *cmd) 168static void do_drv_write(struct drv_cmd *cmd)
169{ 169{
170 u32 h = 0; 170 u32 lo, hi;
171 171
172 switch (cmd->type) { 172 switch (cmd->type) {
173 case SYSTEM_INTEL_MSR_CAPABLE: 173 case SYSTEM_INTEL_MSR_CAPABLE:
174 wrmsr(cmd->addr.msr.reg, cmd->val, h); 174 rdmsr(cmd->addr.msr.reg, lo, hi);
175 lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
176 wrmsr(cmd->addr.msr.reg, lo, hi);
175 break; 177 break;
176 case SYSTEM_IO_CAPABLE: 178 case SYSTEM_IO_CAPABLE:
177 acpi_os_write_port((acpi_io_address)cmd->addr.io.port, 179 acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
@@ -372,7 +374,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
372 struct cpufreq_freqs freqs; 374 struct cpufreq_freqs freqs;
373 cpumask_t online_policy_cpus; 375 cpumask_t online_policy_cpus;
374 struct drv_cmd cmd; 376 struct drv_cmd cmd;
375 unsigned int msr;
376 unsigned int next_state = 0; /* Index into freq_table */ 377 unsigned int next_state = 0; /* Index into freq_table */
377 unsigned int next_perf_state = 0; /* Index into perf table */ 378 unsigned int next_perf_state = 0; /* Index into perf table */
378 unsigned int i; 379 unsigned int i;
@@ -417,11 +418,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
417 case SYSTEM_INTEL_MSR_CAPABLE: 418 case SYSTEM_INTEL_MSR_CAPABLE:
418 cmd.type = SYSTEM_INTEL_MSR_CAPABLE; 419 cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
419 cmd.addr.msr.reg = MSR_IA32_PERF_CTL; 420 cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
420 msr = 421 cmd.val = (u32) perf->states[next_perf_state].control;
421 (u32) perf->states[next_perf_state].
422 control & INTEL_MSR_RANGE;
423 cmd.val = get_cur_val(online_policy_cpus);
424 cmd.val = (cmd.val & ~INTEL_MSR_RANGE) | msr;
425 break; 422 break;
426 case SYSTEM_IO_CAPABLE: 423 case SYSTEM_IO_CAPABLE:
427 cmd.type = SYSTEM_IO_CAPABLE; 424 cmd.type = SYSTEM_IO_CAPABLE;
@@ -668,8 +665,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
668 data->max_freq = perf->states[0].core_frequency * 1000; 665 data->max_freq = perf->states[0].core_frequency * 1000;
669 /* table init */ 666 /* table init */
670 for (i=0; i<perf->state_count; i++) { 667 for (i=0; i<perf->state_count; i++) {
671 if (i>0 && perf->states[i].core_frequency == 668 if (i>0 && perf->states[i].core_frequency >=
672 perf->states[i-1].core_frequency) 669 data->freq_table[valid_states-1].frequency / 1000)
673 continue; 670 continue;
674 671
675 data->freq_table[valid_states].index = i; 672 data->freq_table[valid_states].index = i;
diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c
index 0d49d73d1b71..66acd5039918 100644
--- a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -391,8 +391,6 @@ static struct cpufreq_driver nforce2_driver = {
391 */ 391 */
392static unsigned int nforce2_detect_chipset(void) 392static unsigned int nforce2_detect_chipset(void)
393{ 393{
394 u8 revision;
395
396 nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 394 nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
397 PCI_DEVICE_ID_NVIDIA_NFORCE2, 395 PCI_DEVICE_ID_NVIDIA_NFORCE2,
398 PCI_ANY_ID, PCI_ANY_ID, NULL); 396 PCI_ANY_ID, PCI_ANY_ID, NULL);
@@ -400,10 +398,8 @@ static unsigned int nforce2_detect_chipset(void)
400 if (nforce2_chipset_dev == NULL) 398 if (nforce2_chipset_dev == NULL)
401 return -ENODEV; 399 return -ENODEV;
402 400
403 pci_read_config_byte(nforce2_chipset_dev, PCI_REVISION_ID, &revision);
404
405 printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n", 401 printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n",
406 revision); 402 nforce2_chipset_dev->revision);
407 printk(KERN_INFO 403 printk(KERN_INFO
408 "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n"); 404 "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n");
409 405
diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
index 6667e9cceb9f..194144539a6f 100644
--- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
@@ -115,7 +115,6 @@ struct gxfreq_params {
115 u8 pci_suscfg; 115 u8 pci_suscfg;
116 u8 pci_pmer1; 116 u8 pci_pmer1;
117 u8 pci_pmer2; 117 u8 pci_pmer2;
118 u8 pci_rev;
119 struct pci_dev *cs55x0; 118 struct pci_dev *cs55x0;
120}; 119};
121 120
@@ -276,7 +275,7 @@ static void gx_set_cpuspeed(unsigned int khz)
276 pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */ 275 pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */
277 pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1); 276 pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1);
278 277
279 if (gx_params->pci_rev < 0x10) { /* CS5530(rev 1.2, 1.3) */ 278 if (gx_params->cs55x0->revision < 0x10) { /* CS5530(rev 1.2, 1.3) */
280 suscfg = gx_params->pci_suscfg | SUSMOD; 279 suscfg = gx_params->pci_suscfg | SUSMOD;
281 } else { /* CS5530A,B.. */ 280 } else { /* CS5530A,B.. */
282 suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE; 281 suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE;
@@ -471,7 +470,6 @@ static int __init cpufreq_gx_init(void)
471 pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); 470 pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2));
472 pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); 471 pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration));
473 pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration)); 472 pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration));
474 pci_read_config_byte(params->cs55x0, PCI_REVISION_ID, &params->pci_rev);
475 473
476 if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { 474 if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) {
477 kfree(params); 475 kfree(params);
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index a3df9c039bd4..ef8f0bc3fc71 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -29,6 +29,7 @@
29#include <linux/pci.h> 29#include <linux/pci.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/string.h> 31#include <linux/string.h>
32#include <linux/delay.h>
32 33
33#include <asm/msr.h> 34#include <asm/msr.h>
34#include <asm/timex.h> 35#include <asm/timex.h>
@@ -55,7 +56,6 @@
55/* Flags */ 56/* Flags */
56#define USE_ACPI_C3 (1 << 1) 57#define USE_ACPI_C3 (1 << 1)
57#define USE_NORTHBRIDGE (1 << 2) 58#define USE_NORTHBRIDGE (1 << 2)
58#define USE_VT8235 (1 << 3)
59 59
60static int cpu_model; 60static int cpu_model;
61static unsigned int numscales=16; 61static unsigned int numscales=16;
@@ -63,22 +63,19 @@ static unsigned int fsb;
63 63
64static const struct mV_pos *vrm_mV_table; 64static const struct mV_pos *vrm_mV_table;
65static const unsigned char *mV_vrm_table; 65static const unsigned char *mV_vrm_table;
66struct f_msr {
67 u8 vrm;
68 u8 pos;
69};
70static struct f_msr f_msr_table[32];
71 66
72static unsigned int highest_speed, lowest_speed; /* kHz */ 67static unsigned int highest_speed, lowest_speed; /* kHz */
73static unsigned int minmult, maxmult; 68static unsigned int minmult, maxmult;
74static int can_scale_voltage; 69static int can_scale_voltage;
75static struct acpi_processor *pr = NULL; 70static struct acpi_processor *pr = NULL;
76static struct acpi_processor_cx *cx = NULL; 71static struct acpi_processor_cx *cx = NULL;
72static u32 acpi_regs_addr;
77static u8 longhaul_flags; 73static u8 longhaul_flags;
78static u8 longhaul_pos; 74static unsigned int longhaul_index;
79 75
80/* Module parameters */ 76/* Module parameters */
81static int scale_voltage; 77static int scale_voltage;
78static int disable_acpi_c3;
82 79
83#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) 80#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
84 81
@@ -144,7 +141,7 @@ static void do_longhaul1(unsigned int clock_ratio_index)
144 rdmsrl(MSR_VIA_BCR2, bcr2.val); 141 rdmsrl(MSR_VIA_BCR2, bcr2.val);
145 /* Enable software clock multiplier */ 142 /* Enable software clock multiplier */
146 bcr2.bits.ESOFTBF = 1; 143 bcr2.bits.ESOFTBF = 1;
147 bcr2.bits.CLOCKMUL = clock_ratio_index; 144 bcr2.bits.CLOCKMUL = clock_ratio_index & 0xff;
148 145
149 /* Sync to timer tick */ 146 /* Sync to timer tick */
150 safe_halt(); 147 safe_halt();
@@ -163,14 +160,12 @@ static void do_longhaul1(unsigned int clock_ratio_index)
163 160
164/* For processor with Longhaul MSR */ 161/* For processor with Longhaul MSR */
165 162
166static void do_powersaver(int cx_address, unsigned int clock_ratio_index) 163static void do_powersaver(int cx_address, unsigned int clock_ratio_index,
164 unsigned int dir)
167{ 165{
168 union msr_longhaul longhaul; 166 union msr_longhaul longhaul;
169 u8 dest_pos;
170 u32 t; 167 u32 t;
171 168
172 dest_pos = f_msr_table[clock_ratio_index].pos;
173
174 rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); 169 rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
175 /* Setup new frequency */ 170 /* Setup new frequency */
176 longhaul.bits.RevisionKey = longhaul.bits.RevisionID; 171 longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
@@ -178,11 +173,11 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
178 longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; 173 longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4;
179 /* Setup new voltage */ 174 /* Setup new voltage */
180 if (can_scale_voltage) 175 if (can_scale_voltage)
181 longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm; 176 longhaul.bits.SoftVID = (clock_ratio_index >> 8) & 0x1f;
182 /* Sync to timer tick */ 177 /* Sync to timer tick */
183 safe_halt(); 178 safe_halt();
184 /* Raise voltage if necessary */ 179 /* Raise voltage if necessary */
185 if (can_scale_voltage && longhaul_pos < dest_pos) { 180 if (can_scale_voltage && dir) {
186 longhaul.bits.EnableSoftVID = 1; 181 longhaul.bits.EnableSoftVID = 1;
187 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); 182 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
188 /* Change voltage */ 183 /* Change voltage */
@@ -199,7 +194,6 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
199 } 194 }
200 longhaul.bits.EnableSoftVID = 0; 195 longhaul.bits.EnableSoftVID = 0;
201 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); 196 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
202 longhaul_pos = dest_pos;
203 } 197 }
204 198
205 /* Change frequency on next halt or sleep */ 199 /* Change frequency on next halt or sleep */
@@ -220,7 +214,7 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
220 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); 214 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
221 215
222 /* Reduce voltage if necessary */ 216 /* Reduce voltage if necessary */
223 if (can_scale_voltage && longhaul_pos > dest_pos) { 217 if (can_scale_voltage && !dir) {
224 longhaul.bits.EnableSoftVID = 1; 218 longhaul.bits.EnableSoftVID = 1;
225 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); 219 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
226 /* Change voltage */ 220 /* Change voltage */
@@ -237,7 +231,6 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
237 } 231 }
238 longhaul.bits.EnableSoftVID = 0; 232 longhaul.bits.EnableSoftVID = 0;
239 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); 233 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
240 longhaul_pos = dest_pos;
241 } 234 }
242} 235}
243 236
@@ -248,25 +241,28 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
248 * Sets a new clock ratio. 241 * Sets a new clock ratio.
249 */ 242 */
250 243
251static void longhaul_setstate(unsigned int clock_ratio_index) 244static void longhaul_setstate(unsigned int table_index)
252{ 245{
246 unsigned int clock_ratio_index;
253 int speed, mult; 247 int speed, mult;
254 struct cpufreq_freqs freqs; 248 struct cpufreq_freqs freqs;
255 static unsigned int old_ratio=-1;
256 unsigned long flags; 249 unsigned long flags;
257 unsigned int pic1_mask, pic2_mask; 250 unsigned int pic1_mask, pic2_mask;
251 u16 bm_status = 0;
252 u32 bm_timeout = 1000;
253 unsigned int dir = 0;
258 254
259 if (old_ratio == clock_ratio_index) 255 clock_ratio_index = longhaul_table[table_index].index;
260 return; 256 /* Safety precautions */
261 old_ratio = clock_ratio_index; 257 mult = clock_ratio[clock_ratio_index & 0x1f];
262
263 mult = clock_ratio[clock_ratio_index];
264 if (mult == -1) 258 if (mult == -1)
265 return; 259 return;
266
267 speed = calc_speed(mult); 260 speed = calc_speed(mult);
268 if ((speed > highest_speed) || (speed < lowest_speed)) 261 if ((speed > highest_speed) || (speed < lowest_speed))
269 return; 262 return;
263 /* Voltage transition before frequency transition? */
264 if (can_scale_voltage && longhaul_index < table_index)
265 dir = 1;
270 266
271 freqs.old = calc_speed(longhaul_get_cpu_mult()); 267 freqs.old = calc_speed(longhaul_get_cpu_mult());
272 freqs.new = speed; 268 freqs.new = speed;
@@ -285,11 +281,24 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
285 outb(0xFF,0xA1); /* Overkill */ 281 outb(0xFF,0xA1); /* Overkill */
286 outb(0xFE,0x21); /* TMR0 only */ 282 outb(0xFE,0x21); /* TMR0 only */
287 283
284 /* Wait while PCI bus is busy. */
285 if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE
286 || ((pr != NULL) && pr->flags.bm_control))) {
287 bm_status = inw(acpi_regs_addr);
288 bm_status &= 1 << 4;
289 while (bm_status && bm_timeout) {
290 outw(1 << 4, acpi_regs_addr);
291 bm_timeout--;
292 bm_status = inw(acpi_regs_addr);
293 bm_status &= 1 << 4;
294 }
295 }
296
288 if (longhaul_flags & USE_NORTHBRIDGE) { 297 if (longhaul_flags & USE_NORTHBRIDGE) {
289 /* Disable AGP and PCI arbiters */ 298 /* Disable AGP and PCI arbiters */
290 outb(3, 0x22); 299 outb(3, 0x22);
291 } else if ((pr != NULL) && pr->flags.bm_control) { 300 } else if ((pr != NULL) && pr->flags.bm_control) {
292 /* Disable bus master arbitration */ 301 /* Disable bus master arbitration */
293 acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1); 302 acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
294 } 303 }
295 switch (longhaul_version) { 304 switch (longhaul_version) {
@@ -314,9 +323,9 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
314 if (longhaul_flags & USE_ACPI_C3) { 323 if (longhaul_flags & USE_ACPI_C3) {
315 /* Don't allow wakeup */ 324 /* Don't allow wakeup */
316 acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); 325 acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
317 do_powersaver(cx->address, clock_ratio_index); 326 do_powersaver(cx->address, clock_ratio_index, dir);
318 } else { 327 } else {
319 do_powersaver(0, clock_ratio_index); 328 do_powersaver(0, clock_ratio_index, dir);
320 } 329 }
321 break; 330 break;
322 } 331 }
@@ -336,6 +345,9 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
336 345
337 freqs.new = calc_speed(longhaul_get_cpu_mult()); 346 freqs.new = calc_speed(longhaul_get_cpu_mult());
338 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 347 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
348
349 if (!bm_timeout)
350 printk(KERN_INFO PFX "Warning: Timeout while waiting for idle PCI bus.\n");
339} 351}
340 352
341/* 353/*
@@ -369,7 +381,8 @@ static int guess_fsb(int mult)
369 381
370static int __init longhaul_get_ranges(void) 382static int __init longhaul_get_ranges(void)
371{ 383{
372 unsigned int j, k = 0; 384 unsigned int i, j, k = 0;
385 unsigned int ratio;
373 int mult; 386 int mult;
374 387
375 /* Get current frequency */ 388 /* Get current frequency */
@@ -423,8 +436,7 @@ static int __init longhaul_get_ranges(void)
423 if(!longhaul_table) 436 if(!longhaul_table)
424 return -ENOMEM; 437 return -ENOMEM;
425 438
426 for (j=0; j < numscales; j++) { 439 for (j = 0; j < numscales; j++) {
427 unsigned int ratio;
428 ratio = clock_ratio[j]; 440 ratio = clock_ratio[j];
429 if (ratio == -1) 441 if (ratio == -1)
430 continue; 442 continue;
@@ -434,13 +446,41 @@ static int __init longhaul_get_ranges(void)
434 longhaul_table[k].index = j; 446 longhaul_table[k].index = j;
435 k++; 447 k++;
436 } 448 }
449 if (k <= 1) {
450 kfree(longhaul_table);
451 return -ENODEV;
452 }
453 /* Sort */
454 for (j = 0; j < k - 1; j++) {
455 unsigned int min_f, min_i;
456 min_f = longhaul_table[j].frequency;
457 min_i = j;
458 for (i = j + 1; i < k; i++) {
459 if (longhaul_table[i].frequency < min_f) {
460 min_f = longhaul_table[i].frequency;
461 min_i = i;
462 }
463 }
464 if (min_i != j) {
465 unsigned int temp;
466 temp = longhaul_table[j].frequency;
467 longhaul_table[j].frequency = longhaul_table[min_i].frequency;
468 longhaul_table[min_i].frequency = temp;
469 temp = longhaul_table[j].index;
470 longhaul_table[j].index = longhaul_table[min_i].index;
471 longhaul_table[min_i].index = temp;
472 }
473 }
437 474
438 longhaul_table[k].frequency = CPUFREQ_TABLE_END; 475 longhaul_table[k].frequency = CPUFREQ_TABLE_END;
439 if (!k) {
440 kfree (longhaul_table);
441 return -EINVAL;
442 }
443 476
477 /* Find index we are running on */
478 for (j = 0; j < k; j++) {
479 if (clock_ratio[longhaul_table[j].index & 0x1f] == mult) {
480 longhaul_index = j;
481 break;
482 }
483 }
444 return 0; 484 return 0;
445} 485}
446 486
@@ -448,7 +488,7 @@ static int __init longhaul_get_ranges(void)
448static void __init longhaul_setup_voltagescaling(void) 488static void __init longhaul_setup_voltagescaling(void)
449{ 489{
450 union msr_longhaul longhaul; 490 union msr_longhaul longhaul;
451 struct mV_pos minvid, maxvid; 491 struct mV_pos minvid, maxvid, vid;
452 unsigned int j, speed, pos, kHz_step, numvscales; 492 unsigned int j, speed, pos, kHz_step, numvscales;
453 int min_vid_speed; 493 int min_vid_speed;
454 494
@@ -459,11 +499,11 @@ static void __init longhaul_setup_voltagescaling(void)
459 } 499 }
460 500
461 if (!longhaul.bits.VRMRev) { 501 if (!longhaul.bits.VRMRev) {
462 printk (KERN_INFO PFX "VRM 8.5\n"); 502 printk(KERN_INFO PFX "VRM 8.5\n");
463 vrm_mV_table = &vrm85_mV[0]; 503 vrm_mV_table = &vrm85_mV[0];
464 mV_vrm_table = &mV_vrm85[0]; 504 mV_vrm_table = &mV_vrm85[0];
465 } else { 505 } else {
466 printk (KERN_INFO PFX "Mobile VRM\n"); 506 printk(KERN_INFO PFX "Mobile VRM\n");
467 if (cpu_model < CPU_NEHEMIAH) 507 if (cpu_model < CPU_NEHEMIAH)
468 return; 508 return;
469 vrm_mV_table = &mobilevrm_mV[0]; 509 vrm_mV_table = &mobilevrm_mV[0];
@@ -523,7 +563,6 @@ static void __init longhaul_setup_voltagescaling(void)
523 /* Calculate kHz for one voltage step */ 563 /* Calculate kHz for one voltage step */
524 kHz_step = (highest_speed - min_vid_speed) / numvscales; 564 kHz_step = (highest_speed - min_vid_speed) / numvscales;
525 565
526
527 j = 0; 566 j = 0;
528 while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { 567 while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) {
529 speed = longhaul_table[j].frequency; 568 speed = longhaul_table[j].frequency;
@@ -531,15 +570,14 @@ static void __init longhaul_setup_voltagescaling(void)
531 pos = (speed - min_vid_speed) / kHz_step + minvid.pos; 570 pos = (speed - min_vid_speed) / kHz_step + minvid.pos;
532 else 571 else
533 pos = minvid.pos; 572 pos = minvid.pos;
534 f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos]; 573 longhaul_table[j].index |= mV_vrm_table[pos] << 8;
535 f_msr_table[longhaul_table[j].index].pos = pos; 574 vid = vrm_mV_table[mV_vrm_table[pos]];
575 printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", speed, j, vid.mV);
536 j++; 576 j++;
537 } 577 }
538 578
539 longhaul_pos = maxvid.pos;
540 can_scale_voltage = 1; 579 can_scale_voltage = 1;
541 printk(KERN_INFO PFX "Voltage scaling enabled. " 580 printk(KERN_INFO PFX "Voltage scaling enabled.\n");
542 "Use of \"conservative\" governor is highly recommended.\n");
543} 581}
544 582
545 583
@@ -553,15 +591,44 @@ static int longhaul_target(struct cpufreq_policy *policy,
553 unsigned int target_freq, unsigned int relation) 591 unsigned int target_freq, unsigned int relation)
554{ 592{
555 unsigned int table_index = 0; 593 unsigned int table_index = 0;
556 unsigned int new_clock_ratio = 0; 594 unsigned int i;
595 unsigned int dir = 0;
596 u8 vid, current_vid;
557 597
558 if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index)) 598 if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index))
559 return -EINVAL; 599 return -EINVAL;
560 600
561 new_clock_ratio = longhaul_table[table_index].index & 0xFF; 601 /* Don't set same frequency again */
562 602 if (longhaul_index == table_index)
563 longhaul_setstate(new_clock_ratio); 603 return 0;
564 604
605 if (!can_scale_voltage)
606 longhaul_setstate(table_index);
607 else {
608 /* On test system voltage transitions exceeding single
609 * step up or down were turning motherboard off. Both
610 * "ondemand" and "userspace" are unsafe. C7 is doing
611 * this in hardware, C3 is old and we need to do this
612 * in software. */
613 i = longhaul_index;
614 current_vid = (longhaul_table[longhaul_index].index >> 8) & 0x1f;
615 if (table_index > longhaul_index)
616 dir = 1;
617 while (i != table_index) {
618 vid = (longhaul_table[i].index >> 8) & 0x1f;
619 if (vid != current_vid) {
620 longhaul_setstate(i);
621 current_vid = vid;
622 msleep(200);
623 }
624 if (dir)
625 i++;
626 else
627 i--;
628 }
629 longhaul_setstate(table_index);
630 }
631 longhaul_index = table_index;
565 return 0; 632 return 0;
566} 633}
567 634
@@ -590,11 +657,10 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
590static int enable_arbiter_disable(void) 657static int enable_arbiter_disable(void)
591{ 658{
592 struct pci_dev *dev; 659 struct pci_dev *dev;
593 int status; 660 int status = 1;
594 int reg; 661 int reg;
595 u8 pci_cmd; 662 u8 pci_cmd;
596 663
597 status = 1;
598 /* Find PLE133 host bridge */ 664 /* Find PLE133 host bridge */
599 reg = 0x78; 665 reg = 0x78;
600 dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, 666 dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0,
@@ -627,13 +693,17 @@ static int enable_arbiter_disable(void)
627 return 0; 693 return 0;
628} 694}
629 695
630static int longhaul_setup_vt8235(void) 696static int longhaul_setup_southbridge(void)
631{ 697{
632 struct pci_dev *dev; 698 struct pci_dev *dev;
633 u8 pci_cmd; 699 u8 pci_cmd;
634 700
635 /* Find VT8235 southbridge */ 701 /* Find VT8235 southbridge */
636 dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); 702 dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL);
703 if (dev == NULL)
704 /* Find VT8237 southbridge */
705 dev = pci_get_device(PCI_VENDOR_ID_VIA,
706 PCI_DEVICE_ID_VIA_8237, NULL);
637 if (dev != NULL) { 707 if (dev != NULL) {
638 /* Set transition time to max */ 708 /* Set transition time to max */
639 pci_read_config_byte(dev, 0xec, &pci_cmd); 709 pci_read_config_byte(dev, 0xec, &pci_cmd);
@@ -645,6 +715,14 @@ static int longhaul_setup_vt8235(void)
645 pci_read_config_byte(dev, 0xe5, &pci_cmd); 715 pci_read_config_byte(dev, 0xe5, &pci_cmd);
646 pci_cmd |= 1 << 7; 716 pci_cmd |= 1 << 7;
647 pci_write_config_byte(dev, 0xe5, pci_cmd); 717 pci_write_config_byte(dev, 0xe5, pci_cmd);
718 /* Get address of ACPI registers block*/
719 pci_read_config_byte(dev, 0x81, &pci_cmd);
720 if (pci_cmd & 1 << 7) {
721 pci_read_config_dword(dev, 0x88, &acpi_regs_addr);
722 acpi_regs_addr &= 0xff00;
723 printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", acpi_regs_addr);
724 }
725
648 pci_dev_put(dev); 726 pci_dev_put(dev);
649 return 1; 727 return 1;
650 } 728 }
@@ -657,7 +735,6 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
657 char *cpuname=NULL; 735 char *cpuname=NULL;
658 int ret; 736 int ret;
659 u32 lo, hi; 737 u32 lo, hi;
660 int vt8235_present;
661 738
662 /* Check what we have on this motherboard */ 739 /* Check what we have on this motherboard */
663 switch (c->x86_model) { 740 switch (c->x86_model) {
@@ -755,7 +832,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
755 }; 832 };
756 833
757 /* Doesn't hurt */ 834 /* Doesn't hurt */
758 vt8235_present = longhaul_setup_vt8235(); 835 longhaul_setup_southbridge();
759 836
760 /* Find ACPI data for processor */ 837 /* Find ACPI data for processor */
761 acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, 838 acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
@@ -765,35 +842,29 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
765 /* Check ACPI support for C3 state */ 842 /* Check ACPI support for C3 state */
766 if (pr != NULL && longhaul_version == TYPE_POWERSAVER) { 843 if (pr != NULL && longhaul_version == TYPE_POWERSAVER) {
767 cx = &pr->power.states[ACPI_STATE_C3]; 844 cx = &pr->power.states[ACPI_STATE_C3];
768 if (cx->address > 0 && cx->latency <= 1000) { 845 if (cx->address > 0 && cx->latency <= 1000)
769 longhaul_flags |= USE_ACPI_C3; 846 longhaul_flags |= USE_ACPI_C3;
770 goto print_support_type;
771 }
772 } 847 }
848 /* Disable if it isn't working */
849 if (disable_acpi_c3)
850 longhaul_flags &= ~USE_ACPI_C3;
773 /* Check if northbridge is friendly */ 851 /* Check if northbridge is friendly */
774 if (enable_arbiter_disable()) { 852 if (enable_arbiter_disable())
775 longhaul_flags |= USE_NORTHBRIDGE; 853 longhaul_flags |= USE_NORTHBRIDGE;
776 goto print_support_type; 854
777 }
778 /* Use VT8235 southbridge if present */
779 if (longhaul_version == TYPE_POWERSAVER && vt8235_present) {
780 longhaul_flags |= USE_VT8235;
781 goto print_support_type;
782 }
783 /* Check ACPI support for bus master arbiter disable */ 855 /* Check ACPI support for bus master arbiter disable */
784 if ((pr == NULL) || !(pr->flags.bm_control)) { 856 if (!(longhaul_flags & USE_ACPI_C3
857 || longhaul_flags & USE_NORTHBRIDGE)
858 && ((pr == NULL) || !(pr->flags.bm_control))) {
785 printk(KERN_ERR PFX 859 printk(KERN_ERR PFX
786 "No ACPI support. Unsupported northbridge.\n"); 860 "No ACPI support. Unsupported northbridge.\n");
787 return -ENODEV; 861 return -ENODEV;
788 } 862 }
789 863
790print_support_type:
791 if (longhaul_flags & USE_NORTHBRIDGE) 864 if (longhaul_flags & USE_NORTHBRIDGE)
792 printk (KERN_INFO PFX "Using northbridge support.\n"); 865 printk(KERN_INFO PFX "Using northbridge support.\n");
793 else if (longhaul_flags & USE_VT8235) 866 if (longhaul_flags & USE_ACPI_C3)
794 printk (KERN_INFO PFX "Using VT8235 support.\n"); 867 printk(KERN_INFO PFX "Using ACPI support.\n");
795 else
796 printk (KERN_INFO PFX "Using ACPI support.\n");
797 868
798 ret = longhaul_get_ranges(); 869 ret = longhaul_get_ranges();
799 if (ret != 0) 870 if (ret != 0)
@@ -885,6 +956,9 @@ static void __exit longhaul_exit(void)
885 kfree(longhaul_table); 956 kfree(longhaul_table);
886} 957}
887 958
959module_param (disable_acpi_c3, int, 0644);
960MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support");
961
888module_param (scale_voltage, int, 0644); 962module_param (scale_voltage, int, 0644);
889MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); 963MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
890 964
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h
index 102548f12842..4fcc320997df 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.h
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h
@@ -180,7 +180,7 @@ static const int __initdata ezrat_clock_ratio[32] = {
180 180
181 -1, /* 0000 -> RESERVED (10.0x) */ 181 -1, /* 0000 -> RESERVED (10.0x) */
182 110, /* 0001 -> 11.0x */ 182 110, /* 0001 -> 11.0x */
183 120, /* 0010 -> 12.0x */ 183 -1, /* 0010 -> 12.0x */
184 -1, /* 0011 -> RESERVED (9.0x)*/ 184 -1, /* 0011 -> RESERVED (9.0x)*/
185 105, /* 0100 -> 10.5x */ 185 105, /* 0100 -> 10.5x */
186 115, /* 0101 -> 11.5x */ 186 115, /* 0101 -> 11.5x */
@@ -237,7 +237,7 @@ static const int __initdata ezrat_eblcr[32] = {
237 237
238static const int __initdata nehemiah_clock_ratio[32] = { 238static const int __initdata nehemiah_clock_ratio[32] = {
239 100, /* 0000 -> 10.0x */ 239 100, /* 0000 -> 10.0x */
240 160, /* 0001 -> 16.0x */ 240 -1, /* 0001 -> 16.0x */
241 40, /* 0010 -> 4.0x */ 241 40, /* 0010 -> 4.0x */
242 90, /* 0011 -> 9.0x */ 242 90, /* 0011 -> 9.0x */
243 95, /* 0100 -> 9.5x */ 243 95, /* 0100 -> 9.5x */
@@ -252,10 +252,10 @@ static const int __initdata nehemiah_clock_ratio[32] = {
252 75, /* 1101 -> 7.5x */ 252 75, /* 1101 -> 7.5x */
253 85, /* 1110 -> 8.5x */ 253 85, /* 1110 -> 8.5x */
254 120, /* 1111 -> 12.0x */ 254 120, /* 1111 -> 12.0x */
255 100, /* 0000 -> 10.0x */ 255 -1, /* 0000 -> 10.0x */
256 110, /* 0001 -> 11.0x */ 256 110, /* 0001 -> 11.0x */
257 120, /* 0010 -> 12.0x */ 257 -1, /* 0010 -> 12.0x */
258 90, /* 0011 -> 9.0x */ 258 -1, /* 0011 -> 9.0x */
259 105, /* 0100 -> 10.5x */ 259 105, /* 0100 -> 10.5x */
260 115, /* 0101 -> 11.5x */ 260 115, /* 0101 -> 11.5x */
261 125, /* 0110 -> 12.5x */ 261 125, /* 0110 -> 12.5x */
@@ -267,7 +267,7 @@ static const int __initdata nehemiah_clock_ratio[32] = {
267 145, /* 1100 -> 14.5x */ 267 145, /* 1100 -> 14.5x */
268 155, /* 1101 -> 15.5x */ 268 155, /* 1101 -> 15.5x */
269 -1, /* 1110 -> RESERVED (13.0x) */ 269 -1, /* 1110 -> RESERVED (13.0x) */
270 120, /* 1111 -> 12.0x */ 270 -1, /* 1111 -> 12.0x */
271}; 271};
272 272
273static const int __initdata nehemiah_eblcr[32] = { 273static const int __initdata nehemiah_eblcr[32] = {
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
index 4ade55c5f333..34ed53a06730 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
@@ -599,14 +599,17 @@ static void print_basics(struct powernow_k8_data *data)
599 for (j = 0; j < data->numps; j++) { 599 for (j = 0; j < data->numps; j++) {
600 if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) { 600 if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) {
601 if (cpu_family == CPU_HW_PSTATE) { 601 if (cpu_family == CPU_HW_PSTATE) {
602 printk(KERN_INFO PFX " %d : fid 0x%x gid 0x%x (%d MHz)\n", j, (data->powernow_table[j].index & 0xff00) >> 8, 602 printk(KERN_INFO PFX " %d : fid 0x%x did 0x%x (%d MHz)\n",
603 (data->powernow_table[j].index & 0xff0000) >> 16, 603 j,
604 data->powernow_table[j].frequency/1000); 604 (data->powernow_table[j].index & 0xff00) >> 8,
605 (data->powernow_table[j].index & 0xff0000) >> 16,
606 data->powernow_table[j].frequency/1000);
605 } else { 607 } else {
606 printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", j, 608 printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n",
607 data->powernow_table[j].index & 0xff, 609 j,
608 data->powernow_table[j].frequency/1000, 610 data->powernow_table[j].index & 0xff,
609 data->powernow_table[j].index >> 8); 611 data->powernow_table[j].frequency/1000,
612 data->powernow_table[j].index >> 8);
610 } 613 }
611 } 614 }
612 } 615 }
@@ -1086,7 +1089,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
1086 1089
1087 if (cpu_family == CPU_HW_PSTATE) 1090 if (cpu_family == CPU_HW_PSTATE)
1088 dprintk("targ: curr fid 0x%x, did 0x%x\n", 1091 dprintk("targ: curr fid 0x%x, did 0x%x\n",
1089 data->currfid, data->currvid); 1092 data->currfid, data->currdid);
1090 else { 1093 else {
1091 dprintk("targ: curr fid 0x%x, vid 0x%x\n", 1094 dprintk("targ: curr fid 0x%x, vid 0x%x\n",
1092 data->currfid, data->currvid); 1095 data->currfid, data->currvid);
@@ -1322,16 +1325,22 @@ static struct cpufreq_driver cpufreq_amd64_driver = {
1322static int __cpuinit powernowk8_init(void) 1325static int __cpuinit powernowk8_init(void)
1323{ 1326{
1324 unsigned int i, supported_cpus = 0; 1327 unsigned int i, supported_cpus = 0;
1328 unsigned int booted_cores = 1;
1325 1329
1326 for_each_online_cpu(i) { 1330 for_each_online_cpu(i) {
1327 if (check_supported_cpu(i)) 1331 if (check_supported_cpu(i))
1328 supported_cpus++; 1332 supported_cpus++;
1329 } 1333 }
1330 1334
1335#ifdef CONFIG_SMP
1336 booted_cores = cpu_data[0].booted_cores;
1337#endif
1338
1331 if (supported_cpus == num_online_cpus()) { 1339 if (supported_cpus == num_online_cpus()) {
1332 printk(KERN_INFO PFX "Found %d %s " 1340 printk(KERN_INFO PFX "Found %d %s "
1333 "processors (" VERSION ")\n", supported_cpus, 1341 "processors (%d cpu cores) (" VERSION ")\n",
1334 boot_cpu_data.x86_model_id); 1342 supported_cpus/booted_cores,
1343 boot_cpu_data.x86_model_id, supported_cpus);
1335 return cpufreq_register_driver(&cpufreq_amd64_driver); 1344 return cpufreq_register_driver(&cpufreq_amd64_driver);
1336 } 1345 }
1337 1346
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
index 35489fd68852..6c5dc2c85aeb 100644
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -21,12 +21,6 @@
21#include <linux/delay.h> 21#include <linux/delay.h>
22#include <linux/compiler.h> 22#include <linux/compiler.h>
23 23
24#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
25#include <linux/acpi.h>
26#include <linux/dmi.h>
27#include <acpi/processor.h>
28#endif
29
30#include <asm/msr.h> 24#include <asm/msr.h>
31#include <asm/processor.h> 25#include <asm/processor.h>
32#include <asm/cpufeature.h> 26#include <asm/cpufeature.h>
@@ -257,9 +251,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
257 /* Matched a non-match */ 251 /* Matched a non-match */
258 dprintk("no table support for CPU model \"%s\"\n", 252 dprintk("no table support for CPU model \"%s\"\n",
259 cpu->x86_model_id); 253 cpu->x86_model_id);
260#ifndef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI 254 dprintk("try using the acpi-cpufreq driver\n");
261 dprintk("try compiling with CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI enabled\n");
262#endif
263 return -ENOENT; 255 return -ENOENT;
264 } 256 }
265 257
@@ -346,213 +338,6 @@ static unsigned int get_cur_freq(unsigned int cpu)
346} 338}
347 339
348 340
349#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
350
351static struct acpi_processor_performance *acpi_perf_data[NR_CPUS];
352
353/*
354 * centrino_cpu_early_init_acpi - Do the preregistering with ACPI P-States
355 * library
356 *
357 * Before doing the actual init, we need to do _PSD related setup whenever
358 * supported by the BIOS. These are handled by this early_init routine.
359 */
360static int centrino_cpu_early_init_acpi(void)
361{
362 unsigned int i, j;
363 struct acpi_processor_performance *data;
364
365 for_each_possible_cpu(i) {
366 data = kzalloc(sizeof(struct acpi_processor_performance),
367 GFP_KERNEL);
368 if (!data) {
369 for_each_possible_cpu(j) {
370 kfree(acpi_perf_data[j]);
371 acpi_perf_data[j] = NULL;
372 }
373 return (-ENOMEM);
374 }
375 acpi_perf_data[i] = data;
376 }
377
378 acpi_processor_preregister_performance(acpi_perf_data);
379 return 0;
380}
381
382
383#ifdef CONFIG_SMP
384/*
385 * Some BIOSes do SW_ANY coordination internally, either set it up in hw
386 * or do it in BIOS firmware and won't inform about it to OS. If not
387 * detected, this has a side effect of making CPU run at a different speed
388 * than OS intended it to run at. Detect it and handle it cleanly.
389 */
390static int bios_with_sw_any_bug;
391static int sw_any_bug_found(struct dmi_system_id *d)
392{
393 bios_with_sw_any_bug = 1;
394 return 0;
395}
396
397static struct dmi_system_id sw_any_bug_dmi_table[] = {
398 {
399 .callback = sw_any_bug_found,
400 .ident = "Supermicro Server X6DLP",
401 .matches = {
402 DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
403 DMI_MATCH(DMI_BIOS_VERSION, "080010"),
404 DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
405 },
406 },
407 { }
408};
409#endif
410
411/*
412 * centrino_cpu_init_acpi - register with ACPI P-States library
413 *
414 * Register with the ACPI P-States library (part of drivers/acpi/processor.c)
415 * in order to determine correct frequency and voltage pairings by reading
416 * the _PSS of the ACPI DSDT or SSDT tables.
417 */
418static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
419{
420 unsigned long cur_freq;
421 int result = 0, i;
422 unsigned int cpu = policy->cpu;
423 struct acpi_processor_performance *p;
424
425 p = acpi_perf_data[cpu];
426
427 /* register with ACPI core */
428 if (acpi_processor_register_performance(p, cpu)) {
429 dprintk(PFX "obtaining ACPI data failed\n");
430 return -EIO;
431 }
432
433 policy->shared_type = p->shared_type;
434 /*
435 * Will let policy->cpus know about dependency only when software
436 * coordination is required.
437 */
438 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
439 policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
440 policy->cpus = p->shared_cpu_map;
441 }
442
443#ifdef CONFIG_SMP
444 dmi_check_system(sw_any_bug_dmi_table);
445 if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
446 policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
447 policy->cpus = cpu_core_map[cpu];
448 }
449#endif
450
451 /* verify the acpi_data */
452 if (p->state_count <= 1) {
453 dprintk("No P-States\n");
454 result = -ENODEV;
455 goto err_unreg;
456 }
457
458 if ((p->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
459 (p->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
460 dprintk("Invalid control/status registers (%x - %x)\n",
461 p->control_register.space_id, p->status_register.space_id);
462 result = -EIO;
463 goto err_unreg;
464 }
465
466 for (i=0; i<p->state_count; i++) {
467 if ((p->states[i].control & INTEL_MSR_RANGE) !=
468 (p->states[i].status & INTEL_MSR_RANGE)) {
469 dprintk("Different MSR bits in control (%llu) and status (%llu)\n",
470 p->states[i].control, p->states[i].status);
471 result = -EINVAL;
472 goto err_unreg;
473 }
474
475 if (!p->states[i].core_frequency) {
476 dprintk("Zero core frequency for state %u\n", i);
477 result = -EINVAL;
478 goto err_unreg;
479 }
480
481 if (p->states[i].core_frequency > p->states[0].core_frequency) {
482 dprintk("P%u has larger frequency (%llu) than P0 (%llu), skipping\n", i,
483 p->states[i].core_frequency, p->states[0].core_frequency);
484 p->states[i].core_frequency = 0;
485 continue;
486 }
487 }
488
489 centrino_model[cpu] = kzalloc(sizeof(struct cpu_model), GFP_KERNEL);
490 if (!centrino_model[cpu]) {
491 result = -ENOMEM;
492 goto err_unreg;
493 }
494
495 centrino_model[cpu]->model_name=NULL;
496 centrino_model[cpu]->max_freq = p->states[0].core_frequency * 1000;
497 centrino_model[cpu]->op_points = kmalloc(sizeof(struct cpufreq_frequency_table) *
498 (p->state_count + 1), GFP_KERNEL);
499 if (!centrino_model[cpu]->op_points) {
500 result = -ENOMEM;
501 goto err_kfree;
502 }
503
504 for (i=0; i<p->state_count; i++) {
505 centrino_model[cpu]->op_points[i].index = p->states[i].control & INTEL_MSR_RANGE;
506 centrino_model[cpu]->op_points[i].frequency = p->states[i].core_frequency * 1000;
507 dprintk("adding state %i with frequency %u and control value %04x\n",
508 i, centrino_model[cpu]->op_points[i].frequency, centrino_model[cpu]->op_points[i].index);
509 }
510 centrino_model[cpu]->op_points[p->state_count].frequency = CPUFREQ_TABLE_END;
511
512 cur_freq = get_cur_freq(cpu);
513
514 for (i=0; i<p->state_count; i++) {
515 if (!p->states[i].core_frequency) {
516 dprintk("skipping state %u\n", i);
517 centrino_model[cpu]->op_points[i].frequency = CPUFREQ_ENTRY_INVALID;
518 continue;
519 }
520
521 if (extract_clock(centrino_model[cpu]->op_points[i].index, cpu, 0) !=
522 (centrino_model[cpu]->op_points[i].frequency)) {
523 dprintk("Invalid encoded frequency (%u vs. %u)\n",
524 extract_clock(centrino_model[cpu]->op_points[i].index, cpu, 0),
525 centrino_model[cpu]->op_points[i].frequency);
526 result = -EINVAL;
527 goto err_kfree_all;
528 }
529
530 if (cur_freq == centrino_model[cpu]->op_points[i].frequency)
531 p->state = i;
532 }
533
534 /* notify BIOS that we exist */
535 acpi_processor_notify_smm(THIS_MODULE);
536 printk("speedstep-centrino with X86_SPEEDSTEP_CENTRINO_ACPI "
537 "config is deprecated.\n "
538 "Use X86_ACPI_CPUFREQ (acpi-cpufreq) instead.\n" );
539
540 return 0;
541
542 err_kfree_all:
543 kfree(centrino_model[cpu]->op_points);
544 err_kfree:
545 kfree(centrino_model[cpu]);
546 err_unreg:
547 acpi_processor_unregister_performance(p, cpu);
548 dprintk(PFX "invalid ACPI data\n");
549 return (result);
550}
551#else
552static inline int centrino_cpu_init_acpi(struct cpufreq_policy *policy) { return -ENODEV; }
553static inline int centrino_cpu_early_init_acpi(void) { return 0; }
554#endif
555
556static int centrino_cpu_init(struct cpufreq_policy *policy) 341static int centrino_cpu_init(struct cpufreq_policy *policy)
557{ 342{
558 struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; 343 struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu];
@@ -568,27 +353,25 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
568 if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) 353 if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC))
569 centrino_driver.flags |= CPUFREQ_CONST_LOOPS; 354 centrino_driver.flags |= CPUFREQ_CONST_LOOPS;
570 355
571 if (centrino_cpu_init_acpi(policy)) { 356 if (policy->cpu != 0)
572 if (policy->cpu != 0) 357 return -ENODEV;
573 return -ENODEV;
574 358
575 for (i = 0; i < N_IDS; i++) 359 for (i = 0; i < N_IDS; i++)
576 if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) 360 if (centrino_verify_cpu_id(cpu, &cpu_ids[i]))
577 break; 361 break;
578 362
579 if (i != N_IDS) 363 if (i != N_IDS)
580 centrino_cpu[policy->cpu] = &cpu_ids[i]; 364 centrino_cpu[policy->cpu] = &cpu_ids[i];
581 365
582 if (!centrino_cpu[policy->cpu]) { 366 if (!centrino_cpu[policy->cpu]) {
583 dprintk("found unsupported CPU with " 367 dprintk("found unsupported CPU with "
584 "Enhanced SpeedStep: send /proc/cpuinfo to " 368 "Enhanced SpeedStep: send /proc/cpuinfo to "
585 MAINTAINER "\n"); 369 MAINTAINER "\n");
586 return -ENODEV; 370 return -ENODEV;
587 } 371 }
588 372
589 if (centrino_cpu_init_table(policy)) { 373 if (centrino_cpu_init_table(policy)) {
590 return -ENODEV; 374 return -ENODEV;
591 }
592 } 375 }
593 376
594 /* Check to see if Enhanced SpeedStep is enabled, and try to 377 /* Check to see if Enhanced SpeedStep is enabled, and try to
@@ -634,20 +417,6 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy)
634 417
635 cpufreq_frequency_table_put_attr(cpu); 418 cpufreq_frequency_table_put_attr(cpu);
636 419
637#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
638 if (!centrino_model[cpu]->model_name) {
639 static struct acpi_processor_performance *p;
640
641 if (acpi_perf_data[cpu]) {
642 p = acpi_perf_data[cpu];
643 dprintk("unregistering and freeing ACPI data\n");
644 acpi_processor_unregister_performance(p, cpu);
645 kfree(centrino_model[cpu]->op_points);
646 kfree(centrino_model[cpu]);
647 }
648 }
649#endif
650
651 centrino_model[cpu] = NULL; 420 centrino_model[cpu] = NULL;
652 421
653 return 0; 422 return 0;
@@ -849,25 +618,12 @@ static int __init centrino_init(void)
849 if (!cpu_has(cpu, X86_FEATURE_EST)) 618 if (!cpu_has(cpu, X86_FEATURE_EST))
850 return -ENODEV; 619 return -ENODEV;
851 620
852 centrino_cpu_early_init_acpi();
853
854 return cpufreq_register_driver(&centrino_driver); 621 return cpufreq_register_driver(&centrino_driver);
855} 622}
856 623
857static void __exit centrino_exit(void) 624static void __exit centrino_exit(void)
858{ 625{
859#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
860 unsigned int j;
861#endif
862
863 cpufreq_unregister_driver(&centrino_driver); 626 cpufreq_unregister_driver(&centrino_driver);
864
865#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
866 for_each_possible_cpu(j) {
867 kfree(acpi_perf_data[j]);
868 acpi_perf_data[j] = NULL;
869 }
870#endif
871} 627}
872 628
873MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>"); 629MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>");
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c
index 698f980eb443..a5b2346faf1f 100644
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c
@@ -205,7 +205,6 @@ static unsigned int speedstep_detect_chipset (void)
205 * host brige. Abort on these systems. 205 * host brige. Abort on these systems.
206 */ 206 */
207 static struct pci_dev *hostbridge; 207 static struct pci_dev *hostbridge;
208 u8 rev = 0;
209 208
210 hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL, 209 hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL,
211 PCI_DEVICE_ID_INTEL_82815_MC, 210 PCI_DEVICE_ID_INTEL_82815_MC,
@@ -216,8 +215,7 @@ static unsigned int speedstep_detect_chipset (void)
216 if (!hostbridge) 215 if (!hostbridge)
217 return 2; /* 2-M */ 216 return 2; /* 2-M */
218 217
219 pci_read_config_byte(hostbridge, PCI_REVISION_ID, &rev); 218 if (hostbridge->revision < 5) {
220 if (rev < 5) {
221 dprintk("hostbridge does not support speedstep\n"); 219 dprintk("hostbridge does not support speedstep\n");
222 speedstep_chipset_dev = NULL; 220 speedstep_chipset_dev = NULL;
223 pci_dev_put(hostbridge); 221 pci_dev_put(hostbridge);
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index e5be819492ef..d5a456d27d82 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -4,7 +4,7 @@
4 * Changes: 4 * Changes:
5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4) 5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. 6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
7 * Andi Kleen : CPUID4 emulation on AMD. 7 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
8 */ 8 */
9 9
10#include <linux/init.h> 10#include <linux/init.h>
@@ -135,7 +135,7 @@ unsigned short num_cache_leaves;
135 135
136/* AMD doesn't have CPUID4. Emulate it here to report the same 136/* AMD doesn't have CPUID4. Emulate it here to report the same
137 information to the user. This makes some assumptions about the machine: 137 information to the user. This makes some assumptions about the machine:
138 No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs. 138 L2 not shared, no SMT etc. that is currently true on AMD CPUs.
139 139
140 In theory the TLBs could be reported as fake type (they are in "dummy"). 140 In theory the TLBs could be reported as fake type (they are in "dummy").
141 Maybe later */ 141 Maybe later */
@@ -159,13 +159,26 @@ union l2_cache {
159 unsigned val; 159 unsigned val;
160}; 160};
161 161
162union l3_cache {
163 struct {
164 unsigned line_size : 8;
165 unsigned lines_per_tag : 4;
166 unsigned assoc : 4;
167 unsigned res : 2;
168 unsigned size_encoded : 14;
169 };
170 unsigned val;
171};
172
162static const unsigned short assocs[] = { 173static const unsigned short assocs[] = {
163 [1] = 1, [2] = 2, [4] = 4, [6] = 8, 174 [1] = 1, [2] = 2, [4] = 4, [6] = 8,
164 [8] = 16, 175 [8] = 16, [0xa] = 32, [0xb] = 48,
176 [0xc] = 64,
165 [0xf] = 0xffff // ?? 177 [0xf] = 0xffff // ??
166 }; 178};
167static const unsigned char levels[] = { 1, 1, 2 }; 179
168static const unsigned char types[] = { 1, 2, 3 }; 180static const unsigned char levels[] = { 1, 1, 2, 3 };
181static const unsigned char types[] = { 1, 2, 3, 3 };
169 182
170static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, 183static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
171 union _cpuid4_leaf_ebx *ebx, 184 union _cpuid4_leaf_ebx *ebx,
@@ -175,37 +188,58 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
175 unsigned line_size, lines_per_tag, assoc, size_in_kb; 188 unsigned line_size, lines_per_tag, assoc, size_in_kb;
176 union l1_cache l1i, l1d; 189 union l1_cache l1i, l1d;
177 union l2_cache l2; 190 union l2_cache l2;
191 union l3_cache l3;
192 union l1_cache *l1 = &l1d;
178 193
179 eax->full = 0; 194 eax->full = 0;
180 ebx->full = 0; 195 ebx->full = 0;
181 ecx->full = 0; 196 ecx->full = 0;
182 197
183 cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); 198 cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
184 cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy); 199 cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
185 200
186 if (leaf > 2 || !l1d.val || !l1i.val || !l2.val) 201 switch (leaf) {
187 return; 202 case 1:
188 203 l1 = &l1i;
189 eax->split.is_self_initializing = 1; 204 case 0:
190 eax->split.type = types[leaf]; 205 if (!l1->val)
191 eax->split.level = levels[leaf]; 206 return;
192 eax->split.num_threads_sharing = 0;
193 eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
194
195 if (leaf <= 1) {
196 union l1_cache *l1 = leaf == 0 ? &l1d : &l1i;
197 assoc = l1->assoc; 207 assoc = l1->assoc;
198 line_size = l1->line_size; 208 line_size = l1->line_size;
199 lines_per_tag = l1->lines_per_tag; 209 lines_per_tag = l1->lines_per_tag;
200 size_in_kb = l1->size_in_kb; 210 size_in_kb = l1->size_in_kb;
201 } else { 211 break;
212 case 2:
213 if (!l2.val)
214 return;
202 assoc = l2.assoc; 215 assoc = l2.assoc;
203 line_size = l2.line_size; 216 line_size = l2.line_size;
204 lines_per_tag = l2.lines_per_tag; 217 lines_per_tag = l2.lines_per_tag;
205 /* cpu_data has errata corrections for K7 applied */ 218 /* cpu_data has errata corrections for K7 applied */
206 size_in_kb = current_cpu_data.x86_cache_size; 219 size_in_kb = current_cpu_data.x86_cache_size;
220 break;
221 case 3:
222 if (!l3.val)
223 return;
224 assoc = l3.assoc;
225 line_size = l3.line_size;
226 lines_per_tag = l3.lines_per_tag;
227 size_in_kb = l3.size_encoded * 512;
228 break;
229 default:
230 return;
207 } 231 }
208 232
233 eax->split.is_self_initializing = 1;
234 eax->split.type = types[leaf];
235 eax->split.level = levels[leaf];
236 if (leaf == 3)
237 eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1;
238 else
239 eax->split.num_threads_sharing = 0;
240 eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
241
242
209 if (assoc == 0xf) 243 if (assoc == 0xf)
210 eax->split.is_fully_associative = 1; 244 eax->split.is_fully_associative = 1;
211 ebx->split.coherency_line_size = line_size - 1; 245 ebx->split.coherency_line_size = line_size - 1;
@@ -239,8 +273,7 @@ static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le
239 return 0; 273 return 0;
240} 274}
241 275
242/* will only be called once; __init is safe here */ 276static int __cpuinit find_num_cache_leaves(void)
243static int __init find_num_cache_leaves(void)
244{ 277{
245 unsigned int eax, ebx, ecx, edx; 278 unsigned int eax, ebx, ecx, edx;
246 union _cpuid4_leaf_eax cache_eax; 279 union _cpuid4_leaf_eax cache_eax;
@@ -710,7 +743,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
710 return retval; 743 return retval;
711} 744}
712 745
713static void __cpuexit cache_remove_dev(struct sys_device * sys_dev) 746static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
714{ 747{
715 unsigned int cpu = sys_dev->id; 748 unsigned int cpu = sys_dev->id;
716 unsigned long i; 749 unsigned long i;
diff --git a/arch/i386/kernel/cpu/mcheck/non-fatal.c b/arch/i386/kernel/cpu/mcheck/non-fatal.c
index 6b5d3518a1c0..bf39409b3838 100644
--- a/arch/i386/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/i386/kernel/cpu/mcheck/non-fatal.c
@@ -57,7 +57,7 @@ static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
57static void mce_work_fn(struct work_struct *work) 57static void mce_work_fn(struct work_struct *work)
58{ 58{
59 on_each_cpu(mce_checkregs, NULL, 1, 1); 59 on_each_cpu(mce_checkregs, NULL, 1, 1);
60 schedule_delayed_work(&mce_work, MCE_RATE); 60 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
61} 61}
62 62
63static int __init init_nonfatal_mce_checker(void) 63static int __init init_nonfatal_mce_checker(void)
@@ -82,7 +82,7 @@ static int __init init_nonfatal_mce_checker(void)
82 /* 82 /*
83 * Check for non-fatal errors every MCE_RATE s 83 * Check for non-fatal errors every MCE_RATE s
84 */ 84 */
85 schedule_delayed_work(&mce_work, MCE_RATE); 85 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
86 printk(KERN_INFO "Machine check exception polling timer started.\n"); 86 printk(KERN_INFO "Machine check exception polling timer started.\n");
87 return 0; 87 return 0;
88} 88}
diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c
index 7ba7c3abd3a4..1203dc5ab87a 100644
--- a/arch/i386/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c
@@ -134,19 +134,21 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
134 int err; 134 int err;
135 135
136 sys_dev = get_cpu_sysdev(cpu); 136 sys_dev = get_cpu_sysdev(cpu);
137 mutex_lock(&therm_cpu_lock);
138 switch (action) { 137 switch (action) {
139 case CPU_ONLINE: 138 case CPU_ONLINE:
140 case CPU_ONLINE_FROZEN: 139 case CPU_ONLINE_FROZEN:
140 mutex_lock(&therm_cpu_lock);
141 err = thermal_throttle_add_dev(sys_dev); 141 err = thermal_throttle_add_dev(sys_dev);
142 mutex_unlock(&therm_cpu_lock);
142 WARN_ON(err); 143 WARN_ON(err);
143 break; 144 break;
144 case CPU_DEAD: 145 case CPU_DEAD:
145 case CPU_DEAD_FROZEN: 146 case CPU_DEAD_FROZEN:
147 mutex_lock(&therm_cpu_lock);
146 thermal_throttle_remove_dev(sys_dev); 148 thermal_throttle_remove_dev(sys_dev);
149 mutex_unlock(&therm_cpu_lock);
147 break; 150 break;
148 } 151 }
149 mutex_unlock(&therm_cpu_lock);
150 return NOTIFY_OK; 152 return NOTIFY_OK;
151} 153}
152 154
diff --git a/arch/i386/kernel/cpu/mtrr/cyrix.c b/arch/i386/kernel/cpu/mtrr/cyrix.c
index 9edf5625584f..1001f1e0fe6d 100644
--- a/arch/i386/kernel/cpu/mtrr/cyrix.c
+++ b/arch/i386/kernel/cpu/mtrr/cyrix.c
@@ -233,12 +233,12 @@ typedef struct {
233 mtrr_type type; 233 mtrr_type type;
234} arr_state_t; 234} arr_state_t;
235 235
236static arr_state_t arr_state[8] __devinitdata = { 236static arr_state_t arr_state[8] = {
237 {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, 237 {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL},
238 {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL} 238 {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}
239}; 239};
240 240
241static unsigned char ccr_state[7] __devinitdata = { 0, 0, 0, 0, 0, 0, 0 }; 241static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 };
242 242
243static void cyrix_set_all(void) 243static void cyrix_set_all(void)
244{ 244{
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index c4ebb5126ef7..56f64e34829f 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -42,7 +42,7 @@ static int mtrr_show;
42module_param_named(show, mtrr_show, bool, 0); 42module_param_named(show, mtrr_show, bool, 0);
43 43
44/* Get the MSR pair relating to a var range */ 44/* Get the MSR pair relating to a var range */
45static void __init 45static void
46get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) 46get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
47{ 47{
48 rdmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); 48 rdmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
@@ -65,10 +65,11 @@ get_fixed_ranges(mtrr_type * frs)
65 65
66void mtrr_save_fixed_ranges(void *info) 66void mtrr_save_fixed_ranges(void *info)
67{ 67{
68 get_fixed_ranges(mtrr_state.fixed_ranges); 68 if (cpu_has_mtrr)
69 get_fixed_ranges(mtrr_state.fixed_ranges);
69} 70}
70 71
71static void __cpuinit print_fixed(unsigned base, unsigned step, const mtrr_type*types) 72static void print_fixed(unsigned base, unsigned step, const mtrr_type*types)
72{ 73{
73 unsigned i; 74 unsigned i;
74 75
@@ -78,7 +79,7 @@ static void __cpuinit print_fixed(unsigned base, unsigned step, const mtrr_type*
78} 79}
79 80
80/* Grab all of the MTRR state for this CPU into *state */ 81/* Grab all of the MTRR state for this CPU into *state */
81void get_mtrr_state(void) 82void __init get_mtrr_state(void)
82{ 83{
83 unsigned int i; 84 unsigned int i;
84 struct mtrr_var_range *vrs; 85 struct mtrr_var_range *vrs;
@@ -469,11 +470,6 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i
469 } 470 }
470 } 471 }
471 472
472 if (base < 0x100) {
473 printk(KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%lx000,0x%lx000)\n",
474 base, size);
475 return -EINVAL;
476 }
477 /* Check upper bits of base and last are equal and lower bits are 0 473 /* Check upper bits of base and last are equal and lower bits are 0
478 for base and 1 for last */ 474 for base and 1 for last */
479 last = base + size - 1; 475 last = base + size - 1;
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 1cf466df330a..c48b6fea5ab4 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -229,6 +229,8 @@ static void set_mtrr(unsigned int reg, unsigned long base,
229 data.smp_size = size; 229 data.smp_size = size;
230 data.smp_type = type; 230 data.smp_type = type;
231 atomic_set(&data.count, num_booting_cpus() - 1); 231 atomic_set(&data.count, num_booting_cpus() - 1);
232 /* make sure data.count is visible before unleashing other CPUs */
233 smp_wmb();
232 atomic_set(&data.gate,0); 234 atomic_set(&data.gate,0);
233 235
234 /* Start the ball rolling on other CPUs */ 236 /* Start the ball rolling on other CPUs */
@@ -242,6 +244,7 @@ static void set_mtrr(unsigned int reg, unsigned long base,
242 244
243 /* ok, reset count and toggle gate */ 245 /* ok, reset count and toggle gate */
244 atomic_set(&data.count, num_booting_cpus() - 1); 246 atomic_set(&data.count, num_booting_cpus() - 1);
247 smp_wmb();
245 atomic_set(&data.gate,1); 248 atomic_set(&data.gate,1);
246 249
247 /* do our MTRR business */ 250 /* do our MTRR business */
@@ -260,6 +263,7 @@ static void set_mtrr(unsigned int reg, unsigned long base,
260 cpu_relax(); 263 cpu_relax();
261 264
262 atomic_set(&data.count, num_booting_cpus() - 1); 265 atomic_set(&data.count, num_booting_cpus() - 1);
266 smp_wmb();
263 atomic_set(&data.gate,0); 267 atomic_set(&data.gate,0);
264 268
265 /* 269 /*
@@ -639,7 +643,7 @@ static struct sysdev_driver mtrr_sysdev_driver = {
639 * initialized (i.e. before smp_init()). 643 * initialized (i.e. before smp_init()).
640 * 644 *
641 */ 645 */
642void mtrr_bp_init(void) 646void __init mtrr_bp_init(void)
643{ 647{
644 init_ifs(); 648 init_ifs();
645 649
@@ -734,10 +738,13 @@ void mtrr_ap_init(void)
734 */ 738 */
735void mtrr_save_state(void) 739void mtrr_save_state(void)
736{ 740{
737 if (smp_processor_id() == 0) 741 int cpu = get_cpu();
742
743 if (cpu == 0)
738 mtrr_save_fixed_ranges(NULL); 744 mtrr_save_fixed_ranges(NULL);
739 else 745 else
740 smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); 746 smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1);
747 put_cpu();
741} 748}
742 749
743static int __init mtrr_init_finialize(void) 750static int __init mtrr_init_finialize(void)
diff --git a/arch/i386/kernel/cpu/perfctr-watchdog.c b/arch/i386/kernel/cpu/perfctr-watchdog.c
index 2b04c8f1db62..30b5e48aa76b 100644
--- a/arch/i386/kernel/cpu/perfctr-watchdog.c
+++ b/arch/i386/kernel/cpu/perfctr-watchdog.c
@@ -28,7 +28,7 @@ struct wd_ops {
28 void (*unreserve)(void); 28 void (*unreserve)(void);
29 int (*setup)(unsigned nmi_hz); 29 int (*setup)(unsigned nmi_hz);
30 void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); 30 void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
31 void (*stop)(void *); 31 void (*stop)(void);
32 unsigned perfctr; 32 unsigned perfctr;
33 unsigned evntsel; 33 unsigned evntsel;
34 u64 checkbit; 34 u64 checkbit;
@@ -55,14 +55,45 @@ static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
55/* converts an msr to an appropriate reservation bit */ 55/* converts an msr to an appropriate reservation bit */
56static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) 56static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
57{ 57{
58 return wd_ops ? msr - wd_ops->perfctr : 0; 58 /* returns the bit offset of the performance counter register */
59 switch (boot_cpu_data.x86_vendor) {
60 case X86_VENDOR_AMD:
61 return (msr - MSR_K7_PERFCTR0);
62 case X86_VENDOR_INTEL:
63 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
64 return (msr - MSR_ARCH_PERFMON_PERFCTR0);
65
66 switch (boot_cpu_data.x86) {
67 case 6:
68 return (msr - MSR_P6_PERFCTR0);
69 case 15:
70 return (msr - MSR_P4_BPU_PERFCTR0);
71 }
72 }
73 return 0;
59} 74}
60 75
61/* converts an msr to an appropriate reservation bit */ 76/* converts an msr to an appropriate reservation bit */
62/* returns the bit offset of the event selection register */ 77/* returns the bit offset of the event selection register */
63static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) 78static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
64{ 79{
65 return wd_ops ? msr - wd_ops->evntsel : 0; 80 /* returns the bit offset of the event selection register */
81 switch (boot_cpu_data.x86_vendor) {
82 case X86_VENDOR_AMD:
83 return (msr - MSR_K7_EVNTSEL0);
84 case X86_VENDOR_INTEL:
85 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
86 return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
87
88 switch (boot_cpu_data.x86) {
89 case 6:
90 return (msr - MSR_P6_EVNTSEL0);
91 case 15:
92 return (msr - MSR_P4_BSU_ESCR0);
93 }
94 }
95 return 0;
96
66} 97}
67 98
68/* checks for a bit availability (hack for oprofile) */ 99/* checks for a bit availability (hack for oprofile) */
@@ -142,7 +173,7 @@ void disable_lapic_nmi_watchdog(void)
142 if (atomic_read(&nmi_active) <= 0) 173 if (atomic_read(&nmi_active) <= 0)
143 return; 174 return;
144 175
145 on_each_cpu(wd_ops->stop, NULL, 0, 1); 176 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
146 wd_ops->unreserve(); 177 wd_ops->unreserve();
147 178
148 BUG_ON(atomic_read(&nmi_active) != 0); 179 BUG_ON(atomic_read(&nmi_active) != 0);
@@ -255,7 +286,7 @@ static int setup_k7_watchdog(unsigned nmi_hz)
255 return 1; 286 return 1;
256} 287}
257 288
258static void single_msr_stop_watchdog(void *arg) 289static void single_msr_stop_watchdog(void)
259{ 290{
260 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 291 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
261 292
@@ -276,8 +307,8 @@ static int single_msr_reserve(void)
276 307
277static void single_msr_unreserve(void) 308static void single_msr_unreserve(void)
278{ 309{
279 release_evntsel_nmi(wd_ops->perfctr); 310 release_evntsel_nmi(wd_ops->evntsel);
280 release_perfctr_nmi(wd_ops->evntsel); 311 release_perfctr_nmi(wd_ops->perfctr);
281} 312}
282 313
283static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) 314static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
@@ -442,7 +473,7 @@ static int setup_p4_watchdog(unsigned nmi_hz)
442 return 1; 473 return 1;
443} 474}
444 475
445static void stop_p4_watchdog(void *arg) 476static void stop_p4_watchdog(void)
446{ 477{
447 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 478 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
448 wrmsr(wd->cccr_msr, 0, 0); 479 wrmsr(wd->cccr_msr, 0, 0);
@@ -475,10 +506,10 @@ static void p4_unreserve(void)
475{ 506{
476#ifdef CONFIG_SMP 507#ifdef CONFIG_SMP
477 if (smp_num_siblings > 1) 508 if (smp_num_siblings > 1)
478 release_evntsel_nmi(MSR_P4_IQ_PERFCTR1); 509 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
479#endif 510#endif
480 release_evntsel_nmi(MSR_P4_IQ_PERFCTR0); 511 release_evntsel_nmi(MSR_P4_CRU_ESCR0);
481 release_perfctr_nmi(MSR_P4_CRU_ESCR0); 512 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
482} 513}
483 514
484static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) 515static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
@@ -568,8 +599,8 @@ static struct wd_ops intel_arch_wd_ops = {
568 .setup = setup_intel_arch_watchdog, 599 .setup = setup_intel_arch_watchdog,
569 .rearm = p6_rearm, 600 .rearm = p6_rearm,
570 .stop = single_msr_stop_watchdog, 601 .stop = single_msr_stop_watchdog,
571 .perfctr = MSR_ARCH_PERFMON_PERFCTR0, 602 .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
572 .evntsel = MSR_ARCH_PERFMON_EVENTSEL0, 603 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
573}; 604};
574 605
575static void probe_nmi_watchdog(void) 606static void probe_nmi_watchdog(void)
@@ -614,6 +645,12 @@ int lapic_watchdog_init(unsigned nmi_hz)
614 probe_nmi_watchdog(); 645 probe_nmi_watchdog();
615 if (!wd_ops) 646 if (!wd_ops)
616 return -1; 647 return -1;
648
649 if (!wd_ops->reserve()) {
650 printk(KERN_ERR
651 "NMI watchdog: cannot reserve perfctrs\n");
652 return -1;
653 }
617 } 654 }
618 655
619 if (!(wd_ops->setup(nmi_hz))) { 656 if (!(wd_ops->setup(nmi_hz))) {
@@ -628,7 +665,7 @@ int lapic_watchdog_init(unsigned nmi_hz)
628void lapic_watchdog_stop(void) 665void lapic_watchdog_stop(void)
629{ 666{
630 if (wd_ops) 667 if (wd_ops)
631 wd_ops->stop(NULL); 668 wd_ops->stop();
632} 669}
633 670
634unsigned lapic_adjust_nmi_hz(unsigned hz) 671unsigned lapic_adjust_nmi_hz(unsigned hz)
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index 89d91e6cc972..1e31b6caffb1 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -29,7 +29,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
29 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 29 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
30 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, 30 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
31 NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, 31 NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
32 NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", "3dnowext", "3dnow", 32 NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
33 "3dnowext", "3dnow",
33 34
34 /* Transmeta-defined */ 35 /* Transmeta-defined */
35 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, 36 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
@@ -40,8 +41,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
40 /* Other (Linux-defined) */ 41 /* Other (Linux-defined) */
41 "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", 42 "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
42 NULL, NULL, NULL, NULL, 43 NULL, NULL, NULL, NULL,
43 "constant_tsc", "up", NULL, NULL, NULL, NULL, NULL, NULL, 44 "constant_tsc", "up", NULL, "arch_perfmon",
44 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 45 "pebs", "bts", NULL, "sync_rdtsc",
46 "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
45 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 47 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
46 48
47 /* Intel-defined (#2) */ 49 /* Intel-defined (#2) */
@@ -57,9 +59,16 @@ static int show_cpuinfo(struct seq_file *m, void *v)
57 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 59 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
58 60
59 /* AMD-defined (#2) */ 61 /* AMD-defined (#2) */
60 "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8legacy", "abm", 62 "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy",
61 "sse4a", "misalignsse", 63 "altmovcr8", "abm", "sse4a",
62 "3dnowprefetch", "osvw", "ibs", NULL, NULL, NULL, NULL, NULL, 64 "misalignsse", "3dnowprefetch",
65 "osvw", "ibs", NULL, NULL, NULL, NULL,
66 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
67 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
68
69 /* Auxiliary (Linux-defined) */
70 "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
71 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
63 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 72 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
64 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 73 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
65 }; 74 };
diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c
deleted file mode 100644
index 50076f22e90f..000000000000
--- a/arch/i386/kernel/cpu/rise.c
+++ /dev/null
@@ -1,52 +0,0 @@
1#include <linux/kernel.h>
2#include <linux/init.h>
3#include <linux/bitops.h>
4#include <asm/processor.h>
5
6#include "cpu.h"
7
8static void __cpuinit init_rise(struct cpuinfo_x86 *c)
9{
10 printk("CPU: Rise iDragon");
11 if (c->x86_model > 2)
12 printk(" II");
13 printk("\n");
14
15 /* Unhide possibly hidden capability flags
16 The mp6 iDragon family don't have MSRs.
17 We switch on extra features with this cpuid weirdness: */
18 __asm__ (
19 "movl $0x6363452a, %%eax\n\t"
20 "movl $0x3231206c, %%ecx\n\t"
21 "movl $0x2a32313a, %%edx\n\t"
22 "cpuid\n\t"
23 "movl $0x63634523, %%eax\n\t"
24 "movl $0x32315f6c, %%ecx\n\t"
25 "movl $0x2333313a, %%edx\n\t"
26 "cpuid\n\t" : : : "eax", "ebx", "ecx", "edx"
27 );
28 set_bit(X86_FEATURE_CX8, c->x86_capability);
29}
30
31static struct cpu_dev rise_cpu_dev __cpuinitdata = {
32 .c_vendor = "Rise",
33 .c_ident = { "RiseRiseRise" },
34 .c_models = {
35 { .vendor = X86_VENDOR_RISE, .family = 5, .model_names =
36 {
37 [0] = "iDragon",
38 [2] = "iDragon",
39 [8] = "iDragon II",
40 [9] = "iDragon II"
41 }
42 },
43 },
44 .c_init = init_rise,
45};
46
47int __init rise_init_cpu(void)
48{
49 cpu_devs[X86_VENDOR_RISE] = &rise_cpu_dev;
50 return 0;
51}
52
diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c
index 9645bb51f76a..e60cddbc4cfb 100644
--- a/arch/i386/kernel/e820.c
+++ b/arch/i386/kernel/e820.c
@@ -10,6 +10,7 @@
10#include <linux/efi.h> 10#include <linux/efi.h>
11#include <linux/pfn.h> 11#include <linux/pfn.h>
12#include <linux/uaccess.h> 12#include <linux/uaccess.h>
13#include <linux/suspend.h>
13 14
14#include <asm/pgtable.h> 15#include <asm/pgtable.h>
15#include <asm/page.h> 16#include <asm/page.h>
@@ -320,6 +321,37 @@ static int __init request_standard_resources(void)
320 321
321subsys_initcall(request_standard_resources); 322subsys_initcall(request_standard_resources);
322 323
324#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND)
325/**
326 * e820_mark_nosave_regions - Find the ranges of physical addresses that do not
327 * correspond to e820 RAM areas and mark the corresponding pages as nosave for
328 * hibernation.
329 *
330 * This function requires the e820 map to be sorted and without any
331 * overlapping entries and assumes the first e820 area to be RAM.
332 */
333void __init e820_mark_nosave_regions(void)
334{
335 int i;
336 unsigned long pfn;
337
338 pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
339 for (i = 1; i < e820.nr_map; i++) {
340 struct e820entry *ei = &e820.map[i];
341
342 if (pfn < PFN_UP(ei->addr))
343 register_nosave_region(pfn, PFN_UP(ei->addr));
344
345 pfn = PFN_DOWN(ei->addr + ei->size);
346 if (ei->type != E820_RAM)
347 register_nosave_region(PFN_UP(ei->addr), pfn);
348
349 if (pfn >= max_low_pfn)
350 break;
351 }
352}
353#endif
354
323void __init add_memory_region(unsigned long long start, 355void __init add_memory_region(unsigned long long start,
324 unsigned long long size, int type) 356 unsigned long long size, int type)
325{ 357{
@@ -734,7 +766,7 @@ void __init print_memory_map(char *who)
734 case E820_NVS: 766 case E820_NVS:
735 printk("(ACPI NVS)\n"); 767 printk("(ACPI NVS)\n");
736 break; 768 break;
737 default: printk("type %lu\n", e820.map[i].type); 769 default: printk("type %u\n", e820.map[i].type);
738 break; 770 break;
739 } 771 }
740 } 772 }
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index a1808022ea19..2452c6fbe992 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -278,7 +278,7 @@ void efi_memmap_walk(efi_freemem_callback_t callback, void *arg)
278 struct range { 278 struct range {
279 unsigned long start; 279 unsigned long start;
280 unsigned long end; 280 unsigned long end;
281 } prev, curr; 281 } uninitialized_var(prev), curr;
282 efi_memory_desc_t *md; 282 efi_memory_desc_t *md;
283 unsigned long start, end; 283 unsigned long start, end;
284 void *p; 284 void *p;
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index b1f16ee65e4d..a714d6b43506 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -367,10 +367,6 @@ ENTRY(system_call)
367 CFI_ADJUST_CFA_OFFSET 4 367 CFI_ADJUST_CFA_OFFSET 4
368 SAVE_ALL 368 SAVE_ALL
369 GET_THREAD_INFO(%ebp) 369 GET_THREAD_INFO(%ebp)
370 testl $TF_MASK,PT_EFLAGS(%esp)
371 jz no_singlestep
372 orl $_TIF_SINGLESTEP,TI_flags(%ebp)
373no_singlestep:
374 # system call tracing in operation / emulation 370 # system call tracing in operation / emulation
375 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ 371 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
376 testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) 372 testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
@@ -385,6 +381,10 @@ syscall_exit:
385 # setting need_resched or sigpending 381 # setting need_resched or sigpending
386 # between sampling and the iret 382 # between sampling and the iret
387 TRACE_IRQS_OFF 383 TRACE_IRQS_OFF
384 testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
385 jz no_singlestep
386 orl $_TIF_SINGLESTEP,TI_flags(%ebp)
387no_singlestep:
388 movl TI_flags(%ebp), %ecx 388 movl TI_flags(%ebp), %ecx
389 testw $_TIF_ALLWORK_MASK, %cx # current->work 389 testw $_TIF_ALLWORK_MASK, %cx # current->work
390 jne syscall_exit_work 390 jne syscall_exit_work
@@ -409,8 +409,6 @@ restore_nocheck_notrace:
4091: INTERRUPT_RETURN 4091: INTERRUPT_RETURN
410.section .fixup,"ax" 410.section .fixup,"ax"
411iret_exc: 411iret_exc:
412 TRACE_IRQS_ON
413 ENABLE_INTERRUPTS(CLBR_NONE)
414 pushl $0 # no error code 412 pushl $0 # no error code
415 pushl $do_iret_error 413 pushl $do_iret_error
416 jmp error_code 414 jmp error_code
@@ -1023,6 +1021,91 @@ ENTRY(kernel_thread_helper)
1023 CFI_ENDPROC 1021 CFI_ENDPROC
1024ENDPROC(kernel_thread_helper) 1022ENDPROC(kernel_thread_helper)
1025 1023
1024#ifdef CONFIG_XEN
1025ENTRY(xen_hypervisor_callback)
1026 CFI_STARTPROC
1027 pushl $0
1028 CFI_ADJUST_CFA_OFFSET 4
1029 SAVE_ALL
1030 TRACE_IRQS_OFF
1031
1032 /* Check to see if we got the event in the critical
1033 region in xen_iret_direct, after we've reenabled
1034 events and checked for pending events. This simulates
1035 iret instruction's behaviour where it delivers a
1036 pending interrupt when enabling interrupts. */
1037 movl PT_EIP(%esp),%eax
1038 cmpl $xen_iret_start_crit,%eax
1039 jb 1f
1040 cmpl $xen_iret_end_crit,%eax
1041 jae 1f
1042
1043 call xen_iret_crit_fixup
1044
10451: mov %esp, %eax
1046 call xen_evtchn_do_upcall
1047 jmp ret_from_intr
1048 CFI_ENDPROC
1049ENDPROC(xen_hypervisor_callback)
1050
1051# Hypervisor uses this for application faults while it executes.
1052# We get here for two reasons:
1053# 1. Fault while reloading DS, ES, FS or GS
1054# 2. Fault while executing IRET
1055# Category 1 we fix up by reattempting the load, and zeroing the segment
1056# register if the load fails.
1057# Category 2 we fix up by jumping to do_iret_error. We cannot use the
1058# normal Linux return path in this case because if we use the IRET hypercall
1059# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1060# We distinguish between categories by maintaining a status value in EAX.
1061ENTRY(xen_failsafe_callback)
1062 CFI_STARTPROC
1063 pushl %eax
1064 CFI_ADJUST_CFA_OFFSET 4
1065 movl $1,%eax
10661: mov 4(%esp),%ds
10672: mov 8(%esp),%es
10683: mov 12(%esp),%fs
10694: mov 16(%esp),%gs
1070 testl %eax,%eax
1071 popl %eax
1072 CFI_ADJUST_CFA_OFFSET -4
1073 lea 16(%esp),%esp
1074 CFI_ADJUST_CFA_OFFSET -16
1075 jz 5f
1076 addl $16,%esp
1077 jmp iret_exc # EAX != 0 => Category 2 (Bad IRET)
10785: pushl $0 # EAX == 0 => Category 1 (Bad segment)
1079 CFI_ADJUST_CFA_OFFSET 4
1080 SAVE_ALL
1081 jmp ret_from_exception
1082 CFI_ENDPROC
1083
1084.section .fixup,"ax"
10856: xorl %eax,%eax
1086 movl %eax,4(%esp)
1087 jmp 1b
10887: xorl %eax,%eax
1089 movl %eax,8(%esp)
1090 jmp 2b
10918: xorl %eax,%eax
1092 movl %eax,12(%esp)
1093 jmp 3b
10949: xorl %eax,%eax
1095 movl %eax,16(%esp)
1096 jmp 4b
1097.previous
1098.section __ex_table,"a"
1099 .align 4
1100 .long 1b,6b
1101 .long 2b,7b
1102 .long 3b,8b
1103 .long 4b,9b
1104.previous
1105ENDPROC(xen_failsafe_callback)
1106
1107#endif /* CONFIG_XEN */
1108
1026.section .rodata,"a" 1109.section .rodata,"a"
1027#include "syscall_table.S" 1110#include "syscall_table.S"
1028 1111
diff --git a/arch/i386/kernel/geode.c b/arch/i386/kernel/geode.c
new file mode 100644
index 000000000000..41e8aec4c61d
--- /dev/null
+++ b/arch/i386/kernel/geode.c
@@ -0,0 +1,155 @@
1/*
2 * AMD Geode southbridge support code
3 * Copyright (C) 2006, Advanced Micro Devices, Inc.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public License
7 * as published by the Free Software Foundation.
8 */
9
10#include <linux/kernel.h>
11#include <linux/module.h>
12#include <linux/ioport.h>
13#include <linux/io.h>
14#include <asm/msr.h>
15#include <asm/geode.h>
16
17static struct {
18 char *name;
19 u32 msr;
20 int size;
21 u32 base;
22} lbars[] = {
23 { "geode-pms", MSR_LBAR_PMS, LBAR_PMS_SIZE, 0 },
24 { "geode-acpi", MSR_LBAR_ACPI, LBAR_ACPI_SIZE, 0 },
25 { "geode-gpio", MSR_LBAR_GPIO, LBAR_GPIO_SIZE, 0 },
26 { "geode-mfgpt", MSR_LBAR_MFGPT, LBAR_MFGPT_SIZE, 0 }
27};
28
29static void __init init_lbars(void)
30{
31 u32 lo, hi;
32 int i;
33
34 for (i = 0; i < ARRAY_SIZE(lbars); i++) {
35 rdmsr(lbars[i].msr, lo, hi);
36 if (hi & 0x01)
37 lbars[i].base = lo & 0x0000ffff;
38
39 if (lbars[i].base == 0)
40 printk(KERN_ERR "geode: Couldn't initialize '%s'\n",
41 lbars[i].name);
42 }
43}
44
45int geode_get_dev_base(unsigned int dev)
46{
47 BUG_ON(dev >= ARRAY_SIZE(lbars));
48 return lbars[dev].base;
49}
50EXPORT_SYMBOL_GPL(geode_get_dev_base);
51
52/* === GPIO API === */
53
54void geode_gpio_set(unsigned int gpio, unsigned int reg)
55{
56 u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
57
58 if (!base)
59 return;
60
61 if (gpio < 16)
62 outl(1 << gpio, base + reg);
63 else
64 outl(1 << (gpio - 16), base + 0x80 + reg);
65}
66EXPORT_SYMBOL_GPL(geode_gpio_set);
67
68void geode_gpio_clear(unsigned int gpio, unsigned int reg)
69{
70 u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
71
72 if (!base)
73 return;
74
75 if (gpio < 16)
76 outl(1 << (gpio + 16), base + reg);
77 else
78 outl(1 << gpio, base + 0x80 + reg);
79}
80EXPORT_SYMBOL_GPL(geode_gpio_clear);
81
82int geode_gpio_isset(unsigned int gpio, unsigned int reg)
83{
84 u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
85
86 if (!base)
87 return 0;
88
89 if (gpio < 16)
90 return (inl(base + reg) & (1 << gpio)) ? 1 : 0;
91 else
92 return (inl(base + 0x80 + reg) & (1 << (gpio - 16))) ? 1 : 0;
93}
94EXPORT_SYMBOL_GPL(geode_gpio_isset);
95
96void geode_gpio_set_irq(unsigned int group, unsigned int irq)
97{
98 u32 lo, hi;
99
100 if (group > 7 || irq > 15)
101 return;
102
103 rdmsr(MSR_PIC_ZSEL_HIGH, lo, hi);
104
105 lo &= ~(0xF << (group * 4));
106 lo |= (irq & 0xF) << (group * 4);
107
108 wrmsr(MSR_PIC_ZSEL_HIGH, lo, hi);
109}
110EXPORT_SYMBOL_GPL(geode_gpio_set_irq);
111
112void geode_gpio_setup_event(unsigned int gpio, int pair, int pme)
113{
114 u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
115 u32 offset, shift, val;
116
117 if (gpio >= 24)
118 offset = GPIO_MAP_W;
119 else if (gpio >= 16)
120 offset = GPIO_MAP_Z;
121 else if (gpio >= 8)
122 offset = GPIO_MAP_Y;
123 else
124 offset = GPIO_MAP_X;
125
126 shift = (gpio % 8) * 4;
127
128 val = inl(base + offset);
129
130 /* Clear whatever was there before */
131 val &= ~(0xF << shift);
132
133 /* And set the new value */
134
135 val |= ((pair & 7) << shift);
136
137 /* Set the PME bit if this is a PME event */
138
139 if (pme)
140 val |= (1 << (shift + 3));
141
142 outl(val, base + offset);
143}
144EXPORT_SYMBOL_GPL(geode_gpio_setup_event);
145
146static int __init geode_southbridge_init(void)
147{
148 if (!is_geode())
149 return -ENODEV;
150
151 init_lbars();
152 return 0;
153}
154
155postcore_initcall(geode_southbridge_init);
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index f74dfc419b56..7c52b222207e 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -168,6 +168,12 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
168.section .init.text,"ax",@progbits 168.section .init.text,"ax",@progbits
169#endif 169#endif
170 170
171 /* Do an early initialization of the fixmap area */
172 movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
173 movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
174 addl $0x007, %eax /* 0x007 = PRESENT+RW+USER */
175 movl %eax, 4092(%edx)
176
171#ifdef CONFIG_SMP 177#ifdef CONFIG_SMP
172ENTRY(startup_32_smp) 178ENTRY(startup_32_smp)
173 cld 179 cld
@@ -504,9 +510,12 @@ ENTRY(_stext)
504/* 510/*
505 * BSS section 511 * BSS section
506 */ 512 */
507.section ".bss.page_aligned","w" 513.section ".bss.page_aligned","wa"
514 .align PAGE_SIZE_asm
508ENTRY(swapper_pg_dir) 515ENTRY(swapper_pg_dir)
509 .fill 1024,4,0 516 .fill 1024,4,0
517ENTRY(swapper_pg_pmd)
518 .fill 1024,4,0
510ENTRY(empty_zero_page) 519ENTRY(empty_zero_page)
511 .fill 4096,1,0 520 .fill 4096,1,0
512 521
@@ -530,6 +539,8 @@ fault_msg:
530 .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n" 539 .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n"
531 .asciz "Stack: %p %p %p %p %p %p %p %p\n" 540 .asciz "Stack: %p %p %p %p %p %p %p %p\n"
532 541
542#include "../xen/xen-head.S"
543
533/* 544/*
534 * The IDT and GDT 'descriptors' are a strange 48-bit object 545 * The IDT and GDT 'descriptors' are a strange 48-bit object
535 * only used by the lidt and lgdt instructions. They are not 546 * only used by the lidt and lgdt instructions. They are not
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c
index 17d73459fc5f..533d4932bc79 100644
--- a/arch/i386/kernel/hpet.c
+++ b/arch/i386/kernel/hpet.c
@@ -5,6 +5,7 @@
5#include <linux/init.h> 5#include <linux/init.h>
6#include <linux/sysdev.h> 6#include <linux/sysdev.h>
7#include <linux/pm.h> 7#include <linux/pm.h>
8#include <linux/delay.h>
8 9
9#include <asm/hpet.h> 10#include <asm/hpet.h>
10#include <asm/io.h> 11#include <asm/io.h>
@@ -187,6 +188,10 @@ static void hpet_set_mode(enum clock_event_mode mode,
187 cfg &= ~HPET_TN_ENABLE; 188 cfg &= ~HPET_TN_ENABLE;
188 hpet_writel(cfg, HPET_T0_CFG); 189 hpet_writel(cfg, HPET_T0_CFG);
189 break; 190 break;
191
192 case CLOCK_EVT_MODE_RESUME:
193 hpet_enable_int();
194 break;
190 } 195 }
191} 196}
192 197
@@ -217,6 +222,7 @@ static struct clocksource clocksource_hpet = {
217 .mask = HPET_MASK, 222 .mask = HPET_MASK,
218 .shift = HPET_SHIFT, 223 .shift = HPET_SHIFT,
219 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 224 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
225 .resume = hpet_start_counter,
220}; 226};
221 227
222/* 228/*
@@ -226,7 +232,8 @@ int __init hpet_enable(void)
226{ 232{
227 unsigned long id; 233 unsigned long id;
228 uint64_t hpet_freq; 234 uint64_t hpet_freq;
229 u64 tmp; 235 u64 tmp, start, now;
236 cycle_t t1;
230 237
231 if (!is_hpet_capable()) 238 if (!is_hpet_capable())
232 return 0; 239 return 0;
@@ -273,6 +280,27 @@ int __init hpet_enable(void)
273 /* Start the counter */ 280 /* Start the counter */
274 hpet_start_counter(); 281 hpet_start_counter();
275 282
283 /* Verify whether hpet counter works */
284 t1 = read_hpet();
285 rdtscll(start);
286
287 /*
288 * We don't know the TSC frequency yet, but waiting for
289 * 200000 TSC cycles is safe:
290 * 4 GHz == 50us
291 * 1 GHz == 200us
292 */
293 do {
294 rep_nop();
295 rdtscll(now);
296 } while ((now - start) < 200000UL);
297
298 if (t1 == read_hpet()) {
299 printk(KERN_WARNING
300 "HPET counter not counting. HPET disabled\n");
301 goto out_nohpet;
302 }
303
276 /* Initialize and register HPET clocksource 304 /* Initialize and register HPET clocksource
277 * 305 *
278 * hpet period is in femto seconds per cycle 306 * hpet period is in femto seconds per cycle
@@ -291,7 +319,6 @@ int __init hpet_enable(void)
291 319
292 clocksource_register(&clocksource_hpet); 320 clocksource_register(&clocksource_hpet);
293 321
294
295 if (id & HPET_ID_LEGSUP) { 322 if (id & HPET_ID_LEGSUP) {
296 hpet_enable_int(); 323 hpet_enable_int();
297 hpet_reserve_platform_timers(id); 324 hpet_reserve_platform_timers(id);
@@ -299,7 +326,7 @@ int __init hpet_enable(void)
299 * Start hpet with the boot cpu mask and make it 326 * Start hpet with the boot cpu mask and make it
300 * global after the IO_APIC has been initialized. 327 * global after the IO_APIC has been initialized.
301 */ 328 */
302 hpet_clockevent.cpumask =cpumask_of_cpu(0); 329 hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
303 clockevents_register_device(&hpet_clockevent); 330 clockevents_register_device(&hpet_clockevent);
304 global_clock_event = &hpet_clockevent; 331 global_clock_event = &hpet_clockevent;
305 return 1; 332 return 1;
@@ -524,68 +551,3 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
524 return IRQ_HANDLED; 551 return IRQ_HANDLED;
525} 552}
526#endif 553#endif
527
528
529/*
530 * Suspend/resume part
531 */
532
533#ifdef CONFIG_PM
534
535static int hpet_suspend(struct sys_device *sys_device, pm_message_t state)
536{
537 unsigned long cfg = hpet_readl(HPET_CFG);
538
539 cfg &= ~(HPET_CFG_ENABLE|HPET_CFG_LEGACY);
540 hpet_writel(cfg, HPET_CFG);
541
542 return 0;
543}
544
545static int hpet_resume(struct sys_device *sys_device)
546{
547 unsigned int id;
548
549 hpet_start_counter();
550
551 id = hpet_readl(HPET_ID);
552
553 if (id & HPET_ID_LEGSUP)
554 hpet_enable_int();
555
556 return 0;
557}
558
559static struct sysdev_class hpet_class = {
560 set_kset_name("hpet"),
561 .suspend = hpet_suspend,
562 .resume = hpet_resume,
563};
564
565static struct sys_device hpet_device = {
566 .id = 0,
567 .cls = &hpet_class,
568};
569
570
571static __init int hpet_register_sysfs(void)
572{
573 int err;
574
575 if (!is_hpet_capable())
576 return 0;
577
578 err = sysdev_class_register(&hpet_class);
579
580 if (!err) {
581 err = sysdev_register(&hpet_device);
582 if (err)
583 sysdev_class_unregister(&hpet_class);
584 }
585
586 return err;
587}
588
589device_initcall(hpet_register_sysfs);
590
591#endif
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c
index f8a3c4054c70..6d839f2f1b1a 100644
--- a/arch/i386/kernel/i8253.c
+++ b/arch/i386/kernel/i8253.c
@@ -3,18 +3,17 @@
3 * 3 *
4 */ 4 */
5#include <linux/clockchips.h> 5#include <linux/clockchips.h>
6#include <linux/spinlock.h> 6#include <linux/init.h>
7#include <linux/interrupt.h>
7#include <linux/jiffies.h> 8#include <linux/jiffies.h>
8#include <linux/sysdev.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/init.h> 10#include <linux/spinlock.h>
11 11
12#include <asm/smp.h> 12#include <asm/smp.h>
13#include <asm/delay.h> 13#include <asm/delay.h>
14#include <asm/i8253.h> 14#include <asm/i8253.h>
15#include <asm/io.h> 15#include <asm/io.h>
16 16#include <asm/timer.h>
17#include "io_ports.h"
18 17
19DEFINE_SPINLOCK(i8253_lock); 18DEFINE_SPINLOCK(i8253_lock);
20EXPORT_SYMBOL(i8253_lock); 19EXPORT_SYMBOL(i8253_lock);
@@ -41,26 +40,27 @@ static void init_pit_timer(enum clock_event_mode mode,
41 case CLOCK_EVT_MODE_PERIODIC: 40 case CLOCK_EVT_MODE_PERIODIC:
42 /* binary, mode 2, LSB/MSB, ch 0 */ 41 /* binary, mode 2, LSB/MSB, ch 0 */
43 outb_p(0x34, PIT_MODE); 42 outb_p(0x34, PIT_MODE);
44 udelay(10);
45 outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ 43 outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
46 udelay(10);
47 outb(LATCH >> 8 , PIT_CH0); /* MSB */ 44 outb(LATCH >> 8 , PIT_CH0); /* MSB */
48 break; 45 break;
49 46
50 /*
51 * Avoid unnecessary state transitions, as it confuses
52 * Geode / Cyrix based boxen.
53 */
54 case CLOCK_EVT_MODE_SHUTDOWN: 47 case CLOCK_EVT_MODE_SHUTDOWN:
55 if (evt->mode == CLOCK_EVT_MODE_UNUSED)
56 break;
57 case CLOCK_EVT_MODE_UNUSED: 48 case CLOCK_EVT_MODE_UNUSED:
58 if (evt->mode == CLOCK_EVT_MODE_SHUTDOWN) 49 if (evt->mode == CLOCK_EVT_MODE_PERIODIC ||
59 break; 50 evt->mode == CLOCK_EVT_MODE_ONESHOT) {
51 outb_p(0x30, PIT_MODE);
52 outb_p(0, PIT_CH0);
53 outb_p(0, PIT_CH0);
54 }
55 break;
56
60 case CLOCK_EVT_MODE_ONESHOT: 57 case CLOCK_EVT_MODE_ONESHOT:
61 /* One shot setup */ 58 /* One shot setup */
62 outb_p(0x38, PIT_MODE); 59 outb_p(0x38, PIT_MODE);
63 udelay(10); 60 break;
61
62 case CLOCK_EVT_MODE_RESUME:
63 /* Nothing to do here */
64 break; 64 break;
65 } 65 }
66 spin_unlock_irqrestore(&i8253_lock, flags); 66 spin_unlock_irqrestore(&i8253_lock, flags);
diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c
index cff95d10a4d8..d26fc063a760 100644
--- a/arch/i386/kernel/init_task.c
+++ b/arch/i386/kernel/init_task.c
@@ -42,5 +42,5 @@ EXPORT_SYMBOL(init_task);
42 * per-CPU TSS segments. Threads are completely 'soft' on Linux, 42 * per-CPU TSS segments. Threads are completely 'soft' on Linux,
43 * no more per-task TSS's. 43 * no more per-task TSS's.
44 */ 44 */
45DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; 45DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
46 46
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 7f8b7af2b95f..893df8280756 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -353,14 +353,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
353# include <linux/slab.h> /* kmalloc() */ 353# include <linux/slab.h> /* kmalloc() */
354# include <linux/timer.h> /* time_after() */ 354# include <linux/timer.h> /* time_after() */
355 355
356#ifdef CONFIG_BALANCED_IRQ_DEBUG
357# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
358# define Dprintk(x...) do { TDprintk(x); } while (0)
359# else
360# define TDprintk(x...)
361# define Dprintk(x...)
362# endif
363
364#define IRQBALANCE_CHECK_ARCH -999 356#define IRQBALANCE_CHECK_ARCH -999
365#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) 357#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
366#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) 358#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
@@ -443,7 +435,7 @@ static inline void balance_irq(int cpu, int irq)
443static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) 435static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
444{ 436{
445 int i, j; 437 int i, j;
446 Dprintk("Rotating IRQs among CPUs.\n"); 438
447 for_each_online_cpu(i) { 439 for_each_online_cpu(i) {
448 for (j = 0; j < NR_IRQS; j++) { 440 for (j = 0; j < NR_IRQS; j++) {
449 if (!irq_desc[j].action) 441 if (!irq_desc[j].action)
@@ -560,19 +552,11 @@ tryanothercpu:
560 max_loaded = tmp_loaded; /* processor */ 552 max_loaded = tmp_loaded; /* processor */
561 imbalance = (max_cpu_irq - min_cpu_irq) / 2; 553 imbalance = (max_cpu_irq - min_cpu_irq) / 2;
562 554
563 Dprintk("max_loaded cpu = %d\n", max_loaded);
564 Dprintk("min_loaded cpu = %d\n", min_loaded);
565 Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
566 Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
567 Dprintk("load imbalance = %lu\n", imbalance);
568
569 /* if imbalance is less than approx 10% of max load, then 555 /* if imbalance is less than approx 10% of max load, then
570 * observe diminishing returns action. - quit 556 * observe diminishing returns action. - quit
571 */ 557 */
572 if (imbalance < (max_cpu_irq >> 3)) { 558 if (imbalance < (max_cpu_irq >> 3))
573 Dprintk("Imbalance too trivial\n");
574 goto not_worth_the_effort; 559 goto not_worth_the_effort;
575 }
576 560
577tryanotherirq: 561tryanotherirq:
578 /* if we select an IRQ to move that can't go where we want, then 562 /* if we select an IRQ to move that can't go where we want, then
@@ -629,9 +613,6 @@ tryanotherirq:
629 cpus_and(tmp, target_cpu_mask, allowed_mask); 613 cpus_and(tmp, target_cpu_mask, allowed_mask);
630 614
631 if (!cpus_empty(tmp)) { 615 if (!cpus_empty(tmp)) {
632
633 Dprintk("irq = %d moved to cpu = %d\n",
634 selected_irq, min_loaded);
635 /* mark for change destination */ 616 /* mark for change destination */
636 set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); 617 set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
637 618
@@ -651,7 +632,6 @@ not_worth_the_effort:
651 */ 632 */
652 balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, 633 balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
653 balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); 634 balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
654 Dprintk("IRQ worth rotating not found\n");
655 return; 635 return;
656} 636}
657 637
@@ -667,6 +647,7 @@ static int balanced_irq(void *unused)
667 set_pending_irq(i, cpumask_of_cpu(0)); 647 set_pending_irq(i, cpumask_of_cpu(0));
668 } 648 }
669 649
650 set_freezable();
670 for ( ; ; ) { 651 for ( ; ; ) {
671 time_remaining = schedule_timeout_interruptible(time_remaining); 652 time_remaining = schedule_timeout_interruptible(time_remaining);
672 try_to_freeze(); 653 try_to_freeze();
@@ -1901,7 +1882,7 @@ __setup("no_timer_check", notimercheck);
1901 * - if this function detects that timer IRQs are defunct, then we fall 1882 * - if this function detects that timer IRQs are defunct, then we fall
1902 * back to ISA timer IRQs 1883 * back to ISA timer IRQs
1903 */ 1884 */
1904int __init timer_irq_works(void) 1885static int __init timer_irq_works(void)
1905{ 1886{
1906 unsigned long t1 = jiffies; 1887 unsigned long t1 = jiffies;
1907 1888
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index d2daf672f4a2..dd2b97fc00b2 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -21,7 +21,7 @@
21#include <asm/apic.h> 21#include <asm/apic.h>
22#include <asm/uaccess.h> 22#include <asm/uaccess.h>
23 23
24DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; 24DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
25EXPORT_PER_CPU_SYMBOL(irq_stat); 25EXPORT_PER_CPU_SYMBOL(irq_stat);
26 26
27DEFINE_PER_CPU(struct pt_regs *, irq_regs); 27DEFINE_PER_CPU(struct pt_regs *, irq_regs);
@@ -149,15 +149,11 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs)
149 149
150#ifdef CONFIG_4KSTACKS 150#ifdef CONFIG_4KSTACKS
151 151
152/*
153 * These should really be __section__(".bss.page_aligned") as well, but
154 * gcc's 3.0 and earlier don't handle that correctly.
155 */
156static char softirq_stack[NR_CPUS * THREAD_SIZE] 152static char softirq_stack[NR_CPUS * THREAD_SIZE]
157 __attribute__((__aligned__(THREAD_SIZE))); 153 __attribute__((__section__(".bss.page_aligned")));
158 154
159static char hardirq_stack[NR_CPUS * THREAD_SIZE] 155static char hardirq_stack[NR_CPUS * THREAD_SIZE]
160 __attribute__((__aligned__(THREAD_SIZE))); 156 __attribute__((__section__(".bss.page_aligned")));
161 157
162/* 158/*
163 * allocate per-cpu stacks for hardirq and for softirq processing 159 * allocate per-cpu stacks for hardirq and for softirq processing
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index fba121f7973f..03b7f5584d71 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -295,7 +295,7 @@ static unsigned int
295 last_irq_sums [NR_CPUS], 295 last_irq_sums [NR_CPUS],
296 alert_counter [NR_CPUS]; 296 alert_counter [NR_CPUS];
297 297
298void touch_nmi_watchdog (void) 298void touch_nmi_watchdog(void)
299{ 299{
300 if (nmi_watchdog > 0) { 300 if (nmi_watchdog > 0) {
301 unsigned cpu; 301 unsigned cpu;
@@ -304,8 +304,10 @@ void touch_nmi_watchdog (void)
304 * Just reset the alert counters, (other CPUs might be 304 * Just reset the alert counters, (other CPUs might be
305 * spinning on locks we hold): 305 * spinning on locks we hold):
306 */ 306 */
307 for_each_present_cpu (cpu) 307 for_each_present_cpu(cpu) {
308 alert_counter[cpu] = 0; 308 if (alert_counter[cpu])
309 alert_counter[cpu] = 0;
310 }
309 } 311 }
310 312
311 /* 313 /*
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index faab09abca5e..53f07a8275e3 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -228,6 +228,41 @@ static int __init print_banner(void)
228} 228}
229core_initcall(print_banner); 229core_initcall(print_banner);
230 230
231static struct resource reserve_ioports = {
232 .start = 0,
233 .end = IO_SPACE_LIMIT,
234 .name = "paravirt-ioport",
235 .flags = IORESOURCE_IO | IORESOURCE_BUSY,
236};
237
238static struct resource reserve_iomem = {
239 .start = 0,
240 .end = -1,
241 .name = "paravirt-iomem",
242 .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
243};
244
245/*
246 * Reserve the whole legacy IO space to prevent any legacy drivers
247 * from wasting time probing for their hardware. This is a fairly
248 * brute-force approach to disabling all non-virtual drivers.
249 *
250 * Note that this must be called very early to have any effect.
251 */
252int paravirt_disable_iospace(void)
253{
254 int ret;
255
256 ret = request_resource(&ioport_resource, &reserve_ioports);
257 if (ret == 0) {
258 ret = request_resource(&iomem_resource, &reserve_iomem);
259 if (ret)
260 release_resource(&reserve_ioports);
261 }
262
263 return ret;
264}
265
231struct paravirt_ops paravirt_ops = { 266struct paravirt_ops paravirt_ops = {
232 .name = "bare hardware", 267 .name = "bare hardware",
233 .paravirt_enabled = 0, 268 .paravirt_enabled = 0,
@@ -267,7 +302,7 @@ struct paravirt_ops paravirt_ops = {
267 .write_msr = native_write_msr_safe, 302 .write_msr = native_write_msr_safe,
268 .read_tsc = native_read_tsc, 303 .read_tsc = native_read_tsc,
269 .read_pmc = native_read_pmc, 304 .read_pmc = native_read_pmc,
270 .get_scheduled_cycles = native_read_tsc, 305 .sched_clock = native_sched_clock,
271 .get_cpu_khz = native_calculate_cpu_khz, 306 .get_cpu_khz = native_calculate_cpu_khz,
272 .load_tr_desc = native_load_tr_desc, 307 .load_tr_desc = native_load_tr_desc,
273 .set_ldt = native_set_ldt, 308 .set_ldt = native_set_ldt,
diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c
index 30b754f7cbec..048f09b62553 100644
--- a/arch/i386/kernel/pci-dma.c
+++ b/arch/i386/kernel/pci-dma.c
@@ -12,6 +12,7 @@
12#include <linux/string.h> 12#include <linux/string.h>
13#include <linux/pci.h> 13#include <linux/pci.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/pci.h>
15#include <asm/io.h> 16#include <asm/io.h>
16 17
17struct dma_coherent_mem { 18struct dma_coherent_mem {
@@ -148,3 +149,29 @@ void *dma_mark_declared_memory_occupied(struct device *dev,
148 return mem->virt_base + (pos << PAGE_SHIFT); 149 return mem->virt_base + (pos << PAGE_SHIFT);
149} 150}
150EXPORT_SYMBOL(dma_mark_declared_memory_occupied); 151EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
152
153#ifdef CONFIG_PCI
154/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
155
156int forbid_dac;
157EXPORT_SYMBOL(forbid_dac);
158
159static __devinit void via_no_dac(struct pci_dev *dev)
160{
161 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
162 printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
163 forbid_dac = 1;
164 }
165}
166DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
167
168static int check_iommu(char *s)
169{
170 if (!strcmp(s, "usedac")) {
171 forbid_dac = -1;
172 return 1;
173 }
174 return 0;
175}
176__setup("iommu=", check_iommu);
177#endif
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 06dfa65ad180..84664710b784 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -300,6 +300,7 @@ early_param("idle", idle_setup);
300void show_regs(struct pt_regs * regs) 300void show_regs(struct pt_regs * regs)
301{ 301{
302 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; 302 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
303 unsigned long d0, d1, d2, d3, d6, d7;
303 304
304 printk("\n"); 305 printk("\n");
305 printk("Pid: %d, comm: %20s\n", current->pid, current->comm); 306 printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
@@ -324,6 +325,17 @@ void show_regs(struct pt_regs * regs)
324 cr3 = read_cr3(); 325 cr3 = read_cr3();
325 cr4 = read_cr4_safe(); 326 cr4 = read_cr4_safe();
326 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); 327 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
328
329 get_debugreg(d0, 0);
330 get_debugreg(d1, 1);
331 get_debugreg(d2, 2);
332 get_debugreg(d3, 3);
333 printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
334 d0, d1, d2, d3);
335 get_debugreg(d6, 6);
336 get_debugreg(d7, 7);
337 printk("DR6: %08lx DR7: %08lx\n", d6, d7);
338
327 show_trace(NULL, regs, &regs->esp); 339 show_trace(NULL, regs, &regs->esp);
328} 340}
329 341
@@ -538,8 +550,31 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
538 return 1; 550 return 1;
539} 551}
540 552
541static noinline void __switch_to_xtra(struct task_struct *next_p, 553#ifdef CONFIG_SECCOMP
542 struct tss_struct *tss) 554void hard_disable_TSC(void)
555{
556 write_cr4(read_cr4() | X86_CR4_TSD);
557}
558void disable_TSC(void)
559{
560 preempt_disable();
561 if (!test_and_set_thread_flag(TIF_NOTSC))
562 /*
563 * Must flip the CPU state synchronously with
564 * TIF_NOTSC in the current running context.
565 */
566 hard_disable_TSC();
567 preempt_enable();
568}
569void hard_enable_TSC(void)
570{
571 write_cr4(read_cr4() & ~X86_CR4_TSD);
572}
573#endif /* CONFIG_SECCOMP */
574
575static noinline void
576__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
577 struct tss_struct *tss)
543{ 578{
544 struct thread_struct *next; 579 struct thread_struct *next;
545 580
@@ -555,6 +590,17 @@ static noinline void __switch_to_xtra(struct task_struct *next_p,
555 set_debugreg(next->debugreg[7], 7); 590 set_debugreg(next->debugreg[7], 7);
556 } 591 }
557 592
593#ifdef CONFIG_SECCOMP
594 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
595 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
596 /* prev and next are different */
597 if (test_tsk_thread_flag(next_p, TIF_NOTSC))
598 hard_disable_TSC();
599 else
600 hard_enable_TSC();
601 }
602#endif
603
558 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 604 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
559 /* 605 /*
560 * Disable the bitmap via an invalid offset. We still cache 606 * Disable the bitmap via an invalid offset. We still cache
@@ -586,33 +632,6 @@ static noinline void __switch_to_xtra(struct task_struct *next_p,
586} 632}
587 633
588/* 634/*
589 * This function selects if the context switch from prev to next
590 * has to tweak the TSC disable bit in the cr4.
591 */
592static inline void disable_tsc(struct task_struct *prev_p,
593 struct task_struct *next_p)
594{
595 struct thread_info *prev, *next;
596
597 /*
598 * gcc should eliminate the ->thread_info dereference if
599 * has_secure_computing returns 0 at compile time (SECCOMP=n).
600 */
601 prev = task_thread_info(prev_p);
602 next = task_thread_info(next_p);
603
604 if (has_secure_computing(prev) || has_secure_computing(next)) {
605 /* slow path here */
606 if (has_secure_computing(prev) &&
607 !has_secure_computing(next)) {
608 write_cr4(read_cr4() & ~X86_CR4_TSD);
609 } else if (!has_secure_computing(prev) &&
610 has_secure_computing(next))
611 write_cr4(read_cr4() | X86_CR4_TSD);
612 }
613}
614
615/*
616 * switch_to(x,yn) should switch tasks from x to y. 635 * switch_to(x,yn) should switch tasks from x to y.
617 * 636 *
618 * We fsave/fwait so that an exception goes off at the right time 637 * We fsave/fwait so that an exception goes off at the right time
@@ -689,11 +708,9 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
689 /* 708 /*
690 * Now maybe handle debug registers and/or IO bitmaps 709 * Now maybe handle debug registers and/or IO bitmaps
691 */ 710 */
692 if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW) 711 if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
693 || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))) 712 task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
694 __switch_to_xtra(next_p, tss); 713 __switch_to_xtra(prev_p, next_p, tss);
695
696 disable_tsc(prev_p, next_p);
697 714
698 /* 715 /*
699 * Leave lazy mode, flushing any hypercalls made here. 716 * Leave lazy mode, flushing any hypercalls made here.
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index 0c0ceec5de00..0c8f00e69c4d 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -164,14 +164,22 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_
164 u32 *desc; 164 u32 *desc;
165 unsigned long base; 165 unsigned long base;
166 166
167 down(&child->mm->context.sem); 167 seg &= ~7UL;
168 desc = child->mm->context.ldt + (seg & ~7);
169 base = (desc[0] >> 16) | ((desc[1] & 0xff) << 16) | (desc[1] & 0xff000000);
170 168
171 /* 16-bit code segment? */ 169 down(&child->mm->context.sem);
172 if (!((desc[1] >> 22) & 1)) 170 if (unlikely((seg >> 3) >= child->mm->context.size))
173 addr &= 0xffff; 171 addr = -1L; /* bogus selector, access would fault */
174 addr += base; 172 else {
173 desc = child->mm->context.ldt + seg;
174 base = ((desc[0] >> 16) |
175 ((desc[1] & 0xff) << 16) |
176 (desc[1] & 0xff000000));
177
178 /* 16-bit code segment? */
179 if (!((desc[1] >> 22) & 1))
180 addr &= 0xffff;
181 addr += base;
182 }
175 up(&child->mm->context.sem); 183 up(&child->mm->context.sem);
176 } 184 }
177 return addr; 185 return addr;
@@ -358,17 +366,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
358 switch (request) { 366 switch (request) {
359 /* when I and D space are separate, these will need to be fixed. */ 367 /* when I and D space are separate, these will need to be fixed. */
360 case PTRACE_PEEKTEXT: /* read word at location addr. */ 368 case PTRACE_PEEKTEXT: /* read word at location addr. */
361 case PTRACE_PEEKDATA: { 369 case PTRACE_PEEKDATA:
362 unsigned long tmp; 370 ret = generic_ptrace_peekdata(child, addr, data);
363 int copied;
364
365 copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
366 ret = -EIO;
367 if (copied != sizeof(tmp))
368 break;
369 ret = put_user(tmp, datap);
370 break; 371 break;
371 }
372 372
373 /* read the word at location addr in the USER area. */ 373 /* read the word at location addr in the USER area. */
374 case PTRACE_PEEKUSR: { 374 case PTRACE_PEEKUSR: {
@@ -395,10 +395,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
395 /* when I and D space are separate, this will have to be fixed. */ 395 /* when I and D space are separate, this will have to be fixed. */
396 case PTRACE_POKETEXT: /* write the word at location addr. */ 396 case PTRACE_POKETEXT: /* write the word at location addr. */
397 case PTRACE_POKEDATA: 397 case PTRACE_POKEDATA:
398 ret = 0; 398 ret = generic_ptrace_pokedata(child, addr, data);
399 if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
400 break;
401 ret = -EIO;
402 break; 399 break;
403 400
404 case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ 401 case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c
index 9f6ab1789bb0..6722469c2633 100644
--- a/arch/i386/kernel/quirks.c
+++ b/arch/i386/kernel/quirks.c
@@ -20,8 +20,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
20 if (rev > 0x9) 20 if (rev > 0x9)
21 return; 21 return;
22 22
23 printk(KERN_INFO "Intel E7520/7320/7525 detected.");
24
25 /* enable access to config space*/ 23 /* enable access to config space*/
26 pci_read_config_byte(dev, 0xf4, &config); 24 pci_read_config_byte(dev, 0xf4, &config);
27 pci_write_config_byte(dev, 0xf4, config|0x2); 25 pci_write_config_byte(dev, 0xf4, config|0x2);
@@ -30,7 +28,8 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
30 raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); 28 raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
31 29
32 if (!(word & (1 << 13))) { 30 if (!(word & (1 << 13))) {
33 printk(KERN_INFO "Disabling irq balancing and affinity\n"); 31 printk(KERN_INFO "Intel E7520/7320/7525 detected. "
32 "Disabling irq balancing and affinity\n");
34#ifdef CONFIG_IRQBALANCE 33#ifdef CONFIG_IRQBALANCE
35 irqbalance_disable(""); 34 irqbalance_disable("");
36#endif 35#endif
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index 5513f8d5b5be..0d796248866c 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -113,6 +113,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
113 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"), 113 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"),
114 }, 114 },
115 }, 115 },
116 { /* Handle problems with rebooting on Dell Optiplex 745's SFF*/
117 .callback = set_bios_reboot,
118 .ident = "Dell OptiPlex 745",
119 .matches = {
120 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
121 DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
122 DMI_MATCH(DMI_BOARD_NAME, "0WF810"),
123 },
124 },
116 { /* Handle problems with rebooting on Dell 2400's */ 125 { /* Handle problems with rebooting on Dell 2400's */
117 .callback = set_bios_reboot, 126 .callback = set_bios_reboot,
118 .ident = "Dell PowerEdge 2400", 127 .ident = "Dell PowerEdge 2400",
diff --git a/arch/i386/kernel/reboot_fixups.c b/arch/i386/kernel/reboot_fixups.c
index 2d78d918340f..03e1cce58f49 100644
--- a/arch/i386/kernel/reboot_fixups.c
+++ b/arch/i386/kernel/reboot_fixups.c
@@ -5,12 +5,14 @@
5 * 5 *
6 * List of supported fixups: 6 * List of supported fixups:
7 * geode-gx1/cs5530a - Jaya Kumar <jayalk@intworks.biz> 7 * geode-gx1/cs5530a - Jaya Kumar <jayalk@intworks.biz>
8 * geode-gx/lx/cs5536 - Andres Salomon <dilinger@debian.org>
8 * 9 *
9 */ 10 */
10 11
11#include <asm/delay.h> 12#include <asm/delay.h>
12#include <linux/pci.h> 13#include <linux/pci.h>
13#include <asm/reboot_fixups.h> 14#include <asm/reboot_fixups.h>
15#include <asm/msr.h>
14 16
15static void cs5530a_warm_reset(struct pci_dev *dev) 17static void cs5530a_warm_reset(struct pci_dev *dev)
16{ 18{
@@ -21,6 +23,16 @@ static void cs5530a_warm_reset(struct pci_dev *dev)
21 return; 23 return;
22} 24}
23 25
26static void cs5536_warm_reset(struct pci_dev *dev)
27{
28 /*
29 * 6.6.2.12 Soft Reset (DIVIL_SOFT_RESET)
30 * writing 1 to the LSB of this MSR causes a hard reset.
31 */
32 wrmsrl(0x51400017, 1ULL);
33 udelay(50); /* shouldn't get here but be safe and spin a while */
34}
35
24struct device_fixup { 36struct device_fixup {
25 unsigned int vendor; 37 unsigned int vendor;
26 unsigned int device; 38 unsigned int device;
@@ -29,6 +41,7 @@ struct device_fixup {
29 41
30static struct device_fixup fixups_table[] = { 42static struct device_fixup fixups_table[] = {
31{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, 43{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset },
44{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset },
32}; 45};
33 46
34/* 47/*
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 698c24fe482e..d474cd639bcb 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -102,19 +102,10 @@ static unsigned int highmem_pages = -1;
102/* 102/*
103 * Setup options 103 * Setup options
104 */ 104 */
105struct drive_info_struct { char dummy[32]; } drive_info;
106#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
107 defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
108EXPORT_SYMBOL(drive_info);
109#endif
110struct screen_info screen_info; 105struct screen_info screen_info;
111EXPORT_SYMBOL(screen_info); 106EXPORT_SYMBOL(screen_info);
112struct apm_info apm_info; 107struct apm_info apm_info;
113EXPORT_SYMBOL(apm_info); 108EXPORT_SYMBOL(apm_info);
114struct sys_desc_table_struct {
115 unsigned short length;
116 unsigned char table[0];
117};
118struct edid_info edid_info; 109struct edid_info edid_info;
119EXPORT_SYMBOL_GPL(edid_info); 110EXPORT_SYMBOL_GPL(edid_info);
120struct ist_info ist_info; 111struct ist_info ist_info;
@@ -134,7 +125,7 @@ unsigned long saved_videomode;
134 125
135static char __initdata command_line[COMMAND_LINE_SIZE]; 126static char __initdata command_line[COMMAND_LINE_SIZE];
136 127
137unsigned char __initdata boot_params[PARAM_SIZE]; 128struct boot_params __initdata boot_params;
138 129
139#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) 130#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
140struct edd edd; 131struct edd edd;
@@ -282,18 +273,18 @@ unsigned long __init find_max_low_pfn(void)
282 printk(KERN_WARNING "Warning only %ldMB will be used.\n", 273 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
283 MAXMEM>>20); 274 MAXMEM>>20);
284 if (max_pfn > MAX_NONPAE_PFN) 275 if (max_pfn > MAX_NONPAE_PFN)
285 printk(KERN_WARNING "Use a PAE enabled kernel.\n"); 276 printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
286 else 277 else
287 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); 278 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
288 max_pfn = MAXMEM_PFN; 279 max_pfn = MAXMEM_PFN;
289#else /* !CONFIG_HIGHMEM */ 280#else /* !CONFIG_HIGHMEM */
290#ifndef CONFIG_X86_PAE 281#ifndef CONFIG_HIGHMEM64G
291 if (max_pfn > MAX_NONPAE_PFN) { 282 if (max_pfn > MAX_NONPAE_PFN) {
292 max_pfn = MAX_NONPAE_PFN; 283 max_pfn = MAX_NONPAE_PFN;
293 printk(KERN_WARNING "Warning only 4GB will be used.\n"); 284 printk(KERN_WARNING "Warning only 4GB will be used.\n");
294 printk(KERN_WARNING "Use a PAE enabled kernel.\n"); 285 printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
295 } 286 }
296#endif /* !CONFIG_X86_PAE */ 287#endif /* !CONFIG_HIGHMEM64G */
297#endif /* !CONFIG_HIGHMEM */ 288#endif /* !CONFIG_HIGHMEM */
298 } else { 289 } else {
299 if (highmem_pages == -1) 290 if (highmem_pages == -1)
@@ -475,7 +466,7 @@ void __init setup_bootmem_allocator(void)
475 * 466 *
476 * This should all compile down to nothing when NUMA is off. 467 * This should all compile down to nothing when NUMA is off.
477 */ 468 */
478void __init remapped_pgdat_init(void) 469static void __init remapped_pgdat_init(void)
479{ 470{
480 int nid; 471 int nid;
481 472
@@ -528,7 +519,6 @@ void __init setup_arch(char **cmdline_p)
528#endif 519#endif
529 520
530 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); 521 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
531 drive_info = DRIVE_INFO;
532 screen_info = SCREEN_INFO; 522 screen_info = SCREEN_INFO;
533 edid_info = EDID_INFO; 523 edid_info = EDID_INFO;
534 apm_info.bios = APM_BIOS_INFO; 524 apm_info.bios = APM_BIOS_INFO;
@@ -611,6 +601,8 @@ void __init setup_arch(char **cmdline_p)
611 * NOTE: at this point the bootmem allocator is fully available. 601 * NOTE: at this point the bootmem allocator is fully available.
612 */ 602 */
613 603
604 paravirt_post_allocator_init();
605
614 dmi_scan_machine(); 606 dmi_scan_machine();
615 607
616#ifdef CONFIG_X86_GENERICARCH 608#ifdef CONFIG_X86_GENERICARCH
@@ -648,6 +640,7 @@ void __init setup_arch(char **cmdline_p)
648#endif 640#endif
649 641
650 e820_register_memory(); 642 e820_register_memory();
643 e820_mark_nosave_regions();
651 644
652#ifdef CONFIG_VT 645#ifdef CONFIG_VT
653#if defined(CONFIG_VGA_CONSOLE) 646#if defined(CONFIG_VGA_CONSOLE)
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 6299c080f6e2..2d35d8502029 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -22,6 +22,7 @@
22 22
23#include <asm/mtrr.h> 23#include <asm/mtrr.h>
24#include <asm/tlbflush.h> 24#include <asm/tlbflush.h>
25#include <asm/mmu_context.h>
25#include <mach_apic.h> 26#include <mach_apic.h>
26 27
27/* 28/*
@@ -249,13 +250,13 @@ static unsigned long flush_va;
249static DEFINE_SPINLOCK(tlbstate_lock); 250static DEFINE_SPINLOCK(tlbstate_lock);
250 251
251/* 252/*
252 * We cannot call mmdrop() because we are in interrupt context, 253 * We cannot call mmdrop() because we are in interrupt context,
253 * instead update mm->cpu_vm_mask. 254 * instead update mm->cpu_vm_mask.
254 * 255 *
255 * We need to reload %cr3 since the page tables may be going 256 * We need to reload %cr3 since the page tables may be going
256 * away from under us.. 257 * away from under us..
257 */ 258 */
258static inline void leave_mm (unsigned long cpu) 259void leave_mm(unsigned long cpu)
259{ 260{
260 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) 261 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
261 BUG(); 262 BUG();
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 88baed1e7e83..5910d3fac561 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -148,7 +148,7 @@ void __init smp_alloc_memory(void)
148 * a given CPU 148 * a given CPU
149 */ 149 */
150 150
151static void __cpuinit smp_store_cpu_info(int id) 151void __cpuinit smp_store_cpu_info(int id)
152{ 152{
153 struct cpuinfo_x86 *c = cpu_data + id; 153 struct cpuinfo_x86 *c = cpu_data + id;
154 154
@@ -308,8 +308,7 @@ cpumask_t cpu_coregroup_map(int cpu)
308/* representing cpus for which sibling maps can be computed */ 308/* representing cpus for which sibling maps can be computed */
309static cpumask_t cpu_sibling_setup_map; 309static cpumask_t cpu_sibling_setup_map;
310 310
311static inline void 311void set_cpu_sibling_map(int cpu)
312set_cpu_sibling_map(int cpu)
313{ 312{
314 int i; 313 int i;
315 struct cpuinfo_x86 *c = cpu_data; 314 struct cpuinfo_x86 *c = cpu_data;
@@ -941,17 +940,6 @@ exit:
941} 940}
942#endif 941#endif
943 942
944static void smp_tune_scheduling(void)
945{
946 if (cpu_khz) {
947 /* cache size in kB */
948 long cachesize = boot_cpu_data.x86_cache_size;
949
950 if (cachesize > 0)
951 max_cache_size = cachesize * 1024;
952 }
953}
954
955/* 943/*
956 * Cycle through the processors sending APIC IPIs to boot each. 944 * Cycle through the processors sending APIC IPIs to boot each.
957 */ 945 */
@@ -980,7 +968,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
980 x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; 968 x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
981 969
982 current_thread_info()->cpu = 0; 970 current_thread_info()->cpu = 0;
983 smp_tune_scheduling();
984 971
985 set_cpu_sibling_map(0); 972 set_cpu_sibling_map(0);
986 973
@@ -1156,8 +1143,7 @@ void __init native_smp_prepare_boot_cpu(void)
1156} 1143}
1157 1144
1158#ifdef CONFIG_HOTPLUG_CPU 1145#ifdef CONFIG_HOTPLUG_CPU
1159static void 1146void remove_siblinginfo(int cpu)
1160remove_siblinginfo(int cpu)
1161{ 1147{
1162 int sibling; 1148 int sibling;
1163 struct cpuinfo_x86 *c = cpu_data; 1149 struct cpuinfo_x86 *c = cpu_data;
diff --git a/arch/i386/kernel/smpcommon.c b/arch/i386/kernel/smpcommon.c
index 1868ae18eb4d..bbfe85a0f699 100644
--- a/arch/i386/kernel/smpcommon.c
+++ b/arch/i386/kernel/smpcommon.c
@@ -47,7 +47,7 @@ int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
47EXPORT_SYMBOL(smp_call_function); 47EXPORT_SYMBOL(smp_call_function);
48 48
49/** 49/**
50 * smp_call_function_single - Run a function on another CPU 50 * smp_call_function_single - Run a function on a specific CPU
51 * @cpu: The target CPU. Cannot be the calling CPU. 51 * @cpu: The target CPU. Cannot be the calling CPU.
52 * @func: The function to run. This must be fast and non-blocking. 52 * @func: The function to run. This must be fast and non-blocking.
53 * @info: An arbitrary pointer to pass to the function. 53 * @info: An arbitrary pointer to pass to the function.
@@ -66,9 +66,11 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
66 int ret; 66 int ret;
67 int me = get_cpu(); 67 int me = get_cpu();
68 if (cpu == me) { 68 if (cpu == me) {
69 WARN_ON(1); 69 local_irq_disable();
70 func(info);
71 local_irq_enable();
70 put_cpu(); 72 put_cpu();
71 return -EBUSY; 73 return 0;
72 } 74 }
73 75
74 ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); 76 ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index bf6adce52267..8344c70adf61 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -323,3 +323,4 @@ ENTRY(sys_call_table)
323 .long sys_signalfd 323 .long sys_signalfd
324 .long sys_timerfd 324 .long sys_timerfd
325 .long sys_eventfd 325 .long sys_eventfd
326 .long sys_fallocate
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index ff4ee6f3326b..6deb159d08e0 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -336,7 +336,9 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
336 336
337int in_gate_area(struct task_struct *task, unsigned long addr) 337int in_gate_area(struct task_struct *task, unsigned long addr)
338{ 338{
339 return 0; 339 const struct vm_area_struct *vma = get_gate_vma(task);
340
341 return vma && addr >= vma->vm_start && addr < vma->vm_end;
340} 342}
341 343
342int in_gate_area_no_task(unsigned long addr) 344int in_gate_area_no_task(unsigned long addr)
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index a665df61f08c..19a6c678d02e 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -207,55 +207,9 @@ unsigned long read_persistent_clock(void)
207 return retval; 207 return retval;
208} 208}
209 209
210static void sync_cmos_clock(unsigned long dummy); 210int update_persistent_clock(struct timespec now)
211
212static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
213int no_sync_cmos_clock;
214
215static void sync_cmos_clock(unsigned long dummy)
216{
217 struct timeval now, next;
218 int fail = 1;
219
220 /*
221 * If we have an externally synchronized Linux clock, then update
222 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
223 * called as close as possible to 500 ms before the new second starts.
224 * This code is run on a timer. If the clock is set, that timer
225 * may not expire at the correct time. Thus, we adjust...
226 */
227 if (!ntp_synced())
228 /*
229 * Not synced, exit, do not restart a timer (if one is
230 * running, let it run out).
231 */
232 return;
233
234 do_gettimeofday(&now);
235 if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
236 now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
237 fail = set_rtc_mmss(now.tv_sec);
238
239 next.tv_usec = USEC_AFTER - now.tv_usec;
240 if (next.tv_usec <= 0)
241 next.tv_usec += USEC_PER_SEC;
242
243 if (!fail)
244 next.tv_sec = 659;
245 else
246 next.tv_sec = 0;
247
248 if (next.tv_usec >= USEC_PER_SEC) {
249 next.tv_sec++;
250 next.tv_usec -= USEC_PER_SEC;
251 }
252 mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
253}
254
255void notify_arch_cmos_timer(void)
256{ 211{
257 if (!no_sync_cmos_clock) 212 return set_rtc_mmss(now.tv_sec);
258 mod_timer(&sync_cmos_timer, jiffies + 1);
259} 213}
260 214
261extern void (*late_time_init)(void); 215extern void (*late_time_init)(void);
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 90da0575fcff..57772a18c394 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -41,6 +41,10 @@
41#include <linux/mca.h> 41#include <linux/mca.h>
42#endif 42#endif
43 43
44#if defined(CONFIG_EDAC)
45#include <linux/edac.h>
46#endif
47
44#include <asm/processor.h> 48#include <asm/processor.h>
45#include <asm/system.h> 49#include <asm/system.h>
46#include <asm/io.h> 50#include <asm/io.h>
@@ -148,7 +152,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
148 if (!stack) { 152 if (!stack) {
149 unsigned long dummy; 153 unsigned long dummy;
150 stack = &dummy; 154 stack = &dummy;
151 if (task && task != current) 155 if (task != current)
152 stack = (unsigned long *)task->thread.esp; 156 stack = (unsigned long *)task->thread.esp;
153 } 157 }
154 158
@@ -207,6 +211,7 @@ static void print_trace_address(void *data, unsigned long addr)
207{ 211{
208 printk("%s [<%08lx>] ", (char *)data, addr); 212 printk("%s [<%08lx>] ", (char *)data, addr);
209 print_symbol("%s\n", addr); 213 print_symbol("%s\n", addr);
214 touch_nmi_watchdog();
210} 215}
211 216
212static struct stacktrace_ops print_trace_ops = { 217static struct stacktrace_ops print_trace_ops = {
@@ -390,7 +395,7 @@ void die(const char * str, struct pt_regs * regs, long err)
390 unsigned long esp; 395 unsigned long esp;
391 unsigned short ss; 396 unsigned short ss;
392 397
393 report_bug(regs->eip); 398 report_bug(regs->eip, regs);
394 399
395 printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); 400 printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
396#ifdef CONFIG_PREEMPT 401#ifdef CONFIG_PREEMPT
@@ -433,6 +438,7 @@ void die(const char * str, struct pt_regs * regs, long err)
433 438
434 bust_spinlocks(0); 439 bust_spinlocks(0);
435 die.lock_owner = -1; 440 die.lock_owner = -1;
441 add_taint(TAINT_DIE);
436 spin_unlock_irqrestore(&die.lock, flags); 442 spin_unlock_irqrestore(&die.lock, flags);
437 443
438 if (!regs) 444 if (!regs)
@@ -517,10 +523,12 @@ fastcall void do_##name(struct pt_regs * regs, long error_code) \
517 do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ 523 do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
518} 524}
519 525
520#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ 526#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
521fastcall void do_##name(struct pt_regs * regs, long error_code) \ 527fastcall void do_##name(struct pt_regs * regs, long error_code) \
522{ \ 528{ \
523 siginfo_t info; \ 529 siginfo_t info; \
530 if (irq) \
531 local_irq_enable(); \
524 info.si_signo = signr; \ 532 info.si_signo = signr; \
525 info.si_errno = 0; \ 533 info.si_errno = 0; \
526 info.si_code = sicode; \ 534 info.si_code = sicode; \
@@ -560,13 +568,13 @@ DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
560#endif 568#endif
561DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) 569DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
562DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) 570DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
563DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip) 571DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip, 0)
564DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) 572DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
565DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) 573DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
566DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) 574DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
567DO_ERROR(12, SIGBUS, "stack segment", stack_segment) 575DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
568DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) 576DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
569DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0) 577DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
570 578
571fastcall void __kprobes do_general_protection(struct pt_regs * regs, 579fastcall void __kprobes do_general_protection(struct pt_regs * regs,
572 long error_code) 580 long error_code)
@@ -635,6 +643,14 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
635 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " 643 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
636 "CPU %d.\n", reason, smp_processor_id()); 644 "CPU %d.\n", reason, smp_processor_id());
637 printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); 645 printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
646
647#if defined(CONFIG_EDAC)
648 if(edac_handler_set()) {
649 edac_atomic_assert_error();
650 return;
651 }
652#endif
653
638 if (panic_on_unrecovered_nmi) 654 if (panic_on_unrecovered_nmi)
639 panic("NMI: Not continuing"); 655 panic("NMI: Not continuing");
640 656
@@ -1053,6 +1069,7 @@ asmlinkage void math_state_restore(void)
1053 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ 1069 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
1054 tsk->fpu_counter++; 1070 tsk->fpu_counter++;
1055} 1071}
1072EXPORT_SYMBOL_GPL(math_state_restore);
1056 1073
1057#ifndef CONFIG_MATH_EMULATION 1074#ifndef CONFIG_MATH_EMULATION
1058 1075
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index f64b81f3033b..debd7dbb4158 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -4,6 +4,7 @@
4 * See comments there for proper credits. 4 * See comments there for proper credits.
5 */ 5 */
6 6
7#include <linux/sched.h>
7#include <linux/clocksource.h> 8#include <linux/clocksource.h>
8#include <linux/workqueue.h> 9#include <linux/workqueue.h>
9#include <linux/cpufreq.h> 10#include <linux/cpufreq.h>
@@ -26,6 +27,7 @@ static int tsc_enabled;
26 * an extra value to store the TSC freq 27 * an extra value to store the TSC freq
27 */ 28 */
28unsigned int tsc_khz; 29unsigned int tsc_khz;
30EXPORT_SYMBOL_GPL(tsc_khz);
29 31
30int tsc_disable; 32int tsc_disable;
31 33
@@ -57,10 +59,11 @@ __setup("notsc", tsc_setup);
57 */ 59 */
58static int tsc_unstable; 60static int tsc_unstable;
59 61
60static inline int check_tsc_unstable(void) 62int check_tsc_unstable(void)
61{ 63{
62 return tsc_unstable; 64 return tsc_unstable;
63} 65}
66EXPORT_SYMBOL_GPL(check_tsc_unstable);
64 67
65/* Accellerators for sched_clock() 68/* Accellerators for sched_clock()
66 * convert from cycles(64bits) => nanoseconds (64bits) 69 * convert from cycles(64bits) => nanoseconds (64bits)
@@ -83,7 +86,7 @@ static inline int check_tsc_unstable(void)
83 * 86 *
84 * -johnstul@us.ibm.com "math is hard, lets go shopping!" 87 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
85 */ 88 */
86static unsigned long cyc2ns_scale __read_mostly; 89unsigned long cyc2ns_scale __read_mostly;
87 90
88#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ 91#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
89 92
@@ -92,32 +95,44 @@ static inline void set_cyc2ns_scale(unsigned long cpu_khz)
92 cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; 95 cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
93} 96}
94 97
95static inline unsigned long long cycles_2_ns(unsigned long long cyc)
96{
97 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
98}
99
100/* 98/*
101 * Scheduler clock - returns current time in nanosec units. 99 * Scheduler clock - returns current time in nanosec units.
102 */ 100 */
103unsigned long long sched_clock(void) 101unsigned long long native_sched_clock(void)
104{ 102{
105 unsigned long long this_offset; 103 unsigned long long this_offset;
106 104
107 /* 105 /*
108 * Fall back to jiffies if there's no TSC available: 106 * Fall back to jiffies if there's no TSC available:
107 * ( But note that we still use it if the TSC is marked
108 * unstable. We do this because unlike Time Of Day,
109 * the scheduler clock tolerates small errors and it's
110 * very important for it to be as fast as the platform
111 * can achive it. )
109 */ 112 */
110 if (unlikely(!tsc_enabled)) 113 if (unlikely(!tsc_enabled && !tsc_unstable))
111 /* No locking but a rare wrong value is not a big deal: */ 114 /* No locking but a rare wrong value is not a big deal: */
112 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); 115 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
113 116
114 /* read the Time Stamp Counter: */ 117 /* read the Time Stamp Counter: */
115 get_scheduled_cycles(this_offset); 118 rdtscll(this_offset);
116 119
117 /* return the value in ns */ 120 /* return the value in ns */
118 return cycles_2_ns(this_offset); 121 return cycles_2_ns(this_offset);
119} 122}
120 123
124/* We need to define a real function for sched_clock, to override the
125 weak default version */
126#ifdef CONFIG_PARAVIRT
127unsigned long long sched_clock(void)
128{
129 return paravirt_sched_clock();
130}
131#else
132unsigned long long sched_clock(void)
133 __attribute__((alias("native_sched_clock")));
134#endif
135
121unsigned long native_calculate_cpu_khz(void) 136unsigned long native_calculate_cpu_khz(void)
122{ 137{
123 unsigned long long start, end; 138 unsigned long long start, end;
@@ -277,6 +292,7 @@ static struct clocksource clocksource_tsc = {
277 292
278void mark_tsc_unstable(char *reason) 293void mark_tsc_unstable(char *reason)
279{ 294{
295 sched_clock_unstable_event();
280 if (!tsc_unstable) { 296 if (!tsc_unstable) {
281 tsc_unstable = 1; 297 tsc_unstable = 1;
282 tsc_enabled = 0; 298 tsc_enabled = 0;
diff --git a/arch/i386/kernel/verify_cpu.S b/arch/i386/kernel/verify_cpu.S
deleted file mode 100644
index f1d1eacf4ab0..000000000000
--- a/arch/i386/kernel/verify_cpu.S
+++ /dev/null
@@ -1,94 +0,0 @@
1/* Check if CPU has some minimum CPUID bits
2 This runs in 16bit mode so that the caller can still use the BIOS
3 to output errors on the screen */
4#include <asm/cpufeature.h>
5#include <asm/msr.h>
6
7verify_cpu:
8 pushfl # Save caller passed flags
9 pushl $0 # Kill any dangerous flags
10 popfl
11
12#if CONFIG_X86_MINIMUM_CPU_MODEL >= 4
13 pushfl
14 pop %eax
15 orl $(1<<18),%eax # try setting AC
16 push %eax
17 popfl
18 pushfl
19 popl %eax
20 testl $(1<<18),%eax
21 jz bad
22#endif
23#if REQUIRED_MASK1 != 0
24 pushfl # standard way to check for cpuid
25 popl %eax
26 movl %eax,%ebx
27 xorl $0x200000,%eax
28 pushl %eax
29 popfl
30 pushfl
31 popl %eax
32 cmpl %eax,%ebx
33 pushfl # standard way to check for cpuid
34 popl %eax
35 movl %eax,%ebx
36 xorl $0x200000,%eax
37 pushl %eax
38 popfl
39 pushfl
40 popl %eax
41 cmpl %eax,%ebx
42 jz bad # REQUIRED_MASK1 != 0 requires CPUID
43
44 movl $0x0,%eax # See if cpuid 1 is implemented
45 cpuid
46 cmpl $0x1,%eax
47 jb bad # no cpuid 1
48
49#if REQUIRED_MASK1 & NEED_CMPXCHG64
50 /* Some VIA C3s need magic MSRs to enable CX64. Do this here */
51 cmpl $0x746e6543,%ebx # Cent
52 jne 1f
53 cmpl $0x48727561,%edx # aurH
54 jne 1f
55 cmpl $0x736c7561,%ecx # auls
56 jne 1f
57 movl $1,%eax # check model
58 cpuid
59 movl %eax,%ebx
60 shr $8,%ebx
61 andl $0xf,%ebx
62 cmp $6,%ebx # check family == 6
63 jne 1f
64 shr $4,%eax
65 andl $0xf,%eax
66 cmpl $6,%eax # check model >= 6
67 jb 1f
68 # assume models >= 6 all support this MSR
69 movl $MSR_VIA_FCR,%ecx
70 rdmsr
71 orl $((1<<1)|(1<<7)),%eax # enable CMPXCHG64 and PGE
72 wrmsr
731:
74#endif
75 movl $0x1,%eax # Does the cpu have what it takes
76 cpuid
77
78#if CONFIG_X86_MINIMUM_CPU_MODEL > 4
79#error add proper model checking here
80#endif
81
82 andl $REQUIRED_MASK1,%edx
83 xorl $REQUIRED_MASK1,%edx
84 jnz bad
85#endif /* REQUIRED_MASK1 */
86
87 popfl
88 xor %eax,%eax
89 ret
90
91bad:
92 popfl
93 movl $1,%eax
94 ret
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
index c12720d7cbc5..72042bb7ec94 100644
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -362,7 +362,7 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
362} 362}
363#endif 363#endif
364 364
365static void vmi_allocate_pt(u32 pfn) 365static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn)
366{ 366{
367 vmi_set_page_type(pfn, VMI_PAGE_L1); 367 vmi_set_page_type(pfn, VMI_PAGE_L1);
368 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); 368 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
@@ -891,7 +891,7 @@ static inline int __init activate_vmi(void)
891 paravirt_ops.setup_boot_clock = vmi_time_bsp_init; 891 paravirt_ops.setup_boot_clock = vmi_time_bsp_init;
892 paravirt_ops.setup_secondary_clock = vmi_time_ap_init; 892 paravirt_ops.setup_secondary_clock = vmi_time_ap_init;
893#endif 893#endif
894 paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; 894 paravirt_ops.sched_clock = vmi_sched_clock;
895 paravirt_ops.get_cpu_khz = vmi_cpu_khz; 895 paravirt_ops.get_cpu_khz = vmi_cpu_khz;
896 896
897 /* We have true wallclock functions; disable CMOS clock sync */ 897 /* We have true wallclock functions; disable CMOS clock sync */
diff --git a/arch/i386/kernel/vmiclock.c b/arch/i386/kernel/vmiclock.c
index 26a37f8a8762..b1b5ab08b26e 100644
--- a/arch/i386/kernel/vmiclock.c
+++ b/arch/i386/kernel/vmiclock.c
@@ -32,6 +32,7 @@
32#include <asm/apicdef.h> 32#include <asm/apicdef.h>
33#include <asm/apic.h> 33#include <asm/apic.h>
34#include <asm/timer.h> 34#include <asm/timer.h>
35#include <asm/i8253.h>
35 36
36#include <irq_vectors.h> 37#include <irq_vectors.h>
37#include "io_ports.h" 38#include "io_ports.h"
@@ -64,10 +65,10 @@ int vmi_set_wallclock(unsigned long now)
64 return 0; 65 return 0;
65} 66}
66 67
67/* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */ 68/* paravirt_ops.sched_clock = vmi_sched_clock */
68unsigned long long vmi_get_sched_cycles(void) 69unsigned long long vmi_sched_clock(void)
69{ 70{
70 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE); 71 return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE));
71} 72}
72 73
73/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */ 74/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */
@@ -142,6 +143,7 @@ static void vmi_timer_set_mode(enum clock_event_mode mode,
142 143
143 switch (mode) { 144 switch (mode) {
144 case CLOCK_EVT_MODE_ONESHOT: 145 case CLOCK_EVT_MODE_ONESHOT:
146 case CLOCK_EVT_MODE_RESUME:
145 break; 147 break;
146 case CLOCK_EVT_MODE_PERIODIC: 148 case CLOCK_EVT_MODE_PERIODIC:
147 cycles_per_hz = vmi_timer_ops.get_cycle_frequency(); 149 cycles_per_hz = vmi_timer_ops.get_cycle_frequency();
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index aa87b06c7c82..7d72cce00529 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -60,7 +60,9 @@ SECTIONS
60 __stop___ex_table = .; 60 __stop___ex_table = .;
61 } 61 }
62 62
63 BUG_TABLE 63 NOTES :text :note
64
65 BUG_TABLE :text
64 66
65 . = ALIGN(4); 67 . = ALIGN(4);
66 .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { 68 .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {
@@ -88,6 +90,7 @@ SECTIONS
88 90
89 . = ALIGN(4096); 91 . = ALIGN(4096);
90 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { 92 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
93 *(.data.page_aligned)
91 *(.data.idt) 94 *(.data.idt)
92 } 95 }
93 96
@@ -180,6 +183,7 @@ SECTIONS
180 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { 183 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
181 __per_cpu_start = .; 184 __per_cpu_start = .;
182 *(.data.percpu) 185 *(.data.percpu)
186 *(.data.percpu.shared_aligned)
183 __per_cpu_end = .; 187 __per_cpu_end = .;
184 } 188 }
185 . = ALIGN(4096); 189 . = ALIGN(4096);
@@ -206,6 +210,4 @@ SECTIONS
206 STABS_DEBUG 210 STABS_DEBUG
207 211
208 DWARF_DEBUG 212 DWARF_DEBUG
209
210 NOTES
211} 213}
diff --git a/arch/i386/kernel/vsyscall-note.S b/arch/i386/kernel/vsyscall-note.S
index d4b5be4f3d5f..07c0daf78237 100644
--- a/arch/i386/kernel/vsyscall-note.S
+++ b/arch/i386/kernel/vsyscall-note.S
@@ -3,23 +3,43 @@
3 * Here we can supply some information useful to userland. 3 * Here we can supply some information useful to userland.
4 */ 4 */
5 5
6#include <linux/uts.h>
7#include <linux/version.h> 6#include <linux/version.h>
7#include <linux/elfnote.h>
8 8
9#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \ 9/* Ideally this would use UTS_NAME, but using a quoted string here
10 .section name, flags; \ 10 doesn't work. Remember to change this when changing the
11 .balign 4; \ 11 kernel's name. */
12 .long 1f - 0f; /* name length */ \ 12ELFNOTE_START(Linux, 0, "a")
13 .long 3f - 2f; /* data length */ \ 13 .long LINUX_VERSION_CODE
14 .long type; /* note type */ \ 14ELFNOTE_END
150: .asciz vendor; /* vendor name */ \
161: .balign 4; \
172:
18 15
19#define ASM_ELF_NOTE_END \ 16#ifdef CONFIG_XEN
203: .balign 4; /* pad out section */ \ 17/*
21 .previous 18 * Add a special note telling glibc's dynamic linker a fake hardware
19 * flavor that it will use to choose the search path for libraries in the
20 * same way it uses real hardware capabilities like "mmx".
21 * We supply "nosegneg" as the fake capability, to indicate that we
22 * do not like negative offsets in instructions using segment overrides,
23 * since we implement those inefficiently. This makes it possible to
24 * install libraries optimized to avoid those access patterns in someplace
25 * like /lib/i686/tls/nosegneg. Note that an /etc/ld.so.conf.d/file
26 * corresponding to the bits here is needed to make ldconfig work right.
27 * It should contain:
28 * hwcap 1 nosegneg
29 * to match the mapping of bit to name that we give here.
30 *
31 * At runtime, the fake hardware feature will be considered to be present
32 * if its bit is set in the mask word. So, we start with the mask 0, and
33 * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen.
34 */
22 35
23 ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0) 36#include "../xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */
24 .long LINUX_VERSION_CODE 37
25 ASM_ELF_NOTE_END 38 .globl VDSO_NOTE_MASK
39ELFNOTE_START(GNU, 2, "a")
40 .long 1 /* ncaps */
41VDSO_NOTE_MASK:
42 .long 0 /* mask */
43 .byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */
44ELFNOTE_END
45#endif
diff --git a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
index 22d8ac5815f0..4d105fdfe817 100644
--- a/arch/i386/lib/Makefile
+++ b/arch/i386/lib/Makefile
@@ -4,7 +4,7 @@
4 4
5 5
6lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \ 6lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
7 bitops.o semaphore.o 7 bitops.o semaphore.o string.o
8 8
9lib-$(CONFIG_X86_USE_3DNOW) += mmx.o 9lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
10 10
diff --git a/arch/i386/lib/string.c b/arch/i386/lib/string.c
new file mode 100644
index 000000000000..2c773fefa3dd
--- /dev/null
+++ b/arch/i386/lib/string.c
@@ -0,0 +1,257 @@
1/*
2 * Most of the string-functions are rather heavily hand-optimized,
3 * see especially strsep,strstr,str[c]spn. They should work, but are not
4 * very easy to understand. Everything is done entirely within the register
5 * set, making the functions fast and clean. String instructions have been
6 * used through-out, making for "slightly" unclear code :-)
7 *
8 * AK: On P4 and K7 using non string instruction implementations might be faster
9 * for large memory blocks. But most of them are unlikely to be used on large
10 * strings.
11 */
12
13#include <linux/string.h>
14#include <linux/module.h>
15
16#ifdef __HAVE_ARCH_STRCPY
/*
 * strcpy - copy the NUL-terminated string src to dest.
 *
 * The loop loads one byte from src (ESI) with lodsb and stores it to dest
 * (EDI) with stosb; the zero test comes after the store, so the terminating
 * NUL is copied as well. No bound is applied to the copy.
 *
 * Returns dest. d0-d2 are dummy outputs that tell the compiler ESI, EDI and
 * EAX are modified by the asm.
 */
17char *strcpy(char * dest,const char *src)
18{
19 int d0, d1, d2;
20 asm volatile( "1:\tlodsb\n\t"
21 "stosb\n\t"
22 "testb %%al,%%al\n\t"
23 "jne 1b"
24 : "=&S" (d0), "=&D" (d1), "=&a" (d2)
25 :"0" (src),"1" (dest) : "memory");
26 return dest;
27}
28EXPORT_SYMBOL(strcpy);
29#endif
30
31#ifdef __HAVE_ARCH_STRNCPY
/*
 * strncpy - copy at most count bytes of the string src to dest.
 *
 * count lives in ECX and is decremented before every byte; when it goes
 * negative the copy stops (label 2) without storing anything further, so
 * dest is not NUL-terminated if src has count or more non-NUL bytes.
 * If the NUL of src is copied first, "rep stosb" stores the zero left in AL
 * into the remaining ECX bytes of dest.
 *
 * Returns dest.
 */
32char *strncpy(char * dest,const char *src,size_t count)
33{
34 int d0, d1, d2, d3;
35 asm volatile( "1:\tdecl %2\n\t"
36 "js 2f\n\t"
37 "lodsb\n\t"
38 "stosb\n\t"
39 "testb %%al,%%al\n\t"
40 "jne 1b\n\t"
41 "rep\n\t"
42 "stosb\n"
43 "2:"
44 : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
45 :"0" (src),"1" (dest),"2" (count) : "memory");
46 return dest;
47}
48EXPORT_SYMBOL(strncpy);
49#endif
50
51#ifdef __HAVE_ARCH_STRCAT
/*
 * strcat - append the NUL-terminated string src to the end of dest.
 *
 * "repne scasb" with AL = 0 and ECX = 0xffffffff scans dest for its
 * terminating NUL; EDI ends one byte past it, so "decl %1" steps EDI back
 * onto the NUL. The lodsb/stosb loop then copies src, including its NUL,
 * starting at that position.
 *
 * Returns dest.
 */
52char *strcat(char * dest,const char * src)
53{
54 int d0, d1, d2, d3;
55 asm volatile( "repne\n\t"
56 "scasb\n\t"
57 "decl %1\n"
58 "1:\tlodsb\n\t"
59 "stosb\n\t"
60 "testb %%al,%%al\n\t"
61 "jne 1b"
62 : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
63 : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu): "memory");
64 return dest;
65}
66EXPORT_SYMBOL(strcat);
67#endif
68
69#ifdef __HAVE_ARCH_STRNCAT
/*
 * strncat - append at most count bytes of src to the end of dest.
 *
 * The end of dest is located as in strcat ("repne scasb" with AL = 0, then
 * EDI is stepped back onto the NUL). ECX is then reloaded with count
 * (operand %8) and decremented before each copied byte; the loop ends when
 * ECX goes negative or when the NUL of src has been copied.
 *
 * Both exits reach label 2, which zeroes EAX and stores one more NUL byte.
 * When the loop ended on the NUL of src this writes a second NUL directly
 * after the first one.
 *
 * Returns dest.
 */
70char *strncat(char * dest,const char * src,size_t count)
71{
72 int d0, d1, d2, d3;
73 asm volatile( "repne\n\t"
74 "scasb\n\t"
75 "decl %1\n\t"
76 "movl %8,%3\n"
77 "1:\tdecl %3\n\t"
78 "js 2f\n\t"
79 "lodsb\n\t"
80 "stosb\n\t"
81 "testb %%al,%%al\n\t"
82 "jne 1b\n"
83 "2:\txorl %2,%2\n\t"
84 "stosb"
85 : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
86 : "0" (src),"1" (dest),"2" (0),"3" (0xffffffffu), "g" (count)
87 : "memory");
88 return dest;
89}
90EXPORT_SYMBOL(strncat);
91#endif
92
93#ifdef __HAVE_ARCH_STRCMP
/*
 * strcmp - compare two NUL-terminated strings byte by byte.
 *
 * Each iteration loads a byte of cs into AL (lodsb) and compares it with
 * the byte of ct at EDI (scasb). If the bytes match and are non-zero the
 * loop continues; if they match and are zero the strings are equal.
 *
 * Returns 0 when equal. On the first mismatch, "sbbl %eax,%eax; orb $1,%al"
 * turns the carry flag of the comparison into the result: -1 if the byte of
 * cs is below the byte of ct (unsigned compare), 1 otherwise.
 */
94int strcmp(const char * cs,const char * ct)
95{
96 int d0, d1;
97 int res;
98 asm volatile( "1:\tlodsb\n\t"
99 "scasb\n\t"
100 "jne 2f\n\t"
101 "testb %%al,%%al\n\t"
102 "jne 1b\n\t"
103 "xorl %%eax,%%eax\n\t"
104 "jmp 3f\n"
105 "2:\tsbbl %%eax,%%eax\n\t"
106 "orb $1,%%al\n"
107 "3:"
108 :"=a" (res), "=&S" (d0), "=&D" (d1)
109 :"1" (cs),"2" (ct)
110 :"memory");
111 return res;
112}
113EXPORT_SYMBOL(strcmp);
114#endif
115
116#ifdef __HAVE_ARCH_STRNCMP
/*
 * strncmp - compare at most count bytes of two strings.
 *
 * Same byte-wise lodsb/scasb comparison as strcmp, with count held in ECX
 * and decremented before every byte; once ECX goes negative the strings are
 * treated as equal.
 *
 * Returns 0 if no difference was found within count bytes or before a
 * common NUL, -1 if the first differing byte of cs is below that of ct
 * (unsigned compare), 1 otherwise.
 */
117int strncmp(const char * cs,const char * ct,size_t count)
118{
119 int res;
120 int d0, d1, d2;
121 asm volatile( "1:\tdecl %3\n\t"
122 "js 2f\n\t"
123 "lodsb\n\t"
124 "scasb\n\t"
125 "jne 3f\n\t"
126 "testb %%al,%%al\n\t"
127 "jne 1b\n"
128 "2:\txorl %%eax,%%eax\n\t"
129 "jmp 4f\n"
130 "3:\tsbbl %%eax,%%eax\n\t"
131 "orb $1,%%al\n"
132 "4:"
133 :"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
134 :"1" (cs),"2" (ct),"3" (count)
135 :"memory");
136 return res;
137}
138EXPORT_SYMBOL(strncmp);
139#endif
140
141#ifdef __HAVE_ARCH_STRCHR
/*
 * strchr - find the first occurrence of the byte c in the string s.
 *
 * c arrives in EAX and its low byte is saved in AH, because lodsb
 * overwrites AL with each byte of s. The match test is done before the
 * end-of-string test, so searching for 0 finds the terminating NUL.
 *
 * On a match ESI points one past the matching byte, so the result is
 * ESI - 1. If the NUL is reached first, ESI is set to 1 and the same
 * "ESI - 1" computation yields 0, i.e. a NULL return.
 */
142char *strchr(const char * s, int c)
143{
144 int d0;
145 char * res;
146 asm volatile( "movb %%al,%%ah\n"
147 "1:\tlodsb\n\t"
148 "cmpb %%ah,%%al\n\t"
149 "je 2f\n\t"
150 "testb %%al,%%al\n\t"
151 "jne 1b\n\t"
152 "movl $1,%1\n"
153 "2:\tmovl %1,%0\n\t"
154 "decl %0"
155 :"=a" (res), "=&S" (d0)
156 :"1" (s),"0" (c)
157 :"memory");
158 return res;
159}
160EXPORT_SYMBOL(strchr);
161#endif
162
163#ifdef __HAVE_ARCH_STRRCHR
/*
 * strrchr - find the last occurrence of the byte c in the string s.
 *
 * res starts as 0 (NULL). The whole string is walked with lodsb; every time
 * the loaded byte equals c (kept in AH), res is overwritten with ESI - 1,
 * the address of that byte. The match test precedes the end-of-string test,
 * so searching for 0 returns the address of the terminating NUL.
 *
 * Returns the address of the last match, or NULL if c does not occur.
 */
164char *strrchr(const char * s, int c)
165{
166 int d0, d1;
167 char * res;
168 asm volatile( "movb %%al,%%ah\n"
169 "1:\tlodsb\n\t"
170 "cmpb %%ah,%%al\n\t"
171 "jne 2f\n\t"
172 "leal -1(%%esi),%0\n"
173 "2:\ttestb %%al,%%al\n\t"
174 "jne 1b"
175 :"=g" (res), "=&S" (d0), "=&a" (d1)
176 :"0" (0),"1" (s),"2" (c)
177 :"memory");
178 return res;
179}
180EXPORT_SYMBOL(strrchr);
181#endif
182
183#ifdef __HAVE_ARCH_STRLEN
/*
 * strlen - return the number of bytes in s before the terminating NUL.
 *
 * "repne scasb" with AL = 0 scans from s, decrementing ECX from 0xffffffff
 * once per byte examined (including the NUL). "notl" then yields the number
 * of bytes examined and "decl" removes the NUL from that count.
 */
184size_t strlen(const char * s)
185{
186 int d0;
187 int res;
188 asm volatile( "repne\n\t"
189 "scasb\n\t"
190 "notl %0\n\t"
191 "decl %0"
192 :"=c" (res), "=&D" (d0)
193 :"1" (s),"a" (0), "0" (0xffffffffu)
194 :"memory");
195 return res;
196}
197EXPORT_SYMBOL(strlen);
198#endif
199
200#ifdef __HAVE_ARCH_MEMCHR
/*
 * memchr - find the first byte equal to c in the count bytes at cs.
 *
 * A zero count returns NULL without touching memory. Otherwise
 * "repne scasb" scans up to count bytes for the low byte of c (in AL).
 * On a match EDI points one past the matching byte, so the result is
 * EDI - 1; if nothing matched, EDI is set to 1 first so that the same
 * decrement produces NULL.
 */
201void *memchr(const void *cs,int c,size_t count)
202{
203 int d0;
204 void *res;
205 if (!count)
206 return NULL;
207 asm volatile( "repne\n\t"
208 "scasb\n\t"
209 "je 1f\n\t"
210 "movl $1,%0\n"
211 "1:\tdecl %0"
212 :"=D" (res), "=&c" (d0)
213 :"a" (c),"0" (cs),"1" (count)
214 :"memory");
215 return res;
216}
217EXPORT_SYMBOL(memchr);
218#endif
219
220#ifdef __HAVE_ARCH_MEMSCAN
/*
 * memscan - scan size bytes starting at addr for the byte c.
 *
 * A zero size returns addr unchanged. Otherwise "repnz scasb" scans for the
 * low byte of c. If a match was found, EDI (one past the match) is stepped
 * back so the returned pointer addresses the matching byte; if not, EDI is
 * left as is, i.e. one byte past the scanned area (addr + size).
 */
221void *memscan(void * addr, int c, size_t size)
222{
223 if (!size)
224 return addr;
225 asm volatile("repnz; scasb\n\t"
226 "jnz 1f\n\t"
227 "dec %%edi\n"
228 "1:"
229 : "=D" (addr), "=c" (size)
230 : "0" (addr), "1" (size), "a" (c)
231 : "memory");
232 return addr;
233}
234EXPORT_SYMBOL(memscan);
235#endif
236
237#ifdef __HAVE_ARCH_STRNLEN
/*
 * strnlen - length of s, looking at no more than count bytes.
 *
 * EAX walks the string starting from s (kept in ECX); EDX holds the
 * remaining count. Control enters at label 2, which decrements EDX and
 * leaves the loop once it wraps to -1, i.e. after count bytes have been
 * examined. Label 1 stops early when the byte at EAX is NUL.
 *
 * The result is EAX - s: the number of non-NUL bytes seen, at most count.
 */
238size_t strnlen(const char *s, size_t count)
239{
240 int d0;
241 int res;
242 asm volatile( "movl %2,%0\n\t"
243 "jmp 2f\n"
244 "1:\tcmpb $0,(%0)\n\t"
245 "je 3f\n\t"
246 "incl %0\n"
247 "2:\tdecl %1\n\t"
248 "cmpl $-1,%1\n\t"
249 "jne 1b\n"
250 "3:\tsubl %2,%0"
251 :"=a" (res), "=&d" (d0)
252 :"c" (s),"1" (count)
253 :"memory");
254 return res;
255}
256EXPORT_SYMBOL(strnlen);
257#endif
diff --git a/arch/i386/mach-es7000/es7000plat.c b/arch/i386/mach-es7000/es7000plat.c
index 9be6ceabf042..ab99072d3f9a 100644
--- a/arch/i386/mach-es7000/es7000plat.c
+++ b/arch/i386/mach-es7000/es7000plat.c
@@ -40,6 +40,7 @@
40#include <asm/smp.h> 40#include <asm/smp.h>
41#include <asm/apicdef.h> 41#include <asm/apicdef.h>
42#include "es7000.h" 42#include "es7000.h"
43#include <mach_mpparse.h>
43 44
44/* 45/*
45 * ES7000 Globals 46 * ES7000 Globals
@@ -174,6 +175,53 @@ find_unisys_acpi_oem_table(unsigned long *oem_addr)
174} 175}
175#endif 176#endif
176 177
178/*
179 * This file is also compiled when CONFIG_X86_GENERICARCH is set. The generic
180 * arch already defines the following functions (mach-generic/es7000.c), so
181 * there is no need to define them in that case.
182 */
183#ifndef CONFIG_X86_GENERICARCH
184void es7000_sw_apic(void);
/* Boot-time hook: simply forwards to es7000_sw_apic(). */
185void __init enable_apic_mode(void)
186{
187 es7000_sw_apic();
188 return;
189}
190
/*
 * mps_oem_check - check an MP config table for a Unisys OEM table.
 *
 * If the table carries an OEM pointer (mpc_oemptr non-zero) and the first
 * six characters of oem are "UNISYS", the OEM table is passed to
 * parse_unisys_oem() and its result is returned. In every other case the
 * function returns 0. productid is not used.
 */
191__init int mps_oem_check(struct mp_config_table *mpc, char *oem,
192 char *productid)
193{
194 if (mpc->mpc_oemptr) {
195 struct mp_config_oemtable *oem_table =
196 (struct mp_config_oemtable *)mpc->mpc_oemptr;
197 if (!strncmp(oem, "UNISYS", 6))
198 return parse_unisys_oem((char *)oem_table);
199 }
200 return 0;
201}
202#ifdef CONFIG_ACPI
203/* Hook from generic ACPI tables.c */
/*
 * ACPI variant: when find_unisys_acpi_oem_table() returns 0, either parse
 * the table at oem_addr (if es7000_check_dsdt() is non-zero) and return the
 * parser's result, or call setup_unisys() and return 1. Returns 0 when
 * find_unisys_acpi_oem_table() returns non-zero. Neither oem_id nor
 * oem_table_id is examined here.
 */
204int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
205{
206 unsigned long oem_addr;
207 if (!find_unisys_acpi_oem_table(&oem_addr)) {
208 if (es7000_check_dsdt())
209 return parse_unisys_oem((char *)oem_addr);
210 else {
211 setup_unisys();
212 return 1;
213 }
214 }
215 return 0;
216}
217#else
/* Stub used when CONFIG_ACPI is not set: always returns 0. */
218int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
219{
220 return 0;
221}
222#endif
223#endif /* CONFIG_X86_GENERICARCH */
224
177static void 225static void
178es7000_spin(int n) 226es7000_spin(int n)
179{ 227{
diff --git a/arch/i386/mach-generic/es7000.c b/arch/i386/mach-generic/es7000.c
index b47f951c0ec2..4742626f08c4 100644
--- a/arch/i386/mach-generic/es7000.c
+++ b/arch/i386/mach-generic/es7000.c
@@ -66,4 +66,4 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
66} 66}
67#endif 67#endif
68 68
69struct genapic apic_es7000 = APIC_INIT("es7000", probe_es7000); 69struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/i386/mach-visws/traps.c b/arch/i386/mach-visws/traps.c
index 5199bd03254a..843b67acf43b 100644
--- a/arch/i386/mach-visws/traps.c
+++ b/arch/i386/mach-visws/traps.c
@@ -23,13 +23,13 @@ static __init void lithium_init(void)
23 set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS); 23 set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
24 24
25 if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || 25 if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
26 (li_pcia_read16(PCI_DEVICE_ID) != PCI_VENDOR_ID_SGI_LITHIUM)) { 26 (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
27 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A'); 27 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A');
28 panic("This machine is not SGI Visual Workstation 320/540"); 28 panic("This machine is not SGI Visual Workstation 320/540");
29 } 29 }
30 30
31 if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || 31 if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
32 (li_pcib_read16(PCI_DEVICE_ID) != PCI_VENDOR_ID_SGI_LITHIUM)) { 32 (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
33 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B'); 33 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B');
34 panic("This machine is not SGI Visual Workstation 320/540"); 34 panic("This machine is not SGI Visual Workstation 320/540");
35 } 35 }
diff --git a/arch/i386/mach-voyager/voyager_thread.c b/arch/i386/mach-voyager/voyager_thread.c
index b4b24e0e45e1..f9d595338159 100644
--- a/arch/i386/mach-voyager/voyager_thread.c
+++ b/arch/i386/mach-voyager/voyager_thread.c
@@ -52,7 +52,7 @@ execute(const char *string)
52 NULL, 52 NULL,
53 }; 53 };
54 54
55 if ((ret = call_usermodehelper(argv[0], argv, envp, 1)) != 0) { 55 if ((ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) {
56 printk(KERN_ERR "Voyager failed to run \"%s\": %i\n", 56 printk(KERN_ERR "Voyager failed to run \"%s\": %i\n",
57 string, ret); 57 string, ret);
58 } 58 }
diff --git a/arch/i386/math-emu/fpu_entry.c b/arch/i386/math-emu/fpu_entry.c
index ddf8fa3bbd01..1853524c8b57 100644
--- a/arch/i386/math-emu/fpu_entry.c
+++ b/arch/i386/math-emu/fpu_entry.c
@@ -754,7 +754,7 @@ int save_i387_soft(void *s387, struct _fpstate __user * buf)
754 return -1; 754 return -1;
755 if ( offset ) 755 if ( offset )
756 if (__copy_to_user(d+other, (u_char *)&S387->st_space, offset)) 756 if (__copy_to_user(d+other, (u_char *)&S387->st_space, offset))
757 return -1 757 return -1;
758 RE_ENTRANT_CHECK_ON; 758 RE_ENTRANT_CHECK_ON;
759 759
760 return 1; 760 return 1;
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index 29d7d61543a1..e92a10124935 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -303,6 +303,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
303 struct vm_area_struct * vma; 303 struct vm_area_struct * vma;
304 unsigned long address; 304 unsigned long address;
305 int write, si_code; 305 int write, si_code;
306 int fault;
306 307
307 /* get the address */ 308 /* get the address */
308 address = read_cr2(); 309 address = read_cr2();
@@ -422,20 +423,18 @@ good_area:
422 * make sure we exit gracefully rather than endlessly redo 423 * make sure we exit gracefully rather than endlessly redo
423 * the fault. 424 * the fault.
424 */ 425 */
425 switch (handle_mm_fault(mm, vma, address, write)) { 426 fault = handle_mm_fault(mm, vma, address, write);
426 case VM_FAULT_MINOR: 427 if (unlikely(fault & VM_FAULT_ERROR)) {
427 tsk->min_flt++; 428 if (fault & VM_FAULT_OOM)
428 break;
429 case VM_FAULT_MAJOR:
430 tsk->maj_flt++;
431 break;
432 case VM_FAULT_SIGBUS:
433 goto do_sigbus;
434 case VM_FAULT_OOM:
435 goto out_of_memory; 429 goto out_of_memory;
436 default: 430 else if (fault & VM_FAULT_SIGBUS)
437 BUG(); 431 goto do_sigbus;
432 BUG();
438 } 433 }
434 if (fault & VM_FAULT_MAJOR)
435 tsk->maj_flt++;
436 else
437 tsk->min_flt++;
439 438
440 /* 439 /*
441 * Did it hit the DOS screen memory VA from vm86 mode? 440 * Did it hit the DOS screen memory VA from vm86 mode?
@@ -458,6 +457,11 @@ bad_area:
458bad_area_nosemaphore: 457bad_area_nosemaphore:
459 /* User mode accesses just cause a SIGSEGV */ 458 /* User mode accesses just cause a SIGSEGV */
460 if (error_code & 4) { 459 if (error_code & 4) {
460 /*
461 * It's possible to have interrupts off here.
462 */
463 local_irq_enable();
464
461 /* 465 /*
462 * Valid to do another page fault here because this one came 466 * Valid to do another page fault here because this one came
463 * from user space. 467 * from user space.
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index b22ce8d6b1ba..e1a9a805c445 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -87,7 +87,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
87 if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { 87 if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
88 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); 88 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
89 89
90 paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT); 90 paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
91 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); 91 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
92 BUG_ON(page_table != pte_offset_kernel(pmd, 0)); 92 BUG_ON(page_table != pte_offset_kernel(pmd, 0));
93 } 93 }
@@ -471,8 +471,13 @@ void zap_low_mappings (void)
471 flush_tlb_all(); 471 flush_tlb_all();
472} 472}
473 473
474int nx_enabled = 0;
475
476#ifdef CONFIG_X86_PAE
477
474static int disable_nx __initdata = 0; 478static int disable_nx __initdata = 0;
475u64 __supported_pte_mask __read_mostly = ~_PAGE_NX; 479u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
480EXPORT_SYMBOL_GPL(__supported_pte_mask);
476 481
477/* 482/*
478 * noexec = on|off 483 * noexec = on|off
@@ -499,9 +504,6 @@ static int __init noexec_setup(char *str)
499} 504}
500early_param("noexec", noexec_setup); 505early_param("noexec", noexec_setup);
501 506
502int nx_enabled = 0;
503#ifdef CONFIG_X86_PAE
504
505static void __init set_nx(void) 507static void __init set_nx(void)
506{ 508{
507 unsigned int v[4], l, h; 509 unsigned int v[4], l, h;
@@ -751,8 +753,7 @@ void __init pgtable_cache_init(void)
751 PTRS_PER_PMD*sizeof(pmd_t), 753 PTRS_PER_PMD*sizeof(pmd_t),
752 PTRS_PER_PMD*sizeof(pmd_t), 754 PTRS_PER_PMD*sizeof(pmd_t),
753 SLAB_PANIC, 755 SLAB_PANIC,
754 pmd_ctor, 756 pmd_ctor);
755 NULL);
756 if (!SHARED_KERNEL_PMD) { 757 if (!SHARED_KERNEL_PMD) {
757 /* If we're in PAE mode and have a non-shared 758 /* If we're in PAE mode and have a non-shared
758 kernel pmd, then the pgd size must be a 759 kernel pmd, then the pgd size must be a
@@ -799,6 +800,7 @@ void mark_rodata_ro(void)
799 unsigned long start = PFN_ALIGN(_text); 800 unsigned long start = PFN_ALIGN(_text);
800 unsigned long size = PFN_ALIGN(_etext) - start; 801 unsigned long size = PFN_ALIGN(_etext) - start;
801 802
803#ifndef CONFIG_KPROBES
802#ifdef CONFIG_HOTPLUG_CPU 804#ifdef CONFIG_HOTPLUG_CPU
803 /* It must still be possible to apply SMP alternatives. */ 805 /* It must still be possible to apply SMP alternatives. */
804 if (num_possible_cpus() <= 1) 806 if (num_possible_cpus() <= 1)
@@ -808,7 +810,7 @@ void mark_rodata_ro(void)
808 size >> PAGE_SHIFT, PAGE_KERNEL_RX); 810 size >> PAGE_SHIFT, PAGE_KERNEL_RX);
809 printk("Write protecting the kernel text: %luk\n", size >> 10); 811 printk("Write protecting the kernel text: %luk\n", size >> 10);
810 } 812 }
811 813#endif
812 start += size; 814 start += size;
813 size = (unsigned long)__end_rodata - start; 815 size = (unsigned long)__end_rodata - start;
814 change_page_attr(virt_to_page(start), 816 change_page_attr(virt_to_page(start),
diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c
index fff08ae7b5ed..0b278315d737 100644
--- a/arch/i386/mm/ioremap.c
+++ b/arch/i386/mm/ioremap.c
@@ -196,7 +196,7 @@ void iounmap(volatile void __iomem *addr)
196 /* Reset the direct mapping. Can block */ 196 /* Reset the direct mapping. Can block */
197 if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) { 197 if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) {
198 change_page_attr(virt_to_page(__va(p->phys_addr)), 198 change_page_attr(virt_to_page(__va(p->phys_addr)),
199 p->size >> PAGE_SHIFT, 199 get_vm_area_size(p) >> PAGE_SHIFT,
200 PAGE_KERNEL); 200 PAGE_KERNEL);
201 global_flush_tlb(); 201 global_flush_tlb();
202 } 202 }
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index 47bd477c8ecc..8927222b3ab2 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -60,7 +60,7 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
60 address = __pa(address); 60 address = __pa(address);
61 addr = address & LARGE_PAGE_MASK; 61 addr = address & LARGE_PAGE_MASK;
62 pbase = (pte_t *)page_address(base); 62 pbase = (pte_t *)page_address(base);
63 paravirt_alloc_pt(page_to_pfn(base)); 63 paravirt_alloc_pt(&init_mm, page_to_pfn(base));
64 for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { 64 for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
65 set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, 65 set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
66 addr == address ? prot : ref_prot)); 66 addr == address ? prot : ref_prot));
@@ -68,14 +68,23 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
68 return base; 68 return base;
69} 69}
70 70
71static void flush_kernel_map(void *arg) 71static void cache_flush_page(struct page *p)
72{ 72{
73 unsigned long adr = (unsigned long)arg; 73 unsigned long adr = (unsigned long)page_address(p);
74 int i;
75 for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
76 asm volatile("clflush (%0)" :: "r" (adr + i));
77}
78
79static void flush_kernel_map(void *arg)
80{
81 struct list_head *lh = (struct list_head *)arg;
82 struct page *p;
74 83
75 if (adr && cpu_has_clflush) { 84 /* High level code is not ready for clflush yet */
76 int i; 85 if (cpu_has_clflush) {
77 for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) 86 list_for_each_entry (p, lh, lru)
78 asm volatile("clflush (%0)" :: "r" (adr + i)); 87 cache_flush_page(p);
79 } else if (boot_cpu_data.x86_model >= 4) 88 } else if (boot_cpu_data.x86_model >= 4)
80 wbinvd(); 89 wbinvd();
81 90
@@ -127,6 +136,12 @@ static inline void revert_page(struct page *kpte_page, unsigned long address)
127 ref_prot)); 136 ref_prot));
128} 137}
129 138
/*
 * Queue kpte_page on df_list unless it is already marked: PG_arch_1 is set
 * atomically in the page flags, and the page is added to the list only if
 * the bit was previously clear, so a page is not added twice while the bit
 * stays set.
 */
139static inline void save_page(struct page *kpte_page)
140{
141 if (!test_and_set_bit(PG_arch_1, &kpte_page->flags))
142 list_add(&kpte_page->lru, &df_list);
143}
144
130static int 145static int
131__change_page_attr(struct page *page, pgprot_t prot) 146__change_page_attr(struct page *page, pgprot_t prot)
132{ 147{
@@ -141,6 +156,9 @@ __change_page_attr(struct page *page, pgprot_t prot)
141 if (!kpte) 156 if (!kpte)
142 return -EINVAL; 157 return -EINVAL;
143 kpte_page = virt_to_page(kpte); 158 kpte_page = virt_to_page(kpte);
159 BUG_ON(PageLRU(kpte_page));
160 BUG_ON(PageCompound(kpte_page));
161
144 if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { 162 if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
145 if (!pte_huge(*kpte)) { 163 if (!pte_huge(*kpte)) {
146 set_pte_atomic(kpte, mk_pte(page, prot)); 164 set_pte_atomic(kpte, mk_pte(page, prot));
@@ -170,20 +188,20 @@ __change_page_attr(struct page *page, pgprot_t prot)
170 * time (not via split_large_page) and in turn we must not 188 * time (not via split_large_page) and in turn we must not
171 * replace it with a largepage. 189 * replace it with a largepage.
172 */ 190 */
191
192 save_page(kpte_page);
173 if (!PageReserved(kpte_page)) { 193 if (!PageReserved(kpte_page)) {
174 if (cpu_has_pse && (page_private(kpte_page) == 0)) { 194 if (cpu_has_pse && (page_private(kpte_page) == 0)) {
175 ClearPagePrivate(kpte_page);
176 paravirt_release_pt(page_to_pfn(kpte_page)); 195 paravirt_release_pt(page_to_pfn(kpte_page));
177 list_add(&kpte_page->lru, &df_list);
178 revert_page(kpte_page, address); 196 revert_page(kpte_page, address);
179 } 197 }
180 } 198 }
181 return 0; 199 return 0;
182} 200}
183 201
184static inline void flush_map(void *adr) 202static inline void flush_map(struct list_head *l)
185{ 203{
186 on_each_cpu(flush_kernel_map, adr, 1, 1); 204 on_each_cpu(flush_kernel_map, l, 1, 1);
187} 205}
188 206
189/* 207/*
@@ -225,11 +243,13 @@ void global_flush_tlb(void)
225 spin_lock_irq(&cpa_lock); 243 spin_lock_irq(&cpa_lock);
226 list_replace_init(&df_list, &l); 244 list_replace_init(&df_list, &l);
227 spin_unlock_irq(&cpa_lock); 245 spin_unlock_irq(&cpa_lock);
228 if (!cpu_has_clflush) 246 flush_map(&l);
229 flush_map(NULL);
230 list_for_each_entry_safe(pg, next, &l, lru) { 247 list_for_each_entry_safe(pg, next, &l, lru) {
231 if (cpu_has_clflush) 248 list_del(&pg->lru);
232 flush_map(page_address(pg)); 249 clear_bit(PG_arch_1, &pg->flags);
250 if (PageReserved(pg) || !cpu_has_pse || page_private(pg) != 0)
251 continue;
252 ClearPagePrivate(pg);
233 __free_page(pg); 253 __free_page(pg);
234 } 254 }
235} 255}
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index 8d7c0864cc04..01437c46baae 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -235,7 +235,7 @@ static inline void pgd_list_del(pgd_t *pgd)
235 235
236#if (PTRS_PER_PMD == 1) 236#if (PTRS_PER_PMD == 1)
237/* Non-PAE pgd constructor */ 237/* Non-PAE pgd constructor */
238void pgd_ctor(void *pgd) 238static void pgd_ctor(void *pgd)
239{ 239{
240 unsigned long flags; 240 unsigned long flags;
241 241
@@ -257,7 +257,7 @@ void pgd_ctor(void *pgd)
257} 257}
258#else /* PTRS_PER_PMD > 1 */ 258#else /* PTRS_PER_PMD > 1 */
259/* PAE pgd constructor */ 259/* PAE pgd constructor */
260void pgd_ctor(void *pgd) 260static void pgd_ctor(void *pgd)
261{ 261{
262 /* PAE, kernel PMD may be shared */ 262 /* PAE, kernel PMD may be shared */
263 263
@@ -276,7 +276,7 @@ void pgd_ctor(void *pgd)
276} 276}
277#endif /* PTRS_PER_PMD */ 277#endif /* PTRS_PER_PMD */
278 278
279void pgd_dtor(void *pgd) 279static void pgd_dtor(void *pgd)
280{ 280{
281 unsigned long flags; /* can be called from interrupt context */ 281 unsigned long flags; /* can be called from interrupt context */
282 282
diff --git a/arch/i386/pci/acpi.c b/arch/i386/pci/acpi.c
index b33aea845f58..bc8a44bddaa7 100644
--- a/arch/i386/pci/acpi.c
+++ b/arch/i386/pci/acpi.c
@@ -8,20 +8,42 @@
8struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum) 8struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum)
9{ 9{
10 struct pci_bus *bus; 10 struct pci_bus *bus;
11 struct pci_sysdata *sd;
12 int pxm;
13
14 /* Allocate per-root-bus (not per bus) arch-specific data.
15 * TODO: leak; this memory is never freed.
16 * It's arguable whether it's worth the trouble to care.
17 */
18 sd = kzalloc(sizeof(*sd), GFP_KERNEL);
19 if (!sd) {
20 printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
21 return NULL;
22 }
11 23
12 if (domain != 0) { 24 if (domain != 0) {
13 printk(KERN_WARNING "PCI: Multiple domains not supported\n"); 25 printk(KERN_WARNING "PCI: Multiple domains not supported\n");
26 kfree(sd);
14 return NULL; 27 return NULL;
15 } 28 }
16 29
17 bus = pcibios_scan_root(busnum); 30 sd->node = -1;
31
32 pxm = acpi_get_pxm(device->handle);
33#ifdef CONFIG_ACPI_NUMA
34 if (pxm >= 0)
35 sd->node = pxm_to_node(pxm);
36#endif
37
38 bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd);
39 if (!bus)
40 kfree(sd);
41
18#ifdef CONFIG_ACPI_NUMA 42#ifdef CONFIG_ACPI_NUMA
19 if (bus != NULL) { 43 if (bus != NULL) {
20 int pxm = acpi_get_pxm(device->handle);
21 if (pxm >= 0) { 44 if (pxm >= 0) {
22 bus->sysdata = (void *)(unsigned long)pxm_to_node(pxm); 45 printk("bus %d -> pxm %d -> node %d\n",
23 printk("bus %d -> pxm %d -> node %ld\n", 46 busnum, pxm, sd->node);
24 busnum, pxm, (long)(bus->sysdata));
25 } 47 }
26 } 48 }
27#endif 49#endif
diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
index 3f78d4d8ecf3..85503deeda46 100644
--- a/arch/i386/pci/common.c
+++ b/arch/i386/pci/common.c
@@ -293,6 +293,7 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = {
293struct pci_bus * __devinit pcibios_scan_root(int busnum) 293struct pci_bus * __devinit pcibios_scan_root(int busnum)
294{ 294{
295 struct pci_bus *bus = NULL; 295 struct pci_bus *bus = NULL;
296 struct pci_sysdata *sd;
296 297
297 dmi_check_system(pciprobe_dmi_table); 298 dmi_check_system(pciprobe_dmi_table);
298 299
@@ -303,9 +304,19 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
303 } 304 }
304 } 305 }
305 306
307 /* Allocate per-root-bus (not per bus) arch-specific data.
308 * TODO: leak; this memory is never freed.
309 * It's arguable whether it's worth the trouble to care.
310 */
311 sd = kzalloc(sizeof(*sd), GFP_KERNEL);
312 if (!sd) {
313 printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
314 return NULL;
315 }
316
306 printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); 317 printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum);
307 318
308 return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, NULL); 319 return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd);
309} 320}
310 321
311extern u8 pci_cache_line_size; 322extern u8 pci_cache_line_size;
diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c
index b95b42950ed4..e7306dbf6c42 100644
--- a/arch/i386/pci/fixup.c
+++ b/arch/i386/pci/fixup.c
@@ -118,12 +118,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, pci
118static void pci_fixup_via_northbridge_bug(struct pci_dev *d) 118static void pci_fixup_via_northbridge_bug(struct pci_dev *d)
119{ 119{
120 u8 v; 120 u8 v;
121 u8 revision;
122 int where = 0x55; 121 int where = 0x55;
123 int mask = 0x1f; /* clear bits 5, 6, 7 by default */ 122 int mask = 0x1f; /* clear bits 5, 6, 7 by default */
124 123
125 pci_read_config_byte(d, PCI_REVISION_ID, &revision);
126
127 if (d->device == PCI_DEVICE_ID_VIA_8367_0) { 124 if (d->device == PCI_DEVICE_ID_VIA_8367_0) {
128 /* fix pci bus latency issues resulted by NB bios error 125 /* fix pci bus latency issues resulted by NB bios error
129 it appears on bug free^Wreduced kt266x's bios forces 126 it appears on bug free^Wreduced kt266x's bios forces
@@ -133,8 +130,8 @@ static void pci_fixup_via_northbridge_bug(struct pci_dev *d)
133 where = 0x95; /* the memory write queue timer register is 130 where = 0x95; /* the memory write queue timer register is
134 different for the KT266x's: 0x95 not 0x55 */ 131 different for the KT266x's: 0x95 not 0x55 */
135 } else if (d->device == PCI_DEVICE_ID_VIA_8363_0 && 132 } else if (d->device == PCI_DEVICE_ID_VIA_8363_0 &&
136 (revision == VIA_8363_KL133_REVISION_ID || 133 (d->revision == VIA_8363_KL133_REVISION_ID ||
137 revision == VIA_8363_KM133_REVISION_ID)) { 134 d->revision == VIA_8363_KM133_REVISION_ID)) {
138 mask = 0x3f; /* clear only bits 6 and 7; clearing bit 5 135 mask = 0x3f; /* clear only bits 6 and 7; clearing bit 5
139 causes screen corruption on the KL133/KM133 */ 136 causes screen corruption on the KL133/KM133 */
140 } 137 }
@@ -142,7 +139,7 @@ static void pci_fixup_via_northbridge_bug(struct pci_dev *d)
142 pci_read_config_byte(d, where, &v); 139 pci_read_config_byte(d, where, &v);
143 if (v & ~mask) { 140 if (v & ~mask) {
144 printk(KERN_WARNING "Disabling VIA memory write queue (PCI ID %04x, rev %02x): [%02x] %02x & %02x -> %02x\n", \ 141 printk(KERN_WARNING "Disabling VIA memory write queue (PCI ID %04x, rev %02x): [%02x] %02x & %02x -> %02x\n", \
145 d->device, revision, where, v, mask, v & mask); 142 d->device, d->revision, where, v, mask, v & mask);
146 v &= mask; 143 v &= mask;
147 pci_write_config_byte(d, where, v); 144 pci_write_config_byte(d, where, v);
148 } 145 }
diff --git a/arch/i386/pci/mmconfig-shared.c b/arch/i386/pci/mmconfig-shared.c
index c7cabeed4d7b..4df637e34f81 100644
--- a/arch/i386/pci/mmconfig-shared.c
+++ b/arch/i386/pci/mmconfig-shared.c
@@ -24,6 +24,9 @@
24 24
25DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS); 25DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS);
26 26
27/* Indicate if the mmcfg resources have been placed into the resource table. */
28static int __initdata pci_mmcfg_resources_inserted;
29
27/* K8 systems have some devices (typically in the builtin northbridge) 30/* K8 systems have some devices (typically in the builtin northbridge)
28 that are only accessible using type1 31 that are only accessible using type1
29 Normally this can be expressed in the MCFG by not listing them 32 Normally this can be expressed in the MCFG by not listing them
@@ -170,7 +173,7 @@ static int __init pci_mmcfg_check_hostbridge(void)
170 return name != NULL; 173 return name != NULL;
171} 174}
172 175
173static void __init pci_mmcfg_insert_resources(void) 176static void __init pci_mmcfg_insert_resources(unsigned long resource_flags)
174{ 177{
175#define PCI_MMCFG_RESOURCE_NAME_LEN 19 178#define PCI_MMCFG_RESOURCE_NAME_LEN 19
176 int i; 179 int i;
@@ -194,10 +197,13 @@ static void __init pci_mmcfg_insert_resources(void)
194 cfg->pci_segment); 197 cfg->pci_segment);
195 res->start = cfg->address; 198 res->start = cfg->address;
196 res->end = res->start + (num_buses << 20) - 1; 199 res->end = res->start + (num_buses << 20) - 1;
197 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; 200 res->flags = IORESOURCE_MEM | resource_flags;
198 insert_resource(&iomem_resource, res); 201 insert_resource(&iomem_resource, res);
199 names += PCI_MMCFG_RESOURCE_NAME_LEN; 202 names += PCI_MMCFG_RESOURCE_NAME_LEN;
200 } 203 }
204
205 /* Mark that the resources have been inserted. */
206 pci_mmcfg_resources_inserted = 1;
201} 207}
202 208
203static void __init pci_mmcfg_reject_broken(int type) 209static void __init pci_mmcfg_reject_broken(int type)
@@ -267,7 +273,43 @@ void __init pci_mmcfg_init(int type)
267 if (type == 1) 273 if (type == 1)
268 unreachable_devices(); 274 unreachable_devices();
269 if (known_bridge) 275 if (known_bridge)
270 pci_mmcfg_insert_resources(); 276 pci_mmcfg_insert_resources(IORESOURCE_BUSY);
271 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; 277 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
278 } else {
279 /*
280 * Signal not to attempt to insert mmcfg resources because
281 * the architecture mmcfg setup could not initialize.
282 */
283 pci_mmcfg_resources_inserted = 1;
272 } 284 }
273} 285}
286
/*
 * Late fallback for inserting the MMCONFIG resources.
 *
 * Returns 1 without doing anything when the resources were already
 * inserted (or insertion was disabled via pci_mmcfg_resources_inserted),
 * when PCI_PROBE_MMCONF is not set in pci_probe, or when there is no usable
 * MCFG entry. Otherwise calls pci_mmcfg_insert_resources() with no extra
 * resource flags and returns 0.
 */
287static int __init pci_mmcfg_late_insert_resources(void)
288{
289 /*
290 * If resources are already inserted or we are not using MMCONFIG,
291 * don't insert the resources.
292 */
293 if ((pci_mmcfg_resources_inserted == 1) ||
294 (pci_probe & PCI_PROBE_MMCONF) == 0 ||
295 (pci_mmcfg_config_num == 0) ||
296 (pci_mmcfg_config == NULL) ||
297 (pci_mmcfg_config[0].address == 0))
298 return 1;
299
300 /*
301 * Attempt to insert the mmcfg resources but not with the busy flag
302 * marked so it won't cause request errors when __request_region is
303 * called.
304 */
305 pci_mmcfg_insert_resources(0);
306
307 return 0;
308}
307 return 0;
308}
309
310/*
311 * Perform MMCONFIG resource insertion after PCI initialization to allow for
312 * misprogrammed MCFG tables that state larger sizes but actually conflict
313 * with other system resources.
314 */
315late_initcall(pci_mmcfg_late_insert_resources);
diff --git a/arch/i386/video/Makefile b/arch/i386/video/Makefile
new file mode 100644
index 000000000000..2c447c94adcc
--- /dev/null
+++ b/arch/i386/video/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_FB) += fbdev.o
diff --git a/arch/i386/video/fbdev.c b/arch/i386/video/fbdev.c
new file mode 100644
index 000000000000..48fb38d7d2c0
--- /dev/null
+++ b/arch/i386/video/fbdev.c
@@ -0,0 +1,32 @@
1/*
2 * arch/i386/video/fbdev.c - i386 Framebuffer
3 *
4 * Copyright (C) 2007 Antonino Daplas <adaplas@gmail.com>
5 *
6 * This file is subject to the terms and conditions of the GNU General Public
7 * License. See the file COPYING in the main directory of this archive
8 * for more details.
9 *
10 */
11#include <linux/fb.h>
12#include <linux/pci.h>
13
14int fb_is_primary_device(struct fb_info *info)
15{
16 struct device *device = info->device;
17 struct pci_dev *pci_dev = NULL;
18 struct resource *res = NULL;
19 int retval = 0;
20
21 if (device)
22 pci_dev = to_pci_dev(device);
23
24 if (pci_dev)
25 res = &pci_dev->resource[PCI_ROM_RESOURCE];
26
27 if (res && res->flags & IORESOURCE_ROM_SHADOW)
28 retval = 1;
29
30 return retval;
31}
32EXPORT_SYMBOL(fb_is_primary_device);
diff --git a/arch/i386/xen/Kconfig b/arch/i386/xen/Kconfig
new file mode 100644
index 000000000000..9df99e1885a4
--- /dev/null
+++ b/arch/i386/xen/Kconfig
@@ -0,0 +1,11 @@
1#
2# This Kconfig describes xen options
3#
4
5config XEN
6 bool "Enable support for Xen hypervisor"
7 depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES
8 help
9 This is the Linux Xen port. Enabling this will allow the
10 kernel to boot in a paravirtualized environment under the
11 Xen hypervisor.
diff --git a/arch/i386/xen/Makefile b/arch/i386/xen/Makefile
new file mode 100644
index 000000000000..343df246bd3e
--- /dev/null
+++ b/arch/i386/xen/Makefile
@@ -0,0 +1,4 @@
1obj-y := enlighten.o setup.o features.o multicalls.o mmu.o \
2 events.o time.o manage.o xen-asm.o
3
4obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c
new file mode 100644
index 000000000000..9a8c1181c001
--- /dev/null
+++ b/arch/i386/xen/enlighten.c
@@ -0,0 +1,1144 @@
1/*
2 * Core of Xen paravirt_ops implementation.
3 *
4 * This file contains the xen_paravirt_ops structure itself, and the
5 * implementations for:
6 * - privileged instructions
7 * - interrupt flags
8 * - segment operations
9 * - booting and setup
10 *
11 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
12 */
13
14#include <linux/kernel.h>
15#include <linux/init.h>
16#include <linux/smp.h>
17#include <linux/preempt.h>
18#include <linux/hardirq.h>
19#include <linux/percpu.h>
20#include <linux/delay.h>
21#include <linux/start_kernel.h>
22#include <linux/sched.h>
23#include <linux/bootmem.h>
24#include <linux/module.h>
25#include <linux/mm.h>
26#include <linux/page-flags.h>
27#include <linux/highmem.h>
28#include <linux/smp.h>
29
30#include <xen/interface/xen.h>
31#include <xen/interface/physdev.h>
32#include <xen/interface/vcpu.h>
33#include <xen/interface/sched.h>
34#include <xen/features.h>
35#include <xen/page.h>
36
37#include <asm/paravirt.h>
38#include <asm/page.h>
39#include <asm/xen/hypercall.h>
40#include <asm/xen/hypervisor.h>
41#include <asm/fixmap.h>
42#include <asm/processor.h>
43#include <asm/setup.h>
44#include <asm/desc.h>
45#include <asm/pgtable.h>
46#include <asm/tlbflush.h>
47#include <asm/reboot.h>
48
49#include "xen-ops.h"
50#include "mmu.h"
51#include "multicalls.h"
52
53EXPORT_SYMBOL_GPL(hypercall_page);
54
55DEFINE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode);
56
57DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
58DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
59DEFINE_PER_CPU(unsigned long, xen_cr3);
60
61struct start_info *xen_start_info;
62EXPORT_SYMBOL_GPL(xen_start_info);
63
64static /* __initdata */ struct shared_info dummy_shared_info;
65
66/*
67 * Point at some empty memory to start with. We map the real shared_info
68 * page as soon as fixmap is up and running.
69 */
70struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
71
72/*
73 * Flag to determine whether vcpu info placement is available on all
74 * VCPUs. We assume it is to start with, and then set it to zero on
75 * the first failure. This is because it can succeed on some VCPUs
76 * and not others, since it can involve hypervisor memory allocation,
77 * or because the guest failed to guarantee all the appropriate
78 * constraints on all VCPUs (ie buffer can't cross a page boundary).
79 *
80 * Note that any particular CPU may be using a placed vcpu structure,
81 * but we can only optimise if they all are.
82 *
83 * 0: not available, 1: available
84 */
85static int have_vcpu_info_placement = 1;
86
87static void __init xen_vcpu_setup(int cpu)
88{
89 struct vcpu_register_vcpu_info info;
90 int err;
91 struct vcpu_info *vcpup;
92
93 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
94
95 if (!have_vcpu_info_placement)
96 return; /* already tested, not available */
97
98 vcpup = &per_cpu(xen_vcpu_info, cpu);
99
100 info.mfn = virt_to_mfn(vcpup);
101 info.offset = offset_in_page(vcpup);
102
103 printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %x, offset %d\n",
104 cpu, vcpup, info.mfn, info.offset);
105
106 /* Check to see if the hypervisor will put the vcpu_info
107 structure where we want it, which allows direct access via
108 a percpu-variable. */
109 err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
110
111 if (err) {
112 printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
113 have_vcpu_info_placement = 0;
114 } else {
115 /* This cpu is using the registered vcpu info, even if
116 later ones fail to. */
117 per_cpu(xen_vcpu, cpu) = vcpup;
118
119 printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n",
120 cpu, vcpup);
121 }
122}
123
124static void __init xen_banner(void)
125{
126 printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
127 paravirt_ops.name);
128 printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
129}
130
131static void xen_cpuid(unsigned int *eax, unsigned int *ebx,
132 unsigned int *ecx, unsigned int *edx)
133{
134 unsigned maskedx = ~0;
135
136 /*
137 * Mask out inconvenient features, to try and disable as many
138 * unsupported kernel subsystems as possible.
139 */
140 if (*eax == 1)
141 maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */
142 (1 << X86_FEATURE_ACPI) | /* disable ACPI */
143 (1 << X86_FEATURE_ACC)); /* thermal monitoring */
144
145 asm(XEN_EMULATE_PREFIX "cpuid"
146 : "=a" (*eax),
147 "=b" (*ebx),
148 "=c" (*ecx),
149 "=d" (*edx)
150 : "0" (*eax), "2" (*ecx));
151 *edx &= maskedx;
152}
153
/* Write @val to debug register @reg via hypercall; the result is ignored. */
static void xen_set_debugreg(int reg, unsigned long val)
{
	HYPERVISOR_set_debugreg(reg, val);
}
158
/* Read debug register @reg via hypercall and return its value. */
static unsigned long xen_get_debugreg(int reg)
{
	return HYPERVISOR_get_debugreg(reg);
}
163
164static unsigned long xen_save_fl(void)
165{
166 struct vcpu_info *vcpu;
167 unsigned long flags;
168
169 vcpu = x86_read_percpu(xen_vcpu);
170
171 /* flag has opposite sense of mask */
172 flags = !vcpu->evtchn_upcall_mask;
173
174 /* convert to IF type flag
175 -0 -> 0x00000000
176 -1 -> 0xffffffff
177 */
178 return (-flags) & X86_EFLAGS_IF;
179}
180
181static void xen_restore_fl(unsigned long flags)
182{
183 struct vcpu_info *vcpu;
184
185 /* convert from IF type flag */
186 flags = !(flags & X86_EFLAGS_IF);
187
188 /* There's a one instruction preempt window here. We need to
189 make sure we're don't switch CPUs between getting the vcpu
190 pointer and updating the mask. */
191 preempt_disable();
192 vcpu = x86_read_percpu(xen_vcpu);
193 vcpu->evtchn_upcall_mask = flags;
194 preempt_enable_no_resched();
195
196 /* Doesn't matter if we get preempted here, because any
197 pending event will get dealt with anyway. */
198
199 if (flags == 0) {
200 preempt_check_resched();
201 barrier(); /* unmask then check (avoid races) */
202 if (unlikely(vcpu->evtchn_upcall_pending))
203 force_evtchn_callback();
204 }
205}
206
207static void xen_irq_disable(void)
208{
209 /* There's a one instruction preempt window here. We need to
210 make sure we're don't switch CPUs between getting the vcpu
211 pointer and updating the mask. */
212 preempt_disable();
213 x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
214 preempt_enable_no_resched();
215}
216
217static void xen_irq_enable(void)
218{
219 struct vcpu_info *vcpu;
220
221 /* There's a one instruction preempt window here. We need to
222 make sure we're don't switch CPUs between getting the vcpu
223 pointer and updating the mask. */
224 preempt_disable();
225 vcpu = x86_read_percpu(xen_vcpu);
226 vcpu->evtchn_upcall_mask = 0;
227 preempt_enable_no_resched();
228
229 /* Doesn't matter if we get preempted here, because any
230 pending event will get dealt with anyway. */
231
232 barrier(); /* unmask then check (avoid races) */
233 if (unlikely(vcpu->evtchn_upcall_pending))
234 force_evtchn_callback();
235}
236
237static void xen_safe_halt(void)
238{
239 /* Blocking includes an implicit local_irq_enable(). */
240 if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0)
241 BUG();
242}
243
244static void xen_halt(void)
245{
246 if (irqs_disabled())
247 HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
248 else
249 xen_safe_halt();
250}
251
252static void xen_set_lazy_mode(enum paravirt_lazy_mode mode)
253{
254 BUG_ON(preemptible());
255
256 switch (mode) {
257 case PARAVIRT_LAZY_NONE:
258 BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE);
259 break;
260
261 case PARAVIRT_LAZY_MMU:
262 case PARAVIRT_LAZY_CPU:
263 BUG_ON(x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE);
264 break;
265
266 case PARAVIRT_LAZY_FLUSH:
267 /* flush if necessary, but don't change state */
268 if (x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE)
269 xen_mc_flush();
270 return;
271 }
272
273 xen_mc_flush();
274 x86_write_percpu(xen_lazy_mode, mode);
275}
276
/* Task-register read hook: always reports 0. */
static unsigned long xen_store_tr(void)
{
	return 0UL;
}
281
282static void xen_set_ldt(const void *addr, unsigned entries)
283{
284 unsigned long linear_addr = (unsigned long)addr;
285 struct mmuext_op *op;
286 struct multicall_space mcs = xen_mc_entry(sizeof(*op));
287
288 op = mcs.args;
289 op->cmd = MMUEXT_SET_LDT;
290 if (linear_addr) {
291 /* ldt my be vmalloced, use arbitrary_virt_to_machine */
292 xmaddr_t maddr;
293 maddr = arbitrary_virt_to_machine((unsigned long)addr);
294 linear_addr = (unsigned long)maddr.maddr;
295 }
296 op->arg1.linear_addr = linear_addr;
297 op->arg2.nr_ents = entries;
298
299 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
300
301 xen_mc_issue(PARAVIRT_LAZY_CPU);
302}
303
304static void xen_load_gdt(const struct Xgt_desc_struct *dtr)
305{
306 unsigned long *frames;
307 unsigned long va = dtr->address;
308 unsigned int size = dtr->size + 1;
309 unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
310 int f;
311 struct multicall_space mcs;
312
313 /* A GDT can be up to 64k in size, which corresponds to 8192
314 8-byte entries, or 16 4k pages.. */
315
316 BUG_ON(size > 65536);
317 BUG_ON(va & ~PAGE_MASK);
318
319 mcs = xen_mc_entry(sizeof(*frames) * pages);
320 frames = mcs.args;
321
322 for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
323 frames[f] = virt_to_mfn(va);
324 make_lowmem_page_readonly((void *)va);
325 }
326
327 MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct));
328
329 xen_mc_issue(PARAVIRT_LAZY_CPU);
330}
331
332static void load_TLS_descriptor(struct thread_struct *t,
333 unsigned int cpu, unsigned int i)
334{
335 struct desc_struct *gdt = get_cpu_gdt_table(cpu);
336 xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
337 struct multicall_space mc = __xen_mc_entry(0);
338
339 MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
340}
341
342static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
343{
344 xen_mc_batch();
345
346 load_TLS_descriptor(t, cpu, 0);
347 load_TLS_descriptor(t, cpu, 1);
348 load_TLS_descriptor(t, cpu, 2);
349
350 xen_mc_issue(PARAVIRT_LAZY_CPU);
351
352 /*
353 * XXX sleazy hack: If we're being called in a lazy-cpu zone,
354 * it means we're in a context switch, and %gs has just been
355 * saved. This means we can zero it out to prevent faults on
356 * exit from the hypervisor if the next process has no %gs.
357 * Either way, it has been saved, and the new value will get
358 * loaded properly. This will go away as soon as Xen has been
359 * modified to not save/restore %gs for normal hypercalls.
360 */
361 if (xen_get_lazy_mode() == PARAVIRT_LAZY_CPU)
362 loadsegment(gs, 0);
363}
364
365static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
366 u32 low, u32 high)
367{
368 unsigned long lp = (unsigned long)&dt[entrynum];
369 xmaddr_t mach_lp = virt_to_machine(lp);
370 u64 entry = (u64)high << 32 | low;
371
372 preempt_disable();
373
374 xen_mc_flush();
375 if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
376 BUG();
377
378 preempt_enable();
379}
380
381static int cvt_gate_to_trap(int vector, u32 low, u32 high,
382 struct trap_info *info)
383{
384 u8 type, dpl;
385
386 type = (high >> 8) & 0x1f;
387 dpl = (high >> 13) & 3;
388
389 if (type != 0xf && type != 0xe)
390 return 0;
391
392 info->vector = vector;
393 info->address = (high & 0xffff0000) | (low & 0x0000ffff);
394 info->cs = low >> 16;
395 info->flags = dpl;
396 /* interrupt gates clear IF */
397 if (type == 0xe)
398 info->flags |= 4;
399
400 return 1;
401}
402
403/* Locations of each CPU's IDT */
404static DEFINE_PER_CPU(struct Xgt_desc_struct, idt_desc);
405
406/* Set an IDT entry. If the entry is part of the current IDT, then
407 also update Xen. */
408static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
409 u32 low, u32 high)
410{
411 unsigned long p = (unsigned long)&dt[entrynum];
412 unsigned long start, end;
413
414 preempt_disable();
415
416 start = __get_cpu_var(idt_desc).address;
417 end = start + __get_cpu_var(idt_desc).size + 1;
418
419 xen_mc_flush();
420
421 write_dt_entry(dt, entrynum, low, high);
422
423 if (p >= start && (p + 8) <= end) {
424 struct trap_info info[2];
425
426 info[1].address = 0;
427
428 if (cvt_gate_to_trap(entrynum, low, high, &info[0]))
429 if (HYPERVISOR_set_trap_table(info))
430 BUG();
431 }
432
433 preempt_enable();
434}
435
436static void xen_convert_trap_info(const struct Xgt_desc_struct *desc,
437 struct trap_info *traps)
438{
439 unsigned in, out, count;
440
441 count = (desc->size+1) / 8;
442 BUG_ON(count > 256);
443
444 for (in = out = 0; in < count; in++) {
445 const u32 *entry = (u32 *)(desc->address + in * 8);
446
447 if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out]))
448 out++;
449 }
450 traps[out].address = 0;
451}
452
453void xen_copy_trap_info(struct trap_info *traps)
454{
455 const struct Xgt_desc_struct *desc = &__get_cpu_var(idt_desc);
456
457 xen_convert_trap_info(desc, traps);
458}
459
460/* Load a new IDT into Xen. In principle this can be per-CPU, so we
461 hold a spinlock to protect the static traps[] array (static because
462 it avoids allocation, and saves stack space). */
463static void xen_load_idt(const struct Xgt_desc_struct *desc)
464{
465 static DEFINE_SPINLOCK(lock);
466 static struct trap_info traps[257];
467
468 spin_lock(&lock);
469
470 __get_cpu_var(idt_desc) = *desc;
471
472 xen_convert_trap_info(desc, traps);
473
474 xen_mc_flush();
475 if (HYPERVISOR_set_trap_table(traps))
476 BUG();
477
478 spin_unlock(&lock);
479}
480
481/* Write a GDT descriptor entry. Ignore LDT descriptors, since
482 they're handled differently. */
483static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
484 u32 low, u32 high)
485{
486 preempt_disable();
487
488 switch ((high >> 8) & 0xff) {
489 case DESCTYPE_LDT:
490 case DESCTYPE_TSS:
491 /* ignore */
492 break;
493
494 default: {
495 xmaddr_t maddr = virt_to_machine(&dt[entry]);
496 u64 desc = (u64)high << 32 | low;
497
498 xen_mc_flush();
499 if (HYPERVISOR_update_descriptor(maddr.maddr, desc))
500 BUG();
501 }
502
503 }
504
505 preempt_enable();
506}
507
508static void xen_load_esp0(struct tss_struct *tss,
509 struct thread_struct *thread)
510{
511 struct multicall_space mcs = xen_mc_entry(0);
512 MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0);
513 xen_mc_issue(PARAVIRT_LAZY_CPU);
514}
515
516static void xen_set_iopl_mask(unsigned mask)
517{
518 struct physdev_set_iopl set_iopl;
519
520 /* Force the change at ring 0. */
521 set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
522 HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
523}
524
/* I/O delay hook: intentionally does nothing. */
static void xen_io_delay(void)
{
	/* empty */
}
528
#ifdef CONFIG_X86_LOCAL_APIC
/* APIC read hook: every register reads as 0. */
static unsigned long xen_apic_read(unsigned long reg)
{
	return 0UL;
}

/* APIC write hook: the write is dropped and a warning is raised. */
static void xen_apic_write(unsigned long reg, unsigned long val)
{
	/* Warn to see if there are any stray references */
	WARN_ON(1);
}
#endif
541
542static void xen_flush_tlb(void)
543{
544 struct mmuext_op *op;
545 struct multicall_space mcs = xen_mc_entry(sizeof(*op));
546
547 op = mcs.args;
548 op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
549 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
550
551 xen_mc_issue(PARAVIRT_LAZY_MMU);
552}
553
554static void xen_flush_tlb_single(unsigned long addr)
555{
556 struct mmuext_op *op;
557 struct multicall_space mcs = xen_mc_entry(sizeof(*op));
558
559 op = mcs.args;
560 op->cmd = MMUEXT_INVLPG_LOCAL;
561 op->arg1.linear_addr = addr & PAGE_MASK;
562 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
563
564 xen_mc_issue(PARAVIRT_LAZY_MMU);
565}
566
567static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
568 unsigned long va)
569{
570 struct {
571 struct mmuext_op op;
572 cpumask_t mask;
573 } *args;
574 cpumask_t cpumask = *cpus;
575 struct multicall_space mcs;
576
577 /*
578 * A couple of (to be removed) sanity checks:
579 *
580 * - current CPU must not be in mask
581 * - mask must exist :)
582 */
583 BUG_ON(cpus_empty(cpumask));
584 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
585 BUG_ON(!mm);
586
587 /* If a CPU which we ran on has gone down, OK. */
588 cpus_and(cpumask, cpumask, cpu_online_map);
589 if (cpus_empty(cpumask))
590 return;
591
592 mcs = xen_mc_entry(sizeof(*args));
593 args = mcs.args;
594 args->mask = cpumask;
595 args->op.arg2.vcpumask = &args->mask;
596
597 if (va == TLB_FLUSH_ALL) {
598 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
599 } else {
600 args->op.cmd = MMUEXT_INVLPG_MULTI;
601 args->op.arg1.linear_addr = va;
602 }
603
604 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
605
606 xen_mc_issue(PARAVIRT_LAZY_MMU);
607}
608
609static void xen_write_cr2(unsigned long cr2)
610{
611 x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
612}
613
614static unsigned long xen_read_cr2(void)
615{
616 return x86_read_percpu(xen_vcpu)->arch.cr2;
617}
618
619static unsigned long xen_read_cr2_direct(void)
620{
621 return x86_read_percpu(xen_vcpu_info.arch.cr2);
622}
623
624static void xen_write_cr4(unsigned long cr4)
625{
626 /* never allow TSC to be disabled */
627 native_write_cr4(cr4 & ~X86_CR4_TSD);
628}
629
630static unsigned long xen_read_cr3(void)
631{
632 return x86_read_percpu(xen_cr3);
633}
634
635static void xen_write_cr3(unsigned long cr3)
636{
637 BUG_ON(preemptible());
638
639 if (cr3 == x86_read_percpu(xen_cr3)) {
640 /* just a simple tlb flush */
641 xen_flush_tlb();
642 return;
643 }
644
645 x86_write_percpu(xen_cr3, cr3);
646
647
648 {
649 struct mmuext_op *op;
650 struct multicall_space mcs = xen_mc_entry(sizeof(*op));
651 unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
652
653 op = mcs.args;
654 op->cmd = MMUEXT_NEW_BASEPTR;
655 op->arg1.mfn = mfn;
656
657 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
658
659 xen_mc_issue(PARAVIRT_LAZY_CPU);
660 }
661}
662
663/* Early in boot, while setting up the initial pagetable, assume
664 everything is pinned. */
665static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn)
666{
667 BUG_ON(mem_map); /* should only be used early */
668 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
669}
670
671/* This needs to make sure the new pte page is pinned iff its being
672 attached to a pinned pagetable. */
673static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
674{
675 struct page *page = pfn_to_page(pfn);
676
677 if (PagePinned(virt_to_page(mm->pgd))) {
678 SetPagePinned(page);
679
680 if (!PageHighMem(page))
681 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
682 else
683 /* make sure there are no stray mappings of
684 this page */
685 kmap_flush_unused();
686 }
687}
688
689/* This should never happen until we're OK to use struct page */
690static void xen_release_pt(u32 pfn)
691{
692 struct page *page = pfn_to_page(pfn);
693
694 if (PagePinned(page)) {
695 if (!PageHighMem(page))
696 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
697 }
698}
699
#ifdef CONFIG_HIGHPTE
/*
 * Atomically map the pte page @page; pinned pages are mapped read-only,
 * all others with PAGE_KERNEL.
 */
static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
{
	pgprot_t prot = PAGE_KERNEL;

	if (PagePinned(page))
		prot = PAGE_KERNEL_RO;

	/* Debug output, compiled in but disabled by the constant 0. */
	if (0 && PageHighMem(page)) {
		printk("mapping highpte %lx type %d prot %s\n",
		       page_to_pfn(page), type,
		       (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ");
	}

	return kmap_atomic_prot(page, type, prot);
}
#endif
716
717static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
718{
719 /* If there's an existing pte, then don't allow _PAGE_RW to be set */
720 if (pte_val_ma(*ptep) & _PAGE_PRESENT)
721 pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
722 pte_val_ma(pte));
723
724 return pte;
725}
726
727/* Init-time set_pte while constructing initial pagetables, which
728 doesn't allow RO pagetable pages to be remapped RW */
729static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
730{
731 pte = mask_rw_pte(ptep, pte);
732
733 xen_set_pte(ptep, pte);
734}
735
736static __init void xen_pagetable_setup_start(pgd_t *base)
737{
738 pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
739
740 /* special set_pte for pagetable initialization */
741 paravirt_ops.set_pte = xen_set_pte_init;
742
743 init_mm.pgd = base;
744 /*
745 * copy top-level of Xen-supplied pagetable into place. For
746 * !PAE we can use this as-is, but for PAE it is a stand-in
747 * while we copy the pmd pages.
748 */
749 memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
750
751 if (PTRS_PER_PMD > 1) {
752 int i;
753 /*
754 * For PAE, need to allocate new pmds, rather than
755 * share Xen's, since Xen doesn't like pmd's being
756 * shared between address spaces.
757 */
758 for (i = 0; i < PTRS_PER_PGD; i++) {
759 if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
760 pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
761
762 memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
763 PAGE_SIZE);
764
765 make_lowmem_page_readonly(pmd);
766
767 set_pgd(&base[i], __pgd(1 + __pa(pmd)));
768 } else
769 pgd_clear(&base[i]);
770 }
771 }
772
773 /* make sure zero_page is mapped RO so we can use it in pagetables */
774 make_lowmem_page_readonly(empty_zero_page);
775 make_lowmem_page_readonly(base);
776 /*
777 * Switch to new pagetable. This is done before
778 * pagetable_init has done anything so that the new pages
779 * added to the table can be prepared properly for Xen.
780 */
781 xen_write_cr3(__pa(base));
782}
783
784static __init void xen_pagetable_setup_done(pgd_t *base)
785{
786 /* This will work as long as patching hasn't happened yet
787 (which it hasn't) */
788 paravirt_ops.alloc_pt = xen_alloc_pt;
789 paravirt_ops.set_pte = xen_set_pte;
790
791 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
792 /*
793 * Create a mapping for the shared info page.
794 * Should be set_fixmap(), but shared_info is a machine
795 * address with no corresponding pseudo-phys address.
796 */
797 set_pte_mfn(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
798 PFN_DOWN(xen_start_info->shared_info),
799 PAGE_KERNEL);
800
801 HYPERVISOR_shared_info =
802 (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
803
804 } else
805 HYPERVISOR_shared_info =
806 (struct shared_info *)__va(xen_start_info->shared_info);
807
808 /* Actually pin the pagetable down, but we can't set PG_pinned
809 yet because the page structures don't exist yet. */
810 {
811 struct mmuext_op op;
812#ifdef CONFIG_X86_PAE
813 op.cmd = MMUEXT_PIN_L3_TABLE;
814#else
815 op.cmd = MMUEXT_PIN_L3_TABLE;
816#endif
817 op.arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(base)));
818 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
819 BUG();
820 }
821}
822
823/* This is called once we have the cpu_possible_map */
824void __init xen_setup_vcpu_info_placement(void)
825{
826 int cpu;
827
828 for_each_possible_cpu(cpu)
829 xen_vcpu_setup(cpu);
830
831 /* xen_vcpu_setup managed to place the vcpu_info within the
832 percpu area for all cpus, so make use of it */
833 if (have_vcpu_info_placement) {
834 printk(KERN_INFO "Xen: using vcpu_info placement\n");
835
836 paravirt_ops.save_fl = xen_save_fl_direct;
837 paravirt_ops.restore_fl = xen_restore_fl_direct;
838 paravirt_ops.irq_disable = xen_irq_disable_direct;
839 paravirt_ops.irq_enable = xen_irq_enable_direct;
840 paravirt_ops.read_cr2 = xen_read_cr2_direct;
841 paravirt_ops.iret = xen_iret_direct;
842 }
843}
844
845static unsigned xen_patch(u8 type, u16 clobbers, void *insns, unsigned len)
846{
847 char *start, *end, *reloc;
848 unsigned ret;
849
850 start = end = reloc = NULL;
851
852#define SITE(x) \
853 case PARAVIRT_PATCH(x): \
854 if (have_vcpu_info_placement) { \
855 start = (char *)xen_##x##_direct; \
856 end = xen_##x##_direct_end; \
857 reloc = xen_##x##_direct_reloc; \
858 } \
859 goto patch_site
860
861 switch (type) {
862 SITE(irq_enable);
863 SITE(irq_disable);
864 SITE(save_fl);
865 SITE(restore_fl);
866#undef SITE
867
868 patch_site:
869 if (start == NULL || (end-start) > len)
870 goto default_patch;
871
872 ret = paravirt_patch_insns(insns, len, start, end);
873
874 /* Note: because reloc is assigned from something that
875 appears to be an array, gcc assumes it's non-null,
876 but doesn't know its relationship with start and
877 end. */
878 if (reloc > start && reloc < end) {
879 int reloc_off = reloc - start;
880 long *relocp = (long *)(insns + reloc_off);
881 long delta = start - (char *)insns;
882
883 *relocp += delta;
884 }
885 break;
886
887 default_patch:
888 default:
889 ret = paravirt_patch_default(type, clobbers, insns, len);
890 break;
891 }
892
893 return ret;
894}
895
896static const struct paravirt_ops xen_paravirt_ops __initdata = {
897 .paravirt_enabled = 1,
898 .shared_kernel_pmd = 0,
899
900 .name = "Xen",
901 .banner = xen_banner,
902
903 .patch = xen_patch,
904
905 .memory_setup = xen_memory_setup,
906 .arch_setup = xen_arch_setup,
907 .init_IRQ = xen_init_IRQ,
908 .post_allocator_init = xen_mark_init_mm_pinned,
909
910 .time_init = xen_time_init,
911 .set_wallclock = xen_set_wallclock,
912 .get_wallclock = xen_get_wallclock,
913 .get_cpu_khz = xen_cpu_khz,
914 .sched_clock = xen_sched_clock,
915
916 .cpuid = xen_cpuid,
917
918 .set_debugreg = xen_set_debugreg,
919 .get_debugreg = xen_get_debugreg,
920
921 .clts = native_clts,
922
923 .read_cr0 = native_read_cr0,
924 .write_cr0 = native_write_cr0,
925
926 .read_cr2 = xen_read_cr2,
927 .write_cr2 = xen_write_cr2,
928
929 .read_cr3 = xen_read_cr3,
930 .write_cr3 = xen_write_cr3,
931
932 .read_cr4 = native_read_cr4,
933 .read_cr4_safe = native_read_cr4_safe,
934 .write_cr4 = xen_write_cr4,
935
936 .save_fl = xen_save_fl,
937 .restore_fl = xen_restore_fl,
938 .irq_disable = xen_irq_disable,
939 .irq_enable = xen_irq_enable,
940 .safe_halt = xen_safe_halt,
941 .halt = xen_halt,
942 .wbinvd = native_wbinvd,
943
944 .read_msr = native_read_msr_safe,
945 .write_msr = native_write_msr_safe,
946 .read_tsc = native_read_tsc,
947 .read_pmc = native_read_pmc,
948
949 .iret = (void *)&hypercall_page[__HYPERVISOR_iret],
950 .irq_enable_sysexit = NULL, /* never called */
951
952 .load_tr_desc = paravirt_nop,
953 .set_ldt = xen_set_ldt,
954 .load_gdt = xen_load_gdt,
955 .load_idt = xen_load_idt,
956 .load_tls = xen_load_tls,
957
958 .store_gdt = native_store_gdt,
959 .store_idt = native_store_idt,
960 .store_tr = xen_store_tr,
961
962 .write_ldt_entry = xen_write_ldt_entry,
963 .write_gdt_entry = xen_write_gdt_entry,
964 .write_idt_entry = xen_write_idt_entry,
965 .load_esp0 = xen_load_esp0,
966
967 .set_iopl_mask = xen_set_iopl_mask,
968 .io_delay = xen_io_delay,
969
970#ifdef CONFIG_X86_LOCAL_APIC
971 .apic_write = xen_apic_write,
972 .apic_write_atomic = xen_apic_write,
973 .apic_read = xen_apic_read,
974 .setup_boot_clock = paravirt_nop,
975 .setup_secondary_clock = paravirt_nop,
976 .startup_ipi_hook = paravirt_nop,
977#endif
978
979 .flush_tlb_user = xen_flush_tlb,
980 .flush_tlb_kernel = xen_flush_tlb,
981 .flush_tlb_single = xen_flush_tlb_single,
982 .flush_tlb_others = xen_flush_tlb_others,
983
984 .pte_update = paravirt_nop,
985 .pte_update_defer = paravirt_nop,
986
987 .pagetable_setup_start = xen_pagetable_setup_start,
988 .pagetable_setup_done = xen_pagetable_setup_done,
989
990 .alloc_pt = xen_alloc_pt_init,
991 .release_pt = xen_release_pt,
992 .alloc_pd = paravirt_nop,
993 .alloc_pd_clone = paravirt_nop,
994 .release_pd = paravirt_nop,
995
996#ifdef CONFIG_HIGHPTE
997 .kmap_atomic_pte = xen_kmap_atomic_pte,
998#endif
999
1000 .set_pte = NULL, /* see xen_pagetable_setup_* */
1001 .set_pte_at = xen_set_pte_at,
1002 .set_pmd = xen_set_pmd,
1003
1004 .pte_val = xen_pte_val,
1005 .pgd_val = xen_pgd_val,
1006
1007 .make_pte = xen_make_pte,
1008 .make_pgd = xen_make_pgd,
1009
1010#ifdef CONFIG_X86_PAE
1011 .set_pte_atomic = xen_set_pte_atomic,
1012 .set_pte_present = xen_set_pte_at,
1013 .set_pud = xen_set_pud,
1014 .pte_clear = xen_pte_clear,
1015 .pmd_clear = xen_pmd_clear,
1016
1017 .make_pmd = xen_make_pmd,
1018 .pmd_val = xen_pmd_val,
1019#endif /* PAE */
1020
1021 .activate_mm = xen_activate_mm,
1022 .dup_mmap = xen_dup_mmap,
1023 .exit_mmap = xen_exit_mmap,
1024
1025 .set_lazy_mode = xen_set_lazy_mode,
1026};
1027
#ifdef CONFIG_SMP
/* Xen SMP operations; copied over smp_ops by xen_start_kernel(). */
static const struct smp_ops xen_smp_ops __initdata = {
	/* CPU bring-up */
	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
	.smp_prepare_cpus = xen_smp_prepare_cpus,
	.cpu_up = xen_cpu_up,
	.smp_cpus_done = xen_smp_cpus_done,

	/* Cross-CPU signalling */
	.smp_send_stop = xen_smp_send_stop,
	.smp_send_reschedule = xen_smp_send_reschedule,
	.smp_call_function_mask = xen_smp_call_function_mask,
};
#endif	/* CONFIG_SMP */
1040
1041static void xen_reboot(int reason)
1042{
1043#ifdef CONFIG_SMP
1044 smp_send_stop();
1045#endif
1046
1047 if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason))
1048 BUG();
1049}
1050
1051static void xen_restart(char *msg)
1052{
1053 xen_reboot(SHUTDOWN_reboot);
1054}
1055
1056static void xen_emergency_restart(void)
1057{
1058 xen_reboot(SHUTDOWN_reboot);
1059}
1060
1061static void xen_machine_halt(void)
1062{
1063 xen_reboot(SHUTDOWN_poweroff);
1064}
1065
1066static void xen_crash_shutdown(struct pt_regs *regs)
1067{
1068 xen_reboot(SHUTDOWN_crash);
1069}
1070
1071static const struct machine_ops __initdata xen_machine_ops = {
1072 .restart = xen_restart,
1073 .halt = xen_machine_halt,
1074 .power_off = xen_machine_halt,
1075 .shutdown = xen_machine_halt,
1076 .crash_shutdown = xen_crash_shutdown,
1077 .emergency_restart = xen_emergency_restart,
1078};
1079
1080
1081/* First C function to be called on Xen boot */
1082asmlinkage void __init xen_start_kernel(void)
1083{
1084 pgd_t *pgd;
1085
1086 if (!xen_start_info)
1087 return;
1088
1089 BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0);
1090
1091 /* Install Xen paravirt ops */
1092 paravirt_ops = xen_paravirt_ops;
1093 machine_ops = xen_machine_ops;
1094
1095#ifdef CONFIG_SMP
1096 smp_ops = xen_smp_ops;
1097#endif
1098
1099 xen_setup_features();
1100
1101 /* Get mfn list */
1102 if (!xen_feature(XENFEAT_auto_translated_physmap))
1103 phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list;
1104
1105 pgd = (pgd_t *)xen_start_info->pt_base;
1106
1107 init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
1108
1109 init_mm.pgd = pgd; /* use the Xen pagetables to start */
1110
1111 /* keep using Xen gdt for now; no urgent need to change it */
1112
1113 x86_write_percpu(xen_cr3, __pa(pgd));
1114
1115#ifdef CONFIG_SMP
1116 /* Don't do the full vcpu_info placement stuff until we have a
1117 possible map. */
1118 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
1119#else
1120 /* May as well do it now, since there's no good time to call
1121 it later on UP. */
1122 xen_setup_vcpu_info_placement();
1123#endif
1124
1125 paravirt_ops.kernel_rpl = 1;
1126 if (xen_feature(XENFEAT_supervisor_mode_kernel))
1127 paravirt_ops.kernel_rpl = 0;
1128
1129 /* set the limit of our address space */
1130 reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE);
1131
1132 /* set up basic CPUID stuff */
1133 cpu_detect(&new_cpu_data);
1134 new_cpu_data.hard_math = 1;
1135 new_cpu_data.x86_capability[0] = cpuid_edx(1);
1136
1137 /* Poke various useful things into boot_params */
1138 LOADER_TYPE = (9 << 4) | 0;
1139 INITRD_START = xen_start_info->mod_start ? __pa(xen_start_info->mod_start) : 0;
1140 INITRD_SIZE = xen_start_info->mod_len;
1141
1142 /* Start the world */
1143 start_kernel();
1144}
diff --git a/arch/i386/xen/events.c b/arch/i386/xen/events.c
new file mode 100644
index 000000000000..da1b173547a1
--- /dev/null
+++ b/arch/i386/xen/events.c
@@ -0,0 +1,591 @@
1/*
2 * Xen event channels
3 *
4 * Xen models interrupts with abstract event channels. Because each
5 * domain gets 1024 event channels, but NR_IRQ is not that large, we
6 * must dynamically map irqs<->event channels. The event channels
7 * interface with the rest of the kernel by defining a xen interrupt
8 * chip. When an event is received, it is mapped to an irq and sent
9 * through the normal interrupt processing path.
10 *
11 * There are four kinds of events which can be mapped to an event
12 * channel:
13 *
14 * 1. Inter-domain notifications. This includes all the virtual
15 * device events, since they're driven by front-ends in another domain
16 * (typically dom0).
17 * 2. VIRQs, typically used for timers. These are per-cpu events.
18 * 3. IPIs.
19 * 4. Hardware interrupts. Not supported at present.
20 *
21 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
22 */
23
24#include <linux/linkage.h>
25#include <linux/interrupt.h>
26#include <linux/irq.h>
27#include <linux/module.h>
28#include <linux/string.h>
29
30#include <asm/ptrace.h>
31#include <asm/irq.h>
32#include <asm/sync_bitops.h>
33#include <asm/xen/hypercall.h>
34#include <asm/xen/hypervisor.h>
35
36#include <xen/events.h>
37#include <xen/interface/xen.h>
38#include <xen/interface/event_channel.h>
39
40#include "xen-ops.h"
41
42/*
43 * This lock protects updates to the following mapping and reference-count
44 * arrays. The lock does not need to be acquired to read the mapping tables.
45 */
46static DEFINE_SPINLOCK(irq_mapping_update_lock);
47
48/* IRQ <-> VIRQ mapping. */
49static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
50
51/* IRQ <-> IPI mapping */
52static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1};
53
54/* Packed IRQ information: binding type, sub-type index, and event channel. */
55struct packed_irq
56{
57 unsigned short evtchn;
58 unsigned char index;
59 unsigned char type;
60};
61
62static struct packed_irq irq_info[NR_IRQS];
63
64/* Binding types. */
65enum {
66 IRQT_UNBOUND,
67 IRQT_PIRQ,
68 IRQT_VIRQ,
69 IRQT_IPI,
70 IRQT_EVTCHN
71};
72
73/* Convenient shorthand for packed representation of an unbound IRQ. */
74#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0)
75
76static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
77 [0 ... NR_EVENT_CHANNELS-1] = -1
78};
79static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
80static u8 cpu_evtchn[NR_EVENT_CHANNELS];
81
82/* Reference counts for bindings to IRQs. */
83static int irq_bindcount[NR_IRQS];
84
85/* Xen will never allocate port zero for any purpose. */
86#define VALID_EVTCHN(chn) ((chn) != 0)
87
88/*
89 * Force a proper event-channel callback from Xen after clearing the
90 * callback mask. We do this in a very simple manner, by making a call
91 * down into Xen. The pending flag will be checked by Xen on return.
92 */
93void force_evtchn_callback(void)
94{
95 (void)HYPERVISOR_xen_version(0, NULL);
96}
97EXPORT_SYMBOL_GPL(force_evtchn_callback);
98
99static struct irq_chip xen_dynamic_chip;
100
101/* Constructor for packed IRQ information. */
102static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn)
103{
104 return (struct packed_irq) { evtchn, index, type };
105}
106
107/*
108 * Accessors for packed IRQ information.
109 */
110static inline unsigned int evtchn_from_irq(int irq)
111{
112 return irq_info[irq].evtchn;
113}
114
115static inline unsigned int index_from_irq(int irq)
116{
117 return irq_info[irq].index;
118}
119
120static inline unsigned int type_from_irq(int irq)
121{
122 return irq_info[irq].type;
123}
124
125static inline unsigned long active_evtchns(unsigned int cpu,
126 struct shared_info *sh,
127 unsigned int idx)
128{
129 return (sh->evtchn_pending[idx] &
130 cpu_evtchn_mask[cpu][idx] &
131 ~sh->evtchn_mask[idx]);
132}
133
134static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
135{
136 int irq = evtchn_to_irq[chn];
137
138 BUG_ON(irq == -1);
139#ifdef CONFIG_SMP
140 irq_desc[irq].affinity = cpumask_of_cpu(cpu);
141#endif
142
143 __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]);
144 __set_bit(chn, cpu_evtchn_mask[cpu]);
145
146 cpu_evtchn[chn] = cpu;
147}
148
149static void init_evtchn_cpu_bindings(void)
150{
151#ifdef CONFIG_SMP
152 int i;
153 /* By default all event channels notify CPU#0. */
154 for (i = 0; i < NR_IRQS; i++)
155 irq_desc[i].affinity = cpumask_of_cpu(0);
156#endif
157
158 memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
159 memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
160}
161
162static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
163{
164 return cpu_evtchn[evtchn];
165}
166
167static inline void clear_evtchn(int port)
168{
169 struct shared_info *s = HYPERVISOR_shared_info;
170 sync_clear_bit(port, &s->evtchn_pending[0]);
171}
172
173static inline void set_evtchn(int port)
174{
175 struct shared_info *s = HYPERVISOR_shared_info;
176 sync_set_bit(port, &s->evtchn_pending[0]);
177}
178
179
180/**
181 * notify_remote_via_irq - send event to remote end of event channel via irq
182 * @irq: irq of event channel to send event to
183 *
184 * Unlike notify_remote_via_evtchn(), this is safe to use across
185 * save/restore. Notifications on a broken connection are silently
186 * dropped.
187 */
void notify_remote_via_irq(int irq)
{
	int port = evtchn_from_irq(irq);

	/* An irq with no event channel behind it is silently ignored. */
	if (!VALID_EVTCHN(port))
		return;

	notify_remote_via_evtchn(port);
}
195EXPORT_SYMBOL_GPL(notify_remote_via_irq);
196
197static void mask_evtchn(int port)
198{
199 struct shared_info *s = HYPERVISOR_shared_info;
200 sync_set_bit(port, &s->evtchn_mask[0]);
201}
202
203static void unmask_evtchn(int port)
204{
205 struct shared_info *s = HYPERVISOR_shared_info;
206 unsigned int cpu = get_cpu();
207
208 BUG_ON(!irqs_disabled());
209
210 /* Slow path (hypercall) if this is a non-local port. */
211 if (unlikely(cpu != cpu_from_evtchn(port))) {
212 struct evtchn_unmask unmask = { .port = port };
213 (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
214 } else {
215 struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
216
217 sync_clear_bit(port, &s->evtchn_mask[0]);
218
219 /*
220 * The following is basically the equivalent of
221 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
222 * the interrupt edge' if the channel is masked.
223 */
224 if (sync_test_bit(port, &s->evtchn_pending[0]) &&
225 !sync_test_and_set_bit(port / BITS_PER_LONG,
226 &vcpu_info->evtchn_pending_sel))
227 vcpu_info->evtchn_upcall_pending = 1;
228 }
229
230 put_cpu();
231}
232
233static int find_unbound_irq(void)
234{
235 int irq;
236
237 /* Only allocate from dynirq range */
238 for (irq = 0; irq < NR_IRQS; irq++)
239 if (irq_bindcount[irq] == 0)
240 break;
241
242 if (irq == NR_IRQS)
243 panic("No available IRQ to bind to: increase NR_IRQS!\n");
244
245 return irq;
246}
247
248int bind_evtchn_to_irq(unsigned int evtchn)
249{
250 int irq;
251
252 spin_lock(&irq_mapping_update_lock);
253
254 irq = evtchn_to_irq[evtchn];
255
256 if (irq == -1) {
257 irq = find_unbound_irq();
258
259 dynamic_irq_init(irq);
260 set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
261 handle_level_irq, "event");
262
263 evtchn_to_irq[evtchn] = irq;
264 irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn);
265 }
266
267 irq_bindcount[irq]++;
268
269 spin_unlock(&irq_mapping_update_lock);
270
271 return irq;
272}
273EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
274
275static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
276{
277 struct evtchn_bind_ipi bind_ipi;
278 int evtchn, irq;
279
280 spin_lock(&irq_mapping_update_lock);
281
282 irq = per_cpu(ipi_to_irq, cpu)[ipi];
283 if (irq == -1) {
284 irq = find_unbound_irq();
285 if (irq < 0)
286 goto out;
287
288 dynamic_irq_init(irq);
289 set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
290 handle_level_irq, "ipi");
291
292 bind_ipi.vcpu = cpu;
293 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
294 &bind_ipi) != 0)
295 BUG();
296 evtchn = bind_ipi.port;
297
298 evtchn_to_irq[evtchn] = irq;
299 irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
300
301 per_cpu(ipi_to_irq, cpu)[ipi] = irq;
302
303 bind_evtchn_to_cpu(evtchn, cpu);
304 }
305
306 irq_bindcount[irq]++;
307
308 out:
309 spin_unlock(&irq_mapping_update_lock);
310 return irq;
311}
312
313
314static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
315{
316 struct evtchn_bind_virq bind_virq;
317 int evtchn, irq;
318
319 spin_lock(&irq_mapping_update_lock);
320
321 irq = per_cpu(virq_to_irq, cpu)[virq];
322
323 if (irq == -1) {
324 bind_virq.virq = virq;
325 bind_virq.vcpu = cpu;
326 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
327 &bind_virq) != 0)
328 BUG();
329 evtchn = bind_virq.port;
330
331 irq = find_unbound_irq();
332
333 dynamic_irq_init(irq);
334 set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
335 handle_level_irq, "virq");
336
337 evtchn_to_irq[evtchn] = irq;
338 irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
339
340 per_cpu(virq_to_irq, cpu)[virq] = irq;
341
342 bind_evtchn_to_cpu(evtchn, cpu);
343 }
344
345 irq_bindcount[irq]++;
346
347 spin_unlock(&irq_mapping_update_lock);
348
349 return irq;
350}
351
352static void unbind_from_irq(unsigned int irq)
353{
354 struct evtchn_close close;
355 int evtchn = evtchn_from_irq(irq);
356
357 spin_lock(&irq_mapping_update_lock);
358
359 if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) {
360 close.port = evtchn;
361 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
362 BUG();
363
364 switch (type_from_irq(irq)) {
365 case IRQT_VIRQ:
366 per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
367 [index_from_irq(irq)] = -1;
368 break;
369 default:
370 break;
371 }
372
373 /* Closed ports are implicitly re-bound to VCPU0. */
374 bind_evtchn_to_cpu(evtchn, 0);
375
376 evtchn_to_irq[evtchn] = -1;
377 irq_info[irq] = IRQ_UNBOUND;
378
379 dynamic_irq_init(irq);
380 }
381
382 spin_unlock(&irq_mapping_update_lock);
383}
384
385int bind_evtchn_to_irqhandler(unsigned int evtchn,
386 irqreturn_t (*handler)(int, void *),
387 unsigned long irqflags,
388 const char *devname, void *dev_id)
389{
390 unsigned int irq;
391 int retval;
392
393 irq = bind_evtchn_to_irq(evtchn);
394 retval = request_irq(irq, handler, irqflags, devname, dev_id);
395 if (retval != 0) {
396 unbind_from_irq(irq);
397 return retval;
398 }
399
400 return irq;
401}
402EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
403
404int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
405 irqreturn_t (*handler)(int, void *),
406 unsigned long irqflags, const char *devname, void *dev_id)
407{
408 unsigned int irq;
409 int retval;
410
411 irq = bind_virq_to_irq(virq, cpu);
412 retval = request_irq(irq, handler, irqflags, devname, dev_id);
413 if (retval != 0) {
414 unbind_from_irq(irq);
415 return retval;
416 }
417
418 return irq;
419}
420EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
421
422int bind_ipi_to_irqhandler(enum ipi_vector ipi,
423 unsigned int cpu,
424 irq_handler_t handler,
425 unsigned long irqflags,
426 const char *devname,
427 void *dev_id)
428{
429 int irq, retval;
430
431 irq = bind_ipi_to_irq(ipi, cpu);
432 if (irq < 0)
433 return irq;
434
435 retval = request_irq(irq, handler, irqflags, devname, dev_id);
436 if (retval != 0) {
437 unbind_from_irq(irq);
438 return retval;
439 }
440
441 return irq;
442}
443
444void unbind_from_irqhandler(unsigned int irq, void *dev_id)
445{
446 free_irq(irq, dev_id);
447 unbind_from_irq(irq);
448}
449EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
450
451void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
452{
453 int irq = per_cpu(ipi_to_irq, cpu)[vector];
454 BUG_ON(irq < 0);
455 notify_remote_via_irq(irq);
456}
457
458
459/*
460 * Search the CPU's pending event bitmasks. For each one found, map
461 * the event number to an irq, and feed it into do_IRQ() for
462 * handling.
463 *
464 * Xen uses a two-level bitmap to speed searching. The first level is
465 * a bitset of words which contain pending event bits. The second
466 * level is a bitset of pending events themselves.
467 */
468fastcall void xen_evtchn_do_upcall(struct pt_regs *regs)
469{
470 int cpu = get_cpu();
471 struct shared_info *s = HYPERVISOR_shared_info;
472 struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
473 unsigned long pending_words;
474
475 vcpu_info->evtchn_upcall_pending = 0;
476
477 /* NB. No need for a barrier here -- XCHG is a barrier on x86. */
478 pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
479 while (pending_words != 0) {
480 unsigned long pending_bits;
481 int word_idx = __ffs(pending_words);
482 pending_words &= ~(1UL << word_idx);
483
484 while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
485 int bit_idx = __ffs(pending_bits);
486 int port = (word_idx * BITS_PER_LONG) + bit_idx;
487 int irq = evtchn_to_irq[port];
488
489 if (irq != -1) {
490 regs->orig_eax = ~irq;
491 do_IRQ(regs);
492 }
493 }
494 }
495
496 put_cpu();
497}
498
499/* Rebind an evtchn so that it gets delivered to a specific cpu */
500static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
501{
502 struct evtchn_bind_vcpu bind_vcpu;
503 int evtchn = evtchn_from_irq(irq);
504
505 if (!VALID_EVTCHN(evtchn))
506 return;
507
508 /* Send future instances of this interrupt to other vcpu. */
509 bind_vcpu.port = evtchn;
510 bind_vcpu.vcpu = tcpu;
511
512 /*
513 * If this fails, it usually just indicates that we're dealing with a
514 * virq or IPI channel, which don't actually need to be rebound. Ignore
515 * it, but don't do the xenlinux-level rebind in that case.
516 */
517 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
518 bind_evtchn_to_cpu(evtchn, tcpu);
519}
520
521
522static void set_affinity_irq(unsigned irq, cpumask_t dest)
523{
524 unsigned tcpu = first_cpu(dest);
525 rebind_irq_to_cpu(irq, tcpu);
526}
527
/* irq_chip hook: unmask the event channel behind @irq, if there is one. */
static void enable_dynirq(unsigned int irq)
{
	int port = evtchn_from_irq(irq);

	if (!VALID_EVTCHN(port))
		return;

	unmask_evtchn(port);
}
535
/* irq_chip hook: mask the event channel behind @irq, if there is one. */
static void disable_dynirq(unsigned int irq)
{
	int port = evtchn_from_irq(irq);

	if (!VALID_EVTCHN(port))
		return;

	mask_evtchn(port);
}
543
/*
 * irq_chip hook: run move_native_irq() for @irq, then clear the pending
 * bit of its event channel, if there is one.
 */
static void ack_dynirq(unsigned int irq)
{
	int port = evtchn_from_irq(irq);

	move_native_irq(irq);

	if (!VALID_EVTCHN(port))
		return;

	clear_evtchn(port);
}
553
/*
 * irq_chip hook: mark the event channel behind @irq pending again.
 * Returns 1 if a channel was re-raised, 0 if @irq has no event channel.
 */
static int retrigger_dynirq(unsigned int irq)
{
	int port = evtchn_from_irq(irq);

	if (!VALID_EVTCHN(port))
		return 0;

	set_evtchn(port);
	return 1;
}
566
567static struct irq_chip xen_dynamic_chip __read_mostly = {
568 .name = "xen-dyn",
569 .mask = disable_dynirq,
570 .unmask = enable_dynirq,
571 .ack = ack_dynirq,
572 .set_affinity = set_affinity_irq,
573 .retrigger = retrigger_dynirq,
574};
575
576void __init xen_init_IRQ(void)
577{
578 int i;
579
580 init_evtchn_cpu_bindings();
581
582 /* No event channels are 'live' right now. */
583 for (i = 0; i < NR_EVENT_CHANNELS; i++)
584 mask_evtchn(i);
585
586 /* Dynamic IRQ space is currently unbound. Zero the refcnts. */
587 for (i = 0; i < NR_IRQS; i++)
588 irq_bindcount[i] = 0;
589
590 irq_ctx_init(smp_processor_id());
591}
diff --git a/arch/i386/xen/features.c b/arch/i386/xen/features.c
new file mode 100644
index 000000000000..0707714e40d6
--- /dev/null
+++ b/arch/i386/xen/features.c
@@ -0,0 +1,29 @@
1/******************************************************************************
2 * features.c
3 *
4 * Xen feature flags.
5 *
6 * Copyright (c) 2006, Ian Campbell, XenSource Inc.
7 */
8#include <linux/types.h>
9#include <linux/cache.h>
10#include <linux/module.h>
11#include <asm/xen/hypervisor.h>
12#include <xen/features.h>
13
14u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
15EXPORT_SYMBOL_GPL(xen_features);
16
17void xen_setup_features(void)
18{
19 struct xen_feature_info fi;
20 int i, j;
21
22 for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
23 fi.submap_idx = i;
24 if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
25 break;
26 for (j = 0; j < 32; j++)
27 xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
28 }
29}
diff --git a/arch/i386/xen/manage.c b/arch/i386/xen/manage.c
new file mode 100644
index 000000000000..aa7af9e6abc0
--- /dev/null
+++ b/arch/i386/xen/manage.c
@@ -0,0 +1,143 @@
1/*
2 * Handle external requests for shutdown, reboot and sysrq
3 */
4#include <linux/kernel.h>
5#include <linux/err.h>
6#include <linux/reboot.h>
7#include <linux/sysrq.h>
8
9#include <xen/xenbus.h>
10
11#define SHUTDOWN_INVALID -1
12#define SHUTDOWN_POWEROFF 0
13#define SHUTDOWN_SUSPEND 2
14/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
15 * report a crash, not be instructed to crash!
16 * HALT is the same as POWEROFF, as far as we're concerned. The tools use
17 * the distinction when we return the reason code to them.
18 */
19#define SHUTDOWN_HALT 4
20
21/* Ignore multiple shutdown requests. */
22static int shutting_down = SHUTDOWN_INVALID;
23
24static void shutdown_handler(struct xenbus_watch *watch,
25 const char **vec, unsigned int len)
26{
27 char *str;
28 struct xenbus_transaction xbt;
29 int err;
30
31 if (shutting_down != SHUTDOWN_INVALID)
32 return;
33
34 again:
35 err = xenbus_transaction_start(&xbt);
36 if (err)
37 return;
38
39 str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
40 /* Ignore read errors and empty reads. */
41 if (XENBUS_IS_ERR_READ(str)) {
42 xenbus_transaction_end(xbt, 1);
43 return;
44 }
45
46 xenbus_write(xbt, "control", "shutdown", "");
47
48 err = xenbus_transaction_end(xbt, 0);
49 if (err == -EAGAIN) {
50 kfree(str);
51 goto again;
52 }
53
54 if (strcmp(str, "poweroff") == 0 ||
55 strcmp(str, "halt") == 0)
56 orderly_poweroff(false);
57 else if (strcmp(str, "reboot") == 0)
58 ctrl_alt_del();
59 else {
60 printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
61 shutting_down = SHUTDOWN_INVALID;
62 }
63
64 kfree(str);
65}
66
67static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
68 unsigned int len)
69{
70 char sysrq_key = '\0';
71 struct xenbus_transaction xbt;
72 int err;
73
74 again:
75 err = xenbus_transaction_start(&xbt);
76 if (err)
77 return;
78 if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
79 printk(KERN_ERR "Unable to read sysrq code in "
80 "control/sysrq\n");
81 xenbus_transaction_end(xbt, 1);
82 return;
83 }
84
85 if (sysrq_key != '\0')
86 xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
87
88 err = xenbus_transaction_end(xbt, 0);
89 if (err == -EAGAIN)
90 goto again;
91
92 if (sysrq_key != '\0')
93 handle_sysrq(sysrq_key, NULL);
94}
95
96static struct xenbus_watch shutdown_watch = {
97 .node = "control/shutdown",
98 .callback = shutdown_handler
99};
100
101static struct xenbus_watch sysrq_watch = {
102 .node = "control/sysrq",
103 .callback = sysrq_handler
104};
105
106static int setup_shutdown_watcher(void)
107{
108 int err;
109
110 err = register_xenbus_watch(&shutdown_watch);
111 if (err) {
112 printk(KERN_ERR "Failed to set shutdown watcher\n");
113 return err;
114 }
115
116 err = register_xenbus_watch(&sysrq_watch);
117 if (err) {
118 printk(KERN_ERR "Failed to set sysrq watcher\n");
119 return err;
120 }
121
122 return 0;
123}
124
125static int shutdown_event(struct notifier_block *notifier,
126 unsigned long event,
127 void *data)
128{
129 setup_shutdown_watcher();
130 return NOTIFY_DONE;
131}
132
133static int __init setup_shutdown_event(void)
134{
135 static struct notifier_block xenstore_notifier = {
136 .notifier_call = shutdown_event
137 };
138 register_xenstore_notifier(&xenstore_notifier);
139
140 return 0;
141}
142
143subsys_initcall(setup_shutdown_event);
diff --git a/arch/i386/xen/mmu.c b/arch/i386/xen/mmu.c
new file mode 100644
index 000000000000..4ae038aa6c24
--- /dev/null
+++ b/arch/i386/xen/mmu.c
@@ -0,0 +1,564 @@
1/*
2 * Xen mmu operations
3 *
4 * This file contains the various mmu fetch and update operations.
5 * The most important job they must perform is the mapping between the
6 * domain's pfn and the overall machine mfns.
7 *
8 * Xen allows guests to directly update the pagetable, in a controlled
9 * fashion. In other words, the guest modifies the same pagetable
10 * that the CPU actually uses, which eliminates the overhead of having
11 * a separate shadow pagetable.
12 *
13 * In order to allow this, it falls on the guest domain to map its
14 * notion of a "physical" pfn - which is just a domain-local linear
15 * address - into a real "machine address" which the CPU's MMU can
16 * use.
17 *
18 * A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be
19 * inserted directly into the pagetable. When creating a new
20 * pte/pmd/pgd, it converts the passed pfn into an mfn. Conversely,
21 * when reading the content back with __(pgd|pmd|pte)_val, it converts
22 * the mfn back into a pfn.
23 *
24 * The other constraint is that all pages which make up a pagetable
25 * must be mapped read-only in the guest. This prevents uncontrolled
26 * guest updates to the pagetable. Xen strictly enforces this, and
27 * will disallow any pagetable update which will end up mapping a
28 * pagetable page RW, and will disallow using any writable page as a
29 * pagetable.
30 *
31 * Naively, when loading %cr3 with the base of a new pagetable, Xen
32 * would need to validate the whole pagetable before going on.
33 * Naturally, this is quite slow. The solution is to "pin" a
34 * pagetable, which enforces all the constraints on the pagetable even
35 * when it is not actively in use. This menas that Xen can be assured
36 * that it is still valid when you do load it into %cr3, and doesn't
37 * need to revalidate it.
38 *
39 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
40 */
41#include <linux/sched.h>
42#include <linux/highmem.h>
43#include <linux/bug.h>
44#include <linux/sched.h>
45
46#include <asm/pgtable.h>
47#include <asm/tlbflush.h>
48#include <asm/mmu_context.h>
49#include <asm/paravirt.h>
50
51#include <asm/xen/hypercall.h>
52#include <asm/xen/hypervisor.h>
53
54#include <xen/page.h>
55#include <xen/interface/xen.h>
56
57#include "multicalls.h"
58#include "mmu.h"
59
60xmaddr_t arbitrary_virt_to_machine(unsigned long address)
61{
62 pte_t *pte = lookup_address(address);
63 unsigned offset = address & PAGE_MASK;
64
65 BUG_ON(pte == NULL);
66
67 return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset);
68}
69
70void make_lowmem_page_readonly(void *vaddr)
71{
72 pte_t *pte, ptev;
73 unsigned long address = (unsigned long)vaddr;
74
75 pte = lookup_address(address);
76 BUG_ON(pte == NULL);
77
78 ptev = pte_wrprotect(*pte);
79
80 if (HYPERVISOR_update_va_mapping(address, ptev, 0))
81 BUG();
82}
83
84void make_lowmem_page_readwrite(void *vaddr)
85{
86 pte_t *pte, ptev;
87 unsigned long address = (unsigned long)vaddr;
88
89 pte = lookup_address(address);
90 BUG_ON(pte == NULL);
91
92 ptev = pte_mkwrite(*pte);
93
94 if (HYPERVISOR_update_va_mapping(address, ptev, 0))
95 BUG();
96}
97
98
99void xen_set_pmd(pmd_t *ptr, pmd_t val)
100{
101 struct multicall_space mcs;
102 struct mmu_update *u;
103
104 preempt_disable();
105
106 mcs = xen_mc_entry(sizeof(*u));
107 u = mcs.args;
108 u->ptr = virt_to_machine(ptr).maddr;
109 u->val = pmd_val_ma(val);
110 MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
111
112 xen_mc_issue(PARAVIRT_LAZY_MMU);
113
114 preempt_enable();
115}
116
117/*
118 * Associate a virtual page frame with a given physical page frame
119 * and protection flags for that frame.
120 */
121void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
122{
123 pgd_t *pgd;
124 pud_t *pud;
125 pmd_t *pmd;
126 pte_t *pte;
127
128 pgd = swapper_pg_dir + pgd_index(vaddr);
129 if (pgd_none(*pgd)) {
130 BUG();
131 return;
132 }
133 pud = pud_offset(pgd, vaddr);
134 if (pud_none(*pud)) {
135 BUG();
136 return;
137 }
138 pmd = pmd_offset(pud, vaddr);
139 if (pmd_none(*pmd)) {
140 BUG();
141 return;
142 }
143 pte = pte_offset_kernel(pmd, vaddr);
144 /* <mfn,flags> stored as-is, to permit clearing entries */
145 xen_set_pte(pte, mfn_pte(mfn, flags));
146
147 /*
148 * It's enough to flush this one mapping.
149 * (PGE mappings get flushed as well)
150 */
151 __flush_tlb_one(vaddr);
152}
153
154void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
155 pte_t *ptep, pte_t pteval)
156{
157 if (mm == current->mm || mm == &init_mm) {
158 if (xen_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
159 struct multicall_space mcs;
160 mcs = xen_mc_entry(0);
161
162 MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
163 xen_mc_issue(PARAVIRT_LAZY_MMU);
164 return;
165 } else
166 if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
167 return;
168 }
169 xen_set_pte(ptep, pteval);
170}
171
172#ifdef CONFIG_X86_PAE
173void xen_set_pud(pud_t *ptr, pud_t val)
174{
175 struct multicall_space mcs;
176 struct mmu_update *u;
177
178 preempt_disable();
179
180 mcs = xen_mc_entry(sizeof(*u));
181 u = mcs.args;
182 u->ptr = virt_to_machine(ptr).maddr;
183 u->val = pud_val_ma(val);
184 MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
185
186 xen_mc_issue(PARAVIRT_LAZY_MMU);
187
188 preempt_enable();
189}
190
/*
 * PAE: write a two-word pte non-atomically.  The high word is stored
 * first, then a write barrier, then the low word.
 * NOTE(review): presumably the low word goes last so the entry is never
 * seen with its new low half alongside a stale high half - confirm.
 */
191void xen_set_pte(pte_t *ptep, pte_t pte)
192{
193 ptep->pte_high = pte.pte_high;
194 smp_wmb();
195 ptep->pte_low = pte.pte_low;
196}
197
198void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
199{
200 set_64bit((u64 *)ptep, pte_val_ma(pte));
201}
202
/*
 * PAE: clear a two-word pte.  The low word is zeroed first and the high
 * word second, the reverse of the order used by xen_set_pte().  @mm and
 * @addr are not used.
 */
203void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
204{
205 ptep->pte_low = 0;
206 smp_wmb(); /* make sure low gets written first */
207 ptep->pte_high = 0;
208}
209
210void xen_pmd_clear(pmd_t *pmdp)
211{
212 xen_set_pmd(pmdp, __pmd(0));
213}
214
215unsigned long long xen_pte_val(pte_t pte)
216{
217 unsigned long long ret = 0;
218
219 if (pte.pte_low) {
220 ret = ((unsigned long long)pte.pte_high << 32) | pte.pte_low;
221 ret = machine_to_phys(XMADDR(ret)).paddr | 1;
222 }
223
224 return ret;
225}
226
227unsigned long long xen_pmd_val(pmd_t pmd)
228{
229 unsigned long long ret = pmd.pmd;
230 if (ret)
231 ret = machine_to_phys(XMADDR(ret)).paddr | 1;
232 return ret;
233}
234
235unsigned long long xen_pgd_val(pgd_t pgd)
236{
237 unsigned long long ret = pgd.pgd;
238 if (ret)
239 ret = machine_to_phys(XMADDR(ret)).paddr | 1;
240 return ret;
241}
242
243pte_t xen_make_pte(unsigned long long pte)
244{
245 if (pte & 1)
246 pte = phys_to_machine(XPADDR(pte)).maddr;
247
248 return (pte_t){ pte, pte >> 32 };
249}
250
251pmd_t xen_make_pmd(unsigned long long pmd)
252{
253 if (pmd & 1)
254 pmd = phys_to_machine(XPADDR(pmd)).maddr;
255
256 return (pmd_t){ pmd };
257}
258
259pgd_t xen_make_pgd(unsigned long long pgd)
260{
261 if (pgd & _PAGE_PRESENT)
262 pgd = phys_to_machine(XPADDR(pgd)).maddr;
263
264 return (pgd_t){ pgd };
265}
266#else /* !PAE */
267void xen_set_pte(pte_t *ptep, pte_t pte)
268{
269 *ptep = pte;
270}
271
272unsigned long xen_pte_val(pte_t pte)
273{
274 unsigned long ret = pte.pte_low;
275
276 if (ret & _PAGE_PRESENT)
277 ret = machine_to_phys(XMADDR(ret)).paddr;
278
279 return ret;
280}
281
282unsigned long xen_pgd_val(pgd_t pgd)
283{
284 unsigned long ret = pgd.pgd;
285 if (ret)
286 ret = machine_to_phys(XMADDR(ret)).paddr | 1;
287 return ret;
288}
289
290pte_t xen_make_pte(unsigned long pte)
291{
292 if (pte & _PAGE_PRESENT)
293 pte = phys_to_machine(XPADDR(pte)).maddr;
294
295 return (pte_t){ pte };
296}
297
298pgd_t xen_make_pgd(unsigned long pgd)
299{
300 if (pgd & _PAGE_PRESENT)
301 pgd = phys_to_machine(XPADDR(pgd)).maddr;
302
303 return (pgd_t){ pgd };
304}
305#endif /* CONFIG_X86_PAE */
306
307
308
309/*
310 (Yet another) pagetable walker. This one is intended for pinning a
311 pagetable. This means that it walks a pagetable and calls the
312 callback function on each page it finds making up the page table,
313 at every level. It walks the entire pagetable, but it only bothers
314 pinning pte pages which are below limit. In the normal case
315 this will be TASK_SIZE, but at boot we need to pin up to
316 FIXADDR_TOP. But the important bit is that we don't pin beyond
317 there, because then we start getting into Xen's ptes.
318*/
/*
 * Walk the pagetable rooted at pgd_base from address 0 up to FIXADDR_TOP
 * and call func on the struct page of each pagetable page found:
 *   - the pud page and pmd page of each populated entry (only when that
 *     level is not folded),
 *   - the page referenced by each populated pmd entry below limit,
 *   - finally the pgd page itself, with UVMF_TLB_FLUSH as the flags.
 * Every other call passes 0 as the flags argument.
 *
 * Returns the bitwise OR of all func return values.  Returns 0 without
 * walking anything when XENFEAT_auto_translated_physmap is set.
 * limit must not exceed FIXADDR_TOP.
 */
319static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned),
320 unsigned long limit)
321{
322 pgd_t *pgd = pgd_base;
323 int flush = 0;
324 unsigned long addr = 0;
325 unsigned long pgd_next;
326
327 BUG_ON(limit > FIXADDR_TOP);
328
329 if (xen_feature(XENFEAT_auto_translated_physmap))
330 return 0;
331
 /* Outer loop: one pgd entry per iteration, addr tracks the VA covered. */
332 for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) {
333 pud_t *pud;
334 unsigned long pud_limit, pud_next;
335
336 pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP);
337
338 if (!pgd_val(*pgd))
339 continue;
340
341 pud = pud_offset(pgd, 0);
342
343 if (PTRS_PER_PUD > 1) /* not folded */
344 flush |= (*func)(virt_to_page(pud), 0);
345
346 for (; addr != pud_limit; pud++, addr = pud_next) {
347 pmd_t *pmd;
348 unsigned long pmd_limit;
349
350 pud_next = pud_addr_end(addr, pud_limit);
351
 /* pmd entries are only visited up to the caller's limit. */
352 if (pud_next < limit)
353 pmd_limit = pud_next;
354 else
355 pmd_limit = limit;
356
357 if (pud_none(*pud))
358 continue;
359
360 pmd = pmd_offset(pud, 0);
361
362 if (PTRS_PER_PMD > 1) /* not folded */
363 flush |= (*func)(virt_to_page(pmd), 0);
364
 /*
  * addr is advanced by the span of one pte page before the
  * bounds check.  NOTE(review): comparing (x-1) values
  * presumably keeps the test correct when addr wraps to 0
  * at the top of the address space - confirm.
  */
365 for (; addr != pmd_limit; pmd++) {
366 addr += (PAGE_SIZE * PTRS_PER_PTE);
367 if ((pmd_limit-1) < (addr-1)) {
368 addr = pmd_limit;
369 break;
370 }
371
372 if (pmd_none(*pmd))
373 continue;
374
375 flush |= (*func)(pmd_page(*pmd), 0);
376 }
377 }
378 }
379
 /* The top-level page is handled last, and is the only call given UVMF_TLB_FLUSH. */
380 flush |= (*func)(virt_to_page(pgd_base), UVMF_TLB_FLUSH);
381
382 return flush;
383}
384
385static int pin_page(struct page *page, unsigned flags)
386{
387 unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags);
388 int flush;
389
390 if (pgfl)
391 flush = 0; /* already pinned */
392 else if (PageHighMem(page))
393 /* kmaps need flushing if we found an unpinned
394 highpage */
395 flush = 1;
396 else {
397 void *pt = lowmem_page_address(page);
398 unsigned long pfn = page_to_pfn(page);
399 struct multicall_space mcs = __xen_mc_entry(0);
400
401 flush = 0;
402
403 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
404 pfn_pte(pfn, PAGE_KERNEL_RO),
405 flags);
406 }
407
408 return flush;
409}
410
411/* This is called just after a mm has been created, but it has not
412 been used yet. We need to make sure that its pagetable is all
413 read-only, and can be pinned. */
414void xen_pgd_pin(pgd_t *pgd)
415{
416 struct multicall_space mcs;
417 struct mmuext_op *op;
418
419 xen_mc_batch();
420
421 if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
422 /* re-enable interrupts for kmap_flush_unused */
423 xen_mc_issue(0);
424 kmap_flush_unused();
425 xen_mc_batch();
426 }
427
428 mcs = __xen_mc_entry(sizeof(*op));
429 op = mcs.args;
430
431#ifdef CONFIG_X86_PAE
432 op->cmd = MMUEXT_PIN_L3_TABLE;
433#else
434 op->cmd = MMUEXT_PIN_L2_TABLE;
435#endif
436 op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd)));
437 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
438
439 xen_mc_issue(0);
440}
441
442/* The init_mm pagetable is really pinned as soon as it's created, but
443 that's before we have page structures to store the bits. So do all
444 the book-keeping now. */
445static __init int mark_pinned(struct page *page, unsigned flags)
446{
447 SetPagePinned(page);
448 return 0;
449}
450
451void __init xen_mark_init_mm_pinned(void)
452{
453 pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP);
454}
455
456static int unpin_page(struct page *page, unsigned flags)
457{
458 unsigned pgfl = test_and_clear_bit(PG_pinned, &page->flags);
459
460 if (pgfl && !PageHighMem(page)) {
461 void *pt = lowmem_page_address(page);
462 unsigned long pfn = page_to_pfn(page);
463 struct multicall_space mcs = __xen_mc_entry(0);
464
465 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
466 pfn_pte(pfn, PAGE_KERNEL),
467 flags);
468 }
469
470 return 0; /* never need to flush on unpin */
471}
472
473/* Release a pagetable's pages back as normal RW */
474static void xen_pgd_unpin(pgd_t *pgd)
475{
476 struct mmuext_op *op;
477 struct multicall_space mcs;
478
479 xen_mc_batch();
480
481 mcs = __xen_mc_entry(sizeof(*op));
482
483 op = mcs.args;
484 op->cmd = MMUEXT_UNPIN_TABLE;
485 op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd)));
486
487 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
488
489 pgd_walk(pgd, unpin_page, TASK_SIZE);
490
491 xen_mc_issue(0);
492}
493
494void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
495{
496 spin_lock(&next->page_table_lock);
497 xen_pgd_pin(next->pgd);
498 spin_unlock(&next->page_table_lock);
499}
500
501void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
502{
503 spin_lock(&mm->page_table_lock);
504 xen_pgd_pin(mm->pgd);
505 spin_unlock(&mm->page_table_lock);
506}
507
508
509#ifdef CONFIG_SMP
510/* Another cpu may still have their %cr3 pointing at the pagetable, so
511 we need to repoint it somewhere else before we can unpin it. */
512static void drop_other_mm_ref(void *info)
513{
514 struct mm_struct *mm = info;
515
516 if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
517 leave_mm(smp_processor_id());
518}
519
520static void drop_mm_ref(struct mm_struct *mm)
521{
522 if (current->active_mm == mm) {
523 if (current->mm == mm)
524 load_cr3(swapper_pg_dir);
525 else
526 leave_mm(smp_processor_id());
527 }
528
529 if (!cpus_empty(mm->cpu_vm_mask))
530 xen_smp_call_function_mask(mm->cpu_vm_mask, drop_other_mm_ref,
531 mm, 1);
532}
533#else
534static void drop_mm_ref(struct mm_struct *mm)
535{
536 if (current->active_mm == mm)
537 load_cr3(swapper_pg_dir);
538}
539#endif
540
541/*
542 * While a process runs, Xen pins its pagetables, which means that the
543 * hypervisor forces it to be read-only, and it controls all updates
544 * to it. This means that all pagetable updates have to go via the
545 * hypervisor, which is moderately expensive.
546 *
547 * Since we're pulling the pagetable down, we switch to use init_mm,
548 * unpin old process pagetable and mark it all read-write, which
549 * allows further operations on it to be simple memory accesses.
550 *
551 * The only subtle point is that another CPU may be still using the
552 * pagetable because of lazy tlb flushing. This means we need to
553 * switch all CPUs off this pagetable before we can unpin it.
554 */
555void xen_exit_mmap(struct mm_struct *mm)
556{
557 get_cpu(); /* make sure we don't move around */
558 drop_mm_ref(mm);
559 put_cpu();
560
561 spin_lock(&mm->page_table_lock);
562 xen_pgd_unpin(mm->pgd);
563 spin_unlock(&mm->page_table_lock);
564}
diff --git a/arch/i386/xen/mmu.h b/arch/i386/xen/mmu.h
new file mode 100644
index 000000000000..c9ff27f3ac3a
--- /dev/null
+++ b/arch/i386/xen/mmu.h
@@ -0,0 +1,60 @@
1#ifndef _XEN_MMU_H
2
3#include <linux/linkage.h>
4#include <asm/page.h>
5
/*
 * Page-directory addresses above 4GB do not fit into architectural %cr3.
 * When accessing %cr3, or the equivalent field in vcpu_guest_context,
 * guests must use the following accessor macros to pack/unpack valid MFNs.
 *
 * Note that Xen is using the fact that the pagetable base is always
 * page-aligned, and putting the 12 MSB of the address into the 12 LSB
 * of cr3.
 *
 * Both macros evaluate their argument twice and work on its value
 * converted to unsigned.
 */
#define xen_pfn_to_cr3(pfn) \
	((((unsigned)(pfn)) >> 20) | (((unsigned)(pfn)) << 12))
#define xen_cr3_to_pfn(cr3) \
	((((unsigned)(cr3)) << 20) | (((unsigned)(cr3)) >> 12))
17
18
19void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
20
21void xen_set_pte(pte_t *ptep, pte_t pteval);
22void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
23 pte_t *ptep, pte_t pteval);
24void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
25
26void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next);
27void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
28void xen_exit_mmap(struct mm_struct *mm);
29
30void xen_pgd_pin(pgd_t *pgd);
31//void xen_pgd_unpin(pgd_t *pgd);
32
33#ifdef CONFIG_X86_PAE
34unsigned long long xen_pte_val(pte_t);
35unsigned long long xen_pmd_val(pmd_t);
36unsigned long long xen_pgd_val(pgd_t);
37
38pte_t xen_make_pte(unsigned long long);
39pmd_t xen_make_pmd(unsigned long long);
40pgd_t xen_make_pgd(unsigned long long);
41
42void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
43 pte_t *ptep, pte_t pteval);
44void xen_set_pte_atomic(pte_t *ptep, pte_t pte);
45void xen_set_pud(pud_t *ptr, pud_t val);
46void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
47void xen_pmd_clear(pmd_t *pmdp);
48
49
50#else
51unsigned long xen_pte_val(pte_t);
52unsigned long xen_pmd_val(pmd_t);
53unsigned long xen_pgd_val(pgd_t);
54
55pte_t xen_make_pte(unsigned long);
56pmd_t xen_make_pmd(unsigned long);
57pgd_t xen_make_pgd(unsigned long);
58#endif
59
60#endif /* _XEN_MMU_H */
diff --git a/arch/i386/xen/multicalls.c b/arch/i386/xen/multicalls.c
new file mode 100644
index 000000000000..c837e8e463db
--- /dev/null
+++ b/arch/i386/xen/multicalls.c
@@ -0,0 +1,90 @@
1/*
2 * Xen hypercall batching.
3 *
4 * Xen allows multiple hypercalls to be issued at once, using the
5 * multicall interface. This allows the cost of trapping into the
6 * hypervisor to be amortized over several calls.
7 *
8 * This file implements a simple interface for multicalls. There's a
9 * per-cpu buffer of outstanding multicalls. When you want to queue a
10 * multicall for issuing, you can allocate a multicall slot for the
11 * call and its arguments, along with storage for space which is
12 * pointed to by the arguments (for passing pointers to structures,
13 * etc). When the multicall is actually issued, all the space for the
14 * commands and allocated memory is freed for reuse.
15 *
16 * Multicalls are flushed whenever any of the buffers get full, or
17 * when explicitly requested. There's no way to get per-multicall
18 * return results back. It will BUG if any of the multicalls fail.
19 *
20 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
21 */
22#include <linux/percpu.h>
23#include <linux/hardirq.h>
24
25#include <asm/xen/hypercall.h>
26
27#include "multicalls.h"
28
29#define MC_BATCH 32
30#define MC_ARGS (MC_BATCH * 16 / sizeof(u64))
31
32struct mc_buffer {
33 struct multicall_entry entries[MC_BATCH];
34 u64 args[MC_ARGS];
35 unsigned mcidx, argidx;
36};
37
38static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
39DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
40
41void xen_mc_flush(void)
42{
43 struct mc_buffer *b = &__get_cpu_var(mc_buffer);
44 int ret = 0;
45 unsigned long flags;
46
47 BUG_ON(preemptible());
48
49 /* Disable interrupts in case someone comes in and queues
50 something in the middle */
51 local_irq_save(flags);
52
53 if (b->mcidx) {
54 int i;
55
56 if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
57 BUG();
58 for (i = 0; i < b->mcidx; i++)
59 if (b->entries[i].result < 0)
60 ret++;
61 b->mcidx = 0;
62 b->argidx = 0;
63 } else
64 BUG_ON(b->argidx != 0);
65
66 local_irq_restore(flags);
67
68 BUG_ON(ret);
69}
70
71struct multicall_space __xen_mc_entry(size_t args)
72{
73 struct mc_buffer *b = &__get_cpu_var(mc_buffer);
74 struct multicall_space ret;
75 unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64);
76
77 BUG_ON(preemptible());
78 BUG_ON(argspace > MC_ARGS);
79
80 if (b->mcidx == MC_BATCH ||
81 (b->argidx + argspace) > MC_ARGS)
82 xen_mc_flush();
83
84 ret.mc = &b->entries[b->mcidx];
85 b->mcidx++;
86 ret.args = &b->args[b->argidx];
87 b->argidx += argspace;
88
89 return ret;
90}
diff --git a/arch/i386/xen/multicalls.h b/arch/i386/xen/multicalls.h
new file mode 100644
index 000000000000..e6f7530b156c
--- /dev/null
+++ b/arch/i386/xen/multicalls.h
@@ -0,0 +1,45 @@
1#ifndef _XEN_MULTICALLS_H
2#define _XEN_MULTICALLS_H
3
4#include "xen-ops.h"
5
/* Multicalls: a reserved call slot and the storage for its arguments */
struct multicall_space {
	struct multicall_entry *mc;	/* the multicall entry to fill in */
	void *args;			/* argument space belonging to it */
};
12
13/* Allocate room for a multicall and its args */
14struct multicall_space __xen_mc_entry(size_t args);
15
16DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags);
17
18/* Call to start a batch of multiple __xen_mc_entry()s. Must be
19 paired with xen_mc_issue() */
20static inline void xen_mc_batch(void)
21{
22 /* need to disable interrupts until this entry is complete */
23 local_irq_save(__get_cpu_var(xen_mc_irq_flags));
24}
25
26static inline struct multicall_space xen_mc_entry(size_t args)
27{
28 xen_mc_batch();
29 return __xen_mc_entry(args);
30}
31
32/* Flush all pending multicalls */
33void xen_mc_flush(void);
34
35/* Issue a multicall if we're not in a lazy mode */
36static inline void xen_mc_issue(unsigned mode)
37{
38 if ((xen_get_lazy_mode() & mode) == 0)
39 xen_mc_flush();
40
41 /* restore flags saved in xen_mc_batch */
42 local_irq_restore(x86_read_percpu(xen_mc_irq_flags));
43}
44
45#endif /* _XEN_MULTICALLS_H */
diff --git a/arch/i386/xen/setup.c b/arch/i386/xen/setup.c
new file mode 100644
index 000000000000..f84e77226646
--- /dev/null
+++ b/arch/i386/xen/setup.c
@@ -0,0 +1,111 @@
1/*
2 * Machine specific setup for xen
3 *
4 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
5 */
6
7#include <linux/module.h>
8#include <linux/sched.h>
9#include <linux/mm.h>
10#include <linux/pm.h>
11
12#include <asm/elf.h>
13#include <asm/e820.h>
14#include <asm/setup.h>
15#include <asm/xen/hypervisor.h>
16#include <asm/xen/hypercall.h>
17
18#include <xen/interface/physdev.h>
19#include <xen/features.h>
20
21#include "xen-ops.h"
22#include "vdso.h"
23
24/* These are code, but not functions. Defined in entry.S */
25extern const char xen_hypervisor_callback[];
26extern const char xen_failsafe_callback[];
27
28unsigned long *phys_to_machine_mapping;
29EXPORT_SYMBOL(phys_to_machine_mapping);
30
31/**
32 * machine_specific_memory_setup - Hook for machine specific memory setup.
33 **/
34
35char * __init xen_memory_setup(void)
36{
37 unsigned long max_pfn = xen_start_info->nr_pages;
38
39 e820.nr_map = 0;
40 add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM);
41
42 return "Xen";
43}
44
45static void xen_idle(void)
46{
47 local_irq_disable();
48
49 if (need_resched())
50 local_irq_enable();
51 else {
52 current_thread_info()->status &= ~TS_POLLING;
53 smp_mb__after_clear_bit();
54 safe_halt();
55 current_thread_info()->status |= TS_POLLING;
56 }
57}
58
59/*
60 * Set the bit indicating "nosegneg" library variants should be used.
61 */
62static void fiddle_vdso(void)
63{
64 extern u32 VDSO_NOTE_MASK; /* See ../kernel/vsyscall-note.S. */
65 extern char vsyscall_int80_start;
66 u32 *mask = (u32 *) ((unsigned long) &VDSO_NOTE_MASK - VDSO_PRELINK +
67 &vsyscall_int80_start);
68 *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
69}
70
71void __init xen_arch_setup(void)
72{
73 struct physdev_set_iopl set_iopl;
74 int rc;
75
76 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
77 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
78
79 if (!xen_feature(XENFEAT_auto_translated_physmap))
80 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3);
81
82 HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
83 __KERNEL_CS, (unsigned long)xen_failsafe_callback);
84
85 set_iopl.iopl = 1;
86 rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
87 if (rc != 0)
88 printk(KERN_INFO "physdev_op failed %d\n", rc);
89
90#ifdef CONFIG_ACPI
91 if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
92 printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
93 disable_acpi();
94 }
95#endif
96
97 memcpy(boot_command_line, xen_start_info->cmd_line,
98 MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
99 COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
100
101 pm_idle = xen_idle;
102
103#ifdef CONFIG_SMP
104 /* fill cpus_possible with all available cpus */
105 xen_fill_possible_map();
106#endif
107
108 paravirt_disable_iospace();
109
110 fiddle_vdso();
111}
diff --git a/arch/i386/xen/smp.c b/arch/i386/xen/smp.c
new file mode 100644
index 000000000000..557b8e24706a
--- /dev/null
+++ b/arch/i386/xen/smp.c
@@ -0,0 +1,404 @@
1/*
2 * Xen SMP support
3 *
4 * This file implements the Xen versions of smp_ops. SMP under Xen is
5 * very straightforward. Bringing a CPU up is simply a matter of
6 * loading its initial context and setting it running.
7 *
8 * IPIs are handled through the Xen event mechanism.
9 *
10 * Because virtual CPUs can be scheduled onto any real CPU, there's no
11 * useful topology information for the kernel to make use of. As a
12 * result, all CPUs are treated as if they're single-core and
13 * single-threaded.
14 *
15 * This does not handle HOTPLUG_CPU yet.
16 */
17#include <linux/sched.h>
18#include <linux/err.h>
19#include <linux/smp.h>
20
21#include <asm/paravirt.h>
22#include <asm/desc.h>
23#include <asm/pgtable.h>
24#include <asm/cpu.h>
25
26#include <xen/interface/xen.h>
27#include <xen/interface/vcpu.h>
28
29#include <asm/xen/interface.h>
30#include <asm/xen/hypercall.h>
31
32#include <xen/page.h>
33#include <xen/events.h>
34
35#include "xen-ops.h"
36#include "mmu.h"
37
38static cpumask_t cpu_initialized_map;
39static DEFINE_PER_CPU(int, resched_irq);
40static DEFINE_PER_CPU(int, callfunc_irq);
41
42/*
43 * Structure and data for smp_call_function(). This is designed to minimise
44 * static memory requirements. It also looks cleaner.
45 */
46static DEFINE_SPINLOCK(call_lock);
47
48struct call_data_struct {
49 void (*func) (void *info);
50 void *info;
51 atomic_t started;
52 atomic_t finished;
53 int wait;
54};
55
56static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
57
58static struct call_data_struct *call_data;
59
60/*
61 * Reschedule call back. Nothing to do,
62 * all the work is done automatically when
63 * we return from the interrupt.
64 */
65static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
66{
67 return IRQ_HANDLED;
68}
69
70static __cpuinit void cpu_bringup_and_idle(void)
71{
72 int cpu = smp_processor_id();
73
74 cpu_init();
75
76 preempt_disable();
77 per_cpu(cpu_state, cpu) = CPU_ONLINE;
78
79 xen_setup_cpu_clockevents();
80
81 /* We can take interrupts now: we're officially "up". */
82 local_irq_enable();
83
84 wmb(); /* make sure everything is out */
85 cpu_idle();
86}
87
88static int xen_smp_intr_init(unsigned int cpu)
89{
90 int rc;
91 const char *resched_name, *callfunc_name;
92
93 per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;
94
95 resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
96 rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
97 cpu,
98 xen_reschedule_interrupt,
99 IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
100 resched_name,
101 NULL);
102 if (rc < 0)
103 goto fail;
104 per_cpu(resched_irq, cpu) = rc;
105
106 callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
107 rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
108 cpu,
109 xen_call_function_interrupt,
110 IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
111 callfunc_name,
112 NULL);
113 if (rc < 0)
114 goto fail;
115 per_cpu(callfunc_irq, cpu) = rc;
116
117 return 0;
118
119 fail:
120 if (per_cpu(resched_irq, cpu) >= 0)
121 unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
122 if (per_cpu(callfunc_irq, cpu) >= 0)
123 unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
124 return rc;
125}
126
127void __init xen_fill_possible_map(void)
128{
129 int i, rc;
130
131 for (i = 0; i < NR_CPUS; i++) {
132 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
133 if (rc >= 0)
134 cpu_set(i, cpu_possible_map);
135 }
136}
137
138void __init xen_smp_prepare_boot_cpu(void)
139{
140 int cpu;
141
142 BUG_ON(smp_processor_id() != 0);
143 native_smp_prepare_boot_cpu();
144
145 /* We've switched to the "real" per-cpu gdt, so make sure the
146 old memory can be recycled */
147 make_lowmem_page_readwrite(&per_cpu__gdt_page);
148
149 for (cpu = 0; cpu < NR_CPUS; cpu++) {
150 cpus_clear(cpu_sibling_map[cpu]);
151 cpus_clear(cpu_core_map[cpu]);
152 }
153
154 xen_setup_vcpu_info_placement();
155}
156
157void __init xen_smp_prepare_cpus(unsigned int max_cpus)
158{
159 unsigned cpu;
160
161 for (cpu = 0; cpu < NR_CPUS; cpu++) {
162 cpus_clear(cpu_sibling_map[cpu]);
163 cpus_clear(cpu_core_map[cpu]);
164 }
165
166 smp_store_cpu_info(0);
167 set_cpu_sibling_map(0);
168
169 if (xen_smp_intr_init(0))
170 BUG();
171
172 cpu_initialized_map = cpumask_of_cpu(0);
173
174 /* Restrict the possible_map according to max_cpus. */
175 while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
176 for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--)
177 continue;
178 cpu_clear(cpu, cpu_possible_map);
179 }
180
181 for_each_possible_cpu (cpu) {
182 struct task_struct *idle;
183
184 if (cpu == 0)
185 continue;
186
187 idle = fork_idle(cpu);
188 if (IS_ERR(idle))
189 panic("failed fork for CPU %d", cpu);
190
191 cpu_set(cpu, cpu_present_map);
192 }
193
194 //init_xenbus_allowed_cpumask();
195}
196
197static __cpuinit int
198cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
199{
200 struct vcpu_guest_context *ctxt;
201 struct gdt_page *gdt = &per_cpu(gdt_page, cpu);
202
203 if (cpu_test_and_set(cpu, cpu_initialized_map))
204 return 0;
205
206 ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
207 if (ctxt == NULL)
208 return -ENOMEM;
209
210 ctxt->flags = VGCF_IN_KERNEL;
211 ctxt->user_regs.ds = __USER_DS;
212 ctxt->user_regs.es = __USER_DS;
213 ctxt->user_regs.fs = __KERNEL_PERCPU;
214 ctxt->user_regs.gs = 0;
215 ctxt->user_regs.ss = __KERNEL_DS;
216 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
217 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
218
219 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
220
221 xen_copy_trap_info(ctxt->trap_ctxt);
222
223 ctxt->ldt_ents = 0;
224
225 BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK);
226 make_lowmem_page_readonly(gdt->gdt);
227
228 ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt);
229 ctxt->gdt_ents = ARRAY_SIZE(gdt->gdt);
230
231 ctxt->user_regs.cs = __KERNEL_CS;
232 ctxt->user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
233
234 ctxt->kernel_ss = __KERNEL_DS;
235 ctxt->kernel_sp = idle->thread.esp0;
236
237 ctxt->event_callback_cs = __KERNEL_CS;
238 ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
239 ctxt->failsafe_callback_cs = __KERNEL_CS;
240 ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
241
242 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
243 ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
244
245 if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
246 BUG();
247
248 kfree(ctxt);
249 return 0;
250}
251
252int __cpuinit xen_cpu_up(unsigned int cpu)
253{
254 struct task_struct *idle = idle_task(cpu);
255 int rc;
256
257#if 0
258 rc = cpu_up_check(cpu);
259 if (rc)
260 return rc;
261#endif
262
263 init_gdt(cpu);
264 per_cpu(current_task, cpu) = idle;
265 irq_ctx_init(cpu);
266 xen_setup_timer(cpu);
267
268 /* make sure interrupts start blocked */
269 per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
270
271 rc = cpu_initialize_context(cpu, idle);
272 if (rc)
273 return rc;
274
275 if (num_online_cpus() == 1)
276 alternatives_smp_switch(1);
277
278 rc = xen_smp_intr_init(cpu);
279 if (rc)
280 return rc;
281
282 smp_store_cpu_info(cpu);
283 set_cpu_sibling_map(cpu);
284 /* This must be done before setting cpu_online_map */
285 wmb();
286
287 cpu_set(cpu, cpu_online_map);
288
289 rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
290 BUG_ON(rc);
291
292 return 0;
293}
294
/* Called once cpu bringup has finished; nothing to do under Xen. */
void xen_smp_cpus_done(unsigned int max_cpus)
{
	/* intentionally empty */
}
298
299static void stop_self(void *v)
300{
301 int cpu = smp_processor_id();
302
303 /* make sure we're not pinning something down */
304 load_cr3(swapper_pg_dir);
305 /* should set up a minimal gdt */
306
307 HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
308 BUG();
309}
310
311void xen_smp_send_stop(void)
312{
313 smp_call_function(stop_self, NULL, 0, 0);
314}
315
316void xen_smp_send_reschedule(int cpu)
317{
318 xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
319}
320
321
322static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
323{
324 unsigned cpu;
325
326 cpus_and(mask, mask, cpu_online_map);
327
328 for_each_cpu_mask(cpu, mask)
329 xen_send_IPI_one(cpu, vector);
330}
331
332static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
333{
334 void (*func) (void *info) = call_data->func;
335 void *info = call_data->info;
336 int wait = call_data->wait;
337
338 /*
339 * Notify initiating CPU that I've grabbed the data and am
340 * about to execute the function
341 */
342 mb();
343 atomic_inc(&call_data->started);
344 /*
345 * At this point the info structure may be out of scope unless wait==1
346 */
347 irq_enter();
348 (*func)(info);
349 irq_exit();
350
351 if (wait) {
352 mb(); /* commit everything before setting finished */
353 atomic_inc(&call_data->finished);
354 }
355
356 return IRQ_HANDLED;
357}
358
359int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
360 void *info, int wait)
361{
362 struct call_data_struct data;
363 int cpus;
364
365 /* Holding any lock stops cpus from going down. */
366 spin_lock(&call_lock);
367
368 cpu_clear(smp_processor_id(), mask);
369
370 cpus = cpus_weight(mask);
371 if (!cpus) {
372 spin_unlock(&call_lock);
373 return 0;
374 }
375
376 /* Can deadlock when called with interrupts disabled */
377 WARN_ON(irqs_disabled());
378
379 data.func = func;
380 data.info = info;
381 atomic_set(&data.started, 0);
382 data.wait = wait;
383 if (wait)
384 atomic_set(&data.finished, 0);
385
386 call_data = &data;
387 mb(); /* write everything before IPI */
388
389 /* Send a message to other CPUs and wait for them to respond */
390 xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
391
392 /* Make sure other vcpus get a chance to run.
393 XXX too severe? Maybe we should check the other CPU's states? */
394 HYPERVISOR_sched_op(SCHEDOP_yield, 0);
395
396 /* Wait for response */
397 while (atomic_read(&data.started) != cpus ||
398 (wait && atomic_read(&data.finished) != cpus))
399 cpu_relax();
400
401 spin_unlock(&call_lock);
402
403 return 0;
404}
diff --git a/arch/i386/xen/time.c b/arch/i386/xen/time.c
new file mode 100644
index 000000000000..dfd6db69ead5
--- /dev/null
+++ b/arch/i386/xen/time.c
@@ -0,0 +1,593 @@
1/*
2 * Xen time implementation.
3 *
4 * This is implemented in terms of a clocksource driver which uses
5 * the hypervisor clock as a nanosecond timebase, and a clockevent
6 * driver which uses the hypervisor's timer mechanism.
7 *
8 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
9 */
10#include <linux/kernel.h>
11#include <linux/interrupt.h>
12#include <linux/clocksource.h>
13#include <linux/clockchips.h>
14#include <linux/kernel_stat.h>
15
16#include <asm/xen/hypervisor.h>
17#include <asm/xen/hypercall.h>
18
19#include <xen/events.h>
20#include <xen/interface/xen.h>
21#include <xen/interface/vcpu.h>
22
23#include "xen-ops.h"
24
25#define XEN_SHIFT 22
26
27/* Xen may fire a timer up to this many ns early */
28#define TIMER_SLOP 100000
29#define NS_PER_TICK (1000000000LL / HZ)
30
31static cycle_t xen_clocksource_read(void);
32
33/* These are perodically updated in shared_info, and then copied here. */
34struct shadow_time_info {
35 u64 tsc_timestamp; /* TSC at last update of time vals. */
36 u64 system_timestamp; /* Time, in nanosecs, since boot. */
37 u32 tsc_to_nsec_mul;
38 int tsc_shift;
39 u32 version;
40};
41
42static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
43
44/* runstate info updated by Xen */
45static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
46
47/* snapshots of runstate info */
48static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot);
49
50/* unused ns of stolen and blocked time */
51static DEFINE_PER_CPU(u64, residual_stolen);
52static DEFINE_PER_CPU(u64, residual_blocked);
53
54/* return an consistent snapshot of 64-bit time/counter value */
55static u64 get64(const u64 *p)
56{
57 u64 ret;
58
59 if (BITS_PER_LONG < 64) {
60 u32 *p32 = (u32 *)p;
61 u32 h, l;
62
63 /*
64 * Read high then low, and then make sure high is
65 * still the same; this will only loop if low wraps
66 * and carries into high.
67 * XXX some clean way to make this endian-proof?
68 */
69 do {
70 h = p32[1];
71 barrier();
72 l = p32[0];
73 barrier();
74 } while (p32[1] != h);
75
76 ret = (((u64)h) << 32) | l;
77 } else
78 ret = *p;
79
80 return ret;
81}
82
83/*
84 * Runstate accounting
85 */
86static void get_runstate_snapshot(struct vcpu_runstate_info *res)
87{
88 u64 state_time;
89 struct vcpu_runstate_info *state;
90
91 BUG_ON(preemptible());
92
93 state = &__get_cpu_var(runstate);
94
95 /*
96 * The runstate info is always updated by the hypervisor on
97 * the current CPU, so there's no need to use anything
98 * stronger than a compiler barrier when fetching it.
99 */
100 do {
101 state_time = get64(&state->state_entry_time);
102 barrier();
103 *res = *state;
104 barrier();
105 } while (get64(&state->state_entry_time) != state_time);
106}
107
108static void setup_runstate_info(int cpu)
109{
110 struct vcpu_register_runstate_memory_area area;
111
112 area.addr.v = &per_cpu(runstate, cpu);
113
114 if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
115 cpu, &area))
116 BUG();
117}
118
119static void do_stolen_accounting(void)
120{
121 struct vcpu_runstate_info state;
122 struct vcpu_runstate_info *snap;
123 s64 blocked, runnable, offline, stolen;
124 cputime_t ticks;
125
126 get_runstate_snapshot(&state);
127
128 WARN_ON(state.state != RUNSTATE_running);
129
130 snap = &__get_cpu_var(runstate_snapshot);
131
132 /* work out how much time the VCPU has not been runn*ing* */
133 blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
134 runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
135 offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
136
137 *snap = state;
138
139 /* Add the appropriate number of ticks of stolen time,
140 including any left-overs from last time. Passing NULL to
141 account_steal_time accounts the time as stolen. */
142 stolen = runnable + offline + __get_cpu_var(residual_stolen);
143
144 if (stolen < 0)
145 stolen = 0;
146
147 ticks = 0;
148 while (stolen >= NS_PER_TICK) {
149 ticks++;
150 stolen -= NS_PER_TICK;
151 }
152 __get_cpu_var(residual_stolen) = stolen;
153 account_steal_time(NULL, ticks);
154
155 /* Add the appropriate number of ticks of blocked time,
156 including any left-overs from last time. Passing idle to
157 account_steal_time accounts the time as idle/wait. */
158 blocked += __get_cpu_var(residual_blocked);
159
160 if (blocked < 0)
161 blocked = 0;
162
163 ticks = 0;
164 while (blocked >= NS_PER_TICK) {
165 ticks++;
166 blocked -= NS_PER_TICK;
167 }
168 __get_cpu_var(residual_blocked) = blocked;
169 account_steal_time(idle_task(smp_processor_id()), ticks);
170}
171
172/*
173 * Xen sched_clock implementation. Returns the number of unstolen
174 * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
175 * states.
176 */
177unsigned long long xen_sched_clock(void)
178{
179 struct vcpu_runstate_info state;
180 cycle_t now;
181 u64 ret;
182 s64 offset;
183
184 /*
185 * Ideally sched_clock should be called on a per-cpu basis
186 * anyway, so preempt should already be disabled, but that's
187 * not current practice at the moment.
188 */
189 preempt_disable();
190
191 now = xen_clocksource_read();
192
193 get_runstate_snapshot(&state);
194
195 WARN_ON(state.state != RUNSTATE_running);
196
197 offset = now - state.state_entry_time;
198 if (offset < 0)
199 offset = 0;
200
201 ret = state.time[RUNSTATE_blocked] +
202 state.time[RUNSTATE_running] +
203 offset;
204
205 preempt_enable();
206
207 return ret;
208}
209
210
211/* Get the CPU speed from Xen */
212unsigned long xen_cpu_khz(void)
213{
214 u64 cpu_khz = 1000000ULL << 32;
215 const struct vcpu_time_info *info =
216 &HYPERVISOR_shared_info->vcpu_info[0].time;
217
218 do_div(cpu_khz, info->tsc_to_system_mul);
219 if (info->tsc_shift < 0)
220 cpu_khz <<= -info->tsc_shift;
221 else
222 cpu_khz >>= info->tsc_shift;
223
224 return cpu_khz;
225}
226
227/*
228 * Reads a consistent set of time-base values from Xen, into a shadow data
229 * area.
230 */
231static unsigned get_time_values_from_xen(void)
232{
233 struct vcpu_time_info *src;
234 struct shadow_time_info *dst;
235
236 /* src is shared memory with the hypervisor, so we need to
237 make sure we get a consistent snapshot, even in the face of
238 being preempted. */
239 src = &__get_cpu_var(xen_vcpu)->time;
240 dst = &__get_cpu_var(shadow_time);
241
242 do {
243 dst->version = src->version;
244 rmb(); /* fetch version before data */
245 dst->tsc_timestamp = src->tsc_timestamp;
246 dst->system_timestamp = src->system_time;
247 dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
248 dst->tsc_shift = src->tsc_shift;
249 rmb(); /* test version after fetching data */
250 } while ((src->version & 1) | (dst->version ^ src->version));
251
252 return dst->version;
253}
254
255/*
256 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
257 * yielding a 64-bit result.
258 */
259static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
260{
261 u64 product;
262#ifdef __i386__
263 u32 tmp1, tmp2;
264#endif
265
266 if (shift < 0)
267 delta >>= -shift;
268 else
269 delta <<= shift;
270
271#ifdef __i386__
272 __asm__ (
273 "mul %5 ; "
274 "mov %4,%%eax ; "
275 "mov %%edx,%4 ; "
276 "mul %5 ; "
277 "xor %5,%5 ; "
278 "add %4,%%eax ; "
279 "adc %5,%%edx ; "
280 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
281 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
282#elif __x86_64__
283 __asm__ (
284 "mul %%rdx ; shrd $32,%%rdx,%%rax"
285 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
286#else
287#error implement me!
288#endif
289
290 return product;
291}
292
293static u64 get_nsec_offset(struct shadow_time_info *shadow)
294{
295 u64 now, delta;
296 now = native_read_tsc();
297 delta = now - shadow->tsc_timestamp;
298 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
299}
300
301static cycle_t xen_clocksource_read(void)
302{
303 struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
304 cycle_t ret;
305 unsigned version;
306
307 do {
308 version = get_time_values_from_xen();
309 barrier();
310 ret = shadow->system_timestamp + get_nsec_offset(shadow);
311 barrier();
312 } while (version != __get_cpu_var(xen_vcpu)->time.version);
313
314 put_cpu_var(shadow_time);
315
316 return ret;
317}
318
319static void xen_read_wallclock(struct timespec *ts)
320{
321 const struct shared_info *s = HYPERVISOR_shared_info;
322 u32 version;
323 u64 delta;
324 struct timespec now;
325
326 /* get wallclock at system boot */
327 do {
328 version = s->wc_version;
329 rmb(); /* fetch version before time */
330 now.tv_sec = s->wc_sec;
331 now.tv_nsec = s->wc_nsec;
332 rmb(); /* fetch time before checking version */
333 } while ((s->wc_version & 1) | (version ^ s->wc_version));
334
335 delta = xen_clocksource_read(); /* time since system boot */
336 delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
337
338 now.tv_nsec = do_div(delta, NSEC_PER_SEC);
339 now.tv_sec = delta;
340
341 set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
342}
343
344unsigned long xen_get_wallclock(void)
345{
346 struct timespec ts;
347
348 xen_read_wallclock(&ts);
349
350 return ts.tv_sec;
351}
352
/*
 * Setting the wallclock is not implemented: @now is ignored and -1 is
 * always returned.
 */
int xen_set_wallclock(unsigned long now)
{
	/* do nothing for domU */
	return -1;
}
358
359static struct clocksource xen_clocksource __read_mostly = {
360 .name = "xen",
361 .rating = 400,
362 .read = xen_clocksource_read,
363 .mask = ~0,
364 .mult = 1<<XEN_SHIFT, /* time directly in nanoseconds */
365 .shift = XEN_SHIFT,
366 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
367};
368
369/*
370 Xen clockevent implementation
371
372 Xen has two clockevent implementations:
373
374 The old timer_op one works with all released versions of Xen prior
375 to version 3.0.4. This version of the hypervisor provides a
376 single-shot timer with nanosecond resolution. However, sharing the
377 same event channel is a 100Hz tick which is delivered while the
378 vcpu is running. We don't care about or use this tick, but it will
379 cause the core time code to think the timer fired too soon, and
380 will end up resetting it each time. It could be filtered, but
381 doing so has complications when the ktime clocksource is not yet
382 the xen clocksource (ie, at boot time).
383
384 The new vcpu_op-based timer interface allows the tick timer period
385 to be changed or turned off. The tick timer is not useful as a
386 periodic timer because events are only delivered to running vcpus.
387 The one-shot timer can report when a timeout is in the past, so
388 set_next_event is capable of returning -ETIME when appropriate.
389 This interface is used when available.
390*/
391
392
393/*
394 Get a hypervisor absolute time. In theory we could maintain an
395 offset between the kernel's time and the hypervisor's time, and
396 apply that to a kernel's absolute timeout. Unfortunately the
397 hypervisor and kernel times can drift even if the kernel is using
398 the Xen clocksource, because ntp can warp the kernel's clocksource.
399*/
400static s64 get_abs_timeout(unsigned long delta)
401{
402 return xen_clocksource_read() + delta;
403}
404
405static void xen_timerop_set_mode(enum clock_event_mode mode,
406 struct clock_event_device *evt)
407{
408 switch (mode) {
409 case CLOCK_EVT_MODE_PERIODIC:
410 /* unsupported */
411 WARN_ON(1);
412 break;
413
414 case CLOCK_EVT_MODE_ONESHOT:
415 case CLOCK_EVT_MODE_RESUME:
416 break;
417
418 case CLOCK_EVT_MODE_UNUSED:
419 case CLOCK_EVT_MODE_SHUTDOWN:
420 HYPERVISOR_set_timer_op(0); /* cancel timeout */
421 break;
422 }
423}
424
425static int xen_timerop_set_next_event(unsigned long delta,
426 struct clock_event_device *evt)
427{
428 WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
429
430 if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
431 BUG();
432
433 /* We may have missed the deadline, but there's no real way of
434 knowing for sure. If the event was in the past, then we'll
435 get an immediate interrupt. */
436
437 return 0;
438}
439
440static const struct clock_event_device xen_timerop_clockevent = {
441 .name = "xen",
442 .features = CLOCK_EVT_FEAT_ONESHOT,
443
444 .max_delta_ns = 0xffffffff,
445 .min_delta_ns = TIMER_SLOP,
446
447 .mult = 1,
448 .shift = 0,
449 .rating = 500,
450
451 .set_mode = xen_timerop_set_mode,
452 .set_next_event = xen_timerop_set_next_event,
453};
454
455
456
457static void xen_vcpuop_set_mode(enum clock_event_mode mode,
458 struct clock_event_device *evt)
459{
460 int cpu = smp_processor_id();
461
462 switch (mode) {
463 case CLOCK_EVT_MODE_PERIODIC:
464 WARN_ON(1); /* unsupported */
465 break;
466
467 case CLOCK_EVT_MODE_ONESHOT:
468 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
469 BUG();
470 break;
471
472 case CLOCK_EVT_MODE_UNUSED:
473 case CLOCK_EVT_MODE_SHUTDOWN:
474 if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
475 HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
476 BUG();
477 break;
478 case CLOCK_EVT_MODE_RESUME:
479 break;
480 }
481}
482
483static int xen_vcpuop_set_next_event(unsigned long delta,
484 struct clock_event_device *evt)
485{
486 int cpu = smp_processor_id();
487 struct vcpu_set_singleshot_timer single;
488 int ret;
489
490 WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
491
492 single.timeout_abs_ns = get_abs_timeout(delta);
493 single.flags = VCPU_SSHOTTMR_future;
494
495 ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);
496
497 BUG_ON(ret != 0 && ret != -ETIME);
498
499 return ret;
500}
501
502static const struct clock_event_device xen_vcpuop_clockevent = {
503 .name = "xen",
504 .features = CLOCK_EVT_FEAT_ONESHOT,
505
506 .max_delta_ns = 0xffffffff,
507 .min_delta_ns = TIMER_SLOP,
508
509 .mult = 1,
510 .shift = 0,
511 .rating = 500,
512
513 .set_mode = xen_vcpuop_set_mode,
514 .set_next_event = xen_vcpuop_set_next_event,
515};
516
517static const struct clock_event_device *xen_clockevent =
518 &xen_timerop_clockevent;
519static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);
520
521static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
522{
523 struct clock_event_device *evt = &__get_cpu_var(xen_clock_events);
524 irqreturn_t ret;
525
526 ret = IRQ_NONE;
527 if (evt->event_handler) {
528 evt->event_handler(evt);
529 ret = IRQ_HANDLED;
530 }
531
532 do_stolen_accounting();
533
534 return ret;
535}
536
537void xen_setup_timer(int cpu)
538{
539 const char *name;
540 struct clock_event_device *evt;
541 int irq;
542
543 printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
544
545 name = kasprintf(GFP_KERNEL, "timer%d", cpu);
546 if (!name)
547 name = "<timer kasprintf failed>";
548
549 irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
550 IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
551 name, NULL);
552
553 evt = &per_cpu(xen_clock_events, cpu);
554 memcpy(evt, xen_clockevent, sizeof(*evt));
555
556 evt->cpumask = cpumask_of_cpu(cpu);
557 evt->irq = irq;
558
559 setup_runstate_info(cpu);
560}
561
562void xen_setup_cpu_clockevents(void)
563{
564 BUG_ON(preemptible());
565
566 clockevents_register_device(&__get_cpu_var(xen_clock_events));
567}
568
569__init void xen_time_init(void)
570{
571 int cpu = smp_processor_id();
572
573 get_time_values_from_xen();
574
575 clocksource_register(&xen_clocksource);
576
577 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
578 /* Successfully turned off 100Hz tick, so we have the
579 vcpuop-based timer interface */
580 printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
581 xen_clockevent = &xen_vcpuop_clockevent;
582 }
583
584 /* Set initial system time with full resolution */
585 xen_read_wallclock(&xtime);
586 set_normalized_timespec(&wall_to_monotonic,
587 -xtime.tv_sec, -xtime.tv_nsec);
588
589 tsc_disable = 0;
590
591 xen_setup_timer(cpu);
592 xen_setup_cpu_clockevents();
593}
diff --git a/arch/i386/xen/vdso.h b/arch/i386/xen/vdso.h
new file mode 100644
index 000000000000..861fedfe5230
--- /dev/null
+++ b/arch/i386/xen/vdso.h
@@ -0,0 +1,4 @@
/* Bit number (an index, not a mask) used for the pseudo-hwcap for
   non-negative segments.  We use bit 1 to avoid bugs in some versions
   of glibc when bit 0 is used; the choice is otherwise arbitrary. */
#define VDSO_NOTE_NONEGSEG_BIT	1
diff --git a/arch/i386/xen/xen-asm.S b/arch/i386/xen/xen-asm.S
new file mode 100644
index 000000000000..1a43b60c0c62
--- /dev/null
+++ b/arch/i386/xen/xen-asm.S
@@ -0,0 +1,291 @@
1/*
2 Asm versions of Xen pv-ops, suitable for either direct use or inlining.
3 The inline versions are the same as the direct-use versions, with the
4 pre- and post-amble chopped off.
5
6 This code is encoded for size rather than absolute efficiency,
7 with a view to being able to inline as much as possible.
8
9 We only bother with direct forms (ie, vcpu in pda) of the operations
10 here; the indirect forms are better handled in C, since they're
11 generally too large to inline anyway.
12 */
13
14#include <linux/linkage.h>
15
16#include <asm/asm-offsets.h>
17#include <asm/thread_info.h>
18#include <asm/percpu.h>
19#include <asm/processor-flags.h>
20#include <asm/segment.h>
21
22#include <xen/interface/xen.h>
23
/* RELOC(x, v) defines the global symbol x_reloc with value v.  The
   functions below pass either 0 or the address one byte past the
   start of their `call' instruction (presumably the call's
   displacement field, for fixing up when the code is copied inline
   -- confirm against the patching code). */
#define RELOC(x, v)	.globl x##_reloc; x##_reloc=v
/* ENDPATCH(x) defines the global symbol x_end at the current location.
   It is placed just before the final `ret', so it marks the end of the
   part of x that does not include the post-amble. */
#define ENDPATCH(x)	.globl x##_end; x##_end=.

/* Pseudo-flag used for virtual NMI, which we don't implement yet */
#define XEN_EFLAGS_NMI	0x80000000
29
/*
	Enable events.  This clears the event mask and then tests the
	pending event status.  If there are pending events, then enter
	the hypervisor to get them handled.

	The unmask and the pending test are kept as two separate
	instructions.  Doing both with a single read-modify-write on
	the pending/mask word would also write the pending byte back,
	and that write can overwrite a pending flag which was set
	between the read and the write, losing the event.  Here the
	pending byte is only ever read.
 */
ENTRY(xen_irq_enable_direct)
	/* Unmask events */
	movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask

	/* Preempt here doesn't matter because that will deal with
	   any pending interrupts.  The pending check may end up being
	   run on the wrong CPU, but that doesn't hurt. */

	/* Test for pending; ZF is set when nothing is pending */
	testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
	jz 1f
2:	call check_events
1:
ENDPATCH(xen_irq_enable_direct)
	ret
	ENDPROC(xen_irq_enable_direct)
	RELOC(xen_irq_enable_direct, 2b+1)
48
49
/*
	Disabling events is simply a matter of making the event mask
	non-zero.
 */
ENTRY(xen_irq_disable_direct)
	/* Write 1 to this CPU's vcpu_info event mask byte */
	movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
ENDPATCH(xen_irq_disable_direct)
	ret
	ENDPROC(xen_irq_disable_direct)
	/* Reloc value 0: this function contains no call instruction */
	RELOC(xen_irq_disable_direct, 0)
60
/*
	(xen_)save_fl is used to get the current interrupt enable status.
	Callers expect the status to be in X86_EFLAGS_IF, and other bits
	may be set in the return value.  We take advantage of this by
	making sure that X86_EFLAGS_IF has the right value (and other bits
	in that byte are 0), but other bits in the return value are
	undefined.  We need to toggle the state of the bit, because
	Xen and x86 use opposite senses (mask vs enable).
 */
ENTRY(xen_save_fl_direct)
	/* ZF is set when the mask byte is zero, ie events are enabled */
	testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
	/* %ah = 1 if enabled, 0 if masked */
	setz %ah
	/* Double it: %ah becomes 2 or 0.  %ah is bits 8-15 of %eax, so
	   this is the X86_EFLAGS_IF>>8 bit tested by the restore side. */
	addb %ah,%ah
ENDPATCH(xen_save_fl_direct)
	ret
	ENDPROC(xen_save_fl_direct)
	RELOC(xen_save_fl_direct, 0)
78
79
/*
	In principle the caller should be passing us a value return
	from xen_save_fl_direct, but for robustness sake we test only
	the X86_EFLAGS_IF flag rather than the whole byte.  After
	setting the interrupt mask state, it checks for unmasked
	pending events and enters the hypervisor to get them delivered
	if so.
 */
ENTRY(xen_restore_fl_direct)
	/* mask = !(flags & X86_EFLAGS_IF) */
	testb $X86_EFLAGS_IF>>8, %ah
	setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
	/* Preempt here doesn't matter because that will deal with
	   any pending interrupts.  The pending check may end up being
	   run on the wrong CPU, but that doesn't hurt. */

	/* Check for unmasked and pending.  The 16-bit word read here
	   is pending in the low byte and mask in the high byte, so it
	   equals 0x0001 (ZF set) exactly when an event is pending and
	   events are unmasked.  Skip the hypervisor call in every
	   other case, ie when ZF is clear. */
	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
	jnz 1f
2:	call check_events
1:
ENDPATCH(xen_restore_fl_direct)
	ret
	ENDPROC(xen_restore_fl_direct)
	RELOC(xen_restore_fl_direct, 2b+1)
104
/*
	This is run where a normal iret would be run, with the same stack setup:
	      8: eflags
	      4: cs
	esp-> 0: eip

	This attempts to make sure that any pending events are dealt
	with on return to usermode, but there is a small window in
	which an event can happen just before entering usermode.  If
	the nested interrupt ends up setting one of the TIF_WORK_MASK
	pending work flags, they will not be tested again before
	returning to usermode.  This means that a process can end up
	with pending work, which will be unprocessed until the process
	enters and leaves the kernel again, which could be an
	unbounded amount of time.  This means that a pending signal or
	reschedule event could be indefinitely delayed.

	The fix is to notice a nested interrupt in the critical
	window, and if one occurs, then fold the nested interrupt into
	the current interrupt stack frame, and re-process it
	iteratively rather than recursively.  This means that it will
	exit via the normal path, and all pending work will be dealt
	with appropriately.

	Because the nested interrupt handler needs to deal with the
	current stack state in whatever form it's in, we keep things
	simple by only using a single register which is pushed/popped
	on the stack.

	Non-direct iret could be done in the same way, but it would
	require an annoying amount of code duplication.  We'll assume
	that direct mode will be the common case once the hypervisor
	support becomes commonplace.
 */
ENTRY(xen_iret_direct)
	/* test eflags for special cases */
	testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
	jnz hyper_iret

	push %eax
	ESP_OFFSET=4	# bytes pushed onto stack

	/* Store vcpu_info pointer for easy access.  Do it this
	   way to avoid having to reload %fs */
#ifdef CONFIG_SMP
	GET_THREAD_INFO(%eax)
	movl TI_cpu(%eax),%eax
	movl __per_cpu_offset(,%eax,4),%eax
	lea per_cpu__xen_vcpu_info(%eax),%eax
#else
	movl $per_cpu__xen_vcpu_info, %eax
#endif

	/* check IF state we're restoring.  The saved eflags are at
	   8+ESP_OFFSET now that %eax has been pushed; the extra +1
	   addresses their second byte, to match the >>8 on the flag. */
	testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)

	/* Maybe enable events.  Once this happens we could get a
	   recursive event, so the critical region starts immediately
	   afterwards.  However, if that happens we don't end up
	   resuming the code, so we don't have to be worried about
	   being preempted to another CPU. */
	setz XEN_vcpu_info_mask(%eax)
xen_iret_start_crit:

	/* check for unmasked and pending; ZF is set only when the
	   pending byte is 1 and the mask byte above it is 0 */
	cmpw $0x0001, XEN_vcpu_info_pending(%eax)

	/* If there's something pending, mask events again so we
	   can jump back into xen_hypervisor_callback */
	sete XEN_vcpu_info_mask(%eax)

	/* popl leaves the flags alone, so the result of the cmpw
	   above is still what the je below tests */
	popl %eax

	/* From this point on the registers are restored and the stack
	   updated, so we don't need to worry about it if we're preempted */
iret_restore_end:

	/* Jump to hypervisor_callback after fixing up the stack.
	   Events are masked, so jumping out of the critical
	   region is OK. */
	je xen_hypervisor_callback

	iret
xen_iret_end_crit:

hyper_iret:
	/* put this out of line since it's very rarely used */
	/* entries in hypercall_page are addressed as hypercall number * 32 */
	jmp hypercall_page + __HYPERVISOR_iret * 32

	.globl xen_iret_start_crit, xen_iret_end_crit
195
/*
	This is called by xen_hypervisor_callback in entry.S when it sees
	that the EIP at the time of interrupt was between xen_iret_start_crit
	and xen_iret_end_crit.  We're passed the EIP in %eax so we can do
	a more refined determination of what to do.

	The stack format at this point is:
	----------------
	 ss		: (ss/esp may be present if we came from usermode)
	 esp		:
	 eflags		}  outer exception info
	 cs		}
	 eip		}
	---------------- <- edi (copy dest)
	 eax		:  outer eax if it hasn't been restored
	----------------
	 eflags		}  nested exception info
	 cs		}   (no ss/esp because we're nested
	 eip		}    from the same ring)
	 orig_eax	}<- esi (copy src)
	 - - - - - - - -
	 fs		}
	 es		}
	 ds		}  SAVE_ALL state
	 eax		}
	  :		:
	 ebx		}
	----------------
	 return addr	 <- esp
	----------------

	In order to deliver the nested exception properly, we need to shift
	everything from the return addr up to the error code so it
	sits just under the outer exception info.  This means that when we
	handle the exception, we do it in the context of the outer exception
	rather than starting a new one.

	The only caveat is that if the outer eax hasn't been
	restored yet (ie, it's still on stack), we need to insert
	its value into the SAVE_ALL state before going on, since
	it's usermode state which we eventually need to restore.
 */
ENTRY(xen_iret_crit_fixup)
	/* offsets +4 for return address */

	/*
	   Paranoia: Make sure we were really interrupted in kernel
	   mode; if the saved CS has user RPL, return without touching
	   the stack.
	   One could imagine a case where userspace jumps into the
	   critical range address, but just before the CPU delivers a GP,
	   it decides to deliver an interrupt instead.  Unlikely?
	   Definitely.  Easy to avoid?  Yes.  The Intel documents
	   explicitly say that the reported EIP for a bad jump is the
	   jump instruction itself, not the destination, but some virtual
	   environments get this wrong.
	 */
	movl PT_CS+4(%esp), %ecx
	andl $SEGMENT_RPL_MASK, %ecx
	cmpl $USER_RPL, %ecx
	je 2f

	/* copy source and destination, as marked in the diagram above */
	lea PT_ORIG_EAX+4(%esp), %esi
	lea PT_EFLAGS+4(%esp), %edi

	/* If eip is before iret_restore_end then stack
	   hasn't been restored yet. */
	cmp $iret_restore_end, %eax
	jae 1f

	movl 0+4(%edi),%eax		/* copy EAX */
	movl %eax, PT_EAX+4(%esp)

	lea ESP_OFFSET(%edi),%edi	/* move dest up over saved regs */

	/* set up the copy; std makes movsl run from high addresses
	   down to low ones */
1:	std
	mov $(PT_EIP+4) / 4, %ecx	/* copy ret+saved regs up to orig_eax */
	rep movsl
	cld				/* back to the normal direction */

	lea 4(%edi),%esp		/* point esp to new frame */
2:	ret
277
278
/*
	Force an event check by making a hypercall,
	but preserve regs before making the call.

	%eax, %ecx and %edx are saved around the call to
	force_evtchn_callback and restored in reverse order, so callers
	see those three registers unchanged.
 */
check_events:
	push %eax
	push %ecx
	push %edx
	call force_evtchn_callback
	pop %edx
	pop %ecx
	pop %eax
	ret
diff --git a/arch/i386/xen/xen-head.S b/arch/i386/xen/xen-head.S
new file mode 100644
index 000000000000..2998d55a0017
--- /dev/null
+++ b/arch/i386/xen/xen-head.S
@@ -0,0 +1,36 @@
1/* Xen-specific pieces of head.S, intended to be included in the right
2 place in head.S */
3
4#ifdef CONFIG_XEN
5
6#include <linux/elfnote.h>
7#include <asm/boot.h>
8#include <xen/interface/elfnote.h>
9
/* Kernel entry point named by the XEN_ELFNOTE_ENTRY note below. */
ENTRY(startup_xen)
	/* Save %esi in xen_start_info (presumably the start_info
	   pointer handed over by the hypervisor -- confirm) */
	movl %esi,xen_start_info
	cld
	/* Stack starts at the top of init_thread_union */
	movl $(init_thread_union+THREAD_SIZE),%esp
	jmp xen_start_kernel
15
/* Reserve 0x1000 bytes, aligned to PAGE_SIZE_asm, for the hypercall
   page.  Its address is advertised through the
   XEN_ELFNOTE_HYPERCALL_PAGE note below. */
.pushsection ".bss.page_aligned"
	.align PAGE_SIZE_asm
ENTRY(hypercall_page)
	.skip 0x1000
.popsection
21
	/* ELF notes under the "Xen" name describing this kernel: guest
	   OS name and version, Xen version, virtual base address,
	   entry point, hypercall page address, feature string, PAE
	   mode (chosen by CONFIG_X86_PAE) and loader type. */
	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz "linux")
	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz "2.6")
	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz "xen-3.0")
	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      .long  __PAGE_OFFSET)
	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .long  startup_xen)
	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long  hypercall_page)
	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz "!writable_page_tables|pae_pgdir_above_4gb")
#ifdef CONFIG_X86_PAE
	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "yes")
#else
	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "no")
#endif
	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")
35
36#endif /*CONFIG_XEN */
diff --git a/arch/i386/xen/xen-ops.h b/arch/i386/xen/xen-ops.h
new file mode 100644
index 000000000000..b9aaea45f07f
--- /dev/null
+++ b/arch/i386/xen/xen-ops.h
@@ -0,0 +1,71 @@
1#ifndef XEN_OPS_H
2#define XEN_OPS_H
3
4#include <linux/init.h>
5
6/* These are code, but not functions. Defined in entry.S */
7extern const char xen_hypervisor_callback[];
8extern const char xen_failsafe_callback[];
9
10void xen_copy_trap_info(struct trap_info *traps);
11
12DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
13DECLARE_PER_CPU(unsigned long, xen_cr3);
14
15extern struct start_info *xen_start_info;
16extern struct shared_info *HYPERVISOR_shared_info;
17
18char * __init xen_memory_setup(void);
19void __init xen_arch_setup(void);
20void __init xen_init_IRQ(void);
21
/* Time and timer interface. */

/* Bind the Xen timer VIRQ on the given CPU and initialise that CPU's
   clock event device. */
void xen_setup_timer(int cpu);
/* Register the calling CPU's clock event device; must be called with
   preemption disabled. */
void xen_setup_cpu_clockevents(void);
unsigned long xen_cpu_khz(void);
/* Boot-time setup: registers the Xen clocksource, sets the initial
   system time and sets up the timer for the calling CPU. */
void __init xen_time_init(void);
unsigned long xen_get_wallclock(void);
int xen_set_wallclock(unsigned long time);
unsigned long long xen_sched_clock(void);
29
30void xen_mark_init_mm_pinned(void);
31
32DECLARE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode);
33
34static inline unsigned xen_get_lazy_mode(void)
35{
36 return x86_read_percpu(xen_lazy_mode);
37}
38
39void __init xen_fill_possible_map(void);
40
41void __init xen_setup_vcpu_info_placement(void);
42void xen_smp_prepare_boot_cpu(void);
43void xen_smp_prepare_cpus(unsigned int max_cpus);
44int xen_cpu_up(unsigned int cpu);
45void xen_smp_cpus_done(unsigned int max_cpus);
46
47void xen_smp_send_stop(void);
48void xen_smp_send_reschedule(int cpu);
49int xen_smp_call_function (void (*func) (void *info), void *info, int nonatomic,
50 int wait);
51int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
52 int nonatomic, int wait);
53
54int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
55 void *info, int wait);
56
57
/* Declare an asm function, along with symbols needed to make it
   inlineable.  For a function `name' this declares the function
   itself plus name_end and name_reloc, which the asm side defines
   with its ENDPATCH() and RELOC() macros.  The expansion has no
   trailing semicolon; the user supplies it. */
#define DECL_ASM(ret, name, ...)		\
	ret name(__VA_ARGS__);			\
	extern char name##_end[];		\
	extern char name##_reloc[]		\

/* Direct (vcpu_info in per-cpu data) event mask operations
   implemented in xen-asm.S */
DECL_ASM(void, xen_irq_enable_direct, void);
DECL_ASM(void, xen_irq_disable_direct, void);
DECL_ASM(unsigned long, xen_save_fl_direct, void);
DECL_ASM(void, xen_restore_fl_direct, unsigned long);
70void xen_iret_direct(void);
71#endif /* XEN_OPS_H */