author    Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
commit    1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree      0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/i386
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'arch/i386')
-rw-r--r-- arch/i386/Kconfig 1269
-rw-r--r-- arch/i386/Kconfig.debug 72
-rw-r--r-- arch/i386/Makefile 173
-rw-r--r-- arch/i386/boot/Makefile 104
-rw-r--r-- arch/i386/boot/bootsect.S 98
-rw-r--r-- arch/i386/boot/compressed/Makefile 25
-rw-r--r-- arch/i386/boot/compressed/head.S 128
-rw-r--r-- arch/i386/boot/compressed/misc.c 382
-rw-r--r-- arch/i386/boot/compressed/vmlinux.scr 9
-rw-r--r-- arch/i386/boot/edd.S 176
-rw-r--r-- arch/i386/boot/install.sh 40
-rw-r--r-- arch/i386/boot/mtools.conf.in 17
-rw-r--r-- arch/i386/boot/setup.S 1028
-rw-r--r-- arch/i386/boot/tools/build.c 184
-rw-r--r-- arch/i386/boot/video.S 2007
-rw-r--r-- arch/i386/crypto/Makefile 9
-rw-r--r-- arch/i386/crypto/aes-i586-asm.S 376
-rw-r--r-- arch/i386/crypto/aes.c 520
-rw-r--r-- arch/i386/defconfig 1247
-rw-r--r-- arch/i386/kernel/Makefile 71
-rw-r--r-- arch/i386/kernel/acpi/Makefile 4
-rw-r--r-- arch/i386/kernel/acpi/boot.c 908
-rw-r--r-- arch/i386/kernel/acpi/earlyquirk.c 51
-rw-r--r-- arch/i386/kernel/acpi/sleep.c 93
-rw-r--r-- arch/i386/kernel/acpi/wakeup.S 318
-rw-r--r-- arch/i386/kernel/apic.c 1278
-rw-r--r-- arch/i386/kernel/apm.c 2428
-rw-r--r-- arch/i386/kernel/asm-offsets.c 72
-rw-r--r-- arch/i386/kernel/bootflag.c 99
-rw-r--r-- arch/i386/kernel/cpu/Makefile 19
-rw-r--r-- arch/i386/kernel/cpu/amd.c 249
-rw-r--r-- arch/i386/kernel/cpu/centaur.c 476
-rw-r--r-- arch/i386/kernel/cpu/changelog 63
-rw-r--r-- arch/i386/kernel/cpu/common.c 634
-rw-r--r-- arch/i386/kernel/cpu/cpu.h 30
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/Kconfig 231
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/Makefile 14
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c 537
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c 457
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/elanfreq.c 312
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/gx-suspmod.c 502
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/longhaul.c 658
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/longhaul.h 466
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/longrun.c 326
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 337
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/powernow-k6.c 256
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/powernow-k7.c 690
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/powernow-k7.h 44
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/powernow-k8.c 1135
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/powernow-k8.h 176
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c 715
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h 25
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/speedstep-ich.c 424
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/speedstep-lib.c 385
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/speedstep-lib.h 47
-rw-r--r-- arch/i386/kernel/cpu/cpufreq/speedstep-smi.c 424
-rw-r--r-- arch/i386/kernel/cpu/cyrix.c 439
-rw-r--r-- arch/i386/kernel/cpu/intel.c 248
-rw-r--r-- arch/i386/kernel/cpu/intel_cacheinfo.c 598
-rw-r--r-- arch/i386/kernel/cpu/mcheck/Makefile 2
-rw-r--r-- arch/i386/kernel/cpu/mcheck/k7.c 97
-rw-r--r-- arch/i386/kernel/cpu/mcheck/mce.c 77
-rw-r--r-- arch/i386/kernel/cpu/mcheck/mce.h 14
-rw-r--r-- arch/i386/kernel/cpu/mcheck/non-fatal.c 93
-rw-r--r-- arch/i386/kernel/cpu/mcheck/p4.c 271
-rw-r--r-- arch/i386/kernel/cpu/mcheck/p5.c 54
-rw-r--r-- arch/i386/kernel/cpu/mcheck/p6.c 115
-rw-r--r-- arch/i386/kernel/cpu/mcheck/winchip.c 37
-rw-r--r-- arch/i386/kernel/cpu/mtrr/Makefile 5
-rw-r--r-- arch/i386/kernel/cpu/mtrr/amd.c 121
-rw-r--r-- arch/i386/kernel/cpu/mtrr/centaur.c 223
-rw-r--r-- arch/i386/kernel/cpu/mtrr/changelog 229
-rw-r--r-- arch/i386/kernel/cpu/mtrr/cyrix.c 364
-rw-r--r-- arch/i386/kernel/cpu/mtrr/generic.c 417
-rw-r--r-- arch/i386/kernel/cpu/mtrr/if.c 374
-rw-r--r-- arch/i386/kernel/cpu/mtrr/main.c 693
-rw-r--r-- arch/i386/kernel/cpu/mtrr/mtrr.h 98
-rw-r--r-- arch/i386/kernel/cpu/mtrr/state.c 78
-rw-r--r-- arch/i386/kernel/cpu/nexgen.c 63
-rw-r--r-- arch/i386/kernel/cpu/proc.c 149
-rw-r--r-- arch/i386/kernel/cpu/rise.c 53
-rw-r--r-- arch/i386/kernel/cpu/transmeta.c 107
-rw-r--r-- arch/i386/kernel/cpu/umc.c 33
-rw-r--r-- arch/i386/kernel/cpuid.c 246
-rw-r--r-- arch/i386/kernel/dmi_scan.c 487
-rw-r--r-- arch/i386/kernel/doublefault.c 65
-rw-r--r-- arch/i386/kernel/early_printk.c 2
-rw-r--r-- arch/i386/kernel/efi.c 635
-rw-r--r-- arch/i386/kernel/efi_stub.S 124
-rw-r--r-- arch/i386/kernel/entry.S 950
-rw-r--r-- arch/i386/kernel/head.S 521
-rw-r--r-- arch/i386/kernel/i386_ksyms.c 195
-rw-r--r-- arch/i386/kernel/i387.c 555
-rw-r--r-- arch/i386/kernel/i8259.c 429
-rw-r--r-- arch/i386/kernel/init_task.c 46
-rw-r--r-- arch/i386/kernel/io_apic.c 2545
-rw-r--r-- arch/i386/kernel/ioport.c 147
-rw-r--r-- arch/i386/kernel/irq.c 261
-rw-r--r-- arch/i386/kernel/kprobes.c 385
-rw-r--r-- arch/i386/kernel/ldt.c 255
-rw-r--r-- arch/i386/kernel/mca.c 474
-rw-r--r-- arch/i386/kernel/microcode.c 512
-rw-r--r-- arch/i386/kernel/module.c 129
-rw-r--r-- arch/i386/kernel/mpparse.c 1109
-rw-r--r-- arch/i386/kernel/msr.c 346
-rw-r--r-- arch/i386/kernel/nmi.c 570
-rw-r--r-- arch/i386/kernel/numaq.c 79
-rw-r--r-- arch/i386/kernel/pci-dma.c 147
-rw-r--r-- arch/i386/kernel/process.c 848
-rw-r--r-- arch/i386/kernel/ptrace.c 717
-rw-r--r-- arch/i386/kernel/quirks.c 52
-rw-r--r-- arch/i386/kernel/reboot.c 382
-rw-r--r-- arch/i386/kernel/scx200.c 167
-rw-r--r-- arch/i386/kernel/semaphore.c 297
-rw-r--r-- arch/i386/kernel/setup.c 1535
-rw-r--r-- arch/i386/kernel/sigframe.h 21
-rw-r--r-- arch/i386/kernel/signal.c 665
-rw-r--r-- arch/i386/kernel/smp.c 612
-rw-r--r-- arch/i386/kernel/smpboot.c 1145
-rw-r--r-- arch/i386/kernel/srat.c 456
-rw-r--r-- arch/i386/kernel/summit.c 180
-rw-r--r-- arch/i386/kernel/sys_i386.c 252
-rw-r--r-- arch/i386/kernel/sysenter.c 65
-rw-r--r-- arch/i386/kernel/time.c 476
-rw-r--r-- arch/i386/kernel/time_hpet.c 458
-rw-r--r-- arch/i386/kernel/timers/Makefile 9
-rw-r--r-- arch/i386/kernel/timers/common.c 160
-rw-r--r-- arch/i386/kernel/timers/timer.c 66
-rw-r--r-- arch/i386/kernel/timers/timer_cyclone.c 259
-rw-r--r-- arch/i386/kernel/timers/timer_hpet.c 191
-rw-r--r-- arch/i386/kernel/timers/timer_none.c 39
-rw-r--r-- arch/i386/kernel/timers/timer_pit.c 206
-rw-r--r-- arch/i386/kernel/timers/timer_pm.c 258
-rw-r--r-- arch/i386/kernel/timers/timer_tsc.c 560
-rw-r--r-- arch/i386/kernel/trampoline.S 80
-rw-r--r-- arch/i386/kernel/traps.c 1084
-rw-r--r-- arch/i386/kernel/vm86.c 804
-rw-r--r-- arch/i386/kernel/vmlinux.lds.S 134
-rw-r--r-- arch/i386/kernel/vsyscall-int80.S 53
-rw-r--r-- arch/i386/kernel/vsyscall-sigreturn.S 142
-rw-r--r-- arch/i386/kernel/vsyscall-sysenter.S 104
-rw-r--r-- arch/i386/kernel/vsyscall.S 15
-rw-r--r-- arch/i386/kernel/vsyscall.lds.S 65
-rw-r--r-- arch/i386/lib/Makefile 10
-rw-r--r-- arch/i386/lib/bitops.c 70
-rw-r--r-- arch/i386/lib/checksum.S 496
-rw-r--r-- arch/i386/lib/dec_and_lock.c 40
-rw-r--r-- arch/i386/lib/delay.c 49
-rw-r--r-- arch/i386/lib/getuser.S 70
-rw-r--r-- arch/i386/lib/memcpy.c 44
-rw-r--r-- arch/i386/lib/mmx.c 399
-rw-r--r-- arch/i386/lib/putuser.S 87
-rw-r--r-- arch/i386/lib/strstr.c 31
-rw-r--r-- arch/i386/lib/usercopy.c 636
-rw-r--r-- arch/i386/mach-default/Makefile 5
-rw-r--r-- arch/i386/mach-default/setup.c 106
-rw-r--r-- arch/i386/mach-default/topology.c 98
-rw-r--r-- arch/i386/mach-es7000/Makefile 6
-rw-r--r-- arch/i386/mach-es7000/es7000.h 110
-rw-r--r-- arch/i386/mach-es7000/es7000plat.c 316
-rw-r--r-- arch/i386/mach-generic/Makefile 7
-rw-r--r-- arch/i386/mach-generic/bigsmp.c 54
-rw-r--r-- arch/i386/mach-generic/default.c 27
-rw-r--r-- arch/i386/mach-generic/es7000.c 28
-rw-r--r-- arch/i386/mach-generic/probe.c 102
-rw-r--r-- arch/i386/mach-generic/summit.c 27
-rw-r--r-- arch/i386/mach-visws/Makefile 8
-rw-r--r-- arch/i386/mach-visws/mpparse.c 105
-rw-r--r-- arch/i386/mach-visws/reboot.c 51
-rw-r--r-- arch/i386/mach-visws/setup.c 134
-rw-r--r-- arch/i386/mach-visws/traps.c 69
-rw-r--r-- arch/i386/mach-visws/visws_apic.c 303
-rw-r--r-- arch/i386/mach-voyager/Makefile 8
-rw-r--r-- arch/i386/mach-voyager/setup.c 48
-rw-r--r-- arch/i386/mach-voyager/voyager_basic.c 325
-rw-r--r-- arch/i386/mach-voyager/voyager_cat.c 1178
-rw-r--r-- arch/i386/mach-voyager/voyager_smp.c 1931
-rw-r--r-- arch/i386/mach-voyager/voyager_thread.c 167
-rw-r--r-- arch/i386/math-emu/Makefile 30
-rw-r--r-- arch/i386/math-emu/README 427
-rw-r--r-- arch/i386/math-emu/control_w.h 45
-rw-r--r-- arch/i386/math-emu/div_Xsig.S 365
-rw-r--r-- arch/i386/math-emu/div_small.S 47
-rw-r--r-- arch/i386/math-emu/errors.c 739
-rw-r--r-- arch/i386/math-emu/exception.h 53
-rw-r--r-- arch/i386/math-emu/fpu_arith.c 174
-rw-r--r-- arch/i386/math-emu/fpu_asm.h 32
-rw-r--r-- arch/i386/math-emu/fpu_aux.c 204
-rw-r--r-- arch/i386/math-emu/fpu_emu.h 217
-rw-r--r-- arch/i386/math-emu/fpu_entry.c 760
-rw-r--r-- arch/i386/math-emu/fpu_etc.c 143
-rw-r--r-- arch/i386/math-emu/fpu_proto.h 140
-rw-r--r-- arch/i386/math-emu/fpu_system.h 89
-rw-r--r-- arch/i386/math-emu/fpu_tags.c 127
-rw-r--r-- arch/i386/math-emu/fpu_trig.c 1845
-rw-r--r-- arch/i386/math-emu/get_address.c 449
-rw-r--r-- arch/i386/math-emu/load_store.c 270
-rw-r--r-- arch/i386/math-emu/mul_Xsig.S 176
-rw-r--r-- arch/i386/math-emu/poly.h 121
-rw-r--r-- arch/i386/math-emu/poly_2xm1.c 156
-rw-r--r-- arch/i386/math-emu/poly_atan.c 229
-rw-r--r-- arch/i386/math-emu/poly_l2.c 272
-rw-r--r-- arch/i386/math-emu/poly_sin.c 397
-rw-r--r-- arch/i386/math-emu/poly_tan.c 222
-rw-r--r-- arch/i386/math-emu/polynom_Xsig.S 135
-rw-r--r-- arch/i386/math-emu/reg_add_sub.c 374
-rw-r--r-- arch/i386/math-emu/reg_compare.c 381
-rw-r--r-- arch/i386/math-emu/reg_constant.c 120
-rw-r--r-- arch/i386/math-emu/reg_constant.h 25
-rw-r--r-- arch/i386/math-emu/reg_convert.c 53
-rw-r--r-- arch/i386/math-emu/reg_divide.c 207
-rw-r--r-- arch/i386/math-emu/reg_ld_str.c 1370
-rw-r--r-- arch/i386/math-emu/reg_mul.c 132
-rw-r--r-- arch/i386/math-emu/reg_norm.S 147
-rw-r--r-- arch/i386/math-emu/reg_round.S 708
-rw-r--r-- arch/i386/math-emu/reg_u_add.S 167
-rw-r--r-- arch/i386/math-emu/reg_u_div.S 471
-rw-r--r-- arch/i386/math-emu/reg_u_mul.S 148
-rw-r--r-- arch/i386/math-emu/reg_u_sub.S 272
-rw-r--r-- arch/i386/math-emu/round_Xsig.S 141
-rw-r--r-- arch/i386/math-emu/shr_Xsig.S 87
-rw-r--r-- arch/i386/math-emu/status_w.h 65
-rw-r--r-- arch/i386/math-emu/version.h 12
-rw-r--r-- arch/i386/math-emu/wm_shrx.S 204
-rw-r--r-- arch/i386/math-emu/wm_sqrt.S 470
-rw-r--r-- arch/i386/mm/Makefile 10
-rw-r--r-- arch/i386/mm/boot_ioremap.c 97
-rw-r--r-- arch/i386/mm/discontig.c 383
-rw-r--r-- arch/i386/mm/extable.c 36
-rw-r--r-- arch/i386/mm/fault.c 552
-rw-r--r-- arch/i386/mm/highmem.c 89
-rw-r--r-- arch/i386/mm/hugetlbpage.c 431
-rw-r--r-- arch/i386/mm/init.c 696
-rw-r--r-- arch/i386/mm/ioremap.c 320
-rw-r--r-- arch/i386/mm/mmap.c 76
-rw-r--r-- arch/i386/mm/pageattr.c 221
-rw-r--r-- arch/i386/mm/pgtable.c 260
-rw-r--r-- arch/i386/oprofile/Kconfig 23
-rw-r--r-- arch/i386/oprofile/Makefile 12
-rw-r--r-- arch/i386/oprofile/backtrace.c 111
-rw-r--r-- arch/i386/oprofile/init.c 48
-rw-r--r-- arch/i386/oprofile/nmi_int.c 427
-rw-r--r-- arch/i386/oprofile/nmi_timer_int.c 55
-rw-r--r-- arch/i386/oprofile/op_counter.h 29
-rw-r--r-- arch/i386/oprofile/op_model_athlon.c 149
-rw-r--r-- arch/i386/oprofile/op_model_p4.c 725
-rw-r--r-- arch/i386/oprofile/op_model_ppro.c 143
-rw-r--r-- arch/i386/oprofile/op_x86_model.h 50
-rw-r--r-- arch/i386/pci/Makefile 14
-rw-r--r-- arch/i386/pci/acpi.c 53
-rw-r--r-- arch/i386/pci/common.c 251
-rw-r--r-- arch/i386/pci/direct.c 289
-rw-r--r-- arch/i386/pci/fixup.c 386
-rw-r--r-- arch/i386/pci/i386.c 304
-rw-r--r-- arch/i386/pci/irq.c 1119
-rw-r--r-- arch/i386/pci/legacy.c 54
-rw-r--r-- arch/i386/pci/mmconfig.c 122
-rw-r--r-- arch/i386/pci/numa.c 130
-rw-r--r-- arch/i386/pci/pcbios.c 487
-rw-r--r-- arch/i386/pci/pci.h 74
-rw-r--r-- arch/i386/pci/visws.c 110
-rw-r--r-- arch/i386/power/Makefile 2
-rw-r--r-- arch/i386/power/cpu.c 152
-rw-r--r-- arch/i386/power/swsusp.S 73
264 files changed, 80767 insertions, 0 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
new file mode 100644
index 000000000000..17a0cbce6f30
--- /dev/null
+++ b/arch/i386/Kconfig
@@ -0,0 +1,1269 @@
#
# For a description of the syntax of this configuration file,
# see Documentation/kbuild/kconfig-language.txt.
#

mainmenu "Linux Kernel Configuration"

config X86
	bool
	default y
	help
	  This is Linux's home port. Linux was originally native to the Intel
	  386, and runs on all the later x86 processors including the Intel
	  486, 586, Pentiums, and various instruction-set-compatible chips by
	  AMD, Cyrix, and others.

config MMU
	bool
	default y

config SBUS
	bool

config UID16
	bool
	default y

config GENERIC_ISA_DMA
	bool
	default y

config GENERIC_IOMAP
	bool
	default y

source "init/Kconfig"

menu "Processor type and features"

choice
	prompt "Subarchitecture Type"
	default X86_PC

config X86_PC
	bool "PC-compatible"
	help
	  Choose this option if your computer is a standard PC or compatible.

config X86_ELAN
	bool "AMD Elan"
	help
	  Select this for an AMD Elan processor.

	  Do not use this option for K6/Athlon/Opteron processors!

	  If unsure, choose "PC-compatible" instead.

config X86_VOYAGER
	bool "Voyager (NCR)"
	help
	  Voyager is an MCA-based 32-way capable SMP architecture proprietary
	  to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based.

	  *** WARNING ***

	  If you do not specifically know you have a Voyager based machine,
	  say N here, otherwise the kernel you build will not be bootable.

config X86_NUMAQ
	bool "NUMAQ (IBM/Sequent)"
	select DISCONTIGMEM
	select NUMA
	help
	  This option is used for getting Linux to run on an (IBM/Sequent) NUMA
	  multiquad box. This changes the way that processors are bootstrapped,
	  and uses Clustered Logical APIC addressing mode instead of Flat Logical.
	  You will need a new lynxer.elf file to flash your firmware with - send
	  email to <Martin.Bligh@us.ibm.com>.

config X86_SUMMIT
	bool "Summit/EXA (IBM x440)"
	depends on SMP
	help
	  This option is needed for IBM systems that use the Summit/EXA chipset.
	  In particular, it is needed for the x440.

	  If you don't have one of these computers, you should say N here.

config X86_BIGSMP
	bool "Support for other sub-arch SMP systems with more than 8 CPUs"
	depends on SMP
	help
	  This option is needed for the systems that have more than 8 CPUs
	  and if the system is not of any sub-arch type above.

	  If you don't have such a system, you should say N here.

config X86_VISWS
	bool "SGI 320/540 (Visual Workstation)"
	help
	  The SGI Visual Workstation series is an IA32-based workstation
	  based on SGI systems chips with some legacy PC hardware attached.

	  Say Y here to create a kernel to run on the SGI 320 or 540.

	  A kernel compiled for the Visual Workstation will not run on PCs
	  and vice versa. See <file:Documentation/sgi-visws.txt> for details.

config X86_GENERICARCH
	bool "Generic architecture (Summit, bigsmp, ES7000, default)"
	depends on SMP
	help
	  This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
	  It is intended for a generic binary kernel.

config X86_ES7000
	bool "Support for Unisys ES7000 IA32 series"
	depends on SMP
	help
	  Support for Unisys ES7000 systems. Say 'Y' here if this kernel is
	  supposed to run on an IA32-based Unisys ES7000 system.
	  Only choose this option if you have such a system, otherwise you
	  should say N here.

endchoice

config ACPI_SRAT
	bool
	default y
	depends on NUMA && (X86_SUMMIT || X86_GENERICARCH)

config X86_SUMMIT_NUMA
	bool
	default y
	depends on NUMA && (X86_SUMMIT || X86_GENERICARCH)

config X86_CYCLONE_TIMER
	bool
	default y
	depends on X86_SUMMIT || X86_GENERICARCH

config ES7000_CLUSTERED_APIC
	bool
	default y
	depends on SMP && X86_ES7000 && MPENTIUMIII

if !X86_ELAN

choice
	prompt "Processor family"
	default M686

config M386
	bool "386"
	---help---
	  This is the processor type of your CPU. This information is used for
	  optimizing purposes. In order to compile a kernel that can run on
	  all x86 CPU types (albeit not optimally fast), you can specify
	  "386" here.

	  The kernel will not necessarily run on earlier architectures than
	  the one you have chosen, e.g. a Pentium optimized kernel will run on
	  a PPro, but not necessarily on an i486.

	  Here are the settings recommended for greatest speed:
	  - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI
	    486DLC/DLC2, UMC 486SX-S and NexGen Nx586. Only "386" kernels
	    will run on a 386 class machine.
	  - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
	    SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
	  - "586" for generic Pentium CPUs lacking the TSC
	    (time stamp counter) register.
	  - "Pentium-Classic" for the Intel Pentium.
	  - "Pentium-MMX" for the Intel Pentium MMX.
	  - "Pentium-Pro" for the Intel Pentium Pro.
	  - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron.
	  - "Pentium-III" for the Intel Pentium III or Coppermine Celeron.
	  - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron.
	  - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D).
	  - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird).
	  - "Crusoe" for the Transmeta Crusoe series.
	  - "Efficeon" for the Transmeta Efficeon series.
	  - "Winchip-C6" for original IDT Winchip.
	  - "Winchip-2" for IDT Winchip 2.
	  - "Winchip-2A" for IDT Winchips with 3dNow! capabilities.
	  - "MediaGX/Geode" for Cyrix MediaGX aka Geode.
	  - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
188 - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above).

	  If you don't know what to do, choose "386".

config M486
	bool "486"
	help
	  Select this for a 486 series processor, either Intel or one of the
	  compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX,
	  DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or
	  U5S.

config M586
	bool "586/K5/5x86/6x86/6x86MX"
	help
	  Select this for a 586 or 686 series processor such as the AMD K5,
	  the Cyrix 5x86, 6x86 and 6x86MX. This choice does not
	  assume the RDTSC (Read Time Stamp Counter) instruction.

config M586TSC
	bool "Pentium-Classic"
	help
	  Select this for a Pentium Classic processor with the RDTSC (Read
	  Time Stamp Counter) instruction for benchmarking.

config M586MMX
	bool "Pentium-MMX"
	help
	  Select this for a Pentium with the MMX graphics/multimedia
	  extended instructions.

config M686
	bool "Pentium-Pro"
	help
	  Select this for Intel Pentium Pro chips. This enables the use of
	  Pentium Pro extended instructions, and disables the init-time guard
	  against the f00f bug found in earlier Pentiums.

config MPENTIUMII
	bool "Pentium-II/Celeron(pre-Coppermine)"
	help
	  Select this for Intel chips based on the Pentium-II and
	  pre-Coppermine Celeron core. This option enables an unaligned
	  copy optimization, compiles the kernel with optimization flags
	  tailored for the chip, and applies any applicable Pentium Pro
	  optimizations.

config MPENTIUMIII
	bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon"
	help
	  Select this for Intel chips based on the Pentium-III and
	  Celeron-Coppermine core. This option enables use of some
	  extended prefetch instructions in addition to the Pentium II
	  extensions.

config MPENTIUMM
	bool "Pentium M"
	help
	  Select this for Intel Pentium M (not Pentium-4 M)
	  notebook chips.

config MPENTIUM4
	bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon"
	help
	  Select this for Intel Pentium 4 chips. This includes the
	  Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M
	  (not Pentium M) chips. This option enables compile flags
	  optimized for the chip, uses the correct cache shift, and
	  applies any applicable Pentium III optimizations.

config MK6
	bool "K6/K6-II/K6-III"
	help
	  Select this for an AMD K6-family processor. Enables use of
	  some extended instructions, and passes appropriate optimization
	  flags to GCC.

config MK7
	bool "Athlon/Duron/K7"
	help
	  Select this for an AMD Athlon K7-family processor. Enables use of
	  some extended instructions, and passes appropriate optimization
	  flags to GCC.

config MK8
	bool "Opteron/Athlon64/Hammer/K8"
	help
	  Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables
	  use of some extended instructions, and passes appropriate optimization
	  flags to GCC.

config MCRUSOE
	bool "Crusoe"
	help
	  Select this for a Transmeta Crusoe processor. Treats the processor
	  like a 586 with TSC, and sets some GCC optimization flags (like a
	  Pentium Pro with no alignment requirements).

config MEFFICEON
	bool "Efficeon"
	help
	  Select this for a Transmeta Efficeon processor.

config MWINCHIPC6
	bool "Winchip-C6"
	help
	  Select this for an IDT Winchip C6 chip. Linux and GCC
	  treat this chip as a 586TSC with some extended instructions
	  and alignment requirements.

config MWINCHIP2
	bool "Winchip-2"
	help
	  Select this for an IDT Winchip-2. Linux and GCC
	  treat this chip as a 586TSC with some extended instructions
	  and alignment requirements.

config MWINCHIP3D
	bool "Winchip-2A/Winchip-3"
	help
	  Select this for an IDT Winchip-2A or 3. Linux and GCC
	  treat this chip as a 586TSC with some extended instructions
	  and alignment requirements. Also enable out-of-order memory
	  stores for this CPU, which can increase performance of some
	  operations.

config MGEODE
	bool "MediaGX/Geode"
	help
	  Select this for a Cyrix MediaGX aka Geode chip. Linux and GCC
	  treat this chip as a 586TSC with some extended instructions
	  and alignment requirements.

config MCYRIXIII
	bool "CyrixIII/VIA-C3"
	help
	  Select this for a Cyrix III or C3 chip. Presently Linux and GCC
	  treat this chip as a generic 586. Whilst the CPU is 686 class,
	  it lacks the cmov extension which gcc assumes is present when
	  generating 686 code.
	  Note that Nehemiah (Model 9) and above will not boot with this
	  kernel because they lack the 3DNow! instructions used in earlier
	  incarnations of the CPU.

config MVIAC3_2
	bool "VIA C3-2 (Nehemiah)"
	help
	  Select this for a VIA C3 "Nehemiah". Selecting this enables usage
	  of SSE and tells gcc to treat the CPU as a 686.
	  Note, this kernel will not boot on older (pre model 9) C3s.

endchoice

config X86_GENERIC
	bool "Generic x86 support"
	help
	  Instead of just including optimizations for the selected
	  x86 variant (e.g. PII, Crusoe or Athlon), include some more
	  generic optimizations as well. This will make the kernel
	  perform better on x86 CPUs other than that selected.

	  This is really intended for distributors who need more
	  generic optimizations.

endif

#
# Define implied options from the CPU selection here
#
config X86_CMPXCHG
	bool
	depends on !M386
	default y

config X86_XADD
	bool
	depends on !M386
	default y

config X86_L1_CACHE_SHIFT
	int
	default "7" if MPENTIUM4 || X86_GENERIC
	default "4" if X86_ELAN || M486 || M386
	default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE
	default "6" if MK7 || MK8 || MPENTIUMM

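(Illustrative note, not part of the file: the value above is the base-2
logarithm of the L1 cache line size in bytes; the kernel derives the byte
count roughly as in this sketch, modeled on include/asm-i386/cache.h, so
the Pentium 4's "7" means 128-byte cache lines.)

	/* sketch, modeled on include/asm-i386/cache.h */
	#define L1_CACHE_SHIFT	(CONFIG_X86_L1_CACHE_SHIFT)
	#define L1_CACHE_BYTES	(1 << L1_CACHE_SHIFT)
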
config RWSEM_GENERIC_SPINLOCK
	bool
	depends on M386
	default y

config RWSEM_XCHGADD_ALGORITHM
	bool
	depends on !M386
	default y

config GENERIC_CALIBRATE_DELAY
	bool
	default y

config X86_PPRO_FENCE
	bool
	depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODE
	default y

config X86_F00F_BUG
	bool
	depends on M586MMX || M586TSC || M586 || M486 || M386
	default y

config X86_WP_WORKS_OK
	bool
	depends on !M386
	default y

config X86_INVLPG
	bool
	depends on !M386
	default y

config X86_BSWAP
	bool
	depends on !M386
	default y

config X86_POPAD_OK
	bool
	depends on !M386
	default y

config X86_ALIGNMENT_16
	bool
	depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODE
	default y

config X86_GOOD_APIC
	bool
	depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON
	default y

config X86_INTEL_USERCOPY
	bool
	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON
	default y

config X86_USE_PPRO_CHECKSUM
	bool
	depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON
	default y

config X86_USE_3DNOW
	bool
	depends on MCYRIXIII || MK7
	default y

config X86_OOSTORE
	bool
	depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MGEODE) && MTRR
	default y

config HPET_TIMER
	bool "HPET Timer Support"
	help
	  This enables the use of the HPET for the kernel's internal timer.
	  HPET is the next generation timer replacing legacy 8254s.
	  You can safely choose Y here. However, HPET will only be
	  activated if the platform and the BIOS support this feature.
	  Otherwise the 8254 will be used for timing services.

	  Choose N to continue using the legacy 8254 timer.

config HPET_EMULATE_RTC
	bool "Provide RTC interrupt"
	depends on HPET_TIMER && RTC=y

config SMP
	bool "Symmetric multi-processing support"
	---help---
	  This enables support for systems with more than one CPU. If you have
	  a system with only one CPU, like most personal computers, say N. If
	  you have a system with more than one CPU, say Y.

	  If you say N here, the kernel will run on single and multiprocessor
	  machines, but will use only one CPU of a multiprocessor machine. If
	  you say Y here, the kernel will run on many, but not all,
	  singleprocessor machines. On a singleprocessor machine, the kernel
	  will run faster if you say N here.

	  Note that if you say Y here and choose architecture "586" or
	  "Pentium" under "Processor family", the kernel will not work on 486
	  architectures. Similarly, multiprocessor kernels for the "PPro"
	  architecture may not work on all Pentium based boards.

	  People using multiprocessor machines who say Y here should also say
	  Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
	  Management" code will be disabled if you say Y here.

	  See also the <file:Documentation/smp.txt>,
	  <file:Documentation/i386/IO-APIC.txt>,
	  <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
	  <http://www.tldp.org/docs.html#howto>.

	  If you don't know what to do here, say N.

config NR_CPUS
	int "Maximum number of CPUs (2-255)"
	range 2 255
	depends on SMP
	default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
	default "8"
	help
	  This allows you to specify the maximum number of CPUs which this
	  kernel will support. The maximum supported value is 255 and the
	  minimum value which makes sense is 2.

	  This is purely to save memory - each supported CPU adds
	  approximately eight kilobytes to the kernel image.

config SCHED_SMT
	bool "SMT (Hyperthreading) scheduler support"
	depends on SMP
	default n
	help
	  SMT scheduler support improves the CPU scheduler's decision making
	  when dealing with Intel Pentium 4 chips with HyperThreading at a
	  cost of slightly increased overhead in some places. If unsure say
	  N here.

config PREEMPT
	bool "Preemptible Kernel"
	help
	  This option reduces the latency of the kernel when reacting to
	  real-time or interactive events by allowing a low priority process to
	  be preempted even if it is in kernel mode executing a system call.
	  This allows applications to run more reliably even when the system is
	  under load.

	  Say Y here if you are building a kernel for a desktop, embedded
	  or real-time system. Say N if you are unsure.

config PREEMPT_BKL
	bool "Preempt The Big Kernel Lock"
	depends on PREEMPT
	default y
	help
	  This option reduces the latency of the kernel by making the
	  big kernel lock preemptible.

	  Say Y here if you are building a kernel for a desktop system.
	  Say N if you are unsure.

config X86_UP_APIC
	bool "Local APIC support on uniprocessors"
	depends on !SMP && !(X86_VISWS || X86_VOYAGER)
	help
	  A local APIC (Advanced Programmable Interrupt Controller) is an
	  integrated interrupt controller in the CPU. If you have a single-CPU
	  system which has a processor with a local APIC, you can say Y here to
	  enable and use it. If you say Y here even though your machine doesn't
	  have a local APIC, then the kernel will still run with no slowdown at
	  all. The local APIC supports CPU-generated self-interrupts (timer,
	  performance counters), and the NMI watchdog which detects hard
	  lockups.

config X86_UP_IOAPIC
	bool "IO-APIC support on uniprocessors"
	depends on X86_UP_APIC
	help
	  An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an
	  SMP-capable replacement for PC-style interrupt controllers. Most
	  SMP systems and many recent uniprocessor systems have one.

	  If you have a single-CPU system with an IO-APIC, you can say Y here
	  to use it. If you say Y here even though your machine doesn't have
	  an IO-APIC, then the kernel will still run with no slowdown at all.

config X86_LOCAL_APIC
	bool
	depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER)
	default y

config X86_IO_APIC
	bool
	depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER))
	default y

config X86_VISWS_APIC
	bool
	depends on X86_VISWS
	default y

config X86_TSC
	bool
	depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODE) && !X86_NUMAQ
	default y

config X86_MCE
	bool "Machine Check Exception"
	depends on !X86_VOYAGER
	---help---
	  Machine Check Exception support allows the processor to notify the
	  kernel if it detects a problem (e.g. overheating, component failure).
	  The action the kernel takes depends on the severity of the problem,
	  ranging from a warning message on the console, to halting the machine.
	  Your processor must be a Pentium or newer to support this - check the
	  flags in /proc/cpuinfo for mce. Note that some older Pentium systems
	  have a design flaw which leads to false MCE events - hence MCE is
	  disabled on all P5 processors, unless explicitly enabled with "mce"
	  as a boot argument. Similarly, if MCE is built in and creates a
	  problem on some new non-standard machine, you can boot with "nomce"
	  to disable it. MCE support simply ignores non-MCE processors like
	  the 386 and 486, so nearly everyone can say Y here.

config X86_MCE_NONFATAL
	tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
	depends on X86_MCE
	help
	  Enabling this feature starts a timer that triggers every 5 seconds which
	  will look at the machine check registers to see if anything happened.
	  Non-fatal problems automatically get corrected (but still logged).
	  Disable this if you don't want to see these messages.
	  Seeing the messages this option prints out may be indicative of dying hardware,
	  or out-of-spec (ie, overclocked) hardware.
	  This option only does something on certain CPUs.
	  (AMD Athlon/Duron and Intel Pentium 4)

config X86_MCE_P4THERMAL
615 bool "check for P4 thermal throttling interrupt."
	depends on X86_MCE && (X86_UP_APIC || SMP) && !X86_VISWS
	help
	  Enabling this feature will cause a message to be printed when the P4
	  enters thermal throttling.

config TOSHIBA
	tristate "Toshiba Laptop support"
	---help---
	  This adds a driver to safely access the System Management Mode of
	  the CPU on Toshiba portables with a genuine Toshiba BIOS. It does
	  not work on models with a Phoenix BIOS. The System Management Mode
	  is used to set the BIOS and power saving options on Toshiba portables.

	  For information on utilities to make use of this driver see the
	  Toshiba Linux utilities web site at:
	  <http://www.buzzard.org.uk/toshiba/>.

	  Say Y if you intend to run this kernel on a Toshiba portable.
	  Say N otherwise.

config I8K
	tristate "Dell laptop support"
	---help---
	  This adds a driver to safely access the System Management Mode
	  of the CPU on the Dell Inspiron 8000. The System Management Mode
	  is used to read cpu temperature and cooling fan status and to
	  control the fans on the I8K portables.

	  This driver has been tested only on the Inspiron 8000 but it may
	  also work with other Dell laptops. You can force loading on other
	  models by passing the parameter `force=1' to the module. Use at
	  your own risk.

	  For information on utilities to make use of this driver see the
	  I8K Linux utilities web site at:
	  <http://people.debian.org/~dz/i8k/>

	  Say Y if you intend to run this kernel on a Dell Inspiron 8000.
	  Say N otherwise.

config MICROCODE
	tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
	---help---
	  If you say Y here and also to "/dev file system support" in the
	  'File systems' section, you will be able to update the microcode on
	  Intel processors in the IA32 family, e.g. Pentium Pro, Pentium II,
	  Pentium III, Pentium 4, Xeon etc. You will obviously need the
	  actual microcode binary data itself which is not shipped with the
	  Linux kernel.

	  For latest news and information on obtaining all the required
	  ingredients for this driver, check:
	  <http://www.urbanmyth.org/microcode/>.

	  To compile this driver as a module, choose M here: the
	  module will be called microcode.

config X86_MSR
	tristate "/dev/cpu/*/msr - Model-specific register support"
	help
	  This device gives privileged processes access to the x86
	  Model-Specific Registers (MSRs). It is a character device with
	  major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr.
	  MSR accesses are directed to a specific CPU on multi-processor
	  systems.

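(Illustrative sketch of the interface described above, not part of the
file: the file offset selects the MSR index and each read returns the
64-bit register value. The CPU number and MSR index, 0x10 for the time
stamp counter, are examples only.)

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		uint64_t val;
		int fd = open("/dev/cpu/0/msr", O_RDONLY);

		if (fd < 0) {
			perror("open /dev/cpu/0/msr");
			return 1;
		}
		/* the file offset is the MSR index; reads are 8 bytes wide */
		if (pread(fd, &val, sizeof(val), 0x10) != sizeof(val)) {
			perror("pread");
			return 1;
		}
		printf("MSR 0x10 (TSC) = %#llx\n", (unsigned long long)val);
		return 0;
	}
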
config X86_CPUID
	tristate "/dev/cpu/*/cpuid - CPU information support"
	help
	  This device gives processes access to the x86 CPUID instruction to
	  be executed on a specific processor. It is a character device
	  with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
	  /dev/cpu/31/cpuid.

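(Illustrative sketch, analogous to the msr example above: the file offset
selects the CPUID level and each read returns the four 32-bit result
registers; level 0 recovers the vendor string from ebx/edx/ecx.)

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		uint32_t r[4];	/* eax, ebx, ecx, edx */
		int fd = open("/dev/cpu/0/cpuid", O_RDONLY);

		if (fd < 0) {
			perror("open /dev/cpu/0/cpuid");
			return 1;
		}
		/* the file offset is the CPUID level; reads are 16 bytes wide */
		if (pread(fd, r, sizeof(r), 0) != sizeof(r)) {
			perror("pread");
			return 1;
		}
		printf("vendor: %.4s%.4s%.4s\n",
		       (char *)&r[1], (char *)&r[3], (char *)&r[2]);
		return 0;
	}
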
source "drivers/firmware/Kconfig"

choice
	prompt "High Memory Support"
	default NOHIGHMEM

config NOHIGHMEM
	bool "off"
	---help---
	  Linux can use up to 64 Gigabytes of physical memory on x86 systems.
	  However, the address space of 32-bit x86 processors is only 4
	  Gigabytes large. That means that, if you have a large amount of
	  physical memory, not all of it can be "permanently mapped" by the
	  kernel. The physical memory that's not permanently mapped is called
	  "high memory".

	  If you are compiling a kernel which will never run on a machine with
	  more than 1 Gigabyte total physical RAM, answer "off" here (default
	  choice and suitable for most users). This will result in a "3GB/1GB"
	  split: 3GB are mapped so that each process sees a 3GB virtual memory
	  space and the remaining part of the 4GB virtual memory space is used
	  by the kernel to permanently map as much physical memory as
	  possible.

	  If the machine has between 1 and 4 Gigabytes physical RAM, then
	  answer "4GB" here.

	  If more than 4 Gigabytes is used then answer "64GB" here. This
	  selection turns Intel PAE (Physical Address Extension) mode on.
	  PAE implements 3-level paging on IA32 processors. PAE is fully
	  supported by Linux, PAE mode is implemented on all recent Intel
	  processors (Pentium Pro and better). NOTE: If you say "64GB" here,
	  then the kernel will not boot on CPUs that don't support PAE!

	  The actual amount of total physical memory will either be
	  auto detected or can be forced by using a kernel command line option
	  such as "mem=256M". (Try "man bootparam" or see the documentation of
	  your boot loader (lilo or loadlin) about how to pass options to the
	  kernel at boot time.)

	  If unsure, say "off".

config HIGHMEM4G
	bool "4GB"
	help
	  Select this if you have a 32-bit processor and between 1 and 4
	  gigabytes of physical RAM.

config HIGHMEM64G
	bool "64GB"
	help
	  Select this if you have a 32-bit processor and more than 4
	  gigabytes of physical RAM.

endchoice

config HIGHMEM
	bool
	depends on HIGHMEM64G || HIGHMEM4G
	default y

config X86_PAE
	bool
	depends on HIGHMEM64G
	default y

# Common NUMA Features
config NUMA
	bool "Numa Memory Allocation and Scheduler Support"
	depends on SMP && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI))
	default n if X86_PC
	default y if (X86_NUMAQ || X86_SUMMIT)

# Need comments to help the hapless user trying to turn on NUMA support
comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support"
	depends on X86_NUMAQ && (!HIGHMEM64G || !SMP)

comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
	depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI)

config DISCONTIGMEM
	bool
	depends on NUMA
	default y

config HAVE_ARCH_BOOTMEM_NODE
	bool
	depends on NUMA
	default y

config HAVE_MEMORY_PRESENT
	bool
	depends on DISCONTIGMEM
	default y

config NEED_NODE_MEMMAP_SIZE
	bool
	depends on DISCONTIGMEM
	default y

config HIGHPTE
	bool "Allocate 3rd-level pagetables from highmem"
	depends on HIGHMEM4G || HIGHMEM64G
	help
	  The VM uses one page table entry for each page of physical memory.
	  For systems with a lot of RAM, this can be wasteful of precious
	  low memory. Setting this option will put user-space page table
	  entries in high memory.

config MATH_EMULATION
	bool "Math emulation"
	---help---
	  Linux can emulate a math coprocessor (used for floating point
	  operations) if you don't have one. 486DX and Pentium processors have
	  a math coprocessor built in, 486SX and 386 do not, unless you added
	  a 487DX or 387, respectively. (The messages during boot time can
	  give you some hints here ["man dmesg"].) Everyone needs either a
	  coprocessor or this emulation.

	  If you don't have a math coprocessor, you need to say Y here; if you
	  say Y here even though you have a coprocessor, the coprocessor will
	  be used nevertheless. (This behavior can be changed with the kernel
	  command line option "no387", which comes handy if your coprocessor
	  is broken. Try "man bootparam" or see the documentation of your boot
	  loader (lilo or loadlin) about how to pass options to the kernel at
	  boot time.) This means that it is a good idea to say Y here if you
	  intend to use this kernel on different machines.

	  More information about the internals of the Linux math coprocessor
	  emulation can be found in <file:arch/i386/math-emu/README>.

	  If you are not sure, say Y; apart from resulting in a 66 KB bigger
	  kernel, it won't hurt.

config MTRR
	bool "MTRR (Memory Type Range Register) support"
	---help---
	  On Intel P6 family processors (Pentium Pro, Pentium II and later)
	  the Memory Type Range Registers (MTRRs) may be used to control
	  processor access to memory ranges. This is most useful if you have
	  a video (VGA) card on a PCI or AGP bus. Enabling write-combining
	  allows bus write transfers to be combined into a larger transfer
	  before bursting over the PCI/AGP bus. This can increase performance
	  of image write operations 2.5 times or more. Saying Y here creates a
	  /proc/mtrr file which may be used to manipulate your processor's
	  MTRRs. Typically the X server should use this.

	  This code has a reasonably generic interface so that similar
	  control registers on other processors can be easily supported
	  as well:

	  The Cyrix 6x86, 6x86MX and M II processors have Address Range
	  Registers (ARRs) which provide a similar functionality to MTRRs. For
	  these, the ARRs are used to emulate the MTRRs.
	  The AMD K6-2 (stepping 8 and above) and K6-3 processors have two
	  MTRRs. The Centaur C6 (WinChip) has 8 MCRs, allowing
	  write-combining. All of these processors are supported by this code
	  and it makes sense to say Y here if you have one of them.

	  Saying Y here also fixes a problem with buggy SMP BIOSes which only
	  set the MTRRs for the boot CPU and not for the secondary CPUs. This
	  can lead to all sorts of problems, so it's good to say Y here.

	  You can safely say Y even if your machine doesn't have MTRRs, you'll
	  just add about 9 KB to your kernel.

	  See <file:Documentation/mtrr.txt> for more information.

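(Illustrative sketch of the /proc/mtrr write interface mentioned above;
the base and size shown are hypothetical placeholders and must match your
own card's framebuffer, see Documentation/mtrr.txt for the format.)

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/mtrr", "w");

		if (!f) {
			perror("fopen /proc/mtrr");
			return 1;
		}
		/* base/size below are placeholders, not real values */
		fprintf(f, "base=0xf8000000 size=0x400000 type=write-combining\n");
		return fclose(f) ? 1 : 0;
	}
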
config EFI
	bool "Boot from EFI support (EXPERIMENTAL)"
	depends on ACPI
	default n
	---help---
	  This enables the kernel to boot on EFI platforms using
	  system configuration information passed to it from the firmware.
	  This also enables the kernel to use any EFI runtime services that are
	  available (such as the EFI variable services).

	  This option is only useful on systems that have EFI firmware
	  and will result in a kernel image that is ~8k larger. In addition,
	  you must use the latest ELILO loader available at
	  <http://elilo.sourceforge.net> in order to take advantage of
	  kernel initialization using EFI information (neither GRUB nor LILO know
	  anything about EFI). However, even with this option, the resultant
	  kernel should continue to boot on existing non-EFI platforms.

config IRQBALANCE
	bool "Enable kernel irq balancing"
	depends on SMP && X86_IO_APIC
	default y
	help
	  The default yes will allow the kernel to do irq load balancing.
	  Saying no will keep the kernel from doing irq load balancing.

config HAVE_DEC_LOCK
	bool
	depends on (SMP || PREEMPT) && X86_CMPXCHG
	default y

# turning this on wastes a bunch of space.
# Summit needs it only when NUMA is on
config BOOT_IOREMAP
	bool
	depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI))
	default y

config REGPARM
	bool "Use register arguments (EXPERIMENTAL)"
	depends on EXPERIMENTAL
	default n
	help
	  Compile the kernel with -mregparm=3. This uses a different ABI
	  and passes the first three arguments of a function call in registers.
	  This will probably break binary only modules.

	  This feature is only enabled for gcc-3.0 and later - earlier compilers
	  generate incorrect output with certain kernel constructs when
	  -mregparm=3 is used.

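(Illustrative sketch for intuition, not part of the file: GCC's regparm
attribute is the per-function form of -mregparm=3; with it, the first
three integer arguments travel in %eax, %edx and %ecx instead of on the
stack.)

	/* a, b and c arrive in %eax, %edx and %ecx */
	int __attribute__((regparm(3))) add3(int a, int b, int c)
	{
		return a + b + c;
	}
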
config SECCOMP
	bool "Enable seccomp to safely compute untrusted bytecode"
	depends on PROC_FS
	default y
	help
	  This kernel feature is useful for number crunching applications
	  that may need to compute untrusted bytecode during their
	  execution. By using pipes or other transports made available to
	  the process as file descriptors supporting the read/write
	  syscalls, it's possible to isolate those applications in
	  their own address space using seccomp. Once seccomp is
	  enabled via /proc/<pid>/seccomp, it cannot be disabled
	  and the task is only allowed to execute a few safe syscalls
	  defined by each seccomp mode.

	  If unsure, say Y. Only embedded systems should say N here.

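(Illustrative sketch of the /proc interface described above, assuming
this era's strict mode 1: after the write, only read, write, exit and
sigreturn are permitted, so the sketch exits through the raw exit
syscall rather than returning through libc.)

	#include <fcntl.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/proc/self/seccomp", O_WRONLY);

		if (fd < 0)
			return 1;
		write(fd, "1", 1);		/* restrictions apply from here on */
		write(1, "sandboxed\n", 10);	/* write(2) is still allowed */
		syscall(SYS_exit, 0);		/* libc exit paths may use other syscalls */
		return 0;			/* not reached */
	}
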
endmenu


menu "Power management options (ACPI, APM)"
	depends on !X86_VOYAGER

source kernel/power/Kconfig

source "drivers/acpi/Kconfig"

menu "APM (Advanced Power Management) BIOS Support"
depends on PM && !X86_VISWS

config APM
	tristate "APM (Advanced Power Management) BIOS support"
	depends on PM
	---help---
	  APM is a BIOS specification for saving power using several different
	  techniques. This is mostly useful for battery powered laptops with
	  APM compliant BIOSes. If you say Y here, the system time will be
	  reset after a RESUME operation, the /proc/apm device will provide
	  battery status information, and user-space programs will receive
	  notification of APM "events" (e.g. battery status change).

	  If you select "Y" here, you can disable actual use of the APM
	  BIOS by passing the "apm=off" option to the kernel at boot time.

	  Note that the APM support is almost completely disabled for
	  machines with more than one CPU.

	  In order to use APM, you will need supporting software. For location
	  and more information, read <file:Documentation/pm.txt> and the
	  Battery Powered Linux mini-HOWTO, available from
	  <http://www.tldp.org/docs.html#howto>.

	  This driver does not spin down disk drives (see the hdparm(8)
	  manpage ("man 8 hdparm") for that), and it doesn't turn off
	  VESA-compliant "green" monitors.

	  This driver does not support the TI 4000M TravelMate and the ACER
	  486/DX4/75 because they don't have compliant BIOSes. Many "green"
	  desktop machines also don't have compliant BIOSes, and this driver
	  may cause those machines to panic during the boot phase.

	  Generally, if you don't have a battery in your machine, there isn't
	  much point in using this driver and you should say N. If you get
	  random kernel OOPSes or reboots that don't seem to be related to
	  anything, try disabling/enabling this option (or disabling/enabling
	  APM in your BIOS).

	  Some other things you should try when experiencing seemingly random,
	  "weird" problems:

	  1) make sure that you have enough swap space and that it is
	     enabled.
	  2) pass the "no-hlt" option to the kernel
	  3) switch on floating point emulation in the kernel and pass
	     the "no387" option to the kernel
	  4) pass the "floppy=nodma" option to the kernel
	  5) pass the "mem=4M" option to the kernel (thereby disabling
	     all but the first 4 MB of RAM)
	  6) make sure that the CPU is not over clocked.
	  7) read the sig11 FAQ at <http://www.bitwizard.nl/sig11/>
	  8) disable the cache from your BIOS settings
	  9) install a fan for the video card or exchange video RAM
	  10) install a better fan for the CPU
	  11) exchange RAM chips
	  12) exchange the motherboard.

	  To compile this driver as a module, choose M here: the
	  module will be called apm.

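(Illustrative sketch: the /proc/apm record mentioned above is a single
line whose fields include the driver and BIOS versions, flags, AC line
status and battery percentage; here it is simply dumped, with parsing
left to the reader.)

	#include <stdio.h>

	int main(void)
	{
		char line[128];
		FILE *f = fopen("/proc/apm", "r");

		if (!f) {
			perror("fopen /proc/apm");
			return 1;
		}
		if (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}
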
config APM_IGNORE_USER_SUSPEND
	bool "Ignore USER SUSPEND"
	depends on APM
	help
	  This option will ignore USER SUSPEND requests. On machines with a
	  compliant APM BIOS, you want to say N. However, on the NEC Versa M
	  series notebooks, it is necessary to say Y because of a BIOS bug.

config APM_DO_ENABLE
	bool "Enable PM at boot time"
	depends on APM
	---help---
	  Enable APM features at boot time. From page 36 of the APM BIOS
	  specification: "When disabled, the APM BIOS does not automatically
	  power manage devices, enter the Standby State, enter the Suspend
	  State, or take power saving steps in response to CPU Idle calls."
	  This driver will make CPU Idle calls when Linux is idle (unless this
	  feature is turned off -- see "Do CPU IDLE calls", below). This
	  should always save battery power, but more complicated APM features
	  will be dependent on your BIOS implementation. You may need to turn
	  this option off if your computer hangs at boot time when using APM
	  support, or if it beeps continuously instead of suspending. Turn
	  this off if you have a NEC UltraLite Versa 33/C or a Toshiba
	  T400CDT. This is off by default since most machines do fine without
	  this feature.

config APM_CPU_IDLE
	bool "Make CPU Idle calls when idle"
	depends on APM
	help
	  Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop.
	  On some machines, this can activate improved power savings, such as
	  a slowed CPU clock rate, when the machine is idle. These idle calls
	  are made after the idle loop has run for some length of time (e.g.,
	  333 mS). On some machines, this will cause a hang at boot time or
	  whenever the CPU becomes idle. (On machines with more than one CPU,
	  this option does nothing.)

config APM_DISPLAY_BLANK
	bool "Enable console blanking using APM"
	depends on APM
	help
	  Enable console blanking using the APM. Some laptops can use this to
	  turn off the LCD backlight when the screen blanker of the Linux
	  virtual console blanks the screen. Note that this is only used by
	  the virtual console screen blanker, and won't turn off the backlight
	  when using the X Window system. This also doesn't have anything to
	  do with your VESA-compliant power-saving monitor. Further, this
	  option doesn't work for all laptops -- it might not turn off your
	  backlight at all, or it might print a lot of errors to the console,
	  especially if you are using gpm.

config APM_RTC_IS_GMT
	bool "RTC stores time in GMT"
	depends on APM
	help
	  Say Y here if your RTC (Real Time Clock a.k.a. hardware clock)
	  stores the time in GMT (Greenwich Mean Time). Say N if your RTC
	  stores localtime.

	  It is in fact recommended to store GMT in your RTC, because then you
	  don't have to worry about daylight savings time changes. The only
	  reason not to use GMT in your RTC is if you also run a broken OS
	  that doesn't understand GMT.

config APM_ALLOW_INTS
	bool "Allow interrupts during APM BIOS calls"
	depends on APM
	help
	  Normally we disable external interrupts while we are making calls to
	  the APM BIOS as a measure to lessen the effects of a badly behaving
	  BIOS implementation. The BIOS should reenable interrupts if it
	  needs to. Unfortunately, some BIOSes do not -- especially those in
	  many of the newer IBM Thinkpads. If you experience hangs when you
	  suspend, try setting this to Y. Otherwise, say N.

config APM_REAL_MODE_POWER_OFF
	bool "Use real mode APM BIOS call to power off"
	depends on APM
	help
	  Use real mode APM BIOS calls to switch off the computer. This is
	  a work-around for a number of buggy BIOSes. Switch this option on if
	  your computer crashes instead of powering off properly.

endmenu

source "arch/i386/kernel/cpu/cpufreq/Kconfig"

endmenu

menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"

config PCI
	bool "PCI support" if !X86_VISWS
	depends on !X86_VOYAGER
	default y if X86_VISWS
	help
	  Find out whether you have a PCI motherboard. PCI is the name of a
	  bus system, i.e. the way the CPU talks to the other stuff inside
	  your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
	  VESA. If you have PCI, say Y, otherwise N.

	  The PCI-HOWTO, available from
	  <http://www.tldp.org/docs.html#howto>, contains valuable
	  information about which PCI hardware does work under Linux and which
	  doesn't.

choice
	prompt "PCI access mode"
	depends on PCI && !X86_VISWS
	default PCI_GOANY
	---help---
	  On PCI systems, the BIOS can be used to detect the PCI devices and
	  determine their configuration. However, some old PCI motherboards
	  have BIOS bugs and may crash if this is done. Also, some embedded
	  PCI-based systems don't have any BIOS at all. Linux can also try to
	  detect the PCI hardware directly without using the BIOS.

	  With this option, you can specify how Linux should detect the
	  PCI devices. If you choose "BIOS", the BIOS will be used,
	  if you choose "Direct", the BIOS won't be used, and if you
	  choose "MMConfig", then PCI Express MMCONFIG will be used.
	  If you choose "Any", the kernel will try MMCONFIG, then the
	  direct access method and falls back to the BIOS if that doesn't
	  work. If unsure, go with the default, which is "Any".

config PCI_GOBIOS
	bool "BIOS"

config PCI_GOMMCONFIG
	bool "MMConfig"

config PCI_GODIRECT
	bool "Direct"

config PCI_GOANY
	bool "Any"

endchoice

config PCI_BIOS
	bool
	depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
	default y

config PCI_DIRECT
	bool
	depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
	default y

config PCI_MMCONFIG
	bool
	depends on PCI && (PCI_GOMMCONFIG || (PCI_GOANY && ACPI))
	select ACPI_BOOT
	default y

source "drivers/pci/pcie/Kconfig"

source "drivers/pci/Kconfig"

config ISA
	bool "ISA support"
	depends on !(X86_VOYAGER || X86_VISWS)
	help
	  Find out whether you have ISA slots on your motherboard. ISA is the
	  name of a bus system, i.e. the way the CPU talks to the other stuff
	  inside your box. Other bus systems are PCI, EISA, MicroChannel
	  (MCA) or VESA. ISA is an older system, now being displaced by PCI;
	  newer boards don't support it. If you have ISA, say Y, otherwise N.

config EISA
	bool "EISA support"
	depends on ISA
	---help---
	  The Extended Industry Standard Architecture (EISA) bus was
	  developed as an open alternative to the IBM MicroChannel bus.

	  The EISA bus provided some of the features of the IBM MicroChannel
	  bus while maintaining backward compatibility with cards made for
	  the older ISA bus. The EISA bus saw limited use between 1988 and
	  1995 when it was made obsolete by the PCI bus.

	  Say Y here if you are building a kernel for an EISA-based machine.

	  Otherwise, say N.

source "drivers/eisa/Kconfig"

config MCA
	bool "MCA support" if !(X86_VISWS || X86_VOYAGER)
	default y if X86_VOYAGER
	help
	  MicroChannel Architecture is found in some IBM PS/2 machines and
	  laptops. It is a bus system similar to PCI or ISA. See
	  <file:Documentation/mca.txt> (and especially the web page given
	  there) before attempting to build an MCA bus kernel.

source "drivers/mca/Kconfig"

config SCx200
	tristate "NatSemi SCx200 support"
	depends on !X86_VOYAGER
	help
	  This provides basic support for the National Semiconductor SCx200
	  processor. Right now this is just a driver for the GPIO pins.

	  If you don't know what to do here, say N.

	  This support is also available as a module. If compiled as a
	  module, it will be called scx200.

source "drivers/pcmcia/Kconfig"

source "drivers/pci/hotplug/Kconfig"

endmenu

menu "Executable file formats"

source "fs/Kconfig.binfmt"

endmenu

source "drivers/Kconfig"

source "fs/Kconfig"

source "arch/i386/oprofile/Kconfig"

source "arch/i386/Kconfig.debug"

source "security/Kconfig"

source "crypto/Kconfig"

source "lib/Kconfig"

#
# Use the generic interrupt handling code in kernel/irq/:
#
config GENERIC_HARDIRQS
	bool
	default y

config GENERIC_IRQ_PROBE
	bool
	default y

config X86_SMP
	bool
	depends on SMP && !X86_VOYAGER
	default y

config X86_HT
	bool
	depends on SMP && !(X86_VISWS || X86_VOYAGER)
	default y

config X86_BIOS_REBOOT
	bool
	depends on !(X86_VISWS || X86_VOYAGER)
	default y

config X86_TRAMPOLINE
	bool
	depends on X86_SMP || (X86_VOYAGER && SMP)
	default y

config PC
	bool
	depends on X86 && !EMBEDDED
	default y
diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
new file mode 100644
index 000000000000..bfb2064f7104
--- /dev/null
+++ b/arch/i386/Kconfig.debug
@@ -0,0 +1,72 @@
1menu "Kernel hacking"
2
3source "lib/Kconfig.debug"
4
5config EARLY_PRINTK
6 bool "Early printk" if EMBEDDED && DEBUG_KERNEL
7 default y
8 help
9 Write kernel log output directly into the VGA buffer or to a serial
10 port.
11
12 This is useful for kernel debugging when your machine crashes very
13 early before the console code is initialized. For normal operation
14 it is not recommended because it looks ugly and doesn't cooperate
15 with klogd/syslogd or the X server. You should normally say N here,
16 unless you want to debug such a crash.
17
18config DEBUG_STACKOVERFLOW
19 bool "Check for stack overflows"
20 depends on DEBUG_KERNEL
21
22config KPROBES
23 bool "Kprobes"
24 depends on DEBUG_KERNEL
25 help
26 Kprobes allows you to trap at almost any kernel address and
27 execute a callback function. register_kprobe() establishes
28 a probepoint and specifies the callback. Kprobes is useful
29 for kernel debugging, non-intrusive instrumentation and testing.
30 If in doubt, say "N".
31
32config DEBUG_STACK_USAGE
33 bool "Stack utilization instrumentation"
34 depends on DEBUG_KERNEL
35 help
36 Enables the display of the minimum amount of free stack which each
37 task has ever had available in the sysrq-T and sysrq-P debug output.
38
39 This option will slow down process creation somewhat.
40
41comment "Page alloc debug is incompatible with Software Suspend on i386"
42 depends on DEBUG_KERNEL && SOFTWARE_SUSPEND
43
44config DEBUG_PAGEALLOC
45 bool "Page alloc debugging"
46 depends on DEBUG_KERNEL && !SOFTWARE_SUSPEND
47 help
48 Unmap pages from the kernel linear mapping after free_pages().
49 This results in a large slowdown, but helps to find certain types
50 of memory corruptions.
51
52config 4KSTACKS
53 bool "Use 4Kb for kernel stacks instead of 8Kb"
54 depends on DEBUG_KERNEL
55 help
56 If you say Y here, the kernel will use a 4Kb stack size for the
57 kernel stack attached to each process/thread. This facilitates
58 running more threads on a system and also reduces the pressure
59 on the VM subsystem for higher order allocations. This option
60 will also use IRQ stacks to compensate for the reduced stack space.
61
62config X86_FIND_SMP_CONFIG
63 bool
64 depends on X86_LOCAL_APIC || X86_VOYAGER
65 default y
66
67config X86_MPPARSE
68 bool
69 depends on X86_LOCAL_APIC && !X86_VISWS
70 default y
71
72endmenu
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
new file mode 100644
index 000000000000..314c7146e9bf
--- /dev/null
+++ b/arch/i386/Makefile
@@ -0,0 +1,173 @@
1#
2# i386/Makefile
3#
4# This file is included by the global makefile so that you can add your own
5 # architecture-specific flags and dependencies. Remember to have actions
6 # for "archclean", cleaning up for this architecture.
7#
8# This file is subject to the terms and conditions of the GNU General Public
9# License. See the file "COPYING" in the main directory of this archive
10# for more details.
11#
12# Copyright (C) 1994 by Linus Torvalds
13#
14# 19990713 Artur Skawina <skawina@geocities.com>
15# Added '-march' and '-mpreferred-stack-boundary' support
16#
17# Kianusch Sayah Karadji <kianusch@sk-tech.net>
18# Added support for GEODE CPU
19
20LDFLAGS := -m elf_i386
21OBJCOPYFLAGS := -O binary -R .note -R .comment -S
22LDFLAGS_vmlinux :=
23CHECKFLAGS += -D__i386__
24
25CFLAGS += -pipe -msoft-float
26
27# prevent gcc from keeping the stack 16 byte aligned
28CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
29
30align := $(cc-option-align)
31cflags-$(CONFIG_M386) += -march=i386
32cflags-$(CONFIG_M486) += -march=i486
33cflags-$(CONFIG_M586) += -march=i586
34cflags-$(CONFIG_M586TSC) += -march=i586
35cflags-$(CONFIG_M586MMX) += $(call cc-option,-march=pentium-mmx,-march=i586)
36cflags-$(CONFIG_M686) += -march=i686
37cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call cc-option,-mtune=pentium2)
38cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call cc-option,-mtune=pentium3)
39cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call cc-option,-mtune=pentium3)
40cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call cc-option,-mtune=pentium4)
41cflags-$(CONFIG_MK6) += -march=k6
42 # Please note that patches that add -march=athlon-xp and friends are pointless.
43 # They make zero difference whatsoever to performance at this time.
44cflags-$(CONFIG_MK7) += $(call cc-option,-march=athlon,-march=i686 $(align)-functions=4)
45cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,$(call cc-option,-march=athlon,-march=i686 $(align)-functions=4))
46cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
47cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call cc-option,-mtune=pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
48cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
49cflags-$(CONFIG_MWINCHIP2) += $(call cc-option,-march=winchip2,-march=i586)
50cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586)
51cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
52cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
53
54# AMD Elan support
55cflags-$(CONFIG_X86_ELAN) += -march=i486
56
57# MediaGX aka Geode support
58cflags-$(CONFIG_MGEODE) += $(call cc-option,-march=pentium-mmx,-march=i586)
59
60# -mregparm=3 works ok on gcc-3.0 and later
61#
62GCC_VERSION := $(call cc-version)
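# cc-version prints the compiler version as four digits (e.g. 0304 for
# gcc 3.4), so the numeric test below enables -mregparm=3 only for
# gcc 3.0 and later.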
63cflags-$(CONFIG_REGPARM) += $(shell if [ $(GCC_VERSION) -ge 0300 ] ; then echo "-mregparm=3"; fi ;)
64
65# Disable unit-at-a-time mode, it makes gcc use a lot more stack
66# due to the lack of sharing of stacklots.
67CFLAGS += $(call cc-option,-fno-unit-at-a-time)
68
69CFLAGS += $(cflags-y)
70
71# Default subarch .c files
72mcore-y := mach-default
73
74# Voyager subarch support
75mflags-$(CONFIG_X86_VOYAGER) := -Iinclude/asm-i386/mach-voyager
76mcore-$(CONFIG_X86_VOYAGER) := mach-voyager
77
78# VISWS subarch support
79mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-i386/mach-visws
80mcore-$(CONFIG_X86_VISWS) := mach-visws
81
82# NUMAQ subarch support
83mflags-$(CONFIG_X86_NUMAQ) := -Iinclude/asm-i386/mach-numaq
84mcore-$(CONFIG_X86_NUMAQ) := mach-default
85
86# BIGSMP subarch support
87mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-i386/mach-bigsmp
88mcore-$(CONFIG_X86_BIGSMP) := mach-default
89
90#Summit subarch support
91mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-i386/mach-summit
92mcore-$(CONFIG_X86_SUMMIT) := mach-default
93
94# generic subarchitecture
95mflags-$(CONFIG_X86_GENERICARCH) := -Iinclude/asm-i386/mach-generic
96mcore-$(CONFIG_X86_GENERICARCH) := mach-default
97core-$(CONFIG_X86_GENERICARCH) += arch/i386/mach-generic/
98
99# ES7000 subarch support
100mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-i386/mach-es7000
101mcore-$(CONFIG_X86_ES7000) := mach-default
102core-$(CONFIG_X86_ES7000) := arch/i386/mach-es7000/
103
104# default subarch .h files
105mflags-y += -Iinclude/asm-i386/mach-default
106
107head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o
108
109libs-y += arch/i386/lib/
110core-y += arch/i386/kernel/ \
111 arch/i386/mm/ \
112 arch/i386/$(mcore-y)/ \
113 arch/i386/crypto/
114drivers-$(CONFIG_MATH_EMULATION) += arch/i386/math-emu/
115drivers-$(CONFIG_PCI) += arch/i386/pci/
116# must be linked after kernel/
117drivers-$(CONFIG_OPROFILE) += arch/i386/oprofile/
118drivers-$(CONFIG_PM) += arch/i386/power/
119
120CFLAGS += $(mflags-y)
121AFLAGS += $(mflags-y)
122
123boot := arch/i386/boot
124
125.PHONY: zImage bzImage compressed zlilo bzlilo \
126 zdisk bzdisk fdimage fdimage144 fdimage288 install
127
128all: bzImage
129
130# KBUILD_IMAGE specifies the target image being built
131 KBUILD_IMAGE := $(boot)/bzImage
132zImage zlilo zdisk: KBUILD_IMAGE := arch/i386/boot/zImage
133
134zImage bzImage: vmlinux
135 $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
136
137compressed: zImage
138
139zlilo bzlilo: vmlinux
140 $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zlilo
141
142zdisk bzdisk: vmlinux
143 $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk
144
145fdimage fdimage144 fdimage288: vmlinux
146 $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
147
148install:
149 $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
150
151prepare: include/asm-$(ARCH)/asm_offsets.h
152CLEAN_FILES += include/asm-$(ARCH)/asm_offsets.h
153
154arch/$(ARCH)/kernel/asm-offsets.s: include/asm include/linux/version.h \
155 include/config/MARKER
156
157include/asm-$(ARCH)/asm_offsets.h: arch/$(ARCH)/kernel/asm-offsets.s
158 $(call filechk,gen-asm-offsets)
159
160archclean:
161 $(Q)$(MAKE) $(clean)=arch/i386/boot
162
163define archhelp
164 echo '* bzImage - Compressed kernel image (arch/$(ARCH)/boot/bzImage)'
165 echo ' install - Install kernel using'
166 echo ' (your) ~/bin/installkernel or'
167 echo ' (distribution) /sbin/installkernel or'
168 echo ' install to $$(INSTALL_PATH) and run lilo'
169 echo ' bzdisk - Create a boot floppy in /dev/fd0'
170 echo ' fdimage - Create a boot floppy image'
171endef
172
173CLEAN_FILES += arch/$(ARCH)/boot/fdimage arch/$(ARCH)/boot/mtools.conf
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
new file mode 100644
index 000000000000..aa7064a75ee6
--- /dev/null
+++ b/arch/i386/boot/Makefile
@@ -0,0 +1,104 @@
1#
2# arch/i386/boot/Makefile
3#
4# This file is subject to the terms and conditions of the GNU General Public
5# License. See the file "COPYING" in the main directory of this archive
6# for more details.
7#
8# Copyright (C) 1994 by Linus Torvalds
9#
10
11# ROOT_DEV specifies the default root-device when making the image.
12# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case
13# the default of FLOPPY is used by 'build'.
14
15ROOT_DEV := CURRENT
16
17# If you want to preset the SVGA mode, uncomment the next line and
18# set SVGA_MODE to whatever number you want.
19# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
20# The number is the same as you would ordinarily press at bootup.
21
22SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
23
24# If you want the RAM disk device, define this to be the size in blocks.
25
26#RAMDISK := -DRAMDISK=512
27
28targets := vmlinux.bin bootsect bootsect.o setup setup.o \
29 zImage bzImage
30subdir- := compressed
31
32hostprogs-y := tools/build
33
34HOSTCFLAGS_build.o := $(LINUXINCLUDE)
35
36# ---------------------------------------------------------------------------
37
38$(obj)/zImage: IMAGE_OFFSET := 0x1000
39$(obj)/zImage: EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK)
40$(obj)/bzImage: IMAGE_OFFSET := 0x100000
41$(obj)/bzImage: EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
42$(obj)/bzImage: BUILDFLAGS := -b
43
44quiet_cmd_image = BUILD $@
45cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/bootsect $(obj)/setup \
46 $(obj)/vmlinux.bin $(ROOT_DEV) > $@
47
48$(obj)/zImage $(obj)/bzImage: $(obj)/bootsect $(obj)/setup \
49 $(obj)/vmlinux.bin $(obj)/tools/build FORCE
50 $(call if_changed,image)
51 @echo 'Kernel: $@ is ready'
52
53$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
54 $(call if_changed,objcopy)
55
56LDFLAGS_bootsect := -Ttext 0x0 -s --oformat binary
57LDFLAGS_setup := -Ttext 0x0 -s --oformat binary -e begtext
58
59$(obj)/setup $(obj)/bootsect: %: %.o FORCE
60 $(call if_changed,ld)
61
62$(obj)/compressed/vmlinux: FORCE
63 $(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@
64
65# Set this if you want to pass append arguments to the zdisk/fdimage kernel
66FDARGS =
67
68$(obj)/mtools.conf: $(src)/mtools.conf.in
69 sed -e 's|@OBJ@|$(obj)|g' < $< > $@
70
71# This requires write access to /dev/fd0
72zdisk: $(BOOTIMAGE) $(obj)/mtools.conf
73 MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync
74 syslinux /dev/fd0 ; sync
75 echo 'default linux $(FDARGS)' | \
76 MTOOLSRC=$(obj)/mtools.conf mcopy - a:syslinux.cfg
77 MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) a:linux ; sync
78
79# These require being root or having syslinux 2.02 or higher installed
80fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf
81 dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440
82 MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync
83 syslinux $(obj)/fdimage ; sync
84 echo 'default linux $(FDARGS)' | \
85 MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg
86 MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) v:linux ; sync
87
88fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
89 dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880
90 MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync
91 syslinux $(obj)/fdimage ; sync
92 echo 'default linux $(FDARGS)' | \
93 MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg
94 MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) w:linux ; sync
95
96zlilo: $(BOOTIMAGE)
97 if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi
98 if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
99 cat $(BOOTIMAGE) > $(INSTALL_PATH)/vmlinuz
100 cp System.map $(INSTALL_PATH)/
101 if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
102
103install: $(BOOTIMAGE)
104 sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $< System.map "$(INSTALL_PATH)"
diff --git a/arch/i386/boot/bootsect.S b/arch/i386/boot/bootsect.S
new file mode 100644
index 000000000000..ba9fe14db6a9
--- /dev/null
+++ b/arch/i386/boot/bootsect.S
@@ -0,0 +1,98 @@
1/*
2 * bootsect.S Copyright (C) 1991, 1992 Linus Torvalds
3 *
4 * modified by Drew Eckhardt
5 * modified by Bruce Evans (bde)
6 * modified by Chris Noe (May 1999) (as86 -> gas)
7 * gutted by H. Peter Anvin (Jan 2003)
8 *
9 * BIG FAT NOTE: We're in real mode using 64k segments. Therefore segment
10 * addresses must be multiplied by 16 to obtain their respective linear
11 * addresses. To avoid confusion, linear addresses are written using leading
12 * hex while segment addresses are written as segment:offset.
13 *
14 */
15
16#include <asm/boot.h>
17
18SETUPSECTS = 4 /* default nr of setup-sectors */
19BOOTSEG = 0x07C0 /* original address of boot-sector */
20INITSEG = DEF_INITSEG /* we move boot here - out of the way */
21SETUPSEG = DEF_SETUPSEG /* setup starts here */
22SYSSEG = DEF_SYSSEG /* system loaded at 0x10000 (65536) */
23SYSSIZE = DEF_SYSSIZE /* system size: # of 16-byte clicks */
24 /* to be loaded */
25ROOT_DEV = 0 /* ROOT_DEV is now written by "build" */
26SWAP_DEV = 0 /* SWAP_DEV is now written by "build" */
27
28#ifndef SVGA_MODE
29#define SVGA_MODE ASK_VGA
30#endif
31
32#ifndef RAMDISK
33#define RAMDISK 0
34#endif
35
36#ifndef ROOT_RDONLY
37#define ROOT_RDONLY 1
38#endif
39
40.code16
41.text
42
43.global _start
44_start:
45
46 # Normalize the start address
47 jmpl $BOOTSEG, $start2
48
49start2:
50 movw %cs, %ax
51 movw %ax, %ds
52 movw %ax, %es
53 movw %ax, %ss
54 movw $0x7c00, %sp
55 sti
56 cld
57
58 movw $bugger_off_msg, %si
59
60msg_loop:
61 lodsb
62 andb %al, %al
63 jz die
64 movb $0xe, %ah
65 movw $7, %bx
66 int $0x10
67 jmp msg_loop
68
69die:
70 # Allow the user to press a key, then reboot
71 xorw %ax, %ax
72 int $0x16
73 int $0x19
74
75 # int 0x19 should never return. In case it does anyway,
76 # invoke the BIOS reset code...
77 ljmp $0xf000,$0xfff0
78
79
80bugger_off_msg:
81 .ascii "Direct booting from floppy is no longer supported.\r\n"
82 .ascii "Please use a boot loader program instead.\r\n"
83 .ascii "\n"
84 .ascii "Remove disk and press any key to reboot . . .\r\n"
85 .byte 0
86
87
88 # Kernel attributes; used by setup
89
90 .org 497
91setup_sects: .byte SETUPSECTS
92root_flags: .word ROOT_RDONLY
93syssize: .word SYSSIZE
94swap_dev: .word SWAP_DEV
95ram_size: .word RAMDISK
96vid_mode: .word SVGA_MODE
97root_dev: .word ROOT_DEV
98boot_flag: .word 0xAA55
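To make the "multiply by 16" rule from the header comment concrete, here is a
minimal standalone C sketch of real-mode address translation (the function
name and sample values are illustrative, not part of the kernel):

    #include <stdio.h>
    #include <stdint.h>

    /* linear = segment * 16 + offset, as described at the top of bootsect.S */
    static uint32_t linear_addr(uint16_t seg, uint16_t off)
    {
            return ((uint32_t)seg << 4) + off;
    }

    int main(void)
    {
            /* BOOTSEG 0x07C0 with offset 0 is the classic 0x7C00 BIOS
             * load address; INITSEG 0x9000 maps to linear 0x90000. */
            printf("0x%05x\n", (unsigned)linear_addr(0x07C0, 0x0000));
            printf("0x%05x\n", (unsigned)linear_addr(0x9000, 0x0000));
            return 0;
    }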
diff --git a/arch/i386/boot/compressed/Makefile b/arch/i386/boot/compressed/Makefile
new file mode 100644
index 000000000000..258ea95224f6
--- /dev/null
+++ b/arch/i386/boot/compressed/Makefile
@@ -0,0 +1,25 @@
1#
2# linux/arch/i386/boot/compressed/Makefile
3#
4# create a compressed vmlinux image from the original vmlinux
5#
6
7targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o
8EXTRA_AFLAGS := -traditional
9
10LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32
11
12$(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
13 $(call if_changed,ld)
14 @:
15
16$(obj)/vmlinux.bin: vmlinux FORCE
17 $(call if_changed,objcopy)
18
19$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
20 $(call if_changed,gzip)
21
22LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
23
24$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
25 $(call if_changed,ld)
diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S
new file mode 100644
index 000000000000..c5e80b69e7d4
--- /dev/null
+++ b/arch/i386/boot/compressed/head.S
@@ -0,0 +1,128 @@
1/*
2 * linux/boot/head.S
3 *
4 * Copyright (C) 1991, 1992, 1993 Linus Torvalds
5 */
6
7/*
8 * head.S contains the 32-bit startup code.
9 *
10 * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
11 * the page directory will exist. The startup code will be overwritten by
12 * the page directory. [According to comments etc elsewhere on a compressed
13 * kernel it will end up at 0x1000 + 1Mb. I hope so, as I assume this. - AC]
14 *
15 * Page 0 is deliberately kept safe, since System Management Mode code in
16 * laptops may need to access the BIOS data stored there. This is also
17 * useful for future device drivers that access the BIOS via VM86
18 * mode.
19 */
20
21/*
22 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
23 */
24.text
25
26#include <linux/linkage.h>
27#include <asm/segment.h>
28
29 .globl startup_32
30
31startup_32:
32 cld
33 cli
34 movl $(__BOOT_DS),%eax
35 movl %eax,%ds
36 movl %eax,%es
37 movl %eax,%fs
38 movl %eax,%gs
39
40 lss stack_start,%esp
41 xorl %eax,%eax
421: incl %eax # check that A20 really IS enabled
43 movl %eax,0x000000 # loop forever if it isn't
44 cmpl %eax,0x100000
45 je 1b
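# (This test relies on 1MB wraparound: with the A20 line gated off,
# physical address 0x100000 aliases 0x000000, so the value just written
# at 0 reads back at 1MB and the loop above never exits.)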
46
47/*
48 * Initialize eflags. Some BIOS's leave bits like NT set. This would
49 * confuse the debugger if this code is traced.
50 * XXX - best to initialize before switching to protected mode.
51 */
52 pushl $0
53 popfl
54/*
55 * Clear BSS
56 */
57 xorl %eax,%eax
58 movl $_edata,%edi
59 movl $_end,%ecx
60 subl %edi,%ecx
61 cld
62 rep
63 stosb
64/*
65 * Do the decompression, and jump to the new kernel..
66 */
67 subl $16,%esp # place for structure on the stack
68 movl %esp,%eax
69 pushl %esi # real mode pointer as second arg
70 pushl %eax # address of structure as first arg
71 call decompress_kernel
72 orl %eax,%eax
73 jnz 3f
74 popl %esi # discard address
75 popl %esi # real mode pointer
76 xorl %ebx,%ebx
77 ljmp $(__BOOT_CS), $0x100000
78
79/*
80 * We come here, if we were loaded high.
81 * We need to move the move-in-place routine down to 0x1000
82 * and then start it with the buffer addresses in registers,
83 * which we got from the stack.
84 */
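/*
 * The last four pops below walk through the struct moveparams that
 * decompress_kernel() filled in (see misc.c): low_buffer_start,
 * lcount, high_buffer_start, hcount, in declaration order.
 */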
853:
86 movl $move_routine_start,%esi
87 movl $0x1000,%edi
88 movl $move_routine_end,%ecx
89 subl %esi,%ecx
90 addl $3,%ecx
91 shrl $2,%ecx
92 cld
93 rep
94 movsl
95
96 popl %esi # discard the address
97 popl %ebx # real mode pointer
98 popl %esi # low_buffer_start
99 popl %ecx # lcount
100 popl %edx # high_buffer_start
101 popl %eax # hcount
102 movl $0x100000,%edi
103 cli # make sure we don't get interrupted
104 ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine
105
106/*
107 * Routine (template) for moving the decompressed kernel in place,
108 * if we were high loaded. This _must_ be PIC code!
109 */
110move_routine_start:
111 movl %ecx,%ebp
112 shrl $2,%ecx
113 rep
114 movsl
115 movl %ebp,%ecx
116 andl $3,%ecx
117 rep
118 movsb
119 movl %edx,%esi
120 movl %eax,%ecx # NOTE: rep movsb won't move if %ecx == 0
121 addl $3,%ecx
122 shrl $2,%ecx
123 rep
124 movsl
125 movl %ebx,%esi # Restore setup pointer
126 xorl %ebx,%ebx
127 ljmp $(__BOOT_CS), $0x100000
128move_routine_end:
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
new file mode 100644
index 000000000000..fa67045234a3
--- /dev/null
+++ b/arch/i386/boot/compressed/misc.c
@@ -0,0 +1,382 @@
1/*
2 * misc.c
3 *
4 * This is a collection of several routines from gzip-1.0.3
5 * adapted for Linux.
6 *
7 * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
8 * puts by Nick Holloway 1993, better puts by Martin Mares 1995
9 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
10 */
11
12#include <linux/linkage.h>
13#include <linux/vmalloc.h>
14#include <linux/tty.h>
15#include <video/edid.h>
16#include <asm/io.h>
17
18/*
19 * gzip declarations
20 */
21
22#define OF(args) args
23#define STATIC static
24
25#undef memset
26#undef memcpy
27
28/*
29 * Why do we do this? Don't ask me..
30 *
31 * Incomprehensible are the ways of bootloaders.
32 */
33static void* memset(void *, int, size_t);
34static void* memcpy(void *, __const void *, size_t);
35#define memzero(s, n) memset ((s), 0, (n))
36
37typedef unsigned char uch;
38typedef unsigned short ush;
39typedef unsigned long ulg;
40
41#define WSIZE 0x8000 /* Window size must be at least 32k, */
42 /* and a power of two */
43
44static uch *inbuf; /* input buffer */
45static uch window[WSIZE]; /* Sliding window buffer */
46
47static unsigned insize = 0; /* valid bytes in inbuf */
48static unsigned inptr = 0; /* index of next byte to be processed in inbuf */
49static unsigned outcnt = 0; /* bytes in output buffer */
50
51/* gzip flag byte */
52#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */
53#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
54#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
55#define ORIG_NAME 0x08 /* bit 3 set: original file name present */
56#define COMMENT 0x10 /* bit 4 set: file comment present */
57#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */
58#define RESERVED 0xC0 /* bit 6,7: reserved */
59
60#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf())
61
62/* Diagnostic functions */
63#ifdef DEBUG
64# define Assert(cond,msg) {if(!(cond)) error(msg);}
65# define Trace(x) fprintf x
66# define Tracev(x) {if (verbose) fprintf x ;}
67# define Tracevv(x) {if (verbose>1) fprintf x ;}
68# define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
69# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
70#else
71# define Assert(cond,msg)
72# define Trace(x)
73# define Tracev(x)
74# define Tracevv(x)
75# define Tracec(c,x)
76# define Tracecv(c,x)
77#endif
78
79static int fill_inbuf(void);
80static void flush_window(void);
81static void error(char *m);
82static void gzip_mark(void **);
83static void gzip_release(void **);
84
85/*
86 * This is set up by the setup-routine at boot-time
87 */
88static unsigned char *real_mode; /* Pointer to real-mode data */
89
90#define RM_EXT_MEM_K (*(unsigned short *)(real_mode + 0x2))
91#ifndef STANDARD_MEMORY_BIOS_CALL
92#define RM_ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0))
93#endif
94#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
95
96extern char input_data[];
97extern int input_len;
98
99static long bytes_out = 0;
100static uch *output_data;
101static unsigned long output_ptr = 0;
102
103static void *malloc(int size);
104static void free(void *where);
105
106static void putstr(const char *);
107
108extern int end;
109static long free_mem_ptr = (long)&end;
110static long free_mem_end_ptr;
111
112#define INPLACE_MOVE_ROUTINE 0x1000
113#define LOW_BUFFER_START 0x2000
114#define LOW_BUFFER_MAX 0x90000
115#define HEAP_SIZE 0x3000
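/*
 * Low-memory layout implied by the constants above for a high-loaded
 * kernel: the move-in-place routine is copied to 0x1000, decompressed
 * output fills the low buffer from 0x2000 up to at most 0x90000, and
 * the remainder goes to a high buffer above the loaded image, to be
 * reassembled at 1MB by head.S.
 */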
116static unsigned int low_buffer_end, low_buffer_size;
117static int high_loaded =0;
118static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
119
120static char *vidmem = (char *)0xb8000;
121static int vidport;
122static int lines, cols;
123
124#ifdef CONFIG_X86_NUMAQ
125static void * xquad_portio = NULL;
126#endif
127
128#include "../../../../lib/inflate.c"
129
130static void *malloc(int size)
131{
132 void *p;
133
134 if (size <0) error("Malloc error");
135 if (free_mem_ptr <= 0) error("Memory error");
136
137 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
138
139 p = (void *)free_mem_ptr;
140 free_mem_ptr += size;
141
142 if (free_mem_ptr >= free_mem_end_ptr)
143 error("Out of memory");
144
145 return p;
146}
147
148static void free(void *where)
149{ /* Don't care */
150}
151
152static void gzip_mark(void **ptr)
153{
154 *ptr = (void *) free_mem_ptr;
155}
156
157static void gzip_release(void **ptr)
158{
159 free_mem_ptr = (long) *ptr;
160}
161
162static void scroll(void)
163{
164 int i;
165
166 memcpy ( vidmem, vidmem + cols * 2, ( lines - 1 ) * cols * 2 );
167 for ( i = ( lines - 1 ) * cols * 2; i < lines * cols * 2; i += 2 )
168 vidmem[i] = ' ';
169}
170
171static void putstr(const char *s)
172{
173 int x,y,pos;
174 char c;
175
176 x = RM_SCREEN_INFO.orig_x;
177 y = RM_SCREEN_INFO.orig_y;
178
179 while ( ( c = *s++ ) != '\0' ) {
180 if ( c == '\n' ) {
181 x = 0;
182 if ( ++y >= lines ) {
183 scroll();
184 y--;
185 }
186 } else {
187 vidmem [ ( x + cols * y ) * 2 ] = c;
188 if ( ++x >= cols ) {
189 x = 0;
190 if ( ++y >= lines ) {
191 scroll();
192 y--;
193 }
194 }
195 }
196 }
197
198 RM_SCREEN_INFO.orig_x = x;
199 RM_SCREEN_INFO.orig_y = y;
200
201 pos = (x + cols * y) * 2; /* Update cursor position */
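	/*
	 * The CRT controller takes the cursor location as a character
	 * index, i.e. pos / 2, since pos counts bytes of char/attribute
	 * pairs: register 14 gets the high byte, (pos / 2) >> 8 ==
	 * pos >> 9, and register 15 the low byte, (pos / 2) & 0xff ==
	 * (pos >> 1) & 0xff.
	 */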
202 outb_p(14, vidport);
203 outb_p(0xff & (pos >> 9), vidport+1);
204 outb_p(15, vidport);
205 outb_p(0xff & (pos >> 1), vidport+1);
206}
207
208static void* memset(void* s, int c, size_t n)
209{
210 int i;
211 char *ss = (char*)s;
212
213 for (i=0;i<n;i++) ss[i] = c;
214 return s;
215}
216
217static void* memcpy(void* __dest, __const void* __src,
218 size_t __n)
219{
220 int i;
221 char *d = (char *)__dest, *s = (char *)__src;
222
223 for (i=0;i<__n;i++) d[i] = s[i];
224 return __dest;
225}
226
227/* ===========================================================================
228 * Fill the input buffer. This is called only when the buffer is empty
229 * and at least one byte is really needed.
230 */
231static int fill_inbuf(void)
232{
233 if (insize != 0) {
234 error("ran out of input data");
235 }
236
237 inbuf = input_data;
238 insize = input_len;
239 inptr = 1;
240 return inbuf[0];
241}
242
243/* ===========================================================================
244 * Write the output window window[0..outcnt-1] and update crc and bytes_out.
245 * (Used for the decompressed data only.)
246 */
247static void flush_window_low(void)
248{
249 ulg c = crc; /* temporary variable */
250 unsigned n;
251 uch *in, *out, ch;
252
253 in = window;
254 out = &output_data[output_ptr];
255 for (n = 0; n < outcnt; n++) {
256 ch = *out++ = *in++;
257 c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
258 }
259 crc = c;
260 bytes_out += (ulg)outcnt;
261 output_ptr += (ulg)outcnt;
262 outcnt = 0;
263}
264
265static void flush_window_high(void)
266{
267 ulg c = crc; /* temporary variable */
268 unsigned n;
269 uch *in, ch;
270 in = window;
271 for (n = 0; n < outcnt; n++) {
272 ch = *output_data++ = *in++;
273 if ((ulg)output_data == low_buffer_end) output_data=high_buffer_start;
274 c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
275 }
276 crc = c;
277 bytes_out += (ulg)outcnt;
278 outcnt = 0;
279}
280
281static void flush_window(void)
282{
283 if (high_loaded) flush_window_high();
284 else flush_window_low();
285}
286
287static void error(char *x)
288{
289 putstr("\n\n");
290 putstr(x);
291 putstr("\n\n -- System halted");
292
293 while(1); /* Halt */
294}
295
296#define STACK_SIZE (4096)
297
298long user_stack [STACK_SIZE];
299
300struct {
301 long * a;
302 short b;
303 } stack_start = { & user_stack [STACK_SIZE] , __BOOT_DS };
304
305static void setup_normal_output_buffer(void)
306{
307#ifdef STANDARD_MEMORY_BIOS_CALL
308 if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory");
309#else
310 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
311#endif
312 output_data = (char *)0x100000; /* Points to 1M */
313 free_mem_end_ptr = (long)real_mode;
314}
315
316struct moveparams {
317 uch *low_buffer_start; int lcount;
318 uch *high_buffer_start; int hcount;
319};
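/*
 * Filled in here and consumed by head.S once decompress_kernel()
 * returns nonzero.  hcount == 0 means the high buffer already sits at
 * its final address and only the low buffer needs moving; the -1 set
 * below is a placeholder until close_output_buffer_if_we_run_high()
 * computes the real byte count.
 */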
320
321static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
322{
323 high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
324#ifdef STANDARD_MEMORY_BIOS_CALL
325 if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
326#else
327 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) <
328 (3*1024))
329 error("Less than 4MB of memory");
330#endif
331 mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START;
332 low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
333 ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
334 low_buffer_size = low_buffer_end - LOW_BUFFER_START;
335 high_loaded = 1;
336 free_mem_end_ptr = (long)high_buffer_start;
337 if ( (0x100000 + low_buffer_size) > ((ulg)high_buffer_start)) {
338 high_buffer_start = (uch *)(0x100000 + low_buffer_size);
339  mv->hcount = 0; /* i.e. we do not need to move high_buffer */
340 }
341 else mv->hcount = -1;
342 mv->high_buffer_start = high_buffer_start;
343}
344
345static void close_output_buffer_if_we_run_high(struct moveparams *mv)
346{
347 if (bytes_out > low_buffer_size) {
348 mv->lcount = low_buffer_size;
349 if (mv->hcount)
350 mv->hcount = bytes_out - low_buffer_size;
351 } else {
352 mv->lcount = bytes_out;
353 mv->hcount = 0;
354 }
355}
356
357
358asmlinkage int decompress_kernel(struct moveparams *mv, void *rmode)
359{
360 real_mode = rmode;
361
362 if (RM_SCREEN_INFO.orig_video_mode == 7) {
363 vidmem = (char *) 0xb0000;
364 vidport = 0x3b4;
365 } else {
366 vidmem = (char *) 0xb8000;
367 vidport = 0x3d4;
368 }
369
370 lines = RM_SCREEN_INFO.orig_video_lines;
371 cols = RM_SCREEN_INFO.orig_video_cols;
372
373 if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
374 else setup_output_buffer_if_we_run_high(mv);
375
376 makecrc();
377 putstr("Uncompressing Linux... ");
378 gunzip();
379 putstr("Ok, booting the kernel.\n");
380 if (high_loaded) close_output_buffer_if_we_run_high(mv);
381 return high_loaded;
382}
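The crc bookkeeping in flush_window_low()/flush_window_high() above is the
standard byte-at-a-time, table-driven CRC-32 pulled in from lib/inflate.c.
A minimal standalone sketch of the same update, assuming the usual reflected
polynomial 0xedb88320 (the function names here are illustrative):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    static uint32_t crc_tab[256];

    /* Build the reflected CRC-32 table, as makecrc() in lib/inflate.c does. */
    static void make_crc_table(void)
    {
            uint32_t n, c;
            int k;

            for (n = 0; n < 256; n++) {
                    c = n;
                    for (k = 0; k < 8; k++)
                            c = (c & 1) ? 0xedb88320u ^ (c >> 1) : c >> 1;
                    crc_tab[n] = c;
            }
    }

    /* The per-byte step used by flush_window():
     * c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); */
    static uint32_t crc32_update(uint32_t c, const uint8_t *p, size_t n)
    {
            while (n--)
                    c = crc_tab[(c ^ *p++) & 0xff] ^ (c >> 8);
            return c;
    }

    int main(void)
    {
            const char *s = "123456789";
            uint32_t c;

            make_crc_table();
            /* CRC-32 starts from ~0 and is complemented at the end;
             * the check value for "123456789" is 0xcbf43926. */
            c = crc32_update(0xffffffffu, (const uint8_t *)s, strlen(s));
            printf("%08x\n", c ^ 0xffffffffu);
            return 0;
    }

Each table lookup folds one input byte into the running remainder, which is
why flush_window() can checksum the window in the same single pass that
copies it to the output buffer.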
diff --git a/arch/i386/boot/compressed/vmlinux.scr b/arch/i386/boot/compressed/vmlinux.scr
new file mode 100644
index 000000000000..1ed9d791f863
--- /dev/null
+++ b/arch/i386/boot/compressed/vmlinux.scr
@@ -0,0 +1,9 @@
1SECTIONS
2{
3 .data : {
4 input_len = .;
5 LONG(input_data_end - input_data) input_data = .;
6 *(.data)
7 input_data_end = .;
8 }
9}
diff --git a/arch/i386/boot/edd.S b/arch/i386/boot/edd.S
new file mode 100644
index 000000000000..027d6b354ffb
--- /dev/null
+++ b/arch/i386/boot/edd.S
@@ -0,0 +1,176 @@
1/*
2 * BIOS Enhanced Disk Drive support
3 * Copyright (C) 2002, 2003, 2004 Dell, Inc.
4 * by Matt Domsch <Matt_Domsch@dell.com> October 2002
5 * conformant to T13 Committee www.t13.org
6 * projects 1572D, 1484D, 1386D, 1226DT
7 * disk signature read by Matt Domsch <Matt_Domsch@dell.com>
8 * and Andrew Wilks <Andrew_Wilks@dell.com> September 2003, June 2004
9 * legacy CHS retrieval by Patrick J. LoPresti <patl@users.sourceforge.net>
10 * March 2004
11 * Command line option parsing, Matt Domsch, November 2004
12 */
13
14#include <linux/edd.h>
15#include <asm/setup.h>
16
17#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
18 movb $0, (EDD_MBR_SIG_NR_BUF)
19 movb $0, (EDDNR)
20
21# Check the command line for two options:
22# edd=of disables EDD completely (edd=off)
23# edd=sk skips the MBR test (edd=skipmbr)
24 pushl %esi
25 cmpl $0, %cs:cmd_line_ptr
26 jz done_cl
27 movl %cs:(cmd_line_ptr), %esi
28# ds:esi has the pointer to the command line now
29 movl $(COMMAND_LINE_SIZE-7), %ecx
30# loop through kernel command line one byte at a time
31cl_loop:
32 cmpl $EDD_CL_EQUALS, (%si)
33 jz found_edd_equals
34 incl %esi
35 loop cl_loop
36 jmp done_cl
37found_edd_equals:
38# only looking at first two characters after equals
39 addl $4, %esi
40 cmpw $EDD_CL_OFF, (%si) # edd=of
41 jz do_edd_off
42 cmpw $EDD_CL_SKIP, (%si) # edd=sk
43 jz do_edd_skipmbr
44 jmp done_cl
45do_edd_skipmbr:
46 popl %esi
47 jmp edd_start
48do_edd_off:
49 popl %esi
50 jmp edd_done
51done_cl:
52 popl %esi
53
54
55# Read the first sector of each BIOS disk device and store the 4-byte signature
56edd_mbr_sig_start:
57 movb $0x80, %dl # from device 80
58 movw $EDD_MBR_SIG_BUF, %bx # store buffer ptr in bx
59edd_mbr_sig_read:
60 movl $0xFFFFFFFF, %eax
61 movl %eax, (%bx) # assume failure
62 pushw %bx
63 movb $READ_SECTORS, %ah
64 movb $1, %al # read 1 sector
65 movb $0, %dh # at head 0
66 movw $1, %cx # cylinder 0, sector 1 (sectors are 1-based)
67 pushw %es
68 pushw %ds
69 popw %es
70 movw $EDDBUF, %bx # disk's data goes into EDDBUF
71 pushw %dx # work around buggy BIOSes
72 stc # work around buggy BIOSes
73 int $0x13
74 sti # work around buggy BIOSes
75 popw %dx
76 popw %es
77 popw %bx
78 jc edd_mbr_sig_done # on failure, we're done.
79 movl (EDDBUF+EDD_MBR_SIG_OFFSET), %eax # read sig out of the MBR
80 movl %eax, (%bx) # store success
81 incb (EDD_MBR_SIG_NR_BUF) # note that we stored something
82 incb %dl # increment to next device
83 addw $4, %bx # increment sig buffer ptr
84 cmpb $EDD_MBR_SIG_MAX, (EDD_MBR_SIG_NR_BUF) # Out of space?
85 jb edd_mbr_sig_read # keep looping
86edd_mbr_sig_done:
87
88# Do the BIOS Enhanced Disk Drive calls
89# This consists of two calls:
90# int 13h ah=41h "Check Extensions Present"
91# int 13h ah=48h "Get Device Parameters"
92# int 13h ah=08h "Legacy Get Device Parameters"
93#
94# A buffer of size EDDMAXNR*(EDDEXTSIZE+EDDPARMSIZE) is reserved for our use
95# in the boot_params at EDDBUF. The first four bytes of which are
96# used to store the device number, interface support map and version
97# results from fn41. The next four bytes are used to store the legacy
98# cylinders, heads, and sectors from fn08. The following 74 bytes are used to
99# store the results from fn48. Starting from device 80h, fn41, then fn48
100# are called and their results stored in EDDBUF+n*(EDDEXTSIZE+EDDPARMSIZE).
101# Then the pointer is incremented to store the data for the next call.
102# This repeats until either a device doesn't exist or EDDMAXNR
103# devices have been stored.
104# The one tricky part is that ds:si always points EDDEXTSIZE bytes into
105# the structure, and the fn41 and fn08 results are stored at offsets
106# from there. This removes the need to increment the pointer for
107# every store, and leaves it ready for the fn48 call.
108# A second one-byte buffer, EDDNR, in the boot_params stores
109# the number of BIOS devices which exist, up to EDDMAXNR.
110# In setup.c, copy_edd() stores both boot_params buffers away
111# for later use, as they would get overwritten otherwise.
112# This code is sensitive to the size of the structs in edd.h
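# For reference, the offsets used below, relative to %ds:(%si), which
# points EDDEXTSIZE bytes into each record:
#   -8: device number (byte)        -7: version (byte)
#   -6: interface support map (word)
#   -4: legacy max cylinder (word)  -2: legacy max head (byte)
#   -1: legacy max sector (byte)     0: fn48 result buffer (EDDPARMSIZE bytes)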
113edd_start:
114 # %ds points to the bootsector
115 # result buffer for fn48
116 movw $EDDBUF+EDDEXTSIZE, %si # in ds:si, fn41 results
117 # kept just before that
118 movb $0x80, %dl # BIOS device 0x80
119
120edd_check_ext:
121 movb $CHECKEXTENSIONSPRESENT, %ah # Function 41
122 movw $EDDMAGIC1, %bx # magic
123 int $0x13 # make the call
124 jc edd_done # no more BIOS devices
125
126 cmpw $EDDMAGIC2, %bx # is magic right?
127 jne edd_next # nope, next...
128
129 movb %dl, %ds:-8(%si) # store device number
130 movb %ah, %ds:-7(%si) # store version
131 movw %cx, %ds:-6(%si) # store extensions
132 incb (EDDNR) # note that we stored something
133
134edd_get_device_params:
135 movw $EDDPARMSIZE, %ds:(%si) # put size
136 movw $0x0, %ds:2(%si) # work around buggy BIOSes
137 movb $GETDEVICEPARAMETERS, %ah # Function 48
138 int $0x13 # make the call
139 # Don't check for fail return
140 # it doesn't matter.
141edd_get_legacy_chs:
142 xorw %ax, %ax
143 movw %ax, %ds:-4(%si)
144 movw %ax, %ds:-2(%si)
145 # Ralf Brown's Interrupt List says to set ES:DI to
146 # 0000h:0000h "to guard against BIOS bugs"
147 pushw %es
148 movw %ax, %es
149 movw %ax, %di
150 pushw %dx # legacy call clobbers %dl
151 movb $LEGACYGETDEVICEPARAMETERS, %ah # Function 08
152 int $0x13 # make the call
153 jc edd_legacy_done # failed
154 movb %cl, %al # Low 6 bits are max
155 andb $0x3F, %al # sector number
156 movb %al, %ds:-1(%si) # Record max sect
157 movb %dh, %ds:-2(%si) # Record max head number
158 movb %ch, %al # Low 8 bits of max cyl
159 shr $6, %cl
160 movb %cl, %ah # High 2 bits of max cyl
161 movw %ax, %ds:-4(%si)
162
163edd_legacy_done:
164 popw %dx
165 popw %es
166 movw %si, %ax # increment si
167 addw $EDDPARMSIZE+EDDEXTSIZE, %ax
168 movw %ax, %si
169
170edd_next:
171 incb %dl # increment to next device
172 cmpb $EDDMAXNR, (EDDNR) # Out of space?
173 jb edd_check_ext # keep looping
174
175edd_done:
176#endif
diff --git a/arch/i386/boot/install.sh b/arch/i386/boot/install.sh
new file mode 100644
index 000000000000..90f2452b3b9e
--- /dev/null
+++ b/arch/i386/boot/install.sh
@@ -0,0 +1,40 @@
1#!/bin/sh
2#
3# arch/i386/boot/install.sh
4#
5# This file is subject to the terms and conditions of the GNU General Public
6# License. See the file "COPYING" in the main directory of this archive
7# for more details.
8#
9# Copyright (C) 1995 by Linus Torvalds
10#
11# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
12#
13# "make install" script for i386 architecture
14#
15# Arguments:
16# $1 - kernel version
17# $2 - kernel image file
18# $3 - kernel map file
19# $4 - default install path (blank if root directory)
20#
21
22# User may have a custom install script
23
24if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi
25if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi
26
27# Default install - same as make zlilo
28
29if [ -f $4/vmlinuz ]; then
30 mv $4/vmlinuz $4/vmlinuz.old
31fi
32
33if [ -f $4/System.map ]; then
34 mv $4/System.map $4/System.old
35fi
36
37cat $2 > $4/vmlinuz
38cp $3 $4/System.map
39
40if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
diff --git a/arch/i386/boot/mtools.conf.in b/arch/i386/boot/mtools.conf.in
new file mode 100644
index 000000000000..efd6d2490c1d
--- /dev/null
+++ b/arch/i386/boot/mtools.conf.in
@@ -0,0 +1,17 @@
1#
2# mtools configuration file for "make (b)zdisk"
3#
4
5# Actual floppy drive
6drive a:
7 file="/dev/fd0"
8
9# 1.44 MB floppy disk image
10drive v:
11 file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=18 filter
12
13# 2.88 MB floppy disk image (mostly for virtual uses)
14drive w:
15 file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=36 filter
16
17
diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S
new file mode 100644
index 000000000000..a934ab32bf8e
--- /dev/null
+++ b/arch/i386/boot/setup.S
@@ -0,0 +1,1028 @@
1/*
2 * setup.S Copyright (C) 1991, 1992 Linus Torvalds
3 *
4 * setup.s is responsible for getting the system data from the BIOS,
5 * and putting them into the appropriate places in system memory.
6 * both setup.s and system have been loaded by the bootblock.
7 *
8 * This code asks the bios for memory/disk/other parameters, and
9 * puts them in a "safe" place: 0x90000-0x901FF, ie where the
10 * boot-block used to be. It is then up to the protected mode
11 * system to read them from there before the area is overwritten
12 * for buffer-blocks.
13 *
14 * Move PS/2 aux init code to psaux.c
15 * (troyer@saifr00.cfsat.Honeywell.COM) 03Oct92
16 *
17 * some changes and additional features by Christoph Niemann,
18 * March 1993/June 1994 (Christoph.Niemann@linux.org)
19 *
20 * add APM BIOS checking by Stephen Rothwell, May 1994
21 * (sfr@canb.auug.org.au)
22 *
23 * High load stuff, initrd support and position independency
24 * by Hans Lermen & Werner Almesberger, February 1996
25 * <lermen@elserv.ffm.fgan.de>, <almesber@lrc.epfl.ch>
26 *
27 * Video handling moved to video.S by Martin Mares, March 1996
28 * <mj@k332.feld.cvut.cz>
29 *
30 * Extended memory detection scheme retwiddled by orc@pell.chi.il.us (david
31 * parsons) to avoid loadlin confusion, July 1997
32 *
33 * Transcribed from Intel (as86) -> AT&T (gas) by Chris Noe, May 1999.
34 * <stiker@northlink.com>
35 *
36 * Fix to work around buggy BIOSes which don't use the carry bit correctly
37 * and/or report extended memory in CX/DX for e801h memory size detection
38 * call. As a result the kernel got wrong figures. The int15/e801h docs
39 * from Ralf Brown interrupt list seem to indicate AX/BX should be used
40 * anyway. So to avoid breaking many machines (presumably there was a reason
41 * to originally use CX/DX instead of AX/BX), we do a kludge to see
42 * if CX/DX have been changed in the e801 call and if so use AX/BX .
43 * Michael Miller, April 2001 <michaelm@mjmm.org>
44 *
45 * New A20 code ported from SYSLINUX by H. Peter Anvin. AMD Elan bugfixes
46 * by Robert Schwebel, December 2001 <robert@schwebel.de>
47 */
48
49#include <linux/config.h>
50#include <asm/segment.h>
51#include <linux/version.h>
52#include <linux/compile.h>
53#include <asm/boot.h>
54#include <asm/e820.h>
55#include <asm/page.h>
56
57/* Signature words to ensure LILO loaded us right */
58#define SIG1 0xAA55
59#define SIG2 0x5A5A
60
61INITSEG = DEF_INITSEG # 0x9000, we move boot here, out of the way
62SYSSEG = DEF_SYSSEG # 0x1000, system loaded at 0x10000 (65536).
63SETUPSEG = DEF_SETUPSEG # 0x9020, this is the current segment
64 # ... and the former contents of CS
65
66DELTA_INITSEG = SETUPSEG - INITSEG # 0x0020
67
68.code16
69.globl begtext, begdata, begbss, endtext, enddata, endbss
70
71.text
72begtext:
73.data
74begdata:
75.bss
76begbss:
77.text
78
79start:
80 jmp trampoline
81
82# This is the setup header, and it must start at %cs:2 (old 0x9020:2)
83
84 .ascii "HdrS" # header signature
85 .word 0x0203 # header version number (>= 0x0105)
86 # or else old loadlin-1.5 will fail
87realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
88start_sys_seg: .word SYSSEG
89 .word kernel_version # pointing to kernel version string
90 # above section of header is compatible
91 # with loadlin-1.5 (header v1.5). Don't
92 # change it.
93
94type_of_loader: .byte 0 # = 0, old one (LILO, Loadlin,
95 # Bootlin, SYSLX, bootsect...)
96 # See Documentation/i386/boot.txt for
97 # assigned ids
98
99# flags; unused (RFU) bits must be zero. Bit meanings within loadflags:
100loadflags:
101LOADED_HIGH = 1 # If set, the kernel is loaded high
102CAN_USE_HEAP = 0x80 # If set, the loader also has set
103 # heap_end_ptr to tell how much
104 # space behind setup.S can be used for
105 # heap purposes.
106 # Only the loader knows what is free
107#ifndef __BIG_KERNEL__
108 .byte 0
109#else
110 .byte LOADED_HIGH
111#endif
112
113setup_move_size: .word 0x8000 # size to move, when setup is not
114 # loaded at 0x90000. We will move setup
115 # to 0x90000 then, just before jumping
116 # into the kernel. However, only the
117 # loader knows how much data behind
118 # us also needs to be loaded.
119
120code32_start: # here loaders can put a different
121 # start address for 32-bit code.
122#ifndef __BIG_KERNEL__
123 .long 0x1000 # 0x1000 = default for zImage
124#else
125 .long 0x100000 # 0x100000 = default for big kernel
126#endif
127
128ramdisk_image: .long 0 # address of loaded ramdisk image
129 # Here the loader puts the 32-bit
130 # address where it loaded the image.
131 # This only will be read by the kernel.
132
133ramdisk_size: .long 0 # its size in bytes
134
135bootsect_kludge:
136 .long 0 # obsolete
137
138heap_end_ptr: .word modelist+1024 # (Header version 0x0201 or later)
139 # space from here (exclusive) down to
140 # end of setup code can be used by setup
141 # for local heap purposes.
142
143pad1: .word 0
144cmd_line_ptr: .long 0 # (Header version 0x0202 or later)
145 # If nonzero, a 32-bit pointer
146 # to the kernel command line.
147 # The command line should be
148 # located between the start of
149 # setup and the end of low
150 # memory (0xa0000), or it may
151 # get overwritten before it
152 # gets read. If this field is
153 # used, there is no longer
154 # anything magical about the
155 # 0x90000 segment; the setup
156 # can be located anywhere in
157 # low memory 0x10000 or higher.
158
159ramdisk_max: .long (-__PAGE_OFFSET-(512 << 20)-1) & 0x7fffffff
160 # (Header version 0x0203 or later)
161 # The highest safe address for
162 # the contents of an initrd
163
164trampoline: call start_of_setup
165 .align 16
166 # The offset at this point is 0x240
167 .space (0x7ff-0x240+1) # E820 & EDD space (ending at 0x7ff)
168# End of setup header #####################################################
169
170start_of_setup:
171# Bootlin depends on this being done early
172 movw $0x01500, %ax
173 movb $0x81, %dl
174 int $0x13
175
176#ifdef SAFE_RESET_DISK_CONTROLLER
177# Reset the disk controller.
178 movw $0x0000, %ax
179 movb $0x80, %dl
180 int $0x13
181#endif
182
183# Set %ds = %cs, we know that SETUPSEG = %cs at this point
184 movw %cs, %ax # aka SETUPSEG
185 movw %ax, %ds
186# Check signature at end of setup
187 cmpw $SIG1, setup_sig1
188 jne bad_sig
189
190 cmpw $SIG2, setup_sig2
191 jne bad_sig
192
193 jmp good_sig1
194
195# Routine to print asciiz string at ds:si
196prtstr:
197 lodsb
198 andb %al, %al
199 jz fin
200
201 call prtchr
202 jmp prtstr
203
204fin: ret
205
206# Space printing
207prtsp2: call prtspc # Print double space
208prtspc: movb $0x20, %al # Print single space (note: fall-thru)
209
210# Part of above routine, this one just prints ascii al
211prtchr: pushw %ax
212 pushw %cx
213 movw $7,%bx
214 movw $0x01, %cx
215 movb $0x0e, %ah
216 int $0x10
217 popw %cx
218 popw %ax
219 ret
220
221beep: movb $0x07, %al
222 jmp prtchr
223
224no_sig_mess: .string "No setup signature found ..."
225
226good_sig1:
227 jmp good_sig
228
229# We now have to find the rest of the setup code/data
230bad_sig:
231 movw %cs, %ax # SETUPSEG
232 subw $DELTA_INITSEG, %ax # INITSEG
233 movw %ax, %ds
234 xorb %bh, %bh
235 movb (497), %bl # get setup sect from bootsect
236 subw $4, %bx # LILO loads 4 sectors of setup
237 shlw $8, %bx # convert to words (1sect=2^8 words)
238 movw %bx, %cx
239 shrw $3, %bx # convert to segment
240 addw $SYSSEG, %bx
241 movw %bx, %cs:start_sys_seg
242# Move rest of setup code/data to here
243 movw $2048, %di # four sectors loaded by LILO
244 subw %si, %si
245 pushw %cs
246 popw %es
247 movw $SYSSEG, %ax
248 movw %ax, %ds
249 rep
250 movsw
251 movw %cs, %ax # aka SETUPSEG
252 movw %ax, %ds
253 cmpw $SIG1, setup_sig1
254 jne no_sig
255
256 cmpw $SIG2, setup_sig2
257 jne no_sig
258
259 jmp good_sig
260
261no_sig:
262 lea no_sig_mess, %si
263 call prtstr
264
265no_sig_loop:
266 hlt
267 jmp no_sig_loop
268
269good_sig:
270 movw %cs, %ax # aka SETUPSEG
271 subw $DELTA_INITSEG, %ax # aka INITSEG
272 movw %ax, %ds
273# Check if an old loader tries to load a big-kernel
274 testb $LOADED_HIGH, %cs:loadflags # Do we have a big kernel?
275 jz loader_ok # No, no danger for old loaders.
276
277 cmpb $0, %cs:type_of_loader # Do we have a loader that
278 # can deal with us?
279 jnz loader_ok # Yes, continue.
280
281 pushw %cs # No, we have an old loader,
282 popw %ds # die.
283 lea loader_panic_mess, %si
284 call prtstr
285
286 jmp no_sig_loop
287
288loader_panic_mess: .string "Wrong loader, giving up..."
289
290loader_ok:
291# Get memory size (extended mem, kB)
292
293 xorl %eax, %eax
294 movl %eax, (0x1e0)
295#ifndef STANDARD_MEMORY_BIOS_CALL
296 movb %al, (E820NR)
297# Try three different memory detection schemes. First, try
298# e820h, which lets us assemble a memory map, then try e801h,
299# which returns a 32-bit memory size, and finally 88h, which
300# returns 0-64m
301
302# method E820H:
303# the memory map from hell. e820h returns memory classified into
304# a whole bunch of different types, and allows memory holes and
305# everything. We scan through this memory map and build a list
306# of the first 32 memory areas, which we return at [E820MAP].
307# This is documented at http://www.acpi.info/, in the ACPI 2.0 specification.
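# Each e820 record is 20 bytes: a 64-bit base address, a 64-bit length
# and a 32-bit type (type 1 is usable RAM), which is why %ecx is
# loaded with 20 for every call below.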
308
309#define SMAP 0x534d4150
310
311meme820:
312 xorl %ebx, %ebx # continuation counter
313 movw $E820MAP, %di # point into the whitelist
314 # so we can have the bios
315 # directly write into it.
316
317jmpe820:
318 movl $0x0000e820, %eax # e820, upper word zeroed
319 movl $SMAP, %edx # ascii 'SMAP'
320 movl $20, %ecx # size of the e820rec
321 pushw %ds # data record.
322 popw %es
323 int $0x15 # make the call
324 jc bail820 # fall to e801 if it fails
325
326 cmpl $SMAP, %eax # check the return is `SMAP'
327 jne bail820 # fall to e801 if it fails
328
329# cmpl $1, 16(%di) # is this usable memory?
330# jne again820
331
332 # If this is usable memory, we save it by simply advancing %di by
333 # sizeof(e820rec).
334 #
335good820:
336 movb (E820NR), %al # up to 32 entries
337 cmpb $E820MAX, %al
338 jnl bail820
339
340 incb (E820NR)
341 movw %di, %ax
342 addw $20, %ax
343 movw %ax, %di
344again820:
345 cmpl $0, %ebx # check to see if
346 jne jmpe820 # %ebx is set to EOF
347bail820:
348
349
350# method E801H:
351# memory size is in 1k chunks, to avoid confusing loadlin.
352# we store the 0xe801 memory size in a completely different place,
353# because it will most likely be longer than 16 bits.
354# (use 1e0 because that's what Larry Augustine uses in his
355# alternative new memory detection scheme, and it's sensible
356# to write everything into the same place.)
357
358meme801:
359 stc # fix to work around buggy
360 xorw %cx,%cx # BIOSes which don't clear/set
361 xorw %dx,%dx # carry on pass/error of
362 # e801h memory size call
363 # or merely pass cx,dx though
364 # without changing them.
365 movw $0xe801, %ax
366 int $0x15
367 jc mem88
368
369 cmpw $0x0, %cx # Kludge to handle BIOSes
370 jne e801usecxdx # which report their extended
371 cmpw $0x0, %dx # memory in AX/BX rather than
372 jne e801usecxdx # CX/DX. The spec I have read
373 movw %ax, %cx # seems to indicate AX/BX
374 movw %bx, %dx # are more reasonable anyway...
375
376e801usecxdx:
377 andl $0xffff, %edx # clear sign extend
378 shll $6, %edx # and go from 64k to 1k chunks
379 movl %edx, (0x1e0) # store extended memory size
380 andl $0xffff, %ecx # clear sign extend
381 addl %ecx, (0x1e0) # and add lower memory into
382 # total size.
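# Worked example: a box with 15MB between 1MB and 16MB and 48MB above
# 16MB returns %cx = 0x3c00 (15360 1k units) and %dx = 0x0300 (768 64k
# units); 768 << 6 = 49152k, plus 15360k, gives 64512k at (0x1e0).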
383
384# Ye Olde Traditional Methode. Returns the memory size (up to 16mb or
385# 64mb, depending on the bios) in ax.
386mem88:
387
388#endif
389 movb $0x88, %ah
390 int $0x15
391 movw %ax, (2)
392
393# Set the keyboard repeat rate to the max
394 movw $0x0305, %ax
395 xorw %bx, %bx
396 int $0x16
397
398# Check for video adapter and its parameters and allow the
399# user to browse video modes.
400 call video # NOTE: we need %ds pointing
401 # to bootsector
402
403# Get hd0 data...
404 xorw %ax, %ax
405 movw %ax, %ds
406 ldsw (4 * 0x41), %si
407 movw %cs, %ax # aka SETUPSEG
408 subw $DELTA_INITSEG, %ax # aka INITSEG
409 pushw %ax
410 movw %ax, %es
411 movw $0x0080, %di
412 movw $0x10, %cx
413 pushw %cx
414 cld
415 rep
416 movsb
417# Get hd1 data...
418 xorw %ax, %ax
419 movw %ax, %ds
420 ldsw (4 * 0x46), %si
421 popw %cx
422 popw %es
423 movw $0x0090, %di
424 rep
425 movsb
426# Check that there IS a hd1 :-)
427 movw $0x01500, %ax
428 movb $0x81, %dl
429 int $0x13
430 jc no_disk1
431
432 cmpb $3, %ah
433 je is_disk1
434
435no_disk1:
436 movw %cs, %ax # aka SETUPSEG
437 subw $DELTA_INITSEG, %ax # aka INITSEG
438 movw %ax, %es
439 movw $0x0090, %di
440 movw $0x10, %cx
441 xorw %ax, %ax
442 cld
443 rep
444 stosb
445is_disk1:
446# check for Micro Channel (MCA) bus
447 movw %cs, %ax # aka SETUPSEG
448 subw $DELTA_INITSEG, %ax # aka INITSEG
449 movw %ax, %ds
450 xorw %ax, %ax
451 movw %ax, (0xa0) # set table length to 0
452 movb $0xc0, %ah
453 stc
454 int $0x15 # moves feature table to es:bx
455 jc no_mca
456
457 pushw %ds
458 movw %es, %ax
459 movw %ax, %ds
460 movw %cs, %ax # aka SETUPSEG
461 subw $DELTA_INITSEG, %ax # aka INITSEG
462 movw %ax, %es
463 movw %bx, %si
464 movw $0xa0, %di
465 movw (%si), %cx
466 addw $2, %cx # table length is a short
467 cmpw $0x10, %cx
468 jc sysdesc_ok
469
470 movw $0x10, %cx # we keep only first 16 bytes
471sysdesc_ok:
472 rep
473 movsb
474 popw %ds
475no_mca:
476#ifdef CONFIG_X86_VOYAGER
477 movb $0xff, 0x40 # flag on config found
478 movb $0xc0, %al
479 mov $0xff, %ah
480 int $0x15 # put voyager config info at es:di
481 jc no_voyager
482 movw $0x40, %si # place voyager info in apm table
483 cld
484 movw $7, %cx
485voyager_rep:
486 movb %es:(%di), %al
487 movb %al,(%si)
488 incw %di
489 incw %si
490 decw %cx
491 jnz voyager_rep
492no_voyager:
493#endif
494# Check for PS/2 pointing device
495 movw %cs, %ax # aka SETUPSEG
496 subw $DELTA_INITSEG, %ax # aka INITSEG
497 movw %ax, %ds
498 movw $0, (0x1ff) # default is no pointing device
499 int $0x11 # int 0x11: equipment list
500 testb $0x04, %al # check if mouse installed
501 jz no_psmouse
502
503 movw $0xAA, (0x1ff) # device present
504no_psmouse:
505
506#if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
507 movl $0x0000E980, %eax # IST Support
508 movl $0x47534943, %edx # Request value
509 int $0x15
510
511 movl %eax, (96)
512 movl %ebx, (100)
513 movl %ecx, (104)
514 movl %edx, (108)
515#endif
516
517#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
518# Then check for an APM BIOS...
519 # %ds points to the bootsector
520 movw $0, 0x40 # version = 0 means no APM BIOS
521 movw $0x05300, %ax # APM BIOS installation check
522 xorw %bx, %bx
523 int $0x15
524 jc done_apm_bios # Nope, no APM BIOS
525
526 cmpw $0x0504d, %bx # Check for "PM" signature
527 jne done_apm_bios # No signature, no APM BIOS
528
529 andw $0x02, %cx # Is 32 bit supported?
530 je done_apm_bios # No 32-bit, no (good) APM BIOS
531
532 movw $0x05304, %ax # Disconnect first just in case
533 xorw %bx, %bx
534 int $0x15 # ignore return code
535 movw $0x05303, %ax # 32 bit connect
536 xorl %ebx, %ebx
537 xorw %cx, %cx # paranoia :-)
538 xorw %dx, %dx # ...
539 xorl %esi, %esi # ...
540 xorw %di, %di # ...
541 int $0x15
542 jc no_32_apm_bios # Ack, error.
543
544 movw %ax, (66) # BIOS code segment
545 movl %ebx, (68) # BIOS entry point offset
546 movw %cx, (72) # BIOS 16 bit code segment
547 movw %dx, (74) # BIOS data segment
548 movl %esi, (78) # BIOS code segment lengths
549 movw %di, (82) # BIOS data segment length
550# Redo the installation check as the 32 bit connect
551# modifies the flags returned on some BIOSes
552 movw $0x05300, %ax # APM BIOS installation check
553 xorw %bx, %bx
554 xorw %cx, %cx # paranoia
555 int $0x15
556 jc apm_disconnect # error -> shouldn't happen
557
558 cmpw $0x0504d, %bx # check for "PM" signature
559 jne apm_disconnect # no sig -> shouldn't happen
560
561 movw %ax, (64) # record the APM BIOS version
562 movw %cx, (76) # and flags
563 jmp done_apm_bios
564
565apm_disconnect: # Tidy up
566 movw $0x05304, %ax # Disconnect
567 xorw %bx, %bx
568 int $0x15 # ignore return code
569
570 jmp done_apm_bios
571
572no_32_apm_bios:
573 andw $0xfffd, (76) # remove 32 bit support bit
574done_apm_bios:
575#endif
576
577#include "edd.S"
578
579# Now we want to move to protected mode ...
580 cmpw $0, %cs:realmode_swtch
581 jz rmodeswtch_normal
582
583 lcall *%cs:realmode_swtch
584
585 jmp rmodeswtch_end
586
587rmodeswtch_normal:
588 pushw %cs
589 call default_switch
590
591rmodeswtch_end:
592# we get the code32 start address and modify the below 'jmpi'
593# (loader may have changed it)
594 movl %cs:code32_start, %eax
595 movl %eax, %cs:code32
596
597# Now we move the system to its rightful place ... but we check if we have a
598# big-kernel. In that case we *must* not move it ...
599 testb $LOADED_HIGH, %cs:loadflags
600 jz do_move0 # .. then we have a normal low
601 # loaded zImage
602 # .. or else we have a high
603 # loaded bzImage
604 jmp end_move # ... and we skip moving
605
606do_move0:
607 movw $0x100, %ax # start of destination segment
608 movw %cs, %bp # aka SETUPSEG
609 subw $DELTA_INITSEG, %bp # aka INITSEG
610 movw %cs:start_sys_seg, %bx # start of source segment
611 cld
612do_move:
613 movw %ax, %es # destination segment
614 incb %ah # instead of add ax,#0x100
615 movw %bx, %ds # source segment
616 addw $0x100, %bx
617 subw %di, %di
618 subw %si, %si
619 movw $0x800, %cx
620 rep
621 movsw
622 cmpw %bp, %bx # assume start_sys_seg > 0x200,
623 # so we will perhaps read one
624 # page more than needed, but
625 # never overwrite INITSEG
626 # because destination is a
627 # minimum one page below source
628 jb do_move
629
630end_move:
631# then we load the segment descriptors
632 movw %cs, %ax # aka SETUPSEG
633 movw %ax, %ds
634
635# Check whether we need to be downward compatible with protocol version <= 0x0201
636 cmpl $0, cmd_line_ptr
637 jne end_move_self # loader uses version >= 0x0202 features
638 cmpb $0x20, type_of_loader
639 je end_move_self # bootsect loader, we know of it
640
641# The boot loader doesn't support boot protocol version 2.02.
642# If our code is not at 0x90000, we need to move it there now.
643# We also then need to move the params behind it (command line).
644# Because we would overwrite the code at the current IP, we move
645# it in two steps, jumping high after the first one.
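# (The copy runs downwards (std) because source and destination may
#  overlap: first the tail above move_self_here+0x200 is moved, then the
#  ljmp lands in the fresh copy and the remaining low part is moved.)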
646 movw %cs, %ax
647 cmpw $SETUPSEG, %ax
648 je end_move_self
649
650 cli # make sure we really have
651 # interrupts disabled !
652 # because after this the stack
653 # should not be used
654 subw $DELTA_INITSEG, %ax # aka INITSEG
655 movw %ss, %dx
656 cmpw %ax, %dx
657 jb move_self_1
658
659 addw $INITSEG, %dx
660 subw %ax, %dx # this will go into %ss after
661 # the move
662move_self_1:
663 movw %ax, %ds
664 movw $INITSEG, %ax # real INITSEG
665 movw %ax, %es
666 movw %cs:setup_move_size, %cx
667 std # we have to move up, so we use
668 # direction down because the
669 # areas may overlap
670 movw %cx, %di
671 decw %di
672 movw %di, %si
673 subw $move_self_here+0x200, %cx
674 rep
675 movsb
676 ljmp $SETUPSEG, $move_self_here
677
678move_self_here:
679 movw $move_self_here+0x200, %cx
680 rep
681 movsb
682 movw $SETUPSEG, %ax
683 movw %ax, %ds
684 movw %dx, %ss
685end_move_self: # now we are at the right place
686
687#
688# Enable A20. This is at the very best an annoying procedure.
689# A20 code ported from SYSLINUX 1.52-1.63 by H. Peter Anvin.
690# AMD Elan bug fix by Robert Schwebel.
691#
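# Apart from the Elan special case, the methods are tried in order: ask
# the BIOS (INT 0x15, AX=0x2401), then the keyboard controller (command
# 0xD1, data 0xDF), and finally "fast A20" via port 0x92.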
692
693#if defined(CONFIG_X86_ELAN)
694 movb $0x02, %al # alternate A20 gate
695 outb %al, $0x92 # this works on SC410/SC520
696a20_elan_wait:
697 call a20_test
698 jz a20_elan_wait
699 jmp a20_done
700#endif
701
702
703A20_TEST_LOOPS = 32 # Iterations per wait
704A20_ENABLE_LOOPS = 255 # Total loops to try
705
706
707#ifndef CONFIG_X86_VOYAGER
708a20_try_loop:
709
710 # First, see if we are on a system with no A20 gate.
711a20_none:
712 call a20_test
713 jnz a20_done
714
715 # Next, try the BIOS (INT 0x15, AX=0x2401)
716a20_bios:
717 movw $0x2401, %ax
718 pushfl # Be paranoid about flags
719 int $0x15
720 popfl
721
722 call a20_test
723 jnz a20_done
724
725 # Try enabling A20 through the keyboard controller
726#endif /* CONFIG_X86_VOYAGER */
727a20_kbc:
728 call empty_8042
729
730#ifndef CONFIG_X86_VOYAGER
731 call a20_test # Just in case the BIOS worked
732 jnz a20_done # but had a delayed reaction.
733#endif
734
735 movb $0xD1, %al # command write
736 outb %al, $0x64
737 call empty_8042
738
739 movb $0xDF, %al # A20 on
740 outb %al, $0x60
741 call empty_8042
742
743#ifndef CONFIG_X86_VOYAGER
744 # Wait until a20 really *is* enabled; it can take a fair amount of
745 # time on certain systems; Toshiba Tecras are known to have this
746 # problem.
747a20_kbc_wait:
748 xorw %cx, %cx
749a20_kbc_wait_loop:
750 call a20_test
751 jnz a20_done
752 loop a20_kbc_wait_loop
753
754 # Final attempt: use "configuration port A"
755a20_fast:
756 inb $0x92, %al # Configuration Port A
757 orb $0x02, %al # "fast A20" version
758 andb $0xFE, %al # don't accidentally reset
759 outb %al, $0x92
760
761 # Wait for configuration port A to take effect
762a20_fast_wait:
763 xorw %cx, %cx
764a20_fast_wait_loop:
765 call a20_test
766 jnz a20_done
767 loop a20_fast_wait_loop
768
769 # A20 is still not responding. Try frobbing it again.
770 #
771 decb (a20_tries)
772 jnz a20_try_loop
773
774 movw $a20_err_msg, %si
775 call prtstr
776
777a20_die:
778 hlt
779 jmp a20_die
780
781a20_tries:
782 .byte A20_ENABLE_LOOPS
783
784a20_err_msg:
785 .ascii "linux: fatal error: A20 gate not responding!"
786 .byte 13, 10, 0
787
788 # If we get here, all is good
789a20_done:
790
791#endif /* CONFIG_X86_VOYAGER */
792# set up gdt and idt
793 lidt idt_48 # load idt with 0,0
794 xorl %eax, %eax # Compute gdt_base
795 movw %ds, %ax # (Convert %ds:gdt to a linear ptr)
796 shll $4, %eax
797 addl $gdt, %eax
798 movl %eax, (gdt_48+2)
799 lgdt gdt_48 # load gdt with whatever is
800 # appropriate
801
802# make sure any possible coprocessor is properly reset..
803 xorw %ax, %ax
804 outb %al, $0xf0
805 call delay
806
807 outb %al, $0xf1
808 call delay
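# (On AT-class hardware, a write to port 0xf0 clears the coprocessor's
#  busy/error latch and a write to port 0xf1 resets the coprocessor.)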
809
810# well, that went ok, I hope. Now we mask all interrupts - the rest
811# is done in init_IRQ().
812 movb $0xFF, %al # mask all interrupts for now
813 outb %al, $0xA1
814 call delay
815
816 movb $0xFB, %al # mask all irq's but irq2 which
817 outb %al, $0x21 # is cascaded
818
819# Well, that certainly wasn't fun :-(. Hopefully it works, and we don't
820# need no steenking BIOS anyway (except for the initial loading :-).
821# The BIOS-routine wants lots of unnecessary data, and it's less
822# "interesting" anyway. This is how REAL programmers do it.
823#
824# Well, now's the time to actually move into protected mode. To make
825# things as simple as possible, we do no register set-up or anything,
826# we let the gnu-compiled 32-bit programs do that. We just jump to
827# absolute address 0x1000 (or the loader supplied one),
828# in 32-bit protected mode.
829#
830# Note that the short jump isn't strictly needed, although there are
831# reasons why it might be a good idea. It won't hurt in any case.
832 movw $1, %ax # protected mode (PE) bit
833 lmsw %ax # This is it!
834 jmp flush_instr
835
836flush_instr:
837 xorw %bx, %bx # Flag to indicate a boot
838 xorl %esi, %esi # Pointer to real-mode code
839 movw %cs, %si
840 subw $DELTA_INITSEG, %si
841 shll $4, %esi # Convert to 32-bit pointer
842
843# jump to startup_32 in arch/i386/boot/compressed/head.S
844#
845# NOTE: For high loaded big kernels we need a
846# jmpi 0x100000,__BOOT_CS
847#
848# but we haven't yet reloaded the CS register, so the default size
849# of the target offset still is 16 bit.
850# However, using an operand prefix (0x66), the CPU will properly
851# take our 48 bit far pointer. (Intel 80386 Programmer's Reference
852# Manual, Mixing 16-bit and 32-bit code, page 16-6)
853
854 .byte 0x66, 0xea # prefix + jmpi-opcode
855code32: .long 0x1000 # will be set to 0x100000
856 # for big kernels
857 .word __BOOT_CS
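# (The bytes assembled here thus form 66 EA <offset32> <selector16>:
#  a far jump taking a full 48-bit pointer even from 16-bit code.)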
858
859# Here's a bunch of information about your current kernel..
860kernel_version: .ascii UTS_RELEASE
861 .ascii " ("
862 .ascii LINUX_COMPILE_BY
863 .ascii "@"
864 .ascii LINUX_COMPILE_HOST
865 .ascii ") "
866 .ascii UTS_VERSION
867 .byte 0
868
869# This is the default real mode switch routine,
870# to be called just before the protected mode transition.
871default_switch:
872 cli # no interrupts allowed !
873 movb $0x80, %al # disable NMI for bootup
874 # sequence
875 outb %al, $0x70
876 lret
877
878
879#ifndef CONFIG_X86_VOYAGER
880# This routine tests whether or not A20 is enabled. If so, it
881# exits with zf = 0.
882#
883# The memory address used, 0x200, is the int $0x80 vector, which
884# should be safe.
885
886A20_TEST_ADDR = 4*0x80
887
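# With %fs = 0 and %gs = 0xffff, the locations compared below are linear
# 0x000200 and 0x100200; with A20 disabled the latter wraps around to the
# former, so the incremented value shows through at %gs:(A20_TEST_ADDR+0x10).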
888a20_test:
889 pushw %cx
890 pushw %ax
891 xorw %cx, %cx
892 movw %cx, %fs # Low memory
893 decw %cx
894 movw %cx, %gs # High memory area
895 movw $A20_TEST_LOOPS, %cx
896 movw %fs:(A20_TEST_ADDR), %ax
897 pushw %ax
898a20_test_wait:
899 incw %ax
900 movw %ax, %fs:(A20_TEST_ADDR)
901 call delay # Serialize and make delay constant
902 cmpw %gs:(A20_TEST_ADDR+0x10), %ax
903 loope a20_test_wait
904
905 popw %fs:(A20_TEST_ADDR)
906 popw %ax
907 popw %cx
908 ret
909
910#endif /* CONFIG_X86_VOYAGER */
911
912# This routine checks that the keyboard command queue is empty
913# (after emptying the output buffers)
914#
915# Some machines have delusions that the keyboard buffer is always full
916# with no keyboard attached...
917#
918# If there is no keyboard controller, we will usually get 0xff
919# to all the reads. With each IO taking a microsecond and
920# a timeout of 100,000 iterations, this can take about half a
921# second ("delay" == outb to port 0x80). That should be ok,
922# and should also be plenty of time for a real keyboard controller
923# to empty.
924#
925
926empty_8042:
927 pushl %ecx
928 movl $100000, %ecx
929
930empty_8042_loop:
931 decl %ecx
932 jz empty_8042_end_loop
933
934 call delay
935
936 inb $0x64, %al # 8042 status port
937 testb $1, %al # output buffer?
938 jz no_output
939
940 call delay
941 inb $0x60, %al # read it
942 jmp empty_8042_loop
943
944no_output:
945 testb $2, %al # is input buffer full?
946 jnz empty_8042_loop # yes - loop
947empty_8042_end_loop:
948 popl %ecx
949 ret
950
951# Read the cmos clock. Return the seconds in al
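# (INT 0x1a, AH=0x02 returns the seconds in %dh as BCD; the nibbles are
#  split into %ah (tens) and %al (units) and `aad' then computes
#  %al = %ah * 10 + %al, i.e. plain binary seconds.)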
952gettime:
953 pushw %cx
954 movb $0x02, %ah
955 int $0x1a
956 movb %dh, %al # %dh contains the seconds
957 andb $0x0f, %al
958 movb %dh, %ah
959 movb $0x04, %cl
960 shrb %cl, %ah
961 aad
962 popw %cx
963 ret
964
965# Delay is needed after doing I/O
966delay:
967 outb %al,$0x80
968 ret
969
970# Descriptor tables
971#
972# NOTE: The Intel manual says the GDT should be sixteen-byte aligned for
973# efficiency reasons. However, there are machines which are known not
974# to boot with misaligned GDTs, so alter this at your peril! If you alter
975# GDT_ENTRY_BOOT_CS (in asm/segment.h) remember to leave at least two
976# empty GDT entries (one for NULL and one reserved).
977#
978# NOTE: On some CPUs, the GDT must be 8-byte aligned. This is
979# true for the Voyager Quad CPU card which will not boot without
980# this directive. 16-byte alignment is recommended by Intel.
981#
982 .align 16
983gdt:
984 .fill GDT_ENTRY_BOOT_CS,8,0
985
986 .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
987 .word 0 # base address = 0
988 .word 0x9A00 # code read/exec
989 .word 0x00CF # granularity = 4096, 386
990 # (+5th nibble of limit)
991
992 .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
993 .word 0 # base address = 0
994 .word 0x9200 # data read/write
995 .word 0x00CF # granularity = 4096, 386
996 # (+5th nibble of limit)
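# (Both descriptors decode as base 0, limit 0xfffff in 4 KiB pages, i.e.
#  a flat 4 GiB segment; access byte 0x9a = present, ring 0, readable
#  code and 0x92 = present, ring 0, writable data; flags 0xc = 4 KiB
#  granularity, 32-bit default operand size.)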
997gdt_end:
998 .align 4
999
1000	.word	0				# alignment word
1001idt_48:
1002 .word 0 # idt limit = 0
1003 .word 0, 0 # idt base = 0L
1004
1005	.word	0				# alignment word
1006gdt_48:
1007 .word gdt_end - gdt - 1 # gdt limit
1008 .word 0, 0 # gdt base (filled in later)
1009
1010# Include video setup & detection code
1011
1012#include "video.S"
1013
1014# Setup signature -- must be last
1015setup_sig1: .word SIG1
1016setup_sig2: .word SIG2
1017
1018# After this point, there is some free space which is used by the video mode
1019# handling code to store the temporary mode table (not used by the kernel).
1020
1021modelist:
1022
1023.text
1024endtext:
1025.data
1026enddata:
1027.bss
1028endbss:
diff --git a/arch/i386/boot/tools/build.c b/arch/i386/boot/tools/build.c
new file mode 100644
index 000000000000..26509b826aed
--- /dev/null
+++ b/arch/i386/boot/tools/build.c
@@ -0,0 +1,184 @@
1/*
2 * $Id: build.c,v 1.5 1997/05/19 12:29:58 mj Exp $
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 1997 Martin Mares
6 */
7
8/*
9 * This file builds a disk-image from three different files:
10 *
11 * - bootsect: exactly 512 bytes of 8086 machine code, loads the rest
12 * - setup: 8086 machine code, sets up system parameters
13 * - system: 80386 code for actual system
14 *
15 * It does some checking that all files are of the correct type, and
16 * just writes the result to stdout, removing headers and padding to
17 * the right amount. It also writes some system data to stderr.
18 */
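/*
 * The image written to stdout is therefore laid out as:
 *   bytes 0..511               boot sector (root device, setup sector
 *                              count and system size patched in below)
 *   setup_sectors * 512 bytes  setup code, zero-padded
 *   the rest                   the system, sized in 16-byte paragraphs
 */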
19
20/*
21 * Changes by tytso to allow root device specification
22 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
23 * Cross compiling fixes by Gertjan van Wingerde, July 1996
24 * Rewritten by Martin Mares, April 1997
25 */
26
27#include <stdio.h>
28#include <string.h>
29#include <stdlib.h>
30#include <stdarg.h>
31#include <sys/types.h>
32#include <sys/stat.h>
33#include <sys/sysmacros.h>
34#include <unistd.h>
35#include <fcntl.h>
36#include <asm/boot.h>
37
38typedef unsigned char byte;
39typedef unsigned short word;
40typedef unsigned long u32;
41
42#define DEFAULT_MAJOR_ROOT 0
43#define DEFAULT_MINOR_ROOT 0
44
45/* Minimal number of setup sectors (see also bootsect.S) */
46#define SETUP_SECTS 4
47
48byte buf[1024];
49int fd;
50int is_big_kernel;
51
52void die(const char * str, ...)
53{
54 va_list args;
55 va_start(args, str);
56 vfprintf(stderr, str, args);
57 fputc('\n', stderr);
58 exit(1);
59}
60
61void file_open(const char *name)
62{
63 if ((fd = open(name, O_RDONLY, 0)) < 0)
64 die("Unable to open `%s': %m", name);
65}
66
67void usage(void)
68{
69 die("Usage: build [-b] bootsect setup system [rootdev] [> image]");
70}
71
72int main(int argc, char ** argv)
73{
74	unsigned int i, sz, setup_sectors;
	int c;				/* signed: read() may return -1 on error */
75 u32 sys_size;
76 byte major_root, minor_root;
77 struct stat sb;
78
79 if (argc > 2 && !strcmp(argv[1], "-b"))
80 {
81 is_big_kernel = 1;
82 argc--, argv++;
83 }
84 if ((argc < 4) || (argc > 5))
85 usage();
86 if (argc > 4) {
87 if (!strcmp(argv[4], "CURRENT")) {
88 if (stat("/", &sb)) {
89 perror("/");
90 die("Couldn't stat /");
91 }
92 major_root = major(sb.st_dev);
93 minor_root = minor(sb.st_dev);
94 } else if (strcmp(argv[4], "FLOPPY")) {
95 if (stat(argv[4], &sb)) {
96 perror(argv[4]);
97 die("Couldn't stat root device.");
98 }
99 major_root = major(sb.st_rdev);
100 minor_root = minor(sb.st_rdev);
101 } else {
102 major_root = 0;
103 minor_root = 0;
104 }
105 } else {
106 major_root = DEFAULT_MAJOR_ROOT;
107 minor_root = DEFAULT_MINOR_ROOT;
108 }
109 fprintf(stderr, "Root device is (%d, %d)\n", major_root, minor_root);
110
111 file_open(argv[1]);
112 i = read(fd, buf, sizeof(buf));
113 fprintf(stderr,"Boot sector %d bytes.\n",i);
114 if (i != 512)
115 die("Boot block must be exactly 512 bytes");
116 if (buf[510] != 0x55 || buf[511] != 0xaa)
117 die("Boot block hasn't got boot flag (0xAA55)");
118 buf[508] = minor_root;
119 buf[509] = major_root;
120 if (write(1, buf, 512) != 512)
121 die("Write call failed");
122 close (fd);
123
124 file_open(argv[2]); /* Copy the setup code */
125 for (i=0 ; (c=read(fd, buf, sizeof(buf)))>0 ; i+=c )
126 if (write(1, buf, c) != c)
127 die("Write call failed");
128 if (c != 0)
129 die("read-error on `setup'");
130 close (fd);
131
132 setup_sectors = (i + 511) / 512; /* Pad unused space with zeros */
133 /* for compatibility with ancient versions of LILO. */
134 if (setup_sectors < SETUP_SECTS)
135 setup_sectors = SETUP_SECTS;
136 fprintf(stderr, "Setup is %d bytes.\n", i);
137 memset(buf, 0, sizeof(buf));
138 while (i < setup_sectors * 512) {
139 c = setup_sectors * 512 - i;
140 if (c > sizeof(buf))
141 c = sizeof(buf);
142 if (write(1, buf, c) != c)
143 die("Write call failed");
144 i += c;
145 }
146
147 file_open(argv[3]);
148 if (fstat (fd, &sb))
149 die("Unable to stat `%s': %m", argv[3]);
150 sz = sb.st_size;
151 fprintf (stderr, "System is %d kB\n", sz/1024);
152 sys_size = (sz + 15) / 16;
153 if (!is_big_kernel && sys_size > DEF_SYSSIZE)
154 die("System is too big. Try using bzImage or modules.");
155 while (sz > 0) {
156 int l, n;
157
158 l = (sz > sizeof(buf)) ? sizeof(buf) : sz;
159 if ((n=read(fd, buf, l)) != l) {
160 if (n < 0)
161 die("Error reading %s: %m", argv[3]);
162 else
163 die("%s: Unexpected EOF", argv[3]);
164 }
165 if (write(1, buf, l) != l)
166 die("Write failed");
167 sz -= l;
168 }
169 close(fd);
170
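	/* Offsets 497 (setup sector count) and 500 (system size in 16-byte
	   paragraphs) are the bootsector fields consulted by bootsect.S and
	   by boot loaders (see the boot protocol documentation). */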
171 if (lseek(1, 497, SEEK_SET) != 497) /* Write sizes to the bootsector */
172 die("Output: seek failed");
173 buf[0] = setup_sectors;
174 if (write(1, buf, 1) != 1)
175 die("Write of setup sector count failed");
176 if (lseek(1, 500, SEEK_SET) != 500)
177 die("Output: seek failed");
178 buf[0] = (sys_size & 0xff);
179 buf[1] = ((sys_size >> 8) & 0xff);
180 if (write(1, buf, 2) != 2)
181 die("Write of image length failed");
182
183 return 0; /* Everything is OK */
184}
diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S
new file mode 100644
index 000000000000..925d3f5a3824
--- /dev/null
+++ b/arch/i386/boot/video.S
@@ -0,0 +1,2007 @@
1/* video.S
2 *
3 * Display adapter & video mode setup, version 2.13 (14-May-99)
4 *
5 * Copyright (C) 1995 -- 1998 Martin Mares <mj@ucw.cz>
6 * Based on the original setup.S code (C) Linus Torvalds and Mats Anderson
7 *
8 * Rewritten to use GNU 'as' by Chris Noe <stiker@northlink.com> May 1999
9 *
10 * For further information, look at Documentation/svga.txt.
11 *
12 */
13
14#include <linux/config.h> /* for CONFIG_VIDEO_* */
15
16/* Enable autodetection of SVGA adapters and modes. */
17#undef CONFIG_VIDEO_SVGA
18
19/* Enable autodetection of VESA modes */
20#define CONFIG_VIDEO_VESA
21
22/* Enable compacting of mode table */
23#define CONFIG_VIDEO_COMPACT
24
25/* Retain screen contents when switching modes */
26#define CONFIG_VIDEO_RETAIN
27
28/* Enable local mode list */
29#undef CONFIG_VIDEO_LOCAL
30
31/* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour) */
32#undef CONFIG_VIDEO_400_HACK
33
34/* Hack that lets you force specific BIOS mode ID and specific dimensions */
35#undef CONFIG_VIDEO_GFX_HACK
36#define VIDEO_GFX_BIOS_AX 0x4f02 /* 800x600 on ThinkPad */
37#define VIDEO_GFX_BIOS_BX 0x0102
38#define VIDEO_GFX_DUMMY_RESOLUTION 0x6425 /* 100x37 */
39
40/* This code uses an extended set of video mode numbers. These include:
41 * Aliases for standard modes
42 * NORMAL_VGA (-1)
43 * EXTENDED_VGA (-2)
44 * ASK_VGA (-3)
45 * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
46 * of compatibility when extending the table. These are between 0x00 and 0xff.
47 */
48#define VIDEO_FIRST_MENU 0x0000
49
50/* Standard BIOS video modes (BIOS number + 0x0100) */
51#define VIDEO_FIRST_BIOS 0x0100
52
53/* VESA BIOS video modes (VESA number + 0x0200) */
54#define VIDEO_FIRST_VESA 0x0200
55
56/* Video7 special modes (BIOS number + 0x0900) */
57#define VIDEO_FIRST_V7 0x0900
58
59/* Special video modes */
60#define VIDEO_FIRST_SPECIAL 0x0f00
61#define VIDEO_80x25 0x0f00
62#define VIDEO_8POINT 0x0f01
63#define VIDEO_80x43 0x0f02
64#define VIDEO_80x28 0x0f03
65#define VIDEO_CURRENT_MODE 0x0f04
66#define VIDEO_80x30 0x0f05
67#define VIDEO_80x34 0x0f06
68#define VIDEO_80x60 0x0f07
69#define VIDEO_GFX_HACK 0x0f08
70#define VIDEO_LAST_SPECIAL 0x0f09
71
72/* Video modes given by resolution */
73#define VIDEO_FIRST_RESOLUTION 0x1000
74
75/* The "recalculate timings" flag */
76#define VIDEO_RECALC 0x8000
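/* For example, standard BIOS mode 0x03 becomes 0x0103 here, VESA mode
   0x101 becomes 0x0301, and a plain menu position such as 1 stays 0x0001. */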
77
78/* Positions of various video parameters passed to the kernel */
79/* (see also include/linux/tty.h) */
80#define PARAM_CURSOR_POS 0x00
81#define PARAM_VIDEO_PAGE 0x04
82#define PARAM_VIDEO_MODE 0x06
83#define PARAM_VIDEO_COLS 0x07
84#define PARAM_VIDEO_EGA_BX 0x0a
85#define PARAM_VIDEO_LINES 0x0e
86#define PARAM_HAVE_VGA 0x0f
87#define PARAM_FONT_POINTS 0x10
88
89#define PARAM_LFB_WIDTH 0x12
90#define PARAM_LFB_HEIGHT 0x14
91#define PARAM_LFB_DEPTH 0x16
92#define PARAM_LFB_BASE 0x18
93#define PARAM_LFB_SIZE 0x1c
94#define PARAM_LFB_LINELENGTH 0x24
95#define PARAM_LFB_COLORS 0x26
96#define PARAM_VESAPM_SEG 0x2e
97#define PARAM_VESAPM_OFF 0x30
98#define PARAM_LFB_PAGES 0x32
99#define PARAM_VESA_ATTRIB 0x34
100
101/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */
102#ifdef CONFIG_VIDEO_RETAIN
103#define DO_STORE call store_screen
104#else
105#define DO_STORE
106#endif /* CONFIG_VIDEO_RETAIN */
107
108# This is the main entry point called by setup.S
109# %ds *must* be pointing to the bootsector
110video: pushw %ds # We use different segments
111 pushw %ds # FS contains original DS
112 popw %fs
113 pushw %cs # DS is equal to CS
114 popw %ds
115 pushw %cs # ES is equal to CS
116 popw %es
117 xorw %ax, %ax
118 movw %ax, %gs # GS is zero
119 cld
120 call basic_detect # Basic adapter type testing (EGA/VGA/MDA/CGA)
121#ifdef CONFIG_VIDEO_SELECT
122 movw %fs:(0x01fa), %ax # User selected video mode
123 cmpw $ASK_VGA, %ax # Bring up the menu
124 jz vid2
125
126 call mode_set # Set the mode
127 jc vid1
128
129 leaw badmdt, %si # Invalid mode ID
130 call prtstr
131vid2: call mode_menu
132vid1:
133#ifdef CONFIG_VIDEO_RETAIN
134 call restore_screen # Restore screen contents
135#endif /* CONFIG_VIDEO_RETAIN */
136 call store_edid
137#endif /* CONFIG_VIDEO_SELECT */
138 call mode_params # Store mode parameters
139 popw %ds # Restore original DS
140 ret
141
142# Detect if we have CGA, MDA, EGA or VGA and pass it to the kernel.
143basic_detect:
144 movb $0, %fs:(PARAM_HAVE_VGA)
145 movb $0x12, %ah # Check EGA/VGA
146 movb $0x10, %bl
147 int $0x10
148 movw %bx, %fs:(PARAM_VIDEO_EGA_BX) # Identifies EGA to the kernel
149 cmpb $0x10, %bl # No, it's a CGA/MDA/HGA card.
150 je basret
151
152 incb adapter
153 movw $0x1a00, %ax # Check EGA or VGA?
154 int $0x10
155 cmpb $0x1a, %al # 1a means VGA...
156 jne basret # anything else is EGA.
157
158 incb %fs:(PARAM_HAVE_VGA) # We've detected a VGA
159 incb adapter
160basret: ret
161
162# Store the video mode parameters for later usage by the kernel.
163# This is done by asking the BIOS except for the rows/columns
164# parameters in the default 80x25 mode -- these are set directly,
165# because some very obscure BIOSes supply insane values.
166mode_params:
167#ifdef CONFIG_VIDEO_SELECT
168 cmpb $0, graphic_mode
169 jnz mopar_gr
170#endif
171 movb $0x03, %ah # Read cursor position
172 xorb %bh, %bh
173 int $0x10
174 movw %dx, %fs:(PARAM_CURSOR_POS)
175 movb $0x0f, %ah # Read page/mode/width
176 int $0x10
177 movw %bx, %fs:(PARAM_VIDEO_PAGE)
178 movw %ax, %fs:(PARAM_VIDEO_MODE) # Video mode and screen width
179 cmpb $0x7, %al # MDA/HGA => segment differs
180 jnz mopar0
181
182 movw $0xb000, video_segment
183mopar0: movw %gs:(0x485), %ax # Font size
184 movw %ax, %fs:(PARAM_FONT_POINTS) # (valid only on EGA/VGA)
185 movw force_size, %ax # Forced size?
186 orw %ax, %ax
187 jz mopar1
188
189 movb %ah, %fs:(PARAM_VIDEO_COLS)
190 movb %al, %fs:(PARAM_VIDEO_LINES)
191 ret
192
193mopar1: movb $25, %al
194 cmpb $0, adapter # If we are on CGA/MDA/HGA, the
195 jz mopar2 # screen must have 25 lines.
196
197 movb %gs:(0x484), %al # On EGA/VGA, use the EGA+ BIOS
198 incb %al # location of max lines.
199mopar2: movb %al, %fs:(PARAM_VIDEO_LINES)
200 ret
201
202#ifdef CONFIG_VIDEO_SELECT
203# Fetching of VESA frame buffer parameters
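# (The offsets into the buffer at modelist+1024 below follow the VBE
#  ModeInfoBlock layout: 16=BytesPerScanLine, 18=XResolution,
#  20=YResolution, 25=BitsPerPixel, 29=NumberOfImagePages, 40=PhysBasePtr,
#  and 31/35 the colour mask size/position fields.)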
204mopar_gr:
205 leaw modelist+1024, %di
206 movb $0x23, %fs:(PARAM_HAVE_VGA)
207 movw 16(%di), %ax
208 movw %ax, %fs:(PARAM_LFB_LINELENGTH)
209 movw 18(%di), %ax
210 movw %ax, %fs:(PARAM_LFB_WIDTH)
211 movw 20(%di), %ax
212 movw %ax, %fs:(PARAM_LFB_HEIGHT)
213 movb 25(%di), %al
214 movb $0, %ah
215 movw %ax, %fs:(PARAM_LFB_DEPTH)
216 movb 29(%di), %al
217 movb $0, %ah
218 movw %ax, %fs:(PARAM_LFB_PAGES)
219 movl 40(%di), %eax
220 movl %eax, %fs:(PARAM_LFB_BASE)
221 movl 31(%di), %eax
222 movl %eax, %fs:(PARAM_LFB_COLORS)
223 movl 35(%di), %eax
224 movl %eax, %fs:(PARAM_LFB_COLORS+4)
225 movw 0(%di), %ax
226 movw %ax, %fs:(PARAM_VESA_ATTRIB)
227
228# get video mem size
229 leaw modelist+1024, %di
230 movw $0x4f00, %ax
231 int $0x10
232 xorl %eax, %eax
233 movw 18(%di), %ax
234 movl %eax, %fs:(PARAM_LFB_SIZE)
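# (Offset 18 of the VbeInfoBlock returned by function 0x4f00 is
#  TotalMemory, counted in 64 KiB blocks; it is stored here unconverted.)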
235
236# switching the DAC to 8-bit is for <= 8 bpp only
237 movw %fs:(PARAM_LFB_DEPTH), %ax
238 cmpw $8, %ax
239 jg dac_done
240
241# get DAC switching capability
242 xorl %eax, %eax
243 movb 10(%di), %al
244 testb $1, %al
245 jz dac_set
246
247# attempt to switch DAC to 8-bit
248 movw $0x4f08, %ax
249 movw $0x0800, %bx
250 int $0x10
251 cmpw $0x004f, %ax
252 jne dac_set
253 movb %bh, dac_size # store actual DAC size
254
255dac_set:
256# set color size to DAC size
257 movb dac_size, %al
258 movb %al, %fs:(PARAM_LFB_COLORS+0)
259 movb %al, %fs:(PARAM_LFB_COLORS+2)
260 movb %al, %fs:(PARAM_LFB_COLORS+4)
261 movb %al, %fs:(PARAM_LFB_COLORS+6)
262
263# set color offsets to 0
264 movb $0, %fs:(PARAM_LFB_COLORS+1)
265 movb $0, %fs:(PARAM_LFB_COLORS+3)
266 movb $0, %fs:(PARAM_LFB_COLORS+5)
267 movb $0, %fs:(PARAM_LFB_COLORS+7)
268
269dac_done:
270# get protected-mode interface information
271 movw $0x4f0a, %ax
272 xorw %bx, %bx
273 xorw %di, %di
274 int $0x10
275 cmp $0x004f, %ax
276 jnz no_pm
277
278 movw %es, %fs:(PARAM_VESAPM_SEG)
279 movw %di, %fs:(PARAM_VESAPM_OFF)
280no_pm: ret
281
282# The video mode menu
283mode_menu:
284 leaw keymsg, %si # "Return/Space/Timeout" message
285 call prtstr
286 call flush
287nokey: call getkt
288
289 cmpb $0x0d, %al # ENTER ?
290 je listm # yes - manual mode selection
291
292 cmpb $0x20, %al # SPACE ?
293 je defmd1 # no - repeat
294
295 call beep
296 jmp nokey
297
298defmd1: ret # No mode chosen? Default 80x25
299
300listm: call mode_table # List mode table
301listm0: leaw name_bann, %si # Print adapter name
302 call prtstr
303 movw card_name, %si
304 orw %si, %si
305 jnz an2
306
307 movb adapter, %al
308 leaw old_name, %si
309 orb %al, %al
310 jz an1
311
312 leaw ega_name, %si
313 decb %al
314 jz an1
315
316 leaw vga_name, %si
317 jmp an1
318
319an2: call prtstr
320 leaw svga_name, %si
321an1: call prtstr
322 leaw listhdr, %si # Table header
323 call prtstr
324 movb $0x30, %dl # DL holds mode number
325 leaw modelist, %si
326lm1: cmpw $ASK_VGA, (%si) # End?
327 jz lm2
328
329 movb %dl, %al # Menu selection number
330 call prtchr
331 call prtsp2
332 lodsw
333 call prthw # Mode ID
334 call prtsp2
335 movb 0x1(%si), %al
336	call	prtdec				# Columns
337 movb $0x78, %al # the letter 'x'
338 call prtchr
339 lodsw
340	call	prtdec				# Rows
341 movb $0x0d, %al # New line
342 call prtchr
343 movb $0x0a, %al
344 call prtchr
345 incb %dl # Next character
346 cmpb $0x3a, %dl
347 jnz lm1
348
349 movb $0x61, %dl
350 jmp lm1
351
352lm2: leaw prompt, %si # Mode prompt
353 call prtstr
354 leaw edit_buf, %di # Editor buffer
355lm3: call getkey
356 cmpb $0x0d, %al # Enter?
357 jz lment
358
359 cmpb $0x08, %al # Backspace?
360 jz lmbs
361
362 cmpb $0x20, %al # Printable?
363 jc lm3
364
365 cmpw $edit_buf+4, %di # Enough space?
366 jz lm3
367
368 stosb
369 call prtchr
370 jmp lm3
371
372lmbs: cmpw $edit_buf, %di # Backspace
373 jz lm3
374
375 decw %di
376 movb $0x08, %al
377 call prtchr
378 call prtspc
379 movb $0x08, %al
380 call prtchr
381 jmp lm3
382
383lment: movb $0, (%di)
384 leaw crlft, %si
385 call prtstr
386 leaw edit_buf, %si
387 cmpb $0, (%si) # Empty string = default mode
388 jz lmdef
389
390 cmpb $0, 1(%si) # One character = menu selection
391 jz mnusel
392
393 cmpw $0x6373, (%si) # "scan" => mode scanning
394 jnz lmhx
395
396 cmpw $0x6e61, 2(%si)
397 jz lmscan
398
399lmhx: xorw %bx, %bx # Else => mode ID in hex
400lmhex: lodsb
401 orb %al, %al
402 jz lmuse1
403
404 subb $0x30, %al
405 jc lmbad
406
407 cmpb $10, %al
408 jc lmhx1
409
410 subb $7, %al
411 andb $0xdf, %al
412 cmpb $10, %al
413 jc lmbad
414
415 cmpb $16, %al
416 jnc lmbad
417
418lmhx1: shlw $4, %bx
419 orb %al, %bl
420 jmp lmhex
421
422lmuse1: movw %bx, %ax
423 jmp lmuse
424
425mnusel: lodsb # Menu selection
426 xorb %ah, %ah
427 subb $0x30, %al
428 jc lmbad
429
430 cmpb $10, %al
431 jc lmuse
432
433 cmpb $0x61-0x30, %al
434 jc lmbad
435
436 subb $0x61-0x30-10, %al
437 cmpb $36, %al
438 jnc lmbad
439
440lmuse: call mode_set
441 jc lmdef
442
443lmbad: leaw unknt, %si
444 call prtstr
445 jmp lm2
446lmscan: cmpb $0, adapter # Scanning only on EGA/VGA
447 jz lmbad
448
449 movw $0, mt_end # Scanning of modes is
450 movb $1, scanning # done as new autodetection.
451 call mode_table
452 jmp listm0
453lmdef: ret
454
455# Additional parts of mode_set... (relative jumps, you know)
456setv7: # Video7 extended modes
457 DO_STORE
458 subb $VIDEO_FIRST_V7>>8, %bh
459 movw $0x6f05, %ax
460 int $0x10
461 stc
462 ret
463
464_setrec: jmp setrec # Ugly...
465_set_80x25: jmp set_80x25
466
467# Aliases for backward compatibility.
468setalias:
469 movw $VIDEO_80x25, %ax
470 incw %bx
471 jz mode_set
472
473 movb $VIDEO_8POINT-VIDEO_FIRST_SPECIAL, %al
474 incw %bx
475 jnz setbad # Fall-through!
476
477# Setting of user mode (AX=mode ID) => CF=success
478mode_set:
479 movw %ax, %fs:(0x01fa) # Store mode for use in acpi_wakeup.S
480 movw %ax, %bx
481 cmpb $0xff, %ah
482 jz setalias
483
484 testb $VIDEO_RECALC>>8, %ah
485 jnz _setrec
486
487 cmpb $VIDEO_FIRST_RESOLUTION>>8, %ah
488 jnc setres
489
490 cmpb $VIDEO_FIRST_SPECIAL>>8, %ah
491 jz setspc
492
493 cmpb $VIDEO_FIRST_V7>>8, %ah
494 jz setv7
495
496 cmpb $VIDEO_FIRST_VESA>>8, %ah
497 jnc check_vesa
498
499 orb %ah, %ah
500 jz setmenu
501
502 decb %ah
503 jz setbios
504
505setbad: clc
506 movb $0, do_restore # The screen needn't be restored
507 ret
508
509setvesa:
510 DO_STORE
511 subb $VIDEO_FIRST_VESA>>8, %bh
512 movw $0x4f02, %ax # VESA BIOS mode set call
513 int $0x10
514 cmpw $0x004f, %ax # AL=4f if implemented
515 jnz setbad # AH=0 if OK
516
517 stc
518 ret
519
520setbios:
521 DO_STORE
522 int $0x10 # Standard BIOS mode set call
523 pushw %bx
524 movb $0x0f, %ah # Check if really set
525 int $0x10
526 popw %bx
527 cmpb %bl, %al
528 jnz setbad
529
530 stc
531 ret
532
533setspc: xorb %bh, %bh # Set special mode
534 cmpb $VIDEO_LAST_SPECIAL-VIDEO_FIRST_SPECIAL, %bl
535 jnc setbad
536
537 addw %bx, %bx
538 jmp *spec_inits(%bx)
539
540setmenu:
541 orb %al, %al # 80x25 is an exception
542 jz _set_80x25
543
544 pushw %bx # Set mode chosen from menu
545 call mode_table # Build the mode table
546 popw %ax
547 shlw $2, %ax
548 addw %ax, %si
549 cmpw %di, %si
550 jnc setbad
551
552 movw (%si), %ax # Fetch mode ID
553_m_s: jmp mode_set
554
555setres: pushw %bx # Set mode chosen by resolution
556 call mode_table
557 popw %bx
558 xchgb %bl, %bh
559setr1: lodsw
560 cmpw $ASK_VGA, %ax # End of the list?
561 jz setbad
562
563 lodsw
564 cmpw %bx, %ax
565 jnz setr1
566
567 movw -4(%si), %ax # Fetch mode ID
568 jmp _m_s
569
570check_vesa:
571 leaw modelist+1024, %di
572 subb $VIDEO_FIRST_VESA>>8, %bh
573 movw %bx, %cx # Get mode information structure
574 movw $0x4f01, %ax
575 int $0x10
576 addb $VIDEO_FIRST_VESA>>8, %bh
577 cmpw $0x004f, %ax
578 jnz setbad
579
580 movb (%di), %al # Check capabilities.
581 andb $0x19, %al
582 cmpb $0x09, %al
583 jz setvesa # This is a text mode
584
585 movb (%di), %al # Check capabilities.
586 andb $0x99, %al
587 cmpb $0x99, %al
588 jnz _setbad # Doh! No linear frame buffer.
589
590 subb $VIDEO_FIRST_VESA>>8, %bh
591 orw $0x4000, %bx # Use linear frame buffer
592 movw $0x4f02, %ax # VESA BIOS mode set call
593 int $0x10
594 cmpw $0x004f, %ax # AL=4f if implemented
595 jnz _setbad # AH=0 if OK
596
597 movb $1, graphic_mode # flag graphic mode
598 movb $0, do_restore # no screen restore
599 stc
600 ret
601
602_setbad: jmp setbad # Ugly...
603
604# Recalculate vertical display end registers -- this fixes various
605# inconsistencies of extended modes on many adapters. Called when
606# the VIDEO_RECALC flag is set in the mode ID.
607
608setrec: subb $VIDEO_RECALC>>8, %ah # Set the base mode
609 call mode_set
610 jnc rct3
611
612 movw %gs:(0x485), %ax # Font size in pixels
613 movb %gs:(0x484), %bl # Number of rows
614 incb %bl
615 mulb %bl # Number of visible
616 decw %ax # scan lines - 1
617 movw $0x3d4, %dx
618 movw %ax, %bx
619 movb $0x12, %al # Lower 8 bits
620 movb %bl, %ah
621 outw %ax, %dx
622 movb $0x07, %al # Bits 8 and 9 in the overflow register
623 call inidx
624 xchgb %al, %ah
625 andb $0xbd, %ah
626 shrb %bh
627 jnc rct1
628 orb $0x02, %ah
629rct1: shrb %bh
630 jnc rct2
631 orb $0x40, %ah
632rct2: movb $0x07, %al
633 outw %ax, %dx
634 stc
635rct3: ret
636
637# Table of routines for setting of the special modes.
638spec_inits:
639 .word set_80x25
640 .word set_8pixel
641 .word set_80x43
642 .word set_80x28
643 .word set_current
644 .word set_80x30
645 .word set_80x34
646 .word set_80x60
647 .word set_gfx
648
649# Set the 80x25 mode. If already set, do nothing.
650set_80x25:
651 movw $0x5019, force_size # Override possibly broken BIOS
652use_80x25:
653#ifdef CONFIG_VIDEO_400_HACK
654 movw $0x1202, %ax # Force 400 scan lines
655 movb $0x30, %bl
656 int $0x10
657#else
658 movb $0x0f, %ah # Get current mode ID
659 int $0x10
660 cmpw $0x5007, %ax # Mode 7 (80x25 mono) is the only one available
661 jz st80 # on CGA/MDA/HGA and is also available on EGAM
662
663 cmpw $0x5003, %ax # Unknown mode, force 80x25 color
664 jnz force3
665
666st80: cmpb $0, adapter # CGA/MDA/HGA => mode 3/7 is always 80x25
667 jz set80
668
669 movb %gs:(0x0484), %al # This is EGA+ -- beware of 80x50 etc.
670 orb %al, %al # Some buggy BIOS'es set 0 rows
671 jz set80
672
673 cmpb $24, %al # It's hopefully correct
674 jz set80
675#endif /* CONFIG_VIDEO_400_HACK */
676force3: DO_STORE
677 movw $0x0003, %ax # Forced set
678 int $0x10
679set80: stc
680 ret
681
682# Set the 80x50/80x43 8-pixel mode. Simple BIOS calls.
683set_8pixel:
684 DO_STORE
685 call use_80x25 # The base is 80x25
686set_8pt:
687 movw $0x1112, %ax # Use 8x8 font
688 xorb %bl, %bl
689 int $0x10
690 movw $0x1200, %ax # Use alternate print screen
691 movb $0x20, %bl
692 int $0x10
693 movw $0x1201, %ax # Turn off cursor emulation
694 movb $0x34, %bl
695 int $0x10
696 movb $0x01, %ah # Define cursor scan lines 6-7
697 movw $0x0607, %cx
698 int $0x10
699set_current:
700 stc
701 ret
702
703# Set the 80x28 mode. This mode works on all VGA's, because it's a standard
704# 80x25 mode with 14-point fonts instead of 16-point.
705set_80x28:
706 DO_STORE
707 call use_80x25 # The base is 80x25
708set14: movw $0x1111, %ax # Use 9x14 font
709 xorb %bl, %bl
710 int $0x10
711 movb $0x01, %ah # Define cursor scan lines 11-12
712 movw $0x0b0c, %cx
713 int $0x10
714 stc
715 ret
716
717# Set the 80x43 mode. This mode works on all VGA's.
718# It's a 350-scanline mode with 8-pixel font.
719set_80x43:
720 DO_STORE
721 movw $0x1201, %ax # Set 350 scans
722 movb $0x30, %bl
723 int $0x10
724 movw $0x0003, %ax # Reset video mode
725 int $0x10
726 jmp set_8pt # Use 8-pixel font
727
728# Set the 80x30 mode (all VGA's). 480 scanlines, 16-pixel font.
729set_80x30:
730 call use_80x25 # Start with real 80x25
731 DO_STORE
732 movw $0x3cc, %dx # Get CRTC port
733 inb %dx, %al
734 movb $0xd4, %dl
735 rorb %al # Mono or color?
736 jc set48a
737
738 movb $0xb4, %dl
739set48a: movw $0x0c11, %ax # Vertical sync end (also unlocks CR0-7)
740 call outidx
741 movw $0x0b06, %ax # Vertical total
742 call outidx
743 movw $0x3e07, %ax # (Vertical) overflow
744 call outidx
745 movw $0xea10, %ax # Vertical sync start
746 call outidx
747 movw $0xdf12, %ax # Vertical display end
748 call outidx
749 movw $0xe715, %ax # Vertical blank start
750 call outidx
751 movw $0x0416, %ax # Vertical blank end
752 call outidx
753 pushw %dx
754 movb $0xcc, %dl # Misc output register (read)
755 inb %dx, %al
756 movb $0xc2, %dl # (write)
757 andb $0x0d, %al # Preserve clock select bits and color bit
758 orb $0xe2, %al # Set correct sync polarity
759 outb %al, %dx
760 popw %dx
761 movw $0x501e, force_size
762 stc # That's all.
763 ret
764
765# Set the 80x34 mode (all VGA's). 480 scans, 14-pixel font.
766set_80x34:
767 call set_80x30 # Set 480 scans
768 call set14 # And 14-pt font
769 movw $0xdb12, %ax # VGA vertical display end
770 movw $0x5022, force_size
771setvde: call outidx
772 stc
773 ret
774
775# Set the 80x60 mode (all VGA's). 480 scans, 8-pixel font.
776set_80x60:
777 call set_80x30 # Set 480 scans
778 call set_8pt # And 8-pt font
779 movw $0xdf12, %ax # VGA vertical display end
780 movw $0x503c, force_size
781 jmp setvde
782
783# Special hack for ThinkPad graphics
784set_gfx:
785#ifdef CONFIG_VIDEO_GFX_HACK
786 movw $VIDEO_GFX_BIOS_AX, %ax
787 movw $VIDEO_GFX_BIOS_BX, %bx
788 int $0x10
789 movw $VIDEO_GFX_DUMMY_RESOLUTION, force_size
790 stc
791#endif
792 ret
793
794#ifdef CONFIG_VIDEO_RETAIN
795
796# Store screen contents to temporary buffer.
797store_screen:
798 cmpb $0, do_restore # Already stored?
799 jnz stsr
800
801 testb $CAN_USE_HEAP, loadflags # Have we space for storing?
802 jz stsr
803
804 pushw %ax
805 pushw %bx
806 pushw force_size # Don't force specific size
807 movw $0, force_size
808 call mode_params # Obtain params of current mode
809 popw force_size
810 movb %fs:(PARAM_VIDEO_LINES), %ah
811 movb %fs:(PARAM_VIDEO_COLS), %al
812 movw %ax, %bx # BX=dimensions
813 mulb %ah
814 movw %ax, %cx # CX=number of characters
815 addw %ax, %ax # Calculate image size
816 addw $modelist+1024+4, %ax
817 cmpw heap_end_ptr, %ax
818 jnc sts1 # Unfortunately, out of memory
819
820 movw %fs:(PARAM_CURSOR_POS), %ax # Store mode params
821 leaw modelist+1024, %di
822 stosw
823 movw %bx, %ax
824 stosw
825 pushw %ds # Store the screen
826 movw video_segment, %ds
827 xorw %si, %si
828 rep
829 movsw
830 popw %ds
831 incb do_restore # Screen will be restored later
832sts1: popw %bx
833 popw %ax
834stsr: ret
835
836# Restore screen contents from temporary buffer.
837restore_screen:
838 cmpb $0, do_restore # Has the screen been stored?
839 jz res1
840
841 call mode_params # Get parameters of current mode
842 movb %fs:(PARAM_VIDEO_LINES), %cl
843 movb %fs:(PARAM_VIDEO_COLS), %ch
844 leaw modelist+1024, %si # Screen buffer
845 lodsw # Set cursor position
846 movw %ax, %dx
847 cmpb %cl, %dh
848 jc res2
849
850 movb %cl, %dh
851 decb %dh
852res2: cmpb %ch, %dl
853 jc res3
854
855 movb %ch, %dl
856 decb %dl
857res3: movb $0x02, %ah
858 movb $0x00, %bh
859 int $0x10
860 lodsw # Display size
861 movb %ah, %dl # DL=number of lines
862 movb $0, %ah # BX=phys. length of orig. line
863 movw %ax, %bx
864 cmpb %cl, %dl # Too many?
865 jc res4
866
867 pushw %ax
868 movb %dl, %al
869 subb %cl, %al
870 mulb %bl
871 addw %ax, %si
872 addw %ax, %si
873 popw %ax
874 movb %cl, %dl
875res4: cmpb %ch, %al # Too wide?
876 jc res5
877
878 movb %ch, %al # AX=width of src. line
879res5: movb $0, %cl
880 xchgb %ch, %cl
881 movw %cx, %bp # BP=width of dest. line
882 pushw %es
883 movw video_segment, %es
884 xorw %di, %di # Move the data
885 addw %bx, %bx # Convert BX and BP to _bytes_
886 addw %bp, %bp
887res6: pushw %si
888 pushw %di
889 movw %ax, %cx
890 rep
891 movsw
892 popw %di
893 popw %si
894 addw %bp, %di
895 addw %bx, %si
896 decb %dl
897 jnz res6
898
899 popw %es # Done
900res1: ret
901#endif /* CONFIG_VIDEO_RETAIN */
902
903# Write to indexed VGA register (AL=index, AH=data, DX=index reg. port)
904outidx: outb %al, %dx
905 pushw %ax
906 movb %ah, %al
907 incw %dx
908 outb %al, %dx
909 decw %dx
910 popw %ax
911 ret
912
913# Build the table of video modes (stored after the setup.S code at the
914# `modelist' label). Each video mode record looks like:
915#	.word	MODE-ID		(our special mode ID (see above))
916#	.byte	rows		(number of rows)
917#	.byte	columns		(number of columns)
918# Returns address of the end of the table in DI; the end is marked
919# with an ASK_VGA ID.
920mode_table:
921 movw mt_end, %di # Already filled?
922 orw %di, %di
923 jnz mtab1x
924
925 leaw modelist, %di # Store standard modes:
926 movl $VIDEO_80x25 + 0x50190000, %eax # The 80x25 mode (ALL)
927 stosl
928 movb adapter, %al # CGA/MDA/HGA -- no more modes
929 orb %al, %al
930 jz mtabe
931
932 decb %al
933 jnz mtabv
934
935 movl $VIDEO_8POINT + 0x502b0000, %eax # The 80x43 EGA mode
936 stosl
937 jmp mtabe
938
939mtab1x: jmp mtab1
940
941mtabv: leaw vga_modes, %si # All modes for std VGA
942 movw $vga_modes_end-vga_modes, %cx
943 rep # I'm unable to use movsw as I don't know how to store a half
944 movsb # of the expression above to cx without using explicit shr.
945
946 cmpb $0, scanning # Mode scan requested?
947 jz mscan1
948
949 call mode_scan
950mscan1:
951
952#ifdef CONFIG_VIDEO_LOCAL
953 call local_modes
954#endif /* CONFIG_VIDEO_LOCAL */
955
956#ifdef CONFIG_VIDEO_VESA
957 call vesa_modes # Detect VESA VGA modes
958#endif /* CONFIG_VIDEO_VESA */
959
960#ifdef CONFIG_VIDEO_SVGA
961 cmpb $0, scanning # Bypass when scanning
962 jnz mscan2
963
964 call svga_modes # Detect SVGA cards & modes
965mscan2:
966#endif /* CONFIG_VIDEO_SVGA */
967
968mtabe:
969
970#ifdef CONFIG_VIDEO_COMPACT
971 leaw modelist, %si
972 movw %di, %dx
973 movw %si, %di
974cmt1: cmpw %dx, %si # Scan all modes
975 jz cmt2
976
977 leaw modelist, %bx # Find in previous entries
978 movw 2(%si), %cx
979cmt3: cmpw %bx, %si
980 jz cmt4
981
982 cmpw 2(%bx), %cx # Found => don't copy this entry
983 jz cmt5
984
985 addw $4, %bx
986 jmp cmt3
987
988cmt4: movsl # Copy entry
989 jmp cmt1
990
991cmt5: addw $4, %si # Skip entry
992 jmp cmt1
993
994cmt2:
995#endif /* CONFIG_VIDEO_COMPACT */
996
997 movw $ASK_VGA, (%di) # End marker
998 movw %di, mt_end
999mtab1: leaw modelist, %si # SI=mode list, DI=list end
1000ret0: ret
1001
1002# Modes usable on all standard VGAs
1003vga_modes:
1004 .word VIDEO_8POINT
1005 .word 0x5032 # 80x50
1006 .word VIDEO_80x43
1007 .word 0x502b # 80x43
1008 .word VIDEO_80x28
1009 .word 0x501c # 80x28
1010 .word VIDEO_80x30
1011 .word 0x501e # 80x30
1012 .word VIDEO_80x34
1013 .word 0x5022 # 80x34
1014 .word VIDEO_80x60
1015 .word 0x503c # 80x60
1016#ifdef CONFIG_VIDEO_GFX_HACK
1017 .word VIDEO_GFX_HACK
1018 .word VIDEO_GFX_DUMMY_RESOLUTION
1019#endif
1020
1021vga_modes_end:
1022# Detect VESA modes.
1023
1024#ifdef CONFIG_VIDEO_VESA
1025vesa_modes:
1026 cmpb $2, adapter # VGA only
1027 jnz ret0
1028
1029 movw %di, %bp # BP=original mode table end
1030 addw $0x200, %di # Buffer space
1031 movw $0x4f00, %ax # VESA Get card info call
1032 int $0x10
1033 movw %bp, %di
1034 cmpw $0x004f, %ax # Successful?
1035 jnz ret0
1036
1037 cmpw $0x4556, 0x200(%di)
1038 jnz ret0
1039
1040 cmpw $0x4153, 0x202(%di)
1041 jnz ret0
1042
1043 movw $vesa_name, card_name # Set name to "VESA VGA"
1044 pushw %gs
1045 lgsw 0x20e(%di), %si # GS:SI=mode list
1046 movw $128, %cx # Iteration limit
1047vesa1:
1048# gas version 2.9.1, using BFD version 2.9.1.0.23 buggers the next inst.
1049# XXX: lodsw %gs:(%si), %ax # Get next mode in the list
1050 gs; lodsw
1051 cmpw $0xffff, %ax # End of the table?
1052 jz vesar
1053
1054 cmpw $0x0080, %ax # Check validity of mode ID
1055 jc vesa2
1056
1057 orb %ah, %ah # Valid IDs: 0x0000-0x007f/0x0100-0x07ff
1058 jz vesan # Certain BIOSes report 0x80-0xff!
1059
1060 cmpw $0x0800, %ax
1061 jnc vesae
1062
1063vesa2: pushw %cx
1064 movw %ax, %cx # Get mode information structure
1065 movw $0x4f01, %ax
1066 int $0x10
1067 movw %cx, %bx # BX=mode number
1068 addb $VIDEO_FIRST_VESA>>8, %bh
1069 popw %cx
1070 cmpw $0x004f, %ax
1071	jnz	vesan			# Don't report errors (buggy BIOSes)
1072
1073 movb (%di), %al # Check capabilities. We require
1074 andb $0x19, %al # a color text mode.
1075 cmpb $0x09, %al
1076 jnz vesan
1077
1078 cmpw $0xb800, 8(%di) # Standard video memory address required
1079 jnz vesan
1080
1081 testb $2, (%di) # Mode characteristics supplied?
1082 movw %bx, (%di) # Store mode number
1083 jz vesa3
1084
1085 xorw %dx, %dx
1086 movw 0x12(%di), %bx # Width
1087 orb %bh, %bh
1088 jnz vesan
1089
1090 movb %bl, 0x3(%di)
1091 movw 0x14(%di), %ax # Height
1092 orb %ah, %ah
1093 jnz vesan
1094
1095 movb %al, 2(%di)
1096 mulb %bl
1097 cmpw $8193, %ax # Small enough for Linux console driver?
1098 jnc vesan
1099
1100 jmp vesaok
1101
1102vesa3: subw $0x8108, %bx # This mode has no detailed info specified,
1103 jc vesan # so it must be a standard VESA mode.
1104
1105 cmpw $5, %bx
1106 jnc vesan
1107
1108 movw vesa_text_mode_table(%bx), %ax
1109 movw %ax, 2(%di)
1110vesaok: addw $4, %di # The mode is valid. Store it.
1111vesan: loop vesa1 # Next mode. Limit exceeded => error
1112vesae: leaw vesaer, %si
1113 call prtstr
1114 movw %bp, %di # Discard already found modes.
1115vesar: popw %gs
1116 ret
1117
1118# Dimensions of standard VESA text modes
1119vesa_text_mode_table:
1120 .byte 60, 80 # 0108
1121 .byte 25, 132 # 0109
1122 .byte 43, 132 # 010A
1123 .byte 50, 132 # 010B
1124 .byte 60, 132 # 010C
1125#endif /* CONFIG_VIDEO_VESA */
1126
1127# Scan for video modes. A bit dirty, but should work.
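# Each BIOS mode 0x00-0x7f is set in turn and accepted only if it sticks
# (INT 0x10/AH=0x0f reads the same number back), the attribute and graphics
# controller mode bits indicate text, and the CRTC cursor location reads 0.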
1128mode_scan:
1129 movw $0x0100, %cx # Start with mode 0
1130scm1: movb $0, %ah # Test the mode
1131 movb %cl, %al
1132 int $0x10
1133 movb $0x0f, %ah
1134 int $0x10
1135 cmpb %cl, %al
1136 jnz scm2 # Mode not set
1137
1138 movw $0x3c0, %dx # Test if it's a text mode
1139 movb $0x10, %al # Mode bits
1140 call inidx
1141 andb $0x03, %al
1142 jnz scm2
1143
1144 movb $0xce, %dl # Another set of mode bits
1145 movb $0x06, %al
1146 call inidx
1147 shrb %al
1148 jc scm2
1149
1150 movb $0xd4, %dl # Cursor location
1151 movb $0x0f, %al
1152 call inidx
1153 orb %al, %al
1154 jnz scm2
1155
1156 movw %cx, %ax # Ok, store the mode
1157 stosw
1158 movb %gs:(0x484), %al # Number of rows
1159 incb %al
1160 stosb
1161 movw %gs:(0x44a), %ax # Number of columns
1162 stosb
1163scm2: incb %cl
1164 jns scm1
1165
1166 movw $0x0003, %ax # Return back to mode 3
1167 int $0x10
1168 ret
1169
1170tstidx: outw %ax, %dx # OUT DX,AX and inidx
1171inidx: outb %al, %dx # Read from indexed VGA register
1172 incw %dx # AL=index, DX=index reg port -> AL=data
1173 inb %dx, %al
1174 decw %dx
1175 ret
1176
1177# Try to detect type of SVGA card and supply (usually approximate) video
1178# mode table for it.
1179
1180#ifdef CONFIG_VIDEO_SVGA
1181svga_modes:
1182 leaw svga_table, %si # Test all known SVGA adapters
1183dosvga: lodsw
1184 movw %ax, %bp # Default mode table
1185 orw %ax, %ax
1186 jz didsv1
1187
1188 lodsw # Pointer to test routine
1189 pushw %si
1190 pushw %di
1191 pushw %es
1192 movw $0xc000, %bx
1193 movw %bx, %es
1194 call *%ax # Call test routine
1195 popw %es
1196 popw %di
1197 popw %si
1198 orw %bp, %bp
1199 jz dosvga
1200
1201 movw %bp, %si # Found, copy the modes
1202 movb svga_prefix, %ah
1203cpsvga: lodsb
1204 orb %al, %al
1205 jz didsv
1206
1207 stosw
1208 movsw
1209 jmp cpsvga
1210
1211didsv: movw %si, card_name # Store pointer to card name
1212didsv1: ret
1213
1214# Table of all known SVGA cards. For each card, we store a pointer to
1215# a table of video modes supported by the card and a pointer to a routine
1216# used to test for the presence of the card. The video mode table is always
1217# followed by the name of the card or the chipset.
1218svga_table:
1219 .word ati_md, ati_test
1220 .word oak_md, oak_test
1221 .word paradise_md, paradise_test
1222 .word realtek_md, realtek_test
1223 .word s3_md, s3_test
1224 .word chips_md, chips_test
1225 .word video7_md, video7_test
1226 .word cirrus5_md, cirrus5_test
1227 .word cirrus6_md, cirrus6_test
1228 .word cirrus1_md, cirrus1_test
1229 .word ahead_md, ahead_test
1230 .word everex_md, everex_test
1231 .word genoa_md, genoa_test
1232 .word trident_md, trident_test
1233 .word tseng_md, tseng_test
1234 .word 0
1235
1236# Test routines and mode tables:
1237
1238# S3 - The test algorithm was taken from the SuperProbe package
1239# for XFree86 1.2.1. Report bugs to Christoph.Niemann@linux.org
1240s3_test:
1241 movw $0x0f35, %cx # we store some constants in cl/ch
1242 movw $0x03d4, %dx
1243 movb $0x38, %al
1244 call inidx
1245 movb %al, %bh # store current CRT-register 0x38
1246 movw $0x0038, %ax
1247 call outidx # disable writing to special regs
1248 movb %cl, %al # check whether we can write special reg 0x35
1249 call inidx
1250 movb %al, %bl # save the current value of CRT reg 0x35
1251 andb $0xf0, %al # clear bits 0-3
1252 movb %al, %ah
1253 movb %cl, %al # and write it to CRT reg 0x35
1254 call outidx
1255 call inidx # now read it back
1256 andb %ch, %al # clear the upper 4 bits
1257 jz s3_2 # the first test failed. But we have a
1258
1259 movb %bl, %ah # second chance
1260 movb %cl, %al
1261 call outidx
1262 jmp s3_1 # do the other tests
1263
1264s3_2: movw %cx, %ax # load ah with 0xf and al with 0x35
1265 orb %bl, %ah # set the upper 4 bits of ah with the orig value
1266 call outidx # write ...
1267 call inidx # ... and reread
1268 andb %cl, %al # turn off the upper 4 bits
1269 pushw %ax
1270 movb %bl, %ah # restore old value in register 0x35
1271 movb %cl, %al
1272 call outidx
1273 popw %ax
1274 cmpb %ch, %al # setting lower 4 bits was successful => bad
1275 je no_s3 # writing is allowed => this is not an S3
1276
1277s3_1: movw $0x4838, %ax # allow writing to special regs by putting
1278 call outidx # magic number into CRT-register 0x38
1279 movb %cl, %al # check whether we can write special reg 0x35
1280 call inidx
1281 movb %al, %bl
1282 andb $0xf0, %al
1283 movb %al, %ah
1284 movb %cl, %al
1285 call outidx
1286 call inidx
1287 andb %ch, %al
1288 jnz no_s3 # no, we can't write => no S3
1289
1290 movw %cx, %ax
1291 orb %bl, %ah
1292 call outidx
1293 call inidx
1294 andb %ch, %al
1295 pushw %ax
1296 movb %bl, %ah # restore old value in register 0x35
1297 movb %cl, %al
1298 call outidx
1299 popw %ax
1300 cmpb %ch, %al
1301 jne no_s31 # writing not possible => no S3
1302 movb $0x30, %al
1303 call inidx # now get the S3 id ...
1304 leaw idS3, %di
1305 movw $0x10, %cx
1306 repne
1307 scasb
1308 je no_s31
1309
1310 movb %bh, %ah
1311 movb $0x38, %al
1312 jmp s3rest
1313
1314no_s3: movb $0x35, %al # restore CRT register 0x35
1315 movb %bl, %ah
1316 call outidx
1317no_s31: xorw %bp, %bp # Detection failed
1318s3rest: movb %bh, %ah
1319 movb $0x38, %al # restore old value of CRT register 0x38
1320 jmp outidx
1321
1322idS3: .byte 0x81, 0x82, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95
1323 .byte 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa8, 0xb0
1324
1325s3_md: .byte 0x54, 0x2b, 0x84
1326 .byte 0x55, 0x19, 0x84
1327 .byte 0
1328 .ascii "S3"
1329 .byte 0
1330
1331# ATI cards.
1332ati_test:
1333 leaw idati, %si
1334 movw $0x31, %di
1335 movw $0x09, %cx
1336 repe
1337 cmpsb
1338 je atiok
1339
1340 xorw %bp, %bp
1341atiok: ret
1342
1343idati: .ascii "761295520"
1344
1345ati_md: .byte 0x23, 0x19, 0x84
1346 .byte 0x33, 0x2c, 0x84
1347 .byte 0x22, 0x1e, 0x64
1348 .byte 0x21, 0x19, 0x64
1349 .byte 0x58, 0x21, 0x50
1350 .byte 0x5b, 0x1e, 0x50
1351 .byte 0
1352 .ascii "ATI"
1353 .byte 0
1354
1355# AHEAD
1356ahead_test:
1357 movw $0x200f, %ax
1358 movw $0x3ce, %dx
1359 outw %ax, %dx
1360 incw %dx
1361 inb %dx, %al
1362 cmpb $0x20, %al
1363 je isahed
1364
1365 cmpb $0x21, %al
1366 je isahed
1367
1368 xorw %bp, %bp
1369isahed: ret
1370
1371ahead_md:
1372 .byte 0x22, 0x2c, 0x84
1373 .byte 0x23, 0x19, 0x84
1374 .byte 0x24, 0x1c, 0x84
1375 .byte 0x2f, 0x32, 0xa0
1376 .byte 0x32, 0x22, 0x50
1377 .byte 0x34, 0x42, 0x50
1378 .byte 0
1379 .ascii "Ahead"
1380 .byte 0
1381
1382# Chips & Tech.
1383chips_test:
1384 movw $0x3c3, %dx
1385 inb %dx, %al
1386 orb $0x10, %al
1387 outb %al, %dx
1388 movw $0x104, %dx
1389 inb %dx, %al
1390 movb %al, %bl
1391 movw $0x3c3, %dx
1392 inb %dx, %al
1393 andb $0xef, %al
1394 outb %al, %dx
1395 cmpb $0xa5, %bl
1396 je cantok
1397
1398 xorw %bp, %bp
1399cantok: ret
1400
1401chips_md:
1402 .byte 0x60, 0x19, 0x84
1403 .byte 0x61, 0x32, 0x84
1404 .byte 0
1405 .ascii "Chips & Technologies"
1406 .byte 0
1407
1408# Cirrus Logic 5X0
1409cirrus1_test:
1410 movw $0x3d4, %dx
1411 movb $0x0c, %al
1412 outb %al, %dx
1413 incw %dx
1414 inb %dx, %al
1415 movb %al, %bl
1416 xorb %al, %al
1417 outb %al, %dx
1418 decw %dx
1419 movb $0x1f, %al
1420 outb %al, %dx
1421 incw %dx
1422 inb %dx, %al
1423 movb %al, %bh
1424 xorb %ah, %ah
1425 shlb $4, %al
1426 movw %ax, %cx
1427 movb %bh, %al
1428 shrb $4, %al
1429 addw %ax, %cx
1430 shlw $8, %cx
1431 addw $6, %cx
1432 movw %cx, %ax
1433 movw $0x3c4, %dx
1434 outw %ax, %dx
1435 incw %dx
1436 inb %dx, %al
1437 andb %al, %al
1438 jnz nocirr
1439
1440 movb %bh, %al
1441 outb %al, %dx
1442 inb %dx, %al
1443 cmpb $0x01, %al
1444 je iscirr
1445
1446nocirr: xorw %bp, %bp
1447iscirr: movw $0x3d4, %dx
1448 movb %bl, %al
1449 xorb %ah, %ah
1450 shlw $8, %ax
1451 addw $0x0c, %ax
1452 outw %ax, %dx
1453 ret
1454
1455cirrus1_md:
1456 .byte 0x1f, 0x19, 0x84
1457 .byte 0x20, 0x2c, 0x84
1458 .byte 0x22, 0x1e, 0x84
1459 .byte 0x31, 0x25, 0x64
1460 .byte 0
1461 .ascii "Cirrus Logic 5X0"
1462 .byte 0
1463
1464# Cirrus Logic 54XX
1465cirrus5_test:
1466 movw $0x3c4, %dx
1467 movb $6, %al
1468 call inidx
1469 movb %al, %bl # BL=backup
1470 movw $6, %ax
1471 call tstidx
1472 cmpb $0x0f, %al
1473 jne c5fail
1474
1475 movw $0x1206, %ax
1476 call tstidx
1477 cmpb $0x12, %al
1478 jne c5fail
1479
1480 movb $0x1e, %al
1481 call inidx
1482 movb %al, %bh
1483 movb %bh, %ah
1484 andb $0xc0, %ah
1485 movb $0x1e, %al
1486 call tstidx
1487 andb $0x3f, %al
1488 jne c5xx
1489
1490 movb $0x1e, %al
1491 movb %bh, %ah
1492 orb $0x3f, %ah
1493 call tstidx
1494 xorb $0x3f, %al
1495 andb $0x3f, %al
1496c5xx: pushf
1497 movb $0x1e, %al
1498 movb %bh, %ah
1499 outw %ax, %dx
1500 popf
1501 je c5done
1502
1503c5fail: xorw %bp, %bp
1504c5done: movb $6, %al
1505 movb %bl, %ah
1506 outw %ax, %dx
1507 ret
1508
1509cirrus5_md:
1510 .byte 0x14, 0x19, 0x84
1511 .byte 0x54, 0x2b, 0x84
1512 .byte 0
1513 .ascii "Cirrus Logic 54XX"
1514 .byte 0
1515
1516# Cirrus Logic 64XX -- no known extra modes, but must be identified, because
1517# it's misidentified by the Ahead test.
1518cirrus6_test:
1519 movw $0x3ce, %dx
1520 movb $0x0a, %al
1521 call inidx
1522 movb %al, %bl # BL=backup
1523 movw $0xce0a, %ax
1524 call tstidx
1525 orb %al, %al
1526 jne c2fail
1527
1528 movw $0xec0a, %ax
1529 call tstidx
1530 cmpb $0x01, %al
1531 jne c2fail
1532
1533 movb $0xaa, %al
1534 call inidx # 4X, 5X, 7X and 8X are valid 64XX chip ID's.
1535 shrb $4, %al
1536 subb $4, %al
1537 jz c6done
1538
1539 decb %al
1540 jz c6done
1541
1542 subb $2, %al
1543 jz c6done
1544
1545 decb %al
1546 jz c6done
1547
1548c2fail: xorw %bp, %bp
1549c6done: movb $0x0a, %al
1550 movb %bl, %ah
1551 outw %ax, %dx
1552 ret
1553
1554cirrus6_md:
1555 .byte 0
1556 .ascii "Cirrus Logic 64XX"
1557 .byte 0
1558
1559# Everex / Trident
1560everex_test:
1561 movw $0x7000, %ax
1562 xorw %bx, %bx
1563 int $0x10
1564 cmpb $0x70, %al
1565 jne noevrx
1566
1567 shrw $4, %dx
1568 cmpw $0x678, %dx
1569 je evtrid
1570
1571 cmpw $0x236, %dx
1572 jne evrxok
1573
1574evtrid: leaw trident_md, %bp
1575evrxok: ret
1576
1577noevrx: xorw %bp, %bp
1578 ret
1579
1580everex_md:
1581 .byte 0x03, 0x22, 0x50
1582 .byte 0x04, 0x3c, 0x50
1583 .byte 0x07, 0x2b, 0x64
1584 .byte 0x08, 0x4b, 0x64
1585 .byte 0x0a, 0x19, 0x84
1586 .byte 0x0b, 0x2c, 0x84
1587 .byte 0x16, 0x1e, 0x50
1588 .byte 0x18, 0x1b, 0x64
1589 .byte 0x21, 0x40, 0xa0
1590 .byte 0x40, 0x1e, 0x84
1591 .byte 0
1592 .ascii "Everex/Trident"
1593 .byte 0
1594
1595# Genoa.
1596genoa_test:
1597 leaw idgenoa, %si # Check Genoa 'clues'
1598 xorw %ax, %ax
1599 movb %es:(0x37), %al
1600 movw %ax, %di
1601 movw $0x04, %cx
1602 decw %si
1603 decw %di
1604l1: incw %si
1605 incw %di
1606 movb (%si), %al
1607 testb %al, %al
1608 jz l2
1609
1610 cmpb %es:(%di), %al
1611l2: loope l1
1612 orw %cx, %cx
1613 je isgen
1614
1615 xorw %bp, %bp
1616isgen: ret
1617
1618idgenoa: .byte 0x77, 0x00, 0x99, 0x66
1619
1620genoa_md:
1621 .byte 0x58, 0x20, 0x50
1622 .byte 0x5a, 0x2a, 0x64
1623 .byte 0x60, 0x19, 0x84
1624 .byte 0x61, 0x1d, 0x84
1625 .byte 0x62, 0x20, 0x84
1626 .byte 0x63, 0x2c, 0x84
1627 .byte 0x64, 0x3c, 0x84
1628 .byte 0x6b, 0x4f, 0x64
1629 .byte 0x72, 0x3c, 0x50
1630 .byte 0x74, 0x42, 0x50
1631 .byte 0x78, 0x4b, 0x64
1632 .byte 0
1633 .ascii "Genoa"
1634 .byte 0
1635
1636# OAK
1637oak_test:
1638 leaw idoakvga, %si
1639 movw $0x08, %di
1640 movw $0x08, %cx
1641 repe
1642 cmpsb
1643 je isoak
1644
1645 xorw %bp, %bp
1646isoak: ret
1647
1648idoakvga: .ascii "OAK VGA "
1649
1650oak_md: .byte 0x4e, 0x3c, 0x50
1651 .byte 0x4f, 0x3c, 0x84
1652 .byte 0x50, 0x19, 0x84
1653 .byte 0x51, 0x2b, 0x84
1654 .byte 0
1655 .ascii "OAK"
1656 .byte 0
1657
1658# WD Paradise.
1659paradise_test:
1660 leaw idparadise, %si
1661 movw $0x7d, %di
1662 movw $0x04, %cx
1663 repe
1664 cmpsb
1665 je ispara
1666
1667 xorw %bp, %bp
1668ispara: ret
1669
1670idparadise: .ascii "VGA="
1671
1672paradise_md:
1673 .byte 0x41, 0x22, 0x50
1674 .byte 0x47, 0x1c, 0x84
1675 .byte 0x55, 0x19, 0x84
1676 .byte 0x54, 0x2c, 0x84
1677 .byte 0
1678 .ascii "Paradise"
1679 .byte 0
1680
1681# Trident.
1682trident_test:
1683 movw $0x3c4, %dx
1684 movb $0x0e, %al
1685 outb %al, %dx
1686 incw %dx
1687 inb %dx, %al
1688 xchgb %al, %ah
1689 xorb %al, %al
1690 outb %al, %dx
1691 inb %dx, %al
1692 xchgb %ah, %al
1693 movb %al, %bl # Strange thing ... in the book this wasn't
1694 andb $0x02, %bl # necessary but it worked on my card which
1695 jz setb2 # is a trident. Without it the screen goes
1696 # blurred ...
1697 andb $0xfd, %al
1698 jmp clrb2
1699
1700setb2: orb $0x02, %al
1701clrb2: outb %al, %dx
1702 andb $0x0f, %ah
1703 cmpb $0x02, %ah
1704 je istrid
1705
1706 xorw %bp, %bp
1707istrid: ret
1708
1709trident_md:
1710 .byte 0x50, 0x1e, 0x50
1711 .byte 0x51, 0x2b, 0x50
1712 .byte 0x52, 0x3c, 0x50
1713 .byte 0x57, 0x19, 0x84
1714 .byte 0x58, 0x1e, 0x84
1715 .byte 0x59, 0x2b, 0x84
1716 .byte 0x5a, 0x3c, 0x84
1717 .byte 0
1718 .ascii "Trident"
1719 .byte 0
1720
1721# Tseng.
1722tseng_test:
1723 movw $0x3cd, %dx
1724 inb %dx, %al # Could things be this simple ! :-)
1725 movb %al, %bl
1726 movb $0x55, %al
1727 outb %al, %dx
1728 inb %dx, %al
1729 movb %al, %ah
1730 movb %bl, %al
1731 outb %al, %dx
1732 cmpb $0x55, %ah
1733 je istsen
1734
1735isnot: xorw %bp, %bp
1736istsen: ret
1737
1738tseng_md:
1739 .byte 0x26, 0x3c, 0x50
1740 .byte 0x2a, 0x28, 0x64
1741 .byte 0x23, 0x19, 0x84
1742 .byte 0x24, 0x1c, 0x84
1743 .byte 0x22, 0x2c, 0x84
1744 .byte 0x21, 0x3c, 0x84
1745 .byte 0
1746 .ascii "Tseng"
1747 .byte 0
1748
1749# Video7.
1750video7_test:
1751 movw $0x3cc, %dx
1752 inb %dx, %al
1753 movw $0x3b4, %dx
1754 andb $0x01, %al
1755 jz even7
1756
1757 movw $0x3d4, %dx
1758even7: movb $0x0c, %al
1759 outb %al, %dx
1760 incw %dx
1761 inb %dx, %al
1762 movb %al, %bl
1763 movb $0x55, %al
1764 outb %al, %dx
1765 inb %dx, %al
1766 decw %dx
1767 movb $0x1f, %al
1768 outb %al, %dx
1769 incw %dx
1770 inb %dx, %al
1771 movb %al, %bh
1772 decw %dx
1773 movb $0x0c, %al
1774 outb %al, %dx
1775 incw %dx
1776 movb %bl, %al
1777 outb %al, %dx
1778 movb $0x55, %al
1779 xorb $0xea, %al
1780 cmpb %bh, %al
1781 jne isnot
1782
1783 movb $VIDEO_FIRST_V7>>8, svga_prefix # Use special mode switching
1784 ret
1785
1786video7_md:
1787 .byte 0x40, 0x2b, 0x50
1788 .byte 0x43, 0x3c, 0x50
1789 .byte 0x44, 0x3c, 0x64
1790 .byte 0x41, 0x19, 0x84
1791 .byte 0x42, 0x2c, 0x84
1792 .byte 0x45, 0x1c, 0x84
1793 .byte 0
1794 .ascii "Video 7"
1795 .byte 0
1796
1797# Realtek VGA
1798realtek_test:
1799 leaw idrtvga, %si
1800 movw $0x45, %di
1801 movw $0x0b, %cx
1802 repe
1803 cmpsb
1804 je isrt
1805
1806 xorw %bp, %bp
1807isrt: ret
1808
1809idrtvga: .ascii "REALTEK VGA"
1810
1811realtek_md:
1812 .byte 0x1a, 0x3c, 0x50
1813 .byte 0x1b, 0x19, 0x84
1814 .byte 0x1c, 0x1e, 0x84
1815 .byte 0x1d, 0x2b, 0x84
1816 .byte 0x1e, 0x3c, 0x84
1817 .byte 0
1818 .ascii "REALTEK"
1819 .byte 0
1820
1821#endif /* CONFIG_VIDEO_SVGA */
1822
1823# User-defined local mode table (VGA only)
1824#ifdef CONFIG_VIDEO_LOCAL
1825local_modes:
1826 leaw local_mode_table, %si
1827locm1: lodsw
1828 orw %ax, %ax
1829 jz locm2
1830
1831 stosw
1832 movsw
1833 jmp locm1
1834
1835locm2: ret
1836
1837# This is the table of local video modes which can be supplied manually
1838# by the user. Each entry consists of mode ID (word) and dimensions
1839# (one byte for row count followed by one byte for column count). These modes
1840# are placed before all SVGA and VESA modes and override them if table
1841# compacting is enabled. The table must end with a zero word followed
1842# by NUL-terminated video adapter name.
1843local_mode_table:
1844 .word 0x0100 # Example: 40x25
1845 .byte 25,40
1846 .word 0
1847 .ascii "Local"
1848 .byte 0
1849#endif /* CONFIG_VIDEO_LOCAL */
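To make the layout concrete, here is a minimal C sketch of the copy loop in local_modes above, under the assumption of a flat destination buffer; entry and buffer names are illustrative only:

    #include <stdint.h>

    struct mode_entry { uint16_t id; uint8_t rows, cols; };

    /* Mirror of the lodsw/stosw/movsw loop: copy entries until the
     * terminating zero word; the adapter name string follows it. */
    static const uint8_t *copy_local_modes(const uint8_t *src,
                                           struct mode_entry *dst)
    {
        uint16_t id;

        while ((id = (uint16_t)(src[0] | src[1] << 8)) != 0) {
            dst->id   = id;
            dst->rows = src[2];        /* row count stored first */
            dst->cols = src[3];        /* column count second */
            src += 4;
            dst++;
        }
        return src + 2;                /* skip zero word; name follows */
    }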
1850
1851# Read a key and return the ASCII code in al, scan code in ah
1852getkey: xorb %ah, %ah
1853 int $0x16
1854 ret
1855
1856# Read a key with a timeout of 30 seconds.
1857# The hardware clock is used to get the time.
1858getkt: call gettime
1859 addb $30, %al # Wait 30 seconds
1860 cmpb $60, %al
1861 jl lminute
1862
1863 subb $60, %al
1864lminute:
1865 movb %al, %cl
1866again: movb $0x01, %ah
1867 int $0x16
1868 jnz getkey # key pressed, so get it
1869
1870 call gettime
1871 cmpb %cl, %al
1872 jne again
1873
1874 movb $0x20, %al # timeout, return `space'
1875 ret
1876
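The timeout logic above is worth spelling out: the current seconds value from the CMOS clock (read via int $0x1a in gettime) plus 30 is reduced modulo 60, and the keyboard is polled until either a key arrives or the clock reaches that deadline. A minimal C sketch, with hypothetical stand-ins for the BIOS calls:

    unsigned gettime_seconds(void);   /* hypothetical: int $0x1a CMOS read */
    int key_available(void);          /* hypothetical: int $0x16, ah=0x01 */
    int read_key(void);               /* hypothetical: int $0x16, ah=0x00 */

    /* Same shape as getkt: the deadline wraps within the current minute. */
    int getkey_timeout(void)
    {
        unsigned deadline = (gettime_seconds() + 30) % 60;

        for (;;) {
            if (key_available())
                return read_key();
            if (gettime_seconds() == deadline)
                return ' ';           /* timeout: return a space */
        }
    }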
1877# Flush the keyboard buffer
1878flush: movb $0x01, %ah
1879 int $0x16
1880 jz empty
1881
1882 xorb %ah, %ah
1883 int $0x16
1884 jmp flush
1885
1886empty: ret
1887
1888# Print hexadecimal number.
1889prthw: pushw %ax
1890 movb %ah, %al
1891 call prthb
1892 popw %ax
1893prthb: pushw %ax
1894 shrb $4, %al
1895 call prthn
1896 popw %ax
1897 andb $0x0f, %al
1898prthn: cmpb $0x0a, %al
1899 jc prth1
1900
1901 addb $0x07, %al
1902prth1: addb $0x30, %al
1903 jmp prtchr
1904
1905# Print decimal number in al
1906prtdec: pushw %ax
1907 pushw %cx
1908 xorb %ah, %ah
1909 movb $0x0a, %cl
1910 idivb %cl
1911 cmpb $0x09, %al
1912 jbe lt100
1913
1914 call prtdec
1915 jmp skip10
1916
1917lt100: addb $0x30, %al
1918 call prtchr
1919skip10: movb %ah, %al
1920 addb $0x30, %al
1921 call prtchr
1922 popw %cx
1923 popw %ax
1924 ret
1925
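prthw/prthb/prthn print a word, byte and nibble in hex by falling through to the next smaller unit; prtdec prints al in decimal by dividing by 10 and recursing while the quotient still has more than one digit, emitting digits on the way back out. The decimal case in C, with prtchr() as a hypothetical stand-in for the BIOS character output used here:

    void prtchr(char c);              /* hypothetical: BIOS teletype output */

    /* Same recursion as prtdec; valid for the 0..255 range al can hold. */
    void print_decimal(unsigned v)
    {
        if (v >= 10)
            print_decimal(v / 10);    /* higher-order digits first */
        prtchr('0' + v % 10);
    }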
1926store_edid:
1927 pushw %es # just save all registers
1928 pushw %ax
1929 pushw %bx
1930 pushw %cx
1931 pushw %dx
1932 pushw %di
1933
1934 pushw %fs
1935 popw %es
1936
1937 movl $0x13131313, %eax # memset block with 0x13
1938 movw $32, %cx
1939 movw $0x140, %di
1940 cld
1941 rep
1942 stosl
1943
1944 movw $0x4f15, %ax # do VBE/DDC
1945 movw $0x01, %bx
1946 movw $0x00, %cx
1947 movw $0x01, %dx
1948 movw $0x140, %di
1949 int $0x10
1950
1951 popw %di # restore all registers
1952 popw %dx
1953 popw %cx
1954 popw %bx
1955 popw %ax
1956 popw %es
1957 ret
1958
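store_edid seeds the 128-byte block at fs:0x140 with 0x13 and then asks the video BIOS for the monitor's EDID over VBE/DDC (int 0x10, ax=0x4f15, bl=0x01). A later consumer can tell a successful read from the untouched fill pattern by checking the fixed EDID header; a minimal check, assuming the block has been copied into a plain buffer:

    #include <string.h>

    /* The first eight bytes of any valid EDID 1.x block are fixed. */
    static const unsigned char edid_header[8] =
        { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };

    static int edid_block_valid(const unsigned char blk[128])
    {
        return memcmp(blk, edid_header, sizeof(edid_header)) == 0;
    }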
1959# VIDEO_SELECT-only variables
1960mt_end: .word 0 # End of video mode table if built
1961edit_buf: .space 6 # Line editor buffer
1962card_name: .word 0 # Pointer to adapter name
1963scanning: .byte 0 # Performing mode scan
1964do_restore: .byte 0 # Screen contents altered during mode change
1965svga_prefix: .byte VIDEO_FIRST_BIOS>>8 # Default prefix for BIOS modes
1966graphic_mode: .byte 0 # Graphic mode with a linear frame buffer
1967dac_size: .byte 6 # DAC bit depth
1968
1969# Status messages
1970keymsg: .ascii "Press <RETURN> to see video modes available, "
1971 .ascii "<SPACE> to continue or wait 30 secs"
1972 .byte 0x0d, 0x0a, 0
1973
1974listhdr: .byte 0x0d, 0x0a
1975 .ascii "Mode: COLSxROWS:"
1976
1977crlft: .byte 0x0d, 0x0a, 0
1978
1979prompt: .byte 0x0d, 0x0a
1980 .asciz "Enter mode number or `scan': "
1981
1982unknt: .asciz "Unknown mode ID. Try again."
1983
1984badmdt: .ascii "You passed an undefined mode number."
1985 .byte 0x0d, 0x0a, 0
1986
1987vesaer: .ascii "Error: Scanning of VESA modes failed. Please "
1988 .ascii "report to <mj@ucw.cz>."
1989 .byte 0x0d, 0x0a, 0
1990
1991old_name: .asciz "CGA/MDA/HGA"
1992
1993ega_name: .asciz "EGA"
1994
1995svga_name: .ascii " "
1996
1997vga_name: .asciz "VGA"
1998
1999vesa_name: .asciz "VESA"
2000
2001name_bann: .asciz "Video adapter: "
2002#endif /* CONFIG_VIDEO_SELECT */
2003
2004# Other variables:
2005adapter: .byte 0 # Video adapter: 0=CGA/MDA/HGA,1=EGA,2=VGA
2006video_segment: .word 0xb800 # Video memory segment
2007force_size: .word 0 # Use this size instead of the one in BIOS vars
diff --git a/arch/i386/crypto/Makefile b/arch/i386/crypto/Makefile
new file mode 100644
index 000000000000..103c353d0a63
--- /dev/null
+++ b/arch/i386/crypto/Makefile
@@ -0,0 +1,9 @@
1#
2# i386/crypto/Makefile
3#
4# Arch-specific CryptoAPI modules.
5#
6
7obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
8
9aes-i586-y := aes-i586-asm.o aes.o
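The pair of lines above is kbuild's composite-object idiom: aes-i586.o is linked from the objects listed in aes-i586-y, and obj-$(CONFIG_CRYPTO_AES_586) decides whether that composite is built in, built as a module, or skipped. The same pattern for a hypothetical two-part module (names invented for illustration):

    obj-$(CONFIG_FOO) += foo.o
    foo-y := foo-core.o foo-asm.o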
diff --git a/arch/i386/crypto/aes-i586-asm.S b/arch/i386/crypto/aes-i586-asm.S
new file mode 100644
index 000000000000..7b73c67cb4e8
--- /dev/null
+++ b/arch/i386/crypto/aes-i586-asm.S
@@ -0,0 +1,376 @@
1// -------------------------------------------------------------------------
2// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
3// All rights reserved.
4//
5// LICENSE TERMS
6//
7// The free distribution and use of this software in both source and binary
8// form is allowed (with or without changes) provided that:
9//
10// 1. distributions of this source code include the above copyright
11// notice, this list of conditions and the following disclaimer//
12//
13// 2. distributions in binary form include the above copyright
14// notice, this list of conditions and the following disclaimer
15// in the documentation and/or other associated materials//
16//
17// 3. the copyright holder's name is not used to endorse products
18// built using this software without specific written permission.
19//
20//
21// ALTERNATIVELY, provided that this notice is retained in full, this product
22// may be distributed under the terms of the GNU General Public License (GPL),
23// in which case the provisions of the GPL apply INSTEAD OF those given above.
24//
25// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
26// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
27
28// DISCLAIMER
29//
30// This software is provided 'as is' with no explicit or implied warranties
31// in respect of its properties including, but not limited to, correctness
32// and fitness for purpose.
33// -------------------------------------------------------------------------
34// Issue Date: 29/07/2002
35
36.file "aes-i586-asm.S"
37.text
38
39// aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])//
40// aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])//
41
42#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
43
44// offsets to parameters with one register pushed onto stack
45
46#define in_blk 8 // input byte array address parameter
47#define out_blk 12 // output byte array address parameter
48#define ctx 16 // AES context structure
49
50// offsets in context structure
51
52#define ekey 0 // encryption key schedule base address
53#define nrnd 256 // number of rounds
54#define dkey 260 // decryption key schedule base address
55
56// register mapping for encrypt and decrypt subroutines
57
58#define r0 eax
59#define r1 ebx
60#define r2 ecx
61#define r3 edx
62#define r4 esi
63#define r5 edi
64
65#define eaxl al
66#define eaxh ah
67#define ebxl bl
68#define ebxh bh
69#define ecxl cl
70#define ecxh ch
71#define edxl dl
72#define edxh dh
73
74#define _h(reg) reg##h
75#define h(reg) _h(reg)
76
77#define _l(reg) reg##l
78#define l(reg) _l(reg)
79
80// This macro takes a 32-bit word representing a column and uses
81// each of its four bytes to index into four tables of 256 32-bit
82// words to obtain values that are then xored into the appropriate
83// output registers r0, r1, r4 or r5.
84
85// Parameters:
86// table table base address
87// a1 out_state[0]
88// a2 out_state[1]
89// a3 out_state[2]
90// a4 out_state[3]
91// idx input register for the round (destroyed)
92// tmp scratch register for the round
93// sched key schedule
94
95#define do_col(table, a1,a2,a3,a4, idx, tmp) \
96 movzx %l(idx),%tmp; \
97 xor table(,%tmp,4),%a1; \
98 movzx %h(idx),%tmp; \
99 shr $16,%idx; \
100 xor table+tlen(,%tmp,4),%a2; \
101 movzx %l(idx),%tmp; \
102 movzx %h(idx),%idx; \
103 xor table+2*tlen(,%tmp,4),%a3; \
104 xor table+3*tlen(,%idx,4),%a4;
105
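In C terms, one do_col application takes a 32-bit column, uses each byte to index one of the four 1 KB tables, and xors the results into four different output words; the byte-to-table rotation is what folds ShiftRows into the lookups. A sketch, where t0..t3 stand for table, table+tlen, table+2*tlen and table+3*tlen:

    #include <stdint.h>

    extern uint32_t t0[256], t1[256], t2[256], t3[256];

    /* Byte n of the input column selects an entry of table n, which is
     * xored into the n-th output word, as in do_col above. */
    static void do_col_c(uint32_t in, uint32_t *a1, uint32_t *a2,
                         uint32_t *a3, uint32_t *a4)
    {
        *a1 ^= t0[in & 0xff];
        *a2 ^= t1[(in >> 8) & 0xff];
        *a3 ^= t2[(in >> 16) & 0xff];
        *a4 ^= t3[in >> 24];
    }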
106// initialise output registers from the key schedule
107// NB1: original value of a3 is in idx on exit
108// NB2: original values of a1,a2,a4 aren't used
109#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
110 mov 0 sched,%a1; \
111 movzx %l(idx),%tmp; \
112 mov 12 sched,%a2; \
113 xor table(,%tmp,4),%a1; \
114 mov 4 sched,%a4; \
115 movzx %h(idx),%tmp; \
116 shr $16,%idx; \
117 xor table+tlen(,%tmp,4),%a2; \
118 movzx %l(idx),%tmp; \
119 movzx %h(idx),%idx; \
120 xor table+3*tlen(,%idx,4),%a4; \
121 mov %a3,%idx; \
122 mov 8 sched,%a3; \
123 xor table+2*tlen(,%tmp,4),%a3;
124
125// initialise output registers from the key schedule
126// NB1: original value of a3 is in idx on exit
127// NB2: original values of a1,a2,a4 aren't used
128#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
129 mov 0 sched,%a1; \
130 movzx %l(idx),%tmp; \
131 mov 4 sched,%a2; \
132 xor table(,%tmp,4),%a1; \
133 mov 12 sched,%a4; \
134 movzx %h(idx),%tmp; \
135 shr $16,%idx; \
136 xor table+tlen(,%tmp,4),%a2; \
137 movzx %l(idx),%tmp; \
138 movzx %h(idx),%idx; \
139 xor table+3*tlen(,%idx,4),%a4; \
140 mov %a3,%idx; \
141 mov 8 sched,%a3; \
142 xor table+2*tlen(,%tmp,4),%a3;
143
144
145// original Gladman had conditional saves to MMX regs.
146#define save(a1, a2) \
147 mov %a2,4*a1(%esp)
148
149#define restore(a1, a2) \
150 mov 4*a2(%esp),%a1
151
152// These macros perform a forward encryption cycle. They are entered with
153// the previous round's column values in r0,r1,r4,r5 and exit with the
154// final values in the same registers, using the stack for temporary
155// storage.
156
157// round column values
158// on entry: r0,r1,r4,r5
159// on exit: r2,r1,r4,r5
160#define fwd_rnd1(arg, table) \
161 save (0,r1); \
162 save (1,r5); \
163 \
164 /* compute new column values */ \
165 do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \
166 do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \
167 restore(r0,0); \
168 do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \
169 restore(r0,1); \
170 do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */
171
172// round column values
173// on entry: r2,r1,r4,r5
174// on exit: r0,r1,r4,r5
175#define fwd_rnd2(arg, table) \
176 save (0,r1); \
177 save (1,r5); \
178 \
179 /* compute new column values */ \
180 do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \
181 do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \
182 restore(r2,0); \
183 do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \
184 restore(r2,1); \
185 do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */
186
187// These macros perform an inverse encryption cycle. They are entered with
188// the previous round's column values in r0,r1,r4,r5 and exit with the
189// final values in the same registers, using the stack for temporary
190// storage.
191
192// round column values
193// on entry: r0,r1,r4,r5
194// on exit: r2,r1,r4,r5
195#define inv_rnd1(arg, table) \
196 save (0,r1); \
197 save (1,r5); \
198 \
199 /* compute new column values */ \
200 do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \
201 do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \
202 restore(r0,0); \
203 do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \
204 restore(r0,1); \
205 do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */
206
207// round column values
208// on entry: r2,r1,r4,r5
209// on exit: r0,r1,r4,r5
210#define inv_rnd2(arg, table) \
211 save (0,r1); \
212 save (1,r5); \
213 \
214 /* compute new column values */ \
215 do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \
216 do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \
217 restore(r2,0); \
218 do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \
219 restore(r2,1); \
220 do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
221
222// AES (Rijndael) Encryption Subroutine
223
224.global aes_enc_blk
225
226.extern ft_tab
227.extern fl_tab
228
229.align 4
230
231aes_enc_blk:
232 push %ebp
233 mov ctx(%esp),%ebp // pointer to context
234
235// CAUTION: the order and the values used in these assigns
236// rely on the register mappings
237
2381: push %ebx
239 mov in_blk+4(%esp),%r2
240 push %esi
241 mov nrnd(%ebp),%r3 // number of rounds
242 push %edi
243#if ekey != 0
244 lea ekey(%ebp),%ebp // key pointer
245#endif
246
247// input four columns and xor in first round key
248
249 mov (%r2),%r0
250 mov 4(%r2),%r1
251 mov 8(%r2),%r4
252 mov 12(%r2),%r5
253 xor (%ebp),%r0
254 xor 4(%ebp),%r1
255 xor 8(%ebp),%r4
256 xor 12(%ebp),%r5
257
258 sub $8,%esp // space for register saves on stack
259 add $16,%ebp // increment to next round key
260 sub $10,%r3
261 je 4f // 10 rounds for 128-bit key
262 add $32,%ebp
263 sub $2,%r3
264 je 3f // 12 rounds for 192-bit key
265 add $32,%ebp
266
2672: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key
268 fwd_rnd2( -48(%ebp) ,ft_tab)
2693: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key
270 fwd_rnd2( -16(%ebp) ,ft_tab)
2714: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key
272 fwd_rnd2( +16(%ebp) ,ft_tab)
273 fwd_rnd1( +32(%ebp) ,ft_tab)
274 fwd_rnd2( +48(%ebp) ,ft_tab)
275 fwd_rnd1( +64(%ebp) ,ft_tab)
276 fwd_rnd2( +80(%ebp) ,ft_tab)
277 fwd_rnd1( +96(%ebp) ,ft_tab)
278 fwd_rnd2(+112(%ebp) ,ft_tab)
279 fwd_rnd1(+128(%ebp) ,ft_tab)
280 fwd_rnd2(+144(%ebp) ,fl_tab) // last round uses a different table
281
282// move final values to the output array. CAUTION: the
283// order of these assigns relies on the register mappings
284
285 add $8,%esp
286 mov out_blk+12(%esp),%ebp
287 mov %r5,12(%ebp)
288 pop %edi
289 mov %r4,8(%ebp)
290 pop %esi
291 mov %r1,4(%ebp)
292 pop %ebx
293 mov %r0,(%ebp)
294 pop %ebp
295 mov $1,%eax
296 ret
297
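The dispatch above first xors round key 0 into the state, then positions %ebp and jumps into the unrolled chain so that 128-, 192- and 256-bit keys run 10, 12 and 14 rounds while sharing one tail; the final round always comes from fl_tab. The same control flow in C, with hypothetical round helpers:

    #include <stdint.h>

    void encrypt_round(uint32_t state[4], const uint32_t *rk);      /* hypothetical: ft_tab round */
    void encrypt_last_round(uint32_t state[4], const uint32_t *rk); /* hypothetical: fl_tab round */

    /* After the round-0 xor, rounds-1 full rounds plus one final round
     * each consume a 16-byte (4-word) round key. */
    static void enc_rounds(uint32_t state[4], const uint32_t *ekey, int rounds)
    {
        const uint32_t *rk = ekey + 4;            /* key for round 1 */
        int r;

        for (r = 1; r < rounds; r++, rk += 4)
            encrypt_round(state, rk);
        encrypt_last_round(state, rk);
    }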
298// AES (Rijndael) Decryption Subroutine
299
300.global aes_dec_blk
301
302.extern it_tab
303.extern il_tab
304
305.align 4
306
307aes_dec_blk:
308 push %ebp
309 mov ctx(%esp),%ebp // pointer to context
310
311// CAUTION: the order and the values used in these assigns
312// rely on the register mappings
313
3141: push %ebx
315 mov in_blk+4(%esp),%r2
316 push %esi
317 mov nrnd(%ebp),%r3 // number of rounds
318 push %edi
319#if dkey != 0
320 lea dkey(%ebp),%ebp // key pointer
321#endif
322 mov %r3,%r0
323 shl $4,%r0
324 add %r0,%ebp
325
326// input four columns and xor in first round key
327
328 mov (%r2),%r0
329 mov 4(%r2),%r1
330 mov 8(%r2),%r4
331 mov 12(%r2),%r5
332 xor (%ebp),%r0
333 xor 4(%ebp),%r1
334 xor 8(%ebp),%r4
335 xor 12(%ebp),%r5
336
337 sub $8,%esp // space for register saves on stack
338 sub $16,%ebp // decrement to previous round key (schedule is walked backwards)
339 sub $10,%r3
340 je 4f // 10 rounds for 128-bit key
341 sub $32,%ebp
342 sub $2,%r3
343 je 3f // 12 rounds for 192-bit key
344 sub $32,%ebp
345
3462: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key
347 inv_rnd2( +48(%ebp), it_tab)
3483: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key
349 inv_rnd2( +16(%ebp), it_tab)
3504: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key
351 inv_rnd2( -16(%ebp), it_tab)
352 inv_rnd1( -32(%ebp), it_tab)
353 inv_rnd2( -48(%ebp), it_tab)
354 inv_rnd1( -64(%ebp), it_tab)
355 inv_rnd2( -80(%ebp), it_tab)
356 inv_rnd1( -96(%ebp), it_tab)
357 inv_rnd2(-112(%ebp), it_tab)
358 inv_rnd1(-128(%ebp), it_tab)
359 inv_rnd2(-144(%ebp), il_tab) // last round uses a different table
360
361// move final values to the output array. CAUTION: the
362// order of these assigns relies on the register mappings
363
364 add $8,%esp
365 mov out_blk+12(%esp),%ebp
366 mov %r5,12(%ebp)
367 pop %edi
368 mov %r4,8(%ebp)
369 pop %esi
370 mov %r1,4(%ebp)
371 pop %ebx
372 mov %r0,(%ebp)
373 pop %ebp
374 mov $1,%eax
375 ret
376
diff --git a/arch/i386/crypto/aes.c b/arch/i386/crypto/aes.c
new file mode 100644
index 000000000000..1019430fc1f1
--- /dev/null
+++ b/arch/i386/crypto/aes.c
@@ -0,0 +1,520 @@
1/*
2 *
3 * Glue Code for optimized 586 assembler version of AES
4 *
5 * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK.
6 * All rights reserved.
7 *
8 * LICENSE TERMS
9 *
10 * The free distribution and use of this software in both source and binary
11 * form is allowed (with or without changes) provided that:
12 *
13 * 1. distributions of this source code include the above copyright
14 * notice, this list of conditions and the following disclaimer;
15 *
16 * 2. distributions in binary form include the above copyright
17 * notice, this list of conditions and the following disclaimer
18 * in the documentation and/or other associated materials;
19 *
20 * 3. the copyright holder's name is not used to endorse products
21 * built using this software without specific written permission.
22 *
23 * ALTERNATIVELY, provided that this notice is retained in full, this product
24 * may be distributed under the terms of the GNU General Public License (GPL),
25 * in which case the provisions of the GPL apply INSTEAD OF those given above.
26 *
27 * DISCLAIMER
28 *
29 * This software is provided 'as is' with no explicit or implied warranties
30 * in respect of its properties, including, but not limited to, correctness
31 * and/or fitness for purpose.
32 *
33 * Copyright (c) 2003, Adam J. Richter <adam@yggdrasil.com> (conversion to
34 * 2.5 API).
35 * Copyright (c) 2003, 2004 Fruhwirth Clemens <clemens@endorphin.org>
36 * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
37 *
38 */
39#include <linux/kernel.h>
40#include <linux/module.h>
41#include <linux/init.h>
42#include <linux/types.h>
43#include <linux/crypto.h>
44#include <linux/linkage.h>
45
46asmlinkage void aes_enc_blk(const u8 *src, u8 *dst, void *ctx);
47asmlinkage void aes_dec_blk(const u8 *src, u8 *dst, void *ctx);
48
49#define AES_MIN_KEY_SIZE 16
50#define AES_MAX_KEY_SIZE 32
51#define AES_BLOCK_SIZE 16
52#define AES_KS_LENGTH 4 * AES_BLOCK_SIZE
53#define RC_LENGTH 29
54
55struct aes_ctx {
56 u32 ekey[AES_KS_LENGTH];
57 u32 rounds;
58 u32 dkey[AES_KS_LENGTH];
59};
60
61#define WPOLY 0x011b
62#define u32_in(x) le32_to_cpu(*(const u32 *)(x))
63#define bytes2word(b0, b1, b2, b3) \
64 (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0))
65
66/* define the finite field multiplies required for Rijndael */
67#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
68#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
69#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
70#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
71#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
72#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
73#define fi(x) ((x) ? pow[255 - log[x]]: 0)
74
75static inline u32 upr(u32 x, int n)
76{
77 return (x << 8 * n) | (x >> (32 - 8 * n));
78}
79
80static inline u8 bval(u32 x, int n)
81{
82 return x >> 8 * n;
83}
84
85/* The forward and inverse affine transformations used in the S-box */
86#define fwd_affine(x) \
87 (w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8)))
88
89#define inv_affine(x) \
90 (w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8)))
91
92static u32 rcon_tab[RC_LENGTH];
93
94u32 ft_tab[4][256];
95u32 fl_tab[4][256];
96static u32 ls_tab[4][256];
97static u32 im_tab[4][256];
98u32 il_tab[4][256];
99u32 it_tab[4][256];
100
101static void gen_tabs(void)
102{
103 u32 i, w;
104 u8 pow[512], log[256];
105
106 /*
107 * log and power tables for GF(2^8) finite field with
108 * WPOLY as modular polynomial - the simplest primitive
109 * root is 0x03, used here to generate the tables.
110 */
111 i = 0; w = 1;
112
113 do {
114 pow[i] = (u8)w;
115 pow[i + 255] = (u8)w;
116 log[w] = (u8)i++;
117 w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0);
118 } while (w != 1);
119
120 for(i = 0, w = 1; i < RC_LENGTH; ++i) {
121 rcon_tab[i] = bytes2word(w, 0, 0, 0);
122 w = f2(w);
123 }
124
125 for(i = 0; i < 256; ++i) {
126 u8 b;
127
128 b = fwd_affine(fi((u8)i));
129 w = bytes2word(f2(b), b, b, f3(b));
130
131 /* tables for a normal encryption round */
132 ft_tab[0][i] = w;
133 ft_tab[1][i] = upr(w, 1);
134 ft_tab[2][i] = upr(w, 2);
135 ft_tab[3][i] = upr(w, 3);
136 w = bytes2word(b, 0, 0, 0);
137
138 /*
139 * tables for last encryption round
140 * (may also be used in the key schedule)
141 */
142 fl_tab[0][i] = w;
143 fl_tab[1][i] = upr(w, 1);
144 fl_tab[2][i] = upr(w, 2);
145 fl_tab[3][i] = upr(w, 3);
146
147 /*
148 * table for key schedule if fl_tab above is
149 * not of the required form
150 */
151 ls_tab[0][i] = w;
152 ls_tab[1][i] = upr(w, 1);
153 ls_tab[2][i] = upr(w, 2);
154 ls_tab[3][i] = upr(w, 3);
155
156 b = fi(inv_affine((u8)i));
157 w = bytes2word(fe(b), f9(b), fd(b), fb(b));
158
159 /* tables for the inverse mix column operation */
160 im_tab[0][b] = w;
161 im_tab[1][b] = upr(w, 1);
162 im_tab[2][b] = upr(w, 2);
163 im_tab[3][b] = upr(w, 3);
164
165 /* tables for a normal decryption round */
166 it_tab[0][i] = w;
167 it_tab[1][i] = upr(w,1);
168 it_tab[2][i] = upr(w,2);
169 it_tab[3][i] = upr(w,3);
170
171 w = bytes2word(b, 0, 0, 0);
172
173 /* tables for last decryption round */
174 il_tab[0][i] = w;
175 il_tab[1][i] = upr(w,1);
176 il_tab[2][i] = upr(w,2);
177 il_tab[3][i] = upr(w,3);
178 }
179}
180
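The log/pow pair turns GF(2^8) multiplication into index arithmetic: pow[log[a] + log[b]] = a*b for the generator 0x03, and since log[2] = 0x19 in this field, f2(x) = pow[log[x] + 0x19] is exactly multiplication by 2 (xtime). A standalone check of that identity, using only the definitions above:

    #include <assert.h>
    #include <stdint.h>

    #define WPOLY 0x011b

    int main(void)
    {
        uint8_t pow[512], log[256];
        unsigned i = 0, w = 1;

        do {                    /* same generation loop as gen_tabs() */
            pow[i] = (uint8_t)w;
            pow[i + 255] = (uint8_t)w;
            log[w] = (uint8_t)i++;
            w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0);
        } while (w != 1);

        for (i = 1; i < 256; i++) {
            unsigned by2 = (i << 1) ^ ((i & 0x80) ? WPOLY : 0); /* xtime */
            assert(pow[log[i] + 0x19] == (uint8_t)by2);         /* f2(i) */
        }
        return 0;
    }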
181#define four_tables(x,tab,vf,rf,c) \
182( tab[0][bval(vf(x,0,c),rf(0,c))] ^ \
183 tab[1][bval(vf(x,1,c),rf(1,c))] ^ \
184 tab[2][bval(vf(x,2,c),rf(2,c))] ^ \
185 tab[3][bval(vf(x,3,c),rf(3,c))] \
186)
187
188#define vf1(x,r,c) (x)
189#define rf1(r,c) (r)
190#define rf2(r,c) ((r-c)&3)
191
192#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
193#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
194
195#define ff(x) inv_mcol(x)
196
197#define ke4(k,i) \
198{ \
199 k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \
200 k[4*(i)+5] = ss[1] ^= ss[0]; \
201 k[4*(i)+6] = ss[2] ^= ss[1]; \
202 k[4*(i)+7] = ss[3] ^= ss[2]; \
203}
204
205#define kel4(k,i) \
206{ \
207 k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \
208 k[4*(i)+5] = ss[1] ^= ss[0]; \
209 k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \
210}
211
212#define ke6(k,i) \
213{ \
214 k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
215 k[6*(i)+ 7] = ss[1] ^= ss[0]; \
216 k[6*(i)+ 8] = ss[2] ^= ss[1]; \
217 k[6*(i)+ 9] = ss[3] ^= ss[2]; \
218 k[6*(i)+10] = ss[4] ^= ss[3]; \
219 k[6*(i)+11] = ss[5] ^= ss[4]; \
220}
221
222#define kel6(k,i) \
223{ \
224 k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
225 k[6*(i)+ 7] = ss[1] ^= ss[0]; \
226 k[6*(i)+ 8] = ss[2] ^= ss[1]; \
227 k[6*(i)+ 9] = ss[3] ^= ss[2]; \
228}
229
230#define ke8(k,i) \
231{ \
232 k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
233 k[8*(i)+ 9] = ss[1] ^= ss[0]; \
234 k[8*(i)+10] = ss[2] ^= ss[1]; \
235 k[8*(i)+11] = ss[3] ^= ss[2]; \
236 k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \
237 k[8*(i)+13] = ss[5] ^= ss[4]; \
238 k[8*(i)+14] = ss[6] ^= ss[5]; \
239 k[8*(i)+15] = ss[7] ^= ss[6]; \
240}
241
242#define kel8(k,i) \
243{ \
244 k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
245 k[8*(i)+ 9] = ss[1] ^= ss[0]; \
246 k[8*(i)+10] = ss[2] ^= ss[1]; \
247 k[8*(i)+11] = ss[3] ^= ss[2]; \
248}
249
250#define kdf4(k,i) \
251{ \
252 ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \
253 ss[1] = ss[1] ^ ss[3]; \
254 ss[2] = ss[2] ^ ss[3]; \
255 ss[3] = ss[3]; \
256 ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \
257 ss[i % 4] ^= ss[4]; \
258 ss[4] ^= k[4*(i)]; \
259 k[4*(i)+4] = ff(ss[4]); \
260 ss[4] ^= k[4*(i)+1]; \
261 k[4*(i)+5] = ff(ss[4]); \
262 ss[4] ^= k[4*(i)+2]; \
263 k[4*(i)+6] = ff(ss[4]); \
264 ss[4] ^= k[4*(i)+3]; \
265 k[4*(i)+7] = ff(ss[4]); \
266}
267
268#define kd4(k,i) \
269{ \
270 ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \
271 ss[i % 4] ^= ss[4]; \
272 ss[4] = ff(ss[4]); \
273 k[4*(i)+4] = ss[4] ^= k[4*(i)]; \
274 k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \
275 k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; \
276 k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \
277}
278
279#define kdl4(k,i) \
280{ \
281 ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \
282 ss[i % 4] ^= ss[4]; \
283 k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \
284 k[4*(i)+5] = ss[1] ^ ss[3]; \
285 k[4*(i)+6] = ss[0]; \
286 k[4*(i)+7] = ss[1]; \
287}
288
289#define kdf6(k,i) \
290{ \
291 ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
292 k[6*(i)+ 6] = ff(ss[0]); \
293 ss[1] ^= ss[0]; \
294 k[6*(i)+ 7] = ff(ss[1]); \
295 ss[2] ^= ss[1]; \
296 k[6*(i)+ 8] = ff(ss[2]); \
297 ss[3] ^= ss[2]; \
298 k[6*(i)+ 9] = ff(ss[3]); \
299 ss[4] ^= ss[3]; \
300 k[6*(i)+10] = ff(ss[4]); \
301 ss[5] ^= ss[4]; \
302 k[6*(i)+11] = ff(ss[5]); \
303}
304
305#define kd6(k,i) \
306{ \
307 ss[6] = ls_box(ss[5],3) ^ rcon_tab[i]; \
308 ss[0] ^= ss[6]; ss[6] = ff(ss[6]); \
309 k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \
310 ss[1] ^= ss[0]; \
311 k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \
312 ss[2] ^= ss[1]; \
313 k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \
314 ss[3] ^= ss[2]; \
315 k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \
316 ss[4] ^= ss[3]; \
317 k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \
318 ss[5] ^= ss[4]; \
319 k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \
320}
321
322#define kdl6(k,i) \
323{ \
324 ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
325 k[6*(i)+ 6] = ss[0]; \
326 ss[1] ^= ss[0]; \
327 k[6*(i)+ 7] = ss[1]; \
328 ss[2] ^= ss[1]; \
329 k[6*(i)+ 8] = ss[2]; \
330 ss[3] ^= ss[2]; \
331 k[6*(i)+ 9] = ss[3]; \
332}
333
334#define kdf8(k,i) \
335{ \
336 ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
337 k[8*(i)+ 8] = ff(ss[0]); \
338 ss[1] ^= ss[0]; \
339 k[8*(i)+ 9] = ff(ss[1]); \
340 ss[2] ^= ss[1]; \
341 k[8*(i)+10] = ff(ss[2]); \
342 ss[3] ^= ss[2]; \
343 k[8*(i)+11] = ff(ss[3]); \
344 ss[4] ^= ls_box(ss[3],0); \
345 k[8*(i)+12] = ff(ss[4]); \
346 ss[5] ^= ss[4]; \
347 k[8*(i)+13] = ff(ss[5]); \
348 ss[6] ^= ss[5]; \
349 k[8*(i)+14] = ff(ss[6]); \
350 ss[7] ^= ss[6]; \
351 k[8*(i)+15] = ff(ss[7]); \
352}
353
354#define kd8(k,i) \
355{ \
356 u32 __g = ls_box(ss[7],3) ^ rcon_tab[i]; \
357 ss[0] ^= __g; \
358 __g = ff(__g); \
359 k[8*(i)+ 8] = __g ^= k[8*(i)]; \
360 ss[1] ^= ss[0]; \
361 k[8*(i)+ 9] = __g ^= k[8*(i)+ 1]; \
362 ss[2] ^= ss[1]; \
363 k[8*(i)+10] = __g ^= k[8*(i)+ 2]; \
364 ss[3] ^= ss[2]; \
365 k[8*(i)+11] = __g ^= k[8*(i)+ 3]; \
366 __g = ls_box(ss[3],0); \
367 ss[4] ^= __g; \
368 __g = ff(__g); \
369 k[8*(i)+12] = __g ^= k[8*(i)+ 4]; \
370 ss[5] ^= ss[4]; \
371 k[8*(i)+13] = __g ^= k[8*(i)+ 5]; \
372 ss[6] ^= ss[5]; \
373 k[8*(i)+14] = __g ^= k[8*(i)+ 6]; \
374 ss[7] ^= ss[6]; \
375 k[8*(i)+15] = __g ^= k[8*(i)+ 7]; \
376}
377
378#define kdl8(k,i) \
379{ \
380 ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
381 k[8*(i)+ 8] = ss[0]; \
382 ss[1] ^= ss[0]; \
383 k[8*(i)+ 9] = ss[1]; \
384 ss[2] ^= ss[1]; \
385 k[8*(i)+10] = ss[2]; \
386 ss[3] ^= ss[2]; \
387 k[8*(i)+11] = ss[3]; \
388}
389
390static int
391aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags)
392{
393 int i;
394 u32 ss[8];
395 struct aes_ctx *ctx = ctx_arg;
396
397 /* encryption schedule */
398
399 ctx->ekey[0] = ss[0] = u32_in(in_key);
400 ctx->ekey[1] = ss[1] = u32_in(in_key + 4);
401 ctx->ekey[2] = ss[2] = u32_in(in_key + 8);
402 ctx->ekey[3] = ss[3] = u32_in(in_key + 12);
403
404 switch(key_len) {
405 case 16:
406 for (i = 0; i < 9; i++)
407 ke4(ctx->ekey, i);
408 kel4(ctx->ekey, 9);
409 ctx->rounds = 10;
410 break;
411
412 case 24:
413 ctx->ekey[4] = ss[4] = u32_in(in_key + 16);
414 ctx->ekey[5] = ss[5] = u32_in(in_key + 20);
415 for (i = 0; i < 7; i++)
416 ke6(ctx->ekey, i);
417 kel6(ctx->ekey, 7);
418 ctx->rounds = 12;
419 break;
420
421 case 32:
422 ctx->ekey[4] = ss[4] = u32_in(in_key + 16);
423 ctx->ekey[5] = ss[5] = u32_in(in_key + 20);
424 ctx->ekey[6] = ss[6] = u32_in(in_key + 24);
425 ctx->ekey[7] = ss[7] = u32_in(in_key + 28);
426 for (i = 0; i < 6; i++)
427 ke8(ctx->ekey, i);
428 kel8(ctx->ekey, 6);
429 ctx->rounds = 14;
430 break;
431
432 default:
433 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
434 return -EINVAL;
435 }
436
437 /* decryption schedule */
438
439 ctx->dkey[0] = ss[0] = u32_in(in_key);
440 ctx->dkey[1] = ss[1] = u32_in(in_key + 4);
441 ctx->dkey[2] = ss[2] = u32_in(in_key + 8);
442 ctx->dkey[3] = ss[3] = u32_in(in_key + 12);
443
444 switch (key_len) {
445 case 16:
446 kdf4(ctx->dkey, 0);
447 for (i = 1; i < 9; i++)
448 kd4(ctx->dkey, i);
449 kdl4(ctx->dkey, 9);
450 break;
451
452 case 24:
453 ctx->dkey[4] = ff(ss[4] = u32_in(in_key + 16));
454 ctx->dkey[5] = ff(ss[5] = u32_in(in_key + 20));
455 kdf6(ctx->dkey, 0);
456 for (i = 1; i < 7; i++)
457 kd6(ctx->dkey, i);
458 kdl6(ctx->dkey, 7);
459 break;
460
461 case 32:
462 ctx->dkey[4] = ff(ss[4] = u32_in(in_key + 16));
463 ctx->dkey[5] = ff(ss[5] = u32_in(in_key + 20));
464 ctx->dkey[6] = ff(ss[6] = u32_in(in_key + 24));
465 ctx->dkey[7] = ff(ss[7] = u32_in(in_key + 28));
466 kdf8(ctx->dkey, 0);
467 for (i = 1; i < 6; i++)
468 kd8(ctx->dkey, i);
469 kdl8(ctx->dkey, 6);
470 break;
471 }
472 return 0;
473}
474
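On sizing: an N-round schedule needs 4*(N+1) 32-bit words, i.e. 44, 52 and 60 words for 10, 12 and 14 rounds, so the AES_KS_LENGTH (4 * AES_BLOCK_SIZE = 64) word arrays in struct aes_ctx cover the worst case with room to spare. A compile-time guard one could add to make that assumption explicit:

    /* 4*(14+1) = 60 words needed for AES-256; AES_KS_LENGTH is 64. */
    typedef char aes_ks_fits[AES_KS_LENGTH >= 4 * (14 + 1) ? 1 : -1];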
475static inline void aes_encrypt(void *ctx, u8 *dst, const u8 *src)
476{
477 aes_enc_blk(src, dst, ctx);
478}
479static inline void aes_decrypt(void *ctx, u8 *dst, const u8 *src)
480{
481 aes_dec_blk(src, dst, ctx);
482}
483
484
485static struct crypto_alg aes_alg = {
486 .cra_name = "aes",
487 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
488 .cra_blocksize = AES_BLOCK_SIZE,
489 .cra_ctxsize = sizeof(struct aes_ctx),
490 .cra_module = THIS_MODULE,
491 .cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
492 .cra_u = {
493 .cipher = {
494 .cia_min_keysize = AES_MIN_KEY_SIZE,
495 .cia_max_keysize = AES_MAX_KEY_SIZE,
496 .cia_setkey = aes_set_key,
497 .cia_encrypt = aes_encrypt,
498 .cia_decrypt = aes_decrypt
499 }
500 }
501};
502
503static int __init aes_init(void)
504{
505 gen_tabs();
506 return crypto_register_alg(&aes_alg);
507}
508
509static void __exit aes_fini(void)
510{
511 crypto_unregister_alg(&aes_alg);
512}
513
514module_init(aes_init);
515module_exit(aes_fini);
516
517MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized");
518MODULE_LICENSE("Dual BSD/GPL");
519MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter");
520MODULE_ALIAS("aes");
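For context, in-kernel users reach this cipher through the CryptoAPI by the name registered above (with MODULE_ALIAS("aes") letting module autoload satisfy the request) rather than by calling aes_enc_blk() directly. A minimal sketch against the crypto_alloc_tfm()/scatterlist interface of this kernel generation; error handling is elided and the buffer handling is illustrative only:

    #include <linux/kernel.h>
    #include <linux/mm.h>
    #include <linux/crypto.h>
    #include <asm/scatterlist.h>

    /* Encrypt one 16-byte block in ECB mode; buf is assumed kmalloc'd. */
    static int encrypt_one_block(u8 *buf, const u8 *key)
    {
        struct crypto_tfm *tfm = crypto_alloc_tfm("aes", CRYPTO_TFM_MODE_ECB);
        struct scatterlist sg;

        if (!tfm)
            return -ENOMEM;

        sg.page   = virt_to_page(buf);
        sg.offset = offset_in_page(buf);
        sg.length = 16;

        crypto_cipher_setkey(tfm, key, 16);
        crypto_cipher_encrypt(tfm, &sg, &sg, 16);
        crypto_free_tfm(tfm);
        return 0;
    }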
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
new file mode 100644
index 000000000000..28e620383799
--- /dev/null
+++ b/arch/i386/defconfig
@@ -0,0 +1,1247 @@
1#
2# Automatically generated make config: don't edit
3#
4CONFIG_X86=y
5CONFIG_MMU=y
6CONFIG_UID16=y
7CONFIG_GENERIC_ISA_DMA=y
8
9#
10# Code maturity level options
11#
12CONFIG_EXPERIMENTAL=y
13CONFIG_CLEAN_COMPILE=y
14CONFIG_STANDALONE=y
15
16#
17# General setup
18#
19CONFIG_SWAP=y
20CONFIG_SYSVIPC=y
21CONFIG_POSIX_MQUEUE=y
22# CONFIG_BSD_PROCESS_ACCT is not set
23CONFIG_SYSCTL=y
24CONFIG_AUDIT=y
25CONFIG_AUDITSYSCALL=y
26CONFIG_LOG_BUF_SHIFT=15
27CONFIG_HOTPLUG=y
28# CONFIG_IKCONFIG is not set
29# CONFIG_EMBEDDED is not set
30CONFIG_KALLSYMS=y
31CONFIG_FUTEX=y
32CONFIG_EPOLL=y
33CONFIG_IOSCHED_NOOP=y
34CONFIG_IOSCHED_AS=y
35CONFIG_IOSCHED_DEADLINE=y
36CONFIG_IOSCHED_CFQ=y
37# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
38
39#
40# Loadable module support
41#
42CONFIG_MODULES=y
43# CONFIG_MODULE_UNLOAD is not set
44CONFIG_OBSOLETE_MODPARM=y
45# CONFIG_MODVERSIONS is not set
46CONFIG_KMOD=y
47
48#
49# Processor type and features
50#
51CONFIG_X86_PC=y
52# CONFIG_X86_ELAN is not set
53# CONFIG_X86_VOYAGER is not set
54# CONFIG_X86_NUMAQ is not set
55# CONFIG_X86_SUMMIT is not set
56# CONFIG_X86_BIGSMP is not set
57# CONFIG_X86_VISWS is not set
58# CONFIG_X86_GENERICARCH is not set
59# CONFIG_X86_ES7000 is not set
60# CONFIG_M386 is not set
61# CONFIG_M486 is not set
62# CONFIG_M586 is not set
63# CONFIG_M586TSC is not set
64# CONFIG_M586MMX is not set
65# CONFIG_M686 is not set
66# CONFIG_MPENTIUMII is not set
67# CONFIG_MPENTIUMIII is not set
68# CONFIG_MPENTIUMM is not set
69CONFIG_MPENTIUM4=y
70# CONFIG_MK6 is not set
71# CONFIG_MK7 is not set
72# CONFIG_MK8 is not set
73# CONFIG_MCRUSOE is not set
74# CONFIG_MEFFICEON is not set
75# CONFIG_MWINCHIPC6 is not set
76# CONFIG_MWINCHIP2 is not set
77# CONFIG_MWINCHIP3D is not set
78# CONFIG_MCYRIXIII is not set
79# CONFIG_MVIAC3_2 is not set
80# CONFIG_X86_GENERIC is not set
81CONFIG_X86_CMPXCHG=y
82CONFIG_X86_XADD=y
83CONFIG_X86_L1_CACHE_SHIFT=7
84CONFIG_RWSEM_XCHGADD_ALGORITHM=y
85CONFIG_X86_WP_WORKS_OK=y
86CONFIG_X86_INVLPG=y
87CONFIG_X86_BSWAP=y
88CONFIG_X86_POPAD_OK=y
89CONFIG_X86_GOOD_APIC=y
90CONFIG_X86_INTEL_USERCOPY=y
91CONFIG_X86_USE_PPRO_CHECKSUM=y
92# CONFIG_HPET_TIMER is not set
93# CONFIG_HPET_EMULATE_RTC is not set
94CONFIG_SMP=y
95CONFIG_NR_CPUS=8
96CONFIG_SCHED_SMT=y
97CONFIG_PREEMPT=y
98CONFIG_X86_LOCAL_APIC=y
99CONFIG_X86_IO_APIC=y
100CONFIG_X86_TSC=y
101CONFIG_X86_MCE=y
102CONFIG_X86_MCE_NONFATAL=y
103CONFIG_X86_MCE_P4THERMAL=y
104# CONFIG_TOSHIBA is not set
105# CONFIG_I8K is not set
106# CONFIG_MICROCODE is not set
107# CONFIG_X86_MSR is not set
108# CONFIG_X86_CPUID is not set
109
110#
111# Firmware Drivers
112#
113# CONFIG_EDD is not set
114CONFIG_NOHIGHMEM=y
115# CONFIG_HIGHMEM4G is not set
116# CONFIG_HIGHMEM64G is not set
117# CONFIG_MATH_EMULATION is not set
118CONFIG_MTRR=y
119# CONFIG_EFI is not set
120CONFIG_IRQBALANCE=y
121CONFIG_HAVE_DEC_LOCK=y
122# CONFIG_REGPARM is not set
123
124#
125# Power management options (ACPI, APM)
126#
127CONFIG_PM=y
128CONFIG_SOFTWARE_SUSPEND=y
129# CONFIG_PM_DISK is not set
130
131#
132# ACPI (Advanced Configuration and Power Interface) Support
133#
134CONFIG_ACPI=y
135CONFIG_ACPI_BOOT=y
136CONFIG_ACPI_INTERPRETER=y
137CONFIG_ACPI_SLEEP=y
138CONFIG_ACPI_SLEEP_PROC_FS=y
139CONFIG_ACPI_AC=y
140CONFIG_ACPI_BATTERY=y
141CONFIG_ACPI_BUTTON=y
142CONFIG_ACPI_FAN=y
143CONFIG_ACPI_PROCESSOR=y
144CONFIG_ACPI_THERMAL=y
145# CONFIG_ACPI_ASUS is not set
146# CONFIG_ACPI_TOSHIBA is not set
147# CONFIG_ACPI_DEBUG is not set
148CONFIG_ACPI_BUS=y
149CONFIG_ACPI_EC=y
150CONFIG_ACPI_POWER=y
151CONFIG_ACPI_PCI=y
152CONFIG_ACPI_SYSTEM=y
153# CONFIG_X86_PM_TIMER is not set
154
155#
156# APM (Advanced Power Management) BIOS Support
157#
158# CONFIG_APM is not set
159
160#
161# CPU Frequency scaling
162#
163# CONFIG_CPU_FREQ is not set
164
165#
166# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
167#
168CONFIG_PCI=y
169# CONFIG_PCI_GOBIOS is not set
170# CONFIG_PCI_GOMMCONFIG is not set
171# CONFIG_PCI_GODIRECT is not set
172CONFIG_PCI_GOANY=y
173CONFIG_PCI_BIOS=y
174CONFIG_PCI_DIRECT=y
175CONFIG_PCI_MMCONFIG=y
176# CONFIG_PCI_USE_VECTOR is not set
177CONFIG_PCI_LEGACY_PROC=y
178CONFIG_PCI_NAMES=y
179CONFIG_ISA=y
180# CONFIG_EISA is not set
181# CONFIG_MCA is not set
182# CONFIG_SCx200 is not set
183
184#
185# PCMCIA/CardBus support
186#
187# CONFIG_PCMCIA is not set
188CONFIG_PCMCIA_PROBE=y
189
190#
191# PCI Hotplug Support
192#
193# CONFIG_HOTPLUG_PCI is not set
194
195#
196# Executable file formats
197#
198CONFIG_BINFMT_ELF=y
199CONFIG_BINFMT_AOUT=y
200CONFIG_BINFMT_MISC=y
201
202#
203# Device Drivers
204#
205
206#
207# Generic Driver Options
208#
209CONFIG_FW_LOADER=m
210
211#
212# Memory Technology Devices (MTD)
213#
214# CONFIG_MTD is not set
215
216#
217# Parallel port support
218#
219CONFIG_PARPORT=y
220CONFIG_PARPORT_PC=y
221CONFIG_PARPORT_PC_CML1=y
222# CONFIG_PARPORT_SERIAL is not set
223# CONFIG_PARPORT_PC_FIFO is not set
224# CONFIG_PARPORT_PC_SUPERIO is not set
225# CONFIG_PARPORT_OTHER is not set
226# CONFIG_PARPORT_1284 is not set
227
228#
229# Plug and Play support
230#
231CONFIG_PNP=y
232# CONFIG_PNP_DEBUG is not set
233
234#
235# Protocols
236#
237# CONFIG_ISAPNP is not set
238# CONFIG_PNPBIOS is not set
239
240#
241# Block devices
242#
243CONFIG_BLK_DEV_FD=y
244# CONFIG_BLK_DEV_XD is not set
245# CONFIG_PARIDE is not set
246# CONFIG_BLK_CPQ_DA is not set
247# CONFIG_BLK_CPQ_CISS_DA is not set
248# CONFIG_BLK_DEV_DAC960 is not set
249# CONFIG_BLK_DEV_UMEM is not set
250# CONFIG_BLK_DEV_LOOP is not set
251# CONFIG_BLK_DEV_NBD is not set
252# CONFIG_BLK_DEV_CARMEL is not set
253# CONFIG_BLK_DEV_RAM is not set
254CONFIG_LBD=y
255
256#
257# ATA/ATAPI/MFM/RLL support
258#
259CONFIG_IDE=y
260CONFIG_BLK_DEV_IDE=y
261
262#
263# Please see Documentation/ide.txt for help/info on IDE drives
264#
265# CONFIG_BLK_DEV_HD_IDE is not set
266CONFIG_BLK_DEV_IDEDISK=y
267CONFIG_IDEDISK_MULTI_MODE=y
268CONFIG_BLK_DEV_IDECD=y
269# CONFIG_BLK_DEV_IDETAPE is not set
270# CONFIG_BLK_DEV_IDEFLOPPY is not set
271# CONFIG_BLK_DEV_IDESCSI is not set
272# CONFIG_IDE_TASK_IOCTL is not set
273CONFIG_IDE_TASKFILE_IO=y
274
275#
276# IDE chipset support/bugfixes
277#
278CONFIG_IDE_GENERIC=y
279CONFIG_BLK_DEV_CMD640=y
280# CONFIG_BLK_DEV_CMD640_ENHANCED is not set
281# CONFIG_BLK_DEV_IDEPNP is not set
282CONFIG_BLK_DEV_IDEPCI=y
283CONFIG_IDEPCI_SHARE_IRQ=y
284# CONFIG_BLK_DEV_OFFBOARD is not set
285CONFIG_BLK_DEV_GENERIC=y
286# CONFIG_BLK_DEV_OPTI621 is not set
287CONFIG_BLK_DEV_RZ1000=y
288CONFIG_BLK_DEV_IDEDMA_PCI=y
289# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
290CONFIG_IDEDMA_PCI_AUTO=y
291# CONFIG_IDEDMA_ONLYDISK is not set
292CONFIG_BLK_DEV_ADMA=y
293# CONFIG_BLK_DEV_AEC62XX is not set
294# CONFIG_BLK_DEV_ALI15X3 is not set
295# CONFIG_BLK_DEV_AMD74XX is not set
296# CONFIG_BLK_DEV_ATIIXP is not set
297# CONFIG_BLK_DEV_CMD64X is not set
298# CONFIG_BLK_DEV_TRIFLEX is not set
299# CONFIG_BLK_DEV_CY82C693 is not set
300# CONFIG_BLK_DEV_CS5520 is not set
301# CONFIG_BLK_DEV_CS5530 is not set
302# CONFIG_BLK_DEV_HPT34X is not set
303# CONFIG_BLK_DEV_HPT366 is not set
304# CONFIG_BLK_DEV_SC1200 is not set
305CONFIG_BLK_DEV_PIIX=y
306# CONFIG_BLK_DEV_NS87415 is not set
307# CONFIG_BLK_DEV_PDC202XX_OLD is not set
308# CONFIG_BLK_DEV_PDC202XX_NEW is not set
309# CONFIG_BLK_DEV_SVWKS is not set
310# CONFIG_BLK_DEV_SIIMAGE is not set
311# CONFIG_BLK_DEV_SIS5513 is not set
312# CONFIG_BLK_DEV_SLC90E66 is not set
313# CONFIG_BLK_DEV_TRM290 is not set
314# CONFIG_BLK_DEV_VIA82CXXX is not set
315# CONFIG_IDE_ARM is not set
316# CONFIG_IDE_CHIPSETS is not set
317CONFIG_BLK_DEV_IDEDMA=y
318# CONFIG_IDEDMA_IVB is not set
319CONFIG_IDEDMA_AUTO=y
320# CONFIG_BLK_DEV_HD is not set
321
322#
323# SCSI device support
324#
325CONFIG_SCSI=y
326CONFIG_SCSI_PROC_FS=y
327
328#
329# SCSI support type (disk, tape, CD-ROM)
330#
331CONFIG_BLK_DEV_SD=y
332# CONFIG_CHR_DEV_ST is not set
333# CONFIG_CHR_DEV_OSST is not set
334# CONFIG_BLK_DEV_SR is not set
335CONFIG_CHR_DEV_SG=y
336
337#
338# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
339#
340# CONFIG_SCSI_MULTI_LUN is not set
341# CONFIG_SCSI_CONSTANTS is not set
342# CONFIG_SCSI_LOGGING is not set
343
344#
345# SCSI Transport Attributes
346#
347# CONFIG_SCSI_SPI_ATTRS is not set
348# CONFIG_SCSI_FC_ATTRS is not set
349
350#
351# SCSI low-level drivers
352#
353# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
354# CONFIG_SCSI_7000FASST is not set
355# CONFIG_SCSI_ACARD is not set
356# CONFIG_SCSI_AHA152X is not set
357# CONFIG_SCSI_AHA1542 is not set
358# CONFIG_SCSI_AACRAID is not set
359# CONFIG_SCSI_AIC7XXX is not set
360# CONFIG_SCSI_AIC7XXX_OLD is not set
361# CONFIG_SCSI_AIC79XX is not set
362CONFIG_SCSI_DPT_I2O=m
363# CONFIG_SCSI_ADVANSYS is not set
364# CONFIG_SCSI_IN2000 is not set
365# CONFIG_SCSI_MEGARAID is not set
366CONFIG_SCSI_SATA=y
367# CONFIG_SCSI_SATA_SVW is not set
368CONFIG_SCSI_ATA_PIIX=y
369# CONFIG_SCSI_SATA_PROMISE is not set
370CONFIG_SCSI_SATA_SX4=m
371# CONFIG_SCSI_SATA_SIL is not set
372CONFIG_SCSI_SATA_SIS=m
373# CONFIG_SCSI_SATA_VIA is not set
374# CONFIG_SCSI_SATA_VITESSE is not set
375# CONFIG_SCSI_BUSLOGIC is not set
376# CONFIG_SCSI_CPQFCTS is not set
377# CONFIG_SCSI_DMX3191D is not set
378# CONFIG_SCSI_DTC3280 is not set
379# CONFIG_SCSI_EATA is not set
380# CONFIG_SCSI_EATA_PIO is not set
381# CONFIG_SCSI_FUTURE_DOMAIN is not set
382# CONFIG_SCSI_GDTH is not set
383# CONFIG_SCSI_GENERIC_NCR5380 is not set
384# CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set
385# CONFIG_SCSI_IPS is not set
386# CONFIG_SCSI_INIA100 is not set
387# CONFIG_SCSI_PPA is not set
388# CONFIG_SCSI_IMM is not set
389# CONFIG_SCSI_NCR53C406A is not set
390# CONFIG_SCSI_SYM53C8XX_2 is not set
391CONFIG_SCSI_IPR=m
392# CONFIG_SCSI_IPR_TRACE is not set
393# CONFIG_SCSI_IPR_DUMP is not set
394# CONFIG_SCSI_PAS16 is not set
395# CONFIG_SCSI_PSI240I is not set
396# CONFIG_SCSI_QLOGIC_FAS is not set
397# CONFIG_SCSI_QLOGIC_ISP is not set
398# CONFIG_SCSI_QLOGIC_FC is not set
399# CONFIG_SCSI_QLOGIC_1280 is not set
400CONFIG_SCSI_QLA2XXX=y
401# CONFIG_SCSI_QLA21XX is not set
402# CONFIG_SCSI_QLA22XX is not set
403# CONFIG_SCSI_QLA2300 is not set
404# CONFIG_SCSI_QLA2322 is not set
405# CONFIG_SCSI_QLA6312 is not set
406# CONFIG_SCSI_QLA6322 is not set
407# CONFIG_SCSI_SYM53C416 is not set
408# CONFIG_SCSI_DC395x is not set
409# CONFIG_SCSI_DC390T is not set
410# CONFIG_SCSI_T128 is not set
411# CONFIG_SCSI_U14_34F is not set
412# CONFIG_SCSI_ULTRASTOR is not set
413# CONFIG_SCSI_NSP32 is not set
414# CONFIG_SCSI_DEBUG is not set
415
416#
417# Old CD-ROM drivers (not SCSI, not IDE)
418#
419# CONFIG_CD_NO_IDESCSI is not set
420
421#
422# Multi-device support (RAID and LVM)
423#
424# CONFIG_MD is not set
425
426#
427# Fusion MPT device support
428#
429# CONFIG_FUSION is not set
430
431#
432# IEEE 1394 (FireWire) support
433#
434CONFIG_IEEE1394=y
435
436#
437# Subsystem Options
438#
439# CONFIG_IEEE1394_VERBOSEDEBUG is not set
440# CONFIG_IEEE1394_OUI_DB is not set
441# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
442
443#
444# Device Drivers
445#
446
447#
448# Texas Instruments PCILynx requires I2C
449#
450CONFIG_IEEE1394_OHCI1394=y
451
452#
453# Protocol Drivers
454#
455# CONFIG_IEEE1394_VIDEO1394 is not set
456# CONFIG_IEEE1394_SBP2 is not set
457# CONFIG_IEEE1394_ETH1394 is not set
458# CONFIG_IEEE1394_DV1394 is not set
459CONFIG_IEEE1394_RAWIO=y
460# CONFIG_IEEE1394_CMP is not set
461
462#
463# I2O device support
464#
465# CONFIG_I2O is not set
466
467#
468# Networking support
469#
470CONFIG_NET=y
471
472#
473# Networking options
474#
475CONFIG_PACKET=y
476# CONFIG_PACKET_MMAP is not set
477# CONFIG_NETLINK_DEV is not set
478CONFIG_UNIX=y
479# CONFIG_NET_KEY is not set
480CONFIG_INET=y
481CONFIG_IP_MULTICAST=y
482# CONFIG_IP_ADVANCED_ROUTER is not set
483# CONFIG_IP_PNP is not set
484# CONFIG_NET_IPIP is not set
485# CONFIG_NET_IPGRE is not set
486# CONFIG_IP_MROUTE is not set
487# CONFIG_ARPD is not set
488# CONFIG_SYN_COOKIES is not set
489# CONFIG_INET_AH is not set
490# CONFIG_INET_ESP is not set
491# CONFIG_INET_IPCOMP is not set
492
493#
494# IP: Virtual Server Configuration
495#
496# CONFIG_IP_VS is not set
497# CONFIG_IPV6 is not set
498CONFIG_NETFILTER=y
499# CONFIG_NETFILTER_DEBUG is not set
500
501#
502# IP: Netfilter Configuration
503#
504CONFIG_IP_NF_CONNTRACK=y
505# CONFIG_IP_NF_FTP is not set
506# CONFIG_IP_NF_IRC is not set
507# CONFIG_IP_NF_TFTP is not set
508# CONFIG_IP_NF_AMANDA is not set
509CONFIG_IP_NF_QUEUE=y
510CONFIG_IP_NF_IPTABLES=y
511CONFIG_IP_NF_MATCH_LIMIT=y
512CONFIG_IP_NF_MATCH_IPRANGE=y
513CONFIG_IP_NF_MATCH_MAC=y
514CONFIG_IP_NF_MATCH_PKTTYPE=y
515CONFIG_IP_NF_MATCH_MARK=y
516CONFIG_IP_NF_MATCH_MULTIPORT=y
517CONFIG_IP_NF_MATCH_TOS=y
518CONFIG_IP_NF_MATCH_RECENT=y
519CONFIG_IP_NF_MATCH_ECN=y
520CONFIG_IP_NF_MATCH_DSCP=y
521CONFIG_IP_NF_MATCH_AH_ESP=y
522CONFIG_IP_NF_MATCH_LENGTH=y
523CONFIG_IP_NF_MATCH_TTL=y
524CONFIG_IP_NF_MATCH_TCPMSS=y
525CONFIG_IP_NF_MATCH_HELPER=y
526CONFIG_IP_NF_MATCH_STATE=y
527CONFIG_IP_NF_MATCH_CONNTRACK=y
528CONFIG_IP_NF_MATCH_OWNER=y
529CONFIG_IP_NF_FILTER=y
530CONFIG_IP_NF_TARGET_REJECT=y
531CONFIG_IP_NF_NAT=y
532CONFIG_IP_NF_NAT_NEEDED=y
533CONFIG_IP_NF_TARGET_MASQUERADE=y
534CONFIG_IP_NF_TARGET_REDIRECT=y
535CONFIG_IP_NF_TARGET_NETMAP=y
536CONFIG_IP_NF_TARGET_SAME=y
537# CONFIG_IP_NF_NAT_SNMP_BASIC is not set
538CONFIG_IP_NF_MANGLE=y
539CONFIG_IP_NF_TARGET_TOS=y
540CONFIG_IP_NF_TARGET_ECN=y
541CONFIG_IP_NF_TARGET_DSCP=y
542CONFIG_IP_NF_TARGET_MARK=y
543CONFIG_IP_NF_TARGET_CLASSIFY=y
544CONFIG_IP_NF_TARGET_LOG=y
545CONFIG_IP_NF_TARGET_ULOG=y
546CONFIG_IP_NF_TARGET_TCPMSS=y
547CONFIG_IP_NF_ARPTABLES=y
548CONFIG_IP_NF_ARPFILTER=y
549CONFIG_IP_NF_ARP_MANGLE=y
550CONFIG_IP_NF_TARGET_NOTRACK=m
551CONFIG_IP_NF_RAW=m
552
553#
554# SCTP Configuration (EXPERIMENTAL)
555#
556# CONFIG_IP_SCTP is not set
557# CONFIG_ATM is not set
558# CONFIG_BRIDGE is not set
559# CONFIG_VLAN_8021Q is not set
560# CONFIG_DECNET is not set
561# CONFIG_LLC2 is not set
562# CONFIG_IPX is not set
563# CONFIG_ATALK is not set
564# CONFIG_X25 is not set
565# CONFIG_LAPB is not set
566# CONFIG_NET_DIVERT is not set
567# CONFIG_ECONET is not set
568# CONFIG_WAN_ROUTER is not set
569# CONFIG_NET_FASTROUTE is not set
570# CONFIG_NET_HW_FLOWCONTROL is not set
571
572#
573# QoS and/or fair queueing
574#
575# CONFIG_NET_SCHED is not set
576
577#
578# Network testing
579#
580# CONFIG_NET_PKTGEN is not set
581# CONFIG_NETPOLL is not set
582# CONFIG_NET_POLL_CONTROLLER is not set
583# CONFIG_HAMRADIO is not set
584# CONFIG_IRDA is not set
585# CONFIG_BT is not set
586CONFIG_NETDEVICES=y
587CONFIG_DUMMY=m
588# CONFIG_BONDING is not set
589# CONFIG_EQUALIZER is not set
590# CONFIG_TUN is not set
591# CONFIG_NET_SB1000 is not set
592
593#
594# ARCnet devices
595#
596# CONFIG_ARCNET is not set
597
598#
599# Ethernet (10 or 100Mbit)
600#
601CONFIG_NET_ETHERNET=y
602CONFIG_MII=y
603# CONFIG_HAPPYMEAL is not set
604# CONFIG_SUNGEM is not set
605# CONFIG_NET_VENDOR_3COM is not set
606# CONFIG_LANCE is not set
607# CONFIG_NET_VENDOR_SMC is not set
608# CONFIG_NET_VENDOR_RACAL is not set
609
610#
611# Tulip family network device support
612#
613# CONFIG_NET_TULIP is not set
614# CONFIG_AT1700 is not set
615# CONFIG_DEPCA is not set
616# CONFIG_HP100 is not set
617# CONFIG_NET_ISA is not set
618CONFIG_NET_PCI=y
619# CONFIG_PCNET32 is not set
620# CONFIG_AMD8111_ETH is not set
621# CONFIG_ADAPTEC_STARFIRE is not set
622# CONFIG_AC3200 is not set
623# CONFIG_APRICOT is not set
624# CONFIG_B44 is not set
625# CONFIG_FORCEDETH is not set
626# CONFIG_CS89x0 is not set
627# CONFIG_DGRS is not set
628# CONFIG_EEPRO100 is not set
629# CONFIG_E100 is not set
630# CONFIG_FEALNX is not set
631# CONFIG_NATSEMI is not set
632# CONFIG_NE2K_PCI is not set
633# CONFIG_8139CP is not set
634CONFIG_8139TOO=y
635CONFIG_8139TOO_PIO=y
636# CONFIG_8139TOO_TUNE_TWISTER is not set
637# CONFIG_8139TOO_8129 is not set
638# CONFIG_8139_OLD_RX_RESET is not set
639# CONFIG_SIS900 is not set
640# CONFIG_EPIC100 is not set
641# CONFIG_SUNDANCE is not set
642# CONFIG_TLAN is not set
643# CONFIG_VIA_RHINE is not set
644# CONFIG_NET_POCKET is not set
645
646#
647# Ethernet (1000 Mbit)
648#
649# CONFIG_ACENIC is not set
650# CONFIG_DL2K is not set
651# CONFIG_E1000 is not set
652# CONFIG_NS83820 is not set
653# CONFIG_HAMACHI is not set
654# CONFIG_YELLOWFIN is not set
655# CONFIG_R8169 is not set
656# CONFIG_SK98LIN is not set
657# CONFIG_TIGON3 is not set
658
659#
660# Ethernet (10000 Mbit)
661#
662# CONFIG_IXGB is not set
663CONFIG_S2IO=m
664# CONFIG_S2IO_NAPI is not set
665
666#
667# Token Ring devices
668#
669# CONFIG_TR is not set
670
671#
672# Wireless LAN (non-hamradio)
673#
674# CONFIG_NET_RADIO is not set
675
676#
677# Wan interfaces
678#
679# CONFIG_WAN is not set
680# CONFIG_FDDI is not set
681# CONFIG_HIPPI is not set
682# CONFIG_PLIP is not set
683# CONFIG_PPP is not set
684# CONFIG_SLIP is not set
685# CONFIG_NET_FC is not set
686# CONFIG_SHAPER is not set
687# CONFIG_NETCONSOLE is not set
688
689#
690# ISDN subsystem
691#
692# CONFIG_ISDN is not set
693
694#
695# Telephony Support
696#
697# CONFIG_PHONE is not set
698
699#
700# Input device support
701#
702CONFIG_INPUT=y
703
704#
705# Userland interfaces
706#
707CONFIG_INPUT_MOUSEDEV=y
708CONFIG_INPUT_MOUSEDEV_PSAUX=y
709CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
710CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
711# CONFIG_INPUT_JOYDEV is not set
712# CONFIG_INPUT_TSDEV is not set
713# CONFIG_INPUT_EVDEV is not set
714# CONFIG_INPUT_EVBUG is not set
715
716#
717# Input I/O drivers
718#
719# CONFIG_GAMEPORT is not set
720CONFIG_SOUND_GAMEPORT=y
721CONFIG_SERIO=y
722CONFIG_SERIO_I8042=y
723# CONFIG_SERIO_SERPORT is not set
724# CONFIG_SERIO_CT82C710 is not set
725# CONFIG_SERIO_PARKBD is not set
726# CONFIG_SERIO_PCIPS2 is not set
727
728#
729# Input Device Drivers
730#
731CONFIG_INPUT_KEYBOARD=y
732CONFIG_KEYBOARD_ATKBD=y
733# CONFIG_KEYBOARD_SUNKBD is not set
734# CONFIG_KEYBOARD_LKKBD is not set
735# CONFIG_KEYBOARD_XTKBD is not set
736# CONFIG_KEYBOARD_NEWTON is not set
737CONFIG_INPUT_MOUSE=y
738CONFIG_MOUSE_PS2=y
739# CONFIG_MOUSE_SERIAL is not set
740# CONFIG_MOUSE_INPORT is not set
741# CONFIG_MOUSE_LOGIBM is not set
742# CONFIG_MOUSE_PC110PAD is not set
743# CONFIG_MOUSE_VSXXXAA is not set
744# CONFIG_INPUT_JOYSTICK is not set
745# CONFIG_INPUT_TOUCHSCREEN is not set
746# CONFIG_INPUT_MISC is not set
747
748#
749# Character devices
750#
751CONFIG_VT=y
752CONFIG_VT_CONSOLE=y
753CONFIG_HW_CONSOLE=y
754# CONFIG_SERIAL_NONSTANDARD is not set
755
756#
757# Serial drivers
758#
759CONFIG_SERIAL_8250=y
760# CONFIG_SERIAL_8250_CONSOLE is not set
761# CONFIG_SERIAL_8250_ACPI is not set
762CONFIG_SERIAL_8250_NR_UARTS=4
763# CONFIG_SERIAL_8250_EXTENDED is not set
764
765#
766# Non-8250 serial port support
767#
768CONFIG_SERIAL_CORE=y
769CONFIG_UNIX98_PTYS=y
770CONFIG_LEGACY_PTYS=y
771CONFIG_LEGACY_PTY_COUNT=256
772CONFIG_PRINTER=y
773# CONFIG_LP_CONSOLE is not set
774# CONFIG_PPDEV is not set
775# CONFIG_TIPAR is not set
776# CONFIG_QIC02_TAPE is not set
777
778#
779# IPMI
780#
781# CONFIG_IPMI_HANDLER is not set
782
783#
784# Watchdog Cards
785#
786# CONFIG_WATCHDOG is not set
787# CONFIG_HW_RANDOM is not set
788# CONFIG_NVRAM is not set
789# CONFIG_RTC is not set
790# CONFIG_GEN_RTC is not set
791# CONFIG_DTLK is not set
792# CONFIG_R3964 is not set
793# CONFIG_APPLICOM is not set
794# CONFIG_SONYPI is not set
795
796#
797# Ftape, the floppy tape device driver
798#
799CONFIG_AGP=y
800# CONFIG_AGP_ALI is not set
801# CONFIG_AGP_ATI is not set
802# CONFIG_AGP_AMD is not set
803# CONFIG_AGP_AMD64 is not set
804CONFIG_AGP_INTEL=y
805# CONFIG_AGP_NVIDIA is not set
806# CONFIG_AGP_SIS is not set
807# CONFIG_AGP_SWORKS is not set
808# CONFIG_AGP_VIA is not set
809# CONFIG_AGP_EFFICEON is not set
810CONFIG_DRM=y
811# CONFIG_DRM_TDFX is not set
812# CONFIG_DRM_GAMMA is not set
813# CONFIG_DRM_R128 is not set
814# CONFIG_DRM_RADEON is not set
815# CONFIG_DRM_I810 is not set
816CONFIG_DRM_I830=y
817# CONFIG_DRM_MGA is not set
818# CONFIG_DRM_SIS is not set
819# CONFIG_MWAVE is not set
820# CONFIG_RAW_DRIVER is not set
821# CONFIG_HANGCHECK_TIMER is not set
822
823#
824# I2C support
825#
826# CONFIG_I2C is not set
827
828#
829# Misc devices
830#
831# CONFIG_IBM_ASM is not set
832
833#
834# Multimedia devices
835#
836# CONFIG_VIDEO_DEV is not set
837
838#
839# Digital Video Broadcasting Devices
840#
841# CONFIG_DVB is not set
842
843#
844# Graphics support
845#
846# CONFIG_FB is not set
847# CONFIG_VIDEO_SELECT is not set
848
849#
850# Console display driver support
851#
852CONFIG_VGA_CONSOLE=y
853# CONFIG_MDA_CONSOLE is not set
854CONFIG_DUMMY_CONSOLE=y
855
856#
857# Sound
858#
859CONFIG_SOUND=y
860
861#
862# Advanced Linux Sound Architecture
863#
864CONFIG_SND=y
865CONFIG_SND_TIMER=y
866CONFIG_SND_PCM=y
867CONFIG_SND_RAWMIDI=y
868CONFIG_SND_SEQUENCER=y
869# CONFIG_SND_SEQ_DUMMY is not set
870CONFIG_SND_OSSEMUL=y
871CONFIG_SND_MIXER_OSS=y
872CONFIG_SND_PCM_OSS=y
873CONFIG_SND_SEQUENCER_OSS=y
874# CONFIG_SND_VERBOSE_PRINTK is not set
875# CONFIG_SND_DEBUG is not set
876
877#
878# Generic devices
879#
880CONFIG_SND_MPU401_UART=y
881# CONFIG_SND_DUMMY is not set
882# CONFIG_SND_VIRMIDI is not set
883# CONFIG_SND_MTPAV is not set
884# CONFIG_SND_SERIAL_U16550 is not set
885# CONFIG_SND_MPU401 is not set
886
887#
888# ISA devices
889#
890# CONFIG_SND_AD1848 is not set
891# CONFIG_SND_CS4231 is not set
892# CONFIG_SND_CS4232 is not set
893# CONFIG_SND_CS4236 is not set
894# CONFIG_SND_ES1688 is not set
895# CONFIG_SND_ES18XX is not set
896# CONFIG_SND_GUSCLASSIC is not set
897# CONFIG_SND_GUSEXTREME is not set
898# CONFIG_SND_GUSMAX is not set
899# CONFIG_SND_INTERWAVE is not set
900# CONFIG_SND_INTERWAVE_STB is not set
901# CONFIG_SND_OPTI92X_AD1848 is not set
902# CONFIG_SND_OPTI92X_CS4231 is not set
903# CONFIG_SND_OPTI93X is not set
904# CONFIG_SND_SB8 is not set
905# CONFIG_SND_SB16 is not set
906# CONFIG_SND_SBAWE is not set
907# CONFIG_SND_WAVEFRONT is not set
908# CONFIG_SND_CMI8330 is not set
909# CONFIG_SND_OPL3SA2 is not set
910# CONFIG_SND_SGALAXY is not set
911# CONFIG_SND_SSCAPE is not set
912
913#
914# PCI devices
915#
916CONFIG_SND_AC97_CODEC=y
917# CONFIG_SND_ALI5451 is not set
918# CONFIG_SND_ATIIXP is not set
919# CONFIG_SND_AU8810 is not set
920# CONFIG_SND_AU8820 is not set
921# CONFIG_SND_AU8830 is not set
922# CONFIG_SND_AZT3328 is not set
923# CONFIG_SND_BT87X is not set
924# CONFIG_SND_CS46XX is not set
925# CONFIG_SND_CS4281 is not set
926# CONFIG_SND_EMU10K1 is not set
927# CONFIG_SND_KORG1212 is not set
928# CONFIG_SND_MIXART is not set
929# CONFIG_SND_NM256 is not set
930# CONFIG_SND_RME32 is not set
931# CONFIG_SND_RME96 is not set
932# CONFIG_SND_RME9652 is not set
933# CONFIG_SND_HDSP is not set
934# CONFIG_SND_TRIDENT is not set
935# CONFIG_SND_YMFPCI is not set
936# CONFIG_SND_ALS4000 is not set
937# CONFIG_SND_CMIPCI is not set
938# CONFIG_SND_ENS1370 is not set
939# CONFIG_SND_ENS1371 is not set
940# CONFIG_SND_ES1938 is not set
941# CONFIG_SND_ES1968 is not set
942# CONFIG_SND_MAESTRO3 is not set
943# CONFIG_SND_FM801 is not set
944# CONFIG_SND_ICE1712 is not set
945# CONFIG_SND_ICE1724 is not set
946CONFIG_SND_INTEL8X0=y
947# CONFIG_SND_INTEL8X0M is not set
948# CONFIG_SND_SONICVIBES is not set
949# CONFIG_SND_VIA82XX is not set
950# CONFIG_SND_VX222 is not set
951
952#
953# ALSA USB devices
954#
955# CONFIG_SND_USB_AUDIO is not set
956
957#
958# Open Sound System
959#
960# CONFIG_SOUND_PRIME is not set
961
962#
963# USB support
964#
965CONFIG_USB=y
966# CONFIG_USB_DEBUG is not set
967
968#
969# Miscellaneous USB options
970#
971CONFIG_USB_DEVICEFS=y
972# CONFIG_USB_BANDWIDTH is not set
973# CONFIG_USB_DYNAMIC_MINORS is not set
974
975#
976# USB Host Controller Drivers
977#
978CONFIG_USB_EHCI_HCD=y
979# CONFIG_USB_EHCI_SPLIT_ISO is not set
980# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
981# CONFIG_USB_OHCI_HCD is not set
982CONFIG_USB_UHCI_HCD=y
983
984#
985# USB Device Class drivers
986#
987# CONFIG_USB_AUDIO is not set
988# CONFIG_USB_BLUETOOTH_TTY is not set
989# CONFIG_USB_MIDI is not set
990# CONFIG_USB_ACM is not set
991CONFIG_USB_PRINTER=y
992CONFIG_USB_STORAGE=y
993# CONFIG_USB_STORAGE_DEBUG is not set
994# CONFIG_USB_STORAGE_DATAFAB is not set
995# CONFIG_USB_STORAGE_FREECOM is not set
996# CONFIG_USB_STORAGE_ISD200 is not set
997# CONFIG_USB_STORAGE_DPCM is not set
998# CONFIG_USB_STORAGE_HP8200e is not set
999# CONFIG_USB_STORAGE_SDDR09 is not set
1000# CONFIG_USB_STORAGE_SDDR55 is not set
1001# CONFIG_USB_STORAGE_JUMPSHOT is not set
1002
1003#
1004# USB Human Interface Devices (HID)
1005#
1006CONFIG_USB_HID=y
1007CONFIG_USB_HIDINPUT=y
1008# CONFIG_HID_FF is not set
1009# CONFIG_USB_HIDDEV is not set
1010# CONFIG_USB_AIPTEK is not set
1011# CONFIG_USB_WACOM is not set
1012# CONFIG_USB_KBTAB is not set
1013# CONFIG_USB_POWERMATE is not set
1014# CONFIG_USB_MTOUCH is not set
1015CONFIG_USB_EGALAX=m
1016# CONFIG_USB_XPAD is not set
1017# CONFIG_USB_ATI_REMOTE is not set
1018
1019#
1020# USB Imaging devices
1021#
1022# CONFIG_USB_MDC800 is not set
1023# CONFIG_USB_MICROTEK is not set
1024# CONFIG_USB_HPUSBSCSI is not set
1025
1026#
1027# USB Multimedia devices
1028#
1029# CONFIG_USB_DABUSB is not set
1030
1031#
1032# Video4Linux support is needed for USB Multimedia device support
1033#
1034
1035#
1036# USB Network adaptors
1037#
1038# CONFIG_USB_CATC is not set
1039# CONFIG_USB_KAWETH is not set
1040# CONFIG_USB_PEGASUS is not set
1041# CONFIG_USB_RTL8150 is not set
1042# CONFIG_USB_USBNET is not set
1043
1044#
1045# USB port drivers
1046#
1047# CONFIG_USB_USS720 is not set
1048
1049#
1050# USB Serial Converter support
1051#
1052# CONFIG_USB_SERIAL is not set
1053
1054#
1055# USB Miscellaneous drivers
1056#
1057# CONFIG_USB_EMI62 is not set
1058# CONFIG_USB_EMI26 is not set
1059# CONFIG_USB_TIGL is not set
1060# CONFIG_USB_AUERSWALD is not set
1061# CONFIG_USB_RIO500 is not set
1062# CONFIG_USB_LEGOTOWER is not set
1063# CONFIG_USB_LCD is not set
1064# CONFIG_USB_LED is not set
1065CONFIG_USB_CYTHERM=m
1066CONFIG_USB_PHIDGETSERVO=m
1067# CONFIG_USB_TEST is not set
1068
1069#
1070# USB Gadget Support
1071#
1072# CONFIG_USB_GADGET is not set
1073
1074#
1075# File systems
1076#
1077CONFIG_EXT2_FS=y
1078# CONFIG_EXT2_FS_XATTR is not set
1079CONFIG_EXT3_FS=y
1080CONFIG_EXT3_FS_XATTR=y
1081# CONFIG_EXT3_FS_POSIX_ACL is not set
1082# CONFIG_EXT3_FS_SECURITY is not set
1083CONFIG_JBD=y
1084# CONFIG_JBD_DEBUG is not set
1085CONFIG_FS_MBCACHE=y
1086# CONFIG_REISERFS_FS is not set
1087# CONFIG_JFS_FS is not set
1088# CONFIG_XFS_FS is not set
1089# CONFIG_MINIX_FS is not set
1090# CONFIG_ROMFS_FS is not set
1091# CONFIG_QUOTA is not set
1092# CONFIG_AUTOFS_FS is not set
1093CONFIG_AUTOFS4_FS=y
1094
1095#
1096# CD-ROM/DVD Filesystems
1097#
1098CONFIG_ISO9660_FS=y
1099CONFIG_JOLIET=y
1100# CONFIG_ZISOFS is not set
1101CONFIG_UDF_FS=y
1102
1103#
1104# DOS/FAT/NT Filesystems
1105#
1106CONFIG_FAT_FS=y
1107CONFIG_MSDOS_FS=y
1108CONFIG_VFAT_FS=y
1109# CONFIG_NTFS_FS is not set
1110
1111#
1112# Pseudo filesystems
1113#
1114CONFIG_PROC_FS=y
1115CONFIG_PROC_KCORE=y
1116CONFIG_SYSFS=y
1117# CONFIG_DEVFS_FS is not set
1118# CONFIG_DEVPTS_FS_XATTR is not set
1119CONFIG_TMPFS=y
1120# CONFIG_HUGETLBFS is not set
1121# CONFIG_HUGETLB_PAGE is not set
1122CONFIG_RAMFS=y
1123
1124#
1125# Miscellaneous filesystems
1126#
1127# CONFIG_ADFS_FS is not set
1128# CONFIG_AFFS_FS is not set
1129# CONFIG_HFS_FS is not set
1130# CONFIG_HFSPLUS_FS is not set
1131# CONFIG_BEFS_FS is not set
1132# CONFIG_BFS_FS is not set
1133# CONFIG_EFS_FS is not set
1134# CONFIG_CRAMFS is not set
1135# CONFIG_VXFS_FS is not set
1136# CONFIG_HPFS_FS is not set
1137# CONFIG_QNX4FS_FS is not set
1138# CONFIG_SYSV_FS is not set
1139# CONFIG_UFS_FS is not set
1140
1141#
1142# Network File Systems
1143#
1144CONFIG_NFS_FS=y
1145# CONFIG_NFS_V3 is not set
1146# CONFIG_NFS_V4 is not set
1147# CONFIG_NFS_DIRECTIO is not set
1148CONFIG_NFSD=y
1149# CONFIG_NFSD_V3 is not set
1150CONFIG_NFSD_TCP=y
1151CONFIG_LOCKD=y
1152CONFIG_EXPORTFS=y
1153CONFIG_SUNRPC=y
1154# CONFIG_RPCSEC_GSS_KRB5 is not set
1155# CONFIG_SMB_FS is not set
1156# CONFIG_CIFS is not set
1157# CONFIG_NCP_FS is not set
1158# CONFIG_CODA_FS is not set
1159# CONFIG_AFS_FS is not set
1160
1161#
1162# Partition Types
1163#
1164# CONFIG_PARTITION_ADVANCED is not set
1165CONFIG_MSDOS_PARTITION=y
1166
1167#
1168# Native Language Support
1169#
1170CONFIG_NLS=y
1171CONFIG_NLS_DEFAULT="iso8859-1"
1172CONFIG_NLS_CODEPAGE_437=y
1173# CONFIG_NLS_CODEPAGE_737 is not set
1174# CONFIG_NLS_CODEPAGE_775 is not set
1175# CONFIG_NLS_CODEPAGE_850 is not set
1176# CONFIG_NLS_CODEPAGE_852 is not set
1177# CONFIG_NLS_CODEPAGE_855 is not set
1178# CONFIG_NLS_CODEPAGE_857 is not set
1179# CONFIG_NLS_CODEPAGE_860 is not set
1180# CONFIG_NLS_CODEPAGE_861 is not set
1181# CONFIG_NLS_CODEPAGE_862 is not set
1182# CONFIG_NLS_CODEPAGE_863 is not set
1183# CONFIG_NLS_CODEPAGE_864 is not set
1184# CONFIG_NLS_CODEPAGE_865 is not set
1185# CONFIG_NLS_CODEPAGE_866 is not set
1186# CONFIG_NLS_CODEPAGE_869 is not set
1187# CONFIG_NLS_CODEPAGE_936 is not set
1188# CONFIG_NLS_CODEPAGE_950 is not set
1189# CONFIG_NLS_CODEPAGE_932 is not set
1190# CONFIG_NLS_CODEPAGE_949 is not set
1191# CONFIG_NLS_CODEPAGE_874 is not set
1192# CONFIG_NLS_ISO8859_8 is not set
1193# CONFIG_NLS_CODEPAGE_1250 is not set
1194# CONFIG_NLS_CODEPAGE_1251 is not set
1195CONFIG_NLS_ISO8859_1=y
1196# CONFIG_NLS_ISO8859_2 is not set
1197# CONFIG_NLS_ISO8859_3 is not set
1198# CONFIG_NLS_ISO8859_4 is not set
1199# CONFIG_NLS_ISO8859_5 is not set
1200# CONFIG_NLS_ISO8859_6 is not set
1201# CONFIG_NLS_ISO8859_7 is not set
1202# CONFIG_NLS_ISO8859_9 is not set
1203# CONFIG_NLS_ISO8859_13 is not set
1204# CONFIG_NLS_ISO8859_14 is not set
1205# CONFIG_NLS_ISO8859_15 is not set
1206# CONFIG_NLS_KOI8_R is not set
1207# CONFIG_NLS_KOI8_U is not set
1208# CONFIG_NLS_UTF8 is not set
1209
1210#
1211# Profiling support
1212#
1213CONFIG_PROFILING=y
1214CONFIG_OPROFILE=y
1215
1216#
1217# Kernel hacking
1218#
1219# CONFIG_DEBUG_KERNEL is not set
1220CONFIG_EARLY_PRINTK=y
1221CONFIG_DEBUG_SPINLOCK_SLEEP=y
1222# CONFIG_FRAME_POINTER is not set
1223CONFIG_4KSTACKS=y
1224CONFIG_X86_FIND_SMP_CONFIG=y
1225CONFIG_X86_MPPARSE=y
1226
1227#
1228# Security options
1229#
1230# CONFIG_SECURITY is not set
1231
1232#
1233# Cryptographic options
1234#
1235# CONFIG_CRYPTO is not set
1236
1237#
1238# Library routines
1239#
1240CONFIG_CRC32=y
1241CONFIG_LIBCRC32C=m
1242CONFIG_X86_SMP=y
1243CONFIG_X86_HT=y
1244CONFIG_X86_BIOS_REBOOT=y
1245CONFIG_X86_TRAMPOLINE=y
1246CONFIG_X86_STD_RESOURCES=y
1247CONFIG_PC=y
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
new file mode 100644
index 000000000000..933787a46b4c
--- /dev/null
+++ b/arch/i386/kernel/Makefile
@@ -0,0 +1,71 @@
1#
2# Makefile for the linux kernel.
3#
4
5extra-y := head.o init_task.o vmlinux.lds
6
7obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
9 pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
10 doublefault.o quirks.o
11
12obj-y += cpu/
13obj-y += timers/
14obj-$(CONFIG_ACPI_BOOT) += acpi/
15obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
16obj-$(CONFIG_MCA) += mca.o
17obj-$(CONFIG_X86_MSR) += msr.o
18obj-$(CONFIG_X86_CPUID) += cpuid.o
19obj-$(CONFIG_MICROCODE) += microcode.o
20obj-$(CONFIG_APM) += apm.o
21obj-$(CONFIG_X86_SMP) += smp.o smpboot.o
22obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
23obj-$(CONFIG_X86_MPPARSE) += mpparse.o
24obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
25obj-$(CONFIG_X86_IO_APIC) += io_apic.o
26obj-$(CONFIG_X86_NUMAQ) += numaq.o
27obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o
28obj-$(CONFIG_KPROBES) += kprobes.o
29obj-$(CONFIG_MODULES) += module.o
30obj-y += sysenter.o vsyscall.o
31obj-$(CONFIG_ACPI_SRAT) += srat.o
32obj-$(CONFIG_HPET_TIMER) += time_hpet.o
33obj-$(CONFIG_EFI) += efi.o efi_stub.o
34obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
35
36EXTRA_AFLAGS := -traditional
37
38obj-$(CONFIG_SCx200) += scx200.o
39
40# vsyscall.o contains the vsyscall DSO images as __initdata.
41# We must build both images before we can assemble it.
42# Note: kbuild does not track this dependency due to usage of .incbin
43$(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so
44targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so)
45targets += vsyscall.lds
46
47# The DSO images are built using a special linker script.
48quiet_cmd_syscall = SYSCALL $@
49 cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \
50 -Wl,-T,$(filter-out FORCE,$^) -o $@
51
52export CPPFLAGS_vsyscall.lds += -P -C -U$(ARCH)
53
54vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1
55SYSCFLAGS_vsyscall-sysenter.so = $(vsyscall-flags)
56SYSCFLAGS_vsyscall-int80.so = $(vsyscall-flags)
57
58$(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
59$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
60 $(call if_changed,syscall)
61
62# We also create a special relocatable object that should mirror the symbol
63# table and layout of the linked DSO. With ld -R we can then refer to
64# these symbols in the kernel code rather than hand-coded addresses.
65extra-y += vsyscall-syms.o
66$(obj)/built-in.o: $(obj)/vsyscall-syms.o
67$(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o
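# Illustration (hedged, not part of this Makefile's logic): once built-in.o
# is linked with -R against vsyscall-syms.o, kernel C code can take the
# address of a symbol that lives inside the DSO, e.g. a hypothetical
#	extern char some_vsyscall_label[];
# resolves to its final in-DSO address instead of a hand-coded constant.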
68
69SYSCFLAGS_vsyscall-syms.o = -r
70$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds $(obj)/vsyscall-sysenter.o FORCE
71 $(call if_changed,syscall)
diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile
new file mode 100644
index 000000000000..ee75cb286cfe
--- /dev/null
+++ b/arch/i386/kernel/acpi/Makefile
@@ -0,0 +1,4 @@
1obj-$(CONFIG_ACPI_BOOT) := boot.o
2obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o
3obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o
4
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
new file mode 100644
index 000000000000..9ba0b957d11f
--- /dev/null
+++ b/arch/i386/kernel/acpi/boot.c
@@ -0,0 +1,908 @@
1/*
2 * boot.c - Architecture-Specific Low-Level ACPI Boot Support
3 *
4 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
5 * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
6 *
7 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 *
23 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
24 */
25
26#include <linux/init.h>
27#include <linux/config.h>
28#include <linux/acpi.h>
29#include <linux/efi.h>
30#include <linux/irq.h>
31#include <linux/module.h>
32
33#include <asm/pgtable.h>
34#include <asm/io_apic.h>
35#include <asm/apic.h>
36#include <asm/io.h>
37#include <asm/irq.h>
38#include <asm/mpspec.h>
39
40#ifdef CONFIG_X86_64
41
42static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) { }
43extern void __init clustered_apic_check(void);
44static inline int ioapic_setup_disabled(void) { return 0; }
45#include <asm/proto.h>
46
47#else /* X86 */
48
49#ifdef CONFIG_X86_LOCAL_APIC
50#include <mach_apic.h>
51#include <mach_mpparse.h>
52#endif /* CONFIG_X86_LOCAL_APIC */
53
54#endif /* X86 */
55
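/*
 * A MADT entry is rejected if the pointer is NULL, if the entry would
 * extend past the end of the mapped table, or if its self-reported
 * length disagrees with the expected structure size -- the usual
 * symptoms of a corrupt or truncated BIOS table.
 */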
56#define BAD_MADT_ENTRY(entry, end) ( \
57 (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
58 ((acpi_table_entry_header *)entry)->length != sizeof(*entry))
59
60#define PREFIX "ACPI: "
61
62#ifdef CONFIG_ACPI_PCI
63int acpi_noirq __initdata; /* skip ACPI IRQ initialization */
64int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */
65#else
66int acpi_noirq __initdata = 1;
67int acpi_pci_disabled __initdata = 1;
68#endif
69int acpi_ht __initdata = 1; /* enable HT */
70
71int acpi_lapic;
72int acpi_ioapic;
73int acpi_strict;
74EXPORT_SYMBOL(acpi_strict);
75
76acpi_interrupt_flags acpi_sci_flags __initdata;
77int acpi_sci_override_gsi __initdata;
78int acpi_skip_timer_override __initdata;
79
80#ifdef CONFIG_X86_LOCAL_APIC
81static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
82#endif
83
84#ifndef __HAVE_ARCH_CMPXCHG
85#warning ACPI uses CMPXCHG, i486 and later hardware
86#endif
87
88#define MAX_MADT_ENTRIES 256
89u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
90 { [0 ... MAX_MADT_ENTRIES-1] = 0xff };
91EXPORT_SYMBOL(x86_acpiid_to_apicid);
92
93/* --------------------------------------------------------------------------
94 Boot-time Configuration
95 -------------------------------------------------------------------------- */
96
97/*
98 * The default interrupt routing model is PIC (8259). This gets
99 * overridden if IOAPICs are enumerated (below).
100 */
101enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
102
103#ifdef CONFIG_X86_64
104
105/* rely on all ACPI tables being in the direct mapping */
106char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
107{
108 if (!phys_addr || !size)
109 return NULL;
110
111 if (phys_addr < (end_pfn_map << PAGE_SHIFT))
112 return __va(phys_addr);
113
114 return NULL;
115}
116
117#else
118
119/*
120 * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
121 * to map the target physical address. The problem is that set_fixmap()
122 * provides a single page, and it is possible that the page is not
123 * sufficient.
124 * By using this area, we can map up to MAX_IO_APICS pages temporarily,
125 * i.e. until the next __va_range() call.
126 *
127 * Important Safety Note: The fixed I/O APIC page numbers are *subtracted*
128 * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and
129 * count idx down while incrementing the phys address.
130 */
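/*
 * Worked example: mapping a 5000-byte table at phys 0x7fe12345 gives
 * offset = 0x345, so the first fixmap page covers 4096 - 0x345 = 0xcbb
 * bytes; the loop below then maps one further page per iteration,
 * counting idx down from FIX_ACPI_END, until mapped_size >= size.
 */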
131char *__acpi_map_table(unsigned long phys, unsigned long size)
132{
133 unsigned long base, offset, mapped_size;
134 int idx;
135
136 if (phys + size < 8*1024*1024)
137 return __va(phys);
138
139 offset = phys & (PAGE_SIZE - 1);
140 mapped_size = PAGE_SIZE - offset;
141 set_fixmap(FIX_ACPI_END, phys);
142 base = fix_to_virt(FIX_ACPI_END);
143
144 /*
145 * Most cases can be covered by the below.
146 */
147 idx = FIX_ACPI_END;
148 while (mapped_size < size) {
149 if (--idx < FIX_ACPI_BEGIN)
150 return NULL; /* cannot handle this */
151 phys += PAGE_SIZE;
152 set_fixmap(idx, phys);
153 mapped_size += PAGE_SIZE;
154 }
155
156 return ((unsigned char *) base + offset);
157}
158#endif
159
160#ifdef CONFIG_PCI_MMCONFIG
161static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
162{
163 struct acpi_table_mcfg *mcfg;
164
165 if (!phys_addr || !size)
166 return -EINVAL;
167
168 mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
169 if (!mcfg) {
170 printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
171 return -ENODEV;
172 }
173
174 if (mcfg->base_reserved) {
175 printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
176 return -ENODEV;
177 }
178
179 pci_mmcfg_base_addr = mcfg->base_address;
180
181 return 0;
182}
183#else
184#define acpi_parse_mcfg NULL
185#endif /* !CONFIG_PCI_MMCONFIG */
186
187#ifdef CONFIG_X86_LOCAL_APIC
188static int __init
189acpi_parse_madt (
190 unsigned long phys_addr,
191 unsigned long size)
192{
193 struct acpi_table_madt *madt = NULL;
194
195 if (!phys_addr || !size)
196 return -EINVAL;
197
198 madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size);
199 if (!madt) {
200 printk(KERN_WARNING PREFIX "Unable to map MADT\n");
201 return -ENODEV;
202 }
203
204 if (madt->lapic_address) {
205 acpi_lapic_addr = (u64) madt->lapic_address;
206
207 printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
208 madt->lapic_address);
209 }
210
211 acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
212
213 return 0;
214}
215
216
217static int __init
218acpi_parse_lapic (
219 acpi_table_entry_header *header, const unsigned long end)
220{
221 struct acpi_table_lapic *processor = NULL;
222
223 processor = (struct acpi_table_lapic*) header;
224
225 if (BAD_MADT_ENTRY(processor, end))
226 return -EINVAL;
227
228 acpi_table_print_madt_entry(header);
229
230 /* no utility in registering a disabled processor */
231 if (processor->flags.enabled == 0)
232 return 0;
233
234 x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
235
236 mp_register_lapic (
237 processor->id, /* APIC ID */
238 processor->flags.enabled); /* Enabled? */
239
240 return 0;
241}
242
243static int __init
244acpi_parse_lapic_addr_ovr (
245 acpi_table_entry_header *header, const unsigned long end)
246{
247 struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
248
249 lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header;
250
251 if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
252 return -EINVAL;
253
254 acpi_lapic_addr = lapic_addr_ovr->address;
255
256 return 0;
257}
258
259static int __init
260acpi_parse_lapic_nmi (
261 acpi_table_entry_header *header, const unsigned long end)
262{
263 struct acpi_table_lapic_nmi *lapic_nmi = NULL;
264
265 lapic_nmi = (struct acpi_table_lapic_nmi*) header;
266
267 if (BAD_MADT_ENTRY(lapic_nmi, end))
268 return -EINVAL;
269
270 acpi_table_print_madt_entry(header);
271
272 if (lapic_nmi->lint != 1)
273 printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
274
275 return 0;
276}
277
278
279#endif /*CONFIG_X86_LOCAL_APIC*/
280
281#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
282
283static int __init
284acpi_parse_ioapic (
285 acpi_table_entry_header *header, const unsigned long end)
286{
287 struct acpi_table_ioapic *ioapic = NULL;
288
289 ioapic = (struct acpi_table_ioapic*) header;
290
291 if (BAD_MADT_ENTRY(ioapic, end))
292 return -EINVAL;
293
294 acpi_table_print_madt_entry(header);
295
296 mp_register_ioapic (
297 ioapic->id,
298 ioapic->address,
299 ioapic->global_irq_base);
300
301 return 0;
302}
303
304/*
305 * Parse Interrupt Source Override for the ACPI SCI
306 */
307static void
308acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
309{
310 if (trigger == 0) /* compatible SCI trigger is level */
311 trigger = 3;
312
313 if (polarity == 0) /* compatible SCI polarity is low */
314 polarity = 3;
315
316 /* Command-line over-ride via acpi_sci= */
317 if (acpi_sci_flags.trigger)
318 trigger = acpi_sci_flags.trigger;
319
320 if (acpi_sci_flags.polarity)
321 polarity = acpi_sci_flags.polarity;
322
323 /*
324 * mp_config_acpi_legacy_irqs() has already set up IRQs < 16.
325 * If the GSI is < 16, this will update its flags;
326 * otherwise it will create a new mp_irqs[] entry.
327 */
328 mp_override_legacy_irq(gsi, polarity, trigger, gsi);
329
330 /*
331 * Stash the override to indicate we've been here,
332 * and for a later update of acpi_fadt.
333 */
334 acpi_sci_override_gsi = gsi;
335 return;
336}
337
338static int __init
339acpi_parse_int_src_ovr (
340 acpi_table_entry_header *header, const unsigned long end)
341{
342 struct acpi_table_int_src_ovr *intsrc = NULL;
343
344 intsrc = (struct acpi_table_int_src_ovr*) header;
345
346 if (BAD_MADT_ENTRY(intsrc, end))
347 return -EINVAL;
348
349 acpi_table_print_madt_entry(header);
350
351 if (intsrc->bus_irq == acpi_fadt.sci_int) {
352 acpi_sci_ioapic_setup(intsrc->global_irq,
353 intsrc->flags.polarity, intsrc->flags.trigger);
354 return 0;
355 }
356
357 if (acpi_skip_timer_override &&
358 intsrc->bus_irq == 0 && intsrc->global_irq == 2) {
359 printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
360 return 0;
361 }
362
363 mp_override_legacy_irq (
364 intsrc->bus_irq,
365 intsrc->flags.polarity,
366 intsrc->flags.trigger,
367 intsrc->global_irq);
368
369 return 0;
370}
371
372
373static int __init
374acpi_parse_nmi_src (
375 acpi_table_entry_header *header, const unsigned long end)
376{
377 struct acpi_table_nmi_src *nmi_src = NULL;
378
379 nmi_src = (struct acpi_table_nmi_src*) header;
380
381 if (BAD_MADT_ENTRY(nmi_src, end))
382 return -EINVAL;
383
384 acpi_table_print_madt_entry(header);
385
386 /* TBD: Support nmi_src entries? */
387
388 return 0;
389}
390
391#endif /* CONFIG_X86_IO_APIC */
392
393#ifdef CONFIG_ACPI_BUS
394
395/*
396 * acpi_pic_sci_set_trigger()
397 *
398 * use ELCR to set PIC-mode trigger type for SCI
399 *
400 * If a PIC-mode SCI is not recognized or gives spurious IRQ7s,
401 * it may require edge trigger -- use "acpi_sci=edge"
402 *
403 * Ports 0x4d0-0x4d1 are ELCR1 and ELCR2, the Edge/Level Control Registers
404 * for the 8259 PIC. bit[n] = 1 means irq[n] is level, otherwise edge.
405 * ELCR1 is IRQs 0-7 (IRQ 0, 1, 2 must be 0)
406 * ELCR2 is IRQs 8-15 (IRQ 8, 13 must be 0)
407 */
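/*
 * Worked example: a level-triggered SCI on IRQ 9 gives mask = 1 << 9 =
 * 0x0200, so bit 1 of the second register (port 0x4d1) is set below.
 */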
408
409void __init
410acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
411{
412 unsigned int mask = 1 << irq;
413 unsigned int old, new;
414
415 /* Read the old ELCR mask */
416 old = inb(0x4d0) | (inb(0x4d1) << 8);
417
418 /*
419 * If we use ACPI to set PCI irq's, then we should clear ELCR
420 * since we will set it correctly as we enable the PCI irq
421 * routing.
422 */
423 new = acpi_noirq ? old : 0;
424
425 /*
426 * Update SCI information in the ELCR, it isn't in the PCI
427 * routing tables..
428 */
429 switch (trigger) {
430 case 1: /* Edge - clear */
431 new &= ~mask;
432 break;
433 case 3: /* Level - set */
434 new |= mask;
435 break;
436 }
437
438 if (old == new)
439 return;
440
441 printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old);
442 outb(new, 0x4d0);
443 outb(new >> 8, 0x4d1);
444}
445
446
447#endif /* CONFIG_ACPI_BUS */
448
449int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
450{
451#ifdef CONFIG_X86_IO_APIC
452 if (use_pci_vector() && !platform_legacy_irq(gsi))
453 *irq = IO_APIC_VECTOR(gsi);
454 else
455#endif
456 *irq = gsi;
457 return 0;
458}
459
460unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low)
461{
462 unsigned int irq;
463 unsigned int plat_gsi = gsi;
464
465#ifdef CONFIG_PCI
466 /*
467 * Make sure all (legacy) PCI IRQs are set as level-triggered.
468 */
469 if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
470 extern void eisa_set_level_irq(unsigned int irq);
471
472 if (edge_level == ACPI_LEVEL_SENSITIVE)
473 eisa_set_level_irq(gsi);
474 }
475#endif
476
477#ifdef CONFIG_X86_IO_APIC
478 if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
479 plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low);
480 }
481#endif
482 acpi_gsi_to_irq(plat_gsi, &irq);
483 return irq;
484}
485EXPORT_SYMBOL(acpi_register_gsi);
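/*
 * Hedged usage sketch (hypothetical caller, not taken from this file):
 * a driver routing a level-triggered, active-low GSI before calling
 * request_irq() would do roughly:
 *
 *	unsigned int irq = acpi_register_gsi(gsi, ACPI_LEVEL_SENSITIVE,
 *					     ACPI_ACTIVE_LOW);
 */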
486
487/*
488 * ACPI based hotplug support for CPU
489 */
490#ifdef CONFIG_ACPI_HOTPLUG_CPU
491int
492acpi_map_lsapic(acpi_handle handle, int *pcpu)
493{
494 /* TBD */
495 return -EINVAL;
496}
497EXPORT_SYMBOL(acpi_map_lsapic);
498
499
500int
501acpi_unmap_lsapic(int cpu)
502{
503 /* TBD */
504 return -EINVAL;
505}
506EXPORT_SYMBOL(acpi_unmap_lsapic);
507#endif /* CONFIG_ACPI_HOTPLUG_CPU */
508
509static unsigned long __init
510acpi_scan_rsdp (
511 unsigned long start,
512 unsigned long length)
513{
514 unsigned long offset = 0;
515 unsigned long sig_len = sizeof("RSD PTR ") - 1;
516
517 /*
518 * Scan all 16-byte boundaries of the physical memory region for the
519 * RSDP signature.
520 */
521 for (offset = 0; offset < length; offset += 16) {
522 if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
523 continue;
524 return (start + offset);
525 }
526
527 return 0;
528}
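/*
 * For reference -- the "RSD PTR " signature matched above begins the
 * ACPI 1.0 root pointer structure (layout per the ACPI spec; sketched
 * here only for orientation, not used by this code directly):
 *
 *	struct acpi_rsdp_v1 {
 *		char signature[8];	-- "RSD PTR "
 *		u8   checksum;
 *		char oem_id[6];
 *		u8   revision;
 *		u32  rsdt_address;	-- physical address of the RSDT
 *	} __attribute__((packed));
 */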
529
530static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
531{
532 struct acpi_table_sbf *sb;
533
534 if (!phys_addr || !size)
535 return -EINVAL;
536
537 sb = (struct acpi_table_sbf *) __acpi_map_table(phys_addr, size);
538 if (!sb) {
539 printk(KERN_WARNING PREFIX "Unable to map SBF\n");
540 return -ENODEV;
541 }
542
543 sbf_port = sb->sbf_cmos; /* Save CMOS port */
544
545 return 0;
546}
547
548
549#ifdef CONFIG_HPET_TIMER
550
551static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
552{
553 struct acpi_table_hpet *hpet_tbl;
554
555 if (!phys || !size)
556 return -EINVAL;
557
558 hpet_tbl = (struct acpi_table_hpet *) __acpi_map_table(phys, size);
559 if (!hpet_tbl) {
560 printk(KERN_WARNING PREFIX "Unable to map HPET\n");
561 return -ENODEV;
562 }
563
564 if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) {
565 printk(KERN_WARNING PREFIX "HPET timers must be located in "
566 "memory.\n");
567 return -1;
568 }
569
570#ifdef CONFIG_X86_64
571 vxtime.hpet_address = hpet_tbl->addr.addrl |
572 ((long) hpet_tbl->addr.addrh << 32);
573
574 printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
575 hpet_tbl->id, vxtime.hpet_address);
576#else /* X86 */
577 {
578 extern unsigned long hpet_address;
579
580 hpet_address = hpet_tbl->addr.addrl;
581 printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
582 hpet_tbl->id, hpet_address);
583 }
584#endif /* X86 */
585
586 return 0;
587}
588#else
589#define acpi_parse_hpet NULL
590#endif
591
592#ifdef CONFIG_X86_PM_TIMER
593extern u32 pmtmr_ioport;
594#endif
595
596static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
597{
598 struct fadt_descriptor_rev2 *fadt = NULL;
599
600 fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size);
601 if(!fadt) {
602 printk(KERN_WARNING PREFIX "Unable to map FADT\n");
603 return 0;
604 }
605
606#ifdef CONFIG_ACPI_INTERPRETER
607 /* initialize sci_int early for INT_SRC_OVR MADT parsing */
608 acpi_fadt.sci_int = fadt->sci_int;
609#endif
610
611#ifdef CONFIG_X86_PM_TIMER
612 /* detect the location of the ACPI PM Timer */
613 if (fadt->revision >= FADT2_REVISION_ID) {
614 /* FADT rev. 2 */
615 if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO)
616 return 0;
617
618 pmtmr_ioport = fadt->xpm_tmr_blk.address;
619 } else {
620 /* FADT rev. 1 */
621 pmtmr_ioport = fadt->V1_pm_tmr_blk;
622 }
623 if (pmtmr_ioport)
624 printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport);
625#endif
626 return 0;
627}
628
629
630unsigned long __init
631acpi_find_rsdp (void)
632{
633 unsigned long rsdp_phys = 0;
634
635 if (efi_enabled) {
636 if (efi.acpi20)
637 return __pa(efi.acpi20);
638 else if (efi.acpi)
639 return __pa(efi.acpi);
640 }
641 /*
642 * Scan memory looking for the RSDP signature. First search EBDA (low
643 * memory) paragraphs and then search upper memory (E0000-FFFFF).
644 */
645 rsdp_phys = acpi_scan_rsdp (0, 0x400);
646 if (!rsdp_phys)
647 rsdp_phys = acpi_scan_rsdp (0xE0000, 0xFFFFF);
648
649 return rsdp_phys;
650}
651
652#ifdef CONFIG_X86_LOCAL_APIC
653/*
654 * Parse LAPIC entries in MADT
655 * returns 0 on success, < 0 on error
656 */
657static int __init
658acpi_parse_madt_lapic_entries(void)
659{
660 int count;
661
662 /*
663 * Note that the LAPIC address is obtained from the MADT (32-bit value)
664 * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
665 */
666
667 count = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0);
668 if (count < 0) {
669 printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
670 return count;
671 }
672
673 mp_register_lapic_address(acpi_lapic_addr);
674
675 count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic,
676 MAX_APICS);
677 if (!count) {
678 printk(KERN_ERR PREFIX "No LAPIC entries present\n");
679 /* TBD: Cleanup to allow fallback to MPS */
680 return -ENODEV;
681 }
682 else if (count < 0) {
683 printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
684 /* TBD: Cleanup to allow fallback to MPS */
685 return count;
686 }
687
688 count = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0);
689 if (count < 0) {
690 printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
691 /* TBD: Cleanup to allow fallback to MPS */
692 return count;
693 }
694 return 0;
695}
696#endif /* CONFIG_X86_LOCAL_APIC */
697
698#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
699/*
700 * Parse IOAPIC related entries in MADT
701 * returns 0 on success, < 0 on error
702 */
703static int __init
704acpi_parse_madt_ioapic_entries(void)
705{
706 int count;
707
708 /*
709 * ACPI interpreter is required to complete interrupt setup,
710 * so if it is off, don't enumerate the io-apics with ACPI.
711 * If MPS is present, it will handle them;
712 * otherwise the system will stay in PIC mode.
713 */
714 if (acpi_disabled || acpi_noirq) {
715 return -ENODEV;
716 }
717
718 /*
719 * if "noapic" boot option, don't look for IO-APICs
720 */
721 if (skip_ioapic_setup) {
722 printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
723 "due to 'noapic' option.\n");
724 return -ENODEV;
725 }
726
727 count = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic, MAX_IO_APICS);
728 if (!count) {
729 printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
730 return -ENODEV;
731 }
732 else if (count < 0) {
733 printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
734 return count;
735 }
736
737 count = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, NR_IRQ_VECTORS);
738 if (count < 0) {
739 printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
740 /* TBD: Cleanup to allow fallback to MPS */
741 return count;
742 }
743
744 /*
745 * If the BIOS did not supply an INT_SRC_OVR for the SCI,
746 * pretend we got one so we can set the SCI flags.
747 */
748 if (!acpi_sci_override_gsi)
749 acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
750
751 /* Fill in identity legacy mappings where there is no override */
752 mp_config_acpi_legacy_irqs();
753
754 count = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, NR_IRQ_VECTORS);
755 if (count < 0) {
756 printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
757 /* TBD: Cleanup to allow fallback to MPS */
758 return count;
759 }
760
761 return 0;
762}
763#else
764static inline int acpi_parse_madt_ioapic_entries(void)
765{
766 return -1;
767}
768#endif /* !(CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER) */
769
770
771static void __init
772acpi_process_madt(void)
773{
774#ifdef CONFIG_X86_LOCAL_APIC
775 int count, error;
776
777 count = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
778 if (count >= 1) {
779
780 /*
781 * Parse MADT LAPIC entries
782 */
783 error = acpi_parse_madt_lapic_entries();
784 if (!error) {
785 acpi_lapic = 1;
786
787 /*
788 * Parse MADT IO-APIC entries
789 */
790 error = acpi_parse_madt_ioapic_entries();
791 if (!error) {
792 acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
793 acpi_irq_balance_set(NULL);
794 acpi_ioapic = 1;
795
796 smp_found_config = 1;
797 clustered_apic_check();
798 }
799 }
800 if (error == -EINVAL) {
801 /*
802 * Dell Precision Workstation 410, 610 come here.
803 */
804 printk(KERN_ERR PREFIX "Invalid BIOS MADT, disabling ACPI\n");
805 disable_acpi();
806 }
807 }
808#endif
809 return;
810}
811
812/*
813 * acpi_boot_table_init() and acpi_boot_init()
814 * called from setup_arch(), always.
815 * 1. checksums all tables
816 * 2. enumerates lapics
817 * 3. enumerates io-apics
818 *
819 * acpi_table_init() is separate to allow reading SRAT without
820 * other side effects.
821 *
822 * side effects of acpi_boot_init:
823 * acpi_lapic = 1 if LAPIC found
824 * acpi_ioapic = 1 if IOAPIC found
825 * if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
826 * if acpi_blacklisted() acpi_disabled = 1;
827 * acpi_irq_model=...
828 * ...
829 *
830 * return value: (currently ignored)
831 * 0: success
832 * !0: failure
833 */
834
835int __init
836acpi_boot_table_init(void)
837{
838 int error;
839
840 /*
841 * If acpi_disabled, bail out
842 * One exception: acpi=ht continues far enough to enumerate LAPICs
843 */
844 if (acpi_disabled && !acpi_ht)
845 return 1;
846
847 /*
848 * Initialize the ACPI boot-time table parser.
849 */
850 error = acpi_table_init();
851 if (error) {
852 disable_acpi();
853 return error;
854 }
855
856#ifdef __i386__
857 check_acpi_pci();
858#endif
859
860 acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
861
862 /*
863 * blacklist may disable ACPI entirely
864 */
865 error = acpi_blacklisted();
866 if (error) {
867 extern int acpi_force;
868
869 if (acpi_force) {
870 printk(KERN_WARNING PREFIX "acpi=force override\n");
871 } else {
872 printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
873 disable_acpi();
874 return error;
875 }
876 }
877
878 return 0;
879}
880
881
882int __init acpi_boot_init(void)
883{
884 /*
885 * If acpi_disabled, bail out
886 * One exception: acpi=ht continues far enough to enumerate LAPICs
887 */
888 if (acpi_disabled && !acpi_ht)
889 return 1;
890
891 acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
892
893 /*
894 * set sci_int and PM timer address
895 */
896 acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
897
898 /*
899 * Process the Multiple APIC Description Table (MADT), if present
900 */
901 acpi_process_madt();
902
903 acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
904 acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
905
906 return 0;
907}
908
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c
new file mode 100644
index 000000000000..726a5ca4b165
--- /dev/null
+++ b/arch/i386/kernel/acpi/earlyquirk.c
@@ -0,0 +1,51 @@
1/*
2 * Do early PCI probing for bug detection when the main PCI subsystem is
3 * not up yet.
4 */
5#include <linux/init.h>
6#include <linux/kernel.h>
7#include <linux/pci.h>
8#include <asm/pci-direct.h>
9#include <asm/acpi.h>
10
11static int __init check_bridge(int vendor, int device)
12{
13 /* According to Nvidia, all timer overrides are bogus. Just ignore
14 them all. */
15 if (vendor == PCI_VENDOR_ID_NVIDIA) {
16 acpi_skip_timer_override = 1;
17 }
18 return 0;
19}
20
21void __init check_acpi_pci(void)
22{
23 int num,slot,func;
24
25 /* Assume the machine supports type 1. If not, it will
26 always read 0xffffffff and should not have any side effects. */
27
28 /* Poor man's PCI discovery */
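	/*
	 * Type 1 configuration cycles use I/O port 0xcf8 for the address
	 * (enable bit | bus | device | function | register offset) and
	 * port 0xcfc for the data; read_pci_config() wraps that pair of
	 * accesses.
	 */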
29 for (num = 0; num < 32; num++) {
30 for (slot = 0; slot < 32; slot++) {
31 for (func = 0; func < 8; func++) {
32 u32 class;
33 u32 vendor;
34 class = read_pci_config(num,slot,func,
35 PCI_CLASS_REVISION);
36 if (class == 0xffffffff)
37 break;
38
39 if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
40 continue;
41
42 vendor = read_pci_config(num, slot, func,
43 PCI_VENDOR_ID);
44
45 if (check_bridge(vendor&0xffff, vendor >> 16))
46 return;
47 }
48
49 }
50 }
51}
diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c
new file mode 100644
index 000000000000..28bb0514bb6e
--- /dev/null
+++ b/arch/i386/kernel/acpi/sleep.c
@@ -0,0 +1,93 @@
1/*
2 * sleep.c - x86-specific ACPI sleep support.
3 *
4 * Copyright (C) 2001-2003 Patrick Mochel
5 * Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz>
6 */
7
8#include <linux/acpi.h>
9#include <linux/bootmem.h>
10#include <asm/smp.h>
11#include <asm/tlbflush.h>
12
13/* address in low memory of the wakeup routine. */
14unsigned long acpi_wakeup_address = 0;
15unsigned long acpi_video_flags;
16extern char wakeup_start, wakeup_end;
17
18extern void zap_low_mappings(void);
19
20extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
21
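/*
 * Mirror the kernel half of the pgd into the user (low) half, so that
 * low virtual addresses identity-map low physical memory; the real-mode
 * wakeup trampoline relies on this while paging is re-enabled on resume.
 */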
22static void init_low_mapping(pgd_t *pgd, int pgd_limit)
23{
24 int pgd_ofs = 0;
25
26 while ((pgd_ofs < pgd_limit) && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) {
27 set_pgd(pgd, *(pgd+USER_PTRS_PER_PGD));
28 pgd_ofs++, pgd++;
29 }
30 flush_tlb_all();
31}
32
33/**
34 * acpi_save_state_mem - save kernel state
35 *
36 * Create an identity mapped page table and copy the wakeup routine to
37 * low memory.
38 */
39int acpi_save_state_mem (void)
40{
41 if (!acpi_wakeup_address)
42 return 1;
43 init_low_mapping(swapper_pg_dir, USER_PTRS_PER_PGD);
44 memcpy((void *) acpi_wakeup_address, &wakeup_start, &wakeup_end - &wakeup_start);
45 acpi_copy_wakeup_routine(acpi_wakeup_address);
46
47 return 0;
48}
49
50/*
51 * acpi_restore_state - undo effects of acpi_save_state_mem
52 */
53void acpi_restore_state_mem (void)
54{
55 zap_low_mappings();
56}
57
58/**
59 * acpi_reserve_bootmem - do _very_ early ACPI initialisation
60 *
61 * We allocate a page from the first 1MB of memory for the wakeup
62 * routine for when we come back from a sleep state. The
63 * runtime allocator allows specification of <16MB pages, but not
64 * <1MB pages.
65 */
66void __init acpi_reserve_bootmem(void)
67{
68 if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) {
69 printk(KERN_ERR "ACPI: Wakeup code way too big, S3 disabled.\n");
70 return;
71 }
72
73 acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
74 if (!acpi_wakeup_address)
75 printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
76}
77
78static int __init acpi_sleep_setup(char *str)
79{
80 while ((str != NULL) && (*str != '\0')) {
81 if (strncmp(str, "s3_bios", 7) == 0)
82 acpi_video_flags = 1;
83 if (strncmp(str, "s3_mode", 7) == 0)
84 acpi_video_flags |= 2;
85 str = strchr(str, ',');
86 if (str != NULL)
87 str += strspn(str, ", \t");
88 }
89 return 1;
90}
91
92
93__setup("acpi_sleep=", acpi_sleep_setup);
diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S
new file mode 100644
index 000000000000..39d32484f6f5
--- /dev/null
+++ b/arch/i386/kernel/acpi/wakeup.S
@@ -0,0 +1,318 @@
1.text
2#include <linux/linkage.h>
3#include <asm/segment.h>
4#include <asm/page.h>
5
6#
7# wakeup_code runs in real mode at an unknown address (determined at run-time).
8# Therefore it must only use relative jumps/calls.
9#
10# Do we need to deal with A20? It is okay: the ACPI spec says A20 must be enabled
11#
12# If physical address of wakeup_code is 0x12345, BIOS should call us with
13# cs = 0x1234, eip = 0x05
14#
15
16ALIGN
17 .align 4096
18ENTRY(wakeup_start)
19wakeup_code:
20 wakeup_code_start = .
21 .code16
22
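	# Progress breadcrumbs: point %fs at VGA text memory (segment
	# 0xb800) and write single yellow-on-black letters (attribute
	# 0x0e) at fixed offsets, so a resume hang can be localised
	# on-screen.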
23 movw $0xb800, %ax
24 movw %ax,%fs
25 movw $0x0e00 + 'L', %fs:(0x10)
26
27 cli
28 cld
29
30 # setup data segment
31 movw %cs, %ax
32 movw %ax, %ds # Make ds:0 point to wakeup_start
33 movw %ax, %ss
34 mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board
35 movw $0x0e00 + 'S', %fs:(0x12)
36
37 pushl $0 # Kill any dangerous flags
38 popfl
39
40 movl real_magic - wakeup_code, %eax
41 cmpl $0x12345678, %eax
42 jne bogus_real_magic
43
44 testl $1, video_flags - wakeup_code
45 jz 1f
46 lcall $0xc000,$3
47 movw %cs, %ax
48 movw %ax, %ds # BIOS might have played with that
49 movw %ax, %ss
501:
51
52 testl $2, video_flags - wakeup_code
53 jz 1f
54 mov video_mode - wakeup_code, %ax
55 call mode_set
561:
57
58 # set up page table
59 movl $swapper_pg_dir-__PAGE_OFFSET, %eax
60 movl %eax, %cr3
61
62 testl $1, real_efer_save_restore - wakeup_code
63 jz 4f
64 # restore efer setting
65 movl real_save_efer_edx - wakeup_code, %edx
66 movl real_save_efer_eax - wakeup_code, %eax
67 mov $0xc0000080, %ecx
68 wrmsr
694:
70 # make sure %cr4 is set correctly (features, etc)
71 movl real_save_cr4 - wakeup_code, %eax
72 movl %eax, %cr4
73 movw $0xb800, %ax
74 movw %ax,%fs
75 movw $0x0e00 + 'i', %fs:(0x12)
76
77 # need a gdt
78 lgdt real_save_gdt - wakeup_code
79
80 movl real_save_cr0 - wakeup_code, %eax
81 movl %eax, %cr0
82 jmp 1f
831:
84 movw $0x0e00 + 'n', %fs:(0x14)
85
86 movl real_magic - wakeup_code, %eax
87 cmpl $0x12345678, %eax
88 jne bogus_real_magic
89
90 ljmpl $__KERNEL_CS,$wakeup_pmode_return
91
92real_save_gdt: .word 0
93 .long 0
94real_save_cr0: .long 0
95real_save_cr3: .long 0
96real_save_cr4: .long 0
97real_magic: .long 0
98video_mode: .long 0
99video_flags: .long 0
100real_efer_save_restore: .long 0
101real_save_efer_edx: .long 0
102real_save_efer_eax: .long 0
103
104bogus_real_magic:
105 movw $0x0e00 + 'B', %fs:(0x12)
106 jmp bogus_real_magic
107
108/* This code uses an extended set of video mode numbers. These include:
109 * Aliases for standard modes
110 * NORMAL_VGA (-1)
111 * EXTENDED_VGA (-2)
112 * ASK_VGA (-3)
113 * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
114 * of compatibility when extending the table. These are between 0x00 and 0xff.
115 */
116#define VIDEO_FIRST_MENU 0x0000
117
118/* Standard BIOS video modes (BIOS number + 0x0100) */
119#define VIDEO_FIRST_BIOS 0x0100
120
121/* VESA BIOS video modes (VESA number + 0x0200) */
122#define VIDEO_FIRST_VESA 0x0200
123
124/* Video7 special modes (BIOS number + 0x0900) */
125#define VIDEO_FIRST_V7 0x0900
126
127# Setting of user mode (AX=mode ID) => CF=success
128mode_set:
129 movw %ax, %bx
130#if 0
131 cmpb $0xff, %ah
132 jz setalias
133
134 testb $VIDEO_RECALC>>8, %ah
135 jnz _setrec
136
137 cmpb $VIDEO_FIRST_RESOLUTION>>8, %ah
138 jnc setres
139
140 cmpb $VIDEO_FIRST_SPECIAL>>8, %ah
141 jz setspc
142
143 cmpb $VIDEO_FIRST_V7>>8, %ah
144 jz setv7
145#endif
146
147 cmpb $VIDEO_FIRST_VESA>>8, %ah
148 jnc check_vesa
149#if 0
150 orb %ah, %ah
151 jz setmenu
152#endif
153
154 decb %ah
155# jz setbios Add bios modes later
156
157setbad: clc
158 ret
159
160check_vesa:
161 subb $VIDEO_FIRST_VESA>>8, %bh
162 orw $0x4000, %bx # Use linear frame buffer
163 movw $0x4f02, %ax # VESA BIOS mode set call
164 int $0x10
165 cmpw $0x004f, %ax # AL=4f if implemented
166 jnz _setbad # AH=0 if OK
167
168 stc
169 ret
170
171_setbad: jmp setbad
172
173 .code32
174 ALIGN
175
176.org 0x800
177wakeup_stack_begin: # Stack grows down
178
179.org 0xff0 # Just below end of page
180wakeup_stack:
181ENTRY(wakeup_end)
182
183.org 0x1000
184
185wakeup_pmode_return:
186 movw $__KERNEL_DS, %ax
187 movw %ax, %ss
188 movw %ax, %ds
189 movw %ax, %es
190 movw %ax, %fs
191 movw %ax, %gs
192 movw $0x0e00 + 'u', 0xb8016
193
194 # reload the gdt, as we need the full 32-bit address
195 lgdt saved_gdt
196 lidt saved_idt
197 lldt saved_ldt
198 ljmp $(__KERNEL_CS),$1f
1991:
200 movl %cr3, %eax
201 movl %eax, %cr3
202 wbinvd
203
204 # and restore the stack ... but you need gdt for this to work
205 movl saved_context_esp, %esp
206
207 movl %cs:saved_magic, %eax
208 cmpl $0x12345678, %eax
209 jne bogus_magic
210
211 # jump to place where we left off
212 movl saved_eip,%eax
213 jmp *%eax
214
215bogus_magic:
216 movw $0x0e00 + 'B', 0xb8018
217 jmp bogus_magic
218
219
220##
221# acpi_copy_wakeup_routine
222#
223# Copy the above routine to low memory.
224#
225# Parameters:
226# %eax: place to copy wakeup routine to
227#
228# Returned address is location of code in low memory (past data and stack)
229#
230ENTRY(acpi_copy_wakeup_routine)
231
232 sgdt saved_gdt
233 sidt saved_idt
234 sldt saved_ldt
235 str saved_tss
236
237 movl nx_enabled, %edx
238 movl %edx, real_efer_save_restore - wakeup_start (%eax)
239 testl $1, real_efer_save_restore - wakeup_start (%eax)
240 jz 2f
241 # save efer setting
242 pushl %eax
243 movl %eax, %ebx
244 mov $0xc0000080, %ecx
245 rdmsr
246 movl %edx, real_save_efer_edx - wakeup_start (%ebx)
247 movl %eax, real_save_efer_eax - wakeup_start (%ebx)
248 popl %eax
2492:
250
251 movl %cr3, %edx
252 movl %edx, real_save_cr3 - wakeup_start (%eax)
253 movl %cr4, %edx
254 movl %edx, real_save_cr4 - wakeup_start (%eax)
255 movl %cr0, %edx
256 movl %edx, real_save_cr0 - wakeup_start (%eax)
257 sgdt real_save_gdt - wakeup_start (%eax)
258
259 movl saved_videomode, %edx
260 movl %edx, video_mode - wakeup_start (%eax)
261 movl acpi_video_flags, %edx
262 movl %edx, video_flags - wakeup_start (%eax)
263 movl $0x12345678, real_magic - wakeup_start (%eax)
264 movl $0x12345678, saved_magic
265 ret
266
267.data
268ALIGN
269ENTRY(saved_magic) .long 0
270ENTRY(saved_eip) .long 0
271
272save_registers:
273 leal 4(%esp), %eax
274 movl %eax, saved_context_esp
275 movl %ebx, saved_context_ebx
276 movl %ebp, saved_context_ebp
277 movl %esi, saved_context_esi
278 movl %edi, saved_context_edi
279 pushfl ; popl saved_context_eflags
280
281 movl $ret_point, saved_eip
282 ret
283
284
285restore_registers:
286 movl saved_context_ebp, %ebp
287 movl saved_context_ebx, %ebx
288 movl saved_context_esi, %esi
289 movl saved_context_edi, %edi
290 pushl saved_context_eflags ; popfl
291 ret
292
293ENTRY(do_suspend_lowlevel)
294 call save_processor_state
295 call save_registers
296 pushl $3
297 call acpi_enter_sleep_state
298 addl $4, %esp
299 ret
300 .p2align 4,,7
301ret_point:
302 call restore_registers
303 call restore_processor_state
304 ret
305
306ENTRY(do_suspend_lowlevel_s4bios)
307 call save_processor_state
308 call save_registers
309 call acpi_enter_sleep_state_s4bios
310 ret
311
312ALIGN
313# saved registers
314saved_gdt: .long 0,0
315saved_idt: .long 0,0
316saved_ldt: .long 0
317saved_tss: .long 0
318
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
new file mode 100644
index 000000000000..35c1751ea0b0
--- /dev/null
+++ b/arch/i386/kernel/apic.c
@@ -0,0 +1,1278 @@
1/*
2 * Local APIC handling, local APIC timers
3 *
4 * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
5 *
6 * Fixes
7 * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
8 * thanks to Eric Gilmore
9 * and Rolf G. Tews
10 * for testing these extensively.
11 * Maciej W. Rozycki : Various updates and fixes.
12 * Mikael Pettersson : Power Management for UP-APIC.
13 * Pavel Machek and
14 * Mikael Pettersson : PM converted to driver model.
15 */
16
17#include <linux/config.h>
18#include <linux/init.h>
19
20#include <linux/mm.h>
21#include <linux/irq.h>
22#include <linux/delay.h>
23#include <linux/bootmem.h>
24#include <linux/smp_lock.h>
25#include <linux/interrupt.h>
26#include <linux/mc146818rtc.h>
27#include <linux/kernel_stat.h>
28#include <linux/sysdev.h>
29
30#include <asm/atomic.h>
31#include <asm/smp.h>
32#include <asm/mtrr.h>
33#include <asm/mpspec.h>
34#include <asm/desc.h>
35#include <asm/arch_hooks.h>
36#include <asm/hpet.h>
37
38#include <mach_apic.h>
39
40#include "io_ports.h"
41
42/*
43 * Debug level
44 */
45int apic_verbosity;
46
47
48static void apic_pm_activate(void);
49
50/*
51 * 'what should we do if we get a hw irq event on an illegal vector?'
52 * Each architecture has to answer this itself.
53 */
54void ack_bad_irq(unsigned int irq)
55{
56 printk("unexpected IRQ trap at vector %02x\n", irq);
57 /*
58 * Currently unexpected vectors happen only on SMP and APIC.
59 * We _must_ ack these because every local APIC has only N
60 * irq slots per priority level, and a 'hanging, unacked' IRQ
61 * holds up an irq slot - in excessive cases (when multiple
62 * unexpected vectors occur) that might lock up the APIC
63 * completely.
64 */
65 ack_APIC_irq();
66}
67
68void __init apic_intr_init(void)
69{
70#ifdef CONFIG_SMP
71 smp_intr_init();
72#endif
73 /* self generated IPI for local APIC timer */
74 set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
75
76 /* IPI vectors for APIC spurious and error interrupts */
77 set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
78 set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
79
80 /* thermal monitor LVT interrupt */
81#ifdef CONFIG_X86_MCE_P4THERMAL
82 set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
83#endif
84}
85
86/* Using APIC to generate smp_local_timer_interrupt? */
87int using_apic_timer = 0;
88
89static DEFINE_PER_CPU(int, prof_multiplier) = 1;
90static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
91static DEFINE_PER_CPU(int, prof_counter) = 1;
92
93static int enabled_via_apicbase;
94
95void enable_NMI_through_LVT0 (void * dummy)
96{
97 unsigned int v, ver;
98
99 ver = apic_read(APIC_LVR);
100 ver = GET_APIC_VERSION(ver);
101 v = APIC_DM_NMI; /* unmask and set to NMI */
102 if (!APIC_INTEGRATED(ver)) /* 82489DX */
103 v |= APIC_LVT_LEVEL_TRIGGER;
104 apic_write_around(APIC_LVT0, v);
105}
106
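/*
 * External 82489DX APICs and P4-or-newer cores (version >= 0x14) accept
 * the full 8-bit physical broadcast ID 0xff; older integrated APICs
 * decode only 4 destination bits, so broadcast there is 0xf.
 */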
107int get_physical_broadcast(void)
108{
109 unsigned int lvr, version;
110 lvr = apic_read(APIC_LVR);
111 version = GET_APIC_VERSION(lvr);
112 if (!APIC_INTEGRATED(version) || version >= 0x14)
113 return 0xff;
114 else
115 return 0xf;
116}
117
118int get_maxlvt(void)
119{
120 unsigned int v, ver, maxlvt;
121
122 v = apic_read(APIC_LVR);
123 ver = GET_APIC_VERSION(v);
124 /* 82489DXs do not report # of LVT entries. */
125 maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
126 return maxlvt;
127}
128
129void clear_local_APIC(void)
130{
131 int maxlvt;
132 unsigned long v;
133
134 maxlvt = get_maxlvt();
135
136 /*
137 * Masking an LVT entry on a P6 can trigger a local APIC error
138 * if the vector is zero. Mask LVTERR first to prevent this.
139 */
140 if (maxlvt >= 3) {
141 v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
142 apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
143 }
144 /*
145 * Careful: we have to set masks only first to deassert
146 * any level-triggered sources.
147 */
148 v = apic_read(APIC_LVTT);
149 apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
150 v = apic_read(APIC_LVT0);
151 apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
152 v = apic_read(APIC_LVT1);
153 apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
154 if (maxlvt >= 4) {
155 v = apic_read(APIC_LVTPC);
156 apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
157 }
158
159/* let's not touch this if we didn't frob it */
160#ifdef CONFIG_X86_MCE_P4THERMAL
161 if (maxlvt >= 5) {
162 v = apic_read(APIC_LVTTHMR);
163 apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
164 }
165#endif
166 /*
167 * Clean APIC state for other OSs:
168 */
169 apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
170 apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
171 apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
172 if (maxlvt >= 3)
173 apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
174 if (maxlvt >= 4)
175 apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
176
177#ifdef CONFIG_X86_MCE_P4THERMAL
178 if (maxlvt >= 5)
179 apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
180#endif
181 v = GET_APIC_VERSION(apic_read(APIC_LVR));
182 if (APIC_INTEGRATED(v)) { /* !82489DX */
183 if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */
184 apic_write(APIC_ESR, 0);
185 apic_read(APIC_ESR);
186 }
187}
188
189void __init connect_bsp_APIC(void)
190{
191 if (pic_mode) {
192 /*
193 * Do not trust the local APIC being empty at bootup.
194 */
195 clear_local_APIC();
196 /*
197 * PIC mode, enable APIC mode in the IMCR, i.e.
198 * connect BSP's local APIC to INT and NMI lines.
199 */
200 apic_printk(APIC_VERBOSE, "leaving PIC mode, "
201 "enabling APIC mode.\n");
202 outb(0x70, 0x22);
203 outb(0x01, 0x23);
204 }
205 enable_apic_mode();
206}
207
208void disconnect_bsp_APIC(void)
209{
210 if (pic_mode) {
211 /*
212 * Put the board back into PIC mode (has an effect
213 * only on certain older boards). Note that APIC
214 * interrupts, including IPIs, won't work beyond
215 * this point! The only exceptions are INIT IPIs.
216 */
217 apic_printk(APIC_VERBOSE, "disabling APIC mode, "
218 "entering PIC mode.\n");
219 outb(0x70, 0x22);
220 outb(0x00, 0x23);
221 }
222}
223
224void disable_local_APIC(void)
225{
226 unsigned long value;
227
228 clear_local_APIC();
229
230 /*
231 * Disable APIC (implies clearing of registers
232 * for 82489DX!).
233 */
234 value = apic_read(APIC_SPIV);
235 value &= ~APIC_SPIV_APIC_ENABLED;
236 apic_write_around(APIC_SPIV, value);
237
238 if (enabled_via_apicbase) {
239 unsigned int l, h;
240 rdmsr(MSR_IA32_APICBASE, l, h);
241 l &= ~MSR_IA32_APICBASE_ENABLE;
242 wrmsr(MSR_IA32_APICBASE, l, h);
243 }
244}
245
246/*
247 * This is to verify that we're looking at a real local APIC.
248 * Check these against your board if the CPUs fail to
249 * start for no apparent reason.
250 */
251int __init verify_local_APIC(void)
252{
253 unsigned int reg0, reg1;
254
255 /*
256 * The version register is read-only in a real APIC.
257 */
258 reg0 = apic_read(APIC_LVR);
259 apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
260 apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
261 reg1 = apic_read(APIC_LVR);
262 apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
263
264 /*
265 * The two version reads above should print the same
266 * numbers. If the second one is different, then we
267 * are poking at a non-APIC.
268 */
269 if (reg1 != reg0)
270 return 0;
271
272 /*
273 * Check if the version looks reasonable.
274 */
275 reg1 = GET_APIC_VERSION(reg0);
276 if (reg1 == 0x00 || reg1 == 0xff)
277 return 0;
278 reg1 = get_maxlvt();
279 if (reg1 < 0x02 || reg1 == 0xff)
280 return 0;
281
282 /*
283 * The ID register is read/write in a real APIC.
284 */
285 reg0 = apic_read(APIC_ID);
286 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
287
288 /*
289 * The next two are just to see if we have sane values.
290 * They're only really relevant if we're in Virtual Wire
291 * compatibility mode, but most boxes aren't anymore.
292 */
293 reg0 = apic_read(APIC_LVT0);
294 apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
295 reg1 = apic_read(APIC_LVT1);
296 apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
297
298 return 1;
299}
300
301void __init sync_Arb_IDs(void)
302{
303 /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */
304 unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
305 if (ver >= 0x14) /* P4 or higher */
306 return;
307 /*
308 * Wait for idle.
309 */
310 apic_wait_icr_idle();
311
312 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
313 apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
314 | APIC_DM_INIT);
315}
316
317extern void __error_in_apic_c (void);
318
319/*
320 * An initial setup of the virtual wire mode.
321 */
322void __init init_bsp_APIC(void)
323{
324 unsigned long value, ver;
325
326 /*
327 * Don't do the setup now if we have an SMP BIOS, as the
328 * through-I/O-APIC virtual wire mode might be active.
329 */
330 if (smp_found_config || !cpu_has_apic)
331 return;
332
333 value = apic_read(APIC_LVR);
334 ver = GET_APIC_VERSION(value);
335
336 /*
337 * Do not trust the local APIC being empty at bootup.
338 */
339 clear_local_APIC();
340
341 /*
342 * Enable APIC.
343 */
344 value = apic_read(APIC_SPIV);
345 value &= ~APIC_VECTOR_MASK;
346 value |= APIC_SPIV_APIC_ENABLED;
347
348 /* This bit is reserved on P4/Xeon and should be cleared */
349 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15))
350 value &= ~APIC_SPIV_FOCUS_DISABLED;
351 else
352 value |= APIC_SPIV_FOCUS_DISABLED;
353 value |= SPURIOUS_APIC_VECTOR;
354 apic_write_around(APIC_SPIV, value);
355
356 /*
357 * Set up the virtual wire mode.
358 */
359 apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
360 value = APIC_DM_NMI;
361 if (!APIC_INTEGRATED(ver)) /* 82489DX */
362 value |= APIC_LVT_LEVEL_TRIGGER;
363 apic_write_around(APIC_LVT1, value);
364}
365
366void __init setup_local_APIC (void)
367{
368 unsigned long oldvalue, value, ver, maxlvt;
369
370 /* Pound the ESR really hard over the head with a big hammer - mbligh */
371 if (esr_disable) {
372 apic_write(APIC_ESR, 0);
373 apic_write(APIC_ESR, 0);
374 apic_write(APIC_ESR, 0);
375 apic_write(APIC_ESR, 0);
376 }
377
378 value = apic_read(APIC_LVR);
379 ver = GET_APIC_VERSION(value);
380
381 if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
382 __error_in_apic_c();
383
384 /*
385 * Double-check whether this APIC is really registered.
386 */
387 if (!apic_id_registered())
388 BUG();
389
390 /*
391 * Intel recommends to set DFR, LDR and TPR before enabling
392 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
393 * document number 292116). So here it goes...
394 */
395 init_apic_ldr();
396
397 /*
398 * Set Task Priority to 'accept all'. We never change this
399 * later on.
400 */
401 value = apic_read(APIC_TASKPRI);
402 value &= ~APIC_TPRI_MASK;
403 apic_write_around(APIC_TASKPRI, value);
404
405 /*
406 * Now that we are all set up, enable the APIC
407 */
408 value = apic_read(APIC_SPIV);
409 value &= ~APIC_VECTOR_MASK;
410 /*
411 * Enable APIC
412 */
413 value |= APIC_SPIV_APIC_ENABLED;
414
415 /*
416 * Some unknown Intel IO/APIC (or APIC) erratum is biting us with
417 * certain networking cards. If high frequency interrupts are
418 * happening on a particular IOAPIC pin, plus the IOAPIC routing
419 * entry is masked/unmasked at a high rate as well then sooner or
420 * later IOAPIC line gets 'stuck', no more interrupts are received
421 * from the device. If focus CPU is disabled then the hang goes
422 * away, oh well :-(
423 *
424 * [ This bug can be reproduced easily with level-triggered
425 * PCI Ne2000 networking cards and PII/PIII processors, dual
426 * BX chipset. ]
427 */
428 /*
429 * Actually disabling the focus CPU check just makes the hang less
430 * frequent, as it makes the interrupt distribution model more
431 * like LRU than MRU (the short-term load is more even across CPUs).
432 * See also the comment in end_level_ioapic_irq(). --macro
433 */
434#if 1
435 /* Enable focus processor (bit==0) */
436 value &= ~APIC_SPIV_FOCUS_DISABLED;
437#else
438 /* Disable focus processor (bit==1) */
439 value |= APIC_SPIV_FOCUS_DISABLED;
440#endif
441 /*
442 * Set spurious IRQ vector
443 */
444 value |= SPURIOUS_APIC_VECTOR;
445 apic_write_around(APIC_SPIV, value);
446
447 /*
448 * Set up LVT0, LVT1:
449 *
450 * set up through-local-APIC on the BP's LINT0. This is not
451 * strictly necessary in pure symmetric-IO mode, but sometimes
452 * we delegate interrupts to the 8259A.
453 */
454 /*
455 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
456 */
457 value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
458 if (!smp_processor_id() && (pic_mode || !value)) {
459 value = APIC_DM_EXTINT;
460 apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
461 smp_processor_id());
462 } else {
463 value = APIC_DM_EXTINT | APIC_LVT_MASKED;
464 apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
465 smp_processor_id());
466 }
467 apic_write_around(APIC_LVT0, value);
468
469 /*
470 * only the BP should see the LINT1 NMI signal, obviously.
471 */
472 if (!smp_processor_id())
473 value = APIC_DM_NMI;
474 else
475 value = APIC_DM_NMI | APIC_LVT_MASKED;
476 if (!APIC_INTEGRATED(ver)) /* 82489DX */
477 value |= APIC_LVT_LEVEL_TRIGGER;
478 apic_write_around(APIC_LVT1, value);
479
480 if (APIC_INTEGRATED(ver) && !esr_disable) { /* !82489DX */
481 maxlvt = get_maxlvt();
482 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
483 apic_write(APIC_ESR, 0);
484 oldvalue = apic_read(APIC_ESR);
485
486 value = ERROR_APIC_VECTOR; /* enables sending errors */
487 apic_write_around(APIC_LVTERR, value);
488 /*
489 * spec says clear errors after enabling vector.
490 */
491 if (maxlvt > 3)
492 apic_write(APIC_ESR, 0);
493 value = apic_read(APIC_ESR);
494 if (value != oldvalue)
495 apic_printk(APIC_VERBOSE, "ESR value before enabling "
496 "vector: 0x%08lx after: 0x%08lx\n",
497 oldvalue, value);
498 } else {
499 if (esr_disable)
500 /*
501 * Something untraceable is creating bad interrupts on
502 * secondary quads ... for the moment, just leave the
503 * ESR disabled - we can't do anything useful with the
504 * errors anyway - mbligh
505 */
506 printk("Leaving ESR disabled.\n");
507 else
508 printk("No ESR for 82489DX.\n");
509 }
510
511 if (nmi_watchdog == NMI_LOCAL_APIC)
512 setup_apic_nmi_watchdog();
513 apic_pm_activate();
514}
515
516/*
517	 * If Linux enabled the LAPIC against the BIOS default,
518	 * disable it again before re-entering the BIOS on shutdown.
519	 * Otherwise the BIOS may get confused and not power off.
520 */
521void lapic_shutdown(void)
522{
523 if (!cpu_has_apic || !enabled_via_apicbase)
524 return;
525
526 local_irq_disable();
527 disable_local_APIC();
528 local_irq_enable();
529}
530
531#ifdef CONFIG_PM
532
533static struct {
534 int active;
535 /* r/w apic fields */
536 unsigned int apic_id;
537 unsigned int apic_taskpri;
538 unsigned int apic_ldr;
539 unsigned int apic_dfr;
540 unsigned int apic_spiv;
541 unsigned int apic_lvtt;
542 unsigned int apic_lvtpc;
543 unsigned int apic_lvt0;
544 unsigned int apic_lvt1;
545 unsigned int apic_lvterr;
546 unsigned int apic_tmict;
547 unsigned int apic_tdcr;
548 unsigned int apic_thmr;
549} apic_pm_state;
550
551static int lapic_suspend(struct sys_device *dev, u32 state)
552{
553 unsigned long flags;
554
555 if (!apic_pm_state.active)
556 return 0;
557
558 apic_pm_state.apic_id = apic_read(APIC_ID);
559 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
560 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
561 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
562 apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
563 apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
564 apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
565 apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
566 apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
567 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
568 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
569 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
570 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
571
572 local_irq_save(flags);
573 disable_local_APIC();
574 local_irq_restore(flags);
575 return 0;
576}
577
578static int lapic_resume(struct sys_device *dev)
579{
580 unsigned int l, h;
581 unsigned long flags;
582
583 if (!apic_pm_state.active)
584 return 0;
585
586 local_irq_save(flags);
587
588 /*
589 * Make sure the APICBASE points to the right address
590 *
591 * FIXME! This will be wrong if we ever support suspend on
592 * SMP! We'll need to do this as part of the CPU restore!
593 */
594 rdmsr(MSR_IA32_APICBASE, l, h);
595 l &= ~MSR_IA32_APICBASE_BASE;
596 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
597 wrmsr(MSR_IA32_APICBASE, l, h);
598
599 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
600 apic_write(APIC_ID, apic_pm_state.apic_id);
601 apic_write(APIC_DFR, apic_pm_state.apic_dfr);
602 apic_write(APIC_LDR, apic_pm_state.apic_ldr);
603 apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
604 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
605 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
606 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
607 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
608 apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
609 apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
610 apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
611 apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
612 apic_write(APIC_ESR, 0);
613 apic_read(APIC_ESR);
614 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
615 apic_write(APIC_ESR, 0);
616 apic_read(APIC_ESR);
617 local_irq_restore(flags);
618 return 0;
619}
620
621/*
622 * This device has no shutdown method - fully functioning local APICs
623 * are needed on every CPU up until machine_halt/restart/poweroff.
624 */
625
626static struct sysdev_class lapic_sysclass = {
627 set_kset_name("lapic"),
628 .resume = lapic_resume,
629 .suspend = lapic_suspend,
630};
631
632static struct sys_device device_lapic = {
633 .id = 0,
634 .cls = &lapic_sysclass,
635};
636
637static void __init apic_pm_activate(void)
638{
639 apic_pm_state.active = 1;
640}
641
642static int __init init_lapic_sysfs(void)
643{
644 int error;
645
646 if (!cpu_has_apic)
647 return 0;
648 /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
649
650 error = sysdev_class_register(&lapic_sysclass);
651 if (!error)
652 error = sysdev_register(&device_lapic);
653 return error;
654}
655device_initcall(init_lapic_sysfs);
656
657#else /* CONFIG_PM */
658
659static void apic_pm_activate(void) { }
660
661#endif /* CONFIG_PM */
662
663/*
664 * Detect and enable local APICs on non-SMP boards.
665 * Original code written by Keir Fraser.
666 */
667
668/*
669 * Knob to control our willingness to enable the local APIC.
670 */
671int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
672
673static int __init lapic_disable(char *str)
674{
675 enable_local_apic = -1;
676 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
677 return 0;
678}
679__setup("nolapic", lapic_disable);
680
681static int __init lapic_enable(char *str)
682{
683 enable_local_apic = 1;
684 return 0;
685}
686__setup("lapic", lapic_enable);
687
688static int __init apic_set_verbosity(char *str)
689{
690 if (strcmp("debug", str) == 0)
691 apic_verbosity = APIC_DEBUG;
692 else if (strcmp("verbose", str) == 0)
693 apic_verbosity = APIC_VERBOSE;
694 else
695		printk(KERN_WARNING "APIC Verbosity level %s not recognised,"
696			" use apic=verbose or apic=debug\n", str);
697
698 return 0;
699}
700
701__setup("apic=", apic_set_verbosity);
702
703static int __init detect_init_APIC(void)
704{
705 u32 h, l, features;
706 extern void get_cpu_vendor(struct cpuinfo_x86*);
707
708 /* Disabled by kernel option? */
709 if (enable_local_apic < 0)
710 return -1;
711
712 /* Workaround for us being called before identify_cpu(). */
713 get_cpu_vendor(&boot_cpu_data);
714
715 switch (boot_cpu_data.x86_vendor) {
716 case X86_VENDOR_AMD:
717 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
718 (boot_cpu_data.x86 == 15))
719 break;
720 goto no_apic;
721 case X86_VENDOR_INTEL:
722 if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
723 (boot_cpu_data.x86 == 5 && cpu_has_apic))
724 break;
725 goto no_apic;
726 default:
727 goto no_apic;
728 }
729
730 if (!cpu_has_apic) {
731 /*
732		 * Override the BIOS and try to enable the local
733		 * APIC only if "lapic" was specified.
734 */
735 if (enable_local_apic <= 0) {
736 printk("Local APIC disabled by BIOS -- "
737 "you can enable it with \"lapic\"\n");
738 return -1;
739 }
740 /*
741 * Some BIOSes disable the local APIC in the
742 * APIC_BASE MSR. This can only be done in
743 * software for Intel P6 or later and AMD K7
744 * (Model > 1) or later.
745 */
746 rdmsr(MSR_IA32_APICBASE, l, h);
747 if (!(l & MSR_IA32_APICBASE_ENABLE)) {
748 printk("Local APIC disabled by BIOS -- reenabling.\n");
749 l &= ~MSR_IA32_APICBASE_BASE;
750 l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
751 wrmsr(MSR_IA32_APICBASE, l, h);
752 enabled_via_apicbase = 1;
753 }
754 }
755 /*
756 * The APIC feature bit should now be enabled
757 * in `cpuid'
758 */
759 features = cpuid_edx(1);
760 if (!(features & (1 << X86_FEATURE_APIC))) {
761 printk("Could not enable APIC!\n");
762 return -1;
763 }
764 set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
765 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
766
767 /* The BIOS may have set up the APIC at some other address */
768 rdmsr(MSR_IA32_APICBASE, l, h);
769 if (l & MSR_IA32_APICBASE_ENABLE)
770 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
771
772 if (nmi_watchdog != NMI_NONE)
773 nmi_watchdog = NMI_LOCAL_APIC;
774
775 printk("Found and enabled local APIC!\n");
776
777 apic_pm_activate();
778
779 return 0;
780
781no_apic:
782 printk("No local APIC present or hardware disabled\n");
783 return -1;
784}
785
786void __init init_apic_mappings(void)
787{
788 unsigned long apic_phys;
789
790 /*
791 * If no local APIC can be found then set up a fake all
792 * zeroes page to simulate the local APIC and another
793 * one for the IO-APIC.
794 */
795 if (!smp_found_config && detect_init_APIC()) {
796 apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
797 apic_phys = __pa(apic_phys);
798 } else
799 apic_phys = mp_lapic_addr;
800
801 set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
802 printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE,
803 apic_phys);
804
805 /*
806 * Fetch the APIC ID of the BSP in case we have a
807 * default configuration (or the MP table is broken).
808 */
809 if (boot_cpu_physical_apicid == -1U)
810 boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
811
812#ifdef CONFIG_X86_IO_APIC
813 {
814 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
815 int i;
816
817 for (i = 0; i < nr_ioapics; i++) {
818 if (smp_found_config) {
819 ioapic_phys = mp_ioapics[i].mpc_apicaddr;
820 if (!ioapic_phys) {
821 printk(KERN_ERR
822 "WARNING: bogus zero IO-APIC "
823 "address found in MPTABLE, "
824 "disabling IO/APIC support!\n");
825 smp_found_config = 0;
826 skip_ioapic_setup = 1;
827 goto fake_ioapic_page;
828 }
829 } else {
830fake_ioapic_page:
831 ioapic_phys = (unsigned long)
832 alloc_bootmem_pages(PAGE_SIZE);
833 ioapic_phys = __pa(ioapic_phys);
834 }
835 set_fixmap_nocache(idx, ioapic_phys);
836 printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
837 __fix_to_virt(idx), ioapic_phys);
838 idx++;
839 }
840 }
841#endif
842}
843
844/*
845 * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts
846 * per second. We assume that the caller has already set up the local
847 * APIC.
848 *
849	 * The APIC timer is not exactly in sync with the external timer chip;
850	 * it closely follows bus clocks.
851 */
852
853/*
854 * The timer chip is already set up at HZ interrupts per second here,
855 * but we do not accept timer interrupts yet. We only allow the BP
856 * to calibrate.
857 */
858static unsigned int __init get_8254_timer_count(void)
859{
860 extern spinlock_t i8253_lock;
861 unsigned long flags;
862
863 unsigned int count;
864
865 spin_lock_irqsave(&i8253_lock, flags);
866
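	/*
	 * Counter latch command for channel 0 (the top two bits of the
	 * mode byte select the channel, 00 in the access bits means
	 * "latch"), then read the frozen count back LSB first, MSB second:
	 */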
867 outb_p(0x00, PIT_MODE);
868 count = inb_p(PIT_CH0);
869 count |= inb_p(PIT_CH0) << 8;
870
871 spin_unlock_irqrestore(&i8253_lock, flags);
872
873 return count;
874}
875
876/* next tick in 8254 can be caught by catching timer wraparound */
877static void __init wait_8254_wraparound(void)
878{
879 unsigned int curr_count, prev_count;
880
881 curr_count = get_8254_timer_count();
882 do {
883 prev_count = curr_count;
884 curr_count = get_8254_timer_count();
885
886 /* workaround for broken Mercury/Neptune */
887 if (prev_count >= curr_count + 0x100)
888 curr_count = get_8254_timer_count();
889
890 } while (prev_count >= curr_count);
891}
892
893/*
894 * Default initialization for 8254 timers. If we use other timers like HPET,
895 * we override this later
896 */
897void (*wait_timer_tick)(void) __initdata = wait_8254_wraparound;
898
899/*
900 * This function sets up the local APIC timer, with a timeout of
901	 * 'clocks' APIC bus clocks. During calibration we actually call
902	 * this function twice on the boot CPU, once with a bogus timeout
903	 * value and a second time for real. The other (non-calibrating) CPUs
904 * call this function only once, with the real, calibrated value.
905 *
906 * We do reads before writes even if unnecessary, to get around the
907 * P5 APIC double write bug.
908 */
909
910#define APIC_DIVISOR 16
911
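/*
 * For illustration, a sketch of the intended call pattern during boot
 * (the 1000000000 below is the same arbitrary placeholder used in
 * calibrate_APIC_clock(); 'clocks' is the calibrated per-tick value):
 *
 *	__setup_APIC_LVTT(1000000000);		calibration run only
 *	clocks = calibrate_APIC_clock();	bus clocks per timer tick
 *	__setup_APIC_LVTT(clocks);		periodic timer, for real
 *
 * Since the divide configuration register is programmed to divide by
 * 16 and TMICT gets clocks/APIC_DIVISOR, the effective period still
 * corresponds to 'clocks' bus cycles.
 */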
912static void __setup_APIC_LVTT(unsigned int clocks)
913{
914 unsigned int lvtt_value, tmp_value, ver;
915
916 ver = GET_APIC_VERSION(apic_read(APIC_LVR));
917 lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
918 if (!APIC_INTEGRATED(ver))
919 lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
920 apic_write_around(APIC_LVTT, lvtt_value);
921
922 /*
923 * Divide PICLK by 16
924 */
925 tmp_value = apic_read(APIC_TDCR);
926 apic_write_around(APIC_TDCR, (tmp_value
927 & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
928 | APIC_TDR_DIV_16);
929
930 apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
931}
932
933static void __init setup_APIC_timer(unsigned int clocks)
934{
935 unsigned long flags;
936
937 local_irq_save(flags);
938
939 /*
940 * Wait for IRQ0's slice:
941 */
942 wait_timer_tick();
943
944 __setup_APIC_LVTT(clocks);
945
946 local_irq_restore(flags);
947}
948
949/*
950 * In this function we calibrate APIC bus clocks to the external
951 * timer. Unfortunately we cannot use jiffies and the timer irq
952 * to calibrate, since some later bootup code depends on getting
953 * the first irq? Ugh.
954 *
955	 * We want to do the calibration only once, since we
956	 * want the local timer irqs to be in sync: CPUs connected
957	 * by the same APIC bus have the very same bus frequency.
958	 * And we want to have irqs off anyway - no accidental
959 * APIC irq that way.
960 */
961
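/*
 * A worked example of the arithmetic below, with illustrative numbers:
 * with HZ == 100 we get LOOPS == 10. On a 100 MHz bus the current-count
 * register (running at bus/16) drops by 62500 per tick, so tt1 - tt2 is
 * 625000 after ten ticks and result == 625000 * 16 / 10 == 1000000 bus
 * clocks per tick; the printk then reports result/(1000000/HZ) == 100,
 * i.e. a 100 MHz host bus.
 */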
962static int __init calibrate_APIC_clock(void)
963{
964 unsigned long long t1 = 0, t2 = 0;
965 long tt1, tt2;
966 long result;
967 int i;
968 const int LOOPS = HZ/10;
969
970 apic_printk(APIC_VERBOSE, "calibrating APIC timer ...\n");
971
972 /*
973 * Put whatever arbitrary (but long enough) timeout
974	 * value into the APIC clock; we just want to get the
975 * counter running for calibration.
976 */
977 __setup_APIC_LVTT(1000000000);
978
979 /*
980 * The timer chip counts down to zero. Let's wait
981 * for a wraparound to start exact measurement:
982	 * (the current tick might already be half done)
983 */
984
985 wait_timer_tick();
986
987 /*
988 * We wrapped around just now. Let's start:
989 */
990 if (cpu_has_tsc)
991 rdtscll(t1);
992 tt1 = apic_read(APIC_TMCCT);
993
994 /*
995	 * Let's wait LOOPS wraparounds:
996 */
997 for (i = 0; i < LOOPS; i++)
998 wait_timer_tick();
999
1000 tt2 = apic_read(APIC_TMCCT);
1001 if (cpu_has_tsc)
1002 rdtscll(t2);
1003
1004 /*
1005	 * The APIC bus clock counter is 32 bits only; it
1006	 * might have overflowed, but note that we use signed
1007	 * longs, thus no extra care is needed.
1008	 *
1009	 * (Underflowed, to be exact, as the timer counts down. ;)
1010 */
1011
1012 result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
1013
1014 if (cpu_has_tsc)
1015 apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
1016 "%ld.%04ld MHz.\n",
1017 ((long)(t2-t1)/LOOPS)/(1000000/HZ),
1018 ((long)(t2-t1)/LOOPS)%(1000000/HZ));
1019
1020 apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
1021 "%ld.%04ld MHz.\n",
1022 result/(1000000/HZ),
1023 result%(1000000/HZ));
1024
1025 return result;
1026}
1027
1028static unsigned int calibration_result;
1029
1030void __init setup_boot_APIC_clock(void)
1031{
1032 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
1033 using_apic_timer = 1;
1034
1035 local_irq_disable();
1036
1037 calibration_result = calibrate_APIC_clock();
1038 /*
1039 * Now set up the timer for real.
1040 */
1041 setup_APIC_timer(calibration_result);
1042
1043 local_irq_enable();
1044}
1045
1046void __init setup_secondary_APIC_clock(void)
1047{
1048 setup_APIC_timer(calibration_result);
1049}
1050
1051void __init disable_APIC_timer(void)
1052{
1053 if (using_apic_timer) {
1054 unsigned long v;
1055
1056 v = apic_read(APIC_LVTT);
1057 apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
1058 }
1059}
1060
1061void enable_APIC_timer(void)
1062{
1063 if (using_apic_timer) {
1064 unsigned long v;
1065
1066 v = apic_read(APIC_LVTT);
1067 apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED);
1068 }
1069}
1070
1071/*
1072	 * The frequency of the profiling timer can be changed
1073 * by writing a multiplier value into /proc/profile.
1074 */
1075int setup_profiling_timer(unsigned int multiplier)
1076{
1077 int i;
1078
1079 /*
1080 * Sanity check. [at least 500 APIC cycles should be
1081 * between APIC interrupts as a rule of thumb, to avoid
1082 * irqs flooding us]
1083 */
1084	if ((!multiplier) || (calibration_result/multiplier < 500))
1085 return -EINVAL;
1086
1087 /*
1088 * Set the new multiplier for each CPU. CPUs don't start using the
1089 * new values until the next timer interrupt in which they do process
1090 * accounting. At that time they also adjust their APIC timers
1091 * accordingly.
1092 */
1093 for (i = 0; i < NR_CPUS; ++i)
1094 per_cpu(prof_multiplier, i) = multiplier;
1095
1096 return 0;
1097}
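/*
 * Example of the sanity check above, with illustrative numbers: on a
 * 100 MHz bus with HZ == 100, calibration_result is roughly 1000000
 * bus clocks per tick, so multipliers up to 2000 still leave at least
 * 500 APIC cycles between local timer interrupts and are accepted;
 * anything larger (or a zero multiplier) gets -EINVAL.
 */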
1098
1099#undef APIC_DIVISOR
1100
1101/*
1102 * Local timer interrupt handler. It does both profiling and
1103 * process statistics/rescheduling.
1104 *
1105 * We do profiling in every local tick, statistics/rescheduling
1106 * happen only every 'profiling multiplier' ticks. The default
1107 * multiplier is 1 and it can be changed by writing the new multiplier
1108 * value into /proc/profile.
1109 */
1110
1111inline void smp_local_timer_interrupt(struct pt_regs * regs)
1112{
1113 int cpu = smp_processor_id();
1114
1115 profile_tick(CPU_PROFILING, regs);
1116 if (--per_cpu(prof_counter, cpu) <= 0) {
1117 /*
1118 * The multiplier may have changed since the last time we got
1119 * to this point as a result of the user writing to
1120 * /proc/profile. In this case we need to adjust the APIC
1121 * timer accordingly.
1122 *
1123 * Interrupts are already masked off at this point.
1124 */
1125 per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
1126 if (per_cpu(prof_counter, cpu) !=
1127 per_cpu(prof_old_multiplier, cpu)) {
1128 __setup_APIC_LVTT(
1129 calibration_result/
1130 per_cpu(prof_counter, cpu));
1131 per_cpu(prof_old_multiplier, cpu) =
1132 per_cpu(prof_counter, cpu);
1133 }
1134
1135#ifdef CONFIG_SMP
1136 update_process_times(user_mode(regs));
1137#endif
1138 }
1139
1140 /*
1141 * We take the 'long' return path, and there every subsystem
1142	 * grabs the appropriate locks (kernel lock/irq lock).
1143	 *
1144	 * We might want to decouple profiling from the 'long path',
1145 * and do the profiling totally in assembly.
1146 *
1147 * Currently this isn't too much of an issue (performance wise),
1148 * we can take more than 100K local irqs per second on a 100 MHz P5.
1149 */
1150}
1151
1152/*
1153 * Local APIC timer interrupt. This is the most natural way for doing
1154 * local interrupts, but local timer interrupts can be emulated by
1155 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
1156 *
1157 * [ if a single-CPU system runs an SMP kernel then we call the local
1158 * interrupt as well. Thus we cannot inline the local irq ... ]
1159 */
1160
1161fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
1162{
1163 int cpu = smp_processor_id();
1164
1165 /*
1166 * the NMI deadlock-detector uses this.
1167 */
1168 per_cpu(irq_stat, cpu).apic_timer_irqs++;
1169
1170 /*
1171 * NOTE! We'd better ACK the irq immediately,
1172 * because timer handling can be slow.
1173 */
1174 ack_APIC_irq();
1175 /*
1176 * update_process_times() expects us to have done irq_enter().
1177	 * Besides, if we don't, timer interrupts ignore the global
1178 * interrupt lock, which is the WrongThing (tm) to do.
1179 */
1180 irq_enter();
1181 smp_local_timer_interrupt(regs);
1182 irq_exit();
1183}
1184
1185/*
1186 * This interrupt should _never_ happen with our APIC/SMP architecture
1187 */
1188fastcall void smp_spurious_interrupt(struct pt_regs *regs)
1189{
1190 unsigned long v;
1191
1192 irq_enter();
1193 /*
1194 * Check if this really is a spurious interrupt and ACK it
1195 * if it is a vectored one. Just in case...
1196 * Spurious interrupts should not be ACKed.
1197 */
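	/*
	 * The 256 ISR bits live in eight 32-bit registers spaced 16 bytes
	 * apart, so vector v maps to bit (v & 0x1f) of the register at
	 * offset (v / 32) * 0x10 - which is what ((v & ~0x1f) >> 1)
	 * computes below.
	 */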
1198 v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
1199 if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
1200 ack_APIC_irq();
1201
1202 /* see sw-dev-man vol 3, chapter 7.4.13.5 */
1203 printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n",
1204 smp_processor_id());
1205 irq_exit();
1206}
1207
1208/*
1209 * This interrupt should never happen with our APIC/SMP architecture
1210 */
1211
1212fastcall void smp_error_interrupt(struct pt_regs *regs)
1213{
1214 unsigned long v, v1;
1215
1216 irq_enter();
1217 /* First tickle the hardware, only then report what went on. -- REW */
1218 v = apic_read(APIC_ESR);
1219 apic_write(APIC_ESR, 0);
1220 v1 = apic_read(APIC_ESR);
1221 ack_APIC_irq();
1222 atomic_inc(&irq_err_count);
1223
1224 /* Here is what the APIC error bits mean:
1225 0: Send CS error
1226 1: Receive CS error
1227 2: Send accept error
1228 3: Receive accept error
1229 4: Reserved
1230 5: Send illegal vector
1231 6: Received illegal vector
1232 7: Illegal register address
1233 */
1234	printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
1235		smp_processor_id(), v, v1);
1236 irq_exit();
1237}
1238
1239/*
1240 * This initializes the IO-APIC and APIC hardware if this is
1241 * a UP kernel.
1242 */
1243int __init APIC_init_uniprocessor(void)
1244{
1245 if (enable_local_apic < 0)
1246 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
1247
1248 if (!smp_found_config && !cpu_has_apic)
1249 return -1;
1250
1251 /*
1252 * Complain if the BIOS pretends there is one.
1253 */
1254 if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
1255 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
1256 boot_cpu_physical_apicid);
1257 return -1;
1258 }
1259
1260 verify_local_APIC();
1261
1262 connect_bsp_APIC();
1263
1264 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
1265
1266 setup_local_APIC();
1267
1268 if (nmi_watchdog == NMI_LOCAL_APIC)
1269 check_nmi_watchdog();
1270#ifdef CONFIG_X86_IO_APIC
1271 if (smp_found_config)
1272 if (!skip_ioapic_setup && nr_ioapics)
1273 setup_IO_APIC();
1274#endif
1275 setup_boot_APIC_clock();
1276
1277 return 0;
1278}
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
new file mode 100644
index 000000000000..45641a872550
--- /dev/null
+++ b/arch/i386/kernel/apm.c
@@ -0,0 +1,2428 @@
1/* -*- linux-c -*-
2 * APM BIOS driver for Linux
3 * Copyright 1994-2001 Stephen Rothwell (sfr@canb.auug.org.au)
4 *
5 * Initial development of this driver was funded by NEC Australia P/L
6 * and NEC Corporation
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2, or (at your option) any
11 * later version.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * October 1995, Rik Faith (faith@cs.unc.edu):
19 * Minor enhancements and updates (to the patch set) for 1.3.x
20 * Documentation
21 * January 1996, Rik Faith (faith@cs.unc.edu):
22 * Make /proc/apm easy to format (bump driver version)
23 * March 1996, Rik Faith (faith@cs.unc.edu):
24 * Prohibit APM BIOS calls unless apm_enabled.
25 * (Thanks to Ulrich Windl <Ulrich.Windl@rz.uni-regensburg.de>)
26 * April 1996, Stephen Rothwell (sfr@canb.auug.org.au)
27 * Version 1.0 and 1.1
28 * May 1996, Version 1.2
29 * Feb 1998, Version 1.3
30 * Feb 1998, Version 1.4
31 * Aug 1998, Version 1.5
32 * Sep 1998, Version 1.6
33 * Nov 1998, Version 1.7
34 * Jan 1999, Version 1.8
35 * Jan 1999, Version 1.9
36 * Oct 1999, Version 1.10
37 * Nov 1999, Version 1.11
38 * Jan 2000, Version 1.12
39 * Feb 2000, Version 1.13
40 * Nov 2000, Version 1.14
41 * Oct 2001, Version 1.15
42 * Jan 2002, Version 1.16
43 * Oct 2002, Version 1.16ac
44 *
45 * History:
46 * 0.6b: first version in official kernel, Linux 1.3.46
47 * 0.7: changed /proc/apm format, Linux 1.3.58
48 * 0.8: fixed gcc 2.7.[12] compilation problems, Linux 1.3.59
49 * 0.9: only call bios if bios is present, Linux 1.3.72
50 * 1.0: use fixed device number, consolidate /proc/apm into this file,
51 * Linux 1.3.85
52 * 1.1: support user-space standby and suspend, power off after system
53 * halted, Linux 1.3.98
54 * 1.2: When resetting RTC after resume, take care so that the time
55 * is only incorrect by 30-60mS (vs. 1S previously) (Gabor J. Toth
56 * <jtoth@princeton.edu>); improve interaction between
57 * screen-blanking and gpm (Stephen Rothwell); Linux 1.99.4
58	 * 1.2a: Simple change to stop mysterious bug reports with SMP; also
59	 *	added levels to the printk calls. APM is not defined for SMP machines.
60	 *	The new replacement for it is, but Linux doesn't yet support this.
61 * Alan Cox Linux 2.1.55
62 * 1.3: Set up a valid data descriptor 0x40 for buggy BIOS's
63 * 1.4: Upgraded to support APM 1.2. Integrated ThinkPad suspend patch by
64 * Dean Gaudet <dgaudet@arctic.org>.
65 * C. Scott Ananian <cananian@alumni.princeton.edu> Linux 2.1.87
66 * 1.5: Fix segment register reloading (in case of bad segments saved
67 * across BIOS call).
68 * Stephen Rothwell
69	 * 1.6: Cope with compiler/assembler differences.
70 * Only try to turn off the first display device.
71 * Fix OOPS at power off with no APM BIOS by Jan Echternach
72 * <echter@informatik.uni-rostock.de>
73 * Stephen Rothwell
74 * 1.7: Modify driver's cached copy of the disabled/disengaged flags
75 * to reflect current state of APM BIOS.
76 * Chris Rankin <rankinc@bellsouth.net>
77 * Reset interrupt 0 timer to 100Hz after suspend
78 * Chad Miller <cmiller@surfsouth.com>
79 * Add CONFIG_APM_IGNORE_SUSPEND_BOUNCE
80 * Richard Gooch <rgooch@atnf.csiro.au>
81 * Allow boot time disabling of APM
82 * Make boot messages far less verbose by default
83 * Make asm safer
84 * Stephen Rothwell
85 * 1.8: Add CONFIG_APM_RTC_IS_GMT
86 * Richard Gooch <rgooch@atnf.csiro.au>
87 * change APM_NOINTS to CONFIG_APM_ALLOW_INTS
88 * remove dependency on CONFIG_PROC_FS
89 * Stephen Rothwell
90 * 1.9: Fix small typo. <laslo@wodip.opole.pl>
91 * Try to cope with BIOS's that need to have all display
92 * devices blanked and not just the first one.
93 * Ross Paterson <ross@soi.city.ac.uk>
94	 *	Fix segment limit setting: it has always been wrong, as
95	 *	the segments needed to have byte granularity.
96 * Mark a few things __init.
97 * Add hack to allow power off of SMP systems by popular request.
98 * Use CONFIG_SMP instead of __SMP__
99 * Ignore BOUNCES for three seconds.
100 * Stephen Rothwell
101 * 1.10: Fix for Thinkpad return code.
102 * Merge 2.2 and 2.3 drivers.
103 * Remove APM dependencies in arch/i386/kernel/process.c
104 * Remove APM dependencies in drivers/char/sysrq.c
105 * Reset time across standby.
106	 *	Allow more initialisation on SMP.
107 * Remove CONFIG_APM_POWER_OFF and make it boot time
108 * configurable (default on).
109 * Make debug only a boot time parameter (remove APM_DEBUG).
110 * Try to blank all devices on any error.
111 * 1.11: Remove APM dependencies in drivers/char/console.c
112 * Check nr_running to detect if we are idle (from
113 * Borislav Deianov <borislav@lix.polytechnique.fr>)
114 * Fix for bioses that don't zero the top part of the
115 * entrypoint offset (Mario Sitta <sitta@al.unipmn.it>)
116 * (reported by Panos Katsaloulis <teras@writeme.com>).
117 * Real mode power off patch (Walter Hofmann
118 * <Walter.Hofmann@physik.stud.uni-erlangen.de>).
119 * 1.12: Remove CONFIG_SMP as the compiler will optimize
120 * the code away anyway (smp_num_cpus == 1 in UP)
121 * noted by Artur Skawina <skawina@geocities.com>.
122 * Make power off under SMP work again.
123 * Fix thinko with initial engaging of BIOS.
124 * Make sure power off only happens on CPU 0
125 * (Paul "Rusty" Russell <rusty@rustcorp.com.au>).
126 * Do error notification to user mode if BIOS calls fail.
127 * Move entrypoint offset fix to ...boot/setup.S
128 * where it belongs (Cosmos <gis88564@cis.nctu.edu.tw>).
129 * Remove smp-power-off. SMP users must now specify
130 * "apm=power-off" on the kernel command line. Suggested
131 * by Jim Avera <jima@hal.com>, modified by Alan Cox
132 * <alan@lxorguk.ukuu.org.uk>.
133 * Register the /proc/apm entry even on SMP so that
134 * scripts that check for it before doing power off
135 * work (Jim Avera <jima@hal.com>).
136 * 1.13: Changes for new pm_ interfaces (Andy Henroid
137 * <andy_henroid@yahoo.com>).
138 * Modularize the code.
139 * Fix the Thinkpad (again) :-( (CONFIG_APM_IGNORE_MULTIPLE_SUSPENDS
140 * is now the way life works).
141 * Fix thinko in suspend() (wrong return).
142 * Notify drivers on critical suspend.
143 * Make kapmd absorb more idle time (Pavel Machek <pavel@suse.cz>
144 * modified by sfr).
145 * Disable interrupts while we are suspended (Andy Henroid
146 * <andy_henroid@yahoo.com> fixed by sfr).
147 * Make power off work on SMP again (Tony Hoyle
148 * <tmh@magenta-logic.com> and <zlatko@iskon.hr>) modified by sfr.
149 * Remove CONFIG_APM_SUSPEND_BOUNCE. The bounce ignore
150 * interval is now configurable.
151 * 1.14: Make connection version persist across module unload/load.
152 * Enable and engage power management earlier.
153 * Disengage power management on module unload.
154 * Changed to use the sysrq-register hack for registering the
155 * power off function called by magic sysrq based upon discussions
156 * in irc://irc.openprojects.net/#kernelnewbies
157 * (Crutcher Dunnavant <crutcher+kernel@datastacks.com>).
158 * Make CONFIG_APM_REAL_MODE_POWER_OFF run time configurable.
159 * (Arjan van de Ven <arjanv@redhat.com>) modified by sfr.
160 * Work around byte swap bug in one of the Vaio's BIOS's
161 * (Marc Boucher <marc@mbsi.ca>).
162 * Exposed the disable flag to dmi so that we can handle known
163 * broken APM (Alan Cox <alan@redhat.com>).
164 * 1.14ac: If the BIOS says "I slowed the CPU down" then don't spin
165 * calling it - instead idle. (Alan Cox <alan@redhat.com>)
166	 *	If an APM idle fails, log it and idle sensibly
167 * 1.15: Don't queue events to clients who open the device O_WRONLY.
168 * Don't expect replies from clients who open the device O_RDONLY.
169 * (Idea from Thomas Hood)
170 * Minor waitqueue cleanups. (John Fremlin <chief@bandits.org>)
171 * 1.16: Fix idle calling. (Andreas Steinmetz <ast@domdv.de> et al.)
172 * Notify listeners of standby or suspend events before notifying
173 * drivers. Return EBUSY to ioctl() if suspend is rejected.
174 * (Russell King <rmk@arm.linux.org.uk> and Thomas Hood)
175 * Ignore first resume after we generate our own resume event
176 * after a suspend (Thomas Hood)
177 * Daemonize now gets rid of our controlling terminal (sfr).
178 * CONFIG_APM_CPU_IDLE now just affects the default value of
179 * idle_threshold (sfr).
180 * Change name of kernel apm daemon (as it no longer idles) (sfr).
181 * 1.16ac: Fix up SMP support somewhat. You can now force SMP on and we
182 * make _all_ APM calls on the CPU#0. Fix unsafe sign bug.
183	 *	TODO: determine if it's "boot CPU" or "CPU0" we want to lock to.
184 *
185 * APM 1.1 Reference:
186 *
187 * Intel Corporation, Microsoft Corporation. Advanced Power Management
188 * (APM) BIOS Interface Specification, Revision 1.1, September 1993.
189 * Intel Order Number 241704-001. Microsoft Part Number 781-110-X01.
190 *
191 * [This document is available free from Intel by calling 800.628.8686 (fax
192 * 916.356.6100) or 800.548.4725; or via anonymous ftp from
193 * ftp://ftp.intel.com/pub/IAL/software_specs/apmv11.doc. It is also
194 * available from Microsoft by calling 206.882.8080.]
195 *
196 * APM 1.2 Reference:
197 * Intel Corporation, Microsoft Corporation. Advanced Power Management
198 * (APM) BIOS Interface Specification, Revision 1.2, February 1996.
199 *
200 * [This document is available from Microsoft at:
201 * http://www.microsoft.com/hwdev/busbios/amp_12.htm]
202 */
203
204#include <linux/config.h>
205#include <linux/module.h>
206
207#include <linux/poll.h>
208#include <linux/types.h>
209#include <linux/stddef.h>
210#include <linux/timer.h>
211#include <linux/fcntl.h>
212#include <linux/slab.h>
213#include <linux/stat.h>
214#include <linux/proc_fs.h>
215#include <linux/miscdevice.h>
216#include <linux/apm_bios.h>
217#include <linux/init.h>
218#include <linux/time.h>
219#include <linux/sched.h>
220#include <linux/pm.h>
221#include <linux/device.h>
222#include <linux/kernel.h>
223#include <linux/smp.h>
224#include <linux/smp_lock.h>
225#include <linux/dmi.h>
226#include <linux/suspend.h>
227
228#include <asm/system.h>
229#include <asm/uaccess.h>
230#include <asm/desc.h>
231
232#include "io_ports.h"
233
234extern spinlock_t i8253_lock;
235extern unsigned long get_cmos_time(void);
236extern void machine_real_restart(unsigned char *, int);
237
238#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
239extern int (*console_blank_hook)(int);
240#endif
241
242/*
243 * The apm_bios device is one of the misc char devices.
244 * This is its minor number.
245 */
246#define APM_MINOR_DEV 134
247
248/*
249 * See Documentation/Config.help for the configuration options.
250 *
251 * Various options can be changed at boot time as follows:
252 * (We allow underscores for compatibility with the modules code)
253 * apm=on/off enable/disable APM
254 * [no-]allow[-_]ints allow interrupts during BIOS calls
255 * [no-]broken[-_]psr BIOS has a broken GetPowerStatus call
256 * [no-]realmode[-_]power[-_]off switch to real mode before
257 * powering off
258 * [no-]debug log some debugging messages
259 * [no-]power[-_]off power off on shutdown
260 * [no-]smp Use apm even on an SMP box
261 * bounce[-_]interval=<n> number of ticks to ignore suspend
262 * bounces
263 * idle[-_]threshold=<n> System idle percentage above which to
264 * make APM BIOS idle calls. Set it to
265 * 100 to disable.
266 * idle[-_]period=<n> Period (in 1/100s of a second) over
267 * which the idle percentage is
268 * calculated.
269 */
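/*
 * An illustrative (hypothetical) command line using the options above,
 * assuming the usual comma-separated apm= syntax:
 *
 *	apm=on,debug,idle-threshold=90,idle-period=33
 */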
270
271/* KNOWN PROBLEM MACHINES:
272 *
273 * U: TI 4000M TravelMate: BIOS is *NOT* APM compliant
274 * [Confirmed by TI representative]
275 * ?: ACER 486DX4/75: uses dseg 0040, in violation of APM specification
276 * [Confirmed by BIOS disassembly]
277 * [This may work now ...]
278 * P: Toshiba 1950S: battery life information only gets updated after resume
279 * P: Midwest Micro Soundbook Elite DX2/66 monochrome: screen blanking
280 * broken in BIOS [Reported by Garst R. Reese <reese@isn.net>]
281 * ?: AcerNote-950: oops on reading /proc/apm - workaround is a WIP
282 * Neale Banks <neale@lowendale.com.au> December 2000
283 *
284 * Legend: U = unusable with APM patches
285 * P = partially usable with APM patches
286 */
287
288/*
289 * Define as 1 to make the driver always call the APM BIOS busy
290 * routine even if the clock was not reported as slowed by the
291 * idle routine. Otherwise, define as 0.
292 */
293#define ALWAYS_CALL_BUSY 1
294
295/*
296 * Define to make the APM BIOS calls zero all data segment registers (so
297 * that an incorrect BIOS implementation will cause a kernel panic if it
298 * tries to write to arbitrary memory).
299 */
300#define APM_ZERO_SEGS
301
302#include "apm.h"
303
304/*
305 * Define to make all _set_limit calls use 64k limits. The APM 1.1 BIOS is
306 * supposed to provide limit information that it recognizes. Many machines
307 * do this correctly, but many others do not restrict themselves to their
308 * claimed limit. When this happens, they will cause a segmentation
309 * violation in the kernel at boot time. Most BIOS's, however, will
310 * respect a 64k limit, so we use that. If you want to be pedantic and
311 * hold your BIOS to its claims, then undefine this.
312 */
313#define APM_RELAX_SEGMENTS
314
315/*
316 * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend.
317	 * This was patched by Chad Miller <cmiller@surfsouth.com>, original code by
318 * David Chen <chen@ctpa04.mit.edu>
319 */
320#undef INIT_TIMER_AFTER_SUSPEND
321
322#ifdef INIT_TIMER_AFTER_SUSPEND
323#include <linux/timex.h>
324#include <asm/io.h>
325#include <linux/delay.h>
326#endif
327
328/*
329 * Need to poll the APM BIOS every second
330 */
331#define APM_CHECK_TIMEOUT (HZ)
332
333/*
334 * Ignore suspend events for this amount of time after a resume
335 */
336#define DEFAULT_BOUNCE_INTERVAL (3 * HZ)
337
338/*
339 * Maximum number of events stored
340 */
341#define APM_MAX_EVENTS 20
342
343/*
344 * The per-file APM data
345 */
346struct apm_user {
347 int magic;
348 struct apm_user * next;
349 int suser: 1;
350 int writer: 1;
351 int reader: 1;
352 int suspend_wait: 1;
353 int suspend_result;
354 int suspends_pending;
355 int standbys_pending;
356 int suspends_read;
357 int standbys_read;
358 int event_head;
359 int event_tail;
360 apm_event_t events[APM_MAX_EVENTS];
361};
362
363/*
364 * The magic number in apm_user
365 */
366#define APM_BIOS_MAGIC 0x4101
367
368/*
369 * idle percentage above which bios idle calls are done
370 */
371#ifdef CONFIG_APM_CPU_IDLE
372#define DEFAULT_IDLE_THRESHOLD 95
373#else
374#define DEFAULT_IDLE_THRESHOLD 100
375#endif
376#define DEFAULT_IDLE_PERIOD (100 / 3)
377
378/*
379 * Local variables
380 */
381static struct {
382 unsigned long offset;
383 unsigned short segment;
384} apm_bios_entry;
385static int clock_slowed;
386static int idle_threshold = DEFAULT_IDLE_THRESHOLD;
387static int idle_period = DEFAULT_IDLE_PERIOD;
388static int set_pm_idle;
389static int suspends_pending;
390static int standbys_pending;
391static int ignore_sys_suspend;
392static int ignore_normal_resume;
393static int bounce_interval = DEFAULT_BOUNCE_INTERVAL;
394
395#ifdef CONFIG_APM_RTC_IS_GMT
396# define clock_cmos_diff 0
397# define got_clock_diff 1
398#else
399static long clock_cmos_diff;
400static int got_clock_diff;
401#endif
402static int debug;
403static int smp;
404static int apm_disabled = -1;
405#ifdef CONFIG_SMP
406static int power_off;
407#else
408static int power_off = 1;
409#endif
410#ifdef CONFIG_APM_REAL_MODE_POWER_OFF
411static int realmode_power_off = 1;
412#else
413static int realmode_power_off;
414#endif
415static int exit_kapmd;
416static int kapmd_running;
417#ifdef CONFIG_APM_ALLOW_INTS
418static int allow_ints = 1;
419#else
420static int allow_ints;
421#endif
422static int broken_psr;
423
424static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
425static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
426static struct apm_user * user_list;
427static DEFINE_SPINLOCK(user_list_lock);
428static struct desc_struct bad_bios_desc = { 0, 0x00409200 };
429
430static char driver_version[] = "1.16ac"; /* no spaces */
431
432/*
433 * APM event names taken from the APM 1.2 specification. These are
434 * the message codes that the BIOS uses to tell us about events
435 */
436static char * apm_event_name[] = {
437 "system standby",
438 "system suspend",
439 "normal resume",
440 "critical resume",
441 "low battery",
442 "power status change",
443 "update time",
444 "critical suspend",
445 "user standby",
446 "user suspend",
447 "system standby resume",
448 "capabilities change"
449};
450#define NR_APM_EVENT_NAME \
451 (sizeof(apm_event_name) / sizeof(apm_event_name[0]))
452
453typedef struct lookup_t {
454 int key;
455 char * msg;
456} lookup_t;
457
458/*
459 * The BIOS returns a set of standard error codes in AX when the
460 * carry flag is set.
461 */
462
463static const lookup_t error_table[] = {
464/* N/A { APM_SUCCESS, "Operation succeeded" }, */
465 { APM_DISABLED, "Power management disabled" },
466 { APM_CONNECTED, "Real mode interface already connected" },
467 { APM_NOT_CONNECTED, "Interface not connected" },
468 { APM_16_CONNECTED, "16 bit interface already connected" },
469/* N/A { APM_16_UNSUPPORTED, "16 bit interface not supported" }, */
470 { APM_32_CONNECTED, "32 bit interface already connected" },
471 { APM_32_UNSUPPORTED, "32 bit interface not supported" },
472 { APM_BAD_DEVICE, "Unrecognized device ID" },
473 { APM_BAD_PARAM, "Parameter out of range" },
474 { APM_NOT_ENGAGED, "Interface not engaged" },
475 { APM_BAD_FUNCTION, "Function not supported" },
476 { APM_RESUME_DISABLED, "Resume timer disabled" },
477 { APM_BAD_STATE, "Unable to enter requested state" },
478/* N/A { APM_NO_EVENTS, "No events pending" }, */
479 { APM_NO_ERROR, "BIOS did not set a return code" },
480 { APM_NOT_PRESENT, "No APM present" }
481};
482#define ERROR_COUNT (sizeof(error_table)/sizeof(lookup_t))
483
484/**
485 * apm_error - display an APM error
486 * @str: information string
487 * @err: APM BIOS return code
488 *
489 * Write a meaningful log entry to the kernel log in the event of
490 * an APM error.
491 */
492
493static void apm_error(char *str, int err)
494{
495 int i;
496
497 for (i = 0; i < ERROR_COUNT; i++)
498 if (error_table[i].key == err) break;
499 if (i < ERROR_COUNT)
500 printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg);
501 else
502 printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n",
503 str, err);
504}
505
506/*
507 * Lock APM functionality to physical CPU 0
508 */
509
510#ifdef CONFIG_SMP
511
512static cpumask_t apm_save_cpus(void)
513{
514 cpumask_t x = current->cpus_allowed;
515 /* Some bioses don't like being called from CPU != 0 */
516 set_cpus_allowed(current, cpumask_of_cpu(0));
517 BUG_ON(smp_processor_id() != 0);
518 return x;
519}
520
521static inline void apm_restore_cpus(cpumask_t mask)
522{
523 set_cpus_allowed(current, mask);
524}
525
526#else
527
528/*
529 * No CPU lockdown needed on a uniprocessor
530 */
531
532#define apm_save_cpus() (current->cpus_allowed)
533#define apm_restore_cpus(x) (void)(x)
534
535#endif
536
537/*
538 * These are the actual BIOS calls. Depending on APM_ZERO_SEGS and
539 * apm_info.allow_ints, we are being really paranoid here! Not only
540 * are interrupts disabled, but all the segment registers (except SS)
541 * are saved and zeroed this means that if the BIOS tries to reference
542 * any data without explicitly loading the segment registers, the kernel
543 * will fault immediately rather than have some unforeseen circumstances
544 * for the rest of the kernel. And it will be very obvious! :-) Doing
545 * this depends on CS referring to the same physical memory as DS so that
546 * DS can be zeroed before the call. Unfortunately, we can't do anything
547 * about the stack segment/pointer. Also, we tell the compiler that
548 * everything could change.
549 *
550	 * Also, we KNOW that for the non-error case of apm_bios_call, there
551 * is no useful data returned in the low order 8 bits of eax.
552 */
553#define APM_DO_CLI \
554 if (apm_info.allow_ints) \
555 local_irq_enable(); \
556 else \
557 local_irq_disable();
558
559#ifdef APM_ZERO_SEGS
560# define APM_DECL_SEGS \
561 unsigned int saved_fs; unsigned int saved_gs;
562# define APM_DO_SAVE_SEGS \
563 savesegment(fs, saved_fs); savesegment(gs, saved_gs)
564# define APM_DO_RESTORE_SEGS \
565 loadsegment(fs, saved_fs); loadsegment(gs, saved_gs)
566#else
567# define APM_DECL_SEGS
568# define APM_DO_SAVE_SEGS
569# define APM_DO_RESTORE_SEGS
570#endif
571
572/**
573 * apm_bios_call - Make an APM BIOS 32bit call
574 * @func: APM function to execute
575 * @ebx_in: EBX register for call entry
576 * @ecx_in: ECX register for call entry
577 * @eax: EAX register return
578 * @ebx: EBX register return
579 * @ecx: ECX register return
580 * @edx: EDX register return
581 * @esi: ESI register return
582 *
583 * Make an APM call using the 32bit protected mode interface. The
584 * caller is responsible for knowing if APM BIOS is configured and
585 * enabled. This call can disable interrupts for a long period of
586 * time on some laptops. The return value is in AH and the carry
587 * flag is loaded into AL. If there is an error, then the error
588 * code is returned in AH (bits 8-15 of eax) and this function
589 * returns non-zero.
590 */
591
592static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
593 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi)
594{
595 APM_DECL_SEGS
596 unsigned long flags;
597 cpumask_t cpus;
598 int cpu;
599 struct desc_struct save_desc_40;
600
601 cpus = apm_save_cpus();
602
603 cpu = get_cpu();
604 save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8];
605 per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc;
606
607 local_save_flags(flags);
608 APM_DO_CLI;
609 APM_DO_SAVE_SEGS;
610 apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi);
611 APM_DO_RESTORE_SEGS;
612 local_irq_restore(flags);
613 per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = save_desc_40;
614 put_cpu();
615 apm_restore_cpus(cpus);
616
617 return *eax & 0xff;
618}
619
620/**
621 * apm_bios_call_simple - make a simple APM BIOS 32bit call
622 * @func: APM function to invoke
623 * @ebx_in: EBX register value for BIOS call
624 * @ecx_in: ECX register value for BIOS call
625 * @eax: EAX register on return from the BIOS call
626 *
627	 *	Make a BIOS call that only returns one value, or just a status.
628 * If there is an error, then the error code is returned in AH
629 * (bits 8-15 of eax) and this function returns non-zero. This is
630 * used for simpler BIOS operations. This call may hold interrupts
631 * off for a long time on some laptops.
632 */
633
634static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax)
635{
636 u8 error;
637 APM_DECL_SEGS
638 unsigned long flags;
639 cpumask_t cpus;
640 int cpu;
641 struct desc_struct save_desc_40;
642
643
644 cpus = apm_save_cpus();
645
646 cpu = get_cpu();
647 save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8];
648 per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc;
649
650 local_save_flags(flags);
651 APM_DO_CLI;
652 APM_DO_SAVE_SEGS;
653 error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax);
654 APM_DO_RESTORE_SEGS;
655 local_irq_restore(flags);
656 __get_cpu_var(cpu_gdt_table)[0x40 / 8] = save_desc_40;
657 put_cpu();
658 apm_restore_cpus(cpus);
659 return error;
660}
661
662/**
663 * apm_driver_version - APM driver version
664 * @val: loaded with the APM version on return
665 *
666 * Retrieve the APM version supported by the BIOS. This is only
667 * supported for APM 1.1 or higher. An error indicates APM 1.0 is
668 * probably present.
669 *
670 * On entry val should point to a value indicating the APM driver
671 * version with the high byte being the major and the low byte the
672	 *	minor number, both in BCD.
673 *
674 * On return it will hold the BIOS revision supported in the
675 * same format.
676 */
677
678static int apm_driver_version(u_short *val)
679{
680 u32 eax;
681
682 if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax))
683 return (eax >> 8) & 0xff;
684 *val = eax;
685 return APM_SUCCESS;
686}
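/*
 * For example (values are illustrative): a driver claiming APM 1.2
 * passes in *val == 0x0102 (BCD: major 1, minor 2); on APM_SUCCESS
 * the BIOS overwrites it with the highest version it supports, e.g.
 * 0x0101 from an APM 1.1 BIOS.
 */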
687
688/**
689 * apm_get_event - get an APM event from the BIOS
690 * @event: pointer to the event
691 *	@info: pointer to the event information
692 *
693 *	The APM BIOS provides a polled interface for event
694 * reporting. The BIOS expects to be polled at least every second
695 * when events are pending. When a message is found the caller should
696 * poll until no more messages are present. However, this causes
697 * problems on some laptops where a suspend event notification is
698 * not cleared until it is acknowledged.
699 *
700 *	Additional information is returned in the info pointer, provided
701 *	that APM 1.2 is in use. If no messages are pending the value 0x80
702 * is returned (No power management events pending).
703 */
704
705static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info)
706{
707 u32 eax;
708 u32 ebx;
709 u32 ecx;
710 u32 dummy;
711
712 if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx,
713 &dummy, &dummy))
714 return (eax >> 8) & 0xff;
715 *event = ebx;
716 if (apm_info.connection_version < 0x0102)
717 *info = ~0; /* indicate info not valid */
718 else
719 *info = ecx;
720 return APM_SUCCESS;
721}
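/*
 * A minimal sketch of the polling protocol described above (error
 * handling omitted; the real consumer is the kernel event loop later
 * in this driver):
 *
 *	apm_event_t event;
 *	apm_eventinfo_t info;
 *
 *	while (apm_get_event(&event, &info) == APM_SUCCESS)
 *		queue_event(event, NULL);
 */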
722
723/**
724 * set_power_state - set the power management state
725 * @what: which items to transition
726 * @state: state to transition to
727 *
728 * Request an APM change of state for one or more system devices. The
729 * processor state must be transitioned last of all. what holds the
730 * class of device in the upper byte and the device number (0xFF for
731 * all) for the object to be transitioned.
732 *
733 * The state holds the state to transition to, which may in fact
734 * be an acceptance of a BIOS requested state change.
735 */
736
737static int set_power_state(u_short what, u_short state)
738{
739 u32 eax;
740
741 if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax))
742 return (eax >> 8) & 0xff;
743 return APM_SUCCESS;
744}
745
746/**
747 * set_system_power_state - set system wide power state
748 * @state: which state to enter
749 *
750 * Transition the entire system into a new APM power state.
751 */
752
753static int set_system_power_state(u_short state)
754{
755 return set_power_state(APM_DEVICE_ALL, state);
756}
757
758/**
759 * apm_do_idle - perform power saving
760 *
761 * This function notifies the BIOS that the processor is (in the view
762 * of the OS) idle. It returns -1 in the event that the BIOS refuses
763 *	to handle the idle request. On success the function returns 1
764 * if the BIOS did clock slowing or 0 otherwise.
765 */
766
767static int apm_do_idle(void)
768{
769 u32 eax;
770
771 if (apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax)) {
772 static unsigned long t;
773
774 /* This always fails on some SMP boards running UP kernels.
775 * Only report the failure the first 5 times.
776 */
777 if (++t < 5)
778 {
779 printk(KERN_DEBUG "apm_do_idle failed (%d)\n",
780 (eax >> 8) & 0xff);
781 t = jiffies;
782 }
783 return -1;
784 }
785 clock_slowed = (apm_info.bios.flags & APM_IDLE_SLOWS_CLOCK) != 0;
786 return clock_slowed;
787}
788
789/**
790 * apm_do_busy - inform the BIOS the CPU is busy
791 *
792 *	Request that the BIOS bring the CPU back to full performance.
793 */
794
795static void apm_do_busy(void)
796{
797 u32 dummy;
798
799 if (clock_slowed || ALWAYS_CALL_BUSY) {
800 (void) apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy);
801 clock_slowed = 0;
802 }
803}
804
805/*
806 * If no process has really been interested in
807 * the CPU for some time, we want to call BIOS
808 * power management - we probably want
809 * to conserve power.
810 */
811#define IDLE_CALC_LIMIT (HZ * 100)
812#define IDLE_LEAKY_MAX 16
813
814static void (*original_pm_idle)(void);
815
816extern void default_idle(void);
817
818/**
819 * apm_cpu_idle - cpu idling for APM capable Linux
820 *
821 * This is the idling function the kernel executes when APM is available. It
822 * tries to do BIOS power management based on the average system idle time.
823 * Furthermore it calls the system default idle routine.
824 */
825
826static void apm_cpu_idle(void)
827{
828 static int use_apm_idle; /* = 0 */
829 static unsigned int last_jiffies; /* = 0 */
830 static unsigned int last_stime; /* = 0 */
831
832 int apm_idle_done = 0;
833 unsigned int jiffies_since_last_check = jiffies - last_jiffies;
834 unsigned int bucket;
835
836recalc:
837 if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
838 use_apm_idle = 0;
839 last_jiffies = jiffies;
840 last_stime = current->stime;
841 } else if (jiffies_since_last_check > idle_period) {
842 unsigned int idle_percentage;
843
844 idle_percentage = current->stime - last_stime;
845 idle_percentage *= 100;
846 idle_percentage /= jiffies_since_last_check;
847 use_apm_idle = (idle_percentage > idle_threshold);
848 if (apm_info.forbid_idle)
849 use_apm_idle = 0;
850 last_jiffies = jiffies;
851 last_stime = current->stime;
852 }
853
854 bucket = IDLE_LEAKY_MAX;
855
856 while (!need_resched()) {
857 if (use_apm_idle) {
858 unsigned int t;
859
860 t = jiffies;
861 switch (apm_do_idle()) {
862 case 0: apm_idle_done = 1;
863 if (t != jiffies) {
864 if (bucket) {
865 bucket = IDLE_LEAKY_MAX;
866 continue;
867 }
868 } else if (bucket) {
869 bucket--;
870 continue;
871 }
872 break;
873 case 1: apm_idle_done = 1;
874 break;
875 default: /* BIOS refused */
876 break;
877 }
878 }
879 if (original_pm_idle)
880 original_pm_idle();
881 else
882 default_idle();
883 jiffies_since_last_check = jiffies - last_jiffies;
884 if (jiffies_since_last_check > idle_period)
885 goto recalc;
886 }
887
888 if (apm_idle_done)
889 apm_do_busy();
890}
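/*
 * A worked example of the recalculation above, with illustrative
 * numbers: if 40 jiffies passed since the last check and the idle
 * task's stime grew by 39 of them, idle_percentage == 39 * 100 / 40
 * == 97, which exceeds the default threshold of 95 on
 * CONFIG_APM_CPU_IDLE kernels, so use_apm_idle is set and subsequent
 * idle loops call apm_do_idle().
 */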
891
892/**
893 * apm_power_off - ask the BIOS to power off
894 *
895 * Handle the power off sequence. This is the one piece of code we
896 * will execute even on SMP machines. In order to deal with BIOS
897 * bugs we support real mode APM BIOS power off calls. We also make
898 * the SMP call on CPU0 as some systems will only honour this call
899 * on their first cpu.
900 */
901
902static void apm_power_off(void)
903{
904 unsigned char po_bios_call[] = {
905 0xb8, 0x00, 0x10, /* movw $0x1000,ax */
906 0x8e, 0xd0, /* movw ax,ss */
907 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */
908 0xb8, 0x07, 0x53, /* movw $0x5307,ax */
909 0xbb, 0x01, 0x00, /* movw $0x0001,bx */
910 0xb9, 0x03, 0x00, /* movw $0x0003,cx */
911 0xcd, 0x15 /* int $0x15 */
912 };
913
914 /*
915 * This may be called on an SMP machine.
916 */
917#ifdef CONFIG_SMP
918 /* Some bioses don't like being called from CPU != 0 */
919 set_cpus_allowed(current, cpumask_of_cpu(0));
920 BUG_ON(smp_processor_id() != 0);
921#endif
922 if (apm_info.realmode_power_off)
923 {
924 (void)apm_save_cpus();
925 machine_real_restart(po_bios_call, sizeof(po_bios_call));
926 }
927 else
928 (void) set_system_power_state(APM_STATE_OFF);
929}
930
931#ifdef CONFIG_APM_DO_ENABLE
932
933/**
934 * apm_enable_power_management - enable BIOS APM power management
935 * @enable: enable yes/no
936 *
937 * Enable or disable the APM BIOS power services.
938 */
939
940static int apm_enable_power_management(int enable)
941{
942 u32 eax;
943
944 if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED))
945 return APM_NOT_ENGAGED;
946 if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL,
947 enable, &eax))
948 return (eax >> 8) & 0xff;
949 if (enable)
950 apm_info.bios.flags &= ~APM_BIOS_DISABLED;
951 else
952 apm_info.bios.flags |= APM_BIOS_DISABLED;
953 return APM_SUCCESS;
954}
955#endif
956
957/**
958 * apm_get_power_status - get current power state
959 * @status: returned status
960 * @bat: battery info
961 * @life: estimated life
962 *
963 * Obtain the current power status from the APM BIOS. We return a
964 *	status which gives the rough battery status and current power
965 *	source. The bat value returned gives an estimate as a percentage
966 *	of life and a status value for the battery. The estimated life,
967 *	if reported, is a lifetime in seconds/minutes at current power
968 *	consumption.
969 */
970
971static int apm_get_power_status(u_short *status, u_short *bat, u_short *life)
972{
973 u32 eax;
974 u32 ebx;
975 u32 ecx;
976 u32 edx;
977 u32 dummy;
978
979 if (apm_info.get_power_status_broken)
980 return APM_32_UNSUPPORTED;
981 if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0,
982 &eax, &ebx, &ecx, &edx, &dummy))
983 return (eax >> 8) & 0xff;
984 *status = ebx;
985 *bat = ecx;
986 if (apm_info.get_power_status_swabinminutes) {
987 *life = swab16((u16)edx);
988 *life |= 0x8000;
989 } else
990 *life = edx;
991 return APM_SUCCESS;
992}
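/*
 * Example of the byte-swap workaround above (illustrative values): a
 * BIOS with get_power_status_swabinminutes set reports the remaining
 * life byte-swapped and in minutes, so edx == 0x2301 really means
 * 0x0123 (291) minutes; swab16() restores the order and the 0x8000
 * bit marks the value as minutes rather than seconds, per the APM 1.2
 * time-units convention.
 */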
993
994#if 0
995static int apm_get_battery_status(u_short which, u_short *status,
996 u_short *bat, u_short *life, u_short *nbat)
997{
998 u32 eax;
999 u32 ebx;
1000 u32 ecx;
1001 u32 edx;
1002 u32 esi;
1003
1004 if (apm_info.connection_version < 0x0102) {
1005 /* pretend we only have one battery. */
1006 if (which != 1)
1007 return APM_BAD_DEVICE;
1008 *nbat = 1;
1009 return apm_get_power_status(status, bat, life);
1010 }
1011
1012 if (apm_bios_call(APM_FUNC_GET_STATUS, (0x8000 | (which)), 0, &eax,
1013 &ebx, &ecx, &edx, &esi))
1014 return (eax >> 8) & 0xff;
1015 *status = ebx;
1016 *bat = ecx;
1017 *life = edx;
1018 *nbat = esi;
1019 return APM_SUCCESS;
1020}
1021#endif
1022
1023/**
1024 * apm_engage_power_management - enable PM on a device
1025 * @device: identity of device
1026 * @enable: on/off
1027 *
1028 *	Activate or deactivate power management on either a specific device
1029 * or the entire system (%APM_DEVICE_ALL).
1030 */
1031
1032static int apm_engage_power_management(u_short device, int enable)
1033{
1034 u32 eax;
1035
1036 if ((enable == 0) && (device == APM_DEVICE_ALL)
1037 && (apm_info.bios.flags & APM_BIOS_DISABLED))
1038 return APM_DISABLED;
1039 if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, &eax))
1040 return (eax >> 8) & 0xff;
1041 if (device == APM_DEVICE_ALL) {
1042 if (enable)
1043 apm_info.bios.flags &= ~APM_BIOS_DISENGAGED;
1044 else
1045 apm_info.bios.flags |= APM_BIOS_DISENGAGED;
1046 }
1047 return APM_SUCCESS;
1048}
1049
1050#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
1051
1052/**
1053 * apm_console_blank - blank the display
1054 * @blank: on/off
1055 *
1056 *	Attempt to blank the console, first by blanking just video device
1057 *	zero and, if that fails (some BIOSes don't support it), by blanking
1058 *	all video devices. Typically the BIOS will do laptop backlight and
1059 * monitor powerdown for us.
1060 */
1061
1062static int apm_console_blank(int blank)
1063{
1064 int error;
1065 u_short state;
1066
1067 state = blank ? APM_STATE_STANDBY : APM_STATE_READY;
1068 /* Blank the first display device */
1069 error = set_power_state(0x100, state);
1070 if ((error != APM_SUCCESS) && (error != APM_NO_ERROR)) {
1071 /* try to blank them all instead */
1072 error = set_power_state(0x1ff, state);
1073 if ((error != APM_SUCCESS) && (error != APM_NO_ERROR))
1074 /* try to blank device one instead */
1075 error = set_power_state(0x101, state);
1076 }
1077 if ((error == APM_SUCCESS) || (error == APM_NO_ERROR))
1078 return 1;
1079 if (error == APM_NOT_ENGAGED) {
1080 static int tried;
1081 int eng_error;
1082 if (tried++ == 0) {
1083 eng_error = apm_engage_power_management(APM_DEVICE_ALL, 1);
1084 if (eng_error) {
1085 apm_error("set display", error);
1086 apm_error("engage interface", eng_error);
1087 return 0;
1088 } else
1089 return apm_console_blank(blank);
1090 }
1091 }
1092 apm_error("set display", error);
1093 return 0;
1094}
1095#endif
1096
1097static int queue_empty(struct apm_user *as)
1098{
1099 return as->event_head == as->event_tail;
1100}
1101
1102static apm_event_t get_queued_event(struct apm_user *as)
1103{
1104 as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS;
1105 return as->events[as->event_tail];
1106}
1107
1108static void queue_event(apm_event_t event, struct apm_user *sender)
1109{
1110 struct apm_user * as;
1111
1112 spin_lock(&user_list_lock);
1113 if (user_list == NULL)
1114 goto out;
1115 for (as = user_list; as != NULL; as = as->next) {
1116 if ((as == sender) || (!as->reader))
1117 continue;
1118 as->event_head = (as->event_head + 1) % APM_MAX_EVENTS;
1119 if (as->event_head == as->event_tail) {
1120 static int notified;
1121
1122 if (notified++ == 0)
1123 printk(KERN_ERR "apm: an event queue overflowed\n");
1124 as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS;
1125 }
1126 as->events[as->event_head] = event;
1127 if ((!as->suser) || (!as->writer))
1128 continue;
1129 switch (event) {
1130 case APM_SYS_SUSPEND:
1131 case APM_USER_SUSPEND:
1132 as->suspends_pending++;
1133 suspends_pending++;
1134 break;
1135
1136 case APM_SYS_STANDBY:
1137 case APM_USER_STANDBY:
1138 as->standbys_pending++;
1139 standbys_pending++;
1140 break;
1141 }
1142 }
1143 wake_up_interruptible(&apm_waitqueue);
1144out:
1145 spin_unlock(&user_list_lock);
1146}
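/*
 * Illustration only: the per-user event queue above is a ring buffer
 * where head == tail means empty and an overflow drops the oldest
 * event by advancing the tail.  A minimal sketch of the same
 * discipline with hypothetical names (not part of the driver):
 */
#if 0
#define RING_SIZE 8			/* stands in for APM_MAX_EVENTS */

struct ring {
	int head, tail;
	apm_event_t slot[RING_SIZE];
};

static void ring_push(struct ring *r, apm_event_t ev)
{
	r->head = (r->head + 1) % RING_SIZE;
	if (r->head == r->tail)		/* full: drop the oldest */
		r->tail = (r->tail + 1) % RING_SIZE;
	r->slot[r->head] = ev;
}

static int ring_pop(struct ring *r, apm_event_t *ev)
{
	if (r->head == r->tail)		/* empty */
		return 0;
	r->tail = (r->tail + 1) % RING_SIZE;
	*ev = r->slot[r->tail];
	return 1;
}
#endif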
1147
1148static void set_time(void)
1149{
1150 if (got_clock_diff) { /* Must know time zone in order to set clock */
1151 xtime.tv_sec = get_cmos_time() + clock_cmos_diff;
1152 xtime.tv_nsec = 0;
1153 }
1154}
1155
1156static void get_time_diff(void)
1157{
1158#ifndef CONFIG_APM_RTC_IS_GMT
1159 /*
1160 * Estimate time zone so that set_time can update the clock
1161 */
1162 clock_cmos_diff = -get_cmos_time();
1163 clock_cmos_diff += get_seconds();
1164 got_clock_diff = 1;
1165#endif
1166}
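/*
 * Illustration only: across a suspend the CMOS clock keeps running, so
 * the pair of helpers above amounts to
 *
 *	clock_cmos_diff = get_seconds() - get_cmos_time()	(before sleep)
 *	xtime.tv_sec    = get_cmos_time() + clock_cmos_diff	(after resume)
 *
 * which restores system time while preserving whatever offset (time
 * zone, drift) existed between the two clocks when we went down.
 */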
1167
1168static void reinit_timer(void)
1169{
1170#ifdef INIT_TIMER_AFTER_SUSPEND
1171 unsigned long flags;
1172 extern spinlock_t i8253_lock;
1173
1174 spin_lock_irqsave(&i8253_lock, flags);
1175 /* set the clock to 100 Hz */
1176 outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
1177 udelay(10);
1178 outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
1179 udelay(10);
1180 outb(LATCH >> 8, PIT_CH0); /* MSB */
1181 udelay(10);
1182 spin_unlock_irqrestore(&i8253_lock, flags);
1183#endif
1184}
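/*
 * Illustration only: LATCH above is the PIT divisor that yields HZ
 * timer interrupts per second from the PIT's 1.19318 MHz input clock.
 * Assuming the usual kernel definition
 *
 *	LATCH = (CLOCK_TICK_RATE + HZ/2) / HZ
 *
 * a HZ of 100 gives (1193182 + 50) / 100 = 11932, so the "100 Hz" in
 * the comment above holds for HZ == 100; in general the timer is
 * simply restored to HZ interrupts per second.
 */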
1185
1186static int suspend(int vetoable)
1187{
1188 int err;
1189 struct apm_user *as;
1190
1191 if (pm_send_all(PM_SUSPEND, (void *)3)) {
1192 /* Vetoed */
1193 if (vetoable) {
1194 if (apm_info.connection_version > 0x100)
1195 set_system_power_state(APM_STATE_REJECT);
1196 err = -EBUSY;
1197 ignore_sys_suspend = 0;
1198 printk(KERN_WARNING "apm: suspend was vetoed.\n");
1199 goto out;
1200 }
1201 printk(KERN_CRIT "apm: suspend was vetoed, but suspending anyway.\n");
1202 }
1203
1204 device_suspend(PMSG_SUSPEND);
1205 local_irq_disable();
1206 device_power_down(PMSG_SUSPEND);
1207
1208 /* serialize with the timer interrupt */
1209 write_seqlock(&xtime_lock);
1210
1211 /* protect against access to timer chip registers */
1212 spin_lock(&i8253_lock);
1213
1214 get_time_diff();
1215 /*
1216 * Irq spinlock must be dropped around set_system_power_state.
1217 * We'll undo any timer changes due to interrupts below.
1218 */
1219 spin_unlock(&i8253_lock);
1220 write_sequnlock(&xtime_lock);
1221 local_irq_enable();
1222
1223 save_processor_state();
1224 err = set_system_power_state(APM_STATE_SUSPEND);
1225 restore_processor_state();
1226
1227 local_irq_disable();
1228 write_seqlock(&xtime_lock);
1229 spin_lock(&i8253_lock);
1230 reinit_timer();
1231 set_time();
1232 ignore_normal_resume = 1;
1233
1234 spin_unlock(&i8253_lock);
1235 write_sequnlock(&xtime_lock);
1236
1237 if (err == APM_NO_ERROR)
1238 err = APM_SUCCESS;
1239 if (err != APM_SUCCESS)
1240 apm_error("suspend", err);
1241 err = (err == APM_SUCCESS) ? 0 : -EIO;
1242 device_power_up();
1243 local_irq_enable();
1244 device_resume();
1245 pm_send_all(PM_RESUME, (void *)0);
1246 queue_event(APM_NORMAL_RESUME, NULL);
1247 out:
1248 spin_lock(&user_list_lock);
1249 for (as = user_list; as != NULL; as = as->next) {
1250 as->suspend_wait = 0;
1251 as->suspend_result = err;
1252 }
1253 spin_unlock(&user_list_lock);
1254 wake_up_interruptible(&apm_suspend_waitqueue);
1255 return err;
1256}
1257
1258static void standby(void)
1259{
1260 int err;
1261
1262 local_irq_disable();
1263 device_power_down(PMSG_SUSPEND);
1264 /* serialize with the timer interrupt */
1265 write_seqlock(&xtime_lock);
1266 /* If needed, notify drivers here */
1267 get_time_diff();
1268 write_sequnlock(&xtime_lock);
1269 local_irq_enable();
1270
1271 err = set_system_power_state(APM_STATE_STANDBY);
1272 if ((err != APM_SUCCESS) && (err != APM_NO_ERROR))
1273 apm_error("standby", err);
1274
1275 local_irq_disable();
1276 device_power_up();
1277 local_irq_enable();
1278}
1279
1280static apm_event_t get_event(void)
1281{
1282 int error;
1283 apm_event_t event;
1284 apm_eventinfo_t info;
1285
1286 static int notified;
1287
1288 /* we don't use the eventinfo */
1289 error = apm_get_event(&event, &info);
1290 if (error == APM_SUCCESS)
1291 return event;
1292
1293 if ((error != APM_NO_EVENTS) && (notified++ == 0))
1294 apm_error("get_event", error);
1295
1296 return 0;
1297}
1298
1299static void check_events(void)
1300{
1301 apm_event_t event;
1302 static unsigned long last_resume;
1303 static int ignore_bounce;
1304
1305 while ((event = get_event()) != 0) {
1306 if (debug) {
1307 if (event <= NR_APM_EVENT_NAME)
1308 printk(KERN_DEBUG "apm: received %s notify\n",
1309 apm_event_name[event - 1]);
1310 else
1311 printk(KERN_DEBUG "apm: received unknown "
1312 "event 0x%02x\n", event);
1313 }
1314 if (ignore_bounce
1315 && ((jiffies - last_resume) > bounce_interval))
1316 ignore_bounce = 0;
1317
1318 switch (event) {
1319 case APM_SYS_STANDBY:
1320 case APM_USER_STANDBY:
1321 queue_event(event, NULL);
1322 if (standbys_pending <= 0)
1323 standby();
1324 break;
1325
1326 case APM_USER_SUSPEND:
1327#ifdef CONFIG_APM_IGNORE_USER_SUSPEND
1328 if (apm_info.connection_version > 0x100)
1329 set_system_power_state(APM_STATE_REJECT);
1330 break;
1331#endif
1332 case APM_SYS_SUSPEND:
1333 if (ignore_bounce) {
1334 if (apm_info.connection_version > 0x100)
1335 set_system_power_state(APM_STATE_REJECT);
1336 break;
1337 }
1338 /*
1339 * If we are already processing a SUSPEND,
1340 * then further SUSPEND events from the BIOS
1341 * will be ignored. We also return here to
1342 * cope with the fact that the Thinkpads keep
1343 * sending a SUSPEND event until something else
1344 * happens!
1345 */
1346 if (ignore_sys_suspend)
1347 return;
1348 ignore_sys_suspend = 1;
1349 queue_event(event, NULL);
1350 if (suspends_pending <= 0)
1351 (void) suspend(1);
1352 break;
1353
1354 case APM_NORMAL_RESUME:
1355 case APM_CRITICAL_RESUME:
1356 case APM_STANDBY_RESUME:
1357 ignore_sys_suspend = 0;
1358 last_resume = jiffies;
1359 ignore_bounce = 1;
1360 if ((event != APM_NORMAL_RESUME)
1361 || (ignore_normal_resume == 0)) {
1362 write_seqlock_irq(&xtime_lock);
1363 set_time();
1364 write_sequnlock_irq(&xtime_lock);
1365 device_resume();
1366 pm_send_all(PM_RESUME, (void *)0);
1367 queue_event(event, NULL);
1368 }
1369 ignore_normal_resume = 0;
1370 break;
1371
1372 case APM_CAPABILITY_CHANGE:
1373 case APM_LOW_BATTERY:
1374 case APM_POWER_STATUS_CHANGE:
1375 queue_event(event, NULL);
1376 /* If needed, notify drivers here */
1377 break;
1378
1379 case APM_UPDATE_TIME:
1380 write_seqlock_irq(&xtime_lock);
1381 set_time();
1382 write_sequnlock_irq(&xtime_lock);
1383 break;
1384
1385 case APM_CRITICAL_SUSPEND:
1386 /*
1387 * We are not allowed to reject a critical suspend.
1388 */
1389 (void) suspend(0);
1390 break;
1391 }
1392 }
1393}
1394
1395static void apm_event_handler(void)
1396{
1397 static int pending_count = 4;
1398 int err;
1399
1400 if ((standbys_pending > 0) || (suspends_pending > 0)) {
1401 if ((apm_info.connection_version > 0x100) &&
1402 (pending_count-- <= 0)) {
1403 pending_count = 4;
1404 if (debug)
1405 printk(KERN_DEBUG "apm: setting state busy\n");
1406 err = set_system_power_state(APM_STATE_BUSY);
1407 if (err)
1408 apm_error("busy", err);
1409 }
1410 } else
1411 pending_count = 4;
1412 check_events();
1413}
1414
1415/*
1416 * This is the APM thread main loop.
1417 */
1418
1419static void apm_mainloop(void)
1420{
1421 DECLARE_WAITQUEUE(wait, current);
1422
1423 add_wait_queue(&apm_waitqueue, &wait);
1424 set_current_state(TASK_INTERRUPTIBLE);
1425 for (;;) {
1426 schedule_timeout(APM_CHECK_TIMEOUT);
1427 if (exit_kapmd)
1428 break;
1429 /*
1430 * Ok, check all events, check for idle (and mark us sleeping
1431 * so as not to count towards the load average)..
1432 */
1433 set_current_state(TASK_INTERRUPTIBLE);
1434 apm_event_handler();
1435 }
1436 remove_wait_queue(&apm_waitqueue, &wait);
1437}
1438
1439static int check_apm_user(struct apm_user *as, const char *func)
1440{
1441 if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) {
1442 printk(KERN_ERR "apm: %s passed bad filp\n", func);
1443 return 1;
1444 }
1445 return 0;
1446}
1447
1448static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos)
1449{
1450 struct apm_user * as;
1451 int i;
1452 apm_event_t event;
1453
1454 as = fp->private_data;
1455 if (check_apm_user(as, "read"))
1456 return -EIO;
1457 if ((int)count < sizeof(apm_event_t))
1458 return -EINVAL;
1459 if ((queue_empty(as)) && (fp->f_flags & O_NONBLOCK))
1460 return -EAGAIN;
1461 wait_event_interruptible(apm_waitqueue, !queue_empty(as));
1462 i = count;
1463 while ((i >= sizeof(event)) && !queue_empty(as)) {
1464 event = get_queued_event(as);
1465 if (copy_to_user(buf, &event, sizeof(event))) {
1466 if (i < count)
1467 break;
1468 return -EFAULT;
1469 }
1470 switch (event) {
1471 case APM_SYS_SUSPEND:
1472 case APM_USER_SUSPEND:
1473 as->suspends_read++;
1474 break;
1475
1476 case APM_SYS_STANDBY:
1477 case APM_USER_STANDBY:
1478 as->standbys_read++;
1479 break;
1480 }
1481 buf += sizeof(event);
1482 i -= sizeof(event);
1483 }
1484 if (i < count)
1485 return count - i;
1486 if (signal_pending(current))
1487 return -ERESTARTSYS;
1488 return 0;
1489}
1490
1491static unsigned int do_poll(struct file *fp, poll_table * wait)
1492{
1493 struct apm_user * as;
1494
1495 as = fp->private_data;
1496 if (check_apm_user(as, "poll"))
1497 return 0;
1498 poll_wait(fp, &apm_waitqueue, wait);
1499 if (!queue_empty(as))
1500 return POLLIN | POLLRDNORM;
1501 return 0;
1502}
1503
1504static int do_ioctl(struct inode * inode, struct file *filp,
1505 u_int cmd, u_long arg)
1506{
1507 struct apm_user * as;
1508
1509 as = filp->private_data;
1510 if (check_apm_user(as, "ioctl"))
1511 return -EIO;
1512 if ((!as->suser) || (!as->writer))
1513 return -EPERM;
1514 switch (cmd) {
1515 case APM_IOC_STANDBY:
1516 if (as->standbys_read > 0) {
1517 as->standbys_read--;
1518 as->standbys_pending--;
1519 standbys_pending--;
1520 } else
1521 queue_event(APM_USER_STANDBY, as);
1522 if (standbys_pending <= 0)
1523 standby();
1524 break;
1525 case APM_IOC_SUSPEND:
1526 if (as->suspends_read > 0) {
1527 as->suspends_read--;
1528 as->suspends_pending--;
1529 suspends_pending--;
1530 } else
1531 queue_event(APM_USER_SUSPEND, as);
1532 if (suspends_pending <= 0) {
1533 return suspend(1);
1534 } else {
1535 as->suspend_wait = 1;
1536 wait_event_interruptible(apm_suspend_waitqueue,
1537 as->suspend_wait == 0);
1538 return as->suspend_result;
1539 }
1540 break;
1541 default:
1542 return -EINVAL;
1543 }
1544 return 0;
1545}
1546
1547static int do_release(struct inode * inode, struct file * filp)
1548{
1549 struct apm_user * as;
1550
1551 as = filp->private_data;
1552 if (check_apm_user(as, "release"))
1553 return 0;
1554 filp->private_data = NULL;
1555 if (as->standbys_pending > 0) {
1556 standbys_pending -= as->standbys_pending;
1557 if (standbys_pending <= 0)
1558 standby();
1559 }
1560 if (as->suspends_pending > 0) {
1561 suspends_pending -= as->suspends_pending;
1562 if (suspends_pending <= 0)
1563 (void) suspend(1);
1564 }
1565 spin_lock(&user_list_lock);
1566 if (user_list == as)
1567 user_list = as->next;
1568 else {
1569 struct apm_user * as1;
1570
1571 for (as1 = user_list;
1572 (as1 != NULL) && (as1->next != as);
1573 as1 = as1->next)
1574 ;
1575 if (as1 == NULL)
1576 printk(KERN_ERR "apm: filp not in user list\n");
1577 else
1578 as1->next = as->next;
1579 }
1580 spin_unlock(&user_list_lock);
1581 kfree(as);
1582 return 0;
1583}
1584
1585static int do_open(struct inode * inode, struct file * filp)
1586{
1587 struct apm_user * as;
1588
1589 as = (struct apm_user *)kmalloc(sizeof(*as), GFP_KERNEL);
1590 if (as == NULL) {
1591 printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
1592			(int)sizeof(*as));
1593 return -ENOMEM;
1594 }
1595 as->magic = APM_BIOS_MAGIC;
1596 as->event_tail = as->event_head = 0;
1597 as->suspends_pending = as->standbys_pending = 0;
1598 as->suspends_read = as->standbys_read = 0;
1599 /*
1600 * XXX - this is a tiny bit broken, when we consider BSD
1601 * process accounting. If the device is opened by root, we
1602 * instantly flag that we used superuser privs. Who knows,
1603 * we might close the device immediately without doing a
1604 * privileged operation -- cevans
1605 */
1606 as->suser = capable(CAP_SYS_ADMIN);
1607 as->writer = (filp->f_mode & FMODE_WRITE) == FMODE_WRITE;
1608 as->reader = (filp->f_mode & FMODE_READ) == FMODE_READ;
1609 spin_lock(&user_list_lock);
1610 as->next = user_list;
1611 user_list = as;
1612 spin_unlock(&user_list_lock);
1613 filp->private_data = as;
1614 return 0;
1615}
1616
1617static int apm_get_info(char *buf, char **start, off_t fpos, int length)
1618{
1619 char * p;
1620 unsigned short bx;
1621 unsigned short cx;
1622 unsigned short dx;
1623 int error;
1624 unsigned short ac_line_status = 0xff;
1625 unsigned short battery_status = 0xff;
1626 unsigned short battery_flag = 0xff;
1627 int percentage = -1;
1628 int time_units = -1;
1629 char *units = "?";
1630
1631 p = buf;
1632
1633 if ((num_online_cpus() == 1) &&
1634 !(error = apm_get_power_status(&bx, &cx, &dx))) {
1635 ac_line_status = (bx >> 8) & 0xff;
1636 battery_status = bx & 0xff;
1637 if ((cx & 0xff) != 0xff)
1638 percentage = cx & 0xff;
1639
1640 if (apm_info.connection_version > 0x100) {
1641 battery_flag = (cx >> 8) & 0xff;
1642 if (dx != 0xffff) {
1643 units = (dx & 0x8000) ? "min" : "sec";
1644 time_units = dx & 0x7fff;
1645 }
1646 }
1647 }
1648 /* Arguments, with symbols from linux/apm_bios.h. Information is
1649 from the Get Power Status (0x0a) call unless otherwise noted.
1650
1651 0) Linux driver version (this will change if format changes)
1652 1) APM BIOS Version. Usually 1.0, 1.1 or 1.2.
1653 2) APM flags from APM Installation Check (0x00):
1654 bit 0: APM_16_BIT_SUPPORT
1655 bit 1: APM_32_BIT_SUPPORT
1656 bit 2: APM_IDLE_SLOWS_CLOCK
1657 bit 3: APM_BIOS_DISABLED
1658 bit 4: APM_BIOS_DISENGAGED
1659 3) AC line status
1660 0x00: Off-line
1661 0x01: On-line
1662 0x02: On backup power (BIOS >= 1.1 only)
1663 0xff: Unknown
1664 4) Battery status
1665 0x00: High
1666 0x01: Low
1667 0x02: Critical
1668 0x03: Charging
1669 0x04: Selected battery not present (BIOS >= 1.2 only)
1670 0xff: Unknown
1671 5) Battery flag
1672 bit 0: High
1673 bit 1: Low
1674 bit 2: Critical
1675 bit 3: Charging
1676 bit 7: No system battery
1677 0xff: Unknown
1678 6) Remaining battery life (percentage of charge):
1679 0-100: valid
1680 -1: Unknown
1681 7) Remaining battery life (time units):
1682 Number of remaining minutes or seconds
1683 -1: Unknown
1684 8) min = minutes; sec = seconds */
1685
1686 p += sprintf(p, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n",
1687 driver_version,
1688 (apm_info.bios.version >> 8) & 0xff,
1689 apm_info.bios.version & 0xff,
1690 apm_info.bios.flags,
1691 ac_line_status,
1692 battery_status,
1693 battery_flag,
1694 percentage,
1695 time_units,
1696 units);
1697
1698 return p - buf;
1699}
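/*
 * Illustration only: a userspace reader for the /proc/apm line printed
 * above.  This is a minimal sketch assuming the documented format; it
 * is not part of the driver and is not compiled with it.
 */
#if 0
#include <stdio.h>

int main(void)
{
	char drv[16], bios[16], units[8];
	unsigned int flags, ac, bstat, bflag;
	int pct, t;
	FILE *f = fopen("/proc/apm", "r");

	if (f == NULL)
		return 1;
	if (fscanf(f, "%15s %15s 0x%x 0x%x 0x%x 0x%x %d%% %d %7s",
		   drv, bios, &flags, &ac, &bstat, &bflag,
		   &pct, &t, units) == 9)
		printf("AC %s, battery %d%% (%d %s)\n",
		       ac == 1 ? "on-line" : "off-line", pct, t, units);
	fclose(f);
	return 0;
}
#endif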
1700
1701static int apm(void *unused)
1702{
1703 unsigned short bx;
1704 unsigned short cx;
1705 unsigned short dx;
1706 int error;
1707 char * power_stat;
1708 char * bat_stat;
1709
1710 kapmd_running = 1;
1711
1712 daemonize("kapmd");
1713
1714 current->flags |= PF_NOFREEZE;
1715
1716#ifdef CONFIG_SMP
1717 /* 2002/08/01 - WT
1718 * This is to avoid random crashes at boot time during initialization
1719 * on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D.
1720 * Some BIOSes don't like being called from CPU != 0.
1721 * Method suggested by Ingo Molnar.
1722 */
1723 set_cpus_allowed(current, cpumask_of_cpu(0));
1724 BUG_ON(smp_processor_id() != 0);
1725#endif
1726
1727 if (apm_info.connection_version == 0) {
1728 apm_info.connection_version = apm_info.bios.version;
1729 if (apm_info.connection_version > 0x100) {
1730 /*
1731 * We only support BIOSes up to version 1.2
1732 */
1733 if (apm_info.connection_version > 0x0102)
1734 apm_info.connection_version = 0x0102;
1735 error = apm_driver_version(&apm_info.connection_version);
1736 if (error != APM_SUCCESS) {
1737 apm_error("driver version", error);
1738 /* Fall back to an APM 1.0 connection. */
1739 apm_info.connection_version = 0x100;
1740 }
1741 }
1742 }
1743
1744 if (debug)
1745 printk(KERN_INFO "apm: Connection version %d.%d\n",
1746 (apm_info.connection_version >> 8) & 0xff,
1747 apm_info.connection_version & 0xff);
1748
1749#ifdef CONFIG_APM_DO_ENABLE
1750 if (apm_info.bios.flags & APM_BIOS_DISABLED) {
1751 /*
1752 * This call causes my NEC UltraLite Versa 33/C to hang if it
1753 * is booted with PM disabled but not in the docking station.
1754 * Unfortunate ...
1755 */
1756 error = apm_enable_power_management(1);
1757 if (error) {
1758 apm_error("enable power management", error);
1759 return -1;
1760 }
1761 }
1762#endif
1763
1764 if ((apm_info.bios.flags & APM_BIOS_DISENGAGED)
1765 && (apm_info.connection_version > 0x0100)) {
1766 error = apm_engage_power_management(APM_DEVICE_ALL, 1);
1767 if (error) {
1768 apm_error("engage power management", error);
1769 return -1;
1770 }
1771 }
1772
1773	if (debug && (num_online_cpus() == 1 || smp)) {
1774 error = apm_get_power_status(&bx, &cx, &dx);
1775 if (error)
1776 printk(KERN_INFO "apm: power status not available\n");
1777 else {
1778 switch ((bx >> 8) & 0xff) {
1779 case 0: power_stat = "off line"; break;
1780 case 1: power_stat = "on line"; break;
1781 case 2: power_stat = "on backup power"; break;
1782 default: power_stat = "unknown"; break;
1783 }
1784 switch (bx & 0xff) {
1785 case 0: bat_stat = "high"; break;
1786 case 1: bat_stat = "low"; break;
1787 case 2: bat_stat = "critical"; break;
1788 case 3: bat_stat = "charging"; break;
1789 default: bat_stat = "unknown"; break;
1790 }
1791 printk(KERN_INFO
1792 "apm: AC %s, battery status %s, battery life ",
1793 power_stat, bat_stat);
1794 if ((cx & 0xff) == 0xff)
1795 printk("unknown\n");
1796 else
1797 printk("%d%%\n", cx & 0xff);
1798 if (apm_info.connection_version > 0x100) {
1799 printk(KERN_INFO
1800 "apm: battery flag 0x%02x, battery life ",
1801 (cx >> 8) & 0xff);
1802 if (dx == 0xffff)
1803 printk("unknown\n");
1804 else
1805 printk("%d %s\n", dx & 0x7fff,
1806 (dx & 0x8000) ?
1807 "minutes" : "seconds");
1808 }
1809 }
1810 }
1811
1812 /* Install our power off handler.. */
1813 if (power_off)
1814 pm_power_off = apm_power_off;
1815
1816 if (num_online_cpus() == 1 || smp) {
1817#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
1818 console_blank_hook = apm_console_blank;
1819#endif
1820 apm_mainloop();
1821#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
1822 console_blank_hook = NULL;
1823#endif
1824 }
1825 kapmd_running = 0;
1826
1827 return 0;
1828}
1829
1830#ifndef MODULE
1831static int __init apm_setup(char *str)
1832{
1833 int invert;
1834
1835 while ((str != NULL) && (*str != '\0')) {
1836 if (strncmp(str, "off", 3) == 0)
1837 apm_disabled = 1;
1838 if (strncmp(str, "on", 2) == 0)
1839 apm_disabled = 0;
1840 if ((strncmp(str, "bounce-interval=", 16) == 0) ||
1841 (strncmp(str, "bounce_interval=", 16) == 0))
1842 bounce_interval = simple_strtol(str + 16, NULL, 0);
1843 if ((strncmp(str, "idle-threshold=", 15) == 0) ||
1844 (strncmp(str, "idle_threshold=", 15) == 0))
1845 idle_threshold = simple_strtol(str + 15, NULL, 0);
1846 if ((strncmp(str, "idle-period=", 12) == 0) ||
1847 (strncmp(str, "idle_period=", 12) == 0))
1848 idle_period = simple_strtol(str + 12, NULL, 0);
1849 invert = (strncmp(str, "no-", 3) == 0) ||
1850 (strncmp(str, "no_", 3) == 0);
1851 if (invert)
1852 str += 3;
1853 if (strncmp(str, "debug", 5) == 0)
1854 debug = !invert;
1855 if ((strncmp(str, "power-off", 9) == 0) ||
1856 (strncmp(str, "power_off", 9) == 0))
1857 power_off = !invert;
1858 if (strncmp(str, "smp", 3) == 0)
1859 {
1860 smp = !invert;
1861 idle_threshold = 100;
1862 }
1863 if ((strncmp(str, "allow-ints", 10) == 0) ||
1864 (strncmp(str, "allow_ints", 10) == 0))
1865 apm_info.allow_ints = !invert;
1866 if ((strncmp(str, "broken-psr", 10) == 0) ||
1867 (strncmp(str, "broken_psr", 10) == 0))
1868 apm_info.get_power_status_broken = !invert;
1869 if ((strncmp(str, "realmode-power-off", 18) == 0) ||
1870 (strncmp(str, "realmode_power_off", 18) == 0))
1871 apm_info.realmode_power_off = !invert;
1872 str = strchr(str, ',');
1873 if (str != NULL)
1874 str += strspn(str, ", \t");
1875 }
1876 return 1;
1877}
1878
1879__setup("apm=", apm_setup);
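/*
 * Example boot strings accepted by the parser above (illustrative):
 *
 *	apm=off
 *	apm=power-off,debug
 *	apm=idle-threshold=90,idle-period=300
 *	apm=no-allow-ints
 *
 * Options are comma separated; a "no-" (or "no_") prefix inverts the
 * boolean options.
 */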
1880#endif
1881
1882static struct file_operations apm_bios_fops = {
1883 .owner = THIS_MODULE,
1884 .read = do_read,
1885 .poll = do_poll,
1886 .ioctl = do_ioctl,
1887 .open = do_open,
1888 .release = do_release,
1889};
1890
1891static struct miscdevice apm_device = {
1892 APM_MINOR_DEV,
1893 "apm_bios",
1894 &apm_bios_fops
1895};
1896
1897
1898/* Simple "print if true" callback */
1899static int __init print_if_true(struct dmi_system_id *d)
1900{
1901 printk("%s\n", d->ident);
1902 return 0;
1903}
1904
1905/*
1906 * Some BIOSes enable the PS/2 mouse (touchpad) at resume, even if it was
1907 * disabled before the suspend. Linux used to get terribly confused by that.
1908 */
1909static int __init broken_ps2_resume(struct dmi_system_id *d)
1910{
1911 printk(KERN_INFO "%s machine detected. Mousepad Resume Bug workaround hopefully not needed.\n", d->ident);
1912 return 0;
1913}
1914
1915/* Some BIOSes have a broken protected-mode poweroff and need to use real mode */
1916static int __init set_realmode_power_off(struct dmi_system_id *d)
1917{
1918 if (apm_info.realmode_power_off == 0) {
1919 apm_info.realmode_power_off = 1;
1920 printk(KERN_INFO "%s bios detected. Using realmode poweroff only.\n", d->ident);
1921 }
1922 return 0;
1923}
1924
1925/* Some laptops require interrupts to be enabled during APM calls */
1926static int __init set_apm_ints(struct dmi_system_id *d)
1927{
1928 if (apm_info.allow_ints == 0) {
1929 apm_info.allow_ints = 1;
1930 printk(KERN_INFO "%s machine detected. Enabling interrupts during APM calls.\n", d->ident);
1931 }
1932 return 0;
1933}
1934
1935/* Some APM BIOSes corrupt memory or just plain do not work */
1936static int __init apm_is_horked(struct dmi_system_id *d)
1937{
1938 if (apm_info.disabled == 0) {
1939 apm_info.disabled = 1;
1940 printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident);
1941 }
1942 return 0;
1943}
1944
1945static int __init apm_is_horked_d850md(struct dmi_system_id *d)
1946{
1947 if (apm_info.disabled == 0) {
1948 apm_info.disabled = 1;
1949 printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident);
1950		printk(KERN_INFO "This bug is fixed in BIOS P15, which is available for\n");
1951		printk(KERN_INFO "download from support.intel.com\n");
1952 }
1953 return 0;
1954}
1955
1956/* Some APM BIOSes hang on APM idle calls */
1957static int __init apm_likes_to_melt(struct dmi_system_id *d)
1958{
1959 if (apm_info.forbid_idle == 0) {
1960 apm_info.forbid_idle = 1;
1961 printk(KERN_INFO "%s machine detected. Disabling APM idle calls.\n", d->ident);
1962 }
1963 return 0;
1964}
1965
1966/*
1967 * Check for clue-free BIOS implementations that use
1968 * the following QA technique
1969 *
1970 * [ Write BIOS Code ]<------
1971 * | ^
1972 * < Does it Compile >----N--
1973 * |Y ^
1974 * < Does it Boot Win98 >-N--
1975 * |Y
1976 * [Ship It]
1977 *
1978 * Phoenix A04 08/24/2000 is known bad (Dell Inspiron 5000e)
1979 * Phoenix A07 09/29/2000 is known good (Dell Inspiron 5000)
1980 */
1981static int __init broken_apm_power(struct dmi_system_id *d)
1982{
1983 apm_info.get_power_status_broken = 1;
1984 printk(KERN_WARNING "BIOS strings suggest APM bugs, disabling power status reporting.\n");
1985 return 0;
1986}
1987
1988/*
1989 * This BIOS swaps the APM minute reporting bytes over (many Sony
1990 * laptops have this problem).
1991 */
1992static int __init swab_apm_power_in_minutes(struct dmi_system_id *d)
1993{
1994 apm_info.get_power_status_swabinminutes = 1;
1995	printk(KERN_WARNING "BIOS strings suggest APM reports battery life in minutes with the wrong byte order.\n");
1996 return 0;
1997}
1998
1999static struct dmi_system_id __initdata apm_dmi_table[] = {
2000 {
2001 print_if_true,
2002 KERN_WARNING "IBM T23 - BIOS 1.03b+ and controller firmware 1.02+ may be needed for Linux APM.",
2003 { DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
2004 DMI_MATCH(DMI_BIOS_VERSION, "1AET38WW (1.01b)"), },
2005 },
2006 { /* Handle problems with APM on the C600 */
2007 broken_ps2_resume, "Dell Latitude C600",
2008 { DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
2009 DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C600"), },
2010 },
2011 { /* Allow interrupts during suspend on Dell Latitude laptops*/
2012 set_apm_ints, "Dell Latitude",
2013 { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
2014 DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C510"), }
2015 },
2016 { /* APM crashes */
2017 apm_is_horked, "Dell Inspiron 2500",
2018 { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
2019 DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"),
2020 DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
2021 DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
2022 },
2023 { /* Allow interrupts during suspend on Dell Inspiron laptops*/
2024 set_apm_ints, "Dell Inspiron", {
2025 DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
2026 DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 4000"), },
2027 },
2028 { /* Handle problems with APM on Inspiron 5000e */
2029 broken_apm_power, "Dell Inspiron 5000e",
2030 { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
2031 DMI_MATCH(DMI_BIOS_VERSION, "A04"),
2032 DMI_MATCH(DMI_BIOS_DATE, "08/24/2000"), },
2033 },
2034 { /* Handle problems with APM on Inspiron 2500 */
2035 broken_apm_power, "Dell Inspiron 2500",
2036 { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
2037 DMI_MATCH(DMI_BIOS_VERSION, "A12"),
2038 DMI_MATCH(DMI_BIOS_DATE, "02/04/2002"), },
2039 },
2040 { /* APM crashes */
2041 apm_is_horked, "Dell Dimension 4100",
2042 { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
2043 DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"),
2044 DMI_MATCH(DMI_BIOS_VENDOR,"Intel Corp."),
2045 DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
2046 },
2047 { /* Allow interrupts during suspend on Compaq Laptops*/
2048 set_apm_ints, "Compaq 12XL125",
2049 { DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
2050 DMI_MATCH(DMI_PRODUCT_NAME, "Compaq PC"),
2051 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
2052 DMI_MATCH(DMI_BIOS_VERSION,"4.06"), },
2053 },
2054 { /* Allow interrupts during APM or the clock goes slow */
2055 set_apm_ints, "ASUSTeK",
2056 { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
2057 DMI_MATCH(DMI_PRODUCT_NAME, "L8400K series Notebook PC"), },
2058 },
2059 { /* APM blows on shutdown */
2060 apm_is_horked, "ABIT KX7-333[R]",
2061 { DMI_MATCH(DMI_BOARD_VENDOR, "ABIT"),
2062 DMI_MATCH(DMI_BOARD_NAME, "VT8367-8233A (KX7-333[R])"), },
2063 },
2064 { /* APM crashes */
2065 apm_is_horked, "Trigem Delhi3",
2066 { DMI_MATCH(DMI_SYS_VENDOR, "TriGem Computer, Inc"),
2067 DMI_MATCH(DMI_PRODUCT_NAME, "Delhi3"), },
2068 },
2069 { /* APM crashes */
2070 apm_is_horked, "Fujitsu-Siemens",
2071 { DMI_MATCH(DMI_BIOS_VENDOR, "hoenix/FUJITSU SIEMENS"),
2072 DMI_MATCH(DMI_BIOS_VERSION, "Version1.01"), },
2073 },
2074 { /* APM crashes */
2075 apm_is_horked_d850md, "Intel D850MD",
2076 { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
2077 DMI_MATCH(DMI_BIOS_VERSION, "MV85010A.86A.0016.P07.0201251536"), },
2078 },
2079 { /* APM crashes */
2080 apm_is_horked, "Intel D810EMO",
2081 { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
2082 DMI_MATCH(DMI_BIOS_VERSION, "MO81010A.86A.0008.P04.0004170800"), },
2083 },
2084 { /* APM crashes */
2085 apm_is_horked, "Dell XPS-Z",
2086 { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
2087 DMI_MATCH(DMI_BIOS_VERSION, "A11"),
2088 DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), },
2089 },
2090 { /* APM crashes */
2091 apm_is_horked, "Sharp PC-PJ/AX",
2092 { DMI_MATCH(DMI_SYS_VENDOR, "SHARP"),
2093 DMI_MATCH(DMI_PRODUCT_NAME, "PC-PJ/AX"),
2094 DMI_MATCH(DMI_BIOS_VENDOR,"SystemSoft"),
2095 DMI_MATCH(DMI_BIOS_VERSION,"Version R2.08"), },
2096 },
2097 { /* APM crashes */
2098 apm_is_horked, "Dell Inspiron 2500",
2099 { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
2100 DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"),
2101 DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
2102 DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
2103 },
2104 { /* APM idle hangs */
2105 apm_likes_to_melt, "Jabil AMD",
2106 { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
2107 DMI_MATCH(DMI_BIOS_VERSION, "0AASNP06"), },
2108 },
2109 { /* APM idle hangs */
2110 apm_likes_to_melt, "AMI Bios",
2111 { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
2112 DMI_MATCH(DMI_BIOS_VERSION, "0AASNP05"), },
2113 },
2114 { /* Handle problems with APM on Sony Vaio PCG-N505X(DE) */
2115 swab_apm_power_in_minutes, "Sony VAIO",
2116 { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
2117 DMI_MATCH(DMI_BIOS_VERSION, "R0206H"),
2118 DMI_MATCH(DMI_BIOS_DATE, "08/23/99"), },
2119 },
2120 { /* Handle problems with APM on Sony Vaio PCG-N505VX */
2121 swab_apm_power_in_minutes, "Sony VAIO",
2122 { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
2123 DMI_MATCH(DMI_BIOS_VERSION, "W2K06H0"),
2124 DMI_MATCH(DMI_BIOS_DATE, "02/03/00"), },
2125 },
2126 { /* Handle problems with APM on Sony Vaio PCG-XG29 */
2127 swab_apm_power_in_minutes, "Sony VAIO",
2128 { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
2129 DMI_MATCH(DMI_BIOS_VERSION, "R0117A0"),
2130 DMI_MATCH(DMI_BIOS_DATE, "04/25/00"), },
2131 },
2132 { /* Handle problems with APM on Sony Vaio PCG-Z600NE */
2133 swab_apm_power_in_minutes, "Sony VAIO",
2134 { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
2135 DMI_MATCH(DMI_BIOS_VERSION, "R0121Z1"),
2136 DMI_MATCH(DMI_BIOS_DATE, "05/11/00"), },
2137 },
2138 { /* Handle problems with APM on Sony Vaio PCG-Z600NE */
2139 swab_apm_power_in_minutes, "Sony VAIO",
2140 { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
2141 DMI_MATCH(DMI_BIOS_VERSION, "WME01Z1"),
2142 DMI_MATCH(DMI_BIOS_DATE, "08/11/00"), },
2143 },
2144 { /* Handle problems with APM on Sony Vaio PCG-Z600LEK(DE) */
2145 swab_apm_power_in_minutes, "Sony VAIO",
2146 { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
2147 DMI_MATCH(DMI_BIOS_VERSION, "R0206Z3"),
2148 DMI_MATCH(DMI_BIOS_DATE, "12/25/00"), },
2149 },
2150 { /* Handle problems with APM on Sony Vaio PCG-Z505LS */
2151 swab_apm_power_in_minutes, "Sony VAIO",
2152 { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
2153 DMI_MATCH(DMI_BIOS_VERSION, "R0203D0"),
2154 DMI_MATCH(DMI_BIOS_DATE, "05/12/00"), },
2155 },
2156 { /* Handle problems with APM on Sony Vaio PCG-Z505LS */
2157 swab_apm_power_in_minutes, "Sony VAIO",
2158 { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
2159 DMI_MATCH(DMI_BIOS_VERSION, "R0203Z3"),
2160 DMI_MATCH(DMI_BIOS_DATE, "08/25/00"), },
2161 },
2162 { /* Handle problems with APM on Sony Vaio PCG-Z505LS (with updated BIOS) */
2163 swab_apm_power_in_minutes, "Sony VAIO",
2164 { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
2165 DMI_MATCH(DMI_BIOS_VERSION, "R0209Z3"),
2166 DMI_MATCH(DMI_BIOS_DATE, "05/12/01"), },
2167 },
2168 { /* Handle problems with APM on Sony Vaio PCG-F104K */
2169 swab_apm_power_in_minutes, "Sony VAIO",
2170 { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
2171 DMI_MATCH(DMI_BIOS_VERSION, "R0204K2"),
2172 DMI_MATCH(DMI_BIOS_DATE, "08/28/00"), },
2173 },
2174
2175 { /* Handle problems with APM on Sony Vaio PCG-C1VN/C1VE */
2176 swab_apm_power_in_minutes, "Sony VAIO",
2177 { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
2178 DMI_MATCH(DMI_BIOS_VERSION, "R0208P1"),
2179 DMI_MATCH(DMI_BIOS_DATE, "11/09/00"), },
2180 },
2181 { /* Handle problems with APM on Sony Vaio PCG-C1VE */
2182 swab_apm_power_in_minutes, "Sony VAIO",
2183 { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
2184 DMI_MATCH(DMI_BIOS_VERSION, "R0204P1"),
2185 DMI_MATCH(DMI_BIOS_DATE, "09/12/00"), },
2186 },
2187 { /* Handle problems with APM on Sony Vaio PCG-C1VE */
2188 swab_apm_power_in_minutes, "Sony VAIO",
2189 { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
2190 DMI_MATCH(DMI_BIOS_VERSION, "WXPO1Z3"),
2191 DMI_MATCH(DMI_BIOS_DATE, "10/26/01"), },
2192 },
2193 { /* broken PM poweroff bios */
2194 set_realmode_power_off, "Award Software v4.60 PGMA",
2195 { DMI_MATCH(DMI_BIOS_VENDOR, "Award Software International, Inc."),
2196 DMI_MATCH(DMI_BIOS_VERSION, "4.60 PGMA"),
2197 DMI_MATCH(DMI_BIOS_DATE, "134526184"), },
2198 },
2199
2200 /* Generic per vendor APM settings */
2201
2202 { /* Allow interrupts during suspend on IBM laptops */
2203 set_apm_ints, "IBM",
2204 { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), },
2205 },
2206
2207 { }
2208};
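/*
 * Illustration only: the shape of a new quirk entry for the table
 * above.  The vendor and product strings are hypothetical placeholders,
 * not a real machine.
 */
#if 0
	{ /* describe the symptom here */
		apm_is_horked, "Example Vendor Example Model",
		{	DMI_MATCH(DMI_SYS_VENDOR, "Example Vendor"),
			DMI_MATCH(DMI_PRODUCT_NAME, "Example Model"), },
	},
#endif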
2209
2210/*
2211 * Just start the APM thread. We do NOT want to do APM BIOS
2212 * calls from anything but the APM thread, if for no other reason
2213 * than the fact that we don't trust the APM BIOS. This way,
2214 * most common APM BIOS problems that lead to protection errors
2215 * etc will have at least some level of being contained...
2216 *
2217 * In short, if something bad happens, at least we have a choice
2218 * of just killing the apm thread..
2219 */
2220static int __init apm_init(void)
2221{
2222 struct proc_dir_entry *apm_proc;
2223 int ret;
2224 int i;
2225
2226 dmi_check_system(apm_dmi_table);
2227
2228 if (apm_info.bios.version == 0) {
2229 printk(KERN_INFO "apm: BIOS not found.\n");
2230 return -ENODEV;
2231 }
2232 printk(KERN_INFO
2233 "apm: BIOS version %d.%d Flags 0x%02x (Driver version %s)\n",
2234 ((apm_info.bios.version >> 8) & 0xff),
2235 (apm_info.bios.version & 0xff),
2236 apm_info.bios.flags,
2237 driver_version);
2238 if ((apm_info.bios.flags & APM_32_BIT_SUPPORT) == 0) {
2239 printk(KERN_INFO "apm: no 32 bit BIOS support\n");
2240 return -ENODEV;
2241 }
2242
2243 if (allow_ints)
2244 apm_info.allow_ints = 1;
2245 if (broken_psr)
2246 apm_info.get_power_status_broken = 1;
2247 if (realmode_power_off)
2248 apm_info.realmode_power_off = 1;
2249 /* User can override, but default is to trust DMI */
2250 if (apm_disabled != -1)
2251 apm_info.disabled = apm_disabled;
2252
2253 /*
2254 * Fix for the Compaq Contura 3/25c which reports BIOS version 0.1
2255 * but is reportedly a 1.0 BIOS.
2256 */
2257 if (apm_info.bios.version == 0x001)
2258 apm_info.bios.version = 0x100;
2259
2260 /* BIOS < 1.2 doesn't set cseg_16_len */
2261 if (apm_info.bios.version < 0x102)
2262 apm_info.bios.cseg_16_len = 0; /* 64k */
2263
2264 if (debug) {
2265 printk(KERN_INFO "apm: entry %x:%lx cseg16 %x dseg %x",
2266 apm_info.bios.cseg, apm_info.bios.offset,
2267 apm_info.bios.cseg_16, apm_info.bios.dseg);
2268 if (apm_info.bios.version > 0x100)
2269 printk(" cseg len %x, dseg len %x",
2270 apm_info.bios.cseg_len,
2271 apm_info.bios.dseg_len);
2272 if (apm_info.bios.version > 0x101)
2273 printk(" cseg16 len %x", apm_info.bios.cseg_16_len);
2274 printk("\n");
2275 }
2276
2277 if (apm_info.disabled) {
2278 printk(KERN_NOTICE "apm: disabled on user request.\n");
2279 return -ENODEV;
2280 }
2281 if ((num_online_cpus() > 1) && !power_off && !smp) {
2282 printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n");
2283 apm_info.disabled = 1;
2284 return -ENODEV;
2285 }
2286 if (PM_IS_ACTIVE()) {
2287 printk(KERN_NOTICE "apm: overridden by ACPI.\n");
2288 apm_info.disabled = 1;
2289 return -ENODEV;
2290 }
2291 pm_active = 1;
2292
2293 /*
2294 * Set up a segment that references the real mode segment 0x40
2295 * that extends up to the end of page zero (that we have reserved).
2296 * This is for buggy BIOS's that refer to (real mode) segment 0x40
2297 * even though they are called in protected mode.
2298 */
2299 set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
2300 _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
2301
2302 apm_bios_entry.offset = apm_info.bios.offset;
2303 apm_bios_entry.segment = APM_CS;
2304
2305 for (i = 0; i < NR_CPUS; i++) {
2306 set_base(per_cpu(cpu_gdt_table, i)[APM_CS >> 3],
2307 __va((unsigned long)apm_info.bios.cseg << 4));
2308 set_base(per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3],
2309 __va((unsigned long)apm_info.bios.cseg_16 << 4));
2310 set_base(per_cpu(cpu_gdt_table, i)[APM_DS >> 3],
2311 __va((unsigned long)apm_info.bios.dseg << 4));
2312#ifndef APM_RELAX_SEGMENTS
2313 if (apm_info.bios.version == 0x100) {
2314#endif
2315 /* For ASUS motherboard, Award BIOS rev 110 (and others?) */
2316 _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], 64 * 1024 - 1);
2317 /* For some unknown machine. */
2318 _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], 64 * 1024 - 1);
2319 /* For the DEC Hinote Ultra CT475 (and others?) */
2320 _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_DS >> 3], 64 * 1024 - 1);
2321#ifndef APM_RELAX_SEGMENTS
2322 } else {
2323 _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3],
2324 (apm_info.bios.cseg_len - 1) & 0xffff);
2325 _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3],
2326 (apm_info.bios.cseg_16_len - 1) & 0xffff);
2327 _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_DS >> 3],
2328 (apm_info.bios.dseg_len - 1) & 0xffff);
2329 /* workaround for broken BIOSes */
2330 if (apm_info.bios.cseg_len <= apm_info.bios.offset)
2331				_set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], 64 * 1024 - 1);
2332 if (apm_info.bios.dseg_len <= 0x40) { /* 0x40 * 4kB == 64kB */
2333 /* for the BIOS that assumes granularity = 1 */
2334 per_cpu(cpu_gdt_table, i)[APM_DS >> 3].b |= 0x800000;
2335 printk(KERN_NOTICE "apm: we set the granularity of dseg.\n");
2336 }
2337 }
2338#endif
2339 }
2340
2341 apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info);
2342 if (apm_proc)
2343 apm_proc->owner = THIS_MODULE;
2344
2345 ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD);
2346 if (ret < 0) {
2347 printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n");
2348 return -ENOMEM;
2349 }
2350
2351	if (num_online_cpus() > 1 && !smp) {
2352 printk(KERN_NOTICE
2353 "apm: disabled - APM is not SMP safe (power off active).\n");
2354 return 0;
2355 }
2356
2357 misc_register(&apm_device);
2358
2359 if (HZ != 100)
2360 idle_period = (idle_period * HZ) / 100;
2361 if (idle_threshold < 100) {
2362 original_pm_idle = pm_idle;
2363 pm_idle = apm_cpu_idle;
2364 set_pm_idle = 1;
2365 }
2366
2367 return 0;
2368}
2369
2370static void __exit apm_exit(void)
2371{
2372 int error;
2373
2374 if (set_pm_idle) {
2375 pm_idle = original_pm_idle;
2376 /*
2377 * We are about to unload the current idle thread pm callback
2378 * (pm_idle), Wait for all processors to update cached/local
2379 * copies of pm_idle before proceeding.
2380 */
2381 cpu_idle_wait();
2382 }
2383 if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
2384 && (apm_info.connection_version > 0x0100)) {
2385 error = apm_engage_power_management(APM_DEVICE_ALL, 0);
2386 if (error)
2387 apm_error("disengage power management", error);
2388 }
2389 misc_deregister(&apm_device);
2390 remove_proc_entry("apm", NULL);
2391 if (power_off)
2392 pm_power_off = NULL;
2393 exit_kapmd = 1;
2394 while (kapmd_running)
2395 schedule();
2396 pm_active = 0;
2397}
2398
2399module_init(apm_init);
2400module_exit(apm_exit);
2401
2402MODULE_AUTHOR("Stephen Rothwell");
2403MODULE_DESCRIPTION("Advanced Power Management");
2404MODULE_LICENSE("GPL");
2405module_param(debug, bool, 0644);
2406MODULE_PARM_DESC(debug, "Enable debug mode");
2407module_param(power_off, bool, 0444);
2408MODULE_PARM_DESC(power_off, "Enable power off");
2409module_param(bounce_interval, int, 0444);
2410MODULE_PARM_DESC(bounce_interval,
2411 "Set the number of ticks to ignore suspend bounces");
2412module_param(allow_ints, bool, 0444);
2413MODULE_PARM_DESC(allow_ints, "Allow interrupts during BIOS calls");
2414module_param(broken_psr, bool, 0444);
2415MODULE_PARM_DESC(broken_psr, "BIOS has a broken GetPowerStatus call");
2416module_param(realmode_power_off, bool, 0444);
2417MODULE_PARM_DESC(realmode_power_off,
2418 "Switch to real mode before powering off");
2419module_param(idle_threshold, int, 0444);
2420MODULE_PARM_DESC(idle_threshold,
2421 "System idle percentage above which to make APM BIOS idle calls");
2422module_param(idle_period, int, 0444);
2423MODULE_PARM_DESC(idle_period,
2424	"Period (in sec/100) over which to calculate the idle percentage");
2425module_param(smp, bool, 0444);
2426MODULE_PARM_DESC(smp,
2427 "Set this to enable APM use on an SMP platform. Use with caution on older systems");
2428MODULE_ALIAS_MISCDEV(APM_MINOR_DEV);
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
new file mode 100644
index 000000000000..36d66e2077d0
--- /dev/null
+++ b/arch/i386/kernel/asm-offsets.c
@@ -0,0 +1,72 @@
1/*
2 * Generate definitions needed by assembly language modules.
3 * This code generates raw asm output which is post-processed
4 * to extract and format the required data.
5 */
6
7#include <linux/sched.h>
8#include <linux/signal.h>
9#include <linux/personality.h>
10#include <linux/suspend.h>
11#include <asm/ucontext.h>
12#include "sigframe.h"
13#include <asm/fixmap.h>
14#include <asm/processor.h>
15#include <asm/thread_info.h>
16
17#define DEFINE(sym, val) \
18 asm volatile("\n->" #sym " %0 " #val : : "i" (val))
19
20#define BLANK() asm volatile("\n->" : : )
21
22#define OFFSET(sym, str, mem) \
23 DEFINE(sym, offsetof(struct str, mem));
24
25void foo(void)
26{
27 OFFSET(SIGCONTEXT_eax, sigcontext, eax);
28 OFFSET(SIGCONTEXT_ebx, sigcontext, ebx);
29 OFFSET(SIGCONTEXT_ecx, sigcontext, ecx);
30 OFFSET(SIGCONTEXT_edx, sigcontext, edx);
31 OFFSET(SIGCONTEXT_esi, sigcontext, esi);
32 OFFSET(SIGCONTEXT_edi, sigcontext, edi);
33 OFFSET(SIGCONTEXT_ebp, sigcontext, ebp);
34 OFFSET(SIGCONTEXT_esp, sigcontext, esp);
35 OFFSET(SIGCONTEXT_eip, sigcontext, eip);
36 BLANK();
37
38 OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
39 OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
40 OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
41 OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
42 OFFSET(CPUINFO_hard_math, cpuinfo_x86, hard_math);
43 OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
44 OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
45 OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
46 BLANK();
47
48 OFFSET(TI_task, thread_info, task);
49 OFFSET(TI_exec_domain, thread_info, exec_domain);
50 OFFSET(TI_flags, thread_info, flags);
51 OFFSET(TI_status, thread_info, status);
52 OFFSET(TI_cpu, thread_info, cpu);
53 OFFSET(TI_preempt_count, thread_info, preempt_count);
54 OFFSET(TI_addr_limit, thread_info, addr_limit);
55 OFFSET(TI_restart_block, thread_info, restart_block);
56 BLANK();
57
58 OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
59 OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
60 BLANK();
61
62 OFFSET(pbe_address, pbe, address);
63 OFFSET(pbe_orig_address, pbe, orig_address);
64 OFFSET(pbe_next, pbe, next);
65
66 /* Offset from the sysenter stack to tss.esp0 */
67 DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) -
68 sizeof(struct tss_struct));
69
70 DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
71 DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL));
72}
diff --git a/arch/i386/kernel/bootflag.c b/arch/i386/kernel/bootflag.c
new file mode 100644
index 000000000000..4c30ed01f4e1
--- /dev/null
+++ b/arch/i386/kernel/bootflag.c
@@ -0,0 +1,99 @@
1/*
2 * Implement 'Simple Boot Flag Specification 2.0'
3 */
4
5
6#include <linux/config.h>
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/init.h>
10#include <linux/string.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/acpi.h>
14#include <asm/io.h>
15
16#include <linux/mc146818rtc.h>
17
18
19#define SBF_RESERVED (0x78)
20#define SBF_PNPOS (1<<0)
21#define SBF_BOOTING (1<<1)
22#define SBF_DIAG (1<<2)
23#define SBF_PARITY (1<<7)
24
25
26int sbf_port __initdata = -1; /* set via acpi_boot_init() */
27
28
29static int __init parity(u8 v)
30{
31 int x = 0;
32 int i;
33
34 for(i=0;i<8;i++)
35 {
36 x^=(v&1);
37 v>>=1;
38 }
39 return x;
40}
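/*
 * Illustration only: parity() returns 1 when v has an odd number of
 * set bits, e.g. parity(0x03) == 0 and parity(0x07) == 1.  sbf_write()
 * below uses it to force the stored byte to odd overall parity via
 * SBF_PARITY, and sbf_value_valid() rejects bytes that fail the check.
 */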
41
42static void __init sbf_write(u8 v)
43{
44 unsigned long flags;
45 if(sbf_port != -1)
46 {
47 v &= ~SBF_PARITY;
48 if(!parity(v))
49 v|=SBF_PARITY;
50
51 printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n", sbf_port, v);
52
53 spin_lock_irqsave(&rtc_lock, flags);
54 CMOS_WRITE(v, sbf_port);
55 spin_unlock_irqrestore(&rtc_lock, flags);
56 }
57}
58
59static u8 __init sbf_read(void)
60{
61 u8 v;
62 unsigned long flags;
63 if(sbf_port == -1)
64 return 0;
65 spin_lock_irqsave(&rtc_lock, flags);
66 v = CMOS_READ(sbf_port);
67 spin_unlock_irqrestore(&rtc_lock, flags);
68 return v;
69}
70
71static int __init sbf_value_valid(u8 v)
72{
73 if(v&SBF_RESERVED) /* Reserved bits */
74 return 0;
75 if(!parity(v))
76 return 0;
77 return 1;
78}
79
80static int __init sbf_init(void)
81{
82 u8 v;
83 if(sbf_port == -1)
84 return 0;
85 v = sbf_read();
86 if(!sbf_value_valid(v))
87 printk(KERN_WARNING "Simple Boot Flag value 0x%x read from CMOS RAM was invalid\n",v);
88
89 v &= ~SBF_RESERVED;
90 v &= ~SBF_BOOTING;
91 v &= ~SBF_DIAG;
92#if defined(CONFIG_ISAPNP)
93 v |= SBF_PNPOS;
94#endif
95 sbf_write(v);
96 return 0;
97}
98
99module_init(sbf_init);
diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile
new file mode 100644
index 000000000000..010aecfffbc1
--- /dev/null
+++ b/arch/i386/kernel/cpu/Makefile
@@ -0,0 +1,19 @@
1#
2# Makefile for x86-compatible CPU details and quirks
3#
4
5obj-y := common.o proc.o
6
7obj-y += amd.o
8obj-y += cyrix.o
9obj-y += centaur.o
10obj-y += transmeta.o
11obj-y += intel.o intel_cacheinfo.o
12obj-y += rise.o
13obj-y += nexgen.o
14obj-y += umc.o
15
16obj-$(CONFIG_X86_MCE) += mcheck/
17
18obj-$(CONFIG_MTRR) += mtrr/
19obj-$(CONFIG_CPU_FREQ) += cpufreq/
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
new file mode 100644
index 000000000000..ae94585d0445
--- /dev/null
+++ b/arch/i386/kernel/cpu/amd.c
@@ -0,0 +1,249 @@
1#include <linux/init.h>
2#include <linux/bitops.h>
3#include <linux/mm.h>
4#include <asm/io.h>
5#include <asm/processor.h>
6
7#include "cpu.h"
8
9/*
10 * B step AMD K6 before B 9730xxxx have hardware bugs that can cause
11 * misexecution of code under Linux. Owners of such processors should
12 * contact AMD for precise details and a CPU swap.
13 *
14 * See http://www.multimania.com/poulot/k6bug.html
15 * http://www.amd.com/K6/k6docs/revgd.html
16 *
17 * The following test is erm.. interesting. AMD neglected to up
18 * the chip setting when fixing the bug but they also tweaked some
19 * performance at the same time..
20 */
21
22extern void vide(void);
23__asm__(".align 4\nvide: ret");
24
25static void __init init_amd(struct cpuinfo_x86 *c)
26{
27 u32 l, h;
28 int mbytes = num_physpages >> (20-PAGE_SHIFT);
29 int r;
30
31 /*
32 * FIXME: We should handle the K5 here. Set up the write
33 * range and also turn on MSR 83 bits 4 and 31 (write alloc,
34 * no bus pipeline)
35 */
36
37 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
38 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
39 clear_bit(0*32+31, c->x86_capability);
40
41 r = get_model_name(c);
42
43 switch(c->x86)
44 {
45 case 4:
46 /*
47 * General Systems BIOSen alias the cpu frequency registers
48	 * of the Elan at 0x000df000. Unfortunately, one of the Linux
49	 * drivers subsequently pokes it, and changes the CPU speed.
50	 * Workaround: remove the unneeded alias.
51 */
52#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */
53#define CBAR_ENB (0x80000000)
54#define CBAR_KEY (0X000000CB)
55 if (c->x86_model==9 || c->x86_model == 10) {
56 if (inl (CBAR) & CBAR_ENB)
57 outl (0 | CBAR_KEY, CBAR);
58 }
59 break;
60 case 5:
61 if( c->x86_model < 6 )
62 {
63 /* Based on AMD doc 20734R - June 2000 */
64 if ( c->x86_model == 0 ) {
65 clear_bit(X86_FEATURE_APIC, c->x86_capability);
66 set_bit(X86_FEATURE_PGE, c->x86_capability);
67 }
68 break;
69 }
70
71 if ( c->x86_model == 6 && c->x86_mask == 1 ) {
72 const int K6_BUG_LOOP = 1000000;
73 int n;
74 void (*f_vide)(void);
75 unsigned long d, d2;
76
77 printk(KERN_INFO "AMD K6 stepping B detected - ");
78
79 /*
80 * It looks like AMD fixed the 2.6.2 bug and improved indirect
81 * calls at the same time.
82 */
83
84 n = K6_BUG_LOOP;
85 f_vide = vide;
86 rdtscl(d);
87 while (n--)
88 f_vide();
89 rdtscl(d2);
90 d = d2-d;
91
92 /* Knock these two lines out if it debugs out ok */
93 printk(KERN_INFO "AMD K6 stepping B detected - ");
94 /* -- cut here -- */
95 if (d > 20*K6_BUG_LOOP)
96 printk("system stability may be impaired when more than 32 MB are used.\n");
97 else
98 printk("probably OK (after B9730xxxx).\n");
99 printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
100 }
101
102 /* K6 with old style WHCR */
103 if (c->x86_model < 8 ||
104 (c->x86_model== 8 && c->x86_mask < 8)) {
105 /* We can only write allocate on the low 508Mb */
106 if(mbytes>508)
107 mbytes=508;
108
109 rdmsr(MSR_K6_WHCR, l, h);
110 if ((l&0x0000FFFF)==0) {
111 unsigned long flags;
112 l=(1<<0)|((mbytes/4)<<1);
113 local_irq_save(flags);
114 wbinvd();
115 wrmsr(MSR_K6_WHCR, l, h);
116 local_irq_restore(flags);
117 printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
118 mbytes);
119 }
120 break;
121 }
122
123 if ((c->x86_model == 8 && c->x86_mask >7) ||
124 c->x86_model == 9 || c->x86_model == 13) {
125 /* The more serious chips .. */
126
127 if(mbytes>4092)
128 mbytes=4092;
129
130 rdmsr(MSR_K6_WHCR, l, h);
131 if ((l&0xFFFF0000)==0) {
132 unsigned long flags;
133 l=((mbytes>>2)<<22)|(1<<16);
134 local_irq_save(flags);
135 wbinvd();
136 wrmsr(MSR_K6_WHCR, l, h);
137 local_irq_restore(flags);
138 printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
139 mbytes);
140 }
141
142 /* Set MTRR capability flag if appropriate */
143 if (c->x86_model == 13 || c->x86_model == 9 ||
144 (c->x86_model == 8 && c->x86_mask >= 8))
145 set_bit(X86_FEATURE_K6_MTRR, c->x86_capability);
146 break;
147 }
148 break;
149
150 case 6: /* An Athlon/Duron */
151
152 /* Bit 15 of Athlon specific MSR 15, needs to be 0
153 * to enable SSE on Palomino/Morgan/Barton CPU's.
154 * If the BIOS didn't enable it already, enable it here.
155 */
156 if (c->x86_model >= 6 && c->x86_model <= 10) {
157 if (!cpu_has(c, X86_FEATURE_XMM)) {
158 printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
159 rdmsr(MSR_K7_HWCR, l, h);
160 l &= ~0x00008000;
161 wrmsr(MSR_K7_HWCR, l, h);
162 set_bit(X86_FEATURE_XMM, c->x86_capability);
163 }
164 }
165
166 /* It's been determined by AMD that Athlons since model 8 stepping 1
167	 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx,
168	 * as per AMD technical note 27212 0.2.
169 */
170 if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) {
171 rdmsr(MSR_K7_CLK_CTL, l, h);
172 if ((l & 0xfff00000) != 0x20000000) {
173 printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
174 ((l & 0x000fffff)|0x20000000));
175 wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
176 }
177 }
178 break;
179 }
180
181 switch (c->x86) {
182 case 15:
183 set_bit(X86_FEATURE_K8, c->x86_capability);
184 break;
185 case 6:
186 set_bit(X86_FEATURE_K7, c->x86_capability);
187 break;
188 }
189
190 display_cacheinfo(c);
191 detect_ht(c);
192
193#ifdef CONFIG_X86_HT
194 /* AMD dual core looks like HT but isn't really. Hide it from the
195 scheduler. This works around problems with the domain scheduler.
196 Also probably gives slightly better scheduling and disables
197 SMT nice which is harmful on dual core.
198 TBD tune the domain scheduler for dual core. */
199 if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
200 smp_num_siblings = 1;
201#endif
202
203 if (cpuid_eax(0x80000000) >= 0x80000008) {
204 c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
205 if (c->x86_num_cores & (c->x86_num_cores - 1))
206 c->x86_num_cores = 1;
207 }
208}
209
210static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
211{
212 /* AMD errata T13 (order #21922) */
213 if ((c->x86 == 6)) {
214 if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */
215 size = 64;
216 if (c->x86_model == 4 &&
217 (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */
218 size = 256;
219 }
220 return size;
221}
222
223static struct cpu_dev amd_cpu_dev __initdata = {
224 .c_vendor = "AMD",
225 .c_ident = { "AuthenticAMD" },
226 .c_models = {
227 { .vendor = X86_VENDOR_AMD, .family = 4, .model_names =
228 {
229 [3] = "486 DX/2",
230 [7] = "486 DX/2-WB",
231 [8] = "486 DX/4",
232 [9] = "486 DX/4-WB",
233 [14] = "Am5x86-WT",
234 [15] = "Am5x86-WB"
235 }
236 },
237 },
238 .c_init = init_amd,
239 .c_identify = generic_identify,
240 .c_size_cache = amd_size_cache,
241};
242
243int __init amd_init_cpu(void)
244{
245 cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
246 return 0;
247}
248
249//early_arch_initcall(amd_init_cpu);
diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c
new file mode 100644
index 000000000000..394814e57672
--- /dev/null
+++ b/arch/i386/kernel/cpu/centaur.c
@@ -0,0 +1,476 @@
1#include <linux/kernel.h>
2#include <linux/init.h>
3#include <linux/bitops.h>
4#include <asm/processor.h>
5#include <asm/msr.h>
6#include <asm/e820.h>
7#include "cpu.h"
8
9#ifdef CONFIG_X86_OOSTORE
10
11static u32 __init power2(u32 x)
12{
13 u32 s=1;
14 while(s<=x)
15 s<<=1;
16 return s>>=1;
17}
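/*
 * Illustration only: power2() rounds down to the nearest power of two,
 * e.g. power2(0x600000) == 0x400000 and power2(0x400000) == 0x400000.
 * The MCR coverage code below relies on this to pick block sizes that
 * can be expressed as a base/mask pair.
 */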
18
19
20/*
21 * Set up an actual MCR
22 */
23
24static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key)
25{
26 u32 lo, hi;
27
28 hi = base & ~0xFFF;
29 lo = ~(size-1); /* Size is a power of 2 so this makes a mask */
30 lo &= ~0xFFF; /* Remove the ctrl value bits */
31 lo |= key; /* Attribute we wish to set */
32 wrmsr(reg+MSR_IDT_MCR0, lo, hi);
33 mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */
34}
35
36/*
37 * Figure what we can cover with MCR's
38 *
39 * Shortcut: We know you can't put 4Gig of RAM on a winchip
40 */
41
42static u32 __init ramtop(void) /* 16388 */
43{
44 int i;
45 u32 top = 0;
46 u32 clip = 0xFFFFFFFFUL;
47
48 for (i = 0; i < e820.nr_map; i++) {
49 unsigned long start, end;
50
51 if (e820.map[i].addr > 0xFFFFFFFFUL)
52 continue;
53 /*
54		 * Don't MCR over reserved space. Ignore the ISA hole;
55		 * we frob around that catastrophe already.
56 */
57
58 if (e820.map[i].type == E820_RESERVED)
59 {
60 if(e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip)
61 clip = e820.map[i].addr;
62 continue;
63 }
64 start = e820.map[i].addr;
65 end = e820.map[i].addr + e820.map[i].size;
66 if (start >= end)
67 continue;
68 if (end > top)
69 top = end;
70 }
71	/* Everything below 'top' should be RAM except for the ISA hole.
72	   Because of the limited number of MCRs we want to map NV/ACPI
73	   into our MCR range for gunk in RAM.
74
75	   Clip might cause us to MCR insufficient RAM, but that is an
76	   acceptable failure mode and should only bite obscure boxes with
77	   a VESA hole at 15Mb.
78
79	   The second case where clip kicks in is when the EBDA is marked
80	   as reserved. Again we fail safe with reasonable results.
81	*/
82
83 if(top>clip)
84 top=clip;
85
86 return top;
87}
88
89/*
90 * Compute a set of MCR's to give maximum coverage
91 */
92
93static int __init centaur_mcr_compute(int nr, int key)
94{
95 u32 mem = ramtop();
96 u32 root = power2(mem);
97 u32 base = root;
98 u32 top = root;
99 u32 floor = 0;
100 int ct = 0;
101
102 while(ct<nr)
103 {
104 u32 fspace = 0;
105
106 /*
107 * Find the largest block we will fill going upwards
108 */
109
110 u32 high = power2(mem-top);
111
112 /*
113 * Find the largest block we will fill going downwards
114 */
115
116 u32 low = base/2;
117
118 /*
119 * Don't fill below 1Mb going downwards as there
120 * is an ISA hole in the way.
121 */
122
123 if(base <= 1024*1024)
124 low = 0;
125
126 /*
127 * See how much space we could cover by filling below
128 * the ISA hole
129 */
130
131 if(floor == 0)
132 fspace = 512*1024;
133 else if(floor ==512*1024)
134 fspace = 128*1024;
135
136 /* And forget ROM space */
137
138 /*
139 * Now install the largest coverage we get
140 */
141
142 if(fspace > high && fspace > low)
143 {
144 centaur_mcr_insert(ct, floor, fspace, key);
145 floor += fspace;
146 }
147 else if(high > low)
148 {
149 centaur_mcr_insert(ct, top, high, key);
150 top += high;
151 }
152 else if(low > 0)
153 {
154 base -= low;
155 centaur_mcr_insert(ct, base, low, key);
156 }
157 else break;
158 ct++;
159 }
160 /*
161 * We loaded ct values. We now need to set the mask. The caller
162 * must do this bit.
163 */
164
165 return ct;
166}
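/*
 * Illustrative walk (96MB of RAM assumed): root = power2(96MB) = 64MB,
 * so base = top = 64MB. Successive iterations then cover 32-64MB,
 * 64-96MB, 16-32MB, 8-16MB, 4-8MB and 2-4MB before the six registers
 * run out. Anything left unmarked simply stays strongly ordered, which
 * is the safe default.
 */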
167
168static void __init centaur_create_optimal_mcr(void)
169{
170 int i;
171 /*
172 * Allocate up to 6 mcrs to mark as much of ram as possible
173 * as write combining and weak write ordered.
174 *
175 * To experiment with: Linux never uses stack operations for
176 * mmio spaces so we could globally enable stack operation wc
177 *
178 * Load the registers with type 31 - full write combining, all
179 * writes weakly ordered.
180 */
181 int used = centaur_mcr_compute(6, 31);
182
183 /*
184 * Wipe unused MCRs
185 */
186
187 for(i=used;i<8;i++)
188 wrmsr(MSR_IDT_MCR0+i, 0, 0);
189}
190
191static void __init winchip2_create_optimal_mcr(void)
192{
193 u32 lo, hi;
194 int i;
195
196 /*
197 * Allocate up to 6 mcrs to mark as much of ram as possible
198 * as write combining, weak store ordered.
199 *
200 * Load the registers with type 25
201 * 8 - weak write ordering
202 * 16 - weak read ordering
203 * 1 - write combining
204 */
205
206 int used = centaur_mcr_compute(6, 25);
207
208 /*
209 * Mark the registers we are using.
210 */
211
212 rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
213 for(i=0;i<used;i++)
214 lo|=1<<(9+i);
215 wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
216
217 /*
218 * Wipe unused MCRs
219 */
220
221 for(i=used;i<8;i++)
222 wrmsr(MSR_IDT_MCR0+i, 0, 0);
223}
224
225/*
226 * Handle the MCR key on the Winchip 2.
227 */
228
229static void __init winchip2_unprotect_mcr(void)
230{
231 u32 lo, hi;
232 u32 key;
233
234 rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
235 lo&=~0x1C0; /* blank bits 8-6 */
236 key = (lo>>17) & 7;
237 lo |= key<<6; /* replace with unlock key */
238 wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
239}
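/*
 * The unlock key lives in bits 17-19 of MSR_IDT_MCR_CTRL; copying it
 * into bits 6-8 (cleared above) unlocks the MCRs. winchip2_protect_mcr()
 * below relocks them by clearing bits 6-8 again.
 */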
240
241static void __init winchip2_protect_mcr(void)
242{
243 u32 lo, hi;
244
245 rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
246 lo&=~0x1C0; /* blank bits 8-6 */
247 wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
248}
249#endif /* CONFIG_X86_OOSTORE */
250
251#define ACE_PRESENT (1 << 6)
252#define ACE_ENABLED (1 << 7)
253#define ACE_FCR (1 << 28) /* MSR_VIA_FCR */
254
255#define RNG_PRESENT (1 << 2)
256#define RNG_ENABLED (1 << 3)
257#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */
258
259static void __init init_c3(struct cpuinfo_x86 *c)
260{
261 u32 lo, hi;
262
263 /* Test for Centaur Extended Feature Flags presence */
264 if (cpuid_eax(0xC0000000) >= 0xC0000001) {
265 u32 tmp = cpuid_edx(0xC0000001);
266
267 /* enable ACE unit, if present and disabled */
268 if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
269 rdmsr (MSR_VIA_FCR, lo, hi);
270 lo |= ACE_FCR; /* enable ACE unit */
271 wrmsr (MSR_VIA_FCR, lo, hi);
272 printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
273 }
274
275 /* enable RNG unit, if present and disabled */
276 if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
277 rdmsr (MSR_VIA_RNG, lo, hi);
278 lo |= RNG_ENABLE; /* enable RNG unit */
279 wrmsr (MSR_VIA_RNG, lo, hi);
280 printk(KERN_INFO "CPU: Enabled h/w RNG\n");
281 }
282
283 /* store Centaur Extended Feature Flags as
284 * word 5 of the CPU capability bit array
285 */
286 c->x86_capability[5] = cpuid_edx(0xC0000001);
287 }
288
289	/* Cyrix III family needs CX8 & PGE explicitly enabled. */
290 if (c->x86_model >=6 && c->x86_model <= 9) {
291 rdmsr (MSR_VIA_FCR, lo, hi);
292 lo |= (1<<1 | 1<<7);
293 wrmsr (MSR_VIA_FCR, lo, hi);
294 set_bit(X86_FEATURE_CX8, c->x86_capability);
295 }
296
297	/* Before Nehemiah, the C3s had 3DNow! */
298 if (c->x86_model >=6 && c->x86_model <9)
299 set_bit(X86_FEATURE_3DNOW, c->x86_capability);
300
301 get_model_name(c);
302 display_cacheinfo(c);
303}
304
305static void __init init_centaur(struct cpuinfo_x86 *c)
306{
307 enum {
308 ECX8=1<<1,
309 EIERRINT=1<<2,
310 DPM=1<<3,
311 DMCE=1<<4,
312 DSTPCLK=1<<5,
313 ELINEAR=1<<6,
314 DSMC=1<<7,
315 DTLOCK=1<<8,
316 EDCTLB=1<<8,
317 EMMX=1<<9,
318 DPDC=1<<11,
319 EBRPRED=1<<12,
320 DIC=1<<13,
321 DDC=1<<14,
322 DNA=1<<15,
323 ERETSTK=1<<16,
324 E2MMX=1<<19,
325 EAMD3D=1<<20,
326 };
327
328 char *name;
329 u32 fcr_set=0;
330 u32 fcr_clr=0;
331 u32 lo,hi,newlo;
332 u32 aa,bb,cc,dd;
333
334 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
335 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
336 clear_bit(0*32+31, c->x86_capability);
337
338 switch (c->x86) {
339
340 case 5:
341 switch(c->x86_model) {
342 case 4:
343 name="C6";
344 fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
345 fcr_clr=DPDC;
346 printk(KERN_NOTICE "Disabling bugged TSC.\n");
347 clear_bit(X86_FEATURE_TSC, c->x86_capability);
348#ifdef CONFIG_X86_OOSTORE
349 centaur_create_optimal_mcr();
350 /* Enable
351 write combining on non-stack, non-string
352 write combining on string, all types
353 weak write ordering
354
355 The C6 original lacks weak read order
356
357 Note 0x120 is write only on Winchip 1 */
358
359 wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);
360#endif
361 break;
362 case 8:
363 switch(c->x86_mask) {
364 default:
365 name="2";
366 break;
367 case 7 ... 9:
368 name="2A";
369 break;
370 case 10 ... 15:
371 name="2B";
372 break;
373 }
374 fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D;
375 fcr_clr=DPDC;
376#ifdef CONFIG_X86_OOSTORE
377 winchip2_unprotect_mcr();
378 winchip2_create_optimal_mcr();
379 rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
380 /* Enable
381 write combining on non-stack, non-string
382 write combining on string, all types
383 weak write ordering
384 */
385 lo|=31;
386 wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
387 winchip2_protect_mcr();
388#endif
389 break;
390 case 9:
391 name="3";
392 fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D;
393 fcr_clr=DPDC;
394#ifdef CONFIG_X86_OOSTORE
395 winchip2_unprotect_mcr();
396 winchip2_create_optimal_mcr();
397 rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
398 /* Enable
399 write combining on non-stack, non-string
400 write combining on string, all types
401 weak write ordering
402 */
403 lo|=31;
404 wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
405 winchip2_protect_mcr();
406#endif
407 break;
408 case 10:
409 name="4";
410 /* no info on the WC4 yet */
411 break;
412 default:
413 name="??";
414 }
415
416 rdmsr(MSR_IDT_FCR1, lo, hi);
417 newlo=(lo|fcr_set) & (~fcr_clr);
418
419 if (newlo!=lo) {
420 printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo );
421 wrmsr(MSR_IDT_FCR1, newlo, hi );
422 } else {
423 printk(KERN_INFO "Centaur FCR is 0x%X\n",lo);
424 }
425 /* Emulate MTRRs using Centaur's MCR. */
426 set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability);
427 /* Report CX8 */
428 set_bit(X86_FEATURE_CX8, c->x86_capability);
429 /* Set 3DNow! on Winchip 2 and above. */
430 if (c->x86_model >=8)
431 set_bit(X86_FEATURE_3DNOW, c->x86_capability);
432 /* See if we can find out some more. */
433 if ( cpuid_eax(0x80000000) >= 0x80000005 ) {
434 /* Yes, we can. */
435 cpuid(0x80000005,&aa,&bb,&cc,&dd);
436 /* Add L1 data and code cache sizes. */
437 c->x86_cache_size = (cc>>24)+(dd>>24);
438 }
439 sprintf( c->x86_model_id, "WinChip %s", name );
440 break;
441
442 case 6:
443 init_c3(c);
444 break;
445 }
446}
447
448static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size)
449{
450 /* VIA C3 CPUs (670-68F) need further shifting. */
451 if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
452 size >>= 8;
453
454 /* VIA also screwed up Nehemiah stepping 1, and made
455 it return '65KB' instead of '64KB'
456 - Note, it seems this may only be in engineering samples. */
457 if ((c->x86==6) && (c->x86_model==9) && (c->x86_mask==1) && (size==65))
458 size -=1;
459
460 return size;
461}
462
463static struct cpu_dev centaur_cpu_dev __initdata = {
464 .c_vendor = "Centaur",
465 .c_ident = { "CentaurHauls" },
466 .c_init = init_centaur,
467 .c_size_cache = centaur_size_cache,
468};
469
470int __init centaur_init_cpu(void)
471{
472 cpu_devs[X86_VENDOR_CENTAUR] = &centaur_cpu_dev;
473 return 0;
474}
475
476//early_arch_initcall(centaur_init_cpu);
diff --git a/arch/i386/kernel/cpu/changelog b/arch/i386/kernel/cpu/changelog
new file mode 100644
index 000000000000..cef76b80a710
--- /dev/null
+++ b/arch/i386/kernel/cpu/changelog
@@ -0,0 +1,63 @@
1/*
2 * Enhanced CPU type detection by Mike Jagdis, Patrick St. Jean
3 * and Martin Mares, November 1997.
4 *
5 * Force Cyrix 6x86(MX) and M II processors to report MTRR capability
6 * and Cyrix "coma bug" recognition by
7 * Zoltán Böszörményi <zboszor@mail.externet.hu> February 1999.
8 *
9 * Force Centaur C6 processors to report MTRR capability.
10 * Bart Hartgers <bart@etpmod.phys.tue.nl>, May 1999.
11 *
12 * Intel Mobile Pentium II detection fix. Sean Gilley, June 1999.
13 *
14 * IDT Winchip tweaks, misc clean ups.
15 * Dave Jones <davej@suse.de>, August 1999
16 *
17 * Better detection of Centaur/IDT WinChip models.
18 * Bart Hartgers <bart@etpmod.phys.tue.nl>, August 1999.
19 *
20 * Cleaned up cache-detection code
21 * Dave Jones <davej@suse.de>, October 1999
22 *
23 * Added proper L2 cache detection for Coppermine
24 * Dragan Stancevic <visitor@valinux.com>, October 1999
25 *
26 * Added the original array for capability flags but forgot to credit
27 * myself :) (~1998) Fixed/cleaned up some cpu_model_info and other stuff
28 * Jauder Ho <jauderho@carumba.com>, January 2000
29 *
30 * Detection for Celeron coppermine, identify_cpu() overhauled,
31 * and a few other clean ups.
32 * Dave Jones <davej@suse.de>, April 2000
33 *
34 * Pentium III FXSR, SSE support
35 * General FPU state handling cleanups
36 * Gareth Hughes <gareth@valinux.com>, May 2000
37 *
38 * Added proper Cascades CPU and L2 cache detection for Cascades
39 * and 8-way type cache happy bunch from Intel:^)
40 * Dragan Stancevic <visitor@valinux.com>, May 2000
41 *
42 * Forward port AMD Duron errata T13 from 2.2.17pre
43 * Dave Jones <davej@suse.de>, August 2000
44 *
45 * Forward port lots of fixes/improvements from 2.2.18pre
46 * Cyrix III, Pentium IV support.
47 * Dave Jones <davej@suse.de>, October 2000
48 *
49 * Massive cleanup of CPU detection and bug handling;
50 * Transmeta CPU detection,
51 * H. Peter Anvin <hpa@zytor.com>, November 2000
52 *
53 * VIA C3 Support.
54 * Dave Jones <davej@suse.de>, March 2001
55 *
56 * AMD Athlon/Duron/Thunderbird bluesmoke support.
57 * Dave Jones <davej@suse.de>, April 2001.
58 *
59 * CacheSize bug workaround updates for AMD, Intel & VIA Cyrix.
60 * Dave Jones <davej@suse.de>, September, October 2001.
61 *
62 */
63
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
new file mode 100644
index 000000000000..ebd5d8247faa
--- /dev/null
+++ b/arch/i386/kernel/cpu/common.c
@@ -0,0 +1,634 @@
1#include <linux/init.h>
2#include <linux/string.h>
3#include <linux/delay.h>
4#include <linux/smp.h>
5#include <linux/module.h>
6#include <linux/percpu.h>
7#include <asm/semaphore.h>
8#include <asm/processor.h>
9#include <asm/i387.h>
10#include <asm/msr.h>
11#include <asm/io.h>
12#include <asm/mmu_context.h>
13#ifdef CONFIG_X86_LOCAL_APIC
14#include <asm/mpspec.h>
15#include <asm/apic.h>
16#include <mach_apic.h>
17#endif
18
19#include "cpu.h"
20
21DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
22EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
23
24DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
25EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
26
27static int cachesize_override __initdata = -1;
28static int disable_x86_fxsr __initdata = 0;
29static int disable_x86_serial_nr __initdata = 1;
30
31struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
32
33extern void mcheck_init(struct cpuinfo_x86 *c);
34
35extern int disable_pse;
36
37static void default_init(struct cpuinfo_x86 * c)
38{
39 /* Not much we can do here... */
40 /* Check if at least it has cpuid */
41 if (c->cpuid_level == -1) {
42 /* No cpuid. It must be an ancient CPU */
43 if (c->x86 == 4)
44 strcpy(c->x86_model_id, "486");
45 else if (c->x86 == 3)
46 strcpy(c->x86_model_id, "386");
47 }
48}
49
50static struct cpu_dev default_cpu = {
51 .c_init = default_init,
52};
53static struct cpu_dev * this_cpu = &default_cpu;
54
55static int __init cachesize_setup(char *str)
56{
57 get_option (&str, &cachesize_override);
58 return 1;
59}
60__setup("cachesize=", cachesize_setup);
61
62int __init get_model_name(struct cpuinfo_x86 *c)
63{
64 unsigned int *v;
65 char *p, *q;
66
67 if (cpuid_eax(0x80000000) < 0x80000004)
68 return 0;
69
70 v = (unsigned int *) c->x86_model_id;
71 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
72 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
73 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
74 c->x86_model_id[48] = 0;
75
76 /* Intel chips right-justify this string for some dumb reason;
77 undo that brain damage */
78 p = q = &c->x86_model_id[0];
79 while ( *p == ' ' )
80 p++;
81 if ( p != q ) {
82 while ( *p )
83 *q++ = *p++;
84 while ( q <= &c->x86_model_id[48] )
85 *q++ = '\0'; /* Zero-pad the rest */
86 }
87
88 return 1;
89}
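/*
 * Example of the fix-up above (string assumed for illustration): CPUID
 * may return "          Intel(R) Pentium(R) 4 CPU" with leading spaces;
 * the copy loop slides the text left and zero-pads the tail so the
 * model name prints flush-left.
 */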
90
91
92void __init display_cacheinfo(struct cpuinfo_x86 *c)
93{
94 unsigned int n, dummy, ecx, edx, l2size;
95
96 n = cpuid_eax(0x80000000);
97
98 if (n >= 0x80000005) {
99 cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
100 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
101 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
102 c->x86_cache_size=(ecx>>24)+(edx>>24);
103 }
104
105	if (n < 0x80000006)	/* Some chips just have a large L1. */
106 return;
107
108 ecx = cpuid_ecx(0x80000006);
109 l2size = ecx >> 16;
110
111 /* do processor-specific cache resizing */
112 if (this_cpu->c_size_cache)
113 l2size = this_cpu->c_size_cache(c,l2size);
114
115 /* Allow user to override all this if necessary. */
116 if (cachesize_override != -1)
117 l2size = cachesize_override;
118
119 if ( l2size == 0 )
120 return; /* Again, no L2 cache is possible */
121
122 c->x86_cache_size = l2size;
123
124 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
125 l2size, ecx & 0xFF);
126}
127
128/* Naming convention should be: <Name> [(<Codename>)] */
129/* This table is only used if init_<vendor>() below doesn't set the model name; */
130/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
131
132/* Look up CPU names by table lookup. */
133static char __init *table_lookup_model(struct cpuinfo_x86 *c)
134{
135 struct cpu_model_info *info;
136
137 if ( c->x86_model >= 16 )
138 return NULL; /* Range check */
139
140 if (!this_cpu)
141 return NULL;
142
143 info = this_cpu->c_models;
144
145 while (info && info->family) {
146 if (info->family == c->x86)
147 return info->model_names[c->x86_model];
148 info++;
149 }
150 return NULL; /* Not found */
151}
152
153
154void __init get_cpu_vendor(struct cpuinfo_x86 *c, int early)
155{
156 char *v = c->x86_vendor_id;
157 int i;
158
159 for (i = 0; i < X86_VENDOR_NUM; i++) {
160 if (cpu_devs[i]) {
161 if (!strcmp(v,cpu_devs[i]->c_ident[0]) ||
162 (cpu_devs[i]->c_ident[1] &&
163 !strcmp(v,cpu_devs[i]->c_ident[1]))) {
164 c->x86_vendor = i;
165 if (!early)
166 this_cpu = cpu_devs[i];
167 break;
168 }
169 }
170 }
171}
172
173
174static int __init x86_fxsr_setup(char * s)
175{
176 disable_x86_fxsr = 1;
177 return 1;
178}
179__setup("nofxsr", x86_fxsr_setup);
180
181
182/* Standard macro to see if a specific flag is changeable */
183static inline int flag_is_changeable_p(u32 flag)
184{
185 u32 f1, f2;
186
187 asm("pushfl\n\t"
188 "pushfl\n\t"
189 "popl %0\n\t"
190 "movl %0,%1\n\t"
191 "xorl %2,%0\n\t"
192 "pushl %0\n\t"
193 "popfl\n\t"
194 "pushfl\n\t"
195 "popl %0\n\t"
196 "popfl\n\t"
197 : "=&r" (f1), "=&r" (f2)
198 : "ir" (flag));
199
200 return ((f1^f2) & flag) != 0;
201}
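/*
 * This is the classic EFLAGS probe: save EFLAGS, flip the bit of
 * interest, write it back, then re-read EFLAGS. The bit is changeable
 * iff the flip survived the round trip. With flag = X86_EFLAGS_ID
 * (bit 21) this is the documented test for CPUID support, used by
 * have_cpuid_p() below.
 */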
202
203
204/* Probe for the CPUID instruction */
205static int __init have_cpuid_p(void)
206{
207 return flag_is_changeable_p(X86_EFLAGS_ID);
208}
209
210/* Do minimum CPU detection early.
211 Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
212 The others are not touched to avoid unwanted side effects. */
213static void __init early_cpu_detect(void)
214{
215 struct cpuinfo_x86 *c = &boot_cpu_data;
216
217 c->x86_cache_alignment = 32;
218
219 if (!have_cpuid_p())
220 return;
221
222 /* Get vendor name */
223 cpuid(0x00000000, &c->cpuid_level,
224 (int *)&c->x86_vendor_id[0],
225 (int *)&c->x86_vendor_id[8],
226 (int *)&c->x86_vendor_id[4]);
227
228 get_cpu_vendor(c, 1);
229
230 c->x86 = 4;
231 if (c->cpuid_level >= 0x00000001) {
232 u32 junk, tfms, cap0, misc;
233 cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
234 c->x86 = (tfms >> 8) & 15;
235 c->x86_model = (tfms >> 4) & 15;
236 if (c->x86 == 0xf) {
237 c->x86 += (tfms >> 20) & 0xff;
238 c->x86_model += ((tfms >> 16) & 0xF) << 4;
239 }
240 c->x86_mask = tfms & 15;
241 if (cap0 & (1<<19))
242 c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
243 }
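	/*
	 * Worked example (signature assumed for illustration):
	 * tfms = 0x00020F41 has base family 0xF, extended family 0x02
	 * (bits 20-27), base model 0x4, extended model 0x2 (bits 16-19)
	 * and stepping 1, decoding to family 0x11, model 0x24, mask 0x1.
	 */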
244
245 early_intel_workaround(c);
246}
247
248void __init generic_identify(struct cpuinfo_x86 * c)
249{
250 u32 tfms, xlvl;
251 int junk;
252
253 if (have_cpuid_p()) {
254 /* Get vendor name */
255 cpuid(0x00000000, &c->cpuid_level,
256 (int *)&c->x86_vendor_id[0],
257 (int *)&c->x86_vendor_id[8],
258 (int *)&c->x86_vendor_id[4]);
259
260 get_cpu_vendor(c, 0);
261 /* Initialize the standard set of capabilities */
262 /* Note that the vendor-specific code below might override */
263
264 /* Intel-defined flags: level 0x00000001 */
265 if ( c->cpuid_level >= 0x00000001 ) {
266 u32 capability, excap;
267 cpuid(0x00000001, &tfms, &junk, &excap, &capability);
268 c->x86_capability[0] = capability;
269 c->x86_capability[4] = excap;
270 c->x86 = (tfms >> 8) & 15;
271 c->x86_model = (tfms >> 4) & 15;
272 if (c->x86 == 0xf) {
273 c->x86 += (tfms >> 20) & 0xff;
274 c->x86_model += ((tfms >> 16) & 0xF) << 4;
275 }
276 c->x86_mask = tfms & 15;
277 } else {
278 /* Have CPUID level 0 only - unheard of */
279 c->x86 = 4;
280 }
281
282 /* AMD-defined flags: level 0x80000001 */
283 xlvl = cpuid_eax(0x80000000);
284 if ( (xlvl & 0xffff0000) == 0x80000000 ) {
285 if ( xlvl >= 0x80000001 ) {
286 c->x86_capability[1] = cpuid_edx(0x80000001);
287 c->x86_capability[6] = cpuid_ecx(0x80000001);
288 }
289 if ( xlvl >= 0x80000004 )
290 get_model_name(c); /* Default name */
291 }
292 }
293}
294
295static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
296{
297 if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
298 /* Disable processor serial number */
299 unsigned long lo,hi;
300 rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
301 lo |= 0x200000;
302 wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
303 printk(KERN_NOTICE "CPU serial number disabled.\n");
304 clear_bit(X86_FEATURE_PN, c->x86_capability);
305
306 /* Disabling the serial number may affect the cpuid level */
307 c->cpuid_level = cpuid_eax(0);
308 }
309}
310
311static int __init x86_serial_nr_setup(char *s)
312{
313 disable_x86_serial_nr = 0;
314 return 1;
315}
316__setup("serialnumber", x86_serial_nr_setup);
317
318
319
320/*
321 * This does the hard work of actually picking apart the CPU stuff...
322 */
323void __init identify_cpu(struct cpuinfo_x86 *c)
324{
325 int i;
326
327 c->loops_per_jiffy = loops_per_jiffy;
328 c->x86_cache_size = -1;
329 c->x86_vendor = X86_VENDOR_UNKNOWN;
330 c->cpuid_level = -1; /* CPUID not detected */
331 c->x86_model = c->x86_mask = 0; /* So far unknown... */
332 c->x86_vendor_id[0] = '\0'; /* Unset */
333 c->x86_model_id[0] = '\0'; /* Unset */
334 c->x86_num_cores = 1;
335 memset(&c->x86_capability, 0, sizeof c->x86_capability);
336
337 if (!have_cpuid_p()) {
338 /* First of all, decide if this is a 486 or higher */
339 /* It's a 486 if we can modify the AC flag */
340 if ( flag_is_changeable_p(X86_EFLAGS_AC) )
341 c->x86 = 4;
342 else
343 c->x86 = 3;
344 }
345
346 generic_identify(c);
347
348 printk(KERN_DEBUG "CPU: After generic identify, caps:");
349 for (i = 0; i < NCAPINTS; i++)
350 printk(" %08lx", c->x86_capability[i]);
351 printk("\n");
352
353 if (this_cpu->c_identify) {
354 this_cpu->c_identify(c);
355
356 printk(KERN_DEBUG "CPU: After vendor identify, caps:");
357 for (i = 0; i < NCAPINTS; i++)
358 printk(" %08lx", c->x86_capability[i]);
359 printk("\n");
360 }
361
362 /*
363 * Vendor-specific initialization. In this section we
364 * canonicalize the feature flags, meaning if there are
365 * features a certain CPU supports which CPUID doesn't
366 * tell us, CPUID claiming incorrect flags, or other bugs,
367 * we handle them here.
368 *
369 * At the end of this section, c->x86_capability better
370 * indicate the features this CPU genuinely supports!
371 */
372 if (this_cpu->c_init)
373 this_cpu->c_init(c);
374
375 /* Disable the PN if appropriate */
376 squash_the_stupid_serial_number(c);
377
378 /*
379 * The vendor-specific functions might have changed features. Now
380 * we do "generic changes."
381 */
382
383 /* TSC disabled? */
384 if ( tsc_disable )
385 clear_bit(X86_FEATURE_TSC, c->x86_capability);
386
387 /* FXSR disabled? */
388 if (disable_x86_fxsr) {
389 clear_bit(X86_FEATURE_FXSR, c->x86_capability);
390 clear_bit(X86_FEATURE_XMM, c->x86_capability);
391 }
392
393 if (disable_pse)
394 clear_bit(X86_FEATURE_PSE, c->x86_capability);
395
396 /* If the model name is still unset, do table lookup. */
397 if ( !c->x86_model_id[0] ) {
398 char *p;
399 p = table_lookup_model(c);
400 if ( p )
401 strcpy(c->x86_model_id, p);
402 else
403 /* Last resort... */
404 sprintf(c->x86_model_id, "%02x/%02x",
405 c->x86_vendor, c->x86_model);
406 }
407
408 /* Now the feature flags better reflect actual CPU features! */
409
410 printk(KERN_DEBUG "CPU: After all inits, caps:");
411 for (i = 0; i < NCAPINTS; i++)
412 printk(" %08lx", c->x86_capability[i]);
413 printk("\n");
414
415 /*
416 * On SMP, boot_cpu_data holds the common feature set between
417 * all CPUs; so make sure that we indicate which features are
418 * common between the CPUs. The first time this routine gets
419 * executed, c == &boot_cpu_data.
420 */
421 if ( c != &boot_cpu_data ) {
422 /* AND the already accumulated flags with these */
423 for ( i = 0 ; i < NCAPINTS ; i++ )
424 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
425 }
426
427 /* Init Machine Check Exception if available. */
428#ifdef CONFIG_X86_MCE
429 mcheck_init(c);
430#endif
431}
432
433#ifdef CONFIG_X86_HT
434void __init detect_ht(struct cpuinfo_x86 *c)
435{
436 u32 eax, ebx, ecx, edx;
437 int index_lsb, index_msb, tmp;
438 int cpu = smp_processor_id();
439
440 if (!cpu_has(c, X86_FEATURE_HT))
441 return;
442
443 cpuid(1, &eax, &ebx, &ecx, &edx);
444 smp_num_siblings = (ebx & 0xff0000) >> 16;
445
446 if (smp_num_siblings == 1) {
447 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
448 } else if (smp_num_siblings > 1 ) {
449 index_lsb = 0;
450 index_msb = 31;
451
452 if (smp_num_siblings > NR_CPUS) {
453			printk(KERN_WARNING "CPU: Unsupported number of siblings: %d\n", smp_num_siblings);
454 smp_num_siblings = 1;
455 return;
456 }
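		/*
		 * The two loops below find the least and most significant
		 * set bits of smp_num_siblings; when it is not a power of
		 * two, index_msb is rounded up so the phys_pkg_id() shift
		 * is ceil(log2(siblings)).
		 */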
457 tmp = smp_num_siblings;
458 while ((tmp & 1) == 0) {
459 tmp >>=1 ;
460 index_lsb++;
461 }
462 tmp = smp_num_siblings;
463 while ((tmp & 0x80000000 ) == 0) {
464 tmp <<=1 ;
465 index_msb--;
466 }
467 if (index_lsb != index_msb )
468 index_msb++;
469 phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
470
471 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
472 phys_proc_id[cpu]);
473 }
474}
475#endif
476
477void __init print_cpu_info(struct cpuinfo_x86 *c)
478{
479 char *vendor = NULL;
480
481 if (c->x86_vendor < X86_VENDOR_NUM)
482 vendor = this_cpu->c_vendor;
483 else if (c->cpuid_level >= 0)
484 vendor = c->x86_vendor_id;
485
486 if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
487 printk("%s ", vendor);
488
489 if (!c->x86_model_id[0])
490 printk("%d86", c->x86);
491 else
492 printk("%s", c->x86_model_id);
493
494 if (c->x86_mask || c->cpuid_level >= 0)
495 printk(" stepping %02x\n", c->x86_mask);
496 else
497 printk("\n");
498}
499
500cpumask_t cpu_initialized __initdata = CPU_MASK_NONE;
501
502/* This is hacky. :)
503 * We're emulating future behavior.
504 * In the future, the cpu-specific init functions will be called implicitly
505 * via the magic of initcalls.
506 * They will insert themselves into the cpu_devs structure.
507 * Then, when cpu_init() is called, we can just iterate over that array.
508 */
509
510extern int intel_cpu_init(void);
511extern int cyrix_init_cpu(void);
512extern int nsc_init_cpu(void);
513extern int amd_init_cpu(void);
514extern int centaur_init_cpu(void);
515extern int transmeta_init_cpu(void);
516extern int rise_init_cpu(void);
517extern int nexgen_init_cpu(void);
518extern int umc_init_cpu(void);
519
520void __init early_cpu_init(void)
521{
522 intel_cpu_init();
523 cyrix_init_cpu();
524 nsc_init_cpu();
525 amd_init_cpu();
526 centaur_init_cpu();
527 transmeta_init_cpu();
528 rise_init_cpu();
529 nexgen_init_cpu();
530 umc_init_cpu();
531 early_cpu_detect();
532
533#ifdef CONFIG_DEBUG_PAGEALLOC
534 /* pse is not compatible with on-the-fly unmapping,
535 * disable it even if the cpus claim to support it.
536 */
537 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
538 disable_pse = 1;
539#endif
540}
541/*
542 * cpu_init() initializes state that is per-CPU. Some data is already
543 * initialized (naturally) in the bootstrap process, such as the GDT
544 * and IDT. We reload them nevertheless; this function acts as a
545 * 'CPU state barrier', and nothing should get across.
546 */
547void __init cpu_init (void)
548{
549 int cpu = smp_processor_id();
550 struct tss_struct * t = &per_cpu(init_tss, cpu);
551 struct thread_struct *thread = &current->thread;
552 __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
553
554 if (cpu_test_and_set(cpu, cpu_initialized)) {
555 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
556 for (;;) local_irq_enable();
557 }
558 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
559
560 if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
561 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
562 if (tsc_disable && cpu_has_tsc) {
563 printk(KERN_NOTICE "Disabling TSC...\n");
564 /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
565 clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
566 set_in_cr4(X86_CR4_TSD);
567 }
568
569 /*
570 * Initialize the per-CPU GDT with the boot GDT,
571 * and set up the GDT descriptor:
572 */
573 memcpy(&per_cpu(cpu_gdt_table, cpu), cpu_gdt_table,
574 GDT_SIZE);
575
576 /* Set up GDT entry for 16bit stack */
577 *(__u64 *)&(per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_ESPFIX_SS]) |=
578 ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
579 ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
580 (CPU_16BIT_STACK_SIZE - 1);
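	/*
	 * An i386 segment descriptor scatters its base address: base bits
	 * 0-23 occupy descriptor bits 16-39 and base bits 24-31 occupy
	 * bits 56-63, with the low 16 descriptor bits holding the limit.
	 * The two shifted ORs above drop stk16_off into those base fields;
	 * the last term sets the limit to the stack size minus one.
	 */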
581
582 cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
583 cpu_gdt_descr[cpu].address =
584 (unsigned long)&per_cpu(cpu_gdt_table, cpu);
585
586 /*
587 * Set up the per-thread TLS descriptor cache:
588 */
589 memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu),
590 GDT_ENTRY_TLS_ENTRIES * 8);
591
592 __asm__ __volatile__("lgdt %0" : : "m" (cpu_gdt_descr[cpu]));
593 __asm__ __volatile__("lidt %0" : : "m" (idt_descr));
594
595 /*
596 * Delete NT
597 */
598 __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
599
600 /*
601 * Set up and load the per-CPU TSS and LDT
602 */
603 atomic_inc(&init_mm.mm_count);
604 current->active_mm = &init_mm;
605 if (current->mm)
606 BUG();
607 enter_lazy_tlb(&init_mm, current);
608
609 load_esp0(t, thread);
610 set_tss_desc(cpu,t);
611 load_TR_desc();
612 load_LDT(&init_mm.context);
613
614 /* Set up doublefault TSS pointer in the GDT */
615 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
616
617 /* Clear %fs and %gs. */
618 asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
619
620 /* Clear all 6 debug registers: */
621
622#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
623
624 CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
625
626#undef CD
627
628 /*
629 * Force FPU initialization:
630 */
631 current_thread_info()->status = 0;
632 clear_used_math();
633 mxcsr_feature_mask_init();
634}
diff --git a/arch/i386/kernel/cpu/cpu.h b/arch/i386/kernel/cpu/cpu.h
new file mode 100644
index 000000000000..5a1d4f163e84
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpu.h
@@ -0,0 +1,30 @@
1
2struct cpu_model_info {
3 int vendor;
4 int family;
5 char *model_names[16];
6};
7
8/* attempt to consolidate cpu attributes */
9struct cpu_dev {
10 char * c_vendor;
11
12 /* some have two possibilities for cpuid string */
13 char * c_ident[2];
14
15 struct cpu_model_info c_models[4];
16
17 void (*c_init)(struct cpuinfo_x86 * c);
18 void (*c_identify)(struct cpuinfo_x86 * c);
19 unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size);
20};
21
22extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM];
23
24extern int get_model_name(struct cpuinfo_x86 *c);
25extern void display_cacheinfo(struct cpuinfo_x86 *c);
26
27extern void generic_identify(struct cpuinfo_x86 * c);
28
29extern void early_intel_workaround(struct cpuinfo_x86 *c);
30
diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig
new file mode 100644
index 000000000000..f25ffd74235c
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/Kconfig
@@ -0,0 +1,231 @@
1#
2# CPU Frequency scaling
3#
4
5menu "CPU Frequency scaling"
6
7source "drivers/cpufreq/Kconfig"
8
9if CPU_FREQ
10
11comment "CPUFreq processor drivers"
12
13config X86_ACPI_CPUFREQ
14 tristate "ACPI Processor P-States driver"
15 select CPU_FREQ_TABLE
16 depends on ACPI_PROCESSOR
17 help
18 This driver adds a CPUFreq driver which utilizes the ACPI
19 Processor Performance States.
20
21 For details, take a look at <file:Documentation/cpu-freq/>.
22
23 If in doubt, say N.
24
25config ELAN_CPUFREQ
26 tristate "AMD Elan"
27 select CPU_FREQ_TABLE
28 depends on X86_ELAN
29 ---help---
30 This adds the CPUFreq driver for AMD Elan SC400 and SC410
31 processors.
32
33 You need to specify the processor maximum speed as boot
34 parameter: elanfreq=maxspeed (in kHz) or as module
35 parameter "max_freq".
36
37 For details, take a look at <file:Documentation/cpu-freq/>.
38
39 If in doubt, say N.
40
41config X86_POWERNOW_K6
42 tristate "AMD Mobile K6-2/K6-3 PowerNow!"
43 select CPU_FREQ_TABLE
44 help
45 This adds the CPUFreq driver for mobile AMD K6-2+ and mobile
46 AMD K6-3+ processors.
47
48 For details, take a look at <file:Documentation/cpu-freq/>.
49
50 If in doubt, say N.
51
52config X86_POWERNOW_K7
53 tristate "AMD Mobile Athlon/Duron PowerNow!"
54 select CPU_FREQ_TABLE
55 help
56	  This adds the CPUFreq driver for mobile AMD K7 (Athlon/Duron) processors.
57
58 For details, take a look at <file:Documentation/cpu-freq/>.
59
60 If in doubt, say N.
61
62config X86_POWERNOW_K7_ACPI
63 bool
64 depends on X86_POWERNOW_K7 && ACPI_PROCESSOR
65 depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m)
66 default y
67
68config X86_POWERNOW_K8
69 tristate "AMD Opteron/Athlon64 PowerNow!"
70 select CPU_FREQ_TABLE
71 depends on EXPERIMENTAL
72 help
73	  This adds the CPUFreq driver for AMD Opteron and Athlon64 processors.
74
75 For details, take a look at <file:Documentation/cpu-freq/>.
76
77 If in doubt, say N.
78
79config X86_POWERNOW_K8_ACPI
80 bool
81 depends on X86_POWERNOW_K8 && ACPI_PROCESSOR
82 depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m)
83 default y
84
85config X86_GX_SUSPMOD
86 tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
87 help
88	 This adds the CPUFreq driver for NatSemi Geode processors which
89 support suspend modulation.
90
91 For details, take a look at <file:Documentation/cpu-freq/>.
92
93 If in doubt, say N.
94
95config X86_SPEEDSTEP_CENTRINO
96 tristate "Intel Enhanced SpeedStep"
97 select CPU_FREQ_TABLE
98 select X86_SPEEDSTEP_CENTRINO_TABLE if (!X86_SPEEDSTEP_CENTRINO_ACPI)
99 help
100 This adds the CPUFreq driver for Enhanced SpeedStep enabled
101 mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However,
102 you also need to say Y to "Use ACPI tables to decode..." below
103 [which might imply enabling ACPI] if you want to use this driver
104 on non-Banias CPUs.
105
106 For details, take a look at <file:Documentation/cpu-freq/>.
107
108 If in doubt, say N.
109
110config X86_SPEEDSTEP_CENTRINO_ACPI
111 bool "Use ACPI tables to decode valid frequency/voltage pairs"
112 depends on X86_SPEEDSTEP_CENTRINO && ACPI_PROCESSOR
113 depends on !(X86_SPEEDSTEP_CENTRINO = y && ACPI_PROCESSOR = m)
114 default y
115 help
116 Use primarily the information provided in the BIOS ACPI tables
117 to determine valid CPU frequency and voltage pairings. It is
118 required for the driver to work on non-Banias CPUs.
119
120 If in doubt, say Y.
121
122config X86_SPEEDSTEP_CENTRINO_TABLE
123 bool "Built-in tables for Banias CPUs"
124 depends on X86_SPEEDSTEP_CENTRINO
125 default y
126 help
127 Use built-in tables for Banias CPUs if ACPI encoding
128 is not available.
129
130 If in doubt, say N.
131
132config X86_SPEEDSTEP_ICH
133 tristate "Intel Speedstep on ICH-M chipsets (ioport interface)"
134 select CPU_FREQ_TABLE
135 help
136 This adds the CPUFreq driver for certain mobile Intel Pentium III
137 (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all
138 mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2,
139 ICH3 or ICH4 southbridge.
140
141 For details, take a look at <file:Documentation/cpu-freq/>.
142
143 If in doubt, say N.
144
145config X86_SPEEDSTEP_SMI
146 tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)"
147 select CPU_FREQ_TABLE
148 depends on EXPERIMENTAL
149 help
150 This adds the CPUFreq driver for certain mobile Intel Pentium III
151 (Coppermine), all mobile Intel Pentium III-M (Tualatin)
152 on systems which have an Intel 440BX/ZX/MX southbridge.
153
154 For details, take a look at <file:Documentation/cpu-freq/>.
155
156 If in doubt, say N.
157
158config X86_P4_CLOCKMOD
159 tristate "Intel Pentium 4 clock modulation"
160 select CPU_FREQ_TABLE
161 help
162 This adds the CPUFreq driver for Intel Pentium 4 / XEON
163 processors.
164
165 For details, take a look at <file:Documentation/cpu-freq/>.
166
167 If in doubt, say N.
168
169config X86_CPUFREQ_NFORCE2
170 tristate "nVidia nForce2 FSB changing"
171 depends on EXPERIMENTAL
172 help
173 This adds the CPUFreq driver for FSB changing on nVidia nForce2
174 platforms.
175
176 For details, take a look at <file:Documentation/cpu-freq/>.
177
178 If in doubt, say N.
179
180config X86_LONGRUN
181 tristate "Transmeta LongRun"
182 help
183 This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors
184 which support LongRun.
185
186 For details, take a look at <file:Documentation/cpu-freq/>.
187
188 If in doubt, say N.
189
190config X86_LONGHAUL
191 tristate "VIA Cyrix III Longhaul"
192 select CPU_FREQ_TABLE
193 help
194 This adds the CPUFreq driver for VIA Samuel/CyrixIII,
195 VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T
196 processors.
197
198 For details, take a look at <file:Documentation/cpu-freq/>.
199
200 If in doubt, say N.
201
202comment "shared options"
203
204config X86_ACPI_CPUFREQ_PROC_INTF
205 bool "/proc/acpi/processor/../performance interface (deprecated)"
206 depends on PROC_FS
207 depends on X86_ACPI_CPUFREQ || X86_SPEEDSTEP_CENTRINO_ACPI || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI
208 help
209 This enables the deprecated /proc/acpi/processor/../performance
210 interface. While it is helpful for debugging, the generic,
211 cross-architecture cpufreq interfaces should be used.
212
213 If in doubt, say N.
214
215config X86_SPEEDSTEP_LIB
216 tristate
217 default X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD
218
219config X86_SPEEDSTEP_RELAXED_CAP_CHECK
220 bool "Relaxed speedstep capability checks"
221 depends on (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH)
222 help
223 Don't perform all checks for a speedstep capable system which would
224 normally be done. Some ancient or strange systems, though speedstep
225 capable, don't always indicate that they are speedstep capable. This
226 option lets the probing code bypass some of those checks if the
227 parameter "relaxed_check=1" is passed to the module.
228
229endif # CPU_FREQ
230
231endmenu
diff --git a/arch/i386/kernel/cpu/cpufreq/Makefile b/arch/i386/kernel/cpu/cpufreq/Makefile
new file mode 100644
index 000000000000..a922e97aeedd
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/Makefile
@@ -0,0 +1,14 @@
1obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
2obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o
3obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
4obj-$(CONFIG_X86_LONGHAUL) += longhaul.o
5obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o
6obj-$(CONFIG_X86_LONGRUN) += longrun.o
7obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o
8obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o
9obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o
10obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o
11obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o
12obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
13obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o
14obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
new file mode 100644
index 000000000000..963e17aa205d
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -0,0 +1,537 @@
1/*
2 * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.3 $)
3 *
4 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
5 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
6 * Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
7 *
8 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or (at
13 * your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with this program; if not, write to the Free Software Foundation, Inc.,
22 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
23 *
24 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
25 */
26
27#include <linux/config.h>
28#include <linux/kernel.h>
29#include <linux/module.h>
30#include <linux/init.h>
31#include <linux/cpufreq.h>
32#include <linux/proc_fs.h>
33#include <linux/seq_file.h>
34#include <asm/io.h>
35#include <asm/delay.h>
36#include <asm/uaccess.h>
37
38#include <linux/acpi.h>
39#include <acpi/processor.h>
40
41#include "speedstep-est-common.h"
42
43#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
44
45MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
46MODULE_DESCRIPTION("ACPI Processor P-States Driver");
47MODULE_LICENSE("GPL");
48
49
50struct cpufreq_acpi_io {
51 struct acpi_processor_performance acpi_data;
52 struct cpufreq_frequency_table *freq_table;
53 unsigned int resume;
54};
55
56static struct cpufreq_acpi_io *acpi_io_data[NR_CPUS];
57
58static struct cpufreq_driver acpi_cpufreq_driver;
59
60static int
61acpi_processor_write_port(
62 u16 port,
63 u8 bit_width,
64 u32 value)
65{
66 if (bit_width <= 8) {
67 outb(value, port);
68 } else if (bit_width <= 16) {
69 outw(value, port);
70 } else if (bit_width <= 32) {
71 outl(value, port);
72 } else {
73 return -ENODEV;
74 }
75 return 0;
76}
77
78static int
79acpi_processor_read_port(
80 u16 port,
81 u8 bit_width,
82 u32 *ret)
83{
84 *ret = 0;
85 if (bit_width <= 8) {
86 *ret = inb(port);
87 } else if (bit_width <= 16) {
88 *ret = inw(port);
89 } else if (bit_width <= 32) {
90 *ret = inl(port);
91 } else {
92 return -ENODEV;
93 }
94 return 0;
95}
96
97static int
98acpi_processor_set_performance (
99 struct cpufreq_acpi_io *data,
100 unsigned int cpu,
101 int state)
102{
103 u16 port = 0;
104 u8 bit_width = 0;
105 int ret = 0;
106 u32 value = 0;
107 int i = 0;
108 struct cpufreq_freqs cpufreq_freqs;
109 cpumask_t saved_mask;
110 int retval;
111
112 dprintk("acpi_processor_set_performance\n");
113
114 /*
115 * TBD: Use something other than set_cpus_allowed.
116 * As set_cpus_allowed is a bit racy,
117 * with any other set_cpus_allowed for this process.
118 */
119 saved_mask = current->cpus_allowed;
120 set_cpus_allowed(current, cpumask_of_cpu(cpu));
121	if (smp_processor_id() != cpu) {
122		retval = -EAGAIN; goto migrate_end; /* restore saved_mask before returning */
123	}
124
125 if (state == data->acpi_data.state) {
126 if (unlikely(data->resume)) {
127 dprintk("Called after resume, resetting to P%d\n", state);
128 data->resume = 0;
129 } else {
130 dprintk("Already at target state (P%d)\n", state);
131 retval = 0;
132 goto migrate_end;
133 }
134 }
135
136 dprintk("Transitioning from P%d to P%d\n",
137 data->acpi_data.state, state);
138
139 /* cpufreq frequency struct */
140 cpufreq_freqs.cpu = cpu;
141 cpufreq_freqs.old = data->freq_table[data->acpi_data.state].frequency;
142 cpufreq_freqs.new = data->freq_table[state].frequency;
143
144 /* notify cpufreq */
145 cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
146
147 /*
148 * First we write the target state's 'control' value to the
149 * control_register.
150 */
151
152 port = data->acpi_data.control_register.address;
153 bit_width = data->acpi_data.control_register.bit_width;
154 value = (u32) data->acpi_data.states[state].control;
155
156 dprintk("Writing 0x%08x to port 0x%04x\n", value, port);
157
158 ret = acpi_processor_write_port(port, bit_width, value);
159 if (ret) {
160 dprintk("Invalid port width 0x%04x\n", bit_width);
161 retval = ret;
162 goto migrate_end;
163 }
164
165 /*
166 * Then we read the 'status_register' and compare the value with the
167 * target state's 'status' to make sure the transition was successful.
168 * Note that we'll poll for up to 1ms (100 cycles of 10us) before
169 * giving up.
170 */
171
172 port = data->acpi_data.status_register.address;
173 bit_width = data->acpi_data.status_register.bit_width;
174
175 dprintk("Looking for 0x%08x from port 0x%04x\n",
176 (u32) data->acpi_data.states[state].status, port);
177
178 for (i=0; i<100; i++) {
179 ret = acpi_processor_read_port(port, bit_width, &value);
180 if (ret) {
181 dprintk("Invalid port width 0x%04x\n", bit_width);
182 retval = ret;
183 goto migrate_end;
184 }
185 if (value == (u32) data->acpi_data.states[state].status)
186 break;
187 udelay(10);
188 }
189
190 /* notify cpufreq */
191 cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
192
193 if (value != (u32) data->acpi_data.states[state].status) {
194 unsigned int tmp = cpufreq_freqs.new;
195 cpufreq_freqs.new = cpufreq_freqs.old;
196 cpufreq_freqs.old = tmp;
197 cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
198 cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
199 printk(KERN_WARNING "acpi-cpufreq: Transition failed\n");
200 retval = -ENODEV;
201 goto migrate_end;
202 }
203
204 dprintk("Transition successful after %d microseconds\n", i * 10);
205
206 data->acpi_data.state = state;
207
208 retval = 0;
209migrate_end:
210 set_cpus_allowed(current, saved_mask);
211 return (retval);
212}
213
214
215static int
216acpi_cpufreq_target (
217 struct cpufreq_policy *policy,
218 unsigned int target_freq,
219 unsigned int relation)
220{
221 struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
222 unsigned int next_state = 0;
223 unsigned int result = 0;
224
225	dprintk("acpi_cpufreq_target\n");
226
227 result = cpufreq_frequency_table_target(policy,
228 data->freq_table,
229 target_freq,
230 relation,
231 &next_state);
232 if (result)
233 return (result);
234
235 result = acpi_processor_set_performance (data, policy->cpu, next_state);
236
237 return (result);
238}
239
240
241static int
242acpi_cpufreq_verify (
243 struct cpufreq_policy *policy)
244{
245 unsigned int result = 0;
246 struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
247
248 dprintk("acpi_cpufreq_verify\n");
249
250 result = cpufreq_frequency_table_verify(policy,
251 data->freq_table);
252
253 return (result);
254}
255
256
257static unsigned long
258acpi_cpufreq_guess_freq (
259 struct cpufreq_acpi_io *data,
260 unsigned int cpu)
261{
262 if (cpu_khz) {
263 /* search the closest match to cpu_khz */
264 unsigned int i;
265 unsigned long freq;
266 unsigned long freqn = data->acpi_data.states[0].core_frequency * 1000;
267
268 for (i=0; i < (data->acpi_data.state_count - 1); i++) {
269 freq = freqn;
270 freqn = data->acpi_data.states[i+1].core_frequency * 1000;
271 if ((2 * cpu_khz) > (freqn + freq)) {
272 data->acpi_data.state = i;
273 return (freq);
274 }
275 }
276 data->acpi_data.state = data->acpi_data.state_count - 1;
277 return (freqn);
278	} else {
279		/* assume CPU is at P0... */
280		data->acpi_data.state = 0;
281		return data->acpi_data.states[0].core_frequency * 1000;
282	}
283}
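/*
 * The test (2 * cpu_khz) > (freqn + freq) reads "cpu_khz is closer to
 * freq than to freqn". Since the ACPI table runs from the fastest
 * P-state downwards, the first state whose midpoint with its successor
 * falls below the measured clock is the nearest match.
 */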
284
285
286/*
287 * acpi_processor_cpu_init_pdc_est - let BIOS know about the SMP capabilities
288 * of this driver
289 * @perf: struct acpi_processor_performance for this CPU
290 * @cpu: CPU being initialized
291 *
292 * To avoid issues with legacy OSes, some BIOSes need to be informed of
293 * the SMP capabilities of the OS P-state driver. Here we set the bits in _PDC
294 * accordingly, for Enhanced Speedstep. Actual call to _PDC is done in
295 * driver/acpi/processor.c
296 */
297static void
298acpi_processor_cpu_init_pdc_est(
299 struct acpi_processor_performance *perf,
300 unsigned int cpu,
301 struct acpi_object_list *obj_list
302 )
303{
304 union acpi_object *obj;
305 u32 *buf;
306 struct cpuinfo_x86 *c = cpu_data + cpu;
307 dprintk("acpi_processor_cpu_init_pdc_est\n");
308
309 if (!cpu_has(c, X86_FEATURE_EST))
310 return;
311
312 /* Initialize pdc. It will be used later. */
313 if (!obj_list)
314 return;
315
316 if (!(obj_list->count && obj_list->pointer))
317 return;
318
319 obj = obj_list->pointer;
320 if ((obj->buffer.length == 12) && obj->buffer.pointer) {
321 buf = (u32 *)obj->buffer.pointer;
322 buf[0] = ACPI_PDC_REVISION_ID;
323 buf[1] = 1;
324 buf[2] = ACPI_PDC_EST_CAPABILITY_SMP;
325 perf->pdc = obj_list;
326 }
327 return;
328}
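/*
 * The 12-byte _PDC buffer above is three u32s: the PDC revision, the
 * number of capability dwords (1), and the capability word itself
 * (here just the EST SMP bit). The _PDC method itself is evaluated
 * later in driver/acpi/processor.c.
 */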
329
330
331/* CPU specific PDC initialization */
332static void
333acpi_processor_cpu_init_pdc(
334 struct acpi_processor_performance *perf,
335 unsigned int cpu,
336 struct acpi_object_list *obj_list
337 )
338{
339 struct cpuinfo_x86 *c = cpu_data + cpu;
340 dprintk("acpi_processor_cpu_init_pdc\n");
341 perf->pdc = NULL;
342 if (cpu_has(c, X86_FEATURE_EST))
343 acpi_processor_cpu_init_pdc_est(perf, cpu, obj_list);
344 return;
345}
346
347
348static int
349acpi_cpufreq_cpu_init (
350 struct cpufreq_policy *policy)
351{
352 unsigned int i;
353 unsigned int cpu = policy->cpu;
354 struct cpufreq_acpi_io *data;
355 unsigned int result = 0;
356
357 union acpi_object arg0 = {ACPI_TYPE_BUFFER};
358 u32 arg0_buf[3];
359 struct acpi_object_list arg_list = {1, &arg0};
360
361 dprintk("acpi_cpufreq_cpu_init\n");
362 /* setup arg_list for _PDC settings */
363 arg0.buffer.length = 12;
364 arg0.buffer.pointer = (u8 *) arg0_buf;
365
366 data = kmalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
367 if (!data)
368 return (-ENOMEM);
369 memset(data, 0, sizeof(struct cpufreq_acpi_io));
370
371 acpi_io_data[cpu] = data;
372
373 acpi_processor_cpu_init_pdc(&data->acpi_data, cpu, &arg_list);
374 result = acpi_processor_register_performance(&data->acpi_data, cpu);
375 data->acpi_data.pdc = NULL;
376
377 if (result)
378 goto err_free;
379
380 if (is_const_loops_cpu(cpu)) {
381 acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
382 }
383
384 /* capability check */
385 if (data->acpi_data.state_count <= 1) {
386 dprintk("No P-States\n");
387 result = -ENODEV;
388 goto err_unreg;
389 }
390 if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO) ||
391 (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO)) {
392 dprintk("Unsupported address space [%d, %d]\n",
393 (u32) (data->acpi_data.control_register.space_id),
394 (u32) (data->acpi_data.status_register.space_id));
395 result = -ENODEV;
396 goto err_unreg;
397 }
398
399 /* alloc freq_table */
400 data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * (data->acpi_data.state_count + 1), GFP_KERNEL);
401 if (!data->freq_table) {
402 result = -ENOMEM;
403 goto err_unreg;
404 }
405
406 /* detect transition latency */
407 policy->cpuinfo.transition_latency = 0;
408 for (i=0; i<data->acpi_data.state_count; i++) {
409 if ((data->acpi_data.states[i].transition_latency * 1000) > policy->cpuinfo.transition_latency)
410 policy->cpuinfo.transition_latency = data->acpi_data.states[i].transition_latency * 1000;
411 }
412 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
413
414 /* The current speed is unknown and not detectable by ACPI... */
415 policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
416
417 /* table init */
418 for (i=0; i<=data->acpi_data.state_count; i++)
419 {
420 data->freq_table[i].index = i;
421 if (i<data->acpi_data.state_count)
422 data->freq_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000;
423 else
424 data->freq_table[i].frequency = CPUFREQ_TABLE_END;
425 }
426
427 result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
428 if (result) {
429 goto err_freqfree;
430 }
431
432 /* notify BIOS that we exist */
433 acpi_processor_notify_smm(THIS_MODULE);
434
435 printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management activated.\n",
436 cpu);
437 for (i = 0; i < data->acpi_data.state_count; i++)
438 dprintk(" %cP%d: %d MHz, %d mW, %d uS\n",
439 (i == data->acpi_data.state?'*':' '), i,
440 (u32) data->acpi_data.states[i].core_frequency,
441 (u32) data->acpi_data.states[i].power,
442 (u32) data->acpi_data.states[i].transition_latency);
443
444 cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
445 return (result);
446
447 err_freqfree:
448 kfree(data->freq_table);
449 err_unreg:
450 acpi_processor_unregister_performance(&data->acpi_data, cpu);
451 err_free:
452 kfree(data);
453 acpi_io_data[cpu] = NULL;
454
455 return (result);
456}
457
458
459static int
460acpi_cpufreq_cpu_exit (
461 struct cpufreq_policy *policy)
462{
463 struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
464
465
466 dprintk("acpi_cpufreq_cpu_exit\n");
467
468 if (data) {
469 cpufreq_frequency_table_put_attr(policy->cpu);
470 acpi_io_data[policy->cpu] = NULL;
471 acpi_processor_unregister_performance(&data->acpi_data, policy->cpu);
472 kfree(data);
473 }
474
475 return (0);
476}
477
478static int
479acpi_cpufreq_resume (
480 struct cpufreq_policy *policy)
481{
482 struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
483
484
485 dprintk("acpi_cpufreq_resume\n");
486
487 data->resume = 1;
488
489 return (0);
490}
491
492
493static struct freq_attr* acpi_cpufreq_attr[] = {
494 &cpufreq_freq_attr_scaling_available_freqs,
495 NULL,
496};
497
498static struct cpufreq_driver acpi_cpufreq_driver = {
499 .verify = acpi_cpufreq_verify,
500 .target = acpi_cpufreq_target,
501 .init = acpi_cpufreq_cpu_init,
502 .exit = acpi_cpufreq_cpu_exit,
503 .resume = acpi_cpufreq_resume,
504 .name = "acpi-cpufreq",
505 .owner = THIS_MODULE,
506 .attr = acpi_cpufreq_attr,
507};
508
509
510static int __init
511acpi_cpufreq_init (void)
512{
513 int result = 0;
514
515 dprintk("acpi_cpufreq_init\n");
516
517 result = cpufreq_register_driver(&acpi_cpufreq_driver);
518
519 return (result);
520}
521
522
523static void __exit
524acpi_cpufreq_exit (void)
525{
526 dprintk("acpi_cpufreq_exit\n");
527
528 cpufreq_unregister_driver(&acpi_cpufreq_driver);
529
530 return;
531}
532
533
534late_initcall(acpi_cpufreq_init);
535module_exit(acpi_cpufreq_exit);
536
537MODULE_ALIAS("acpi");
diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c
new file mode 100644
index 000000000000..04a405345203
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -0,0 +1,457 @@
1/*
2 * (C) 2004 Sebastian Witt <se.witt@gmx.net>
3 *
4 * Licensed under the terms of the GNU GPL License version 2.
5 * Based upon reverse engineered information
6 *
7 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
8 */
9
10#include <linux/kernel.h>
11#include <linux/module.h>
12#include <linux/moduleparam.h>
13#include <linux/init.h>
14#include <linux/cpufreq.h>
15#include <linux/pci.h>
16#include <linux/delay.h>
17
18#define NFORCE2_XTAL 25
19#define NFORCE2_BOOTFSB 0x48
20#define NFORCE2_PLLENABLE 0xa8
21#define NFORCE2_PLLREG 0xa4
22#define NFORCE2_PLLADR 0xa0
23#define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div)
24
25#define NFORCE2_MIN_FSB 50
26#define NFORCE2_SAFE_DISTANCE 50
27
28/* Delay in ms between FSB changes */
29//#define NFORCE2_DELAY 10
30
31/* nforce2_chipset:
32 * FSB is changed using the chipset
33 */
34static struct pci_dev *nforce2_chipset_dev;
35
36/* fid:
37 * multiplier * 10
38 */
39static int fid = 0;
40
41/* min_fsb, max_fsb:
42 * minimum and maximum FSB (= FSB at boot time)
43 */
44static int min_fsb = 0;
45static int max_fsb = 0;
46
47MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>");
48MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver");
49MODULE_LICENSE("GPL");
50
51module_param(fid, int, 0444);
52module_param(min_fsb, int, 0444);
53
54MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
55MODULE_PARM_DESC(min_fsb,
56 "Minimum FSB to use, if not defined: current FSB - 50");
57
58#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg)
59
60/*
61 * nforce2_calc_fsb - calculate FSB
62 * @pll: PLL value
63 *
64 * Calculates FSB from PLL value
65 */
66static int nforce2_calc_fsb(int pll)
67{
68 unsigned char mul, div;
69
70 mul = (pll >> 8) & 0xff;
71 div = pll & 0xff;
72
73 if (div > 0)
74 return NFORCE2_XTAL * mul / div;
75
76 return 0;
77}
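/*
 * Example (values assumed): with the 25MHz reference crystal, mul=0x28
 * (40) and div=0x05 encode 25 * 40 / 5 = 200MHz FSB, and
 * NFORCE2_PLL(0x28, 0x05) builds the matching PLL word with enable
 * bit 20 set.
 */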
78
79/*
80 * nforce2_calc_pll - calculate PLL value
81 * @fsb: FSB
82 *
83 * Calculate PLL value for given FSB
84 */
85static int nforce2_calc_pll(unsigned int fsb)
86{
87 unsigned char xmul, xdiv;
88 unsigned char mul = 0, div = 0;
89 int tried = 0;
90
91 /* Try to calculate multiplier and divider up to 4 times */
92 while (((mul == 0) || (div == 0)) && (tried <= 3)) {
93 for (xdiv = 1; xdiv <= 0x80; xdiv++)
94 for (xmul = 1; xmul <= 0xfe; xmul++)
95 if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) ==
96 fsb + tried) {
97 mul = xmul;
98 div = xdiv;
99 }
100 tried++;
101 }
102
103 if ((mul == 0) || (div == 0))
104 return -1;
105
106 return NFORCE2_PLL(mul, div);
107}
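/*
 * Note the retry ladder above: if no exact mul/div pair reproduces the
 * requested FSB, the scan is repeated for fsb+1 through fsb+3 before
 * giving up and returning -1.
 */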
108
109/*
110 * nforce2_write_pll - write PLL value to chipset
111 * @pll: PLL value
112 *
113 * Writes new FSB PLL value to chipset
114 */
115static void nforce2_write_pll(int pll)
116{
117 int temp;
118
119 /* Set the pll addr. to 0x00 */
120 temp = 0x00;
121 pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, temp);
122
123 /* Now write the value in all 64 registers */
124 for (temp = 0; temp <= 0x3f; temp++) {
125 pci_write_config_dword(nforce2_chipset_dev,
126 NFORCE2_PLLREG, pll);
127 }
128
129 return;
130}
131
132/*
133 * nforce2_fsb_read - Read FSB
134 *
135 * Read FSB from chipset
136 * If bootfsb != 0, return FSB at boot-time
137 */
138static unsigned int nforce2_fsb_read(int bootfsb)
139{
140 struct pci_dev *nforce2_sub5;
141 u32 fsb, temp = 0;
142
143
144 /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
145 nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
146 0x01EF,
147 PCI_ANY_ID,
148 PCI_ANY_ID,
149 NULL);
150
151 if (!nforce2_sub5)
152 return 0;
153
154 pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb);
155 fsb /= 1000000;
156
157 /* Check if PLL register is already set */
158 pci_read_config_byte(nforce2_chipset_dev,
159 NFORCE2_PLLENABLE, (u8 *)&temp);
160
161 if(bootfsb || !temp)
162 return fsb;
163
164 /* Use PLL register FSB value */
165 pci_read_config_dword(nforce2_chipset_dev,
166 NFORCE2_PLLREG, &temp);
167 fsb = nforce2_calc_fsb(temp);
168
169 return fsb;
170}
171
172/*
173 * nforce2_set_fsb - set new FSB
174 * @fsb: New FSB
175 *
176 * Sets new FSB
177 */
178static int nforce2_set_fsb(unsigned int fsb)
179{
180	int pll; u32 temp = 0;	/* pll is signed: nforce2_calc_pll() returns -1 on failure */
181 unsigned int tfsb;
182 int diff;
183
184 if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
185 printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb);
186 return -EINVAL;
187 }
188
189 tfsb = nforce2_fsb_read(0);
190 if (!tfsb) {
191 printk(KERN_ERR "cpufreq: Error while reading the FSB\n");
192 return -EINVAL;
193 }
194
195 /* First write? Then set actual value */
196 pci_read_config_byte(nforce2_chipset_dev,
197 NFORCE2_PLLENABLE, (u8 *)&temp);
198 if (!temp) {
199 pll = nforce2_calc_pll(tfsb);
200
 201		if ((int)pll < 0) /* nforce2_calc_pll() returns -1 on failure */
202 return -EINVAL;
203
204 nforce2_write_pll(pll);
205 }
206
207 /* Enable write access */
208 temp = 0x01;
209 pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8)temp);
210
211 diff = tfsb - fsb;
212
213 if (!diff)
214 return 0;
215
216 while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) {
217 if (diff < 0)
218 tfsb++;
219 else
220 tfsb--;
221
222 /* Calculate the PLL reg. value */
223 if ((pll = nforce2_calc_pll(tfsb)) == -1)
224 return -EINVAL;
225
226 nforce2_write_pll(pll);
227#ifdef NFORCE2_DELAY
228 mdelay(NFORCE2_DELAY);
229#endif
230 }
231
232 temp = 0x40;
233 pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLADR, (u8)temp);
234
235 return 0;
236}
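/*
 * Behaviour sketch for the loop above (illustrative): the target is
 * approached one MHz at a time rather than in a single jump. Going
 * from a current FSB of 133 MHz to 137 MHz writes the PLL for 134,
 * 135, 136 and 137 in turn, waiting NFORCE2_DELAY ms between steps
 * when that option is compiled in, so each individual change is small.
 */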
237
238/**
239 * nforce2_get - get the CPU frequency
240 * @cpu: CPU number
241 *
242 * Returns the CPU frequency
243 */
244static unsigned int nforce2_get(unsigned int cpu)
245{
246 if (cpu)
247 return 0;
248 return nforce2_fsb_read(0) * fid * 100;
249}
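/*
 * Arithmetic note (illustrative): fid is the multiplier scaled by 10,
 * so fsb(MHz) * (fid/10) * 1000 kHz collapses to fsb * fid * 100 as
 * used above. Example: a 166 MHz FSB with an 11.5x multiplier
 * (fid = 115) yields 166 * 115 * 100 = 1909000 kHz, roughly 1.9 GHz.
 */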
250
251/**
252 * nforce2_target - set a new CPUFreq policy
253 * @policy: new policy
254 * @target_freq: the target frequency
255 * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
256 *
257 * Sets a new CPUFreq policy.
258 */
259static int nforce2_target(struct cpufreq_policy *policy,
260 unsigned int target_freq, unsigned int relation)
261{
262// unsigned long flags;
263 struct cpufreq_freqs freqs;
264 unsigned int target_fsb;
265
266 if ((target_freq > policy->max) || (target_freq < policy->min))
267 return -EINVAL;
268
269 target_fsb = target_freq / (fid * 100);
270
271 freqs.old = nforce2_get(policy->cpu);
272 freqs.new = target_fsb * fid * 100;
 273	freqs.cpu = 0; /* Only one CPU on nForce2 platforms */
274
275 if (freqs.old == freqs.new)
276 return 0;
277
278 dprintk(KERN_INFO "cpufreq: Old CPU frequency %d kHz, new %d kHz\n",
279 freqs.old, freqs.new);
280
281 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
282
283 /* Disable IRQs */
284 //local_irq_save(flags);
285
286 if (nforce2_set_fsb(target_fsb) < 0)
287 printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n",
288 target_fsb);
289 else
290 dprintk(KERN_INFO "cpufreq: Changed FSB successfully to %d\n",
291 target_fsb);
292
293 /* Enable IRQs */
294 //local_irq_restore(flags);
295
296 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
297
298 return 0;
299}
300
301/**
302 * nforce2_verify - verifies a new CPUFreq policy
303 * @policy: new policy
304 */
305static int nforce2_verify(struct cpufreq_policy *policy)
306{
307 unsigned int fsb_pol_max;
308
309 fsb_pol_max = policy->max / (fid * 100);
310
311 if (policy->min < (fsb_pol_max * fid * 100))
312 policy->max = (fsb_pol_max + 1) * fid * 100;
313
314 cpufreq_verify_within_limits(policy,
315 policy->cpuinfo.min_freq,
316 policy->cpuinfo.max_freq);
317 return 0;
318}
319
320static int nforce2_cpu_init(struct cpufreq_policy *policy)
321{
322 unsigned int fsb;
323 unsigned int rfid;
324
325 /* capability check */
326 if (policy->cpu != 0)
327 return -ENODEV;
328
329 /* Get current FSB */
330 fsb = nforce2_fsb_read(0);
331
332 if (!fsb)
333 return -EIO;
334
335 /* FIX: Get FID from CPU */
336 if (!fid) {
337 if (!cpu_khz) {
338 printk(KERN_WARNING
339 "cpufreq: cpu_khz not set, can't calculate multiplier!\n");
340 return -ENODEV;
341 }
342
343 fid = cpu_khz / (fsb * 100);
344 rfid = fid % 5;
345
346 if (rfid) {
347 if (rfid > 2)
348 fid += 5 - rfid;
349 else
350 fid -= rfid;
351 }
352 }
353
354 printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb,
355 fid / 10, fid % 10);
356
357 /* Set maximum FSB to FSB at boot time */
358 max_fsb = nforce2_fsb_read(1);
359
 360	if (!max_fsb)
361 return -EIO;
362
363 if (!min_fsb)
364 min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE;
365
366 if (min_fsb < NFORCE2_MIN_FSB)
367 min_fsb = NFORCE2_MIN_FSB;
368
369 /* cpuinfo and default policy values */
370 policy->cpuinfo.min_freq = min_fsb * fid * 100;
371 policy->cpuinfo.max_freq = max_fsb * fid * 100;
372 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
373 policy->cur = nforce2_get(policy->cpu);
374 policy->min = policy->cpuinfo.min_freq;
375 policy->max = policy->cpuinfo.max_freq;
376 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
377
378 return 0;
379}
380
381static int nforce2_cpu_exit(struct cpufreq_policy *policy)
382{
383 return 0;
384}
385
386static struct cpufreq_driver nforce2_driver = {
387 .name = "nforce2",
388 .verify = nforce2_verify,
389 .target = nforce2_target,
390 .get = nforce2_get,
391 .init = nforce2_cpu_init,
392 .exit = nforce2_cpu_exit,
393 .owner = THIS_MODULE,
394};
395
396/**
397 * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic
398 *
399 * Detects nForce2 A2 and C1 stepping
400 *
401 */
402static unsigned int nforce2_detect_chipset(void)
403{
404 u8 revision;
405
406 nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
407 PCI_DEVICE_ID_NVIDIA_NFORCE2,
408 PCI_ANY_ID,
409 PCI_ANY_ID,
410 NULL);
411
412 if (nforce2_chipset_dev == NULL)
413 return -ENODEV;
414
415 pci_read_config_byte(nforce2_chipset_dev, PCI_REVISION_ID, &revision);
416
417 printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n",
418 revision);
419 printk(KERN_INFO
 420	       "cpufreq: FSB changing may be unstable and can lead to crashes and data loss.\n");
421
422 return 0;
423}
424
425/**
426 * nforce2_init - initializes the nForce2 CPUFreq driver
427 *
428 * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported
 429 * devices, -EINVAL on problems during initialization, and zero on
430 * success.
431 */
432static int __init nforce2_init(void)
433{
434 /* TODO: do we need to detect the processor? */
435
436 /* detect chipset */
437 if (nforce2_detect_chipset()) {
438 printk(KERN_ERR "cpufreq: No nForce2 chipset.\n");
439 return -ENODEV;
440 }
441
442 return cpufreq_register_driver(&nforce2_driver);
443}
444
445/**
446 * nforce2_exit - unregisters cpufreq module
447 *
448 * Unregisters nForce2 FSB change support.
449 */
450static void __exit nforce2_exit(void)
451{
452 cpufreq_unregister_driver(&nforce2_driver);
453}
454
455module_init(nforce2_init);
456module_exit(nforce2_exit);
457
diff --git a/arch/i386/kernel/cpu/cpufreq/elanfreq.c b/arch/i386/kernel/cpu/cpufreq/elanfreq.c
new file mode 100644
index 000000000000..3f7caa4ae6d6
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/elanfreq.c
@@ -0,0 +1,312 @@
1/*
2 * elanfreq: cpufreq driver for the AMD ELAN family
3 *
4 * (c) Copyright 2002 Robert Schwebel <r.schwebel@pengutronix.de>
5 *
6 * Parts of this code are (c) Sven Geggus <sven@geggus.net>
7 *
8 * All Rights Reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * 2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel
16 *
17 */
18
19#include <linux/kernel.h>
20#include <linux/module.h>
21#include <linux/init.h>
22
23#include <linux/slab.h>
24#include <linux/delay.h>
25#include <linux/cpufreq.h>
26
27#include <asm/msr.h>
28#include <asm/timex.h>
29#include <asm/io.h>
30
31#define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */
32#define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */
33
34/* Module parameter */
35static int max_freq;
36
37struct s_elan_multiplier {
38 int clock; /* frequency in kHz */
39 int val40h; /* PMU Force Mode register */
40 int val80h; /* CPU Clock Speed Register */
41};
42
43/*
44 * It is important that the frequencies
45 * are listed in ascending order here!
46 */
47struct s_elan_multiplier elan_multiplier[] = {
48 {1000, 0x02, 0x18},
49 {2000, 0x02, 0x10},
50 {4000, 0x02, 0x08},
51 {8000, 0x00, 0x00},
52 {16000, 0x00, 0x02},
53 {33000, 0x00, 0x04},
54 {66000, 0x01, 0x04},
55 {99000, 0x01, 0x05}
56};
57
58static struct cpufreq_frequency_table elanfreq_table[] = {
59 {0, 1000},
60 {1, 2000},
61 {2, 4000},
62 {3, 8000},
63 {4, 16000},
64 {5, 33000},
65 {6, 66000},
66 {7, 99000},
67 {0, CPUFREQ_TABLE_END},
68};
69
70
71/**
72 * elanfreq_get_cpu_frequency: determine current cpu speed
73 *
 74 * Finds out at which frequency the CPU of the Elan SoC is
 75 * currently running. Frequencies from 1 to 33 MHz are generated
76 * the normal way, 66 and 99 MHz are called "Hyperspeed Mode"
77 * and have the rest of the chip running with 33 MHz.
78 */
79
80static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
81{
82 u8 clockspeed_reg; /* Clock Speed Register */
83
84 local_irq_disable();
85 outb_p(0x80,REG_CSCIR);
86 clockspeed_reg = inb_p(REG_CSCDR);
87 local_irq_enable();
88
89 if ((clockspeed_reg & 0xE0) == 0xE0) { return 0; }
90
91 /* Are we in CPU clock multiplied mode (66/99 MHz)? */
92 if ((clockspeed_reg & 0xE0) == 0xC0) {
93 if ((clockspeed_reg & 0x01) == 0) {
94 return 66000;
95 } else {
96 return 99000;
97 }
98 }
99
100 /* 33 MHz is not 32 MHz... */
101 if ((clockspeed_reg & 0xE0)==0xA0)
102 return 33000;
103
104 return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000);
105}
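/*
 * Decoding examples for the register read above (illustrative): the
 * top three bits select the speed, so 0x40 gives (0x40 & 0xE0) >> 5
 * = 2 and thus 1 << 2 = 4 MHz, returned as 4000 kHz. 0xA0 must be
 * special-cased as 33 MHz because 1 << 5 = 32 would be wrong, and
 * 0xC0 selects Hyperspeed Mode: 66 MHz with bit 0 clear, 99 MHz with
 * bit 0 set.
 */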
106
107
108/**
 109 * elanfreq_set_cpu_state: change the CPU core frequency
 110 * @state: index into the elan_multiplier table
 111 *
 112 * This function takes an index into the multiplier table and changes
 113 * the CPU frequency to the corresponding entry. Note that the
 114 * frequency has to be checked for validity by elanfreq_verify()
 115 * beforehand!
116 *
117 * There is no return value.
118 */
119
120static void elanfreq_set_cpu_state (unsigned int state) {
121
122 struct cpufreq_freqs freqs;
123
124 freqs.old = elanfreq_get_cpu_frequency(0);
125 freqs.new = elan_multiplier[state].clock;
126 freqs.cpu = 0; /* elanfreq.c is UP only driver */
127
128 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
129
130 printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n",elan_multiplier[state].clock);
131
132
133 /*
134 * Access to the Elan's internal registers is indexed via
135 * 0x22: Chip Setup & Control Register Index Register (CSCI)
136 * 0x23: Chip Setup & Control Register Data Register (CSCD)
137 *
138 */
139
140 /*
141 * 0x40 is the Power Management Unit's Force Mode Register.
 142	 * Bit 6 enables Hyperspeed Mode (66/99 MHz core frequency)
143 */
144
145 local_irq_disable();
146 outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */
147 outb_p(0x00,REG_CSCDR);
148 local_irq_enable(); /* wait till internal pipelines and */
149 udelay(1000); /* buffers have cleaned up */
150
151 local_irq_disable();
152
153 /* now, set the CPU clock speed register (0x80) */
154 outb_p(0x80,REG_CSCIR);
155 outb_p(elan_multiplier[state].val80h,REG_CSCDR);
156
157 /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */
158 outb_p(0x40,REG_CSCIR);
159 outb_p(elan_multiplier[state].val40h,REG_CSCDR);
160 udelay(10000);
161 local_irq_enable();
162
163 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 164}
165
166
167/**
 168 * elanfreq_verify: test if frequency range is valid
169 * @policy: the policy to validate
170 *
171 * This function checks if a given frequency range in kHz is valid
172 * for the hardware supported by the driver.
173 */
174
175static int elanfreq_verify (struct cpufreq_policy *policy)
176{
177 return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]);
178}
179
180static int elanfreq_target (struct cpufreq_policy *policy,
181 unsigned int target_freq,
182 unsigned int relation)
183{
184 unsigned int newstate = 0;
185
186 if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate))
187 return -EINVAL;
188
189 elanfreq_set_cpu_state(newstate);
190
191 return 0;
192}
193
194
195/*
196 * Module init and exit code
197 */
198
199static int elanfreq_cpu_init(struct cpufreq_policy *policy)
200{
201 struct cpuinfo_x86 *c = cpu_data;
202 unsigned int i;
203 int result;
204
205 /* capability check */
206 if ((c->x86_vendor != X86_VENDOR_AMD) ||
207 (c->x86 != 4) || (c->x86_model!=10))
208 return -ENODEV;
209
210 /* max freq */
211 if (!max_freq)
212 max_freq = elanfreq_get_cpu_frequency(0);
213
214 /* table init */
215 for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) {
216 if (elanfreq_table[i].frequency > max_freq)
217 elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID;
218 }
219
220 /* cpuinfo and default policy values */
221 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
222 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
223 policy->cur = elanfreq_get_cpu_frequency(0);
224
225 result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table);
226 if (result)
227 return (result);
228
229 cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu);
230
231 return 0;
232}
233
234
235static int elanfreq_cpu_exit(struct cpufreq_policy *policy)
236{
237 cpufreq_frequency_table_put_attr(policy->cpu);
238 return 0;
239}
240
241
242#ifndef MODULE
243/**
244 * elanfreq_setup - elanfreq command line parameter parsing
245 *
246 * elanfreq command line parameter. Use:
247 * elanfreq=66000
248 * to set the maximum CPU frequency to 66 MHz. Note that in
249 * case you do not give this boot parameter, the maximum
250 * frequency will fall back to _current_ CPU frequency which
251 * might be lower. If you build this as a module, use the
252 * max_freq module parameter instead.
253 */
254static int __init elanfreq_setup(char *str)
255{
256 max_freq = simple_strtoul(str, &str, 0);
257 printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n");
258 return 1;
259}
260__setup("elanfreq=", elanfreq_setup);
261#endif
262
263
264static struct freq_attr* elanfreq_attr[] = {
265 &cpufreq_freq_attr_scaling_available_freqs,
266 NULL,
267};
268
269
270static struct cpufreq_driver elanfreq_driver = {
271 .get = elanfreq_get_cpu_frequency,
272 .verify = elanfreq_verify,
273 .target = elanfreq_target,
274 .init = elanfreq_cpu_init,
275 .exit = elanfreq_cpu_exit,
276 .name = "elanfreq",
277 .owner = THIS_MODULE,
278 .attr = elanfreq_attr,
279};
280
281
282static int __init elanfreq_init(void)
283{
284 struct cpuinfo_x86 *c = cpu_data;
285
286 /* Test if we have the right hardware */
287 if ((c->x86_vendor != X86_VENDOR_AMD) ||
288 (c->x86 != 4) || (c->x86_model!=10))
289 {
290 printk(KERN_INFO "elanfreq: error: no Elan processor found!\n");
291 return -ENODEV;
292 }
293
294 return cpufreq_register_driver(&elanfreq_driver);
295}
296
297
298static void __exit elanfreq_exit(void)
299{
300 cpufreq_unregister_driver(&elanfreq_driver);
301}
302
303
304module_param (max_freq, int, 0444);
305
306MODULE_LICENSE("GPL");
307MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, Sven Geggus <sven@geggus.net>");
308MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs");
309
310module_init(elanfreq_init);
311module_exit(elanfreq_exit);
312
diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
new file mode 100644
index 000000000000..1a49adb1f4a6
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
@@ -0,0 +1,502 @@
1/*
2 * Cyrix MediaGX and NatSemi Geode Suspend Modulation
3 * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
4 * (C) 2002 Hiroshi Miura <miura@da-cha.org>
5 * All Rights Reserved
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * version 2 as published by the Free Software Foundation
10 *
11 * The author(s) of this software shall not be held liable for damages
12 * of any nature resulting due to the use of this software. This
13 * software is provided AS-IS with no warranties.
14 *
 15 * Theoretical note:
16 *
17 * (see Geode(tm) CS5530 manual (rev.4.1) page.56)
18 *
 19 * CPU frequency control on the NatSemi Geode GX1/GXLV processor and CS55x0
 20 * is based on Suspend Modulation.
21 *
22 * Suspend Modulation works by asserting and de-asserting the SUSP# pin
 23 * to the CPU (GX1/GXLV) for configurable durations. When SUSP# is
 24 * asserted the CPU enters an idle state: the GX1 stops its core clock,
 25 * so power consumption is reduced.
26 *
27 * Suspend Modulation's OFF/ON duration are configurable
28 * with 'Suspend Modulation OFF Count Register'
29 * and 'Suspend Modulation ON Count Register'.
 30 * These registers are 8-bit counters that represent the number of
 31 * 32us intervals for which the SUSP# pin is asserted (ON)/de-asserted (OFF)
32 * to the processor.
33 *
34 * These counters define a ratio which is the effective frequency
35 * of operation of the system.
36 *
37 * OFF Count
38 * F_eff = Fgx * ----------------------
39 * OFF Count + ON Count
40 *
41 * 0 <= On Count, Off Count <= 255
42 *
43 * From these limits, we can get register values
44 *
45 * off_duration + on_duration <= MAX_DURATION
46 * on_duration = off_duration * (stock_freq - freq) / freq
47 *
48 * off_duration = (freq * DURATION) / stock_freq
49 * on_duration = DURATION - off_duration
50 *
51 *
52 *---------------------------------------------------------------------------
53 *
54 * ChangeLog:
55 * Dec. 12, 2003 Hiroshi Miura <miura@da-cha.org>
56 * - fix on/off register mistake
57 * - fix cpu_khz calc when it stops cpu modulation.
58 *
59 * Dec. 11, 2002 Hiroshi Miura <miura@da-cha.org>
60 * - rewrite for Cyrix MediaGX Cx5510/5520 and
61 * NatSemi Geode Cs5530(A).
62 *
63 * Jul. ??, 2002 Zwane Mwaikambo <zwane@commfireservices.com>
64 * - cs5530_mod patch for 2.4.19-rc1.
65 *
66 *---------------------------------------------------------------------------
67 *
68 * Todo
69 * Test on machines with 5510, 5530, 5530A
70 */
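/*
 * Worked example of the duration formulas above (illustrative),
 * for a 200 MHz part (stock_freq = 200000 kHz) with max_duration at
 * its default of 255, targeting roughly half speed (freq = 100000):
 *
 *   off_duration = (100000 * 255) / 200000 = 127
 *   on_duration  = 255 - 127              = 128
 *   F_eff        = 200000 * 127 / 255     = 99607 kHz
 *
 * Achievable frequencies are therefore quantized in steps of
 * stock_freq / max_duration, here about 784 kHz.
 */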
71
72/************************************************************************
73 * Suspend Modulation - Definitions *
74 ************************************************************************/
75
76#include <linux/kernel.h>
77#include <linux/module.h>
78#include <linux/init.h>
79#include <linux/smp.h>
80#include <linux/cpufreq.h>
81#include <linux/pci.h>
82#include <asm/processor.h>
83#include <asm/errno.h>
84
85/* PCI config registers, all at F0 */
86#define PCI_PMER1 0x80 /* power management enable register 1 */
87#define PCI_PMER2 0x81 /* power management enable register 2 */
88#define PCI_PMER3 0x82 /* power management enable register 3 */
89#define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */
90#define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */
91#define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */
92#define PCI_MODON 0x95 /* suspend modulation ON counter register */
93#define PCI_SUSCFG 0x96 /* suspend configuration register */
94
95/* PMER1 bits */
96#define GPM (1<<0) /* global power management */
97#define GIT (1<<1) /* globally enable PM device idle timers */
98#define GTR (1<<2) /* globally enable IO traps */
99#define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */
100#define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */
101
102/* SUSCFG bits */
103#define SUSMOD (1<<0) /* enable/disable suspend modulation */
104/* the following are supported only by the CS5530 (rev. 1.2 and later) and CS5530A */
105#define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */
106 /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */
107#define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */
108/* the following are supported only by the CS5530A */
109#define PWRSVE_ISA (1<<3) /* stop ISA clock */
110#define PWRSVE (1<<4) /* active idle */
111
112struct gxfreq_params {
113 u8 on_duration;
114 u8 off_duration;
115 u8 pci_suscfg;
116 u8 pci_pmer1;
117 u8 pci_pmer2;
118 u8 pci_rev;
119 struct pci_dev *cs55x0;
120};
121
122static struct gxfreq_params *gx_params;
123static int stock_freq;
124
125/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */
126static int pci_busclk = 0;
127module_param (pci_busclk, int, 0444);
128
129/* maximum duration for which the cpu may be suspended
130 * (32us * MAX_DURATION). If no parameter is given, this defaults
131 * to 255.
132 * Note that this leads to a maximum of 8 ms(!) where the CPU clock
133 * is suspended -- processing power is just 0.39% of what it used to be,
134 * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */
135static int max_duration = 255;
136module_param (max_duration, int, 0444);
137
138/* For the default policy, we want at least some processing power
139 * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV)
140 */
141#define POLICY_MIN_DIV 20
142
143
144#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg)
145
146/**
 147 * we can detect a core multiplier from dir0_lsb
148 * from GX1 datasheet p.56,
149 * MULT[3:0]:
150 * 0000 = SYSCLK multiplied by 4 (test only)
151 * 0001 = SYSCLK multiplied by 10
152 * 0010 = SYSCLK multiplied by 4
153 * 0011 = SYSCLK multiplied by 6
154 * 0100 = SYSCLK multiplied by 9
155 * 0101 = SYSCLK multiplied by 5
156 * 0110 = SYSCLK multiplied by 7
157 * 0111 = SYSCLK multiplied by 8
158 * of 33.3MHz
159 **/
160static int gx_freq_mult[16] = {
161 4, 10, 4, 6, 9, 5, 7, 8,
162 0, 0, 0, 0, 0, 0, 0, 0
163};
164
165
166/****************************************************************
167 * Low Level chipset interface *
168 ****************************************************************/
169static struct pci_device_id gx_chipset_tbl[] __initdata = {
170 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID },
171 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID },
172 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID },
173 { 0, },
174};
175
176/**
177 * gx_detect_chipset:
 178 * detect which CS55x0 companion chip is present, if any
179 **/
180static __init struct pci_dev *gx_detect_chipset(void)
181{
182 struct pci_dev *gx_pci = NULL;
183
184 /* check if CPU is a MediaGX or a Geode. */
185 if ((current_cpu_data.x86_vendor != X86_VENDOR_NSC) &&
186 (current_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) {
187 dprintk("error: no MediaGX/Geode processor found!\n");
188 return NULL;
189 }
190
191 /* detect which companion chip is used */
192 while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) {
193 if ((pci_match_device (gx_chipset_tbl, gx_pci)) != NULL) {
194 return gx_pci;
195 }
196 }
197
198 dprintk("error: no supported chipset found!\n");
199 return NULL;
200}
201
202/**
203 * gx_get_cpuspeed:
204 *
 205 * Finds out at which effective frequency the Cyrix MediaGX/NatSemi Geode CPU runs.
206 */
207static unsigned int gx_get_cpuspeed(unsigned int cpu)
208{
209 if ((gx_params->pci_suscfg & SUSMOD) == 0)
210 return stock_freq;
211
212 return (stock_freq * gx_params->off_duration)
213 / (gx_params->on_duration + gx_params->off_duration);
214}
215
216/**
217 * gx_validate_speed:
 218 * for a requested speed in khz, find the closest achievable speed
 219 * and fill in the matching on/off durations
 220**/
221
222static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration)
223{
224 unsigned int i;
225 u8 tmp_on, tmp_off;
226 int old_tmp_freq = stock_freq;
227 int tmp_freq;
228
229 *off_duration=1;
230 *on_duration=0;
231
232 for (i=max_duration; i>0; i--) {
233 tmp_off = ((khz * i) / stock_freq) & 0xff;
234 tmp_on = i - tmp_off;
235 tmp_freq = (stock_freq * tmp_off) / i;
 236		/* if this candidate is closer to khz, use it; if it's equally
 237		 * close, prefer it too - shorter period, lower latency */
238 if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) {
239 *on_duration = tmp_on;
240 *off_duration = tmp_off;
241 old_tmp_freq = tmp_freq;
242 }
243 }
244
245 return old_tmp_freq;
246}
247
248
249/**
250 * gx_set_cpuspeed:
251 * set cpu speed in khz.
252 **/
253
254static void gx_set_cpuspeed(unsigned int khz)
255{
256 u8 suscfg, pmer1;
257 unsigned int new_khz;
258 unsigned long flags;
259 struct cpufreq_freqs freqs;
260
261
262 freqs.cpu = 0;
263 freqs.old = gx_get_cpuspeed(0);
264
265 new_khz = gx_validate_speed(khz, &gx_params->on_duration, &gx_params->off_duration);
266
267 freqs.new = new_khz;
268
269 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
270 local_irq_save(flags);
271
 272	if (new_khz != stock_freq) { /* new_khz == stock_freq (100% speed) is a special case */
273 switch (gx_params->cs55x0->device) {
274 case PCI_DEVICE_ID_CYRIX_5530_LEGACY:
275 pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP;
276 /* FIXME: need to test other values -- Zwane,Miura */
277 pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */
278 pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */
279 pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1);
280
281 if (gx_params->pci_rev < 0x10) { /* CS5530(rev 1.2, 1.3) */
282 suscfg = gx_params->pci_suscfg | SUSMOD;
283 } else { /* CS5530A,B.. */
284 suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE;
285 }
286 break;
287 case PCI_DEVICE_ID_CYRIX_5520:
288 case PCI_DEVICE_ID_CYRIX_5510:
289 suscfg = gx_params->pci_suscfg | SUSMOD;
290 break;
291 default:
292 local_irq_restore(flags);
 293			dprintk("fatal: tried to set an unknown chipset.\n");
294 return;
295 }
296 } else {
297 suscfg = gx_params->pci_suscfg & ~(SUSMOD);
298 gx_params->off_duration = 0;
299 gx_params->on_duration = 0;
 300		dprintk("suspend modulation disabled: cpu runs at 100 percent speed.\n");
301 }
302
303 pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration);
304 pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration);
305
306 pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg);
307 pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg);
308
309 local_irq_restore(flags);
310
311 gx_params->pci_suscfg = suscfg;
312
313 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
314
315 dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n",
316 gx_params->on_duration * 32, gx_params->off_duration * 32);
317 dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new);
318}
319
320/****************************************************************
321 * High level functions *
322 ****************************************************************/
323
324/*
325 * cpufreq_gx_verify: test if frequency range is valid
326 *
327 * This function checks if a given frequency range in kHz is valid
328 * for the hardware supported by the driver.
329 */
330
331static int cpufreq_gx_verify(struct cpufreq_policy *policy)
332{
333 unsigned int tmp_freq = 0;
334 u8 tmp1, tmp2;
335
336 if (!stock_freq || !policy)
337 return -EINVAL;
338
339 policy->cpu = 0;
340 cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq);
341
342 /* it needs to be assured that at least one supported frequency is
343 * within policy->min and policy->max. If it is not, policy->max
 344	 * needs to be increased until one frequency is supported.
345 * policy->min may not be decreased, though. This way we guarantee a
346 * specific processing capacity.
347 */
348 tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2);
349 if (tmp_freq < policy->min)
350 tmp_freq += stock_freq / max_duration;
351 policy->min = tmp_freq;
352 if (policy->min > policy->max)
353 policy->max = tmp_freq;
354 tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2);
355 if (tmp_freq > policy->max)
356 tmp_freq -= stock_freq / max_duration;
357 policy->max = tmp_freq;
358 if (policy->max < policy->min)
359 policy->max = policy->min;
360 cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq);
361
362 return 0;
363}
364
365/*
366 * cpufreq_gx_target:
 367 * find the closest supported speed within the policy limits and set it
368 */
369static int cpufreq_gx_target(struct cpufreq_policy *policy,
370 unsigned int target_freq,
371 unsigned int relation)
372{
373 u8 tmp1, tmp2;
374 unsigned int tmp_freq;
375
376 if (!stock_freq || !policy)
377 return -EINVAL;
378
379 policy->cpu = 0;
380
381 tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2);
382 while (tmp_freq < policy->min) {
383 tmp_freq += stock_freq / max_duration;
384 tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
385 }
386 while (tmp_freq > policy->max) {
387 tmp_freq -= stock_freq / max_duration;
388 tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
389 }
390
391 gx_set_cpuspeed(tmp_freq);
392
393 return 0;
394}
395
396static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
397{
398 unsigned int maxfreq, curfreq;
399
400 if (!policy || policy->cpu != 0)
401 return -ENODEV;
402
403 /* determine maximum frequency */
404 if (pci_busclk) {
405 maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
406 } else if (cpu_khz) {
407 maxfreq = cpu_khz;
408 } else {
409 maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
410 }
411 stock_freq = maxfreq;
412 curfreq = gx_get_cpuspeed(0);
413
414 dprintk("cpu max frequency is %d.\n", maxfreq);
415 dprintk("cpu current frequency is %dkHz.\n",curfreq);
416
417 /* setup basic struct for cpufreq API */
418 policy->cpu = 0;
419
420 if (max_duration < POLICY_MIN_DIV)
421 policy->min = maxfreq / max_duration;
422 else
423 policy->min = maxfreq / POLICY_MIN_DIV;
424 policy->max = maxfreq;
425 policy->cur = curfreq;
426 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
427 policy->cpuinfo.min_freq = maxfreq / max_duration;
428 policy->cpuinfo.max_freq = maxfreq;
429 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
430
431 return 0;
432}
433
434/*
435 * cpufreq_gx_init:
 436 * MediaGX/Geode GX cpufreq driver initialization
437 */
438static struct cpufreq_driver gx_suspmod_driver = {
439 .get = gx_get_cpuspeed,
440 .verify = cpufreq_gx_verify,
441 .target = cpufreq_gx_target,
442 .init = cpufreq_gx_cpu_init,
443 .name = "gx-suspmod",
444 .owner = THIS_MODULE,
445};
446
447static int __init cpufreq_gx_init(void)
448{
449 int ret;
450 struct gxfreq_params *params;
451 struct pci_dev *gx_pci;
452 u32 class_rev;
453
454 /* Test if we have the right hardware */
455 if ((gx_pci = gx_detect_chipset()) == NULL)
456 return -ENODEV;
457
458 /* check whether module parameters are sane */
459 if (max_duration > 0xff)
460 max_duration = 0xff;
461
462 dprintk("geode suspend modulation available.\n");
463
464 params = kmalloc(sizeof(struct gxfreq_params), GFP_KERNEL);
465 if (params == NULL)
466 return -ENOMEM;
467 memset(params, 0, sizeof(struct gxfreq_params));
468
469 params->cs55x0 = gx_pci;
470 gx_params = params;
471
472 /* keep cs55x0 configurations */
473 pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg));
474 pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1));
475 pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2));
476 pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration));
477 pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration));
478 pci_read_config_dword(params->cs55x0, PCI_CLASS_REVISION, &class_rev);
 479	params->pci_rev = class_rev & 0xff; /* low byte holds the revision ID */
480
481 if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) {
482 kfree(params);
483 return ret; /* register error! */
484 }
485
486 return 0;
487}
488
489static void __exit cpufreq_gx_exit(void)
490{
491 cpufreq_unregister_driver(&gx_suspmod_driver);
492 pci_dev_put(gx_params->cs55x0);
493 kfree(gx_params);
494}
495
496MODULE_AUTHOR ("Hiroshi Miura <miura@da-cha.org>");
497MODULE_DESCRIPTION ("Cpufreq driver for Cyrix MediaGX and NatSemi Geode");
498MODULE_LICENSE ("GPL");
499
500module_init(cpufreq_gx_init);
501module_exit(cpufreq_gx_exit);
502
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
new file mode 100644
index 000000000000..ab0f9f5aac11
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -0,0 +1,658 @@
1/*
2 * (C) 2001-2004 Dave Jones. <davej@codemonkey.org.uk>
3 * (C) 2002 Padraig Brady. <padraig@antefacto.com>
4 *
5 * Licensed under the terms of the GNU GPL License version 2.
6 * Based upon datasheets & sample CPUs kindly provided by VIA.
7 *
 8 * VIA currently has three different versions of Longhaul.
 9 * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147.
 10 * It is present only in Samuel 1 (C5A) and Samuel 2 (C5B) stepping 0.
 11 * Version 2 of Longhaul is the same as v1, but adds voltage scaling.
 12 * It is present in Samuel 2 (steppings 1-7 only) (C5B) and Ezra (C5C);
 13 * voltage scaling support has currently been disabled in this driver
 14 * until we have code that gets it right.
 15 * Version 3 of Longhaul was renamed to Powersaver and redesigned
 16 * to use the POWERSAVER MSR at 0x110a.
 17 * It is present in Ezra-T (C5M), Nehemiah (C5X) and above.
 18 * It is pretty much the same feature-wise as Longhaul v2, though
 19 * there is provision for scaling the FSB too; this doesn't work
 20 * too well in practice, so we don't even try to use it.
21 *
22 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
23 */
24
25#include <linux/kernel.h>
26#include <linux/module.h>
27#include <linux/moduleparam.h>
28#include <linux/init.h>
29#include <linux/cpufreq.h>
30#include <linux/slab.h>
31#include <linux/string.h>
32
33#include <asm/msr.h>
34#include <asm/timex.h>
35#include <asm/io.h>
36
37#include "longhaul.h"
38
39#define PFX "longhaul: "
40
41#define TYPE_LONGHAUL_V1 1
42#define TYPE_LONGHAUL_V2 2
43#define TYPE_POWERSAVER 3
44
45#define CPU_SAMUEL 1
46#define CPU_SAMUEL2 2
47#define CPU_EZRA 3
48#define CPU_EZRA_T 4
49#define CPU_NEHEMIAH 5
50
51static int cpu_model;
52static unsigned int numscales=16, numvscales;
53static unsigned int fsb;
54static int minvid, maxvid;
55static unsigned int minmult, maxmult;
56static int can_scale_voltage;
57static int vrmrev;
58
59/* Module parameters */
60static int dont_scale_voltage;
61
62
63#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
64
65
66#define __hlt() __asm__ __volatile__("hlt": : :"memory")
67
68/* Clock ratios multiplied by 10 */
69static int clock_ratio[32];
70static int eblcr_table[32];
71static int voltage_table[32];
72static unsigned int highest_speed, lowest_speed; /* kHz */
73static int longhaul_version;
74static struct cpufreq_frequency_table *longhaul_table;
75
76#ifdef CONFIG_CPU_FREQ_DEBUG
77static char speedbuffer[8];
78
79static char *print_speed(int speed)
80{
81 if (speed > 1000) {
82 if (speed%1000 == 0)
83 sprintf (speedbuffer, "%dGHz", speed/1000);
84 else
85 sprintf (speedbuffer, "%d.%dGHz", speed/1000, (speed%1000)/100);
86 } else
87 sprintf (speedbuffer, "%dMHz", speed);
88
89 return speedbuffer;
90}
91#endif
92
93
94static unsigned int calc_speed(int mult)
95{
96 int khz;
97 khz = (mult/10)*fsb;
98 if (mult%10)
99 khz += fsb/2;
100 khz *= 1000;
101 return khz;
102}
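/*
 * calc_speed() example (illustrative): the ratio tables store the
 * multiplier scaled by 10, so mult = 95 (9.5x) on a 133 MHz FSB gives
 * (95/10)*133 + 133/2 = 9*133 + 66 = 1263 MHz, returned as 1263000
 * kHz -- integer arithmetic for 9.5 * 133 = 1263.5 MHz.
 */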
103
104
105static int longhaul_get_cpu_mult(void)
106{
107 unsigned long invalue=0,lo, hi;
108
109 rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi);
110 invalue = (lo & (1<<22|1<<23|1<<24|1<<25)) >>22;
111 if (longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) {
112 if (lo & (1<<27))
113 invalue+=16;
114 }
115 return eblcr_table[invalue];
116}
117
118
119static void do_powersaver(union msr_longhaul *longhaul,
120 unsigned int clock_ratio_index)
121{
122 int version;
123
124 switch (cpu_model) {
125 case CPU_EZRA_T:
126 version = 3;
127 break;
128 case CPU_NEHEMIAH:
129 version = 0xf;
130 break;
131 default:
132 return;
133 }
134
135 rdmsrl(MSR_VIA_LONGHAUL, longhaul->val);
136 longhaul->bits.SoftBusRatio = clock_ratio_index & 0xf;
137 longhaul->bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4;
138 longhaul->bits.EnableSoftBusRatio = 1;
139 longhaul->bits.RevisionKey = 0;
140 local_irq_disable();
141 wrmsrl(MSR_VIA_LONGHAUL, longhaul->val);
142 local_irq_enable();
143 __hlt();
144
145 rdmsrl(MSR_VIA_LONGHAUL, longhaul->val);
146 longhaul->bits.EnableSoftBusRatio = 0;
147 longhaul->bits.RevisionKey = version;
148 local_irq_disable();
149 wrmsrl(MSR_VIA_LONGHAUL, longhaul->val);
150 local_irq_enable();
151}
152
153/**
 154 * longhaul_setstate()
 155 * @clock_ratio_index : bit pattern of the new multiplier.
156 *
157 * Sets a new clock ratio.
158 */
159
160static void longhaul_setstate(unsigned int clock_ratio_index)
161{
162 int speed, mult;
163 struct cpufreq_freqs freqs;
164 union msr_longhaul longhaul;
165 union msr_bcr2 bcr2;
166 static unsigned int old_ratio=-1;
167
168 if (old_ratio == clock_ratio_index)
169 return;
170 old_ratio = clock_ratio_index;
171
172 mult = clock_ratio[clock_ratio_index];
173 if (mult == -1)
174 return;
175
176 speed = calc_speed(mult);
177 if ((speed > highest_speed) || (speed < lowest_speed))
178 return;
179
180 freqs.old = calc_speed(longhaul_get_cpu_mult());
181 freqs.new = speed;
182 freqs.cpu = 0; /* longhaul.c is UP only driver */
183
184 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
185
186 dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
187 fsb, mult/10, mult%10, print_speed(speed/1000));
188
189 switch (longhaul_version) {
190
191 /*
192 * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B])
193 * Software controlled multipliers only.
194 *
195 * *NB* Until we get voltage scaling working v1 & v2 are the same code.
196 * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5b] and Ezra [C5C]
197 */
198 case TYPE_LONGHAUL_V1:
199 case TYPE_LONGHAUL_V2:
200 rdmsrl (MSR_VIA_BCR2, bcr2.val);
201 /* Enable software clock multiplier */
202 bcr2.bits.ESOFTBF = 1;
203 bcr2.bits.CLOCKMUL = clock_ratio_index;
204 local_irq_disable();
205 wrmsrl (MSR_VIA_BCR2, bcr2.val);
206 local_irq_enable();
207
208 __hlt();
209
210 /* Disable software clock multiplier */
211 rdmsrl (MSR_VIA_BCR2, bcr2.val);
212 bcr2.bits.ESOFTBF = 0;
213 local_irq_disable();
214 wrmsrl (MSR_VIA_BCR2, bcr2.val);
215 local_irq_enable();
216 break;
217
218 /*
219 * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N])
220 * We can scale voltage with this too, but that's currently
221 * disabled until we come up with a decent 'match freq to voltage'
222 * algorithm.
223 * When we add voltage scaling, we will also need to do the
224 * voltage/freq setting in order depending on the direction
225 * of scaling (like we do in powernow-k7.c)
226 * Nehemiah can do FSB scaling too, but this has never been proven
227 * to work in practice.
228 */
229 case TYPE_POWERSAVER:
230 do_powersaver(&longhaul, clock_ratio_index);
231 break;
232 }
233
234 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
235}
236
237/*
238 * Centaur decided to make life a little more tricky.
239 * Only longhaul v1 is allowed to read EBLCR BSEL[0:1].
240 * Samuel2 and above have to try and guess what the FSB is.
 241 * We do this by assuming we booted at maximum multiplier, and interpolate
 242 * between that value multiplied by possible FSBs and the boot-time
 243 * cpu_khz value (in MHz). Really ugly, but no other way to do this.
244 */
245
246#define ROUNDING 0xf
247
248static int _guess(int guess)
249{
250 int target;
251
252 target = ((maxmult/10)*guess);
253 if (maxmult%10 != 0)
254 target += (guess/2);
255 target += ROUNDING/2;
256 target &= ~ROUNDING;
257 return target;
258}
259
260
261static int guess_fsb(void)
262{
263 int speed = (cpu_khz/1000);
264 int i;
265 int speeds[3] = { 66, 100, 133 };
266
267 speed += ROUNDING/2;
268 speed &= ~ROUNDING;
269
270 for (i=0; i<3; i++) {
271 if (_guess(speeds[i]) == speed)
272 return speeds[i];
273 }
274 return 0;
275}
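/*
 * guess_fsb() example (illustrative): a 10.0x part (maxmult = 100)
 * with cpu_khz around 1333000 gives speed = 1333, masked via ROUNDING
 * to 1328; _guess(133) computes 10*133 + 7 = 1337, also masked to
 * 1328, so 133 MHz is reported. The mask just makes the measured
 * boot-time MHz value and the candidate FSB * multiplier products
 * comparable despite small measurement error.
 */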
276
277
278static int __init longhaul_get_ranges(void)
279{
280 unsigned long invalue;
281 unsigned int multipliers[32]= {
282 50,30,40,100,55,35,45,95,90,70,80,60,120,75,85,65,
283 -1,110,120,-1,135,115,125,105,130,150,160,140,-1,155,-1,145 };
284 unsigned int j, k = 0;
285 union msr_longhaul longhaul;
286 unsigned long lo, hi;
287 unsigned int eblcr_fsb_table_v1[] = { 66, 133, 100, -1 };
288 unsigned int eblcr_fsb_table_v2[] = { 133, 100, -1, 66 };
289
290 switch (longhaul_version) {
291 case TYPE_LONGHAUL_V1:
292 case TYPE_LONGHAUL_V2:
293 /* Ugh, Longhaul v1 didn't have the min/max MSRs.
294 Assume min=3.0x & max = whatever we booted at. */
295 minmult = 30;
296 maxmult = longhaul_get_cpu_mult();
297 rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi);
298 invalue = (lo & (1<<18|1<<19)) >>18;
299 if (cpu_model==CPU_SAMUEL || cpu_model==CPU_SAMUEL2)
300 fsb = eblcr_fsb_table_v1[invalue];
301 else
302 fsb = guess_fsb();
303 break;
304
305 case TYPE_POWERSAVER:
306 /* Ezra-T */
307 if (cpu_model==CPU_EZRA_T) {
308 rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
309 invalue = longhaul.bits.MaxMHzBR;
310 if (longhaul.bits.MaxMHzBR4)
311 invalue += 16;
312 maxmult=multipliers[invalue];
313
314 invalue = longhaul.bits.MinMHzBR;
315 if (longhaul.bits.MinMHzBR4 == 1)
316 minmult = 30;
317 else
318 minmult = multipliers[invalue];
319 fsb = eblcr_fsb_table_v2[longhaul.bits.MaxMHzFSB];
320 break;
321 }
322
323 /* Nehemiah */
324 if (cpu_model==CPU_NEHEMIAH) {
325 rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
326
327 /*
328 * TODO: This code works, but raises a lot of questions.
329 * - Some Nehemiah's seem to have broken Min/MaxMHzBR's.
330 * We get around this by using a hardcoded multiplier of 4.0x
 331		 * for the minimum speed, and the speed we booted up at for the max.
332 * This is done in longhaul_get_cpu_mult() by reading the EBLCR register.
333 * - According to some VIA documentation EBLCR is only
334 * in pre-Nehemiah C3s. How this still works is a mystery.
335 * We're possibly using something undocumented and unsupported,
336 * But it works, so we don't grumble.
337 */
338 minmult=40;
339 maxmult=longhaul_get_cpu_mult();
340
 341			/* Starting with the 1.2GHz parts, there's a 200MHz bus. */
342 if ((cpu_khz/1000) > 1200)
343 fsb = 200;
344 else
345 fsb = eblcr_fsb_table_v2[longhaul.bits.MaxMHzFSB];
346 break;
347 }
348 }
349
350 dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n",
351 minmult/10, minmult%10, maxmult/10, maxmult%10);
352
353 if (fsb == -1) {
354 printk (KERN_INFO PFX "Invalid (reserved) FSB!\n");
355 return -EINVAL;
356 }
357
358 highest_speed = calc_speed(maxmult);
359 lowest_speed = calc_speed(minmult);
360 dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb,
361 print_speed(lowest_speed/1000),
362 print_speed(highest_speed/1000));
363
364 if (lowest_speed == highest_speed) {
365 printk (KERN_INFO PFX "highestspeed == lowest, aborting.\n");
366 return -EINVAL;
367 }
368 if (lowest_speed > highest_speed) {
 369		printk (KERN_INFO PFX "nonsense! lowest speed (%d) is above highest (%d)!\n",
370 lowest_speed, highest_speed);
371 return -EINVAL;
372 }
373
374 longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL);
375 if(!longhaul_table)
376 return -ENOMEM;
377
378 for (j=0; j < numscales; j++) {
379 unsigned int ratio;
380 ratio = clock_ratio[j];
381 if (ratio == -1)
382 continue;
383 if (ratio > maxmult || ratio < minmult)
384 continue;
385 longhaul_table[k].frequency = calc_speed(ratio);
386 longhaul_table[k].index = j;
387 k++;
388 }
389
390 longhaul_table[k].frequency = CPUFREQ_TABLE_END;
391 if (!k) {
392 kfree (longhaul_table);
393 return -EINVAL;
394 }
395
396 return 0;
397}
398
399
400static void __init longhaul_setup_voltagescaling(void)
401{
402 union msr_longhaul longhaul;
403
404 rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
405
406 if (!(longhaul.bits.RevisionID & 1))
407 return;
408
409 minvid = longhaul.bits.MinimumVID;
410 maxvid = longhaul.bits.MaximumVID;
411 vrmrev = longhaul.bits.VRMRev;
412
413 if (minvid == 0 || maxvid == 0) {
414 printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
415 "Voltage scaling disabled.\n",
416 minvid/1000, minvid%1000, maxvid/1000, maxvid%1000);
417 return;
418 }
419
420 if (minvid == maxvid) {
421 printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are "
422 "both %d.%03d. Voltage scaling disabled\n",
423 maxvid/1000, maxvid%1000);
424 return;
425 }
426
427 if (vrmrev==0) {
 428		dprintk ("VRM 8.5\n");
429 memcpy (voltage_table, vrm85scales, sizeof(voltage_table));
430 numvscales = (voltage_table[maxvid]-voltage_table[minvid])/25;
431 } else {
 432		dprintk ("Mobile VRM\n");
433 memcpy (voltage_table, mobilevrmscales, sizeof(voltage_table));
434 numvscales = (voltage_table[maxvid]-voltage_table[minvid])/5;
435 }
436
437 /* Current voltage isn't readable at first, so we need to
438 set it to a known value. The spec says to use maxvid */
439 longhaul.bits.RevisionKey = longhaul.bits.RevisionID; /* FIXME: This is bad. */
440 longhaul.bits.EnableSoftVID = 1;
441 longhaul.bits.SoftVID = maxvid;
442 wrmsrl (MSR_VIA_LONGHAUL, longhaul.val);
443
444 minvid = voltage_table[minvid];
445 maxvid = voltage_table[maxvid];
446
447 dprintk ("Min VID=%d.%03d Max VID=%d.%03d, %d possible voltage scales\n",
448 maxvid/1000, maxvid%1000, minvid/1000, minvid%1000, numvscales);
449
450 can_scale_voltage = 1;
451}
452
453
454static int longhaul_verify(struct cpufreq_policy *policy)
455{
456 return cpufreq_frequency_table_verify(policy, longhaul_table);
457}
458
459
460static int longhaul_target(struct cpufreq_policy *policy,
461 unsigned int target_freq, unsigned int relation)
462{
463 unsigned int table_index = 0;
464 unsigned int new_clock_ratio = 0;
465
466 if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index))
467 return -EINVAL;
468
469 new_clock_ratio = longhaul_table[table_index].index & 0xFF;
470
471 longhaul_setstate(new_clock_ratio);
472
473 return 0;
474}
475
476
477static unsigned int longhaul_get(unsigned int cpu)
478{
479 if (cpu)
480 return 0;
481 return calc_speed(longhaul_get_cpu_mult());
482}
483
484
485static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
486{
487 struct cpuinfo_x86 *c = cpu_data;
488 char *cpuname=NULL;
489 int ret;
490
491 switch (c->x86_model) {
492 case 6:
493 cpu_model = CPU_SAMUEL;
494 cpuname = "C3 'Samuel' [C5A]";
495 longhaul_version = TYPE_LONGHAUL_V1;
496 memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio));
497 memcpy (eblcr_table, samuel1_eblcr, sizeof(samuel1_eblcr));
498 break;
499
500 case 7:
501 longhaul_version = TYPE_LONGHAUL_V1;
502 switch (c->x86_mask) {
503 case 0:
504 cpu_model = CPU_SAMUEL2;
505 cpuname = "C3 'Samuel 2' [C5B]";
506 /* Note, this is not a typo, early Samuel2's had Samuel1 ratios. */
507 memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio));
508 memcpy (eblcr_table, samuel2_eblcr, sizeof(samuel2_eblcr));
509 break;
510 case 1 ... 15:
511 if (c->x86_mask < 8) {
512 cpu_model = CPU_SAMUEL2;
513 cpuname = "C3 'Samuel 2' [C5B]";
514 } else {
515 cpu_model = CPU_EZRA;
516 cpuname = "C3 'Ezra' [C5C]";
517 }
518 memcpy (clock_ratio, ezra_clock_ratio, sizeof(ezra_clock_ratio));
519 memcpy (eblcr_table, ezra_eblcr, sizeof(ezra_eblcr));
520 break;
521 }
522 break;
523
524 case 8:
525 cpu_model = CPU_EZRA_T;
526 cpuname = "C3 'Ezra-T' [C5M]";
527 longhaul_version = TYPE_POWERSAVER;
528 numscales=32;
529 memcpy (clock_ratio, ezrat_clock_ratio, sizeof(ezrat_clock_ratio));
530 memcpy (eblcr_table, ezrat_eblcr, sizeof(ezrat_eblcr));
531 break;
532
533 case 9:
534 cpu_model = CPU_NEHEMIAH;
535 longhaul_version = TYPE_POWERSAVER;
536 numscales=32;
537 switch (c->x86_mask) {
538 case 0 ... 1:
539 cpuname = "C3 'Nehemiah A' [C5N]";
540 memcpy (clock_ratio, nehemiah_a_clock_ratio, sizeof(nehemiah_a_clock_ratio));
541 memcpy (eblcr_table, nehemiah_a_eblcr, sizeof(nehemiah_a_eblcr));
542 break;
543 case 2 ... 4:
544 cpuname = "C3 'Nehemiah B' [C5N]";
545 memcpy (clock_ratio, nehemiah_b_clock_ratio, sizeof(nehemiah_b_clock_ratio));
546 memcpy (eblcr_table, nehemiah_b_eblcr, sizeof(nehemiah_b_eblcr));
547 break;
548 case 5 ... 15:
549 cpuname = "C3 'Nehemiah C' [C5N]";
550 memcpy (clock_ratio, nehemiah_c_clock_ratio, sizeof(nehemiah_c_clock_ratio));
551 memcpy (eblcr_table, nehemiah_c_eblcr, sizeof(nehemiah_c_eblcr));
552 break;
553 }
554 break;
555
556 default:
557 cpuname = "Unknown";
558 break;
559 }
560
561 printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname);
562 switch (longhaul_version) {
563 case TYPE_LONGHAUL_V1:
564 case TYPE_LONGHAUL_V2:
565 printk ("Longhaul v%d supported.\n", longhaul_version);
566 break;
567 case TYPE_POWERSAVER:
568 printk ("Powersaver supported.\n");
569 break;
 570	}
571
572 ret = longhaul_get_ranges();
573 if (ret != 0)
574 return ret;
575
576 if ((longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) &&
577 (dont_scale_voltage==0))
578 longhaul_setup_voltagescaling();
579
580 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
581 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
582 policy->cur = calc_speed(longhaul_get_cpu_mult());
583
584 ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table);
585 if (ret)
586 return ret;
587
588 cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu);
589
590 return 0;
591}
592
593static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy)
594{
595 cpufreq_frequency_table_put_attr(policy->cpu);
596 return 0;
597}
598
599static struct freq_attr* longhaul_attr[] = {
600 &cpufreq_freq_attr_scaling_available_freqs,
601 NULL,
602};
603
604static struct cpufreq_driver longhaul_driver = {
605 .verify = longhaul_verify,
606 .target = longhaul_target,
607 .get = longhaul_get,
608 .init = longhaul_cpu_init,
609 .exit = __devexit_p(longhaul_cpu_exit),
610 .name = "longhaul",
611 .owner = THIS_MODULE,
612 .attr = longhaul_attr,
613};
614
615
616static int __init longhaul_init(void)
617{
618 struct cpuinfo_x86 *c = cpu_data;
619
620 if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6)
621 return -ENODEV;
622
623 switch (c->x86_model) {
624 case 6 ... 9:
625 return cpufreq_register_driver(&longhaul_driver);
626 default:
627 printk (KERN_INFO PFX "Unknown VIA CPU. Contact davej@codemonkey.org.uk\n");
628 }
629
630 return -ENODEV;
631}
632
633
634static void __exit longhaul_exit(void)
635{
636 int i=0;
637
638 for (i=0; i < numscales; i++) {
639 if (clock_ratio[i] == maxmult) {
640 longhaul_setstate(i);
641 break;
642 }
643 }
644
645 cpufreq_unregister_driver(&longhaul_driver);
646 kfree(longhaul_table);
647}
648
649module_param (dont_scale_voltage, int, 0644);
650MODULE_PARM_DESC(dont_scale_voltage, "Don't scale voltage of processor");
651
652MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
653MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
654MODULE_LICENSE ("GPL");
655
656module_init(longhaul_init);
657module_exit(longhaul_exit);
658
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h
new file mode 100644
index 000000000000..2a495c162ec7
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h
@@ -0,0 +1,466 @@
1/*
2 * longhaul.h
3 * (C) 2003 Dave Jones.
4 *
5 * Licensed under the terms of the GNU GPL License version 2.
6 *
7 * VIA-specific information
8 */
9
10union msr_bcr2 {
11 struct {
 12	unsigned Reserved:19,	// 18:0
13 ESOFTBF:1, // 19
14 Reserved2:3, // 22:20
15 CLOCKMUL:4, // 26:23
16 Reserved3:5; // 31:27
17 } bits;
18 unsigned long val;
19};
20
21union msr_longhaul {
22 struct {
23 unsigned RevisionID:4, // 3:0
24 RevisionKey:4, // 7:4
25 EnableSoftBusRatio:1, // 8
26 EnableSoftVID:1, // 9
27 EnableSoftBSEL:1, // 10
 28		Reserved:3,		// 13:11
29 SoftBusRatio4:1, // 14
30 VRMRev:1, // 15
31 SoftBusRatio:4, // 19:16
32 SoftVID:5, // 24:20
33 Reserved2:3, // 27:25
34 SoftBSEL:2, // 29:28
35 Reserved3:2, // 31:30
36 MaxMHzBR:4, // 35:32
37 MaximumVID:5, // 40:36
38 MaxMHzFSB:2, // 42:41
39 MaxMHzBR4:1, // 43
40 Reserved4:4, // 47:44
41 MinMHzBR:4, // 51:48
42 MinimumVID:5, // 56:52
43 MinMHzFSB:2, // 58:57
44 MinMHzBR4:1, // 59
45 Reserved5:4; // 63:60
46 } bits;
47 unsigned long long val;
48};
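/*
 * Usage note (illustrative): longhaul.c treats SoftBusRatio4 as bit 4
 * of a 5-bit multiplier index, i.e. index = SoftBusRatio |
 * (SoftBusRatio4 << 4), which is how the 32-entry ratio tables below
 * are addressed on Ezra-T and Nehemiah parts.
 */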
49
50/*
51 * Clock ratio tables. Div/Mod by 10 to get ratio.
52 * The eblcr ones specify the ratio read from the CPU.
53 * The clock_ratio ones specify what to write to the CPU.
54 */
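/*
 * Example (illustrative): a table entry of 95 encodes a 9.5x ratio
 * (95/10 = 9, 95%10 = 5) and 120 encodes 12.0x; -1 marks reserved
 * encodings that the driver skips when building its frequency table.
 */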
55
56/*
57 * VIA C3 Samuel 1 & Samuel 2 (stepping 0)
58 */
59static int __initdata samuel1_clock_ratio[16] = {
60 -1, /* 0000 -> RESERVED */
61 30, /* 0001 -> 3.0x */
62 40, /* 0010 -> 4.0x */
63 -1, /* 0011 -> RESERVED */
64 -1, /* 0100 -> RESERVED */
65 35, /* 0101 -> 3.5x */
66 45, /* 0110 -> 4.5x */
67 55, /* 0111 -> 5.5x */
68 60, /* 1000 -> 6.0x */
69 70, /* 1001 -> 7.0x */
70 80, /* 1010 -> 8.0x */
71 50, /* 1011 -> 5.0x */
72 65, /* 1100 -> 6.5x */
73 75, /* 1101 -> 7.5x */
74 -1, /* 1110 -> RESERVED */
75 -1, /* 1111 -> RESERVED */
76};
77
78static int __initdata samuel1_eblcr[16] = {
79 50, /* 0000 -> RESERVED */
80 30, /* 0001 -> 3.0x */
81 40, /* 0010 -> 4.0x */
82 -1, /* 0011 -> RESERVED */
83 55, /* 0100 -> 5.5x */
84 35, /* 0101 -> 3.5x */
85 45, /* 0110 -> 4.5x */
86 -1, /* 0111 -> RESERVED */
87 -1, /* 1000 -> RESERVED */
88 70, /* 1001 -> 7.0x */
89 80, /* 1010 -> 8.0x */
90 60, /* 1011 -> 6.0x */
91 -1, /* 1100 -> RESERVED */
92 75, /* 1101 -> 7.5x */
93 -1, /* 1110 -> RESERVED */
94 65, /* 1111 -> 6.5x */
95};
96
97/*
98 * VIA C3 Samuel2 Stepping 1->15
99 */
100static int __initdata samuel2_eblcr[16] = {
101 50, /* 0000 -> 5.0x */
102 30, /* 0001 -> 3.0x */
103 40, /* 0010 -> 4.0x */
104 100, /* 0011 -> 10.0x */
105 55, /* 0100 -> 5.5x */
106 35, /* 0101 -> 3.5x */
107 45, /* 0110 -> 4.5x */
108 110, /* 0111 -> 11.0x */
109 90, /* 1000 -> 9.0x */
110 70, /* 1001 -> 7.0x */
111 80, /* 1010 -> 8.0x */
112 60, /* 1011 -> 6.0x */
113 120, /* 1100 -> 12.0x */
114 75, /* 1101 -> 7.5x */
115 130, /* 1110 -> 13.0x */
116 65, /* 1111 -> 6.5x */
117};
118
119/*
120 * VIA C3 Ezra
121 */
122static int __initdata ezra_clock_ratio[16] = {
123 100, /* 0000 -> 10.0x */
124 30, /* 0001 -> 3.0x */
125 40, /* 0010 -> 4.0x */
126 90, /* 0011 -> 9.0x */
127 95, /* 0100 -> 9.5x */
128 35, /* 0101 -> 3.5x */
129 45, /* 0110 -> 4.5x */
130 55, /* 0111 -> 5.5x */
131 60, /* 1000 -> 6.0x */
132 70, /* 1001 -> 7.0x */
133 80, /* 1010 -> 8.0x */
134 50, /* 1011 -> 5.0x */
135 65, /* 1100 -> 6.5x */
136 75, /* 1101 -> 7.5x */
137 85, /* 1110 -> 8.5x */
138 120, /* 1111 -> 12.0x */
139};
140
141static int __initdata ezra_eblcr[16] = {
142 50, /* 0000 -> 5.0x */
143 30, /* 0001 -> 3.0x */
144 40, /* 0010 -> 4.0x */
145 100, /* 0011 -> 10.0x */
146 55, /* 0100 -> 5.5x */
147 35, /* 0101 -> 3.5x */
148 45, /* 0110 -> 4.5x */
149 95, /* 0111 -> 9.5x */
150 90, /* 1000 -> 9.0x */
151 70, /* 1001 -> 7.0x */
152 80, /* 1010 -> 8.0x */
153 60, /* 1011 -> 6.0x */
154 120, /* 1100 -> 12.0x */
155 75, /* 1101 -> 7.5x */
156 85, /* 1110 -> 8.5x */
157 65, /* 1111 -> 6.5x */
158};
159
160/*
161 * VIA C3 (Ezra-T) [C5M].
162 */
163static int __initdata ezrat_clock_ratio[32] = {
164 100, /* 0000 -> 10.0x */
165 30, /* 0001 -> 3.0x */
166 40, /* 0010 -> 4.0x */
167 90, /* 0011 -> 9.0x */
168 95, /* 0100 -> 9.5x */
169 35, /* 0101 -> 3.5x */
170 45, /* 0110 -> 4.5x */
171 55, /* 0111 -> 5.5x */
172 60, /* 1000 -> 6.0x */
173 70, /* 1001 -> 7.0x */
174 80, /* 1010 -> 8.0x */
175 50, /* 1011 -> 5.0x */
176 65, /* 1100 -> 6.5x */
177 75, /* 1101 -> 7.5x */
178 85, /* 1110 -> 8.5x */
179 120, /* 1111 -> 12.0x */
180
181 -1, /* 0000 -> RESERVED (10.0x) */
182 110, /* 0001 -> 11.0x */
183 120, /* 0010 -> 12.0x */
184 -1, /* 0011 -> RESERVED (9.0x)*/
185 105, /* 0100 -> 10.5x */
186 115, /* 0101 -> 11.5x */
187 125, /* 0110 -> 12.5x */
188 135, /* 0111 -> 13.5x */
189 140, /* 1000 -> 14.0x */
190 150, /* 1001 -> 15.0x */
191 160, /* 1010 -> 16.0x */
192 130, /* 1011 -> 13.0x */
193 145, /* 1100 -> 14.5x */
194 155, /* 1101 -> 15.5x */
195 -1, /* 1110 -> RESERVED (13.0x) */
196 -1, /* 1111 -> RESERVED (12.0x) */
197};
198
199static int __initdata ezrat_eblcr[32] = {
200 50, /* 0000 -> 5.0x */
201 30, /* 0001 -> 3.0x */
202 40, /* 0010 -> 4.0x */
203 100, /* 0011 -> 10.0x */
204 55, /* 0100 -> 5.5x */
205 35, /* 0101 -> 3.5x */
206 45, /* 0110 -> 4.5x */
207 95, /* 0111 -> 9.5x */
208 90, /* 1000 -> 9.0x */
209 70, /* 1001 -> 7.0x */
210 80, /* 1010 -> 8.0x */
211 60, /* 1011 -> 6.0x */
212 120, /* 1100 -> 12.0x */
213 75, /* 1101 -> 7.5x */
214 85, /* 1110 -> 8.5x */
215 65, /* 1111 -> 6.5x */
216
217 -1, /* 0000 -> RESERVED (9.0x) */
218 110, /* 0001 -> 11.0x */
219 120, /* 0010 -> 12.0x */
220 -1, /* 0011 -> RESERVED (10.0x)*/
221 135, /* 0100 -> 13.5x */
222 115, /* 0101 -> 11.5x */
223 125, /* 0110 -> 12.5x */
224 105, /* 0111 -> 10.5x */
225 130, /* 1000 -> 13.0x */
226 150, /* 1001 -> 15.0x */
227 160, /* 1010 -> 16.0x */
228 140, /* 1011 -> 14.0x */
229 -1, /* 1100 -> RESERVED (12.0x) */
230 155, /* 1101 -> 15.5x */
231 -1, /* 1110 -> RESERVED (13.0x) */
232 145, /* 1111 -> 14.5x */
233};
234
235/*
 236 * VIA C3 Nehemiah
 237 */
238static int __initdata nehemiah_a_clock_ratio[32] = {
239 100, /* 0000 -> 10.0x */
240 160, /* 0001 -> 16.0x */
241 -1, /* 0010 -> RESERVED */
242 90, /* 0011 -> 9.0x */
243 95, /* 0100 -> 9.5x */
244 -1, /* 0101 -> RESERVED */
245 -1, /* 0110 -> RESERVED */
246 55, /* 0111 -> 5.5x */
247 60, /* 1000 -> 6.0x */
248 70, /* 1001 -> 7.0x */
249 80, /* 1010 -> 8.0x */
250 50, /* 1011 -> 5.0x */
251 65, /* 1100 -> 6.5x */
252 75, /* 1101 -> 7.5x */
253 85, /* 1110 -> 8.5x */
254 120, /* 1111 -> 12.0x */
255 100, /* 0000 -> 10.0x */
256 -1, /* 0001 -> RESERVED */
257 120, /* 0010 -> 12.0x */
258 90, /* 0011 -> 9.0x */
259 105, /* 0100 -> 10.5x */
260 115, /* 0101 -> 11.5x */
261 125, /* 0110 -> 12.5x */
262 135, /* 0111 -> 13.5x */
263 140, /* 1000 -> 14.0x */
264 150, /* 1001 -> 15.0x */
265 160, /* 1010 -> 16.0x */
266 130, /* 1011 -> 13.0x */
267 145, /* 1100 -> 14.5x */
268 155, /* 1101 -> 15.5x */
269 -1, /* 1110 -> RESERVED (13.0x) */
270 120, /* 1111 -> 12.0x */
271};
272
273static int __initdata nehemiah_b_clock_ratio[32] = {
274 100, /* 0000 -> 10.0x */
275 160, /* 0001 -> 16.0x */
276 -1, /* 0010 -> RESERVED */
277 90, /* 0011 -> 9.0x */
278 95, /* 0100 -> 9.5x */
279 -1, /* 0101 -> RESERVED */
280 -1, /* 0110 -> RESERVED */
281 55, /* 0111 -> 5.5x */
282 60, /* 1000 -> 6.0x */
283 70, /* 1001 -> 7.0x */
284 80, /* 1010 -> 8.0x */
285 50, /* 1011 -> 5.0x */
286 65, /* 1100 -> 6.5x */
287 75, /* 1101 -> 7.5x */
288 85, /* 1110 -> 8.5x */
289 120, /* 1111 -> 12.0x */
290 100, /* 0000 -> 10.0x */
291 110, /* 0001 -> 11.0x */
292 120, /* 0010 -> 12.0x */
293 90, /* 0011 -> 9.0x */
294 105, /* 0100 -> 10.5x */
295 115, /* 0101 -> 11.5x */
296 125, /* 0110 -> 12.5x */
297 135, /* 0111 -> 13.5x */
298 140, /* 1000 -> 14.0x */
299 150, /* 1001 -> 15.0x */
300 160, /* 1010 -> 16.0x */
301 130, /* 1011 -> 13.0x */
302 145, /* 1100 -> 14.5x */
303 155, /* 1101 -> 15.5x */
304 -1, /* 1110 -> RESERVED (13.0x) */
305 120, /* 1111 -> 12.0x */
306};
307
308static int __initdata nehemiah_c_clock_ratio[32] = {
309 100, /* 0000 -> 10.0x */
310 160, /* 0001 -> 16.0x */
311 40, /* 0010 -> RESERVED */
312 90, /* 0011 -> 9.0x */
313 95, /* 0100 -> 9.5x */
314 -1, /* 0101 -> RESERVED */
315 45, /* 0110 -> RESERVED */
316 55, /* 0111 -> 5.5x */
317 60, /* 1000 -> 6.0x */
318 70, /* 1001 -> 7.0x */
319 80, /* 1010 -> 8.0x */
320 50, /* 1011 -> 5.0x */
321 65, /* 1100 -> 6.5x */
322 75, /* 1101 -> 7.5x */
323 85, /* 1110 -> 8.5x */
324 120, /* 1111 -> 12.0x */
325 100, /* 0000 -> 10.0x */
326 110, /* 0001 -> 11.0x */
327 120, /* 0010 -> 12.0x */
328 90, /* 0011 -> 9.0x */
329 105, /* 0100 -> 10.5x */
330 115, /* 0101 -> 11.5x */
331 125, /* 0110 -> 12.5x */
332 135, /* 0111 -> 13.5x */
333 140, /* 1000 -> 14.0x */
334 150, /* 1001 -> 15.0x */
335 160, /* 1010 -> 16.0x */
336 130, /* 1011 -> 13.0x */
337 145, /* 1100 -> 14.5x */
338 155, /* 1101 -> 15.5x */
339 -1, /* 1110 -> RESERVED (13.0x) */
340 120, /* 1111 -> 12.0x */
341};
342
343static int __initdata nehemiah_a_eblcr[32] = {
344 50, /* 0000 -> 5.0x */
345 160, /* 0001 -> 16.0x */
346 -1, /* 0010 -> RESERVED */
347 100, /* 0011 -> 10.0x */
348 55, /* 0100 -> 5.5x */
349 -1, /* 0101 -> RESERVED */
350 -1, /* 0110 -> RESERVED */
351 95, /* 0111 -> 9.5x */
352 90, /* 1000 -> 9.0x */
353 70, /* 1001 -> 7.0x */
354 80, /* 1010 -> 8.0x */
355 60, /* 1011 -> 6.0x */
356 120, /* 1100 -> 12.0x */
357 75, /* 1101 -> 7.5x */
358 85, /* 1110 -> 8.5x */
359 65, /* 1111 -> 6.5x */
360 90, /* 0000 -> 9.0x */
361 -1, /* 0001 -> RESERVED */
362 120, /* 0010 -> 12.0x */
363 100, /* 0011 -> 10.0x */
364 135, /* 0100 -> 13.5x */
365 115, /* 0101 -> 11.5x */
366 125, /* 0110 -> 12.5x */
367 105, /* 0111 -> 10.5x */
368 130, /* 1000 -> 13.0x */
369 150, /* 1001 -> 15.0x */
370 160, /* 1010 -> 16.0x */
371 140, /* 1011 -> 14.0x */
372 120, /* 1100 -> 12.0x */
373 155, /* 1101 -> 15.5x */
374 -1, /* 1110 -> RESERVED (13.0x) */
375 145 /* 1111 -> 14.5x */
376 /* end of table */
377};
378static int __initdata nehemiah_b_eblcr[32] = {
379 50, /* 0000 -> 5.0x */
380 160, /* 0001 -> 16.0x */
381 -1, /* 0010 -> RESERVED */
382 100, /* 0011 -> 10.0x */
383 55, /* 0100 -> 5.5x */
384 -1, /* 0101 -> RESERVED */
385 -1, /* 0110 -> RESERVED */
386 95, /* 0111 -> 9.5x */
387 90, /* 1000 -> 9.0x */
388 70, /* 1001 -> 7.0x */
389 80, /* 1010 -> 8.0x */
390 60, /* 1011 -> 6.0x */
391 120, /* 1100 -> 12.0x */
392 75, /* 1101 -> 7.5x */
393 85, /* 1110 -> 8.5x */
394 65, /* 1111 -> 6.5x */
395 90, /* 0000 -> 9.0x */
396 110, /* 0001 -> 11.0x */
397 120, /* 0010 -> 12.0x */
398 100, /* 0011 -> 10.0x */
399 135, /* 0100 -> 13.5x */
400 115, /* 0101 -> 11.5x */
401 125, /* 0110 -> 12.5x */
402 105, /* 0111 -> 10.5x */
403 130, /* 1000 -> 13.0x */
404 150, /* 1001 -> 15.0x */
405 160, /* 1010 -> 16.0x */
406 140, /* 1011 -> 14.0x */
407 120, /* 1100 -> 12.0x */
408 155, /* 1101 -> 15.5x */
409 -1, /* 1110 -> RESERVED (13.0x) */
410 145 /* 1111 -> 14.5x */
411 /* end of table */
412};
413static int __initdata nehemiah_c_eblcr[32] = {
414 50, /* 0000 -> 5.0x */
415 160, /* 0001 -> 16.0x */
416 40, /* 0010 -> RESERVED */
417 100, /* 0011 -> 10.0x */
418 55, /* 0100 -> 5.5x */
419 -1, /* 0101 -> RESERVED */
420 45, /* 0110 -> RESERVED */
421 95, /* 0111 -> 9.5x */
422 90, /* 1000 -> 9.0x */
423 70, /* 1001 -> 7.0x */
424 80, /* 1010 -> 8.0x */
425 60, /* 1011 -> 6.0x */
426 120, /* 1100 -> 12.0x */
427 75, /* 1101 -> 7.5x */
428 85, /* 1110 -> 8.5x */
429 65, /* 1111 -> 6.5x */
430 90, /* 0000 -> 9.0x */
431 110, /* 0001 -> 11.0x */
432 120, /* 0010 -> 12.0x */
433 100, /* 0011 -> 10.0x */
434 135, /* 0100 -> 13.5x */
435 115, /* 0101 -> 11.5x */
436 125, /* 0110 -> 12.5x */
437 105, /* 0111 -> 10.5x */
438 130, /* 1000 -> 13.0x */
439 150, /* 1001 -> 15.0x */
440 160, /* 1010 -> 16.0x */
441 140, /* 1011 -> 14.0x */
442 120, /* 1100 -> 12.0x */
443 155, /* 1101 -> 15.5x */
444 -1, /* 1110 -> RESERVED (13.0x) */
445 145 /* 1111 -> 14.5x */
446 /* end of table */
447};
448
449/*
450 * Voltage scales. Div/Mod by 1000 to get actual voltage.
451 * Which scale to use depends on the VRM type in use.
452 */
453static int __initdata vrm85scales[32] = {
454 1250, 1200, 1150, 1100, 1050, 1800, 1750, 1700,
455 1650, 1600, 1550, 1500, 1450, 1400, 1350, 1300,
456 1275, 1225, 1175, 1125, 1075, 1825, 1775, 1725,
457 1675, 1625, 1575, 1525, 1475, 1425, 1375, 1325,
458};
459
460static int __initdata mobilevrmscales[32] = {
461 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
462 1600, 1550, 1500, 1450, 1400, 1350, 1300, -1,
463 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
464 1075, 1050, 1025, 1000, 975, 950, 925, -1,
465};
466
diff --git a/arch/i386/kernel/cpu/cpufreq/longrun.c b/arch/i386/kernel/cpu/cpufreq/longrun.c
new file mode 100644
index 000000000000..e3868de4dc2e
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/longrun.c
@@ -0,0 +1,326 @@
1/*
2 * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
3 *
4 * Licensed under the terms of the GNU GPL License version 2.
5 *
6 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
7 */
8
9#include <linux/kernel.h>
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/slab.h>
13#include <linux/cpufreq.h>
14
15#include <asm/msr.h>
16#include <asm/processor.h>
17#include <asm/timex.h>
18
19#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longrun", msg)
20
21static struct cpufreq_driver longrun_driver;
22
23/**
24 * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz
25 * values into per cent values. In TMTA microcode, the following is valid:
26 * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
27 */
28static unsigned int longrun_low_freq, longrun_high_freq;
29
30
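/*
 * Illustrative helper (not part of the original driver; the name
 * longrun_freq_to_pctg is made up for this sketch): the kHz-to-percent
 * conversion described above, using the endpoints declared here.
 */
static inline unsigned int longrun_freq_to_pctg(unsigned int freq_khz)
{
 if (longrun_high_freq <= longrun_low_freq)
 return 100; /* degenerate table, as handled below */
 return (freq_khz - longrun_low_freq) * 100 /
 (longrun_high_freq - longrun_low_freq);
}
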
31/**
32 * longrun_get_policy - get the current LongRun policy
33 * @policy: struct cpufreq_policy where current policy is written into
34 *
35 * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS
36 * and MSR_TMTA_LONGRUN_CTRL
37 */
38static void __init longrun_get_policy(struct cpufreq_policy *policy)
39{
40 u32 msr_lo, msr_hi;
41
42 rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
43 dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi);
44 if (msr_lo & 0x01)
45 policy->policy = CPUFREQ_POLICY_PERFORMANCE;
46 else
47 policy->policy = CPUFREQ_POLICY_POWERSAVE;
48
49 rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
50 dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi);
51 msr_lo &= 0x0000007F;
52 msr_hi &= 0x0000007F;
53
54 if ( longrun_high_freq <= longrun_low_freq ) {
55 /* Assume degenerate Longrun table */
56 policy->min = policy->max = longrun_high_freq;
57 } else {
58 policy->min = longrun_low_freq + msr_lo *
59 ((longrun_high_freq - longrun_low_freq) / 100);
60 policy->max = longrun_low_freq + msr_hi *
61 ((longrun_high_freq - longrun_low_freq) / 100);
62 }
63 policy->cpu = 0;
64}
65
66
67/**
68 * longrun_set_policy - sets a new CPUFreq policy
69 * @policy: new policy
70 *
71 * Sets a new CPUFreq policy on LongRun-capable processors. This function
72 * has to be called with cpufreq_driver locked.
73 */
74static int longrun_set_policy(struct cpufreq_policy *policy)
75{
76 u32 msr_lo, msr_hi;
77 u32 pctg_lo, pctg_hi;
78
79 if (!policy)
80 return -EINVAL;
81
82 if ( longrun_high_freq <= longrun_low_freq ) {
83 /* Assume degenerate Longrun table */
84 pctg_lo = pctg_hi = 100;
85 } else {
86 pctg_lo = (policy->min - longrun_low_freq) /
87 ((longrun_high_freq - longrun_low_freq) / 100);
88 pctg_hi = (policy->max - longrun_low_freq) /
89 ((longrun_high_freq - longrun_low_freq) / 100);
90 }
91
92 if (pctg_hi > 100)
93 pctg_hi = 100;
94 if (pctg_lo > pctg_hi)
95 pctg_lo = pctg_hi;
96
97 /* performance or economy mode */
98 rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
99 msr_lo &= 0xFFFFFFFE;
100 switch (policy->policy) {
101 case CPUFREQ_POLICY_PERFORMANCE:
102 msr_lo |= 0x00000001;
103 break;
104 case CPUFREQ_POLICY_POWERSAVE:
105 break;
106 }
107 wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
108
109 /* lower and upper boundary */
110 rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
111 msr_lo &= 0xFFFFFF80;
112 msr_hi &= 0xFFFFFF80;
113 msr_lo |= pctg_lo;
114 msr_hi |= pctg_hi;
115 wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
116
117 return 0;
118}
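
/*
 * Worked example of the percentage clamping above (hypothetical endpoint
 * values): with longrun_low_freq = 300000 kHz and longrun_high_freq =
 * 600000 kHz, (high - low) / 100 = 3000, so a policy->min of 450000 kHz
 * gives pctg_lo = (450000 - 300000) / 3000 = 50.
 */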
119
120
121/**
122 * longrun_verify_policy - verifies a new CPUFreq policy
123 * @policy: the policy to verify
124 *
125 * Validates a new CPUFreq policy. This function has to be called with
126 * cpufreq_driver locked.
127 */
128static int longrun_verify_policy(struct cpufreq_policy *policy)
129{
130 if (!policy)
131 return -EINVAL;
132
133 policy->cpu = 0;
134 cpufreq_verify_within_limits(policy,
135 policy->cpuinfo.min_freq,
136 policy->cpuinfo.max_freq);
137
138 if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
139 (policy->policy != CPUFREQ_POLICY_PERFORMANCE))
140 return -EINVAL;
141
142 return 0;
143}
144
145static unsigned int longrun_get(unsigned int cpu)
146{
147 u32 eax, ebx, ecx, edx;
148
149 if (cpu)
150 return 0;
151
152 cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
153 dprintk("cpuid eax is %u\n", eax);
154
155 return (eax * 1000);
156}
157
158/**
159 * longrun_determine_freqs - determines the lowest and highest possible core frequency
160 * @low_freq: an int to put the lowest frequency into
161 * @high_freq: an int to put the highest frequency into
162 *
163 * Determines the lowest and highest possible core frequencies on this CPU.
164 * This is necessary to calculate the performance percentage according to
165 * TMTA rules:
166 * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq)
167 */
168static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
169 unsigned int *high_freq)
170{
171 u32 msr_lo, msr_hi;
172 u32 save_lo, save_hi;
173 u32 eax, ebx, ecx, edx;
174 u32 try_hi;
175 struct cpuinfo_x86 *c = cpu_data;
176
177 if (!low_freq || !high_freq)
178 return -EINVAL;
179
180 if (cpu_has(c, X86_FEATURE_LRTI)) {
181 /* if the LongRun Table Interface is present, the
182 * detection is a bit easier:
183 * For minimum frequency, read out the maximum
184 * level (msr_hi), write that into "currently
185 * selected level", and read out the frequency.
186 * For maximum frequency, read out level zero.
187 */
188 /* minimum */
189 rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi);
190 wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi);
191 rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
192 *low_freq = msr_lo * 1000; /* to kHz */
193
194 /* maximum */
195 wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi);
196 rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
197 *high_freq = msr_lo * 1000; /* to kHz */
198
199 dprintk("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq);
200
201 if (*low_freq > *high_freq)
202 *low_freq = *high_freq;
203 return 0;
204 }
205
206 /* set the upper border to the value determined during TSC init */
207 *high_freq = (cpu_khz / 1000);
208 *high_freq = *high_freq * 1000;
209 dprintk("high frequency is %u kHz\n", *high_freq);
210
211 /* get current borders */
212 rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
213 save_lo = msr_lo & 0x0000007F;
214 save_hi = msr_hi & 0x0000007F;
215
216 /* if current perf_pctg is larger than 90%, we need to decrease the
217 * upper limit to make the calculation more accurate.
218 */
219 cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
220 /* try decreasing in 10% steps; some processors react only
221 * at certain boundary values */
222 for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -=10) {
223 /* set perf_pctg window to 0 .. try_hi */
224 msr_lo &= 0xFFFFFF80;
225 msr_hi &= 0xFFFFFF80;
226 msr_lo |= 0;
227 msr_hi |= try_hi;
228 wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
229
230 /* read out current core MHz and current perf_pctg */
231 cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
232
233 /* restore values */
234 wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi);
235 }
236 dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax);
237
238 /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
239 * equals
240 * low_freq * (1 - perf_pctg) = (cur_freq - high_freq * perf_pctg)
241 *
242 * high_freq * perf_pctg is stored temporarily in "ebx".
243 */
244 ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */
245
246 if ((ecx > 95) || (ecx == 0) || (eax < ebx))
247 return -EIO;
248
249 edx = ((eax - ebx) * 100) / (100 - ecx);
250 *low_freq = edx * 1000; /* back to kHz */
251
252 dprintk("low frequency is %u kHz\n", *low_freq);
253
254 if (*low_freq > *high_freq)
255 *low_freq = *high_freq;
256
257 return 0;
258}
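
/*
 * Worked example of the derivation above (hypothetical numbers): with
 * cpu_khz = 1000000 (high_freq = 1000 MHz), perf_pctg ecx = 50 and a
 * current eax of 800 MHz, ebx = 1000 * 50 / 100 = 500 MHz and
 * low = (800 - 500) * 100 / (100 - 50) = 600 MHz. Check:
 * (800 - 600) / (1000 - 600) = 50%, matching the CPUID readout.
 */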
259
260
261static int __init longrun_cpu_init(struct cpufreq_policy *policy)
262{
263 int result = 0;
264
265 /* capability check */
266 if (policy->cpu != 0)
267 return -ENODEV;
268
269 /* detect low and high frequency */
270 result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq);
271 if (result)
272 return result;
273
274 /* cpuinfo and default policy values */
275 policy->cpuinfo.min_freq = longrun_low_freq;
276 policy->cpuinfo.max_freq = longrun_high_freq;
277 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
278 longrun_get_policy(policy);
279
280 return 0;
281}
282
283
284static struct cpufreq_driver longrun_driver = {
285 .flags = CPUFREQ_CONST_LOOPS,
286 .verify = longrun_verify_policy,
287 .setpolicy = longrun_set_policy,
288 .get = longrun_get,
289 .init = longrun_cpu_init,
290 .name = "longrun",
291 .owner = THIS_MODULE,
292};
293
294
295/**
296 * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver
297 *
298 * Initializes the LongRun support.
299 */
300static int __init longrun_init(void)
301{
302 struct cpuinfo_x86 *c = cpu_data;
303
304 if (c->x86_vendor != X86_VENDOR_TRANSMETA ||
305 !cpu_has(c, X86_FEATURE_LONGRUN))
306 return -ENODEV;
307
308 return cpufreq_register_driver(&longrun_driver);
309}
310
311
312/**
313 * longrun_exit - unregisters LongRun support
314 */
315static void __exit longrun_exit(void)
316{
317 cpufreq_unregister_driver(&longrun_driver);
318}
319
320
321MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>");
322MODULE_DESCRIPTION ("LongRun driver for Transmeta Crusoe and Efficeon processors.");
323MODULE_LICENSE ("GPL");
324
325module_init(longrun_init);
326module_exit(longrun_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c
new file mode 100644
index 000000000000..aa622d52c6e5
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c
@@ -0,0 +1,337 @@
1/*
2 * Pentium 4/Xeon CPU on demand clock modulation/speed scaling
3 * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
4 * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
5 * (C) 2002 Arjan van de Ven <arjanv@redhat.com>
6 * (C) 2002 Tora T. Engstad
7 * All Rights Reserved
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 *
14 * The author(s) of this software shall not be held liable for damages
15 * of any nature resulting from the use of this software. This
16 * software is provided AS-IS with no warranties.
17 *
18 * Date Errata Description
19 * 20020525 N44, O17 12.5% or 25% DC causes lockup
20 *
21 */
22
23#include <linux/config.h>
24#include <linux/kernel.h>
25#include <linux/module.h>
26#include <linux/init.h>
27#include <linux/smp.h>
28#include <linux/cpufreq.h>
29#include <linux/slab.h>
30#include <linux/cpumask.h>
31
32#include <asm/processor.h>
33#include <asm/msr.h>
34#include <asm/timex.h>
35
36#include "speedstep-lib.h"
37
38#define PFX "p4-clockmod: "
39#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg)
40
41/*
42 * Duty Cycle (3 bits); note that DC_DISABLE is not specified in the
43 * Intel docs, it is just used here to mean disable.
44 */
45enum {
46 DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT,
47 DC_64PT, DC_75PT, DC_88PT, DC_DISABLE
48};
49
50#define DC_ENTRIES 8
51
52
53static int has_N44_O17_errata[NR_CPUS];
54static unsigned int stock_freq;
55static struct cpufreq_driver p4clockmod_driver;
56static unsigned int cpufreq_p4_get(unsigned int cpu);
57
58static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
59{
60 u32 l, h;
61
62 if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV))
63 return -EINVAL;
64
65 rdmsr(MSR_IA32_THERM_STATUS, l, h);
66
67 if (l & 0x01)
68 dprintk("CPU#%d currently thermal throttled\n", cpu);
69
70 if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT))
71 newstate = DC_38PT;
72
73 rdmsr(MSR_IA32_THERM_CONTROL, l, h);
74 if (newstate == DC_DISABLE) {
75 dprintk("CPU#%d disabling modulation\n", cpu);
76 wrmsr(MSR_IA32_THERM_CONTROL, l & ~(1<<4), h);
77 } else {
78 dprintk("CPU#%d setting duty cycle to %d%%\n",
79 cpu, ((125 * newstate) / 10));
80 /* bits 63 - 5 : reserved
81 * bit 4 : enable/disable
82 * bits 3-1 : duty cycle
83 * bit 0 : reserved
84 */
85 l = (l & ~14);
86 l = l | (1<<4) | ((newstate & 0x7)<<1);
87 wrmsr(MSR_IA32_THERM_CONTROL, l, h);
88 }
89
90 return 0;
91}
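
/*
 * Encoding sketch for MSR_IA32_THERM_CONTROL as programmed above
 * (illustrative, not part of the original driver): for DC_50PT
 * (newstate == 4) the on-demand clock modulation field becomes
 * l = (1 << 4) | (4 << 1) = 0x18,
 * i.e. modulation enabled with a duty cycle of (125 * 4) / 10 = 50%.
 */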
92
93
94static struct cpufreq_frequency_table p4clockmod_table[] = {
95 {DC_RESV, CPUFREQ_ENTRY_INVALID},
96 {DC_DFLT, 0},
97 {DC_25PT, 0},
98 {DC_38PT, 0},
99 {DC_50PT, 0},
100 {DC_64PT, 0},
101 {DC_75PT, 0},
102 {DC_88PT, 0},
103 {DC_DISABLE, 0},
104 {DC_RESV, CPUFREQ_TABLE_END},
105};
106
107
108static int cpufreq_p4_target(struct cpufreq_policy *policy,
109 unsigned int target_freq,
110 unsigned int relation)
111{
112 unsigned int newstate = DC_RESV;
113 struct cpufreq_freqs freqs;
114 cpumask_t cpus_allowed;
115 int i;
116
117 if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate))
118 return -EINVAL;
119
120 freqs.old = cpufreq_p4_get(policy->cpu);
121 freqs.new = stock_freq * p4clockmod_table[newstate].index / 8;
122
123 if (freqs.new == freqs.old)
124 return 0;
125
126 /* notifiers */
127 for_each_cpu_mask(i, policy->cpus) {
128 freqs.cpu = i;
129 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
130 }
131
132 /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
133 * Developer's Manual, Volume 3
134 */
135 cpus_allowed = current->cpus_allowed;
136
137 for_each_cpu_mask(i, policy->cpus) {
138 cpumask_t this_cpu = cpumask_of_cpu(i);
139
140 set_cpus_allowed(current, this_cpu);
141 BUG_ON(smp_processor_id() != i);
142
143 cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
144 }
145 set_cpus_allowed(current, cpus_allowed);
146
147 /* notifiers */
148 for_each_cpu_mask(i, policy->cpus) {
149 freqs.cpu = i;
150 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
151 }
152
153 return 0;
154}
155
156
157static int cpufreq_p4_verify(struct cpufreq_policy *policy)
158{
159 return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]);
160}
161
162
163static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
164{
165 if ((c->x86 == 0x06) && (c->x86_model == 0x09)) {
166 /* Pentium M (Banias) */
167 printk(KERN_WARNING PFX "Warning: Pentium M detected. "
168 "The speedstep_centrino module offers voltage scaling"
169 " in addition to frequency scaling. You should use "
170 "that instead of p4-clockmod, if possible.\n");
171 return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM);
172 }
173
174 if ((c->x86 == 0x06) && (c->x86_model == 0x0D)) {
175 /* Pentium M (Dothan) */
176 printk(KERN_WARNING PFX "Warning: Pentium M detected. "
177 "The speedstep_centrino module offers voltage scaling"
178 " in addition to frequency scaling. You should use "
179 "that instead of p4-clockmod, if possible.\n");
180 /* on P-4s, the TSC runs at a constant frequency independent of whether
181 * throttling is active or not. */
182 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
183 return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM);
184 }
185
186 if (c->x86 != 0xF) {
187 printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <linux@brodo.de>\n");
188 return 0;
189 }
190
191 /* on P-4s, the TSC runs at a constant frequency independent of whether
192 * throttling is active or not. */
193 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
194
195 if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) {
196 printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
197 "The speedstep-ich or acpi cpufreq modules offer "
198 "voltage scaling in addition to frequency scaling. "
199 "You should use either one instead of p4-clockmod, "
200 "if possible.\n");
201 return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M);
202 }
203
204 return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D);
205}
206
207
208
209static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
210{
211 struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
212 int cpuid = 0;
213 unsigned int i;
214
215#ifdef CONFIG_SMP
216 policy->cpus = cpu_sibling_map[policy->cpu];
217#endif
218
219 /* Errata workaround */
220 cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask;
221 switch (cpuid) {
222 case 0x0f07:
223 case 0x0f0a:
224 case 0x0f11:
225 case 0x0f12:
226 has_N44_O17_errata[policy->cpu] = 1;
227 dprintk("has errata -- disabling low frequencies\n");
228 }
229
230 /* get max frequency */
231 stock_freq = cpufreq_p4_get_frequency(c);
232 if (!stock_freq)
233 return -EINVAL;
234
235 /* table init */
236 for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
237 if ((i<2) && (has_N44_O17_errata[policy->cpu]))
238 p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
239 else
240 p4clockmod_table[i].frequency = (stock_freq * i)/8;
241 }
242 cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu);
243
244 /* cpuinfo and default policy values */
245 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
246 policy->cpuinfo.transition_latency = 1000000; /* assumed */
247 policy->cur = stock_freq;
248
249 return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]);
250}
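
/*
 * Example of the table initialization above (hypothetical stock_freq of
 * 2400000 kHz): entry i = 4 (DC_50PT) becomes 2400000 * 4 / 8 =
 * 1200000 kHz, so each valid step is one eighth of the stock frequency.
 */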
251
252
253static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy)
254{
255 cpufreq_frequency_table_put_attr(policy->cpu);
256 return 0;
257}
258
259static unsigned int cpufreq_p4_get(unsigned int cpu)
260{
261 cpumask_t cpus_allowed;
262 u32 l, h;
263
264 cpus_allowed = current->cpus_allowed;
265
266 set_cpus_allowed(current, cpumask_of_cpu(cpu));
267 BUG_ON(smp_processor_id() != cpu);
268
269 rdmsr(MSR_IA32_THERM_CONTROL, l, h);
270
271 set_cpus_allowed(current, cpus_allowed);
272
273 if (l & 0x10) {
274 l = l >> 1;
275 l &= 0x7;
276 } else
277 l = DC_DISABLE;
278
279 if (l != DC_DISABLE)
280 return (stock_freq * l / 8);
281
282 return stock_freq;
283}
284
285static struct freq_attr* p4clockmod_attr[] = {
286 &cpufreq_freq_attr_scaling_available_freqs,
287 NULL,
288};
289
290static struct cpufreq_driver p4clockmod_driver = {
291 .verify = cpufreq_p4_verify,
292 .target = cpufreq_p4_target,
293 .init = cpufreq_p4_cpu_init,
294 .exit = cpufreq_p4_cpu_exit,
295 .get = cpufreq_p4_get,
296 .name = "p4-clockmod",
297 .owner = THIS_MODULE,
298 .attr = p4clockmod_attr,
299};
300
301
302static int __init cpufreq_p4_init(void)
303{
304 struct cpuinfo_x86 *c = cpu_data;
305 int ret;
306
307 /*
308 * THERM_CONTROL is architectural for IA32 now, so
309 * we can rely on the capability checks
310 */
311 if (c->x86_vendor != X86_VENDOR_INTEL)
312 return -ENODEV;
313
314 if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) ||
315 !test_bit(X86_FEATURE_ACC, c->x86_capability))
316 return -ENODEV;
317
318 ret = cpufreq_register_driver(&p4clockmod_driver);
319 if (!ret)
320 printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n");
321
322 return (ret);
323}
324
325
326static void __exit cpufreq_p4_exit(void)
327{
328 cpufreq_unregister_driver(&p4clockmod_driver);
329}
330
331
332MODULE_AUTHOR ("Zwane Mwaikambo <zwane@commfireservices.com>");
333MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
334MODULE_LICENSE ("GPL");
335
336late_initcall(cpufreq_p4_init);
337module_exit(cpufreq_p4_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k6.c b/arch/i386/kernel/cpu/cpufreq/powernow-k6.c
new file mode 100644
index 000000000000..222f8cfe3c57
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k6.c
@@ -0,0 +1,256 @@
1/*
2 * This file was based upon code in Powertweak Linux (http://powertweak.sf.net)
3 * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski.
4 *
5 * Licensed under the terms of the GNU GPL License version 2.
6 *
7 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
8 */
9
10#include <linux/kernel.h>
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/cpufreq.h>
14#include <linux/ioport.h>
15#include <linux/slab.h>
16
17#include <asm/msr.h>
18#include <asm/timex.h>
19#include <asm/io.h>
20
21
22#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long
23 as it is unused */
24
25static unsigned int busfreq; /* FSB, in 10 kHz */
26static unsigned int max_multiplier;
27
28
29/* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */
30static struct cpufreq_frequency_table clock_ratio[] = {
31 {45, /* 000 -> 4.5x */ 0},
32 {50, /* 001 -> 5.0x */ 0},
33 {40, /* 010 -> 4.0x */ 0},
34 {55, /* 011 -> 5.5x */ 0},
35 {20, /* 100 -> 2.0x */ 0},
36 {30, /* 101 -> 3.0x */ 0},
37 {60, /* 110 -> 6.0x */ 0},
38 {35, /* 111 -> 3.5x */ 0},
39 {0, CPUFREQ_TABLE_END}
40};
41
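/*
 * Example (hypothetical values): with a 100 MHz FSB, busfreq is stored as
 * 10000 (10 kHz units), so a 4.5x multiplier (index 45) yields
 * 10000 * 45 = 450000 kHz = 450 MHz, the same busfreq * clock_ratio[i].index
 * form used in powernow_k6_cpu_init() below.
 */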
42
43/**
44 * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier
45 *
46 * Returns the current setting of the frequency multiplier. Core clock
47 * speed is frequency of the Front-Side Bus multiplied with this value.
48 */
49static int powernow_k6_get_cpu_multiplier(void)
50{
51 u64 invalue = 0;
52 u32 msrval;
53
54 msrval = POWERNOW_IOPORT + 0x1;
55 wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
56 invalue=inl(POWERNOW_IOPORT + 0x8);
57 msrval = POWERNOW_IOPORT + 0x0;
58 wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
59
60 return clock_ratio[(invalue >> 5)&7].index;
61}
62
63
64/**
65 * powernow_k6_set_state - set the PowerNow! multiplier
66 * @best_i: clock_ratio[best_i] is the target multiplier
67 *
68 * Tries to change the PowerNow! multiplier
69 */
70static void powernow_k6_set_state (unsigned int best_i)
71{
72 unsigned long outvalue=0, invalue=0;
73 unsigned long msrval;
74 struct cpufreq_freqs freqs;
75
76 if (clock_ratio[best_i].index > max_multiplier) {
77 printk(KERN_ERR "cpufreq: invalid target frequency\n");
78 return;
79 }
80
81 freqs.old = busfreq * powernow_k6_get_cpu_multiplier();
82 freqs.new = busfreq * clock_ratio[best_i].index;
83 freqs.cpu = 0; /* powernow-k6.c is UP only driver */
84
85 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
86
87 /* we now need to transform best_i to the BVC format, see AMD#23446 */
88
89 outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5);
90
91 msrval = POWERNOW_IOPORT + 0x1;
92 wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
93 invalue=inl(POWERNOW_IOPORT + 0x8);
94 invalue = invalue & 0xf;
95 outvalue = outvalue | invalue;
96 outl(outvalue ,(POWERNOW_IOPORT + 0x8));
97 msrval = POWERNOW_IOPORT + 0x0;
98 wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
99
100 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
101
102 return;
103}
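
/*
 * Sketch of the BVC word built above (hypothetical best_i = 2, i.e. the
 * 4.0x entry): outvalue = (1<<12) | (1<<10) | (1<<9) | (2<<5) = 0x1640,
 * with the low nibble of the current port value OR'ed back in before the
 * write, as in the code above.
 */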
104
105
106/**
107 * powernow_k6_verify - verifies a new CPUfreq policy
108 * @policy: new policy
109 *
110 * Policy must be within lowest and highest possible CPU Frequency,
111 * and at least one possible state must be within min and max.
112 */
113static int powernow_k6_verify(struct cpufreq_policy *policy)
114{
115 return cpufreq_frequency_table_verify(policy, &clock_ratio[0]);
116}
117
118
119/**
120 * powernow_k6_setpolicy - sets a new CPUFreq policy
121 * @policy: new policy
122 * @target_freq: the target frequency
123 * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
124 *
125 * sets a new CPUFreq policy
126 */
127static int powernow_k6_target (struct cpufreq_policy *policy,
128 unsigned int target_freq,
129 unsigned int relation)
130{
131 unsigned int newstate = 0;
132
133 if (cpufreq_frequency_table_target(policy, &clock_ratio[0], target_freq, relation, &newstate))
134 return -EINVAL;
135
136 powernow_k6_set_state(newstate);
137
138 return 0;
139}
140
141
142static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
143{
144 unsigned int i;
145 int result;
146
147 if (policy->cpu != 0)
148 return -ENODEV;
149
150 /* get frequencies */
151 max_multiplier = powernow_k6_get_cpu_multiplier();
152 busfreq = cpu_khz / max_multiplier;
153
154 /* table init */
155 for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
156 if (clock_ratio[i].index > max_multiplier)
157 clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID;
158 else
159 clock_ratio[i].frequency = busfreq * clock_ratio[i].index;
160 }
161
162 /* cpuinfo and default policy values */
163 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
164 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
165 policy->cur = busfreq * max_multiplier;
166
167 result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio);
168 if (result)
169 return (result);
170
171 cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu);
172
173 return 0;
174}
175
176
177static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
178{
179 unsigned int i;
180 for (i=0; i<8; i++) {
181 if (clock_ratio[i].index == max_multiplier)
182 powernow_k6_set_state(i);
183 }
184 cpufreq_frequency_table_put_attr(policy->cpu);
185 return 0;
186}
187
188static unsigned int powernow_k6_get(unsigned int cpu)
189{
190 return busfreq * powernow_k6_get_cpu_multiplier();
191}
192
193static struct freq_attr* powernow_k6_attr[] = {
194 &cpufreq_freq_attr_scaling_available_freqs,
195 NULL,
196};
197
198static struct cpufreq_driver powernow_k6_driver = {
199 .verify = powernow_k6_verify,
200 .target = powernow_k6_target,
201 .init = powernow_k6_cpu_init,
202 .exit = powernow_k6_cpu_exit,
203 .get = powernow_k6_get,
204 .name = "powernow-k6",
205 .owner = THIS_MODULE,
206 .attr = powernow_k6_attr,
207};
208
209
210/**
211 * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver
212 *
213 * Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported
214 * devices, -EINVAL or -ENOMEM on problems during initialization, and zero
215 * on success.
216 */
217static int __init powernow_k6_init(void)
218{
219 struct cpuinfo_x86 *c = cpu_data;
220
221 if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) ||
222 ((c->x86_model != 12) && (c->x86_model != 13)))
223 return -ENODEV;
224
225 if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
226 printk("cpufreq: PowerNow IOPORT region already used.\n");
227 return -EIO;
228 }
229
230 if (cpufreq_register_driver(&powernow_k6_driver)) {
231 release_region (POWERNOW_IOPORT, 16);
232 return -EINVAL;
233 }
234
235 return 0;
236}
237
238
239/**
240 * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support
241 *
242 * Unregisters AMD K6-2+ / K6-3+ PowerNow! support.
243 */
244static void __exit powernow_k6_exit(void)
245{
246 cpufreq_unregister_driver(&powernow_k6_driver);
247 release_region (POWERNOW_IOPORT, 16);
248}
249
250
251MODULE_AUTHOR ("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
252MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
253MODULE_LICENSE ("GPL");
254
255module_init(powernow_k6_init);
256module_exit(powernow_k6_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
new file mode 100644
index 000000000000..913f652623d9
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
@@ -0,0 +1,690 @@
1/*
2 * AMD K7 Powernow driver.
3 * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs.
4 * (C) 2003-2004 Dave Jones <davej@redhat.com>
5 *
6 * Licensed under the terms of the GNU GPL License version 2.
7 * Based upon datasheets & sample CPUs kindly provided by AMD.
8 *
9 * Errata 5: Processor may fail to execute a FID/VID change in presence of interrupt.
10 * - We cli/sti on stepping A0 CPUs around the FID/VID transition.
11 * Errata 15: Processors with half frequency multipliers may hang upon wakeup from disconnect.
12 * - We disable half multipliers if ACPI is used on A0 stepping CPUs.
13 */
14
15#include <linux/config.h>
16#include <linux/kernel.h>
17#include <linux/module.h>
18#include <linux/moduleparam.h>
19#include <linux/init.h>
20#include <linux/cpufreq.h>
21#include <linux/slab.h>
22#include <linux/string.h>
23#include <linux/dmi.h>
24
25#include <asm/msr.h>
26#include <asm/timex.h>
27#include <asm/io.h>
28#include <asm/system.h>
29
30#ifdef CONFIG_X86_POWERNOW_K7_ACPI
31#include <linux/acpi.h>
32#include <acpi/processor.h>
33#endif
34
35#include "powernow-k7.h"
36
37#define PFX "powernow: "
38
39
40struct psb_s {
41 u8 signature[10];
42 u8 tableversion;
43 u8 flags;
44 u16 settlingtime;
45 u8 reserved1;
46 u8 numpst;
47};
48
49struct pst_s {
50 u32 cpuid;
51 u8 fsbspeed;
52 u8 maxfid;
53 u8 startvid;
54 u8 numpstates;
55};
56
57#ifdef CONFIG_X86_POWERNOW_K7_ACPI
58union powernow_acpi_control_t {
59 struct {
60 unsigned long fid:5,
61 vid:5,
62 sgtc:20,
63 res1:2;
64 } bits;
65 unsigned long val;
66};
67#endif
68
69#ifdef CONFIG_CPU_FREQ_DEBUG
70/* divide by 1000 to get VCore voltage in V. */
71static int mobile_vid_table[32] = {
72 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
73 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0,
74 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
75 1075, 1050, 1025, 1000, 975, 950, 925, 0,
76};
77#endif
78
79/* divide by 10 to get FID. */
80static int fid_codes[32] = {
81 110, 115, 120, 125, 50, 55, 60, 65,
82 70, 75, 80, 85, 90, 95, 100, 105,
83 30, 190, 40, 200, 130, 135, 140, 210,
84 150, 225, 160, 165, 170, 180, -1, -1,
85};
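
/*
 * Example (hypothetical values): the table stores FID * 10, so with a
 * 133333 kHz FSB, a FID decoding to 50 (5.0x) gives
 * fsb * fid_codes[fid] / 10 = 133333 * 50 / 10 = 666665 kHz, the form
 * used by get_ranges() and powernow_get() below.
 */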
86
87/* This parameter is used to force ACPI instead of the legacy method for
88 * configuration purposes.
89 */
90
91static int acpi_force;
92
93static struct cpufreq_frequency_table *powernow_table;
94
95static unsigned int can_scale_bus;
96static unsigned int can_scale_vid;
97static unsigned int minimum_speed=-1;
98static unsigned int maximum_speed;
99static unsigned int number_scales;
100static unsigned int fsb;
101static unsigned int latency;
102static char have_a0;
103
104#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k7", msg)
105
106static int check_fsb(unsigned int fsbspeed)
107{
108 int delta;
109 unsigned int f = fsb / 1000;
110
111 delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed;
112 return (delta < 5);
113}
114
115static int check_powernow(void)
116{
117 struct cpuinfo_x86 *c = cpu_data;
118 unsigned int maxei, eax, ebx, ecx, edx;
119
120 if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) {
121#ifdef MODULE
122 printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n");
123#endif
124 return 0;
125 }
126
127 /* Get maximum capabilities */
128 maxei = cpuid_eax (0x80000000);
129 if (maxei < 0x80000007) { /* Any powernow info ? */
130#ifdef MODULE
131 printk (KERN_INFO PFX "No powernow capabilities detected\n");
132#endif
133 return 0;
134 }
135
136 if ((c->x86_model == 6) && (c->x86_mask == 0)) {
137 printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n");
138 have_a0 = 1;
139 }
140
141 cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
142
143 /* Check we can actually do something before we say anything.*/
144 if (!(edx & (1 << 1 | 1 << 2)))
145 return 0;
146
147 printk (KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
148
149 if (edx & 1 << 1) {
150 printk ("frequency");
151 can_scale_bus=1;
152 }
153
154 if ((edx & (1 << 1 | 1 << 2)) == 0x6)
155 printk (" and ");
156
157 if (edx & 1 << 2) {
158 printk ("voltage");
159 can_scale_vid=1;
160 }
161
162 printk (".\n");
163 return 1;
164}
165
166
167static int get_ranges (unsigned char *pst)
168{
169 unsigned int j;
170 unsigned int speed;
171 u8 fid, vid;
172
173 powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL);
174 if (!powernow_table)
175 return -ENOMEM;
176 memset(powernow_table, 0, (sizeof(struct cpufreq_frequency_table) * (number_scales + 1)));
177
178 for (j=0 ; j < number_scales; j++) {
179 fid = *pst++;
180
181 powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10;
182 powernow_table[j].index = fid; /* lower 8 bits */
183
184 speed = powernow_table[j].frequency;
185
186 if ((fid_codes[fid] % 10)==5) {
187#ifdef CONFIG_X86_POWERNOW_K7_ACPI
188 if (have_a0 == 1)
189 powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID;
190#endif
191 }
192
193 if (speed < minimum_speed)
194 minimum_speed = speed;
195 if (speed > maximum_speed)
196 maximum_speed = speed;
197
198 vid = *pst++;
199 powernow_table[j].index |= (vid << 8); /* upper 8 bits */
200
201 dprintk (" FID: 0x%x (%d.%dx [%dMHz]) "
202 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
203 fid_codes[fid] % 10, speed/1000, vid,
204 mobile_vid_table[vid]/1000,
205 mobile_vid_table[vid]%1000);
206 }
207 powernow_table[number_scales].frequency = CPUFREQ_TABLE_END;
208 powernow_table[number_scales].index = 0;
209
210 return 0;
211}
212
213
214static void change_FID(int fid)
215{
216 union msr_fidvidctl fidvidctl;
217
218 rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
219 if (fidvidctl.bits.FID != fid) {
220 fidvidctl.bits.SGTC = latency;
221 fidvidctl.bits.FID = fid;
222 fidvidctl.bits.VIDC = 0;
223 fidvidctl.bits.FIDC = 1;
224 wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
225 }
226}
227
228
229static void change_VID(int vid)
230{
231 union msr_fidvidctl fidvidctl;
232
233 rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
234 if (fidvidctl.bits.VID != vid) {
235 fidvidctl.bits.SGTC = latency;
236 fidvidctl.bits.VID = vid;
237 fidvidctl.bits.FIDC = 0;
238 fidvidctl.bits.VIDC = 1;
239 wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
240 }
241}
242
243
244static void change_speed (unsigned int index)
245{
246 u8 fid, vid;
247 struct cpufreq_freqs freqs;
248 union msr_fidvidstatus fidvidstatus;
249 int cfid;
250
251 /* fid are the lower 8 bits of the index we stored into
252 * the cpufreq frequency table in powernow_decode_bios,
253 * vid are the upper 8 bits.
254 */
255
256 fid = powernow_table[index].index & 0xFF;
257 vid = (powernow_table[index].index & 0xFF00) >> 8;
258
259 freqs.cpu = 0;
260
261 rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
262 cfid = fidvidstatus.bits.CFID;
263 freqs.old = fsb * fid_codes[cfid] / 10;
264
265 freqs.new = powernow_table[index].frequency;
266
267 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
268
269 /* Now do the magic poking into the MSRs. */
270
271 if (have_a0 == 1) /* A0 errata 5 */
272 local_irq_disable();
273
274 if (freqs.old > freqs.new) {
275 /* Going down, so change FID first */
276 change_FID(fid);
277 change_VID(vid);
278 } else {
279 /* Going up, so change VID first */
280 change_VID(vid);
281 change_FID(fid);
282 }
283
284
285 if (have_a0 == 1)
286 local_irq_enable();
287
288 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
289}
290
291
292#ifdef CONFIG_X86_POWERNOW_K7_ACPI
293
294static struct acpi_processor_performance *acpi_processor_perf;
295
296static int powernow_acpi_init(void)
297{
298 int i;
299 int retval = 0;
300 union powernow_acpi_control_t pc;
301
302 if (acpi_processor_perf != NULL && powernow_table != NULL) {
303 retval = -EINVAL;
304 goto err0;
305 }
306
307 acpi_processor_perf = kmalloc(sizeof(struct acpi_processor_performance),
308 GFP_KERNEL);
309
310 if (!acpi_processor_perf) {
311 retval = -ENOMEM;
312 goto err0;
313 }
314
315 memset(acpi_processor_perf, 0, sizeof(struct acpi_processor_performance));
316
317 if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
318 retval = -EIO;
319 goto err1;
320 }
321
322 if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) {
323 retval = -ENODEV;
324 goto err2;
325 }
326
327 if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) {
328 retval = -ENODEV;
329 goto err2;
330 }
331
332 number_scales = acpi_processor_perf->state_count;
333
334 if (number_scales < 2) {
335 retval = -ENODEV;
336 goto err2;
337 }
338
339 powernow_table = kmalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL);
340 if (!powernow_table) {
341 retval = -ENOMEM;
342 goto err2;
343 }
344
345 memset(powernow_table, 0, ((number_scales + 1) * sizeof(struct cpufreq_frequency_table)));
346
347 pc.val = (unsigned long) acpi_processor_perf->states[0].control;
348 for (i = 0; i < number_scales; i++) {
349 u8 fid, vid;
350 unsigned int speed;
351
352 pc.val = (unsigned long) acpi_processor_perf->states[i].control;
353 dprintk ("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
354 i,
355 (u32) acpi_processor_perf->states[i].core_frequency,
356 (u32) acpi_processor_perf->states[i].power,
357 (u32) acpi_processor_perf->states[i].transition_latency,
358 (u32) acpi_processor_perf->states[i].control,
359 pc.bits.sgtc);
360
361 vid = pc.bits.vid;
362 fid = pc.bits.fid;
363
364 powernow_table[i].frequency = fsb * fid_codes[fid] / 10;
365 powernow_table[i].index = fid; /* lower 8 bits */
366 powernow_table[i].index |= (vid << 8); /* upper 8 bits */
367
368 speed = powernow_table[i].frequency;
369
370 if ((fid_codes[fid] % 10)==5) {
371 if (have_a0 == 1)
372 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
373 }
374
375 dprintk (" FID: 0x%x (%d.%dx [%dMHz]) "
376 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
377 fid_codes[fid] % 10, speed/1000, vid,
378 mobile_vid_table[vid]/1000,
379 mobile_vid_table[vid]%1000);
380
381 if (latency < pc.bits.sgtc)
382 latency = pc.bits.sgtc;
383
384 if (speed < minimum_speed)
385 minimum_speed = speed;
386 if (speed > maximum_speed)
387 maximum_speed = speed;
388 }
389
390 powernow_table[i].frequency = CPUFREQ_TABLE_END;
391 powernow_table[i].index = 0;
392
393 /* notify BIOS that we exist */
394 acpi_processor_notify_smm(THIS_MODULE);
395
396 return 0;
397
398err2:
399 acpi_processor_unregister_performance(acpi_processor_perf, 0);
400err1:
401 kfree(acpi_processor_perf);
402err0:
403 printk(KERN_WARNING PFX "ACPI perflib cannot be used on this platform\n");
404 acpi_processor_perf = NULL;
405 return retval;
406}
407#else
408static int powernow_acpi_init(void)
409{
410 printk(KERN_INFO PFX "no ACPI processor support found."
411 " Please recompile your kernel with ACPI processor support\n");
412 return -EINVAL;
413}
414#endif
415
416static int powernow_decode_bios (int maxfid, int startvid)
417{
418 struct psb_s *psb;
419 struct pst_s *pst;
420 unsigned int i, j;
421 unsigned char *p;
422 unsigned int etuple;
423 unsigned int ret;
424
425 etuple = cpuid_eax(0x80000001);
426
427 for (i=0xC0000; i < 0xffff0 ; i+=16) {
428
429 p = phys_to_virt(i);
430
431 if (memcmp(p, "AMDK7PNOW!", 10) == 0){
432 dprintk ("Found PSB header at %p\n", p);
433 psb = (struct psb_s *) p;
434 dprintk ("Table version: 0x%x\n", psb->tableversion);
435 if (psb->tableversion != 0x12) {
436 printk (KERN_INFO PFX "Sorry, only v1.2 tables supported right now\n");
437 return -ENODEV;
438 }
439
440 dprintk ("Flags: 0x%x\n", psb->flags);
441 if ((psb->flags & 1)==0) {
442 dprintk ("Mobile voltage regulator\n");
443 } else {
444 dprintk ("Desktop voltage regulator\n");
445 }
446
447 latency = psb->settlingtime;
448 if (latency < 100) {
449 printk (KERN_INFO PFX "BIOS set settling time to %d microseconds. "
450 "Should be at least 100. Correcting.\n", latency);
451 latency = 100;
452 }
453 dprintk ("Settling Time: %d microseconds.\n", psb->settlingtime);
454 dprintk ("Has %d PST tables. (Only dumping ones relevant to this CPU).\n", psb->numpst);
455
456 p += sizeof (struct psb_s);
457
458 pst = (struct pst_s *) p;
459
460 for (i = 0 ; i <psb->numpst; i++) {
461 pst = (struct pst_s *) p;
462 number_scales = pst->numpstates;
463
464 if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) &&
465 (maxfid==pst->maxfid) && (startvid==pst->startvid))
466 {
467 dprintk ("PST:%d (@%p)\n", i, pst);
468 dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n",
469 pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
470
471 ret = get_ranges ((char *) pst + sizeof (struct pst_s));
472 return ret;
473
474 } else {
475 p = (char *) pst + sizeof (struct pst_s);
476 for (j=0 ; j < number_scales; j++)
477 p+=2;
478 }
479 }
480 printk (KERN_INFO PFX "No PST tables match this cpuid (0x%x)\n", etuple);
481 printk (KERN_INFO PFX "This is indicative of a broken BIOS.\n");
482
483 return -EINVAL;
484 }
485 p++;
486 }
487
488 return -ENODEV;
489}
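
/*
 * Layout scanned above (as implied by struct psb_s / struct pst_s):
 *
 * "AMDK7PNOW!" PSB header (struct psb_s)
 * PST #0: struct pst_s, then numpstates * 2 bytes of (fid, vid) pairs
 * PST #1: ...
 *
 * which is why the non-matching branch advances p by two bytes per scale.
 */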
490
491
492static int powernow_target (struct cpufreq_policy *policy,
493 unsigned int target_freq,
494 unsigned int relation)
495{
496 unsigned int newstate;
497
498 if (cpufreq_frequency_table_target(policy, powernow_table, target_freq, relation, &newstate))
499 return -EINVAL;
500
501 change_speed(newstate);
502
503 return 0;
504}
505
506
507static int powernow_verify (struct cpufreq_policy *policy)
508{
509 return cpufreq_frequency_table_verify(policy, powernow_table);
510}
511
512/*
513 * We use the fact that the bus frequency is a multiple of
514 * 100000/3 kHz, then we compute sgtc according
515 * to this multiple.
516 * That way, we match more closely how AMD intends this to work.
517 * We will then get the same kind of behaviour already tested under
518 * the "well-known" other OS.
519 */
520static int __init fixup_sgtc(void)
521{
522 unsigned int sgtc;
523 unsigned int m;
524
525 m = fsb / 3333;
526 if ((m % 10) >= 5)
527 m += 5;
528
529 m /= 10;
530
531 sgtc = 100 * m * latency;
532 sgtc = sgtc / 3;
533 if (sgtc > 0xfffff) {
534 printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc);
535 sgtc = 0xfffff;
536 }
537 return sgtc;
538}
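
/*
 * Worked example (hypothetical fsb of 133333 kHz): m = 133333 / 3333 = 40,
 * no rounding adjustment (40 % 10 < 5), m /= 10 gives 4, so
 * sgtc = 100 * 4 * latency / 3, i.e. the settling time expressed in
 * 100000/3 kHz bus clocks.
 */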
539
540static unsigned int powernow_get(unsigned int cpu)
541{
542 union msr_fidvidstatus fidvidstatus;
543 unsigned int cfid;
544
545 if (cpu)
546 return 0;
547 rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
548 cfid = fidvidstatus.bits.CFID;
549
550 return (fsb * fid_codes[cfid] / 10);
551}
552
553
554static int __init acer_cpufreq_pst(struct dmi_system_id *d)
555{
556 printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident);
557 printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n");
558 printk(KERN_WARNING "cpufreq scaling has been disabled as a result of this.\n");
559 return 0;
560}
561
562/*
563 * Some Athlon laptops have really broken PST tables.
564 * A BIOS update is all that can save them.
565 * Mention this, and disable cpufreq.
566 */
567static struct dmi_system_id __initdata powernow_dmi_table[] = {
568 {
569 .callback = acer_cpufreq_pst,
570 .ident = "Acer Aspire",
571 .matches = {
572 DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"),
573 DMI_MATCH(DMI_BIOS_VERSION, "3A71"),
574 },
575 },
576 { }
577};
578
579static int __init powernow_cpu_init (struct cpufreq_policy *policy)
580{
581 union msr_fidvidstatus fidvidstatus;
582 int result;
583
584 if (policy->cpu != 0)
585 return -ENODEV;
586
587 rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
588
589 /* A K7 with powernow technology is set to max frequency by BIOS */
590 fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.MFID];
591 if (!fsb) {
592 printk(KERN_WARNING PFX "can not determine bus frequency\n");
593 return -EINVAL;
594 }
595 dprintk("FSB: %3d.%03d MHz\n", fsb/1000, fsb%1000);
596
597 if (dmi_check_system(powernow_dmi_table) || acpi_force) {
598 printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n");
599 result = powernow_acpi_init();
600 } else {
601 result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID);
602 if (result) {
603 printk (KERN_INFO PFX "Trying ACPI perflib\n");
604 maximum_speed = 0;
605 minimum_speed = -1;
606 latency = 0;
607 result = powernow_acpi_init();
608 if (result) {
609 printk (KERN_INFO PFX "ACPI and legacy methods failed\n");
610 printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.shtml\n");
611 }
612 } else {
613 /* SGTC use the bus clock as timer */
614 latency = fixup_sgtc();
615 printk(KERN_INFO PFX "SGTC: %d\n", latency);
616 }
617 }
618
619 if (result)
620 return result;
621
622 printk (KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
623 minimum_speed/1000, maximum_speed/1000);
624
625 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
626
627 policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency);
628
629 policy->cur = powernow_get(0);
630
631 cpufreq_frequency_table_get_attr(powernow_table, policy->cpu);
632
633 return cpufreq_frequency_table_cpuinfo(policy, powernow_table);
634}
635
636static int powernow_cpu_exit (struct cpufreq_policy *policy) {
637 cpufreq_frequency_table_put_attr(policy->cpu);
638
639#ifdef CONFIG_X86_POWERNOW_K7_ACPI
640 if (acpi_processor_perf) {
641 acpi_processor_unregister_performance(acpi_processor_perf, 0);
642 kfree(acpi_processor_perf);
643 }
644#endif
645
646 if (powernow_table)
647 kfree(powernow_table);
648
649 return 0;
650}
651
652static struct freq_attr* powernow_table_attr[] = {
653 &cpufreq_freq_attr_scaling_available_freqs,
654 NULL,
655};
656
657static struct cpufreq_driver powernow_driver = {
658 .verify = powernow_verify,
659 .target = powernow_target,
660 .get = powernow_get,
661 .init = powernow_cpu_init,
662 .exit = powernow_cpu_exit,
663 .name = "powernow-k7",
664 .owner = THIS_MODULE,
665 .attr = powernow_table_attr,
666};
667
668static int __init powernow_init (void)
669{
670 if (check_powernow()==0)
671 return -ENODEV;
672 return cpufreq_register_driver(&powernow_driver);
673}
674
675
676static void __exit powernow_exit (void)
677{
678 cpufreq_unregister_driver(&powernow_driver);
679}
680
681module_param(acpi_force, int, 0444);
682MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
683
684MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
685MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors.");
686MODULE_LICENSE ("GPL");
687
688late_initcall(powernow_init);
689module_exit(powernow_exit);
690
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.h b/arch/i386/kernel/cpu/cpufreq/powernow-k7.h
new file mode 100644
index 000000000000..f8a63b3664e3
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.h
@@ -0,0 +1,44 @@
1/*
2 * $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $
3 * (C) 2003 Dave Jones.
4 *
5 * Licensed under the terms of the GNU GPL License version 2.
6 *
7 * AMD-specific information
8 *
9 */
10
11union msr_fidvidctl {
12 struct {
13 unsigned FID:5, // 4:0
14 reserved1:3, // 7:5
15 VID:5, // 12:8
16 reserved2:3, // 15:13
17 FIDC:1, // 16
18 VIDC:1, // 17
19 reserved3:2, // 19:18
20 FIDCHGRATIO:1, // 20
21 reserved4:11, // 31-21
22 SGTC:20, // 32:51
23 reserved5:12; // 63:52
24 } bits;
25 unsigned long long val;
26};
27
28union msr_fidvidstatus {
29 struct {
30 unsigned CFID:5, // 4:0
31 reserved1:3, // 7:5
32 SFID:5, // 12:8
33 reserved2:3, // 15:13
34 MFID:5, // 20:16
35 reserved3:11, // 31:21
36 CVID:5, // 36:32
37 reserved4:3, // 39:37
38 SVID:5, // 44:40
39 reserved5:3, // 47:45
40 MVID:5, // 52:48
41 reserved6:11; // 63:53
42 } bits;
43 unsigned long long val;
44};
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
new file mode 100644
index 000000000000..a65ff7e32e5d
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
@@ -0,0 +1,1135 @@
1/*
2 * (c) 2003, 2004 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the
4 * GNU general public license version 2. See "COPYING" or
5 * http://www.gnu.org/licenses/gpl.html
6 *
7 * Support : paul.devriendt@amd.com
8 *
9 * Based on the powernow-k7.c module written by Dave Jones.
10 * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs
11 * (C) 2004 Dominik Brodowski <linux@brodo.de>
12 * (C) 2004 Pavel Machek <pavel@suse.cz>
13 * Licensed under the terms of the GNU GPL License version 2.
14 * Based upon datasheets & sample CPUs kindly provided by AMD.
15 *
16 * Valuable input gratefully received from Dave Jones, Pavel Machek,
17 * Dominik Brodowski, and others.
18 * Processor information obtained from Chapter 9 (Power and Thermal Management)
19 * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
20 * Opteron Processors" available for download from www.amd.com
21 *
22 * Tables for specific CPUs can be inferred from
23 * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf
24 */
25
26#include <linux/kernel.h>
27#include <linux/smp.h>
28#include <linux/module.h>
29#include <linux/init.h>
30#include <linux/cpufreq.h>
31#include <linux/slab.h>
32#include <linux/string.h>
33
34#include <asm/msr.h>
35#include <asm/io.h>
36#include <asm/delay.h>
37
38#ifdef CONFIG_X86_POWERNOW_K8_ACPI
39#include <linux/acpi.h>
40#include <acpi/processor.h>
41#endif
42
43#define PFX "powernow-k8: "
44#define BFX PFX "BIOS error: "
45#define VERSION "version 1.00.09e"
46#include "powernow-k8.h"
47
48/* serialize freq changes */
49static DECLARE_MUTEX(fidvid_sem);
50
51static struct powernow_k8_data *powernow_data[NR_CPUS];
52
53/* Return a frequency in MHz, given an input fid */
54static u32 find_freq_from_fid(u32 fid)
55{
56 return 800 + (fid * 100);
57}
58
59/* Return a frequency in kHz, given an input fid */
60static u32 find_khz_freq_from_fid(u32 fid)
61{
62 return 1000 * find_freq_from_fid(fid);
63}
64
65/* Return a voltage in millivolts, given an input vid */
66static u32 find_millivolts_from_vid(struct powernow_k8_data *data, u32 vid)
67{
68 return 1550-vid*25;
69}
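
/*
 * Examples from the helpers above: fid 0 -> 800 MHz and fid 12 -> 2000 MHz
 * (800 + fid * 100); vid 0 -> 1550 mV and vid 0x1e -> 800 mV
 * (1550 - vid * 25), consistent with the comment at
 * decrease_vid_code_by_step() below.
 */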
70
71/* Return the vco fid for an input fid
72 *
73 * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
74 * only from corresponding high fids. This returns "high" fid corresponding to
75 * "low" one.
76 */
77static u32 convert_fid_to_vco_fid(u32 fid)
78{
79 if (fid < HI_FID_TABLE_BOTTOM) {
80 return 8 + (2 * fid);
81 } else {
82 return fid;
83 }
84}
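
/*
 * Example: assuming a fid of 2 is below HI_FID_TABLE_BOTTOM, it maps to
 * VCO fid 8 + 2 * 2 = 12; fids at or above the bottom of the high table
 * map to themselves.
 */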
85
86/*
87 * Return 1 if the pending bit is set. Unless we just instructed the processor
88 * to transition to a new state, seeing this bit set is really bad news.
89 */
90static int pending_bit_stuck(void)
91{
92 u32 lo, hi;
93
94 rdmsr(MSR_FIDVID_STATUS, lo, hi);
95 return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
96}
97
98/*
99 * Update the global current fid / vid values from the status msr.
100 * Returns 1 on error.
101 */
102static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
103{
104 u32 lo, hi;
105 u32 i = 0;
106
107 lo = MSR_S_LO_CHANGE_PENDING;
108 while (lo & MSR_S_LO_CHANGE_PENDING) {
109 if (i++ > 0x1000000) {
110 printk(KERN_ERR PFX "detected change pending stuck\n");
111 return 1;
112 }
113 rdmsr(MSR_FIDVID_STATUS, lo, hi);
114 }
115
116 data->currvid = hi & MSR_S_HI_CURRENT_VID;
117 data->currfid = lo & MSR_S_LO_CURRENT_FID;
118
119 return 0;
120}
121
122/* the isochronous relief time */
123static void count_off_irt(struct powernow_k8_data *data)
124{
125 udelay((1 << data->irt) * 10);
126 return;
127}
128
129/* the voltage stabilization time */
130static void count_off_vst(struct powernow_k8_data *data)
131{
132 udelay(data->vstable * VST_UNITS_20US);
133 return;
134}
135
136/* need to init the control msr to a safe value (for each cpu) */
137static void fidvid_msr_init(void)
138{
139 u32 lo, hi;
140 u8 fid, vid;
141
142 rdmsr(MSR_FIDVID_STATUS, lo, hi);
143 vid = hi & MSR_S_HI_CURRENT_VID;
144 fid = lo & MSR_S_LO_CURRENT_FID;
145 lo = fid | (vid << MSR_C_LO_VID_SHIFT);
146 hi = MSR_C_HI_STP_GNT_BENIGN;
147 dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
148 wrmsr(MSR_FIDVID_CTL, lo, hi);
149}
150
151
152/* write the new fid value along with the other control fields to the msr */
153static int write_new_fid(struct powernow_k8_data *data, u32 fid)
154{
155 u32 lo;
156 u32 savevid = data->currvid;
157
158 if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) {
159 printk(KERN_ERR PFX "internal error - overflow on fid write\n");
160 return 1;
161 }
162
163 lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID;
164
165 dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
166 fid, lo, data->plllock * PLL_LOCK_CONVERSION);
167
168 wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
169
170 if (query_current_values_with_pending_wait(data))
171 return 1;
172
173 count_off_irt(data);
174
175 if (savevid != data->currvid) {
176 printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n",
177 savevid, data->currvid);
178 return 1;
179 }
180
181 if (fid != data->currfid) {
182 printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
183 data->currfid);
184 return 1;
185 }
186
187 return 0;
188}
189
190/* Write a new vid to the hardware */
191static int write_new_vid(struct powernow_k8_data *data, u32 vid)
192{
193 u32 lo;
194 u32 savefid = data->currfid;
195
196 if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) {
197 printk(KERN_ERR PFX "internal error - overflow on vid write\n");
198 return 1;
199 }
200
201 lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID;
202
203 dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
204 vid, lo, STOP_GRANT_5NS);
205
206 wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
207
208 if (query_current_values_with_pending_wait(data))
209 return 1;
210
211 if (savefid != data->currfid) {
212 printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n",
213 savefid, data->currfid);
214 return 1;
215 }
216
217 if (vid != data->currvid) {
218 printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid,
219 data->currvid);
220 return 1;
221 }
222
223 return 0;
224}
225
226/*
227 * Reduce the vid toward reqvid, by no more than "step" vid codes at a time.
228 * Decreasing vid codes represent increasing voltages:
229 * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of 0x1f is off.
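 * Illustrative use: with currvid 0x12, reqvid 0x0c and step 2, a single
 * call writes vid 0x10; the caller loops until reqvid is reached.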
230 */
231static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step)
232{
233 if ((data->currvid - reqvid) > step)
234 reqvid = data->currvid - step;
235
236 if (write_new_vid(data, reqvid))
237 return 1;
238
239 count_off_vst(data);
240
241 return 0;
242}
243
244/* Change the fid and vid, in three phases. */
245static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid)
246{
247 if (core_voltage_pre_transition(data, reqvid))
248 return 1;
249
250 if (core_frequency_transition(data, reqfid))
251 return 1;
252
253 if (core_voltage_post_transition(data, reqvid))
254 return 1;
255
256 if (query_current_values_with_pending_wait(data))
257 return 1;
258
259 if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
260 printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n",
261 smp_processor_id(),
262 reqfid, reqvid, data->currfid, data->currvid);
263 return 1;
264 }
265
266 dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
267 smp_processor_id(), data->currfid, data->currvid);
268
269 return 0;
270}
271
272/* Phase 1 - core voltage transition ... setup voltage */
273static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid)
274{
275 u32 rvosteps = data->rvo;
276 u32 savefid = data->currfid;
277
278 dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n",
279 smp_processor_id(),
280 data->currfid, data->currvid, reqvid, data->rvo);
281
282 while (data->currvid > reqvid) {
283 dprintk("ph1: curr 0x%x, req vid 0x%x\n",
284 data->currvid, reqvid);
285 if (decrease_vid_code_by_step(data, reqvid, data->vidmvs))
286 return 1;
287 }
288
289 while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) {
290 if (data->currvid == 0) {
291 rvosteps = 0;
292 } else {
293 dprintk("ph1: changing vid for rvo, req 0x%x\n",
294 data->currvid - 1);
295 if (decrease_vid_code_by_step(data, data->currvid - 1, 1))
296 return 1;
297 rvosteps--;
298 }
299 }
300
301 if (query_current_values_with_pending_wait(data))
302 return 1;
303
304 if (savefid != data->currfid) {
305 printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid);
306 return 1;
307 }
308
309 dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n",
310 data->currfid, data->currvid);
311
312 return 0;
313}
314
315/* Phase 2 - core frequency transition */
316static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
317{
318 u32 vcoreqfid, vcocurrfid, vcofiddiff, savevid = data->currvid;
319
320 if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
321 printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n",
322 reqfid, data->currfid);
323 return 1;
324 }
325
326 if (data->currfid == reqfid) {
327 printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid);
328 return 0;
329 }
330
331 dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n",
332 smp_processor_id(),
333 data->currfid, data->currvid, reqfid);
334
335 vcoreqfid = convert_fid_to_vco_fid(reqfid);
336 vcocurrfid = convert_fid_to_vco_fid(data->currfid);
337 vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
338 : vcoreqfid - vcocurrfid;
339
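	/*
	 * Worked example (values illustrative): for currfid 0x02 (low table)
	 * the vco fid is 8 + 4 = 0x0c, while reqfid 0x12 is its own vco fid.
	 * The loop below then steps the fid 0x02 -> 0x0e -> 0x10 before the
	 * final write of reqfid, so each VCO change stays within 200 MHz.
	 */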
340 while (vcofiddiff > 2) {
341 if (reqfid > data->currfid) {
342 if (data->currfid > LO_FID_TABLE_TOP) {
343 if (write_new_fid(data, data->currfid + 2)) {
344 return 1;
345 }
346 } else {
347 if (write_new_fid
348 (data, 2 + convert_fid_to_vco_fid(data->currfid))) {
349 return 1;
350 }
351 }
352 } else {
353 if (write_new_fid(data, data->currfid - 2))
354 return 1;
355 }
356
357 vcocurrfid = convert_fid_to_vco_fid(data->currfid);
358 vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
359 : vcoreqfid - vcocurrfid;
360 }
361
362 if (write_new_fid(data, reqfid))
363 return 1;
364
365 if (query_current_values_with_pending_wait(data))
366 return 1;
367
368 if (data->currfid != reqfid) {
369 printk(KERN_ERR PFX
370 "ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n",
371 data->currfid, reqfid);
372 return 1;
373 }
374
375 if (savevid != data->currvid) {
376 printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n",
377 savevid, data->currvid);
378 return 1;
379 }
380
381 dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n",
382 data->currfid, data->currvid);
383
384 return 0;
385}
386
387/* Phase 3 - core voltage transition flow ... jump to the final vid. */
388static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid)
389{
390 u32 savefid = data->currfid;
391 u32 savereqvid = reqvid;
392
393 dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
394 smp_processor_id(),
395 data->currfid, data->currvid);
396
397 if (reqvid != data->currvid) {
398 if (write_new_vid(data, reqvid))
399 return 1;
400
401 if (savefid != data->currfid) {
402 printk(KERN_ERR PFX
403 "ph3: bad fid change, save 0x%x, curr 0x%x\n",
404 savefid, data->currfid);
405 return 1;
406 }
407
408 if (data->currvid != reqvid) {
409 printk(KERN_ERR PFX
410		       "ph3: failed vid transition, req 0x%x, curr 0x%x\n",
411 reqvid, data->currvid);
412 return 1;
413 }
414 }
415
416 if (query_current_values_with_pending_wait(data))
417 return 1;
418
419 if (savereqvid != data->currvid) {
420 dprintk("ph3 failed, currvid 0x%x\n", data->currvid);
421 return 1;
422 }
423
424 if (savefid != data->currfid) {
425 dprintk("ph3 failed, currfid changed 0x%x\n",
426 data->currfid);
427 return 1;
428 }
429
430 dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n",
431 data->currfid, data->currvid);
432
433 return 0;
434}
435
436static int check_supported_cpu(unsigned int cpu)
437{
438 cpumask_t oldmask = CPU_MASK_ALL;
439 u32 eax, ebx, ecx, edx;
440 unsigned int rc = 0;
441
442 oldmask = current->cpus_allowed;
443 set_cpus_allowed(current, cpumask_of_cpu(cpu));
444 schedule();
445
446 if (smp_processor_id() != cpu) {
447		printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
448 goto out;
449 }
450
451 if (current_cpu_data.x86_vendor != X86_VENDOR_AMD)
452 goto out;
453
454 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
455 if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
456 ((eax & CPUID_XFAM) != CPUID_XFAM_K8) ||
457 ((eax & CPUID_XMOD) > CPUID_XMOD_REV_E)) {
458 printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
459 goto out;
460 }
461
462 eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
463 if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
464 printk(KERN_INFO PFX
465 "No frequency change capabilities detected\n");
466 goto out;
467 }
468
469 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
470 if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) {
471 printk(KERN_INFO PFX "Power state transitions not supported\n");
472 goto out;
473 }
474
475 rc = 1;
476
477out:
478 set_cpus_allowed(current, oldmask);
479 schedule();
480 return rc;
481
482}
483
484static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
485{
486 unsigned int j;
487 u8 lastfid = 0xff;
488
489 for (j = 0; j < data->numps; j++) {
490 if (pst[j].vid > LEAST_VID) {
491			printk(KERN_ERR PFX "pstate %d: invalid vid 0x%x\n", j, pst[j].vid);
492 return -EINVAL;
493 }
494		if (pst[j].vid < data->rvo) {	/* vid - rvo >= 0 */
495 printk(KERN_ERR BFX "0 vid exceeded with pstate %d\n", j);
496 return -ENODEV;
497 }
498		if (pst[j].vid < maxvid + data->rvo) {	/* vid - rvo >= maxvid */
499 printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j);
500 return -ENODEV;
501 }
502 if ((pst[j].fid > MAX_FID)
503 || (pst[j].fid & 1)
504 || (j && (pst[j].fid < HI_FID_TABLE_BOTTOM))) {
505 /* Only first fid is allowed to be in "low" range */
506 printk(KERN_ERR PFX "two low fids - %d : 0x%x\n", j, pst[j].fid);
507 return -EINVAL;
508 }
509 if (pst[j].fid < lastfid)
510 lastfid = pst[j].fid;
511 }
512 if (lastfid & 1) {
513 printk(KERN_ERR PFX "lastfid invalid\n");
514 return -EINVAL;
515 }
516 if (lastfid > LO_FID_TABLE_TOP)
517 printk(KERN_INFO PFX "first fid not from lo freq table\n");
518
519 return 0;
520}
521
522static void print_basics(struct powernow_k8_data *data)
523{
524 int j;
525 for (j = 0; j < data->numps; j++) {
526 if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID)
527 printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x (%d mV)\n", j,
528 data->powernow_table[j].index & 0xff,
529 data->powernow_table[j].frequency/1000,
530 data->powernow_table[j].index >> 8,
531 find_millivolts_from_vid(data, data->powernow_table[j].index >> 8));
532 }
533 if (data->batps)
534 printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps);
535}
536
537static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
538{
539 struct cpufreq_frequency_table *powernow_table;
540 unsigned int j;
541
542 if (data->batps) { /* use ACPI support to get full speed on mains power */
543		printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range)\n", data->batps);
544 data->numps = data->batps;
545 }
546
547	for (j = 1; j < data->numps; j++) {
548 if (pst[j-1].fid >= pst[j].fid) {
549 printk(KERN_ERR PFX "PST out of sequence\n");
550 return -EINVAL;
551 }
552 }
553
554 if (data->numps < 2) {
555		printk(KERN_ERR PFX "no p-states to transition\n");
556 return -ENODEV;
557 }
558
559 if (check_pst_table(data, pst, maxvid))
560 return -EINVAL;
561
562 powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
563 * (data->numps + 1)), GFP_KERNEL);
564 if (!powernow_table) {
565 printk(KERN_ERR PFX "powernow_table memory alloc failure\n");
566 return -ENOMEM;
567 }
568
569 for (j = 0; j < data->numps; j++) {
570 powernow_table[j].index = pst[j].fid; /* lower 8 bits */
571 powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
572 powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid);
573 }
574 powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
575 powernow_table[data->numps].index = 0;
576
577 if (query_current_values_with_pending_wait(data)) {
578 kfree(powernow_table);
579 return -EIO;
580 }
581
582 dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
583 data->powernow_table = powernow_table;
584 print_basics(data);
585
586 for (j = 0; j < data->numps; j++)
587 if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid))
588 return 0;
589
590 dprintk("currfid/vid do not match PST, ignoring\n");
591 return 0;
592}
593
594/* Find and validate the PSB/PST table in BIOS. */
595static int find_psb_table(struct powernow_k8_data *data)
596{
597 struct psb_s *psb;
598 unsigned int i;
599 u32 mvs;
600 u8 maxvid;
601 u32 cpst = 0;
602 u32 thiscpuid;
603
604 for (i = 0xc0000; i < 0xffff0; i += 0x10) {
605 /* Scan BIOS looking for the signature. */
606		/* It cannot be at 0xffff0 - it is too big. */
607
608 psb = phys_to_virt(i);
609 if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0)
610 continue;
611
612 dprintk("found PSB header at 0x%p\n", psb);
613
614 dprintk("table vers: 0x%x\n", psb->tableversion);
615 if (psb->tableversion != PSB_VERSION_1_4) {
616 printk(KERN_INFO BFX "PSB table is not v1.4\n");
617 return -ENODEV;
618 }
619
620 dprintk("flags: 0x%x\n", psb->flags1);
621 if (psb->flags1) {
622 printk(KERN_ERR BFX "unknown flags\n");
623 return -ENODEV;
624 }
625
626 data->vstable = psb->vstable;
627 dprintk("voltage stabilization time: %d(*20us)\n", data->vstable);
628
629 dprintk("flags2: 0x%x\n", psb->flags2);
630 data->rvo = psb->flags2 & 3;
631 data->irt = ((psb->flags2) >> 2) & 3;
632 mvs = ((psb->flags2) >> 4) & 3;
633 data->vidmvs = 1 << mvs;
634 data->batps = ((psb->flags2) >> 6) & 3;
635
636 dprintk("ramp voltage offset: %d\n", data->rvo);
637 dprintk("isochronous relief time: %d\n", data->irt);
638 dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);
639
640 dprintk("numpst: 0x%x\n", psb->num_tables);
641 cpst = psb->num_tables;
642		if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0)) {
643			thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
644			if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0)) {
645 cpst = 1;
646 }
647 }
648 if (cpst != 1) {
649 printk(KERN_ERR BFX "numpst must be 1\n");
650 return -ENODEV;
651 }
652
653 data->plllock = psb->plllocktime;
654 dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
655 dprintk("maxfid: 0x%x\n", psb->maxfid);
656 dprintk("maxvid: 0x%x\n", psb->maxvid);
657 maxvid = psb->maxvid;
658
659 data->numps = psb->numps;
660 dprintk("numpstates: 0x%x\n", data->numps);
661 return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid);
662 }
663 /*
664	 * If you see this message, complain to the BIOS manufacturer.
665	 * If they tell you "we do not support Linux" or some similar
666	 * nonsense, remember that Windows 2000 uses the same legacy
667	 * mechanism that the old Linux PSB driver uses, so a PSB that
668	 * is broken for Linux is broken for Windows 2000 as well.
669 *
670 * The reference to the AMD documentation is chapter 9 in the
671 * BIOS and Kernel Developer's Guide, which is available on
672 * www.amd.com
673 */
674 printk(KERN_ERR PFX "BIOS error - no PSB\n");
675 return -ENODEV;
676}
677
678#ifdef CONFIG_X86_POWERNOW_K8_ACPI
679static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
680{
681 if (!data->acpi_data.state_count)
682 return;
683
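	/*
	 * The ACPI _PSS "control" word packs every transition parameter;
	 * per the masks in powernow-k8.h the layout is:
	 *   [31:30] IRT, [29:28] RVO, [26:20] PLL lock time,
	 *   [19:18] MVS, [17:11] VST, [10:6] VID, [5:0] FID.
	 */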
684 data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
685 data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
686 data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
687 data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
688 data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
689}
690
691static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
692{
693 int i;
694 int cntlofreq = 0;
695 struct cpufreq_frequency_table *powernow_table;
696
697 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
698 dprintk("register performance failed\n");
699 return -EIO;
700 }
701
702 /* verify the data contained in the ACPI structures */
703 if (data->acpi_data.state_count <= 1) {
704 dprintk("No ACPI P-States\n");
705 goto err_out;
706 }
707
708 if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
709 (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
710 dprintk("Invalid control/status registers (%x - %x)\n",
711 data->acpi_data.control_register.space_id,
712 data->acpi_data.status_register.space_id);
713 goto err_out;
714 }
715
716 /* fill in data->powernow_table */
717 powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
718 * (data->acpi_data.state_count + 1)), GFP_KERNEL);
719 if (!powernow_table) {
720 dprintk("powernow_table memory alloc failure\n");
721 goto err_out;
722 }
723
724 for (i = 0; i < data->acpi_data.state_count; i++) {
725 u32 fid = data->acpi_data.states[i].control & FID_MASK;
726 u32 vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
727
728 dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
729
730 powernow_table[i].index = fid; /* lower 8 bits */
731 powernow_table[i].index |= (vid << 8); /* upper 8 bits */
732 powernow_table[i].frequency = find_khz_freq_from_fid(fid);
733
734 /* verify frequency is OK */
735 if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) ||
736 (powernow_table[i].frequency < (MIN_FREQ * 1000))) {
737 dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency);
738 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
739 continue;
740 }
741
742 /* verify voltage is OK - BIOSs are using "off" to indicate invalid */
743 if (vid == 0x1f) {
744 dprintk("invalid vid %u, ignoring\n", vid);
745 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
746 continue;
747 }
748
749 if (fid < HI_FID_TABLE_BOTTOM) {
750 if (cntlofreq) {
751 /* if both entries are the same, ignore this
752 * one...
753 */
754 if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) ||
755 (powernow_table[i].index != powernow_table[cntlofreq].index)) {
756 printk(KERN_ERR PFX "Too many lo freq table entries\n");
757 goto err_out_mem;
758 }
759
760 dprintk("double low frequency table entry, ignoring it.\n");
761 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
762 continue;
763 } else
764 cntlofreq = i;
765 }
766
767 if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
768 printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
769 powernow_table[i].frequency,
770 (unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
771 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
772 continue;
773 }
774 }
775
776 powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
777 powernow_table[data->acpi_data.state_count].index = 0;
778 data->powernow_table = powernow_table;
779
780 /* fill in data */
781 data->numps = data->acpi_data.state_count;
782 print_basics(data);
783 powernow_k8_acpi_pst_values(data, 0);
784
785 /* notify BIOS that we exist */
786 acpi_processor_notify_smm(THIS_MODULE);
787
788 return 0;
789
790err_out_mem:
791 kfree(powernow_table);
792
793err_out:
794 acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
795
796 /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
797 data->acpi_data.state_count = 0;
798
799 return -ENODEV;
800}
801
802static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
803{
804 if (data->acpi_data.state_count)
805 acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
806}
807
808#else
809static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
810static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
811static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
812#endif /* CONFIG_X86_POWERNOW_K8_ACPI */
813
814/* Take a frequency, and issue the fid/vid transition command */
815static int transition_frequency(struct powernow_k8_data *data, unsigned int index)
816{
817 u32 fid;
818 u32 vid;
819 int res;
820 struct cpufreq_freqs freqs;
821
822 dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
823
824	/* The fid is the lower 8 bits of the index we stored into
825	 * the cpufreq frequency table in find_psb_table; the vid is
826	 * the upper 8 bits.
827 */
828
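	/* e.g. an index of 0x0a02 (illustrative) decodes to fid 0x02, vid 0x0a */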
829 fid = data->powernow_table[index].index & 0xFF;
830 vid = (data->powernow_table[index].index & 0xFF00) >> 8;
831
832 dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);
833
834 if (query_current_values_with_pending_wait(data))
835 return 1;
836
837 if ((data->currvid == vid) && (data->currfid == fid)) {
838 dprintk("target matches current values (fid 0x%x, vid 0x%x)\n",
839 fid, vid);
840 return 0;
841 }
842
843 if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
844		printk(KERN_ERR PFX "ignoring illegal change in lo freq table, 0x%x to 0x%x\n",
845 data->currfid, fid);
846 return 1;
847 }
848
849 dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n",
850 smp_processor_id(), fid, vid);
851
852 freqs.cpu = data->cpu;
853
854 freqs.old = find_khz_freq_from_fid(data->currfid);
855 freqs.new = find_khz_freq_from_fid(fid);
856 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
857
858 down(&fidvid_sem);
859 res = transition_fid_vid(data, fid, vid);
860 up(&fidvid_sem);
861
862 freqs.new = find_khz_freq_from_fid(data->currfid);
863 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
864
865 return res;
866}
867
868/* Driver entry point to switch to the target frequency */
869static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
870{
871 cpumask_t oldmask = CPU_MASK_ALL;
872 struct powernow_k8_data *data = powernow_data[pol->cpu];
873 u32 checkfid = data->currfid;
874 u32 checkvid = data->currvid;
875 unsigned int newstate;
876 int ret = -EIO;
877
878 /* only run on specific CPU from here on */
879 oldmask = current->cpus_allowed;
880 set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
881 schedule();
882
883 if (smp_processor_id() != pol->cpu) {
884		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
885 goto err_out;
886 }
887
888 if (pending_bit_stuck()) {
889 printk(KERN_ERR PFX "failing targ, change pending bit set\n");
890 goto err_out;
891 }
892
893 dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
894 pol->cpu, targfreq, pol->min, pol->max, relation);
895
896 if (query_current_values_with_pending_wait(data)) {
897 ret = -EIO;
898 goto err_out;
899 }
900
901 dprintk("targ: curr fid 0x%x, vid 0x%x\n",
902 data->currfid, data->currvid);
903
904 if ((checkvid != data->currvid) || (checkfid != data->currfid)) {
905 printk(KERN_ERR PFX
906 "error - out of sync, fid 0x%x 0x%x, vid 0x%x 0x%x\n",
907 checkfid, data->currfid, checkvid, data->currvid);
908 }
909
910 if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate))
911 goto err_out;
912
913 powernow_k8_acpi_pst_values(data, newstate);
914
915 if (transition_frequency(data, newstate)) {
916 printk(KERN_ERR PFX "transition frequency failed\n");
917 ret = 1;
918 goto err_out;
919 }
920
921 pol->cur = find_khz_freq_from_fid(data->currfid);
922 ret = 0;
923
924err_out:
925 set_cpus_allowed(current, oldmask);
926 schedule();
927
928 return ret;
929}
930
931/* Driver entry point to verify the policy and range of frequencies */
932static int powernowk8_verify(struct cpufreq_policy *pol)
933{
934 struct powernow_k8_data *data = powernow_data[pol->cpu];
935
936 return cpufreq_frequency_table_verify(pol, data->powernow_table);
937}
938
939/* per CPU init entry point to the driver */
940static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
941{
942 struct powernow_k8_data *data;
943 cpumask_t oldmask = CPU_MASK_ALL;
944 int rc;
945
946 if (!check_supported_cpu(pol->cpu))
947 return -ENODEV;
948
949 data = kmalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
950 if (!data) {
951		printk(KERN_ERR PFX "unable to alloc powernow_k8_data\n");
952 return -ENOMEM;
953 }
954	memset(data, 0, sizeof(struct powernow_k8_data));
955
956 data->cpu = pol->cpu;
957
958 if (powernow_k8_cpu_init_acpi(data)) {
959 /*
960		 * Use the PSB BIOS structure. This is only available on
961		 * UP systems, and is deprecated by AMD.
962 */
963
964 if ((num_online_cpus() != 1) || (num_possible_cpus() != 1)) {
965 printk(KERN_INFO PFX "MP systems not supported by PSB BIOS structure\n");
966 kfree(data);
967 return -ENODEV;
968 }
969 if (pol->cpu != 0) {
970 printk(KERN_ERR PFX "init not cpu 0\n");
971 kfree(data);
972 return -ENODEV;
973 }
974 rc = find_psb_table(data);
975 if (rc) {
976 kfree(data);
977 return -ENODEV;
978 }
979 }
980
981 /* only run on specific CPU from here on */
982 oldmask = current->cpus_allowed;
983 set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
984 schedule();
985
986 if (smp_processor_id() != pol->cpu) {
987		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
988 goto err_out;
989 }
990
991 if (pending_bit_stuck()) {
992 printk(KERN_ERR PFX "failing init, change pending bit set\n");
993 goto err_out;
994 }
995
996 if (query_current_values_with_pending_wait(data))
997 goto err_out;
998
999 fidvid_msr_init();
1000
1001 /* run on any CPU again */
1002 set_cpus_allowed(current, oldmask);
1003 schedule();
1004
1005 pol->governor = CPUFREQ_DEFAULT_GOVERNOR;
1006
1007	/* Take a crude guess at the transition latency.
1008	 * The estimate is in microseconds, so multiply by 1000 for nanoseconds. */
1009 pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US)
1010 + (3 * (1 << data->irt) * 10)) * 1000;
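	/* For example (illustrative values), rvo 2, vstable 5 and irt 3 give
	 * ((2 + 8) * 5 * 20 + 3 * 8 * 10) * 1000 = 1240000 ns. */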
1011
1012 pol->cur = find_khz_freq_from_fid(data->currfid);
1013 dprintk("policy current frequency %d kHz\n", pol->cur);
1014
1015 /* min/max the cpu is capable of */
1016 if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) {
1017 printk(KERN_ERR PFX "invalid powernow_table\n");
1018 powernow_k8_cpu_exit_acpi(data);
1019 kfree(data->powernow_table);
1020 kfree(data);
1021 return -EINVAL;
1022 }
1023
1024 cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
1025
1026	printk(KERN_INFO PFX "cpu_init done, current fid 0x%x, vid 0x%x\n",
1027 data->currfid, data->currvid);
1028
1029 powernow_data[pol->cpu] = data;
1030
1031 return 0;
1032
1033err_out:
1034 set_cpus_allowed(current, oldmask);
1035 schedule();
1036 powernow_k8_cpu_exit_acpi(data);
1037
1038 kfree(data);
1039 return -ENODEV;
1040}
1041
1042static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol)
1043{
1044 struct powernow_k8_data *data = powernow_data[pol->cpu];
1045
1046 if (!data)
1047 return -EINVAL;
1048
1049 powernow_k8_cpu_exit_acpi(data);
1050
1051 cpufreq_frequency_table_put_attr(pol->cpu);
1052
1053 kfree(data->powernow_table);
1054 kfree(data);
1055
1056 return 0;
1057}
1058
1059static unsigned int powernowk8_get (unsigned int cpu)
1060{
1061 struct powernow_k8_data *data = powernow_data[cpu];
1062 cpumask_t oldmask = current->cpus_allowed;
1063 unsigned int khz = 0;
1064
1065 set_cpus_allowed(current, cpumask_of_cpu(cpu));
1066 if (smp_processor_id() != cpu) {
1067 printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu);
1068 set_cpus_allowed(current, oldmask);
1069 return 0;
1070 }
1071 preempt_disable();
1072
1073 if (query_current_values_with_pending_wait(data))
1074 goto out;
1075
1076 khz = find_khz_freq_from_fid(data->currfid);
1077
1078 out:
1079 preempt_enable_no_resched();
1080 set_cpus_allowed(current, oldmask);
1081
1082 return khz;
1083}
1084
1085static struct freq_attr* powernow_k8_attr[] = {
1086 &cpufreq_freq_attr_scaling_available_freqs,
1087 NULL,
1088};
1089
1090static struct cpufreq_driver cpufreq_amd64_driver = {
1091 .verify = powernowk8_verify,
1092 .target = powernowk8_target,
1093 .init = powernowk8_cpu_init,
1094 .exit = __devexit_p(powernowk8_cpu_exit),
1095 .get = powernowk8_get,
1096 .name = "powernow-k8",
1097 .owner = THIS_MODULE,
1098 .attr = powernow_k8_attr,
1099};
1100
1101/* driver entry point for init */
1102static int __init powernowk8_init(void)
1103{
1104 unsigned int i, supported_cpus = 0;
1105
1106 for (i=0; i<NR_CPUS; i++) {
1107 if (!cpu_online(i))
1108 continue;
1109 if (check_supported_cpu(i))
1110 supported_cpus++;
1111 }
1112
1113 if (supported_cpus == num_online_cpus()) {
1114 printk(KERN_INFO PFX "Found %d AMD Athlon 64 / Opteron processors (" VERSION ")\n",
1115 supported_cpus);
1116 return cpufreq_register_driver(&cpufreq_amd64_driver);
1117 }
1118
1119 return -ENODEV;
1120}
1121
1122/* driver entry point for term */
1123static void __exit powernowk8_exit(void)
1124{
1125 dprintk("exit\n");
1126
1127 cpufreq_unregister_driver(&cpufreq_amd64_driver);
1128}
1129
1130MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com>");
1131MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
1132MODULE_LICENSE("GPL");
1133
1134late_initcall(powernowk8_init);
1135module_exit(powernowk8_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
new file mode 100644
index 000000000000..63ebc8470f52
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
@@ -0,0 +1,176 @@
1/*
2 * (c) 2003, 2004 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the
4 * GNU general public license version 2. See "COPYING" or
5 * http://www.gnu.org/licenses/gpl.html
6 */
7
8struct powernow_k8_data {
9 unsigned int cpu;
10
11 u32 numps; /* number of p-states */
12 u32 batps; /* number of p-states supported on battery */
13
14 /* these values are constant when the PSB is used to determine
15 * vid/fid pairings, but are modified during the ->target() call
16 * when ACPI is used */
17 u32 rvo; /* ramp voltage offset */
18 u32 irt; /* isochronous relief time */
19 u32 vidmvs; /* usable value calculated from mvs */
20 u32 vstable; /* voltage stabilization time, units 20 us */
21 u32 plllock; /* pll lock time, units 1 us */
22
23 /* keep track of the current fid / vid */
24 u32 currvid, currfid;
25
26 /* the powernow_table includes all frequency and vid/fid pairings:
27 * fid are the lower 8 bits of the index, vid are the upper 8 bits.
28 * frequency is in kHz */
29 struct cpufreq_frequency_table *powernow_table;
30
31#ifdef CONFIG_X86_POWERNOW_K8_ACPI
32 /* the acpi table needs to be kept. it's only available if ACPI was
33 * used to determine valid frequency/vid/fid states */
34 struct acpi_processor_performance acpi_data;
35#endif
36};
37
38
39/* processor's cpuid instruction support */
40#define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */
41#define CPUID_XFAM 0x0ff00000 /* extended family */
42#define CPUID_XFAM_K8 0
43#define CPUID_XMOD 0x000f0000 /* extended model */
44#define CPUID_XMOD_REV_E 0x00020000
45#define CPUID_USE_XFAM_XMOD 0x00000f00
46#define CPUID_GET_MAX_CAPABILITIES 0x80000000
47#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007
48#define P_STATE_TRANSITION_CAPABLE 6
49
50/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For */
51/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and */
52/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */
53/* the register number is placed in ecx, and the data is returned in edx:eax. */
54
55#define MSR_FIDVID_CTL 0xc0010041
56#define MSR_FIDVID_STATUS 0xc0010042
57
58/* Field definitions within the FID VID Low Control MSR : */
59#define MSR_C_LO_INIT_FID_VID 0x00010000
60#define MSR_C_LO_NEW_VID 0x00001f00
61#define MSR_C_LO_NEW_FID 0x0000002f
62#define MSR_C_LO_VID_SHIFT 8
63
64/* Field definitions within the FID VID High Control MSR : */
65#define MSR_C_HI_STP_GNT_TO 0x000fffff
66
67/* Field definitions within the FID VID Low Status MSR : */
68#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */
69#define MSR_S_LO_MAX_RAMP_VID 0x1f000000
70#define MSR_S_LO_MAX_FID 0x003f0000
71#define MSR_S_LO_START_FID 0x00003f00
72#define MSR_S_LO_CURRENT_FID 0x0000003f
73
74/* Field definitions within the FID VID High Status MSR : */
75#define MSR_S_HI_MAX_WORKING_VID 0x001f0000
76#define MSR_S_HI_START_VID 0x00001f00
77#define MSR_S_HI_CURRENT_VID 0x0000001f
78#define MSR_C_HI_STP_GNT_BENIGN 0x00000001
79
80/*
81 * There are restrictions that frequencies have to follow:
82 * - only 1 entry in the low fid table ( <=1.4GHz )
83 * - lowest entry in the high fid table must be >= 2 * the entry in the
84 * low fid table
85 * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry
86 * in the low fid table
87 * - the parts can only step at 200 MHz intervals, so 1.9 GHz is never valid
88 * - lowest frequency must be >= interprocessor hypertransport link speed
89 * (only applies to MP systems obviously)
90 */
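/*
 * Example: with a single low-table entry of 800 MHz, the lowest high-table
 * entry must lie between 1600 MHz (2 * 800) and 1800 MHz (200 + 2 * 800).
 */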
91
92/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */
93#define LO_FID_TABLE_TOP 6 /* fid values marking the boundary */
94#define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */
95
96#define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */
97#define HI_VCOFREQ_TABLE_BOTTOM 1600
98
99#define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */
100
101#define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */
102#define LEAST_VID 0x1e /* Lowest (numerically highest) useful vid value */
103
104#define MIN_FREQ 800 /* Min and max freqs, per spec */
105#define MAX_FREQ 5000
106
107#define INVALID_FID_MASK 0xffffffc1 /* not a valid fid if these bits are set */
108#define INVALID_VID_MASK 0xffffffe0 /* not a valid vid if these bits are set */
109
110#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */
111
112#define PLL_LOCK_CONVERSION (1000/5) /* us to ns, then divide by clock period */
113
114#define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */
115#define VST_UNITS_20US 20 /* Voltage Stabilization Time is in units of 20us */
116
117/*
118 * Most values of interest are encoded in a single field of the _PSS
119 * entries: the "control" value.
120 */
121
122#define IRT_SHIFT 30
123#define RVO_SHIFT 28
124#define PLL_L_SHIFT 20
125#define MVS_SHIFT 18
126#define VST_SHIFT 11
127#define VID_SHIFT 6
128#define IRT_MASK 3
129#define RVO_MASK 3
130#define PLL_L_MASK 0x7f
131#define MVS_MASK 3
132#define VST_MASK 0x7f
133#define VID_MASK 0x1f
134#define FID_MASK 0x3f
135
136
137/*
138 * Version 1.4 of the PSB table. This table is constructed by the BIOS
139 * to tell the OS's power management driver which VIDs and FIDs are
140 * supported by this particular processor.
141 * If the data in the PSB / PST is wrong, then this driver will program the
142 * wrong values into hardware, which is very likely to lead to a crash.
143 */
144
145#define PSB_ID_STRING "AMDK7PNOW!"
146#define PSB_ID_STRING_LEN 10
147
148#define PSB_VERSION_1_4 0x14
149
150struct psb_s {
151 u8 signature[10];
152 u8 tableversion;
153 u8 flags1;
154 u16 vstable;
155 u8 flags2;
156 u8 num_tables;
157 u32 cpuid;
158 u8 plllocktime;
159 u8 maxfid;
160 u8 maxvid;
161 u8 numps;
162};
163
164/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */
165struct pst_s {
166 u8 fid;
167 u8 vid;
168};
169
170#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg)
171
172static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid);
173static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid);
174static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
175
176static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
new file mode 100644
index 000000000000..07d5612dc00f
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -0,0 +1,715 @@
1/*
2 * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium
3 * M (part of the Centrino chipset).
4 *
5 * Despite the "SpeedStep" in the name, this is almost entirely unlike
6 * traditional SpeedStep.
7 *
8 * Modelled on speedstep.c
9 *
10 * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org>
11 *
12 * WARNING WARNING WARNING
13 *
14 * This driver manipulates the PERF_CTL MSR, which is only somewhat
15 * documented. While it seems to work on my laptop, it has not been
16 * tested anywhere else, and it may not work for you, may do strange
17 * things, or may simply crash.
18 */
19
20#include <linux/kernel.h>
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/cpufreq.h>
24#include <linux/config.h>
25#include <linux/delay.h>
26#include <linux/compiler.h>
27
28#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
29#include <linux/acpi.h>
30#include <acpi/processor.h>
31#endif
32
33#include <asm/msr.h>
34#include <asm/processor.h>
35#include <asm/cpufeature.h>
36
37#include "speedstep-est-common.h"
38
39#define PFX "speedstep-centrino: "
40#define MAINTAINER "Jeremy Fitzhardinge <jeremy@goop.org>"
41
42#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
43
44
45struct cpu_id
46{
47 __u8 x86; /* CPU family */
48 __u8 x86_model; /* model */
49 __u8 x86_mask; /* stepping */
50};
51
52enum {
53 CPU_BANIAS,
54 CPU_DOTHAN_A1,
55 CPU_DOTHAN_A2,
56 CPU_DOTHAN_B0,
57};
58
59static const struct cpu_id cpu_ids[] = {
60 [CPU_BANIAS] = { 6, 9, 5 },
61 [CPU_DOTHAN_A1] = { 6, 13, 1 },
62 [CPU_DOTHAN_A2] = { 6, 13, 2 },
63 [CPU_DOTHAN_B0] = { 6, 13, 6 },
64};
65#define N_IDS (sizeof(cpu_ids)/sizeof(cpu_ids[0]))
66
67struct cpu_model
68{
69 const struct cpu_id *cpu_id;
70 const char *model_name;
71 unsigned max_freq; /* max clock in kHz */
72
73 struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */
74};
75static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x);
76
77/* Operating points for current CPU */
78static struct cpu_model *centrino_model[NR_CPUS];
79static const struct cpu_id *centrino_cpu[NR_CPUS];
80
81static struct cpufreq_driver centrino_driver;
82
83#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE
84
85/* Computes the correct form for IA32_PERF_CTL MSR for a particular
86   frequency/voltage operating point; frequency in MHz, voltage in mV.
87 This is stored as "index" in the structure. */
88#define OP(mhz, mv) \
89 { \
90 .frequency = (mhz) * 1000, \
91 .index = (((mhz)/100) << 8) | ((mv - 700) / 16) \
92 }
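/* For example, OP(600, 844) yields .frequency = 600000 (kHz) and
 * .index = (6 << 8) | ((844 - 700) / 16) = 0x0609. */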
93
94/*
95 * These voltage tables were derived from the Intel Pentium M
96 * datasheet, document 25261202.pdf, Table 5. I have verified they
97 * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium
98 * M.
99 */
100
101/* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */
102static struct cpufreq_frequency_table banias_900[] =
103{
104 OP(600, 844),
105 OP(800, 988),
106 OP(900, 1004),
107 { .frequency = CPUFREQ_TABLE_END }
108};
109
110/* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */
111static struct cpufreq_frequency_table banias_1000[] =
112{
113 OP(600, 844),
114 OP(800, 972),
115 OP(900, 988),
116 OP(1000, 1004),
117 { .frequency = CPUFREQ_TABLE_END }
118};
119
120/* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */
121static struct cpufreq_frequency_table banias_1100[] =
122{
123 OP( 600, 956),
124 OP( 800, 1020),
125 OP( 900, 1100),
126 OP(1000, 1164),
127 OP(1100, 1180),
128 { .frequency = CPUFREQ_TABLE_END }
129};
130
131
132/* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */
133static struct cpufreq_frequency_table banias_1200[] =
134{
135 OP( 600, 956),
136 OP( 800, 1004),
137 OP( 900, 1020),
138 OP(1000, 1100),
139 OP(1100, 1164),
140 OP(1200, 1180),
141 { .frequency = CPUFREQ_TABLE_END }
142};
143
144/* Intel Pentium M processor 1.30GHz (Banias) */
145static struct cpufreq_frequency_table banias_1300[] =
146{
147 OP( 600, 956),
148 OP( 800, 1260),
149 OP(1000, 1292),
150 OP(1200, 1356),
151 OP(1300, 1388),
152 { .frequency = CPUFREQ_TABLE_END }
153};
154
155/* Intel Pentium M processor 1.40GHz (Banias) */
156static struct cpufreq_frequency_table banias_1400[] =
157{
158 OP( 600, 956),
159 OP( 800, 1180),
160 OP(1000, 1308),
161 OP(1200, 1436),
162 OP(1400, 1484),
163 { .frequency = CPUFREQ_TABLE_END }
164};
165
166/* Intel Pentium M processor 1.50GHz (Banias) */
167static struct cpufreq_frequency_table banias_1500[] =
168{
169 OP( 600, 956),
170 OP( 800, 1116),
171 OP(1000, 1228),
172 OP(1200, 1356),
173 OP(1400, 1452),
174 OP(1500, 1484),
175 { .frequency = CPUFREQ_TABLE_END }
176};
177
178/* Intel Pentium M processor 1.60GHz (Banias) */
179static struct cpufreq_frequency_table banias_1600[] =
180{
181 OP( 600, 956),
182 OP( 800, 1036),
183 OP(1000, 1164),
184 OP(1200, 1276),
185 OP(1400, 1420),
186 OP(1600, 1484),
187 { .frequency = CPUFREQ_TABLE_END }
188};
189
190/* Intel Pentium M processor 1.70GHz (Banias) */
191static struct cpufreq_frequency_table banias_1700[] =
192{
193 OP( 600, 956),
194 OP( 800, 1004),
195 OP(1000, 1116),
196 OP(1200, 1228),
197 OP(1400, 1308),
198 OP(1700, 1484),
199 { .frequency = CPUFREQ_TABLE_END }
200};
201#undef OP
202
203#define _BANIAS(cpuid, max, name) \
204{ .cpu_id = cpuid, \
205 .model_name = "Intel(R) Pentium(R) M processor " name "MHz", \
206 .max_freq = (max)*1000, \
207 .op_points = banias_##max, \
208}
209#define BANIAS(max) _BANIAS(&cpu_ids[CPU_BANIAS], max, #max)
210
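/* BANIAS(1000), for instance, expands to an entry matching the model string
 * "Intel(R) Pentium(R) M processor 1000MHz", backed by banias_1000[]. */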
211/* CPU models, their operating frequency range, and freq/voltage
212 operating points */
213static struct cpu_model models[] =
214{
215 _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"),
216 BANIAS(1000),
217 BANIAS(1100),
218 BANIAS(1200),
219 BANIAS(1300),
220 BANIAS(1400),
221 BANIAS(1500),
222 BANIAS(1600),
223 BANIAS(1700),
224
225 /* NULL model_name is a wildcard */
226 { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL },
227 { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL },
228 { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL },
229
230 { NULL, }
231};
232#undef _BANIAS
233#undef BANIAS
234
235static int centrino_cpu_init_table(struct cpufreq_policy *policy)
236{
237 struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu];
238 struct cpu_model *model;
239
240 for(model = models; model->cpu_id != NULL; model++)
241 if (centrino_verify_cpu_id(cpu, model->cpu_id) &&
242 (model->model_name == NULL ||
243 strcmp(cpu->x86_model_id, model->model_name) == 0))
244 break;
245
246 if (model->cpu_id == NULL) {
247 /* No match at all */
248 dprintk(KERN_INFO PFX "no support for CPU model \"%s\": "
249 "send /proc/cpuinfo to " MAINTAINER "\n",
250 cpu->x86_model_id);
251 return -ENOENT;
252 }
253
254 if (model->op_points == NULL) {
255 /* Matched a non-match */
256		dprintk(KERN_INFO PFX "no table support for CPU model \"%s\"\n",
257 cpu->x86_model_id);
258#ifndef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
259 dprintk(KERN_INFO PFX "try compiling with CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI enabled\n");
260#endif
261 return -ENOENT;
262 }
263
264 centrino_model[policy->cpu] = model;
265
266 dprintk("found \"%s\": max frequency: %dkHz\n",
267 model->model_name, model->max_freq);
268
269 return 0;
270}
271
272#else
273static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; }
274#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */
275
276static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x)
277{
278 if ((c->x86 == x->x86) &&
279 (c->x86_model == x->x86_model) &&
280 (c->x86_mask == x->x86_mask))
281 return 1;
282 return 0;
283}
284
285/* To be called only after centrino_model is initialized */
286static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe)
287{
288 int i;
289
290 /*
291 * Extract clock in kHz from PERF_CTL value
292 * for centrino, as some DSDTs are buggy.
293 * Ideally, this can be done using the acpi_data structure.
294 */
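	/*
	 * For the CPUs below the bus ratio sits in bits 15:8 of the MSR:
	 * e.g. a value of 0x0609 (illustrative) decodes to ratio 6, i.e.
	 * 600000 kHz on the 100 MHz FSB.
	 */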
295 if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) ||
296 (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) ||
297 (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) {
298 msr = (msr >> 8) & 0xff;
299 return msr * 100000;
300 }
301
302 if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points))
303 return 0;
304
305 msr &= 0xffff;
306	for (i = 0; centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) {
307 if (msr == centrino_model[cpu]->op_points[i].index)
308 return centrino_model[cpu]->op_points[i].frequency;
309 }
310 if (failsafe)
311 return centrino_model[cpu]->op_points[i-1].frequency;
312 else
313 return 0;
314}
315
316/* Return the current CPU frequency in kHz */
317static unsigned int get_cur_freq(unsigned int cpu)
318{
319 unsigned l, h;
320 unsigned clock_freq;
321 cpumask_t saved_mask;
322
323 saved_mask = current->cpus_allowed;
324 set_cpus_allowed(current, cpumask_of_cpu(cpu));
325	if (smp_processor_id() != cpu) {
326		/* migration failed - restore the saved affinity before bailing */
327		set_cpus_allowed(current, saved_mask);
328		return 0;
329	}
327
328 rdmsr(MSR_IA32_PERF_STATUS, l, h);
329 clock_freq = extract_clock(l, cpu, 0);
330
331 if (unlikely(clock_freq == 0)) {
332 /*
333 * On some CPUs, we can see transient MSR values (which are
334 * not present in _PSS), while CPU is doing some automatic
335 * P-state transition (like TM2). Get the last freq set
336 * in PERF_CTL.
337 */
338 rdmsr(MSR_IA32_PERF_CTL, l, h);
339 clock_freq = extract_clock(l, cpu, 1);
340 }
341
342 set_cpus_allowed(current, saved_mask);
343 return clock_freq;
344}
345
346
347#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
348
349static struct acpi_processor_performance p;
350
351/*
352 * centrino_cpu_init_acpi - register with ACPI P-States library
353 *
354 * Register with the ACPI P-States library (part of drivers/acpi/processor.c)
355 * in order to determine correct frequency and voltage pairings by reading
356 * the _PSS of the ACPI DSDT or SSDT tables.
357 */
358static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
359{
360 union acpi_object arg0 = {ACPI_TYPE_BUFFER};
361 u32 arg0_buf[3];
362 struct acpi_object_list arg_list = {1, &arg0};
363 unsigned long cur_freq;
364 int result = 0, i;
365 unsigned int cpu = policy->cpu;
366
367 /* _PDC settings */
368 arg0.buffer.length = 12;
369 arg0.buffer.pointer = (u8 *) arg0_buf;
370 arg0_buf[0] = ACPI_PDC_REVISION_ID;
371 arg0_buf[1] = 1;
372 arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_MSR;
373
374 p.pdc = &arg_list;
375
376 /* register with ACPI core */
377 if (acpi_processor_register_performance(&p, cpu)) {
378 dprintk(KERN_INFO PFX "obtaining ACPI data failed\n");
379 return -EIO;
380 }
381
382 /* verify the acpi_data */
383 if (p.state_count <= 1) {
384 dprintk("No P-States\n");
385 result = -ENODEV;
386 goto err_unreg;
387 }
388
389 if ((p.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
390 (p.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
391 dprintk("Invalid control/status registers (%x - %x)\n",
392 p.control_register.space_id, p.status_register.space_id);
393 result = -EIO;
394 goto err_unreg;
395 }
396
397 for (i=0; i<p.state_count; i++) {
398 if (p.states[i].control != p.states[i].status) {
399 dprintk("Different control (%x) and status values (%x)\n",
400 p.states[i].control, p.states[i].status);
401 result = -EINVAL;
402 goto err_unreg;
403 }
404
405 if (!p.states[i].core_frequency) {
406 dprintk("Zero core frequency for state %u\n", i);
407 result = -EINVAL;
408 goto err_unreg;
409 }
410
411 if (p.states[i].core_frequency > p.states[0].core_frequency) {
412 dprintk("P%u has larger frequency (%u) than P0 (%u), skipping\n", i,
413 p.states[i].core_frequency, p.states[0].core_frequency);
414 p.states[i].core_frequency = 0;
415 continue;
416 }
417 }
418
419 centrino_model[cpu] = kmalloc(sizeof(struct cpu_model), GFP_KERNEL);
420 if (!centrino_model[cpu]) {
421 result = -ENOMEM;
422 goto err_unreg;
423 }
424 memset(centrino_model[cpu], 0, sizeof(struct cpu_model));
425
426	centrino_model[cpu]->model_name = NULL;
427 centrino_model[cpu]->max_freq = p.states[0].core_frequency * 1000;
428 centrino_model[cpu]->op_points = kmalloc(sizeof(struct cpufreq_frequency_table) *
429 (p.state_count + 1), GFP_KERNEL);
430 if (!centrino_model[cpu]->op_points) {
431 result = -ENOMEM;
432 goto err_kfree;
433 }
434
435 for (i=0; i<p.state_count; i++) {
436 centrino_model[cpu]->op_points[i].index = p.states[i].control;
437 centrino_model[cpu]->op_points[i].frequency = p.states[i].core_frequency * 1000;
438 dprintk("adding state %i with frequency %u and control value %04x\n",
439 i, centrino_model[cpu]->op_points[i].frequency, centrino_model[cpu]->op_points[i].index);
440 }
441 centrino_model[cpu]->op_points[p.state_count].frequency = CPUFREQ_TABLE_END;
442
443 cur_freq = get_cur_freq(cpu);
444
445 for (i=0; i<p.state_count; i++) {
446 if (!p.states[i].core_frequency) {
447 dprintk("skipping state %u\n", i);
448 centrino_model[cpu]->op_points[i].frequency = CPUFREQ_ENTRY_INVALID;
449 continue;
450 }
451
452 if (extract_clock(centrino_model[cpu]->op_points[i].index, cpu, 0) !=
453 (centrino_model[cpu]->op_points[i].frequency)) {
454 dprintk("Invalid encoded frequency (%u vs. %u)\n",
455 extract_clock(centrino_model[cpu]->op_points[i].index, cpu, 0),
456 centrino_model[cpu]->op_points[i].frequency);
457 result = -EINVAL;
458 goto err_kfree_all;
459 }
460
461 if (cur_freq == centrino_model[cpu]->op_points[i].frequency)
462 p.state = i;
463 }
464
465 /* notify BIOS that we exist */
466 acpi_processor_notify_smm(THIS_MODULE);
467
468 return 0;
469
470 err_kfree_all:
471 kfree(centrino_model[cpu]->op_points);
472 err_kfree:
473 kfree(centrino_model[cpu]);
474 err_unreg:
475 acpi_processor_unregister_performance(&p, cpu);
476 dprintk(KERN_INFO PFX "invalid ACPI data\n");
477	return result;
478}
479#else
480static inline int centrino_cpu_init_acpi(struct cpufreq_policy *policy) { return -ENODEV; }
481#endif
482
483static int centrino_cpu_init(struct cpufreq_policy *policy)
484{
485 struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu];
486 unsigned freq;
487 unsigned l, h;
488 int ret;
489 int i;
490
491 /* Only Intel makes Enhanced Speedstep-capable CPUs */
492 if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST))
493 return -ENODEV;
494
495 for (i = 0; i < N_IDS; i++)
496 if (centrino_verify_cpu_id(cpu, &cpu_ids[i]))
497 break;
498
499 if (i != N_IDS)
500 centrino_cpu[policy->cpu] = &cpu_ids[i];
501
502 if (is_const_loops_cpu(policy->cpu)) {
503 centrino_driver.flags |= CPUFREQ_CONST_LOOPS;
504 }
505
506 if (centrino_cpu_init_acpi(policy)) {
507 if (policy->cpu != 0)
508 return -ENODEV;
509
510 if (!centrino_cpu[policy->cpu]) {
511 dprintk(KERN_INFO PFX "found unsupported CPU with "
512 "Enhanced SpeedStep: send /proc/cpuinfo to "
513 MAINTAINER "\n");
514 return -ENODEV;
515 }
516
517 if (centrino_cpu_init_table(policy)) {
518 return -ENODEV;
519 }
520 }
521
522 /* Check to see if Enhanced SpeedStep is enabled, and try to
523 enable it if not. */
524 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
525
526 if (!(l & (1<<16))) {
527 l |= (1<<16);
528 dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
529 wrmsr(MSR_IA32_MISC_ENABLE, l, h);
530
531 /* check to see if it stuck */
532 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
533 if (!(l & (1<<16))) {
534 printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n");
535 return -ENODEV;
536 }
537 }
538
539 freq = get_cur_freq(policy->cpu);
540
541 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
542 policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */
543 policy->cur = freq;
544
545 dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur);
546
547 ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points);
548 if (ret)
549		return ret;
550
551 cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu);
552
553 return 0;
554}
555
556static int centrino_cpu_exit(struct cpufreq_policy *policy)
557{
558 unsigned int cpu = policy->cpu;
559
560 if (!centrino_model[cpu])
561 return -ENODEV;
562
563 cpufreq_frequency_table_put_attr(cpu);
564
565#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
566 if (!centrino_model[cpu]->model_name) {
567 dprintk("unregistering and freeing ACPI data\n");
568 acpi_processor_unregister_performance(&p, cpu);
569 kfree(centrino_model[cpu]->op_points);
570 kfree(centrino_model[cpu]);
571 }
572#endif
573
574 centrino_model[cpu] = NULL;
575
576 return 0;
577}
578
579/**
580 * centrino_verify - verifies a new CPUFreq policy
581 * @policy: new policy
582 *
583 * Limits must be within this model's frequency range, with at least
584 * one border included.
585 */
586static int centrino_verify (struct cpufreq_policy *policy)
587{
588 return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points);
589}
590
591/**
592 * centrino_target - set a new CPUFreq policy
593 * @policy: new policy
594 * @target_freq: the target frequency
595 * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
596 *
597 * Sets a new CPUFreq policy.
598 */
599static int centrino_target (struct cpufreq_policy *policy,
600 unsigned int target_freq,
601 unsigned int relation)
602{
603 unsigned int newstate = 0;
604 unsigned int msr, oldmsr, h, cpu = policy->cpu;
605 struct cpufreq_freqs freqs;
606 cpumask_t saved_mask;
607 int retval;
608
609 if (centrino_model[cpu] == NULL)
610 return -ENODEV;
611
612 /*
613 * Support for SMP systems.
614 * Make sure we are running on the CPU that wants to change frequency
615 */
616 saved_mask = current->cpus_allowed;
617 set_cpus_allowed(current, policy->cpus);
618	if (!cpu_isset(smp_processor_id(), policy->cpus)) {
619		dprintk("couldn't limit to CPUs in this domain\n");
620		/* restore the saved affinity before bailing out */
621		set_cpus_allowed(current, saved_mask);
622		return -EAGAIN;
623	}
622
623 if (cpufreq_frequency_table_target(policy, centrino_model[cpu]->op_points, target_freq,
624 relation, &newstate)) {
625 retval = -EINVAL;
626 goto migrate_end;
627 }
628
629 msr = centrino_model[cpu]->op_points[newstate].index;
630 rdmsr(MSR_IA32_PERF_CTL, oldmsr, h);
631
632 if (msr == (oldmsr & 0xffff)) {
633 retval = 0;
634 dprintk("no change needed - msr was and needs to be %x\n", oldmsr);
635 goto migrate_end;
636 }
637
638 freqs.cpu = cpu;
639 freqs.old = extract_clock(oldmsr, cpu, 0);
640 freqs.new = extract_clock(msr, cpu, 0);
641
642 dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
643 target_freq, freqs.old, freqs.new, msr);
644
645 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
646
647 /* all but 16 LSB are "reserved", so treat them with
648 care */
649 oldmsr &= ~0xffff;
650 msr &= 0xffff;
651 oldmsr |= msr;
652
653 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
654
655 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
656
657 retval = 0;
658migrate_end:
659 set_cpus_allowed(current, saved_mask);
660	return retval;
661}
662
663static struct freq_attr* centrino_attr[] = {
664 &cpufreq_freq_attr_scaling_available_freqs,
665 NULL,
666};
667
668static struct cpufreq_driver centrino_driver = {
669 .name = "centrino", /* should be speedstep-centrino,
670 but there's a 16 char limit */
671 .init = centrino_cpu_init,
672 .exit = centrino_cpu_exit,
673 .verify = centrino_verify,
674 .target = centrino_target,
675 .get = get_cur_freq,
676 .attr = centrino_attr,
677 .owner = THIS_MODULE,
678};
679
680
681/**
682 * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver
683 *
684 * Initializes the Enhanced SpeedStep support. Returns -ENODEV on
685 * unsupported devices, -ENOENT if there's no voltage table for this
686 * particular CPU model, -EINVAL on problems during initialization,
687 * and zero on success.
688 *
689 * This is quite picky. Not only does the CPU have to advertise the
690 * "est" flag in the cpuid capability flags, we look for a specific
691 * CPU model and stepping, and we need to have the exact model name in
692 * our voltage tables. That is, be paranoid about not releasing
693 * someone's valuable magic smoke.
694 */
695static int __init centrino_init(void)
696{
697 struct cpuinfo_x86 *cpu = cpu_data;
698
699 if (!cpu_has(cpu, X86_FEATURE_EST))
700 return -ENODEV;
701
702 return cpufreq_register_driver(&centrino_driver);
703}
704
705static void __exit centrino_exit(void)
706{
707 cpufreq_unregister_driver(&centrino_driver);
708}
709
710MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>");
711MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors.");
712MODULE_LICENSE ("GPL");
713
714late_initcall(centrino_init);
715module_exit(centrino_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h b/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h
new file mode 100644
index 000000000000..5ce995c9d866
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h
@@ -0,0 +1,25 @@
1/*
2 * Routines common for drivers handling Enhanced Speedstep Technology
3 * Copyright (C) 2004 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
4 *
5 * Licensed under the terms of the GNU GPL License version 2 -- see
6 * COPYING for details.
7 */
8
9static inline int is_const_loops_cpu(unsigned int cpu)
10{
11 struct cpuinfo_x86 *c = cpu_data + cpu;
12
13 if (c->x86_vendor != X86_VENDOR_INTEL || !cpu_has(c, X86_FEATURE_EST))
14 return 0;
15
16 /*
17 * on P-4s, the TSC runs with constant frequency independent of cpu freq
18 * when we use EST
19 */
20 if (c->x86 == 0xf)
21 return 1;
22
23 return 0;
24}
25
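/*
 * A minimal usage sketch (illustration only, not part of this header):
 * callers can consult is_const_loops_cpu() before rescaling
 * loops_per_jiffy on a frequency transition, since a constant-rate TSC
 * keeps the calibrated delay loop valid across frequency changes.
 *
 *	if (!is_const_loops_cpu(cpu))
 *		cpu_data[cpu].loops_per_jiffy =
 *			cpufreq_scale(cpu_data[cpu].loops_per_jiffy,
 *				      freqs.old, freqs.new);
 */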
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c
new file mode 100644
index 000000000000..5b7d18a06afa
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c
@@ -0,0 +1,424 @@
1/*
2 * (C) 2001 Dave Jones, Arjan van de ven.
3 * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
4 *
5 * Licensed under the terms of the GNU GPL License version 2.
6 * Based upon reverse engineered information, and on Intel documentation
7 * for chipsets ICH2-M and ICH3-M.
8 *
9 * Many thanks to Ducrot Bruno for finding and fixing the last
10 * "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler
11 * for extensive testing.
12 *
13 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
14 */
15
16
17/*********************************************************************
18 * SPEEDSTEP - DEFINITIONS *
19 *********************************************************************/
20
21#include <linux/kernel.h>
22#include <linux/module.h>
23#include <linux/init.h>
24#include <linux/cpufreq.h>
25#include <linux/pci.h>
26#include <linux/slab.h>
27
28#include "speedstep-lib.h"
29
30
31/* speedstep_chipset:
32 * It is necessary to know which chipset is used. As accesses to
33 * this device occur at various places in this module, we need a
34 * static struct pci_dev * pointing to that device.
35 */
36static struct pci_dev *speedstep_chipset_dev;
37
38
39/* speedstep_processor
40 */
41static unsigned int speedstep_processor = 0;
42
43
44/*
45 * There are only two frequency states for each processor. Values
46 * are in kHz for the time being.
47 */
48static struct cpufreq_frequency_table speedstep_freqs[] = {
49 {SPEEDSTEP_HIGH, 0},
50 {SPEEDSTEP_LOW, 0},
51 {0, CPUFREQ_TABLE_END},
52};
53
54
55#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg)
56
57
58/**
59 * speedstep_set_state - set the SpeedStep state
60 * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
61 *
62 * Tries to change the SpeedStep state.
63 */
64static void speedstep_set_state (unsigned int state)
65{
66 u32 pmbase;
67 u8 pm2_blk;
68 u8 value;
69 unsigned long flags;
70
71 if (!speedstep_chipset_dev || (state > 0x1))
72 return;
73
74 /* get PMBASE */
75 pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase);
76 if (!(pmbase & 0x01)) {
77 printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
78 return;
79 }
80
81 pmbase &= 0xFFFFFFFE;
82 if (!pmbase) {
83 printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
84 return;
85 }
86
87 /* Disable IRQs */
88 local_irq_save(flags);
89
90 /* read state */
91 value = inb(pmbase + 0x50);
92
93 dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
94
95 /* write new state */
96 value &= 0xFE;
97 value |= state;
98
99 dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase);
100
101 /* Disable bus master arbitration */
102 pm2_blk = inb(pmbase + 0x20);
103 pm2_blk |= 0x01;
104 outb(pm2_blk, (pmbase + 0x20));
105
106 /* Actual transition */
107 outb(value, (pmbase + 0x50));
108
109 /* Restore bus master arbitration */
110 pm2_blk &= 0xfe;
111 outb(pm2_blk, (pmbase + 0x20));
112
113 /* check if transition was successful */
114 value = inb(pmbase + 0x50);
115
116 /* Enable IRQs */
117 local_irq_restore(flags);
118
119 dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
120
121 if (state == (value & 0x1)) {
122 dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000));
123 } else {
124 printk (KERN_ERR "cpufreq: change failed - I/O error\n");
125 }
126
127 return;
128}
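/*
 * Worked example of the register access above (illustrative values):
 * if PCI config dword 0x40 of the ICH LPC bridge reads 0x00001001,
 * bit 0 confirms an I/O mapping and masking it gives pmbase = 0x1000.
 * The SpeedStep state register is then I/O port 0x1050 (bit 0: 0 =
 * high speed, 1 = low speed), and bus master arbitration is disabled
 * via the PM2 block at port 0x1020 for the duration of the switch.
 */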
129
130
131/**
132 * speedstep_activate - activate SpeedStep control in the chipset
133 *
134 * Tries to activate the SpeedStep status and control registers.
135 * Returns -EINVAL on an unsupported chipset, and zero on success.
136 */
137static int speedstep_activate (void)
138{
139 u16 value = 0;
140
141 if (!speedstep_chipset_dev)
142 return -EINVAL;
143
144 pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value);
145 if (!(value & 0x08)) {
146 value |= 0x08;
147 dprintk("activating SpeedStep (TM) registers\n");
148 pci_write_config_word(speedstep_chipset_dev, 0x00A0, value);
149 }
150
151 return 0;
152}
153
154
155/**
156 * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic
157 *
158 * Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to
159 * the LPC bridge / PM module which contains all power-management
160 * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected
161 * chipset, or zero on failure.
162 */
163static unsigned int speedstep_detect_chipset (void)
164{
165 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
166 PCI_DEVICE_ID_INTEL_82801DB_12,
167 PCI_ANY_ID,
168 PCI_ANY_ID,
169 NULL);
170 if (speedstep_chipset_dev)
171 return 4; /* 4-M */
172
173 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
174 PCI_DEVICE_ID_INTEL_82801CA_12,
175 PCI_ANY_ID,
176 PCI_ANY_ID,
177 NULL);
178 if (speedstep_chipset_dev)
179 return 3; /* 3-M */
180
181
182 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
183 PCI_DEVICE_ID_INTEL_82801BA_10,
184 PCI_ANY_ID,
185 PCI_ANY_ID,
186 NULL);
187 if (speedstep_chipset_dev) {
188 /* speedstep.c causes lockups on Dell Inspirons 8000 and
189 * 8100 which use a pretty old revision of the 82815
190		 * host bridge. Abort on these systems.
191 */
192 static struct pci_dev *hostbridge;
193 u8 rev = 0;
194
195 hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL,
196 PCI_DEVICE_ID_INTEL_82815_MC,
197 PCI_ANY_ID,
198 PCI_ANY_ID,
199 NULL);
200
201 if (!hostbridge)
202 return 2; /* 2-M */
203
204 pci_read_config_byte(hostbridge, PCI_REVISION_ID, &rev);
205 if (rev < 5) {
206 dprintk("hostbridge does not support speedstep\n");
207 speedstep_chipset_dev = NULL;
208 pci_dev_put(hostbridge);
209 return 0;
210 }
211
212 pci_dev_put(hostbridge);
213 return 2; /* 2-M */
214 }
215
216 return 0;
217}
218
219static unsigned int _speedstep_get(cpumask_t cpus)
220{
221 unsigned int speed;
222 cpumask_t cpus_allowed;
223
224 cpus_allowed = current->cpus_allowed;
225 set_cpus_allowed(current, cpus);
226 speed = speedstep_get_processor_frequency(speedstep_processor);
227 set_cpus_allowed(current, cpus_allowed);
228 dprintk("detected %u kHz as current frequency\n", speed);
229 return speed;
230}
231
232static unsigned int speedstep_get(unsigned int cpu)
233{
234 return _speedstep_get(cpumask_of_cpu(cpu));
235}
236
237/**
238 * speedstep_target - set a new CPUFreq policy
239 * @policy: new policy
240 * @target_freq: the target frequency
241 * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
242 *
243 * Sets a new CPUFreq policy.
244 */
245static int speedstep_target (struct cpufreq_policy *policy,
246 unsigned int target_freq,
247 unsigned int relation)
248{
249 unsigned int newstate = 0;
250 struct cpufreq_freqs freqs;
251 cpumask_t cpus_allowed;
252 int i;
253
254 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
255 return -EINVAL;
256
257 freqs.old = _speedstep_get(policy->cpus);
258 freqs.new = speedstep_freqs[newstate].frequency;
259 freqs.cpu = policy->cpu;
260
261	dprintk("transitioning from %u to %u kHz\n", freqs.old, freqs.new);
262
263 /* no transition necessary */
264 if (freqs.old == freqs.new)
265 return 0;
266
267 cpus_allowed = current->cpus_allowed;
268
269 for_each_cpu_mask(i, policy->cpus) {
270 freqs.cpu = i;
271 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
272 }
273
274 /* switch to physical CPU where state is to be changed */
275 set_cpus_allowed(current, policy->cpus);
276
277 speedstep_set_state(newstate);
278
279 /* allow to be run on all CPUs */
280 set_cpus_allowed(current, cpus_allowed);
281
282 for_each_cpu_mask(i, policy->cpus) {
283 freqs.cpu = i;
284 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
285 }
286
287 return 0;
288}
289
290
291/**
292 * speedstep_verify - verifies a new CPUFreq policy
293 * @policy: new policy
294 *
295 * Limit must be within speedstep_low_freq and speedstep_high_freq, with
296 * at least one border included.
297 */
298static int speedstep_verify (struct cpufreq_policy *policy)
299{
300 return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
301}
302
303
304static int speedstep_cpu_init(struct cpufreq_policy *policy)
305{
306 int result = 0;
307 unsigned int speed;
308 cpumask_t cpus_allowed;
309
310 /* only run on CPU to be set, or on its sibling */
311#ifdef CONFIG_SMP
312 policy->cpus = cpu_sibling_map[policy->cpu];
313#endif
314
315 cpus_allowed = current->cpus_allowed;
316 set_cpus_allowed(current, policy->cpus);
317
318 /* detect low and high frequency */
319 result = speedstep_get_freqs(speedstep_processor,
320 &speedstep_freqs[SPEEDSTEP_LOW].frequency,
321 &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
322 &speedstep_set_state);
323 set_cpus_allowed(current, cpus_allowed);
324 if (result)
325 return result;
326
327 /* get current speed setting */
328 speed = _speedstep_get(policy->cpus);
329 if (!speed)
330 return -EIO;
331
332 dprintk("currently at %s speed setting - %i MHz\n",
333 (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high",
334 (speed / 1000));
335
336 /* cpuinfo and default policy values */
337 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
338 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
339 policy->cur = speed;
340
341 result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
342 if (result)
343 return (result);
344
345 cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
346
347 return 0;
348}
349
350
351static int speedstep_cpu_exit(struct cpufreq_policy *policy)
352{
353 cpufreq_frequency_table_put_attr(policy->cpu);
354 return 0;
355}
356
357static struct freq_attr* speedstep_attr[] = {
358 &cpufreq_freq_attr_scaling_available_freqs,
359 NULL,
360};
361
362
363static struct cpufreq_driver speedstep_driver = {
364 .name = "speedstep-ich",
365 .verify = speedstep_verify,
366 .target = speedstep_target,
367 .init = speedstep_cpu_init,
368 .exit = speedstep_cpu_exit,
369 .get = speedstep_get,
370 .owner = THIS_MODULE,
371 .attr = speedstep_attr,
372};
373
374
375/**
376 * speedstep_init - initializes the SpeedStep CPUFreq driver
377 *
378 * Initializes the SpeedStep support. Returns -ENODEV on unsupported
379 * devices, -EINVAL on problems during initialization, and zero on
380 * success.
381 */
382static int __init speedstep_init(void)
383{
384 /* detect processor */
385 speedstep_processor = speedstep_detect_processor();
386 if (!speedstep_processor) {
387 dprintk("Intel(R) SpeedStep(TM) capable processor not found\n");
388 return -ENODEV;
389 }
390
391 /* detect chipset */
392 if (!speedstep_detect_chipset()) {
393 dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n");
394 return -ENODEV;
395 }
396
397 /* activate speedstep support */
398 if (speedstep_activate()) {
399 pci_dev_put(speedstep_chipset_dev);
400 return -EINVAL;
401 }
402
403 return cpufreq_register_driver(&speedstep_driver);
404}
405
406
407/**
408 * speedstep_exit - unregisters SpeedStep support
409 *
410 * Unregisters SpeedStep support.
411 */
412static void __exit speedstep_exit(void)
413{
414 pci_dev_put(speedstep_chipset_dev);
415 cpufreq_unregister_driver(&speedstep_driver);
416}
417
418
419MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
420MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges.");
421MODULE_LICENSE ("GPL");
422
423module_init(speedstep_init);
424module_exit(speedstep_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c
new file mode 100644
index 000000000000..8ba430a9c3a2
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c
@@ -0,0 +1,385 @@
1/*
2 * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
3 *
4 * Licensed under the terms of the GNU GPL License version 2.
5 *
6 * Library for common functions for Intel SpeedStep v.1 and v.2 support
7 *
8 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
9 */
10
11#include <linux/kernel.h>
12#include <linux/module.h>
13#include <linux/moduleparam.h>
14#include <linux/init.h>
15#include <linux/cpufreq.h>
16#include <linux/pci.h>
17#include <linux/slab.h>
18
19#include <asm/msr.h>
20#include "speedstep-lib.h"
21
22#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-lib", msg)
23
24#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
25static int relaxed_check = 0;
26#else
27#define relaxed_check 0
28#endif
29
30/*********************************************************************
31 * GET PROCESSOR CORE SPEED IN KHZ *
32 *********************************************************************/
33
34static unsigned int pentium3_get_frequency (unsigned int processor)
35{
36 /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
37 struct {
38 unsigned int ratio; /* Frequency Multiplier (x10) */
39 u8 bitmap; /* power on configuration bits
40 [27, 25:22] (in MSR 0x2a) */
41 } msr_decode_mult [] = {
42 { 30, 0x01 },
43 { 35, 0x05 },
44 { 40, 0x02 },
45 { 45, 0x06 },
46 { 50, 0x00 },
47 { 55, 0x04 },
48 { 60, 0x0b },
49 { 65, 0x0f },
50 { 70, 0x09 },
51 { 75, 0x0d },
52 { 80, 0x0a },
53 { 85, 0x26 },
54 { 90, 0x20 },
55 { 100, 0x2b },
56 { 0, 0xff } /* error or unknown value */
57 };
58
59 /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */
60 struct {
61 unsigned int value; /* Front Side Bus speed in MHz */
62 u8 bitmap; /* power on configuration bits [18: 19]
63 (in MSR 0x2a) */
64 } msr_decode_fsb [] = {
65 { 66, 0x0 },
66 { 100, 0x2 },
67 { 133, 0x1 },
68 { 0, 0xff}
69 };
70
71 u32 msr_lo, msr_tmp;
72 int i = 0, j = 0;
73
74 /* read MSR 0x2a - we only need the low 32 bits */
75 rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
76 dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
77 msr_tmp = msr_lo;
78
79 /* decode the FSB */
80 msr_tmp &= 0x00c0000;
81 msr_tmp >>= 18;
82 while (msr_tmp != msr_decode_fsb[i].bitmap) {
83 if (msr_decode_fsb[i].bitmap == 0xff)
84 return 0;
85 i++;
86 }
87
88 /* decode the multiplier */
89 if (processor == SPEEDSTEP_PROCESSOR_PIII_C_EARLY) {
90 dprintk("workaround for early PIIIs\n");
91 msr_lo &= 0x03c00000;
92 } else
93 msr_lo &= 0x0bc00000;
94 msr_lo >>= 22;
95 while (msr_lo != msr_decode_mult[j].bitmap) {
96 if (msr_decode_mult[j].bitmap == 0xff)
97 return 0;
98 j++;
99 }
100
101 dprintk("speed is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
102
103 return (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100);
104}
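/*
 * Worked example of the decode above (illustrative value): with
 * msr_lo = 0x00880000, FSB bits [19:18] = 0x2 select 100 MHz and
 * multiplier bits [27,25:22] = 0x02 select ratio 40 (i.e. 4.0x),
 * so the function reports 40 * 100 * 100 = 400000 kHz -- a 400 MHz
 * Coppermine.
 */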
105
106
107static unsigned int pentiumM_get_frequency(void)
108{
109 u32 msr_lo, msr_tmp;
110
111 rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
112 dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
113
114 /* see table B-2 of 24547212.pdf */
115 if (msr_lo & 0x00040000) {
116 printk(KERN_DEBUG "speedstep-lib: PM - invalid FSB: 0x%x 0x%x\n", msr_lo, msr_tmp);
117 return 0;
118 }
119
120 msr_tmp = (msr_lo >> 22) & 0x1f;
121 dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000));
122
123 return (msr_tmp * 100 * 1000);
124}
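/*
 * Worked example: if bits 22-26 of msr_lo read 0x0d, the bus ratio is
 * 13 and the reported speed is 13 * 100 * 1000 = 1300000 kHz, i.e. a
 * 1.3 GHz Pentium M (the core clock is ratio x 100 MHz).
 */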
125
126
127static unsigned int pentium4_get_frequency(void)
128{
129 struct cpuinfo_x86 *c = &boot_cpu_data;
130 u32 msr_lo, msr_hi, mult;
131 unsigned int fsb = 0;
132
133 rdmsr(0x2c, msr_lo, msr_hi);
134
135 dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi);
136
137 /* decode the FSB: see IA-32 Intel (C) Architecture Software
138 * Developer's Manual, Volume 3: System Programming Guide,
139 * revision #12 in Table B-1: MSRs in the Pentium 4 and
140 * Intel Xeon Processors, on page B-4 and B-5.
141 */
142 if (c->x86_model < 2)
143 fsb = 100 * 1000;
144 else {
145 u8 fsb_code = (msr_lo >> 16) & 0x7;
146 switch (fsb_code) {
147 case 0:
148 fsb = 100 * 1000;
149 break;
150 case 1:
151 fsb = 13333 * 10;
152 break;
153 case 2:
154 fsb = 200 * 1000;
155 break;
156 }
157 }
158
159 if (!fsb)
160 printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to <linux@brodo.de>\n");
161
162 /* Multiplier. */
163 if (c->x86_model < 2)
164 mult = msr_lo >> 27;
165 else
166 mult = msr_lo >> 24;
167
168 dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult));
169
170 return (fsb * mult);
171}
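/*
 * Worked example: on a model >= 2 Pentium 4, fsb_code 1 selects
 * fsb = 133330 kHz; with a multiplier of 18 in bits 31-24 this gives
 * 133330 * 18 = 2399940 kHz, i.e. roughly a 2.4 GHz part.
 */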
172
173
174unsigned int speedstep_get_processor_frequency(unsigned int processor)
175{
176 switch (processor) {
177 case SPEEDSTEP_PROCESSOR_PM:
178 return pentiumM_get_frequency();
179 case SPEEDSTEP_PROCESSOR_P4D:
180 case SPEEDSTEP_PROCESSOR_P4M:
181 return pentium4_get_frequency();
182 case SPEEDSTEP_PROCESSOR_PIII_T:
183 case SPEEDSTEP_PROCESSOR_PIII_C:
184 case SPEEDSTEP_PROCESSOR_PIII_C_EARLY:
185 return pentium3_get_frequency(processor);
186 default:
187 return 0;
188	}
189 return 0;
190}
191EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency);
192
193
194/*********************************************************************
195 * DETECT SPEEDSTEP-CAPABLE PROCESSOR *
196 *********************************************************************/
197
198unsigned int speedstep_detect_processor (void)
199{
200 struct cpuinfo_x86 *c = cpu_data;
201 u32 ebx, msr_lo, msr_hi;
202
203 dprintk("x86: %x, model: %x\n", c->x86, c->x86_model);
204
205 if ((c->x86_vendor != X86_VENDOR_INTEL) ||
206 ((c->x86 != 6) && (c->x86 != 0xF)))
207 return 0;
208
209 if (c->x86 == 0xF) {
210 /* Intel Mobile Pentium 4-M
211 * or Intel Mobile Pentium 4 with 533 MHz FSB */
212 if (c->x86_model != 2)
213 return 0;
214
215 ebx = cpuid_ebx(0x00000001);
216 ebx &= 0x000000FF;
217
218 dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
219
220 switch (c->x86_mask) {
221 case 4:
222 /*
223 * B-stepping [M-P4-M]
224 * sample has ebx = 0x0f, production has 0x0e.
225 */
226 if ((ebx == 0x0e) || (ebx == 0x0f))
227 return SPEEDSTEP_PROCESSOR_P4M;
228 break;
229 case 7:
230 /*
231 * C-stepping [M-P4-M]
232 * needs to have ebx=0x0e, else it's a celeron:
233 * cf. 25130917.pdf / page 7, footnote 5 even
234 * though 25072120.pdf / page 7 doesn't say
235 * samples are only of B-stepping...
236 */
237 if (ebx == 0x0e)
238 return SPEEDSTEP_PROCESSOR_P4M;
239 break;
240 case 9:
241 /*
242 * D-stepping [M-P4-M or M-P4/533]
243 *
244 * this is totally strange: CPUID 0x0F29 is
245 * used by M-P4-M, M-P4/533 and(!) Celeron CPUs.
246 * The latter need to be sorted out as they don't
247 * support speedstep.
248 * Celerons with CPUID 0x0F29 may have either
249 * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything
250 * specific.
251 * M-P4-Ms may have either ebx=0xe or 0xf [see above]
252 * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf]
253 * also, M-P4M HTs have ebx=0x8, too
254 * For now, they are distinguished by the model_id string
255 */
256 if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL))
257 return SPEEDSTEP_PROCESSOR_P4M;
258 break;
259 default:
260 break;
261 }
262 return 0;
263 }
264
265 switch (c->x86_model) {
266 case 0x0B: /* Intel PIII [Tualatin] */
267 /* cpuid_ebx(1) is 0x04 for desktop PIII,
268 0x06 for mobile PIII-M */
269 ebx = cpuid_ebx(0x00000001);
270 dprintk("ebx is %x\n", ebx);
271
272 ebx &= 0x000000FF;
273
274 if (ebx != 0x06)
275 return 0;
276
277 /* So far all PIII-M processors support SpeedStep. See
278 * Intel's 24540640.pdf of June 2003
279 */
280
281 return SPEEDSTEP_PROCESSOR_PIII_T;
282
283 case 0x08: /* Intel PIII [Coppermine] */
284
285 /* all mobile PIII Coppermines have FSB 100 MHz
286 * ==> sort out a few desktop PIIIs. */
287 rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi);
288 dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi);
289 msr_lo &= 0x00c0000;
290 if (msr_lo != 0x0080000)
291 return 0;
292
293 /*
294 * If the processor is a mobile version,
295 * platform ID has bit 50 set
296 * it has SpeedStep technology if either
297 * bit 56 or 57 is set
298 */
299 rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi);
300 dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi);
301 if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
302 if (c->x86_mask == 0x01) {
303 dprintk("early PIII version\n");
304 return SPEEDSTEP_PROCESSOR_PIII_C_EARLY;
305 } else
306 return SPEEDSTEP_PROCESSOR_PIII_C;
307 }
308
309 default:
310 return 0;
311 }
312}
313EXPORT_SYMBOL_GPL(speedstep_detect_processor);
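/*
 * Worked example of the checks above: a mobile Tualatin PIII-M reports
 * family 6, model 0x0B and cpuid_ebx(1) & 0xff == 0x06, so it is
 * classified as SPEEDSTEP_PROCESSOR_PIII_T; a desktop Tualatin reports
 * 0x04 in those ebx bits and is rejected.
 */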
314
315
316/*********************************************************************
317 * DETECT SPEEDSTEP SPEEDS *
318 *********************************************************************/
319
320unsigned int speedstep_get_freqs(unsigned int processor,
321 unsigned int *low_speed,
322 unsigned int *high_speed,
323 void (*set_state) (unsigned int state))
324{
325 unsigned int prev_speed;
326 unsigned int ret = 0;
327 unsigned long flags;
328
329 if ((!processor) || (!low_speed) || (!high_speed) || (!set_state))
330 return -EINVAL;
331
332 dprintk("trying to determine both speeds\n");
333
334 /* get current speed */
335 prev_speed = speedstep_get_processor_frequency(processor);
336 if (!prev_speed)
337 return -EIO;
338
339	dprintk("previous speed is %u\n", prev_speed);
340
341 local_irq_save(flags);
342
343 /* switch to low state */
344 set_state(SPEEDSTEP_LOW);
345 *low_speed = speedstep_get_processor_frequency(processor);
346 if (!*low_speed) {
347 ret = -EIO;
348 goto out;
349 }
350
351	dprintk("low speed is %u\n", *low_speed);
352
353 /* switch to high state */
354 set_state(SPEEDSTEP_HIGH);
355 *high_speed = speedstep_get_processor_frequency(processor);
356 if (!*high_speed) {
357 ret = -EIO;
358 goto out;
359 }
360
361	dprintk("high speed is %u\n", *high_speed);
362
363 if (*low_speed == *high_speed) {
364 ret = -ENODEV;
365 goto out;
366 }
367
368 /* switch to previous state, if necessary */
369 if (*high_speed != prev_speed)
370 set_state(SPEEDSTEP_LOW);
371
372 out:
373 local_irq_restore(flags);
374 return (ret);
375}
376EXPORT_SYMBOL_GPL(speedstep_get_freqs);
377
378#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
379module_param(relaxed_check, int, 0444);
380MODULE_PARM_DESC(relaxed_check, "Don't do all checks for speedstep capability.");
381#endif
382
383MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>");
384MODULE_DESCRIPTION ("Library for Intel SpeedStep 1 or 2 cpufreq drivers.");
385MODULE_LICENSE ("GPL");
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h
new file mode 100644
index 000000000000..261a2c9b7f6b
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h
@@ -0,0 +1,47 @@
1/*
2 * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
3 *
4 * Licensed under the terms of the GNU GPL License version 2.
5 *
6 * Library for common functions for Intel SpeedStep v.1 and v.2 support
7 *
8 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
9 */
10
11
12
13/* processors */
14
15#define SPEEDSTEP_PROCESSOR_PIII_C_EARLY 0x00000001 /* Coppermine core */
16#define SPEEDSTEP_PROCESSOR_PIII_C 0x00000002 /* Coppermine core */
17#define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */
18#define SPEEDSTEP_PROCESSOR_P4M 0x00000004 /* P4-M */
19
20/* the following processors are not speedstep-capable and are not auto-detected
21 * in speedstep_detect_processor(). However, their speed can be detected using
22 * the speedstep_get_processor_frequency() call. */
23#define SPEEDSTEP_PROCESSOR_PM 0xFFFFFF03 /* Pentium M */
24#define SPEEDSTEP_PROCESSOR_P4D 0xFFFFFF04 /* desktop P4 */
25
26/* speedstep states -- only two of them */
27
28#define SPEEDSTEP_HIGH 0x00000000
29#define SPEEDSTEP_LOW 0x00000001
30
31
32/* detect a speedstep-capable processor */
33extern unsigned int speedstep_detect_processor (void);
34
35/* detect the current speed (in kHz) of the processor */
36extern unsigned int speedstep_get_processor_frequency(unsigned int processor);
37
38
39/* detect the low and high speeds of the processor. The callback
40 * set_state's argument is either SPEEDSTEP_HIGH or SPEEDSTEP_LOW;
41 * during this probing no cpufreq_notify_transition calls are
42 * initiated.
43 */
44extern unsigned int speedstep_get_freqs(unsigned int processor,
45 unsigned int *low_speed,
46 unsigned int *high_speed,
47 void (*set_state) (unsigned int state));
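/*
 * A minimal consumer sketch (illustration only; the in-tree users are
 * the speedstep-ich and speedstep-smi drivers):
 *
 *	unsigned int proc = speedstep_detect_processor();
 *	unsigned int low, high;
 *
 *	if (proc && !speedstep_get_freqs(proc, &low, &high, my_set_state))
 *		printk(KERN_INFO "speeds: %u - %u kHz\n", low, high);
 *
 * where my_set_state() is the driver's own transition routine.
 */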
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c
new file mode 100644
index 000000000000..79440b3f087e
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c
@@ -0,0 +1,424 @@
1/*
2 * Intel SpeedStep SMI driver.
3 *
4 * (C) 2003 Hiroshi Miura <miura@da-cha.org>
5 *
6 * Licensed under the terms of the GNU GPL License version 2.
7 *
8 */
9
10
11/*********************************************************************
12 * SPEEDSTEP - DEFINITIONS *
13 *********************************************************************/
14
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/moduleparam.h>
18#include <linux/init.h>
19#include <linux/cpufreq.h>
20#include <linux/pci.h>
21#include <linux/slab.h>
22#include <linux/delay.h>
23#include <asm/ist.h>
24
25#include "speedstep-lib.h"
26
27/* speedstep system management interface port/command.
28 *
29 * These parameters are obtained from the IST-SMI BIOS call.
30 * If the user supplies them as module parameters, those are used instead.
31 *
32 */
33static int smi_port = 0;
34static int smi_cmd = 0;
35static unsigned int smi_sig = 0;
36
37/* info about the processor */
38static unsigned int speedstep_processor = 0;
39
40/*
41 * There are only two frequency states for each processor. Values
42 * are in kHz for the time being.
43 */
44static struct cpufreq_frequency_table speedstep_freqs[] = {
45 {SPEEDSTEP_HIGH, 0},
46 {SPEEDSTEP_LOW, 0},
47 {0, CPUFREQ_TABLE_END},
48};
49
50#define GET_SPEEDSTEP_OWNER 0
51#define GET_SPEEDSTEP_STATE 1
52#define SET_SPEEDSTEP_STATE 2
53#define GET_SPEEDSTEP_FREQS 4
54
55/* how often the SMI call is retried if it fails, e.g. because
56 * of DMA activity going on */
57#define SMI_TRIES 5
58
59#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-smi", msg)
60
61/**
62 * speedstep_smi_ownership
63 */
64static int speedstep_smi_ownership (void)
65{
66 u32 command, result, magic;
67 u32 function = GET_SPEEDSTEP_OWNER;
68 unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation";
69
70 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
71 magic = virt_to_phys(magic_data);
72
73 dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port);
74
75 __asm__ __volatile__(
76 "out %%al, (%%dx)\n"
77 : "=D" (result)
78 : "a" (command), "b" (function), "c" (0), "d" (smi_port), "D" (0), "S" (magic)
79 );
80
81 dprintk("result is %x\n", result);
82
83 return result;
84}
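/*
 * The SMI calling convention, as far as it is understood here: the
 * command word (signature | smi_cmd) is passed in EAX, the function
 * number in EBX, a state argument in ECX and the magic pointer in ESI;
 * the "out %al, (%dx)" to smi_port traps into the BIOS, which returns
 * its result in EDI (and, for some functions, in EAX-EDX as well).
 */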
85
86/**
87 * speedstep_smi_get_freqs - get SpeedStep preferred & current freq.
88 * @low: the low frequency value is placed here
89 * @high: the high frequency value is placed here
90 *
91 * Only available on later SpeedStep-enabled systems, returns false results or
92 * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing
93 * shows that the latter occurs if !(ist_info.event & 0xFFFF).
94 */
95static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
96{
97 u32 command, result = 0, edi, high_mhz, low_mhz;
98 u32 state=0;
99 u32 function = GET_SPEEDSTEP_FREQS;
100
101 if (!(ist_info.event & 0xFFFF)) {
102		dprintk("bug #1422 -- can't read freqs from BIOS\n");
103 return -ENODEV;
104 }
105
106 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
107
108 dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port);
109
110 __asm__ __volatile__("movl $0, %%edi\n"
111 "out %%al, (%%dx)\n"
112 : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi)
113 : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
114 );
115
116 dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz);
117
118 /* abort if results are obviously incorrect... */
119 if ((high_mhz + low_mhz) < 600)
120 return -EINVAL;
121
122 *high = high_mhz * 1000;
123 *low = low_mhz * 1000;
124
125 return result;
126}
127
128/**
129 * speedstep_get_state - read the current SpeedStep state
130 *
131 * Returns the current frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH).
132 */
133static int speedstep_get_state (void)
134{
135 u32 function=GET_SPEEDSTEP_STATE;
136 u32 result, state, edi, command;
137
138 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
139
140 dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port);
141
142 __asm__ __volatile__("movl $0, %%edi\n"
143 "out %%al, (%%dx)\n"
144 : "=a" (result), "=b" (state), "=D" (edi)
145 : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0)
146 );
147
148 dprintk("state is %x, result is %x\n", state, result);
149
150 return (state & 1);
151}
152
153
154/**
155 * speedstep_set_state - set the SpeedStep state
156 * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
157 *
158 */
159static void speedstep_set_state (unsigned int state)
160{
161 unsigned int result = 0, command, new_state;
162 unsigned long flags;
163 unsigned int function=SET_SPEEDSTEP_STATE;
164 unsigned int retry = 0;
165
166 if (state > 0x1)
167 return;
168
169 /* Disable IRQs */
170 local_irq_save(flags);
171
172 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
173
174 dprintk("trying to set frequency to state %u with command %x at port %x\n", state, command, smi_port);
175
176 do {
177 if (retry) {
178 dprintk("retry %u, previous result %u, waiting...\n", retry, result);
179 mdelay(retry * 50);
180 }
181 retry++;
182 __asm__ __volatile__(
183 "movl $0, %%edi\n"
184 "out %%al, (%%dx)\n"
185 : "=b" (new_state), "=D" (result)
186 : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
187 );
188 } while ((new_state != state) && (retry <= SMI_TRIES));
189
190 /* enable IRQs */
191 local_irq_restore(flags);
192
193 if (new_state == state) {
194 dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result);
195 } else {
196 printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result);
197 }
198
199 return;
200}
201
202
203/**
204 * speedstep_target - set a new CPUFreq policy
205 * @policy: new policy
206 * @target_freq: the target frequency
207 * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
208 *
209 * Sets a new CPUFreq policy/freq.
210 */
211static int speedstep_target (struct cpufreq_policy *policy,
212 unsigned int target_freq, unsigned int relation)
213{
214 unsigned int newstate = 0;
215 struct cpufreq_freqs freqs;
216
217 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
218 return -EINVAL;
219
220 freqs.old = speedstep_freqs[speedstep_get_state()].frequency;
221 freqs.new = speedstep_freqs[newstate].frequency;
222 freqs.cpu = 0; /* speedstep.c is UP only driver */
223
224 if (freqs.old == freqs.new)
225 return 0;
226
227 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
228 speedstep_set_state(newstate);
229 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
230
231 return 0;
232}
233
234
235/**
236 * speedstep_verify - verifies a new CPUFreq policy
237 * @policy: new policy
238 *
239 * Limit must be within speedstep_low_freq and speedstep_high_freq, with
240 * at least one border included.
241 */
242static int speedstep_verify (struct cpufreq_policy *policy)
243{
244 return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
245}
246
247
248static int speedstep_cpu_init(struct cpufreq_policy *policy)
249{
250 int result;
251 unsigned int speed,state;
252
253 /* capability check */
254 if (policy->cpu != 0)
255 return -ENODEV;
256
257 result = speedstep_smi_ownership();
258 if (result) {
259		dprintk("failed to acquire ownership of the SMI interface.\n");
260 return -EINVAL;
261 }
262
263 /* detect low and high frequency */
264 result = speedstep_smi_get_freqs(&speedstep_freqs[SPEEDSTEP_LOW].frequency,
265 &speedstep_freqs[SPEEDSTEP_HIGH].frequency);
266 if (result) {
267		/* fall back to the speedstep-lib.c detection mechanism: try both states out */
268 dprintk("could not detect low and high frequencies by SMI call.\n");
269 result = speedstep_get_freqs(speedstep_processor,
270 &speedstep_freqs[SPEEDSTEP_LOW].frequency,
271 &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
272 &speedstep_set_state);
273
274 if (result) {
275 dprintk("could not detect two different speeds -- aborting.\n");
276 return result;
277 } else
278 dprintk("workaround worked.\n");
279 }
280
281 /* get current speed setting */
282 state = speedstep_get_state();
283 speed = speedstep_freqs[state].frequency;
284
285 dprintk("currently at %s speed setting - %i MHz\n",
286 (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high",
287 (speed / 1000));
288
289 /* cpuinfo and default policy values */
290 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
291 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
292 policy->cur = speed;
293
294 result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
295 if (result)
296 return (result);
297
298 cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
299
300 return 0;
301}
302
303static int speedstep_cpu_exit(struct cpufreq_policy *policy)
304{
305 cpufreq_frequency_table_put_attr(policy->cpu);
306 return 0;
307}
308
309static unsigned int speedstep_get(unsigned int cpu)
310{
311 if (cpu)
312 return -ENODEV;
313 return speedstep_get_processor_frequency(speedstep_processor);
314}
315
316
317static int speedstep_resume(struct cpufreq_policy *policy)
318{
319 int result = speedstep_smi_ownership();
320
321 if (result)
322		dprintk("failed to re-acquire ownership of the SMI interface.\n");
323
324 return result;
325}
326
327static struct freq_attr* speedstep_attr[] = {
328 &cpufreq_freq_attr_scaling_available_freqs,
329 NULL,
330};
331
332static struct cpufreq_driver speedstep_driver = {
333 .name = "speedstep-smi",
334 .verify = speedstep_verify,
335 .target = speedstep_target,
336 .init = speedstep_cpu_init,
337 .exit = speedstep_cpu_exit,
338 .get = speedstep_get,
339 .resume = speedstep_resume,
340 .owner = THIS_MODULE,
341 .attr = speedstep_attr,
342};
343
344/**
345 * speedstep_init - initializes the SpeedStep CPUFreq driver
346 *
347 * Initializes the SpeedStep support. Returns -ENODEV on unsupported
348 * BIOS, -EINVAL on problems during initialization, and zero on
349 * success.
350 */
351static int __init speedstep_init(void)
352{
353 speedstep_processor = speedstep_detect_processor();
354
355 switch (speedstep_processor) {
356 case SPEEDSTEP_PROCESSOR_PIII_T:
357 case SPEEDSTEP_PROCESSOR_PIII_C:
358 case SPEEDSTEP_PROCESSOR_PIII_C_EARLY:
359 break;
360 default:
361 speedstep_processor = 0;
362 }
363
364 if (!speedstep_processor) {
365 dprintk ("No supported Intel CPU detected.\n");
366 return -ENODEV;
367 }
368
369 dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n",
370 ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level);
371
372
373 /* Error if no IST-SMI BIOS or no PARM
374 sig= 'ISGE' aka 'Intel Speedstep Gate E' */
375 if ((ist_info.signature != 0x47534943) && (
376 (smi_port == 0) || (smi_cmd == 0)))
377 return -ENODEV;
378
379 if (smi_sig == 1)
380 smi_sig = 0x47534943;
381 else
382 smi_sig = ist_info.signature;
383
384	/* set up smi_port from the module parameter or the BIOS */
385 if ((smi_port > 0xff) || (smi_port < 0)) {
386 return -EINVAL;
387 } else if (smi_port == 0) {
388 smi_port = ist_info.command & 0xff;
389 }
390
391 if ((smi_cmd > 0xff) || (smi_cmd < 0)) {
392 return -EINVAL;
393 } else if (smi_cmd == 0) {
394 smi_cmd = (ist_info.command >> 16) & 0xff;
395 }
396
397 return cpufreq_register_driver(&speedstep_driver);
398}
399
400
401/**
402 * speedstep_exit - unregisters SpeedStep support
403 *
404 * Unregisters SpeedStep support.
405 */
406static void __exit speedstep_exit(void)
407{
408 cpufreq_unregister_driver(&speedstep_driver);
409}
410
411module_param(smi_port, int, 0444);
412module_param(smi_cmd, int, 0444);
413module_param(smi_sig, uint, 0444);
414
415MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value -- Intel's default setting is 0xb2");
416MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value -- Intel's default setting is 0x82");
417MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the SMI interface.");
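/*
 * Usage example (hypothetical override -- normally the BIOS-provided
 * values are used): forcing Intel's default port and command would be
 *
 *	modprobe speedstep-smi smi_port=0xb2 smi_cmd=0x82
 */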
418
419MODULE_AUTHOR ("Hiroshi Miura");
420MODULE_DESCRIPTION ("Speedstep driver for IST applet SMI interface.");
421MODULE_LICENSE ("GPL");
422
423module_init(speedstep_init);
424module_exit(speedstep_exit);
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
new file mode 100644
index 000000000000..ba4b01138c8f
--- /dev/null
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -0,0 +1,439 @@
1#include <linux/init.h>
2#include <linux/bitops.h>
3#include <linux/delay.h>
4#include <linux/pci.h>
5#include <asm/dma.h>
6#include <asm/io.h>
7#include <asm/processor.h>
8#include <asm/timer.h>
9
10#include "cpu.h"
11
12/*
13 * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info about the CPU
14 */
15static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
16{
17 unsigned char ccr2, ccr3;
18 unsigned long flags;
19
20 /* we test for DEVID by checking whether CCR3 is writable */
21 local_irq_save(flags);
22 ccr3 = getCx86(CX86_CCR3);
23 setCx86(CX86_CCR3, ccr3 ^ 0x80);
24 getCx86(0xc0); /* dummy to change bus */
25
26 if (getCx86(CX86_CCR3) == ccr3) { /* no DEVID regs. */
27 ccr2 = getCx86(CX86_CCR2);
28 setCx86(CX86_CCR2, ccr2 ^ 0x04);
29 getCx86(0xc0); /* dummy */
30
31 if (getCx86(CX86_CCR2) == ccr2) /* old Cx486SLC/DLC */
32 *dir0 = 0xfd;
33 else { /* Cx486S A step */
34 setCx86(CX86_CCR2, ccr2);
35 *dir0 = 0xfe;
36 }
37 }
38 else {
39 setCx86(CX86_CCR3, ccr3); /* restore CCR3 */
40
41 /* read DIR0 and DIR1 CPU registers */
42 *dir0 = getCx86(CX86_DIR0);
43 *dir1 = getCx86(CX86_DIR1);
44 }
45 local_irq_restore(flags);
46}
47
48/*
49 * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in
50 * order to identify the Cyrix CPU model after we're out of setup.c
51 *
52 * Actually, since bugs.h doesn't even reference this, perhaps someone
53 * should fix the documentation ???
54 */
55static unsigned char Cx86_dir0_msb __initdata = 0;
56
57static char Cx86_model[][9] __initdata = {
58 "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
59 "M II ", "Unknown"
60};
61static char Cx486_name[][5] __initdata = {
62 "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
63 "SRx2", "DRx2"
64};
65static char Cx486S_name[][4] __initdata = {
66 "S", "S2", "Se", "S2e"
67};
68static char Cx486D_name[][4] __initdata = {
69 "DX", "DX2", "?", "?", "?", "DX4"
70};
71static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock";
72static char cyrix_model_mult1[] __initdata = "12??43";
73static char cyrix_model_mult2[] __initdata = "12233445";
74
75/*
76 * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
77 * BIOSes for compatibility with DOS games. This makes the udelay loop
78 * work correctly, and improves performance.
79 *
80 * FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP
81 */
82
83extern void calibrate_delay(void) __init;
84
85static void __init check_cx686_slop(struct cpuinfo_x86 *c)
86{
87 unsigned long flags;
88
89 if (Cx86_dir0_msb == 3) {
90 unsigned char ccr3, ccr5;
91
92 local_irq_save(flags);
93 ccr3 = getCx86(CX86_CCR3);
94 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
95 ccr5 = getCx86(CX86_CCR5);
96 if (ccr5 & 2)
97 setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */
98 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
99 local_irq_restore(flags);
100
101 if (ccr5 & 2) { /* possible wrong calibration done */
102 printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n");
103 calibrate_delay();
104 c->loops_per_jiffy = loops_per_jiffy;
105 }
106 }
107}
108
109
110static void __init set_cx86_reorder(void)
111{
112 u8 ccr3;
113
114 printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
115 ccr3 = getCx86(CX86_CCR3);
116	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
117
118	/* Load/Store Serialize to mem access disable (=reorder it) */
119 setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80);
120 /* set load/store serialize from 1GB to 4GB */
121 ccr3 |= 0xe0;
122 setCx86(CX86_CCR3, ccr3);
123}
124
125static void __init set_cx86_memwb(void)
126{
127 u32 cr0;
128
129 printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
130
131 /* CCR2 bit 2: unlock NW bit */
132 setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
133 /* set 'Not Write-through' */
134 cr0 = 0x20000000;
135 __asm__("movl %%cr0,%%eax\n\t"
136 "orl %0,%%eax\n\t"
137 "movl %%eax,%%cr0\n"
138 : : "r" (cr0)
139 :"ax");
140 /* CCR2 bit 2: lock NW bit and set WT1 */
141 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
142}
143
144static void __init set_cx86_inc(void)
145{
146 unsigned char ccr3;
147
148 printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n");
149
150 ccr3 = getCx86(CX86_CCR3);
151	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
152 /* PCR1 -- Performance Control */
153 /* Incrementor on, whatever that is */
154 setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02);
155 /* PCR0 -- Performance Control */
156 /* Incrementor Margin 10 */
157 setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04);
158 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
159}
160
161/*
162 * Configure later MediaGX and/or Geode processor.
163 */
164
165static void __init geode_configure(void)
166{
167 unsigned long flags;
168 u8 ccr3, ccr4;
169 local_irq_save(flags);
170
171 /* Suspend on halt power saving and enable #SUSP pin */
172 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
173
174 ccr3 = getCx86(CX86_CCR3);
175 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* Enable */
176
177 ccr4 = getCx86(CX86_CCR4);
178 ccr4 |= 0x38; /* FPU fast, DTE cache, Mem bypass */
179
180 setCx86(CX86_CCR3, ccr3);
181
182 set_cx86_memwb();
183 set_cx86_reorder();
184 set_cx86_inc();
185
186 local_irq_restore(flags);
187}
188
189
190#ifdef CONFIG_PCI
191static struct pci_device_id cyrix_55x0[] = {
192 { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) },
193 { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) },
194 { },
195};
196#endif
197
198static void __init init_cyrix(struct cpuinfo_x86 *c)
199{
200 unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
201 char *buf = c->x86_model_id;
202 const char *p = NULL;
203
204 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
205 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
206 clear_bit(0*32+31, c->x86_capability);
207
208 /* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */
209 if ( test_bit(1*32+24, c->x86_capability) ) {
210 clear_bit(1*32+24, c->x86_capability);
211 set_bit(X86_FEATURE_CXMMX, c->x86_capability);
212 }
213
214 do_cyrix_devid(&dir0, &dir1);
215
216 check_cx686_slop(c);
217
218 Cx86_dir0_msb = dir0_msn = dir0 >> 4; /* identifies CPU "family" */
219 dir0_lsn = dir0 & 0xf; /* model or clock multiplier */
220
221 /* common case step number/rev -- exceptions handled below */
222 c->x86_model = (dir1 >> 4) + 1;
223 c->x86_mask = dir1 & 0xf;
224
225 /* Now cook; the original recipe is by Channing Corn, from Cyrix.
226 * We do the same thing for each generation: we work out
227 * the model, multiplier and stepping. Black magic included,
228 * to make the silicon step/rev numbers match the printed ones.
229 */
230
231 switch (dir0_msn) {
232 unsigned char tmp;
233
234 case 0: /* Cx486SLC/DLC/SRx/DRx */
235 p = Cx486_name[dir0_lsn & 7];
236 break;
237
238 case 1: /* Cx486S/DX/DX2/DX4 */
239 p = (dir0_lsn & 8) ? Cx486D_name[dir0_lsn & 5]
240 : Cx486S_name[dir0_lsn & 3];
241 break;
242
243 case 2: /* 5x86 */
244 Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5];
245 p = Cx86_cb+2;
246 break;
247
248 case 3: /* 6x86/6x86L */
249 Cx86_cb[1] = ' ';
250 Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5];
251 if (dir1 > 0x21) { /* 686L */
252 Cx86_cb[0] = 'L';
253 p = Cx86_cb;
254 (c->x86_model)++;
255 } else /* 686 */
256 p = Cx86_cb+1;
257 /* Emulate MTRRs using Cyrix's ARRs. */
258 set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability);
259 /* 6x86's contain this bug */
260 c->coma_bug = 1;
261 break;
262
263 case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
264#ifdef CONFIG_PCI
265 /* It isn't really a PCI quirk directly, but the cure is the
266 same. The MediaGX has deep magic SMM stuff that handles the
267	   SB emulation. It throws away the fifo on disable_dma() which
268 is wrong and ruins the audio.
269
270 Bug2: VSA1 has a wrap bug so that using maximum sized DMA
271 causes bad things. According to NatSemi VSA2 has another
272 bug to do with 'hlt'. I've not seen any boards using VSA2
273 and X doesn't seem to support it either so who cares 8).
274 VSA1 we work around however.
275 */
276
277 printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
278 isa_dma_bridge_buggy = 2;
279#endif
280		c->x86_cache_size=16;	/* Yep 16K integrated cache that's it */
281
282 /*
283 * The 5510/5520 companion chips have a funky PIT.
284 */
285 if (pci_dev_present(cyrix_55x0))
286 pit_latch_buggy = 1;
287
288		/* GXm supports extended cpuid levels 'a la' AMD */
289 if (c->cpuid_level == 2) {
290 /* Enable cxMMX extensions (GX1 Datasheet 54) */
291 setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
292
293 /* GXlv/GXm/GX1 */
294 if((dir1 >= 0x50 && dir1 <= 0x54) || dir1 >= 0x63)
295 geode_configure();
296 get_model_name(c); /* get CPU marketing name */
297 return;
298 }
299 else { /* MediaGX */
300 Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4';
301 p = Cx86_cb+2;
302 c->x86_model = (dir1 & 0x20) ? 1 : 2;
303 }
304 break;
305
306 case 5: /* 6x86MX/M II */
307 if (dir1 > 7)
308 {
309 dir0_msn++; /* M II */
310 /* Enable MMX extensions (App note 108) */
311 setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
312 }
313 else
314 {
315 c->coma_bug = 1; /* 6x86MX, it has the bug. */
316 }
317 tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0;
318 Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7];
319 p = Cx86_cb+tmp;
320 if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20))
321 (c->x86_model)++;
322 /* Emulate MTRRs using Cyrix's ARRs. */
323 set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability);
324 break;
325
326 case 0xf: /* Cyrix 486 without DEVID registers */
327 switch (dir0_lsn) {
328 case 0xd: /* either a 486SLC or DLC w/o DEVID */
329 dir0_msn = 0;
330 p = Cx486_name[(c->hard_math) ? 1 : 0];
331 break;
332
333 case 0xe: /* a 486S A step */
334 dir0_msn = 0;
335 p = Cx486S_name[0];
336 break;
337 }
338 break;
339
340 default: /* unknown (shouldn't happen, we know everyone ;-) */
341 dir0_msn = 7;
342 break;
343 }
344 strcpy(buf, Cx86_model[dir0_msn & 7]);
345 if (p) strcat(buf, p);
346 return;
347}
348
349/*
350 * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected
351 * by the fact that they preserve the flags across the division of 5/2.
352 * PII and PPro exhibit this behavior too, but they have cpuid available.
353 */
354
355/*
356 * Perform the Cyrix 5/2 test. A Cyrix won't change
357 * the flags, while other 486 chips will.
358 */
359static inline int test_cyrix_52div(void)
360{
361 unsigned int test;
362
363 __asm__ __volatile__(
364 "sahf\n\t" /* clear flags (%eax = 0x0005) */
365 "div %b2\n\t" /* divide 5 by 2 */
366 "lahf" /* store flags into %ah */
367 : "=a" (test)
368 : "0" (5), "q" (2)
369 : "cc");
370
371 /* AH is 0x02 on Cyrix after the divide.. */
372 return (unsigned char) (test >> 8) == 0x02;
373}
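/*
 * Why 0x02: %eax = 0x0005 means %ah = 0, so sahf clears the arithmetic
 * flags; lahf then reads them back with only bit 1 (always set in
 * EFLAGS) as one.  A Cyrix leaves the flags untouched across the div,
 * so %ah reads back as exactly 0x02; other 486-class CPUs modify the
 * flags during division and return something else.
 */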
374
375static void cyrix_identify(struct cpuinfo_x86 * c)
376{
377 /* Detect Cyrix with disabled CPUID */
378 if ( c->x86 == 4 && test_cyrix_52div() ) {
379 unsigned char dir0, dir1;
380
381 strcpy(c->x86_vendor_id, "CyrixInstead");
382 c->x86_vendor = X86_VENDOR_CYRIX;
383
384 /* Actually enable cpuid on the older cyrix */
385
386 /* Retrieve CPU revisions */
387
388 do_cyrix_devid(&dir0, &dir1);
389
390 dir0>>=4;
391
392 /* Check it is an affected model */
393
394 if (dir0 == 5 || dir0 == 3)
395 {
396 unsigned char ccr3, ccr4;
397 unsigned long flags;
398 printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
399 local_irq_save(flags);
400 ccr3 = getCx86(CX86_CCR3);
401 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
402 ccr4 = getCx86(CX86_CCR4);
403 setCx86(CX86_CCR4, ccr4 | 0x80); /* enable cpuid */
404 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
405 local_irq_restore(flags);
406 }
407 }
408 generic_identify(c);
409}
410
411static struct cpu_dev cyrix_cpu_dev __initdata = {
412 .c_vendor = "Cyrix",
413 .c_ident = { "CyrixInstead" },
414 .c_init = init_cyrix,
415 .c_identify = cyrix_identify,
416};
417
418int __init cyrix_init_cpu(void)
419{
420 cpu_devs[X86_VENDOR_CYRIX] = &cyrix_cpu_dev;
421 return 0;
422}
423
424//early_arch_initcall(cyrix_init_cpu);
425
426static struct cpu_dev nsc_cpu_dev __initdata = {
427 .c_vendor = "NSC",
428 .c_ident = { "Geode by NSC" },
429 .c_init = init_cyrix,
430 .c_identify = generic_identify,
431};
432
433int __init nsc_init_cpu(void)
434{
435 cpu_devs[X86_VENDOR_NSC] = &nsc_cpu_dev;
436 return 0;
437}
438
439//early_arch_initcall(nsc_init_cpu);
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
new file mode 100644
index 000000000000..b8d847b850dc
--- /dev/null
+++ b/arch/i386/kernel/cpu/intel.c
@@ -0,0 +1,248 @@
1#include <linux/config.h>
2#include <linux/init.h>
3#include <linux/kernel.h>
4
5#include <linux/string.h>
6#include <linux/bitops.h>
7#include <linux/smp.h>
8#include <linux/thread_info.h>
9
10#include <asm/processor.h>
11#include <asm/msr.h>
12#include <asm/uaccess.h>
13
14#include "cpu.h"
15
16#ifdef CONFIG_X86_LOCAL_APIC
17#include <asm/mpspec.h>
18#include <asm/apic.h>
19#include <mach_apic.h>
20#endif
21
22extern int trap_init_f00f_bug(void);
23
24#ifdef CONFIG_X86_INTEL_USERCOPY
25/*
26 * Alignment at which movsl is preferred for bulk memory copies.
27 */
28struct movsl_mask movsl_mask;
29#endif
30
31void __init early_intel_workaround(struct cpuinfo_x86 *c)
32{
33 if (c->x86_vendor != X86_VENDOR_INTEL)
34 return;
35 /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
36 if (c->x86 == 15 && c->x86_cache_alignment == 64)
37 c->x86_cache_alignment = 128;
38}
39
40/*
41 * Early probe support logic for ppro memory erratum #50
42 *
43 * This is called before we do cpu ident work
44 */
45
46int __init ppro_with_ram_bug(void)
47{
48 /* Uses data from early_cpu_detect now */
49 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
50 boot_cpu_data.x86 == 6 &&
51 boot_cpu_data.x86_model == 1 &&
52 boot_cpu_data.x86_mask < 8) {
53 printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n");
54 return 1;
55 }
56 return 0;
57}
58
59
60/*
61 * P4 Xeon errata 037 workaround.
62 * Hardware prefetcher may cause stale data to be loaded into the cache.
63 */
64static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c)
65{
66 unsigned long lo, hi;
67
68 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
69 rdmsr (MSR_IA32_MISC_ENABLE, lo, hi);
70 if ((lo & (1<<9)) == 0) {
71 printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
72 printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
73 lo |= (1<<9); /* Disable hw prefetching */
74 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
75 }
76 }
77}
78
79
80static void __init init_intel(struct cpuinfo_x86 *c)
81{
82 unsigned int l2 = 0;
83 char *p = NULL;
84
85#ifdef CONFIG_X86_F00F_BUG
86 /*
87 * All current models of Pentium and Pentium with MMX technology CPUs
88 * have the F0 0F bug, which lets nonprivileged users lock up the system.
89 * Note that the workaround only should be initialized once...
90 */
91 c->f00f_bug = 0;
92 if ( c->x86 == 5 ) {
93 static int f00f_workaround_enabled = 0;
94
95 c->f00f_bug = 1;
96 if ( !f00f_workaround_enabled ) {
97 trap_init_f00f_bug();
98 printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
99 f00f_workaround_enabled = 1;
100 }
101 }
102#endif
103
104 select_idle_routine(c);
105 l2 = init_intel_cacheinfo(c);
106
107 /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
108 if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
109 clear_bit(X86_FEATURE_SEP, c->x86_capability);
110
111 /* Names for the Pentium II/Celeron processors
112 detectable only by also checking the cache size.
113 Dixon is NOT a Celeron. */
114 if (c->x86 == 6) {
115 switch (c->x86_model) {
116 case 5:
117 if (c->x86_mask == 0) {
118 if (l2 == 0)
119 p = "Celeron (Covington)";
120 else if (l2 == 256)
121 p = "Mobile Pentium II (Dixon)";
122 }
123 break;
124
125 case 6:
126 if (l2 == 128)
127 p = "Celeron (Mendocino)";
128 else if (c->x86_mask == 0 || c->x86_mask == 5)
129 p = "Celeron-A";
130 break;
131
132 case 8:
133 if (l2 == 128)
134 p = "Celeron (Coppermine)";
135 break;
136 }
137 }
138
139 if ( p )
140 strcpy(c->x86_model_id, p);
141
142 detect_ht(c);
143
144 /* Work around errata */
145 Intel_errata_workarounds(c);
146
147#ifdef CONFIG_X86_INTEL_USERCOPY
148 /*
149 * Set up the preferred alignment for movsl bulk memory moves
150 */
151 switch (c->x86) {
152 case 4: /* 486: untested */
153 break;
154 case 5: /* Old Pentia: untested */
155 break;
156 case 6: /* PII/PIII only like movsl with 8-byte alignment */
157 movsl_mask.mask = 7;
158 break;
159 case 15: /* P4 is OK down to 8-byte alignment */
160 movsl_mask.mask = 7;
161 break;
162 }
163#endif
164
165 if (c->x86 == 15)
166 set_bit(X86_FEATURE_P4, c->x86_capability);
167 if (c->x86 == 6)
168 set_bit(X86_FEATURE_P3, c->x86_capability);
169}
170
171
172static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
173{
174 /* Intel PIII Tualatin. This comes in two flavours.
175 * One has 256kb of cache, the other 512. We have no way
176 * to determine which, so we use a boottime override
177 * for the 512kb model, and assume 256 otherwise.
178 */
179 if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0))
180 size = 256;
181 return size;
182}
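/*
 * The boottime override mentioned above is the generic "cachesize="
 * kernel parameter; e.g. booting a 512kb Tualatin with cachesize=512
 * makes the reported size correct.
 */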
183
184static struct cpu_dev intel_cpu_dev __initdata = {
185 .c_vendor = "Intel",
186 .c_ident = { "GenuineIntel" },
187 .c_models = {
188 { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names =
189 {
190 [0] = "486 DX-25/33",
191 [1] = "486 DX-50",
192 [2] = "486 SX",
193 [3] = "486 DX/2",
194 [4] = "486 SL",
195 [5] = "486 SX/2",
196 [7] = "486 DX/2-WB",
197 [8] = "486 DX/4",
198 [9] = "486 DX/4-WB"
199 }
200 },
201 { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names =
202 {
203 [0] = "Pentium 60/66 A-step",
204 [1] = "Pentium 60/66",
205 [2] = "Pentium 75 - 200",
206 [3] = "OverDrive PODP5V83",
207 [4] = "Pentium MMX",
208 [7] = "Mobile Pentium 75 - 200",
209 [8] = "Mobile Pentium MMX"
210 }
211 },
212 { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names =
213 {
214 [0] = "Pentium Pro A-step",
215 [1] = "Pentium Pro",
216 [3] = "Pentium II (Klamath)",
217 [4] = "Pentium II (Deschutes)",
218 [5] = "Pentium II (Deschutes)",
219 [6] = "Mobile Pentium II",
220 [7] = "Pentium III (Katmai)",
221 [8] = "Pentium III (Coppermine)",
222 [10] = "Pentium III (Cascades)",
223 [11] = "Pentium III (Tualatin)",
224 }
225 },
226 { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names =
227 {
228 [0] = "Pentium 4 (Unknown)",
229 [1] = "Pentium 4 (Willamette)",
230 [2] = "Pentium 4 (Northwood)",
231 [4] = "Pentium 4 (Foster)",
232 [5] = "Pentium 4 (Foster)",
233 }
234 },
235 },
236 .c_init = init_intel,
237 .c_identify = generic_identify,
238 .c_size_cache = intel_size_cache,
239};
240
241__init int intel_cpu_init(void)
242{
243 cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev;
244 return 0;
245}
246
247// arch_initcall(intel_cpu_init);
248
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
new file mode 100644
index 000000000000..aeb5b4ef8c8b
--- /dev/null
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -0,0 +1,598 @@
1/*
2 * Routines to identify caches on Intel CPUs.
3 *
4 * Changes:
5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6 */
7
8#include <linux/init.h>
9#include <linux/slab.h>
10#include <linux/device.h>
11#include <linux/compiler.h>
12#include <linux/cpu.h>
13
14#include <asm/processor.h>
15#include <asm/smp.h>
16
17#define LVL_1_INST 1
18#define LVL_1_DATA 2
19#define LVL_2 3
20#define LVL_3 4
21#define LVL_TRACE 5
22
23struct _cache_table
24{
25 unsigned char descriptor;
26 char cache_type;
27 short size;
28};
29
30/* all the cache descriptor types we care about (no TLB or trace cache entries) */
31static struct _cache_table cache_table[] __initdata =
32{
33 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
34 { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
35 { 0x0a, LVL_1_DATA, 8 }, /* 2-way set assoc, 32 byte line size */
36 { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */
37 { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
38 { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
39 { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */
40 { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */
41 { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
42 { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
43 { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
44 { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */
45 { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */
46 { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
47 { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
48 { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
49 { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */
50 { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */
51 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
52 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
53 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
54 { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */
55 { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */
56 { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
57 { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
58 { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */
59 { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
60 { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
61 { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
62 { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
63 { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */
64 { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
65 { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
66 { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
67 { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */
68 { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */
69 { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
70 { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */
71 { 0x00, 0, 0}
72};
73
74
75enum _cache_type
76{
77 CACHE_TYPE_NULL = 0,
78 CACHE_TYPE_DATA = 1,
79 CACHE_TYPE_INST = 2,
80 CACHE_TYPE_UNIFIED = 3
81};
82
83union _cpuid4_leaf_eax {
84 struct {
85 enum _cache_type type:5;
86 unsigned int level:3;
87 unsigned int is_self_initializing:1;
88 unsigned int is_fully_associative:1;
89 unsigned int reserved:4;
90 unsigned int num_threads_sharing:12;
91 unsigned int num_cores_on_die:6;
92 } split;
93 u32 full;
94};
95
96union _cpuid4_leaf_ebx {
97 struct {
98 unsigned int coherency_line_size:12;
99 unsigned int physical_line_partition:10;
100 unsigned int ways_of_associativity:10;
101 } split;
102 u32 full;
103};
104
105union _cpuid4_leaf_ecx {
106 struct {
107 unsigned int number_of_sets:32;
108 } split;
109 u32 full;
110};
111
112struct _cpuid4_info {
113 union _cpuid4_leaf_eax eax;
114 union _cpuid4_leaf_ebx ebx;
115 union _cpuid4_leaf_ecx ecx;
116 unsigned long size;
117 cpumask_t shared_cpu_map;
118};
119
120#define MAX_CACHE_LEAVES 4
121static unsigned short __devinitdata num_cache_leaves;
122
123static int __devinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
124{
125 unsigned int eax, ebx, ecx, edx;
126 union _cpuid4_leaf_eax cache_eax;
127
128 cpuid_count(4, index, &eax, &ebx, &ecx, &edx);
129 cache_eax.full = eax;
130 if (cache_eax.split.type == CACHE_TYPE_NULL)
131 return -1;
132
133 this_leaf->eax.full = eax;
134 this_leaf->ebx.full = ebx;
135 this_leaf->ecx.full = ecx;
136 this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) *
137 (this_leaf->ebx.split.coherency_line_size + 1) *
138 (this_leaf->ebx.split.physical_line_partition + 1) *
139 (this_leaf->ebx.split.ways_of_associativity + 1);
140 return 0;
141}
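/*
 * Worked example of the size formula above, for a hypothetical 512KB
 * 8-way L2 with 64-byte lines and no sectoring: cpuid(4) reports
 * sets-1 = 1023, line_size-1 = 63, partitions-1 = 0 and ways-1 = 7, so
 * (1023+1) * (63+1) * (0+1) * (7+1) = 524288 bytes = 512KB.
 */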
142
143static int __init find_num_cache_leaves(void)
144{
145 unsigned int eax, ebx, ecx, edx;
146 union _cpuid4_leaf_eax cache_eax;
147 int i;
148 int retval;
149
150 retval = MAX_CACHE_LEAVES;
151 /* Do cpuid(4) loop to find out num_cache_leaves */
152 for (i = 0; i < MAX_CACHE_LEAVES; i++) {
153 cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
154 cache_eax.full = eax;
155 if (cache_eax.split.type == CACHE_TYPE_NULL) {
156 retval = i;
157 break;
158 }
159 }
160 return retval;
161}
162
163unsigned int __init init_intel_cacheinfo(struct cpuinfo_x86 *c)
164{
165 unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
166 unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
167 unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
168
169 if (c->cpuid_level > 4) {
170 static int is_initialized;
171
172 if (is_initialized == 0) {
173 /* Init num_cache_leaves from boot CPU */
174 num_cache_leaves = find_num_cache_leaves();
175 is_initialized++;
176 }
177
178 /*
179 * Whenever possible use cpuid(4), the deterministic cache
180 * parameters leaf, to find the cache details
181 */
182 for (i = 0; i < num_cache_leaves; i++) {
183 struct _cpuid4_info this_leaf;
184
185 int retval;
186
187 retval = cpuid4_cache_lookup(i, &this_leaf);
188 if (retval >= 0) {
189 switch(this_leaf.eax.split.level) {
190 case 1:
191 if (this_leaf.eax.split.type ==
192 CACHE_TYPE_DATA)
193 new_l1d = this_leaf.size/1024;
194 else if (this_leaf.eax.split.type ==
195 CACHE_TYPE_INST)
196 new_l1i = this_leaf.size/1024;
197 break;
198 case 2:
199 new_l2 = this_leaf.size/1024;
200 break;
201 case 3:
202 new_l3 = this_leaf.size/1024;
203 break;
204 default:
205 break;
206 }
207 }
208 }
209 }
210 if (c->cpuid_level > 1) {
211 /* supports eax=2 call */
212 int i, j, n;
213 int regs[4];
214 unsigned char *dp = (unsigned char *)regs;
215
216 /* Number of times to iterate */
217 n = cpuid_eax(2) & 0xFF;
218
219 for ( i = 0 ; i < n ; i++ ) {
220 cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
221
222 /* If bit 31 is set, this is an unknown format */
223 for ( j = 0 ; j < 3 ; j++ ) {
224 if ( regs[j] < 0 ) regs[j] = 0;
225 }
226
227 /* Byte 0 is level count, not a descriptor */
228 for ( j = 1 ; j < 16 ; j++ ) {
229 unsigned char des = dp[j];
230 unsigned char k = 0;
231
232 /* look up this descriptor in the table */
233 while (cache_table[k].descriptor != 0)
234 {
235 if (cache_table[k].descriptor == des) {
236 switch (cache_table[k].cache_type) {
237 case LVL_1_INST:
238 l1i += cache_table[k].size;
239 break;
240 case LVL_1_DATA:
241 l1d += cache_table[k].size;
242 break;
243 case LVL_2:
244 l2 += cache_table[k].size;
245 break;
246 case LVL_3:
247 l3 += cache_table[k].size;
248 break;
249 case LVL_TRACE:
250 trace += cache_table[k].size;
251 break;
252 }
253
254 break;
255 }
256
257 k++;
258 }
259 }
260 }
261
262 if (new_l1d)
263 l1d = new_l1d;
264
265 if (new_l1i)
266 l1i = new_l1i;
267
268 if (new_l2)
269 l2 = new_l2;
270
271 if (new_l3)
272 l3 = new_l3;
273
274 if ( trace )
275 printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
276 else if ( l1i )
277 printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
278 if ( l1d )
279 printk(", L1 D cache: %dK\n", l1d);
280 else
281 printk("\n");
282 if ( l2 )
283 printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
284 if ( l3 )
285 printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
286
287 /*
288 * This assumes the L3 cache is shared; it typically lives in
289 * the northbridge. The L1 caches are already covered by the L2
290 * cache, and so should not be counted for the purpose of
291 * SMP switching weights.
292 */
293 c->x86_cache_size = l2 ? l2 : (l1i+l1d);
294 }
295
296 return l2;
297}
298
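/*
 * Illustrative userspace sketch (not part of this file) of the leaf-2
 * descriptor walk performed in init_intel_cacheinfo() above, assuming
 * GCC's <cpuid.h> helper __get_cpuid() on an Intel CPU.  It only dumps
 * the raw descriptor bytes; turning them into cache sizes needs a
 * table such as cache_table[] above.
 *
 *	#include <stdio.h>
 *	#include <cpuid.h>
 *
 *	int main(void)
 *	{
 *		unsigned int regs[4];
 *		int i, j, n;
 *
 *		if (!__get_cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]))
 *			return 1;
 *		n = regs[0] & 0xFF;	// byte 0 of EAX: iteration count
 *		for (i = 0; i < n; i++) {
 *			__get_cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
 *			for (j = 0; j < 4; j++)
 *				if (regs[j] & 0x80000000) // bit 31: unknown format
 *					regs[j] = 0;
 *			for (j = 1; j < 16; j++)  // byte 0 is the count
 *				printf("descriptor 0x%02x\n",
 *				       ((unsigned char *)regs)[j]);
 *		}
 *		return 0;
 *	}
 */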
299/* pointer to _cpuid4_info array (for each cache leaf) */
300static struct _cpuid4_info *cpuid4_info[NR_CPUS];
301#define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y]))
302
303#ifdef CONFIG_SMP
304static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
305{
306 struct _cpuid4_info *this_leaf;
307 unsigned long num_threads_sharing;
308
309 this_leaf = CPUID4_INFO_IDX(cpu, index);
310 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
311
312 if (num_threads_sharing == 1)
313 cpu_set(cpu, this_leaf->shared_cpu_map);
314#ifdef CONFIG_X86_HT
315 else if (num_threads_sharing == smp_num_siblings)
316 this_leaf->shared_cpu_map = cpu_sibling_map[cpu];
317#endif
318 else
319 printk(KERN_INFO "Number of CPUs sharing cache didn't match "
320 "any known set of CPUs\n");
321}
322#else
323static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
324#endif
325
326static void free_cache_attributes(unsigned int cpu)
327{
328 kfree(cpuid4_info[cpu]);
329 cpuid4_info[cpu] = NULL;
330}
331
332static int __devinit detect_cache_attributes(unsigned int cpu)
333{
334 struct _cpuid4_info *this_leaf;
335 unsigned long j;
336 int retval;
337
338 if (num_cache_leaves == 0)
339 return -ENOENT;
340
341 cpuid4_info[cpu] = kmalloc(
342 sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
343 if (unlikely(cpuid4_info[cpu] == NULL))
344 return -ENOMEM;
345 memset(cpuid4_info[cpu], 0,
346 sizeof(struct _cpuid4_info) * num_cache_leaves);
347
348 /* Do cpuid and store the results */
349 for (j = 0; j < num_cache_leaves; j++) {
350 this_leaf = CPUID4_INFO_IDX(cpu, j);
351 retval = cpuid4_cache_lookup(j, this_leaf);
352 if (unlikely(retval < 0))
353 goto err_out;
354 cache_shared_cpu_map_setup(cpu, j);
355 }
356 return 0;
357
358err_out:
359 free_cache_attributes(cpu);
360 return -ENOMEM;
361}
362
363#ifdef CONFIG_SYSFS
364
365#include <linux/kobject.h>
366#include <linux/sysfs.h>
367
368extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
369
370/* pointer to kobject for cpuX/cache */
371static struct kobject * cache_kobject[NR_CPUS];
372
373struct _index_kobject {
374 struct kobject kobj;
375 unsigned int cpu;
376 unsigned short index;
377};
378
379/* pointer to array of kobjects for cpuX/cache/indexY */
380static struct _index_kobject *index_kobject[NR_CPUS];
381#define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y]))
382
383#define show_one_plus(file_name, object, val) \
384static ssize_t show_##file_name \
385 (struct _cpuid4_info *this_leaf, char *buf) \
386{ \
387 return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \
388}
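/*
 * cpuid(4) encodes line size, physical line partitions, associativity
 * and number of sets as "value minus one", hence the +1 applied to
 * those fields when exporting them through sysfs.
 */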
389
390show_one_plus(level, eax.split.level, 0);
391show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
392show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
393show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
394show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
395
396static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
397{
398 return sprintf (buf, "%luK\n", this_leaf->size / 1024);
399}
400
401static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
402{
403 char mask_str[NR_CPUS];
404 cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
405 return sprintf(buf, "%s\n", mask_str);
406}
407
408static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
409 switch(this_leaf->eax.split.type) {
410 case CACHE_TYPE_DATA:
411 return sprintf(buf, "Data\n");
412 break;
413 case CACHE_TYPE_INST:
414 return sprintf(buf, "Instruction\n");
415 break;
416 case CACHE_TYPE_UNIFIED:
417 return sprintf(buf, "Unified\n");
418 break;
419 default:
420 return sprintf(buf, "Unknown\n");
421 break;
422 }
423}
424
425struct _cache_attr {
426 struct attribute attr;
427 ssize_t (*show)(struct _cpuid4_info *, char *);
428 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
429};
430
431#define define_one_ro(_name) \
432static struct _cache_attr _name = \
433 __ATTR(_name, 0444, show_##_name, NULL)
434
435define_one_ro(level);
436define_one_ro(type);
437define_one_ro(coherency_line_size);
438define_one_ro(physical_line_partition);
439define_one_ro(ways_of_associativity);
440define_one_ro(number_of_sets);
441define_one_ro(size);
442define_one_ro(shared_cpu_map);
443
444static struct attribute * default_attrs[] = {
445 &type.attr,
446 &level.attr,
447 &coherency_line_size.attr,
448 &physical_line_partition.attr,
449 &ways_of_associativity.attr,
450 &number_of_sets.attr,
451 &size.attr,
452 &shared_cpu_map.attr,
453 NULL
454};
455
456#define to_object(k) container_of(k, struct _index_kobject, kobj)
457#define to_attr(a) container_of(a, struct _cache_attr, attr)
458
459static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
460{
461 struct _cache_attr *fattr = to_attr(attr);
462 struct _index_kobject *this_leaf = to_object(kobj);
463 ssize_t ret;
464
465 ret = fattr->show ?
466 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
467 buf) :
468 0;
469 return ret;
470}
471
472static ssize_t store(struct kobject * kobj, struct attribute * attr,
473 const char * buf, size_t count)
474{
475 return 0;
476}
477
478static struct sysfs_ops sysfs_ops = {
479 .show = show,
480 .store = store,
481};
482
483static struct kobj_type ktype_cache = {
484 .sysfs_ops = &sysfs_ops,
485 .default_attrs = default_attrs,
486};
487
488static struct kobj_type ktype_percpu_entry = {
489 .sysfs_ops = &sysfs_ops,
490};
491
492static void cpuid4_cache_sysfs_exit(unsigned int cpu)
493{
494 kfree(cache_kobject[cpu]);
495 kfree(index_kobject[cpu]);
496 cache_kobject[cpu] = NULL;
497 index_kobject[cpu] = NULL;
498 free_cache_attributes(cpu);
499}
500
501static int __devinit cpuid4_cache_sysfs_init(unsigned int cpu)
502{
503
504 if (num_cache_leaves == 0)
505 return -ENOENT;
506
507 detect_cache_attributes(cpu);
508 if (cpuid4_info[cpu] == NULL)
509 return -ENOENT;
510
511 /* Allocate all required memory */
512 cache_kobject[cpu] = kmalloc(sizeof(struct kobject), GFP_KERNEL);
513 if (unlikely(cache_kobject[cpu] == NULL))
514 goto err_out;
515 memset(cache_kobject[cpu], 0, sizeof(struct kobject));
516
517 index_kobject[cpu] = kmalloc(
518 sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
519 if (unlikely(index_kobject[cpu] == NULL))
520 goto err_out;
521 memset(index_kobject[cpu], 0,
522 sizeof(struct _index_kobject) * num_cache_leaves);
523
524 return 0;
525
526err_out:
527 cpuid4_cache_sysfs_exit(cpu);
528 return -ENOMEM;
529}
530
531/* Add/Remove cache interface for CPU device */
532static int __devinit cache_add_dev(struct sys_device * sys_dev)
533{
534 unsigned int cpu = sys_dev->id;
535 unsigned long i, j;
536 struct _index_kobject *this_object;
537 int retval = 0;
538
539 retval = cpuid4_cache_sysfs_init(cpu);
540 if (unlikely(retval < 0))
541 return retval;
542
543 cache_kobject[cpu]->parent = &sys_dev->kobj;
544 kobject_set_name(cache_kobject[cpu], "%s", "cache");
545 cache_kobject[cpu]->ktype = &ktype_percpu_entry;
546 retval = kobject_register(cache_kobject[cpu]);
547
548 for (i = 0; i < num_cache_leaves; i++) {
549 this_object = INDEX_KOBJECT_PTR(cpu,i);
550 this_object->cpu = cpu;
551 this_object->index = i;
552 this_object->kobj.parent = cache_kobject[cpu];
553 kobject_set_name(&(this_object->kobj), "index%1lu", i);
554 this_object->kobj.ktype = &ktype_cache;
555 retval = kobject_register(&(this_object->kobj));
556 if (unlikely(retval)) {
557 for (j = 0; j < i; j++) {
558 kobject_unregister(
559 &(INDEX_KOBJECT_PTR(cpu,j)->kobj));
560 }
561 kobject_unregister(cache_kobject[cpu]);
562 cpuid4_cache_sysfs_exit(cpu);
563 break;
564 }
565 }
566 return retval;
567}
568
569static int __devexit cache_remove_dev(struct sys_device * sys_dev)
570{
571 unsigned int cpu = sys_dev->id;
572 unsigned long i;
573
574 for (i = 0; i < num_cache_leaves; i++)
575 kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
576 kobject_unregister(cache_kobject[cpu]);
577 cpuid4_cache_sysfs_exit(cpu);
578 return 0;
579}
580
581static struct sysdev_driver cache_sysdev_driver = {
582 .add = cache_add_dev,
583 .remove = __devexit_p(cache_remove_dev),
584};
585
586/* Register/Unregister the cpu_cache driver */
587static int __devinit cache_register_driver(void)
588{
589 if (num_cache_leaves == 0)
590 return 0;
591
592 return sysdev_driver_register(&cpu_sysdev_class,&cache_sysdev_driver);
593}
594
595device_initcall(cache_register_driver);
596
597#endif
598
diff --git a/arch/i386/kernel/cpu/mcheck/Makefile b/arch/i386/kernel/cpu/mcheck/Makefile
new file mode 100644
index 000000000000..30808f3d6715
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/Makefile
@@ -0,0 +1,2 @@
1obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o
2obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c
new file mode 100644
index 000000000000..8df52e86c4d2
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/k7.c
@@ -0,0 +1,97 @@
1/*
2 * Athlon/Hammer specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk>
4 */
5
6#include <linux/init.h>
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/config.h>
10#include <linux/irq.h>
11#include <linux/interrupt.h>
12#include <linux/smp.h>
13
14#include <asm/processor.h>
15#include <asm/system.h>
16#include <asm/msr.h>
17
18#include "mce.h"
19
20/* Machine Check Handler For AMD Athlon/Duron */
21static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
22{
23 int recover=1;
24 u32 alow, ahigh, high, low;
25 u32 mcgstl, mcgsth;
26 int i;
27
28 rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
29 if (mcgstl & (1<<0)) /* Recoverable ? */
30 recover=0;
31
32 printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
33 smp_processor_id(), mcgsth, mcgstl);
34
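/*
 * High dword of MCi_STATUS: bit 31 = valid, bit 29 = uncorrected
 * error, bit 27 = MISC register valid, bit 26 = ADDR register valid,
 * bit 25 = processor context corrupt.
 */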
35 for (i=1; i<nr_mce_banks; i++) {
36 rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
37 if (high&(1<<31)) {
38 if (high & (1<<29))
39 recover |= 1;
40 if (high & (1<<25))
41 recover |= 2;
42 printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
43 high &= ~(1<<31);
44 if (high & (1<<27)) {
45 rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
46 printk ("[%08x%08x]", ahigh, alow);
47 }
48 if (high & (1<<26)) {
49 rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
50 printk (" at %08x%08x", ahigh, alow);
51 }
52 printk ("\n");
53 /* Clear it */
54 wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
55 /* Serialize */
56 wmb();
57 add_taint(TAINT_MACHINE_CHECK);
58 }
59 }
60
61 if (recover&2)
62 panic ("CPU context corrupt");
63 if (recover&1)
64 panic ("Unable to continue");
65 printk (KERN_EMERG "Attempting to continue.\n");
66 mcgstl &= ~(1<<2);
67 wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
68}
69
70
71/* AMD K7 machine check is Intel like */
72void __init amd_mcheck_init(struct cpuinfo_x86 *c)
73{
74 u32 l, h;
75 int i;
76
77 machine_check_vector = k7_machine_check;
78 wmb();
79
80 printk (KERN_INFO "Intel machine check architecture supported.\n");
81 rdmsr (MSR_IA32_MCG_CAP, l, h);
82 if (l & (1<<8)) /* Control register present ? */
83 wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
84 nr_mce_banks = l & 0xff;
85
86 /* Clear status for MC index 0 separately; we don't touch CTL,
87 * as some Athlons cause spurious MCEs when it's enabled. */
88 wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
89 for (i=1; i<nr_mce_banks; i++) {
90 wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
91 wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
92 }
93
94 set_in_cr4 (X86_CR4_MCE);
95 printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
96 smp_processor_id());
97}
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c
new file mode 100644
index 000000000000..bf6d1aefafc0
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/mce.c
@@ -0,0 +1,77 @@
1/*
2 * mce.c - x86 Machine Check Exception Reporting
3 * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk>
4 */
5
6#include <linux/init.h>
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/config.h>
10#include <linux/module.h>
11#include <linux/smp.h>
12#include <linux/thread_info.h>
13
14#include <asm/processor.h>
15#include <asm/system.h>
16
17#include "mce.h"
18
19int mce_disabled __initdata = 0;
20int nr_mce_banks;
21
22EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
23
24/* Handle unconfigured int18 (should never happen) */
25static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code)
26{
27 printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
28}
29
30/* Call the installed machine check handler for this CPU setup. */
31void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
32
33/* This has to be run for each processor */
34void __init mcheck_init(struct cpuinfo_x86 *c)
35{
36 if (mce_disabled==1)
37 return;
38
39 switch (c->x86_vendor) {
40 case X86_VENDOR_AMD:
41 if (c->x86==6 || c->x86==15)
42 amd_mcheck_init(c);
43 break;
44
45 case X86_VENDOR_INTEL:
46 if (c->x86==5)
47 intel_p5_mcheck_init(c);
48 if (c->x86==6)
49 intel_p6_mcheck_init(c);
50 if (c->x86==15)
51 intel_p4_mcheck_init(c);
52 break;
53
54 case X86_VENDOR_CENTAUR:
55 if (c->x86==5)
56 winchip_mcheck_init(c);
57 break;
58
59 default:
60 break;
61 }
62}
63
64static int __init mcheck_disable(char *str)
65{
66 mce_disabled = 1;
67 return 0;
68}
69
70static int __init mcheck_enable(char *str)
71{
72 mce_disabled = -1;
73 return 0;
74}
75
76__setup("nomce", mcheck_disable);
77__setup("mce", mcheck_enable);
diff --git a/arch/i386/kernel/cpu/mcheck/mce.h b/arch/i386/kernel/cpu/mcheck/mce.h
new file mode 100644
index 000000000000..dc2416dfef15
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/mce.h
@@ -0,0 +1,14 @@
1#include <linux/init.h>
2
3void amd_mcheck_init(struct cpuinfo_x86 *c);
4void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
5void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
6void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
7void winchip_mcheck_init(struct cpuinfo_x86 *c);
8
9/* Call the installed machine check handler for this CPU setup. */
10extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code);
11
12extern int mce_disabled __initdata;
13extern int nr_mce_banks;
14
diff --git a/arch/i386/kernel/cpu/mcheck/non-fatal.c b/arch/i386/kernel/cpu/mcheck/non-fatal.c
new file mode 100644
index 000000000000..7864ddfccf07
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/non-fatal.c
@@ -0,0 +1,93 @@
1/*
2 * Non Fatal Machine Check Exception Reporting
3 *
4 * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk>
5 *
6 * This file contains routines to check for non-fatal MCEs every 15s
7 *
8 */
9
10#include <linux/init.h>
11#include <linux/types.h>
12#include <linux/kernel.h>
13#include <linux/jiffies.h>
14#include <linux/config.h>
15#include <linux/irq.h>
16#include <linux/workqueue.h>
17#include <linux/interrupt.h>
18#include <linux/smp.h>
19#include <linux/module.h>
20
21#include <asm/processor.h>
22#include <asm/system.h>
23#include <asm/msr.h>
24
25#include "mce.h"
26
27static int firstbank;
28
29#define MCE_RATE (15*HZ) /* timer rate is 15s */
30
31static void mce_checkregs (void *info)
32{
33 u32 low, high;
34 int i;
35
36 for (i=firstbank; i<nr_mce_banks; i++) {
37 rdmsr (MSR_IA32_MC0_STATUS+i*4, low, high);
38
39 if (high & (1<<31)) {
40 printk(KERN_INFO "MCE: The hardware reports that a non-"
41 "fatal, correctable incident occurred on "
42 "CPU %d.\n",
43 smp_processor_id());
44 printk (KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
45
46 /* Scrub the error so we don't pick it up in MCE_RATE seconds time. */
47 wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
48
49 /* Serialize */
50 wmb();
51 add_taint(TAINT_MACHINE_CHECK);
52 }
53 }
54}
55
56static void mce_work_fn(void *data);
57static DECLARE_WORK(mce_work, mce_work_fn, NULL);
58
59static void mce_work_fn(void *data)
60{
61 on_each_cpu(mce_checkregs, NULL, 1, 1);
62 schedule_delayed_work(&mce_work, MCE_RATE);
63}
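/*
 * The work function requeues itself, so the single
 * schedule_delayed_work() call at init time gives a poll of every
 * bank on every CPU each MCE_RATE jiffies.
 */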
64
65static int __init init_nonfatal_mce_checker(void)
66{
67 struct cpuinfo_x86 *c = &boot_cpu_data;
68
69 /* Check for MCE support */
70 if (!cpu_has(c, X86_FEATURE_MCE))
71 return -ENODEV;
72
73 /* Check for PPro style MCA */
74 if (!cpu_has(c, X86_FEATURE_MCA))
75 return -ENODEV;
76
77 /* Some Athlons misbehave when we frob bank 0 */
78 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
79 boot_cpu_data.x86 == 6)
80 firstbank = 1;
81 else
82 firstbank = 0;
83
84 /*
85 * Check for non-fatal errors every MCE_RATE jiffies (15 seconds)
86 */
87 schedule_delayed_work(&mce_work, MCE_RATE);
88 printk(KERN_INFO "Machine check exception polling timer started.\n");
89 return 0;
90}
91module_init(init_nonfatal_mce_checker);
92
93MODULE_LICENSE("GPL");
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
new file mode 100644
index 000000000000..8b16ceb929b4
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -0,0 +1,271 @@
1/*
2 * P4 specific Machine Check Exception Reporting
3 */
4
5#include <linux/init.h>
6#include <linux/types.h>
7#include <linux/kernel.h>
8#include <linux/config.h>
9#include <linux/irq.h>
10#include <linux/interrupt.h>
11#include <linux/smp.h>
12
13#include <asm/processor.h>
14#include <asm/system.h>
15#include <asm/msr.h>
16#include <asm/apic.h>
17
18#include "mce.h"
19
20/* as supported by the P4/Xeon family */
21struct intel_mce_extended_msrs {
22 u32 eax;
23 u32 ebx;
24 u32 ecx;
25 u32 edx;
26 u32 esi;
27 u32 edi;
28 u32 ebp;
29 u32 esp;
30 u32 eflags;
31 u32 eip;
32 /* u32 *reserved[]; */
33};
34
35static int mce_num_extended_msrs = 0;
36
37
38#ifdef CONFIG_X86_MCE_P4THERMAL
39static void unexpected_thermal_interrupt(struct pt_regs *regs)
40{
41 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
42 smp_processor_id());
43 add_taint(TAINT_MACHINE_CHECK);
44}
45
46/* P4/Xeon Thermal transition interrupt handler */
47static void intel_thermal_interrupt(struct pt_regs *regs)
48{
49 u32 l, h;
50 unsigned int cpu = smp_processor_id();
51 static unsigned long next[NR_CPUS];
52
53 ack_APIC_irq();
54
55 if (time_after(next[cpu], jiffies))
56 return;
57
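/* Rate-limit reports to at most one every 5 seconds per CPU */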
58 next[cpu] = jiffies + HZ*5;
59 rdmsr(MSR_IA32_THERM_STATUS, l, h);
60 if (l & 0x1) {
61 printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
62 printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
63 cpu);
64 add_taint(TAINT_MACHINE_CHECK);
65 } else {
66 printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
67 }
68}
69
70/* Thermal interrupt handler for this CPU setup */
71static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
72
73fastcall void smp_thermal_interrupt(struct pt_regs *regs)
74{
75 irq_enter();
76 vendor_thermal_interrupt(regs);
77 irq_exit();
78}
79
80/* P4/Xeon Thermal regulation detect and init */
81static void __init intel_init_thermal(struct cpuinfo_x86 *c)
82{
83 u32 l, h;
84 unsigned int cpu = smp_processor_id();
85
86 /* Thermal monitoring */
87 if (!cpu_has(c, X86_FEATURE_ACPI))
88 return; /* -ENODEV */
89
90 /* Clock modulation */
91 if (!cpu_has(c, X86_FEATURE_ACC))
92 return; /* -ENODEV */
93
94 /* first check if it's enabled already, in which case there might
95 * be some SMM goo which handles it, so we can't even install a handler
96 * since it might be delivered via SMI already -zwanem.
97 */
98 rdmsr (MSR_IA32_MISC_ENABLE, l, h);
99 h = apic_read(APIC_LVTTHMR);
100 if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
101 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
102 cpu);
103 return; /* -EBUSY */
104 }
105
106 /* check whether a vector already exists, temporarily masked? */
107 if (h & APIC_VECTOR_MASK) {
108 printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
109 "installed\n",
110 cpu, (h & APIC_VECTOR_MASK));
111 return; /* -EBUSY */
112 }
113
114 /* The temperature transition interrupt handler setup */
115 h = THERMAL_APIC_VECTOR; /* our delivery vector */
116 h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
117 apic_write_around(APIC_LVTTHMR, h);
118
119 rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
120 wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
121
122 /* ok we're good to go... */
123 vendor_thermal_interrupt = intel_thermal_interrupt;
124
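/* Bit 3 of IA32_MISC_ENABLE is the automatic thermal control
 * circuit (TM1) enable */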
125 rdmsr (MSR_IA32_MISC_ENABLE, l, h);
126 wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
127
128 l = apic_read (APIC_LVTTHMR);
129 apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
130 printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
131 return;
132}
133#endif /* CONFIG_X86_MCE_P4THERMAL */
134
135
136/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
137static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
138{
139 u32 h;
140
141 if (mce_num_extended_msrs == 0)
142 goto done;
143
144 rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
145 rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
146 rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
147 rdmsr (MSR_IA32_MCG_EDX, r->edx, h);
148 rdmsr (MSR_IA32_MCG_ESI, r->esi, h);
149 rdmsr (MSR_IA32_MCG_EDI, r->edi, h);
150 rdmsr (MSR_IA32_MCG_EBP, r->ebp, h);
151 rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
152 rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
153 rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
154
155 /* can we rely on kmalloc to do a dynamic
156 * allocation for the reserved registers?
157 */
158done:
159 return mce_num_extended_msrs;
160}
161
162static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
163{
164 int recover=1;
165 u32 alow, ahigh, high, low;
166 u32 mcgstl, mcgsth;
167 int i;
168 struct intel_mce_extended_msrs dbg;
169
170 rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
171 if (mcgstl & (1<<0)) /* Recoverable ? */
172 recover=0;
173
174 printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
175 smp_processor_id(), mcgsth, mcgstl);
176
177 if (intel_get_extended_msrs(&dbg)) {
178 printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
179 smp_processor_id(), dbg.eip, dbg.eflags);
180 printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
181 dbg.eax, dbg.ebx, dbg.ecx, dbg.edx);
182 printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
183 dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
184 }
185
186 for (i=0; i<nr_mce_banks; i++) {
187 rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
188 if (high & (1<<31)) {
189 if (high & (1<<29))
190 recover |= 1;
191 if (high & (1<<25))
192 recover |= 2;
193 printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
194 high &= ~(1<<31);
195 if (high & (1<<27)) {
196 rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
197 printk ("[%08x%08x]", ahigh, alow);
198 }
199 if (high & (1<<26)) {
200 rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
201 printk (" at %08x%08x", ahigh, alow);
202 }
203 printk ("\n");
204 }
205 }
206
207 if (recover & 2)
208 panic ("CPU context corrupt");
209 if (recover & 1)
210 panic ("Unable to continue");
211
212 printk(KERN_EMERG "Attempting to continue.\n");
213 /*
214 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
215 * recoverable/continuable. This will allow the BIOS to look at the MSRs
216 * for errors if the OS could not log the error.
217 */
218 for (i=0; i<nr_mce_banks; i++) {
219 u32 msr;
220 msr = MSR_IA32_MC0_STATUS+i*4;
221 rdmsr (msr, low, high);
222 if (high&(1<<31)) {
223 /* Clear it */
224 wrmsr(msr, 0UL, 0UL);
225 /* Serialize */
226 wmb();
227 add_taint(TAINT_MACHINE_CHECK);
228 }
229 }
230 mcgstl &= ~(1<<2);
231 wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
232}
233
234
235void __init intel_p4_mcheck_init(struct cpuinfo_x86 *c)
236{
237 u32 l, h;
238 int i;
239
240 machine_check_vector = intel_machine_check;
241 wmb();
242
243 printk (KERN_INFO "Intel machine check architecture supported.\n");
244 rdmsr (MSR_IA32_MCG_CAP, l, h);
245 if (l & (1<<8)) /* Control register present ? */
246 wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
247 nr_mce_banks = l & 0xff;
248
249 for (i=0; i<nr_mce_banks; i++) {
250 wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
251 wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
252 }
253
254 set_in_cr4 (X86_CR4_MCE);
255 printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
256 smp_processor_id());
257
258 /* Check for P4/Xeon extended MCE MSRs */
259 rdmsr (MSR_IA32_MCG_CAP, l, h);
260 if (l & (1<<9)) {/* MCG_EXT_P */
261 mce_num_extended_msrs = (l >> 16) & 0xff;
262 printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
263 " available\n",
264 smp_processor_id(), mce_num_extended_msrs);
265
266#ifdef CONFIG_X86_MCE_P4THERMAL
267 /* Check for P4/Xeon Thermal monitor */
268 intel_init_thermal(c);
269#endif
270 }
271}
diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c
new file mode 100644
index 000000000000..c45a1b485c80
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/p5.c
@@ -0,0 +1,54 @@
1/*
2 * P5 specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Alan Cox <alan@redhat.com>
4 */
5
6#include <linux/init.h>
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/irq.h>
10#include <linux/interrupt.h>
11#include <linux/smp.h>
12
13#include <asm/processor.h>
14#include <asm/system.h>
15#include <asm/msr.h>
16
17#include "mce.h"
18
19/* Machine check handler for Pentium class Intel */
20static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code)
21{
22 u32 loaddr, hi, lotype;
23 rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
24 rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
25 printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype);
26 if (lotype & (1<<5))
27 printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id());
28 add_taint(TAINT_MACHINE_CHECK);
29}
30
31/* Set up machine check reporting for processors with Intel style MCE */
32void __init intel_p5_mcheck_init(struct cpuinfo_x86 *c)
33{
34 u32 l, h;
35
36 /* Check for MCE support */
37 if (!cpu_has(c, X86_FEATURE_MCE))
38 return;
39
40 /* Default P5 MCE to off, as it's often misconnected; only the "mce" boot option (mce_disabled == -1) enables it */
41 if (mce_disabled != -1)
42 return;
43 machine_check_vector = pentium_machine_check;
44 wmb();
45
46 /* Read registers before enabling */
47 rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
48 rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
49 printk(KERN_INFO "Intel old style machine check architecture supported.\n");
50
51 /* Enable MCE */
52 set_in_cr4(X86_CR4_MCE);
53 printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
54}
diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c
new file mode 100644
index 000000000000..46640f8c2494
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/p6.c
@@ -0,0 +1,115 @@
1/*
2 * P6 specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Alan Cox <alan@redhat.com>
4 */
5
6#include <linux/init.h>
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/irq.h>
10#include <linux/interrupt.h>
11#include <linux/smp.h>
12
13#include <asm/processor.h>
14#include <asm/system.h>
15#include <asm/msr.h>
16
17#include "mce.h"
18
19/* Machine Check Handler For PII/PIII */
20static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
21{
22 int recover=1;
23 u32 alow, ahigh, high, low;
24 u32 mcgstl, mcgsth;
25 int i;
26
27 rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
28 if (mcgstl & (1<<0)) /* Recoverable ? */
29 recover=0;
30
31 printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
32 smp_processor_id(), mcgsth, mcgstl);
33
34 for (i=0; i<nr_mce_banks; i++) {
35 rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
36 if (high & (1<<31)) {
37 if (high & (1<<29))
38 recover |= 1;
39 if (high & (1<<25))
40 recover |= 2;
41 printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
42 high &= ~(1<<31);
43 if (high & (1<<27)) {
44 rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
45 printk ("[%08x%08x]", ahigh, alow);
46 }
47 if (high & (1<<26)) {
48 rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
49 printk (" at %08x%08x", ahigh, alow);
50 }
51 printk ("\n");
52 }
53 }
54
55 if (recover & 2)
56 panic ("CPU context corrupt");
57 if (recover & 1)
58 panic ("Unable to continue");
59
60 printk (KERN_EMERG "Attempting to continue.\n");
61 /*
62 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
63 * recoverable/continuable. This will allow the BIOS to look at the MSRs
64 * for errors if the OS could not log the error.
65 */
66 for (i=0; i<nr_mce_banks; i++) {
67 unsigned int msr;
68 msr = MSR_IA32_MC0_STATUS+i*4;
69 rdmsr (msr,low, high);
70 if (high & (1<<31)) {
71 /* Clear it */
72 wrmsr (msr, 0UL, 0UL);
73 /* Serialize */
74 wmb();
75 add_taint(TAINT_MACHINE_CHECK);
76 }
77 }
78 mcgstl &= ~(1<<2);
79 wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
80}
81
82/* Set up machine check reporting for processors with Intel style MCE */
83void __init intel_p6_mcheck_init(struct cpuinfo_x86 *c)
84{
85 u32 l, h;
86 int i;
87
88 /* Check for MCE support */
89 if (!cpu_has(c, X86_FEATURE_MCE))
90 return;
91
92 /* Check for PPro style MCA */
93 if (!cpu_has(c, X86_FEATURE_MCA))
94 return;
95
96 /* Ok machine check is available */
97 machine_check_vector = intel_machine_check;
98 wmb();
99
100 printk (KERN_INFO "Intel machine check architecture supported.\n");
101 rdmsr (MSR_IA32_MCG_CAP, l, h);
102 if (l & (1<<8)) /* Control register present ? */
103 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
104 nr_mce_banks = l & 0xff;
105
106 /* Don't enable bank 0 on intel P6 cores, it goes bang quickly. */
107 for (i=1; i<nr_mce_banks; i++) {
108 wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
109 wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
110 }
111
112 set_in_cr4 (X86_CR4_MCE);
113 printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
114 smp_processor_id());
115}
diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c
new file mode 100644
index 000000000000..753fa7acb984
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/winchip.c
@@ -0,0 +1,37 @@
1/*
2 * IDT Winchip specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Alan Cox <alan@redhat.com>
4 */
5
6#include <linux/init.h>
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/irq.h>
10#include <linux/interrupt.h>
11
12#include <asm/processor.h>
13#include <asm/system.h>
14#include <asm/msr.h>
15
16#include "mce.h"
17
18/* Machine check handler for WinChip C6 */
19static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code)
20{
21 printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
22 add_taint(TAINT_MACHINE_CHECK);
23}
24
25/* Set up machine check reporting on the Winchip C6 series */
26void __init winchip_mcheck_init(struct cpuinfo_x86 *c)
27{
28 u32 lo, hi;
29 machine_check_vector = winchip_machine_check;
30 wmb();
31 rdmsr(MSR_IDT_FCR1, lo, hi);
32 lo |= (1<<2); /* Enable EIERRINT (int 18 MCE) */
33 lo &= ~(1<<4); /* Enable MCE */
34 wrmsr(MSR_IDT_FCR1, lo, hi);
35 set_in_cr4(X86_CR4_MCE);
36 printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n");
37}
diff --git a/arch/i386/kernel/cpu/mtrr/Makefile b/arch/i386/kernel/cpu/mtrr/Makefile
new file mode 100644
index 000000000000..a25b701ab84e
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/Makefile
@@ -0,0 +1,5 @@
1obj-y := main.o if.o generic.o state.o
2obj-y += amd.o
3obj-y += cyrix.o
4obj-y += centaur.o
5
diff --git a/arch/i386/kernel/cpu/mtrr/amd.c b/arch/i386/kernel/cpu/mtrr/amd.c
new file mode 100644
index 000000000000..1a1e04b6fd00
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/amd.c
@@ -0,0 +1,121 @@
1#include <linux/init.h>
2#include <linux/mm.h>
3#include <asm/mtrr.h>
4#include <asm/msr.h>
5
6#include "mtrr.h"
7
8static void
9amd_get_mtrr(unsigned int reg, unsigned long *base,
10 unsigned int *size, mtrr_type * type)
11{
12 unsigned long low, high;
13
14 rdmsr(MSR_K6_UWCCR, low, high);
15 /* Upper dword is region 1, lower is region 0 */
16 if (reg == 1)
17 low = high;
18 /* The base lives in the upper bits; mask off the low alignment bits */
19 *base = (low & 0xFFFE0000) >> PAGE_SHIFT;
20 *type = 0;
21 if (low & 1)
22 *type = MTRR_TYPE_UNCACHABLE;
23 if (low & 2)
24 *type = MTRR_TYPE_WRCOMB;
25 if (!(low & 3)) {
26 *size = 0;
27 return;
28 }
29 /*
30 * This needs a little explaining. The size is stored as an
31 * inverted mask of bits of 128K granularity 15 bits long offset
32 * 2 bits
33 *
34 * So to get a size we do invert the mask and add 1 to the lowest
35 * mask bit (4 as its 2 bits in). This gives us a size we then shift
36 * to turn into 128K blocks
37 *
38 * eg 111 1111 1111 1100 is 512K
39 *
40 * invert 000 0000 0000 0011
41 * +1 000 0000 0000 0100
42 * *128K 512K (4 x 128K)
43 */
44 low = (~low) & 0x1FFFC;
45 *size = (low + 4) << (15 - PAGE_SHIFT);
46 return;
47}
48
49static void amd_set_mtrr(unsigned int reg, unsigned long base,
50 unsigned long size, mtrr_type type)
51/* [SUMMARY] Set variable MTRR register on the local CPU.
52 <reg> The register to set.
53 <base> The base address of the region.
54 <size> The size of the region. If this is 0 the region is disabled.
55 <type> The type of the region.
58 [RETURNS] Nothing.
59*/
60{
61 u32 regs[2];
62
63 /*
64 * Low dword is MTRR0, high dword is MTRR1
65 */
66 rdmsr(MSR_K6_UWCCR, regs[0], regs[1]);
67 /*
68 * Blank to disable
69 */
70 if (size == 0)
71 regs[reg] = 0;
72 else
73 /* Set the register to the base, the type (off by one) and an
74 inverted bitmask of the size. The size is the only odd
75 bit. Say we are fed 512K: inverting it gives 111 1111
76 1111 1011, but if you subtract one first and then invert you
77 get the desired 111 1111 1111 1100 mask.
78
79 But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */
80 regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC)
81 | (base << PAGE_SHIFT) | (type + 1);
82
83 /*
84 * The writeback rule is quite specific. See the manual. It's:
85 * disable local interrupts, write back the cache, set the MTRR
86 */
87 wbinvd();
88 wrmsr(MSR_K6_UWCCR, regs[0], regs[1]);
89}
90
91static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
92{
93 /* Apply the K6 block alignment and size rules
94 In order
95 o Uncached or gathering only
96 o 128K or bigger block
97 o Power of 2 block
98 o base suitably aligned to the power
99 */
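/* (size & ~(size - 1)) isolates the lowest set bit of size and
 * equals size only when size is a power of two */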
100 if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT))
101 || (size & ~(size - 1)) - size || (base & (size - 1)))
102 return -EINVAL;
103 return 0;
104}
105
106static struct mtrr_ops amd_mtrr_ops = {
107 .vendor = X86_VENDOR_AMD,
108 .set = amd_set_mtrr,
109 .get = amd_get_mtrr,
110 .get_free_region = generic_get_free_region,
111 .validate_add_page = amd_validate_add_page,
112 .have_wrcomb = positive_have_wrcomb,
113};
114
115int __init amd_init_mtrr(void)
116{
117 set_mtrr_ops(&amd_mtrr_ops);
118 return 0;
119}
120
121//arch_initcall(amd_mtrr_init);
diff --git a/arch/i386/kernel/cpu/mtrr/centaur.c b/arch/i386/kernel/cpu/mtrr/centaur.c
new file mode 100644
index 000000000000..33f00ac314ef
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/centaur.c
@@ -0,0 +1,223 @@
1#include <linux/init.h>
2#include <linux/mm.h>
3#include <asm/mtrr.h>
4#include <asm/msr.h>
5#include "mtrr.h"
6
7static struct {
8 unsigned long high;
9 unsigned long low;
10} centaur_mcr[8];
11
12static u8 centaur_mcr_reserved;
13static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */
14
15/*
16 * Report boot time MCR setups
17 */
18
19static int
20centaur_get_free_region(unsigned long base, unsigned long size)
21/* [SUMMARY] Get a free MTRR.
22 <base> The starting (base) address of the region.
23 <size> The size (in bytes) of the region.
24 [RETURNS] The index of the region on success, else -ENOSPC on error.
25*/
26{
27 int i, max;
28 mtrr_type ltype;
29 unsigned long lbase;
30 unsigned int lsize;
31
32 max = num_var_ranges;
33 for (i = 0; i < max; ++i) {
34 if (centaur_mcr_reserved & (1 << i))
35 continue;
36 mtrr_if->get(i, &lbase, &lsize, &ltype);
37 if (lsize == 0)
38 return i;
39 }
40 return -ENOSPC;
41}
42
43void
44mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
45{
46 centaur_mcr[mcr].low = lo;
47 centaur_mcr[mcr].high = hi;
48}
49
50static void
51centaur_get_mcr(unsigned int reg, unsigned long *base,
52 unsigned int *size, mtrr_type * type)
53{
54 *base = centaur_mcr[reg].high >> PAGE_SHIFT;
55 *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT;
56 *type = MTRR_TYPE_WRCOMB; /* If it is there, it is write-combining */
57 if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2))
58 *type = MTRR_TYPE_UNCACHABLE;
59 if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25)
60 *type = MTRR_TYPE_WRBACK;
61 if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31)
62 *type = MTRR_TYPE_WRBACK;
63
64}
65
66static void centaur_set_mcr(unsigned int reg, unsigned long base,
67 unsigned long size, mtrr_type type)
68{
69 unsigned long low, high;
70
71 if (size == 0) {
72 /* Disable */
73 high = low = 0;
74 } else {
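/* -size << PAGE_SHIFT yields the inverted size mask the MCR low
 * word expects; the same two's-complement trick as in amd.c */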
75 high = base << PAGE_SHIFT;
76 if (centaur_mcr_type == 0)
77 low = -size << PAGE_SHIFT | 0x1f; /* only support write-combining... */
78 else {
79 if (type == MTRR_TYPE_UNCACHABLE)
80 low = -size << PAGE_SHIFT | 0x02; /* NC */
81 else
82 low = -size << PAGE_SHIFT | 0x09; /* WWO,WC */
83 }
84 }
85 centaur_mcr[reg].high = high;
86 centaur_mcr[reg].low = low;
87 wrmsr(MSR_IDT_MCR0 + reg, low, high);
88}
89
90#if 0
91/*
92 * Initialise the later (saner) Winchip MCR variant. In this version
93 * the BIOS can pass us the registers it has used (but not their values)
94 * and the control register is read/write
95 */
96
97static void __init
98centaur_mcr1_init(void)
99{
100 unsigned i;
101 u32 lo, hi;
102
103 /* Unfortunately, MCR's are read-only, so there is no way to
104 * find out what the bios might have done.
105 */
106
107 rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
108 if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */
109 lo &= ~0x1C0; /* clear key */
110 lo |= 0x040; /* set key to 1 */
111 wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */
112 }
113
114 centaur_mcr_type = 1;
115
116 /*
117 * Clear any unconfigured MCR's.
118 */
119
120 for (i = 0; i < 8; ++i) {
121 if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) {
122 if (!(lo & (1 << (9 + i))))
123 wrmsr(MSR_IDT_MCR0 + i, 0, 0);
124 else
125 /*
126 * If the BIOS set up an MCR we cannot see it
127 * but we don't wish to obliterate it
128 */
129 centaur_mcr_reserved |= (1 << i);
130 }
131 }
132 /*
133 * Throw the main write-combining switch...
134 * However if OOSTORE is enabled then people have already done far
135 * cleverer things and we should behave.
136 */
137
138 lo |= 15; /* Write combine enables */
139 wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
140}
141
142/*
143 * Initialise the original winchip with read only MCR registers
144 * no used bitmask for the BIOS to pass on and write only control
145 */
146
147static void __init
148centaur_mcr0_init(void)
149{
150 unsigned i;
151
152 /* Unfortunately, MCR's are read-only, so there is no way to
153 * find out what the bios might have done.
154 */
155
156 /* Clear any unconfigured MCR's.
157 * This way we are sure that the centaur_mcr array contains the actual
158 * values. The disadvantage is that any BIOS tweaks are thus undone.
159 *
160 */
161 for (i = 0; i < 8; ++i) {
162 if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0)
163 wrmsr(MSR_IDT_MCR0 + i, 0, 0);
164 }
165
166 wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */
167}
168
169/*
170 * Initialise Winchip series MCR registers
171 */
172
173static void __init
174centaur_mcr_init(void)
175{
176 struct set_mtrr_context ctxt;
177
178 set_mtrr_prepare_save(&ctxt);
179 set_mtrr_cache_disable(&ctxt);
180
181 if (boot_cpu_data.x86_model == 4)
182 centaur_mcr0_init();
183 else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9)
184 centaur_mcr1_init();
185
186 set_mtrr_done(&ctxt);
187}
188#endif
189
190static int centaur_validate_add_page(unsigned long base,
191 unsigned long size, unsigned int type)
192{
193 /*
194 * FIXME: Winchip2 supports uncached
195 */
196 if (type != MTRR_TYPE_WRCOMB &&
197 (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
198 printk(KERN_WARNING
199 "mtrr: only write-combining%s supported\n",
200 centaur_mcr_type ? " and uncacheable are"
201 : " is");
202 return -EINVAL;
203 }
204 return 0;
205}
206
207static struct mtrr_ops centaur_mtrr_ops = {
208 .vendor = X86_VENDOR_CENTAUR,
209// .init = centaur_mcr_init,
210 .set = centaur_set_mcr,
211 .get = centaur_get_mcr,
212 .get_free_region = centaur_get_free_region,
213 .validate_add_page = centaur_validate_add_page,
214 .have_wrcomb = positive_have_wrcomb,
215};
216
217int __init centaur_init_mtrr(void)
218{
219 set_mtrr_ops(&centaur_mtrr_ops);
220 return 0;
221}
222
223//arch_initcall(centaur_init_mtrr);
diff --git a/arch/i386/kernel/cpu/mtrr/changelog b/arch/i386/kernel/cpu/mtrr/changelog
new file mode 100644
index 000000000000..af1368535955
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/changelog
@@ -0,0 +1,229 @@
1 ChangeLog
2
3 Prehistory Martin Tischhäuser <martin@ikcbarka.fzk.de>
4 Initial register-setting code (from proform-1.0).
5 19971216 Richard Gooch <rgooch@atnf.csiro.au>
6 Original version for /proc/mtrr interface, SMP-safe.
7 v1.0
8 19971217 Richard Gooch <rgooch@atnf.csiro.au>
9 Bug fix for ioctls()'s.
10 Added sample code in Documentation/mtrr.txt
11 v1.1
12 19971218 Richard Gooch <rgooch@atnf.csiro.au>
13 Disallow overlapping regions.
14 19971219 Jens Maurer <jmaurer@menuett.rhein-main.de>
15 Register-setting fixups.
16 v1.2
17 19971222 Richard Gooch <rgooch@atnf.csiro.au>
18 Fixups for kernel 2.1.75.
19 v1.3
20 19971229 David Wragg <dpw@doc.ic.ac.uk>
21 Register-setting fixups and conformity with Intel conventions.
22 19971229 Richard Gooch <rgooch@atnf.csiro.au>
23 Cosmetic changes and wrote this ChangeLog ;-)
24 19980106 Richard Gooch <rgooch@atnf.csiro.au>
25 Fixups for kernel 2.1.78.
26 v1.4
27 19980119 David Wragg <dpw@doc.ic.ac.uk>
28 Included passive-release enable code (elsewhere in PCI setup).
29 v1.5
30 19980131 Richard Gooch <rgooch@atnf.csiro.au>
31 Replaced global kernel lock with private spinlock.
32 v1.6
33 19980201 Richard Gooch <rgooch@atnf.csiro.au>
34 Added wait for other CPUs to complete changes.
35 v1.7
36 19980202 Richard Gooch <rgooch@atnf.csiro.au>
37 Bug fix in definition of <set_mtrr> for UP.
38 v1.8
39 19980319 Richard Gooch <rgooch@atnf.csiro.au>
40 Fixups for kernel 2.1.90.
41 19980323 Richard Gooch <rgooch@atnf.csiro.au>
42 Move SMP BIOS fixup before secondary CPUs call <calibrate_delay>
43 v1.9
44 19980325 Richard Gooch <rgooch@atnf.csiro.au>
45 Fixed test for overlapping regions: confused by adjacent regions
46 19980326 Richard Gooch <rgooch@atnf.csiro.au>
47 Added wbinvd in <set_mtrr_prepare>.
48 19980401 Richard Gooch <rgooch@atnf.csiro.au>
49 Bug fix for non-SMP compilation.
50 19980418 David Wragg <dpw@doc.ic.ac.uk>
51 Fixed-MTRR synchronisation for SMP and use atomic operations
52 instead of spinlocks.
53 19980418 Richard Gooch <rgooch@atnf.csiro.au>
54 Differentiate different MTRR register classes for BIOS fixup.
55 v1.10
56 19980419 David Wragg <dpw@doc.ic.ac.uk>
57 Bug fix in variable MTRR synchronisation.
58 v1.11
59 19980419 Richard Gooch <rgooch@atnf.csiro.au>
60 Fixups for kernel 2.1.97.
61 v1.12
62 19980421 Richard Gooch <rgooch@atnf.csiro.au>
63 Safer synchronisation across CPUs when changing MTRRs.
64 v1.13
65 19980423 Richard Gooch <rgooch@atnf.csiro.au>
66 Bugfix for SMP systems without MTRR support.
67 v1.14
68 19980427 Richard Gooch <rgooch@atnf.csiro.au>
69 Trap calls to <mtrr_add> and <mtrr_del> on non-MTRR machines.
70 v1.15
71 19980427 Richard Gooch <rgooch@atnf.csiro.au>
72 Use atomic bitops for setting SMP change mask.
73 v1.16
74 19980428 Richard Gooch <rgooch@atnf.csiro.au>
75 Removed spurious diagnostic message.
76 v1.17
77 19980429 Richard Gooch <rgooch@atnf.csiro.au>
78 Moved register-setting macros into this file.
79 Moved setup code from init/main.c to i386-specific areas.
80 v1.18
81 19980502 Richard Gooch <rgooch@atnf.csiro.au>
82 Moved MTRR detection outside conditionals in <mtrr_init>.
83 v1.19
84 19980502 Richard Gooch <rgooch@atnf.csiro.au>
85 Documentation improvement: mention Pentium II and AGP.
86 v1.20
87 19980521 Richard Gooch <rgooch@atnf.csiro.au>
88 Only manipulate interrupt enable flag on local CPU.
89 Allow enclosed uncachable regions.
90 v1.21
91 19980611 Richard Gooch <rgooch@atnf.csiro.au>
92 Always define <main_lock>.
93 v1.22
94 19980901 Richard Gooch <rgooch@atnf.csiro.au>
95 Removed module support in order to tidy up code.
96 Added sanity check for <mtrr_add>/<mtrr_del> before <mtrr_init>.
97 Created addition queue for prior to SMP commence.
98 v1.23
99 19980902 Richard Gooch <rgooch@atnf.csiro.au>
100 Ported patch to kernel 2.1.120-pre3.
101 v1.24
102 19980910 Richard Gooch <rgooch@atnf.csiro.au>
103 Removed sanity checks and addition queue: Linus prefers an OOPS.
104 v1.25
105 19981001 Richard Gooch <rgooch@atnf.csiro.au>
106 Fixed harmless compiler warning in include/asm-i386/mtrr.h
107 Fixed version numbering and history for v1.23 -> v1.24.
108 v1.26
109 19990118 Richard Gooch <rgooch@atnf.csiro.au>
110 Added devfs support.
111 v1.27
112 19990123 Richard Gooch <rgooch@atnf.csiro.au>
113 Changed locking to spin with reschedule.
114 Made use of new <smp_call_function>.
115 v1.28
116 19990201 Zoltán Böszörményi <zboszor@mail.externet.hu>
117 Extended the driver to be able to use Cyrix style ARRs.
118 19990204 Richard Gooch <rgooch@atnf.csiro.au>
119 Restructured Cyrix support.
120 v1.29
121 19990204 Zoltán Böszörményi <zboszor@mail.externet.hu>
122 Refined ARR support: enable MAPEN in set_mtrr_prepare()
123 and disable MAPEN in set_mtrr_done().
124 19990205 Richard Gooch <rgooch@atnf.csiro.au>
125 Minor cleanups.
126 v1.30
127 19990208 Zoltán Böszörményi <zboszor@mail.externet.hu>
128 Protect plain 6x86s (and other processors without the
129 Page Global Enable feature) against accessing CR4 in
130 set_mtrr_prepare() and set_mtrr_done().
131 19990210 Richard Gooch <rgooch@atnf.csiro.au>
132 Turned <set_mtrr_up> and <get_mtrr> into function pointers.
133 v1.31
134 19990212 Zoltán Böszörményi <zboszor@mail.externet.hu>
135 Major rewrite of cyrix_arr_init(): do not touch ARRs,
136 leave them as the BIOS have set them up.
137 Enable usage of all 8 ARRs.
138 Avoid multiplications by 3 everywhere and other
139 code clean ups/speed ups.
140 19990213 Zoltán Böszörményi <zboszor@mail.externet.hu>
141 Set up other Cyrix processors identical to the boot cpu.
142 Since Cyrix don't support Intel APIC, this is l'art pour l'art.
143 Weigh ARRs by size:
144 If size <= 32M is given, set up ARR# we were given.
145 If size > 32M is given, set up ARR7 only if it is free,
146 fail otherwise.
147 19990214 Zoltán Böszörményi <zboszor@mail.externet.hu>
148 Also check for size >= 256K if we are to set up ARR7,
149 mtrr_add() returns the value it gets from set_mtrr()
150 19990218 Zoltán Böszörményi <zboszor@mail.externet.hu>
151 Remove Cyrix "coma bug" workaround from here.
152 Moved to linux/arch/i386/kernel/setup.c and
153 linux/include/asm-i386/bugs.h
154 19990228 Richard Gooch <rgooch@atnf.csiro.au>
155 Added MTRRIOC_KILL_ENTRY ioctl(2)
156 Trap for counter underflow in <mtrr_file_del>.
157 Trap for 4 MiB aligned regions for PPro, stepping <= 7.
158 19990301 Richard Gooch <rgooch@atnf.csiro.au>
159 Created <get_free_region> hook.
160 19990305 Richard Gooch <rgooch@atnf.csiro.au>
161 Temporarily disable AMD support now MTRR capability flag is set.
162 v1.32
163 19990308 Zoltán Böszörményi <zboszor@mail.externet.hu>
164 Adjust my changes (19990212-19990218) to Richard Gooch's
165 latest changes. (19990228-19990305)
166 v1.33
167 19990309 Richard Gooch <rgooch@atnf.csiro.au>
168 Fixed typo in <printk> message.
169 19990310 Richard Gooch <rgooch@atnf.csiro.au>
170 Support K6-II/III based on Alan Cox's <alan@redhat.com> patches.
171 v1.34
172 19990511 Bart Hartgers <bart@etpmod.phys.tue.nl>
173 Support Centaur C6 MCR's.
174 19990512 Richard Gooch <rgooch@atnf.csiro.au>
175 Minor cleanups.
176 v1.35
177 19990707 Zoltán Böszörményi <zboszor@mail.externet.hu>
178 Check whether ARR3 is protected in cyrix_get_free_region()
179 and mtrr_del(). The code won't attempt to delete or change it
180 from now on if the BIOS protected ARR3. It silently skips ARR3
181 in cyrix_get_free_region() or returns with an error code from
182 mtrr_del().
183 19990711 Zoltán Böszörményi <zboszor@mail.externet.hu>
184 Reset some bits in the CCRs in cyrix_arr_init() to disable SMM
185 if ARR3 isn't protected. This is needed because if SMM is active
186 and ARR3 isn't protected then deleting and setting ARR3 again
187 may lock up the processor. With SMM entirely disabled, it does
188 not happen.
189 19990812 Zoltán Böszörményi <zboszor@mail.externet.hu>
190 Rearrange switch() statements so the driver accommodates
191 the fact that the AMD Athlon handles its MTRRs the same way
192 as Intel does.
193 19990814 Zoltán Böszörményi <zboszor@mail.externet.hu>
194 Double check for Intel in mtrr_add()'s big switch() because
195 that revision check is only valid for Intel CPUs.
196 19990819 Alan Cox <alan@redhat.com>
197 Tested Zoltan's changes on a pre-production Athlon - 100%
198 success.
199 19991008 Manfred Spraul <manfreds@colorfullife.com>
200 Replaced spin_lock_reschedule() with a normal semaphore.
201 v1.36
202 20000221 Richard Gooch <rgooch@atnf.csiro.au>
203 Compile fix if procfs and devfs not enabled.
204 Formatting changes.
205 v1.37
206 20001109 H. Peter Anvin <hpa@zytor.com>
207 Use the new centralized CPU feature detects.
208
209 v1.38
210 20010309 Dave Jones <davej@suse.de>
211 Add support for Cyrix III.
212
213 v1.39
214 20010312 Dave Jones <davej@suse.de>
215 Ugh, I broke AMD support.
216 Reworked fix by Troels Walsted Hansen <troels@thule.no>
217
218 v1.40
219 20010327 Dave Jones <davej@suse.de>
220 Adapted Cyrix III support to include VIA C3.
221
222 v2.0
223 20020306 Patrick Mochel <mochel@osdl.org>
224 Split mtrr.c -> mtrr/*.c
225 Converted to Linux Kernel Coding Style
226 Fixed several minor nits in form
227 Moved some SMP-only functions out, so they can be used
228 for power management in the future.
229 TODO: Fix user interface cruft.
diff --git a/arch/i386/kernel/cpu/mtrr/cyrix.c b/arch/i386/kernel/cpu/mtrr/cyrix.c
new file mode 100644
index 000000000000..933b0dd62f48
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/cyrix.c
@@ -0,0 +1,364 @@
1#include <linux/init.h>
2#include <linux/mm.h>
3#include <asm/mtrr.h>
4#include <asm/msr.h>
5#include <asm/io.h>
6#include "mtrr.h"
7
8int arr3_protected;
9
10static void
11cyrix_get_arr(unsigned int reg, unsigned long *base,
12 unsigned int *size, mtrr_type * type)
13{
14 unsigned long flags;
15 unsigned char arr, ccr3, rcr, shift;
16
17 arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
18
19 /* Save flags and disable interrupts */
20 local_irq_save(flags);
21
22 ccr3 = getCx86(CX86_CCR3);
23 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
24 ((unsigned char *) base)[3] = getCx86(arr);
25 ((unsigned char *) base)[2] = getCx86(arr + 1);
26 ((unsigned char *) base)[1] = getCx86(arr + 2);
27 rcr = getCx86(CX86_RCR_BASE + reg);
28 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
29
30	/* Re-enable interrupts if they were enabled previously */
31 local_irq_restore(flags);
32 shift = ((unsigned char *) base)[1] & 0x0f;
33 *base >>= PAGE_SHIFT;
34
35 /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7
36 * Note: shift==0xf means 4G, this is unsupported.
37 */
38 if (shift)
39 *size = (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1);
40 else
41 *size = 0;
42
43 /* Bit 0 is Cache Enable on ARR7, Cache Disable on ARR0-ARR6 */
44 if (reg < 7) {
45 switch (rcr) {
46 case 1:
47 *type = MTRR_TYPE_UNCACHABLE;
48 break;
49 case 8:
50 *type = MTRR_TYPE_WRBACK;
51 break;
52 case 9:
53 *type = MTRR_TYPE_WRCOMB;
54 break;
55 case 24:
56 default:
57 *type = MTRR_TYPE_WRTHROUGH;
58 break;
59 }
60 } else {
61 switch (rcr) {
62 case 0:
63 *type = MTRR_TYPE_UNCACHABLE;
64 break;
65 case 8:
66 *type = MTRR_TYPE_WRCOMB;
67 break;
68 case 9:
69 *type = MTRR_TYPE_WRBACK;
70 break;
71 case 25:
72 default:
73 *type = MTRR_TYPE_WRTHROUGH;
74 break;
75 }
76 }
77}
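
The size decoding above is terse; as a worked illustration (a standalone user-space sketch, not part of the driver), the low nibble of the middle base byte is a power-of-two exponent with a 4K unit on ARR0-ARR6 and a 256K unit on ARR7:

#include <stdio.h>

/* Mirrors the shift decoding in cyrix_get_arr(); sizes are in 4K pages.
 * shift == 0 means the ARR is disabled. Illustrative only. */
static unsigned long arr_size_pages(unsigned int reg, unsigned char shift)
{
	if (shift == 0)
		return 0;
	return (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1);
}

int main(void)
{
	printf("ARR0, shift 1:  %lu pages (4K)\n", arr_size_pages(0, 1));
	printf("ARR0, shift 12: %lu pages (8M)\n", arr_size_pages(0, 12));
	printf("ARR7, shift 1:  %lu pages (256K)\n", arr_size_pages(7, 1));
	return 0;
}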
78
79static int
80cyrix_get_free_region(unsigned long base, unsigned long size)
81/* [SUMMARY] Get a free ARR.
82 <base> The starting (base) address of the region.
83 <size> The size (in bytes) of the region.
84 [RETURNS] The index of the region on success, else a negative error code.
85*/
86{
87 int i;
88 mtrr_type ltype;
89 unsigned long lbase;
90 unsigned int lsize;
91
92 /* If we are to set up a region >32M then look at ARR7 immediately */
93 if (size > 0x2000) {
94 cyrix_get_arr(7, &lbase, &lsize, &ltype);
95 if (lsize == 0)
96 return 7;
97 /* Else try ARR0-ARR6 first */
98 } else {
99 for (i = 0; i < 7; i++) {
100 cyrix_get_arr(i, &lbase, &lsize, &ltype);
101 if ((i == 3) && arr3_protected)
102 continue;
103 if (lsize == 0)
104 return i;
105 }
106		/* None of ARR0-ARR6 is free; try ARR7, but the requested size must be at least 256K */
107 cyrix_get_arr(i, &lbase, &lsize, &ltype);
108 if ((lsize == 0) && (size >= 0x40))
109 return i;
110 }
111 return -ENOSPC;
112}
113
114static u32 cr4 = 0;
115static u32 ccr3;
116
117static void prepare_set(void)
118{
119 u32 cr0;
120
121 /* Save value of CR4 and clear Page Global Enable (bit 7) */
122 if ( cpu_has_pge ) {
123 cr4 = read_cr4();
124		write_cr4(cr4 & ~X86_CR4_PGE);	/* a narrower cast here would clear bits 8-31 too */
125 }
126
127 /* Disable and flush caches. Note that wbinvd flushes the TLBs as
128 a side-effect */
129 cr0 = read_cr0() | 0x40000000;
130 wbinvd();
131 write_cr0(cr0);
132 wbinvd();
133
134	/* Cyrix ARRs - everything else was excluded at the top */
135	ccr3 = getCx86(CX86_CCR3);
136
137	/* Enable MAPEN so the ARR registers become accessible */
138	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
139
140}
141
142static void post_set(void)
143{
144 /* Flush caches and TLBs */
145 wbinvd();
146
147 /* Cyrix ARRs - everything else was excluded at the top */
148 setCx86(CX86_CCR3, ccr3);
149
150 /* Enable caches */
151 write_cr0(read_cr0() & 0xbfffffff);
152
153 /* Restore value of CR4 */
154 if ( cpu_has_pge )
155 write_cr4(cr4);
156}
157
158static void cyrix_set_arr(unsigned int reg, unsigned long base,
159 unsigned long size, mtrr_type type)
160{
161 unsigned char arr, arr_type, arr_size;
162
163 arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
164
165 /* count down from 32M (ARR0-ARR6) or from 2G (ARR7) */
166 if (reg >= 7)
167 size >>= 6;
168
169 size &= 0x7fff; /* make sure arr_size <= 14 */
170 for (arr_size = 0; size; arr_size++, size >>= 1) ;
171
172 if (reg < 7) {
173 switch (type) {
174 case MTRR_TYPE_UNCACHABLE:
175 arr_type = 1;
176 break;
177 case MTRR_TYPE_WRCOMB:
178 arr_type = 9;
179 break;
180 case MTRR_TYPE_WRTHROUGH:
181 arr_type = 24;
182 break;
183 default:
184 arr_type = 8;
185 break;
186 }
187 } else {
188 switch (type) {
189 case MTRR_TYPE_UNCACHABLE:
190 arr_type = 0;
191 break;
192 case MTRR_TYPE_WRCOMB:
193 arr_type = 8;
194 break;
195 case MTRR_TYPE_WRTHROUGH:
196 arr_type = 25;
197 break;
198 default:
199 arr_type = 9;
200 break;
201 }
202 }
203
204 prepare_set();
205
206 base <<= PAGE_SHIFT;
207 setCx86(arr, ((unsigned char *) &base)[3]);
208 setCx86(arr + 1, ((unsigned char *) &base)[2]);
209 setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size);
210 setCx86(CX86_RCR_BASE + reg, arr_type);
211
212 post_set();
213}
214
215typedef struct {
216 unsigned long base;
217 unsigned int size;
218 mtrr_type type;
219} arr_state_t;
220
221static arr_state_t arr_state[8] __initdata = {
222 {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL},
223 {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}
224};
225
226static unsigned char ccr_state[7] __initdata = { 0, 0, 0, 0, 0, 0, 0 };
227
228static void cyrix_set_all(void)
229{
230 int i;
231
232 prepare_set();
233
234 /* the CCRs are not contiguous */
235 for (i = 0; i < 4; i++)
236 setCx86(CX86_CCR0 + i, ccr_state[i]);
237 for (; i < 7; i++)
238		setCx86(CX86_CCR4 + i - 4, ccr_state[i]);	/* CCR4-CCR6 start at CX86_CCR4 */
239 for (i = 0; i < 8; i++)
240 cyrix_set_arr(i, arr_state[i].base,
241 arr_state[i].size, arr_state[i].type);
242
243 post_set();
244}
245
246#if 0
247/*
248 * On Cyrix 6x86(MX) and M II the ARR3 is special: it is tied to
249 * SMM (System Management Mode). So we need the following:
250 * Check whether SMI_LOCK (CCR3 bit 0) is set
251 * if it is set, write a warning message: ARR3 cannot be changed!
252 * (it cannot be changed until the next processor reset)
253 * if it is reset, then we can change it, set all the needed bits:
254 * - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset)
255 * - disable access to SMM memory (CCR1 bit 2 reset)
256 * - disable SMM mode (CCR1 bit 1 reset)
257 * - disable write protection of ARR3 (CCR6 bit 1 reset)
258 * - (maybe) disable ARR3
259 * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set)
260 */
261static void __init
262cyrix_arr_init(void)
263{
264 struct set_mtrr_context ctxt;
265 unsigned char ccr[7];
266 int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 };
267#ifdef CONFIG_SMP
268 int i;
269#endif
270
271 /* flush cache and enable MAPEN */
272 set_mtrr_prepare_save(&ctxt);
273 set_mtrr_cache_disable(&ctxt);
274
275 /* Save all CCRs locally */
276 ccr[0] = getCx86(CX86_CCR0);
277 ccr[1] = getCx86(CX86_CCR1);
278 ccr[2] = getCx86(CX86_CCR2);
279 ccr[3] = ctxt.ccr3;
280 ccr[4] = getCx86(CX86_CCR4);
281 ccr[5] = getCx86(CX86_CCR5);
282 ccr[6] = getCx86(CX86_CCR6);
283
284 if (ccr[3] & 1) {
285 ccrc[3] = 1;
286 arr3_protected = 1;
287 } else {
288 /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and
289 * access to SMM memory through ARR3 (bit 7).
290 */
291 if (ccr[1] & 0x80) {
292 ccr[1] &= 0x7f;
293 ccrc[1] |= 0x80;
294 }
295 if (ccr[1] & 0x04) {
296 ccr[1] &= 0xfb;
297 ccrc[1] |= 0x04;
298 }
299 if (ccr[1] & 0x02) {
300 ccr[1] &= 0xfd;
301 ccrc[1] |= 0x02;
302 }
303 arr3_protected = 0;
304 if (ccr[6] & 0x02) {
305 ccr[6] &= 0xfd;
306 ccrc[6] = 1; /* Disable write protection of ARR3 */
307 setCx86(CX86_CCR6, ccr[6]);
308 }
309 /* Disable ARR3. This is safe now that we disabled SMM. */
310 /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */
311 }
312 /* If we changed CCR1 in memory, change it in the processor, too. */
313 if (ccrc[1])
314 setCx86(CX86_CCR1, ccr[1]);
315
316 /* Enable ARR usage by the processor */
317 if (!(ccr[5] & 0x20)) {
318 ccr[5] |= 0x20;
319 ccrc[5] = 1;
320 setCx86(CX86_CCR5, ccr[5]);
321 }
322#ifdef CONFIG_SMP
323 for (i = 0; i < 7; i++)
324 ccr_state[i] = ccr[i];
325 for (i = 0; i < 8; i++)
326 cyrix_get_arr(i,
327 &arr_state[i].base, &arr_state[i].size,
328 &arr_state[i].type);
329#endif
330
331 set_mtrr_done(&ctxt); /* flush cache and disable MAPEN */
332
333 if (ccrc[5])
334 printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n");
335 if (ccrc[3])
336 printk(KERN_INFO "mtrr: ARR3 cannot be changed\n");
337/*
338 if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n");
339 if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n");
340 if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n");
341*/
342 if (ccrc[6])
343 printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n");
344}
345#endif
346
347static struct mtrr_ops cyrix_mtrr_ops = {
348 .vendor = X86_VENDOR_CYRIX,
349// .init = cyrix_arr_init,
350 .set_all = cyrix_set_all,
351 .set = cyrix_set_arr,
352 .get = cyrix_get_arr,
353 .get_free_region = cyrix_get_free_region,
354 .validate_add_page = generic_validate_add_page,
355 .have_wrcomb = positive_have_wrcomb,
356};
357
358int __init cyrix_init_mtrr(void)
359{
360 set_mtrr_ops(&cyrix_mtrr_ops);
361 return 0;
362}
363
364//arch_initcall(cyrix_init_mtrr);
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
new file mode 100644
index 000000000000..a4cce454d09b
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -0,0 +1,417 @@
1/* This only handles 32-bit MTRRs on 32-bit hosts. This is strictly wrong
2   because MTRRs can span up to 40 bits (36 bits on most modern x86) */
3#include <linux/init.h>
4#include <linux/slab.h>
5#include <linux/mm.h>
6#include <asm/io.h>
7#include <asm/mtrr.h>
8#include <asm/msr.h>
9#include <asm/system.h>
10#include <asm/cpufeature.h>
11#include <asm/tlbflush.h>
12#include "mtrr.h"
13
14struct mtrr_state {
15 struct mtrr_var_range *var_ranges;
16 mtrr_type fixed_ranges[NUM_FIXED_RANGES];
17 unsigned char enabled;
18 mtrr_type def_type;
19};
20
21static unsigned long smp_changes_mask;
22static struct mtrr_state mtrr_state = {};
23
24/* Get the MSR pair relating to a var range */
25static void __init
26get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
27{
28 rdmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
29 rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
30}
31
32static void __init
33get_fixed_ranges(mtrr_type * frs)
34{
35 unsigned int *p = (unsigned int *) frs;
36 int i;
37
38 rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
39
40 for (i = 0; i < 2; i++)
41 rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]);
42 for (i = 0; i < 8; i++)
43 rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
44}
45
46/* Grab all of the MTRR state for this CPU into *state */
47void __init get_mtrr_state(void)
48{
49 unsigned int i;
50 struct mtrr_var_range *vrs;
51 unsigned lo, dummy;
52
53 if (!mtrr_state.var_ranges) {
54 mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range),
55 GFP_KERNEL);
56 if (!mtrr_state.var_ranges)
57 return;
58 }
59 vrs = mtrr_state.var_ranges;
60
61 for (i = 0; i < num_var_ranges; i++)
62 get_mtrr_var_range(i, &vrs[i]);
63 get_fixed_ranges(mtrr_state.fixed_ranges);
64
65 rdmsr(MTRRdefType_MSR, lo, dummy);
66 mtrr_state.def_type = (lo & 0xff);
67 mtrr_state.enabled = (lo & 0xc00) >> 10;
68}
69
70/* Free resources associated with a struct mtrr_state */
71void __init finalize_mtrr_state(void)
72{
73 if (mtrr_state.var_ranges)
74 kfree(mtrr_state.var_ranges);
75 mtrr_state.var_ranges = NULL;
76}
77
78/* Some BIOSes are broken and don't set all MTRRs the same! */
79void __init mtrr_state_warn(void)
80{
81 unsigned long mask = smp_changes_mask;
82
83 if (!mask)
84 return;
85 if (mask & MTRR_CHANGE_MASK_FIXED)
86 printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n");
87 if (mask & MTRR_CHANGE_MASK_VARIABLE)
88 printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n");
89 if (mask & MTRR_CHANGE_MASK_DEFTYPE)
90 printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n");
91 printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
92 printk(KERN_INFO "mtrr: corrected configuration.\n");
93}
94
95/* Doesn't attempt to pass an error out to MTRR users
96 because it's quite complicated in some cases and probably not
97 worth it because the best error handling is to ignore it. */
98void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
99{
100 if (wrmsr_safe(msr, a, b) < 0)
101 printk(KERN_ERR
102 "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
103 smp_processor_id(), msr, a, b);
104}
105
106int generic_get_free_region(unsigned long base, unsigned long size)
107/* [SUMMARY] Get a free MTRR.
108 <base> The starting (base) address of the region.
109 <size> The size (in bytes) of the region.
110 [RETURNS] The index of the region on success, else a negative error code.
111*/
112{
113 int i, max;
114 mtrr_type ltype;
115 unsigned long lbase;
116 unsigned lsize;
117
118 max = num_var_ranges;
119 for (i = 0; i < max; ++i) {
120 mtrr_if->get(i, &lbase, &lsize, &ltype);
121 if (lsize == 0)
122 return i;
123 }
124 return -ENOSPC;
125}
126
127void generic_get_mtrr(unsigned int reg, unsigned long *base,
128 unsigned int *size, mtrr_type * type)
129{
130 unsigned int mask_lo, mask_hi, base_lo, base_hi;
131
132 rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
133 if ((mask_lo & 0x800) == 0) {
134 /* Invalid (i.e. free) range */
135 *base = 0;
136 *size = 0;
137 *type = 0;
138 return;
139 }
140
141 rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
142
143 /* Work out the shifted address mask. */
144 mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT)
145 | mask_lo >> PAGE_SHIFT;
146
147 /* This works correctly if size is a power of two, i.e. a
148 contiguous range. */
149 *size = -mask_lo;
150 *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
151 *type = base_lo & 0xff;
152}
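
The *size = -mask_lo step deserves a note: a valid shifted mask is a contiguous run of 1s followed by 0s, so its two's-complement negation is exactly the region size. A user-space sketch with illustrative numbers:

#include <stdio.h>

int main(void)
{
	/* Shifted mask for a 64MB region in 4K pages (16384 pages): all
	 * high bits set, low 14 bits clear. Illustrative values only. */
	unsigned int mask_lo = 0xffffc000;
	unsigned int size = -mask_lo;	/* two's-complement negation */

	printf("size = 0x%x pages = %u MB\n", size, size >> 8);
	return 0;
}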
153
154static int set_fixed_ranges(mtrr_type * frs)
155{
156 unsigned int *p = (unsigned int *) frs;
157 int changed = FALSE;
158 int i;
159 unsigned int lo, hi;
160
161 rdmsr(MTRRfix64K_00000_MSR, lo, hi);
162 if (p[0] != lo || p[1] != hi) {
163 mtrr_wrmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
164 changed = TRUE;
165 }
166
167 for (i = 0; i < 2; i++) {
168 rdmsr(MTRRfix16K_80000_MSR + i, lo, hi);
169 if (p[2 + i * 2] != lo || p[3 + i * 2] != hi) {
170 mtrr_wrmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2],
171 p[3 + i * 2]);
172 changed = TRUE;
173 }
174 }
175
176 for (i = 0; i < 8; i++) {
177 rdmsr(MTRRfix4K_C0000_MSR + i, lo, hi);
178 if (p[6 + i * 2] != lo || p[7 + i * 2] != hi) {
179 mtrr_wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2],
180 p[7 + i * 2]);
181 changed = TRUE;
182 }
183 }
184 return changed;
185}
186
187/* Set the MSR pair relating to a var range. Returns TRUE if
188 changes are made */
189static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
190{
191 unsigned int lo, hi;
192 int changed = FALSE;
193
194 rdmsr(MTRRphysBase_MSR(index), lo, hi);
195 if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL)
196 || (vr->base_hi & 0xfUL) != (hi & 0xfUL)) {
197 mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
198 changed = TRUE;
199 }
200
201 rdmsr(MTRRphysMask_MSR(index), lo, hi);
202
203 if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL)
204 || (vr->mask_hi & 0xfUL) != (hi & 0xfUL)) {
205 mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
206 changed = TRUE;
207 }
208 return changed;
209}
210
211static unsigned long set_mtrr_state(u32 deftype_lo, u32 deftype_hi)
212/* [SUMMARY] Set the MTRR state for this CPU.
213 <state> The MTRR state information to read.
214 <ctxt> Some relevant CPU context.
215 [NOTE] The CPU must already be in a safe state for MTRR changes.
216 [RETURNS] 0 if no changes made, else a mask indication what was changed.
217*/
218{
219 unsigned int i;
220 unsigned long change_mask = 0;
221
222 for (i = 0; i < num_var_ranges; i++)
223 if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i]))
224 change_mask |= MTRR_CHANGE_MASK_VARIABLE;
225
226 if (set_fixed_ranges(mtrr_state.fixed_ranges))
227 change_mask |= MTRR_CHANGE_MASK_FIXED;
228
229	/* post_set() restores the old value of MTRRdefType,
230 so to set it we fiddle with the saved value */
231 if ((deftype_lo & 0xff) != mtrr_state.def_type
232 || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
233 deftype_lo |= (mtrr_state.def_type | mtrr_state.enabled << 10);
234 change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
235 }
236
237 return change_mask;
238}
239
240
241static unsigned long cr4 = 0;
242static u32 deftype_lo, deftype_hi;
243static DEFINE_SPINLOCK(set_atomicity_lock);
244
245/*
246 * Since we are disabling the cache don't allow any interrupts - they
247 * would run extremely slow and would only increase the pain. The caller must
248 * ensure that local interrupts are disabled and are reenabled after post_set()
249 * has been called.
250 */
251
252static void prepare_set(void)
253{
254 unsigned long cr0;
255
256 /* Note that this is not ideal, since the cache is only flushed/disabled
257 for this CPU while the MTRRs are changed, but changing this requires
258 more invasive changes to the way the kernel boots */
259
260 spin_lock(&set_atomicity_lock);
261
262 /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
263 cr0 = read_cr0() | 0x40000000; /* set CD flag */
264 write_cr0(cr0);
265 wbinvd();
266
267 /* Save value of CR4 and clear Page Global Enable (bit 7) */
268 if ( cpu_has_pge ) {
269 cr4 = read_cr4();
270 write_cr4(cr4 & ~X86_CR4_PGE);
271 }
272
273 /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
274 __flush_tlb();
275
276 /* Save MTRR state */
277 rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
278
279 /* Disable MTRRs, and set the default type to uncached */
280 mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi);
281}
282
283static void post_set(void)
284{
285 /* Flush TLBs (no need to flush caches - they are disabled) */
286 __flush_tlb();
287
288 /* Intel (P6) standard MTRRs */
289 mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
290
291 /* Enable caches */
292 write_cr0(read_cr0() & 0xbfffffff);
293
294 /* Restore value of CR4 */
295 if ( cpu_has_pge )
296 write_cr4(cr4);
297 spin_unlock(&set_atomicity_lock);
298}
299
300static void generic_set_all(void)
301{
302 unsigned long mask, count;
303 unsigned long flags;
304
305 local_irq_save(flags);
306 prepare_set();
307
308 /* Actually set the state */
309 mask = set_mtrr_state(deftype_lo,deftype_hi);
310
311 post_set();
312 local_irq_restore(flags);
313
314 /* Use the atomic bitops to update the global mask */
315 for (count = 0; count < sizeof mask * 8; ++count) {
316 if (mask & 0x01)
317 set_bit(count, &smp_changes_mask);
318 mask >>= 1;
319 }
320
321}
322
323static void generic_set_mtrr(unsigned int reg, unsigned long base,
324 unsigned long size, mtrr_type type)
325/* [SUMMARY] Set variable MTRR register on the local CPU.
326 <reg> The register to set.
327 <base> The base address of the region.
328 <size> The size of the region. If this is 0 the region is disabled.
329 <type> The type of the region.
330 <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
331 be done externally.
332 [RETURNS] Nothing.
333*/
334{
335 unsigned long flags;
336
337 local_irq_save(flags);
338 prepare_set();
339
340 if (size == 0) {
341 /* The invalid bit is kept in the mask, so we simply clear the
342 relevant mask register to disable a range. */
343 mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0);
344 } else {
345 mtrr_wrmsr(MTRRphysBase_MSR(reg), base << PAGE_SHIFT | type,
346 (base & size_and_mask) >> (32 - PAGE_SHIFT));
347 mtrr_wrmsr(MTRRphysMask_MSR(reg), -size << PAGE_SHIFT | 0x800,
348 (-size & size_and_mask) >> (32 - PAGE_SHIFT));
349 }
350
351 post_set();
352 local_irq_restore(flags);
353}
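
To make the bit packing above concrete, here is a hedged standalone rendering of the PhysBase/PhysMask values generic_set_mtrr() would program for an illustrative 4MB write-combining region at 0xf8000000 on a CPU with 36-bit physical addressing (the EX_ names are local stand-ins, not kernel symbols):

#include <stdio.h>

#define EX_PAGE_SHIFT 12
#define EX_MTRR_TYPE_WRCOMB 1	/* write-combining, as in asm/mtrr.h */

int main(void)
{
	unsigned long base = 0xf8000000UL >> EX_PAGE_SHIFT;	/* in pages */
	unsigned long size = 0x400000UL >> EX_PAGE_SHIFT;	/* in pages */
	unsigned long size_and_mask = 0x00f00000UL;	/* 36-bit phys width */
	unsigned long base_lo, base_hi, mask_lo, mask_hi;

	base_lo = (base << EX_PAGE_SHIFT | EX_MTRR_TYPE_WRCOMB) & 0xffffffffUL;
	base_hi = (base & size_and_mask) >> (32 - EX_PAGE_SHIFT);
	mask_lo = (-size << EX_PAGE_SHIFT | 0x800) & 0xffffffffUL;	/* 0x800 = valid bit */
	mask_hi = (-size & size_and_mask) >> (32 - EX_PAGE_SHIFT);

	printf("MTRRphysBase = %08lx:%08lx\n", base_hi, base_lo);
	printf("MTRRphysMask = %08lx:%08lx\n", mask_hi, mask_lo);
	return 0;
}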
354
355int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
356{
357 unsigned long lbase, last;
358
359 /* For Intel PPro stepping <= 7, must be 4 MiB aligned
360 and not touch 0x70000000->0x7003FFFF */
361 if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
362 boot_cpu_data.x86_model == 1 &&
363 boot_cpu_data.x86_mask <= 7) {
364 if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
365 printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
366 return -EINVAL;
367 }
368 if (!(base + size < 0x70000000 || base > 0x7003FFFF) &&
369 (type == MTRR_TYPE_WRCOMB
370 || type == MTRR_TYPE_WRBACK)) {
371 printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
372 return -EINVAL;
373 }
374 }
375
376 if (base + size < 0x100) {
377 printk(KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%lx000,0x%lx000)\n",
378 base, size);
379 return -EINVAL;
380 }
381 /* Check upper bits of base and last are equal and lower bits are 0
382 for base and 1 for last */
383 last = base + size - 1;
384 for (lbase = base; !(lbase & 1) && (last & 1);
385 lbase = lbase >> 1, last = last >> 1) ;
386 if (lbase != last) {
387 printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n",
388 base, size);
389 return -EINVAL;
390 }
391 return 0;
392}
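
The shift loop in the alignment check reads tersely: it strips trailing 0 bits from base and trailing 1 bits from last in lockstep, so the two collapse to the same value exactly when base is aligned on a size boundary. A small sketch of the same test (values illustrative, in pages as above):

#include <stdio.h>

static int aligned(unsigned long base, unsigned long size)
{
	unsigned long lbase, last = base + size - 1;

	for (lbase = base; !(lbase & 1) && (last & 1);
	     lbase >>= 1, last >>= 1)
		;
	return lbase == last;
}

int main(void)
{
	printf("base 0x1000, size 0x1000: %s\n",
	       aligned(0x1000, 0x1000) ? "aligned" : "misaligned");
	printf("base 0x1800, size 0x1000: %s\n",
	       aligned(0x1800, 0x1000) ? "aligned" : "misaligned");
	return 0;
}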
393
394
395static int generic_have_wrcomb(void)
396{
397 unsigned long config, dummy;
398 rdmsr(MTRRcap_MSR, config, dummy);
399 return (config & (1 << 10));
400}
401
402int positive_have_wrcomb(void)
403{
404 return 1;
405}
406
407/* Generic (Intel-compatible) MTRR operations */
409struct mtrr_ops generic_mtrr_ops = {
410 .use_intel_if = 1,
411 .set_all = generic_set_all,
412 .get = generic_get_mtrr,
413 .get_free_region = generic_get_free_region,
414 .set = generic_set_mtrr,
415 .validate_add_page = generic_validate_add_page,
416 .have_wrcomb = generic_have_wrcomb,
417};
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
new file mode 100644
index 000000000000..1923e0aed26a
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/if.c
@@ -0,0 +1,374 @@
1#include <linux/init.h>
2#include <linux/proc_fs.h>
3#include <linux/ctype.h>
4#include <linux/module.h>
5#include <linux/seq_file.h>
6#include <asm/uaccess.h>
7
8#define LINE_SIZE 80
9
10#include <asm/mtrr.h>
11#include "mtrr.h"
12
13/* RED-PEN: this is accessed without any locking */
14extern unsigned int *usage_table;
15
16
17#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
18
19static char *mtrr_strings[MTRR_NUM_TYPES] =
20{
21 "uncachable", /* 0 */
22 "write-combining", /* 1 */
23 "?", /* 2 */
24 "?", /* 3 */
25 "write-through", /* 4 */
26 "write-protect", /* 5 */
27 "write-back", /* 6 */
28};
29
30char *mtrr_attrib_to_str(int x)
31{
32 return (x <= 6) ? mtrr_strings[x] : "?";
33}
34
35#ifdef CONFIG_PROC_FS
36
37static int
38mtrr_file_add(unsigned long base, unsigned long size,
39 unsigned int type, char increment, struct file *file, int page)
40{
41 int reg, max;
42 unsigned int *fcount = FILE_FCOUNT(file);
43
44 max = num_var_ranges;
45 if (fcount == NULL) {
46 fcount = kmalloc(max * sizeof *fcount, GFP_KERNEL);
47 if (!fcount)
48 return -ENOMEM;
49 memset(fcount, 0, max * sizeof *fcount);
50 FILE_FCOUNT(file) = fcount;
51 }
52 if (!page) {
53 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)))
54 return -EINVAL;
55 base >>= PAGE_SHIFT;
56 size >>= PAGE_SHIFT;
57 }
58 reg = mtrr_add_page(base, size, type, 1);
59 if (reg >= 0)
60 ++fcount[reg];
61 return reg;
62}
63
64static int
65mtrr_file_del(unsigned long base, unsigned long size,
66 struct file *file, int page)
67{
68 int reg;
69 unsigned int *fcount = FILE_FCOUNT(file);
70
71 if (!page) {
72 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)))
73 return -EINVAL;
74 base >>= PAGE_SHIFT;
75 size >>= PAGE_SHIFT;
76 }
77 reg = mtrr_del_page(-1, base, size);
78 if (reg < 0)
79 return reg;
80 if (fcount == NULL)
81 return reg;
82 if (fcount[reg] < 1)
83 return -EINVAL;
84 --fcount[reg];
85 return reg;
86}
87
88/* RED-PEN: seq_file can seek now. this is ignored. */
89static ssize_t
90mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
91/* Format of control line:
92 "base=%Lx size=%Lx type=%s" OR:
93 "disable=%d"
94*/
95{
96 int i, err;
97 unsigned long reg;
98 unsigned long long base, size;
99 char *ptr;
100 char line[LINE_SIZE];
101 size_t linelen;
102
103 if (!capable(CAP_SYS_ADMIN))
104 return -EPERM;
105 if (!len)
106 return -EINVAL;
107 memset(line, 0, LINE_SIZE);
108 if (len > LINE_SIZE)
109 len = LINE_SIZE;
110 if (copy_from_user(line, buf, len - 1))
111 return -EFAULT;
112 linelen = strlen(line);
113 ptr = line + linelen - 1;
114 if (linelen && *ptr == '\n')
115 *ptr = '\0';
116 if (!strncmp(line, "disable=", 8)) {
117 reg = simple_strtoul(line + 8, &ptr, 0);
118 err = mtrr_del_page(reg, 0, 0);
119 if (err < 0)
120 return err;
121 return len;
122 }
123 if (strncmp(line, "base=", 5))
124 return -EINVAL;
125 base = simple_strtoull(line + 5, &ptr, 0);
126 for (; isspace(*ptr); ++ptr) ;
127 if (strncmp(ptr, "size=", 5))
128 return -EINVAL;
129 size = simple_strtoull(ptr + 5, &ptr, 0);
130 if ((base & 0xfff) || (size & 0xfff))
131 return -EINVAL;
132 for (; isspace(*ptr); ++ptr) ;
133 if (strncmp(ptr, "type=", 5))
134 return -EINVAL;
135 ptr += 5;
136 for (; isspace(*ptr); ++ptr) ;
137 for (i = 0; i < MTRR_NUM_TYPES; ++i) {
138 if (strcmp(ptr, mtrr_strings[i]))
139 continue;
140 base >>= PAGE_SHIFT;
141 size >>= PAGE_SHIFT;
142 err =
143 mtrr_add_page((unsigned long) base, (unsigned long) size, i,
144 1);
145 if (err < 0)
146 return err;
147 return len;
148 }
149 return -EINVAL;
150}
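
As a usage sketch of the control-line format parsed above (the region shown is made up; root privileges are required):

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	const char *line = "base=0xf8000000 size=0x400000 type=write-combining\n";
	int fd = open("/proc/mtrr", O_WRONLY);

	if (fd < 0) {
		perror("open /proc/mtrr");
		return 1;
	}
	if (write(fd, line, strlen(line)) < 0)
		perror("write /proc/mtrr");
	close(fd);
	return 0;
}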
151
152static int
153mtrr_ioctl(struct inode *inode, struct file *file,
154 unsigned int cmd, unsigned long __arg)
155{
156 int err;
157 mtrr_type type;
158 struct mtrr_sentry sentry;
159 struct mtrr_gentry gentry;
160 void __user *arg = (void __user *) __arg;
161
162 switch (cmd) {
163 default:
164 return -ENOTTY;
165 case MTRRIOC_ADD_ENTRY:
166 if (!capable(CAP_SYS_ADMIN))
167 return -EPERM;
168 if (copy_from_user(&sentry, arg, sizeof sentry))
169 return -EFAULT;
170 err =
171 mtrr_file_add(sentry.base, sentry.size, sentry.type, 1,
172 file, 0);
173 if (err < 0)
174 return err;
175 break;
176 case MTRRIOC_SET_ENTRY:
177 if (!capable(CAP_SYS_ADMIN))
178 return -EPERM;
179 if (copy_from_user(&sentry, arg, sizeof sentry))
180 return -EFAULT;
181 err = mtrr_add(sentry.base, sentry.size, sentry.type, 0);
182 if (err < 0)
183 return err;
184 break;
185 case MTRRIOC_DEL_ENTRY:
186 if (!capable(CAP_SYS_ADMIN))
187 return -EPERM;
188 if (copy_from_user(&sentry, arg, sizeof sentry))
189 return -EFAULT;
190 err = mtrr_file_del(sentry.base, sentry.size, file, 0);
191 if (err < 0)
192 return err;
193 break;
194 case MTRRIOC_KILL_ENTRY:
195 if (!capable(CAP_SYS_ADMIN))
196 return -EPERM;
197 if (copy_from_user(&sentry, arg, sizeof sentry))
198 return -EFAULT;
199 err = mtrr_del(-1, sentry.base, sentry.size);
200 if (err < 0)
201 return err;
202 break;
203 case MTRRIOC_GET_ENTRY:
204 if (copy_from_user(&gentry, arg, sizeof gentry))
205 return -EFAULT;
206 if (gentry.regnum >= num_var_ranges)
207 return -EINVAL;
208 mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type);
209
210 /* Hide entries that go above 4GB */
211 if (gentry.base + gentry.size > 0x100000
212 || gentry.size == 0x100000)
213 gentry.base = gentry.size = gentry.type = 0;
214 else {
215 gentry.base <<= PAGE_SHIFT;
216 gentry.size <<= PAGE_SHIFT;
217 gentry.type = type;
218 }
219
220 if (copy_to_user(arg, &gentry, sizeof gentry))
221 return -EFAULT;
222 break;
223 case MTRRIOC_ADD_PAGE_ENTRY:
224 if (!capable(CAP_SYS_ADMIN))
225 return -EPERM;
226 if (copy_from_user(&sentry, arg, sizeof sentry))
227 return -EFAULT;
228 err =
229 mtrr_file_add(sentry.base, sentry.size, sentry.type, 1,
230 file, 1);
231 if (err < 0)
232 return err;
233 break;
234 case MTRRIOC_SET_PAGE_ENTRY:
235 if (!capable(CAP_SYS_ADMIN))
236 return -EPERM;
237 if (copy_from_user(&sentry, arg, sizeof sentry))
238 return -EFAULT;
239 err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0);
240 if (err < 0)
241 return err;
242 break;
243 case MTRRIOC_DEL_PAGE_ENTRY:
244 if (!capable(CAP_SYS_ADMIN))
245 return -EPERM;
246 if (copy_from_user(&sentry, arg, sizeof sentry))
247 return -EFAULT;
248 err = mtrr_file_del(sentry.base, sentry.size, file, 1);
249 if (err < 0)
250 return err;
251 break;
252 case MTRRIOC_KILL_PAGE_ENTRY:
253 if (!capable(CAP_SYS_ADMIN))
254 return -EPERM;
255 if (copy_from_user(&sentry, arg, sizeof sentry))
256 return -EFAULT;
257 err = mtrr_del_page(-1, sentry.base, sentry.size);
258 if (err < 0)
259 return err;
260 break;
261 case MTRRIOC_GET_PAGE_ENTRY:
262 if (copy_from_user(&gentry, arg, sizeof gentry))
263 return -EFAULT;
264 if (gentry.regnum >= num_var_ranges)
265 return -EINVAL;
266 mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type);
267 gentry.type = type;
268
269 if (copy_to_user(arg, &gentry, sizeof gentry))
270 return -EFAULT;
271 break;
272 }
273 return 0;
274}
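
The same operations are reachable through the binary ioctl path just shown; a hedged user-space sketch adding one entry (struct mtrr_sentry and the MTRRIOC_* constants come from <asm/mtrr.h>; the region is illustrative):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <asm/mtrr.h>

int main(void)
{
	struct mtrr_sentry sentry = {
		.base = 0xf8000000,
		.size = 0x400000,
		.type = MTRR_TYPE_WRCOMB,
	};
	int fd = open("/proc/mtrr", O_WRONLY);

	if (fd < 0) {
		perror("open /proc/mtrr");
		return 1;
	}
	if (ioctl(fd, MTRRIOC_ADD_ENTRY, &sentry) < 0)
		perror("MTRRIOC_ADD_ENTRY");
	close(fd);
	return 0;
}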
275
276static int
277mtrr_close(struct inode *ino, struct file *file)
278{
279 int i, max;
280 unsigned int *fcount = FILE_FCOUNT(file);
281
282 if (fcount != NULL) {
283 max = num_var_ranges;
284 for (i = 0; i < max; ++i) {
285 while (fcount[i] > 0) {
286 mtrr_del(i, 0, 0);
287 --fcount[i];
288 }
289 }
290 kfree(fcount);
291 FILE_FCOUNT(file) = NULL;
292 }
293 return single_release(ino, file);
294}
295
296static int mtrr_seq_show(struct seq_file *seq, void *offset);
297
298static int mtrr_open(struct inode *inode, struct file *file)
299{
300 if (!mtrr_if)
301 return -EIO;
302 if (!mtrr_if->get)
303 return -ENXIO;
304 return single_open(file, mtrr_seq_show, NULL);
305}
306
307static struct file_operations mtrr_fops = {
308 .owner = THIS_MODULE,
309 .open = mtrr_open,
310 .read = seq_read,
311 .llseek = seq_lseek,
312 .write = mtrr_write,
313 .ioctl = mtrr_ioctl,
314 .release = mtrr_close,
315};
316
317
318static struct proc_dir_entry *proc_root_mtrr;
319
320
321static int mtrr_seq_show(struct seq_file *seq, void *offset)
322{
323 char factor;
324 int i, max, len;
325 mtrr_type type;
326 unsigned long base;
327 unsigned int size;
328
329 len = 0;
330 max = num_var_ranges;
331 for (i = 0; i < max; i++) {
332 mtrr_if->get(i, &base, &size, &type);
333 if (size == 0)
334 usage_table[i] = 0;
335 else {
336 if (size < (0x100000 >> PAGE_SHIFT)) {
337 /* less than 1MB */
338 factor = 'K';
339 size <<= PAGE_SHIFT - 10;
340 } else {
341 factor = 'M';
342 size >>= 20 - PAGE_SHIFT;
343 }
344 /* RED-PEN: base can be > 32bit */
345 len += seq_printf(seq,
346 "reg%02i: base=0x%05lx000 (%4liMB), size=%4i%cB: %s, count=%d\n",
347 i, base, base >> (20 - PAGE_SHIFT), size, factor,
348 mtrr_attrib_to_str(type), usage_table[i]);
349 }
350 }
351 return 0;
352}
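
For reference, a line produced by the seq_printf() format above looks roughly like this (values illustrative):

reg00: base=0xf8000000 (3968MB), size=   4MB: write-combining, count=1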
353
354static int __init mtrr_if_init(void)
355{
356 struct cpuinfo_x86 *c = &boot_cpu_data;
357
358 if ((!cpu_has(c, X86_FEATURE_MTRR)) &&
359 (!cpu_has(c, X86_FEATURE_K6_MTRR)) &&
360 (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) &&
361 (!cpu_has(c, X86_FEATURE_CENTAUR_MCR)))
362 return -ENODEV;
363
364 proc_root_mtrr =
365 create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root);
366 if (proc_root_mtrr) {
367 proc_root_mtrr->owner = THIS_MODULE;
368 proc_root_mtrr->proc_fops = &mtrr_fops;
369 }
370 return 0;
371}
372
373arch_initcall(mtrr_if_init);
374#endif /* CONFIG_PROC_FS */
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
new file mode 100644
index 000000000000..8f67b490a7fd
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -0,0 +1,693 @@
1/* Generic MTRR (Memory Type Range Register) driver.
2
3 Copyright (C) 1997-2000 Richard Gooch
4 Copyright (c) 2002 Patrick Mochel
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Library General Public
17 License along with this library; if not, write to the Free
18 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
20 Richard Gooch may be reached by email at rgooch@atnf.csiro.au
21 The postal address is:
22 Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
23
24 Source: "Pentium Pro Family Developer's Manual, Volume 3:
25 Operating System Writer's Guide" (Intel document number 242692),
26 section 11.11.7
27
28 This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
29 on 6-7 March 2002.
30 Source: Intel Architecture Software Developers Manual, Volume 3:
31 System Programming Guide; Section 9.11. (1997 edition - PPro).
32*/
33
34#include <linux/module.h>
35#include <linux/init.h>
36#include <linux/pci.h>
37#include <linux/smp.h>
38#include <linux/cpu.h>
39
40#include <asm/mtrr.h>
41
42#include <asm/uaccess.h>
43#include <asm/processor.h>
44#include <asm/msr.h>
45#include "mtrr.h"
46
47#define MTRR_VERSION "2.0 (20020519)"
48
49u32 num_var_ranges = 0;
50
51unsigned int *usage_table;
52static DECLARE_MUTEX(main_lock);
53
54u32 size_or_mask, size_and_mask;
55
56static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
57
58struct mtrr_ops * mtrr_if = NULL;
59
60static void set_mtrr(unsigned int reg, unsigned long base,
61 unsigned long size, mtrr_type type);
62
63extern int arr3_protected;
64
65void set_mtrr_ops(struct mtrr_ops * ops)
66{
67 if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
68 mtrr_ops[ops->vendor] = ops;
69}
70
71/* Returns non-zero if we have the write-combining memory type */
72static int have_wrcomb(void)
73{
74 struct pci_dev *dev;
75
76 if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) {
77 /* ServerWorks LE chipsets have problems with write-combining
78 Don't allow it and leave room for other chipsets to be tagged */
79 if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
80 dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) {
81 printk(KERN_INFO "mtrr: Serverworks LE detected. Write-combining disabled.\n");
82 pci_dev_put(dev);
83 return 0;
84 }
85		/* Intel 450NX errata #23. Non-ascending cacheline evictions to
86		   write-combining memory may result in data corruption */
87 if (dev->vendor == PCI_VENDOR_ID_INTEL &&
88 dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
89 printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
90 pci_dev_put(dev);
91 return 0;
92 }
93 pci_dev_put(dev);
94 }
95 return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0);
96}
97
98/* This function sets num_var_ranges to the number of variable MTRRs */
99static void __init set_num_var_ranges(void)
100{
101 unsigned long config = 0, dummy;
102
103 if (use_intel()) {
104 rdmsr(MTRRcap_MSR, config, dummy);
105 } else if (is_cpu(AMD))
106 config = 2;
107 else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
108 config = 8;
109 num_var_ranges = config & 0xff;
110}
111
112static void __init init_table(void)
113{
114 int i, max;
115
116 max = num_var_ranges;
117 if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
118 == NULL) {
119 printk(KERN_ERR "mtrr: could not allocate\n");
120 return;
121 }
122 for (i = 0; i < max; i++)
123 usage_table[i] = 1;
124}
125
126struct set_mtrr_data {
127 atomic_t count;
128 atomic_t gate;
129 unsigned long smp_base;
130 unsigned long smp_size;
131 unsigned int smp_reg;
132 mtrr_type smp_type;
133};
134
135#ifdef CONFIG_SMP
136
137static void ipi_handler(void *info)
138/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
139 [RETURNS] Nothing.
140*/
141{
142 struct set_mtrr_data *data = info;
143 unsigned long flags;
144
145 local_irq_save(flags);
146
147 atomic_dec(&data->count);
148 while(!atomic_read(&data->gate))
149 cpu_relax();
150
151 /* The master has cleared me to execute */
152 if (data->smp_reg != ~0U)
153 mtrr_if->set(data->smp_reg, data->smp_base,
154 data->smp_size, data->smp_type);
155 else
156 mtrr_if->set_all();
157
158 atomic_dec(&data->count);
159 while(atomic_read(&data->gate))
160 cpu_relax();
161
162 atomic_dec(&data->count);
163 local_irq_restore(flags);
164}
165
166#endif
167
168/**
169 * set_mtrr - update mtrrs on all processors
170 * @reg: mtrr in question
171 * @base: mtrr base
172 * @size: mtrr size
173 * @type: mtrr type
174 *
175 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
176 *
177 * 1. Send IPI to do the following:
178 * 2. Disable Interrupts
179 * 3. Wait for all procs to do so
180 * 4. Enter no-fill cache mode
181 * 5. Flush caches
182 * 6. Clear PGE bit
183 * 7. Flush all TLBs
184 * 8. Disable all range registers
185 * 9. Update the MTRRs
186 * 10. Enable all range registers
187 * 11. Flush all TLBs and caches again
188 * 12. Enter normal cache mode and reenable caching
189 * 13. Set PGE
190 * 14. Wait for buddies to catch up
191 * 15. Enable interrupts.
192 *
193 * What does that mean for us? Well, first we set data.count to the number
194 * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait
195 * until it hits 0 and proceed. We set the data.gate flag and reset data.count.
196 * Meanwhile, they are waiting for that flag to be set. Once it's set, each
197 * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it
198 * differently, so we call the mtrr_if->set() callback and let the vendor code take care of it.
199 * When they're done, they again decrement data->count and wait for data.gate to
200 * be reset.
201 * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag.
202 * Everyone then enables interrupts and we all continue on.
203 *
204 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
205 * becomes nops.
206 */
207static void set_mtrr(unsigned int reg, unsigned long base,
208 unsigned long size, mtrr_type type)
209{
210 struct set_mtrr_data data;
211 unsigned long flags;
212
213 data.smp_reg = reg;
214 data.smp_base = base;
215 data.smp_size = size;
216 data.smp_type = type;
217 atomic_set(&data.count, num_booting_cpus() - 1);
218 atomic_set(&data.gate,0);
219
220 /* Start the ball rolling on other CPUs */
221 if (smp_call_function(ipi_handler, &data, 1, 0) != 0)
222 panic("mtrr: timed out waiting for other CPUs\n");
223
224 local_irq_save(flags);
225
226 while(atomic_read(&data.count))
227 cpu_relax();
228
229 /* ok, reset count and toggle gate */
230 atomic_set(&data.count, num_booting_cpus() - 1);
231 atomic_set(&data.gate,1);
232
233 /* do our MTRR business */
234
235 /* HACK!
236 * We use this same function to initialize the mtrrs on boot.
237 * The state of the boot cpu's mtrrs has been saved, and we want
238 * to replicate across all the APs.
239 * If we're doing that @reg is set to something special...
240 */
241 if (reg != ~0U)
242 mtrr_if->set(reg,base,size,type);
243
244 /* wait for the others */
245 while(atomic_read(&data.count))
246 cpu_relax();
247
248 atomic_set(&data.count, num_booting_cpus() - 1);
249 atomic_set(&data.gate,0);
250
251 /*
252 * Wait here for everyone to have seen the gate change
253 * So we're the last ones to touch 'data'
254 */
255 while(atomic_read(&data.count))
256 cpu_relax();
257
258 local_irq_restore(flags);
259}
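
The comment block above describes the rendezvous abstractly; the count/gate handshake can be modeled in plain user space. A pthreads sketch of the same two-phase protocol (threads stand in for CPUs and a printf for the MTRR update; illustrative only, no relation to real IPIs):

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

#define NCPUS 4

static atomic_int count;
static atomic_int gate;

static void *other_cpu(void *arg)
{
	long cpu = (long) arg;

	atomic_fetch_sub(&count, 1);	/* "interrupts disabled" */
	while (!atomic_load(&gate))	/* wait for the go-ahead */
		;
	printf("cpu %ld: updating MTRRs\n", cpu);
	atomic_fetch_sub(&count, 1);	/* update done */
	while (atomic_load(&gate))	/* wait for the gate to drop */
		;
	atomic_fetch_sub(&count, 1);	/* last touch of the data */
	return NULL;
}

int main(void)
{
	pthread_t t[NCPUS - 1];
	long i;

	atomic_store(&count, NCPUS - 1);
	atomic_store(&gate, 0);
	for (i = 0; i < NCPUS - 1; i++)
		pthread_create(&t[i], NULL, other_cpu, (void *) i);

	while (atomic_load(&count))	/* everyone checked in */
		;
	atomic_store(&count, NCPUS - 1);
	atomic_store(&gate, 1);		/* release the helpers */
	printf("master: updating MTRRs\n");
	while (atomic_load(&count))	/* everyone updated */
		;
	atomic_store(&count, NCPUS - 1);
	atomic_store(&gate, 0);
	while (atomic_load(&count))	/* everyone saw the gate drop */
		;
	for (i = 0; i < NCPUS - 1; i++)
		pthread_join(t[i], NULL);
	return 0;
}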
260
261/**
262 * mtrr_add_page - Add a memory type region
263 * @base: Physical base address of region in pages (4 KB)
264 * @size: Physical size of region in pages (4 KB)
265 * @type: Type of MTRR desired
266 * @increment: If this is true do usage counting on the region
267 *
268 * Memory type region registers control the caching on newer Intel and
269 * non-Intel processors. This function allows drivers to request that an
270 * MTRR be added. The details and hardware specifics of each processor's
271 * implementation are hidden from the caller, but nevertheless the
272 * caller should expect to need to provide a power of two size on an
273 * equivalent power of two boundary.
274 *
275 * If the region cannot be added either because all regions are in use
276 * or the CPU cannot support it a negative value is returned. On success
277 * the register number for this entry is returned, but should be treated
278 * as a cookie only.
279 *
280 * On a multiprocessor machine the changes are made to all processors.
281 * This is required on x86 by the Intel processors.
282 *
283 * The available types are
284 *
285 * %MTRR_TYPE_UNCACHABLE - No caching
286 *
287 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
288 *
289 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
290 *
291 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
292 *
293 * BUGS: Needs a quiet flag for the cases where drivers do not mind
294 * failures and do not wish system log messages to be sent.
295 */
296
297int mtrr_add_page(unsigned long base, unsigned long size,
298 unsigned int type, char increment)
299{
300 int i;
301 mtrr_type ltype;
302 unsigned long lbase;
303 unsigned int lsize;
304 int error;
305
306 if (!mtrr_if)
307 return -ENXIO;
308
309 if ((error = mtrr_if->validate_add_page(base,size,type)))
310 return error;
311
312 if (type >= MTRR_NUM_TYPES) {
313 printk(KERN_WARNING "mtrr: type: %u invalid\n", type);
314 return -EINVAL;
315 }
316
317 /* If the type is WC, check that this processor supports it */
318 if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
319 printk(KERN_WARNING
320 "mtrr: your processor doesn't support write-combining\n");
321 return -ENOSYS;
322 }
323
324 if (base & size_or_mask || size & size_or_mask) {
325 printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n");
326 return -EINVAL;
327 }
328
329 error = -EINVAL;
330
331 /* Search for existing MTRR */
332 down(&main_lock);
333 for (i = 0; i < num_var_ranges; ++i) {
334 mtrr_if->get(i, &lbase, &lsize, &ltype);
335 if (base >= lbase + lsize)
336 continue;
337 if ((base < lbase) && (base + size <= lbase))
338 continue;
339 /* At this point we know there is some kind of overlap/enclosure */
340 if ((base < lbase) || (base + size > lbase + lsize)) {
341 printk(KERN_WARNING
342 "mtrr: 0x%lx000,0x%lx000 overlaps existing"
343 " 0x%lx000,0x%x000\n", base, size, lbase,
344 lsize);
345 goto out;
346 }
347 /* New region is enclosed by an existing region */
348 if (ltype != type) {
349 if (type == MTRR_TYPE_UNCACHABLE)
350 continue;
351 printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
352 base, size, mtrr_attrib_to_str(ltype),
353 mtrr_attrib_to_str(type));
354 goto out;
355 }
356 if (increment)
357 ++usage_table[i];
358 error = i;
359 goto out;
360 }
361 /* Search for an empty MTRR */
362 i = mtrr_if->get_free_region(base, size);
363 if (i >= 0) {
364 set_mtrr(i, base, size, type);
365 usage_table[i] = 1;
366 } else
367 printk(KERN_INFO "mtrr: no more MTRRs available\n");
368 error = i;
369 out:
370 up(&main_lock);
371 return error;
372}
373
374/**
375 * mtrr_add - Add a memory type region
376 * @base: Physical base address of region
377 * @size: Physical size of region
378 * @type: Type of MTRR desired
379 * @increment: If this is true do usage counting on the region
380 *
381 * Memory type region registers control the caching on newer Intel and
382 * non-Intel processors. This function allows drivers to request that an
383 * MTRR be added. The details and hardware specifics of each processor's
384 * implementation are hidden from the caller, but nevertheless the
385 * caller should expect to need to provide a power of two size on an
386 * equivalent power of two boundary.
387 *
388 * If the region cannot be added either because all regions are in use
389 * or the CPU cannot support it a negative value is returned. On success
390 * the register number for this entry is returned, but should be treated
391 * as a cookie only.
392 *
393 * On a multiprocessor machine the changes are made to all processors.
394 * This is required on x86 by the Intel processors.
395 *
396 * The available types are
397 *
398 * %MTRR_TYPE_UNCACHABLE - No caching
399 *
400 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
401 *
402 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
403 *
404 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
405 *
406 * BUGS: Needs a quiet flag for the cases where drivers do not mind
407 * failures and do not wish system log messages to be sent.
408 */
409
410int
411mtrr_add(unsigned long base, unsigned long size, unsigned int type,
412 char increment)
413{
414 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
415 printk(KERN_WARNING "mtrr: size and base must be multiples of 4 kiB\n");
416 printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base);
417 return -EINVAL;
418 }
419 return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
420 increment);
421}
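
A hedged sketch of how a driver might use the pair of calls documented above for a hypothetical framebuffer (fb_base, fb_size and the function names are made up):

/* Driver-side use of mtrr_add()/mtrr_del(); needs <asm/mtrr.h>. The
 * returned register number is kept only as a cookie for mtrr_del(). */
static int fb_mtrr = -1;

static void example_enable_wc(unsigned long fb_base, unsigned long fb_size)
{
	/* Failure is not fatal: the framebuffer still works, just slower. */
	fb_mtrr = mtrr_add(fb_base, fb_size, MTRR_TYPE_WRCOMB, 1);
}

static void example_disable_wc(unsigned long fb_base, unsigned long fb_size)
{
	if (fb_mtrr >= 0)
		mtrr_del(fb_mtrr, fb_base, fb_size);
	fb_mtrr = -1;
}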
422
423/**
424 * mtrr_del_page - delete a memory type region
425 * @reg: Register returned by mtrr_add
426 * @base: Physical base address
427 * @size: Size of region
428 *
429 * If register is supplied then base and size are ignored. This is
430 * how drivers should call it.
431 *
432 * Releases an MTRR region. If the usage count drops to zero the
433 * register is freed and the region returns to default state.
434 * On success the register is returned, on failure a negative error
435 * code.
436 */
437
438int mtrr_del_page(int reg, unsigned long base, unsigned long size)
439{
440 int i, max;
441 mtrr_type ltype;
442 unsigned long lbase;
443 unsigned int lsize;
444 int error = -EINVAL;
445
446 if (!mtrr_if)
447 return -ENXIO;
448
449 max = num_var_ranges;
450 down(&main_lock);
451 if (reg < 0) {
452 /* Search for existing MTRR */
453 for (i = 0; i < max; ++i) {
454 mtrr_if->get(i, &lbase, &lsize, &ltype);
455 if (lbase == base && lsize == size) {
456 reg = i;
457 break;
458 }
459 }
460 if (reg < 0) {
461 printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base,
462 size);
463 goto out;
464 }
465 }
466 if (reg >= max) {
467 printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
468 goto out;
469 }
470 if (is_cpu(CYRIX) && !use_intel()) {
471 if ((reg == 3) && arr3_protected) {
472 printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n");
473 goto out;
474 }
475 }
476 mtrr_if->get(reg, &lbase, &lsize, &ltype);
477 if (lsize < 1) {
478 printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
479 goto out;
480 }
481 if (usage_table[reg] < 1) {
482 printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
483 goto out;
484 }
485 if (--usage_table[reg] < 1)
486 set_mtrr(reg, 0, 0, 0);
487 error = reg;
488 out:
489 up(&main_lock);
490 return error;
491}
492/**
493 * mtrr_del - delete a memory type region
494 * @reg: Register returned by mtrr_add
495 * @base: Physical base address
496 * @size: Size of region
497 *
498 * If register is supplied then base and size are ignored. This is
499 * how drivers should call it.
500 *
501 * Releases an MTRR region. If the usage count drops to zero the
502 * register is freed and the region returns to default state.
503 * On success the register is returned, on failure a negative error
504 * code.
505 */
506
507int
508mtrr_del(int reg, unsigned long base, unsigned long size)
509{
510 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
511 printk(KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n");
512 printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base);
513 return -EINVAL;
514 }
515 return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
516}
517
518EXPORT_SYMBOL(mtrr_add);
519EXPORT_SYMBOL(mtrr_del);
520
521/* HACK ALERT!
522 * These should be called implicitly, but we can't yet until all the initcall
523 * stuff is done...
524 */
525extern void amd_init_mtrr(void);
526extern void cyrix_init_mtrr(void);
527extern void centaur_init_mtrr(void);
528
529static void __init init_ifs(void)
530{
531 amd_init_mtrr();
532 cyrix_init_mtrr();
533 centaur_init_mtrr();
534}
535
536static void __init init_other_cpus(void)
537{
538 if (use_intel())
539 get_mtrr_state();
540
541 /* bring up the other processors */
542 set_mtrr(~0U,0,0,0);
543
544 if (use_intel()) {
545 finalize_mtrr_state();
546 mtrr_state_warn();
547 }
548}
549
550
551struct mtrr_value {
552 mtrr_type ltype;
553 unsigned long lbase;
554 unsigned int lsize;
555};
556
557static struct mtrr_value * mtrr_state;
558
559static int mtrr_save(struct sys_device * sysdev, u32 state)
560{
561 int i;
562 int size = num_var_ranges * sizeof(struct mtrr_value);
563
564 mtrr_state = kmalloc(size,GFP_ATOMIC);
565 if (mtrr_state)
566 memset(mtrr_state,0,size);
567 else
568 return -ENOMEM;
569
570 for (i = 0; i < num_var_ranges; i++) {
571 mtrr_if->get(i,
572 &mtrr_state[i].lbase,
573 &mtrr_state[i].lsize,
574 &mtrr_state[i].ltype);
575 }
576 return 0;
577}
578
579static int mtrr_restore(struct sys_device * sysdev)
580{
581 int i;
582
583 for (i = 0; i < num_var_ranges; i++) {
584 if (mtrr_state[i].lsize)
585 set_mtrr(i,
586 mtrr_state[i].lbase,
587 mtrr_state[i].lsize,
588 mtrr_state[i].ltype);
589 }
590 kfree(mtrr_state);
591 return 0;
592}
593
594
595
596static struct sysdev_driver mtrr_sysdev_driver = {
597 .suspend = mtrr_save,
598 .resume = mtrr_restore,
599};
600
601
602/**
603 * mtrr_init - initialize mtrrs on the boot CPU
604 *
605 * This needs to be called early; before any of the other CPUs are
606 * initialized (i.e. before smp_init()).
607 *
608 */
609static int __init mtrr_init(void)
610{
611 init_ifs();
612
613 if (cpu_has_mtrr) {
614 mtrr_if = &generic_mtrr_ops;
615 size_or_mask = 0xff000000; /* 36 bits */
616 size_and_mask = 0x00f00000;
617
618 switch (boot_cpu_data.x86_vendor) {
619 case X86_VENDOR_AMD:
620 /* The original Athlon docs said that
621 total addressable memory is 44 bits wide.
622 It was not really clear whether its MTRRs
623 follow this or not. (Read: 44 or 36 bits).
624 However, "x86-64_overview.pdf" explicitly
625 states that "previous implementations support
626 36 bit MTRRs" and also provides a way to
627 query the width (in bits) of the physical
628 addressable memory on the Hammer family.
629 */
630 if (boot_cpu_data.x86 == 15
631 && (cpuid_eax(0x80000000) >= 0x80000008)) {
632 u32 phys_addr;
633 phys_addr = cpuid_eax(0x80000008) & 0xff;
634 size_or_mask =
635 ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
636 size_and_mask = ~size_or_mask & 0xfff00000;
637 }
638 /* Athlon MTRRs use an Intel-compatible interface for
639 * getting and setting */
640 break;
641 case X86_VENDOR_CENTAUR:
642 if (boot_cpu_data.x86 == 6) {
643			/* The VIA/Cyrix family has Intel-style MTRRs, but doesn't support PAE */
644 size_or_mask = 0xfff00000; /* 32 bits */
645 size_and_mask = 0;
646 }
647 break;
648
649 default:
650 break;
651 }
652 } else {
653 switch (boot_cpu_data.x86_vendor) {
654 case X86_VENDOR_AMD:
655 if (cpu_has_k6_mtrr) {
656 /* Pre-Athlon (K6) AMD CPU MTRRs */
657 mtrr_if = mtrr_ops[X86_VENDOR_AMD];
658 size_or_mask = 0xfff00000; /* 32 bits */
659 size_and_mask = 0;
660 }
661 break;
662 case X86_VENDOR_CENTAUR:
663 if (cpu_has_centaur_mcr) {
664 mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
665 size_or_mask = 0xfff00000; /* 32 bits */
666 size_and_mask = 0;
667 }
668 break;
669 case X86_VENDOR_CYRIX:
670 if (cpu_has_cyrix_arr) {
671 mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
672 size_or_mask = 0xfff00000; /* 32 bits */
673 size_and_mask = 0;
674 }
675 break;
676 default:
677 break;
678 }
679 }
680 printk(KERN_INFO "mtrr: v%s\n",MTRR_VERSION);
681
682 if (mtrr_if) {
683 set_num_var_ranges();
684 init_table();
685 init_other_cpus();
686
687 return sysdev_driver_register(&cpu_sysdev_class,
688 &mtrr_sysdev_driver);
689 }
690 return -ENXIO;
691}
692
693subsys_initcall(mtrr_init);
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
new file mode 100644
index 000000000000..de1351245599
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -0,0 +1,98 @@
1/*
2 * local mtrr defines.
3 */
4
5#ifndef TRUE
6#define TRUE 1
7#define FALSE 0
8#endif
9
10#define MTRRcap_MSR 0x0fe
11#define MTRRdefType_MSR 0x2ff
12
13#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
14#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
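/* For illustration: MTRRphysBase_MSR(0) is 0x200 and MTRRphysMask_MSR(0)
 * is 0x201; each variable range occupies one base/mask MSR pair. */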
15
16#define NUM_FIXED_RANGES 88
17#define MTRRfix64K_00000_MSR 0x250
18#define MTRRfix16K_80000_MSR 0x258
19#define MTRRfix16K_A0000_MSR 0x259
20#define MTRRfix4K_C0000_MSR 0x268
21#define MTRRfix4K_C8000_MSR 0x269
22#define MTRRfix4K_D0000_MSR 0x26a
23#define MTRRfix4K_D8000_MSR 0x26b
24#define MTRRfix4K_E0000_MSR 0x26c
25#define MTRRfix4K_E8000_MSR 0x26d
26#define MTRRfix4K_F0000_MSR 0x26e
27#define MTRRfix4K_F8000_MSR 0x26f
28
29#define MTRR_CHANGE_MASK_FIXED 0x01
30#define MTRR_CHANGE_MASK_VARIABLE 0x02
31#define MTRR_CHANGE_MASK_DEFTYPE 0x04
32
33/* In the Intel processor's MTRR interface, the MTRR type is always held in
34 an 8 bit field: */
35typedef u8 mtrr_type;
36
37struct mtrr_ops {
38 u32 vendor;
39 u32 use_intel_if;
40// void (*init)(void);
41 void (*set)(unsigned int reg, unsigned long base,
42 unsigned long size, mtrr_type type);
43 void (*set_all)(void);
44
45 void (*get)(unsigned int reg, unsigned long *base,
46 unsigned int *size, mtrr_type * type);
47 int (*get_free_region) (unsigned long base, unsigned long size);
48
49 int (*validate_add_page)(unsigned long base, unsigned long size,
50 unsigned int type);
51 int (*have_wrcomb)(void);
52};
53
54extern int generic_get_free_region(unsigned long base, unsigned long size);
55extern int generic_validate_add_page(unsigned long base, unsigned long size,
56 unsigned int type);
57
58extern struct mtrr_ops generic_mtrr_ops;
59
60extern int positive_have_wrcomb(void);
61
62/* library functions for processor-specific routines */
63struct set_mtrr_context {
64 unsigned long flags;
65 unsigned long deftype_lo;
66 unsigned long deftype_hi;
67 unsigned long cr4val;
68 unsigned long ccr3;
69};
70
71struct mtrr_var_range {
72 unsigned long base_lo;
73 unsigned long base_hi;
74 unsigned long mask_lo;
75 unsigned long mask_hi;
76};
77
78void set_mtrr_done(struct set_mtrr_context *ctxt);
79void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
80void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
81
82void get_mtrr_state(void);
83
84extern void set_mtrr_ops(struct mtrr_ops * ops);
85
86extern u32 size_or_mask, size_and_mask;
87extern struct mtrr_ops * mtrr_if;
88
89#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
90#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
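/* For example, is_cpu(INTEL) expands to (mtrr_if && mtrr_if->vendor == X86_VENDOR_INTEL),
   so both helpers are safe to use before an mtrr_if has been selected. */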
91
92extern unsigned int num_var_ranges;
93
94void finalize_mtrr_state(void);
95void mtrr_state_warn(void);
96char *mtrr_attrib_to_str(int x);
97void mtrr_wrmsr(unsigned, unsigned, unsigned);
98
diff --git a/arch/i386/kernel/cpu/mtrr/state.c b/arch/i386/kernel/cpu/mtrr/state.c
new file mode 100644
index 000000000000..f62ecd15811a
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/state.c
@@ -0,0 +1,78 @@
1#include <linux/mm.h>
2#include <linux/init.h>
3#include <asm/io.h>
4#include <asm/mtrr.h>
5#include <asm/msr.h>
6#include "mtrr.h"
7
8
9/* Put the processor into a state where MTRRs can be safely set */
10void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
11{
12 unsigned int cr0;
13
14 /* Disable interrupts locally */
15 local_irq_save(ctxt->flags);
16
17 if (use_intel() || is_cpu(CYRIX)) {
18
19 /* Save value of CR4 and clear Page Global Enable (bit 7) */
20 if ( cpu_has_pge ) {
21 ctxt->cr4val = read_cr4();
22 write_cr4(ctxt->cr4val & (unsigned char) ~(1 << 7));
23 }
24
25 /* Disable and flush caches. Note that wbinvd flushes the TLBs as
26 a side-effect */
27 cr0 = read_cr0() | 0x40000000;
28 wbinvd();
29 write_cr0(cr0);
30 wbinvd();
31
32 if (use_intel())
33 /* Save MTRR state */
34 rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
35 else
36		/* Cyrix ARRs - everything else was excluded at the top */
37 ctxt->ccr3 = getCx86(CX86_CCR3);
38 }
39}
40
41void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
42{
43 if (use_intel())
44 /* Disable MTRRs, and set the default type to uncached */
45 mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL,
46 ctxt->deftype_hi);
47 else if (is_cpu(CYRIX))
48		/* Cyrix ARRs - everything else was excluded at the top */
49 setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
50}
51
52/* Restore the processor after a set_mtrr_prepare */
53void set_mtrr_done(struct set_mtrr_context *ctxt)
54{
55 if (use_intel() || is_cpu(CYRIX)) {
56
57 /* Flush caches and TLBs */
58 wbinvd();
59
60 /* Restore MTRRdefType */
61 if (use_intel())
62 /* Intel (P6) standard MTRRs */
63 mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
64 else
65 /* Cyrix ARRs - everything else was excluded at the top */
66 setCx86(CX86_CCR3, ctxt->ccr3);
67
68 /* Enable caches */
69 write_cr0(read_cr0() & 0xbfffffff);
70
71 /* Restore value of CR4 */
72 if ( cpu_has_pge )
73 write_cr4(ctxt->cr4val);
74 }
75 /* Re-enable interrupts locally (if enabled previously) */
76 local_irq_restore(ctxt->flags);
77}
78
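The three helpers above are meant to bracket any MTRR update. A minimal sketch of the intended call sequence (the wrapper name and the MSR-programming step here are illustrative only, not part of this file):

static void example_set_mtrr(unsigned int reg, unsigned long base,
			     unsigned long size, mtrr_type type)
{
	struct set_mtrr_context ctxt;

	set_mtrr_prepare_save(&ctxt);	/* IRQs off, caches flushed, state saved */
	set_mtrr_cache_disable(&ctxt);	/* MTRRs off, default type uncached */

	/* ... program MTRRphysBase_MSR(reg) / MTRRphysMask_MSR(reg) here ... */

	set_mtrr_done(&ctxt);		/* restore MTRR state, caches and IRQs */
}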
diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c
new file mode 100644
index 000000000000..30898a260a5c
--- /dev/null
+++ b/arch/i386/kernel/cpu/nexgen.c
@@ -0,0 +1,63 @@
1#include <linux/kernel.h>
2#include <linux/init.h>
3#include <linux/string.h>
4#include <asm/processor.h>
5
6#include "cpu.h"
7
8/*
9 * Detect a NexGen CPU running without BIOS hypercode new enough
10 * to have CPUID. (Thanks to Herbert Oppmann)
11 */
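/*
 * Editorial note on the probe: the XORW sets ZF, and MOVL never touches
 * the flags, so the JNZ tests whatever DIVW left behind.  DIV leaves the
 * arithmetic flags architecturally undefined; most CPUs clobber them, so
 * the JNZ is taken and 0 is returned, while NexGen parts appear to
 * preserve them, ZF survives, and the probe returns 1.
 */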
12
13static int __init deep_magic_nexgen_probe(void)
14{
15 int ret;
16
17 __asm__ __volatile__ (
18 " movw $0x5555, %%ax\n"
19 " xorw %%dx,%%dx\n"
20 " movw $2, %%cx\n"
21 " divw %%cx\n"
22 " movl $0, %%eax\n"
23 " jnz 1f\n"
24 " movl $1, %%eax\n"
25 "1:\n"
26 : "=a" (ret) : : "cx", "dx" );
27 return ret;
28}
29
30static void __init init_nexgen(struct cpuinfo_x86 * c)
31{
32 c->x86_cache_size = 256; /* A few had 1 MB... */
33}
34
35static void __init nexgen_identify(struct cpuinfo_x86 * c)
36{
37 /* Detect NexGen with old hypercode */
38 if ( deep_magic_nexgen_probe() ) {
39 strcpy(c->x86_vendor_id, "NexGenDriven");
40 }
41 generic_identify(c);
42}
43
44static struct cpu_dev nexgen_cpu_dev __initdata = {
45 .c_vendor = "Nexgen",
46 .c_ident = { "NexGenDriven" },
47 .c_models = {
48 { .vendor = X86_VENDOR_NEXGEN,
49 .family = 5,
50 .model_names = { [1] = "Nx586" }
51 },
52 },
53 .c_init = init_nexgen,
54 .c_identify = nexgen_identify,
55};
56
57int __init nexgen_init_cpu(void)
58{
59 cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev;
60 return 0;
61}
62
63//early_arch_initcall(nexgen_init_cpu);
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
new file mode 100644
index 000000000000..c8d83fdc237a
--- /dev/null
+++ b/arch/i386/kernel/cpu/proc.c
@@ -0,0 +1,149 @@
1#include <linux/smp.h>
2#include <linux/timex.h>
3#include <linux/string.h>
4#include <asm/semaphore.h>
5#include <linux/seq_file.h>
6
7/*
8 * Get CPU information for use by the procfs.
9 */
10static int show_cpuinfo(struct seq_file *m, void *v)
11{
12 /*
13 * These flag bits must match the definitions in <asm/cpufeature.h>.
14 * NULL means this bit is undefined or reserved; either way it doesn't
15 * have meaning as far as Linux is concerned. Note that it's important
16 * to realize there is a difference between this table and CPUID -- if
17 * applications want to get the raw CPUID data, they should access
18 * /dev/cpu/<cpu_nr>/cpuid instead.
19 */
20 static char *x86_cap_flags[] = {
21 /* Intel-defined */
22 "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
23 "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
24 "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
25 "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
26
27 /* AMD-defined */
28 "pni", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
29 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
30 NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
31 NULL, "fxsr_opt", NULL, NULL, NULL, "lm", "3dnowext", "3dnow",
32
33 /* Transmeta-defined */
34 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
35 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
36 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
37 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
38
39 /* Other (Linux-defined) */
40 "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
41 NULL, NULL, NULL, NULL,
42 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
43 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
44 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
45
46 /* Intel-defined (#2) */
47 "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "est",
48 "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
49 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
50 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
51
52 /* VIA/Cyrix/Centaur-defined */
53 NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
54 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
55 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
56 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
57
58 /* AMD-defined (#2) */
59 "lahf_lm", "cmp_legacy", NULL, NULL, NULL, NULL, NULL, NULL,
60 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
61 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
62 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
63 };
64 struct cpuinfo_x86 *c = v;
65 int i, n = c - cpu_data;
66 int fpu_exception;
67
68#ifdef CONFIG_SMP
69 if (!cpu_online(n))
70 return 0;
71#endif
72 seq_printf(m, "processor\t: %d\n"
73 "vendor_id\t: %s\n"
74 "cpu family\t: %d\n"
75 "model\t\t: %d\n"
76 "model name\t: %s\n",
77 n,
78 c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
79 c->x86,
80 c->x86_model,
81 c->x86_model_id[0] ? c->x86_model_id : "unknown");
82
83 if (c->x86_mask || c->cpuid_level >= 0)
84 seq_printf(m, "stepping\t: %d\n", c->x86_mask);
85 else
86 seq_printf(m, "stepping\t: unknown\n");
87
88 if ( cpu_has(c, X86_FEATURE_TSC) ) {
89 seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n",
90 cpu_khz / 1000, (cpu_khz % 1000));
91 }
92
93 /* Cache size */
94 if (c->x86_cache_size >= 0)
95 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
96#ifdef CONFIG_X86_HT
97 seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]);
98 seq_printf(m, "siblings\t: %d\n", c->x86_num_cores * smp_num_siblings);
99#endif
100
101 /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */
102 fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu);
103 seq_printf(m, "fdiv_bug\t: %s\n"
104 "hlt_bug\t\t: %s\n"
105 "f00f_bug\t: %s\n"
106 "coma_bug\t: %s\n"
107 "fpu\t\t: %s\n"
108 "fpu_exception\t: %s\n"
109 "cpuid level\t: %d\n"
110 "wp\t\t: %s\n"
111 "flags\t\t:",
112 c->fdiv_bug ? "yes" : "no",
113 c->hlt_works_ok ? "no" : "yes",
114 c->f00f_bug ? "yes" : "no",
115 c->coma_bug ? "yes" : "no",
116 c->hard_math ? "yes" : "no",
117 fpu_exception ? "yes" : "no",
118 c->cpuid_level,
119 c->wp_works_ok ? "yes" : "no");
120
121 for ( i = 0 ; i < 32*NCAPINTS ; i++ )
122 if ( test_bit(i, c->x86_capability) &&
123 x86_cap_flags[i] != NULL )
124 seq_printf(m, " %s", x86_cap_flags[i]);
125
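	/* BogoMIPS = loops_per_jiffy * HZ / 500000, printed as a fixed-point
	   value; the second expression below is the fractional part, scaled
	   by 100 to give two decimal places. */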
126 seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n",
127 c->loops_per_jiffy/(500000/HZ),
128 (c->loops_per_jiffy/(5000/HZ)) % 100);
129 return 0;
130}
131
132static void *c_start(struct seq_file *m, loff_t *pos)
133{
134 return *pos < NR_CPUS ? cpu_data + *pos : NULL;
135}
136static void *c_next(struct seq_file *m, void *v, loff_t *pos)
137{
138 ++*pos;
139 return c_start(m, pos);
140}
141static void c_stop(struct seq_file *m, void *v)
142{
143}
144struct seq_operations cpuinfo_op = {
145 .start = c_start,
146 .next = c_next,
147 .stop = c_stop,
148 .show = show_cpuinfo,
149};
diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c
new file mode 100644
index 000000000000..8602425628ca
--- /dev/null
+++ b/arch/i386/kernel/cpu/rise.c
@@ -0,0 +1,53 @@
1#include <linux/kernel.h>
2#include <linux/init.h>
3#include <linux/bitops.h>
4#include <asm/processor.h>
5
6#include "cpu.h"
7
8static void __init init_rise(struct cpuinfo_x86 *c)
9{
10 printk("CPU: Rise iDragon");
11 if (c->x86_model > 2)
12 printk(" II");
13 printk("\n");
14
15 /* Unhide possibly hidden capability flags
16	   The mp6 iDragon family doesn't have MSRs.
17 We switch on extra features with this cpuid weirdness: */
18 __asm__ (
19 "movl $0x6363452a, %%eax\n\t"
20 "movl $0x3231206c, %%ecx\n\t"
21 "movl $0x2a32313a, %%edx\n\t"
22 "cpuid\n\t"
23 "movl $0x63634523, %%eax\n\t"
24 "movl $0x32315f6c, %%ecx\n\t"
25 "movl $0x2333313a, %%edx\n\t"
26 "cpuid\n\t" : : : "eax", "ebx", "ecx", "edx"
27 );
28 set_bit(X86_FEATURE_CX8, c->x86_capability);
29}
30
31static struct cpu_dev rise_cpu_dev __initdata = {
32 .c_vendor = "Rise",
33 .c_ident = { "RiseRiseRise" },
34 .c_models = {
35 { .vendor = X86_VENDOR_RISE, .family = 5, .model_names =
36 {
37 [0] = "iDragon",
38 [2] = "iDragon",
39 [8] = "iDragon II",
40 [9] = "iDragon II"
41 }
42 },
43 },
44 .c_init = init_rise,
45};
46
47int __init rise_init_cpu(void)
48{
49 cpu_devs[X86_VENDOR_RISE] = &rise_cpu_dev;
50 return 0;
51}
52
53//early_arch_initcall(rise_init_cpu);
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c
new file mode 100644
index 000000000000..f57e5ee94943
--- /dev/null
+++ b/arch/i386/kernel/cpu/transmeta.c
@@ -0,0 +1,107 @@
1#include <linux/kernel.h>
2#include <linux/init.h>
3#include <asm/processor.h>
4#include <asm/msr.h>
5#include "cpu.h"
6
7static void __init init_transmeta(struct cpuinfo_x86 *c)
8{
9 unsigned int cap_mask, uk, max, dummy;
10 unsigned int cms_rev1, cms_rev2;
11 unsigned int cpu_rev, cpu_freq, cpu_flags, new_cpu_rev;
12 char cpu_info[65];
13
14 get_model_name(c); /* Same as AMD/Cyrix */
15 display_cacheinfo(c);
16
17 /* Print CMS and CPU revision */
18 max = cpuid_eax(0x80860000);
19 cpu_rev = 0;
20 if ( max >= 0x80860001 ) {
21 cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags);
22 if (cpu_rev != 0x02000000) {
23 printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
24 (cpu_rev >> 24) & 0xff,
25 (cpu_rev >> 16) & 0xff,
26 (cpu_rev >> 8) & 0xff,
27 cpu_rev & 0xff,
28 cpu_freq);
29 }
30 }
31 if ( max >= 0x80860002 ) {
32 cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy);
33 if (cpu_rev == 0x02000000) {
34 printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n",
35 new_cpu_rev, cpu_freq);
36 }
37 printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
38 (cms_rev1 >> 24) & 0xff,
39 (cms_rev1 >> 16) & 0xff,
40 (cms_rev1 >> 8) & 0xff,
41 cms_rev1 & 0xff,
42 cms_rev2);
43 }
44 if ( max >= 0x80860006 ) {
45 cpuid(0x80860003,
46 (void *)&cpu_info[0],
47 (void *)&cpu_info[4],
48 (void *)&cpu_info[8],
49 (void *)&cpu_info[12]);
50 cpuid(0x80860004,
51 (void *)&cpu_info[16],
52 (void *)&cpu_info[20],
53 (void *)&cpu_info[24],
54 (void *)&cpu_info[28]);
55 cpuid(0x80860005,
56 (void *)&cpu_info[32],
57 (void *)&cpu_info[36],
58 (void *)&cpu_info[40],
59 (void *)&cpu_info[44]);
60 cpuid(0x80860006,
61 (void *)&cpu_info[48],
62 (void *)&cpu_info[52],
63 (void *)&cpu_info[56],
64 (void *)&cpu_info[60]);
65 cpu_info[64] = '\0';
66 printk(KERN_INFO "CPU: %s\n", cpu_info);
67 }
68
69 /* Unhide possibly hidden capability flags */
70 rdmsr(0x80860004, cap_mask, uk);
71 wrmsr(0x80860004, ~0, uk);
72 c->x86_capability[0] = cpuid_edx(0x00000001);
73 wrmsr(0x80860004, cap_mask, uk);
74
75 /* If we can run i686 user-space code, call us an i686 */
76#define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV)
77 if ( c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686 )
78 c->x86 = 6;
79}
80
81static void transmeta_identify(struct cpuinfo_x86 * c)
82{
83 u32 xlvl;
84 generic_identify(c);
85
86 /* Transmeta-defined flags: level 0x80860001 */
87 xlvl = cpuid_eax(0x80860000);
88 if ( (xlvl & 0xffff0000) == 0x80860000 ) {
89 if ( xlvl >= 0x80860001 )
90 c->x86_capability[2] = cpuid_edx(0x80860001);
91 }
92}
93
94static struct cpu_dev transmeta_cpu_dev __initdata = {
95 .c_vendor = "Transmeta",
96 .c_ident = { "GenuineTMx86", "TransmetaCPU" },
97 .c_init = init_transmeta,
98 .c_identify = transmeta_identify,
99};
100
101int __init transmeta_init_cpu(void)
102{
103 cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev;
104 return 0;
105}
106
107//early_arch_initcall(transmeta_init_cpu);
diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c
new file mode 100644
index 000000000000..264fcad559d5
--- /dev/null
+++ b/arch/i386/kernel/cpu/umc.c
@@ -0,0 +1,33 @@
1#include <linux/kernel.h>
2#include <linux/init.h>
3#include <asm/processor.h>
4#include "cpu.h"
5
6/* UMC chips appear to be either 386 or 486, so no special init takes place.
7 */
8static void __init init_umc(struct cpuinfo_x86 * c)
9{
10
11}
12
13static struct cpu_dev umc_cpu_dev __initdata = {
14 .c_vendor = "UMC",
15 .c_ident = { "UMC UMC UMC" },
16 .c_models = {
17 { .vendor = X86_VENDOR_UMC, .family = 4, .model_names =
18 {
19 [1] = "U5D",
20 [2] = "U5S",
21 }
22 },
23 },
24 .c_init = init_umc,
25};
26
27int __init umc_init_cpu(void)
28{
29 cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev;
30 return 0;
31}
32
33//early_arch_initcall(umc_init_cpu);
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c
new file mode 100644
index 000000000000..2e2756345bb2
--- /dev/null
+++ b/arch/i386/kernel/cpuid.c
@@ -0,0 +1,246 @@
1/* ----------------------------------------------------------------------- *
2 *
3 * Copyright 2000 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
8 * USA; either version 2 of the License, or (at your option) any later
9 * version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * cpuid.c
15 *
16 * x86 CPUID access device
17 *
18 * This device is accessed by lseek() to the appropriate CPUID level
19 * and then read in chunks of 16 bytes. A larger size means multiple
20 * reads of consecutive levels.
21 *
22 * This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on
23 * an SMP box will direct the access to CPU %d.
24 */
25
26#include <linux/module.h>
27#include <linux/config.h>
28
29#include <linux/types.h>
30#include <linux/errno.h>
31#include <linux/fcntl.h>
32#include <linux/init.h>
33#include <linux/poll.h>
34#include <linux/smp.h>
35#include <linux/major.h>
36#include <linux/fs.h>
37#include <linux/smp_lock.h>
38
39#include <linux/device.h>
40#include <linux/cpu.h>
41#include <linux/notifier.h>
42
43#include <asm/processor.h>
44#include <asm/msr.h>
45#include <asm/uaccess.h>
46#include <asm/system.h>
47
48static struct class_simple *cpuid_class;
49
50#ifdef CONFIG_SMP
51
52struct cpuid_command {
53 int cpu;
54 u32 reg;
55 u32 *data;
56};
57
58static void cpuid_smp_cpuid(void *cmd_block)
59{
60 struct cpuid_command *cmd = (struct cpuid_command *)cmd_block;
61
62 if (cmd->cpu == smp_processor_id())
63 cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2],
64 &cmd->data[3]);
65}
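/* Note: smp_call_function() in do_cpuid() runs the handler above on every
   other CPU; the cmd->cpu check makes sure that only the target CPU
   actually executes the CPUID instruction. */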
66
67static inline void do_cpuid(int cpu, u32 reg, u32 * data)
68{
69 struct cpuid_command cmd;
70
71 preempt_disable();
72 if (cpu == smp_processor_id()) {
73 cpuid(reg, &data[0], &data[1], &data[2], &data[3]);
74 } else {
75 cmd.cpu = cpu;
76 cmd.reg = reg;
77 cmd.data = data;
78
79 smp_call_function(cpuid_smp_cpuid, &cmd, 1, 1);
80 }
81 preempt_enable();
82}
83#else /* ! CONFIG_SMP */
84
85static inline void do_cpuid(int cpu, u32 reg, u32 * data)
86{
87 cpuid(reg, &data[0], &data[1], &data[2], &data[3]);
88}
89
90#endif /* ! CONFIG_SMP */
91
92static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
93{
94 loff_t ret;
95
96 lock_kernel();
97
98 switch (orig) {
99 case 0:
100 file->f_pos = offset;
101 ret = file->f_pos;
102 break;
103 case 1:
104 file->f_pos += offset;
105 ret = file->f_pos;
106 break;
107 default:
108 ret = -EINVAL;
109 }
110
111 unlock_kernel();
112 return ret;
113}
114
115static ssize_t cpuid_read(struct file *file, char __user *buf,
116 size_t count, loff_t * ppos)
117{
118 char __user *tmp = buf;
119 u32 data[4];
120 size_t rv;
121 u32 reg = *ppos;
122 int cpu = iminor(file->f_dentry->d_inode);
123
124 if (count % 16)
125 return -EINVAL; /* Invalid chunk size */
126
127 for (rv = 0; count; count -= 16) {
128 do_cpuid(cpu, reg, data);
129 if (copy_to_user(tmp, &data, 16))
130 return -EFAULT;
131 tmp += 16;
132		*ppos = ++reg;	/* advance the position past the level just read */
133 }
134
135 return tmp - buf;
136}
137
138static int cpuid_open(struct inode *inode, struct file *file)
139{
140 unsigned int cpu = iminor(file->f_dentry->d_inode);
141 struct cpuinfo_x86 *c = &(cpu_data)[cpu];
142
143 if (cpu >= NR_CPUS || !cpu_online(cpu))
144 return -ENXIO; /* No such CPU */
145 if (c->cpuid_level < 0)
146 return -EIO; /* CPUID not supported */
147
148 return 0;
149}
150
151/*
152 * File operations we support
153 */
154static struct file_operations cpuid_fops = {
155 .owner = THIS_MODULE,
156 .llseek = cpuid_seek,
157 .read = cpuid_read,
158 .open = cpuid_open,
159};
160
161static int cpuid_class_simple_device_add(int i)
162{
163 int err = 0;
164 struct class_device *class_err;
165
166 class_err = class_simple_device_add(cpuid_class, MKDEV(CPUID_MAJOR, i), NULL, "cpu%d",i);
167 if (IS_ERR(class_err))
168 err = PTR_ERR(class_err);
169 return err;
170}
171
172static int __devinit cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
173{
174 unsigned int cpu = (unsigned long)hcpu;
175
176 switch (action) {
177 case CPU_ONLINE:
178 cpuid_class_simple_device_add(cpu);
179 break;
180 case CPU_DEAD:
181 class_simple_device_remove(MKDEV(CPUID_MAJOR, cpu));
182 break;
183 }
184 return NOTIFY_OK;
185}
186
187static struct notifier_block cpuid_class_cpu_notifier =
188{
189 .notifier_call = cpuid_class_cpu_callback,
190};
191
192static int __init cpuid_init(void)
193{
194 int i, err = 0;
195 i = 0;
196
197 if (register_chrdev(CPUID_MAJOR, "cpu/cpuid", &cpuid_fops)) {
198 printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n",
199 CPUID_MAJOR);
200 err = -EBUSY;
201 goto out;
202 }
203 cpuid_class = class_simple_create(THIS_MODULE, "cpuid");
204 if (IS_ERR(cpuid_class)) {
205 err = PTR_ERR(cpuid_class);
206 goto out_chrdev;
207 }
208 for_each_online_cpu(i) {
209 err = cpuid_class_simple_device_add(i);
210 if (err != 0)
211 goto out_class;
212 }
213 register_cpu_notifier(&cpuid_class_cpu_notifier);
214
215 err = 0;
216 goto out;
217
218out_class:
219 i = 0;
220 for_each_online_cpu(i) {
221 class_simple_device_remove(MKDEV(CPUID_MAJOR, i));
222 }
223 class_simple_destroy(cpuid_class);
224out_chrdev:
225 unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
226out:
227 return err;
228}
229
230static void __exit cpuid_exit(void)
231{
232 int cpu = 0;
233
234 for_each_online_cpu(cpu)
235 class_simple_device_remove(MKDEV(CPUID_MAJOR, cpu));
236 class_simple_destroy(cpuid_class);
237 unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
238 unregister_cpu_notifier(&cpuid_class_cpu_notifier);
239}
240
241module_init(cpuid_init);
242module_exit(cpuid_exit);
243
244MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>");
245MODULE_DESCRIPTION("x86 generic CPUID driver");
246MODULE_LICENSE("GPL");
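For reference, the access pattern described in the header comment looks like this from userspace. This is an illustrative sketch only; it assumes a /dev/cpu/0/cpuid node created with this driver's major/minor numbering:

/* cpuid-read.c: dump CPUID level 0 of CPU 0 via the character device */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned int regs[4];			/* eax, ebx, ecx, edx */
	int fd = open("/dev/cpu/0/cpuid", O_RDONLY);

	if (fd < 0)
		return 1;
	lseek(fd, 0, SEEK_SET);			/* the offset selects the CPUID level */
	if (read(fd, regs, 16) != 16)		/* one 16-byte chunk per level */
		return 1;
	printf("eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
	       regs[0], regs[1], regs[2], regs[3]);
	close(fd);
	return 0;
}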
diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c
new file mode 100644
index 000000000000..6ed7e28f306c
--- /dev/null
+++ b/arch/i386/kernel/dmi_scan.c
@@ -0,0 +1,487 @@
1#include <linux/types.h>
2#include <linux/kernel.h>
3#include <linux/string.h>
4#include <linux/init.h>
5#include <linux/module.h>
6#include <linux/slab.h>
7#include <linux/acpi.h>
8#include <asm/io.h>
9#include <linux/pm.h>
10#include <asm/system.h>
11#include <linux/dmi.h>
12#include <linux/bootmem.h>
13
14
15struct dmi_header
16{
17 u8 type;
18 u8 length;
19 u16 handle;
20};
21
22#undef DMI_DEBUG
23
24#ifdef DMI_DEBUG
25#define dmi_printk(x) printk x
26#else
27#define dmi_printk(x)
28#endif
29
30static char * __init dmi_string(struct dmi_header *dm, u8 s)
31{
32 u8 *bp=(u8 *)dm;
33 bp+=dm->length;
34 if(!s)
35 return "";
36 s--;
37 while(s>0 && *bp)
38 {
39 bp+=strlen(bp);
40 bp++;
41 s--;
42 }
43 return bp;
44}
45
46/*
47 * We have to be cautious here. We have seen BIOSes with DMI pointers
48 * pointing to completely the wrong place for example
49 */
50
51static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dmi_header *))
52{
53 u8 *buf;
54 struct dmi_header *dm;
55 u8 *data;
56 int i=0;
57
58 buf = bt_ioremap(base, len);
59 if(buf==NULL)
60 return -1;
61
62 data = buf;
63
64 /*
65 * Stop when we see all the items the table claimed to have
66 * OR we run off the end of the table (also happens)
67 */
68
69 while(i<num && data-buf+sizeof(struct dmi_header)<=len)
70 {
71 dm=(struct dmi_header *)data;
72 /*
73	 * We want to know the total length (formatted area and strings)
74 * before decoding to make sure we won't run off the table in
75 * dmi_decode or dmi_string
76 */
77 data+=dm->length;
78 while(data-buf<len-1 && (data[0] || data[1]))
79 data++;
80 if(data-buf<len-1)
81 decode(dm);
82 data+=2;
83 i++;
84 }
85 bt_iounmap(buf, len);
86 return 0;
87}
88
89
90static inline int __init dmi_checksum(u8 *buf)
91{
92 u8 sum=0;
93 int a;
94
95 for(a=0; a<15; a++)
96 sum+=buf[a];
97 return (sum==0);
98}
99
100static int __init dmi_iterate(void (*decode)(struct dmi_header *))
101{
102 u8 buf[15];
103 char __iomem *p, *q;
104
105 /*
106 * no iounmap() for that ioremap(); it would be a no-op, but it's
107 * so early in setup that sucker gets confused into doing what
108 * it shouldn't if we actually call it.
109 */
110 p = ioremap(0xF0000, 0x10000);
111 if (p == NULL)
112 return -1;
113 for (q = p; q < p + 0x10000; q += 16) {
114 memcpy_fromio(buf, q, 15);
115 if(memcmp(buf, "_DMI_", 5)==0 && dmi_checksum(buf))
116 {
117 u16 num=buf[13]<<8|buf[12];
118 u16 len=buf[7]<<8|buf[6];
119 u32 base=buf[11]<<24|buf[10]<<16|buf[9]<<8|buf[8];
120
121 /*
122 * DMI version 0.0 means that the real version is taken from
123 * the SMBIOS version, which we don't know at this point.
124 */
125 if(buf[14]!=0)
126 printk(KERN_INFO "DMI %d.%d present.\n",
127 buf[14]>>4, buf[14]&0x0F);
128 else
129 printk(KERN_INFO "DMI present.\n");
130 dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n",
131 num, len));
132 dmi_printk((KERN_INFO "DMI table at 0x%08X.\n",
133 base));
134 if(dmi_table(base,len, num, decode)==0)
135 return 0;
136 }
137 }
138 return -1;
139}
140
141static char *dmi_ident[DMI_STRING_MAX];
142
143/*
144 * Save a DMI string
145 */
146
147static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
148{
149 char *d = (char*)dm;
150 char *p = dmi_string(dm, d[string]);
151 if(p==NULL || *p == 0)
152 return;
153 if (dmi_ident[slot])
154 return;
155 dmi_ident[slot] = alloc_bootmem(strlen(p)+1);
156 if(dmi_ident[slot])
157 strcpy(dmi_ident[slot], p);
158 else
159 printk(KERN_ERR "dmi_save_ident: out of memory.\n");
160}
161
162/*
163 * Ugly compatibility crap.
164 */
165#define dmi_blacklist dmi_system_id
166#define NO_MATCH { DMI_NONE, NULL}
167#define MATCH DMI_MATCH
168
169/*
170 * Some Toshiba keyboards repeat keys that were only pressed once.
171 */
172
173static __init int broken_toshiba_keyboard(struct dmi_blacklist *d)
174{
175 printk(KERN_WARNING "Toshiba with broken keyboard detected. If your keyboard sometimes generates 3 keypresses instead of one, see http://davyd.ucc.asn.au/projects/toshiba/README\n");
176 return 0;
177}
178
179
180#ifdef CONFIG_ACPI_SLEEP
181static __init int reset_videomode_after_s3(struct dmi_blacklist *d)
182{
183 /* See acpi_wakeup.S */
184 extern long acpi_video_flags;
185 acpi_video_flags |= 2;
186 return 0;
187}
188#endif
189
190
191#ifdef CONFIG_ACPI_BOOT
192extern int acpi_force;
193
194static __init __attribute__((unused)) int dmi_disable_acpi(struct dmi_blacklist *d)
195{
196 if (!acpi_force) {
197 printk(KERN_NOTICE "%s detected: acpi off\n",d->ident);
198 disable_acpi();
199 } else {
200 printk(KERN_NOTICE
201 "Warning: DMI blacklist says broken, but acpi forced\n");
202 }
203 return 0;
204}
205
206/*
207 * Limit ACPI to CPU enumeration for HT
208 */
209static __init __attribute__((unused)) int force_acpi_ht(struct dmi_blacklist *d)
210{
211 if (!acpi_force) {
212 printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident);
213 disable_acpi();
214 acpi_ht = 1;
215 } else {
216 printk(KERN_NOTICE
217 "Warning: acpi=force overrules DMI blacklist: acpi=ht\n");
218 }
219 return 0;
220}
221#endif
222
223#ifdef CONFIG_ACPI_PCI
224static __init int disable_acpi_irq(struct dmi_blacklist *d)
225{
226 if (!acpi_force) {
227 printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
228 d->ident);
229 acpi_noirq_set();
230 }
231 return 0;
232}
233static __init int disable_acpi_pci(struct dmi_blacklist *d)
234{
235 if (!acpi_force) {
236 printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
237 d->ident);
238 acpi_disable_pci();
239 }
240 return 0;
241}
242#endif
243
244/*
245 * Process the DMI blacklists
246 */
247
248
249/*
250 * This will be expanded over time to force things like the APM
251 * interrupt mask settings according to the laptop
252 */
253
254static __initdata struct dmi_blacklist dmi_blacklist[]={
255
256 { broken_toshiba_keyboard, "Toshiba Satellite 4030cdt", { /* Keyboard generates spurious repeats */
257 MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
258 NO_MATCH, NO_MATCH, NO_MATCH
259 } },
260#ifdef CONFIG_ACPI_SLEEP
261 { reset_videomode_after_s3, "Toshiba Satellite 4030cdt", { /* Reset video mode after returning from ACPI S3 sleep */
262 MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
263 NO_MATCH, NO_MATCH, NO_MATCH
264 } },
265#endif
266
267#ifdef CONFIG_ACPI_BOOT
268 /*
269 * If your system is blacklisted here, but you find that acpi=force
270 * works for you, please contact acpi-devel@sourceforge.net
271 */
272
273 /*
274 * Boxes that need ACPI disabled
275 */
276
277 { dmi_disable_acpi, "IBM Thinkpad", {
278 MATCH(DMI_BOARD_VENDOR, "IBM"),
279 MATCH(DMI_BOARD_NAME, "2629H1G"),
280 NO_MATCH, NO_MATCH }},
281
282 /*
283 * Boxes that need acpi=ht
284 */
285
286 { force_acpi_ht, "FSC Primergy T850", {
287 MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
288 MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
289 NO_MATCH, NO_MATCH }},
290
291 { force_acpi_ht, "DELL GX240", {
292 MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
293 MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
294 NO_MATCH, NO_MATCH }},
295
296 { force_acpi_ht, "HP VISUALIZE NT Workstation", {
297 MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
298 MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
299 NO_MATCH, NO_MATCH }},
300
301 { force_acpi_ht, "Compaq Workstation W8000", {
302 MATCH(DMI_SYS_VENDOR, "Compaq"),
303 MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
304 NO_MATCH, NO_MATCH }},
305
306 { force_acpi_ht, "ASUS P4B266", {
307 MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
308 MATCH(DMI_BOARD_NAME, "P4B266"),
309 NO_MATCH, NO_MATCH }},
310
311 { force_acpi_ht, "ASUS P2B-DS", {
312 MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
313 MATCH(DMI_BOARD_NAME, "P2B-DS"),
314 NO_MATCH, NO_MATCH }},
315
316 { force_acpi_ht, "ASUS CUR-DLS", {
317 MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
318 MATCH(DMI_BOARD_NAME, "CUR-DLS"),
319 NO_MATCH, NO_MATCH }},
320
321 { force_acpi_ht, "ABIT i440BX-W83977", {
322 MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
323 MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
324 NO_MATCH, NO_MATCH }},
325
326 { force_acpi_ht, "IBM Bladecenter", {
327 MATCH(DMI_BOARD_VENDOR, "IBM"),
328 MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
329 NO_MATCH, NO_MATCH }},
330
331 { force_acpi_ht, "IBM eServer xSeries 360", {
332 MATCH(DMI_BOARD_VENDOR, "IBM"),
333 MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
334 NO_MATCH, NO_MATCH }},
335
336 { force_acpi_ht, "IBM eserver xSeries 330", {
337 MATCH(DMI_BOARD_VENDOR, "IBM"),
338 MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
339 NO_MATCH, NO_MATCH }},
340
341 { force_acpi_ht, "IBM eserver xSeries 440", {
342 MATCH(DMI_BOARD_VENDOR, "IBM"),
343 MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
344 NO_MATCH, NO_MATCH }},
345
346#endif /* CONFIG_ACPI_BOOT */
347
348#ifdef CONFIG_ACPI_PCI
349 /*
350 * Boxes that need ACPI PCI IRQ routing disabled
351 */
352
353 { disable_acpi_irq, "ASUS A7V", {
354 MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
355 MATCH(DMI_BOARD_NAME, "<A7V>"),
356 /* newer BIOS, Revision 1011, does work */
357 MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"),
358 NO_MATCH }},
359
360 /*
361 * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
362 */
363 { disable_acpi_pci, "ASUS PR-DLS", { /* _BBN 0 bug */
364 MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
365 MATCH(DMI_BOARD_NAME, "PR-DLS"),
366 MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"),
367 MATCH(DMI_BIOS_DATE, "03/21/2003") }},
368
369 { disable_acpi_pci, "Acer TravelMate 36x Laptop", {
370 MATCH(DMI_SYS_VENDOR, "Acer"),
371 MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
372 NO_MATCH, NO_MATCH
373 } },
374
375#endif
376
377 { NULL, }
378};
379
380/*
381 * Process a DMI table entry. Right now all we care about are the BIOS
382 * and machine entries. For 2.5 we should pull the smbus controller info
383 * out of here.
384 */
385
386static void __init dmi_decode(struct dmi_header *dm)
387{
388#ifdef DMI_DEBUG
389 u8 *data = (u8 *)dm;
390#endif
391
392 switch(dm->type)
393 {
394 case 0:
395 dmi_printk(("BIOS Vendor: %s\n",
396 dmi_string(dm, data[4])));
397 dmi_save_ident(dm, DMI_BIOS_VENDOR, 4);
398 dmi_printk(("BIOS Version: %s\n",
399 dmi_string(dm, data[5])));
400 dmi_save_ident(dm, DMI_BIOS_VERSION, 5);
401 dmi_printk(("BIOS Release: %s\n",
402 dmi_string(dm, data[8])));
403 dmi_save_ident(dm, DMI_BIOS_DATE, 8);
404 break;
405 case 1:
406 dmi_printk(("System Vendor: %s\n",
407 dmi_string(dm, data[4])));
408 dmi_save_ident(dm, DMI_SYS_VENDOR, 4);
409 dmi_printk(("Product Name: %s\n",
410 dmi_string(dm, data[5])));
411 dmi_save_ident(dm, DMI_PRODUCT_NAME, 5);
412 dmi_printk(("Version: %s\n",
413 dmi_string(dm, data[6])));
414 dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
415 dmi_printk(("Serial Number: %s\n",
416 dmi_string(dm, data[7])));
417 break;
418 case 2:
419 dmi_printk(("Board Vendor: %s\n",
420 dmi_string(dm, data[4])));
421 dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
422 dmi_printk(("Board Name: %s\n",
423 dmi_string(dm, data[5])));
424 dmi_save_ident(dm, DMI_BOARD_NAME, 5);
425 dmi_printk(("Board Version: %s\n",
426 dmi_string(dm, data[6])));
427 dmi_save_ident(dm, DMI_BOARD_VERSION, 6);
428 break;
429 }
430}
431
432void __init dmi_scan_machine(void)
433{
434 int err = dmi_iterate(dmi_decode);
435 if(err == 0)
436 dmi_check_system(dmi_blacklist);
437 else
438 printk(KERN_INFO "DMI not present.\n");
439}
440
441
442/**
443 * dmi_check_system - check system DMI data
444 * @list: array of dmi_system_id structures to match against
445 *
446 * Walk the blacklist table running matching functions until someone
447 *	returns non-zero or we hit the end. The callback function is called
448 *	for each successful match. Returns the number of matches.
449 */
450int dmi_check_system(struct dmi_system_id *list)
451{
452 int i, count = 0;
453 struct dmi_system_id *d = list;
454
455 while (d->ident) {
456 for (i = 0; i < ARRAY_SIZE(d->matches); i++) {
457 int s = d->matches[i].slot;
458 if (s == DMI_NONE)
459 continue;
460 if (dmi_ident[s] && strstr(dmi_ident[s], d->matches[i].substr))
461 continue;
462 /* No match */
463 goto fail;
464 }
465 if (d->callback && d->callback(d))
466 break;
467 count++;
468fail: d++;
469 }
470
471 return count;
472}
473
474EXPORT_SYMBOL(dmi_check_system);
475
476/**
477 * dmi_get_system_info - return DMI data value
478 *	@field: data index (see enum dmi_field)
479 *
480 * Returns one DMI data value, can be used to perform
481 * complex DMI data checks.
482 */
483char * dmi_get_system_info(int field)
484{
485 return dmi_ident[field];
486}
487
diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c
new file mode 100644
index 000000000000..789af3e9fb1f
--- /dev/null
+++ b/arch/i386/kernel/doublefault.c
@@ -0,0 +1,65 @@
1#include <linux/mm.h>
2#include <linux/sched.h>
3#include <linux/init.h>
4#include <linux/init_task.h>
5#include <linux/fs.h>
6
7#include <asm/uaccess.h>
8#include <asm/pgtable.h>
9#include <asm/processor.h>
10#include <asm/desc.h>
11
12#define DOUBLEFAULT_STACKSIZE (1024)
13static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
14#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
15
16#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + 0x1000000)
17
18static void doublefault_fn(void)
19{
20 struct Xgt_desc_struct gdt_desc = {0, 0};
21 unsigned long gdt, tss;
22
23 __asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory");
24 gdt = gdt_desc.address;
25
26 printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
27
28 if (ptr_ok(gdt)) {
29 gdt += GDT_ENTRY_TSS << 3;
30 tss = *(u16 *)(gdt+2);
31 tss += *(u8 *)(gdt+4) << 16;
32 tss += *(u8 *)(gdt+7) << 24;
33 printk("double fault, tss at %08lx\n", tss);
34
35 if (ptr_ok(tss)) {
36 struct tss_struct *t = (struct tss_struct *)tss;
37
38 printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp);
39
40 printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
41 t->eax, t->ebx, t->ecx, t->edx);
42 printk("esi = %08lx, edi = %08lx\n",
43 t->esi, t->edi);
44 }
45 }
46
47 for (;;) /* nothing */;
48}
49
50struct tss_struct doublefault_tss __cacheline_aligned = {
51 .esp0 = STACK_START,
52 .ss0 = __KERNEL_DS,
53 .ldt = 0,
54 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
55
56 .eip = (unsigned long) doublefault_fn,
57 .eflags = X86_EFLAGS_SF | 0x2, /* 0x2 bit is always set */
58 .esp = STACK_START,
59 .es = __USER_DS,
60 .cs = __KERNEL_CS,
61 .ss = __KERNEL_DS,
62 .ds = __USER_DS,
63
64 .__cr3 = __pa(swapper_pg_dir)
65};
diff --git a/arch/i386/kernel/early_printk.c b/arch/i386/kernel/early_printk.c
new file mode 100644
index 000000000000..92f812ba275c
--- /dev/null
+++ b/arch/i386/kernel/early_printk.c
@@ -0,0 +1,2 @@
1
2#include "../../x86_64/kernel/early_printk.c"
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
new file mode 100644
index 000000000000..9e5e0d8bd36e
--- /dev/null
+++ b/arch/i386/kernel/efi.c
@@ -0,0 +1,635 @@
1/*
2 * Extensible Firmware Interface
3 *
4 * Based on Extensible Firmware Interface Specification version 1.0
5 *
6 * Copyright (C) 1999 VA Linux Systems
7 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
8 * Copyright (C) 1999-2002 Hewlett-Packard Co.
9 * David Mosberger-Tang <davidm@hpl.hp.com>
10 * Stephane Eranian <eranian@hpl.hp.com>
11 *
12 * Not all EFI Runtime Services are implemented yet, as EFI only
13 * supports physical mode addressing on SoftSDV. This is to be fixed
14 * in a future version. --drummond 1999-07-20
15 *
16 * Implemented EFI runtime services and virtual mode calls. --davidm
17 *
18 * Goutham Rao: <goutham.rao@intel.com>
19 * Skip non-WB memory and ignore empty memory ranges.
20 */
21
22#include <linux/config.h>
23#include <linux/kernel.h>
24#include <linux/init.h>
25#include <linux/mm.h>
26#include <linux/types.h>
27#include <linux/time.h>
28#include <linux/spinlock.h>
29#include <linux/bootmem.h>
30#include <linux/ioport.h>
31#include <linux/module.h>
32#include <linux/efi.h>
33
34#include <asm/setup.h>
35#include <asm/io.h>
36#include <asm/page.h>
37#include <asm/pgtable.h>
38#include <asm/processor.h>
39#include <asm/desc.h>
40#include <asm/tlbflush.h>
41
42#define EFI_DEBUG 0
43#define PFX "EFI: "
44
45extern efi_status_t asmlinkage efi_call_phys(void *, ...);
46
47struct efi efi;
48EXPORT_SYMBOL(efi);
49static struct efi efi_phys __initdata;
50struct efi_memory_map memmap __initdata;
51
52/*
53 * We require a boot_ioremap mapping mechanism early in boot
54 */
55extern void * boot_ioremap(unsigned long, unsigned long);
56
57/*
58 * To call an EFI runtime service in physical addressing mode we need a
59 * prologue/epilogue around the invocation: disable interrupts, claim the
60 * EFI runtime service handler exclusively, and duplicate the kernel's
61 * low-memory (0 - 3G) mappings.
62 */
63
64static unsigned long efi_rt_eflags;
65static DEFINE_SPINLOCK(efi_rt_lock);
66static pgd_t efi_bak_pg_dir_pointer[2];
67
68static void efi_call_phys_prelog(void)
69{
70 unsigned long cr4;
71 unsigned long temp;
72
73 spin_lock(&efi_rt_lock);
74 local_irq_save(efi_rt_eflags);
75
76 /*
77 * If I don't have PSE, I should just duplicate two entries in page
78 * directory. If I have PSE, I just need to duplicate one entry in
79 * page directory.
80 */
81 __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
82
83 if (cr4 & X86_CR4_PSE) {
84 efi_bak_pg_dir_pointer[0].pgd =
85 swapper_pg_dir[pgd_index(0)].pgd;
86 swapper_pg_dir[0].pgd =
87 swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
88 } else {
89 efi_bak_pg_dir_pointer[0].pgd =
90 swapper_pg_dir[pgd_index(0)].pgd;
91 efi_bak_pg_dir_pointer[1].pgd =
92 swapper_pg_dir[pgd_index(0x400000)].pgd;
93 swapper_pg_dir[pgd_index(0)].pgd =
94 swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
95 temp = PAGE_OFFSET + 0x400000;
96 swapper_pg_dir[pgd_index(0x400000)].pgd =
97 swapper_pg_dir[pgd_index(temp)].pgd;
98 }
99
100 /*
101 * After the lock is released, the original page table is restored.
102 */
103 local_flush_tlb();
104
105 cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address);
106 __asm__ __volatile__("lgdt %0":"=m"
107 (*(struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0])));
108}
109
110static void efi_call_phys_epilog(void)
111{
112 unsigned long cr4;
113
114 cpu_gdt_descr[0].address =
115 (unsigned long) __va(cpu_gdt_descr[0].address);
116 __asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr));
117 __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
118
119 if (cr4 & X86_CR4_PSE) {
120 swapper_pg_dir[pgd_index(0)].pgd =
121 efi_bak_pg_dir_pointer[0].pgd;
122 } else {
123 swapper_pg_dir[pgd_index(0)].pgd =
124 efi_bak_pg_dir_pointer[0].pgd;
125 swapper_pg_dir[pgd_index(0x400000)].pgd =
126 efi_bak_pg_dir_pointer[1].pgd;
127 }
128
129 /*
130 * After the lock is released, the original page table is restored.
131 */
132 local_flush_tlb();
133
134 local_irq_restore(efi_rt_eflags);
135 spin_unlock(&efi_rt_lock);
136}
137
138static efi_status_t
139phys_efi_set_virtual_address_map(unsigned long memory_map_size,
140 unsigned long descriptor_size,
141 u32 descriptor_version,
142 efi_memory_desc_t *virtual_map)
143{
144 efi_status_t status;
145
146 efi_call_phys_prelog();
147 status = efi_call_phys(efi_phys.set_virtual_address_map,
148 memory_map_size, descriptor_size,
149 descriptor_version, virtual_map);
150 efi_call_phys_epilog();
151 return status;
152}
153
154static efi_status_t
155phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
156{
157 efi_status_t status;
158
159 efi_call_phys_prelog();
160 status = efi_call_phys(efi_phys.get_time, tm, tc);
161 efi_call_phys_epilog();
162 return status;
163}
164
165inline int efi_set_rtc_mmss(unsigned long nowtime)
166{
167 int real_seconds, real_minutes;
168 efi_status_t status;
169 efi_time_t eft;
170 efi_time_cap_t cap;
171
172 spin_lock(&efi_rt_lock);
173 status = efi.get_time(&eft, &cap);
174 spin_unlock(&efi_rt_lock);
175 if (status != EFI_SUCCESS)
176 panic("Ooops, efitime: can't read time!\n");
177 real_seconds = nowtime % 60;
178 real_minutes = nowtime / 60;
179
180 if (((abs(real_minutes - eft.minute) + 15)/30) & 1)
181 real_minutes += 30;
182 real_minutes %= 60;
183
184 eft.minute = real_minutes;
185	eft.second = real_seconds;
186	spin_lock(&efi_rt_lock);
187	status = efi.set_time(&eft);	/* write the adjusted time back */
188	spin_unlock(&efi_rt_lock);
189	if (status != EFI_SUCCESS)
190		return -1;
191 return 0;
192}
193/*
194 * This should only be used during kernel init and before runtime
195 * services have been remapped, therefore, we'll need to call in physical
196 * mode. Note, this call isn't used later, so mark it __init.
197 */
198inline unsigned long __init efi_get_time(void)
199{
200 efi_status_t status;
201 efi_time_t eft;
202 efi_time_cap_t cap;
203
204 status = phys_efi_get_time(&eft, &cap);
205 if (status != EFI_SUCCESS)
206 printk("Oops: efitime: can't read time status: 0x%lx\n",status);
207
208 return mktime(eft.year, eft.month, eft.day, eft.hour,
209 eft.minute, eft.second);
210}
211
212int is_available_memory(efi_memory_desc_t * md)
213{
214 if (!(md->attribute & EFI_MEMORY_WB))
215 return 0;
216
217 switch (md->type) {
218 case EFI_LOADER_CODE:
219 case EFI_LOADER_DATA:
220 case EFI_BOOT_SERVICES_CODE:
221 case EFI_BOOT_SERVICES_DATA:
222 case EFI_CONVENTIONAL_MEMORY:
223 return 1;
224 }
225 return 0;
226}
227
228/*
229 * We need to map the EFI memory map again after paging_init().
230 */
231void __init efi_map_memmap(void)
232{
233 memmap.map = NULL;
234
235 memmap.map = (efi_memory_desc_t *)
236 bt_ioremap((unsigned long) memmap.phys_map,
237 (memmap.nr_map * sizeof(efi_memory_desc_t)));
238
239 if (memmap.map == NULL)
240 printk(KERN_ERR PFX "Could not remap the EFI memmap!\n");
241}
242
243#if EFI_DEBUG
244static void __init print_efi_memmap(void)
245{
246 efi_memory_desc_t *md;
247 int i;
248
249 for (i = 0; i < memmap.nr_map; i++) {
250 md = &memmap.map[i];
251 printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, "
252 "range=[0x%016llx-0x%016llx) (%lluMB)\n",
253 i, md->type, md->attribute, md->phys_addr,
254 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
255 (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
256 }
257}
258#endif /* EFI_DEBUG */
259
260/*
261 * Walks the EFI memory map and calls CALLBACK once for each EFI
262 * memory descriptor describing memory that is available for kernel use.
263 */
264void efi_memmap_walk(efi_freemem_callback_t callback, void *arg)
265{
266 int prev_valid = 0;
267 struct range {
268 unsigned long start;
269 unsigned long end;
270 } prev, curr;
271 efi_memory_desc_t *md;
272 unsigned long start, end;
273 int i;
274
275 for (i = 0; i < memmap.nr_map; i++) {
276 md = &memmap.map[i];
277
278 if ((md->num_pages == 0) || (!is_available_memory(md)))
279 continue;
280
281 curr.start = md->phys_addr;
282 curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
283
284 if (!prev_valid) {
285 prev = curr;
286 prev_valid = 1;
287 } else {
288 if (curr.start < prev.start)
289 printk(KERN_INFO PFX "Unordered memory map\n");
290 if (prev.end == curr.start)
291 prev.end = curr.end;
292 else {
293 start =
294 (unsigned long) (PAGE_ALIGN(prev.start));
295 end = (unsigned long) (prev.end & PAGE_MASK);
296 if ((end > start)
297 && (*callback) (start, end, arg) < 0)
298 return;
299 prev = curr;
300 }
301 }
302 }
303 if (prev_valid) {
304 start = (unsigned long) PAGE_ALIGN(prev.start);
305 end = (unsigned long) (prev.end & PAGE_MASK);
306 if (end > start)
307 (*callback) (start, end, arg);
308 }
309}
310
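A sketch of how a caller drives the walk above; the callback name and the byte counting are illustrative, but the signature matches efi_freemem_callback_t:

/* Illustrative efi_memmap_walk() callback: total up the usable bytes. */
static int __init count_free(unsigned long start, unsigned long end, void *arg)
{
	unsigned long *total = arg;

	*total += end - start;
	return 0;	/* a negative return value would stop the walk */
}

/* ... unsigned long total = 0; efi_memmap_walk(count_free, &total); ... */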
311void __init efi_init(void)
312{
313 efi_config_table_t *config_tables;
314 efi_runtime_services_t *runtime;
315 efi_char16_t *c16;
316 char vendor[100] = "unknown";
317 unsigned long num_config_tables;
318 int i = 0;
319
320 memset(&efi, 0, sizeof(efi) );
321 memset(&efi_phys, 0, sizeof(efi_phys));
322
323 efi_phys.systab = EFI_SYSTAB;
324 memmap.phys_map = EFI_MEMMAP;
325 memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE;
326 memmap.desc_version = EFI_MEMDESC_VERSION;
327
328 efi.systab = (efi_system_table_t *)
329 boot_ioremap((unsigned long) efi_phys.systab,
330 sizeof(efi_system_table_t));
331 /*
332 * Verify the EFI Table
333 */
334 if (efi.systab == NULL)
335 printk(KERN_ERR PFX "Woah! Couldn't map the EFI system table.\n");
336 if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
337 printk(KERN_ERR PFX "Woah! EFI system table signature incorrect\n");
338 if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0)
339 printk(KERN_ERR PFX
340 "Warning: EFI system table major version mismatch: "
341 "got %d.%02d, expected %d.%02d\n",
342 efi.systab->hdr.revision >> 16,
343 efi.systab->hdr.revision & 0xffff,
344 EFI_SYSTEM_TABLE_REVISION >> 16,
345 EFI_SYSTEM_TABLE_REVISION & 0xffff);
346 /*
347 * Grab some details from the system table
348 */
349 num_config_tables = efi.systab->nr_tables;
350 config_tables = (efi_config_table_t *)efi.systab->tables;
351 runtime = efi.systab->runtime;
352
353 /*
354 * Show what we know for posterity
355 */
356 c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2);
357 if (c16) {
358		for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
359 vendor[i] = *c16++;
360 vendor[i] = '\0';
361 } else
362 printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
363
364	printk(KERN_INFO PFX "EFI v%u.%02u by %s\n",
365 efi.systab->hdr.revision >> 16,
366 efi.systab->hdr.revision & 0xffff, vendor);
367
368 /*
369 * Let's see what config tables the firmware passed to us.
370 */
371 config_tables = (efi_config_table_t *)
372 boot_ioremap((unsigned long) config_tables,
373 num_config_tables * sizeof(efi_config_table_t));
374
375 if (config_tables == NULL)
376 printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n");
377
378 for (i = 0; i < num_config_tables; i++) {
379 if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
380 efi.mps = (void *)config_tables[i].table;
381 printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table);
382 } else
383 if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
384 efi.acpi20 = __va(config_tables[i].table);
385 printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table);
386 } else
387 if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
388 efi.acpi = __va(config_tables[i].table);
389 printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table);
390 } else
391 if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
392 efi.smbios = (void *) config_tables[i].table;
393 printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table);
394 } else
395 if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
396 efi.hcdp = (void *)config_tables[i].table;
397 printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table);
398 } else
399 if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) {
400 efi.uga = (void *)config_tables[i].table;
401 printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table);
402 }
403 }
404 printk("\n");
405
406 /*
407 * Check out the runtime services table. We need to map
408 * the runtime services table so that we can grab the physical
409 * address of several of the EFI runtime functions, needed to
410 * set the firmware into virtual mode.
411 */
412
413 runtime = (efi_runtime_services_t *) boot_ioremap((unsigned long)
414 runtime,
415 sizeof(efi_runtime_services_t));
416 if (runtime != NULL) {
417 /*
418 * We will only need *early* access to the following
419 * two EFI runtime services before set_virtual_address_map
420 * is invoked.
421 */
422 efi_phys.get_time = (efi_get_time_t *) runtime->get_time;
423 efi_phys.set_virtual_address_map =
424 (efi_set_virtual_address_map_t *)
425 runtime->set_virtual_address_map;
426 } else
427 printk(KERN_ERR PFX "Could not map the runtime service table!\n");
428
429 /* Map the EFI memory map for use until paging_init() */
430
431 memmap.map = (efi_memory_desc_t *)
432 boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE);
433
434 if (memmap.map == NULL)
435 printk(KERN_ERR PFX "Could not map the EFI memory map!\n");
436
437 if (EFI_MEMDESC_SIZE != sizeof(efi_memory_desc_t)) {
438 printk(KERN_WARNING PFX "Warning! Kernel-defined memdesc doesn't "
439 "match the one from EFI!\n");
440 }
441#if EFI_DEBUG
442 print_efi_memmap();
443#endif
444}
445
446/*
447 * This function will switch the EFI runtime services to virtual mode.
448 * Essentially, look through the EFI memmap and map every region that
449 * has the runtime attribute bit set in its memory descriptor and update
450 * that memory descriptor with the virtual address obtained from ioremap().
451 * This enables the runtime services to be called without having to
452 * thunk back into physical mode for every invocation.
453 */
454
455void __init efi_enter_virtual_mode(void)
456{
457 efi_memory_desc_t *md;
458 efi_status_t status;
459 int i;
460
461 efi.systab = NULL;
462
463 for (i = 0; i < memmap.nr_map; i++) {
464 md = &memmap.map[i];
465
466 if (md->attribute & EFI_MEMORY_RUNTIME) {
467 md->virt_addr =
468 (unsigned long)ioremap(md->phys_addr,
469 md->num_pages << EFI_PAGE_SHIFT);
470 if (!(unsigned long)md->virt_addr) {
471 printk(KERN_ERR PFX "ioremap of 0x%lX failed\n",
472 (unsigned long)md->phys_addr);
473 }
474
475 if (((unsigned long)md->phys_addr <=
476 (unsigned long)efi_phys.systab) &&
477 ((unsigned long)efi_phys.systab <
478 md->phys_addr +
479 ((unsigned long)md->num_pages <<
480 EFI_PAGE_SHIFT))) {
481 unsigned long addr;
482
483 addr = md->virt_addr - md->phys_addr +
484 (unsigned long)efi_phys.systab;
485 efi.systab = (efi_system_table_t *)addr;
486 }
487 }
488 }
489
490 if (!efi.systab)
491 BUG();
492
493 status = phys_efi_set_virtual_address_map(
494 sizeof(efi_memory_desc_t) * memmap.nr_map,
495 sizeof(efi_memory_desc_t),
496 memmap.desc_version,
497 memmap.phys_map);
498
499 if (status != EFI_SUCCESS) {
500 printk (KERN_ALERT "You are screwed! "
501 "Unable to switch EFI into virtual mode "
502 "(status=%lx)\n", status);
503 panic("EFI call to SetVirtualAddressMap() failed!");
504 }
505
506 /*
507 * Now that EFI is in virtual mode, update the function
508 * pointers in the runtime service table to the new virtual addresses.
509 */
510
511 efi.get_time = (efi_get_time_t *) efi.systab->runtime->get_time;
512 efi.set_time = (efi_set_time_t *) efi.systab->runtime->set_time;
513 efi.get_wakeup_time = (efi_get_wakeup_time_t *)
514 efi.systab->runtime->get_wakeup_time;
515 efi.set_wakeup_time = (efi_set_wakeup_time_t *)
516 efi.systab->runtime->set_wakeup_time;
517 efi.get_variable = (efi_get_variable_t *)
518 efi.systab->runtime->get_variable;
519 efi.get_next_variable = (efi_get_next_variable_t *)
520 efi.systab->runtime->get_next_variable;
521 efi.set_variable = (efi_set_variable_t *)
522 efi.systab->runtime->set_variable;
523 efi.get_next_high_mono_count = (efi_get_next_high_mono_count_t *)
524 efi.systab->runtime->get_next_high_mono_count;
525 efi.reset_system = (efi_reset_system_t *)
526 efi.systab->runtime->reset_system;
527}
528
529void __init
530efi_initialize_iomem_resources(struct resource *code_resource,
531 struct resource *data_resource)
532{
533 struct resource *res;
534 efi_memory_desc_t *md;
535 int i;
536
537 for (i = 0; i < memmap.nr_map; i++) {
538 md = &memmap.map[i];
539
540 if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >
541 0x100000000ULL)
542 continue;
543 res = alloc_bootmem_low(sizeof(struct resource));
544 switch (md->type) {
545 case EFI_RESERVED_TYPE:
546 res->name = "Reserved Memory";
547 break;
548 case EFI_LOADER_CODE:
549 res->name = "Loader Code";
550 break;
551 case EFI_LOADER_DATA:
552 res->name = "Loader Data";
553 break;
554 case EFI_BOOT_SERVICES_DATA:
555 res->name = "BootServices Data";
556 break;
557 case EFI_BOOT_SERVICES_CODE:
558 res->name = "BootServices Code";
559 break;
560 case EFI_RUNTIME_SERVICES_CODE:
561 res->name = "Runtime Service Code";
562 break;
563 case EFI_RUNTIME_SERVICES_DATA:
564 res->name = "Runtime Service Data";
565 break;
566 case EFI_CONVENTIONAL_MEMORY:
567 res->name = "Conventional Memory";
568 break;
569 case EFI_UNUSABLE_MEMORY:
570 res->name = "Unusable Memory";
571 break;
572 case EFI_ACPI_RECLAIM_MEMORY:
573 res->name = "ACPI Reclaim";
574 break;
575 case EFI_ACPI_MEMORY_NVS:
576 res->name = "ACPI NVS";
577 break;
578 case EFI_MEMORY_MAPPED_IO:
579 res->name = "Memory Mapped IO";
580 break;
581 case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
582 res->name = "Memory Mapped IO Port Space";
583 break;
584 default:
585 res->name = "Reserved";
586 break;
587 }
588 res->start = md->phys_addr;
589 res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1);
590 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
591 if (request_resource(&iomem_resource, res) < 0)
592 printk(KERN_ERR PFX "Failed to allocate res %s : 0x%lx-0x%lx\n",
593 res->name, res->start, res->end);
594 /*
595 * We don't know which region contains kernel data so we try
596 * it repeatedly and let the resource manager test it.
597 */
598 if (md->type == EFI_CONVENTIONAL_MEMORY) {
599 request_resource(res, code_resource);
600 request_resource(res, data_resource);
601 }
602 }
603}
604
605/*
606 * Convenience functions to obtain memory types and attributes
607 */
608
609u32 efi_mem_type(unsigned long phys_addr)
610{
611 efi_memory_desc_t *md;
612 int i;
613
614 for (i = 0; i < memmap.nr_map; i++) {
615 md = &memmap.map[i];
616 if ((md->phys_addr <= phys_addr) && (phys_addr <
617		    (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT))))
618 return md->type;
619 }
620 return 0;
621}
622
623u64 efi_mem_attributes(unsigned long phys_addr)
624{
625 efi_memory_desc_t *md;
626 int i;
627
628 for (i = 0; i < memmap.nr_map; i++) {
629 md = &memmap.map[i];
630 if ((md->phys_addr <= phys_addr) && (phys_addr <
631		    (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT))))
632 return md->attribute;
633 }
634 return 0;
635}
diff --git a/arch/i386/kernel/efi_stub.S b/arch/i386/kernel/efi_stub.S
new file mode 100644
index 000000000000..08c0312d9b6c
--- /dev/null
+++ b/arch/i386/kernel/efi_stub.S
@@ -0,0 +1,124 @@
1/*
2 * EFI call stub for IA32.
3 *
4 * This stub allows us to make EFI calls in physical mode with interrupts
5 * turned off.
6 */
7
8#include <linux/config.h>
9#include <linux/linkage.h>
10#include <asm/page.h>
11#include <asm/pgtable.h>
12
13/*
14 * efi_call_phys(void *, ...) is a function with variable parameters.
15 * All callers of this function ensure that all parameters are 4 bytes wide.
16 */
17
18/*
19 * In the gcc calling convention, EBX, ESP, EBP, ESI and EDI are callee
20 * saved.  Save all of them at the beginning of this function and restore
21 * them at the end, no matter how many we actually use, because we cannot
22 * assume that EFI runtime service functions follow the same convention.
23 */
24
25.text
26ENTRY(efi_call_phys)
27 /*
28	 * 0. The function can only be called from the Linux kernel, so CS has
29	 * been set to 0x0010 and DS and SS to 0x0018.  EFI uses the same selector
30	 * values and the corresponding GDT entries are identical, so nothing about
31	 * the segment registers or GDT contents needs to change; only the GDT
32	 * base register is switched in the prologue and epilogue.
33 */
34
35 /*
36	 * 1. We are currently running with EIP = <physical address> + PAGE_OFFSET.
37	 * To switch smoothly from virtual mode to flat mode, the prologue has
38	 * already created a mapping of the lower virtual memory range, and the
39	 * epilogue tears it down again.
40 */
41 movl $1f, %edx
42 subl $__PAGE_OFFSET, %edx
43 jmp *%edx
441:
45
46 /*
47 * 2. Now on the top of stack is the return
48 * address in the caller of efi_call_phys(), then parameter 1,
49 * parameter 2, ..., param n. To make things easy, we save the return
50 * address of efi_call_phys in a global variable.
51 */
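	/*
	 * (Because the return address and function pointer are kept in
	 * single global variables, efi_call_phys is not reentrant; the
	 * callers in efi.c serialize on efi_rt_lock.)
	 */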
52 popl %edx
53 movl %edx, saved_return_addr
54 /* get the function pointer into ECX*/
55 popl %ecx
56 movl %ecx, efi_rt_function_ptr
57 movl $2f, %edx
58 subl $__PAGE_OFFSET, %edx
59 pushl %edx
60
61 /*
62 * 3. Clear PG bit in %CR0.
63 */
64 movl %cr0, %edx
65 andl $0x7fffffff, %edx
66 movl %edx, %cr0
67 jmp 1f
681:
69
70 /*
71 * 4. Adjust stack pointer.
72 */
73 subl $__PAGE_OFFSET, %esp
74
75 /*
76 * 5. Call the physical function.
77 */
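	/* A jmp rather than a call: the return address (physical label 2)
	 * was already pushed in step 2. */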
78 jmp *%ecx
79
802:
81 /*
82 * 6. After the EFI runtime service returns, control resumes at the
83 * following instruction. Readjust the stack pointer first.
84 */
85 addl $__PAGE_OFFSET, %esp
86
87 /*
88 * 7. Restore PG bit
89 */
90 movl %cr0, %edx
91 orl $0x80000000, %edx
92 movl %edx, %cr0
93 jmp 1f
941:
95 /*
96 * 8. Now switch back from flat mode to virtual mode by
97 * adding PAGE_OFFSET to EIP.
98 */
99 movl $1f, %edx
100 jmp *%edx
1011:
102
103 /*
104 * 9. Balance the stack. EAX contains the return value,
105 * so we must not clobber it.
106 */
107 leal efi_rt_function_ptr, %edx
108 movl (%edx), %ecx
109 pushl %ecx
110
111 /*
112 * 10. Push the saved return address onto the stack and return.
113 */
114 leal saved_return_addr, %edx
115 movl (%edx), %ecx
116 pushl %ecx
117 ret
118.previous
119
120.data
121saved_return_addr:
122 .long 0
123efi_rt_function_ptr:
124 .long 0
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
new file mode 100644
index 000000000000..1e45ff292bc9
--- /dev/null
+++ b/arch/i386/kernel/entry.S
@@ -0,0 +1,950 @@
1/*
2 * linux/arch/i386/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7/*
8 * entry.S contains the system-call and fault low-level handling routines.
9 * This also contains the timer-interrupt handler, as well as all interrupts
10 * and faults that can result in a task-switch.
11 *
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after a timer-interrupt and after each system call.
14 *
15 * I changed all the .align's to 4 (16 byte alignment), as that's faster
16 * on a 486.
17 *
18 * Stack layout in 'ret_from_system_call':
19 * ptrace needs to have all regs on the stack.
20 * if the order here is changed, it needs to be
21 * updated in fork.c:copy_process, signal.c:do_signal,
22 * ptrace.c and ptrace.h
23 *
24 * 0(%esp) - %ebx
25 * 4(%esp) - %ecx
26 * 8(%esp) - %edx
27 * C(%esp) - %esi
28 * 10(%esp) - %edi
29 * 14(%esp) - %ebp
30 * 18(%esp) - %eax
31 * 1C(%esp) - %ds
32 * 20(%esp) - %es
33 * 24(%esp) - orig_eax
34 * 28(%esp) - %eip
35 * 2C(%esp) - %cs
36 * 30(%esp) - %eflags
37 * 34(%esp) - %oldesp
38 * 38(%esp) - %oldss
39 *
40 * "current" is in register %ebx during any slow entries.
41 */
42
43#include <linux/config.h>
44#include <linux/linkage.h>
45#include <asm/thread_info.h>
46#include <asm/errno.h>
47#include <asm/segment.h>
48#include <asm/smp.h>
49#include <asm/page.h>
50#include <asm/desc.h>
51#include "irq_vectors.h"
52
53#define nr_syscalls ((syscall_table_size)/4)
54
55EBX = 0x00
56ECX = 0x04
57EDX = 0x08
58ESI = 0x0C
59EDI = 0x10
60EBP = 0x14
61EAX = 0x18
62DS = 0x1C
63ES = 0x20
64ORIG_EAX = 0x24
65EIP = 0x28
66CS = 0x2C
67EFLAGS = 0x30
68OLDESP = 0x34
69OLDSS = 0x38
70
71CF_MASK = 0x00000001
72TF_MASK = 0x00000100
73IF_MASK = 0x00000200
74DF_MASK = 0x00000400
75NT_MASK = 0x00004000
76VM_MASK = 0x00020000
77
78#ifdef CONFIG_PREEMPT
79#define preempt_stop cli
80#else
81#define preempt_stop
82#define resume_kernel restore_nocheck
83#endif
84
85#define SAVE_ALL \
86 cld; \
87 pushl %es; \
88 pushl %ds; \
89 pushl %eax; \
90 pushl %ebp; \
91 pushl %edi; \
92 pushl %esi; \
93 pushl %edx; \
94 pushl %ecx; \
95 pushl %ebx; \
96 movl $(__USER_DS), %edx; \
97 movl %edx, %ds; \
98 movl %edx, %es;
99
100#define RESTORE_INT_REGS \
101 popl %ebx; \
102 popl %ecx; \
103 popl %edx; \
104 popl %esi; \
105 popl %edi; \
106 popl %ebp; \
107 popl %eax
108
109#define RESTORE_REGS \
110 RESTORE_INT_REGS; \
1111: popl %ds; \
1122: popl %es; \
113.section .fixup,"ax"; \
1143: movl $0,(%esp); \
115 jmp 1b; \
1164: movl $0,(%esp); \
117 jmp 2b; \
118.previous; \
119.section __ex_table,"a";\
120 .align 4; \
121 .long 1b,3b; \
122 .long 2b,4b; \
123.previous
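/* The .fixup entries above recover from a faulting popl %ds/%es (e.g. a
 * bogus user selector): the saved selector is replaced with 0 and the
 * pop is retried. */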
124
125
126ENTRY(ret_from_fork)
127 pushl %eax
128 call schedule_tail
129 GET_THREAD_INFO(%ebp)
130 popl %eax
131 jmp syscall_exit
132
133/*
134 * Return to user mode is not as complex as all this looks,
135 * but we want the default path for a system call return to
136 * go as quickly as possible which is why some of this is
137 * less clear than it otherwise should be.
138 */
139
140 # userspace resumption stub bypassing syscall exit tracing
141 ALIGN
142ret_from_exception:
143 preempt_stop
144ret_from_intr:
145 GET_THREAD_INFO(%ebp)
146 movl EFLAGS(%esp), %eax # mix EFLAGS and CS
147 movb CS(%esp), %al
148 testl $(VM_MASK | 3), %eax
149 jz resume_kernel
150ENTRY(resume_userspace)
151 cli # make sure we don't miss an interrupt
152 # setting need_resched or sigpending
153 # between sampling and the iret
154 movl TI_flags(%ebp), %ecx
155 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
156 # int/exception return?
157 jne work_pending
158 jmp restore_all
159
160#ifdef CONFIG_PREEMPT
161ENTRY(resume_kernel)
162 cli
163 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
164 jnz restore_nocheck
165need_resched:
166 movl TI_flags(%ebp), %ecx # need_resched set ?
167 testb $_TIF_NEED_RESCHED, %cl
168 jz restore_all
169 testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
170 jz restore_all
171 call preempt_schedule_irq
172 jmp need_resched
173#endif
174
175/* SYSENTER_RETURN points to after the "sysenter" instruction in
176 the vsyscall page. See vsyscall-sysenter.S, which defines the symbol. */
177
178 # sysenter call handler stub
179ENTRY(sysenter_entry)
180 movl TSS_sysenter_esp0(%esp),%esp
181sysenter_past_esp:
182 sti
183 pushl $(__USER_DS)
184 pushl %ebp
185 pushfl
186 pushl $(__USER_CS)
187 pushl $SYSENTER_RETURN
188
189/*
190 * Load the potential sixth argument from user stack.
191 * Careful about security.
192 */
193 cmpl $__PAGE_OFFSET-3,%ebp
194 jae syscall_fault
1951: movl (%ebp),%ebp
196.section __ex_table,"a"
197 .align 4
198 .long 1b,syscall_fault
199.previous
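/* On sysenter, user %ebp is expected to hold the user stack pointer (set
 * up by the vsyscall page); the bounds check keeps the pointer out of
 * kernel space, and the exception-table entry above handles a faulting
 * user address. */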
200
201 pushl %eax
202 SAVE_ALL
203 GET_THREAD_INFO(%ebp)
204
205 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
206 testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
207 jnz syscall_trace_entry
208 cmpl $(nr_syscalls), %eax
209 jae syscall_badsys
210 call *sys_call_table(,%eax,4)
211 movl %eax,EAX(%esp)
212 cli
213 movl TI_flags(%ebp), %ecx
214 testw $_TIF_ALLWORK_MASK, %cx
215 jne syscall_exit_work
216/* if something modifies registers it must also disable sysexit */
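/* sysexit resumes userspace with %eip taken from %edx and %esp from %ecx */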
217 movl EIP(%esp), %edx
218 movl OLDESP(%esp), %ecx
219 xorl %ebp,%ebp
220 sti
221 sysexit
222
223
224 # system call handler stub
225ENTRY(system_call)
226 pushl %eax # save orig_eax
227 SAVE_ALL
228 GET_THREAD_INFO(%ebp)
229 # system call tracing in operation
230 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
231 testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
232 jnz syscall_trace_entry
233 cmpl $(nr_syscalls), %eax
234 jae syscall_badsys
235syscall_call:
236 call *sys_call_table(,%eax,4)
237 movl %eax,EAX(%esp) # store the return value
238syscall_exit:
239 cli # make sure we don't miss an interrupt
240 # setting need_resched or sigpending
241 # between sampling and the iret
242 movl TI_flags(%ebp), %ecx
243 testw $_TIF_ALLWORK_MASK, %cx # current->work
244 jne syscall_exit_work
245
246restore_all:
247 movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
248 movb OLDSS(%esp), %ah
249 movb CS(%esp), %al
250 andl $(VM_MASK | (4 << 8) | 3), %eax
251 cmpl $((4 << 8) | 3), %eax
252 je ldt_ss # returning to user-space with LDT SS
253restore_nocheck:
254 RESTORE_REGS
255 addl $4, %esp
2561: iret
257.section .fixup,"ax"
258iret_exc:
259 sti
260 movl $__USER_DS, %edx
261 movl %edx, %ds
262 movl %edx, %es
263 movl $11,%eax
264 call do_exit
265.previous
266.section __ex_table,"a"
267 .align 4
268 .long 1b,iret_exc
269.previous
270
271ldt_ss:
272 larl OLDSS(%esp), %eax
273 jnz restore_nocheck
274 testl $0x00400000, %eax # returning to 32bit stack?
275	 jnz restore_nocheck		# all right, normal return
276 /* If returning to userspace with 16bit stack,
277 * try to fix the higher word of ESP, as the CPU
278 * won't restore it.
279 * This is an "official" bug of all the x86-compatible
280 * CPUs, which we can try to work around to make
281 * dosemu and wine happy. */
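	/* Only the low 16 bits of %esp are restored by iret to a 16-bit SS;
	 * staging the frame on a separate 16-bit stack keeps the stale
	 * high word harmless. */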
282 subl $8, %esp # reserve space for switch16 pointer
283 cli
284 movl %esp, %eax
285 /* Set up the 16bit stack frame with switch32 pointer on top,
286 * and a switch16 pointer on top of the current frame. */
287 call setup_x86_bogus_stack
288 RESTORE_REGS
289 lss 20+4(%esp), %esp # switch to 16bit stack
2901: iret
291.section __ex_table,"a"
292 .align 4
293 .long 1b,iret_exc
294.previous
295
296 # perform work that needs to be done immediately before resumption
297 ALIGN
298work_pending:
299 testb $_TIF_NEED_RESCHED, %cl
300 jz work_notifysig
301work_resched:
302 call schedule
303 cli # make sure we don't miss an interrupt
304 # setting need_resched or sigpending
305 # between sampling and the iret
306 movl TI_flags(%ebp), %ecx
307 andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
308 # than syscall tracing?
309 jz restore_all
310 testb $_TIF_NEED_RESCHED, %cl
311 jnz work_resched
312
313work_notifysig: # deal with pending signals and
314 # notify-resume requests
315 testl $VM_MASK, EFLAGS(%esp)
316 movl %esp, %eax
317 jne work_notifysig_v86 # returning to kernel-space or
318 # vm86-space
319 xorl %edx, %edx
320 call do_notify_resume
321 jmp restore_all
322
323 ALIGN
324work_notifysig_v86:
325 pushl %ecx # save ti_flags for do_notify_resume
326 call save_v86_state # %eax contains pt_regs pointer
327 popl %ecx
328 movl %eax, %esp
329 xorl %edx, %edx
330 call do_notify_resume
331 jmp restore_all
332
333 # perform syscall exit tracing
334 ALIGN
335syscall_trace_entry:
336 movl $-ENOSYS,EAX(%esp)
337 movl %esp, %eax
338 xorl %edx,%edx
339 call do_syscall_trace
340 movl ORIG_EAX(%esp), %eax
341 cmpl $(nr_syscalls), %eax
342 jnae syscall_call
343 jmp syscall_exit
344
345 # perform syscall exit tracing
346 ALIGN
347syscall_exit_work:
348 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
349 jz work_pending
350 sti # could let do_syscall_trace() call
351 # schedule() instead
352 movl %esp, %eax
353 movl $1, %edx
354 call do_syscall_trace
355 jmp resume_userspace
356
357 ALIGN
358syscall_fault:
359 pushl %eax # save orig_eax
360 SAVE_ALL
361 GET_THREAD_INFO(%ebp)
362 movl $-EFAULT,EAX(%esp)
363 jmp resume_userspace
364
365 ALIGN
366syscall_badsys:
367 movl $-ENOSYS,EAX(%esp)
368 jmp resume_userspace
369
370#define FIXUP_ESPFIX_STACK \
371 movl %esp, %eax; \
372 /* switch to 32bit stack using the pointer on top of 16bit stack */ \
373 lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
374 /* copy data from 16bit stack to 32bit stack */ \
375 call fixup_x86_bogus_stack; \
376 /* put ESP to the proper location */ \
377 movl %eax, %esp;
378#define UNWIND_ESPFIX_STACK \
379 pushl %eax; \
380 movl %ss, %eax; \
381 /* see if on 16bit stack */ \
382 cmpw $__ESPFIX_SS, %ax; \
383 jne 28f; \
384 movl $__KERNEL_DS, %edx; \
385 movl %edx, %ds; \
386 movl %edx, %es; \
387 /* switch to 32bit stack */ \
388 FIXUP_ESPFIX_STACK \
38928: popl %eax;
390
391/*
392 * Build the entry stubs and pointer table with
393 * some assembler magic.
394 */
395.data
396ENTRY(interrupt)
397.text
398
399vector=0
400ENTRY(irq_entries_start)
401.rept NR_IRQS
402 ALIGN
4031: pushl $vector-256
404 jmp common_interrupt
405.data
406 .long 1b
407.text
408vector=vector+1
409.endr
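/* Each stub pushes vector-256 so that orig_eax is negative, letting the
 * kernel tell hardware interrupts apart from syscall numbers (>= 0). */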
410
411 ALIGN
412common_interrupt:
413 SAVE_ALL
414 movl %esp,%eax
415 call do_IRQ
416 jmp ret_from_intr
417
418#define BUILD_INTERRUPT(name, nr) \
419ENTRY(name) \
420 pushl $nr-256; \
421 SAVE_ALL \
422 movl %esp,%eax; \
423 call smp_/**/name; \
424 jmp ret_from_intr;
425
426/* The include is where all of the SMP etc. interrupts come from */
427#include "entry_arch.h"
428
429ENTRY(divide_error)
430 pushl $0 # no error code
431 pushl $do_divide_error
432 ALIGN
433error_code:
434 pushl %ds
435 pushl %eax
436 xorl %eax, %eax
437 pushl %ebp
438 pushl %edi
439 pushl %esi
440 pushl %edx
441 decl %eax # eax = -1
442 pushl %ecx
443 pushl %ebx
444 cld
445 pushl %es
446 UNWIND_ESPFIX_STACK
447 popl %ecx
448 movl ES(%esp), %edi # get the function address
449 movl ORIG_EAX(%esp), %edx # get the error code
450 movl %eax, ORIG_EAX(%esp)
451 movl %ecx, ES(%esp)
452 movl $(__USER_DS), %ecx
453 movl %ecx, %ds
454 movl %ecx, %es
455 movl %esp,%eax # pt_regs pointer
456 call *%edi
457 jmp ret_from_exception
458
459ENTRY(coprocessor_error)
460 pushl $0
461 pushl $do_coprocessor_error
462 jmp error_code
463
464ENTRY(simd_coprocessor_error)
465 pushl $0
466 pushl $do_simd_coprocessor_error
467 jmp error_code
468
469ENTRY(device_not_available)
470 pushl $-1 # mark this as an int
471 SAVE_ALL
472 movl %cr0, %eax
473 testl $0x4, %eax # EM (math emulation bit)
474 jne device_not_available_emulate
475 preempt_stop
476 call math_state_restore
477 jmp ret_from_exception
478device_not_available_emulate:
479 pushl $0 # temporary storage for ORIG_EIP
480 call math_emulate
481 addl $4, %esp
482 jmp ret_from_exception
483
484/*
485 * Debug traps and NMI can happen at the one SYSENTER instruction
486 * that sets up the real kernel stack. Check here, since we can't
487 * allow the wrong stack to be used.
488 *
489 * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
490 * already pushed 3 words if it hits on the sysenter instruction:
491 * eflags, cs and eip.
492 *
493 * We just load the right stack, and push the three (known) values
494 * by hand onto the new stack - while updating the return eip past
495 * the instruction that would have done it for sysenter.
496 */
497#define FIX_STACK(offset, ok, label) \
498 cmpw $__KERNEL_CS,4(%esp); \
499 jne ok; \
500label: \
501 movl TSS_sysenter_esp0+offset(%esp),%esp; \
502 pushfl; \
503 pushl $__KERNEL_CS; \
504 pushl $sysenter_past_esp
505
506ENTRY(debug)
507 cmpl $sysenter_entry,(%esp)
508 jne debug_stack_correct
509 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
510debug_stack_correct:
511 pushl $-1 # mark this as an int
512 SAVE_ALL
513 xorl %edx,%edx # error code 0
514 movl %esp,%eax # pt_regs pointer
515 call do_debug
516 testl %eax,%eax
517 jnz restore_all
518 jmp ret_from_exception
519
520/*
521 * NMI is doubly nasty. It can happen _while_ we're handling
522 * a debug fault, and the debug fault hasn't yet been able to
523 * clear up the stack. So we first check whether we got an
524 * NMI on the sysenter entry path, but after that we need to
525 * check whether we got an NMI on the debug path where the debug
526 * fault happened on the sysenter path.
527 */
528ENTRY(nmi)
529 pushl %eax
530 movl %ss, %eax
531 cmpw $__ESPFIX_SS, %ax
532 popl %eax
533 je nmi_16bit_stack
534 cmpl $sysenter_entry,(%esp)
535 je nmi_stack_fixup
536 pushl %eax
537 movl %esp,%eax
538 /* Do not access memory above the end of our stack page,
539	 * as it might not exist.
540 */
541 andl $(THREAD_SIZE-1),%eax
542 cmpl $(THREAD_SIZE-20),%eax
543 popl %eax
544 jae nmi_stack_correct
545 cmpl $sysenter_entry,12(%esp)
546 je nmi_debug_stack_check
547nmi_stack_correct:
548 pushl %eax
549 SAVE_ALL
550 xorl %edx,%edx # zero error code
551 movl %esp,%eax # pt_regs pointer
552 call do_nmi
553 jmp restore_all
554
555nmi_stack_fixup:
556 FIX_STACK(12,nmi_stack_correct, 1)
557 jmp nmi_stack_correct
558nmi_debug_stack_check:
559 cmpw $__KERNEL_CS,16(%esp)
560 jne nmi_stack_correct
561 cmpl $debug - 1,(%esp)
562 jle nmi_stack_correct
563 cmpl $debug_esp_fix_insn,(%esp)
564 jle nmi_debug_stack_fixup
565nmi_debug_stack_fixup:
566 FIX_STACK(24,nmi_stack_correct, 1)
567 jmp nmi_stack_correct
568
569nmi_16bit_stack:
570 /* create the pointer to lss back */
571 pushl %ss
572 pushl %esp
573 movzwl %sp, %esp
574 addw $4, (%esp)
575 /* copy the iret frame of 12 bytes */
576 .rept 3
577 pushl 16(%esp)
578 .endr
579 pushl %eax
580 SAVE_ALL
581 FIXUP_ESPFIX_STACK # %eax == %esp
582 xorl %edx,%edx # zero error code
583 call do_nmi
584 RESTORE_REGS
585 lss 12+4(%esp), %esp # back to 16bit stack
5861: iret
587.section __ex_table,"a"
588 .align 4
589 .long 1b,iret_exc
590.previous
591
592ENTRY(int3)
593 pushl $-1 # mark this as an int
594 SAVE_ALL
595 xorl %edx,%edx # zero error code
596 movl %esp,%eax # pt_regs pointer
597 call do_int3
598 testl %eax,%eax
599 jnz restore_all
600 jmp ret_from_exception
601
602ENTRY(overflow)
603 pushl $0
604 pushl $do_overflow
605 jmp error_code
606
607ENTRY(bounds)
608 pushl $0
609 pushl $do_bounds
610 jmp error_code
611
612ENTRY(invalid_op)
613 pushl $0
614 pushl $do_invalid_op
615 jmp error_code
616
617ENTRY(coprocessor_segment_overrun)
618 pushl $0
619 pushl $do_coprocessor_segment_overrun
620 jmp error_code
621
622ENTRY(invalid_TSS)
623 pushl $do_invalid_TSS
624 jmp error_code
625
626ENTRY(segment_not_present)
627 pushl $do_segment_not_present
628 jmp error_code
629
630ENTRY(stack_segment)
631 pushl $do_stack_segment
632 jmp error_code
633
634ENTRY(general_protection)
635 pushl $do_general_protection
636 jmp error_code
637
638ENTRY(alignment_check)
639 pushl $do_alignment_check
640 jmp error_code
641
642ENTRY(page_fault)
643 pushl $do_page_fault
644 jmp error_code
645
646#ifdef CONFIG_X86_MCE
647ENTRY(machine_check)
648 pushl $0
649 pushl machine_check_vector
650 jmp error_code
651#endif
652
653ENTRY(spurious_interrupt_bug)
654 pushl $0
655 pushl $do_spurious_interrupt_bug
656 jmp error_code
657
658.data
659ENTRY(sys_call_table)
660 .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
661 .long sys_exit
662 .long sys_fork
663 .long sys_read
664 .long sys_write
665 .long sys_open /* 5 */
666 .long sys_close
667 .long sys_waitpid
668 .long sys_creat
669 .long sys_link
670 .long sys_unlink /* 10 */
671 .long sys_execve
672 .long sys_chdir
673 .long sys_time
674 .long sys_mknod
675 .long sys_chmod /* 15 */
676 .long sys_lchown16
677 .long sys_ni_syscall /* old break syscall holder */
678 .long sys_stat
679 .long sys_lseek
680 .long sys_getpid /* 20 */
681 .long sys_mount
682 .long sys_oldumount
683 .long sys_setuid16
684 .long sys_getuid16
685 .long sys_stime /* 25 */
686 .long sys_ptrace
687 .long sys_alarm
688 .long sys_fstat
689 .long sys_pause
690 .long sys_utime /* 30 */
691 .long sys_ni_syscall /* old stty syscall holder */
692 .long sys_ni_syscall /* old gtty syscall holder */
693 .long sys_access
694 .long sys_nice
695 .long sys_ni_syscall /* 35 - old ftime syscall holder */
696 .long sys_sync
697 .long sys_kill
698 .long sys_rename
699 .long sys_mkdir
700 .long sys_rmdir /* 40 */
701 .long sys_dup
702 .long sys_pipe
703 .long sys_times
704 .long sys_ni_syscall /* old prof syscall holder */
705 .long sys_brk /* 45 */
706 .long sys_setgid16
707 .long sys_getgid16
708 .long sys_signal
709 .long sys_geteuid16
710 .long sys_getegid16 /* 50 */
711 .long sys_acct
712 .long sys_umount /* recycled never used phys() */
713 .long sys_ni_syscall /* old lock syscall holder */
714 .long sys_ioctl
715 .long sys_fcntl /* 55 */
716 .long sys_ni_syscall /* old mpx syscall holder */
717 .long sys_setpgid
718 .long sys_ni_syscall /* old ulimit syscall holder */
719 .long sys_olduname
720 .long sys_umask /* 60 */
721 .long sys_chroot
722 .long sys_ustat
723 .long sys_dup2
724 .long sys_getppid
725 .long sys_getpgrp /* 65 */
726 .long sys_setsid
727 .long sys_sigaction
728 .long sys_sgetmask
729 .long sys_ssetmask
730 .long sys_setreuid16 /* 70 */
731 .long sys_setregid16
732 .long sys_sigsuspend
733 .long sys_sigpending
734 .long sys_sethostname
735 .long sys_setrlimit /* 75 */
736 .long sys_old_getrlimit
737 .long sys_getrusage
738 .long sys_gettimeofday
739 .long sys_settimeofday
740 .long sys_getgroups16 /* 80 */
741 .long sys_setgroups16
742 .long old_select
743 .long sys_symlink
744 .long sys_lstat
745 .long sys_readlink /* 85 */
746 .long sys_uselib
747 .long sys_swapon
748 .long sys_reboot
749 .long old_readdir
750 .long old_mmap /* 90 */
751 .long sys_munmap
752 .long sys_truncate
753 .long sys_ftruncate
754 .long sys_fchmod
755 .long sys_fchown16 /* 95 */
756 .long sys_getpriority
757 .long sys_setpriority
758 .long sys_ni_syscall /* old profil syscall holder */
759 .long sys_statfs
760 .long sys_fstatfs /* 100 */
761 .long sys_ioperm
762 .long sys_socketcall
763 .long sys_syslog
764 .long sys_setitimer
765 .long sys_getitimer /* 105 */
766 .long sys_newstat
767 .long sys_newlstat
768 .long sys_newfstat
769 .long sys_uname
770 .long sys_iopl /* 110 */
771 .long sys_vhangup
772 .long sys_ni_syscall /* old "idle" system call */
773 .long sys_vm86old
774 .long sys_wait4
775 .long sys_swapoff /* 115 */
776 .long sys_sysinfo
777 .long sys_ipc
778 .long sys_fsync
779 .long sys_sigreturn
780 .long sys_clone /* 120 */
781 .long sys_setdomainname
782 .long sys_newuname
783 .long sys_modify_ldt
784 .long sys_adjtimex
785 .long sys_mprotect /* 125 */
786 .long sys_sigprocmask
787 .long sys_ni_syscall /* old "create_module" */
788 .long sys_init_module
789 .long sys_delete_module
790 .long sys_ni_syscall /* 130: old "get_kernel_syms" */
791 .long sys_quotactl
792 .long sys_getpgid
793 .long sys_fchdir
794 .long sys_bdflush
795 .long sys_sysfs /* 135 */
796 .long sys_personality
797 .long sys_ni_syscall /* reserved for afs_syscall */
798 .long sys_setfsuid16
799 .long sys_setfsgid16
800 .long sys_llseek /* 140 */
801 .long sys_getdents
802 .long sys_select
803 .long sys_flock
804 .long sys_msync
805 .long sys_readv /* 145 */
806 .long sys_writev
807 .long sys_getsid
808 .long sys_fdatasync
809 .long sys_sysctl
810 .long sys_mlock /* 150 */
811 .long sys_munlock
812 .long sys_mlockall
813 .long sys_munlockall
814 .long sys_sched_setparam
815 .long sys_sched_getparam /* 155 */
816 .long sys_sched_setscheduler
817 .long sys_sched_getscheduler
818 .long sys_sched_yield
819 .long sys_sched_get_priority_max
820 .long sys_sched_get_priority_min /* 160 */
821 .long sys_sched_rr_get_interval
822 .long sys_nanosleep
823 .long sys_mremap
824 .long sys_setresuid16
825 .long sys_getresuid16 /* 165 */
826 .long sys_vm86
827 .long sys_ni_syscall /* Old sys_query_module */
828 .long sys_poll
829 .long sys_nfsservctl
830 .long sys_setresgid16 /* 170 */
831 .long sys_getresgid16
832 .long sys_prctl
833 .long sys_rt_sigreturn
834 .long sys_rt_sigaction
835 .long sys_rt_sigprocmask /* 175 */
836 .long sys_rt_sigpending
837 .long sys_rt_sigtimedwait
838 .long sys_rt_sigqueueinfo
839 .long sys_rt_sigsuspend
840 .long sys_pread64 /* 180 */
841 .long sys_pwrite64
842 .long sys_chown16
843 .long sys_getcwd
844 .long sys_capget
845 .long sys_capset /* 185 */
846 .long sys_sigaltstack
847 .long sys_sendfile
848 .long sys_ni_syscall /* reserved for streams1 */
849 .long sys_ni_syscall /* reserved for streams2 */
850 .long sys_vfork /* 190 */
851 .long sys_getrlimit
852 .long sys_mmap2
853 .long sys_truncate64
854 .long sys_ftruncate64
855 .long sys_stat64 /* 195 */
856 .long sys_lstat64
857 .long sys_fstat64
858 .long sys_lchown
859 .long sys_getuid
860 .long sys_getgid /* 200 */
861 .long sys_geteuid
862 .long sys_getegid
863 .long sys_setreuid
864 .long sys_setregid
865 .long sys_getgroups /* 205 */
866 .long sys_setgroups
867 .long sys_fchown
868 .long sys_setresuid
869 .long sys_getresuid
870 .long sys_setresgid /* 210 */
871 .long sys_getresgid
872 .long sys_chown
873 .long sys_setuid
874 .long sys_setgid
875 .long sys_setfsuid /* 215 */
876 .long sys_setfsgid
877 .long sys_pivot_root
878 .long sys_mincore
879 .long sys_madvise
880 .long sys_getdents64 /* 220 */
881 .long sys_fcntl64
882 .long sys_ni_syscall /* reserved for TUX */
883 .long sys_ni_syscall
884 .long sys_gettid
885 .long sys_readahead /* 225 */
886 .long sys_setxattr
887 .long sys_lsetxattr
888 .long sys_fsetxattr
889 .long sys_getxattr
890 .long sys_lgetxattr /* 230 */
891 .long sys_fgetxattr
892 .long sys_listxattr
893 .long sys_llistxattr
894 .long sys_flistxattr
895 .long sys_removexattr /* 235 */
896 .long sys_lremovexattr
897 .long sys_fremovexattr
898 .long sys_tkill
899 .long sys_sendfile64
900 .long sys_futex /* 240 */
901 .long sys_sched_setaffinity
902 .long sys_sched_getaffinity
903 .long sys_set_thread_area
904 .long sys_get_thread_area
905 .long sys_io_setup /* 245 */
906 .long sys_io_destroy
907 .long sys_io_getevents
908 .long sys_io_submit
909 .long sys_io_cancel
910 .long sys_fadvise64 /* 250 */
911 .long sys_ni_syscall
912 .long sys_exit_group
913 .long sys_lookup_dcookie
914 .long sys_epoll_create
915 .long sys_epoll_ctl /* 255 */
916 .long sys_epoll_wait
917 .long sys_remap_file_pages
918 .long sys_set_tid_address
919 .long sys_timer_create
920 .long sys_timer_settime /* 260 */
921 .long sys_timer_gettime
922 .long sys_timer_getoverrun
923 .long sys_timer_delete
924 .long sys_clock_settime
925 .long sys_clock_gettime /* 265 */
926 .long sys_clock_getres
927 .long sys_clock_nanosleep
928 .long sys_statfs64
929 .long sys_fstatfs64
930 .long sys_tgkill /* 270 */
931 .long sys_utimes
932 .long sys_fadvise64_64
933 .long sys_ni_syscall /* sys_vserver */
934 .long sys_mbind
935 .long sys_get_mempolicy
936 .long sys_set_mempolicy
937 .long sys_mq_open
938 .long sys_mq_unlink
939 .long sys_mq_timedsend
940 .long sys_mq_timedreceive /* 280 */
941 .long sys_mq_notify
942 .long sys_mq_getsetattr
943 .long sys_ni_syscall /* reserved for kexec */
944 .long sys_waitid
945 .long sys_ni_syscall /* 285 */ /* available */
946 .long sys_add_key
947 .long sys_request_key
948 .long sys_keyctl
949
950syscall_table_size=(.-sys_call_table)
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
new file mode 100644
index 000000000000..d273fd746192
--- /dev/null
+++ b/arch/i386/kernel/head.S
@@ -0,0 +1,521 @@
1/*
2 * linux/arch/i386/kernel/head.S -- the 32-bit startup code.
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 *
6 * Enhanced CPU detection and feature setting code by Mike Jagdis
7 * and Martin Mares, November 1997.
8 */
9
10.text
11#include <linux/config.h>
12#include <linux/threads.h>
13#include <linux/linkage.h>
14#include <asm/segment.h>
15#include <asm/page.h>
16#include <asm/pgtable.h>
17#include <asm/desc.h>
18#include <asm/cache.h>
19#include <asm/thread_info.h>
20#include <asm/asm_offsets.h>
21#include <asm/setup.h>
22
23/*
24 * References to members of the new_cpu_data structure.
25 */
26
27#define X86 new_cpu_data+CPUINFO_x86
28#define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor
29#define X86_MODEL new_cpu_data+CPUINFO_x86_model
30#define X86_MASK new_cpu_data+CPUINFO_x86_mask
31#define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math
32#define X86_CPUID new_cpu_data+CPUINFO_cpuid_level
33#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
34#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
35
36/*
37 * This is how much memory *in addition to the memory covered up to
38 * and including _end* we need mapped initially. We need one bit for
39 * each possible page, but only in low memory, which means
40 * 2^32/4096/8 = 128K worst case (4G/4G split.)
41 *
42 * Modulo rounding, each megabyte assigned here requires a kilobyte of
43 * memory, which is currently unreclaimed.
44 *
45 * This should be a multiple of a page.
46 */
47#define INIT_MAP_BEYOND_END (128*1024)
48
49
50/*
51 * 32-bit kernel entrypoint; only used by the boot CPU. On entry,
52 * %esi points to the real-mode code as a 32-bit pointer.
53 * CS and DS must be 4 GB flat segments, but we don't depend on
54 * any particular GDT layout, because we load our own as soon as we
55 * can.
56 */
57ENTRY(startup_32)
58
59/*
60 * Set segments to known values.
61 */
62 cld
63 lgdt boot_gdt_descr - __PAGE_OFFSET
64 movl $(__BOOT_DS),%eax
65 movl %eax,%ds
66 movl %eax,%es
67 movl %eax,%fs
68 movl %eax,%gs
69
70/*
71 * Clear BSS first so that there are no surprises...
72 * No need to cld as DF is already clear from cld above...
73 */
74 xorl %eax,%eax
75 movl $__bss_start - __PAGE_OFFSET,%edi
76 movl $__bss_stop - __PAGE_OFFSET,%ecx
77 subl %edi,%ecx
78 shrl $2,%ecx
79 rep ; stosl
80
81/*
82 * Initialize page tables. This creates a PDE and a set of page
83 * tables, which are located immediately beyond _end. The variable
84 * init_pg_tables_end is set up to point to the first "safe" location.
85 * Mappings are created both at virtual address 0 (identity mapping)
86 * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
87 *
88 * Warning: don't use %esi or the stack in this code. However, %esp
89 * can be used as a GPR if you really need it...
90 */
91page_pde_offset = (__PAGE_OFFSET >> 20);
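/* Each PDE maps 4 MB (addr >> 22) and each PDE entry is 4 bytes wide,
 * so PAGE_OFFSET's byte offset into the page directory is addr >> 20. */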
92
93 movl $(pg0 - __PAGE_OFFSET), %edi
94 movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
95 movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */
9610:
97 leal 0x007(%edi),%ecx /* Create PDE entry */
98 movl %ecx,(%edx) /* Store identity PDE entry */
99 movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
100 addl $4,%edx
101 movl $1024, %ecx
10211:
103 stosl
104 addl $0x1000,%eax
105 loop 11b
106 /* End condition: we must map up to and including INIT_MAP_BEYOND_END */
107 /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
108 leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
109 cmpl %ebp,%eax
110 jb 10b
111 movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
112
113#ifdef CONFIG_SMP
114 xorl %ebx,%ebx /* This is the boot CPU (BSP) */
115 jmp 3f
116
117/*
118 * Non-boot CPU entry point; entered from trampoline.S
119 * We can't lgdt here, because lgdt itself uses a data segment, but
120 * we know the trampoline has already loaded the boot_gdt_table GDT
121 * for us.
122 */
123ENTRY(startup_32_smp)
124 cld
125 movl $(__BOOT_DS),%eax
126 movl %eax,%ds
127 movl %eax,%es
128 movl %eax,%fs
129 movl %eax,%gs
130
131/*
132 * New page tables may be in 4Mbyte page mode and may
133 * be using global pages.
134 *
135 * NOTE! If we are on a 486 we may have no cr4 at all!
136 * So we do not try to touch it unless we really have
137 * some bits in it to set. This won't work if the BSP
138 * implements cr4 but this AP does not -- very unlikely
139 * but be warned! The same applies to the pse feature
140 * if not equally supported. --macro
141 *
142 * NOTE! We have to correct for the fact that we're
143 * not yet offset PAGE_OFFSET..
144 */
145#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
146 movl cr4_bits,%edx
147 andl %edx,%edx
148 jz 6f
149 movl %cr4,%eax # Turn on paging options (PSE,PAE,..)
150 orl %edx,%eax
151 movl %eax,%cr4
152
153 btl $5, %eax # check if PAE is enabled
154 jnc 6f
155
156 /* Check if extended functions are implemented */
157 movl $0x80000000, %eax
158 cpuid
159 cmpl $0x80000000, %eax
160 jbe 6f
161 mov $0x80000001, %eax
162 cpuid
163 /* Execute Disable bit supported? */
164 btl $20, %edx
165 jnc 6f
166
167 /* Setup EFER (Extended Feature Enable Register) */
168 movl $0xc0000080, %ecx
169 rdmsr
170
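	/* EFER bit 11 is NXE (no-execute enable) */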
171 btsl $11, %eax
172 /* Make changes effective */
173 wrmsr
174
1756:
176 /* This is a secondary processor (AP) */
177 xorl %ebx,%ebx
178 incl %ebx
179
1803:
181#endif /* CONFIG_SMP */
182
183/*
184 * Enable paging
185 */
186 movl $swapper_pg_dir-__PAGE_OFFSET,%eax
187 movl %eax,%cr3 /* set the page table pointer.. */
188 movl %cr0,%eax
189 orl $0x80000000,%eax
190 movl %eax,%cr0 /* ..and set paging (PG) bit */
191 ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */
1921:
193 /* Set up the stack pointer */
194 lss stack_start,%esp
195
196/*
197 * Initialize eflags. Some BIOSes leave bits like NT set, which
198 * would confuse the debugger if this code is traced.
199 * XXX - best to initialize before switching to protected mode.
200 */
201 pushl $0
202 popfl
203
204#ifdef CONFIG_SMP
205 andl %ebx,%ebx
206 jz 1f /* Initial CPU cleans BSS */
207 jmp checkCPUtype
2081:
209#endif /* CONFIG_SMP */
210
211/*
212 * start system 32-bit setup. We need to re-do some of the things done
213 * in 16-bit mode for the "real" operations.
214 */
215 call setup_idt
216
217/*
218 * Copy bootup parameters out of the way.
219 * Note: %esi still has the pointer to the real-mode data.
220 */
221 movl $boot_params,%edi
222 movl $(PARAM_SIZE/4),%ecx
223 cld
224 rep
225 movsl
226 movl boot_params+NEW_CL_POINTER,%esi
227 andl %esi,%esi
228 jnz 2f # New command line protocol
229 cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
230 jne 1f
231 movzwl OLD_CL_OFFSET,%esi
232 addl $(OLD_CL_BASE_ADDR),%esi
2332:
234 movl $saved_command_line,%edi
235 movl $(COMMAND_LINE_SIZE/4),%ecx
236 rep
237 movsl
2381:
239checkCPUtype:
240
241 movl $-1,X86_CPUID # -1 for no CPUID initially
242
243/* check if it is 486 or 386. */
244/*
245 * XXX - this does a lot of unnecessary setup. Alignment checks don't
246 * apply at our cpl of 0 and the stack ought to be aligned already, and
247 * we don't need to preserve eflags.
248 */
249
250 movb $3,X86 # at least 386
251 pushfl # push EFLAGS
252 popl %eax # get EFLAGS
253 movl %eax,%ecx # save original EFLAGS
254 xorl $0x240000,%eax # flip AC and ID bits in EFLAGS
255 pushl %eax # copy to EFLAGS
256 popfl # set EFLAGS
257 pushfl # get new EFLAGS
258 popl %eax # put it in eax
259 xorl %ecx,%eax # change in flags
260 pushl %ecx # restore original EFLAGS
261 popfl
262 testl $0x40000,%eax # check if AC bit changed
263 je is386
264
265 movb $4,X86 # at least 486
266 testl $0x200000,%eax # check if ID bit changed
267 je is486
268
269 /* get vendor info */
270 xorl %eax,%eax # call CPUID with 0 -> return vendor ID
271 cpuid
272 movl %eax,X86_CPUID # save CPUID level
273 movl %ebx,X86_VENDOR_ID # lo 4 chars
274 movl %edx,X86_VENDOR_ID+4 # next 4 chars
275 movl %ecx,X86_VENDOR_ID+8 # last 4 chars
276
277 orl %eax,%eax # do we have processor info as well?
278 je is486
279
280 movl $1,%eax # Use the CPUID instruction to get CPU type
281 cpuid
282 movb %al,%cl # save reg for future use
283 andb $0x0f,%ah # mask processor family
284 movb %ah,X86
285 andb $0xf0,%al # mask model
286 shrb $4,%al
287 movb %al,X86_MODEL
288	 andb $0x0f,%cl		# mask off the stepping (mask revision)
289 movb %cl,X86_MASK
290 movl %edx,X86_CAPABILITY
291
292is486: movl $0x50022,%ecx # set AM, WP, NE and MP
293 jmp 2f
294
295is386: movl $2,%ecx # set MP
2962: movl %cr0,%eax
297 andl $0x80000011,%eax # Save PG,PE,ET
298 orl %ecx,%eax
299 movl %eax,%cr0
300
301 call check_x87
302 incb ready
303 lgdt cpu_gdt_descr
304 lidt idt_descr
305 ljmp $(__KERNEL_CS),$1f
3061: movl $(__KERNEL_DS),%eax # reload all the segment registers
307 movl %eax,%ss # after changing gdt.
308
309 movl $(__USER_DS),%eax # DS/ES contains default USER segment
310 movl %eax,%ds
311 movl %eax,%es
312
313 xorl %eax,%eax # Clear FS/GS and LDT
314 movl %eax,%fs
315 movl %eax,%gs
316 lldt %ax
317 cld # gcc2 wants the direction flag cleared at all times
318#ifdef CONFIG_SMP
319 movb ready, %cl
320 cmpb $1,%cl
321 je 1f # the first CPU calls start_kernel
322 # all other CPUs call initialize_secondary
323 call initialize_secondary
324 jmp L6
3251:
326#endif /* CONFIG_SMP */
327 call start_kernel
328L6:
329 jmp L6 # main should never return here, but
330 # just in case, we know what happens.
331
332/*
333 * We depend on ET to be correct. This checks for 287/387.
334 */
335check_x87:
336 movb $0,X86_HARD_MATH
337 clts
338 fninit
339 fstsw %ax
340 cmpb $0,%al
341 je 1f
342 movl %cr0,%eax /* no coprocessor: have to set bits */
343 xorl $4,%eax /* set EM */
344 movl %eax,%cr0
345 ret
346 ALIGN
3471: movb $1,X86_HARD_MATH
348 .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */
349 ret
350
351/*
352 * setup_idt
353 *
354 * sets up an IDT with 256 interrupt-gate entries, all pointing
355 * to ignore_int. It doesn't actually load the IDT - that can be
356 * done only after paging has been enabled
357 * and the kernel moved to PAGE_OFFSET. Interrupts
358 * are enabled elsewhere, when we can be relatively
359 * sure everything is ok.
360 *
361 * Warning: %esi is live across this function.
362 */
363setup_idt:
364 lea ignore_int,%edx
365 movl $(__KERNEL_CS << 16),%eax
366 movw %dx,%ax /* selector = 0x0010 = cs */
367 movw $0x8E00,%dx /* interrupt gate - dpl=0, present */
368
369 lea idt_table,%edi
370 mov $256,%ecx
371rp_sidt:
372 movl %eax,(%edi)
373 movl %edx,4(%edi)
374 addl $8,%edi
375 dec %ecx
376 jne rp_sidt
377 ret
378
379/* This is the default interrupt "handler" :-) */
380 ALIGN
381ignore_int:
382 cld
383 pushl %eax
384 pushl %ecx
385 pushl %edx
386 pushl %es
387 pushl %ds
388 movl $(__KERNEL_DS),%eax
389 movl %eax,%ds
390 movl %eax,%es
391 pushl 16(%esp)
392 pushl 24(%esp)
393 pushl 32(%esp)
394 pushl 40(%esp)
395 pushl $int_msg
396 call printk
397 addl $(5*4),%esp
398 popl %ds
399 popl %es
400 popl %edx
401 popl %ecx
402 popl %eax
403 iret
404
405/*
406 * Real beginning of normal "text" segment
407 */
408ENTRY(stext)
409ENTRY(_stext)
410
411/*
412 * BSS section
413 */
414.section ".bss.page_aligned","w"
415ENTRY(swapper_pg_dir)
416 .fill 1024,4,0
417ENTRY(empty_zero_page)
418 .fill 4096,1,0
419
420/*
421 * This starts the data section.
422 */
423.data
424
425ENTRY(stack_start)
426 .long init_thread_union+THREAD_SIZE
427 .long __BOOT_DS
428
429ready: .byte 0
430
431int_msg:
432 .asciz "Unknown interrupt or fault at EIP %p %p %p\n"
433
434/*
435 * The IDT and GDT 'descriptors' are strange 48-bit objects
436 * used only by the lidt and lgdt instructions. They are not
437 * like usual segment descriptors - they consist of a 16-bit
438 * segment size and a 32-bit linear address value:
439 */
440
441.globl boot_gdt_descr
442.globl idt_descr
443.globl cpu_gdt_descr
444
445 ALIGN
446# early boot GDT descriptor (must use 1:1 address mapping)
447 .word 0 # 32 bit align gdt_desc.address
448boot_gdt_descr:
449 .word __BOOT_DS+7
450 .long boot_gdt_table - __PAGE_OFFSET
451
452 .word 0 # 32-bit align idt_desc.address
453idt_descr:
454 .word IDT_ENTRIES*8-1 # idt contains 256 entries
455 .long idt_table
456
457# boot GDT descriptor (later on used by CPU#0):
458 .word 0 # 32 bit align gdt_desc.address
459cpu_gdt_descr:
460 .word GDT_ENTRIES*8-1
461 .long cpu_gdt_table
462
463 .fill NR_CPUS-1,8,0 # space for the other GDT descriptors
464
465/*
466 * The boot_gdt_table must mirror the equivalent in setup.S and is
467 * used only for booting.
468 */
469 .align L1_CACHE_BYTES
470ENTRY(boot_gdt_table)
471 .fill GDT_ENTRY_BOOT_CS,8,0
472 .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */
473 .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */
474
475/*
476 * The Global Descriptor Table contains 32 quadwords, per-CPU.
477 */
478 .align PAGE_SIZE_asm
479ENTRY(cpu_gdt_table)
480 .quad 0x0000000000000000 /* NULL descriptor */
481 .quad 0x0000000000000000 /* 0x0b reserved */
482 .quad 0x0000000000000000 /* 0x13 reserved */
483 .quad 0x0000000000000000 /* 0x1b reserved */
484 .quad 0x0000000000000000 /* 0x20 unused */
485 .quad 0x0000000000000000 /* 0x28 unused */
486 .quad 0x0000000000000000 /* 0x33 TLS entry 1 */
487 .quad 0x0000000000000000 /* 0x3b TLS entry 2 */
488 .quad 0x0000000000000000 /* 0x43 TLS entry 3 */
489 .quad 0x0000000000000000 /* 0x4b reserved */
490 .quad 0x0000000000000000 /* 0x53 reserved */
491 .quad 0x0000000000000000 /* 0x5b reserved */
492
493 .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
494 .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
495 .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */
496 .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */
497
498 .quad 0x0000000000000000 /* 0x80 TSS descriptor */
499 .quad 0x0000000000000000 /* 0x88 LDT descriptor */
500
501 /* Segments used for calling PnP BIOS */
502 .quad 0x00c09a0000000000 /* 0x90 32-bit code */
503 .quad 0x00809a0000000000 /* 0x98 16-bit code */
504 .quad 0x0080920000000000 /* 0xa0 16-bit data */
505 .quad 0x0080920000000000 /* 0xa8 16-bit data */
506 .quad 0x0080920000000000 /* 0xb0 16-bit data */
507 /*
508 * The APM segments have byte granularity and their bases
509 * and limits are set at run time.
510 */
511 .quad 0x00409a0000000000 /* 0xb8 APM CS code */
512 .quad 0x00009a0000000000 /* 0xc0 APM CS 16 code (16 bit) */
513 .quad 0x0040920000000000 /* 0xc8 APM DS data */
514
515 .quad 0x0000920000000000 /* 0xd0 - ESPFIX 16-bit SS */
516 .quad 0x0000000000000000 /* 0xd8 - unused */
517 .quad 0x0000000000000000 /* 0xe0 - unused */
518 .quad 0x0000000000000000 /* 0xe8 - unused */
519 .quad 0x0000000000000000 /* 0xf0 - unused */
520 .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */
521
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
new file mode 100644
index 000000000000..14ec354bec92
--- /dev/null
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -0,0 +1,195 @@
1#include <linux/config.h>
2#include <linux/module.h>
3#include <linux/smp.h>
4#include <linux/user.h>
5#include <linux/elfcore.h>
6#include <linux/mca.h>
7#include <linux/sched.h>
8#include <linux/in6.h>
9#include <linux/interrupt.h>
10#include <linux/smp_lock.h>
11#include <linux/pm.h>
12#include <linux/pci.h>
13#include <linux/apm_bios.h>
14#include <linux/kernel.h>
15#include <linux/string.h>
16#include <linux/tty.h>
17#include <linux/highmem.h>
18#include <linux/time.h>
19
20#include <asm/semaphore.h>
21#include <asm/processor.h>
22#include <asm/i387.h>
23#include <asm/uaccess.h>
24#include <asm/checksum.h>
25#include <asm/io.h>
26#include <asm/delay.h>
27#include <asm/irq.h>
28#include <asm/mmx.h>
29#include <asm/desc.h>
30#include <asm/pgtable.h>
31#include <asm/tlbflush.h>
32#include <asm/nmi.h>
33#include <asm/ist.h>
34#include <asm/kdebug.h>
35
36extern void dump_thread(struct pt_regs *, struct user *);
37extern spinlock_t rtc_lock;
38
39/* This is definitely a GPL-only symbol */
40EXPORT_SYMBOL_GPL(cpu_gdt_table);
41
42#if defined(CONFIG_APM_MODULE)
43extern void machine_real_restart(unsigned char *, int);
44EXPORT_SYMBOL(machine_real_restart);
45extern void default_idle(void);
46EXPORT_SYMBOL(default_idle);
47#endif
48
49#ifdef CONFIG_SMP
50extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
51extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
52#endif
53
54#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
55extern struct drive_info_struct drive_info;
56EXPORT_SYMBOL(drive_info);
57#endif
58
59extern unsigned long cpu_khz;
60extern unsigned long get_cmos_time(void);
61
62/* platform dependent support */
63EXPORT_SYMBOL(boot_cpu_data);
64#ifdef CONFIG_DISCONTIGMEM
65EXPORT_SYMBOL(node_data);
66EXPORT_SYMBOL(physnode_map);
67#endif
68#ifdef CONFIG_X86_NUMAQ
69EXPORT_SYMBOL(xquad_portio);
70#endif
71EXPORT_SYMBOL(dump_thread);
72EXPORT_SYMBOL(dump_fpu);
73EXPORT_SYMBOL_GPL(kernel_fpu_begin);
74EXPORT_SYMBOL(__ioremap);
75EXPORT_SYMBOL(ioremap_nocache);
76EXPORT_SYMBOL(iounmap);
77EXPORT_SYMBOL(kernel_thread);
78EXPORT_SYMBOL(pm_idle);
79EXPORT_SYMBOL(pm_power_off);
80EXPORT_SYMBOL(get_cmos_time);
81EXPORT_SYMBOL(cpu_khz);
82EXPORT_SYMBOL(apm_info);
83
84EXPORT_SYMBOL(__down_failed);
85EXPORT_SYMBOL(__down_failed_interruptible);
86EXPORT_SYMBOL(__down_failed_trylock);
87EXPORT_SYMBOL(__up_wakeup);
88/* Networking helper routines. */
89EXPORT_SYMBOL(csum_partial_copy_generic);
90/* Delay loops */
91EXPORT_SYMBOL(__ndelay);
92EXPORT_SYMBOL(__udelay);
93EXPORT_SYMBOL(__delay);
94EXPORT_SYMBOL(__const_udelay);
95
96EXPORT_SYMBOL(__get_user_1);
97EXPORT_SYMBOL(__get_user_2);
98EXPORT_SYMBOL(__get_user_4);
99
100EXPORT_SYMBOL(__put_user_1);
101EXPORT_SYMBOL(__put_user_2);
102EXPORT_SYMBOL(__put_user_4);
103EXPORT_SYMBOL(__put_user_8);
104
105EXPORT_SYMBOL(strpbrk);
106EXPORT_SYMBOL(strstr);
107
108EXPORT_SYMBOL(strncpy_from_user);
109EXPORT_SYMBOL(__strncpy_from_user);
110EXPORT_SYMBOL(clear_user);
111EXPORT_SYMBOL(__clear_user);
112EXPORT_SYMBOL(__copy_from_user_ll);
113EXPORT_SYMBOL(__copy_to_user_ll);
114EXPORT_SYMBOL(strnlen_user);
115
116EXPORT_SYMBOL(dma_alloc_coherent);
117EXPORT_SYMBOL(dma_free_coherent);
118
119#ifdef CONFIG_PCI
120EXPORT_SYMBOL(pci_mem_start);
121#endif
122
123#ifdef CONFIG_PCI_BIOS
124EXPORT_SYMBOL(pcibios_set_irq_routing);
125EXPORT_SYMBOL(pcibios_get_irq_routing_table);
126#endif
127
128#ifdef CONFIG_X86_USE_3DNOW
129EXPORT_SYMBOL(_mmx_memcpy);
130EXPORT_SYMBOL(mmx_clear_page);
131EXPORT_SYMBOL(mmx_copy_page);
132#endif
133
134#ifdef CONFIG_X86_HT
135EXPORT_SYMBOL(smp_num_siblings);
136EXPORT_SYMBOL(cpu_sibling_map);
137#endif
138
139#ifdef CONFIG_SMP
140EXPORT_SYMBOL(cpu_data);
141EXPORT_SYMBOL(cpu_online_map);
142EXPORT_SYMBOL(cpu_callout_map);
143EXPORT_SYMBOL(__write_lock_failed);
144EXPORT_SYMBOL(__read_lock_failed);
145
146/* Global SMP stuff */
147EXPORT_SYMBOL(smp_call_function);
148
149/* TLB flushing */
150EXPORT_SYMBOL(flush_tlb_page);
151#endif
152
153#ifdef CONFIG_X86_IO_APIC
154EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
155#endif
156
157#ifdef CONFIG_MCA
158EXPORT_SYMBOL(machine_id);
159#endif
160
161#ifdef CONFIG_VT
162EXPORT_SYMBOL(screen_info);
163#endif
164
165EXPORT_SYMBOL(get_wchan);
166
167EXPORT_SYMBOL(rtc_lock);
168
169EXPORT_SYMBOL_GPL(set_nmi_callback);
170EXPORT_SYMBOL_GPL(unset_nmi_callback);
171
172#undef memcmp
173extern int memcmp(const void *,const void *,__kernel_size_t);
174EXPORT_SYMBOL(memcmp);
175
176EXPORT_SYMBOL(register_die_notifier);
177#ifdef CONFIG_HAVE_DEC_LOCK
178EXPORT_SYMBOL(_atomic_dec_and_lock);
179#endif
180
181EXPORT_SYMBOL(__PAGE_KERNEL);
182
183#ifdef CONFIG_HIGHMEM
184EXPORT_SYMBOL(kmap);
185EXPORT_SYMBOL(kunmap);
186EXPORT_SYMBOL(kmap_atomic);
187EXPORT_SYMBOL(kunmap_atomic);
188EXPORT_SYMBOL(kmap_atomic_to_page);
189#endif
190
191#if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
192EXPORT_SYMBOL(ist_info);
193#endif
194
195EXPORT_SYMBOL(csum_partial);
diff --git a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c
new file mode 100644
index 000000000000..c55e037f08f7
--- /dev/null
+++ b/arch/i386/kernel/i387.c
@@ -0,0 +1,555 @@
1/*
2 * linux/arch/i386/kernel/i387.c
3 *
4 * Copyright (C) 1994 Linus Torvalds
5 *
6 * Pentium III FXSR, SSE support
7 * General FPU state handling cleanups
8 * Gareth Hughes <gareth@valinux.com>, May 2000
9 */
10
11#include <linux/config.h>
12#include <linux/sched.h>
13#include <asm/processor.h>
14#include <asm/i387.h>
15#include <asm/math_emu.h>
16#include <asm/sigcontext.h>
17#include <asm/user.h>
18#include <asm/ptrace.h>
19#include <asm/uaccess.h>
20
21#ifdef CONFIG_MATH_EMULATION
22#define HAVE_HWFP (boot_cpu_data.hard_math)
23#else
24#define HAVE_HWFP 1
25#endif
26
27static unsigned long mxcsr_feature_mask = 0xffffffff;
28
29void mxcsr_feature_mask_init(void)
30{
31 unsigned long mask = 0;
32 clts();
33 if (cpu_has_fxsr) {
34 memset(&current->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
35 asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave));
36 mask = current->thread.i387.fxsave.mxcsr_mask;
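		/* A zero mxcsr_mask means the CPU predates that fxsave
		 * field; fall back to the documented default mask. */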
37 if (mask == 0) mask = 0x0000ffbf;
38 }
39 mxcsr_feature_mask &= mask;
40 stts();
41}
42
43/*
44 * The _current_ task is using the FPU for the first time,
45 * so initialize it, set the mxcsr to its reset default if
46 * we support XMM instructions, and then remember that the
47 * current task has used the FPU.
48 */
49void init_fpu(struct task_struct *tsk)
50{
51 if (cpu_has_fxsr) {
52 memset(&tsk->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
53 tsk->thread.i387.fxsave.cwd = 0x37f;
54 if (cpu_has_xmm)
55 tsk->thread.i387.fxsave.mxcsr = 0x1f80;
56 } else {
57 memset(&tsk->thread.i387.fsave, 0, sizeof(struct i387_fsave_struct));
58 tsk->thread.i387.fsave.cwd = 0xffff037fu;
59 tsk->thread.i387.fsave.swd = 0xffff0000u;
60 tsk->thread.i387.fsave.twd = 0xffffffffu;
61 tsk->thread.i387.fsave.fos = 0xffff0000u;
62 }
63 /* only the device not available exception or ptrace can call init_fpu */
64 set_stopped_child_used_math(tsk);
65}
66
67/*
68 * FPU lazy state save handling.
69 */
70
71void kernel_fpu_begin(void)
72{
73 struct thread_info *thread = current_thread_info();
74
75 preempt_disable();
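	/* If the user FPU state is live in the registers (TS_USEDFPU),
	 * save it; otherwise just clear CR0.TS so kernel FPU use
	 * won't trap. */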
76 if (thread->status & TS_USEDFPU) {
77 __save_init_fpu(thread->task);
78 return;
79 }
80 clts();
81}
82
83void restore_fpu( struct task_struct *tsk )
84{
85 if ( cpu_has_fxsr ) {
86 asm volatile( "fxrstor %0"
87 : : "m" (tsk->thread.i387.fxsave) );
88 } else {
89 asm volatile( "frstor %0"
90 : : "m" (tsk->thread.i387.fsave) );
91 }
92}
93
94/*
95 * FPU tag word conversions.
96 */
97
98static inline unsigned short twd_i387_to_fxsr( unsigned short twd )
99{
100 unsigned int tmp; /* to avoid 16 bit prefixes in the code */
101
102 /* Transform each pair of bits into 01 (valid) or 00 (empty) */
103 tmp = ~twd;
104 tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
105 /* and move the valid bits to the lower byte. */
106 tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
107 tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
108 tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
109 return tmp;
110}
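/* fxsr keeps only one tag bit per register (1 = used, 0 = empty); the
 * full two-bit i387 tags are reconstructed from the register contents
 * in twd_fxsr_to_i387() below. */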
111
112static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave )
113{
114 struct _fpxreg *st = NULL;
115 unsigned long tos = (fxsave->swd >> 11) & 7;
116 unsigned long twd = (unsigned long) fxsave->twd;
117 unsigned long tag;
118 unsigned long ret = 0xffff0000u;
119 int i;
120
121#define FPREG_ADDR(f, n)	((void *)&(f)->st_space + (n) * 16)
122
123 for ( i = 0 ; i < 8 ; i++ ) {
124 if ( twd & 0x1 ) {
125 st = FPREG_ADDR( fxsave, (i - tos) & 7 );
126
127 switch ( st->exponent & 0x7fff ) {
128 case 0x7fff:
129 tag = 2; /* Special */
130 break;
131 case 0x0000:
132 if ( !st->significand[0] &&
133 !st->significand[1] &&
134 !st->significand[2] &&
135 !st->significand[3] ) {
136 tag = 1; /* Zero */
137 } else {
138 tag = 2; /* Special */
139 }
140 break;
141 default:
142 if ( st->significand[3] & 0x8000 ) {
143 tag = 0; /* Valid */
144 } else {
145 tag = 2; /* Special */
146 }
147 break;
148 }
149 } else {
150 tag = 3; /* Empty */
151 }
152 ret |= (tag << (2 * i));
153 twd = twd >> 1;
154 }
155 return ret;
156}
157
158/*
159 * FPU state interaction.
160 */
161
162unsigned short get_fpu_cwd( struct task_struct *tsk )
163{
164 if ( cpu_has_fxsr ) {
165 return tsk->thread.i387.fxsave.cwd;
166 } else {
167 return (unsigned short)tsk->thread.i387.fsave.cwd;
168 }
169}
170
171unsigned short get_fpu_swd( struct task_struct *tsk )
172{
173 if ( cpu_has_fxsr ) {
174 return tsk->thread.i387.fxsave.swd;
175 } else {
176 return (unsigned short)tsk->thread.i387.fsave.swd;
177 }
178}
179
180#if 0
181unsigned short get_fpu_twd( struct task_struct *tsk )
182{
183 if ( cpu_has_fxsr ) {
184 return tsk->thread.i387.fxsave.twd;
185 } else {
186 return (unsigned short)tsk->thread.i387.fsave.twd;
187 }
188}
189#endif /* 0 */
190
191unsigned short get_fpu_mxcsr( struct task_struct *tsk )
192{
193 if ( cpu_has_xmm ) {
194 return tsk->thread.i387.fxsave.mxcsr;
195 } else {
196 return 0x1f80;
197 }
198}
199
200#if 0
201
202void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd )
203{
204 if ( cpu_has_fxsr ) {
205 tsk->thread.i387.fxsave.cwd = cwd;
206 } else {
207 tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000u);
208 }
209}
210
211void set_fpu_swd( struct task_struct *tsk, unsigned short swd )
212{
213 if ( cpu_has_fxsr ) {
214 tsk->thread.i387.fxsave.swd = swd;
215 } else {
216 tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000u);
217 }
218}
219
220void set_fpu_twd( struct task_struct *tsk, unsigned short twd )
221{
222 if ( cpu_has_fxsr ) {
223 tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd);
224 } else {
225 tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000u);
226 }
227}
228
229#endif /* 0 */
230
231/*
232 * FXSR floating point environment conversions.
233 */
234
235static int convert_fxsr_to_user( struct _fpstate __user *buf,
236 struct i387_fxsave_struct *fxsave )
237{
238 unsigned long env[7];
239 struct _fpreg __user *to;
240 struct _fpxreg *from;
241 int i;
242
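	/* env[0..6] mirrors the 28-byte legacy i387 environment:
	 * cwd, swd, twd, fip, fcs+fop, foo, fos. */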
243 env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul;
244 env[1] = (unsigned long)fxsave->swd | 0xffff0000ul;
245 env[2] = twd_fxsr_to_i387(fxsave);
246 env[3] = fxsave->fip;
247 env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16);
248 env[5] = fxsave->foo;
249 env[6] = fxsave->fos;
250
251 if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) )
252 return 1;
253
254 to = &buf->_st[0];
255 from = (struct _fpxreg *) &fxsave->st_space[0];
256 for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
257 unsigned long __user *t = (unsigned long __user *)to;
258 unsigned long *f = (unsigned long *)from;
259
260 if (__put_user(*f, t) ||
261 __put_user(*(f + 1), t + 1) ||
262 __put_user(from->exponent, &to->exponent))
263 return 1;
264 }
265 return 0;
266}
267
268static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave,
269 struct _fpstate __user *buf )
270{
271 unsigned long env[7];
272 struct _fpxreg *to;
273 struct _fpreg __user *from;
274 int i;
275
276 if ( __copy_from_user( env, buf, 7 * sizeof(long) ) )
277 return 1;
278
279 fxsave->cwd = (unsigned short)(env[0] & 0xffff);
280 fxsave->swd = (unsigned short)(env[1] & 0xffff);
281 fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff));
282 fxsave->fip = env[3];
283 fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16);
284 fxsave->fcs = (env[4] & 0xffff);
285 fxsave->foo = env[5];
286 fxsave->fos = env[6];
287
288 to = (struct _fpxreg *) &fxsave->st_space[0];
289 from = &buf->_st[0];
290 for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
291 unsigned long *t = (unsigned long *)to;
292 unsigned long __user *f = (unsigned long __user *)from;
293
294 if (__get_user(*t, f) ||
295 __get_user(*(t + 1), f + 1) ||
296 __get_user(to->exponent, &from->exponent))
297 return 1;
298 }
299 return 0;
300}
301
302/*
303 * Signal frame handlers.
304 */
305
306static inline int save_i387_fsave( struct _fpstate __user *buf )
307{
308 struct task_struct *tsk = current;
309
310 unlazy_fpu( tsk );
311 tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
312 if ( __copy_to_user( buf, &tsk->thread.i387.fsave,
313 sizeof(struct i387_fsave_struct) ) )
314 return -1;
315 return 1;
316}
317
318static int save_i387_fxsave( struct _fpstate __user *buf )
319{
320 struct task_struct *tsk = current;
321 int err = 0;
322
323 unlazy_fpu( tsk );
324
325 if ( convert_fxsr_to_user( buf, &tsk->thread.i387.fxsave ) )
326 return -1;
327
328 err |= __put_user( tsk->thread.i387.fxsave.swd, &buf->status );
329 err |= __put_user( X86_FXSR_MAGIC, &buf->magic );
330 if ( err )
331 return -1;
332
333 if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
334 sizeof(struct i387_fxsave_struct) ) )
335 return -1;
336 return 1;
337}
338
339int save_i387( struct _fpstate __user *buf )
340{
341 if ( !used_math() )
342 return 0;
343
344 /* This will cause a "finit" to be triggered by the next
345 * attempted FPU operation by the 'current' process.
346 */
347 clear_used_math();
348
349 if ( HAVE_HWFP ) {
350 if ( cpu_has_fxsr ) {
351 return save_i387_fxsave( buf );
352 } else {
353 return save_i387_fsave( buf );
354 }
355 } else {
356 return save_i387_soft( &current->thread.i387.soft, buf );
357 }
358}
359
360static inline int restore_i387_fsave( struct _fpstate __user *buf )
361{
362 struct task_struct *tsk = current;
363 clear_fpu( tsk );
364 return __copy_from_user( &tsk->thread.i387.fsave, buf,
365 sizeof(struct i387_fsave_struct) );
366}
367
368static int restore_i387_fxsave( struct _fpstate __user *buf )
369{
370 int err;
371 struct task_struct *tsk = current;
372 clear_fpu( tsk );
373 err = __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
374 sizeof(struct i387_fxsave_struct) );
375 /* mxcsr reserved bits must be masked to zero for security reasons */
376 tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
377 return err ? 1 : convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf );
378}
379
380int restore_i387( struct _fpstate __user *buf )
381{
382 int err;
383
384 if ( HAVE_HWFP ) {
385 if ( cpu_has_fxsr ) {
386 err = restore_i387_fxsave( buf );
387 } else {
388 err = restore_i387_fsave( buf );
389 }
390 } else {
391 err = restore_i387_soft( &current->thread.i387.soft, buf );
392 }
393 set_used_math();
394 return err;
395}
396
397/*
398 * ptrace request handlers.
399 */
400
401static inline int get_fpregs_fsave( struct user_i387_struct __user *buf,
402 struct task_struct *tsk )
403{
404 return __copy_to_user( buf, &tsk->thread.i387.fsave,
405 sizeof(struct user_i387_struct) );
406}
407
408static inline int get_fpregs_fxsave( struct user_i387_struct __user *buf,
409 struct task_struct *tsk )
410{
411 return convert_fxsr_to_user( (struct _fpstate __user *)buf,
412 &tsk->thread.i387.fxsave );
413}
414
415int get_fpregs( struct user_i387_struct __user *buf, struct task_struct *tsk )
416{
417 if ( HAVE_HWFP ) {
418 if ( cpu_has_fxsr ) {
419 return get_fpregs_fxsave( buf, tsk );
420 } else {
421 return get_fpregs_fsave( buf, tsk );
422 }
423 } else {
424 return save_i387_soft( &tsk->thread.i387.soft,
425 (struct _fpstate __user *)buf );
426 }
427}
428
429static inline int set_fpregs_fsave( struct task_struct *tsk,
430 struct user_i387_struct __user *buf )
431{
432 return __copy_from_user( &tsk->thread.i387.fsave, buf,
433 sizeof(struct user_i387_struct) );
434}
435
436static inline int set_fpregs_fxsave( struct task_struct *tsk,
437 struct user_i387_struct __user *buf )
438{
439 return convert_fxsr_from_user( &tsk->thread.i387.fxsave,
440 (struct _fpstate __user *)buf );
441}
442
443int set_fpregs( struct task_struct *tsk, struct user_i387_struct __user *buf )
444{
445 if ( HAVE_HWFP ) {
446 if ( cpu_has_fxsr ) {
447 return set_fpregs_fxsave( tsk, buf );
448 } else {
449 return set_fpregs_fsave( tsk, buf );
450 }
451 } else {
452 return restore_i387_soft( &tsk->thread.i387.soft,
453 (struct _fpstate __user *)buf );
454 }
455}
456
457int get_fpxregs( struct user_fxsr_struct __user *buf, struct task_struct *tsk )
458{
459 if ( cpu_has_fxsr ) {
460 if (__copy_to_user( buf, &tsk->thread.i387.fxsave,
461 sizeof(struct user_fxsr_struct) ))
462 return -EFAULT;
463 return 0;
464 } else {
465 return -EIO;
466 }
467}
468
469int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct __user *buf )
470{
471 int ret = 0;
472
473 if ( cpu_has_fxsr ) {
474 if (__copy_from_user( &tsk->thread.i387.fxsave, buf,
475 sizeof(struct user_fxsr_struct) ))
476 ret = -EFAULT;
477 /* mxcsr reserved bits must be masked to zero for security reasons */
478 tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
479 } else {
480 ret = -EIO;
481 }
482 return ret;
483}
484
485/*
486 * FPU state for core dumps.
487 */
488
489static inline void copy_fpu_fsave( struct task_struct *tsk,
490 struct user_i387_struct *fpu )
491{
492 memcpy( fpu, &tsk->thread.i387.fsave,
493 sizeof(struct user_i387_struct) );
494}
495
496static inline void copy_fpu_fxsave( struct task_struct *tsk,
497 struct user_i387_struct *fpu )
498{
499 unsigned short *to;
500 unsigned short *from;
501 int i;
502
503 memcpy( fpu, &tsk->thread.i387.fxsave, 7 * sizeof(long) );
504
505 to = (unsigned short *)&fpu->st_space[0];
506 from = (unsigned short *)&tsk->thread.i387.fxsave.st_space[0];
507 for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) {
508 memcpy( to, from, 5 * sizeof(unsigned short) );
509 }
510}
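/*
 * A minimal, self-contained sketch of the re-packing done above (the
 * helper name and raw-byte signature are illustrative only): each
 * FXSAVE st_space slot is 16 bytes wide (10 significant bytes of the
 * 80-bit register plus 6 bytes of padding), while the legacy FSAVE
 * image packs the registers back to back at 10 bytes apiece.
 */
static void example_repack_st_regs(unsigned char fsave_st[8 * 10],
    const unsigned char fxsave_st[8 * 16])
{
    int i;

    for (i = 0; i < 8; i++)
        memcpy(fsave_st + i * 10, fxsave_st + i * 16, 10);
}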
511
512int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu )
513{
514 int fpvalid;
515 struct task_struct *tsk = current;
516
517 fpvalid = !!used_math();
518 if ( fpvalid ) {
519 unlazy_fpu( tsk );
520 if ( cpu_has_fxsr ) {
521 copy_fpu_fxsave( tsk, fpu );
522 } else {
523 copy_fpu_fsave( tsk, fpu );
524 }
525 }
526
527 return fpvalid;
528}
529
530int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu)
531{
532 int fpvalid = !!tsk_used_math(tsk);
533
534 if (fpvalid) {
535 if (tsk == current)
536 unlazy_fpu(tsk);
537 if (cpu_has_fxsr)
538 copy_fpu_fxsave(tsk, fpu);
539 else
540 copy_fpu_fsave(tsk, fpu);
541 }
542 return fpvalid;
543}
544
545int dump_task_extended_fpu(struct task_struct *tsk, struct user_fxsr_struct *fpu)
546{
547 int fpvalid = tsk_used_math(tsk) && cpu_has_fxsr;
548
549 if (fpvalid) {
550 if (tsk == current)
551 unlazy_fpu(tsk);
552 memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(*fpu));
553 }
554 return fpvalid;
555}
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
new file mode 100644
index 000000000000..560bef1afb3b
--- /dev/null
+++ b/arch/i386/kernel/i8259.c
@@ -0,0 +1,429 @@
1#include <linux/config.h>
2#include <linux/errno.h>
3#include <linux/signal.h>
4#include <linux/sched.h>
5#include <linux/ioport.h>
6#include <linux/interrupt.h>
7#include <linux/slab.h>
8#include <linux/random.h>
9#include <linux/smp_lock.h>
10#include <linux/init.h>
11#include <linux/kernel_stat.h>
12#include <linux/sysdev.h>
13#include <linux/bitops.h>
14
15#include <asm/8253pit.h>
16#include <asm/atomic.h>
17#include <asm/system.h>
18#include <asm/io.h>
19#include <asm/irq.h>
20#include <asm/timer.h>
21#include <asm/pgtable.h>
22#include <asm/delay.h>
23#include <asm/desc.h>
24#include <asm/apic.h>
25#include <asm/arch_hooks.h>
26#include <asm/i8259.h>
27
28#include <linux/irq.h>
29
30#include <io_ports.h>
31
32/*
33 * This is the 'legacy' 8259A Programmable Interrupt Controller,
34 * present in the majority of PC/AT boxes.
 35 * plus some generic x86-specific things, insofar as generic
 36 * specifics make any sense at all.
 37 * This file should become arch/i386/kernel/irq.c when the old irq.c
 38 * moves to arch-independent land.
39 */
40
41DEFINE_SPINLOCK(i8259A_lock);
42
43static void end_8259A_irq (unsigned int irq)
44{
45 if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)) &&
46 irq_desc[irq].action)
47 enable_8259A_irq(irq);
48}
49
50#define shutdown_8259A_irq disable_8259A_irq
51
52static void mask_and_ack_8259A(unsigned int);
53
54unsigned int startup_8259A_irq(unsigned int irq)
55{
56 enable_8259A_irq(irq);
57 return 0; /* never anything pending */
58}
59
60static struct hw_interrupt_type i8259A_irq_type = {
61 .typename = "XT-PIC",
62 .startup = startup_8259A_irq,
63 .shutdown = shutdown_8259A_irq,
64 .enable = enable_8259A_irq,
65 .disable = disable_8259A_irq,
66 .ack = mask_and_ack_8259A,
67 .end = end_8259A_irq,
68};
69
70/*
71 * 8259A PIC functions to handle ISA devices:
72 */
73
74/*
 75 * This contains the irq mask for both 8259A irq controllers.
76 */
77unsigned int cached_irq_mask = 0xffff;
78
79/*
 80 * Not all IRQs can be routed through the IO-APIC, e.g. on certain (older)
 81 * boards the timer interrupt is not really connected to any IO-APIC pin;
 82 * it's fed to the master 8259A's IR0 line only.
 83 *
 84 * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
 85 * This 'mixed mode' IRQ handling costs nothing because it's only used
 86 * at IRQ setup time.
87 */
88unsigned long io_apic_irqs;
89
90void disable_8259A_irq(unsigned int irq)
91{
92 unsigned int mask = 1 << irq;
93 unsigned long flags;
94
95 spin_lock_irqsave(&i8259A_lock, flags);
96 cached_irq_mask |= mask;
97 if (irq & 8)
98 outb(cached_slave_mask, PIC_SLAVE_IMR);
99 else
100 outb(cached_master_mask, PIC_MASTER_IMR);
101 spin_unlock_irqrestore(&i8259A_lock, flags);
102}
103
104void enable_8259A_irq(unsigned int irq)
105{
106 unsigned int mask = ~(1 << irq);
107 unsigned long flags;
108
109 spin_lock_irqsave(&i8259A_lock, flags);
110 cached_irq_mask &= mask;
111 if (irq & 8)
112 outb(cached_slave_mask, PIC_SLAVE_IMR);
113 else
114 outb(cached_master_mask, PIC_MASTER_IMR);
115 spin_unlock_irqrestore(&i8259A_lock, flags);
116}
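/*
 * For reference, a sketch of how the single 16-bit cached_irq_mask
 * above decomposes into the two per-PIC masks written to the IMRs
 * (the real cached_master_mask/cached_slave_mask accessors come from
 * <asm/i8259.h>; the helper names below are illustrative only):
 */
static inline unsigned char example_master_mask(unsigned int mask16)
{
    return mask16 & 0xff;           /* IRQ 0-7: master 8259A */
}

static inline unsigned char example_slave_mask(unsigned int mask16)
{
    return (mask16 >> 8) & 0xff;    /* IRQ 8-15: slave 8259A */
}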
117
118int i8259A_irq_pending(unsigned int irq)
119{
120 unsigned int mask = 1<<irq;
121 unsigned long flags;
122 int ret;
123
124 spin_lock_irqsave(&i8259A_lock, flags);
125 if (irq < 8)
126 ret = inb(PIC_MASTER_CMD) & mask;
127 else
128 ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
129 spin_unlock_irqrestore(&i8259A_lock, flags);
130
131 return ret;
132}
133
134void make_8259A_irq(unsigned int irq)
135{
136 disable_irq_nosync(irq);
137 io_apic_irqs &= ~(1<<irq);
138 irq_desc[irq].handler = &i8259A_irq_type;
139 enable_irq(irq);
140}
141
142/*
143 * This function is expected to be called rarely, since switching
144 * between 8259A registers is slow.
145 * The caller must hold the irq controller spinlock
146 * across the call.
147 */
148static inline int i8259A_irq_real(unsigned int irq)
149{
150 int value;
151 int irqmask = 1<<irq;
152
153 if (irq < 8) {
154 outb(0x0B,PIC_MASTER_CMD); /* ISR register */
155 value = inb(PIC_MASTER_CMD) & irqmask;
156 outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */
157 return value;
158 }
159 outb(0x0B,PIC_SLAVE_CMD); /* ISR register */
160 value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
161 outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */
162 return value;
163}
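/*
 * The two magic OCW3 command bytes used above, spelled out (values
 * from the 8259A datasheet; the macro names are illustrative, not
 * part of the kernel API):
 */
#define EXAMPLE_OCW3_READ_ISR 0x0b /* next CMD-port read returns the In-Service Register */
#define EXAMPLE_OCW3_READ_IRR 0x0a /* next CMD-port read returns the Interrupt Request Register */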
164
165/*
166 * Careful! The 8259A is a fragile beast; it pretty
167 * much _has_ to be done exactly like this (mask it
168 * first, _then_ send the EOI, and the order of EOI
169 * to the two 8259s is important!).
170 */
171static void mask_and_ack_8259A(unsigned int irq)
172{
173 unsigned int irqmask = 1 << irq;
174 unsigned long flags;
175
176 spin_lock_irqsave(&i8259A_lock, flags);
177 /*
178 * Lightweight spurious IRQ detection. We do not want
179 * to overdo spurious IRQ handling - it's usually a sign
180 * of hardware problems, so we only do the checks we can
181 * do without slowing down good hardware unnecessarily.
182 *
183 * Note that IRQ7 and IRQ15 (the two spurious IRQs
184 * usually resulting from the 8259A-1|2 PICs) occur
185 * even if the IRQ is masked in the 8259A. Thus we
186 * can check spurious 8259A IRQs without doing the
187 * quite slow i8259A_irq_real() call for every IRQ.
188 * This does not cover 100% of spurious interrupts,
189 * but should be enough to warn the user that there
190 * is something bad going on ...
191 */
192 if (cached_irq_mask & irqmask)
193 goto spurious_8259A_irq;
194 cached_irq_mask |= irqmask;
195
196handle_real_irq:
197 if (irq & 8) {
198 inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */
199 outb(cached_slave_mask, PIC_SLAVE_IMR);
200 outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */
201 outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */
202 } else {
203 inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */
204 outb(cached_master_mask, PIC_MASTER_IMR);
205 outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI' to master */
206 }
207 spin_unlock_irqrestore(&i8259A_lock, flags);
208 return;
209
210spurious_8259A_irq:
211 /*
212 * this is the slow path - should happen rarely.
213 */
214 if (i8259A_irq_real(irq))
215 /*
216 * oops, the IRQ _is_ in service according to the
217 * 8259A - not spurious, go handle it.
218 */
219 goto handle_real_irq;
220
221 {
222 static int spurious_irq_mask;
223 /*
224 * At this point we can be sure the IRQ is spurious;
225 * let's ACK and report it (once per IRQ).
226 */
227 if (!(spurious_irq_mask & irqmask)) {
228 printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq);
229 spurious_irq_mask |= irqmask;
230 }
231 atomic_inc(&irq_err_count);
232 /*
233 * Theoretically we do not have to handle this IRQ,
234 * but in Linux this does not cause problems and is
235 * simpler for us.
236 */
237 goto handle_real_irq;
238 }
239}
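/*
 * A worked example of the specific-EOI encoding used above (OCW2 is
 * 0x60 | level): acknowledging IRQ11 writes 0x60 + (11 & 7) = 0x63 to
 * the slave, then 0x60 + PIC_CASCADE_IR to the master for the cascade
 * line, in exactly that order. As a standalone sketch (helper name
 * illustrative only, without the locking and masking done above):
 */
static inline void example_specific_eoi(unsigned int irq)
{
    if (irq & 8) {
        outb(0x60 + (irq & 7), PIC_SLAVE_CMD);       /* EOI on slave */
        outb(0x60 + PIC_CASCADE_IR, PIC_MASTER_CMD); /* EOI cascade IRQ2 */
    } else {
        outb(0x60 + irq, PIC_MASTER_CMD);            /* EOI on master */
    }
}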
240
241static char irq_trigger[2];
242/*
243 * ELCR registers (0x4d0, 0x4d1) control the edge/level trigger mode of the ISA IRQs
244 */
245static void restore_ELCR(char *trigger)
246{
247 outb(trigger[0], 0x4d0);
248 outb(trigger[1], 0x4d1);
249}
250
251static void save_ELCR(char *trigger)
252{
253 /* IRQ 0,1,2,8,13 are marked as reserved */
254 trigger[0] = inb(0x4d0) & 0xF8;
255 trigger[1] = inb(0x4d1) & 0xDE;
256}
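/*
 * The two masks above, decoded bit by bit (bit n of port 0x4d0 covers
 * IRQ n, bit n of port 0x4d1 covers IRQ 8+n):
 *
 *   0xF8 = 1111 1000 -> drops IRQ 0, 1, 2 (timer, keyboard, cascade)
 *   0xDE = 1101 1110 -> drops IRQ 8, 13   (RTC, FPU)
 *
 * which matches the "IRQ 0,1,2,8,13 are marked as reserved" note.
 */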
257
258static int i8259A_resume(struct sys_device *dev)
259{
260 init_8259A(0);
261 restore_ELCR(irq_trigger);
262 return 0;
263}
264
265static int i8259A_suspend(struct sys_device *dev, u32 state)
266{
267 save_ELCR(irq_trigger);
268 return 0;
269}
270
271static struct sysdev_class i8259_sysdev_class = {
272 set_kset_name("i8259"),
273 .suspend = i8259A_suspend,
274 .resume = i8259A_resume,
275};
276
277static struct sys_device device_i8259A = {
278 .id = 0,
279 .cls = &i8259_sysdev_class,
280};
281
282static int __init i8259A_init_sysfs(void)
283{
284 int error = sysdev_class_register(&i8259_sysdev_class);
285 if (!error)
286 error = sysdev_register(&device_i8259A);
287 return error;
288}
289
290device_initcall(i8259A_init_sysfs);
291
292void init_8259A(int auto_eoi)
293{
294 unsigned long flags;
295
296 spin_lock_irqsave(&i8259A_lock, flags);
297
298 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
299 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
300
301 /*
302 * outb_p - this has to work on a wide range of PC hardware.
303 */
304 outb_p(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
305 outb_p(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
306 outb_p(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */
307 if (auto_eoi) /* master does Auto EOI */
308 outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
309 else /* master expects normal EOI */
310 outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
311
312 outb_p(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */
313 outb_p(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
314 outb_p(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */
315 outb_p(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */
316 if (auto_eoi)
317 /*
318 * in AEOI mode we just have to mask the interrupt
319 * when acking.
320 */
321 i8259A_irq_type.ack = disable_8259A_irq;
322 else
323 i8259A_irq_type.ack = mask_and_ack_8259A;
324
325 udelay(100); /* wait for 8259A to initialize */
326
327 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
328 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
329
330 spin_unlock_irqrestore(&i8259A_lock, flags);
331}
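/*
 * After this initialisation the ISA IRQ-to-vector mapping is linear:
 * ICW2 above maps master IR0-7 to vectors 0x20-0x27 and slave IR0-7
 * to 0x28-0x2f. As a worked one-liner (not a kernel macro):
 */
static inline unsigned int example_isa_irq_to_vector(unsigned int irq)
{
    return 0x20 + irq; /* valid for irq 0..15 */
}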
332
333/*
334 * Note that on a 486, we don't want to do a SIGFPE on an irq13
335 * as the irq is unreliable, and exception 16 works correctly
336 * (i.e. as explained in the Intel literature). On a 386, you
337 * can't use exception 16 due to bad IBM design, so we have to
338 * rely on the less exact irq13.
339 *
340 * Careful.. Not only is IRQ13 unreliable, it also
341 * leads to races. IBM designers who came up with it should
342 * be shot.
343 */
344
345
346static irqreturn_t math_error_irq(int cpl, void *dev_id, struct pt_regs *regs)
347{
348 extern void math_error(void __user *);
349 outb(0,0xF0);
350 if (ignore_fpu_irq || !boot_cpu_data.hard_math)
351 return IRQ_NONE;
352 math_error((void __user *)regs->eip);
353 return IRQ_HANDLED;
354}
355
356/*
357 * New motherboards sometimes make IRQ 13 be a PCI interrupt,
358 * so allow interrupt sharing.
359 */
360static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL };
361
362void __init init_ISA_irqs (void)
363{
364 int i;
365
366#ifdef CONFIG_X86_LOCAL_APIC
367 init_bsp_APIC();
368#endif
369 init_8259A(0);
370
371 for (i = 0; i < NR_IRQS; i++) {
372 irq_desc[i].status = IRQ_DISABLED;
373 irq_desc[i].action = NULL;
374 irq_desc[i].depth = 1;
375
376 if (i < 16) {
377 /*
378 * 16 old-style INTA-cycle interrupts:
379 */
380 irq_desc[i].handler = &i8259A_irq_type;
381 } else {
382 /*
383 * 'high' PCI IRQs filled in on demand
384 */
385 irq_desc[i].handler = &no_irq_type;
386 }
387 }
388}
389
390void __init init_IRQ(void)
391{
392 int i;
393
394 /* all the setup before the call gates are initialised */
395 pre_intr_init_hook();
396
397 /*
398 * Cover the whole vector space; no vector can escape
399 * us. (Some of these will be overridden and become
400 * 'special' SMP interrupts.)
401 */
402 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
403 int vector = FIRST_EXTERNAL_VECTOR + i;
404 if (i >= NR_IRQS)
405 break;
406 if (vector != SYSCALL_VECTOR)
407 set_intr_gate(vector, interrupt[i]);
408 }
409
410 /* Setup after the call gates are initialised (usually adds in
411 * the architecture-specific gates)
412 */
413 intr_init_hook();
414
415 /*
416 * Set the clock to HZ Hz; we already have a valid
417 * vector now:
418 */
419 setup_pit_timer();
420
421 /*
422 * External FPU? Set up irq13 if so, for
423 * original braindamaged IBM FERR coupling.
424 */
425 if (boot_cpu_data.hard_math && !cpu_has_fpu)
426 setup_irq(FPU_IRQ, &fpu_irq);
427
428 irq_ctx_init(smp_processor_id());
429}
diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c
new file mode 100644
index 000000000000..9caa8e8db80c
--- /dev/null
+++ b/arch/i386/kernel/init_task.c
@@ -0,0 +1,46 @@
1#include <linux/mm.h>
2#include <linux/module.h>
3#include <linux/sched.h>
4#include <linux/init.h>
5#include <linux/init_task.h>
6#include <linux/fs.h>
7#include <linux/mqueue.h>
8
9#include <asm/uaccess.h>
10#include <asm/pgtable.h>
11#include <asm/desc.h>
12
13static struct fs_struct init_fs = INIT_FS;
14static struct files_struct init_files = INIT_FILES;
15static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
16static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
17struct mm_struct init_mm = INIT_MM(init_mm);
18
19EXPORT_SYMBOL(init_mm);
20
21/*
22 * Initial thread structure.
23 *
24 * We need to make sure that this is THREAD_SIZE aligned due to the
25 * way process stacks are handled. This is done by having a special
26 * "init_task" linker map entry..
27 */
28union thread_union init_thread_union
29 __attribute__((__section__(".data.init_task"))) =
30 { INIT_THREAD_INFO(init_task) };
31
32/*
33 * Initial task structure.
34 *
35 * All other task structs will be allocated on slabs in fork.c
36 */
37struct task_struct init_task = INIT_TASK(init_task);
38
39EXPORT_SYMBOL(init_task);
40
41/*
42 * per-CPU TSS segments. Threads are completely 'soft' on Linux,
43 * no more per-task TSS's.
44 */
45DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS;
46
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
new file mode 100644
index 000000000000..9c1350e811d0
--- /dev/null
+++ b/arch/i386/kernel/io_apic.c
@@ -0,0 +1,2545 @@
1/*
2 * Intel IO-APIC support for multi-Pentium hosts.
3 *
4 * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
5 *
6 * Many thanks to Stig Venaas for trying out countless experimental
7 * patches and reporting/debugging problems patiently!
8 *
9 * (c) 1999, Multiple IO-APIC support, developed by
10 * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
11 * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
12 * further tested and cleaned up by Zach Brown <zab@redhat.com>
13 * and Ingo Molnar <mingo@redhat.com>
14 *
15 * Fixes
16 * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
17 * thanks to Eric Gilmore
18 * and Rolf G. Tews
19 * for testing these extensively
20 * Paul Diefenbaugh : Added full ACPI support
21 */
22
23#include <linux/mm.h>
24#include <linux/irq.h>
25#include <linux/interrupt.h>
26#include <linux/init.h>
27#include <linux/delay.h>
28#include <linux/sched.h>
29#include <linux/config.h>
30#include <linux/smp_lock.h>
31#include <linux/mc146818rtc.h>
32#include <linux/compiler.h>
33#include <linux/acpi.h>
34
35#include <linux/sysdev.h>
36#include <asm/io.h>
37#include <asm/smp.h>
38#include <asm/desc.h>
39#include <asm/timer.h>
40
41#include <mach_apic.h>
42
43#include "io_ports.h"
44
45int (*ioapic_renumber_irq)(int ioapic, int irq);
46atomic_t irq_mis_count;
47
48static DEFINE_SPINLOCK(ioapic_lock);
49
50/*
 51 * Is the SiS APIC rmw bug present?
52 * -1 = don't know, 0 = no, 1 = yes
53 */
54int sis_apic_bug = -1;
55
56/*
57 * # of IRQ routing registers
58 */
59int nr_ioapic_registers[MAX_IO_APICS];
60
61/*
 62 * A rough estimate of how many shared IRQs there are; it can
 63 * be changed at any time.
64 */
65#define MAX_PLUS_SHARED_IRQS NR_IRQS
66#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
67
68/*
 69 * This is performance-critical; we want to do it in O(1).
 70 *
 71 * The indexing order of this array favors 1:1 mappings
 72 * between pins and IRQs.
73 */
74
75static struct irq_pin_list {
76 int apic, pin, next;
77} irq_2_pin[PIN_MAP_SIZE];
78
79int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
80#ifdef CONFIG_PCI_MSI
81#define vector_to_irq(vector) \
82 (platform_legacy_irq(vector) ? vector : vector_irq[vector])
83#else
84#define vector_to_irq(vector) (vector)
85#endif
86
87/*
88 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
89 * shared ISA-space IRQs, so we have to support them. We are super
90 * fast in the common case, and fast for shared ISA-space IRQs.
91 */
92static void add_pin_to_irq(unsigned int irq, int apic, int pin)
93{
94 static int first_free_entry = NR_IRQS;
95 struct irq_pin_list *entry = irq_2_pin + irq;
96
97 while (entry->next)
98 entry = irq_2_pin + entry->next;
99
100 if (entry->pin != -1) {
101 entry->next = first_free_entry;
102 entry = irq_2_pin + entry->next;
103 if (++first_free_entry >= PIN_MAP_SIZE)
104 panic("io_apic.c: whoops");
105 }
106 entry->apic = apic;
107 entry->pin = pin;
108}
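/*
 * A sketch of walking the list-in-array built above: the first pin of
 * IRQ n lives in irq_2_pin[n] itself, and further pins sharing that
 * IRQ are chained through ->next into the spill area starting at
 * index NR_IRQS. The callback-based helper below is illustrative
 * only; the real walkers are open-coded, as in __modify_IO_APIC_irq:
 */
static inline void example_for_each_pin(unsigned int irq,
    void (*fn)(int apic, int pin))
{
    struct irq_pin_list *entry = irq_2_pin + irq;

    for (;;) {
        if (entry->pin == -1)
            break;
        fn(entry->apic, entry->pin);
        if (!entry->next)
            break;
        entry = irq_2_pin + entry->next;
    }
}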
109
110/*
111 * Reroute an IRQ to a different pin.
112 */
113static void __init replace_pin_at_irq(unsigned int irq,
114 int oldapic, int oldpin,
115 int newapic, int newpin)
116{
117 struct irq_pin_list *entry = irq_2_pin + irq;
118
119 while (1) {
120 if (entry->apic == oldapic && entry->pin == oldpin) {
121 entry->apic = newapic;
122 entry->pin = newpin;
123 }
124 if (!entry->next)
125 break;
126 entry = irq_2_pin + entry->next;
127 }
128}
129
130static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
131{
132 struct irq_pin_list *entry = irq_2_pin + irq;
133 unsigned int pin, reg;
134
135 for (;;) {
136 pin = entry->pin;
137 if (pin == -1)
138 break;
139 reg = io_apic_read(entry->apic, 0x10 + pin*2);
140 reg &= ~disable;
141 reg |= enable;
142 io_apic_modify(entry->apic, 0x10 + pin*2, reg);
143 if (!entry->next)
144 break;
145 entry = irq_2_pin + entry->next;
146 }
147}
148
149/* mask = 1 */
150static void __mask_IO_APIC_irq (unsigned int irq)
151{
152 __modify_IO_APIC_irq(irq, 0x00010000, 0);
153}
154
155/* mask = 0 */
156static void __unmask_IO_APIC_irq (unsigned int irq)
157{
158 __modify_IO_APIC_irq(irq, 0, 0x00010000);
159}
160
161/* mask = 1, trigger = 0 */
162static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
163{
164 __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
165}
166
167/* mask = 0, trigger = 1 */
168static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
169{
170 __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
171}
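/*
 * The magic constants in the four helpers above are bits in the low
 * dword of an I/O APIC redirection table entry: 0x00010000 is the
 * mask bit (bit 16) and 0x00008000 the level-trigger bit (bit 15).
 * Spelled out (macro names illustrative only):
 */
#define EXAMPLE_IOAPIC_RTE_MASKED (1u << 16) /* interrupt masked */
#define EXAMPLE_IOAPIC_RTE_LEVEL  (1u << 15) /* level triggered */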
172
173static void mask_IO_APIC_irq (unsigned int irq)
174{
175 unsigned long flags;
176
177 spin_lock_irqsave(&ioapic_lock, flags);
178 __mask_IO_APIC_irq(irq);
179 spin_unlock_irqrestore(&ioapic_lock, flags);
180}
181
182static void unmask_IO_APIC_irq (unsigned int irq)
183{
184 unsigned long flags;
185
186 spin_lock_irqsave(&ioapic_lock, flags);
187 __unmask_IO_APIC_irq(irq);
188 spin_unlock_irqrestore(&ioapic_lock, flags);
189}
190
191static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
192{
193 struct IO_APIC_route_entry entry;
194 unsigned long flags;
195
196 /* Check delivery_mode to be sure we're not clearing an SMI pin */
197 spin_lock_irqsave(&ioapic_lock, flags);
198 *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
199 *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
200 spin_unlock_irqrestore(&ioapic_lock, flags);
201 if (entry.delivery_mode == dest_SMI)
202 return;
203
204 /*
205 * Disable it in the IO-APIC irq-routing table:
206 */
207 memset(&entry, 0, sizeof(entry));
208 entry.mask = 1;
209 spin_lock_irqsave(&ioapic_lock, flags);
210 io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
211 io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
212 spin_unlock_irqrestore(&ioapic_lock, flags);
213}
214
215static void clear_IO_APIC (void)
216{
217 int apic, pin;
218
219 for (apic = 0; apic < nr_ioapics; apic++)
220 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
221 clear_IO_APIC_pin(apic, pin);
222}
223
224static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
225{
226 unsigned long flags;
227 int pin;
228 struct irq_pin_list *entry = irq_2_pin + irq;
229 unsigned int apicid_value;
230
231 apicid_value = cpu_mask_to_apicid(cpumask);
232 /* Prepare to do the io_apic_write */
233 apicid_value = apicid_value << 24;
234 spin_lock_irqsave(&ioapic_lock, flags);
235 for (;;) {
236 pin = entry->pin;
237 if (pin == -1)
238 break;
239 io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
240 if (!entry->next)
241 break;
242 entry = irq_2_pin + entry->next;
243 }
244 spin_unlock_irqrestore(&ioapic_lock, flags);
245}
246
247#if defined(CONFIG_IRQBALANCE)
248# include <asm/processor.h> /* kernel_thread() */
249# include <linux/kernel_stat.h> /* kstat */
250# include <linux/slab.h> /* kmalloc() */
251# include <linux/timer.h> /* time_after() */
252
253# ifdef CONFIG_BALANCED_IRQ_DEBUG
254# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
255# define Dprintk(x...) do { TDprintk(x); } while (0)
256# else
257# define TDprintk(x...)
258# define Dprintk(x...)
259# endif
260
261cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
262
263#define IRQBALANCE_CHECK_ARCH -999
264static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
265static int physical_balance = 0;
266
267static struct irq_cpu_info {
268 unsigned long * last_irq;
269 unsigned long * irq_delta;
270 unsigned long irq;
271} irq_cpu_data[NR_CPUS];
272
273#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
274#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq])
275#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq])
276
277#define IDLE_ENOUGH(cpu,now) \
278 (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
279
280#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
281
282#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
283
284#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
285#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
286#define BALANCED_IRQ_MORE_DELTA (HZ/10)
287#define BALANCED_IRQ_LESS_DELTA (HZ)
288
289static long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
290
291static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
292 unsigned long now, int direction)
293{
294 int search_idle = 1;
295 int cpu = curr_cpu;
296
297 goto inside;
298
299 do {
300 if (unlikely(cpu == curr_cpu))
301 search_idle = 0;
302inside:
303 if (direction == 1) {
304 cpu++;
305 if (cpu >= NR_CPUS)
306 cpu = 0;
307 } else {
308 cpu--;
309 if (cpu == -1)
310 cpu = NR_CPUS-1;
311 }
312 } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
313 (search_idle && !IDLE_ENOUGH(cpu,now)));
314
315 return cpu;
316}
317
318static inline void balance_irq(int cpu, int irq)
319{
320 unsigned long now = jiffies;
321 cpumask_t allowed_mask;
322 unsigned int new_cpu;
323
324 if (irqbalance_disabled)
325 return;
326
327 cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
328 new_cpu = move(cpu, allowed_mask, now, 1);
329 if (cpu != new_cpu) {
330 irq_desc_t *desc = irq_desc + irq;
331 unsigned long flags;
332
333 spin_lock_irqsave(&desc->lock, flags);
334 pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
335 spin_unlock_irqrestore(&desc->lock, flags);
336 }
337}
338
339static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
340{
341 int i, j;
342 Dprintk("Rotating IRQs among CPUs.\n");
343 for (i = 0; i < NR_CPUS; i++) {
344 for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
345 if (!irq_desc[j].action)
346 continue;
347 /* Is it a significant load ? */
348 if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
349 useful_load_threshold)
350 continue;
351 balance_irq(i, j);
352 }
353 }
354 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
355 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
356 return;
357}
358
359static void do_irq_balance(void)
360{
361 int i, j;
362 unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
363 unsigned long move_this_load = 0;
364 int max_loaded = 0, min_loaded = 0;
365 int load;
366 unsigned long useful_load_threshold = balanced_irq_interval + 10;
367 int selected_irq;
368 int tmp_loaded, first_attempt = 1;
369 unsigned long tmp_cpu_irq;
370 unsigned long imbalance = 0;
371 cpumask_t allowed_mask, target_cpu_mask, tmp;
372
373 for (i = 0; i < NR_CPUS; i++) {
374 int package_index;
375 CPU_IRQ(i) = 0;
376 if (!cpu_online(i))
377 continue;
378 package_index = CPU_TO_PACKAGEINDEX(i);
379 for (j = 0; j < NR_IRQS; j++) {
380 unsigned long value_now, delta;
381 /* Is this an active IRQ? */
382 if (!irq_desc[j].action)
383 continue;
384 if ( package_index == i )
385 IRQ_DELTA(package_index,j) = 0;
386 /* Determine the total count per processor per IRQ */
387 value_now = (unsigned long) kstat_cpu(i).irqs[j];
388
389 /* Determine the activity per processor per IRQ */
390 delta = value_now - LAST_CPU_IRQ(i,j);
391
392 /* Update last_cpu_irq[][] for the next time */
393 LAST_CPU_IRQ(i,j) = value_now;
394
395 /* Ignore IRQs whose rate is less than the clock */
396 if (delta < useful_load_threshold)
397 continue;
398 /* update the load for the processor or package total */
399 IRQ_DELTA(package_index,j) += delta;
400
401 /* Keep track of the higher numbered sibling as well */
402 if (i != package_index)
403 CPU_IRQ(i) += delta;
404 /*
405 * We have sibling A and sibling B in the package
406 *
407 * cpu_irq[A] = load for cpu A + load for cpu B
408 * cpu_irq[B] = load for cpu B
409 */
410 CPU_IRQ(package_index) += delta;
411 }
412 }
413 /* Find the least loaded processor package */
414 for (i = 0; i < NR_CPUS; i++) {
415 if (!cpu_online(i))
416 continue;
417 if (i != CPU_TO_PACKAGEINDEX(i))
418 continue;
419 if (min_cpu_irq > CPU_IRQ(i)) {
420 min_cpu_irq = CPU_IRQ(i);
421 min_loaded = i;
422 }
423 }
424 max_cpu_irq = ULONG_MAX;
425
426tryanothercpu:
427 /* Look for heaviest loaded processor.
428 * We may come back to get the next heaviest loaded processor.
429 * Skip processors with trivial loads.
430 */
431 tmp_cpu_irq = 0;
432 tmp_loaded = -1;
433 for (i = 0; i < NR_CPUS; i++) {
434 if (!cpu_online(i))
435 continue;
436 if (i != CPU_TO_PACKAGEINDEX(i))
437 continue;
438 if (max_cpu_irq <= CPU_IRQ(i))
439 continue;
440 if (tmp_cpu_irq < CPU_IRQ(i)) {
441 tmp_cpu_irq = CPU_IRQ(i);
442 tmp_loaded = i;
443 }
444 }
445
446 if (tmp_loaded == -1) {
447 /* With a small number of heavy interrupt sources, some of
448 * the cpus end up loaded too much. We use Ingo's original
449 * approach and rotate them around.
450 */
451 if (!first_attempt && imbalance >= useful_load_threshold) {
452 rotate_irqs_among_cpus(useful_load_threshold);
453 return;
454 }
455 goto not_worth_the_effort;
456 }
457
458 first_attempt = 0; /* heaviest search */
459 max_cpu_irq = tmp_cpu_irq; /* load */
460 max_loaded = tmp_loaded; /* processor */
461 imbalance = (max_cpu_irq - min_cpu_irq) / 2;
462
463 Dprintk("max_loaded cpu = %d\n", max_loaded);
464 Dprintk("min_loaded cpu = %d\n", min_loaded);
465 Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
466 Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
467 Dprintk("load imbalance = %lu\n", imbalance);
468
469 /* If the imbalance is less than roughly an eighth of the max
470 * load (max_cpu_irq >> 3), we hit diminishing returns: quit.
471 */
472 if (imbalance < (max_cpu_irq >> 3)) {
473 Dprintk("Imbalance too trivial\n");
474 goto not_worth_the_effort;
475 }
476
477tryanotherirq:
478 /* if we select an IRQ to move that can't go where we want, then
479 * see if there is another one to try.
480 */
481 move_this_load = 0;
482 selected_irq = -1;
483 for (j = 0; j < NR_IRQS; j++) {
484 /* Is this an active IRQ? */
485 if (!irq_desc[j].action)
486 continue;
487 if (imbalance <= IRQ_DELTA(max_loaded,j))
488 continue;
489 /* Try to find the IRQ that is closest to the imbalance
490 * without going over.
491 */
492 if (move_this_load < IRQ_DELTA(max_loaded,j)) {
493 move_this_load = IRQ_DELTA(max_loaded,j);
494 selected_irq = j;
495 }
496 }
497 if (selected_irq == -1) {
498 goto tryanothercpu;
499 }
500
501 imbalance = move_this_load;
502
503 /* For the physical_balance case, we accumulated both load
504 * values in one of the siblings' cpu_irq[] slots,
505 * so the same code can be used for physical and logical
506 * processors as much as possible.
507 *
508 * NOTE: the cpu_irq[] array holds the sum of the load for
509 * sibling A and sibling B in the slot for the lowest numbered
510 * sibling (A), _AND_ the load for sibling B in the slot for
511 * the higher numbered sibling.
512 *
513 * We seek the least loaded sibling by making the comparison
514 * (A+B)/2 vs B
515 */
516 load = CPU_IRQ(min_loaded) >> 1;
517 for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
518 if (load > CPU_IRQ(j)) {
519 /* This won't change cpu_sibling_map[min_loaded] */
520 load = CPU_IRQ(j);
521 min_loaded = j;
522 }
523 }
524
525 cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
526 target_cpu_mask = cpumask_of_cpu(min_loaded);
527 cpus_and(tmp, target_cpu_mask, allowed_mask);
528
529 if (!cpus_empty(tmp)) {
530 irq_desc_t *desc = irq_desc + selected_irq;
531 unsigned long flags;
532
533 Dprintk("irq = %d moved to cpu = %d\n",
534 selected_irq, min_loaded);
535 /* mark for change destination */
536 spin_lock_irqsave(&desc->lock, flags);
537 pending_irq_balance_cpumask[selected_irq] =
538 cpumask_of_cpu(min_loaded);
539 spin_unlock_irqrestore(&desc->lock, flags);
540 /* Since we made a change, come back sooner to
541 * check for more variation.
542 */
543 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
544 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
545 return;
546 }
547 goto tryanotherirq;
548
549not_worth_the_effort:
550 /*
551 * If we did not find an IRQ to move, adjust the time interval
552 * upward.
553 */
554 balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
555 balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
556 Dprintk("IRQ worth rotating not found\n");
557 return;
558}
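/*
 * A worked example of the sibling accounting used above: with
 * sibling loads A = 300 and B = 100, the accounting loop leaves
 * CPU_IRQ(A-slot) = 400 (A + B) and CPU_IRQ(B-slot) = 100, so the
 * least-loaded-sibling test compares (A + B)/2 = 200 against
 * B = 100 and correctly picks sibling B as the target.
 */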
559
560static int balanced_irq(void *unused)
561{
562 int i;
563 unsigned long prev_balance_time = jiffies;
564 long time_remaining = balanced_irq_interval;
565
566 daemonize("kirqd");
567
568 /* push everything to CPU 0 to give us a starting point. */
569 for (i = 0 ; i < NR_IRQS ; i++) {
570 pending_irq_balance_cpumask[i] = cpumask_of_cpu(0);
571 }
572
573 for ( ; ; ) {
574 set_current_state(TASK_INTERRUPTIBLE);
575 time_remaining = schedule_timeout(time_remaining);
576 try_to_freeze(PF_FREEZE);
577 if (time_after(jiffies,
578 prev_balance_time+balanced_irq_interval)) {
579 do_irq_balance();
580 prev_balance_time = jiffies;
581 time_remaining = balanced_irq_interval;
582 }
583 }
584 return 0;
585}
586
587static int __init balanced_irq_init(void)
588{
589 int i;
590 struct cpuinfo_x86 *c;
591 cpumask_t tmp;
592
593 cpus_shift_right(tmp, cpu_online_map, 2);
594 c = &boot_cpu_data;
595 /* When not overridden on the command line, ask the subarchitecture. */
596 if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
597 irqbalance_disabled = NO_BALANCE_IRQ;
598 if (irqbalance_disabled)
599 return 0;
600
601 /* disable irqbalance completely if there is only one processor online */
602 if (num_online_cpus() < 2) {
603 irqbalance_disabled = 1;
604 return 0;
605 }
606 /*
607 * Enable physical balance only if more than 1 physical processor
608 * is present
609 */
610 if (smp_num_siblings > 1 && !cpus_empty(tmp))
611 physical_balance = 1;
612
613 for (i = 0; i < NR_CPUS; i++) {
614 if (!cpu_online(i))
615 continue;
616 irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
617 irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
618 if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
619 printk(KERN_ERR "balanced_irq_init: out of memory");
620 goto failed;
621 }
622 memset(irq_cpu_data[i].irq_delta, 0, sizeof(unsigned long) * NR_IRQS);
623 memset(irq_cpu_data[i].last_irq, 0, sizeof(unsigned long) * NR_IRQS);
624 }
625
626 printk(KERN_INFO "Starting balanced_irq\n");
627 if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0)
628 return 0;
629 else
630 printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq\n");
631failed:
632 for (i = 0; i < NR_CPUS; i++) {
633 if (irq_cpu_data[i].irq_delta)
634 kfree(irq_cpu_data[i].irq_delta);
635 if (irq_cpu_data[i].last_irq)
636 kfree(irq_cpu_data[i].last_irq);
637 }
638 return 0;
639}
640
641int __init irqbalance_disable(char *str)
642{
643 irqbalance_disabled = 1;
644 return 0;
645}
646
647__setup("noirqbalance", irqbalance_disable);
648
649static inline void move_irq(int irq)
650{
651 /* note - we hold the desc->lock */
652 if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
653 set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
654 cpus_clear(pending_irq_balance_cpumask[irq]);
655 }
656}
657
658late_initcall(balanced_irq_init);
659
660#else /* !CONFIG_IRQBALANCE */
661static inline void move_irq(int irq) { }
662#endif /* CONFIG_IRQBALANCE */
663
664#ifndef CONFIG_SMP
665void fastcall send_IPI_self(int vector)
666{
667 unsigned int cfg;
668
669 /*
670 * Wait for idle.
671 */
672 apic_wait_icr_idle();
673 cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
674 /*
675 * Send the IPI. The write to APIC_ICR fires this off.
676 */
677 apic_write_around(APIC_ICR, cfg);
678}
679#endif /* !CONFIG_SMP */
680
681
682/*
683 * Support for broken MP BIOSes: enables hand-redirection of PIRQ0-7 to
684 * specific CPU-side IRQs.
685 */
686
687#define MAX_PIRQS 8
688static int pirq_entries [MAX_PIRQS];
689static int pirqs_enabled;
690int skip_ioapic_setup;
691
692static int __init ioapic_setup(char *str)
693{
694 skip_ioapic_setup = 1;
695 return 1;
696}
697
698__setup("noapic", ioapic_setup);
699
700static int __init ioapic_pirq_setup(char *str)
701{
702 int i, max;
703 int ints[MAX_PIRQS+1];
704
705 get_options(str, ARRAY_SIZE(ints), ints);
706
707 for (i = 0; i < MAX_PIRQS; i++)
708 pirq_entries[i] = -1;
709
710 pirqs_enabled = 1;
711 apic_printk(APIC_VERBOSE, KERN_INFO
712 "PIRQ redirection, working around broken MP-BIOS.\n");
713 max = MAX_PIRQS;
714 if (ints[0] < MAX_PIRQS)
715 max = ints[0];
716
717 for (i = 0; i < max; i++) {
718 apic_printk(APIC_VERBOSE, KERN_DEBUG
719 "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
720 /*
721 * PIRQs are mapped upside down, usually.
722 */
723 pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
724 }
725 return 1;
726}
727
728__setup("pirq=", ioapic_pirq_setup);
729
730/*
731 * Find the IRQ entry number of a certain pin.
732 */
733static int find_irq_entry(int apic, int pin, int type)
734{
735 int i;
736
737 for (i = 0; i < mp_irq_entries; i++)
738 if (mp_irqs[i].mpc_irqtype == type &&
739 (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
740 mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
741 mp_irqs[i].mpc_dstirq == pin)
742 return i;
743
744 return -1;
745}
746
747/*
748 * Find the pin to which IRQ[irq] (ISA) is connected
749 */
750static int find_isa_irq_pin(int irq, int type)
751{
752 int i;
753
754 for (i = 0; i < mp_irq_entries; i++) {
755 int lbus = mp_irqs[i].mpc_srcbus;
756
757 if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
758 mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
759 mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
760 mp_bus_id_to_type[lbus] == MP_BUS_NEC98
761 ) &&
762 (mp_irqs[i].mpc_irqtype == type) &&
763 (mp_irqs[i].mpc_srcbusirq == irq))
764
765 return mp_irqs[i].mpc_dstirq;
766 }
767 return -1;
768}
769
770/*
771 * Find a specific PCI IRQ entry.
772 * Not __init, since it is possibly needed by modules.
773 */
774static int pin_2_irq(int idx, int apic, int pin);
775
776int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
777{
778 int apic, i, best_guess = -1;
779
780 apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
781 "slot:%d, pin:%d.\n", bus, slot, pin);
782 if (mp_bus_id_to_pci_bus[bus] == -1) {
783 printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
784 return -1;
785 }
786 for (i = 0; i < mp_irq_entries; i++) {
787 int lbus = mp_irqs[i].mpc_srcbus;
788
789 for (apic = 0; apic < nr_ioapics; apic++)
790 if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
791 mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
792 break;
793
794 if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
795 !mp_irqs[i].mpc_irqtype &&
796 (bus == lbus) &&
797 (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
798 int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
799
800 if (!(apic || IO_APIC_IRQ(irq)))
801 continue;
802
803 if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
804 return irq;
805 /*
806 * Use the first all-but-pin matching entry as a
807 * best-guess fuzzy result for broken mptables.
808 */
809 if (best_guess < 0)
810 best_guess = irq;
811 }
812 }
813 return best_guess;
814}
815
816/*
817 * This function is currently only a helper for the i386 SMP boot process, where
818 * we need to reprogram the ioredtbls to cater for the cpus which have come online,
819 * so the mask in all cases should simply be TARGET_CPUS.
820 */
821void __init setup_ioapic_dest(void)
822{
823 int pin, ioapic, irq, irq_entry;
824
825 if (skip_ioapic_setup == 1)
826 return;
827
828 for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
829 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
830 irq_entry = find_irq_entry(ioapic, pin, mp_INT);
831 if (irq_entry == -1)
832 continue;
833 irq = pin_2_irq(irq_entry, ioapic, pin);
834 set_ioapic_affinity_irq(irq, TARGET_CPUS);
835 }
836
837 }
838}
839
840/*
841 * EISA Edge/Level control register, ELCR
842 */
843static int EISA_ELCR(unsigned int irq)
844{
845 if (irq < 16) {
846 unsigned int port = 0x4d0 + (irq >> 3);
847 return (inb(port) >> (irq & 7)) & 1;
848 }
849 apic_printk(APIC_VERBOSE, KERN_INFO
850 "Broken MPtable reports ISA irq %d\n", irq);
851 return 0;
852}
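/*
 * A worked example of the addressing above: IRQ 10 reads port
 * 0x4d0 + (10 >> 3) = 0x4d1 and tests bit 10 & 7 = 2; a set bit
 * means level triggered, a clear bit edge triggered.
 */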
853
854/* EISA interrupts are always polarity zero and can be edge or level
855 * triggered depending on the ELCR value. If an interrupt is listed as
856 * EISA conforming in the MP table, that means its trigger type must
857 * be read in from the ELCR. */
858
859#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
860#define default_EISA_polarity(idx) (0)
861
862/* ISA interrupts are always polarity zero edge triggered,
863 * when listed as conforming in the MP table. */
864
865#define default_ISA_trigger(idx) (0)
866#define default_ISA_polarity(idx) (0)
867
868/* PCI interrupts are always polarity one level triggered,
869 * when listed as conforming in the MP table. */
870
871#define default_PCI_trigger(idx) (1)
872#define default_PCI_polarity(idx) (1)
873
874/* MCA interrupts are always polarity zero level triggered,
875 * when listed as conforming in the MP table. */
876
877#define default_MCA_trigger(idx) (1)
878#define default_MCA_polarity(idx) (0)
879
880/* NEC98 interrupts are always polarity zero edge triggered,
881 * when listed as conforming in the MP table. */
882
883#define default_NEC98_trigger(idx) (0)
884#define default_NEC98_polarity(idx) (0)
885
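/*
 * The per-bus defaults above, summarised (trigger: 0 = edge, 1 = level;
 * polarity: 0 = active high, 1 = active low):
 *
 *   bus     trigger     polarity
 *   ISA     edge (0)    high (0)
 *   EISA    from ELCR   high (0)
 *   PCI     level (1)   low (1)
 *   MCA     level (1)   high (0)
 *   NEC98   edge (0)    high (0)
 */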
886static int __init MPBIOS_polarity(int idx)
887{
888 int bus = mp_irqs[idx].mpc_srcbus;
889 int polarity;
890
891 /*
892 * Determine IRQ line polarity (high active or low active):
893 */
894 switch (mp_irqs[idx].mpc_irqflag & 3)
895 {
896 case 0: /* conforms, i.e. bus-type dependent polarity */
897 {
898 switch (mp_bus_id_to_type[bus])
899 {
900 case MP_BUS_ISA: /* ISA pin */
901 {
902 polarity = default_ISA_polarity(idx);
903 break;
904 }
905 case MP_BUS_EISA: /* EISA pin */
906 {
907 polarity = default_EISA_polarity(idx);
908 break;
909 }
910 case MP_BUS_PCI: /* PCI pin */
911 {
912 polarity = default_PCI_polarity(idx);
913 break;
914 }
915 case MP_BUS_MCA: /* MCA pin */
916 {
917 polarity = default_MCA_polarity(idx);
918 break;
919 }
920 case MP_BUS_NEC98: /* NEC 98 pin */
921 {
922 polarity = default_NEC98_polarity(idx);
923 break;
924 }
925 default:
926 {
927 printk(KERN_WARNING "broken BIOS!!\n");
928 polarity = 1;
929 break;
930 }
931 }
932 break;
933 }
934 case 1: /* high active */
935 {
936 polarity = 0;
937 break;
938 }
939 case 2: /* reserved */
940 {
941 printk(KERN_WARNING "broken BIOS!!\n");
942 polarity = 1;
943 break;
944 }
945 case 3: /* low active */
946 {
947 polarity = 1;
948 break;
949 }
950 default: /* invalid */
951 {
952 printk(KERN_WARNING "broken BIOS!!\n");
953 polarity = 1;
954 break;
955 }
956 }
957 return polarity;
958}
959
960static int MPBIOS_trigger(int idx)
961{
962 int bus = mp_irqs[idx].mpc_srcbus;
963 int trigger;
964
965 /*
966 * Determine IRQ trigger mode (edge or level sensitive):
967 */
968 switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
969 {
970 case 0: /* conforms, i.e. bus-type dependent */
971 {
972 switch (mp_bus_id_to_type[bus])
973 {
974 case MP_BUS_ISA: /* ISA pin */
975 {
976 trigger = default_ISA_trigger(idx);
977 break;
978 }
979 case MP_BUS_EISA: /* EISA pin */
980 {
981 trigger = default_EISA_trigger(idx);
982 break;
983 }
984 case MP_BUS_PCI: /* PCI pin */
985 {
986 trigger = default_PCI_trigger(idx);
987 break;
988 }
989 case MP_BUS_MCA: /* MCA pin */
990 {
991 trigger = default_MCA_trigger(idx);
992 break;
993 }
994 case MP_BUS_NEC98: /* NEC 98 pin */
995 {
996 trigger = default_NEC98_trigger(idx);
997 break;
998 }
999 default:
1000 {
1001 printk(KERN_WARNING "broken BIOS!!\n");
1002 trigger = 1;
1003 break;
1004 }
1005 }
1006 break;
1007 }
1008 case 1: /* edge */
1009 {
1010 trigger = 0;
1011 break;
1012 }
1013 case 2: /* reserved */
1014 {
1015 printk(KERN_WARNING "broken BIOS!!\n");
1016 trigger = 1;
1017 break;
1018 }
1019 case 3: /* level */
1020 {
1021 trigger = 1;
1022 break;
1023 }
1024 default: /* invalid */
1025 {
1026 printk(KERN_WARNING "broken BIOS!!\n");
1027 trigger = 0;
1028 break;
1029 }
1030 }
1031 return trigger;
1032}
1033
1034static inline int irq_polarity(int idx)
1035{
1036 return MPBIOS_polarity(idx);
1037}
1038
1039static inline int irq_trigger(int idx)
1040{
1041 return MPBIOS_trigger(idx);
1042}
1043
1044static int pin_2_irq(int idx, int apic, int pin)
1045{
1046 int irq, i;
1047 int bus = mp_irqs[idx].mpc_srcbus;
1048
1049 /*
1050 * Debugging check, we are in big trouble if this message pops up!
1051 */
1052 if (mp_irqs[idx].mpc_dstirq != pin)
1053 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
1054
1055 switch (mp_bus_id_to_type[bus])
1056 {
1057 case MP_BUS_ISA: /* ISA pin */
1058 case MP_BUS_EISA:
1059 case MP_BUS_MCA:
1060 case MP_BUS_NEC98:
1061 {
1062 irq = mp_irqs[idx].mpc_srcbusirq;
1063 break;
1064 }
1065 case MP_BUS_PCI: /* PCI pin */
1066 {
1067 /*
1068 * PCI IRQs are mapped in order
1069 */
1070 i = irq = 0;
1071 while (i < apic)
1072 irq += nr_ioapic_registers[i++];
1073 irq += pin;
1074
1075 /*
1076 * For MPS mode, so far only needed by ES7000 platform
1077 */
1078 if (ioapic_renumber_irq)
1079 irq = ioapic_renumber_irq(apic, irq);
1080
1081 break;
1082 }
1083 default:
1084 {
1085 printk(KERN_ERR "unknown bus type %d.\n",bus);
1086 irq = 0;
1087 break;
1088 }
1089 }
1090
1091 /*
1092 * PCI IRQ command line redirection. Yes, limits are hardcoded.
1093 */
1094 if ((pin >= 16) && (pin <= 23)) {
1095 if (pirq_entries[pin-16] != -1) {
1096 if (!pirq_entries[pin-16]) {
1097 apic_printk(APIC_VERBOSE, KERN_DEBUG
1098 "disabling PIRQ%d\n", pin-16);
1099 } else {
1100 irq = pirq_entries[pin-16];
1101 apic_printk(APIC_VERBOSE, KERN_DEBUG
1102 "using PIRQ%d -> IRQ %d\n",
1103 pin-16, irq);
1104 }
1105 }
1106 }
1107 return irq;
1108}
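/*
 * A worked example of the PCI branch above: with two I/O APICs of 24
 * pins each (nr_ioapic_registers[] = {24, 24}), pin 5 of apic 1
 * yields irq = 24 + 5 = 29, while pins of apic 0 map straight
 * through to irq 0..23 (before any PIRQ command-line redirection).
 */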
1109
1110static inline int IO_APIC_irq_trigger(int irq)
1111{
1112 int apic, idx, pin;
1113
1114 for (apic = 0; apic < nr_ioapics; apic++) {
1115 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1116 idx = find_irq_entry(apic,pin,mp_INT);
1117 if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
1118 return irq_trigger(idx);
1119 }
1120 }
1121 /*
1122 * nonexistent IRQs are edge default
1123 */
1124 return 0;
1125}
1126
1127/* irq_vector[] is indexed by the sum of all RTEs in all I/O APICs. */
1128u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 };
1129
1130int assign_irq_vector(int irq)
1131{
1132 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
1133
1134 BUG_ON(irq >= NR_IRQ_VECTORS);
1135 if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
1136 return IO_APIC_VECTOR(irq);
1137next:
1138 current_vector += 8;
1139 if (current_vector == SYSCALL_VECTOR)
1140 goto next;
1141
1142 if (current_vector >= FIRST_SYSTEM_VECTOR) {
1143 offset++;
1144 if (!(offset%8))
1145 return -ENOSPC;
1146 current_vector = FIRST_DEVICE_VECTOR + offset;
1147 }
1148
1149 vector_irq[current_vector] = irq;
1150 if (irq != AUTO_ASSIGN)
1151 IO_APIC_VECTOR(irq) = current_vector;
1152
1153 return current_vector;
1154}
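/*
 * A worked example of the allocation pattern above, assuming the
 * conventional i386 values FIRST_DEVICE_VECTOR = 0x31 and
 * SYSCALL_VECTOR = 0x80: successive calls hand out vectors 0x39,
 * 0x41, 0x49, ... in steps of 8, spreading devices across the
 * interrupt priority levels; when FIRST_SYSTEM_VECTOR is reached,
 * 'offset' is bumped and the walk restarts from
 * FIRST_DEVICE_VECTOR + offset, returning -ENOSPC once all eight
 * offsets have been exhausted.
 */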
1155
1156static struct hw_interrupt_type ioapic_level_type;
1157static struct hw_interrupt_type ioapic_edge_type;
1158
1159#define IOAPIC_AUTO -1
1160#define IOAPIC_EDGE 0
1161#define IOAPIC_LEVEL 1
1162
1163static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
1164{
1165 if (use_pci_vector() && !platform_legacy_irq(irq)) {
1166 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1167 trigger == IOAPIC_LEVEL)
1168 irq_desc[vector].handler = &ioapic_level_type;
1169 else
1170 irq_desc[vector].handler = &ioapic_edge_type;
1171 set_intr_gate(vector, interrupt[vector]);
1172 } else {
1173 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1174 trigger == IOAPIC_LEVEL)
1175 irq_desc[irq].handler = &ioapic_level_type;
1176 else
1177 irq_desc[irq].handler = &ioapic_edge_type;
1178 set_intr_gate(vector, interrupt[irq]);
1179 }
1180}
1181
1182static void __init setup_IO_APIC_irqs(void)
1183{
1184 struct IO_APIC_route_entry entry;
1185 int apic, pin, idx, irq, first_notcon = 1, vector;
1186 unsigned long flags;
1187
1188 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1189
1190 for (apic = 0; apic < nr_ioapics; apic++) {
1191 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1192
1193 /*
1194 * add it to the IO-APIC irq-routing table:
1195 */
1196 memset(&entry,0,sizeof(entry));
1197
1198 entry.delivery_mode = INT_DELIVERY_MODE;
1199 entry.dest_mode = INT_DEST_MODE;
1200 entry.mask = 0; /* enable IRQ */
1201 entry.dest.logical.logical_dest =
1202 cpu_mask_to_apicid(TARGET_CPUS);
1203
1204 idx = find_irq_entry(apic,pin,mp_INT);
1205 if (idx == -1) {
1206 if (first_notcon) {
1207 apic_printk(APIC_VERBOSE, KERN_DEBUG
1208 " IO-APIC (apicid-pin) %d-%d",
1209 mp_ioapics[apic].mpc_apicid,
1210 pin);
1211 first_notcon = 0;
1212 } else
1213 apic_printk(APIC_VERBOSE, ", %d-%d",
1214 mp_ioapics[apic].mpc_apicid, pin);
1215 continue;
1216 }
1217
1218 entry.trigger = irq_trigger(idx);
1219 entry.polarity = irq_polarity(idx);
1220
1221 if (irq_trigger(idx)) {
1222 entry.trigger = 1;
1223 entry.mask = 1;
1224 }
1225
1226 irq = pin_2_irq(idx, apic, pin);
1227 /*
1228 * skip adding the timer int on secondary nodes, which causes
1229 * a small but painful rift in the time-space continuum
1230 */
1231 if (multi_timer_check(apic, irq))
1232 continue;
1233 else
1234 add_pin_to_irq(irq, apic, pin);
1235
1236 if (!apic && !IO_APIC_IRQ(irq))
1237 continue;
1238
1239 if (IO_APIC_IRQ(irq)) {
1240 vector = assign_irq_vector(irq);
1241 entry.vector = vector;
1242 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
1243
1244 if (!apic && (irq < 16))
1245 disable_8259A_irq(irq);
1246 }
1247 spin_lock_irqsave(&ioapic_lock, flags);
1248 io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
1249 io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
1250 spin_unlock_irqrestore(&ioapic_lock, flags);
1251 }
1252 }
1253
1254 if (!first_notcon)
1255 apic_printk(APIC_VERBOSE, " not connected.\n");
1256}
1257
1258/*
1259 * Set up the 8259A-master output pin:
1260 */
1261static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
1262{
1263 struct IO_APIC_route_entry entry;
1264 unsigned long flags;
1265
1266 memset(&entry,0,sizeof(entry));
1267
1268 disable_8259A_irq(0);
1269
1270 /* mask LVT0 */
1271 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
1272
1273 /*
1274 * We use logical delivery to get the timer IRQ
1275 * to the first CPU.
1276 */
1277 entry.dest_mode = INT_DEST_MODE;
1278 entry.mask = 0; /* unmask IRQ now */
1279 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
1280 entry.delivery_mode = INT_DELIVERY_MODE;
1281 entry.polarity = 0;
1282 entry.trigger = 0;
1283 entry.vector = vector;
1284
1285 /*
1286 * The timer IRQ doesn't have to know that behind the
1287 * scenes we have an 8259A-master in AEOI mode ...
1288 */
1289 irq_desc[0].handler = &ioapic_edge_type;
1290
1291 /*
1292 * Add it to the IO-APIC irq-routing table:
1293 */
1294 spin_lock_irqsave(&ioapic_lock, flags);
1295 io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
1296 io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
1297 spin_unlock_irqrestore(&ioapic_lock, flags);
1298
1299 enable_8259A_irq(0);
1300}
1301
1302static inline void UNEXPECTED_IO_APIC(void)
1303{
1304}
1305
1306void __init print_IO_APIC(void)
1307{
1308 int apic, i;
1309 union IO_APIC_reg_00 reg_00;
1310 union IO_APIC_reg_01 reg_01;
1311 union IO_APIC_reg_02 reg_02;
1312 union IO_APIC_reg_03 reg_03;
1313 unsigned long flags;
1314
1315 if (apic_verbosity == APIC_QUIET)
1316 return;
1317
1318 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1319 for (i = 0; i < nr_ioapics; i++)
1320 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1321 mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
1322
1323 /*
1324 * We are a bit conservative about what we expect. We have to
1325 * know about every hardware change ASAP.
1326 */
1327 printk(KERN_INFO "testing the IO APIC.......................\n");
1328
1329 for (apic = 0; apic < nr_ioapics; apic++) {
1330
1331 spin_lock_irqsave(&ioapic_lock, flags);
1332 reg_00.raw = io_apic_read(apic, 0);
1333 reg_01.raw = io_apic_read(apic, 1);
1334 if (reg_01.bits.version >= 0x10)
1335 reg_02.raw = io_apic_read(apic, 2);
1336 if (reg_01.bits.version >= 0x20)
1337 reg_03.raw = io_apic_read(apic, 3);
1338 spin_unlock_irqrestore(&ioapic_lock, flags);
1339
1340 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
1341 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1342 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1343 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
1344 printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
1345 if (reg_00.bits.ID >= get_physical_broadcast())
1346 UNEXPECTED_IO_APIC();
1347 if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
1348 UNEXPECTED_IO_APIC();
1349
1350 printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
1351 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
1352 if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
1353 (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
1354 (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
1355 (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
1356 (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
1357 (reg_01.bits.entries != 0x2E) &&
1358 (reg_01.bits.entries != 0x3F)
1359 )
1360 UNEXPECTED_IO_APIC();
1361
1362 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
1363 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
1364 if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
1365 (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
1366 (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
1367 (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
1368 (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
1369 )
1370 UNEXPECTED_IO_APIC();
1371 if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
1372 UNEXPECTED_IO_APIC();
1373
1374 /*
1375 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
1376 * but the value of reg_02 is read as the previous read register
1377 * value, so ignore it if reg_02 == reg_01.
1378 */
1379 if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
1380 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
1381 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
1382 if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
1383 UNEXPECTED_IO_APIC();
1384 }
1385
1386 /*
1387 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
1388 * or reg_03, but the value of reg_0[23] is read as the previous read
1389 * register value, so ignore it if reg_03 == reg_0[12].
1390 */
1391 if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
1392 reg_03.raw != reg_01.raw) {
1393 printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
1394 printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
1395 if (reg_03.bits.__reserved_1)
1396 UNEXPECTED_IO_APIC();
1397 }
1398
1399 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1400
1401 printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
1402 " Stat Dest Deli Vect: \n");
1403
1404 for (i = 0; i <= reg_01.bits.entries; i++) {
1405 struct IO_APIC_route_entry entry;
1406
1407 spin_lock_irqsave(&ioapic_lock, flags);
1408 *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
1409 *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
1410 spin_unlock_irqrestore(&ioapic_lock, flags);
1411
1412 printk(KERN_DEBUG " %02x %03X %02X ",
1413 i,
1414 entry.dest.logical.logical_dest,
1415 entry.dest.physical.physical_dest
1416 );
1417
1418 printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
1419 entry.mask,
1420 entry.trigger,
1421 entry.irr,
1422 entry.polarity,
1423 entry.delivery_status,
1424 entry.dest_mode,
1425 entry.delivery_mode,
1426 entry.vector
1427 );
1428 }
1429 }
1430 if (use_pci_vector())
1431 printk(KERN_INFO "Using vector-based indexing\n");
1432 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1433 for (i = 0; i < NR_IRQS; i++) {
1434 struct irq_pin_list *entry = irq_2_pin + i;
1435 if (entry->pin < 0)
1436 continue;
1437 if (use_pci_vector() && !platform_legacy_irq(i))
1438 printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
1439 else
1440 printk(KERN_DEBUG "IRQ%d ", i);
1441 for (;;) {
1442 printk("-> %d:%d", entry->apic, entry->pin);
1443 if (!entry->next)
1444 break;
1445 entry = irq_2_pin + entry->next;
1446 }
1447 printk("\n");
1448 }
1449
1450 printk(KERN_INFO ".................................... done.\n");
1451
1452 return;
1453}
1454
1455#if 0
1456
1457static void print_APIC_bitfield (int base)
1458{
1459 unsigned int v;
1460 int i, j;
1461
1462 if (apic_verbosity == APIC_QUIET)
1463 return;
1464
1465 printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
1466 for (i = 0; i < 8; i++) {
1467 v = apic_read(base + i*0x10);
1468 for (j = 0; j < 32; j++) {
1469 if (v & (1<<j))
1470 printk("1");
1471 else
1472 printk("0");
1473 }
1474 printk("\n");
1475 }
1476}
1477
1478void /*__init*/ print_local_APIC(void * dummy)
1479{
1480 unsigned int v, ver, maxlvt;
1481
1482 if (apic_verbosity == APIC_QUIET)
1483 return;
1484
1485 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1486 smp_processor_id(), hard_smp_processor_id());
1487 v = apic_read(APIC_ID);
1488 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
1489 v = apic_read(APIC_LVR);
1490 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1491 ver = GET_APIC_VERSION(v);
1492 maxlvt = get_maxlvt();
1493
1494 v = apic_read(APIC_TASKPRI);
1495 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
1496
1497 if (APIC_INTEGRATED(ver)) { /* !82489DX */
1498 v = apic_read(APIC_ARBPRI);
1499 printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
1500 v & APIC_ARBPRI_MASK);
1501 v = apic_read(APIC_PROCPRI);
1502 printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
1503 }
1504
1505 v = apic_read(APIC_EOI);
1506 printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
1507 v = apic_read(APIC_RRR);
1508 printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
1509 v = apic_read(APIC_LDR);
1510 printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
1511 v = apic_read(APIC_DFR);
1512 printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
1513 v = apic_read(APIC_SPIV);
1514 printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
1515
1516 printk(KERN_DEBUG "... APIC ISR field:\n");
1517 print_APIC_bitfield(APIC_ISR);
1518 printk(KERN_DEBUG "... APIC TMR field:\n");
1519 print_APIC_bitfield(APIC_TMR);
1520 printk(KERN_DEBUG "... APIC IRR field:\n");
1521 print_APIC_bitfield(APIC_IRR);
1522
1523 if (APIC_INTEGRATED(ver)) { /* !82489DX */
1524 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
1525 apic_write(APIC_ESR, 0);
1526 v = apic_read(APIC_ESR);
1527 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1528 }
1529
1530 v = apic_read(APIC_ICR);
1531 printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
1532 v = apic_read(APIC_ICR2);
1533 printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
1534
1535 v = apic_read(APIC_LVTT);
1536 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
1537
1538 if (maxlvt > 3) { /* PC is LVT#4. */
1539 v = apic_read(APIC_LVTPC);
1540 printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
1541 }
1542 v = apic_read(APIC_LVT0);
1543 printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
1544 v = apic_read(APIC_LVT1);
1545 printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
1546
1547 if (maxlvt > 2) { /* ERR is LVT#3. */
1548 v = apic_read(APIC_LVTERR);
1549 printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
1550 }
1551
1552 v = apic_read(APIC_TMICT);
1553 printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
1554 v = apic_read(APIC_TMCCT);
1555 printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
1556 v = apic_read(APIC_TDCR);
1557 printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
1558 printk("\n");
1559}
1560
1561void print_all_local_APICs (void)
1562{
1563 on_each_cpu(print_local_APIC, NULL, 1, 1);
1564}
1565
1566void /*__init*/ print_PIC(void)
1567{
1568 extern spinlock_t i8259A_lock;
1569 unsigned int v;
1570 unsigned long flags;
1571
1572 if (apic_verbosity == APIC_QUIET)
1573 return;
1574
1575 printk(KERN_DEBUG "\nprinting PIC contents\n");
1576
1577 spin_lock_irqsave(&i8259A_lock, flags);
1578
1579 v = inb(0xa1) << 8 | inb(0x21);
1580 printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
1581
1582 v = inb(0xa0) << 8 | inb(0x20);
1583 printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
1584
1585 outb(0x0b,0xa0);
1586 outb(0x0b,0x20);
1587 v = inb(0xa0) << 8 | inb(0x20);
1588 outb(0x0a,0xa0);
1589 outb(0x0a,0x20);
1590
1591 spin_unlock_irqrestore(&i8259A_lock, flags);
1592
1593 printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
1594
1595 v = inb(0x4d1) << 8 | inb(0x4d0);
1596 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
1597}
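
/*
 * For the record: the 0x0b/0x0a writes above are 8259A OCW3 commands.
 * Writing 0x0b to the command port selects the in-service register for
 * the next read, and 0x0a switches back to the interrupt-request
 * register.  The IMR needs no command word because a plain read of the
 * data ports (0x21/0xa1) returns it directly.
 */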
1598
1599#endif /* 0 */
1600
1601static void __init enable_IO_APIC(void)
1602{
1603 union IO_APIC_reg_01 reg_01;
1604 int i;
1605 unsigned long flags;
1606
1607 for (i = 0; i < PIN_MAP_SIZE; i++) {
1608 irq_2_pin[i].pin = -1;
1609 irq_2_pin[i].next = 0;
1610 }
1611 if (!pirqs_enabled)
1612 for (i = 0; i < MAX_PIRQS; i++)
1613 pirq_entries[i] = -1;
1614
1615 /*
1616 * The number of IO-APIC IRQ registers (== #pins):
1617 */
1618 for (i = 0; i < nr_ioapics; i++) {
1619 spin_lock_irqsave(&ioapic_lock, flags);
1620 reg_01.raw = io_apic_read(i, 1);
1621 spin_unlock_irqrestore(&ioapic_lock, flags);
1622 nr_ioapic_registers[i] = reg_01.bits.entries+1;
1623 }
1624
1625 /*
1626 * Do not trust the IO-APIC being empty at bootup
1627 */
1628 clear_IO_APIC();
1629}
1630
1631/*
1632 * Not an __init, needed by the reboot code
1633 */
1634void disable_IO_APIC(void)
1635{
1636 /*
1637 * Clear the IO-APIC before rebooting:
1638 */
1639 clear_IO_APIC();
1640
1641 disconnect_bsp_APIC();
1642}
1643
1644/*
1645 * function to set the IO-APIC physical IDs based on the
1646 * values stored in the MPC table.
1647 *
1648 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
1649 */
1650
1651#ifndef CONFIG_X86_NUMAQ
1652static void __init setup_ioapic_ids_from_mpc(void)
1653{
1654 union IO_APIC_reg_00 reg_00;
1655 physid_mask_t phys_id_present_map;
1656 int apic;
1657 int i;
1658 unsigned char old_id;
1659 unsigned long flags;
1660
1661 /*
1662 * This is broken; anything with a real cpu count has to
1663 * circumvent this idiocy regardless.
1664 */
1665 phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
1666
1667 /*
1668 * Set the IOAPIC ID to the value stored in the MPC table.
1669 */
1670 for (apic = 0; apic < nr_ioapics; apic++) {
1671
1672 /* Read the register 0 value */
1673 spin_lock_irqsave(&ioapic_lock, flags);
1674 reg_00.raw = io_apic_read(apic, 0);
1675 spin_unlock_irqrestore(&ioapic_lock, flags);
1676
1677 old_id = mp_ioapics[apic].mpc_apicid;
1678
1679 if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
1680 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
1681 apic, mp_ioapics[apic].mpc_apicid);
1682 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1683 reg_00.bits.ID);
1684 mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
1685 }
1686
1687 /* Don't check I/O APIC IDs for some xAPIC systems. They have
1688 * no meaning without the serial APIC bus. */
1689 if (NO_IOAPIC_CHECK)
1690 continue;
1691 /*
1692 * Sanity check, is the ID really free? Every APIC in a
1693 * system must have a unique ID or we get lots of nice
1694 * 'stuck on smp_invalidate_needed IPI wait' messages.
1695 */
1696 if (check_apicid_used(phys_id_present_map,
1697 mp_ioapics[apic].mpc_apicid)) {
1698 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
1699 apic, mp_ioapics[apic].mpc_apicid);
1700 for (i = 0; i < get_physical_broadcast(); i++)
1701 if (!physid_isset(i, phys_id_present_map))
1702 break;
1703 if (i >= get_physical_broadcast())
1704 panic("Max APIC ID exceeded!\n");
1705 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1706 i);
1707 physid_set(i, phys_id_present_map);
1708 mp_ioapics[apic].mpc_apicid = i;
1709 } else {
1710 physid_mask_t tmp;
1711 tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
1712 apic_printk(APIC_VERBOSE, "Setting %d in the "
1713 "phys_id_present_map\n",
1714 mp_ioapics[apic].mpc_apicid);
1715 physids_or(phys_id_present_map, phys_id_present_map, tmp);
1716 }
1717
1718
1719 /*
1720 * We need to adjust the IRQ routing table
1721 * if the ID changed.
1722 */
1723 if (old_id != mp_ioapics[apic].mpc_apicid)
1724 for (i = 0; i < mp_irq_entries; i++)
1725 if (mp_irqs[i].mpc_dstapic == old_id)
1726 mp_irqs[i].mpc_dstapic
1727 = mp_ioapics[apic].mpc_apicid;
1728
1729 /*
1730 * Read the right value from the MPC table and
1731 * write it into the ID register.
1732 */
1733 apic_printk(APIC_VERBOSE, KERN_INFO
1734 "...changing IO-APIC physical APIC ID to %d ...",
1735 mp_ioapics[apic].mpc_apicid);
1736
1737 reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
1738 spin_lock_irqsave(&ioapic_lock, flags);
1739 io_apic_write(apic, 0, reg_00.raw);
1740 spin_unlock_irqrestore(&ioapic_lock, flags);
1741
1742 /*
1743 * Sanity check
1744 */
1745 spin_lock_irqsave(&ioapic_lock, flags);
1746 reg_00.raw = io_apic_read(apic, 0);
1747 spin_unlock_irqrestore(&ioapic_lock, flags);
1748 if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
1749 printk("could not set ID!\n");
1750 else
1751 apic_printk(APIC_VERBOSE, " ok.\n");
1752 }
1753}
1754#else
1755static void __init setup_ioapic_ids_from_mpc(void) { }
1756#endif
1757
1758/*
1759 * There is a nasty bug in some older SMP boards, their mptable lies
1760 * about the timer IRQ. We do the following to work around the situation:
1761 *
1762 * - timer IRQ defaults to IO-APIC IRQ
1763 * - if this function detects that timer IRQs are defunct, then we fall
1764 * back to ISA timer IRQs
1765 */
1766static int __init timer_irq_works(void)
1767{
1768 unsigned long t1 = jiffies;
1769
1770 local_irq_enable();
1771 /* Let ten ticks pass... */
1772 mdelay((10 * 1000) / HZ);
1773
1774 /*
1775 * Expect a few ticks at least, to be sure some possible
1776 * glue logic does not lock up after one or two first
1777 * ticks in a non-ExtINT mode. Also the local APIC
1778 * might have cached one ExtINT interrupt. Finally, at
1779 * least one tick may be lost due to delays.
1780 */
1781 if (jiffies - t1 > 4)
1782 return 1;
1783
1784 return 0;
1785}
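
/*
 * Note that mdelay((10 * 1000) / HZ) busy-waits for ten timer periods
 * whatever HZ is configured to (100ms at HZ=100, 10ms at HZ=1000), so
 * roughly ten jiffies should have elapsed if the timer IRQ is alive;
 * the "> 4" test above deliberately accepts about half of that.
 */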
1786
1787/*
1788 * In the SMP+IOAPIC case it might happen that an unspecified number
1789 * of pending IRQ events are left unhandled. These cases are very rare,
1790 * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
1791 * better to do it this way as thus we do not have to be aware of
1792 * 'pending' interrupts in the IRQ path, except at this point.
1793 */
1794/*
1795 * Edge triggered needs to resend any interrupt
1796 * that was delayed but this is now handled in the device
1797 * independent code.
1798 */
1799
1800/*
1801 * Starting up an edge-triggered IO-APIC interrupt is
1802 * nasty - we need to make sure that we get the edge.
1803 * If it is already asserted for some reason, we need to
1804 * return 1 to indicate that it was pending.
1805 *
1806 * This is not complete - we should be able to fake
1807 * an edge even if it isn't on the 8259A...
1808 */
1809static unsigned int startup_edge_ioapic_irq(unsigned int irq)
1810{
1811 int was_pending = 0;
1812 unsigned long flags;
1813
1814 spin_lock_irqsave(&ioapic_lock, flags);
1815 if (irq < 16) {
1816 disable_8259A_irq(irq);
1817 if (i8259A_irq_pending(irq))
1818 was_pending = 1;
1819 }
1820 __unmask_IO_APIC_irq(irq);
1821 spin_unlock_irqrestore(&ioapic_lock, flags);
1822
1823 return was_pending;
1824}
1825
1826/*
1827 * Once we have recorded IRQ_PENDING already, we can mask the
1828 * interrupt for real. This prevents IRQ storms from unhandled
1829 * devices.
1830 */
1831static void ack_edge_ioapic_irq(unsigned int irq)
1832{
1833 move_irq(irq);
1834 if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
1835 == (IRQ_PENDING | IRQ_DISABLED))
1836 mask_IO_APIC_irq(irq);
1837 ack_APIC_irq();
1838}
1839
1840/*
1841 * Level triggered interrupts can just be masked,
1842 * and shutting down and starting up the interrupt
1843 * is the same as enabling and disabling them -- except
1844 * with a startup need to return a "was pending" value.
1845 *
1846 * Level triggered interrupts are special because we
1847 * do not touch any IO-APIC register while handling
1848 * them. We ack the APIC in the end-IRQ handler, not
1849 * in the start-IRQ-handler. Protection against reentrance
1850 * from the same interrupt is still provided, both by the
1851 * generic IRQ layer and by the fact that an unacked local
1852 * APIC does not accept IRQs.
1853 */
1854static unsigned int startup_level_ioapic_irq (unsigned int irq)
1855{
1856 unmask_IO_APIC_irq(irq);
1857
1858 return 0; /* don't check for pending */
1859}
1860
1861static void end_level_ioapic_irq (unsigned int irq)
1862{
1863 unsigned long v;
1864 int i;
1865
1866 move_irq(irq);
1867/*
1868 * It appears there is an erratum which affects at least version 0x11
1869 * of I/O APIC (that's the 82093AA and cores integrated into various
1870 * chipsets). Under certain conditions a level-triggered interrupt is
1871 * erroneously delivered as edge-triggered one but the respective IRR
1872 * bit gets set nevertheless. As a result the I/O unit expects an EOI
1873 * message but it will never arrive and further interrupts are blocked
1874 * from the source. The exact reason is so far unknown, but the
1875 * phenomenon was observed when two consecutive interrupt requests
1876 * from a given source get delivered to the same CPU and the source is
1877 * temporarily disabled in between.
1878 *
1879 * A workaround is to simulate an EOI message manually. We achieve it
1880 * by setting the trigger mode to edge and then to level when the edge
1881 * trigger mode gets detected in the TMR of a local APIC for a
1882 * level-triggered interrupt. We mask the source for the time of the
1883 * operation to prevent an edge-triggered interrupt escaping meanwhile.
1884 * The idea is from Manfred Spraul. --macro
1885 */
1886 i = IO_APIC_VECTOR(irq);
1887
1888 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
1889
1890 ack_APIC_irq();
1891
1892 if (!(v & (1 << (i & 0x1f)))) {
1893 atomic_inc(&irq_mis_count);
1894 spin_lock(&ioapic_lock);
1895 __mask_and_edge_IO_APIC_irq(irq);
1896 __unmask_and_level_IO_APIC_irq(irq);
1897 spin_unlock(&ioapic_lock);
1898 }
1899}
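
/*
 * For illustration only (not kernel code): a user-space sketch of the
 * vector-to-register arithmetic used above.  The ISR/TMR/IRR banks
 * each hold 256 bits as eight 32-bit registers spaced 0x10 bytes
 * apart, so vector i lives at byte offset ((i & ~0x1f) >> 1) within
 * the bank, at bit (i & 0x1f) of that register.
 */
#include <assert.h>

static unsigned int tmr_byte_offset(unsigned int vector)
{
	return (vector & ~0x1f) >> 1;	/* 32 vectors per 0x10-byte stride */
}

static unsigned int tmr_bit(unsigned int vector)
{
	return vector & 0x1f;		/* bit within the 32-bit register */
}

int main(void)
{
	assert(tmr_byte_offset(0x31) == 0x10 && tmr_bit(0x31) == 0x11);
	assert(tmr_byte_offset(0xff) == 0x70 && tmr_bit(0xff) == 0x1f);
	return 0;
}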
1900
1901#ifdef CONFIG_PCI_MSI
1902static unsigned int startup_edge_ioapic_vector(unsigned int vector)
1903{
1904 int irq = vector_to_irq(vector);
1905
1906 return startup_edge_ioapic_irq(irq);
1907}
1908
1909static void ack_edge_ioapic_vector(unsigned int vector)
1910{
1911 int irq = vector_to_irq(vector);
1912
1913 ack_edge_ioapic_irq(irq);
1914}
1915
1916static unsigned int startup_level_ioapic_vector (unsigned int vector)
1917{
1918 int irq = vector_to_irq(vector);
1919
1920 return startup_level_ioapic_irq (irq);
1921}
1922
1923static void end_level_ioapic_vector (unsigned int vector)
1924{
1925 int irq = vector_to_irq(vector);
1926
1927 end_level_ioapic_irq(irq);
1928}
1929
1930static void mask_IO_APIC_vector (unsigned int vector)
1931{
1932 int irq = vector_to_irq(vector);
1933
1934 mask_IO_APIC_irq(irq);
1935}
1936
1937static void unmask_IO_APIC_vector (unsigned int vector)
1938{
1939 int irq = vector_to_irq(vector);
1940
1941 unmask_IO_APIC_irq(irq);
1942}
1943
1944static void set_ioapic_affinity_vector (unsigned int vector,
1945 cpumask_t cpu_mask)
1946{
1947 int irq = vector_to_irq(vector);
1948
1949 set_ioapic_affinity_irq(irq, cpu_mask);
1950}
1951#endif
1952
1953/*
1954 * Level and edge triggered IO-APIC interrupts need different handling,
1955 * so we use two separate IRQ descriptors. Edge triggered IRQs can be
1956 * handled with the level-triggered descriptor, but that one has slightly
1957 * more overhead. Level-triggered interrupts cannot be handled with the
1958 * edge-triggered handler, without risking IRQ storms and other ugly
1959 * races.
1960 */
1961static struct hw_interrupt_type ioapic_edge_type = {
1962 .typename = "IO-APIC-edge",
1963 .startup = startup_edge_ioapic,
1964 .shutdown = shutdown_edge_ioapic,
1965 .enable = enable_edge_ioapic,
1966 .disable = disable_edge_ioapic,
1967 .ack = ack_edge_ioapic,
1968 .end = end_edge_ioapic,
1969 .set_affinity = set_ioapic_affinity,
1970};
1971
1972static struct hw_interrupt_type ioapic_level_type = {
1973 .typename = "IO-APIC-level",
1974 .startup = startup_level_ioapic,
1975 .shutdown = shutdown_level_ioapic,
1976 .enable = enable_level_ioapic,
1977 .disable = disable_level_ioapic,
1978 .ack = mask_and_ack_level_ioapic,
1979 .end = end_level_ioapic,
1980 .set_affinity = set_ioapic_affinity,
1981};
1982
1983static inline void init_IO_APIC_traps(void)
1984{
1985 int irq;
1986
1987 /*
1988 * NOTE! The local APIC isn't very good at handling
1989 * multiple interrupts at the same interrupt level.
1990 * As the interrupt level is determined by taking the
1991 * vector number and shifting that right by 4, we
1992 * want to spread these out a bit so that they don't
1993 * all fall in the same interrupt level.
1994 *
1995 * Also, we've got to be careful not to trash gate
1996 * 0x80, because int 0x80 is hm, kind of importantish. ;)
1997 */
1998 for (irq = 0; irq < NR_IRQS ; irq++) {
1999 int tmp = irq;
2000 if (use_pci_vector()) {
2001 if (!platform_legacy_irq(tmp))
2002 if ((tmp = vector_to_irq(tmp)) == -1)
2003 continue;
2004 }
2005 if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
2006 /*
2007 * Hmm.. We don't have an entry for this,
2008 * so default to an old-fashioned 8259
2009 * interrupt if we can..
2010 */
2011 if (irq < 16)
2012 make_8259A_irq(irq);
2013 else
2014 /* Strange. Oh, well.. */
2015 irq_desc[irq].handler = &no_irq_type;
2016 }
2017 }
2018}
2019
2020static void enable_lapic_irq (unsigned int irq)
2021{
2022 unsigned long v;
2023
2024 v = apic_read(APIC_LVT0);
2025 apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
2026}
2027
2028static void disable_lapic_irq (unsigned int irq)
2029{
2030 unsigned long v;
2031
2032 v = apic_read(APIC_LVT0);
2033 apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
2034}
2035
2036static void ack_lapic_irq (unsigned int irq)
2037{
2038 ack_APIC_irq();
2039}
2040
2041static void end_lapic_irq (unsigned int i) { /* nothing */ }
2042
2043static struct hw_interrupt_type lapic_irq_type = {
2044 .typename = "local-APIC-edge",
2045 .startup = NULL, /* startup_irq() not used for IRQ0 */
2046 .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
2047 .enable = enable_lapic_irq,
2048 .disable = disable_lapic_irq,
2049 .ack = ack_lapic_irq,
2050 .end = end_lapic_irq
2051};
2052
2053static void setup_nmi (void)
2054{
2055 /*
2056 * Dirty trick to enable the NMI watchdog ...
2057 * We put the 8259A master into AEOI mode and
2058 * unmask on all local APICs LVT0 as NMI.
2059 *
2060 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
2061 * is from Maciej W. Rozycki - so we do not have to EOI from
2062 * the NMI handler or the timer interrupt.
2063 */
2064 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
2065
2066 on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1);
2067
2068 apic_printk(APIC_VERBOSE, " done.\n");
2069}
2070
2071/*
2072 * This looks a bit hackish but it's about the only way of sending
2073 * a few INTA cycles to 8259As and any associated glue logic. ICR does
2074 * not support the ExtINT mode, unfortunately. We need to send these
2075 * cycles as some i82489DX-based boards have glue logic that keeps the
2076 * 8259A interrupt line asserted until INTA. --macro
2077 */
2078static inline void unlock_ExtINT_logic(void)
2079{
2080 int pin, i;
2081 struct IO_APIC_route_entry entry0, entry1;
2082 unsigned char save_control, save_freq_select;
2083 unsigned long flags;
2084
2085 pin = find_isa_irq_pin(8, mp_INT);
2086 if (pin == -1)
2087 return;
2088
2089 spin_lock_irqsave(&ioapic_lock, flags);
2090 *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
2091 *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
2092 spin_unlock_irqrestore(&ioapic_lock, flags);
2093 clear_IO_APIC_pin(0, pin);
2094
2095 memset(&entry1, 0, sizeof(entry1));
2096
2097 entry1.dest_mode = 0; /* physical delivery */
2098 entry1.mask = 0; /* unmask IRQ now */
2099 entry1.dest.physical.physical_dest = hard_smp_processor_id();
2100 entry1.delivery_mode = dest_ExtINT;
2101 entry1.polarity = entry0.polarity;
2102 entry1.trigger = 0;
2103 entry1.vector = 0;
2104
2105 spin_lock_irqsave(&ioapic_lock, flags);
2106 io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
2107 io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
2108 spin_unlock_irqrestore(&ioapic_lock, flags);
2109
2110 save_control = CMOS_READ(RTC_CONTROL);
2111 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
2112 CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
2113 RTC_FREQ_SELECT);
2114 CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
2115
2116 i = 100;
2117 while (i-- > 0) {
2118 mdelay(10);
2119 if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
2120 i -= 10;
2121 }
2122
2123 CMOS_WRITE(save_control, RTC_CONTROL);
2124 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2125 clear_IO_APIC_pin(0, pin);
2126
2127 spin_lock_irqsave(&ioapic_lock, flags);
2128 io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
2129 io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
2130 spin_unlock_irqrestore(&ioapic_lock, flags);
2131}
2132
2133/*
2134 * This code may look a bit paranoid, but it's supposed to cooperate with
2135 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
2136 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
2137 * fanatically on his truly buggy board.
2138 */
2139static inline void check_timer(void)
2140{
2141 int pin1, pin2;
2142 int vector;
2143
2144 /*
2145 * get/set the timer IRQ vector:
2146 */
2147 disable_8259A_irq(0);
2148 vector = assign_irq_vector(0);
2149 set_intr_gate(vector, interrupt[0]);
2150
2151 /*
2152 * Subtle, code in do_timer_interrupt() expects an AEOI
2153 * mode for the 8259A whenever interrupts are routed
2154 * through I/O APICs. Also IRQ0 has to be enabled in
2155 * the 8259A which implies the virtual wire has to be
2156 * disabled in the local APIC.
2157 */
2158 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2159 init_8259A(1);
2160 timer_ack = 1;
2161 enable_8259A_irq(0);
2162
2163 pin1 = find_isa_irq_pin(0, mp_INT);
2164 pin2 = find_isa_irq_pin(0, mp_ExtINT);
2165
2166 printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
2167
2168 if (pin1 != -1) {
2169 /*
2170 * Ok, does IRQ0 through the IOAPIC work?
2171 */
2172 unmask_IO_APIC_irq(0);
2173 if (timer_irq_works()) {
2174 if (nmi_watchdog == NMI_IO_APIC) {
2175 disable_8259A_irq(0);
2176 setup_nmi();
2177 enable_8259A_irq(0);
2178 check_nmi_watchdog();
2179 }
2180 return;
2181 }
2182 clear_IO_APIC_pin(0, pin1);
2183 printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
2184 }
2185
2186 printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
2187 if (pin2 != -1) {
2188 printk("\n..... (found pin %d) ...", pin2);
2189 /*
2190 * legacy devices should be connected to IO APIC #0
2191 */
2192 setup_ExtINT_IRQ0_pin(pin2, vector);
2193 if (timer_irq_works()) {
2194 printk("works.\n");
2195 if (pin1 != -1)
2196 replace_pin_at_irq(0, 0, pin1, 0, pin2);
2197 else
2198 add_pin_to_irq(0, 0, pin2);
2199 if (nmi_watchdog == NMI_IO_APIC) {
2200 setup_nmi();
2201 check_nmi_watchdog();
2202 }
2203 return;
2204 }
2205 /*
2206 * Cleanup, just in case ...
2207 */
2208 clear_IO_APIC_pin(0, pin2);
2209 }
2210 printk(" failed.\n");
2211
2212 if (nmi_watchdog == NMI_IO_APIC) {
2213 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
2214 nmi_watchdog = 0;
2215 }
2216
2217 printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
2218
2219 disable_8259A_irq(0);
2220 irq_desc[0].handler = &lapic_irq_type;
2221 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
2222 enable_8259A_irq(0);
2223
2224 if (timer_irq_works()) {
2225 printk(" works.\n");
2226 return;
2227 }
2228 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
2229 printk(" failed.\n");
2230
2231 printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
2232
2233 timer_ack = 0;
2234 init_8259A(0);
2235 make_8259A_irq(0);
2236 apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
2237
2238 unlock_ExtINT_logic();
2239
2240 if (timer_irq_works()) {
2241 printk(" works.\n");
2242 return;
2243 }
2244 printk(" failed :(.\n");
2245 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
2246 "report. Then try booting with the 'noapic' option");
2247}
2248
2249/*
2250 *
2251 * IRQs that are handled by the PIC in the MPS IOAPIC case.
2252 * - IRQ2 is the cascade IRQ, and cannot be an io-apic IRQ.
2253 * Linux doesn't really care, as it's not actually used
2254 * for any interrupt handling anyway.
2255 */
2256#define PIC_IRQS (1 << PIC_CASCADE_IR)
2257
2258void __init setup_IO_APIC(void)
2259{
2260 enable_IO_APIC();
2261
2262 if (acpi_ioapic)
2263 io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
2264 else
2265 io_apic_irqs = ~PIC_IRQS;
2266
2267 printk("ENABLING IO-APIC IRQs\n");
2268
2269 /*
2270 * Set up IO-APIC IRQ routing.
2271 */
2272 if (!acpi_ioapic)
2273 setup_ioapic_ids_from_mpc();
2274 sync_Arb_IDs();
2275 setup_IO_APIC_irqs();
2276 init_IO_APIC_traps();
2277 check_timer();
2278 if (!acpi_ioapic)
2279 print_IO_APIC();
2280}
2281
2282/*
2283 * Called after all the initialization is done. If we didn't find any
2284 * APIC bugs then we can allow the modify fast path.
2285 */
2286
2287static int __init io_apic_bug_finalize(void)
2288{
2289	if (sis_apic_bug == -1)
2290 sis_apic_bug = 0;
2291 return 0;
2292}
2293
2294late_initcall(io_apic_bug_finalize);
2295
2296struct sysfs_ioapic_data {
2297 struct sys_device dev;
2298 struct IO_APIC_route_entry entry[0];
2299};
2300static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
2301
2302static int ioapic_suspend(struct sys_device *dev, u32 state)
2303{
2304 struct IO_APIC_route_entry *entry;
2305 struct sysfs_ioapic_data *data;
2306 unsigned long flags;
2307 int i;
2308
2309 data = container_of(dev, struct sysfs_ioapic_data, dev);
2310 entry = data->entry;
2311 spin_lock_irqsave(&ioapic_lock, flags);
2312	for (i = 0; i < nr_ioapic_registers[dev->id]; i++, entry++) {
2313 *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
2314 *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
2315 }
2316 spin_unlock_irqrestore(&ioapic_lock, flags);
2317
2318 return 0;
2319}
2320
2321static int ioapic_resume(struct sys_device *dev)
2322{
2323 struct IO_APIC_route_entry *entry;
2324 struct sysfs_ioapic_data *data;
2325 unsigned long flags;
2326 union IO_APIC_reg_00 reg_00;
2327 int i;
2328
2329 data = container_of(dev, struct sysfs_ioapic_data, dev);
2330 entry = data->entry;
2331
2332 spin_lock_irqsave(&ioapic_lock, flags);
2333 reg_00.raw = io_apic_read(dev->id, 0);
2334 if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
2335 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
2336 io_apic_write(dev->id, 0, reg_00.raw);
2337 }
2338	for (i = 0; i < nr_ioapic_registers[dev->id]; i++, entry++) {
2339 io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
2340 io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
2341 }
2342 spin_unlock_irqrestore(&ioapic_lock, flags);
2343
2344 return 0;
2345}
2346
2347static struct sysdev_class ioapic_sysdev_class = {
2348 set_kset_name("ioapic"),
2349 .suspend = ioapic_suspend,
2350 .resume = ioapic_resume,
2351};
2352
2353static int __init ioapic_init_sysfs(void)
2354{
2355 struct sys_device * dev;
2356 int i, size, error = 0;
2357
2358 error = sysdev_class_register(&ioapic_sysdev_class);
2359 if (error)
2360 return error;
2361
2362	for (i = 0; i < nr_ioapics; i++) {
2363 size = sizeof(struct sys_device) + nr_ioapic_registers[i]
2364 * sizeof(struct IO_APIC_route_entry);
2365 mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
2366 if (!mp_ioapic_data[i]) {
2367 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
2368 continue;
2369 }
2370 memset(mp_ioapic_data[i], 0, size);
2371 dev = &mp_ioapic_data[i]->dev;
2372 dev->id = i;
2373 dev->cls = &ioapic_sysdev_class;
2374 error = sysdev_register(dev);
2375 if (error) {
2376 kfree(mp_ioapic_data[i]);
2377 mp_ioapic_data[i] = NULL;
2378 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
2379 continue;
2380 }
2381 }
2382
2383 return 0;
2384}
2385
2386device_initcall(ioapic_init_sysfs);
2387
2388/* --------------------------------------------------------------------------
2389 ACPI-based IOAPIC Configuration
2390 -------------------------------------------------------------------------- */
2391
2392#ifdef CONFIG_ACPI_BOOT
2393
2394int __init io_apic_get_unique_id (int ioapic, int apic_id)
2395{
2396 union IO_APIC_reg_00 reg_00;
2397 static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
2398 physid_mask_t tmp;
2399 unsigned long flags;
2400 int i = 0;
2401
2402 /*
2403 * The P4 platform supports up to 256 APIC IDs on two separate APIC
2404	 * buses (one for LAPICs, one for IOAPICs), whereas its predecessors
2405	 * support only up to 16 on one shared APIC bus.
2406 *
2407 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
2408 * advantage of new APIC bus architecture.
2409 */
2410
2411 if (physids_empty(apic_id_map))
2412 apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
2413
2414 spin_lock_irqsave(&ioapic_lock, flags);
2415 reg_00.raw = io_apic_read(ioapic, 0);
2416 spin_unlock_irqrestore(&ioapic_lock, flags);
2417
2418 if (apic_id >= get_physical_broadcast()) {
2419 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
2420 "%d\n", ioapic, apic_id, reg_00.bits.ID);
2421 apic_id = reg_00.bits.ID;
2422 }
2423
2424 /*
2425 * Every APIC in a system must have a unique ID or we get lots of nice
2426 * 'stuck on smp_invalidate_needed IPI wait' messages.
2427 */
2428 if (check_apicid_used(apic_id_map, apic_id)) {
2429
2430 for (i = 0; i < get_physical_broadcast(); i++) {
2431 if (!check_apicid_used(apic_id_map, i))
2432 break;
2433 }
2434
2435 if (i == get_physical_broadcast())
2436 panic("Max apic_id exceeded!\n");
2437
2438 printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
2439 "trying %d\n", ioapic, apic_id, i);
2440
2441 apic_id = i;
2442 }
2443
2444 tmp = apicid_to_cpu_present(apic_id);
2445 physids_or(apic_id_map, apic_id_map, tmp);
2446
2447 if (reg_00.bits.ID != apic_id) {
2448 reg_00.bits.ID = apic_id;
2449
2450 spin_lock_irqsave(&ioapic_lock, flags);
2451 io_apic_write(ioapic, 0, reg_00.raw);
2452 reg_00.raw = io_apic_read(ioapic, 0);
2453 spin_unlock_irqrestore(&ioapic_lock, flags);
2454
2455 /* Sanity check */
2456 if (reg_00.bits.ID != apic_id)
2457			panic("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
2458 }
2459
2460 apic_printk(APIC_VERBOSE, KERN_INFO
2461 "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
2462
2463 return apic_id;
2464}
2465
2466
2467int __init io_apic_get_version (int ioapic)
2468{
2469 union IO_APIC_reg_01 reg_01;
2470 unsigned long flags;
2471
2472 spin_lock_irqsave(&ioapic_lock, flags);
2473 reg_01.raw = io_apic_read(ioapic, 1);
2474 spin_unlock_irqrestore(&ioapic_lock, flags);
2475
2476 return reg_01.bits.version;
2477}
2478
2479
2480int __init io_apic_get_redir_entries (int ioapic)
2481{
2482 union IO_APIC_reg_01 reg_01;
2483 unsigned long flags;
2484
2485 spin_lock_irqsave(&ioapic_lock, flags);
2486 reg_01.raw = io_apic_read(ioapic, 1);
2487 spin_unlock_irqrestore(&ioapic_lock, flags);
2488
2489 return reg_01.bits.entries;
2490}
2491
2492
2493int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
2494{
2495 struct IO_APIC_route_entry entry;
2496 unsigned long flags;
2497
2498 if (!IO_APIC_IRQ(irq)) {
2499		printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n",
2500			ioapic, irq);
2501 return -EINVAL;
2502 }
2503
2504 /*
2505 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
2506 * Note that we mask (disable) IRQs now -- these get enabled when the
2507 * corresponding device driver registers for this IRQ.
2508 */
2509
2510	memset(&entry, 0, sizeof(entry));
2511
2512 entry.delivery_mode = INT_DELIVERY_MODE;
2513 entry.dest_mode = INT_DEST_MODE;
2514 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
2515 entry.trigger = edge_level;
2516 entry.polarity = active_high_low;
2517 entry.mask = 1;
2518
2519 /*
2520 * IRQs < 16 are already in the irq_2_pin[] map
2521 */
2522 if (irq >= 16)
2523 add_pin_to_irq(irq, ioapic, pin);
2524
2525 entry.vector = assign_irq_vector(irq);
2526
2527 apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
2528 "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
2529 mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
2530 edge_level, active_high_low);
2531
2532 ioapic_register_intr(irq, entry.vector, edge_level);
2533
2534 if (!ioapic && (irq < 16))
2535 disable_8259A_irq(irq);
2536
2537 spin_lock_irqsave(&ioapic_lock, flags);
2538 io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
2539 io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
2540 spin_unlock_irqrestore(&ioapic_lock, flags);
2541
2542 return 0;
2543}
2544
2545#endif /*CONFIG_ACPI_BOOT*/
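
/*
 * For illustration only (not kernel code): routing entries are 64 bits
 * wide but the IO-APIC register window is 32 bits, which is why every
 * access above goes through the "0x10 + 2*pin" (low half) and
 * "0x11 + 2*pin" (high half) register pair.  A quick check of that
 * indexing:
 */
#include <assert.h>

static int reg_lo(int pin) { return 0x10 + 2 * pin; }	/* vector, mode, mask */
static int reg_hi(int pin) { return 0x11 + 2 * pin; }	/* destination field */

int main(void)
{
	assert(reg_lo(0) == 0x10 && reg_hi(0) == 0x11);
	assert(reg_lo(23) == 0x3e && reg_hi(23) == 0x3f);	/* last pin of 24 */
	return 0;
}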
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
new file mode 100644
index 000000000000..8b25160393c1
--- /dev/null
+++ b/arch/i386/kernel/ioport.c
@@ -0,0 +1,147 @@
1/*
2 * linux/arch/i386/kernel/ioport.c
3 *
4 * This contains the io-permission bitmap code - written by obz, with changes
5 * by Linus.
6 */
7
8#include <linux/sched.h>
9#include <linux/kernel.h>
10#include <linux/errno.h>
11#include <linux/types.h>
12#include <linux/ioport.h>
13#include <linux/smp.h>
14#include <linux/smp_lock.h>
15#include <linux/stddef.h>
16#include <linux/slab.h>
17#include <linux/thread_info.h>
18
19/* Set EXTENT bits starting at BASE in BITMAP to value NEW_VALUE. */
20static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
21{
22 unsigned long mask;
23 unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG);
24 unsigned int low_index = base & (BITS_PER_LONG-1);
25 int length = low_index + extent;
26
27 if (low_index != 0) {
28 mask = (~0UL << low_index);
29 if (length < BITS_PER_LONG)
30 mask &= ~(~0UL << length);
31 if (new_value)
32 *bitmap_base++ |= mask;
33 else
34 *bitmap_base++ &= ~mask;
35 length -= BITS_PER_LONG;
36 }
37
38 mask = (new_value ? ~0UL : 0UL);
39 while (length >= BITS_PER_LONG) {
40 *bitmap_base++ = mask;
41 length -= BITS_PER_LONG;
42 }
43
44 if (length > 0) {
45 mask = ~(~0UL << length);
46 if (new_value)
47 *bitmap_base++ |= mask;
48 else
49 *bitmap_base++ &= ~mask;
50 }
51}
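
/*
 * For illustration only (not kernel code): the word-masking above is
 * just a fast version of setting bits [base, base+extent) one by one.
 * A user-space reference implementation and a cross-word check,
 * assuming BITS_PER_LONG == 8 * sizeof(long):
 */
#include <assert.h>

#define BPL ((unsigned int)(8 * sizeof(long)))

static void set_range(unsigned long *bm, unsigned int base,
		      unsigned int extent, int new_value)
{
	unsigned int i;

	for (i = base; i < base + extent; i++) {
		if (new_value)
			bm[i / BPL] |= 1UL << (i % BPL);
		else
			bm[i / BPL] &= ~(1UL << (i % BPL));
	}
}

int main(void)
{
	unsigned long bm[2] = { 0, 0 };

	set_range(bm, BPL - 3, 6, 1);		/* straddles the word boundary */
	assert(bm[0] == ~0UL << (BPL - 3));	/* top three bits of word 0 */
	assert(bm[1] == 0x7);			/* bottom three bits of word 1 */
	return 0;
}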
52
53
54/*
55 * this changes the io permissions bitmap in the current task.
56 */
57asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
58{
59 unsigned long i, max_long, bytes, bytes_updated;
60 struct thread_struct * t = &current->thread;
61 struct tss_struct * tss;
62 unsigned long *bitmap;
63
64 if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
65 return -EINVAL;
66 if (turn_on && !capable(CAP_SYS_RAWIO))
67 return -EPERM;
68
69 /*
70 * If it's the first ioperm() call in this thread's lifetime, set the
71 * IO bitmap up. ioperm() is much less timing critical than clone(),
72	 * which is why we delay this operation until now:
73 */
74 if (!t->io_bitmap_ptr) {
75 bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
76 if (!bitmap)
77 return -ENOMEM;
78
79 memset(bitmap, 0xff, IO_BITMAP_BYTES);
80 t->io_bitmap_ptr = bitmap;
81 }
82
83 /*
84 * do it in the per-thread copy and in the TSS ...
85 *
86 * Disable preemption via get_cpu() - we must not switch away
87 * because the ->io_bitmap_max value must match the bitmap
88 * contents:
89 */
90 tss = &per_cpu(init_tss, get_cpu());
91
92 set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
93
94 /*
95 * Search for a (possibly new) maximum. This is simple and stupid,
96 * to keep it obviously correct:
97 */
98 max_long = 0;
99 for (i = 0; i < IO_BITMAP_LONGS; i++)
100 if (t->io_bitmap_ptr[i] != ~0UL)
101 max_long = i;
102
103 bytes = (max_long + 1) * sizeof(long);
104 bytes_updated = max(bytes, t->io_bitmap_max);
105
106 t->io_bitmap_max = bytes;
107
108 /*
109 * Sets the lazy trigger so that the next I/O operation will
110 * reload the correct bitmap.
111 */
112 tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
113
114 put_cpu();
115
116 return 0;
117}
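
/*
 * For illustration only (not kernel code): a hypothetical user-space
 * caller.  ioperm(2) requires CAP_SYS_RAWIO and only covers ports
 * below 0x400; the parallel-port base 0x378 is just an example value.
 */
#include <stdio.h>
#include <sys/io.h>

int main(void)
{
	if (ioperm(0x378, 3, 1)) {	/* request ports 0x378-0x37a */
		perror("ioperm");
		return 1;
	}
	outb(0x00, 0x378);		/* I/O now legal from ring 3 */
	ioperm(0x378, 3, 0);		/* drop the access again */
	return 0;
}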
118
119/*
120 * sys_iopl has to be used when you want to access the IO ports
121 * beyond the 0x3ff range: to get the full 65536 ports bitmapped
122 * you'd need 8kB of bitmaps/process, which is a bit excessive.
123 *
124 * Here we just change the eflags value on the stack: we allow
125 * only the super-user to do it. This depends on the stack-layout
126 * on system-call entry - see also fork() and the signal handling
127 * code.
128 */
129
130asmlinkage long sys_iopl(unsigned long unused)
131{
132 volatile struct pt_regs * regs = (struct pt_regs *) &unused;
133 unsigned int level = regs->ebx;
134 unsigned int old = (regs->eflags >> 12) & 3;
135
136 if (level > 3)
137 return -EINVAL;
138 /* Trying to gain more privileges? */
139 if (level > old) {
140 if (!capable(CAP_SYS_RAWIO))
141 return -EPERM;
142 }
143 regs->eflags = (regs->eflags &~ 0x3000UL) | (level << 12);
144 /* Make sure we return the long way (not sysenter) */
145 set_thread_flag(TIF_IRET);
146 return 0;
147}
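
/*
 * For illustration only (not kernel code): iopl(2) is the heavier
 * hammer -- it raises the EFLAGS IOPL field instead of touching the
 * bitmap, opening all 65536 ports (and allowing cli/sti) at once.
 */
#include <stdio.h>
#include <sys/io.h>

int main(void)
{
	if (iopl(3)) {			/* level 3 = unrestricted port access */
		perror("iopl");
		return 1;
	}
	outb(0x80, 0x80);		/* port 0x80: traditional POST/delay port */
	iopl(0);			/* back to normal */
	return 0;
}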
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
new file mode 100644
index 000000000000..73945a3c53c4
--- /dev/null
+++ b/arch/i386/kernel/irq.c
@@ -0,0 +1,261 @@
1/*
2 * linux/arch/i386/kernel/irq.c
3 *
4 * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
5 *
6 * This file contains the lowest level x86-specific interrupt
7 * entry, irq-stacks and irq statistics code. All the remaining
8 * irq logic is done by the generic kernel/irq/ code and
9 * by the x86-specific irq controller code. (e.g. i8259.c and
10 * io_apic.c.)
11 */
12
13#include <asm/uaccess.h>
14#include <linux/module.h>
15#include <linux/seq_file.h>
16#include <linux/interrupt.h>
17#include <linux/kernel_stat.h>
18
19DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
20EXPORT_PER_CPU_SYMBOL(irq_stat);
21
22#ifndef CONFIG_X86_LOCAL_APIC
23/*
24 * 'what should we do if we get a hw irq event on an illegal vector'.
25 * Each architecture has to answer this itself.
26 */
27void ack_bad_irq(unsigned int irq)
28{
29 printk("unexpected IRQ trap at vector %02x\n", irq);
30}
31#endif
32
33#ifdef CONFIG_4KSTACKS
34/*
35 * per-CPU IRQ handling contexts (thread information and stack)
36 */
37union irq_ctx {
38 struct thread_info tinfo;
39 u32 stack[THREAD_SIZE/sizeof(u32)];
40};
41
42static union irq_ctx *hardirq_ctx[NR_CPUS];
43static union irq_ctx *softirq_ctx[NR_CPUS];
44#endif
45
46/*
47 * do_IRQ handles all normal device IRQ's (the special
48 * SMP cross-CPU interrupts have their own specific
49 * handlers).
50 */
51fastcall unsigned int do_IRQ(struct pt_regs *regs)
52{
53 /* high bits used in ret_from_ code */
54 int irq = regs->orig_eax & 0xff;
55#ifdef CONFIG_4KSTACKS
56 union irq_ctx *curctx, *irqctx;
57 u32 *isp;
58#endif
59
60 irq_enter();
61#ifdef CONFIG_DEBUG_STACKOVERFLOW
62 /* Debugging check for stack overflow: is there less than 1KB free? */
63 {
64 long esp;
65
66 __asm__ __volatile__("andl %%esp,%0" :
67 "=r" (esp) : "0" (THREAD_SIZE - 1));
68 if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
69 printk("do_IRQ: stack overflow: %ld\n",
70 esp - sizeof(struct thread_info));
71 dump_stack();
72 }
73 }
74#endif
75
76#ifdef CONFIG_4KSTACKS
77
78 curctx = (union irq_ctx *) current_thread_info();
79 irqctx = hardirq_ctx[smp_processor_id()];
80
81 /*
82 * this is where we switch to the IRQ stack. However, if we are
83 * already using the IRQ stack (because we interrupted a hardirq
84 * handler) we can't do that and just have to keep using the
85 * current stack (which is the irq stack already after all)
86 */
87 if (curctx != irqctx) {
88 int arg1, arg2, ebx;
89
90 /* build the stack frame on the IRQ stack */
91 isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
92 irqctx->tinfo.task = curctx->tinfo.task;
93 irqctx->tinfo.previous_esp = current_stack_pointer;
94
95 asm volatile(
96 " xchgl %%ebx,%%esp \n"
97 " call __do_IRQ \n"
98 " movl %%ebx,%%esp \n"
99 : "=a" (arg1), "=d" (arg2), "=b" (ebx)
100 : "0" (irq), "1" (regs), "2" (isp)
101 : "memory", "cc", "ecx"
102 );
103 } else
104#endif
105 __do_IRQ(irq, regs);
106
107 irq_exit();
108
109 return 1;
110}
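
/*
 * For illustration only (not kernel code): the overflow check above
 * relies on the kernel stack being THREAD_SIZE aligned, so masking
 * esp with (THREAD_SIZE - 1) yields the offset into the stack, and
 * everything between the thread_info at offset 0 and esp is the free
 * space.  A user-space model with stand-in constants:
 */
#include <assert.h>

#define THREAD_SIZE	4096	/* 4K stacks, as with CONFIG_4KSTACKS */
#define TI_SIZE		64	/* stand-in for sizeof(struct thread_info) */
#define STACK_WARN	1024	/* warn below 1KB free, as above */

static int stack_low(unsigned long esp)
{
	unsigned long free = esp & (THREAD_SIZE - 1);

	return free < TI_SIZE + STACK_WARN;
}

int main(void)
{
	assert(stack_low(0xc0000400));	/* only 1KB above the base */
	assert(!stack_low(0xc0000f00));	/* near the top, plenty left */
	return 0;
}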
111
112#ifdef CONFIG_4KSTACKS
113
114/*
115 * These should really be __section__(".bss.page_aligned") as well, but
116 * gcc 3.0 and earlier don't handle that correctly.
117 */
118static char softirq_stack[NR_CPUS * THREAD_SIZE]
119 __attribute__((__aligned__(THREAD_SIZE)));
120
121static char hardirq_stack[NR_CPUS * THREAD_SIZE]
122 __attribute__((__aligned__(THREAD_SIZE)));
123
124/*
125 * allocate per-cpu stacks for hardirq and for softirq processing
126 */
127void irq_ctx_init(int cpu)
128{
129 union irq_ctx *irqctx;
130
131 if (hardirq_ctx[cpu])
132 return;
133
134 irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
135 irqctx->tinfo.task = NULL;
136 irqctx->tinfo.exec_domain = NULL;
137 irqctx->tinfo.cpu = cpu;
138 irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
139 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
140
141 hardirq_ctx[cpu] = irqctx;
142
143 irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
144 irqctx->tinfo.task = NULL;
145 irqctx->tinfo.exec_domain = NULL;
146 irqctx->tinfo.cpu = cpu;
147 irqctx->tinfo.preempt_count = SOFTIRQ_OFFSET;
148 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
149
150 softirq_ctx[cpu] = irqctx;
151
152 printk("CPU %u irqstacks, hard=%p soft=%p\n",
153 cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
154}
155
156extern asmlinkage void __do_softirq(void);
157
158asmlinkage void do_softirq(void)
159{
160 unsigned long flags;
161 struct thread_info *curctx;
162 union irq_ctx *irqctx;
163 u32 *isp;
164
165 if (in_interrupt())
166 return;
167
168 local_irq_save(flags);
169
170 if (local_softirq_pending()) {
171 curctx = current_thread_info();
172 irqctx = softirq_ctx[smp_processor_id()];
173 irqctx->tinfo.task = curctx->task;
174 irqctx->tinfo.previous_esp = current_stack_pointer;
175
176 /* build the stack frame on the softirq stack */
177 isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
178
179 asm volatile(
180 " xchgl %%ebx,%%esp \n"
181 " call __do_softirq \n"
182 " movl %%ebx,%%esp \n"
183 : "=b"(isp)
184 : "0"(isp)
185 : "memory", "cc", "edx", "ecx", "eax"
186 );
187 }
188
189 local_irq_restore(flags);
190}
191
192EXPORT_SYMBOL(do_softirq);
193#endif
194
195/*
196 * Interrupt statistics:
197 */
198
199atomic_t irq_err_count;
200
201/*
202 * /proc/interrupts printing:
203 */
204
205int show_interrupts(struct seq_file *p, void *v)
206{
207 int i = *(loff_t *) v, j;
208 struct irqaction * action;
209 unsigned long flags;
210
211 if (i == 0) {
212 seq_printf(p, " ");
213 for (j=0; j<NR_CPUS; j++)
214 if (cpu_online(j))
215 seq_printf(p, "CPU%d ",j);
216 seq_putc(p, '\n');
217 }
218
219 if (i < NR_IRQS) {
220 spin_lock_irqsave(&irq_desc[i].lock, flags);
221 action = irq_desc[i].action;
222 if (!action)
223 goto skip;
224 seq_printf(p, "%3d: ",i);
225#ifndef CONFIG_SMP
226 seq_printf(p, "%10u ", kstat_irqs(i));
227#else
228 for (j = 0; j < NR_CPUS; j++)
229 if (cpu_online(j))
230 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
231#endif
232 seq_printf(p, " %14s", irq_desc[i].handler->typename);
233 seq_printf(p, " %s", action->name);
234
235 for (action=action->next; action; action = action->next)
236 seq_printf(p, ", %s", action->name);
237
238 seq_putc(p, '\n');
239skip:
240 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
241 } else if (i == NR_IRQS) {
242 seq_printf(p, "NMI: ");
243 for (j = 0; j < NR_CPUS; j++)
244 if (cpu_online(j))
245 seq_printf(p, "%10u ", nmi_count(j));
246 seq_putc(p, '\n');
247#ifdef CONFIG_X86_LOCAL_APIC
248 seq_printf(p, "LOC: ");
249 for (j = 0; j < NR_CPUS; j++)
250 if (cpu_online(j))
251 seq_printf(p, "%10u ",
252 per_cpu(irq_stat,j).apic_timer_irqs);
253 seq_putc(p, '\n');
254#endif
255 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
256#if defined(CONFIG_X86_IO_APIC)
257 seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
258#endif
259 }
260 return 0;
261}
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
new file mode 100644
index 000000000000..671681659243
--- /dev/null
+++ b/arch/i386/kernel/kprobes.c
@@ -0,0 +1,385 @@
1/*
2 * Kernel Probes (KProbes)
3 * arch/i386/kernel/kprobes.c
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 *
19 * Copyright (C) IBM Corporation, 2002, 2004
20 *
21 * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
22 * Probes initial implementation ( includes contributions from
23 * Rusty Russell).
24 * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
25 * interface to access function arguments.
26 */
27
28#include <linux/config.h>
29#include <linux/kprobes.h>
30#include <linux/ptrace.h>
31#include <linux/spinlock.h>
32#include <linux/preempt.h>
33#include <asm/kdebug.h>
34#include <asm/desc.h>
35
36/* kprobe_status settings */
37#define KPROBE_HIT_ACTIVE 0x00000001
38#define KPROBE_HIT_SS 0x00000002
39
40static struct kprobe *current_kprobe;
41static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags;
42static struct pt_regs jprobe_saved_regs;
43static long *jprobe_saved_esp;
44/* copy of the kernel stack at the probe fire time */
45static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
46void jprobe_return_end(void);
47
48/*
49 * returns non-zero if opcode modifies the interrupt flag.
50 */
51static inline int is_IF_modifier(kprobe_opcode_t opcode)
52{
53 switch (opcode) {
54 case 0xfa: /* cli */
55 case 0xfb: /* sti */
56 case 0xcf: /* iret/iretd */
57 case 0x9d: /* popf/popfd */
58 return 1;
59 }
60 return 0;
61}
62
63int arch_prepare_kprobe(struct kprobe *p)
64{
65 return 0;
66}
67
68void arch_copy_kprobe(struct kprobe *p)
69{
70 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
71}
72
73void arch_remove_kprobe(struct kprobe *p)
74{
75}
76
77static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs)
78{
79 *p->addr = p->opcode;
80 regs->eip = (unsigned long)p->addr;
81}
82
83static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
84{
85 regs->eflags |= TF_MASK;
86 regs->eflags &= ~IF_MASK;
87	/* single-step inline if the instruction is an int3 */
88 if (p->opcode == BREAKPOINT_INSTRUCTION)
89 regs->eip = (unsigned long)p->addr;
90 else
91 regs->eip = (unsigned long)&p->ainsn.insn;
92}
93
94/*
95 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
96 * remain disabled throughout this function.
97 */
98static int kprobe_handler(struct pt_regs *regs)
99{
100 struct kprobe *p;
101 int ret = 0;
102 kprobe_opcode_t *addr = NULL;
103 unsigned long *lp;
104
105 /* We're in an interrupt, but this is clear and BUG()-safe. */
106 preempt_disable();
107	/* Check if the application is using an LDT entry for its code segment and
108 * calculate the address by reading the base address from the LDT entry.
109 */
110 if ((regs->xcs & 4) && (current->mm)) {
111 lp = (unsigned long *) ((unsigned long)((regs->xcs >> 3) * 8)
112 + (char *) current->mm->context.ldt);
113 addr = (kprobe_opcode_t *) (get_desc_base(lp) + regs->eip -
114 sizeof(kprobe_opcode_t));
115 } else {
116 addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));
117 }
118 /* Check we're not actually recursing */
119 if (kprobe_running()) {
120		/* We *are* holding the lock here, so this is safe.
121 Disarm the probe we just hit, and ignore it. */
122 p = get_kprobe(addr);
123 if (p) {
124 if (kprobe_status == KPROBE_HIT_SS) {
125 regs->eflags &= ~TF_MASK;
126 regs->eflags |= kprobe_saved_eflags;
127 unlock_kprobes();
128 goto no_kprobe;
129 }
130 disarm_kprobe(p, regs);
131 ret = 1;
132 } else {
133 p = current_kprobe;
134 if (p->break_handler && p->break_handler(p, regs)) {
135 goto ss_probe;
136 }
137 }
138		/* If it's not ours, it can't be a delete race (we hold the lock). */
139 goto no_kprobe;
140 }
141
142 lock_kprobes();
143 p = get_kprobe(addr);
144 if (!p) {
145 unlock_kprobes();
146 if (regs->eflags & VM_MASK) {
147 /* We are in virtual-8086 mode. Return 0 */
148 goto no_kprobe;
149 }
150
151 if (*addr != BREAKPOINT_INSTRUCTION) {
152 /*
153 * The breakpoint instruction was removed right
154 * after we hit it. Another cpu has removed
155 * either a probepoint or a debugger breakpoint
156 * at this address. In either case, no further
157 * handling of this interrupt is appropriate.
158 */
159 ret = 1;
160 }
161 /* Not one of ours: let kernel handle it */
162 goto no_kprobe;
163 }
164
165 kprobe_status = KPROBE_HIT_ACTIVE;
166 current_kprobe = p;
167 kprobe_saved_eflags = kprobe_old_eflags
168 = (regs->eflags & (TF_MASK | IF_MASK));
169 if (is_IF_modifier(p->opcode))
170 kprobe_saved_eflags &= ~IF_MASK;
171
172 if (p->pre_handler && p->pre_handler(p, regs))
173 /* handler has already set things up, so skip ss setup */
174 return 1;
175
176ss_probe:
177 prepare_singlestep(p, regs);
178 kprobe_status = KPROBE_HIT_SS;
179 return 1;
180
181no_kprobe:
182 preempt_enable_no_resched();
183 return ret;
184}
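
/*
 * For illustration only (not kernel code): the LDT lookup above
 * decodes a segment selector by hand -- bits 3..15 are the descriptor
 * index and each descriptor is 8 bytes, hence "(xcs >> 3) * 8";
 * bit 2 (0x4) is the table-indicator flag selecting the LDT over the
 * GDT, and bits 0..1 are the RPL.
 */
#include <assert.h>

int main(void)
{
	unsigned int xcs = 0x3f;	/* index 7, TI = 1, RPL = 3 */

	assert(xcs & 4);		/* refers to the LDT */
	assert((xcs >> 3) * 8 == 56);	/* byte offset of descriptor 7 */
	assert((xcs & 3) == 3);		/* user-mode privilege level */
	return 0;
}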
185
186/*
187 * Called after single-stepping. p->addr is the address of the
188 * instruction whose first byte has been replaced by the "int 3"
189 * instruction. To avoid the SMP problems that can occur when we
190 * temporarily put back the original opcode to single-step, we
191 * single-stepped a copy of the instruction. The address of this
192 * copy is p->ainsn.insn.
193 *
194 * This function prepares to return from the post-single-step
195 * interrupt. We have to fix up the stack as follows:
196 *
197 * 0) Except in the case of absolute or indirect jump or call instructions,
198 * the new eip is relative to the copied instruction. We need to make
199 * it relative to the original instruction.
200 *
201 * 1) If the single-stepped instruction was pushfl, then the TF and IF
202 * flags are set in the just-pushed eflags, and may need to be cleared.
203 *
204 * 2) If the single-stepped instruction was a call, the return address
205 * that is atop the stack is the address following the copied instruction.
206 * We need to make it the address following the original instruction.
207 */
208static void resume_execution(struct kprobe *p, struct pt_regs *regs)
209{
210 unsigned long *tos = (unsigned long *)&regs->esp;
211 unsigned long next_eip = 0;
212 unsigned long copy_eip = (unsigned long)&p->ainsn.insn;
213 unsigned long orig_eip = (unsigned long)p->addr;
214
215 switch (p->ainsn.insn[0]) {
216 case 0x9c: /* pushfl */
217 *tos &= ~(TF_MASK | IF_MASK);
218 *tos |= kprobe_old_eflags;
219 break;
220 case 0xe8: /* call relative - Fix return addr */
221 *tos = orig_eip + (*tos - copy_eip);
222 break;
223 case 0xff:
224 if ((p->ainsn.insn[1] & 0x30) == 0x10) {
225 /* call absolute, indirect */
226 /* Fix return addr; eip is correct. */
227 next_eip = regs->eip;
228 *tos = orig_eip + (*tos - copy_eip);
229 } else if (((p->ainsn.insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */
230 ((p->ainsn.insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */
231 /* eip is correct. */
232 next_eip = regs->eip;
233 }
234 break;
235 case 0xea: /* jmp absolute -- eip is correct */
236 next_eip = regs->eip;
237 break;
238 default:
239 break;
240 }
241
242 regs->eflags &= ~TF_MASK;
243 if (next_eip) {
244 regs->eip = next_eip;
245 } else {
246 regs->eip = orig_eip + (regs->eip - copy_eip);
247 }
248}
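
/*
 * For illustration only (not kernel code): a worked example of the
 * relocation above with made-up addresses.  If the probed instruction
 * lives at orig_eip 0x1000, its copy at copy_eip 0x2000, and stepping
 * the 5-byte copy left eip at 0x2005, the resumed eip must become
 * 0x1000 + (0x2005 - 0x2000) = 0x1005.
 */
#include <assert.h>

int main(void)
{
	unsigned long orig_eip = 0x1000, copy_eip = 0x2000, eip = 0x2005;

	assert(orig_eip + (eip - copy_eip) == 0x1005);
	return 0;
}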
249
250/*
251 * Interrupts are disabled on entry as trap1 is an interrupt gate and they
252 * remain disabled throughout this function, and we hold the kprobe lock.
253 */
254static inline int post_kprobe_handler(struct pt_regs *regs)
255{
256 if (!kprobe_running())
257 return 0;
258
259 if (current_kprobe->post_handler)
260 current_kprobe->post_handler(current_kprobe, regs, 0);
261
262 resume_execution(current_kprobe, regs);
263 regs->eflags |= kprobe_saved_eflags;
264
265 unlock_kprobes();
266 preempt_enable_no_resched();
267
268 /*
269 * if somebody else is singlestepping across a probe point, eflags
270 * will have TF set, in which case, continue the remaining processing
271 * of do_debug, as if this is not a probe hit.
272 */
273 if (regs->eflags & TF_MASK)
274 return 0;
275
276 return 1;
277}
278
279/* Interrupts disabled, kprobe_lock held. */
280static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
281{
282 if (current_kprobe->fault_handler
283 && current_kprobe->fault_handler(current_kprobe, regs, trapnr))
284 return 1;
285
286 if (kprobe_status & KPROBE_HIT_SS) {
287 resume_execution(current_kprobe, regs);
288 regs->eflags |= kprobe_old_eflags;
289
290 unlock_kprobes();
291 preempt_enable_no_resched();
292 }
293 return 0;
294}
295
296/*
297 * Wrapper routine for handling exceptions.
298 */
299int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
300 void *data)
301{
302 struct die_args *args = (struct die_args *)data;
303 switch (val) {
304 case DIE_INT3:
305 if (kprobe_handler(args->regs))
306 return NOTIFY_STOP;
307 break;
308 case DIE_DEBUG:
309 if (post_kprobe_handler(args->regs))
310 return NOTIFY_STOP;
311 break;
312 case DIE_GPF:
313 if (kprobe_running() &&
314 kprobe_fault_handler(args->regs, args->trapnr))
315 return NOTIFY_STOP;
316 break;
317 case DIE_PAGE_FAULT:
318 if (kprobe_running() &&
319 kprobe_fault_handler(args->regs, args->trapnr))
320 return NOTIFY_STOP;
321 break;
322 default:
323 break;
324 }
325 return NOTIFY_DONE;
326}
327
328int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
329{
330 struct jprobe *jp = container_of(p, struct jprobe, kp);
331 unsigned long addr;
332
333 jprobe_saved_regs = *regs;
334 jprobe_saved_esp = &regs->esp;
335 addr = (unsigned long)jprobe_saved_esp;
336
337 /*
338 * TBD: As Linus pointed out, gcc assumes that the callee
339 * owns the argument space and could overwrite it, e.g.
340 * tailcall optimization. So, to be absolutely safe
341 * we also save and restore enough stack bytes to cover
342 * the argument area.
343 */
344 memcpy(jprobes_stack, (kprobe_opcode_t *) addr, MIN_STACK_SIZE(addr));
345 regs->eflags &= ~IF_MASK;
346 regs->eip = (unsigned long)(jp->entry);
347 return 1;
348}
349
350void jprobe_return(void)
351{
352 preempt_enable_no_resched();
353 asm volatile (" xchgl %%ebx,%%esp \n"
354 " int3 \n"
355 " .globl jprobe_return_end \n"
356 " jprobe_return_end: \n"
357 " nop \n"::"b"
358 (jprobe_saved_esp):"memory");
359}
360
361int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
362{
363 u8 *addr = (u8 *) (regs->eip - 1);
364 unsigned long stack_addr = (unsigned long)jprobe_saved_esp;
365 struct jprobe *jp = container_of(p, struct jprobe, kp);
366
367 if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
368 if (&regs->esp != jprobe_saved_esp) {
369 struct pt_regs *saved_regs =
370 container_of(jprobe_saved_esp, struct pt_regs, esp);
371 printk("current esp %p does not match saved esp %p\n",
372 &regs->esp, jprobe_saved_esp);
373 printk("Saved registers for jprobe %p\n", jp);
374 show_registers(saved_regs);
375 printk("Current registers\n");
376 show_registers(regs);
377 BUG();
378 }
379 *regs = jprobe_saved_regs;
380 memcpy((kprobe_opcode_t *) stack_addr, jprobes_stack,
381 MIN_STACK_SIZE(stack_addr));
382 return 1;
383 }
384 return 0;
385}
diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c
new file mode 100644
index 000000000000..bb50afbee921
--- /dev/null
+++ b/arch/i386/kernel/ldt.c
@@ -0,0 +1,255 @@
1/*
2 * linux/arch/i386/kernel/ldt.c
3 *
4 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
5 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
6 */
7
8#include <linux/errno.h>
9#include <linux/sched.h>
10#include <linux/string.h>
11#include <linux/mm.h>
12#include <linux/smp.h>
13#include <linux/smp_lock.h>
14#include <linux/vmalloc.h>
15#include <linux/slab.h>
16
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <asm/ldt.h>
20#include <asm/desc.h>
21
22#ifdef CONFIG_SMP /* avoids "defined but not used" warning */
23static void flush_ldt(void *null)
24{
25 if (current->active_mm)
26 load_LDT(&current->active_mm->context);
27}
28#endif
29
30static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
31{
32 void *oldldt;
33 void *newldt;
34 int oldsize;
35
36 if (mincount <= pc->size)
37 return 0;
38 oldsize = pc->size;
39 mincount = (mincount+511)&(~511);
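	/* round up to a multiple of 512 entries, i.e. whole pages:
	 * 512 * LDT_ENTRY_SIZE == 4096 bytes on i386 */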
40 if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
41 newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
42 else
43 newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
44
45 if (!newldt)
46 return -ENOMEM;
47
48 if (oldsize)
49 memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
50 oldldt = pc->ldt;
51 memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
52 pc->ldt = newldt;
53 wmb();
54 pc->size = mincount;
55 wmb();
56
57 if (reload) {
58#ifdef CONFIG_SMP
59 cpumask_t mask;
60 preempt_disable();
61 load_LDT(pc);
62 mask = cpumask_of_cpu(smp_processor_id());
63 if (!cpus_equal(current->mm->cpu_vm_mask, mask))
64 smp_call_function(flush_ldt, NULL, 1, 1);
65 preempt_enable();
66#else
67 load_LDT(pc);
68#endif
69 }
70 if (oldsize) {
71 if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
72 vfree(oldldt);
73 else
74 kfree(oldldt);
75 }
76 return 0;
77}
78
79static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
80{
81 int err = alloc_ldt(new, old->size, 0);
82 if (err < 0)
83 return err;
84 memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
85 return 0;
86}
87
88/*
89 * we do not have to muck with descriptors here, that is
90 * done in switch_mm() as needed.
91 */
92int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
93{
94 struct mm_struct * old_mm;
95 int retval = 0;
96
97 init_MUTEX(&mm->context.sem);
98 mm->context.size = 0;
99 old_mm = current->mm;
100 if (old_mm && old_mm->context.size > 0) {
101 down(&old_mm->context.sem);
102 retval = copy_ldt(&mm->context, &old_mm->context);
103 up(&old_mm->context.sem);
104 }
105 return retval;
106}
107
108/*
109 * No need to lock the MM as we are the last user
110 */
111void destroy_context(struct mm_struct *mm)
112{
113 if (mm->context.size) {
114 if (mm == current->active_mm)
115 clear_LDT();
116 if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
117 vfree(mm->context.ldt);
118 else
119 kfree(mm->context.ldt);
120 mm->context.size = 0;
121 }
122}
123
124static int read_ldt(void __user * ptr, unsigned long bytecount)
125{
126 int err;
127 unsigned long size;
128 struct mm_struct * mm = current->mm;
129
130 if (!mm->context.size)
131 return 0;
132 if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
133 bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
134
135 down(&mm->context.sem);
136 size = mm->context.size*LDT_ENTRY_SIZE;
137 if (size > bytecount)
138 size = bytecount;
139
140 err = 0;
141 if (copy_to_user(ptr, mm->context.ldt, size))
142 err = -EFAULT;
143 up(&mm->context.sem);
144 if (err < 0)
145 goto error_return;
146 if (size != bytecount) {
147 /* zero-fill the rest */
148 if (clear_user(ptr+size, bytecount-size) != 0) {
149 err = -EFAULT;
150 goto error_return;
151 }
152 }
153 return bytecount;
154error_return:
155 return err;
156}
157
158static int read_default_ldt(void __user * ptr, unsigned long bytecount)
159{
160 int err;
161 unsigned long size;
162 void *address;
163
164 err = 0;
165 address = &default_ldt[0];
166 size = 5*sizeof(struct desc_struct);
167 if (size > bytecount)
168 size = bytecount;
169
170 err = size;
171 if (copy_to_user(ptr, address, size))
172 err = -EFAULT;
173
174 return err;
175}
176
177static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
178{
179 struct mm_struct * mm = current->mm;
180 __u32 entry_1, entry_2, *lp;
181 int error;
182 struct user_desc ldt_info;
183
184 error = -EINVAL;
185 if (bytecount != sizeof(ldt_info))
186 goto out;
187 error = -EFAULT;
188 if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
189 goto out;
190
191 error = -EINVAL;
192 if (ldt_info.entry_number >= LDT_ENTRIES)
193 goto out;
194 if (ldt_info.contents == 3) {
195 if (oldmode)
196 goto out;
197 if (ldt_info.seg_not_present == 0)
198 goto out;
199 }
200
201 down(&mm->context.sem);
202 if (ldt_info.entry_number >= mm->context.size) {
203 error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
204 if (error < 0)
205 goto out_unlock;
206 }
207
208 lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
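	/* each descriptor is 8 bytes, so entry_number << 3 is its byte
	 * offset; lp addresses the two 32-bit halves filled in below */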
209
210 /* Allow LDTs to be cleared by the user. */
211 if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
212 if (oldmode || LDT_empty(&ldt_info)) {
213 entry_1 = 0;
214 entry_2 = 0;
215 goto install;
216 }
217 }
218
219 entry_1 = LDT_entry_a(&ldt_info);
220 entry_2 = LDT_entry_b(&ldt_info);
221 if (oldmode)
222 entry_2 &= ~(1 << 20);
223
224 /* Install the new entry ... */
225install:
226 *lp = entry_1;
227 *(lp+1) = entry_2;
228 error = 0;
229
230out_unlock:
231 up(&mm->context.sem);
232out:
233 return error;
234}
235
236asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
237{
238 int ret = -ENOSYS;
239
240 switch (func) {
241 case 0:
242 ret = read_ldt(ptr, bytecount);
243 break;
244 case 1:
245 ret = write_ldt(ptr, bytecount, 1);
246 break;
247 case 2:
248 ret = read_default_ldt(ptr, bytecount);
249 break;
250 case 0x11:
251 ret = write_ldt(ptr, bytecount, 0);
252 break;
253 }
254 return ret;
255}
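/*
 * Hypothetical userspace sketch (not part of the file above), showing how
 * sys_modify_ldt() is reached through the raw syscall interface: func 0
 * dispatches to read_ldt(), which copies out at most
 * LDT_ENTRIES * LDT_ENTRY_SIZE bytes and zero-fills the rest. Assumes
 * glibc's syscall(2) wrapper and the exported <asm/ldt.h> constants.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/ldt.h>

int main(void)
{
	unsigned char buf[16 * LDT_ENTRY_SIZE];	/* room for 16 entries */
	long n = syscall(SYS_modify_ldt, 0, buf, sizeof(buf));

	if (n < 0) {
		perror("modify_ldt");
		return 1;
	}
	printf("read %ld bytes of LDT data\n", n);
	return 0;
}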
diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c
new file mode 100644
index 000000000000..8600faeea29d
--- /dev/null
+++ b/arch/i386/kernel/mca.c
@@ -0,0 +1,474 @@
1/*
2 * linux/arch/i386/kernel/mca.c
3 * Written by Martin Kolinek, February 1996
4 *
5 * Changes:
6 *
7 * Chris Beauregard July 28th, 1996
8 * - Fixed up integrated SCSI detection
9 *
10 * Chris Beauregard August 3rd, 1996
11 * - Made mca_info local
12 * - Made integrated registers accessible through standard function calls
13 * - Added name field
14 * - More sanity checking
15 *
16 * Chris Beauregard August 9th, 1996
17 * - Rewrote /proc/mca
18 *
19 * Chris Beauregard January 7th, 1997
20 * - Added basic NMI-processing
21 * - Added more information to mca_info structure
22 *
23 * David Weinehall October 12th, 1998
24 * - Made a lot of cleaning up in the source
25 * - Added use of save_flags / restore_flags
26 * - Added the 'driver_loaded' flag in MCA_adapter
27 * - Added an alternative implementation of ZP Gu's mca_find_unused_adapter
28 *
29 * David Weinehall March 24th, 1999
30 * - Fixed the output of 'Driver Installed' in /proc/mca/pos
31 * - Made the Integrated Video & SCSI show up even if they have id 0000
32 *
33 * Alexander Viro November 9th, 1999
34 * - Switched to regular procfs methods
35 *
36 * Alfred Arnold & David Weinehall August 23rd, 2000
37 * - Added support for Planar POS-registers
38 */
39
40#include <linux/module.h>
41#include <linux/types.h>
42#include <linux/errno.h>
43#include <linux/kernel.h>
44#include <linux/mca.h>
45#include <asm/system.h>
46#include <asm/io.h>
47#include <linux/proc_fs.h>
48#include <linux/mman.h>
49#include <linux/config.h>
50#include <linux/mm.h>
51#include <linux/pagemap.h>
52#include <linux/ioport.h>
53#include <asm/uaccess.h>
54#include <linux/init.h>
55#include <asm/arch_hooks.h>
56
57static unsigned char which_scsi = 0;
58
59int MCA_bus = 0;
60EXPORT_SYMBOL(MCA_bus);
61
62/*
63 * Motherboard register spinlock. Untested on SMP at the moment, but
64 * are there any MCA SMP boxes?
65 *
66 * Yes - Alan
67 */
68static DEFINE_SPINLOCK(mca_lock);
69
70/* Build the status info for the adapter */
71
72static void mca_configure_adapter_status(struct mca_device *mca_dev) {
73 mca_dev->status = MCA_ADAPTER_NONE;
74
75 mca_dev->pos_id = mca_dev->pos[0]
76 + (mca_dev->pos[1] << 8);
77
78 if(!mca_dev->pos_id && mca_dev->slot < MCA_MAX_SLOT_NR) {
79
80 /* id = 0x0000 usually indicates hardware failure,
81 * however, ZP Gu <zpg@castle.net> reports that his 9556
82 * has 0x0000 as id and everything still works. There
83 * also seems to be an adapter with id = 0x0000; the
84 * NCR Parallel Bus Memory Card. Until this is confirmed,
85 * however, this code will stay.
86 */
87
88 mca_dev->status = MCA_ADAPTER_ERROR;
89
90 return;
91 } else if(mca_dev->pos_id != 0xffff) {
92
93 /* 0xffff usually indicates that there's no adapter,
94 * however, some integrated adapters may have 0xffff as
95 * their id and still be valid. Examples are on-board
96 * VGA of the 55sx, the integrated SCSI of the 56 & 57,
97 * and possibly also the 95 ULTIMEDIA.
98 */
99
100 mca_dev->status = MCA_ADAPTER_NORMAL;
101 }
102
103 if((mca_dev->pos_id == 0xffff ||
104 mca_dev->pos_id == 0x0000) && mca_dev->slot >= MCA_MAX_SLOT_NR) {
105 int j;
106
107 for(j = 2; j < 8; j++) {
108 if(mca_dev->pos[j] != 0xff) {
109 mca_dev->status = MCA_ADAPTER_NORMAL;
110 break;
111 }
112 }
113 }
114
115 if(!(mca_dev->pos[2] & MCA_ENABLED)) {
116
117 /* enabled bit is in POS 2 */
118
119 mca_dev->status = MCA_ADAPTER_DISABLED;
120 }
121} /* mca_configure_adapter_status */
122
123/*--------------------------------------------------------------------*/
124
125static struct resource mca_standard_resources[] = {
126 { .start = 0x60, .end = 0x60, .name = "system control port B (MCA)" },
127 { .start = 0x90, .end = 0x90, .name = "arbitration (MCA)" },
128 { .start = 0x91, .end = 0x91, .name = "card Select Feedback (MCA)" },
129 { .start = 0x92, .end = 0x92, .name = "system Control port A (MCA)" },
130 { .start = 0x94, .end = 0x94, .name = "system board setup (MCA)" },
131 { .start = 0x96, .end = 0x97, .name = "POS (MCA)" },
132 { .start = 0x100, .end = 0x107, .name = "POS (MCA)" }
133};
134
135#define MCA_STANDARD_RESOURCES (sizeof(mca_standard_resources)/sizeof(struct resource))
136
137/**
138 * mca_read_and_store_pos - read the POS registers into a memory buffer
139 * @pos: a char pointer to 8 bytes, contains the POS register value on
140 * successful return
141 *
142 * Returns 1 if a card actually exists (i.e. the pos isn't
143 * all 0xff) or 0 otherwise
144 */
145static int mca_read_and_store_pos(unsigned char *pos) {
146 int j;
147 int found = 0;
148
149 for(j=0; j<8; j++) {
150 if((pos[j] = inb_p(MCA_POS_REG(j))) != 0xff) {
151 /* 0xff all across means no device. 0x00 means
152 * something's broken, but a device is
153 * probably there. However, if you get 0x00
154 * from a motherboard register it won't matter
155 * what we find. For the record, on the
156 * 57SLC, the integrated SCSI adapter has
157 * 0xffff for the adapter ID, but nonzero for
158 * other registers. */
159
160 found = 1;
161 }
162 }
163 return found;
164}
165
166static unsigned char mca_pc_read_pos(struct mca_device *mca_dev, int reg)
167{
168 unsigned char byte;
169 unsigned long flags;
170
171 if(reg < 0 || reg >= 8)
172 return 0;
173
174 spin_lock_irqsave(&mca_lock, flags);
175 if(mca_dev->pos_register) {
176 /* Disable adapter setup, enable motherboard setup */
177
178 outb_p(0, MCA_ADAPTER_SETUP_REG);
179 outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG);
180
181 byte = inb_p(MCA_POS_REG(reg));
182 outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
183 } else {
184
185 /* Make sure motherboard setup is off */
186
187 outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
188
189 /* Read the appropriate register */
190
191 outb_p(0x8|(mca_dev->slot & 0xf), MCA_ADAPTER_SETUP_REG);
192 byte = inb_p(MCA_POS_REG(reg));
193 outb_p(0, MCA_ADAPTER_SETUP_REG);
194 }
195 spin_unlock_irqrestore(&mca_lock, flags);
196
197 mca_dev->pos[reg] = byte;
198
199 return byte;
200}
201
202static void mca_pc_write_pos(struct mca_device *mca_dev, int reg,
203 unsigned char byte)
204{
205 unsigned long flags;
206
207 if(reg < 0 || reg >= 8)
208 return;
209
210 spin_lock_irqsave(&mca_lock, flags);
211
212 /* Make sure motherboard setup is off */
213
214 outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
215
216 /* Write the appropriate register */
217
218 outb_p(0x8|(mca_dev->slot&0xf), MCA_ADAPTER_SETUP_REG);
219 outb_p(byte, MCA_POS_REG(reg));
220 outb_p(0, MCA_ADAPTER_SETUP_REG);
221
222 spin_unlock_irqrestore(&mca_lock, flags);
223
224 /* Update the global register list, while we have the byte */
225
226 mca_dev->pos[reg] = byte;
227
228}
229
230/* for the primary MCA bus, we have identity transforms */
231static int mca_dummy_transform_irq(struct mca_device * mca_dev, int irq)
232{
233 return irq;
234}
235
236static int mca_dummy_transform_ioport(struct mca_device * mca_dev, int port)
237{
238 return port;
239}
240
241static void *mca_dummy_transform_memory(struct mca_device * mca_dev, void *mem)
242{
243 return mem;
244}
245
246
247static int __init mca_init(void)
248{
249 unsigned int i, j;
250 struct mca_device *mca_dev;
251 unsigned char pos[8];
252 short mca_builtin_scsi_ports[] = {0xf7, 0xfd, 0x00};
253 struct mca_bus *bus;
254
255 /* WARNING: Be careful when making changes here. Putting an adapter
256 * and the motherboard simultaneously into setup mode may result in
257 * damage to chips (according to The Indispensable PC Hardware Book
258 * by Hans-Peter Messmer). Also, we disable system interrupts (so
259 * that we are not disturbed in the middle of this).
260 */
261
262 /* Make sure the MCA bus is present */
263
264 if (mca_system_init()) {
265 printk(KERN_ERR "MCA bus system initialisation failed\n");
266 return -ENODEV;
267 }
268
269 if (!MCA_bus)
270 return -ENODEV;
271
272 printk(KERN_INFO "Micro Channel bus detected.\n");
273
274 /* All MCA systems have at least a primary bus */
275 bus = mca_attach_bus(MCA_PRIMARY_BUS);
276 if (!bus)
277 goto out_nomem;
278 bus->default_dma_mask = 0xffffffffLL;
279 bus->f.mca_write_pos = mca_pc_write_pos;
280 bus->f.mca_read_pos = mca_pc_read_pos;
281 bus->f.mca_transform_irq = mca_dummy_transform_irq;
282 bus->f.mca_transform_ioport = mca_dummy_transform_ioport;
283 bus->f.mca_transform_memory = mca_dummy_transform_memory;
284
285 /* get the motherboard device */
286 mca_dev = kmalloc(sizeof(struct mca_device), GFP_KERNEL);
287 if(unlikely(!mca_dev))
288 goto out_nomem;
289 memset(mca_dev, 0, sizeof(struct mca_device));
290
291 /*
292 * We do not expect many MCA interrupts during initialization,
293 * but let us be safe:
294 */
295 spin_lock_irq(&mca_lock);
296
297 /* Make sure adapter setup is off */
298
299 outb_p(0, MCA_ADAPTER_SETUP_REG);
300
301 /* Read motherboard POS registers */
302
303 mca_dev->pos_register = 0x7f;
304 outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG);
305 mca_dev->name[0] = 0;
306 mca_read_and_store_pos(mca_dev->pos);
307 mca_configure_adapter_status(mca_dev);
308 /* fake POS and slot for a motherboard */
309 mca_dev->pos_id = MCA_MOTHERBOARD_POS;
310 mca_dev->slot = MCA_MOTHERBOARD;
311 mca_register_device(MCA_PRIMARY_BUS, mca_dev);
312
313 mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC);
314 if(unlikely(!mca_dev))
315 goto out_unlock_nomem;
316 memset(mca_dev, 0, sizeof(struct mca_device));
317
318
319 /* Put motherboard into video setup mode, read integrated video
320 * POS registers, and turn motherboard setup off.
321 */
322
323 mca_dev->pos_register = 0xdf;
324 outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG);
325 mca_dev->name[0] = 0;
326 mca_read_and_store_pos(mca_dev->pos);
327 mca_configure_adapter_status(mca_dev);
328 /* fake POS and slot for the integrated video */
329 mca_dev->pos_id = MCA_INTEGVIDEO_POS;
330 mca_dev->slot = MCA_INTEGVIDEO;
331 mca_register_device(MCA_PRIMARY_BUS, mca_dev);
332
333 /* Put motherboard into scsi setup mode, read integrated scsi
334 * POS registers, and turn motherboard setup off.
335 *
336 * It seems there are two possible SCSI registers. Martin says that
337 * for the 56,57, 0xf7 is the one, but fails on the 76.
338 * Alfredo (apena@vnet.ibm.com) says
339 * 0xfd works on his machine. We'll try both of them. I figure it's
340 * a good bet that only one could be valid at a time. This could
341 * screw up though if one is used for something else on the other
342 * machine.
343 */
344
345 for(i = 0; (which_scsi = mca_builtin_scsi_ports[i]) != 0; i++) {
346 outb_p(which_scsi, MCA_MOTHERBOARD_SETUP_REG);
347 if(mca_read_and_store_pos(pos))
348 break;
349 }
350 if(which_scsi) {
351 /* found a scsi card */
352 mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC);
353 if(unlikely(!mca_dev))
354 goto out_unlock_nomem;
355 memset(mca_dev, 0, sizeof(struct mca_device));
356
357 for(j = 0; j < 8; j++)
358 mca_dev->pos[j] = pos[j];
359
360 mca_configure_adapter_status(mca_dev);
361 /* fake POS and slot for integrated SCSI controller */
362 mca_dev->pos_id = MCA_INTEGSCSI_POS;
363 mca_dev->slot = MCA_INTEGSCSI;
364 mca_dev->pos_register = which_scsi;
365 mca_register_device(MCA_PRIMARY_BUS, mca_dev);
366 }
367
368 /* Turn off motherboard setup */
369
370 outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
371
372 /* Now loop over MCA slots: put each adapter into setup mode, and
373 * read its POS registers. Then put adapter setup off.
374 */
375
376 for(i=0; i<MCA_MAX_SLOT_NR; i++) {
377 outb_p(0x8|(i&0xf), MCA_ADAPTER_SETUP_REG);
378 if(!mca_read_and_store_pos(pos))
379 continue;
380
381 mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC);
382 if(unlikely(!mca_dev))
383 goto out_unlock_nomem;
384 memset(mca_dev, 0, sizeof(struct mca_device));
385
386 for(j=0; j<8; j++)
387 mca_dev->pos[j]=pos[j];
388
389 mca_dev->driver_loaded = 0;
390 mca_dev->slot = i;
391 mca_dev->pos_register = 0;
392 mca_configure_adapter_status(mca_dev);
393 mca_register_device(MCA_PRIMARY_BUS, mca_dev);
394 }
395 outb_p(0, MCA_ADAPTER_SETUP_REG);
396
397 /* Enable interrupts and return memory start */
398 spin_unlock_irq(&mca_lock);
399
400 for (i = 0; i < MCA_STANDARD_RESOURCES; i++)
401 request_resource(&ioport_resource, mca_standard_resources + i);
402
403 mca_do_proc_init();
404
405 return 0;
406
407 out_unlock_nomem:
408 spin_unlock_irq(&mca_lock);
409 out_nomem:
410 printk(KERN_EMERG "Failed memory allocation in MCA setup!\n");
411 return -ENOMEM;
412}
413
414subsys_initcall(mca_init);
415
416/*--------------------------------------------------------------------*/
417
418static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
419{
420 int slot = mca_dev->slot;
421
422 if(slot == MCA_INTEGSCSI) {
423 printk(KERN_CRIT "NMI: caused by MCA integrated SCSI adapter (%s)\n",
424 mca_dev->name);
425 } else if(slot == MCA_INTEGVIDEO) {
426 printk(KERN_CRIT "NMI: caused by MCA integrated video adapter (%s)\n",
427 mca_dev->name);
428 } else if(slot == MCA_MOTHERBOARD) {
429 printk(KERN_CRIT "NMI: caused by motherboard (%s)\n",
430 mca_dev->name);
431 }
432
433 /* More info available in POS 6 and 7? */
434
435 if(check_flag) {
436 unsigned char pos6, pos7;
437
438 pos6 = mca_device_read_pos(mca_dev, 6);
439 pos7 = mca_device_read_pos(mca_dev, 7);
440
441 printk(KERN_CRIT "NMI: POS 6 = 0x%x, POS 7 = 0x%x\n", pos6, pos7);
442 }
443
444} /* mca_handle_nmi_device */
445
446/*--------------------------------------------------------------------*/
447
448static int mca_handle_nmi_callback(struct device *dev, void *data)
449{
450 struct mca_device *mca_dev = to_mca_device(dev);
451 unsigned char pos5;
452
453 pos5 = mca_device_read_pos(mca_dev, 5);
454
455 if(!(pos5 & 0x80)) {
456 /* Bit 7 of POS 5 is reset when this adapter has a hardware
457 * error. Bit 6 is reset if there's error information
458 * available in POS 6 and 7.
459 */
460 mca_handle_nmi_device(mca_dev, !(pos5 & 0x40));
461 return 1;
462 }
463 return 0;
464}
465
466void mca_handle_nmi(void)
467{
468 /* First try - scan the various adapters and see if a specific
469 * adapter was responsible for the error.
470 */
471 bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback);
472
473 mca_nmi_hook();
474} /* mca_handle_nmi */
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
new file mode 100644
index 000000000000..a77c612aad00
--- /dev/null
+++ b/arch/i386/kernel/microcode.c
@@ -0,0 +1,512 @@
1/*
2 * Intel CPU Microcode Update Driver for Linux
3 *
4 * Copyright (C) 2000-2004 Tigran Aivazian
5 *
6 * This driver allows upgrading the microcode on Intel processors
7 * belonging to the IA-32 family - PentiumPro, Pentium II,
8 * Pentium III, Xeon, Pentium 4, etc.
9 *
10 * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual,
11 * Order Number 245472 or free download from:
12 *
13 * http://developer.intel.com/design/pentium4/manuals/245472.htm
14 *
15 * For more information, go to http://www.urbanmyth.org/microcode
16 *
17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public License
19 * as published by the Free Software Foundation; either version
20 * 2 of the License, or (at your option) any later version.
21 *
22 * 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com>
23 * Initial release.
24 * 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com>
25 * Added read() support + cleanups.
26 * 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com>
27 * Added 'device trimming' support. open(O_WRONLY) zeroes
28 * and frees the saved copy of applied microcode.
29 * 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com>
30 * Made to use devfs (/dev/cpu/microcode) + cleanups.
31 * 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com>
32 * Added misc device support (now uses both devfs and misc).
33 * Added MICROCODE_IOCFREE ioctl to clear memory.
34 * 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com>
35 * Messages for error cases (non-Intel & no suitable microcode).
36 * 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
37 * Removed ->release(). Removed exclusive open and status bitmap.
38 * Added microcode_rwsem to serialize read()/write()/ioctl().
39 * Removed global kernel lock usage.
40 * 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
41 * Write 0 to 0x8B msr and then cpuid before reading revision,
42 * so that it works even if there was no update done by the
43 * BIOS. Otherwise, reading from 0x8B gives junk (which happened
44 * to be 0 on my machine which is why it worked even when I
45 * disabled update by the BIOS)
46 * Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
47 * 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
48 * Tigran Aivazian <tigran@veritas.com>
49 * Intel Pentium 4 processor support and bugfixes.
50 * 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
51 * Bugfix for HT (Hyper-Threading) enabled processors
52 * whereby processor resources are shared by all logical processors
53 * in a single CPU package.
54 * 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
55 * Tigran Aivazian <tigran@veritas.com>,
56 * Serialize updates as required on HT processors due to speculative
57 * nature of implementation.
58 * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
59 * Fix the panic when writing zero-length microcode chunk.
60 * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
61 * Jun Nakajima <jun.nakajima@intel.com>
62 * Support for the microcode updates in the new format.
63 * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
64 * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
65 * because we no longer hold a copy of applied microcode
66 * in kernel memory.
67 * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
68 * Fix sigmatch() macro to handle old CPUs with pf == 0.
69 * Thanks to Stuart Swales for pointing out this bug.
70 */
71
72//#define DEBUG /* pr_debug */
73#include <linux/kernel.h>
74#include <linux/init.h>
75#include <linux/sched.h>
76#include <linux/module.h>
77#include <linux/slab.h>
78#include <linux/vmalloc.h>
79#include <linux/miscdevice.h>
80#include <linux/spinlock.h>
81#include <linux/mm.h>
82
83#include <asm/msr.h>
84#include <asm/uaccess.h>
85#include <asm/processor.h>
86
87MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
88MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>");
89MODULE_LICENSE("GPL");
90
91#define MICROCODE_VERSION "1.14"
92
93#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */
94#define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */
95#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */
96#define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */
97#define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */
98#define DWSIZE (sizeof (u32))
99#define get_totalsize(mc) \
100 (((microcode_t *)mc)->hdr.totalsize ? \
101 ((microcode_t *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE)
102#define get_datasize(mc) \
103 (((microcode_t *)mc)->hdr.datasize ? \
104 ((microcode_t *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
105
106#define sigmatch(s1, s2, p1, p2) \
107 (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
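/*
 * Reading sigmatch(): the CPU signatures must be identical and the
 * platform-flag masks must intersect, so e.g. an update carrying
 * pf 0x1c applies to a CPU with pf 0x04 (0x1c & 0x04 != 0); the
 * (p1 == 0 && p2 == 0) clause keeps old CPUs without processor flags
 * matching, per the 1.14 changelog entry above.
 */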
108
109#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
110
111/* serialize access to the physical write to MSR 0x79 */
112static DEFINE_SPINLOCK(microcode_update_lock);
113
114/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
115static DECLARE_MUTEX(microcode_sem);
116
117static void __user *user_buffer; /* user area microcode data buffer */
118static unsigned int user_buffer_size; /* its size */
119
120typedef enum mc_error_code {
121 MC_SUCCESS = 0,
122 MC_NOTFOUND = 1,
123 MC_MARKED = 2,
124 MC_ALLOCATED = 3,
125} mc_error_code_t;
126
127static struct ucode_cpu_info {
128 unsigned int sig;
129 unsigned int pf;
130 unsigned int rev;
131 unsigned int cksum;
132 mc_error_code_t err;
133 microcode_t *mc;
134} ucode_cpu_info[NR_CPUS];
135
136static int microcode_open (struct inode *unused1, struct file *unused2)
137{
138 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
139}
140
141static void collect_cpu_info (void *unused)
142{
143 int cpu_num = smp_processor_id();
144 struct cpuinfo_x86 *c = cpu_data + cpu_num;
145 struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
146 unsigned int val[2];
147
148 uci->sig = uci->pf = uci->rev = uci->cksum = 0;
149 uci->err = MC_NOTFOUND;
150 uci->mc = NULL;
151
152 if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
153 cpu_has(c, X86_FEATURE_IA64)) {
154 printk(KERN_ERR "microcode: CPU%d not a capable Intel processor\n", cpu_num);
155 return;
156 } else {
157 uci->sig = cpuid_eax(0x00000001);
158
159 if ((c->x86_model >= 5) || (c->x86 > 6)) {
160 /* get processor flags from MSR 0x17 */
161 rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
162 uci->pf = 1 << ((val[1] >> 18) & 7);
163 }
164 }
165
166 wrmsr(MSR_IA32_UCODE_REV, 0, 0);
167 __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
168 /* get the current revision from MSR 0x8B */
169 rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev);
170 pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
171 uci->sig, uci->pf, uci->rev);
172}
173
174static inline void mark_microcode_update (int cpu_num, microcode_header_t *mc_header, int sig, int pf, int cksum)
175{
176 struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
177
178 pr_debug("Microcode Found.\n");
179 pr_debug(" Header Revision 0x%x\n", mc_header->hdrver);
180 pr_debug(" Loader Revision 0x%x\n", mc_header->ldrver);
181 pr_debug(" Revision 0x%x \n", mc_header->rev);
182 pr_debug(" Date %x/%x/%x\n",
183 ((mc_header->date >> 24 ) & 0xff),
184 ((mc_header->date >> 16 ) & 0xff),
185 (mc_header->date & 0xFFFF));
186 pr_debug(" Signature 0x%x\n", sig);
187 pr_debug(" Type 0x%x Family 0x%x Model 0x%x Stepping 0x%x\n",
188 ((sig >> 12) & 0x3),
189 ((sig >> 8) & 0xf),
190 ((sig >> 4) & 0xf),
191 ((sig & 0xf)));
192 pr_debug(" Processor Flags 0x%x\n", pf);
193 pr_debug(" Checksum 0x%x\n", cksum);
194
195 if (mc_header->rev < uci->rev) {
196 printk(KERN_ERR "microcode: CPU%d not 'upgrading' to earlier revision"
197 " 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
198 goto out;
199 } else if (mc_header->rev == uci->rev) {
200 /* notify the caller of success on this cpu */
201 uci->err = MC_SUCCESS;
202 printk(KERN_ERR "microcode: CPU%d already at revision"
203 " 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
204 goto out;
205 }
206
207 pr_debug("microcode: CPU%d found a matching microcode update with "
208 " revision 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
209 uci->cksum = cksum;
210 uci->pf = pf; /* keep the original mc pf for cksum calculation */
211 uci->err = MC_MARKED; /* found the match */
212out:
213 return;
214}
215
216static int find_matching_ucodes (void)
217{
218 int cursor = 0;
219 int error = 0;
220
221 while (cursor + MC_HEADER_SIZE < user_buffer_size) {
222 microcode_header_t mc_header;
223 void *newmc = NULL;
224 int i, sum, cpu_num, allocated_flag, total_size, data_size, ext_table_size;
225
226 if (copy_from_user(&mc_header, user_buffer + cursor, MC_HEADER_SIZE)) {
227 printk(KERN_ERR "microcode: error! Can not read user data\n");
228 error = -EFAULT;
229 goto out;
230 }
231
232 total_size = get_totalsize(&mc_header);
233 if ((cursor + total_size > user_buffer_size) || (total_size < DEFAULT_UCODE_TOTALSIZE)) {
234 printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
235 error = -EINVAL;
236 goto out;
237 }
238
239 data_size = get_datasize(&mc_header);
240 if ((data_size + MC_HEADER_SIZE > total_size) || (data_size < DEFAULT_UCODE_DATASIZE)) {
241 printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
242 error = -EINVAL;
243 goto out;
244 }
245
246 if (mc_header.ldrver != 1 || mc_header.hdrver != 1) {
247 printk(KERN_ERR "microcode: error! Unknown microcode update format\n");
248 error = -EINVAL;
249 goto out;
250 }
251
252 for (cpu_num = 0; cpu_num < num_online_cpus(); cpu_num++) {
253 struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
254 if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu*/
255 continue;
256
257 if (sigmatch(mc_header.sig, uci->sig, mc_header.pf, uci->pf))
258 mark_microcode_update(cpu_num, &mc_header, mc_header.sig, mc_header.pf, mc_header.cksum);
259 }
260
261 ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
262 if (ext_table_size) {
263 struct extended_sigtable ext_header;
264 struct extended_signature ext_sig;
265 int ext_sigcount;
266
267 if ((ext_table_size < EXT_HEADER_SIZE)
268 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
269 printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
270 error = -EINVAL;
271 goto out;
272 }
273 if (copy_from_user(&ext_header, user_buffer + cursor
274 + MC_HEADER_SIZE + data_size, EXT_HEADER_SIZE)) {
275 printk(KERN_ERR "microcode: error! Can not read user data\n");
276 error = -EFAULT;
277 goto out;
278 }
279 if (ext_table_size != exttable_size(&ext_header)) {
280 printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
281 error = -EFAULT;
282 goto out;
283 }
284
285 ext_sigcount = ext_header.count;
286
287 for (i = 0; i < ext_sigcount; i++) {
288 if (copy_from_user(&ext_sig, user_buffer + cursor + MC_HEADER_SIZE + data_size + EXT_HEADER_SIZE
289 + EXT_SIGNATURE_SIZE * i, EXT_SIGNATURE_SIZE)) {
290 printk(KERN_ERR "microcode: error! Can not read user data\n");
291 error = -EFAULT;
292 goto out;
293 }
294 for (cpu_num = 0; cpu_num < num_online_cpus(); cpu_num++) {
295 struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
296 if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu*/
297 continue;
298 if (sigmatch(ext_sig.sig, uci->sig, ext_sig.pf, uci->pf)) {
299 mark_microcode_update(cpu_num, &mc_header, ext_sig.sig, ext_sig.pf, ext_sig.cksum);
300 }
301 }
302 }
303 }
304 /* now check if any cpu has matched */
305 for (cpu_num = 0, allocated_flag = 0, sum = 0; cpu_num < num_online_cpus(); cpu_num++) {
306 if (ucode_cpu_info[cpu_num].err == MC_MARKED) {
307 struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
308 if (!allocated_flag) {
309 allocated_flag = 1;
310 newmc = vmalloc(total_size);
311 if (!newmc) {
312 printk(KERN_ERR "microcode: error! Can not allocate memory\n");
313 error = -ENOMEM;
314 goto out;
315 }
316 if (copy_from_user(newmc + MC_HEADER_SIZE,
317 user_buffer + cursor + MC_HEADER_SIZE,
318 total_size - MC_HEADER_SIZE)) {
319 printk(KERN_ERR "microcode: error! Can not read user data\n");
320 vfree(newmc);
321 error = -EFAULT;
322 goto out;
323 }
324 memcpy(newmc, &mc_header, MC_HEADER_SIZE);
325 /* check extended table checksum */
326 if (ext_table_size) {
327 int ext_table_sum = 0;
328 int * ext_tablep = (((void *) newmc) + MC_HEADER_SIZE + data_size);
329 i = ext_table_size / DWSIZE;
330 while (i--) ext_table_sum += ext_tablep[i];
331 if (ext_table_sum) {
332 printk(KERN_WARNING "microcode: aborting, bad extended signature table checksum\n");
333 vfree(newmc);
334 error = -EINVAL;
335 goto out;
336 }
337 }
338
339 /* calculate the checksum */
340 i = (MC_HEADER_SIZE + data_size) / DWSIZE;
341 while (i--) sum += ((int *)newmc)[i];
342 sum -= (mc_header.sig + mc_header.pf + mc_header.cksum);
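				/* the dword sum over header + data must be
				 * zero; the file's own sig/pf/cksum are
				 * removed here so each CPU's matched values
				 * can be added back in the check below */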
343 }
344 ucode_cpu_info[cpu_num].mc = newmc;
345 ucode_cpu_info[cpu_num].err = MC_ALLOCATED; /* mc updated */
346 if (sum + uci->sig + uci->pf + uci->cksum != 0) {
347 printk(KERN_ERR "microcode: CPU%d aborting, bad checksum\n", cpu_num);
348 error = -EINVAL;
349 goto out;
350 }
351 }
352 }
353 cursor += total_size; /* goto the next update patch */
354 } /* end of while */
355out:
356 return error;
357}
358
359static void do_update_one (void * unused)
360{
361 unsigned long flags;
362 unsigned int val[2];
363 int cpu_num = smp_processor_id();
364 struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
365
366 if (uci->mc == NULL) {
367 printk(KERN_INFO "microcode: No new microcode data for CPU%d\n", cpu_num);
368 return;
369 }
370
371 /* serialize access to the physical write to MSR 0x79 */
372 spin_lock_irqsave(&microcode_update_lock, flags);
373
374 /* write microcode via MSR 0x79 */
375 wrmsr(MSR_IA32_UCODE_WRITE,
376 (unsigned long) uci->mc->bits,
377 (unsigned long) uci->mc->bits >> 16 >> 16);
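	/* uci->mc->bits is a 32-bit pointer here, so the high word of the
	 * MSR gets 0; ">> 16 >> 16" avoids the undefined 32-bit ">> 32" */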
378 wrmsr(MSR_IA32_UCODE_REV, 0, 0);
379
380 __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
381 /* get the current revision from MSR 0x8B */
382 rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
383
384 /* notify the caller of success on this cpu */
385 uci->err = MC_SUCCESS;
386 spin_unlock_irqrestore(&microcode_update_lock, flags);
387 printk(KERN_INFO "microcode: CPU%d updated from revision "
388 "0x%x to 0x%x, date = %08x \n",
389 cpu_num, uci->rev, val[1], uci->mc->hdr.date);
390 return;
391}
392
393static int do_microcode_update (void)
394{
395 int i, error;
396
397 if (on_each_cpu(collect_cpu_info, NULL, 1, 1) != 0) {
398 printk(KERN_ERR "microcode: Error! Could not run on all processors\n");
399 error = -EIO;
400 goto out;
401 }
402
403 if ((error = find_matching_ucodes())) {
404 printk(KERN_ERR "microcode: Error in the microcode data\n");
405 goto out_free;
406 }
407
408 if (on_each_cpu(do_update_one, NULL, 1, 1) != 0) {
409 printk(KERN_ERR "microcode: Error! Could not run on all processors\n");
410 error = -EIO;
411 }
412
413out_free:
414 for (i = 0; i < num_online_cpus(); i++) {
415 if (ucode_cpu_info[i].mc) {
416 int j;
417 void *tmp = ucode_cpu_info[i].mc;
418 vfree(tmp);
419 for (j = i; j < num_online_cpus(); j++) {
420 if (ucode_cpu_info[j].mc == tmp)
421 ucode_cpu_info[j].mc = NULL;
422 }
423 }
424 }
425out:
426 return error;
427}
428
429static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
430{
431 ssize_t ret;
432
433 if (len < DEFAULT_UCODE_TOTALSIZE) {
434 printk(KERN_ERR "microcode: not enough data\n");
435 return -EINVAL;
436 }
437
438 if ((len >> PAGE_SHIFT) > num_physpages) {
439 printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages);
440 return -EINVAL;
441 }
442
443 down(&microcode_sem);
444
445 user_buffer = (void __user *) buf;
446 user_buffer_size = (int) len;
447
448 ret = do_microcode_update();
449 if (!ret)
450 ret = (ssize_t)len;
451
452 up(&microcode_sem);
453
454 return ret;
455}
456
457static int microcode_ioctl (struct inode *inode, struct file *file,
458 unsigned int cmd, unsigned long arg)
459{
460 switch (cmd) {
461 /*
462 * XXX: will be removed after microcode_ctl
463 * is updated to ignore failure of this ioctl()
464 */
465 case MICROCODE_IOCFREE:
466 return 0;
467 default:
468 return -EINVAL;
469 }
470 return -EINVAL;
471}
472
473static struct file_operations microcode_fops = {
474 .owner = THIS_MODULE,
475 .write = microcode_write,
476 .ioctl = microcode_ioctl,
477 .open = microcode_open,
478};
479
480static struct miscdevice microcode_dev = {
481 .minor = MICROCODE_MINOR,
482 .name = "microcode",
483 .devfs_name = "cpu/microcode",
484 .fops = &microcode_fops,
485};
486
487static int __init microcode_init (void)
488{
489 int error;
490
491 error = misc_register(&microcode_dev);
492 if (error) {
493 printk(KERN_ERR
494 "microcode: can't misc_register on minor=%d\n",
495 MICROCODE_MINOR);
496 return error;
497 }
498
499 printk(KERN_INFO
500 "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
501 return 0;
502}
503
504static void __exit microcode_exit (void)
505{
506 misc_deregister(&microcode_dev);
507 printk(KERN_INFO "IA-32 Microcode Update Driver v" MICROCODE_VERSION " unregistered\n");
508}
509
510module_init(microcode_init)
511module_exit(microcode_exit)
512MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
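/*
 * Hypothetical userspace sketch (not part of the driver above): handing a
 * binary microcode image to the driver's write() handler in a single
 * call, since do_microcode_update() parses the whole buffer at once.
 * Assumes a /dev/cpu/microcode node and CAP_SYS_RAWIO, as
 * microcode_open() requires; the input path is illustrative only.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	struct stat st;
	char *buf;
	int in = open("/lib/firmware/microcode.bin", O_RDONLY); /* assumed */
	int dev;

	if (in < 0 || fstat(in, &st) < 0) {
		perror("microcode image");
		return 1;
	}
	buf = malloc(st.st_size);
	if (!buf || read(in, buf, st.st_size) != st.st_size) {
		perror("read");
		return 1;
	}
	dev = open("/dev/cpu/microcode", O_WRONLY);
	if (dev < 0 || write(dev, buf, st.st_size) != st.st_size) {
		perror("/dev/cpu/microcode");
		return 1;
	}
	close(dev);
	close(in);
	free(buf);
	return 0;
}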
diff --git a/arch/i386/kernel/module.c b/arch/i386/kernel/module.c
new file mode 100644
index 000000000000..5149c8a621f0
--- /dev/null
+++ b/arch/i386/kernel/module.c
@@ -0,0 +1,129 @@
1/* Kernel module help for i386.
2 Copyright (C) 2001 Rusty Russell.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/
18#include <linux/moduleloader.h>
19#include <linux/elf.h>
20#include <linux/vmalloc.h>
21#include <linux/fs.h>
22#include <linux/string.h>
23#include <linux/kernel.h>
24
25#if 0
26#define DEBUGP printk
27#else
28#define DEBUGP(fmt...)
29#endif
30
31void *module_alloc(unsigned long size)
32{
33 if (size == 0)
34 return NULL;
35 return vmalloc_exec(size);
36}
37
38
39/* Free memory returned from module_alloc */
40void module_free(struct module *mod, void *module_region)
41{
42 vfree(module_region);
43 /* FIXME: If module_region == mod->init_region, trim exception
44 table entries. */
45}
46
47/* We don't need anything special. */
48int module_frob_arch_sections(Elf_Ehdr *hdr,
49 Elf_Shdr *sechdrs,
50 char *secstrings,
51 struct module *mod)
52{
53 return 0;
54}
55
56int apply_relocate(Elf32_Shdr *sechdrs,
57 const char *strtab,
58 unsigned int symindex,
59 unsigned int relsec,
60 struct module *me)
61{
62 unsigned int i;
63 Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr;
64 Elf32_Sym *sym;
65 uint32_t *location;
66
67 DEBUGP("Applying relocate section %u to %u\n", relsec,
68 sechdrs[relsec].sh_info);
69 for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
70 /* This is where to make the change */
71 location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
72 + rel[i].r_offset;
73 /* This is the symbol it is referring to. Note that all
74 undefined symbols have been resolved. */
75 sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
76 + ELF32_R_SYM(rel[i].r_info);
77
78 switch (ELF32_R_TYPE(rel[i].r_info)) {
79 case R_386_32:
80 /* We add the value into the location given */
81 *location += sym->st_value;
82 break;
83 case R_386_PC32:
84 /* Add the value, subtract its position */
85 *location += sym->st_value - (uint32_t)location;
86 break;
87 default:
88 printk(KERN_ERR "module %s: Unknown relocation: %u\n",
89 me->name, ELF32_R_TYPE(rel[i].r_info));
90 return -ENOEXEC;
91 }
92 }
93 return 0;
94}
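/*
 * Worked example for the two relocation types above, with assumed values:
 * if sym->st_value == 0xc0100000 and *location starts as the addend 0x10,
 * R_386_32 stores 0xc0100010 (absolute), while R_386_PC32 at
 * location == 0xc2000000 stores 0xc0100010 - 0xc2000000, i.e. the
 * PC-relative displacement.
 */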
95
96int apply_relocate_add(Elf32_Shdr *sechdrs,
97 const char *strtab,
98 unsigned int symindex,
99 unsigned int relsec,
100 struct module *me)
101{
102 printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n",
103 me->name);
104 return -ENOEXEC;
105}
106
107extern void apply_alternatives(void *start, void *end);
108
109int module_finalize(const Elf_Ehdr *hdr,
110 const Elf_Shdr *sechdrs,
111 struct module *me)
112{
113 const Elf_Shdr *s;
114 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
115
116 /* look for .altinstructions to patch */
117 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
118 void *seg;
119 if (strcmp(".altinstructions", secstrings + s->sh_name))
120 continue;
121 seg = (void *)s->sh_addr;
122 apply_alternatives(seg, seg + s->sh_size);
123 }
124 return 0;
125}
126
127void module_arch_cleanup(struct module *mod)
128{
129}
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
new file mode 100644
index 000000000000..1347ab4939e7
--- /dev/null
+++ b/arch/i386/kernel/mpparse.c
@@ -0,0 +1,1109 @@
1/*
2 * Intel Multiprocessor Specification 1.1 and 1.4
3 * compliant MP-table parsing routines.
4 *
5 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
6 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
7 *
8 * Fixes
9 * Erich Boleyn : MP v1.4 and additional changes.
10 * Alan Cox : Added EBDA scanning
11 * Ingo Molnar : various cleanups and rewrites
12 * Maciej W. Rozycki: Bits for default MP configurations
13 * Paul Diefenbaugh: Added full ACPI support
14 */
15
16#include <linux/mm.h>
17#include <linux/irq.h>
18#include <linux/init.h>
19#include <linux/acpi.h>
20#include <linux/delay.h>
21#include <linux/config.h>
22#include <linux/bootmem.h>
23#include <linux/smp_lock.h>
24#include <linux/kernel_stat.h>
25#include <linux/mc146818rtc.h>
26#include <linux/bitops.h>
27
28#include <asm/smp.h>
29#include <asm/acpi.h>
30#include <asm/mtrr.h>
31#include <asm/mpspec.h>
32#include <asm/io_apic.h>
33
34#include <mach_apic.h>
35#include <mach_mpparse.h>
36#include <bios_ebda.h>
37
38/* Have we found an MP table */
39int smp_found_config;
40unsigned int __initdata maxcpus = NR_CPUS;
41
42/*
43 * Various Linux-internal data structures created from the
44 * MP-table.
45 */
46int apic_version [MAX_APICS];
47int mp_bus_id_to_type [MAX_MP_BUSSES];
48int mp_bus_id_to_node [MAX_MP_BUSSES];
49int mp_bus_id_to_local [MAX_MP_BUSSES];
50int quad_local_to_mp_bus_id [NR_CPUS/4][4];
51int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
52static int mp_current_pci_id;
53
54/* I/O APIC entries */
55struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
56
57/* # of MP IRQ source entries */
58struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
59
60/* MP IRQ source entries */
61int mp_irq_entries;
62
63int nr_ioapics;
64
65int pic_mode;
66unsigned long mp_lapic_addr;
67
68/* Processor that is doing the boot up */
69unsigned int boot_cpu_physical_apicid = -1U;
70unsigned int boot_cpu_logical_apicid = -1U;
71/* Internal processor count */
72static unsigned int __initdata num_processors;
73
74/* Bitmask of physically existing CPUs */
75physid_mask_t phys_cpu_present_map;
76
77u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
78
79/*
80 * Intel MP BIOS table parsing routines:
81 */
82
83
84/*
85 * Checksum an MP configuration block.
86 */
87
88static int __init mpf_checksum(unsigned char *mp, int len)
89{
90 int sum = 0;
91
92 while (len--)
93 sum += *mp++;
94
95 return sum & 0xFF;
96}
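/*
 * A well-formed table carries a checksum byte chosen so that all of its
 * bytes sum to 0 mod 256, so any nonzero return here means corruption.
 */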
97
98/*
99 * Have to match translation table entries to main table entries by counter
100 * hence the mpc_record variable .... can't see a less disgusting way of
101 * doing this ....
102 */
103
104static int mpc_record;
105static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
106
107#ifdef CONFIG_X86_NUMAQ
108static int MP_valid_apicid(int apicid, int version)
109{
110 return hweight_long(apicid & 0xf) == 1 && (apicid >> 4) != 0xf;
111}
112#else
113static int MP_valid_apicid(int apicid, int version)
114{
115 if (version >= 0x14)
116 return apicid < 0xff;
117 else
118 return apicid < 0xf;
119}
120#endif
121
122static void __init MP_processor_info (struct mpc_config_processor *m)
123{
124 int ver, apicid;
125 physid_mask_t tmp;
126
127 if (!(m->mpc_cpuflag & CPU_ENABLED))
128 return;
129
130 apicid = mpc_apic_id(m, translation_table[mpc_record]);
131
132 if (m->mpc_featureflag&(1<<0))
133 Dprintk(" Floating point unit present.\n");
134 if (m->mpc_featureflag&(1<<7))
135 Dprintk(" Machine Exception supported.\n");
136 if (m->mpc_featureflag&(1<<8))
137 Dprintk(" 64 bit compare & exchange supported.\n");
138 if (m->mpc_featureflag&(1<<9))
139 Dprintk(" Internal APIC present.\n");
140 if (m->mpc_featureflag&(1<<11))
141 Dprintk(" SEP present.\n");
142 if (m->mpc_featureflag&(1<<12))
143 Dprintk(" MTRR present.\n");
144 if (m->mpc_featureflag&(1<<13))
145 Dprintk(" PGE present.\n");
146 if (m->mpc_featureflag&(1<<14))
147 Dprintk(" MCA present.\n");
148 if (m->mpc_featureflag&(1<<15))
149 Dprintk(" CMOV present.\n");
150 if (m->mpc_featureflag&(1<<16))
151 Dprintk(" PAT present.\n");
152 if (m->mpc_featureflag&(1<<17))
153 Dprintk(" PSE present.\n");
154 if (m->mpc_featureflag&(1<<18))
155 Dprintk(" PSN present.\n");
156 if (m->mpc_featureflag&(1<<19))
157 Dprintk(" Cache Line Flush Instruction present.\n");
158 /* 20 Reserved */
159 if (m->mpc_featureflag&(1<<21))
160 Dprintk(" Debug Trace and EMON Store present.\n");
161 if (m->mpc_featureflag&(1<<22))
162 Dprintk(" ACPI Thermal Throttle Registers present.\n");
163 if (m->mpc_featureflag&(1<<23))
164 Dprintk(" MMX present.\n");
165 if (m->mpc_featureflag&(1<<24))
166 Dprintk(" FXSR present.\n");
167 if (m->mpc_featureflag&(1<<25))
168 Dprintk(" XMM present.\n");
169 if (m->mpc_featureflag&(1<<26))
170 Dprintk(" Willamette New Instructions present.\n");
171 if (m->mpc_featureflag&(1<<27))
172 Dprintk(" Self Snoop present.\n");
173 if (m->mpc_featureflag&(1<<28))
174 Dprintk(" HT present.\n");
175 if (m->mpc_featureflag&(1<<29))
176 Dprintk(" Thermal Monitor present.\n");
177 /* 30, 31 Reserved */
178
179
180 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
181 Dprintk(" Bootup CPU\n");
182 boot_cpu_physical_apicid = m->mpc_apicid;
183 boot_cpu_logical_apicid = apicid;
184 }
185
186 if (num_processors >= NR_CPUS) {
187 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
188 " Processor ignored.\n", NR_CPUS);
189 return;
190 }
191
192 if (num_processors >= maxcpus) {
193 printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
194 " Processor ignored.\n", maxcpus);
195 return;
196 }
197 num_processors++;
198 ver = m->mpc_apicver;
199
200 if (!MP_valid_apicid(apicid, ver)) {
201 printk(KERN_WARNING "Processor #%d INVALID. (Max ID: %d).\n",
202 m->mpc_apicid, MAX_APICS);
203 --num_processors;
204 return;
205 }
206
207 tmp = apicid_to_cpu_present(apicid);
208 physids_or(phys_cpu_present_map, phys_cpu_present_map, tmp);
209
210 /*
211 * Validate version
212 */
213 if (ver == 0x0) {
214 printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
215 ver = 0x10;
216 }
217 apic_version[m->mpc_apicid] = ver;
218 bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
219}
220
221static void __init MP_bus_info (struct mpc_config_bus *m)
222{
223 char str[7];
224
225 memcpy(str, m->mpc_bustype, 6);
226 str[6] = 0;
227
228 mpc_oem_bus_info(m, str, translation_table[mpc_record]);
229
230 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
231 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
232 } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
233 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
234 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
235 mpc_oem_pci_bus(m, translation_table[mpc_record]);
236 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
237 mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
238 mp_current_pci_id++;
239 } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
240 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
241 } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) {
242 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98;
243 } else {
244 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
245 }
246}
247
248static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
249{
250 if (!(m->mpc_flags & MPC_APIC_USABLE))
251 return;
252
253 printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n",
254 m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
255 if (nr_ioapics >= MAX_IO_APICS) {
256 printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
257 MAX_IO_APICS, nr_ioapics);
258 panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
259 }
260 if (!m->mpc_apicaddr) {
261 printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
262 " found in MP table, skipping!\n");
263 return;
264 }
265 mp_ioapics[nr_ioapics] = *m;
266 nr_ioapics++;
267}
268
269static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
270{
271 mp_irqs [mp_irq_entries] = *m;
272 Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
273 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
274 m->mpc_irqtype, m->mpc_irqflag & 3,
275 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
276 m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
277 if (++mp_irq_entries == MAX_IRQ_SOURCES)
278 panic("Max # of irq sources exceeded!!\n");
279}
280
281static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
282{
283 Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
284 " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
285 m->mpc_irqtype, m->mpc_irqflag & 3,
286 (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
287 m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
288 /*
289 * Well it seems all SMP boards in existence
290 * use ExtINT/LVT1 == LINT0 and
291 * NMI/LVT2 == LINT1 - the following check
292 * will show us if this assumption is false.
293 * Until then we do not have to add baggage.
294 */
295 if ((m->mpc_irqtype == mp_ExtINT) &&
296 (m->mpc_destapiclint != 0))
297 BUG();
298 if ((m->mpc_irqtype == mp_NMI) &&
299 (m->mpc_destapiclint != 1))
300 BUG();
301}
302
303#ifdef CONFIG_X86_NUMAQ
304static void __init MP_translation_info (struct mpc_config_translation *m)
305{
306 printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
307
308 if (mpc_record >= MAX_MPC_ENTRY)
309 printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
310 else
311 translation_table[mpc_record] = m; /* stash this for later */
312 if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
313 node_set_online(m->trans_quad);
314}
315
316/*
317 * Read/parse the MPC oem tables
318 */
319
320static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
321 unsigned short oemsize)
322{
323 int count = sizeof (*oemtable); /* the header size */
324 unsigned char *oemptr = ((unsigned char *)oemtable)+count;
325
326 mpc_record = 0;
327 printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
328 if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
329 {
330 printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
331 oemtable->oem_signature[0],
332 oemtable->oem_signature[1],
333 oemtable->oem_signature[2],
334 oemtable->oem_signature[3]);
335 return;
336 }
337 if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
338 {
339 printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
340 return;
341 }
342 while (count < oemtable->oem_length) {
343 switch (*oemptr) {
344 case MP_TRANSLATION:
345 {
346 struct mpc_config_translation *m=
347 (struct mpc_config_translation *)oemptr;
348 MP_translation_info(m);
349 oemptr += sizeof(*m);
350 count += sizeof(*m);
351 ++mpc_record;
352 break;
353 }
354 default:
355 {
356 printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
357 return;
358 }
359 }
360 }
361}
362
363static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
364 char *productid)
365{
366 if (strncmp(oem, "IBM NUMA", 8))
367 printk("Warning! May not be a NUMA-Q system!\n");
368 if (mpc->mpc_oemptr)
369 smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
370 mpc->mpc_oemsize);
371}
372#endif /* CONFIG_X86_NUMAQ */
373
374/*
375 * Read/parse the MPC
376 */
377
378static int __init smp_read_mpc(struct mp_config_table *mpc)
379{
380 char str[16];
381 char oem[10];
382 int count=sizeof(*mpc);
383 unsigned char *mpt=((unsigned char *)mpc)+count;
384
385 if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
386 printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
387 *(u32 *)mpc->mpc_signature);
388 return 0;
389 }
390 if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
391 printk(KERN_ERR "SMP mptable: checksum error!\n");
392 return 0;
393 }
394 if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
395 printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
396 mpc->mpc_spec);
397 return 0;
398 }
399 if (!mpc->mpc_lapic) {
400 printk(KERN_ERR "SMP mptable: null local APIC address!\n");
401 return 0;
402 }
403 memcpy(oem,mpc->mpc_oem,8);
404 oem[8]=0;
405 printk(KERN_INFO "OEM ID: %s ",oem);
406
407 memcpy(str,mpc->mpc_productid,12);
408 str[12]=0;
409 printk("Product ID: %s ",str);
410
411 mps_oem_check(mpc, oem, str);
412
413 printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
414
415 /*
416 * Save the local APIC address (it might be non-default) -- but only
417 * if we're not using ACPI.
418 */
419 if (!acpi_lapic)
420 mp_lapic_addr = mpc->mpc_lapic;
421
422 /*
423 * Now process the configuration blocks.
424 */
425 mpc_record = 0;
426 while (count < mpc->mpc_length) {
427 switch(*mpt) {
428 case MP_PROCESSOR:
429 {
430 struct mpc_config_processor *m=
431 (struct mpc_config_processor *)mpt;
432 /* ACPI may have already provided this data */
433 if (!acpi_lapic)
434 MP_processor_info(m);
435 mpt += sizeof(*m);
436 count += sizeof(*m);
437 break;
438 }
439 case MP_BUS:
440 {
441 struct mpc_config_bus *m=
442 (struct mpc_config_bus *)mpt;
443 MP_bus_info(m);
444 mpt += sizeof(*m);
445 count += sizeof(*m);
446 break;
447 }
448 case MP_IOAPIC:
449 {
450 struct mpc_config_ioapic *m=
451 (struct mpc_config_ioapic *)mpt;
452 MP_ioapic_info(m);
453 mpt+=sizeof(*m);
454 count+=sizeof(*m);
455 break;
456 }
457 case MP_INTSRC:
458 {
459 struct mpc_config_intsrc *m=
460 (struct mpc_config_intsrc *)mpt;
461
462 MP_intsrc_info(m);
463 mpt+=sizeof(*m);
464 count+=sizeof(*m);
465 break;
466 }
467 case MP_LINTSRC:
468 {
469 struct mpc_config_lintsrc *m=
470 (struct mpc_config_lintsrc *)mpt;
471 MP_lintsrc_info(m);
472 mpt+=sizeof(*m);
473 count+=sizeof(*m);
474 break;
475 }
476 default:
477 {
478 count = mpc->mpc_length;
479 break;
480 }
481 }
482 ++mpc_record;
483 }
484 clustered_apic_check();
485 if (!num_processors)
486 printk(KERN_ERR "SMP mptable: no processors registered!\n");
487 return num_processors;
488}
489
490static int __init ELCR_trigger(unsigned int irq)
491{
492 unsigned int port;
493
494 port = 0x4d0 + (irq >> 3);
495 return (inb(port) >> (irq & 7)) & 1;
496}
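/*
 * The ELCR is a pair of registers at ports 0x4d0/0x4d1 with one bit per
 * ISA IRQ (0-7 and 8-15 respectively); a set bit marks the line
 * level-triggered, a clear bit edge-triggered.
 */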
497
498static void __init construct_default_ioirq_mptable(int mpc_default_type)
499{
500 struct mpc_config_intsrc intsrc;
501 int i;
502 int ELCR_fallback = 0;
503
504 intsrc.mpc_type = MP_INTSRC;
505 intsrc.mpc_irqflag = 0; /* conforming */
506 intsrc.mpc_srcbus = 0;
507 intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
508
509 intsrc.mpc_irqtype = mp_INT;
510
511 /*
512 * If true, we have an ISA/PCI system with no IRQ entries
513 * in the MP table. To prevent the PCI interrupts from being set up
514 * incorrectly, we try to use the ELCR. The sanity check to see if
515 * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
516 * never be level sensitive, so we simply see if the ELCR agrees.
517 * If it does, we assume it's valid.
518 */
519 if (mpc_default_type == 5) {
520 printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
521
522 if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
523 printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n");
524 else {
525 printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
526 ELCR_fallback = 1;
527 }
528 }
529
530 for (i = 0; i < 16; i++) {
531 switch (mpc_default_type) {
532 case 2:
533 if (i == 0 || i == 13)
534 continue; /* IRQ0 & IRQ13 not connected */
535 /* fall through */
536 default:
537 if (i == 2)
538 continue; /* IRQ2 is never connected */
539 }
540
541 if (ELCR_fallback) {
542 /*
543 * If the ELCR indicates a level-sensitive interrupt, we
544 * copy that information over to the MP table in the
545 * irqflag field (level sensitive, active high polarity).
546 */
547 if (ELCR_trigger(i))
548 intsrc.mpc_irqflag = 13;
549 else
550 intsrc.mpc_irqflag = 0;
551 }
552
553 intsrc.mpc_srcbusirq = i;
554 intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
555 MP_intsrc_info(&intsrc);
556 }
557
558 intsrc.mpc_irqtype = mp_ExtINT;
559 intsrc.mpc_srcbusirq = 0;
560 intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
561 MP_intsrc_info(&intsrc);
562}
563
564static inline void __init construct_default_ISA_mptable(int mpc_default_type)
565{
566 struct mpc_config_processor processor;
567 struct mpc_config_bus bus;
568 struct mpc_config_ioapic ioapic;
569 struct mpc_config_lintsrc lintsrc;
570 int linttypes[2] = { mp_ExtINT, mp_NMI };
571 int i;
572
573 /*
574 * local APIC has default address
575 */
576 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
577
578 /*
579 * 2 CPUs, numbered 0 & 1.
580 */
581 processor.mpc_type = MP_PROCESSOR;
582 /* Either an integrated APIC or a discrete 82489DX. */
583 processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
584 processor.mpc_cpuflag = CPU_ENABLED;
585 processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
586 (boot_cpu_data.x86_model << 4) |
587 boot_cpu_data.x86_mask;
588 processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
589 processor.mpc_reserved[0] = 0;
590 processor.mpc_reserved[1] = 0;
591 for (i = 0; i < 2; i++) {
592 processor.mpc_apicid = i;
593 MP_processor_info(&processor);
594 }
595
596 bus.mpc_type = MP_BUS;
597 bus.mpc_busid = 0;
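	/*
	 * Default configurations 1-7 are defined by the MP spec; types
	 * above 4 also carry a PCI bus, registered as bus 1 below.
	 */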
598 switch (mpc_default_type) {
599 default:
600 printk("???\n");
601 printk(KERN_ERR "Unknown standard configuration %d\n",
602 mpc_default_type);
603 /* fall through */
604 case 1:
605 case 5:
606 memcpy(bus.mpc_bustype, "ISA ", 6);
607 break;
608 case 2:
609 case 6:
610 case 3:
611 memcpy(bus.mpc_bustype, "EISA ", 6);
612 break;
613 case 4:
614 case 7:
615 memcpy(bus.mpc_bustype, "MCA ", 6);
616 }
617 MP_bus_info(&bus);
618 if (mpc_default_type > 4) {
619 bus.mpc_busid = 1;
620 memcpy(bus.mpc_bustype, "PCI ", 6);
621 MP_bus_info(&bus);
622 }
623
624 ioapic.mpc_type = MP_IOAPIC;
625 ioapic.mpc_apicid = 2;
626 ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
627 ioapic.mpc_flags = MPC_APIC_USABLE;
628 ioapic.mpc_apicaddr = 0xFEC00000;
629 MP_ioapic_info(&ioapic);
630
631 /*
632 * We set up most of the low 16 IO-APIC pins according to MPS rules.
633 */
634 construct_default_ioirq_mptable(mpc_default_type);
635
636 lintsrc.mpc_type = MP_LINTSRC;
637 lintsrc.mpc_irqflag = 0; /* conforming */
638 lintsrc.mpc_srcbusid = 0;
639 lintsrc.mpc_srcbusirq = 0;
640 lintsrc.mpc_destapic = MP_APIC_ALL;
641 for (i = 0; i < 2; i++) {
642 lintsrc.mpc_irqtype = linttypes[i];
643 lintsrc.mpc_destapiclint = i;
644 MP_lintsrc_info(&lintsrc);
645 }
646}
647
648static struct intel_mp_floating *mpf_found;
649
650/*
651 * Scan the memory blocks for an SMP configuration block.
652 */
653void __init get_smp_config (void)
654{
655 struct intel_mp_floating *mpf = mpf_found;
656
657 /*
658 * ACPI may be used to obtain the entire SMP configuration or just to
659 * enumerate/configure processors (CONFIG_ACPI_BOOT). Note that
660 * ACPI supports both logical (e.g. Hyper-Threading) and physical
661 * processors, where MPS only supports physical.
662 */
663 if (acpi_lapic && acpi_ioapic) {
664 printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
665 return;
666 }
667 else if (acpi_lapic)
668 printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
669
670 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
671 if (mpf->mpf_feature2 & (1<<7)) {
672 printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
673 pic_mode = 1;
674 } else {
675 printk(KERN_INFO " Virtual Wire compatibility mode.\n");
676 pic_mode = 0;
677 }
678
679 /*
680 * Now see if we need to read further.
681 */
682 if (mpf->mpf_feature1 != 0) {
683
684 printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
685 construct_default_ISA_mptable(mpf->mpf_feature1);
686
687 } else if (mpf->mpf_physptr) {
688
689 /*
690 * Read the physical hardware table. Anything here will
691 * override the defaults.
692 */
693 if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
694 smp_found_config = 0;
695 printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
696 printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
697 return;
698 }
699 /*
700 * If there are no explicit MP IRQ entries, then we are
701 * broken. We set up most of the low 16 IO-APIC pins to
702 * ISA defaults and hope it will work.
703 */
704 if (!mp_irq_entries) {
705 struct mpc_config_bus bus;
706
707 printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
708
709 bus.mpc_type = MP_BUS;
710 bus.mpc_busid = 0;
711 memcpy(bus.mpc_bustype, "ISA ", 6);
712 MP_bus_info(&bus);
713
714 construct_default_ioirq_mptable(0);
715 }
716
717 } else
718 BUG();
719
720 printk(KERN_INFO "Processors: %d\n", num_processors);
721 /*
722 * Only use the first configuration found.
723 */
724}
725
726static int __init smp_scan_config (unsigned long base, unsigned long length)
727{
728 unsigned long *bp = phys_to_virt(base);
729 struct intel_mp_floating *mpf;
730
731 Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
732 if (sizeof(*mpf) != 16)
733 printk("Error: MPF size\n");
734
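	/*
	 * The MP floating pointer structure is 16 bytes long and lies on
	 * a 16-byte boundary, so step through the region one paragraph
	 * (four 32-bit words) at a time.
	 */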
735 while (length > 0) {
736 mpf = (struct intel_mp_floating *)bp;
737 if ((*bp == SMP_MAGIC_IDENT) &&
738 (mpf->mpf_length == 1) &&
739 !mpf_checksum((unsigned char *)bp, 16) &&
740 ((mpf->mpf_specification == 1)
741 || (mpf->mpf_specification == 4)) ) {
742
743 smp_found_config = 1;
744 printk(KERN_INFO "found SMP MP-table at %08lx\n",
745 virt_to_phys(mpf));
746 reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
747 if (mpf->mpf_physptr) {
748 /*
749				 * We cannot access the MPC table to compute its
750				 * size yet, as only a few megabytes from the
751				 * bottom of memory are mapped at this point.
752				 * The PC-9800 places its MPC table at the very end
753				 * of physical memory, so simply reserving
754				 * PAGE_SIZE from mpf->mpf_physptr would trigger a
755				 * BUG() in reserve_bootmem.
756 */
757 unsigned long size = PAGE_SIZE;
758 unsigned long end = max_low_pfn * PAGE_SIZE;
759 if (mpf->mpf_physptr + size > end)
760 size = end - mpf->mpf_physptr;
761 reserve_bootmem(mpf->mpf_physptr, size);
762 }
763
764 mpf_found = mpf;
765 return 1;
766 }
767 bp += 4;
768 length -= 16;
769 }
770 return 0;
771}
772
773void __init find_smp_config (void)
774{
775 unsigned int address;
776
777 /*
778	 * FIXME: Linux assumes you have 640K of base RAM;
779	 * this code continues that assumption...
780	 *
781	 * 1) Scan the bottom 1K for a signature
782	 * 2) Scan the top 1K of base RAM
783	 * 3) Scan the 64K of BIOS
784 */
785 if (smp_scan_config(0x0,0x400) ||
786 smp_scan_config(639*0x400,0x400) ||
787 smp_scan_config(0xF0000,0x10000))
788 return;
789 /*
790	 * If it is an SMP machine we should know by now, unless the
791	 * configuration is in an EISA/MCA bus machine with an
792	 * extended BIOS data area.
793	 *
794	 * There is a real-mode segmented pointer to the 4K EBDA area
795	 * at 0x40E; calculate its address and scan the area here.
796 *
797 * NOTE! There are Linux loaders that will corrupt the EBDA
798 * area, and as such this kind of SMP config may be less
799 * trustworthy, simply because the SMP table may have been
800 * stomped on during early boot. These loaders are buggy and
801 * should be fixed.
802 *
803	 * The MP 1.4 spec says to scan only the first 1K of the 4K EBDA.
804 */
805
806 address = get_bios_ebda();
807 if (address)
808 smp_scan_config(address, 0x400);
809}
810
811/* --------------------------------------------------------------------------
812 ACPI-based MP Configuration
813 -------------------------------------------------------------------------- */
814
815#ifdef CONFIG_ACPI_BOOT
816
817void __init mp_register_lapic_address (
818 u64 address)
819{
820 mp_lapic_addr = (unsigned long) address;
821
822 set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
823
824 if (boot_cpu_physical_apicid == -1U)
825 boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
826
827 Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
828}
829
830
831void __init mp_register_lapic (
832 u8 id,
833 u8 enabled)
834{
835 struct mpc_config_processor processor;
836 int boot_cpu = 0;
837
838 if (MAX_APICS - id <= 0) {
839 printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
840 id, MAX_APICS);
841 return;
842 }
843
844 if (id == boot_cpu_physical_apicid)
845 boot_cpu = 1;
846
847 processor.mpc_type = MP_PROCESSOR;
848 processor.mpc_apicid = id;
849 processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
850 processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
851 processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
852 processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
853 (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
854 processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
855 processor.mpc_reserved[0] = 0;
856 processor.mpc_reserved[1] = 0;
857
858 MP_processor_info(&processor);
859}
860
861#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_ACPI_INTERPRETER) || defined(CONFIG_ACPI_BOOT))
862
863#define MP_ISA_BUS 0
864#define MP_MAX_IOAPIC_PIN 127
865
866static struct mp_ioapic_routing {
867 int apic_id;
868 int gsi_base;
869 int gsi_end;
870 u32 pin_programmed[4];
871} mp_ioapic_routing[MAX_IO_APICS];
872
873
874static int mp_find_ioapic (
875 int gsi)
876{
877 int i = 0;
878
879 /* Find the IOAPIC that manages this GSI. */
880 for (i = 0; i < nr_ioapics; i++) {
881 if ((gsi >= mp_ioapic_routing[i].gsi_base)
882 && (gsi <= mp_ioapic_routing[i].gsi_end))
883 return i;
884 }
885
886 printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
887
888 return -1;
889}
890
891
892void __init mp_register_ioapic (
893 u8 id,
894 u32 address,
895 u32 gsi_base)
896{
897 int idx = 0;
898
899 if (nr_ioapics >= MAX_IO_APICS) {
900 printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
901 "(found %d)\n", MAX_IO_APICS, nr_ioapics);
902 panic("Recompile kernel with bigger MAX_IO_APICS!\n");
903 }
904 if (!address) {
905 printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
906 " found in MADT table, skipping!\n");
907 return;
908 }
909
910 idx = nr_ioapics++;
911
912 mp_ioapics[idx].mpc_type = MP_IOAPIC;
913 mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
914 mp_ioapics[idx].mpc_apicaddr = address;
915
916 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
917 mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
918 mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
919
920 /*
921 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
922 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
923 */
924 mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
925 mp_ioapic_routing[idx].gsi_base = gsi_base;
926 mp_ioapic_routing[idx].gsi_end = gsi_base +
927 io_apic_get_redir_entries(idx);
928
929 printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
930 "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
931 mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
932 mp_ioapic_routing[idx].gsi_base,
933 mp_ioapic_routing[idx].gsi_end);
934
935 return;
936}
937
938
939void __init mp_override_legacy_irq (
940 u8 bus_irq,
941 u8 polarity,
942 u8 trigger,
943 u32 gsi)
944{
945 struct mpc_config_intsrc intsrc;
946 int ioapic = -1;
947 int pin = -1;
948
949 /*
950 * Convert 'gsi' to 'ioapic.pin'.
951 */
952 ioapic = mp_find_ioapic(gsi);
953 if (ioapic < 0)
954 return;
955 pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
956
957 /*
958 * TBD: This check is for faulty timer entries, where the override
959 * erroneously sets the trigger to level, resulting in a HUGE
960 * increase of timer interrupts!
961 */
962 if ((bus_irq == 0) && (trigger == 3))
963 trigger = 1;
964
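	/* MPS irqflag encoding: bits 0-1 polarity, bits 2-3 trigger mode. */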
965 intsrc.mpc_type = MP_INTSRC;
966 intsrc.mpc_irqtype = mp_INT;
967 intsrc.mpc_irqflag = (trigger << 2) | polarity;
968 intsrc.mpc_srcbus = MP_ISA_BUS;
969 intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
970 intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
971 intsrc.mpc_dstirq = pin; /* INTIN# */
972
973 Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
974 intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
975 (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
976 intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
977
978 mp_irqs[mp_irq_entries] = intsrc;
979 if (++mp_irq_entries == MAX_IRQ_SOURCES)
980 panic("Max # of irq sources exceeded!\n");
981
982 return;
983}
984
985int es7000_plat;
986
987void __init mp_config_acpi_legacy_irqs (void)
988{
989 struct mpc_config_intsrc intsrc;
990 int i = 0;
991 int ioapic = -1;
992
993 /*
994 * Fabricate the legacy ISA bus (bus #31).
995 */
996 mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
997 Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
998
999 /*
1000 * Older generations of ES7000 have no legacy identity mappings
1001 */
1002 if (es7000_plat == 1)
1003 return;
1004
1005 /*
1006 * Locate the IOAPIC that manages the ISA IRQs (0-15).
1007 */
1008 ioapic = mp_find_ioapic(0);
1009 if (ioapic < 0)
1010 return;
1011
1012 intsrc.mpc_type = MP_INTSRC;
1013 intsrc.mpc_irqflag = 0; /* Conforming */
1014 intsrc.mpc_srcbus = MP_ISA_BUS;
1015 intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
1016
1017 /*
1018	 * Use the default configuration for IRQs 0-15, unless
1019	 * overridden by (MADT) interrupt source override entries.
1020 */
1021 for (i = 0; i < 16; i++) {
1022 int idx;
1023
1024 for (idx = 0; idx < mp_irq_entries; idx++) {
1025 struct mpc_config_intsrc *irq = mp_irqs + idx;
1026
1027 /* Do we already have a mapping for this ISA IRQ? */
1028 if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
1029 break;
1030
1031			/* Do we already have a mapping for this IOAPIC pin? */
1032 if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
1033 (irq->mpc_dstirq == i))
1034 break;
1035 }
1036
1037 if (idx != mp_irq_entries) {
1038 printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
1039 continue; /* IRQ already used */
1040 }
1041
1042 intsrc.mpc_irqtype = mp_INT;
1043 intsrc.mpc_srcbusirq = i; /* Identity mapped */
1044 intsrc.mpc_dstirq = i;
1045
1046 Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
1047 "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
1048 (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
1049 intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
1050 intsrc.mpc_dstirq);
1051
1052 mp_irqs[mp_irq_entries] = intsrc;
1053 if (++mp_irq_entries == MAX_IRQ_SOURCES)
1054 panic("Max # of irq sources exceeded!\n");
1055 }
1056}
1057
1058int mp_register_gsi (u32 gsi, int edge_level, int active_high_low)
1059{
1060 int ioapic = -1;
1061 int ioapic_pin = 0;
1062 int idx, bit = 0;
1063
1064#ifdef CONFIG_ACPI_BUS
1065 /* Don't set up the ACPI SCI because it's already set up */
1066 if (acpi_fadt.sci_int == gsi)
1067 return gsi;
1068#endif
1069
1070 ioapic = mp_find_ioapic(gsi);
1071 if (ioapic < 0) {
1072 printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
1073 return gsi;
1074 }
1075
1076 ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
1077
1078 if (ioapic_renumber_irq)
1079 gsi = ioapic_renumber_irq(ioapic, gsi);
1080
1081 /*
1082 * Avoid pin reprogramming. PRTs typically include entries
1083 * with redundant pin->gsi mappings (but unique PCI devices);
1084 * we only program the IOAPIC on the first.
1085 */
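	/*
	 * pin_programmed is a 128-bit bitmap (4 x u32): idx selects the
	 * 32-bit word and bit the position within it.
	 */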
1086 bit = ioapic_pin % 32;
1087 idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
1088 if (idx > 3) {
1089 printk(KERN_ERR "Invalid reference to IOAPIC pin "
1090 "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
1091 ioapic_pin);
1092 return gsi;
1093 }
1094 if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
1095 Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
1096 mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
1097 return gsi;
1098 }
1099
1100 mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
1101
1102 io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
1103 edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1,
1104 active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1);
1105 return gsi;
1106}
1107
1108#endif /*CONFIG_X86_IO_APIC && (CONFIG_ACPI_INTERPRETER || CONFIG_ACPI_BOOT)*/
1109#endif /*CONFIG_ACPI_BOOT*/
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c
new file mode 100644
index 000000000000..05d9f8f363a6
--- /dev/null
+++ b/arch/i386/kernel/msr.c
@@ -0,0 +1,346 @@
1/* ----------------------------------------------------------------------- *
2 *
3 * Copyright 2000 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
8 * USA; either version 2 of the License, or (at your option) any later
9 * version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * msr.c
15 *
16 * x86 MSR access device
17 *
18 * This device is accessed by lseek() to the appropriate register number
19 * and then read/write in chunks of 8 bytes. A larger size means multiple
20 * reads or writes of the same register.
21 *
22 * This driver uses /dev/cpu/%d/msr where %d is the minor number, and on
23 * an SMP box will direct the access to CPU %d.
24 */
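/*
 * A hypothetical userspace sketch (not part of this file), assuming a
 * /dev/cpu/0/msr node exists: read MSR 0x10 (the time-stamp counter)
 * by seeking to the register number and reading one 8-byte chunk.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/dev/cpu/0/msr", O_RDONLY);
 *	unsigned long long tsc;
 *	pread(fd, &tsc, 8, 0x10);	// the offset selects the MSR
 */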
25
26#include <linux/module.h>
27#include <linux/config.h>
28
29#include <linux/types.h>
30#include <linux/errno.h>
31#include <linux/fcntl.h>
32#include <linux/init.h>
33#include <linux/poll.h>
34#include <linux/smp.h>
35#include <linux/smp_lock.h>
36#include <linux/major.h>
37#include <linux/fs.h>
38#include <linux/device.h>
39#include <linux/cpu.h>
40#include <linux/notifier.h>
41
42#include <asm/processor.h>
43#include <asm/msr.h>
44#include <asm/uaccess.h>
45#include <asm/system.h>
46
47static struct class_simple *msr_class;
48
49/* Note: "err" is handled in a funny way below. Otherwise one version
50 of gcc or another breaks. */
51
52static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx)
53{
54 int err;
55
56 asm volatile ("1: wrmsr\n"
57 "2:\n"
58 ".section .fixup,\"ax\"\n"
59 "3: movl %4,%0\n"
60 " jmp 2b\n"
61 ".previous\n"
62 ".section __ex_table,\"a\"\n"
63 " .align 4\n" " .long 1b,3b\n" ".previous":"=&bDS" (err)
64 :"a"(eax), "d"(edx), "c"(reg), "i"(-EIO), "0"(0));
65
66 return err;
67}
68
69static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx)
70{
71 int err;
72
73 asm volatile ("1: rdmsr\n"
74 "2:\n"
75 ".section .fixup,\"ax\"\n"
76 "3: movl %4,%0\n"
77 " jmp 2b\n"
78 ".previous\n"
79 ".section __ex_table,\"a\"\n"
80 " .align 4\n"
81 " .long 1b,3b\n"
82 ".previous":"=&bDS" (err), "=a"(*eax), "=d"(*edx)
83 :"c"(reg), "i"(-EIO), "0"(0));
84
85 return err;
86}
87
88#ifdef CONFIG_SMP
89
90struct msr_command {
91 int cpu;
92 int err;
93 u32 reg;
94 u32 data[2];
95};
96
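/*
 * smp_call_function() runs these handlers on every CPU but the caller;
 * each handler compares cmd->cpu against smp_processor_id() so that
 * only the target CPU actually touches the MSR.
 */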
97static void msr_smp_wrmsr(void *cmd_block)
98{
99 struct msr_command *cmd = (struct msr_command *)cmd_block;
100
101 if (cmd->cpu == smp_processor_id())
102 cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]);
103}
104
105static void msr_smp_rdmsr(void *cmd_block)
106{
107 struct msr_command *cmd = (struct msr_command *)cmd_block;
108
109 if (cmd->cpu == smp_processor_id())
110 cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]);
111}
112
113static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
114{
115 struct msr_command cmd;
116 int ret;
117
118 preempt_disable();
119 if (cpu == smp_processor_id()) {
120 ret = wrmsr_eio(reg, eax, edx);
121 } else {
122 cmd.cpu = cpu;
123 cmd.reg = reg;
124 cmd.data[0] = eax;
125 cmd.data[1] = edx;
126
127 smp_call_function(msr_smp_wrmsr, &cmd, 1, 1);
128 ret = cmd.err;
129 }
130 preempt_enable();
131 return ret;
132}
133
134static inline int do_rdmsr(int cpu, u32 reg, u32 * eax, u32 * edx)
135{
136 struct msr_command cmd;
137 int ret;
138
139 preempt_disable();
140 if (cpu == smp_processor_id()) {
141 ret = rdmsr_eio(reg, eax, edx);
142 } else {
143 cmd.cpu = cpu;
144 cmd.reg = reg;
145
146 smp_call_function(msr_smp_rdmsr, &cmd, 1, 1);
147
148 *eax = cmd.data[0];
149 *edx = cmd.data[1];
150
151 ret = cmd.err;
152 }
153 preempt_enable();
154 return ret;
155}
156
157#else /* ! CONFIG_SMP */
158
159static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
160{
161 return wrmsr_eio(reg, eax, edx);
162}
163
164static inline int do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx)
165{
166 return rdmsr_eio(reg, eax, edx);
167}
168
169#endif /* ! CONFIG_SMP */
170
171static loff_t msr_seek(struct file *file, loff_t offset, int orig)
172{
173 loff_t ret = -EINVAL;
174
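	/*
	 * Only SEEK_SET (0) and SEEK_CUR (1) are meaningful here: the
	 * resulting file position is the MSR number to access.
	 */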
175 lock_kernel();
176 switch (orig) {
177 case 0:
178 file->f_pos = offset;
179 ret = file->f_pos;
180 break;
181 case 1:
182 file->f_pos += offset;
183 ret = file->f_pos;
184 }
185 unlock_kernel();
186 return ret;
187}
188
189static ssize_t msr_read(struct file *file, char __user * buf,
190 size_t count, loff_t * ppos)
191{
192 u32 __user *tmp = (u32 __user *) buf;
193 u32 data[2];
194 size_t rv;
195 u32 reg = *ppos;
196 int cpu = iminor(file->f_dentry->d_inode);
197 int err;
198
199 if (count % 8)
200 return -EINVAL; /* Invalid chunk size */
201
202 for (rv = 0; count; count -= 8) {
203 err = do_rdmsr(cpu, reg, &data[0], &data[1]);
204 if (err)
205 return err;
206 if (copy_to_user(tmp, &data, 8))
207 return -EFAULT;
208 tmp += 2;
209 }
210
211 return ((char __user *)tmp) - buf;
212}
213
214static ssize_t msr_write(struct file *file, const char __user *buf,
215 size_t count, loff_t *ppos)
216{
217 const u32 __user *tmp = (const u32 __user *)buf;
218 u32 data[2];
219 size_t rv;
220 u32 reg = *ppos;
221 int cpu = iminor(file->f_dentry->d_inode);
222 int err;
223
224 if (count % 8)
225 return -EINVAL; /* Invalid chunk size */
226
227 for (rv = 0; count; count -= 8) {
228 if (copy_from_user(&data, tmp, 8))
229 return -EFAULT;
230 err = do_wrmsr(cpu, reg, data[0], data[1]);
231 if (err)
232 return err;
233 tmp += 2;
234 }
235
236 return ((char __user *)tmp) - buf;
237}
238
239static int msr_open(struct inode *inode, struct file *file)
240{
241 unsigned int cpu = iminor(file->f_dentry->d_inode);
242 struct cpuinfo_x86 *c = &(cpu_data)[cpu];
243
244 if (cpu >= NR_CPUS || !cpu_online(cpu))
245 return -ENXIO; /* No such CPU */
246 if (!cpu_has(c, X86_FEATURE_MSR))
247 return -EIO; /* MSR not supported */
248
249 return 0;
250}
251
252/*
253 * File operations we support
254 */
255static struct file_operations msr_fops = {
256 .owner = THIS_MODULE,
257 .llseek = msr_seek,
258 .read = msr_read,
259 .write = msr_write,
260 .open = msr_open,
261};
262
263static int msr_class_simple_device_add(int i)
264{
265 int err = 0;
266 struct class_device *class_err;
267
268 class_err = class_simple_device_add(msr_class, MKDEV(MSR_MAJOR, i), NULL, "msr%d",i);
269 if (IS_ERR(class_err))
270 err = PTR_ERR(class_err);
271 return err;
272}
273
274static int __devinit msr_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
275{
276 unsigned int cpu = (unsigned long)hcpu;
277
278 switch (action) {
279 case CPU_ONLINE:
280 msr_class_simple_device_add(cpu);
281 break;
282 case CPU_DEAD:
283 class_simple_device_remove(MKDEV(MSR_MAJOR, cpu));
284 break;
285 }
286 return NOTIFY_OK;
287}
288
289static struct notifier_block msr_class_cpu_notifier =
290{
291 .notifier_call = msr_class_cpu_callback,
292};
293
294static int __init msr_init(void)
295{
296 int i, err = 0;
297 i = 0;
298
299 if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) {
300 printk(KERN_ERR "msr: unable to get major %d for msr\n",
301 MSR_MAJOR);
302 err = -EBUSY;
303 goto out;
304 }
305 msr_class = class_simple_create(THIS_MODULE, "msr");
306 if (IS_ERR(msr_class)) {
307 err = PTR_ERR(msr_class);
308 goto out_chrdev;
309 }
310 for_each_online_cpu(i) {
311 err = msr_class_simple_device_add(i);
312 if (err != 0)
313 goto out_class;
314 }
315 register_cpu_notifier(&msr_class_cpu_notifier);
316
317 err = 0;
318 goto out;
319
320out_class:
321 i = 0;
322 for_each_online_cpu(i)
323 class_simple_device_remove(MKDEV(MSR_MAJOR, i));
324 class_simple_destroy(msr_class);
325out_chrdev:
326 unregister_chrdev(MSR_MAJOR, "cpu/msr");
327out:
328 return err;
329}
330
331static void __exit msr_exit(void)
332{
333 int cpu = 0;
334 for_each_online_cpu(cpu)
335 class_simple_device_remove(MKDEV(MSR_MAJOR, cpu));
336 class_simple_destroy(msr_class);
337 unregister_chrdev(MSR_MAJOR, "cpu/msr");
338 unregister_cpu_notifier(&msr_class_cpu_notifier);
339}
340
341module_init(msr_init);
342module_exit(msr_exit);
343
344MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>");
345MODULE_DESCRIPTION("x86 generic MSR driver");
346MODULE_LICENSE("GPL");
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
new file mode 100644
index 000000000000..f5b0c5081bd6
--- /dev/null
+++ b/arch/i386/kernel/nmi.c
@@ -0,0 +1,570 @@
1/*
2 *  linux/arch/i386/kernel/nmi.c
3 *
4 * NMI watchdog support on APIC systems
5 *
6 * Started by Ingo Molnar <mingo@redhat.com>
7 *
8 * Fixes:
9 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
10 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
11 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
12 * Pavel Machek and
13 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
14 */
15
16#include <linux/config.h>
17#include <linux/mm.h>
18#include <linux/irq.h>
19#include <linux/delay.h>
20#include <linux/bootmem.h>
21#include <linux/smp_lock.h>
22#include <linux/interrupt.h>
23#include <linux/mc146818rtc.h>
24#include <linux/kernel_stat.h>
25#include <linux/module.h>
26#include <linux/nmi.h>
27#include <linux/sysdev.h>
28#include <linux/sysctl.h>
29
30#include <asm/smp.h>
31#include <asm/mtrr.h>
32#include <asm/mpspec.h>
33#include <asm/nmi.h>
34
35#include "mach_traps.h"
36
37unsigned int nmi_watchdog = NMI_NONE;
38extern int unknown_nmi_panic;
39static unsigned int nmi_hz = HZ;
40static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
41static unsigned int nmi_p4_cccr_val;
42extern void show_registers(struct pt_regs *regs);
43
44/*
45 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
46 * - it may be reserved by some other driver, or not
47 * - when not reserved by some other driver, it may be used for
48 * the NMI watchdog, or not
49 *
50 * This is maintained separately from nmi_active because the NMI
51 * watchdog may also be driven from the I/O APIC timer.
52 */
53static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
54static unsigned int lapic_nmi_owner;
55#define LAPIC_NMI_WATCHDOG (1<<0)
56#define LAPIC_NMI_RESERVED (1<<1)
57
58/* nmi_active:
59 * +1: the lapic NMI watchdog is active, but can be disabled
60 * 0: the lapic NMI watchdog has not been set up, and cannot
61 * be enabled
62 * -1: the lapic NMI watchdog is disabled, but can be enabled
63 */
64int nmi_active;
65
66#define K7_EVNTSEL_ENABLE (1 << 22)
67#define K7_EVNTSEL_INT (1 << 20)
68#define K7_EVNTSEL_OS (1 << 17)
69#define K7_EVNTSEL_USR (1 << 16)
70#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
71#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
72
73#define P6_EVNTSEL0_ENABLE (1 << 22)
74#define P6_EVNTSEL_INT (1 << 20)
75#define P6_EVNTSEL_OS (1 << 17)
76#define P6_EVNTSEL_USR (1 << 16)
77#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
78#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
79
80#define MSR_P4_MISC_ENABLE 0x1A0
81#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
82#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
83#define MSR_P4_PERFCTR0 0x300
84#define MSR_P4_CCCR0 0x360
85#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
86#define P4_ESCR_OS (1<<3)
87#define P4_ESCR_USR (1<<2)
88#define P4_CCCR_OVF_PMI0 (1<<26)
89#define P4_CCCR_OVF_PMI1 (1<<27)
90#define P4_CCCR_THRESHOLD(N) ((N)<<20)
91#define P4_CCCR_COMPLEMENT (1<<19)
92#define P4_CCCR_COMPARE (1<<18)
93#define P4_CCCR_REQUIRED (3<<16)
94#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
95#define P4_CCCR_ENABLE (1<<12)
96/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
97 CRU_ESCR0 (with any non-null event selector) through a complemented
98 max threshold. [IA32-Vol3, Section 14.9.9] */
99#define MSR_P4_IQ_COUNTER0 0x30C
100#define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
101#define P4_NMI_IQ_CCCR0 \
102 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
103 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
104
105int __init check_nmi_watchdog (void)
106{
107 unsigned int prev_nmi_count[NR_CPUS];
108 int cpu;
109
110 printk(KERN_INFO "testing NMI watchdog ... ");
111
112 for (cpu = 0; cpu < NR_CPUS; cpu++)
113 prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
114 local_irq_enable();
115 mdelay((10*1000)/nmi_hz); // wait 10 ticks
116
117 /* FIXME: Only boot CPU is online at this stage. Check CPUs
118 as they come up. */
119 for (cpu = 0; cpu < NR_CPUS; cpu++) {
120#ifdef CONFIG_SMP
121 /* Check cpu_callin_map here because that is set
122 after the timer is started. */
123 if (!cpu_isset(cpu, cpu_callin_map))
124 continue;
125#endif
126 if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
127 printk("CPU#%d: NMI appears to be stuck!\n", cpu);
128 nmi_active = 0;
129 lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
130 return -1;
131 }
132 }
133 printk("OK.\n");
134
135 /* now that we know it works we can reduce NMI frequency to
136 something more reasonable; makes a difference in some configs */
137 if (nmi_watchdog == NMI_LOCAL_APIC)
138 nmi_hz = 1;
139
140 return 0;
141}
142
143static int __init setup_nmi_watchdog(char *str)
144{
145 int nmi;
146
147 get_option(&str, &nmi);
148
149 if (nmi >= NMI_INVALID)
150 return 0;
151 if (nmi == NMI_NONE)
152 nmi_watchdog = nmi;
153 /*
154 * If any other x86 CPU has a local APIC, then
155 * please test the NMI stuff there and send me the
156 * missing bits. Right now Intel P6/P4 and AMD K7 only.
157 */
158 if ((nmi == NMI_LOCAL_APIC) &&
159 (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
160 (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
161 nmi_watchdog = nmi;
162 if ((nmi == NMI_LOCAL_APIC) &&
163 (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
164 (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
165 nmi_watchdog = nmi;
166 /*
167 * We can enable the IO-APIC watchdog
168 * unconditionally.
169 */
170 if (nmi == NMI_IO_APIC) {
171 nmi_active = 1;
172 nmi_watchdog = nmi;
173 }
174 return 1;
175}
176
177__setup("nmi_watchdog=", setup_nmi_watchdog);
178
179static void disable_lapic_nmi_watchdog(void)
180{
181 if (nmi_active <= 0)
182 return;
183 switch (boot_cpu_data.x86_vendor) {
184 case X86_VENDOR_AMD:
185 wrmsr(MSR_K7_EVNTSEL0, 0, 0);
186 break;
187 case X86_VENDOR_INTEL:
188 switch (boot_cpu_data.x86) {
189 case 6:
190 if (boot_cpu_data.x86_model > 0xd)
191 break;
192
193 wrmsr(MSR_P6_EVNTSEL0, 0, 0);
194 break;
195 case 15:
196 if (boot_cpu_data.x86_model > 0x3)
197 break;
198
199 wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
200 wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
201 break;
202 }
203 break;
204 }
205 nmi_active = -1;
206 /* tell do_nmi() and others that we're not active any more */
207 nmi_watchdog = 0;
208}
209
210static void enable_lapic_nmi_watchdog(void)
211{
212 if (nmi_active < 0) {
213 nmi_watchdog = NMI_LOCAL_APIC;
214 setup_apic_nmi_watchdog();
215 }
216}
217
218int reserve_lapic_nmi(void)
219{
220 unsigned int old_owner;
221
222 spin_lock(&lapic_nmi_owner_lock);
223 old_owner = lapic_nmi_owner;
224 lapic_nmi_owner |= LAPIC_NMI_RESERVED;
225 spin_unlock(&lapic_nmi_owner_lock);
226 if (old_owner & LAPIC_NMI_RESERVED)
227 return -EBUSY;
228 if (old_owner & LAPIC_NMI_WATCHDOG)
229 disable_lapic_nmi_watchdog();
230 return 0;
231}
232
233void release_lapic_nmi(void)
234{
235 unsigned int new_owner;
236
237 spin_lock(&lapic_nmi_owner_lock);
238 new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED;
239 lapic_nmi_owner = new_owner;
240 spin_unlock(&lapic_nmi_owner_lock);
241 if (new_owner & LAPIC_NMI_WATCHDOG)
242 enable_lapic_nmi_watchdog();
243}
244
245void disable_timer_nmi_watchdog(void)
246{
247 if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0))
248 return;
249
250 unset_nmi_callback();
251 nmi_active = -1;
252 nmi_watchdog = NMI_NONE;
253}
254
255void enable_timer_nmi_watchdog(void)
256{
257 if (nmi_active < 0) {
258 nmi_watchdog = NMI_IO_APIC;
259 touch_nmi_watchdog();
260 nmi_active = 1;
261 }
262}
263
264#ifdef CONFIG_PM
265
266static int nmi_pm_active; /* nmi_active before suspend */
267
268static int lapic_nmi_suspend(struct sys_device *dev, u32 state)
269{
270 nmi_pm_active = nmi_active;
271 disable_lapic_nmi_watchdog();
272 return 0;
273}
274
275static int lapic_nmi_resume(struct sys_device *dev)
276{
277 if (nmi_pm_active > 0)
278 enable_lapic_nmi_watchdog();
279 return 0;
280}
281
282
283static struct sysdev_class nmi_sysclass = {
284 set_kset_name("lapic_nmi"),
285 .resume = lapic_nmi_resume,
286 .suspend = lapic_nmi_suspend,
287};
288
289static struct sys_device device_lapic_nmi = {
290 .id = 0,
291 .cls = &nmi_sysclass,
292};
293
294static int __init init_lapic_nmi_sysfs(void)
295{
296 int error;
297
298 if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC)
299 return 0;
300
301 error = sysdev_class_register(&nmi_sysclass);
302 if (!error)
303 error = sysdev_register(&device_lapic_nmi);
304 return error;
305}
306/* must come after the local APIC's device_initcall() */
307late_initcall(init_lapic_nmi_sysfs);
308
309#endif /* CONFIG_PM */
310
311/*
312 * Activate the NMI watchdog via the local APIC.
313 * Original code written by Keith Owens.
314 */
315
316static void clear_msr_range(unsigned int base, unsigned int n)
317{
318 unsigned int i;
319
320 for(i = 0; i < n; ++i)
321 wrmsr(base+i, 0, 0);
322}
323
324static void setup_k7_watchdog(void)
325{
326 unsigned int evntsel;
327
328 nmi_perfctr_msr = MSR_K7_PERFCTR0;
329
330 clear_msr_range(MSR_K7_EVNTSEL0, 4);
331 clear_msr_range(MSR_K7_PERFCTR0, 4);
332
333 evntsel = K7_EVNTSEL_INT
334 | K7_EVNTSEL_OS
335 | K7_EVNTSEL_USR
336 | K7_NMI_EVENT;
337
338 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
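	/*
	 * Writing the negated cycle count arms the counter to overflow
	 * (and raise the NMI) after cpu_khz*1000/nmi_hz cycles, i.e.
	 * roughly nmi_hz times per second.
	 */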
339 Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
340 wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
341 apic_write(APIC_LVTPC, APIC_DM_NMI);
342 evntsel |= K7_EVNTSEL_ENABLE;
343 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
344}
345
346static void setup_p6_watchdog(void)
347{
348 unsigned int evntsel;
349
350 nmi_perfctr_msr = MSR_P6_PERFCTR0;
351
352 clear_msr_range(MSR_P6_EVNTSEL0, 2);
353 clear_msr_range(MSR_P6_PERFCTR0, 2);
354
355 evntsel = P6_EVNTSEL_INT
356 | P6_EVNTSEL_OS
357 | P6_EVNTSEL_USR
358 | P6_NMI_EVENT;
359
360 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
361 Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
362 wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
363 apic_write(APIC_LVTPC, APIC_DM_NMI);
364 evntsel |= P6_EVNTSEL0_ENABLE;
365 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
366}
367
368static int setup_p4_watchdog(void)
369{
370 unsigned int misc_enable, dummy;
371
372 rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
373 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
374 return 0;
375
376 nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
377 nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
378#ifdef CONFIG_SMP
379 if (smp_num_siblings == 2)
380 nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
381#endif
382
383 if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
384 clear_msr_range(0x3F1, 2);
385	/* MSR 0x3F0 seems to have a default value of 0xFC00, but current
386	   docs don't fully define it, so leave it alone for now. */
387 if (boot_cpu_data.x86_model >= 0x3) {
388 /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
389 clear_msr_range(0x3A0, 26);
390 clear_msr_range(0x3BC, 3);
391 } else {
392 clear_msr_range(0x3A0, 31);
393 }
394 clear_msr_range(0x3C0, 6);
395 clear_msr_range(0x3C8, 6);
396 clear_msr_range(0x3E0, 2);
397 clear_msr_range(MSR_P4_CCCR0, 18);
398 clear_msr_range(MSR_P4_PERFCTR0, 18);
399
400 wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
401 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
402 Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
403 wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
404 apic_write(APIC_LVTPC, APIC_DM_NMI);
405 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
406 return 1;
407}
408
409void setup_apic_nmi_watchdog (void)
410{
411 switch (boot_cpu_data.x86_vendor) {
412 case X86_VENDOR_AMD:
413 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
414 return;
415 setup_k7_watchdog();
416 break;
417 case X86_VENDOR_INTEL:
418 switch (boot_cpu_data.x86) {
419 case 6:
420 if (boot_cpu_data.x86_model > 0xd)
421 return;
422
423 setup_p6_watchdog();
424 break;
425 case 15:
426 if (boot_cpu_data.x86_model > 0x3)
427 return;
428
429 if (!setup_p4_watchdog())
430 return;
431 break;
432 default:
433 return;
434 }
435 break;
436 default:
437 return;
438 }
439 lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
440 nmi_active = 1;
441}
442
443/*
444 * The best way to detect whether a CPU has a 'hard lockup' problem
445 * is to check its local APIC timer IRQ counts. If they are not
446 * changing, then that CPU has some problem.
447 *
448 * As these watchdog NMI IRQs are generated on every CPU, we only
449 * have to check the current processor.
450 *
451 * Since NMIs don't listen to _any_ locks, we have to be extremely
452 * careful not to rely on unsafe variables. The printk might lock
453 * up though, so we have to break up any console locks first ...
454 * [when there will be more tty-related locks, break them up
455 * here too!]
456 */
457
458static unsigned int
459 last_irq_sums [NR_CPUS],
460 alert_counter [NR_CPUS];
461
462void touch_nmi_watchdog (void)
463{
464 int i;
465
466 /*
467 * Just reset the alert counters, (other CPUs might be
468 * spinning on locks we hold):
469 */
470 for (i = 0; i < NR_CPUS; i++)
471 alert_counter[i] = 0;
472}
473
474extern void die_nmi(struct pt_regs *, const char *msg);
475
476void nmi_watchdog_tick (struct pt_regs * regs)
477{
478
479 /*
480 * Since current_thread_info()-> is always on the stack, and we
481 * always switch the stack NMI-atomically, it's safe to use
482 * smp_processor_id().
483 */
484 int sum, cpu = smp_processor_id();
485
486 sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
487
488 if (last_irq_sums[cpu] == sum) {
489 /*
490 * Ayiee, looks like this CPU is stuck ...
491 * wait a few IRQs (5 seconds) before doing the oops ...
492 */
493 alert_counter[cpu]++;
494 if (alert_counter[cpu] == 5*nmi_hz)
495 die_nmi(regs, "NMI Watchdog detected LOCKUP");
496 } else {
497 last_irq_sums[cpu] = sum;
498 alert_counter[cpu] = 0;
499 }
500 if (nmi_perfctr_msr) {
501 if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
502 /*
503 * P4 quirks:
504 * - An overflown perfctr will assert its interrupt
505 * until the OVF flag in its CCCR is cleared.
506 * - LVTPC is masked on interrupt and must be
507 * unmasked by the LVTPC handler.
508 */
509 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
510 apic_write(APIC_LVTPC, APIC_DM_NMI);
511 }
512 else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) {
513		/* Only the P6-based Pentium M needs to re-unmask
514		 * the APIC vector, but it doesn't hurt
515		 * other P6 variants */
516 apic_write(APIC_LVTPC, APIC_DM_NMI);
517 }
518 wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
519 }
520}
521
522#ifdef CONFIG_SYSCTL
523
524static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
525{
526 unsigned char reason = get_nmi_reason();
527 char buf[64];
528
529 if (!(reason & 0xc0)) {
530 sprintf(buf, "NMI received for unknown reason %02x\n", reason);
531 die_nmi(regs, buf);
532 }
533 return 0;
534}
535
536/*
537 * proc handler for /proc/sys/kernel/unknown_nmi_panic
538 */
539int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
540 void __user *buffer, size_t *length, loff_t *ppos)
541{
542 int old_state;
543
544 old_state = unknown_nmi_panic;
545 proc_dointvec(table, write, file, buffer, length, ppos);
546 if (!!old_state == !!unknown_nmi_panic)
547 return 0;
548
549 if (unknown_nmi_panic) {
550 if (reserve_lapic_nmi() < 0) {
551 unknown_nmi_panic = 0;
552 return -EBUSY;
553 } else {
554 set_nmi_callback(unknown_nmi_panic_callback);
555 }
556 } else {
557 release_lapic_nmi();
558 unset_nmi_callback();
559 }
560 return 0;
561}
562
563#endif
564
565EXPORT_SYMBOL(nmi_active);
566EXPORT_SYMBOL(nmi_watchdog);
567EXPORT_SYMBOL(reserve_lapic_nmi);
568EXPORT_SYMBOL(release_lapic_nmi);
569EXPORT_SYMBOL(disable_timer_nmi_watchdog);
570EXPORT_SYMBOL(enable_timer_nmi_watchdog);
diff --git a/arch/i386/kernel/numaq.c b/arch/i386/kernel/numaq.c
new file mode 100644
index 000000000000..e51edf0a6564
--- /dev/null
+++ b/arch/i386/kernel/numaq.c
@@ -0,0 +1,79 @@
1/*
2 * Written by: Patricia Gaughen, IBM Corporation
3 *
4 * Copyright (C) 2002, IBM Corp.
5 *
6 * All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
16 * NON INFRINGEMENT. See the GNU General Public License for more
17 * details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 *
23 * Send feedback to <gone@us.ibm.com>
24 */
25
26#include <linux/config.h>
27#include <linux/mm.h>
28#include <linux/bootmem.h>
29#include <linux/mmzone.h>
30#include <linux/module.h>
31#include <linux/nodemask.h>
32#include <asm/numaq.h>
33#include <asm/topology.h>
34
35#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
36
37/*
38 * Function: smp_dump_qct()
39 *
40 * Description: gets memory layout from the quad config table. This
41 * function also updates node_online_map with the nodes (quads) present.
42 */
43static void __init smp_dump_qct(void)
44{
45 int node;
46 struct eachquadmem *eq;
47 struct sys_cfg_data *scd =
48 (struct sys_cfg_data *)__va(SYS_CFG_DATA_PRIV_ADDR);
49
50 nodes_clear(node_online_map);
51 for_each_node(node) {
52 if (scd->quads_present31_0 & (1 << node)) {
53 node_set_online(node);
54 eq = &scd->eq[node];
55 /* Convert to pages */
56 node_start_pfn[node] = MB_TO_PAGES(
57 eq->hi_shrd_mem_start - eq->priv_mem_size);
58 node_end_pfn[node] = MB_TO_PAGES(
59 eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
60
61 memory_present(node,
62 node_start_pfn[node], node_end_pfn[node]);
63 node_remap_size[node] = node_memmap_size_bytes(node,
64 node_start_pfn[node],
65 node_end_pfn[node]);
66 }
67 }
68}
69
70/*
71 * Unlike Summit, we don't really care to let the NUMA-Q
72 * fall back to flat mode. Don't compile for NUMA-Q
73 * unless you really need it!
74 */
75int __init get_memcfg_numaq(void)
76{
77 smp_dump_qct();
78 return 1;
79}
diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c
new file mode 100644
index 000000000000..4de2e03c7b45
--- /dev/null
+++ b/arch/i386/kernel/pci-dma.c
@@ -0,0 +1,147 @@
1/*
2 * Dynamic DMA mapping support.
3 *
4 * On i386 there is no hardware dynamic DMA address translation,
5 * so consistent alloc/free are merely page allocation/freeing.
6 * The rest of the dynamic DMA mapping interface is implemented
7 * in asm/pci.h.
8 */
9
10#include <linux/types.h>
11#include <linux/mm.h>
12#include <linux/string.h>
13#include <linux/pci.h>
14#include <asm/io.h>
15
16struct dma_coherent_mem {
17 void *virt_base;
18 u32 device_base;
19 int size;
20 int flags;
21 unsigned long *bitmap;
22};
23
24void *dma_alloc_coherent(struct device *dev, size_t size,
25 dma_addr_t *dma_handle, unsigned int __nocast gfp)
26{
27 void *ret;
28 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
29 int order = get_order(size);
30 /* ignore region specifiers */
31 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
32
33 if (mem) {
34 int page = bitmap_find_free_region(mem->bitmap, mem->size,
35 order);
36 if (page >= 0) {
37 *dma_handle = mem->device_base + (page << PAGE_SHIFT);
38 ret = mem->virt_base + (page << PAGE_SHIFT);
39 memset(ret, 0, size);
40 return ret;
41 }
42 if (mem->flags & DMA_MEMORY_EXCLUSIVE)
43 return NULL;
44 }
45
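	/* Devices that cannot address the full 32-bit space get memory
	   from ZONE_DMA (the low 16MB on i386). */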
46 if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
47 gfp |= GFP_DMA;
48
49 ret = (void *)__get_free_pages(gfp, order);
50
51 if (ret != NULL) {
52 memset(ret, 0, size);
53 *dma_handle = virt_to_phys(ret);
54 }
55 return ret;
56}
57
58void dma_free_coherent(struct device *dev, size_t size,
59 void *vaddr, dma_addr_t dma_handle)
60{
61 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
62 int order = get_order(size);
63
64 if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
65 int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
66
67 bitmap_release_region(mem->bitmap, page, order);
68 } else
69 free_pages((unsigned long)vaddr, order);
70}
71
72int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
73 dma_addr_t device_addr, size_t size, int flags)
74{
75 void __iomem *mem_base;
76 int pages = size >> PAGE_SHIFT;
77	int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); /* one bit per page */
78
79 if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
80 goto out;
81 if (!size)
82 goto out;
83 if (dev->dma_mem)
84 goto out;
85
86 /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
87
88 mem_base = ioremap(bus_addr, size);
89 if (!mem_base)
90 goto out;
91
92 dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
93 if (!dev->dma_mem)
94 goto out;
95 memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem));
96 dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL);
97 if (!dev->dma_mem->bitmap)
98 goto free1_out;
99 memset(dev->dma_mem->bitmap, 0, bitmap_size);
100
101 dev->dma_mem->virt_base = mem_base;
102 dev->dma_mem->device_base = device_addr;
103 dev->dma_mem->size = pages;
104 dev->dma_mem->flags = flags;
105
106 if (flags & DMA_MEMORY_MAP)
107 return DMA_MEMORY_MAP;
108
109 return DMA_MEMORY_IO;
110
111 free1_out:
112 kfree(dev->dma_mem->bitmap);
113 out:
114 return 0;
115}
116EXPORT_SYMBOL(dma_declare_coherent_memory);
117
118void dma_release_declared_memory(struct device *dev)
119{
120 struct dma_coherent_mem *mem = dev->dma_mem;
121
122 if(!mem)
123 return;
124 dev->dma_mem = NULL;
125 iounmap(mem->virt_base);
126 kfree(mem->bitmap);
127 kfree(mem);
128}
129EXPORT_SYMBOL(dma_release_declared_memory);
130
131void *dma_mark_declared_memory_occupied(struct device *dev,
132 dma_addr_t device_addr, size_t size)
133{
134 struct dma_coherent_mem *mem = dev->dma_mem;
135 int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
136 int pos, err;
137
138 if (!mem)
139 return ERR_PTR(-EINVAL);
140
141 pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
142 err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
143 if (err != 0)
144 return ERR_PTR(err);
145 return mem->virt_base + (pos << PAGE_SHIFT);
146}
147EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
new file mode 100644
index 000000000000..c36fedf40e95
--- /dev/null
+++ b/arch/i386/kernel/process.c
@@ -0,0 +1,848 @@
1/*
2 * linux/arch/i386/kernel/process.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Pentium III FXSR, SSE support
7 * Gareth Hughes <gareth@valinux.com>, May 2000
8 */
9
10/*
11 * This file handles the architecture-dependent parts of process handling..
12 */
13
14#include <stdarg.h>
15
16#include <linux/errno.h>
17#include <linux/sched.h>
18#include <linux/fs.h>
19#include <linux/kernel.h>
20#include <linux/mm.h>
21#include <linux/elfcore.h>
22#include <linux/smp.h>
23#include <linux/smp_lock.h>
24#include <linux/stddef.h>
25#include <linux/slab.h>
26#include <linux/vmalloc.h>
27#include <linux/user.h>
28#include <linux/a.out.h>
29#include <linux/interrupt.h>
30#include <linux/config.h>
31#include <linux/utsname.h>
32#include <linux/delay.h>
33#include <linux/reboot.h>
34#include <linux/init.h>
35#include <linux/mc146818rtc.h>
36#include <linux/module.h>
37#include <linux/kallsyms.h>
38#include <linux/ptrace.h>
39#include <linux/random.h>
40
41#include <asm/uaccess.h>
42#include <asm/pgtable.h>
43#include <asm/system.h>
44#include <asm/io.h>
45#include <asm/ldt.h>
46#include <asm/processor.h>
47#include <asm/i387.h>
48#include <asm/irq.h>
49#include <asm/desc.h>
50#ifdef CONFIG_MATH_EMULATION
51#include <asm/math_emu.h>
52#endif
53
54#include <linux/irq.h>
55#include <linux/err.h>
56
57asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
58
59static int hlt_counter;
60
61unsigned long boot_option_idle_override = 0;
62EXPORT_SYMBOL(boot_option_idle_override);
63
64/*
65 * Return saved PC of a blocked thread.
66 */
67unsigned long thread_saved_pc(struct task_struct *tsk)
68{
69 return ((unsigned long *)tsk->thread.esp)[3];
70}
71
72/*
73 * Power-management idle function, if any..
74 */
75void (*pm_idle)(void);
76static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
77
78void disable_hlt(void)
79{
80 hlt_counter++;
81}
82
83EXPORT_SYMBOL(disable_hlt);
84
85void enable_hlt(void)
86{
87 hlt_counter--;
88}
89
90EXPORT_SYMBOL(enable_hlt);
91
92/*
93 * We use this if we don't have any better
94 * idle routine..
95 */
96void default_idle(void)
97{
98 if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
99 local_irq_disable();
100 if (!need_resched())
101 safe_halt();
102 else
103 local_irq_enable();
104 } else {
105 cpu_relax();
106 }
107}
108
109/*
110 * On SMP it's slightly faster (but much more power-consuming!)
111 * to poll the ->work.need_resched flag instead of waiting for the
112 * cross-CPU IPI to arrive. Use this option with caution.
113 */
114static void poll_idle (void)
115{
116 int oldval;
117
118 local_irq_enable();
119
120 /*
121 * Deal with another CPU just having chosen a thread to
122 * run here:
123 */
124 oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
125
126 if (!oldval) {
127 set_thread_flag(TIF_POLLING_NRFLAG);
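		/*
		 * Spin executing rep;nop (the PAUSE hint) until the
		 * TIF_NEED_RESCHED bit shows up in our thread flags.
		 */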
128 asm volatile(
129 "2:"
130 "testl %0, %1;"
131 "rep; nop;"
132 "je 2b;"
133 : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
134
135 clear_thread_flag(TIF_POLLING_NRFLAG);
136 } else {
137 set_need_resched();
138 }
139}
140
141/*
142 * The idle thread. There's no useful work to be
143 * done, so just try to conserve power and have a
144 * low exit latency (i.e. sit in a loop waiting for
145 * somebody to say that they'd like to reschedule)
146 */
147void cpu_idle (void)
148{
149 /* endless idle loop with no priority at all */
150 while (1) {
151 while (!need_resched()) {
152 void (*idle)(void);
153
154 if (__get_cpu_var(cpu_idle_state))
155 __get_cpu_var(cpu_idle_state) = 0;
156
157 rmb();
158 idle = pm_idle;
159
160 if (!idle)
161 idle = default_idle;
162
163 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
164 idle();
165 }
166 schedule();
167 }
168}
169
170void cpu_idle_wait(void)
171{
172 unsigned int cpu, this_cpu = get_cpu();
173 cpumask_t map;
174
175 set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
176 put_cpu();
177
178 cpus_clear(map);
179 for_each_online_cpu(cpu) {
180 per_cpu(cpu_idle_state, cpu) = 1;
181 cpu_set(cpu, map);
182 }
183
184 __get_cpu_var(cpu_idle_state) = 0;
185
186 wmb();
187 do {
188 ssleep(1);
189 for_each_online_cpu(cpu) {
190 if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
191 cpu_clear(cpu, map);
192 }
193 cpus_and(map, map, cpu_online_map);
194 } while (!cpus_empty(map));
195}
196EXPORT_SYMBOL_GPL(cpu_idle_wait);
197
198/*
199 * This uses the new MONITOR/MWAIT instructions on P4 processors with PNI,
200 * which can obviate the IPI otherwise needed to trigger checking of need_resched.
201 * We execute MONITOR against need_resched and enter an optimized wait state
202 * through MWAIT. Whenever someone changes need_resched, we are woken
203 * up from MWAIT (without an IPI).
204 */
205static void mwait_idle(void)
206{
207 local_irq_enable();
208
209 if (!need_resched()) {
210 set_thread_flag(TIF_POLLING_NRFLAG);
211 do {
212 __monitor((void *)&current_thread_info()->flags, 0, 0);
213 if (need_resched())
214 break;
215 __mwait(0, 0);
216 } while (!need_resched());
217 clear_thread_flag(TIF_POLLING_NRFLAG);
218 }
219}
220
221void __init select_idle_routine(const struct cpuinfo_x86 *c)
222{
223 if (cpu_has(c, X86_FEATURE_MWAIT)) {
224 printk("monitor/mwait feature present.\n");
225 /*
226		 * Skip if setup has overridden the idle routine.
227		 * If one CPU supports mwait, all CPUs support mwait.
228 */
229 if (!pm_idle) {
230 printk("using mwait in idle threads.\n");
231 pm_idle = mwait_idle;
232 }
233 }
234}
235
236static int __init idle_setup (char *str)
237{
238 if (!strncmp(str, "poll", 4)) {
239 printk("using polling idle threads.\n");
240 pm_idle = poll_idle;
241#ifdef CONFIG_X86_SMP
242 if (smp_num_siblings > 1)
243 printk("WARNING: polling idle and HT enabled, performance may degrade.\n");
244#endif
245 } else if (!strncmp(str, "halt", 4)) {
246 printk("using halt in idle threads.\n");
247 pm_idle = default_idle;
248 }
249
250 boot_option_idle_override = 1;
251 return 1;
252}
253
254__setup("idle=", idle_setup);
255
256void show_regs(struct pt_regs * regs)
257{
258 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
259
260 printk("\n");
261 printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
262 printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
263 print_symbol("EIP is at %s\n", regs->eip);
264
265 if (regs->xcs & 3)
266 printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
267 printk(" EFLAGS: %08lx %s (%s)\n",
268 regs->eflags, print_tainted(), system_utsname.release);
269 printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
270 regs->eax,regs->ebx,regs->ecx,regs->edx);
271 printk("ESI: %08lx EDI: %08lx EBP: %08lx",
272 regs->esi, regs->edi, regs->ebp);
273 printk(" DS: %04x ES: %04x\n",
274 0xffff & regs->xds,0xffff & regs->xes);
275
276 __asm__("movl %%cr0, %0": "=r" (cr0));
277 __asm__("movl %%cr2, %0": "=r" (cr2));
278 __asm__("movl %%cr3, %0": "=r" (cr3));
279 /* This could fault if %cr4 does not exist */
280 __asm__("1: movl %%cr4, %0 \n"
281 "2: \n"
282 ".section __ex_table,\"a\" \n"
283 ".long 1b,2b \n"
284 ".previous \n"
285 : "=r" (cr4): "0" (0));
286 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
287 show_trace(NULL, &regs->esp);
288}
289
290/*
291 * This gets run with %ebx containing the
292 * function to call, and %edx containing
293 * the "args".
294 */
295extern void kernel_thread_helper(void);
296__asm__(".section .text\n"
297 ".align 4\n"
298 "kernel_thread_helper:\n\t"
299 "movl %edx,%eax\n\t"
300 "pushl %edx\n\t"
301 "call *%ebx\n\t"
302 "pushl %eax\n\t"
303 "call do_exit\n"
304 ".previous");
305
306/*
307 * Create a kernel thread
308 */
309int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
310{
311 struct pt_regs regs;
312
313 memset(&regs, 0, sizeof(regs));
314
315 regs.ebx = (unsigned long) fn;
316 regs.edx = (unsigned long) arg;
317
318 regs.xds = __USER_DS;
319 regs.xes = __USER_DS;
320 regs.orig_eax = -1;
321 regs.eip = (unsigned long) kernel_thread_helper;
322 regs.xcs = __KERNEL_CS;
323 regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
324
325 /* Ok, create the new process.. */
326 return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
327}
328
329/*
330 * Free current thread data structures etc..
331 */
332void exit_thread(void)
333{
334 struct task_struct *tsk = current;
335 struct thread_struct *t = &tsk->thread;
336
337 /* The process may have allocated an io port bitmap... nuke it. */
338 if (unlikely(NULL != t->io_bitmap_ptr)) {
339 int cpu = get_cpu();
340 struct tss_struct *tss = &per_cpu(init_tss, cpu);
341
342 kfree(t->io_bitmap_ptr);
343 t->io_bitmap_ptr = NULL;
344 /*
345 * Careful, clear this in the TSS too:
346 */
347 memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
348 t->io_bitmap_max = 0;
349 tss->io_bitmap_owner = NULL;
350 tss->io_bitmap_max = 0;
351 tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
352 put_cpu();
353 }
354}
355
356void flush_thread(void)
357{
358 struct task_struct *tsk = current;
359
360 memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
361 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
362 /*
363 * Forget coprocessor state..
364 */
365 clear_fpu(tsk);
366 clear_used_math();
367}
368
369void release_thread(struct task_struct *dead_task)
370{
371 if (dead_task->mm) {
372 // temporary debugging check
373 if (dead_task->mm->context.size) {
374 printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
375 dead_task->comm,
376 dead_task->mm->context.ldt,
377 dead_task->mm->context.size);
378 BUG();
379 }
380 }
381
382 release_vm86_irqs(dead_task);
383}
384
385/*
386 * This gets called before we allocate a new thread and copy
387 * the current task into it.
388 */
389void prepare_to_copy(struct task_struct *tsk)
390{
391 unlazy_fpu(tsk);
392}
393
394int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
395 unsigned long unused,
396 struct task_struct * p, struct pt_regs * regs)
397{
398 struct pt_regs * childregs;
399 struct task_struct *tsk;
400 int err;
401
402 childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
403 *childregs = *regs;
404 childregs->eax = 0;
405 childregs->esp = esp;
406
407 p->thread.esp = (unsigned long) childregs;
408 p->thread.esp0 = (unsigned long) (childregs+1);
409
410 p->thread.eip = (unsigned long) ret_from_fork;
411
412 savesegment(fs,p->thread.fs);
413 savesegment(gs,p->thread.gs);
414
415 tsk = current;
416 if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) {
417 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
418 if (!p->thread.io_bitmap_ptr) {
419 p->thread.io_bitmap_max = 0;
420 return -ENOMEM;
421 }
422 memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
423 IO_BITMAP_BYTES);
424 }
425
426 /*
427 * Set a new TLS for the child thread?
428 */
429 if (clone_flags & CLONE_SETTLS) {
430 struct desc_struct *desc;
431 struct user_desc info;
432 int idx;
433
434 err = -EFAULT;
435 if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info)))
436 goto out;
437 err = -EINVAL;
438 if (LDT_empty(&info))
439 goto out;
440
441 idx = info.entry_number;
442 if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
443 goto out;
444
445 desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
446 desc->a = LDT_entry_a(&info);
447 desc->b = LDT_entry_b(&info);
448 }
449
450 err = 0;
451 out:
452 if (err && p->thread.io_bitmap_ptr) {
453 kfree(p->thread.io_bitmap_ptr);
454 p->thread.io_bitmap_max = 0;
455 }
456 return err;
457}
458
459/*
460 * fill in the user structure for a core dump..
461 */
462void dump_thread(struct pt_regs * regs, struct user * dump)
463{
464 int i;
465
466/* changed the size calculations - should hopefully work better. lbt */
467 dump->magic = CMAGIC;
468 dump->start_code = 0;
469 dump->start_stack = regs->esp & ~(PAGE_SIZE - 1);
470 dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
471 dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
472 dump->u_dsize -= dump->u_tsize;
473 dump->u_ssize = 0;
474 for (i = 0; i < 8; i++)
475 dump->u_debugreg[i] = current->thread.debugreg[i];
476
477 if (dump->start_stack < TASK_SIZE)
478 dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
479
480 dump->regs.ebx = regs->ebx;
481 dump->regs.ecx = regs->ecx;
482 dump->regs.edx = regs->edx;
483 dump->regs.esi = regs->esi;
484 dump->regs.edi = regs->edi;
485 dump->regs.ebp = regs->ebp;
486 dump->regs.eax = regs->eax;
487 dump->regs.ds = regs->xds;
488 dump->regs.es = regs->xes;
489 savesegment(fs,dump->regs.fs);
490 savesegment(gs,dump->regs.gs);
491 dump->regs.orig_eax = regs->orig_eax;
492 dump->regs.eip = regs->eip;
493 dump->regs.cs = regs->xcs;
494 dump->regs.eflags = regs->eflags;
495 dump->regs.esp = regs->esp;
496 dump->regs.ss = regs->xss;
497
498 dump->u_fpvalid = dump_fpu (regs, &dump->i387);
499}
500
501/*
502 * Capture the user space registers if the task is not running (in user space)
503 */
504int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
505{
506 struct pt_regs ptregs;
507
508 ptregs = *(struct pt_regs *)
509 ((unsigned long)tsk->thread_info+THREAD_SIZE - sizeof(ptregs));
510 ptregs.xcs &= 0xffff;
511 ptregs.xds &= 0xffff;
512 ptregs.xes &= 0xffff;
513 ptregs.xss &= 0xffff;
514
515 elf_core_copy_regs(regs, &ptregs);
516
517 return 1;
518}
519
520static inline void
521handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
522{
523 if (!next->io_bitmap_ptr) {
524 /*
525 * Disable the bitmap via an invalid offset. We still cache
526 * the previous bitmap owner and the IO bitmap contents:
527 */
528 tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
529 return;
530 }
531 if (likely(next == tss->io_bitmap_owner)) {
532 /*
533 * Previous owner of the bitmap (hence the bitmap content)
534 * matches the next task; we don't have to do anything but
535 * set a valid offset in the TSS:
536 */
537 tss->io_bitmap_base = IO_BITMAP_OFFSET;
538 return;
539 }
540 /*
541 * Lazy TSS I/O bitmap copy. We set an invalid offset here
542 * and let the task get a GPF in case an I/O instruction
543 * is performed. The GPF handler will verify that the
544 * faulting task has a valid I/O bitmap and, if true, do the
545 * real copy and restart the instruction. This saves us
546 * redundant copies when the currently switched task does not
547 * perform any I/O during its timeslice.
548 */
549 tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
550}
551/*
552 * This special macro can be used to load a debugging register
553 */
554#define loaddebug(thread,register) \
555 __asm__("movl %0,%%db" #register \
556 : /* no output */ \
557 :"r" (thread->debugreg[register]))
558
559/*
560 * switch_to(x,y) should switch tasks from x to y.
561 *
562 * We fsave/fwait so that an exception goes off at the right time
563 * (as a call from the fsave or fwait in effect) rather than to
564 * the wrong process. Lazy FP saving no longer makes any sense
565 * with modern CPUs, and this simplifies a lot of things (SMP
566 * and UP become the same).
567 *
568 * NOTE! We used to use the x86 hardware context switching. The
569 * reason for not using it any more becomes apparent when you
570 * try to recover gracefully from saved state that is no longer
571 * valid (stale segment register values in particular). With the
572 * hardware task-switch, there is no way to fix up bad state in
573 * a reasonable manner.
574 *
575 * The fact that Intel documents the hardware task-switching to
576 * be slow is something of a red herring - this code is not noticeably
577 * faster. However, there _is_ some room for improvement here,
578 * so the performance issues may eventually be a valid point.
579 * More important, however, is the fact that this allows us much
580 * more flexibility.
581 *
582 * The return value (in %eax) will be the "prev" task after
583 * the task-switch, and shows up in ret_from_fork in entry.S,
584 * for example.
585 */
586struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
587{
588 struct thread_struct *prev = &prev_p->thread,
589 *next = &next_p->thread;
590 int cpu = smp_processor_id();
591 struct tss_struct *tss = &per_cpu(init_tss, cpu);
592
593 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
594
595 __unlazy_fpu(prev_p);
596
597 /*
598 * Reload esp0, LDT and the page table pointer:
599 */
600 load_esp0(tss, next);
601
602 /*
603 * Load the per-thread Thread-Local Storage descriptor.
604 */
605 load_TLS(next, cpu);
606
607 /*
608 * Save away %fs and %gs. No need to save %es and %ds, as
609 * those are always kernel segments while inside the kernel.
610 */
611 asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs));
612 asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
613
614 /*
615 * Restore %fs and %gs if needed.
616 */
617 if (unlikely(prev->fs | prev->gs | next->fs | next->gs)) {
618 loadsegment(fs, next->fs);
619 loadsegment(gs, next->gs);
620 }
621
622 /*
623 * Now maybe reload the debug registers
624 */
625 if (unlikely(next->debugreg[7])) {
626 loaddebug(next, 0);
627 loaddebug(next, 1);
628 loaddebug(next, 2);
629 loaddebug(next, 3);
630 /* no 4 and 5 */
631 loaddebug(next, 6);
632 loaddebug(next, 7);
633 }
634
635 if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
636 handle_io_bitmap(next, tss);
637
638 return prev_p;
639}
640
641asmlinkage int sys_fork(struct pt_regs regs)
642{
643 return do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
644}
645
646asmlinkage int sys_clone(struct pt_regs regs)
647{
648 unsigned long clone_flags;
649 unsigned long newsp;
650 int __user *parent_tidptr, *child_tidptr;
651
652 clone_flags = regs.ebx;
653 newsp = regs.ecx;
654 parent_tidptr = (int __user *)regs.edx;
655 child_tidptr = (int __user *)regs.edi;
656 if (!newsp)
657 newsp = regs.esp;
658 return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
659}
660
661/*
662 * This is trivial, and on the face of it looks like it
663 * could equally well be done in user mode.
664 *
665 * Not so, for quite unobvious reasons - register pressure.
666 * In user mode vfork() cannot have a stack frame, and if
667 * done by calling the "clone()" system call directly, you
668 * do not have enough call-clobbered registers to hold all
669 * the information you need.
670 */
671asmlinkage int sys_vfork(struct pt_regs regs)
672{
673 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
674}
675
676/*
677 * sys_execve() executes a new program.
678 */
679asmlinkage int sys_execve(struct pt_regs regs)
680{
681 int error;
682 char * filename;
683
684 filename = getname((char __user *) regs.ebx);
685 error = PTR_ERR(filename);
686 if (IS_ERR(filename))
687 goto out;
688 error = do_execve(filename,
689 (char __user * __user *) regs.ecx,
690 (char __user * __user *) regs.edx,
691 &regs);
692 if (error == 0) {
693 task_lock(current);
694 current->ptrace &= ~PT_DTRACE;
695 task_unlock(current);
696 /* Make sure we don't return using sysenter.. */
697 set_thread_flag(TIF_IRET);
698 }
699 putname(filename);
700out:
701 return error;
702}
703
704#define top_esp (THREAD_SIZE - sizeof(unsigned long))
705#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long))
706
707unsigned long get_wchan(struct task_struct *p)
708{
709 unsigned long ebp, esp, eip;
710 unsigned long stack_page;
711 int count = 0;
712 if (!p || p == current || p->state == TASK_RUNNING)
713 return 0;
714 stack_page = (unsigned long)p->thread_info;
715 esp = p->thread.esp;
716 if (!stack_page || esp < stack_page || esp > top_esp+stack_page)
717 return 0;
718 /* include/asm-i386/system.h:switch_to() pushes ebp last. */
719 ebp = *(unsigned long *) esp;
720 do {
721 if (ebp < stack_page || ebp > top_ebp+stack_page)
722 return 0;
723 eip = *(unsigned long *) (ebp+4);
724 if (!in_sched_functions(eip))
725 return eip;
726 ebp = *(unsigned long *) ebp;
727 } while (count++ < 16);
728 return 0;
729}
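/*
 * Illustrative sketch of the stack layout get_wchan() walks, assuming
 * the frame-pointer chain that switch_to() sets up:
 *
 *	esp   -> saved ebp	(pushed last by switch_to())
 *	ebp   -> caller's ebp	(next link in the chain)
 *	ebp+4 -> return eip	(reported as wchan once it is no
 *				 longer inside a scheduler function)
 */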
730
731/*
732 * sys_alloc_thread_area: get a yet unused TLS descriptor index.
733 */
734static int get_free_idx(void)
735{
736 struct thread_struct *t = &current->thread;
737 int idx;
738
739 for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
740 if (desc_empty(t->tls_array + idx))
741 return idx + GDT_ENTRY_TLS_MIN;
742 return -ESRCH;
743}
744
745/*
746 * Set a given TLS descriptor:
747 */
748asmlinkage int sys_set_thread_area(struct user_desc __user *u_info)
749{
750 struct thread_struct *t = &current->thread;
751 struct user_desc info;
752 struct desc_struct *desc;
753 int cpu, idx;
754
755 if (copy_from_user(&info, u_info, sizeof(info)))
756 return -EFAULT;
757 idx = info.entry_number;
758
759 /*
760 * index -1 means the kernel should try to find and
761 * allocate an empty descriptor:
762 */
763 if (idx == -1) {
764 idx = get_free_idx();
765 if (idx < 0)
766 return idx;
767 if (put_user(idx, &u_info->entry_number))
768 return -EFAULT;
769 }
770
771 if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
772 return -EINVAL;
773
774 desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
775
776 /*
777 * We must not get preempted while modifying the TLS.
778 */
779 cpu = get_cpu();
780
781 if (LDT_empty(&info)) {
782 desc->a = 0;
783 desc->b = 0;
784 } else {
785 desc->a = LDT_entry_a(&info);
786 desc->b = LDT_entry_b(&info);
787 }
788 load_TLS(t, cpu);
789
790 put_cpu();
791
792 return 0;
793}
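/*
 * Illustrative user-space sketch (hypothetical, not part of this
 * file): asking the kernel to pick a free TLS slot by passing
 * entry_number = -1 and reading back the chosen index. Error handling
 * omitted; __NR_set_thread_area comes from <asm/unistd.h>.
 *
 *	struct user_desc d = {
 *		.entry_number   = -1,	/* kernel picks a slot */
 *		.base_addr      = (unsigned long)tls_block,
 *		.limit          = 0xfffff,
 *		.seg_32bit      = 1,
 *		.limit_in_pages = 1,
 *		.useable        = 1,
 *	};
 *	syscall(__NR_set_thread_area, &d);
 *	// d.entry_number now holds the allocated GDT index
 */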
794
795/*
796 * Get the current Thread-Local Storage area:
797 */
798
799#define GET_BASE(desc) ( \
800 (((desc)->a >> 16) & 0x0000ffff) | \
801 (((desc)->b << 16) & 0x00ff0000) | \
802 ( (desc)->b & 0xff000000) )
803
804#define GET_LIMIT(desc) ( \
805 ((desc)->a & 0x0ffff) | \
806 ((desc)->b & 0xf0000) )
807
808#define GET_32BIT(desc) (((desc)->b >> 22) & 1)
809#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
810#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
811#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
812#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
813#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
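/*
 * Worked example (illustrative values): for a descriptor with
 * a = 0x12340067 and b = 0x00cf9a56, GET_BASE() reassembles the base
 * from its three fields:
 *
 *	(0x12340067 >> 16) & 0x0000ffff = 0x00001234	(base 15..0)
 *	(0x00cf9a56 << 16) & 0x00ff0000 = 0x00560000	(base 23..16)
 *	 0x00cf9a56        & 0xff000000 = 0x00000000	(base 31..24)
 *
 * giving base = 0x00561234.
 */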
814
815asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
816{
817 struct user_desc info;
818 struct desc_struct *desc;
819 int idx;
820
821 if (get_user(idx, &u_info->entry_number))
822 return -EFAULT;
823 if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
824 return -EINVAL;
825
826 desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
827
828 info.entry_number = idx;
829 info.base_addr = GET_BASE(desc);
830 info.limit = GET_LIMIT(desc);
831 info.seg_32bit = GET_32BIT(desc);
832 info.contents = GET_CONTENTS(desc);
833 info.read_exec_only = !GET_WRITABLE(desc);
834 info.limit_in_pages = GET_LIMIT_PAGES(desc);
835 info.seg_not_present = !GET_PRESENT(desc);
836 info.useable = GET_USEABLE(desc);
837
838 if (copy_to_user(u_info, &info, sizeof(info)))
839 return -EFAULT;
840 return 0;
841}
842
843unsigned long arch_align_stack(unsigned long sp)
844{
845 if (randomize_va_space)
846 sp -= get_random_int() % 8192;
847 return sp & ~0xf;
848}
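/*
 * Illustrative numbers: sp is lowered by up to 8191 bytes and then
 * rounded down to a 16-byte boundary, so a starting sp of 0xbfffe000
 * becomes some 16-byte-aligned value in [0xbfffc000, 0xbfffe000].
 */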
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
new file mode 100644
index 000000000000..b2f17640ceff
--- /dev/null
+++ b/arch/i386/kernel/ptrace.c
@@ -0,0 +1,717 @@
1/* ptrace.c */
2/* By Ross Biro 1/23/92 */
3/*
4 * Pentium III FXSR, SSE support
5 * Gareth Hughes <gareth@valinux.com>, May 2000
6 */
7
8#include <linux/kernel.h>
9#include <linux/sched.h>
10#include <linux/mm.h>
11#include <linux/smp.h>
12#include <linux/smp_lock.h>
13#include <linux/errno.h>
14#include <linux/ptrace.h>
15#include <linux/user.h>
16#include <linux/security.h>
17#include <linux/audit.h>
18#include <linux/seccomp.h>
19
20#include <asm/uaccess.h>
21#include <asm/pgtable.h>
22#include <asm/system.h>
23#include <asm/processor.h>
24#include <asm/i387.h>
25#include <asm/debugreg.h>
26#include <asm/ldt.h>
27#include <asm/desc.h>
28
29/*
30 * does not yet catch signals sent when the child dies;
31 * that needs to be done in exit.c or in signal.c.
32 */
33
34/* determines which flags the user has access to. */
35/* 1 = access 0 = no access */
36#define FLAG_MASK 0x00044dd5
37
38/* sets the trap flag. */
39#define TRAP_FLAG 0x100
40
41/*
42 * Offset of eflags on child stack..
43 */
44#define EFL_OFFSET ((EFL-2)*4-sizeof(struct pt_regs))
45
46static inline struct pt_regs *get_child_regs(struct task_struct *task)
47{
48 void *stack_top = (void *)task->thread.esp0;
49 return stack_top - sizeof(struct pt_regs);
50}
51
52/*
53 * this routine will get a word off of the process's privileged stack.
54 * the offset is how far from the base addr stored in the TSS.
55 * this routine assumes that all the privileged stacks are in our
56 * data space.
57 */
58static inline int get_stack_long(struct task_struct *task, int offset)
59{
60 unsigned char *stack;
61
62 stack = (unsigned char *)task->thread.esp0;
63 stack += offset;
64 return (*((int *)stack));
65}
66
67/*
68 * this routine will put a word on the process's privileged stack.
69 * the offset is how far from the base addr stored in the TSS.
70 * this routine assumes that all the privileged stacks are in our
71 * data space.
72 */
73static inline int put_stack_long(struct task_struct *task, int offset,
74 unsigned long data)
75{
76 unsigned char * stack;
77
78 stack = (unsigned char *) task->thread.esp0;
79 stack += offset;
80 *(unsigned long *) stack = data;
81 return 0;
82}
83
84static int putreg(struct task_struct *child,
85 unsigned long regno, unsigned long value)
86{
87 switch (regno >> 2) {
88 case FS:
89 if (value && (value & 3) != 3)
90 return -EIO;
91 child->thread.fs = value;
92 return 0;
93 case GS:
94 if (value && (value & 3) != 3)
95 return -EIO;
96 child->thread.gs = value;
97 return 0;
98 case DS:
99 case ES:
100 if (value && (value & 3) != 3)
101 return -EIO;
102 value &= 0xffff;
103 break;
104 case SS:
105 case CS:
106 if ((value & 3) != 3)
107 return -EIO;
108 value &= 0xffff;
109 break;
110 case EFL:
111 value &= FLAG_MASK;
112 value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK;
113 break;
114 }
115 if (regno > GS*4)
116 regno -= 2*4;
117 put_stack_long(child, regno - sizeof(struct pt_regs), value);
118 return 0;
119}
120
121static unsigned long getreg(struct task_struct *child,
122 unsigned long regno)
123{
124 unsigned long retval = ~0UL;
125
126 switch (regno >> 2) {
127 case FS:
128 retval = child->thread.fs;
129 break;
130 case GS:
131 retval = child->thread.gs;
132 break;
133 case DS:
134 case ES:
135 case SS:
136 case CS:
137 retval = 0xffff;
138 /* fall through */
139 default:
140 if (regno > GS*4)
141 regno -= 2*4;
142 regno = regno - sizeof(struct pt_regs);
143 retval &= get_stack_long(child, regno);
144 }
145 return retval;
146}
147
148#define LDT_SEGMENT 4
149
150static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_regs *regs)
151{
152 unsigned long addr, seg;
153
154 addr = regs->eip;
155 seg = regs->xcs & 0xffff;
156 if (regs->eflags & VM_MASK) {
157 addr = (addr & 0xffff) + (seg << 4);
158 return addr;
159 }
160
161 /*
162 * We'll assume that the code segments in the GDT
163 * are all zero-based. That is largely true: the
164 * TLS segments are used for data, and the PNPBIOS
165 * and APM bios ones we just ignore here.
166 */
167 if (seg & LDT_SEGMENT) {
168 u32 *desc;
169 unsigned long base;
170
171 down(&child->mm->context.sem);
172 desc = child->mm->context.ldt + (seg & ~7);
173 base = (desc[0] >> 16) | ((desc[1] & 0xff) << 16) | (desc[1] & 0xff000000);
174
175 /* 16-bit code segment? */
176 if (!((desc[1] >> 22) & 1))
177 addr &= 0xffff;
178 addr += base;
179 up(&child->mm->context.sem);
180 }
181 return addr;
182}
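/*
 * Worked example (illustrative): in vm86 mode the linear address is
 * the classic real-mode calculation. With %cs = 0xb800 and
 * eip = 0x0123:
 *
 *	addr = (0x0123 & 0xffff) + (0xb800 << 4) = 0xb8123
 */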
183
184static inline int is_at_popf(struct task_struct *child, struct pt_regs *regs)
185{
186 int i, copied;
187 unsigned char opcode[16];
188 unsigned long addr = convert_eip_to_linear(child, regs);
189
190 copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
191 for (i = 0; i < copied; i++) {
192 switch (opcode[i]) {
193 /* popf */
194 case 0x9d:
195 return 1;
196 /* opcode and address size prefixes */
197 case 0x66: case 0x67:
198 continue;
199 /* irrelevant prefixes (segment overrides and repeats) */
200 case 0x26: case 0x2e:
201 case 0x36: case 0x3e:
202 case 0x64: case 0x65:
203 case 0xf0: case 0xf2: case 0xf3:
204 continue;
205
206 /*
207 * pushf: NOTE! We should probably not let
208 * the user see the TF bit being set. But
209 * it's more pain than it's worth to avoid
210 * it, and a debugger could emulate this
211 * all in user space if it _really_ cares.
212 */
213 case 0x9c:
214 default:
215 return 0;
216 }
217 }
218 return 0;
219}
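/*
 * Illustrative byte sequences: a bare "9d" (popf) and a prefixed
 * "66 9d" (operand-size override + popf) are both detected by the
 * scan above, while "9c" (pushf) or any other leading opcode ends
 * the scan with 0.
 */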
220
221static void set_singlestep(struct task_struct *child)
222{
223 struct pt_regs *regs = get_child_regs(child);
224
225 /*
226 * Always set TIF_SINGLESTEP - this guarantees that
227 * we single-step system calls etc.. This will also
228 * cause us to set TF when returning to user mode.
229 */
230 set_tsk_thread_flag(child, TIF_SINGLESTEP);
231
232 /*
233 * If TF was already set, don't do anything else
234 */
235 if (regs->eflags & TRAP_FLAG)
236 return;
237
238 /* Set TF on the kernel stack.. */
239 regs->eflags |= TRAP_FLAG;
240
241 /*
242 * ..but if TF is changed by the instruction we will trace,
243 * don't mark it as being "us" that set it, so that we
244 * won't clear it by hand later.
245 */
246 if (is_at_popf(child, regs))
247 return;
248
249 child->ptrace |= PT_DTRACE;
250}
251
252static void clear_singlestep(struct task_struct *child)
253{
254 /* Always clear TIF_SINGLESTEP... */
255 clear_tsk_thread_flag(child, TIF_SINGLESTEP);
256
257 /* But touch TF only if it was set by us.. */
258 if (child->ptrace & PT_DTRACE) {
259 struct pt_regs *regs = get_child_regs(child);
260 regs->eflags &= ~TRAP_FLAG;
261 child->ptrace &= ~PT_DTRACE;
262 }
263}
264
265/*
266 * Called by kernel/ptrace.c when detaching..
267 *
268 * Make sure the single step bit is not set.
269 */
270void ptrace_disable(struct task_struct *child)
271{
272 clear_singlestep(child);
273}
274
275/*
276 * Perform get_thread_area on behalf of the traced child.
277 */
278static int
279ptrace_get_thread_area(struct task_struct *child,
280 int idx, struct user_desc __user *user_desc)
281{
282 struct user_desc info;
283 struct desc_struct *desc;
284
285/*
286 * Get the current Thread-Local Storage area:
287 */
288
289#define GET_BASE(desc) ( \
290 (((desc)->a >> 16) & 0x0000ffff) | \
291 (((desc)->b << 16) & 0x00ff0000) | \
292 ( (desc)->b & 0xff000000) )
293
294#define GET_LIMIT(desc) ( \
295 ((desc)->a & 0x0ffff) | \
296 ((desc)->b & 0xf0000) )
297
298#define GET_32BIT(desc) (((desc)->b >> 22) & 1)
299#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
300#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
301#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
302#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
303#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
304
305 if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
306 return -EINVAL;
307
308 desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
309
310 info.entry_number = idx;
311 info.base_addr = GET_BASE(desc);
312 info.limit = GET_LIMIT(desc);
313 info.seg_32bit = GET_32BIT(desc);
314 info.contents = GET_CONTENTS(desc);
315 info.read_exec_only = !GET_WRITABLE(desc);
316 info.limit_in_pages = GET_LIMIT_PAGES(desc);
317 info.seg_not_present = !GET_PRESENT(desc);
318 info.useable = GET_USEABLE(desc);
319
320 if (copy_to_user(user_desc, &info, sizeof(info)))
321 return -EFAULT;
322
323 return 0;
324}
325
326/*
327 * Perform set_thread_area on behalf of the traced child.
328 */
329static int
330ptrace_set_thread_area(struct task_struct *child,
331 int idx, struct user_desc __user *user_desc)
332{
333 struct user_desc info;
334 struct desc_struct *desc;
335
336 if (copy_from_user(&info, user_desc, sizeof(info)))
337 return -EFAULT;
338
339 if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
340 return -EINVAL;
341
342 desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
343 if (LDT_empty(&info)) {
344 desc->a = 0;
345 desc->b = 0;
346 } else {
347 desc->a = LDT_entry_a(&info);
348 desc->b = LDT_entry_b(&info);
349 }
350
351 return 0;
352}
353
354asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
355{
356 struct task_struct *child;
357 struct user * dummy = NULL;
358 int i, ret;
359 unsigned long __user *datap = (unsigned long __user *)data;
360
361 lock_kernel();
362 ret = -EPERM;
363 if (request == PTRACE_TRACEME) {
364 /* are we already being traced? */
365 if (current->ptrace & PT_PTRACED)
366 goto out;
367 ret = security_ptrace(current->parent, current);
368 if (ret)
369 goto out;
370 /* set the ptrace bit in the process flags. */
371 current->ptrace |= PT_PTRACED;
372 ret = 0;
373 goto out;
374 }
375 ret = -ESRCH;
376 read_lock(&tasklist_lock);
377 child = find_task_by_pid(pid);
378 if (child)
379 get_task_struct(child);
380 read_unlock(&tasklist_lock);
381 if (!child)
382 goto out;
383
384 ret = -EPERM;
385 if (pid == 1) /* you may not mess with init */
386 goto out_tsk;
387
388 if (request == PTRACE_ATTACH) {
389 ret = ptrace_attach(child);
390 goto out_tsk;
391 }
392
393 ret = ptrace_check_attach(child, request == PTRACE_KILL);
394 if (ret < 0)
395 goto out_tsk;
396
397 switch (request) {
398 /* when I and D space are separate, these will need to be fixed. */
399 case PTRACE_PEEKTEXT: /* read word at location addr. */
400 case PTRACE_PEEKDATA: {
401 unsigned long tmp;
402 int copied;
403
404 copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
405 ret = -EIO;
406 if (copied != sizeof(tmp))
407 break;
408 ret = put_user(tmp, datap);
409 break;
410 }
411
412 /* read the word at location addr in the USER area. */
413 case PTRACE_PEEKUSR: {
414 unsigned long tmp;
415
416 ret = -EIO;
417 if ((addr & 3) || addr < 0 ||
418 addr > sizeof(struct user) - 3)
419 break;
420
421 tmp = 0; /* Default return condition */
422 if(addr < FRAME_SIZE*sizeof(long))
423 tmp = getreg(child, addr);
424 if(addr >= (long) &dummy->u_debugreg[0] &&
425 addr <= (long) &dummy->u_debugreg[7]){
426 addr -= (long) &dummy->u_debugreg[0];
427 addr = addr >> 2;
428 tmp = child->thread.debugreg[addr];
429 }
430 ret = put_user(tmp, datap);
431 break;
432 }
433
434 /* when I and D space are separate, this will have to be fixed. */
435 case PTRACE_POKETEXT: /* write the word at location addr. */
436 case PTRACE_POKEDATA:
437 ret = 0;
438 if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
439 break;
440 ret = -EIO;
441 break;
442
443 case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
444 ret = -EIO;
445 if ((addr & 3) || addr < 0 ||
446 addr > sizeof(struct user) - 3)
447 break;
448
449 if (addr < FRAME_SIZE*sizeof(long)) {
450 ret = putreg(child, addr, data);
451 break;
452 }
453 /* We need to be very careful here. We implicitly
454 want to modify a portion of the task_struct, and we
455 have to be selective about what portions we allow someone
456 to modify. */
457
458 ret = -EIO;
459 if(addr >= (long) &dummy->u_debugreg[0] &&
460 addr <= (long) &dummy->u_debugreg[7]){
461
462 if(addr == (long) &dummy->u_debugreg[4]) break;
463 if(addr == (long) &dummy->u_debugreg[5]) break;
464 if(addr < (long) &dummy->u_debugreg[4] &&
465 ((unsigned long) data) >= TASK_SIZE-3) break;
466
467 /* Sanity-check data. Take one half-byte at once with
468 * check = (val >> (16 + 4*i)) & 0xf. It contains the
469 * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
470 * 2 and 3 are LENi. Given a list of invalid values,
471 * we do mask |= 1 << invalid_value, so that
472 * (mask >> check) & 1 is a correct test for invalid
473 * values.
474 *
475 * R/Wi contains the type of the breakpoint /
476 * watchpoint, LENi contains the length of the watched
477 * data in the watchpoint case.
478 *
479 * The invalid values are:
480 * - LENi == 0x10 (undefined), so mask |= 0x0f00.
481 * - R/Wi == 0x10 (break on I/O reads or writes), so
482 * mask |= 0x4444.
483 * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
484 * 0x1110.
485 *
486 * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
487 *
488 * See the Intel Manual "System Programming Guide",
489 * 15.2.4
490 *
491 * Note that LENi == 0x10 is defined on x86_64 in long
492 * mode (i.e. even for 32-bit userspace software, but
493 * 64-bit kernel), so the x86_64 mask value is 0x5454.
494 * See the AMD manual no. 24593 (AMD64 System
495 * Programming)*/
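			/*
			 * Worked check (illustrative): half-byte 0x5 is
			 * R/W = 01 (write watchpoint), LEN = 01 (2 bytes):
			 * (0x5f54 >> 0x5) & 1 == 0, valid. Half-byte 0x2 is
			 * R/W = 10 (I/O break): (0x5f54 >> 0x2) & 1 == 1,
			 * rejected.
			 */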
496
497 if(addr == (long) &dummy->u_debugreg[7]) {
498 data &= ~DR_CONTROL_RESERVED;
499 for(i=0; i<4; i++)
500 if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
501 goto out_tsk;
502 }
503
504 addr -= (long) &dummy->u_debugreg;
505 addr = addr >> 2;
506 child->thread.debugreg[addr] = data;
507 ret = 0;
508 }
509 break;
510
511 case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
512 case PTRACE_CONT: /* restart after signal. */
513 ret = -EIO;
514 if ((unsigned long) data > _NSIG)
515 break;
516 if (request == PTRACE_SYSCALL) {
517 set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
518 }
519 else {
520 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
521 }
522 child->exit_code = data;
523 /* make sure the single step bit is not set. */
524 clear_singlestep(child);
525 wake_up_process(child);
526 ret = 0;
527 break;
528
529/*
530 * make the child exit. Best I can do is send it a sigkill.
531 * perhaps it should be put in the status that it wants to
532 * exit.
533 */
534 case PTRACE_KILL:
535 ret = 0;
536 if (child->exit_state == EXIT_ZOMBIE) /* already dead */
537 break;
538 child->exit_code = SIGKILL;
539 /* make sure the single step bit is not set. */
540 clear_singlestep(child);
541 wake_up_process(child);
542 break;
543
544 case PTRACE_SINGLESTEP: /* set the trap flag. */
545 ret = -EIO;
546 if ((unsigned long) data > _NSIG)
547 break;
548 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
549 set_singlestep(child);
550 child->exit_code = data;
551 /* give it a chance to run. */
552 wake_up_process(child);
553 ret = 0;
554 break;
555
556 case PTRACE_DETACH:
557 /* detach a process that was attached. */
558 ret = ptrace_detach(child, data);
559 break;
560
561 case PTRACE_GETREGS: { /* Get all gp regs from the child. */
562 if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) {
563 ret = -EIO;
564 break;
565 }
566 for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) {
567 __put_user(getreg(child, i), datap);
568 datap++;
569 }
570 ret = 0;
571 break;
572 }
573
574 case PTRACE_SETREGS: { /* Set all gp regs in the child. */
575 unsigned long tmp;
576 if (!access_ok(VERIFY_READ, datap, FRAME_SIZE*sizeof(long))) {
577 ret = -EIO;
578 break;
579 }
580 for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) {
581 __get_user(tmp, datap);
582 putreg(child, i, tmp);
583 datap++;
584 }
585 ret = 0;
586 break;
587 }
588
589 case PTRACE_GETFPREGS: { /* Get the child FPU state. */
590 if (!access_ok(VERIFY_WRITE, datap,
591 sizeof(struct user_i387_struct))) {
592 ret = -EIO;
593 break;
594 }
595 ret = 0;
596 if (!tsk_used_math(child))
597 init_fpu(child);
598 get_fpregs((struct user_i387_struct __user *)data, child);
599 break;
600 }
601
602 case PTRACE_SETFPREGS: { /* Set the child FPU state. */
603 if (!access_ok(VERIFY_READ, datap,
604 sizeof(struct user_i387_struct))) {
605 ret = -EIO;
606 break;
607 }
608 set_stopped_child_used_math(child);
609 set_fpregs(child, (struct user_i387_struct __user *)data);
610 ret = 0;
611 break;
612 }
613
614 case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */
615 if (!access_ok(VERIFY_WRITE, datap,
616 sizeof(struct user_fxsr_struct))) {
617 ret = -EIO;
618 break;
619 }
620 if (!tsk_used_math(child))
621 init_fpu(child);
622 ret = get_fpxregs((struct user_fxsr_struct __user *)data, child);
623 break;
624 }
625
626 case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */
627 if (!access_ok(VERIFY_READ, datap,
628 sizeof(struct user_fxsr_struct))) {
629 ret = -EIO;
630 break;
631 }
632 set_stopped_child_used_math(child);
633 ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data);
634 break;
635 }
636
637 case PTRACE_GET_THREAD_AREA:
638 ret = ptrace_get_thread_area(child, addr,
639 (struct user_desc __user *) data);
640 break;
641
642 case PTRACE_SET_THREAD_AREA:
643 ret = ptrace_set_thread_area(child, addr,
644 (struct user_desc __user *) data);
645 break;
646
647 default:
648 ret = ptrace_request(child, request, addr, data);
649 break;
650 }
651out_tsk:
652 put_task_struct(child);
653out:
654 unlock_kernel();
655 return ret;
656}
657
658void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
659{
660 struct siginfo info;
661
662 tsk->thread.trap_no = 1;
663 tsk->thread.error_code = error_code;
664
665 memset(&info, 0, sizeof(info));
666 info.si_signo = SIGTRAP;
667 info.si_code = TRAP_BRKPT;
668
669 /* User-mode eip? */
670 info.si_addr = user_mode(regs) ? (void __user *) regs->eip : NULL;
671
672 /* Send us the fakey SIGTRAP */
673 force_sig_info(SIGTRAP, &info, tsk);
674}
675
676/* notification of system call entry/exit
677 * - triggered by current->work.syscall_trace
678 */
679__attribute__((regparm(3)))
680void do_syscall_trace(struct pt_regs *regs, int entryexit)
681{
682 /* do the secure computing check first */
683 secure_computing(regs->orig_eax);
684
685 if (unlikely(current->audit_context)) {
686 if (!entryexit)
687 audit_syscall_entry(current, regs->orig_eax,
688 regs->ebx, regs->ecx,
689 regs->edx, regs->esi);
690 else
691 audit_syscall_exit(current, regs->eax);
692 }
693
694 if (!(current->ptrace & PT_PTRACED))
695 return;
696
697 /* Fake a debug trap */
698 if (test_thread_flag(TIF_SINGLESTEP))
699 send_sigtrap(current, regs, 0);
700
701 if (!test_thread_flag(TIF_SYSCALL_TRACE))
702 return;
703
704 /* the 0x80 provides a way for the tracing parent to distinguish
705 between a syscall stop and SIGTRAP delivery */
706 ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));
707
708 /*
709 * this isn't the same as continuing with a signal, but it will do
710 * for normal use. strace only continues with a signal if the
711 * stopping signal is not SIGTRAP. -brl
712 */
713 if (current->exit_code) {
714 send_sig(current->exit_code, current, 1);
715 current->exit_code = 0;
716 }
717}
diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c
new file mode 100644
index 000000000000..aaf89cb2bc51
--- /dev/null
+++ b/arch/i386/kernel/quirks.c
@@ -0,0 +1,52 @@
1/*
2 * This file contains work-arounds for x86 and x86_64 platform bugs.
3 */
4#include <linux/config.h>
5#include <linux/pci.h>
6#include <linux/irq.h>
7
8#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
9
10static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
11{
12 u8 config, rev;
13 u32 word;
14
15 /* BIOS may enable hardware IRQ balancing for
16 * E7520/E7320/E7525 (revision ID 0x9 and below)
17 * based platforms.
18 * Disable SW irqbalance/affinity on those platforms.
19 */
20 pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
21 if (rev > 0x9)
22 return;
23
24	printk(KERN_INFO "Intel E7520/7320/7525 detected.\n");
25
26 /* enable access to config space*/
27 pci_read_config_byte(dev, 0xf4, &config);
28 config |= 0x2;
29 pci_write_config_byte(dev, 0xf4, config);
30
31 /* read xTPR register */
32 raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
33
34 if (!(word & (1 << 13))) {
35 printk(KERN_INFO "Disabling irq balancing and affinity\n");
36#ifdef CONFIG_IRQBALANCE
37 irqbalance_disable("");
38#endif
39 noirqdebug_setup("");
40#ifdef CONFIG_PROC_FS
41 no_irq_affinity = 1;
42#endif
43 }
44
45 config &= ~0x2;
46 /* disable access to config space*/
47 pci_write_config_byte(dev, 0xf4, config);
48}
49DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance);
50DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance);
51DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance);
52#endif
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
new file mode 100644
index 000000000000..3d7e994563df
--- /dev/null
+++ b/arch/i386/kernel/reboot.c
@@ -0,0 +1,382 @@
1/*
2 * linux/arch/i386/kernel/reboot.c
3 */
4
5#include <linux/mm.h>
6#include <linux/module.h>
7#include <linux/delay.h>
8#include <linux/init.h>
9#include <linux/interrupt.h>
10#include <linux/mc146818rtc.h>
11#include <linux/efi.h>
12#include <linux/dmi.h>
13#include <asm/uaccess.h>
14#include <asm/apic.h>
15#include "mach_reboot.h"
16
17/*
18 * Power off function, if any
19 */
20void (*pm_power_off)(void);
21
22static int reboot_mode;
23static int reboot_thru_bios;
24
25#ifdef CONFIG_SMP
26int reboot_smp = 0;
27static int reboot_cpu = -1;
28/* shamelessly grabbed from lib/vsprintf.c for readability */
29#define is_digit(c) ((c) >= '0' && (c) <= '9')
30#endif
31static int __init reboot_setup(char *str)
32{
33 while(1) {
34 switch (*str) {
35 case 'w': /* "warm" reboot (no memory testing etc) */
36 reboot_mode = 0x1234;
37 break;
38 case 'c': /* "cold" reboot (with memory testing etc) */
39 reboot_mode = 0x0;
40 break;
41 case 'b': /* "bios" reboot by jumping through the BIOS */
42 reboot_thru_bios = 1;
43 break;
44 case 'h': /* "hard" reboot by toggling RESET and/or crashing the CPU */
45 reboot_thru_bios = 0;
46 break;
47#ifdef CONFIG_SMP
48 case 's': /* "smp" reboot by executing reset on BSP or other CPU*/
49 reboot_smp = 1;
50 if (is_digit(*(str+1))) {
51 reboot_cpu = (int) (*(str+1) - '0');
52 if (is_digit(*(str+2)))
53 reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0');
54 }
55 /* we will leave sorting out the final value
56 when we are ready to reboot, since we might not
57 have set up boot_cpu_id or smp_num_cpu */
58 break;
59#endif
60 }
61 if((str = strchr(str,',')) != NULL)
62 str++;
63 else
64 break;
65 }
66 return 1;
67}
68
69__setup("reboot=", reboot_setup);
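/*
 * Illustrative option strings parsed above (comma-separated, applied
 * left to right):
 *
 *	reboot=w	warm reboot, skip the memory test
 *	reboot=c,b	cold reboot, via the BIOS
 *	reboot=s2	(SMP) perform the reset on CPU 2
 */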
70
71/*
72 * Reboot options and system auto-detection code provided by
73 * Dell Inc. so their systems "just work". :-)
74 */
75
76/*
77 * Some machines require the "reboot=b" commandline option; this quirk makes that automatic.
78 */
79static int __init set_bios_reboot(struct dmi_system_id *d)
80{
81 if (!reboot_thru_bios) {
82 reboot_thru_bios = 1;
83 printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident);
84 }
85 return 0;
86}
87
88/*
89 * Some machines require the "reboot=s" commandline option; this quirk makes that automatic.
90 */
91static int __init set_smp_reboot(struct dmi_system_id *d)
92{
93#ifdef CONFIG_SMP
94 if (!reboot_smp) {
95 reboot_smp = 1;
96 printk(KERN_INFO "%s series board detected. Selecting SMP-method for reboots.\n", d->ident);
97 }
98#endif
99 return 0;
100}
101
102/*
103 * Some machines require the "reboot=b,s" commandline option; this quirk makes that automatic.
104 */
105static int __init set_smp_bios_reboot(struct dmi_system_id *d)
106{
107 set_smp_reboot(d);
108 set_bios_reboot(d);
109 return 0;
110}
111
112static struct dmi_system_id __initdata reboot_dmi_table[] = {
113 { /* Handle problems with rebooting on Dell 1300's */
114 .callback = set_smp_bios_reboot,
115 .ident = "Dell PowerEdge 1300",
116 .matches = {
117 DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
118 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"),
119 },
120 },
121 { /* Handle problems with rebooting on Dell 300's */
122 .callback = set_bios_reboot,
123 .ident = "Dell PowerEdge 300",
124 .matches = {
125 DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
126 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"),
127 },
128 },
129 { /* Handle problems with rebooting on Dell 2400's */
130 .callback = set_bios_reboot,
131 .ident = "Dell PowerEdge 2400",
132 .matches = {
133 DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
134 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"),
135 },
136 },
137 { }
138};
139
140static int __init reboot_init(void)
141{
142 dmi_check_system(reboot_dmi_table);
143 return 0;
144}
145
146core_initcall(reboot_init);
147
148/* The following code and data reboot the machine by switching to real
149 mode and jumping to the BIOS reset entry point, as if the CPU has
150 really been reset. The previous version asked the keyboard
151 controller to pulse the CPU reset line, which is more thorough, but
152 doesn't work with at least one type of 486 motherboard. It is easy
153 to stop this code working; hence the copious comments. */
154
155static unsigned long long
156real_mode_gdt_entries [3] =
157{
158 0x0000000000000000ULL, /* Null descriptor */
159 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */
160 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */
161};
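/*
 * Illustrative decode of the code descriptor above,
 * 0x00009a000000ffff: limit 15..0 = 0xffff, base = 0x00000000,
 * access byte 0x9a (present, DPL 0, readable code), flags nibble 0
 * (byte granularity, 16-bit) - a 64k real-mode-style code segment.
 */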
162
163static struct
164{
165 unsigned short size __attribute__ ((packed));
166 unsigned long long * base __attribute__ ((packed));
167}
168real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries },
169real_mode_idt = { 0x3ff, NULL },
170no_idt = { 0, NULL };
171
172
173/* This is 16-bit protected mode code to disable paging and the cache,
174 switch to real mode and jump to the BIOS reset code.
175
176 The instruction that switches to real mode by writing to CR0 must be
177 followed immediately by a far jump instruction, which sets CS to a
178 valid value for real mode and flushes the prefetch queue to avoid
179 running instructions that have already been decoded in protected
180 mode.
181
182 Clears all the flags except ET, especially PG (paging), PE
183 (protected-mode enable) and TS (task switch for coprocessor state
184 save). Flushes the TLB after paging has been disabled. Sets CD and
185 NW, to disable the cache on a 486, and invalidates the cache. This
186 is more like the state of a 486 after reset. I don't know if
187 something else should be done for other chips.
188
189 More could be done here to set up the registers as if a CPU reset had
190 occurred; hopefully real BIOSs don't assume much. */
191
192static unsigned char real_mode_switch [] =
193{
194 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */
195 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */
196 0x66, 0x0d, 0x00, 0x00, 0x00, 0x60, /* orl $0x60000000,%eax */
197 0x66, 0x0f, 0x22, 0xc0, /* movl %eax,%cr0 */
198 0x66, 0x0f, 0x22, 0xd8, /* movl %eax,%cr3 */
199 0x66, 0x0f, 0x20, 0xc3, /* movl %cr0,%ebx */
200 0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60, /* andl $0x60000000,%ebx */
201 0x74, 0x02, /* jz f */
202 0x0f, 0x09, /* wbinvd */
203 0x24, 0x10, /* f: andb $0x10,al */
204 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */
205};
206static unsigned char jump_to_bios [] =
207{
208 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */
209};
210
211/*
212 * Switch to real mode and then execute the code
213 * specified by the code and length parameters.
214 * We assume that length will always be less than 100!
215 */
216void machine_real_restart(unsigned char *code, int length)
217{
218 unsigned long flags;
219
220 local_irq_disable();
221
222 /* Write zero to CMOS register number 0x0f, which the BIOS POST
223 routine will recognize as telling it to do a proper reboot. (Well
224 that's what this book in front of me says -- it may only apply to
225 the Phoenix BIOS though, it's not clear). At the same time,
226 disable NMIs by setting the top bit in the CMOS address register,
227 as we're about to do peculiar things to the CPU. I'm not sure if
228 `outb_p' is needed instead of just `outb'. Use it to be on the
229 safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.)
230 */
231
232 spin_lock_irqsave(&rtc_lock, flags);
233 CMOS_WRITE(0x00, 0x8f);
234 spin_unlock_irqrestore(&rtc_lock, flags);
235
236 /* Remap the kernel at virtual address zero, as well as offset zero
237 from the kernel segment. This assumes the kernel segment starts at
238 virtual address PAGE_OFFSET. */
239
240 memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
241 sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
242
243 /*
244 * Use `swapper_pg_dir' as our page directory.
245 */
246 load_cr3(swapper_pg_dir);
247
248 /* Write 0x1234 to absolute memory location 0x472. The BIOS reads
249 this on booting to tell it to "Bypass memory test (also warm
250 boot)". This seems like a fairly standard thing that gets set by
251 REBOOT.COM programs, and the previous reset routine did this
252 too. */
253
254 *((unsigned short *)0x472) = reboot_mode;
255
256 /* For the switch to real mode, copy some code to low memory. It has
257 to be in the first 64k because it is running in 16-bit mode, and it
258 has to have the same physical and virtual address, because it turns
259 off paging. Copy it near the end of the first page, out of the way
260 of BIOS variables. */
261
262 memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100),
263 real_mode_switch, sizeof (real_mode_switch));
264 memcpy ((void *) (0x1000 - 100), code, length);
265
266 /* Set up the IDT for real mode. */
267
268 __asm__ __volatile__ ("lidt %0" : : "m" (real_mode_idt));
269
270 /* Set up a GDT from which we can load segment descriptors for real
271 mode. The GDT is not used in real mode; it is just needed here to
272 prepare the descriptors. */
273
274 __asm__ __volatile__ ("lgdt %0" : : "m" (real_mode_gdt));
275
276 /* Load the data segment registers, and thus the descriptors ready for
277 real mode. The base address of each segment is 0x100, 16 times the
278 selector value being loaded here. This is so that the segment
279 registers don't have to be reloaded after switching to real mode:
280 the values are consistent for real mode operation already. */
281
282 __asm__ __volatile__ ("movl $0x0010,%%eax\n"
283 "\tmovl %%eax,%%ds\n"
284 "\tmovl %%eax,%%es\n"
285 "\tmovl %%eax,%%fs\n"
286 "\tmovl %%eax,%%gs\n"
287 "\tmovl %%eax,%%ss" : : : "eax");
288
289 /* Jump to the 16-bit code that we copied earlier. It disables paging
290 and the cache, switches to real mode, and jumps to the BIOS reset
291 entry point. */
292
293 __asm__ __volatile__ ("ljmp $0x0008,%0"
294 :
295 : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100)));
296}
297
298void machine_restart(char * __unused)
299{
300#ifdef CONFIG_SMP
301 int cpuid;
302
303 cpuid = GET_APIC_ID(apic_read(APIC_ID));
304
305 if (reboot_smp) {
306
307		/* check to see if reboot_cpu is valid;
308		   if it's not, default to the BSP */
309 if ((reboot_cpu == -1) ||
310 (reboot_cpu > (NR_CPUS -1)) ||
311 !physid_isset(cpuid, phys_cpu_present_map))
312 reboot_cpu = boot_cpu_physical_apicid;
313
314 reboot_smp = 0; /* use this as a flag to only go through this once*/
315		/* re-run this function on the other CPUs;
316		   it will fall through this section since we have
317		   cleared reboot_smp, and do the reboot if it is the
318		   correct CPU; otherwise it halts. */
319 if (reboot_cpu != cpuid)
320 smp_call_function((void *)machine_restart , NULL, 1, 0);
321 }
322
323	/* if reboot_cpu is still -1, then we want a traditional reboot,
324	   and if we are not running on the reboot_cpu, halt */
325 if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
326 for (;;)
327 __asm__ __volatile__ ("hlt");
328 }
329 /*
330 * Stop all CPUs and turn off local APICs and the IO-APIC, so
331 * other OSs see a clean IRQ state.
332 */
333 smp_send_stop();
334#endif /* CONFIG_SMP */
335
336 lapic_shutdown();
337
338#ifdef CONFIG_X86_IO_APIC
339 disable_IO_APIC();
340#endif
341
342 if (!reboot_thru_bios) {
343 if (efi_enabled) {
344 efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL);
345 __asm__ __volatile__("lidt %0": :"m" (no_idt));
346 __asm__ __volatile__("int3");
347 }
348 /* rebooting needs to touch the page at absolute addr 0 */
349 *((unsigned short *)__va(0x472)) = reboot_mode;
350 for (;;) {
351 mach_reboot();
352 /* That didn't work - force a triple fault.. */
353 __asm__ __volatile__("lidt %0": :"m" (no_idt));
354 __asm__ __volatile__("int3");
355 }
356 }
357 if (efi_enabled)
358 efi.reset_system(EFI_RESET_WARM, EFI_SUCCESS, 0, NULL);
359
360 machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
361}
362
363EXPORT_SYMBOL(machine_restart);
364
365void machine_halt(void)
366{
367}
368
369EXPORT_SYMBOL(machine_halt);
370
371void machine_power_off(void)
372{
373 lapic_shutdown();
374
375 if (efi_enabled)
376 efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL);
377 if (pm_power_off)
378 pm_power_off();
379}
380
381EXPORT_SYMBOL(machine_power_off);
382
diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c
new file mode 100644
index 000000000000..69e203a0d330
--- /dev/null
+++ b/arch/i386/kernel/scx200.c
@@ -0,0 +1,167 @@
1/* linux/arch/i386/kernel/scx200.c
2
3 Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com>
4
5 National Semiconductor SCx200 support. */
6
7#include <linux/config.h>
8#include <linux/module.h>
9#include <linux/errno.h>
10#include <linux/kernel.h>
11#include <linux/init.h>
12#include <linux/pci.h>
13
14#include <linux/scx200.h>
15
16/* Verify that the configuration block really is there */
17#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base))
18
19#define NAME "scx200"
20
21MODULE_AUTHOR("Christer Weinigel <wingel@nano-system.com>");
22MODULE_DESCRIPTION("NatSemi SCx200 Driver");
23MODULE_LICENSE("GPL");
24
25unsigned scx200_gpio_base = 0;
26long scx200_gpio_shadow[2];
27
28unsigned scx200_cb_base = 0;
29
30static struct pci_device_id scx200_tbl[] = {
31 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) },
32 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) },
33 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_XBUS) },
34 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_XBUS) },
35 { },
36};
37MODULE_DEVICE_TABLE(pci,scx200_tbl);
38
39static int __devinit scx200_probe(struct pci_dev *, const struct pci_device_id *);
40
41static struct pci_driver scx200_pci_driver = {
42 .name = "scx200",
43 .id_table = scx200_tbl,
44 .probe = scx200_probe,
45};
46
47static DEFINE_SPINLOCK(scx200_gpio_config_lock);
48
49static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
50{
51 int bank;
52 unsigned base;
53
54 if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE ||
55 pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) {
56 base = pci_resource_start(pdev, 0);
57 printk(KERN_INFO NAME ": GPIO base 0x%x\n", base);
58
59 if (request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO") == 0) {
60 printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n");
61 return -EBUSY;
62 }
63
64 scx200_gpio_base = base;
65
66 /* read the current values driven on the GPIO signals */
67 for (bank = 0; bank < 2; ++bank)
68 scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank);
69
70 } else {
71 /* find the base of the Configuration Block */
72 if (scx200_cb_probe(SCx200_CB_BASE_FIXED)) {
73 scx200_cb_base = SCx200_CB_BASE_FIXED;
74 } else {
75 pci_read_config_dword(pdev, SCx200_CBA_SCRATCH, &base);
76 if (scx200_cb_probe(base)) {
77 scx200_cb_base = base;
78 } else {
79 printk(KERN_WARNING NAME ": Configuration Block not found\n");
80 return -ENODEV;
81 }
82 }
83 printk(KERN_INFO NAME ": Configuration Block base 0x%x\n", scx200_cb_base);
84 }
85
86 return 0;
87}
88
89u32 scx200_gpio_configure(int index, u32 mask, u32 bits)
90{
91 u32 config, new_config;
92 unsigned long flags;
93
94 spin_lock_irqsave(&scx200_gpio_config_lock, flags);
95
96 outl(index, scx200_gpio_base + 0x20);
97 config = inl(scx200_gpio_base + 0x24);
98
99 new_config = (config & mask) | bits;
100 outl(new_config, scx200_gpio_base + 0x24);
101
102 spin_unlock_irqrestore(&scx200_gpio_config_lock, flags);
103
104 return config;
105}
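/*
 * Illustrative use: drive GPIO 7 as a push-pull output by setting
 * bits 0 (OE) and 1 (PP) while keeping everything else, as the
 * dump routine below decodes them:
 *
 *	scx200_gpio_configure(7, ~0, 0x03);
 *
 * The mask is ANDed first, so ~0 preserves the existing bits and
 * "bits" is then ORed in.
 */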
106
107#if 0
108void scx200_gpio_dump(unsigned index)
109{
110 u32 config = scx200_gpio_configure(index, ~0, 0);
111 printk(KERN_DEBUG "GPIO%02u: 0x%08lx", index, (unsigned long)config);
112
113 if (config & 1)
114 printk(" OE"); /* output enabled */
115 else
116 printk(" TS"); /* tristate */
117 if (config & 2)
118 printk(" PP"); /* push pull */
119 else
120 printk(" OD"); /* open drain */
121 if (config & 4)
122 printk(" PUE"); /* pull up enabled */
123 else
124 printk(" PUD"); /* pull up disabled */
125 if (config & 8)
126 printk(" LOCKED"); /* locked */
127 if (config & 16)
128 printk(" LEVEL"); /* level input */
129 else
130 printk(" EDGE"); /* edge input */
131 if (config & 32)
132 printk(" HI"); /* trigger on rising edge */
133 else
134 printk(" LO"); /* trigger on falling edge */
135 if (config & 64)
136 printk(" DEBOUNCE"); /* debounce */
137 printk("\n");
138}
139#endif /* 0 */
140
141static int __init scx200_init(void)
142{
143 printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n");
144
145 return pci_module_init(&scx200_pci_driver);
146}
147
148static void __exit scx200_cleanup(void)
149{
150 pci_unregister_driver(&scx200_pci_driver);
151 release_region(scx200_gpio_base, SCx200_GPIO_SIZE);
152}
153
154module_init(scx200_init);
155module_exit(scx200_cleanup);
156
157EXPORT_SYMBOL(scx200_gpio_base);
158EXPORT_SYMBOL(scx200_gpio_shadow);
159EXPORT_SYMBOL(scx200_gpio_configure);
160EXPORT_SYMBOL(scx200_cb_base);
161
162/*
163 Local variables:
164 compile-command: "make -k -C ../../.. SUBDIRS=arch/i386/kernel modules"
165 c-basic-offset: 8
166 End:
167*/
diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c
new file mode 100644
index 000000000000..469f496e55c0
--- /dev/null
+++ b/arch/i386/kernel/semaphore.c
@@ -0,0 +1,297 @@
1/*
2 * i386 semaphore implementation.
3 *
4 * (C) Copyright 1999 Linus Torvalds
5 *
6 * Portions Copyright 1999 Red Hat, Inc.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 *
13 * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
14 */
15#include <linux/config.h>
16#include <linux/sched.h>
17#include <linux/err.h>
18#include <linux/init.h>
19#include <asm/semaphore.h>
20
21/*
22 * Semaphores are implemented using a two-way counter:
23 * The "count" variable is decremented for each process
24 * that tries to acquire the semaphore, while the "sleepers"
25 * variable is a count of such acquires.
26 *
27 * Notably, the inline "up()" and "down()" functions can
28 * efficiently test if they need to do any extra work (up
29 * needs to do something only if count was negative before
30 * the increment operation).
31 *
32 * "sleeping" and the contention routine ordering is protected
33 * by the spinlock in the semaphore's waitqueue head.
34 *
35 * Note that these functions are only called when there is
36 * contention on the lock, and as such all this is the
37 * "non-critical" part of the whole semaphore business. The
38 * critical part is the inline stuff in <asm/semaphore.h>
39 * where we want to avoid any extra jumps and calls.
40 */
41
42/*
43 * Logic:
44 * - only on a boundary condition do we need to care. When we go
45 * from a negative count to a non-negative, we wake people up.
46 * - when we go from a non-negative count to a negative, we
47 * (a) synchronize with the "sleeper" count and (b) make sure
48 * that we're on the wakeup list before we synchronize so that
49 * we cannot lose wakeup events.
50 */
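/*
 * Worked example (illustrative): count = 1; a holder takes it
 * (count 0) and two more tasks decrement it (count -2). Each sleeper
 * bumps "sleepers", and atomic_add_negative(sleepers - 1, &count) in
 * __down() folds the surplus decrements back in, so the steady state
 * while tasks sleep is count = -1, sleepers = 1 no matter how many
 * are queued - which is why up() only ever needs one increment and
 * one wakeup.
 */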
51
52static fastcall void __attribute_used__ __up(struct semaphore *sem)
53{
54 wake_up(&sem->wait);
55}
56
57static fastcall void __attribute_used__ __sched __down(struct semaphore * sem)
58{
59 struct task_struct *tsk = current;
60 DECLARE_WAITQUEUE(wait, tsk);
61 unsigned long flags;
62
63 tsk->state = TASK_UNINTERRUPTIBLE;
64 spin_lock_irqsave(&sem->wait.lock, flags);
65 add_wait_queue_exclusive_locked(&sem->wait, &wait);
66
67 sem->sleepers++;
68 for (;;) {
69 int sleepers = sem->sleepers;
70
71 /*
72 * Add "everybody else" into it. They aren't
73 * playing, because we own the spinlock in
74 * the wait_queue_head.
75 */
76 if (!atomic_add_negative(sleepers - 1, &sem->count)) {
77 sem->sleepers = 0;
78 break;
79 }
80 sem->sleepers = 1; /* us - see -1 above */
81 spin_unlock_irqrestore(&sem->wait.lock, flags);
82
83 schedule();
84
85 spin_lock_irqsave(&sem->wait.lock, flags);
86 tsk->state = TASK_UNINTERRUPTIBLE;
87 }
88 remove_wait_queue_locked(&sem->wait, &wait);
89 wake_up_locked(&sem->wait);
90 spin_unlock_irqrestore(&sem->wait.lock, flags);
91 tsk->state = TASK_RUNNING;
92}
93
94static fastcall int __attribute_used__ __sched __down_interruptible(struct semaphore * sem)
95{
96 int retval = 0;
97 struct task_struct *tsk = current;
98 DECLARE_WAITQUEUE(wait, tsk);
99 unsigned long flags;
100
101 tsk->state = TASK_INTERRUPTIBLE;
102 spin_lock_irqsave(&sem->wait.lock, flags);
103 add_wait_queue_exclusive_locked(&sem->wait, &wait);
104
105 sem->sleepers++;
106 for (;;) {
107 int sleepers = sem->sleepers;
108
109 /*
110 * With signals pending, this turns into
111 * the trylock failure case - we won't be
112		 * sleeping, and we can't get the lock as
113 * it has contention. Just correct the count
114 * and exit.
115 */
116 if (signal_pending(current)) {
117 retval = -EINTR;
118 sem->sleepers = 0;
119 atomic_add(sleepers, &sem->count);
120 break;
121 }
122
123 /*
124 * Add "everybody else" into it. They aren't
125 * playing, because we own the spinlock in
126 * wait_queue_head. The "-1" is because we're
127 * still hoping to get the semaphore.
128 */
129 if (!atomic_add_negative(sleepers - 1, &sem->count)) {
130 sem->sleepers = 0;
131 break;
132 }
133 sem->sleepers = 1; /* us - see -1 above */
134 spin_unlock_irqrestore(&sem->wait.lock, flags);
135
136 schedule();
137
138 spin_lock_irqsave(&sem->wait.lock, flags);
139 tsk->state = TASK_INTERRUPTIBLE;
140 }
141 remove_wait_queue_locked(&sem->wait, &wait);
142 wake_up_locked(&sem->wait);
143 spin_unlock_irqrestore(&sem->wait.lock, flags);
144
145 tsk->state = TASK_RUNNING;
146 return retval;
147}
148
149/*
150 * Trylock failed - make sure we correct for
151 * having decremented the count.
152 *
153 * We could have done the trylock with a
154 * single "cmpxchg" without failure cases,
155 * but then it wouldn't work on a 386.
156 */
157static fastcall int __attribute_used__ __down_trylock(struct semaphore * sem)
158{
159 int sleepers;
160 unsigned long flags;
161
162 spin_lock_irqsave(&sem->wait.lock, flags);
163 sleepers = sem->sleepers + 1;
164 sem->sleepers = 0;
165
166 /*
167 * Add "everybody else" and us into it. They aren't
168 * playing, because we own the spinlock in the
169 * wait_queue_head.
170 */
171 if (!atomic_add_negative(sleepers, &sem->count)) {
172 wake_up_locked(&sem->wait);
173 }
174
175 spin_unlock_irqrestore(&sem->wait.lock, flags);
176 return 1;
177}
178
179
180/*
181 * The semaphore operations have a special calling sequence that
182 * allow us to do a simpler in-line version of them. These routines
183 * need to convert that sequence back into the C sequence when
184 * there is contention on the semaphore.
185 *
186 * %eax contains the semaphore pointer on entry. Save the C-clobbered
187	 * registers (%eax, %edx and %ecx) except %eax, which is either a return
188	 * value or just clobbered.
189 */
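/*
 * (A note on the linkage assumed above: "fastcall" on i386 means
 * regparm(3), so the first argument -- the semaphore pointer -- is
 * already in %eax and the stubs below never push it on the stack;
 * they only preserve the other caller-clobbered registers.)
 */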
190asm(
191".section .sched.text\n"
192".align 4\n"
193".globl __down_failed\n"
194"__down_failed:\n\t"
195#if defined(CONFIG_FRAME_POINTER)
196 "pushl %ebp\n\t"
197 "movl %esp,%ebp\n\t"
198#endif
199 "pushl %edx\n\t"
200 "pushl %ecx\n\t"
201 "call __down\n\t"
202 "popl %ecx\n\t"
203 "popl %edx\n\t"
204#if defined(CONFIG_FRAME_POINTER)
205 "movl %ebp,%esp\n\t"
206 "popl %ebp\n\t"
207#endif
208 "ret"
209);
210
211asm(
212".section .sched.text\n"
213".align 4\n"
214".globl __down_failed_interruptible\n"
215"__down_failed_interruptible:\n\t"
216#if defined(CONFIG_FRAME_POINTER)
217 "pushl %ebp\n\t"
218 "movl %esp,%ebp\n\t"
219#endif
220 "pushl %edx\n\t"
221 "pushl %ecx\n\t"
222 "call __down_interruptible\n\t"
223 "popl %ecx\n\t"
224 "popl %edx\n\t"
225#if defined(CONFIG_FRAME_POINTER)
226 "movl %ebp,%esp\n\t"
227 "popl %ebp\n\t"
228#endif
229 "ret"
230);
231
232asm(
233".section .sched.text\n"
234".align 4\n"
235".globl __down_failed_trylock\n"
236"__down_failed_trylock:\n\t"
237#if defined(CONFIG_FRAME_POINTER)
238 "pushl %ebp\n\t"
239 "movl %esp,%ebp\n\t"
240#endif
241 "pushl %edx\n\t"
242 "pushl %ecx\n\t"
243 "call __down_trylock\n\t"
244 "popl %ecx\n\t"
245 "popl %edx\n\t"
246#if defined(CONFIG_FRAME_POINTER)
247 "movl %ebp,%esp\n\t"
248 "popl %ebp\n\t"
249#endif
250 "ret"
251);
252
253asm(
254".section .sched.text\n"
255".align 4\n"
256".globl __up_wakeup\n"
257"__up_wakeup:\n\t"
258 "pushl %edx\n\t"
259 "pushl %ecx\n\t"
260 "call __up\n\t"
261 "popl %ecx\n\t"
262 "popl %edx\n\t"
263 "ret"
264);
265
266/*
267 * rw spinlock fallbacks
268 */
269#if defined(CONFIG_SMP)
270asm(
271".section .sched.text\n"
272".align 4\n"
273".globl __write_lock_failed\n"
274"__write_lock_failed:\n\t"
275 LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n"
276"1: rep; nop\n\t"
277 "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
278 "jne 1b\n\t"
279 LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
280 "jnz __write_lock_failed\n\t"
281 "ret"
282);
283
284asm(
285".section .sched.text\n"
286".align 4\n"
287".globl __read_lock_failed\n"
288"__read_lock_failed:\n\t"
289 LOCK "incl (%eax)\n"
290"1: rep; nop\n\t"
291 "cmpl $1,(%eax)\n\t"
292 "js 1b\n\t"
293 LOCK "decl (%eax)\n\t"
294 "js __read_lock_failed\n\t"
295 "ret"
296);
297#endif
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
new file mode 100644
index 000000000000..945ec73163c8
--- /dev/null
+++ b/arch/i386/kernel/setup.c
@@ -0,0 +1,1535 @@
1/*
2 * linux/arch/i386/kernel/setup.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7 *
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
10 *
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
13 *
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
16 *
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
19 *
20 */
21
22/*
23 * This file handles the architecture-dependent parts of initialization
24 */
25
26#include <linux/sched.h>
27#include <linux/mm.h>
28#include <linux/tty.h>
29#include <linux/ioport.h>
30#include <linux/acpi.h>
31#include <linux/apm_bios.h>
32#include <linux/initrd.h>
33#include <linux/bootmem.h>
34#include <linux/seq_file.h>
35#include <linux/console.h>
36#include <linux/mca.h>
37#include <linux/root_dev.h>
38#include <linux/highmem.h>
39#include <linux/module.h>
40#include <linux/efi.h>
41#include <linux/init.h>
42#include <linux/edd.h>
43#include <linux/nodemask.h>
44#include <video/edid.h>
45#include <asm/e820.h>
46#include <asm/mpspec.h>
47#include <asm/setup.h>
48#include <asm/arch_hooks.h>
49#include <asm/sections.h>
50#include <asm/io_apic.h>
51#include <asm/ist.h>
52#include <asm/io.h>
53#include "setup_arch_pre.h"
54#include <bios_ebda.h>
55
56/* This value is set up by the early boot code to point to the value
57 immediately after the boot time page tables. It contains a *physical*
58 address, and must not be in the .bss segment! */
59unsigned long init_pg_tables_end __initdata = ~0UL;
60
61int disable_pse __initdata = 0;
62
63/*
64 * Machine setup..
65 */
66
67#ifdef CONFIG_EFI
68int efi_enabled = 0;
69EXPORT_SYMBOL(efi_enabled);
70#endif
71
72/* cpu data as detected by the assembly code in head.S */
73struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
74/* common cpu data for all cpus */
75struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
76
77unsigned long mmu_cr4_features;
78
79#ifdef CONFIG_ACPI_INTERPRETER
80 int acpi_disabled = 0;
81#else
82 int acpi_disabled = 1;
83#endif
84EXPORT_SYMBOL(acpi_disabled);
85
86#ifdef CONFIG_ACPI_BOOT
87int __initdata acpi_force = 0;
88extern acpi_interrupt_flags acpi_sci_flags;
89#endif
90
91/* for MCA, but anyone else can use it if they want */
92unsigned int machine_id;
93unsigned int machine_submodel_id;
94unsigned int BIOS_revision;
95unsigned int mca_pentium_flag;
96
97/* For PCI or other memory-mapped resources */
98unsigned long pci_mem_start = 0x10000000;
99
100/* Boot loader ID as an integer, for the benefit of proc_dointvec */
101int bootloader_type;
102
103/* user-defined highmem size */
104static unsigned int highmem_pages = -1;
105
106/*
107 * Setup options
108 */
109struct drive_info_struct { char dummy[32]; } drive_info;
110struct screen_info screen_info;
111struct apm_info apm_info;
112struct sys_desc_table_struct {
113 unsigned short length;
114 unsigned char table[0];
115};
116struct edid_info edid_info;
117struct ist_info ist_info;
118struct e820map e820;
119
120extern void early_cpu_init(void);
121extern void dmi_scan_machine(void);
122extern void generic_apic_probe(char *);
123extern int root_mountflags;
124
125unsigned long saved_videomode;
126
127#define RAMDISK_IMAGE_START_MASK 0x07FF
128#define RAMDISK_PROMPT_FLAG 0x8000
129#define RAMDISK_LOAD_FLAG 0x4000
130
131static char command_line[COMMAND_LINE_SIZE];
132
133unsigned char __initdata boot_params[PARAM_SIZE];
134
135static struct resource data_resource = {
136 .name = "Kernel data",
137 .start = 0,
138 .end = 0,
139 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
140};
141
142static struct resource code_resource = {
143 .name = "Kernel code",
144 .start = 0,
145 .end = 0,
146 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
147};
148
149static struct resource system_rom_resource = {
150 .name = "System ROM",
151 .start = 0xf0000,
152 .end = 0xfffff,
153 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
154};
155
156static struct resource extension_rom_resource = {
157 .name = "Extension ROM",
158 .start = 0xe0000,
159 .end = 0xeffff,
160 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
161};
162
163static struct resource adapter_rom_resources[] = { {
164 .name = "Adapter ROM",
165 .start = 0xc8000,
166 .end = 0,
167 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
168}, {
169 .name = "Adapter ROM",
170 .start = 0,
171 .end = 0,
172 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
173}, {
174 .name = "Adapter ROM",
175 .start = 0,
176 .end = 0,
177 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
178}, {
179 .name = "Adapter ROM",
180 .start = 0,
181 .end = 0,
182 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
183}, {
184 .name = "Adapter ROM",
185 .start = 0,
186 .end = 0,
187 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
188}, {
189 .name = "Adapter ROM",
190 .start = 0,
191 .end = 0,
192 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
193} };
194
195#define ADAPTER_ROM_RESOURCES \
196 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
197
198static struct resource video_rom_resource = {
199 .name = "Video ROM",
200 .start = 0xc0000,
201 .end = 0xc7fff,
202 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
203};
204
205static struct resource video_ram_resource = {
206 .name = "Video RAM area",
207 .start = 0xa0000,
208 .end = 0xbffff,
209 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
210};
211
212static struct resource standard_io_resources[] = { {
213 .name = "dma1",
214 .start = 0x0000,
215 .end = 0x001f,
216 .flags = IORESOURCE_BUSY | IORESOURCE_IO
217}, {
218 .name = "pic1",
219 .start = 0x0020,
220 .end = 0x0021,
221 .flags = IORESOURCE_BUSY | IORESOURCE_IO
222}, {
223 .name = "timer0",
224 .start = 0x0040,
225 .end = 0x0043,
226 .flags = IORESOURCE_BUSY | IORESOURCE_IO
227}, {
228 .name = "timer1",
229 .start = 0x0050,
230 .end = 0x0053,
231 .flags = IORESOURCE_BUSY | IORESOURCE_IO
232}, {
233 .name = "keyboard",
234 .start = 0x0060,
235 .end = 0x006f,
236 .flags = IORESOURCE_BUSY | IORESOURCE_IO
237}, {
238 .name = "dma page reg",
239 .start = 0x0080,
240 .end = 0x008f,
241 .flags = IORESOURCE_BUSY | IORESOURCE_IO
242}, {
243 .name = "pic2",
244 .start = 0x00a0,
245 .end = 0x00a1,
246 .flags = IORESOURCE_BUSY | IORESOURCE_IO
247}, {
248 .name = "dma2",
249 .start = 0x00c0,
250 .end = 0x00df,
251 .flags = IORESOURCE_BUSY | IORESOURCE_IO
252}, {
253 .name = "fpu",
254 .start = 0x00f0,
255 .end = 0x00ff,
256 .flags = IORESOURCE_BUSY | IORESOURCE_IO
257} };
258
259#define STANDARD_IO_RESOURCES \
260 (sizeof standard_io_resources / sizeof standard_io_resources[0])
261
262#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
263
264static int __init romchecksum(unsigned char *rom, unsigned long length)
265{
266 unsigned char *p, sum = 0;
267
268 for (p = rom; p < rom + length; p++)
269 sum += *p;
270 return sum == 0;
271}
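/*
 * Example (illustrative): a valid 32KB option ROM starts with the
 * 0xaa55 signature, stores its size in 512-byte units at offset 2
 * (here rom[2] == 64), and pads itself so all 32768 bytes sum to 0
 * modulo 256 -- which is exactly what romchecksum() verifies.
 */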
272
273static void __init probe_roms(void)
274{
275 unsigned long start, length, upper;
276 unsigned char *rom;
277 int i;
278
279 /* video rom */
280 upper = adapter_rom_resources[0].start;
281 for (start = video_rom_resource.start; start < upper; start += 2048) {
282 rom = isa_bus_to_virt(start);
283 if (!romsignature(rom))
284 continue;
285
286 video_rom_resource.start = start;
287
288 /* 0 < length <= 0x7f * 512, historically */
289 length = rom[2] * 512;
290
291 /* if checksum okay, trust length byte */
292 if (length && romchecksum(rom, length))
293 video_rom_resource.end = start + length - 1;
294
295 request_resource(&iomem_resource, &video_rom_resource);
296 break;
297 }
298
299 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
300 if (start < upper)
301 start = upper;
302
303 /* system rom */
304 request_resource(&iomem_resource, &system_rom_resource);
305 upper = system_rom_resource.start;
306
307 /* check for extension rom (ignore length byte!) */
308 rom = isa_bus_to_virt(extension_rom_resource.start);
309 if (romsignature(rom)) {
310 length = extension_rom_resource.end - extension_rom_resource.start + 1;
311 if (romchecksum(rom, length)) {
312 request_resource(&iomem_resource, &extension_rom_resource);
313 upper = extension_rom_resource.start;
314 }
315 }
316
317 /* check for adapter roms on 2k boundaries */
318 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
319 rom = isa_bus_to_virt(start);
320 if (!romsignature(rom))
321 continue;
322
323 /* 0 < length <= 0x7f * 512, historically */
324 length = rom[2] * 512;
325
326 /* but accept any length that fits if checksum okay */
327 if (!length || start + length > upper || !romchecksum(rom, length))
328 continue;
329
330 adapter_rom_resources[i].start = start;
331 adapter_rom_resources[i].end = start + length - 1;
332 request_resource(&iomem_resource, &adapter_rom_resources[i]);
333
334 start = adapter_rom_resources[i++].end & ~2047UL;
335 }
336}
337
338static void __init limit_regions(unsigned long long size)
339{
340 unsigned long long current_addr = 0;
341 int i;
342
343 if (efi_enabled) {
344 for (i = 0; i < memmap.nr_map; i++) {
345 current_addr = memmap.map[i].phys_addr +
346 (memmap.map[i].num_pages << 12);
347 if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
348 if (current_addr >= size) {
349 memmap.map[i].num_pages -=
350 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
351 memmap.nr_map = i + 1;
352 return;
353 }
354 }
355 }
356 }
357 for (i = 0; i < e820.nr_map; i++) {
358 if (e820.map[i].type == E820_RAM) {
359 current_addr = e820.map[i].addr + e820.map[i].size;
360 if (current_addr >= size) {
361 e820.map[i].size -= current_addr-size;
362 e820.nr_map = i + 1;
363 return;
364 }
365 }
366 }
367}
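/*
 * Example (hypothetical map): with e820 entries RAM 0-640K and
 * RAM 1M-512M, limit_regions(256M) finds the second entry ending at
 * 512M >= 256M, shrinks its size by 256M (from 511M to 255M, i.e.
 * 1M-256M) and truncates the map after that entry.
 */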
368
369static void __init add_memory_region(unsigned long long start,
370 unsigned long long size, int type)
371{
372 int x;
373
374 if (!efi_enabled) {
375 x = e820.nr_map;
376
377 if (x == E820MAX) {
378 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
379 return;
380 }
381
382 e820.map[x].addr = start;
383 e820.map[x].size = size;
384 e820.map[x].type = type;
385 e820.nr_map++;
386 }
387} /* add_memory_region */
388
389#define E820_DEBUG 1
390
391static void __init print_memory_map(char *who)
392{
393 int i;
394
395 for (i = 0; i < e820.nr_map; i++) {
396 printk(" %s: %016Lx - %016Lx ", who,
397 e820.map[i].addr,
398 e820.map[i].addr + e820.map[i].size);
399 switch (e820.map[i].type) {
400 case E820_RAM: printk("(usable)\n");
401 break;
402 case E820_RESERVED:
403 printk("(reserved)\n");
404 break;
405 case E820_ACPI:
406 printk("(ACPI data)\n");
407 break;
408 case E820_NVS:
409 printk("(ACPI NVS)\n");
410 break;
411 default: printk("type %lu\n", e820.map[i].type);
412 break;
413 }
414 }
415}
416
417/*
418 * Sanitize the BIOS e820 map.
419 *
420 * Some e820 responses include overlapping entries. The following
421 * replaces the original e820 map with a new one, removing overlaps.
422 *
423 */
424struct change_member {
425 struct e820entry *pbios; /* pointer to original bios entry */
426 unsigned long long addr; /* address for this change point */
427};
428static struct change_member change_point_list[2*E820MAX] __initdata;
429static struct change_member *change_point[2*E820MAX] __initdata;
430static struct e820entry *overlap_list[E820MAX] __initdata;
431static struct e820entry new_bios[E820MAX] __initdata;
432
433static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
434{
435 struct change_member *change_tmp;
436 unsigned long current_type, last_type;
437 unsigned long long last_addr;
438 int chgidx, still_changing;
439 int overlap_entries;
440 int new_bios_entry;
441 int old_nr, new_nr, chg_nr;
442 int i;
443
444 /*
445 Visually we're performing the following (1,2,3,4 = memory types)...
446
447 Sample memory map (w/overlaps):
448 ____22__________________
449 ______________________4_
450 ____1111________________
451 _44_____________________
452 11111111________________
453 ____________________33__
454 ___________44___________
455 __________33333_________
456 ______________22________
457 ___________________2222_
458 _________111111111______
459 _____________________11_
460 _________________4______
461
462 Sanitized equivalent (no overlap):
463 1_______________________
464 _44_____________________
465 ___1____________________
466 ____22__________________
467 ______11________________
468 _________1______________
469 __________3_____________
470 ___________44___________
471 _____________33_________
472 _______________2________
473 ________________1_______
474 _________________4______
475 ___________________2____
476 ____________________33__
477 ______________________4_
478 */
479
480 /* if there's only one memory region, don't bother */
481 if (*pnr_map < 2)
482 return -1;
483
484 old_nr = *pnr_map;
485
486 /* bail out if we find any unreasonable addresses in bios map */
487 for (i=0; i<old_nr; i++)
488 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
489 return -1;
490
491 /* create pointers for initial change-point information (for sorting) */
492 for (i=0; i < 2*old_nr; i++)
493 change_point[i] = &change_point_list[i];
494
495 /* record all known change-points (starting and ending addresses),
496 omitting those that are for empty memory regions */
497 chgidx = 0;
498 for (i=0; i < old_nr; i++) {
499 if (biosmap[i].size != 0) {
500 change_point[chgidx]->addr = biosmap[i].addr;
501 change_point[chgidx++]->pbios = &biosmap[i];
502 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
503 change_point[chgidx++]->pbios = &biosmap[i];
504 }
505 }
506 chg_nr = chgidx; /* true number of change-points */
507
508 /* sort change-point list by memory addresses (low -> high) */
509 still_changing = 1;
510 while (still_changing) {
511 still_changing = 0;
512 for (i=1; i < chg_nr; i++) {
513 /* if <current_addr> > <last_addr>, swap */
514 /* or, if current=<start_addr> & last=<end_addr>, swap */
515 if ((change_point[i]->addr < change_point[i-1]->addr) ||
516 ((change_point[i]->addr == change_point[i-1]->addr) &&
517 (change_point[i]->addr == change_point[i]->pbios->addr) &&
518 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
519 )
520 {
521 change_tmp = change_point[i];
522 change_point[i] = change_point[i-1];
523 change_point[i-1] = change_tmp;
524 still_changing=1;
525 }
526 }
527 }
528
529 /* create a new bios memory map, removing overlaps */
530 overlap_entries=0; /* number of entries in the overlap table */
531 new_bios_entry=0; /* index for creating new bios map entries */
532 last_type = 0; /* start with undefined memory type */
533 last_addr = 0; /* start with 0 as last starting address */
534	/* loop through change-points, determining effect on the new bios map */
535 for (chgidx=0; chgidx < chg_nr; chgidx++)
536 {
537 /* keep track of all overlapping bios entries */
538 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
539 {
540 /* add map entry to overlap list (> 1 entry implies an overlap) */
541 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
542 }
543 else
544 {
545 /* remove entry from list (order independent, so swap with last) */
546 for (i=0; i<overlap_entries; i++)
547 {
548 if (overlap_list[i] == change_point[chgidx]->pbios)
549 overlap_list[i] = overlap_list[overlap_entries-1];
550 }
551 overlap_entries--;
552 }
553 /* if there are overlapping entries, decide which "type" to use */
554 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
555 current_type = 0;
556 for (i=0; i<overlap_entries; i++)
557 if (overlap_list[i]->type > current_type)
558 current_type = overlap_list[i]->type;
559 /* continue building up new bios map based on this information */
560 if (current_type != last_type) {
561 if (last_type != 0) {
562 new_bios[new_bios_entry].size =
563 change_point[chgidx]->addr - last_addr;
564 /* move forward only if the new size was non-zero */
565 if (new_bios[new_bios_entry].size != 0)
566 if (++new_bios_entry >= E820MAX)
567 break; /* no more space left for new bios entries */
568 }
569 if (current_type != 0) {
570 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
571 new_bios[new_bios_entry].type = current_type;
572 last_addr=change_point[chgidx]->addr;
573 }
574 last_type = current_type;
575 }
576 }
577 new_nr = new_bios_entry; /* retain count for new bios entries */
578
579 /* copy new bios mapping into original location */
580 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
581 *pnr_map = new_nr;
582
583 return 0;
584}
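#if 0
/*
 * Hypothetical self-test sketch (illustrative only, not built): feed
 * sanitize_e820_map() a map whose RESERVED entry overlaps the tail of
 * a RAM entry.  Since the larger type value wins, the overlapping
 * 0x90000-0x9ffff range comes out RESERVED and the RAM entry is
 * clipped to end at 0x90000.
 */
static void __init sanitize_example(void)
{
	struct e820entry map[3] = {
		{ .addr = 0x000000, .size = 0xa0000,   .type = E820_RAM      },
		{ .addr = 0x090000, .size = 0x10000,   .type = E820_RESERVED },
		{ .addr = 0x100000, .size = 0x7f00000, .type = E820_RAM      },
	};
	char nr = 3;

	sanitize_e820_map(map, &nr);	/* nr stays 3; entries no longer overlap */
}
#endif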
585
586/*
587 * Copy the BIOS e820 map into a safe place.
588 *
589 * Sanity-check it while we're at it..
590 *
591 * If we're lucky and live on a modern system, the setup code
592 * will have given us a memory map that we can use to properly
593 * set up memory. If we aren't, we'll fake a memory map.
594 *
595 * We check to see that the memory map contains at least 2 elements
596 * before we'll use it, because the detection code in setup.S may
597 * not be perfect and most every PC known to man has two memory
598 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
599 * thinkpad 560x, for example, does not cooperate with the memory
600 * detection code.)
601 */
602static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
603{
604 /* Only one memory region (or negative)? Ignore it */
605 if (nr_map < 2)
606 return -1;
607
608 do {
609 unsigned long long start = biosmap->addr;
610 unsigned long long size = biosmap->size;
611 unsigned long long end = start + size;
612 unsigned long type = biosmap->type;
613
614 /* Overflow in 64 bits? Ignore the memory map. */
615 if (start > end)
616 return -1;
617
618 /*
619 * Some BIOSes claim RAM in the 640k - 1M region.
620 * Not right. Fix it up.
621 */
622 if (type == E820_RAM) {
623 if (start < 0x100000ULL && end > 0xA0000ULL) {
624 if (start < 0xA0000ULL)
625 add_memory_region(start, 0xA0000ULL-start, type);
626 if (end <= 0x100000ULL)
627 continue;
628 start = 0x100000ULL;
629 size = end - start;
630 }
631 }
632 add_memory_region(start, size, type);
633 } while (biosmap++,--nr_map);
634 return 0;
635}
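/*
 * Example of the clipping above (hypothetical BIOS entry): a RAM entry
 * covering 0x90000-0x110000 straddles the 640K-1M hole, so it is split
 * into RAM 0x90000-0xa0000 plus RAM 0x100000-0x110000, and the
 * 0xa0000-0x100000 range is simply dropped.
 */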
636
637#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
638struct edd edd;
639#ifdef CONFIG_EDD_MODULE
640EXPORT_SYMBOL(edd);
641#endif
642/**
643 * copy_edd() - Copy the BIOS EDD information
644 * from boot_params into a safe place.
645 *
646 */
647static inline void copy_edd(void)
648{
649 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
650 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
651 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
652 edd.edd_info_nr = EDD_NR;
653}
654#else
655static inline void copy_edd(void)
656{
657}
658#endif
659
660/*
661 * Do NOT EVER look at the BIOS memory size location.
662 * It does not work on many machines.
663 */
664#define LOWMEMSIZE() (0x9f000)
665
666static void __init parse_cmdline_early (char ** cmdline_p)
667{
668 char c = ' ', *to = command_line, *from = saved_command_line;
669 int len = 0;
670 int userdef = 0;
671
672 /* Save unparsed command line copy for /proc/cmdline */
673 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
674
675 for (;;) {
676 if (c != ' ')
677 goto next_char;
678 /*
679 * "mem=nopentium" disables the 4MB page tables.
680 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
681 * to <mem>, overriding the bios size.
682 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
683 * <start> to <start>+<mem>, overriding the bios size.
684 *
685 * HPA tells me bootloaders need to parse mem=, so no new
686 * option should be mem= [also see Documentation/i386/boot.txt]
687 */
688 if (!memcmp(from, "mem=", 4)) {
689 if (to != command_line)
690 to--;
691 if (!memcmp(from+4, "nopentium", 9)) {
692 from += 9+4;
693 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
694 disable_pse = 1;
695 } else {
696 /* If the user specifies memory size, we
697 * limit the BIOS-provided memory map to
698 * that size. exactmap can be used to specify
699 * the exact map. mem=number can be used to
700 * trim the existing memory map.
701 */
702 unsigned long long mem_size;
703
704 mem_size = memparse(from+4, &from);
705 limit_regions(mem_size);
706 userdef=1;
707 }
708 }
709
710 else if (!memcmp(from, "memmap=", 7)) {
711 if (to != command_line)
712 to--;
713 if (!memcmp(from+7, "exactmap", 8)) {
714 from += 8+7;
715 e820.nr_map = 0;
716 userdef = 1;
717 } else {
718 /* If the user specifies memory size, we
719 * limit the BIOS-provided memory map to
720 * that size. exactmap can be used to specify
721 * the exact map. mem=number can be used to
722 * trim the existing memory map.
723 */
724 unsigned long long start_at, mem_size;
725
726 mem_size = memparse(from+7, &from);
727 if (*from == '@') {
728 start_at = memparse(from+1, &from);
729 add_memory_region(start_at, mem_size, E820_RAM);
730 } else if (*from == '#') {
731 start_at = memparse(from+1, &from);
732 add_memory_region(start_at, mem_size, E820_ACPI);
733 } else if (*from == '$') {
734 start_at = memparse(from+1, &from);
735 add_memory_region(start_at, mem_size, E820_RESERVED);
736 } else {
737 limit_regions(mem_size);
738 userdef=1;
739 }
740 }
741 }
742
743 else if (!memcmp(from, "noexec=", 7))
744 noexec_setup(from + 7);
745
746
747#ifdef CONFIG_X86_SMP
748 /*
749 * If the BIOS enumerates physical processors before logical,
750 * maxcpus=N at enumeration-time can be used to disable HT.
751 */
752 else if (!memcmp(from, "maxcpus=", 8)) {
753 extern unsigned int maxcpus;
754
755 maxcpus = simple_strtoul(from + 8, NULL, 0);
756 }
757#endif
758
759#ifdef CONFIG_ACPI_BOOT
760 /* "acpi=off" disables both ACPI table parsing and interpreter */
761 else if (!memcmp(from, "acpi=off", 8)) {
762 disable_acpi();
763 }
764
765	/* acpi=force to override the blacklist */
766 else if (!memcmp(from, "acpi=force", 10)) {
767 acpi_force = 1;
768 acpi_ht = 1;
769 acpi_disabled = 0;
770 }
771
772 /* acpi=strict disables out-of-spec workarounds */
773 else if (!memcmp(from, "acpi=strict", 11)) {
774 acpi_strict = 1;
775 }
776
777 /* Limit ACPI just to boot-time to enable HT */
778 else if (!memcmp(from, "acpi=ht", 7)) {
779 if (!acpi_force)
780 disable_acpi();
781 acpi_ht = 1;
782 }
783
784	/* "pci=noacpi" disables ACPI IRQ routing and PCI scan */
785 else if (!memcmp(from, "pci=noacpi", 10)) {
786 acpi_disable_pci();
787 }
788 /* "acpi=noirq" disables ACPI interrupt routing */
789 else if (!memcmp(from, "acpi=noirq", 10)) {
790 acpi_noirq_set();
791 }
792
793 else if (!memcmp(from, "acpi_sci=edge", 13))
794 acpi_sci_flags.trigger = 1;
795
796 else if (!memcmp(from, "acpi_sci=level", 14))
797 acpi_sci_flags.trigger = 3;
798
799 else if (!memcmp(from, "acpi_sci=high", 13))
800 acpi_sci_flags.polarity = 1;
801
802 else if (!memcmp(from, "acpi_sci=low", 12))
803 acpi_sci_flags.polarity = 3;
804
805#ifdef CONFIG_X86_IO_APIC
806 else if (!memcmp(from, "acpi_skip_timer_override", 24))
807 acpi_skip_timer_override = 1;
808#endif
809
810#ifdef CONFIG_X86_LOCAL_APIC
811 /* disable IO-APIC */
812 else if (!memcmp(from, "noapic", 6))
813 disable_ioapic_setup();
814#endif /* CONFIG_X86_LOCAL_APIC */
815#endif /* CONFIG_ACPI_BOOT */
816
817 /*
818 * highmem=size forces highmem to be exactly 'size' bytes.
819 * This works even on boxes that have no highmem otherwise.
820 * This also works to reduce highmem size on bigger boxes.
821 */
822 else if (!memcmp(from, "highmem=", 8))
823 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
824
825 /*
826 * vmalloc=size forces the vmalloc area to be exactly 'size'
827 * bytes. This can be used to increase (or decrease) the
828 * vmalloc area - the default is 128m.
829 */
830 else if (!memcmp(from, "vmalloc=", 8))
831 __VMALLOC_RESERVE = memparse(from+8, &from);
832
833 next_char:
834 c = *(from++);
835 if (!c)
836 break;
837 if (COMMAND_LINE_SIZE <= ++len)
838 break;
839 *(to++) = c;
840 }
841 *to = '\0';
842 *cmdline_p = command_line;
843 if (userdef) {
844 printk(KERN_INFO "user-defined physical RAM map:\n");
845 print_memory_map("user");
846 }
847}
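/*
 * Typical invocations of the options parsed above (illustrative):
 *
 *   mem=512M           trim the BIOS-provided map to the first 512MB
 *   mem=nopentium      keep the map, but disable 4MB page tables
 *   memmap=exactmap memmap=640K@0 memmap=511M@1M
 *                      discard the BIOS map and describe RAM explicitly
 */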
848
849/*
850	 * Callback for efi_memmap_walk().
851 */
852static int __init
853efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
854{
855 unsigned long *max_pfn = arg, pfn;
856
857 if (start < end) {
858 pfn = PFN_UP(end -1);
859 if (pfn > *max_pfn)
860 *max_pfn = pfn;
861 }
862 return 0;
863}
864
865
866/*
867 * Find the highest page frame number we have available
868 */
869void __init find_max_pfn(void)
870{
871 int i;
872
873 max_pfn = 0;
874 if (efi_enabled) {
875 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
876 return;
877 }
878
879 for (i = 0; i < e820.nr_map; i++) {
880 unsigned long start, end;
881 /* RAM? */
882 if (e820.map[i].type != E820_RAM)
883 continue;
884 start = PFN_UP(e820.map[i].addr);
885 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
886 if (start >= end)
887 continue;
888 if (end > max_pfn)
889 max_pfn = end;
890 }
891}
892
893/*
894 * Determine low and high memory ranges:
895 */
896unsigned long __init find_max_low_pfn(void)
897{
898 unsigned long max_low_pfn;
899
900 max_low_pfn = max_pfn;
901 if (max_low_pfn > MAXMEM_PFN) {
902 if (highmem_pages == -1)
903 highmem_pages = max_pfn - MAXMEM_PFN;
904 if (highmem_pages + MAXMEM_PFN < max_pfn)
905 max_pfn = MAXMEM_PFN + highmem_pages;
906 if (highmem_pages + MAXMEM_PFN > max_pfn) {
907 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
908 highmem_pages = 0;
909 }
910 max_low_pfn = MAXMEM_PFN;
911#ifndef CONFIG_HIGHMEM
912 /* Maximum memory usable is what is directly addressable */
913 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
914 MAXMEM>>20);
915 if (max_pfn > MAX_NONPAE_PFN)
916 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
917 else
918 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
919 max_pfn = MAXMEM_PFN;
920#else /* !CONFIG_HIGHMEM */
921#ifndef CONFIG_X86_PAE
922 if (max_pfn > MAX_NONPAE_PFN) {
923 max_pfn = MAX_NONPAE_PFN;
924 printk(KERN_WARNING "Warning only 4GB will be used.\n");
925 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
926 }
927#endif /* !CONFIG_X86_PAE */
928#endif /* !CONFIG_HIGHMEM */
929 } else {
930 if (highmem_pages == -1)
931 highmem_pages = 0;
932#ifdef CONFIG_HIGHMEM
933 if (highmem_pages >= max_pfn) {
934 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
935 highmem_pages = 0;
936 }
937 if (highmem_pages) {
938 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
939 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
940 highmem_pages = 0;
941 }
942 max_low_pfn -= highmem_pages;
943 }
944#else
945 if (highmem_pages)
946 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
947#endif
948 }
949 return max_low_pfn;
950}
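/*
 * Example of the split above, assuming the usual 896MB MAXMEM (i.e.
 * MAXMEM_PFN == 229376 with 4K pages): on a 2GB box max_pfn is 524288,
 * so highmem_pages defaults to 524288 - 229376 = 294912 pages (1152MB)
 * and max_low_pfn is capped at 229376 (896MB of lowmem).
 */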
951
952/*
953 * Free all available memory for boot time allocation. Used
954	 * as a callback function by efi_memmap_walk().
955 */
956
957static int __init
958free_available_memory(unsigned long start, unsigned long end, void *arg)
959{
960 /* check max_low_pfn */
961 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
962 return 0;
963 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
964 end = (max_low_pfn + 1) << PAGE_SHIFT;
965 if (start < end)
966 free_bootmem(start, end - start);
967
968 return 0;
969}
970/*
971 * Register fully available low RAM pages with the bootmem allocator.
972 */
973static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
974{
975 int i;
976
977 if (efi_enabled) {
978 efi_memmap_walk(free_available_memory, NULL);
979 return;
980 }
981 for (i = 0; i < e820.nr_map; i++) {
982 unsigned long curr_pfn, last_pfn, size;
983 /*
984 * Reserve usable low memory
985 */
986 if (e820.map[i].type != E820_RAM)
987 continue;
988 /*
989 * We are rounding up the start address of usable memory:
990 */
991 curr_pfn = PFN_UP(e820.map[i].addr);
992 if (curr_pfn >= max_low_pfn)
993 continue;
994 /*
995 * ... and at the end of the usable range downwards:
996 */
997 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
998
999 if (last_pfn > max_low_pfn)
1000 last_pfn = max_low_pfn;
1001
1002 /*
1003 * .. finally, did all the rounding and playing
1004 * around just make the area go away?
1005 */
1006 if (last_pfn <= curr_pfn)
1007 continue;
1008
1009 size = last_pfn - curr_pfn;
1010 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1011 }
1012}
1013
1014/*
1015 * workaround for Dell systems that neglect to reserve EBDA
1016 */
1017static void __init reserve_ebda_region(void)
1018{
1019 unsigned int addr;
1020 addr = get_bios_ebda();
1021 if (addr)
1022 reserve_bootmem(addr, PAGE_SIZE);
1023}
1024
1025#ifndef CONFIG_DISCONTIGMEM
1026void __init setup_bootmem_allocator(void);
1027static unsigned long __init setup_memory(void)
1028{
1029 /*
1030 * partially used pages are not usable - thus
1031 * we are rounding upwards:
1032 */
1033 min_low_pfn = PFN_UP(init_pg_tables_end);
1034
1035 find_max_pfn();
1036
1037 max_low_pfn = find_max_low_pfn();
1038
1039#ifdef CONFIG_HIGHMEM
1040 highstart_pfn = highend_pfn = max_pfn;
1041 if (max_pfn > max_low_pfn) {
1042 highstart_pfn = max_low_pfn;
1043 }
1044 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1045 pages_to_mb(highend_pfn - highstart_pfn));
1046#endif
1047 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1048 pages_to_mb(max_low_pfn));
1049
1050 setup_bootmem_allocator();
1051
1052 return max_low_pfn;
1053}
1054
1055void __init zone_sizes_init(void)
1056{
1057 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1058 unsigned int max_dma, low;
1059
1060 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
1061 low = max_low_pfn;
1062
1063 if (low < max_dma)
1064 zones_size[ZONE_DMA] = low;
1065 else {
1066 zones_size[ZONE_DMA] = max_dma;
1067 zones_size[ZONE_NORMAL] = low - max_dma;
1068#ifdef CONFIG_HIGHMEM
1069 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1070#endif
1071 }
1072 free_area_init(zones_size);
1073}
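/*
 * Example (assuming the usual 16MB ISA DMA limit, i.e. max_dma ==
 * 4096 pages): with 896MB of lowmem (max_low_pfn == 229376) the split
 * is ZONE_DMA = 4096 pages, ZONE_NORMAL = 225280 pages, and anything
 * above 896MB lands in ZONE_HIGHMEM.
 */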
1074#else
1075extern unsigned long setup_memory(void);
1076extern void zone_sizes_init(void);
1077#endif /* !CONFIG_DISCONTIGMEM */
1078
1079void __init setup_bootmem_allocator(void)
1080{
1081 unsigned long bootmap_size;
1082 /*
1083 * Initialize the boot-time allocator (with low memory only):
1084 */
1085 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1086
1087 register_bootmem_low_pages(max_low_pfn);
1088
1089 /*
1090 * Reserve the bootmem bitmap itself as well. We do this in two
1091 * steps (first step was init_bootmem()) because this catches
1092 * the (very unlikely) case of us accidentally initializing the
1093 * bootmem allocator with an invalid RAM area.
1094 */
1095 reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
1096 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
1097
1098 /*
1099 * reserve physical page 0 - it's a special BIOS page on many boxes,
1100 * enabling clean reboots, SMP operation, laptop functions.
1101 */
1102 reserve_bootmem(0, PAGE_SIZE);
1103
1104 /* reserve EBDA region, it's a 4K region */
1105 reserve_ebda_region();
1106
1107 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1108	   PCI prefetch into it (errata #56). Usually the page is reserved anyway,
1109 unless you have no PS/2 mouse plugged in. */
1110 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1111 boot_cpu_data.x86 == 6)
1112 reserve_bootmem(0xa0000 - 4096, 4096);
1113
1114#ifdef CONFIG_SMP
1115 /*
1116 * But first pinch a few for the stack/trampoline stuff
1117 * FIXME: Don't need the extra page at 4K, but need to fix
1118 * trampoline before removing it. (see the GDT stuff)
1119 */
1120 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1121#endif
1122#ifdef CONFIG_ACPI_SLEEP
1123 /*
1124 * Reserve low memory region for sleep support.
1125 */
1126 acpi_reserve_bootmem();
1127#endif
1128#ifdef CONFIG_X86_FIND_SMP_CONFIG
1129 /*
1130 * Find and reserve possible boot-time SMP configuration:
1131 */
1132 find_smp_config();
1133#endif
1134
1135#ifdef CONFIG_BLK_DEV_INITRD
1136 if (LOADER_TYPE && INITRD_START) {
1137 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1138 reserve_bootmem(INITRD_START, INITRD_SIZE);
1139 initrd_start =
1140 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
1141 initrd_end = initrd_start+INITRD_SIZE;
1142 }
1143 else {
1144 printk(KERN_ERR "initrd extends beyond end of memory "
1145 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1146 INITRD_START + INITRD_SIZE,
1147 max_low_pfn << PAGE_SHIFT);
1148 initrd_start = 0;
1149 }
1150 }
1151#endif
1152}
1153
1154/*
1155 * The node 0 pgdat is initialized before all of these because
1156 * it's needed for bootmem. node>0 pgdats have their virtual
1157 * space allocated before the pagetables are in place to access
1158 * them, so they can't be cleared then.
1159 *
1160 * This should all compile down to nothing when NUMA is off.
1161 */
1162void __init remapped_pgdat_init(void)
1163{
1164 int nid;
1165
1166 for_each_online_node(nid) {
1167 if (nid != 0)
1168 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1169 }
1170}
1171
1172/*
1173 * Request address space for all standard RAM and ROM resources
1174 * and also for regions reported as reserved by the e820.
1175 */
1176static void __init
1177legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1178{
1179 int i;
1180
1181 probe_roms();
1182 for (i = 0; i < e820.nr_map; i++) {
1183 struct resource *res;
1184 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1185 continue;
1186 res = alloc_bootmem_low(sizeof(struct resource));
1187 switch (e820.map[i].type) {
1188 case E820_RAM: res->name = "System RAM"; break;
1189 case E820_ACPI: res->name = "ACPI Tables"; break;
1190 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1191 default: res->name = "reserved";
1192 }
1193 res->start = e820.map[i].addr;
1194 res->end = res->start + e820.map[i].size - 1;
1195 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1196 request_resource(&iomem_resource, res);
1197 if (e820.map[i].type == E820_RAM) {
1198 /*
1199 * We don't know which RAM region contains kernel data,
1200 * so we try it repeatedly and let the resource manager
1201 * test it.
1202 */
1203 request_resource(res, code_resource);
1204 request_resource(res, data_resource);
1205 }
1206 }
1207}
1208
1209/*
1210 * Request address space for all standard resources
1211 */
1212static void __init register_memory(void)
1213{
1214 unsigned long gapstart, gapsize;
1215 unsigned long long last;
1216 int i;
1217
1218 if (efi_enabled)
1219 efi_initialize_iomem_resources(&code_resource, &data_resource);
1220 else
1221 legacy_init_iomem_resources(&code_resource, &data_resource);
1222
1223 /* EFI systems may still have VGA */
1224 request_resource(&iomem_resource, &video_ram_resource);
1225
1226 /* request I/O space for devices used on all i[345]86 PCs */
1227 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1228 request_resource(&ioport_resource, &standard_io_resources[i]);
1229
1230 /*
1231	 * Search for the biggest gap in the low 32 bits of the e820
1232 * memory space.
1233 */
1234 last = 0x100000000ull;
1235 gapstart = 0x10000000;
1236 gapsize = 0x400000;
1237 i = e820.nr_map;
1238 while (--i >= 0) {
1239 unsigned long long start = e820.map[i].addr;
1240 unsigned long long end = start + e820.map[i].size;
1241
1242 /*
1243 * Since "last" is at most 4GB, we know we'll
1244 * fit in 32 bits if this condition is true
1245 */
1246 if (last > end) {
1247 unsigned long gap = last - end;
1248
1249 if (gap > gapsize) {
1250 gapsize = gap;
1251 gapstart = end;
1252 }
1253 }
1254 if (start < last)
1255 last = start;
1256 }
1257
1258 /*
1259 * Start allocating dynamic PCI memory a bit into the gap,
1260 * aligned up to the nearest megabyte.
1261 *
1262 * Question: should we try to pad it up a bit (do something
1263 * like " + (gapsize >> 3)" in there too?). We now have the
1264 * technology.
1265 */
1266 pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
1267
1268 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1269 pci_mem_start, gapstart, gapsize);
1270}
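/*
 * Example of the gap search above (hypothetical map): if the highest
 * e820 RAM entry ends at 0xdfe80000, the gap up to 0x100000000 wins,
 * so gapstart = 0xdfe80000 and pci_mem_start rounds up to the next
 * megabyte boundary, 0xdff00000.
 */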
1271
1272/* Use inline assembly to define this because the nops are defined
1273 as inline assembly strings in the include files and we cannot
1274 get them easily into strings. */
1275asm("\t.data\nintelnops: "
1276 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1277 GENERIC_NOP7 GENERIC_NOP8);
1278asm("\t.data\nk8nops: "
1279 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1280 K8_NOP7 K8_NOP8);
1281asm("\t.data\nk7nops: "
1282 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1283 K7_NOP7 K7_NOP8);
1284
1285extern unsigned char intelnops[], k8nops[], k7nops[];
1286static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1287 NULL,
1288 intelnops,
1289 intelnops + 1,
1290 intelnops + 1 + 2,
1291 intelnops + 1 + 2 + 3,
1292 intelnops + 1 + 2 + 3 + 4,
1293 intelnops + 1 + 2 + 3 + 4 + 5,
1294 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1295 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1296};
1297static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1298 NULL,
1299 k8nops,
1300 k8nops + 1,
1301 k8nops + 1 + 2,
1302 k8nops + 1 + 2 + 3,
1303 k8nops + 1 + 2 + 3 + 4,
1304 k8nops + 1 + 2 + 3 + 4 + 5,
1305 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1306 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1307};
1308static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1309 NULL,
1310 k7nops,
1311 k7nops + 1,
1312 k7nops + 1 + 2,
1313 k7nops + 1 + 2 + 3,
1314 k7nops + 1 + 2 + 3 + 4,
1315 k7nops + 1 + 2 + 3 + 4 + 5,
1316 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1317 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1318};
1319static struct nop {
1320 int cpuid;
1321 unsigned char **noptable;
1322} noptypes[] = {
1323 { X86_FEATURE_K8, k8_nops },
1324 { X86_FEATURE_K7, k7_nops },
1325 { -1, NULL }
1326};
1327
1328/* Replace instructions with better alternatives for this CPU type.
1329
1330 This runs before SMP is initialized to avoid SMP problems with
1331   self-modifying code. This implies that asymmetric systems where
1332   APs have fewer capabilities than the boot processor are not handled.
1333 In this case boot with "noreplacement". */
1334void apply_alternatives(void *start, void *end)
1335{
1336 struct alt_instr *a;
1337 int diff, i, k;
1338 unsigned char **noptable = intel_nops;
1339 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1340 if (boot_cpu_has(noptypes[i].cpuid)) {
1341 noptable = noptypes[i].noptable;
1342 break;
1343 }
1344 }
1345 for (a = start; (void *)a < end; a++) {
1346 if (!boot_cpu_has(a->cpuid))
1347 continue;
1348 BUG_ON(a->replacementlen > a->instrlen);
1349 memcpy(a->instr, a->replacement, a->replacementlen);
1350 diff = a->instrlen - a->replacementlen;
1351 /* Pad the rest with nops */
1352 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1353 k = diff;
1354 if (k > ASM_NOP_MAX)
1355 k = ASM_NOP_MAX;
1356 memcpy(a->instr + i, noptable[k], k);
1357 }
1358 }
1359}
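/*
 * Padding example (illustrative): replacing a 6-byte instruction with
 * a 3-byte alternative leaves diff = 3, so one 3-byte entry from the
 * CPU's nop table (noptable[3]) fills the tail in a single pass;
 * longer gaps are filled ASM_NOP_MAX bytes at a time.
 */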
1360
1361static int no_replacement __initdata = 0;
1362
1363void __init alternative_instructions(void)
1364{
1365 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1366 if (no_replacement)
1367 return;
1368 apply_alternatives(__alt_instructions, __alt_instructions_end);
1369}
1370
1371static int __init noreplacement_setup(char *s)
1372{
1373 no_replacement = 1;
1374 return 0;
1375}
1376
1377__setup("noreplacement", noreplacement_setup);
1378
1379static char * __init machine_specific_memory_setup(void);
1380
1381#ifdef CONFIG_MCA
1382static void set_mca_bus(int x)
1383{
1384 MCA_bus = x;
1385}
1386#else
1387static void set_mca_bus(int x) { }
1388#endif
1389
1390/*
1391 * Determine if we were loaded by an EFI loader. If so, then we have also been
1392 * passed the efi memmap, systab, etc., so we should use these data structures
1393 * for initialization. Note, the efi init code path is determined by the
1394 * global efi_enabled. This allows the same kernel image to be used on existing
1395 * systems (with a traditional BIOS) as well as on EFI systems.
1396 */
1397void __init setup_arch(char **cmdline_p)
1398{
1399 unsigned long max_low_pfn;
1400
1401 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1402 pre_setup_arch_hook();
1403 early_cpu_init();
1404
1405 /*
1406 * FIXME: This isn't an official loader_type right
1407 * now but does currently work with elilo.
1408 * If we were configured as an EFI kernel, check to make
1409 * sure that we were loaded correctly from elilo and that
1410 * the system table is valid. If not, then initialize normally.
1411 */
1412#ifdef CONFIG_EFI
1413 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1414 efi_enabled = 1;
1415#endif
1416
1417 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1418 drive_info = DRIVE_INFO;
1419 screen_info = SCREEN_INFO;
1420 edid_info = EDID_INFO;
1421 apm_info.bios = APM_BIOS_INFO;
1422 ist_info = IST_INFO;
1423 saved_videomode = VIDEO_MODE;
1424 if( SYS_DESC_TABLE.length != 0 ) {
1425 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1426 machine_id = SYS_DESC_TABLE.table[0];
1427 machine_submodel_id = SYS_DESC_TABLE.table[1];
1428 BIOS_revision = SYS_DESC_TABLE.table[2];
1429 }
1430 bootloader_type = LOADER_TYPE;
1431
1432#ifdef CONFIG_BLK_DEV_RAM
1433 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1434 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1435 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1436#endif
1437 ARCH_SETUP
1438 if (efi_enabled)
1439 efi_init();
1440 else {
1441 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1442 print_memory_map(machine_specific_memory_setup());
1443 }
1444
1445 copy_edd();
1446
1447 if (!MOUNT_ROOT_RDONLY)
1448 root_mountflags &= ~MS_RDONLY;
1449 init_mm.start_code = (unsigned long) _text;
1450 init_mm.end_code = (unsigned long) _etext;
1451 init_mm.end_data = (unsigned long) _edata;
1452 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1453
1454 code_resource.start = virt_to_phys(_text);
1455 code_resource.end = virt_to_phys(_etext)-1;
1456 data_resource.start = virt_to_phys(_etext);
1457 data_resource.end = virt_to_phys(_edata)-1;
1458
1459 parse_cmdline_early(cmdline_p);
1460
1461 max_low_pfn = setup_memory();
1462
1463 /*
1464 * NOTE: before this point _nobody_ is allowed to allocate
1465 * any memory using the bootmem allocator. Although the
1466	 * allocator is now initialised, only the first 8MB of the kernel
1467	 * virtual address space has been mapped. All allocations before
1468	 * paging_init() has completed must use the alloc_bootmem_low_pages()
1469	 * variant (which allocates DMA'able memory) and care must be taken
1470	 * not to exceed the 8MB limit.
1471 */
1472
1473#ifdef CONFIG_SMP
1474 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1475#endif
1476 paging_init();
1477 remapped_pgdat_init();
1478 zone_sizes_init();
1479
1480 /*
1481 * NOTE: at this point the bootmem allocator is fully available.
1482 */
1483
1484#ifdef CONFIG_EARLY_PRINTK
1485 {
1486 char *s = strstr(*cmdline_p, "earlyprintk=");
1487 if (s) {
1488 extern void setup_early_printk(char *);
1489
1490 setup_early_printk(s);
1491 printk("early console enabled\n");
1492 }
1493 }
1494#endif
1495
1496
1497 dmi_scan_machine();
1498
1499#ifdef CONFIG_X86_GENERICARCH
1500 generic_apic_probe(*cmdline_p);
1501#endif
1502 if (efi_enabled)
1503 efi_map_memmap();
1504
1505 /*
1506 * Parse the ACPI tables for possible boot-time SMP configuration.
1507 */
1508 acpi_boot_table_init();
1509 acpi_boot_init();
1510
1511#ifdef CONFIG_X86_LOCAL_APIC
1512 if (smp_found_config)
1513 get_smp_config();
1514#endif
1515
1516 register_memory();
1517
1518#ifdef CONFIG_VT
1519#if defined(CONFIG_VGA_CONSOLE)
1520 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1521 conswitchp = &vga_con;
1522#elif defined(CONFIG_DUMMY_CONSOLE)
1523 conswitchp = &dummy_con;
1524#endif
1525#endif
1526}
1527
1528#include "setup_arch_post.h"
1529/*
1530 * Local Variables:
1531 * mode:c
1532 * c-file-style:"k&r"
1533 * c-basic-offset:8
1534 * End:
1535 */
diff --git a/arch/i386/kernel/sigframe.h b/arch/i386/kernel/sigframe.h
new file mode 100644
index 000000000000..d21b14f5c25c
--- /dev/null
+++ b/arch/i386/kernel/sigframe.h
@@ -0,0 +1,21 @@
1struct sigframe
2{
3 char *pretcode;
4 int sig;
5 struct sigcontext sc;
6 struct _fpstate fpstate;
7 unsigned long extramask[_NSIG_WORDS-1];
8 char retcode[8];
9};
10
11struct rt_sigframe
12{
13 char *pretcode;
14 int sig;
15 struct siginfo *pinfo;
16 void *puc;
17 struct siginfo info;
18 struct ucontext uc;
19 struct _fpstate fpstate;
20 char retcode[8];
21};
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
new file mode 100644
index 000000000000..ef3602e1c052
--- /dev/null
+++ b/arch/i386/kernel/signal.c
@@ -0,0 +1,665 @@
1/*
2 * linux/arch/i386/kernel/signal.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 *
6 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
7 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
8 */
9
10#include <linux/sched.h>
11#include <linux/mm.h>
12#include <linux/smp.h>
13#include <linux/smp_lock.h>
14#include <linux/kernel.h>
15#include <linux/signal.h>
16#include <linux/errno.h>
17#include <linux/wait.h>
18#include <linux/unistd.h>
19#include <linux/stddef.h>
20#include <linux/personality.h>
21#include <linux/suspend.h>
22#include <linux/ptrace.h>
23#include <linux/elf.h>
24#include <asm/processor.h>
25#include <asm/ucontext.h>
26#include <asm/uaccess.h>
27#include <asm/i387.h>
28#include "sigframe.h"
29
30#define DEBUG_SIG 0
31
32#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
33
34/*
35 * Atomically swap in the new signal mask, and wait for a signal.
36 */
37asmlinkage int
38sys_sigsuspend(int history0, int history1, old_sigset_t mask)
39{
40 struct pt_regs * regs = (struct pt_regs *) &history0;
41 sigset_t saveset;
42
43 mask &= _BLOCKABLE;
44 spin_lock_irq(&current->sighand->siglock);
45 saveset = current->blocked;
46 siginitset(&current->blocked, mask);
47 recalc_sigpending();
48 spin_unlock_irq(&current->sighand->siglock);
49
50 regs->eax = -EINTR;
51 while (1) {
52 current->state = TASK_INTERRUPTIBLE;
53 schedule();
54 if (do_signal(regs, &saveset))
55 return -EINTR;
56 }
57}
58
59asmlinkage int
60sys_rt_sigsuspend(struct pt_regs regs)
61{
62 sigset_t saveset, newset;
63
64 /* XXX: Don't preclude handling different sized sigset_t's. */
65 if (regs.ecx != sizeof(sigset_t))
66 return -EINVAL;
67
68 if (copy_from_user(&newset, (sigset_t __user *)regs.ebx, sizeof(newset)))
69 return -EFAULT;
70 sigdelsetmask(&newset, ~_BLOCKABLE);
71
72 spin_lock_irq(&current->sighand->siglock);
73 saveset = current->blocked;
74 current->blocked = newset;
75 recalc_sigpending();
76 spin_unlock_irq(&current->sighand->siglock);
77
78 regs.eax = -EINTR;
79 while (1) {
80 current->state = TASK_INTERRUPTIBLE;
81 schedule();
82 if (do_signal(&regs, &saveset))
83 return -EINTR;
84 }
85}
86
87asmlinkage int
88sys_sigaction(int sig, const struct old_sigaction __user *act,
89 struct old_sigaction __user *oact)
90{
91 struct k_sigaction new_ka, old_ka;
92 int ret;
93
94 if (act) {
95 old_sigset_t mask;
96 if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
97 __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
98 __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
99 return -EFAULT;
100 __get_user(new_ka.sa.sa_flags, &act->sa_flags);
101 __get_user(mask, &act->sa_mask);
102 siginitset(&new_ka.sa.sa_mask, mask);
103 }
104
105 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
106
107 if (!ret && oact) {
108 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
109 __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
110 __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
111 return -EFAULT;
112 __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
113 __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
114 }
115
116 return ret;
117}
118
119asmlinkage int
120sys_sigaltstack(unsigned long ebx)
121{
122 /* This is needed to make gcc realize it doesn't own the "struct pt_regs" */
123 struct pt_regs *regs = (struct pt_regs *)&ebx;
124 const stack_t __user *uss = (const stack_t __user *)ebx;
125 stack_t __user *uoss = (stack_t __user *)regs->ecx;
126
127 return do_sigaltstack(uss, uoss, regs->esp);
128}
129
130
131/*
132 * Do a signal return; undo the signal stack.
133 */
134
135static int
136restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax)
137{
138 unsigned int err = 0;
139
140 /* Always make any pending restarted system calls return -EINTR */
141 current_thread_info()->restart_block.fn = do_no_restart_syscall;
142
143#define COPY(x) err |= __get_user(regs->x, &sc->x)
144
145#define COPY_SEG(seg) \
146 { unsigned short tmp; \
147 err |= __get_user(tmp, &sc->seg); \
148 regs->x##seg = tmp; }
149
150#define COPY_SEG_STRICT(seg) \
151 { unsigned short tmp; \
152 err |= __get_user(tmp, &sc->seg); \
153 regs->x##seg = tmp|3; }
154
155#define GET_SEG(seg) \
156 { unsigned short tmp; \
157 err |= __get_user(tmp, &sc->seg); \
158 loadsegment(seg,tmp); }
159
160#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \
161 X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \
162 X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF)
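/*
 * The "|3" in COPY_SEG_STRICT pins the requested privilege level of
 * %cs and %ss to ring 3, and FIX_EFLAGS restores only arithmetic,
 * direction and trap bits, so a forged sigcontext cannot smuggle in
 * kernel segment selectors or privileged bits such as IOPL.
 */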
163
164 GET_SEG(gs);
165 GET_SEG(fs);
166 COPY_SEG(es);
167 COPY_SEG(ds);
168 COPY(edi);
169 COPY(esi);
170 COPY(ebp);
171 COPY(esp);
172 COPY(ebx);
173 COPY(edx);
174 COPY(ecx);
175 COPY(eip);
176 COPY_SEG_STRICT(cs);
177 COPY_SEG_STRICT(ss);
178
179 {
180 unsigned int tmpflags;
181 err |= __get_user(tmpflags, &sc->eflags);
182 regs->eflags = (regs->eflags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
183 regs->orig_eax = -1; /* disable syscall checks */
184 }
185
186 {
187 struct _fpstate __user * buf;
188 err |= __get_user(buf, &sc->fpstate);
189 if (buf) {
190 if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
191 goto badframe;
192 err |= restore_i387(buf);
193 } else {
194 struct task_struct *me = current;
195 if (used_math()) {
196 clear_fpu(me);
197 clear_used_math();
198 }
199 }
200 }
201
202 err |= __get_user(*peax, &sc->eax);
203 return err;
204
205badframe:
206 return 1;
207}
208
209asmlinkage int sys_sigreturn(unsigned long __unused)
210{
211 struct pt_regs *regs = (struct pt_regs *) &__unused;
212 struct sigframe __user *frame = (struct sigframe __user *)(regs->esp - 8);
213 sigset_t set;
214 int eax;
215
216 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
217 goto badframe;
218 if (__get_user(set.sig[0], &frame->sc.oldmask)
219 || (_NSIG_WORDS > 1
220 && __copy_from_user(&set.sig[1], &frame->extramask,
221 sizeof(frame->extramask))))
222 goto badframe;
223
224 sigdelsetmask(&set, ~_BLOCKABLE);
225 spin_lock_irq(&current->sighand->siglock);
226 current->blocked = set;
227 recalc_sigpending();
228 spin_unlock_irq(&current->sighand->siglock);
229
230 if (restore_sigcontext(regs, &frame->sc, &eax))
231 goto badframe;
232 return eax;
233
234badframe:
235 force_sig(SIGSEGV, current);
236 return 0;
237}
238
239asmlinkage int sys_rt_sigreturn(unsigned long __unused)
240{
241 struct pt_regs *regs = (struct pt_regs *) &__unused;
242 struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->esp - 4);
243 sigset_t set;
244 int eax;
245
246 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
247 goto badframe;
248 if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
249 goto badframe;
250
251 sigdelsetmask(&set, ~_BLOCKABLE);
252 spin_lock_irq(&current->sighand->siglock);
253 current->blocked = set;
254 recalc_sigpending();
255 spin_unlock_irq(&current->sighand->siglock);
256
257 if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
258 goto badframe;
259
260 if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->esp) == -EFAULT)
261 goto badframe;
262
263 return eax;
264
265badframe:
266 force_sig(SIGSEGV, current);
267 return 0;
268}
269
270/*
271 * Set up a signal frame.
272 */
273
274static int
275setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
276 struct pt_regs *regs, unsigned long mask)
277{
278 int tmp, err = 0;
279
280 tmp = 0;
281 __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
282 err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
283 __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
284 err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
285
286 err |= __put_user(regs->xes, (unsigned int __user *)&sc->es);
287 err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds);
288 err |= __put_user(regs->edi, &sc->edi);
289 err |= __put_user(regs->esi, &sc->esi);
290 err |= __put_user(regs->ebp, &sc->ebp);
291 err |= __put_user(regs->esp, &sc->esp);
292 err |= __put_user(regs->ebx, &sc->ebx);
293 err |= __put_user(regs->edx, &sc->edx);
294 err |= __put_user(regs->ecx, &sc->ecx);
295 err |= __put_user(regs->eax, &sc->eax);
296 err |= __put_user(current->thread.trap_no, &sc->trapno);
297 err |= __put_user(current->thread.error_code, &sc->err);
298 err |= __put_user(regs->eip, &sc->eip);
299 err |= __put_user(regs->xcs, (unsigned int __user *)&sc->cs);
300 err |= __put_user(regs->eflags, &sc->eflags);
301 err |= __put_user(regs->esp, &sc->esp_at_signal);
302 err |= __put_user(regs->xss, (unsigned int __user *)&sc->ss);
303
304 tmp = save_i387(fpstate);
305 if (tmp < 0)
306 err = 1;
307 else
308 err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
309
310 /* non-iBCS2 extensions.. */
311 err |= __put_user(mask, &sc->oldmask);
312 err |= __put_user(current->thread.cr2, &sc->cr2);
313
314 return err;
315}
316
317/*
318 * Determine which stack to use..
319 */
320static inline void __user *
321get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
322{
323 unsigned long esp;
324
325 /* Default to using normal stack */
326 esp = regs->esp;
327
328 /* This is the X/Open sanctioned signal stack switching. */
329 if (ka->sa.sa_flags & SA_ONSTACK) {
330 if (sas_ss_flags(esp) == 0)
331 esp = current->sas_ss_sp + current->sas_ss_size;
332 }
333
334 /* This is the legacy signal stack switching. */
335 else if ((regs->xss & 0xffff) != __USER_DS &&
336 !(ka->sa.sa_flags & SA_RESTORER) &&
337 ka->sa.sa_restorer) {
338 esp = (unsigned long) ka->sa.sa_restorer;
339 }
340
341 return (void __user *)((esp - frame_size) & -8ul);
342}
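/*
 * Editorial sketch (not in the original source): the final expression
 * rounds the frame start down to an 8-byte boundary. For example, with
 * esp = 0xbffff123 and frame_size = 0x2e0:
 *
 *	(0xbffff123 - 0x2e0) & -8ul  ==  0xbfffee43 & 0xfffffff8
 *	                             ==  0xbfffee40
 *
 * so the signal frame is always 8-byte aligned below the chosen stack.
 */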
343
344/* These symbols are defined with the addresses in the vsyscall page.
345 See vsyscall-sigreturn.S. */
346extern void __user __kernel_sigreturn;
347extern void __user __kernel_rt_sigreturn;
348
349static void setup_frame(int sig, struct k_sigaction *ka,
350 sigset_t *set, struct pt_regs * regs)
351{
352 void __user *restorer;
353 struct sigframe __user *frame;
354 int err = 0;
355 int usig;
356
357 frame = get_sigframe(ka, regs, sizeof(*frame));
358
359 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
360 goto give_sigsegv;
361
362 usig = current_thread_info()->exec_domain
363 && current_thread_info()->exec_domain->signal_invmap
364 && sig < 32
365 ? current_thread_info()->exec_domain->signal_invmap[sig]
366 : sig;
367
368 err = __put_user(usig, &frame->sig);
369 if (err)
370 goto give_sigsegv;
371
372 err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]);
373 if (err)
374 goto give_sigsegv;
375
376 if (_NSIG_WORDS > 1) {
377 err = __copy_to_user(&frame->extramask, &set->sig[1],
378 sizeof(frame->extramask));
379 if (err)
380 goto give_sigsegv;
381 }
382
383 restorer = &__kernel_sigreturn;
384 if (ka->sa.sa_flags & SA_RESTORER)
385 restorer = ka->sa.sa_restorer;
386
387 /* Set up to return from userspace. */
388 err |= __put_user(restorer, &frame->pretcode);
389
390 /*
391 * This is popl %eax ; movl $,%eax ; int $0x80
392 *
393 * WE DO NOT USE IT ANY MORE! It's only left here for historical
394 * reasons and because gdb uses it as a signature to notice
395 * signal handler stack frames.
396 */
397 err |= __put_user(0xb858, (short __user *)(frame->retcode+0));
398 err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2));
399 err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
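	/*
	 * Illustrative decoding (editorial note): the stores above are
	 * little-endian, so retcode[] ends up holding
	 *
	 *	0x58                     popl %eax
	 *	0xb8 <__NR_sigreturn>    movl $__NR_sigreturn,%eax
	 *	0xcd 0x80                int  $0x80
	 *
	 * which is exactly the historical sequence described above.
	 */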
400
401 if (err)
402 goto give_sigsegv;
403
404 /* Set up registers for signal handler */
405 regs->esp = (unsigned long) frame;
406 regs->eip = (unsigned long) ka->sa.sa_handler;
407 regs->eax = (unsigned long) sig;
408 regs->edx = (unsigned long) 0;
409 regs->ecx = (unsigned long) 0;
410
411 set_fs(USER_DS);
412 regs->xds = __USER_DS;
413 regs->xes = __USER_DS;
414 regs->xss = __USER_DS;
415 regs->xcs = __USER_CS;
416
417 /*
418 * Clear TF when entering the signal handler, but
419 * notify any tracer that was single-stepping it.
420 * The tracer may want to single-step inside the
421 * handler too.
422 */
423 regs->eflags &= ~TF_MASK;
424 if (test_thread_flag(TIF_SINGLESTEP))
425 ptrace_notify(SIGTRAP);
426
427#if DEBUG_SIG
428 printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
429 current->comm, current->pid, frame, regs->eip, frame->pretcode);
430#endif
431
432 return;
433
434give_sigsegv:
435 force_sigsegv(sig, current);
436}
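/*
 * Editorial note: on entry to the handler, the stack built above looks
 * (top of stack first) roughly like
 *
 *	frame->pretcode   return address -> sigreturn trampoline
 *	frame->sig        the argument, i.e. handler(sig) via 4(%esp)
 *	frame->sc, ...    saved context, fpstate, extra mask, retcode
 *
 * so a plain "ret" from the handler lands in the restorer, which then
 * invokes sys_sigreturn() to restore the saved context.
 */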
437
438static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
439 sigset_t *set, struct pt_regs * regs)
440{
441 void __user *restorer;
442 struct rt_sigframe __user *frame;
443 int err = 0;
444 int usig;
445
446 frame = get_sigframe(ka, regs, sizeof(*frame));
447
448 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
449 goto give_sigsegv;
450
451 usig = current_thread_info()->exec_domain
452 && current_thread_info()->exec_domain->signal_invmap
453 && sig < 32
454 ? current_thread_info()->exec_domain->signal_invmap[sig]
455 : sig;
456
457 err |= __put_user(usig, &frame->sig);
458 err |= __put_user(&frame->info, &frame->pinfo);
459 err |= __put_user(&frame->uc, &frame->puc);
460 err |= copy_siginfo_to_user(&frame->info, info);
461 if (err)
462 goto give_sigsegv;
463
464 /* Create the ucontext. */
465 err |= __put_user(0, &frame->uc.uc_flags);
466 err |= __put_user(0, &frame->uc.uc_link);
467 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
468 err |= __put_user(sas_ss_flags(regs->esp),
469 &frame->uc.uc_stack.ss_flags);
470 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
471 err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
472 regs, set->sig[0]);
473 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
474 if (err)
475 goto give_sigsegv;
476
477 /* Set up to return from userspace. */
478 restorer = &__kernel_rt_sigreturn;
479 if (ka->sa.sa_flags & SA_RESTORER)
480 restorer = ka->sa.sa_restorer;
481 err |= __put_user(restorer, &frame->pretcode);
482
483 /*
484 * This is movl $,%eax ; int $0x80
485 *
486 * WE DO NOT USE IT ANY MORE! It's only left here for historical
487 * reasons and because gdb uses it as a signature to notice
488 * signal handler stack frames.
489 */
490 err |= __put_user(0xb8, (char __user *)(frame->retcode+0));
491 err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1));
492 err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
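	/*
	 * Illustrative decoding, as for setup_frame(): here the bytes are
	 *
	 *	0xb8 <__NR_rt_sigreturn>   movl $__NR_rt_sigreturn,%eax
	 *	0xcd 0x80                  int  $0x80
	 *
	 * (note there is no leading popl %eax in the rt variant).
	 */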
493
494 if (err)
495 goto give_sigsegv;
496
497 /* Set up registers for signal handler */
498 regs->esp = (unsigned long) frame;
499 regs->eip = (unsigned long) ka->sa.sa_handler;
500 regs->eax = (unsigned long) usig;
501 regs->edx = (unsigned long) &frame->info;
502 regs->ecx = (unsigned long) &frame->uc;
503
504 set_fs(USER_DS);
505 regs->xds = __USER_DS;
506 regs->xes = __USER_DS;
507 regs->xss = __USER_DS;
508 regs->xcs = __USER_CS;
509
510 /*
511 * Clear TF when entering the signal handler, but
512 * notify any tracer that was single-stepping it.
513 * The tracer may want to single-step inside the
514 * handler too.
515 */
516 regs->eflags &= ~TF_MASK;
517 if (test_thread_flag(TIF_SINGLESTEP))
518 ptrace_notify(SIGTRAP);
519
520#if DEBUG_SIG
521 printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
522 current->comm, current->pid, frame, regs->eip, frame->pretcode);
523#endif
524
525 return;
526
527give_sigsegv:
528 force_sigsegv(sig, current);
529}
530
531/*
532 * OK, we're invoking a handler
533 */
534
535static void
536handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
537 sigset_t *oldset, struct pt_regs * regs)
538{
539 /* Are we from a system call? */
540 if (regs->orig_eax >= 0) {
541 /* If so, check system call restarting.. */
542 switch (regs->eax) {
543 case -ERESTART_RESTARTBLOCK:
544 case -ERESTARTNOHAND:
545 regs->eax = -EINTR;
546 break;
547
548 case -ERESTARTSYS:
549 if (!(ka->sa.sa_flags & SA_RESTART)) {
550 regs->eax = -EINTR;
551 break;
552 }
553 /* fallthrough */
554 case -ERESTARTNOINTR:
555 regs->eax = regs->orig_eax;
556 regs->eip -= 2;
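			/*
			 * (Editorial note) The decrement above backs the
			 * user over the two-byte trap instruction
			 * ("int $0x80", or sysenter via the vsyscall page)
			 * so the system call is re-issued when the
			 * handler returns.
			 */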

557 }
558 }
559
560 /*
561 * If TF is set due to a debugger (PT_DTRACE), clear the TF flag so
562 * that register information in the sigcontext is correct.
563 */
564 if (unlikely(regs->eflags & TF_MASK)
565 && likely(current->ptrace & PT_DTRACE)) {
566 current->ptrace &= ~PT_DTRACE;
567 regs->eflags &= ~TF_MASK;
568 }
569
570 /* Set up the stack frame */
571 if (ka->sa.sa_flags & SA_SIGINFO)
572 setup_rt_frame(sig, ka, info, oldset, regs);
573 else
574 setup_frame(sig, ka, oldset, regs);
575
576 if (!(ka->sa.sa_flags & SA_NODEFER)) {
577 spin_lock_irq(&current->sighand->siglock);
578 sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
579 sigaddset(&current->blocked, sig);
580 recalc_sigpending();
581 spin_unlock_irq(&current->sighand->siglock);
582 }
583}
584
585/*
586 * Note that 'init' is a special process: it doesn't get signals it doesn't
587 * want to handle. Thus you cannot kill init even with a SIGKILL even by
588 * mistake.
589 */
590int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
591{
592 siginfo_t info;
593 int signr;
594 struct k_sigaction ka;
595
596 /*
597 * We want the common case to go fast, which
598 * is why we may in certain cases get here from
599 * kernel mode. Just return without doing anything
600 * if so.
601 */
602 if ((regs->xcs & 3) != 3)
603 return 1;
604
605 if (current->flags & PF_FREEZE) {
606 refrigerator(0);
607 goto no_signal;
608 }
609
610 if (!oldset)
611 oldset = &current->blocked;
612
613 signr = get_signal_to_deliver(&info, &ka, regs, NULL);
614 if (signr > 0) {
615 /* Reenable any watchpoints before delivering the
616 * signal to user space. The processor's debug register will
617 * have been cleared if the watchpoint triggered
618 * inside the kernel.
619 */
620 if (unlikely(current->thread.debugreg[7])) {
621 __asm__("movl %0,%%db7" : : "r" (current->thread.debugreg[7]));
622 }
623
624 /* Whee! Actually deliver the signal. */
625 handle_signal(signr, &info, &ka, oldset, regs);
626 return 1;
627 }
628
629 no_signal:
630 /* Did we come from a system call? */
631 if (regs->orig_eax >= 0) {
632 /* Restart the system call - no handlers present */
633 if (regs->eax == -ERESTARTNOHAND ||
634 regs->eax == -ERESTARTSYS ||
635 regs->eax == -ERESTARTNOINTR) {
636 regs->eax = regs->orig_eax;
637 regs->eip -= 2;
638 }
639 if (regs->eax == -ERESTART_RESTARTBLOCK) {
640 regs->eax = __NR_restart_syscall;
641 regs->eip -= 2;
642 }
643 }
644 return 0;
645}
646
647/*
648 * notification of userspace execution resumption
649 * - triggered by current->work.notify_resume
650 */
651__attribute__((regparm(3)))
652void do_notify_resume(struct pt_regs *regs, sigset_t *oldset,
653 __u32 thread_info_flags)
654{
655 /* Pending single-step? */
656 if (thread_info_flags & _TIF_SINGLESTEP) {
657 regs->eflags |= TF_MASK;
658 clear_thread_flag(TIF_SINGLESTEP);
659 }
660 /* deal with pending signal delivery */
661 if (thread_info_flags & _TIF_SIGPENDING)
662 do_signal(regs, oldset);
663
664 clear_thread_flag(TIF_IRET);
665}
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
new file mode 100644
index 000000000000..6223c33ac91c
--- /dev/null
+++ b/arch/i386/kernel/smp.c
@@ -0,0 +1,612 @@
1/*
2 * Intel SMP support routines.
3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
5 * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
6 *
7 * This code is released under the GNU General Public License version 2 or
8 * later.
9 */
10
11#include <linux/init.h>
12
13#include <linux/mm.h>
14#include <linux/irq.h>
15#include <linux/delay.h>
16#include <linux/spinlock.h>
17#include <linux/smp_lock.h>
18#include <linux/kernel_stat.h>
19#include <linux/mc146818rtc.h>
20#include <linux/cache.h>
21#include <linux/interrupt.h>
22
23#include <asm/mtrr.h>
24#include <asm/tlbflush.h>
25#include <mach_apic.h>
26
27/*
28 * Some notes on x86 processor bugs affecting SMP operation:
29 *
30 * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
31 * The Linux implications for SMP are handled as follows:
32 *
33 * Pentium III / [Xeon]
34 * None of the E1AP-E3AP errata are visible to the user.
35 *
36 * E1AP. see PII A1AP
37 * E2AP. see PII A2AP
38 * E3AP. see PII A3AP
39 *
40 * Pentium II / [Xeon]
41 * None of the A1AP-A3AP errata are visible to the user.
42 *
43 * A1AP. see PPro 1AP
44 * A2AP. see PPro 2AP
45 * A3AP. see PPro 7AP
46 *
47 * Pentium Pro
48 * None of 1AP-9AP errata are visible to the normal user,
49 * except occasional delivery of 'spurious interrupt' as trap #15.
50 * This is very rare and a non-problem.
51 *
52 * 1AP. Linux maps APIC as non-cacheable
53 * 2AP. worked around in hardware
54 * 3AP. fixed in C0 and above steppings microcode update.
55 * Linux does not use excessive STARTUP_IPIs.
56 * 4AP. worked around in hardware
57 * 5AP. symmetric IO mode (normal Linux operation) not affected.
58 * 'noapic' mode has vector 0xf filled out properly.
59 * 6AP. 'noapic' mode might be affected - fixed in later steppings
60 * 7AP. We do not assume writes to the LVT deasserting IRQs
61 * 8AP. We do not enable low power mode (deep sleep) during MP bootup
62 * 9AP. We do not use mixed mode
63 *
64 * Pentium
65 * There is a marginal case where REP MOVS on 100MHz SMP
66 * machines with B stepping processors can fail. XXX should provide
67 * an L1cache=Writethrough or L1cache=off option.
68 *
69 * B stepping CPUs may hang. There are hardware workarounds
70 * for this. We warn about it in case your board doesn't have the
71 * workarounds. Basically that's so I can tell anyone with a B stepping
72 * CPU and SMP problems "tough".
73 *
74 * Specific items [From Pentium Processor Specification Update]
75 *
76 * 1AP. Linux doesn't use remote read
77 * 2AP. Linux doesn't trust APIC errors
78 * 3AP. We work around this
79 * 4AP. Linux never generated 3 interrupts of the same priority
80 * to cause a lost local interrupt.
81 * 5AP. Remote read is never used
82 * 6AP. not affected - worked around in hardware
83 * 7AP. not affected - worked around in hardware
84 * 8AP. worked around in hardware - we get explicit CS errors if not
85 * 9AP. only 'noapic' mode affected. Might generate spurious
86 * interrupts, we log only the first one and count the
87 * rest silently.
88 * 10AP. not affected - worked around in hardware
89 * 11AP. Linux reads the APIC between writes to avoid this, as per
90 * the documentation. Make sure you preserve this as it affects
91 * the C stepping chips too.
92 * 12AP. not affected - worked around in hardware
93 * 13AP. not affected - worked around in hardware
94 * 14AP. we always deassert INIT during bootup
95 * 15AP. not affected - worked around in hardware
96 * 16AP. not affected - worked around in hardware
97 * 17AP. not affected - worked around in hardware
98 * 18AP. not affected - worked around in hardware
99 * 19AP. not affected - worked around in BIOS
100 *
101 * If this sounds worrying believe me these bugs are either ___RARE___,
102 * or are signal timing bugs worked around in hardware and there's
103 * about nothing of note with C stepping upwards.
104 */
105
106DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
107
108/*
109 * the following functions deal with sending IPIs between CPUs.
110 *
111 * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
112 */
113
114static inline int __prepare_ICR (unsigned int shortcut, int vector)
115{
116 return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
117}
118
119static inline int __prepare_ICR2 (unsigned int mask)
120{
121 return SET_APIC_DEST_FIELD(mask);
122}
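/*
 * Layout sketch (editorial, from the local APIC register format):
 * __prepare_ICR() packs the vector into bits 0-7 of the low ICR word,
 * with the delivery mode, logical-destination bit and any shorthand
 * (bits 18-19) above it; __prepare_ICR2() places the destination
 * (logical mask or APIC ID) in bits 24-31 of ICR2, which is why
 * SET_APIC_DEST_FIELD() is just a shift by 24.
 */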
123
124void __send_IPI_shortcut(unsigned int shortcut, int vector)
125{
126 /*
127 * Subtle. In the case of the 'never do double writes' workaround
128 * we have to lock out interrupts to be safe. As we don't care
129 * of the value read we use an atomic rmw access to avoid costly
130 * cli/sti. Otherwise we use an even cheaper single atomic write
131 * to the APIC.
132 */
133 unsigned int cfg;
134
135 /*
136 * Wait for idle.
137 */
138 apic_wait_icr_idle();
139
140 /*
141 * No need to touch the target chip field
142 */
143 cfg = __prepare_ICR(shortcut, vector);
144
145 /*
146 * Send the IPI. The write to APIC_ICR fires this off.
147 */
148 apic_write_around(APIC_ICR, cfg);
149}
150
151void fastcall send_IPI_self(int vector)
152{
153 __send_IPI_shortcut(APIC_DEST_SELF, vector);
154}
155
156/*
157 * This is only used on smaller machines.
158 */
159void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
160{
161 unsigned long mask = cpus_addr(cpumask)[0];
162 unsigned long cfg;
163 unsigned long flags;
164
165 local_irq_save(flags);
166
167 /*
168 * Wait for idle.
169 */
170 apic_wait_icr_idle();
171
172 /*
173 * prepare target chip field
174 */
175 cfg = __prepare_ICR2(mask);
176 apic_write_around(APIC_ICR2, cfg);
177
178 /*
179 * program the ICR
180 */
181 cfg = __prepare_ICR(0, vector);
182
183 /*
184 * Send the IPI. The write to APIC_ICR fires this off.
185 */
186 apic_write_around(APIC_ICR, cfg);
187
188 local_irq_restore(flags);
189}
190
191void send_IPI_mask_sequence(cpumask_t mask, int vector)
192{
193 unsigned long cfg, flags;
194 unsigned int query_cpu;
195
196 /*
197 * Hack. The clustered APIC addressing mode doesn't allow us to send
198 * to an arbitrary mask, so I do a unicast to each CPU instead. This
199 * should be modified to do 1 message per cluster ID - mbligh
200 */
201
202 local_irq_save(flags);
203
204 for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
205 if (cpu_isset(query_cpu, mask)) {
206
207 /*
208 * Wait for idle.
209 */
210 apic_wait_icr_idle();
211
212 /*
213 * prepare target chip field
214 */
215 cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu));
216 apic_write_around(APIC_ICR2, cfg);
217
218 /*
219 * program the ICR
220 */
221 cfg = __prepare_ICR(0, vector);
222
223 /*
224 * Send the IPI. The write to APIC_ICR fires this off.
225 */
226 apic_write_around(APIC_ICR, cfg);
227 }
228 }
229 local_irq_restore(flags);
230}
231
232#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
233
234/*
235 * Smarter SMP flushing macros.
236 * c/o Linus Torvalds.
237 *
238 * These mean you can really definitely utterly forget about
239 * writing to user space from interrupts. (It's not allowed anyway.)
240 *
241 * Optimizations Manfred Spraul <manfred@colorfullife.com>
242 */
243
244static cpumask_t flush_cpumask;
245static struct mm_struct * flush_mm;
246static unsigned long flush_va;
247static DEFINE_SPINLOCK(tlbstate_lock);
248#define FLUSH_ALL 0xffffffff
249
250/*
251 * We cannot call mmdrop() because we are in interrupt context,
252 * instead update mm->cpu_vm_mask.
253 *
254 * We need to reload %cr3 since the page tables may be going
255 * away from under us..
256 */
257static inline void leave_mm (unsigned long cpu)
258{
259 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
260 BUG();
261 cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
262 load_cr3(swapper_pg_dir);
263}
264
265/*
266 *
267 * The flush IPI assumes that a thread switch happens in this order:
268 * [cpu0: the cpu that switches]
269 * 1) switch_mm() either 1a) or 1b)
270 * 1a) thread switch to a different mm
271 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
272 * Stop ipi delivery for the old mm. This is not synchronized with
273 * the other cpus, but smp_invalidate_interrupt ignores flush ipis
274 * for the wrong mm, and in the worst case we perform a superfluous
275 * tlb flush.
276 * 1a2) set cpu_tlbstate to TLBSTATE_OK
277 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
278 * was in lazy tlb mode.
279 * 1a3) update cpu_tlbstate[].active_mm
280 * Now cpu0 accepts tlb flushes for the new mm.
281 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
282 * Now the other cpus will send tlb flush ipis.
283 * 1a5) change cr3.
284 * 1b) thread switch without mm change
285 * cpu_tlbstate[].active_mm is correct, cpu0 already handles
286 * flush ipis.
287 * 1b1) set cpu_tlbstate to TLBSTATE_OK
288 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
289 * Atomically set the bit [other cpus will start sending flush ipis],
290 * and test the bit.
291 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
292 * 2) switch %%esp, ie current
293 *
294 * The interrupt must handle 2 special cases:
295 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
296 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
297 * runs in kernel space, the cpu could load tlb entries for user space
298 * pages.
299 *
300 * The good news is that cpu_tlbstate is local to each cpu, no
301 * write/read ordering problems.
302 */
303
304/*
305 * TLB flush IPI:
306 *
307 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
308 * 2) Leave the mm if we are in the lazy tlb mode.
309 */
310
311fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
312{
313 unsigned long cpu;
314
315 cpu = get_cpu();
316
317 if (!cpu_isset(cpu, flush_cpumask))
318 goto out;
319 /*
320 * This was a BUG() but until someone can quote me the
321 * line from the intel manual that guarantees an IPI to
322 * multiple CPUs is retried _only_ on the erroring CPUs
323 * it's staying as a return
324 *
325 * BUG();
326 */
327
328 if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
329 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
330 if (flush_va == FLUSH_ALL)
331 local_flush_tlb();
332 else
333 __flush_tlb_one(flush_va);
334 } else
335 leave_mm(cpu);
336 }
337 ack_APIC_irq();
338 smp_mb__before_clear_bit();
339 cpu_clear(cpu, flush_cpumask);
340 smp_mb__after_clear_bit();
341out:
342 put_cpu_no_resched();
343}
344
345static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
346 unsigned long va)
347{
348 cpumask_t tmp;
349 /*
350 * A couple of (to be removed) sanity checks:
351 *
352 * - we do not send IPIs to not-yet booted CPUs.
353 * - current CPU must not be in mask
354 * - mask must exist :)
355 */
356 BUG_ON(cpus_empty(cpumask));
357
358 cpus_and(tmp, cpumask, cpu_online_map);
359 BUG_ON(!cpus_equal(cpumask, tmp));
360 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
361 BUG_ON(!mm);
362
363 /*
364 * I'm not happy about this global shared spinlock in the
365 * MM hot path, but we'll see how contended it is.
366 * Temporarily this turns IRQs off, so that lockups are
367 * detected by the NMI watchdog.
368 */
369 spin_lock(&tlbstate_lock);
370
371 flush_mm = mm;
372 flush_va = va;
373#if NR_CPUS <= BITS_PER_LONG
374 atomic_set_mask(cpumask, &flush_cpumask);
375#else
376 {
377 int k;
378 unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
379 unsigned long *cpu_mask = (unsigned long *)&cpumask;
380 for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
381 atomic_set_mask(cpu_mask[k], &flush_mask[k]);
382 }
383#endif
384 /*
385 * We have to send the IPI only to
386 * CPUs affected.
387 */
388 send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
389
390 while (!cpus_empty(flush_cpumask))
391 /* nothing. lockup detection does not belong here */
392 mb();
393
394 flush_mm = NULL;
395 flush_va = 0;
396 spin_unlock(&tlbstate_lock);
397}
398
399void flush_tlb_current_task(void)
400{
401 struct mm_struct *mm = current->mm;
402 cpumask_t cpu_mask;
403
404 preempt_disable();
405 cpu_mask = mm->cpu_vm_mask;
406 cpu_clear(smp_processor_id(), cpu_mask);
407
408 local_flush_tlb();
409 if (!cpus_empty(cpu_mask))
410 flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
411 preempt_enable();
412}
413
414void flush_tlb_mm (struct mm_struct * mm)
415{
416 cpumask_t cpu_mask;
417
418 preempt_disable();
419 cpu_mask = mm->cpu_vm_mask;
420 cpu_clear(smp_processor_id(), cpu_mask);
421
422 if (current->active_mm == mm) {
423 if (current->mm)
424 local_flush_tlb();
425 else
426 leave_mm(smp_processor_id());
427 }
428 if (!cpus_empty(cpu_mask))
429 flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
430
431 preempt_enable();
432}
433
434void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
435{
436 struct mm_struct *mm = vma->vm_mm;
437 cpumask_t cpu_mask;
438
439 preempt_disable();
440 cpu_mask = mm->cpu_vm_mask;
441 cpu_clear(smp_processor_id(), cpu_mask);
442
443 if (current->active_mm == mm) {
444 if (current->mm)
445 __flush_tlb_one(va);
446 else
447 leave_mm(smp_processor_id());
448 }
449
450 if (!cpus_empty(cpu_mask))
451 flush_tlb_others(cpu_mask, mm, va);
452
453 preempt_enable();
454}
455
456static void do_flush_tlb_all(void* info)
457{
458 unsigned long cpu = smp_processor_id();
459
460 __flush_tlb_all();
461 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
462 leave_mm(cpu);
463}
464
465void flush_tlb_all(void)
466{
467 on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
468}
469
470/*
471 * this function sends a 'reschedule' IPI to another CPU.
472 * it goes straight through and wastes no time serializing
473 * anything. Worst case is that we lose a reschedule ...
474 */
475void smp_send_reschedule(int cpu)
476{
477 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
478}
479
480/*
481 * Structure and data for smp_call_function(). This is designed to minimise
482 * static memory requirements. It also looks cleaner.
483 */
484static DEFINE_SPINLOCK(call_lock);
485
486struct call_data_struct {
487 void (*func) (void *info);
488 void *info;
489 atomic_t started;
490 atomic_t finished;
491 int wait;
492};
493
494static struct call_data_struct * call_data;
495
496/*
497 * this function sends a 'generic call function' IPI to all other CPUs
498 * in the system.
499 */
500
501int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
502 int wait)
503/*
504 * [SUMMARY] Run a function on all other CPUs.
505 * <func> The function to run. This must be fast and non-blocking.
506 * <info> An arbitrary pointer to pass to the function.
507 * <nonatomic> currently unused.
508 * <wait> If true, wait (atomically) until function has completed on other CPUs.
509 * [RETURNS] 0 on success, else a negative status code. Does not return until
510 * remote CPUs are nearly ready to execute <<func>>, are executing it, or have executed it.
511 *
512 * You must not call this function with disabled interrupts or from a
513 * hardware interrupt handler or from a bottom half handler.
514 */
515{
516 struct call_data_struct data;
517 int cpus = num_online_cpus()-1;
518
519 if (!cpus)
520 return 0;
521
522 /* Can deadlock when called with interrupts disabled */
523 WARN_ON(irqs_disabled());
524
525 data.func = func;
526 data.info = info;
527 atomic_set(&data.started, 0);
528 data.wait = wait;
529 if (wait)
530 atomic_set(&data.finished, 0);
531
532 spin_lock(&call_lock);
533 call_data = &data;
534 mb();
535
536 /* Send a message to all other CPUs and wait for them to respond */
537 send_IPI_allbutself(CALL_FUNCTION_VECTOR);
538
539 /* Wait for response */
540 while (atomic_read(&data.started) != cpus)
541 cpu_relax();
542
543 if (wait)
544 while (atomic_read(&data.finished) != cpus)
545 cpu_relax();
546 spin_unlock(&call_lock);
547
548 return 0;
549}
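/*
 * Usage sketch (editorial; "count_me" and "hits" are made-up example
 * names, not part of this file):
 *
 *	static void count_me(void *info)
 *	{
 *		atomic_inc((atomic_t *)info);
 *	}
 *
 *	atomic_t hits = ATOMIC_INIT(0);
 *	smp_call_function(count_me, &hits, 0, 1);
 *
 * runs count_me() on every other online CPU and, because wait == 1,
 * returns only after all of them have finished.
 */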
550
551static void stop_this_cpu (void * dummy)
552{
553 /*
554 * Remove this CPU:
555 */
556 cpu_clear(smp_processor_id(), cpu_online_map);
557 local_irq_disable();
558 disable_local_APIC();
559 if (cpu_data[smp_processor_id()].hlt_works_ok)
560 for (;;) __asm__("hlt");
561 for (;;);
562}
563
564/*
565 * this function calls the 'stop' function on all other CPUs in the system.
566 */
567
568void smp_send_stop(void)
569{
570 smp_call_function(stop_this_cpu, NULL, 1, 0);
571
572 local_irq_disable();
573 disable_local_APIC();
574 local_irq_enable();
575}
576
577/*
578 * Reschedule call back. Nothing to do,
579 * all the work is done automatically when
580 * we return from the interrupt.
581 */
582fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
583{
584 ack_APIC_irq();
585}
586
587fastcall void smp_call_function_interrupt(struct pt_regs *regs)
588{
589 void (*func) (void *info) = call_data->func;
590 void *info = call_data->info;
591 int wait = call_data->wait;
592
593 ack_APIC_irq();
594 /*
595 * Notify initiating CPU that I've grabbed the data and am
596 * about to execute the function
597 */
598 mb();
599 atomic_inc(&call_data->started);
600 /*
601 * At this point the info structure may be out of scope unless wait==1
602 */
603 irq_enter();
604 (*func)(info);
605 irq_exit();
606
607 if (wait) {
608 mb();
609 atomic_inc(&call_data->finished);
610 }
611}
612
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
new file mode 100644
index 000000000000..332ee7a1d1a1
--- /dev/null
+++ b/arch/i386/kernel/smpboot.c
@@ -0,0 +1,1145 @@
1/*
2 * x86 SMP booting functions
3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
5 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
6 *
7 * Much of the core SMP work is based on previous work by Thomas Radke, to
8 * whom a great many thanks are extended.
9 *
10 * Thanks to Intel for making available several different Pentium,
11 * Pentium Pro and Pentium-II/Xeon MP machines.
12 * Original development of Linux SMP code supported by Caldera.
13 *
14 * This code is released under the GNU General Public License version 2 or
15 * later.
16 *
17 * Fixes
18 * Felix Koop : NR_CPUS used properly
19 * Jose Renau : Handle single CPU case.
20 * Alan Cox : By repeated request 8) - Total BogoMIPS report.
21 * Greg Wright : Fix for kernel stacks panic.
22 * Erich Boleyn : MP v1.4 and additional changes.
23 * Matthias Sattler : Changes for 2.1 kernel map.
24 * Michel Lespinasse : Changes for 2.1 kernel map.
25 * Michael Chastain : Change trampoline.S to gnu as.
26 * Alan Cox : Dumb bug: 'B' step PPro's are fine
27 * Ingo Molnar : Added APIC timers, based on code
28 * from Jose Renau
29 * Ingo Molnar : various cleanups and rewrites
30 * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
31 * Maciej W. Rozycki : Bits for genuine 82489DX APICs
32 * Martin J. Bligh : Added support for multi-quad systems
33 * Dave Jones : Report invalid combinations of Athlon CPUs.
34 * Rusty Russell : Hacked into shape for new "hotplug" boot process. */
35
36#include <linux/module.h>
37#include <linux/config.h>
38#include <linux/init.h>
39#include <linux/kernel.h>
40
41#include <linux/mm.h>
42#include <linux/sched.h>
43#include <linux/kernel_stat.h>
44#include <linux/smp_lock.h>
45#include <linux/irq.h>
46#include <linux/bootmem.h>
47
48#include <linux/delay.h>
49#include <linux/mc146818rtc.h>
50#include <asm/tlbflush.h>
51#include <asm/desc.h>
52#include <asm/arch_hooks.h>
53
54#include <mach_apic.h>
55#include <mach_wakecpu.h>
56#include <smpboot_hooks.h>
57
58/* Set if we find a B stepping CPU */
59static int __initdata smp_b_stepping;
60
61/* Number of siblings per CPU package */
62int smp_num_siblings = 1;
63int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
64EXPORT_SYMBOL(phys_proc_id);
65
66/* bitmap of online cpus */
67cpumask_t cpu_online_map;
68
69cpumask_t cpu_callin_map;
70cpumask_t cpu_callout_map;
71static cpumask_t smp_commenced_mask;
72
73/* Per CPU bogomips and other parameters */
74struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
75
76u8 x86_cpu_to_apicid[NR_CPUS] =
77 { [0 ... NR_CPUS-1] = 0xff };
78EXPORT_SYMBOL(x86_cpu_to_apicid);
79
80/*
81 * Trampoline 80x86 program as an array.
82 */
83
84extern unsigned char trampoline_data [];
85extern unsigned char trampoline_end [];
86static unsigned char *trampoline_base;
87static int trampoline_exec;
88
89static void map_cpu_to_logical_apicid(void);
90
91/*
92 * Currently trivial. Write the real->protected mode
93 * bootstrap into the page concerned. The caller
94 * has made sure it's suitably aligned.
95 */
96
97static unsigned long __init setup_trampoline(void)
98{
99 memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
100 return virt_to_phys(trampoline_base);
101}
102
103/*
104 * We are called very early to get the low memory for the
105 * SMP bootup trampoline page.
106 */
107void __init smp_alloc_memory(void)
108{
109 trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
110 /*
111 * Has to be in very low memory so we can execute
112 * real-mode AP code.
113 */
114 if (__pa(trampoline_base) >= 0x9F000)
115 BUG();
116 /*
117 * Make the SMP trampoline executable:
118 */
119 trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
120}
121
122/*
123 * The bootstrap kernel entry code has set these up. Save them for
124 * a given CPU
125 */
126
127static void __init smp_store_cpu_info(int id)
128{
129 struct cpuinfo_x86 *c = cpu_data + id;
130
131 *c = boot_cpu_data;
132 if (id != 0)
133 identify_cpu(c);
134 /*
135 * Mask B, Pentium, but not Pentium MMX
136 */
137 if (c->x86_vendor == X86_VENDOR_INTEL &&
138 c->x86 == 5 &&
139 c->x86_mask >= 1 && c->x86_mask <= 4 &&
140 c->x86_model <= 3)
141 /*
142 * Remember we have B step Pentia with bugs
143 */
144 smp_b_stepping = 1;
145
146 /*
147 * Certain Athlons might work (for various values of 'work') in SMP
148 * but they are not certified as MP capable.
149 */
150 if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
151
152 /* Athlon 660/661 is valid. */
153 if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
154 goto valid_k7;
155
156 /* Duron 670 is valid */
157 if ((c->x86_model==7) && (c->x86_mask==0))
158 goto valid_k7;
159
160 /*
161 * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
162 * It's worth noting that the A5 stepping (662) of some Athlon XPs
163 * has the MP bit set.
164 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
165 */
166 if (((c->x86_model==6) && (c->x86_mask>=2)) ||
167 ((c->x86_model==7) && (c->x86_mask>=1)) ||
168 (c->x86_model> 7))
169 if (cpu_has_mp)
170 goto valid_k7;
171
172 /* If we get here, it's not a certified SMP capable AMD system. */
173 tainted |= TAINT_UNSAFE_SMP;
174 }
175
176valid_k7:
177 ;
178}
179
180/*
181 * TSC synchronization.
182 *
183 * We first check whether all CPUs have their TSC's synchronized,
184 * then we print a warning if not, and always resync.
185 */
186
187static atomic_t tsc_start_flag = ATOMIC_INIT(0);
188static atomic_t tsc_count_start = ATOMIC_INIT(0);
189static atomic_t tsc_count_stop = ATOMIC_INIT(0);
190static unsigned long long tsc_values[NR_CPUS];
191
192#define NR_LOOPS 5
193
194static void __init synchronize_tsc_bp (void)
195{
196 int i;
197 unsigned long long t0;
198 unsigned long long sum, avg;
199 long long delta;
200 unsigned long one_usec;
201 int buggy = 0;
202
203 printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
204
205 /* convert from kcyc/sec to cyc/usec */
206 one_usec = cpu_khz / 1000;
207
208 atomic_set(&tsc_start_flag, 1);
209 wmb();
210
211 /*
212 * We loop a few times to get a primed instruction cache,
213 * then the last pass is more or less synchronized and
214 * the BP and APs set their cycle counters to zero all at
215 * once. This reduces the chance of having random offsets
216 * between the processors, and guarantees that the maximum
217 * delay between the cycle counters is never bigger than
218 * the latency of information-passing (cachelines) between
219 * two CPUs.
220 */
221 for (i = 0; i < NR_LOOPS; i++) {
222 /*
223 * all APs synchronize but they loop on '== num_cpus'
224 */
225 while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
226 mb();
227 atomic_set(&tsc_count_stop, 0);
228 wmb();
229 /*
230 * this lets the APs save their current TSC:
231 */
232 atomic_inc(&tsc_count_start);
233
234 rdtscll(tsc_values[smp_processor_id()]);
235 /*
236 * We clear the TSC in the last loop:
237 */
238 if (i == NR_LOOPS-1)
239 write_tsc(0, 0);
240
241 /*
242 * Wait for all APs to leave the synchronization point:
243 */
244 while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
245 mb();
246 atomic_set(&tsc_count_start, 0);
247 wmb();
248 atomic_inc(&tsc_count_stop);
249 }
250
251 sum = 0;
252 for (i = 0; i < NR_CPUS; i++) {
253 if (cpu_isset(i, cpu_callout_map)) {
254 t0 = tsc_values[i];
255 sum += t0;
256 }
257 }
258 avg = sum;
259 do_div(avg, num_booting_cpus());
260
261 sum = 0;
262 for (i = 0; i < NR_CPUS; i++) {
263 if (!cpu_isset(i, cpu_callout_map))
264 continue;
265 delta = tsc_values[i] - avg;
266 if (delta < 0)
267 delta = -delta;
268 /*
269 * We report bigger than 2 microseconds clock differences.
270 */
271 if (delta > 2*one_usec) {
272 long realdelta;
273 if (!buggy) {
274 buggy = 1;
275 printk("\n");
276 }
277 realdelta = delta;
278 do_div(realdelta, one_usec);
279 if (tsc_values[i] < avg)
280 realdelta = -realdelta;
281
282 printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
283 }
284
285 sum += delta;
286 }
287 if (!buggy)
288 printk("passed.\n");
289}
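/*
 * Worked numbers (editorial): with cpu_khz == 500000 (a 500 MHz part),
 * one_usec == 500 cycles, so the warning above fires only for a skew
 * of more than 2*500 == 1000 cycles, i.e. more than 2 microseconds,
 * and realdelta == delta/500 converts the cycle skew back to usecs.
 */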
290
291static void __init synchronize_tsc_ap (void)
292{
293 int i;
294
295 /*
296 * Not every cpu is online at the time
297 * this gets called, so we first wait for the BP to
298 * finish SMP initialization:
299 */
300 while (!atomic_read(&tsc_start_flag)) mb();
301
302 for (i = 0; i < NR_LOOPS; i++) {
303 atomic_inc(&tsc_count_start);
304 while (atomic_read(&tsc_count_start) != num_booting_cpus())
305 mb();
306
307 rdtscll(tsc_values[smp_processor_id()]);
308 if (i == NR_LOOPS-1)
309 write_tsc(0, 0);
310
311 atomic_inc(&tsc_count_stop);
312 while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
313 }
314}
315#undef NR_LOOPS
316
317extern void calibrate_delay(void);
318
319static atomic_t init_deasserted;
320
321static void __init smp_callin(void)
322{
323 int cpuid, phys_id;
324 unsigned long timeout;
325
326 /*
327 * If woken up by an INIT in an 82489DX configuration
328 * we may get here before an INIT-deassert IPI reaches
329 * our local APIC. We have to wait for the IPI or we'll
330 * lock up on an APIC access.
331 */
332 wait_for_init_deassert(&init_deasserted);
333
334 /*
335 * (This works even if the APIC is not enabled.)
336 */
337 phys_id = GET_APIC_ID(apic_read(APIC_ID));
338 cpuid = smp_processor_id();
339 if (cpu_isset(cpuid, cpu_callin_map)) {
340 printk("huh, phys CPU#%d, CPU#%d already present??\n",
341 phys_id, cpuid);
342 BUG();
343 }
344 Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
345
346 /*
347 * STARTUP IPIs are fragile beasts as they might sometimes
348 * trigger some glue motherboard logic. Complete APIC bus
349 * silence for 1 second; this overestimates, by a factor of
350 * two, the time the boot CPU spends sending the up to 2
351 * STARTUP IPIs. This should be enough.
352 */
353
354 /*
355 * Waiting 2s total for startup (udelay is not yet working)
356 */
357 timeout = jiffies + 2*HZ;
358 while (time_before(jiffies, timeout)) {
359 /*
360 * Has the boot CPU finished its STARTUP sequence?
361 */
362 if (cpu_isset(cpuid, cpu_callout_map))
363 break;
364 rep_nop();
365 }
366
367 if (!time_before(jiffies, timeout)) {
368 printk("BUG: CPU%d started up but did not get a callout!\n",
369 cpuid);
370 BUG();
371 }
372
373 /*
374 * the boot CPU has finished the init stage and is spinning
375 * on callin_map until we finish. We are free to set up this
376 * CPU, first the APIC. (this is probably redundant on most
377 * boards)
378 */
379
380 Dprintk("CALLIN, before setup_local_APIC().\n");
381 smp_callin_clear_local_apic();
382 setup_local_APIC();
383 map_cpu_to_logical_apicid();
384
385 /*
386 * Get our bogomips.
387 */
388 calibrate_delay();
389 Dprintk("Stack at about %p\n",&cpuid);
390
391 /*
392 * Save our processor parameters
393 */
394 smp_store_cpu_info(cpuid);
395
396 disable_APIC_timer();
397
398 /*
399 * Allow the master to continue.
400 */
401 cpu_set(cpuid, cpu_callin_map);
402
403 /*
404 * Synchronize the TSC with the BP
405 */
406 if (cpu_has_tsc && cpu_khz)
407 synchronize_tsc_ap();
408}
409
410static int cpucount;
411
412/*
413 * Activate a secondary processor.
414 */
415static void __init start_secondary(void *unused)
416{
417 /*
418 * Don't put anything before smp_callin(); SMP
419 * booting is so fragile that we want to limit the
420 * things done here to the most necessary things.
421 */
422 cpu_init();
423 smp_callin();
424 while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
425 rep_nop();
426 setup_secondary_APIC_clock();
427 if (nmi_watchdog == NMI_IO_APIC) {
428 disable_8259A_irq(0);
429 enable_NMI_through_LVT0(NULL);
430 enable_8259A_irq(0);
431 }
432 enable_APIC_timer();
433 /*
434 * low-memory mappings have been cleared, flush them from
435 * the local TLBs too.
436 */
437 local_flush_tlb();
438 cpu_set(smp_processor_id(), cpu_online_map);
439
440 /* We can take interrupts now: we're officially "up". */
441 local_irq_enable();
442
443 wmb();
444 cpu_idle();
445}
446
447/*
448 * Everything has been set up for the secondary
449 * CPUs - they just need to reload everything
450 * from the task structure
451 * This function must not return.
452 */
453void __init initialize_secondary(void)
454{
455 /*
456 * We don't actually need to load the full TSS,
457 * basically just the stack pointer and the eip.
458 */
459
460 asm volatile(
461 "movl %0,%%esp\n\t"
462 "jmp *%1"
463 :
464 :"r" (current->thread.esp),"r" (current->thread.eip));
465}
466
467extern struct {
468 void * esp;
469 unsigned short ss;
470} stack_start;
471
472#ifdef CONFIG_NUMA
473
474/* which logical CPUs are on which nodes */
475cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
476 { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
477/* which node each logical CPU is on */
478int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
479EXPORT_SYMBOL(cpu_2_node);
480
481/* set up a mapping between cpu and node. */
482static inline void map_cpu_to_node(int cpu, int node)
483{
484 printk("Mapping cpu %d to node %d\n", cpu, node);
485 cpu_set(cpu, node_2_cpu_mask[node]);
486 cpu_2_node[cpu] = node;
487}
488
489/* undo a mapping between cpu and node. */
490static inline void unmap_cpu_to_node(int cpu)
491{
492 int node;
493
494 printk("Unmapping cpu %d from all nodes\n", cpu);
495 for (node = 0; node < MAX_NUMNODES; node++)
496 cpu_clear(cpu, node_2_cpu_mask[node]);
497 cpu_2_node[cpu] = 0;
498}
499#else /* !CONFIG_NUMA */
500
501#define map_cpu_to_node(cpu, node) ({})
502#define unmap_cpu_to_node(cpu) ({})
503
504#endif /* CONFIG_NUMA */
505
506u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
507
508static void map_cpu_to_logical_apicid(void)
509{
510 int cpu = smp_processor_id();
511 int apicid = logical_smp_processor_id();
512
513 cpu_2_logical_apicid[cpu] = apicid;
514 map_cpu_to_node(cpu, apicid_to_node(apicid));
515}
516
517static void unmap_cpu_to_logical_apicid(int cpu)
518{
519 cpu_2_logical_apicid[cpu] = BAD_APICID;
520 unmap_cpu_to_node(cpu);
521}
522
523#if APIC_DEBUG
524static inline void __inquire_remote_apic(int apicid)
525{
526 int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
527 char *names[] = { "ID", "VERSION", "SPIV" };
528 int timeout, status;
529
530 printk("Inquiring remote APIC #%d...\n", apicid);
531
532 for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
533 printk("... APIC #%d %s: ", apicid, names[i]);
534
535 /*
536 * Wait for idle.
537 */
538 apic_wait_icr_idle();
539
540 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
541 apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
542
543 timeout = 0;
544 do {
545 udelay(100);
546 status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
547 } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
548
549 switch (status) {
550 case APIC_ICR_RR_VALID:
551 status = apic_read(APIC_RRR);
552 printk("%08x\n", status);
553 break;
554 default:
555 printk("failed\n");
556 }
557 }
558}
559#endif
560
561#ifdef WAKE_SECONDARY_VIA_NMI
562/*
563 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
564 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
565 * won't ... remember to clear down the APIC, etc later.
566 */
567static int __init
568wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
569{
570 unsigned long send_status = 0, accept_status = 0;
571 int timeout, maxlvt;
572
573 /* Target chip */
574 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
575
576 /* Boot on the stack */
577 /* Kick the second */
578 apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
579
580 Dprintk("Waiting for send to finish...\n");
581 timeout = 0;
582 do {
583 Dprintk("+");
584 udelay(100);
585 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
586 } while (send_status && (timeout++ < 1000));
587
588 /*
589 * Give the other CPU some time to accept the IPI.
590 */
591 udelay(200);
592 /*
593 * Due to the Pentium erratum 3AP.
594 */
595 maxlvt = get_maxlvt();
596 if (maxlvt > 3) {
597 apic_read_around(APIC_SPIV);
598 apic_write(APIC_ESR, 0);
599 }
600 accept_status = (apic_read(APIC_ESR) & 0xEF);
601 Dprintk("NMI sent.\n");
602
603 if (send_status)
604 printk("APIC never delivered???\n");
605 if (accept_status)
606 printk("APIC delivery error (%lx).\n", accept_status);
607
608 return (send_status | accept_status);
609}
610#endif /* WAKE_SECONDARY_VIA_NMI */
611
612#ifdef WAKE_SECONDARY_VIA_INIT
613static int __init
614wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
615{
616 unsigned long send_status = 0, accept_status = 0;
617 int maxlvt, timeout, num_starts, j;
618
619 /*
620 * Be paranoid about clearing APIC errors.
621 */
622 if (APIC_INTEGRATED(apic_version[phys_apicid])) {
623 apic_read_around(APIC_SPIV);
624 apic_write(APIC_ESR, 0);
625 apic_read(APIC_ESR);
626 }
627
628 Dprintk("Asserting INIT.\n");
629
630 /*
631 * Turn INIT on target chip
632 */
633 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
634
635 /*
636 * Send IPI
637 */
638 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
639 | APIC_DM_INIT);
640
641 Dprintk("Waiting for send to finish...\n");
642 timeout = 0;
643 do {
644 Dprintk("+");
645 udelay(100);
646 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
647 } while (send_status && (timeout++ < 1000));
648
649 mdelay(10);
650
651 Dprintk("Deasserting INIT.\n");
652
653 /* Target chip */
654 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
655
656 /* Send IPI */
657 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
658
659 Dprintk("Waiting for send to finish...\n");
660 timeout = 0;
661 do {
662 Dprintk("+");
663 udelay(100);
664 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
665 } while (send_status && (timeout++ < 1000));
666
667 atomic_set(&init_deasserted, 1);
668
669 /*
670 * Should we send STARTUP IPIs ?
671 *
672 * Determine this based on the APIC version.
673 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
674 */
675 if (APIC_INTEGRATED(apic_version[phys_apicid]))
676 num_starts = 2;
677 else
678 num_starts = 0;
679
680 /*
681 * Run STARTUP IPI loop.
682 */
683 Dprintk("#startup loops: %d.\n", num_starts);
684
685 maxlvt = get_maxlvt();
686
687 for (j = 1; j <= num_starts; j++) {
688 Dprintk("Sending STARTUP #%d.\n",j);
689 apic_read_around(APIC_SPIV);
690 apic_write(APIC_ESR, 0);
691 apic_read(APIC_ESR);
692 Dprintk("After apic_write.\n");
693
694 /*
695 * STARTUP IPI
696 */
697
698 /* Target chip */
699 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
700
701 /* Boot on the stack */
702 /* Kick the second */
703 apic_write_around(APIC_ICR, APIC_DM_STARTUP
704 | (start_eip >> 12));
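			/*
			 * (Editorial note) The STARTUP IPI's vector field
			 * is the physical page number of the entry point:
			 * the AP starts executing in real mode at
			 * (vector << 12), hence the earlier insistence
			 * that start_eip be page-aligned and in low memory.
			 */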
705
706 /*
707 * Give the other CPU some time to accept the IPI.
708 */
709 udelay(300);
710
711 Dprintk("Startup point 1.\n");
712
713 Dprintk("Waiting for send to finish...\n");
714 timeout = 0;
715 do {
716 Dprintk("+");
717 udelay(100);
718 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
719 } while (send_status && (timeout++ < 1000));
720
721 /*
722 * Give the other CPU some time to accept the IPI.
723 */
724 udelay(200);
725 /*
726 * Due to the Pentium erratum 3AP.
727 */
728 if (maxlvt > 3) {
729 apic_read_around(APIC_SPIV);
730 apic_write(APIC_ESR, 0);
731 }
732 accept_status = (apic_read(APIC_ESR) & 0xEF);
733 if (send_status || accept_status)
734 break;
735 }
736 Dprintk("After Startup.\n");
737
738 if (send_status)
739 printk("APIC never delivered???\n");
740 if (accept_status)
741 printk("APIC delivery error (%lx).\n", accept_status);
742
743 return (send_status | accept_status);
744}
745#endif /* WAKE_SECONDARY_VIA_INIT */
746
747extern cpumask_t cpu_initialized;
748
749static int __init do_boot_cpu(int apicid)
750/*
751 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
752 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
753 * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
754 */
755{
756 struct task_struct *idle;
757 unsigned long boot_error;
758 int timeout, cpu;
759 unsigned long start_eip;
760 unsigned short nmi_high = 0, nmi_low = 0;
761
762 cpu = ++cpucount;
763 /*
764 * We can't use kernel_thread() since we must avoid
765 * rescheduling the child.
766 */
767 idle = fork_idle(cpu);
768 if (IS_ERR(idle))
769 panic("failed fork for CPU %d", cpu);
770 idle->thread.eip = (unsigned long) start_secondary;
771 /* start_eip had better be page-aligned! */
772 start_eip = setup_trampoline();
773
774 /* So we see what's up */
775 printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
776 /* Stack for startup_32 can be just as for start_secondary onwards */
777 stack_start.esp = (void *) idle->thread.esp;
778
779 irq_ctx_init(cpu);
780
781 /*
782 * This grunge runs the startup process for
783 * the targeted processor.
784 */
785
786 atomic_set(&init_deasserted, 0);
787
788 Dprintk("Setting warm reset code and vector.\n");
789
790 store_NMI_vector(&nmi_high, &nmi_low);
791
792 smpboot_setup_warm_reset_vector(start_eip);
793
794 /*
795 * Starting actual IPI sequence...
796 */
797 boot_error = wakeup_secondary_cpu(apicid, start_eip);
798
799 if (!boot_error) {
800 /*
801 * allow APs to start initializing.
802 */
803 Dprintk("Before Callout %d.\n", cpu);
804 cpu_set(cpu, cpu_callout_map);
805 Dprintk("After Callout %d.\n", cpu);
806
807 /*
808 * Wait 5s total for a response
809 */
810 for (timeout = 0; timeout < 50000; timeout++) {
811 if (cpu_isset(cpu, cpu_callin_map))
812 break; /* It has booted */
813 udelay(100);
814 }
815
816 if (cpu_isset(cpu, cpu_callin_map)) {
817 /* number CPUs logically, starting from 1 (BSP is 0) */
818 Dprintk("OK.\n");
819 printk("CPU%d: ", cpu);
820 print_cpu_info(&cpu_data[cpu]);
821 Dprintk("CPU has booted.\n");
822 } else {
823 boot_error = 1;
824 if (*((volatile unsigned char *)trampoline_base)
825 == 0xA5)
826 /* trampoline started but...? */
827 printk("Stuck ??\n");
828 else
829 /* trampoline code not run */
830 printk("Not responding.\n");
831 inquire_remote_apic(apicid);
832 }
833 }
834 x86_cpu_to_apicid[cpu] = apicid;
835 if (boot_error) {
836 /* Try to put things back the way they were before ... */
837 unmap_cpu_to_logical_apicid(cpu);
838 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
839 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
840 cpucount--;
841 }
842
843 /* mark "stuck" area as not stuck */
844 *((volatile unsigned long *)trampoline_base) = 0;
845
846 return boot_error;
847}
848
849static void smp_tune_scheduling (void)
850{
851 unsigned long cachesize; /* kB */
852 unsigned long bandwidth = 350; /* MB/s */
853 /*
854 * Rough estimation for SMP scheduling, this is the number of
855 * cycles it takes for a fully memory-limited process to flush
856 * the SMP-local cache.
857 *
858 * (For a P5 this pretty much means we will choose another idle
859 * CPU almost always at wakeup time (this is due to the small
860 * L1 cache), on PIIs it's around 50-100 usecs, depending on
861 * the cache size)
862 */
863
864 if (!cpu_khz) {
865 /*
866 * this basically disables processor-affinity
867 * scheduling on SMP without a TSC.
868 */
869 return;
870 } else {
871 cachesize = boot_cpu_data.x86_cache_size;
872 if (cachesize == -1) {
873 cachesize = 16; /* Pentiums, 2x8kB cache */
874 bandwidth = 100;
875 }
876 }
877}
878
879/*
880 * Cycle through the processors sending APIC IPIs to boot each.
881 */
882
883static int boot_cpu_logical_apicid;
884/* Where the IO area was mapped on multiquad, always 0 otherwise */
885void *xquad_portio;
886
887cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
888
889static void __init smp_boot_cpus(unsigned int max_cpus)
890{
891 int apicid, cpu, bit, kicked;
892 unsigned long bogosum = 0;
893
894 /*
895 * Setup boot CPU information
896 */
897 smp_store_cpu_info(0); /* Final full version of the data */
898 printk("CPU%d: ", 0);
899 print_cpu_info(&cpu_data[0]);
900
901 boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
902 boot_cpu_logical_apicid = logical_smp_processor_id();
903 x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
904
905 current_thread_info()->cpu = 0;
906 smp_tune_scheduling();
907 cpus_clear(cpu_sibling_map[0]);
908 cpu_set(0, cpu_sibling_map[0]);
909
910 /*
911 * If we couldn't find an SMP configuration at boot time,
912 * get out of here now!
913 */
914 if (!smp_found_config && !acpi_lapic) {
915 printk(KERN_NOTICE "SMP motherboard not detected.\n");
916 smpboot_clear_io_apic_irqs();
917 phys_cpu_present_map = physid_mask_of_physid(0);
918 if (APIC_init_uniprocessor())
919 printk(KERN_NOTICE "Local APIC not detected."
920 " Using dummy APIC emulation.\n");
921 map_cpu_to_logical_apicid();
922 return;
923 }
924
925 /*
926 * Should not be necessary because the MP table should list the boot
927 * CPU too, but we do it for the sake of robustness anyway.
928 * Makes no sense to do this check in clustered apic mode, so skip it
929 */
930 if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
931 printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
932 boot_cpu_physical_apicid);
933 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
934 }
935
936 /*
937 * If we couldn't find a local APIC, then get out of here now!
938 */
939 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
940 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
941 boot_cpu_physical_apicid);
942 printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
943 smpboot_clear_io_apic_irqs();
944 phys_cpu_present_map = physid_mask_of_physid(0);
945 return;
946 }
947
948 verify_local_APIC();
949
950 /*
951 * If SMP should be disabled, then really disable it!
952 */
953 if (!max_cpus) {
954 smp_found_config = 0;
955 printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
956 smpboot_clear_io_apic_irqs();
957 phys_cpu_present_map = physid_mask_of_physid(0);
958 return;
959 }
960
961 connect_bsp_APIC();
962 setup_local_APIC();
963 map_cpu_to_logical_apicid();
964
965
966 setup_portio_remap();
967
968 /*
969 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
970 *
971 * In clustered apic mode, phys_cpu_present_map is constructed thus:
972 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
973 * clustered apic ID.
974 */
975 Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
976
977 kicked = 1;
978 for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
979 apicid = cpu_present_to_apicid(bit);
980 /*
981 * Don't even attempt to start the boot CPU!
982 */
983 if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
984 continue;
985
986 if (!check_apicid_present(bit))
987 continue;
988 if (max_cpus <= cpucount+1)
989 continue;
990
991 if (do_boot_cpu(apicid))
992 printk("CPU #%d not responding - cannot use it.\n",
993 apicid);
994 else
995 ++kicked;
996 }
997
998 /*
999 * Cleanup possible dangling ends...
1000 */
1001 smpboot_restore_warm_reset_vector();
1002
1003 /*
1004 * Allow the user to impress friends.
1005 */
1006 Dprintk("Before bogomips.\n");
1007 for (cpu = 0; cpu < NR_CPUS; cpu++)
1008 if (cpu_isset(cpu, cpu_callout_map))
1009 bogosum += cpu_data[cpu].loops_per_jiffy;
1010 printk(KERN_INFO
1011 "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
1012 cpucount+1,
1013 bogosum/(500000/HZ),
1014 (bogosum/(5000/HZ))%100);
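	/*
	 * Arithmetic note (editorial): the two divisions print the summed
	 * loops_per_jiffy scaled to BogoMIPS (loops_per_jiffy * HZ / 500000).
	 * E.g. assuming HZ == 1000 and two CPUs each with loops_per_jiffy
	 * == 2500000, bogosum == 5000000 and the line reads "Total of 2
	 * processors activated (10000.00 BogoMIPS)".
	 */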
1015
1016 Dprintk("Before bogocount - setting activated=1.\n");
1017
1018 if (smp_b_stepping)
1019 printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
1020
1021 /*
1022 * Don't taint if we are running SMP kernel on a single non-MP
1023 * approved Athlon
1024 */
1025 if (tainted & TAINT_UNSAFE_SMP) {
1026 if (cpucount)
1027 printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
1028 else
1029 tainted &= ~TAINT_UNSAFE_SMP;
1030 }
1031
1032 Dprintk("Boot done.\n");
1033
1034 /*
1035 * construct cpu_sibling_map[], so that we can tell sibling CPUs
1036 * efficiently.
1037 */
1038 for (cpu = 0; cpu < NR_CPUS; cpu++)
1039 cpus_clear(cpu_sibling_map[cpu]);
1040
1041 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1042 int siblings = 0;
1043 int i;
1044 if (!cpu_isset(cpu, cpu_callout_map))
1045 continue;
1046
1047 if (smp_num_siblings > 1) {
1048 for (i = 0; i < NR_CPUS; i++) {
1049 if (!cpu_isset(i, cpu_callout_map))
1050 continue;
1051 if (phys_proc_id[cpu] == phys_proc_id[i]) {
1052 siblings++;
1053 cpu_set(i, cpu_sibling_map[cpu]);
1054 }
1055 }
1056 } else {
1057 siblings++;
1058 cpu_set(cpu, cpu_sibling_map[cpu]);
1059 }
1060
1061 if (siblings != smp_num_siblings)
1062 printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
1063 }
1064
1065 if (nmi_watchdog == NMI_LOCAL_APIC)
1066 check_nmi_watchdog();
1067
1068 smpboot_setup_io_apic();
1069
1070 setup_boot_APIC_clock();
1071
1072 /*
1073 * Synchronize the TSC with the AP
1074 */
1075 if (cpu_has_tsc && cpucount && cpu_khz)
1076 synchronize_tsc_bp();
1077}
1078
1079/* These are wrappers to interface to the new boot process. Someone
1080 who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
1081void __init smp_prepare_cpus(unsigned int max_cpus)
1082{
1083 smp_boot_cpus(max_cpus);
1084}
1085
1086void __devinit smp_prepare_boot_cpu(void)
1087{
1088 cpu_set(smp_processor_id(), cpu_online_map);
1089 cpu_set(smp_processor_id(), cpu_callout_map);
1090}
1091
1092int __devinit __cpu_up(unsigned int cpu)
1093{
1094 /* This only works at boot for x86. See "rewrite" above. */
1095 if (cpu_isset(cpu, smp_commenced_mask)) {
1096 local_irq_enable();
1097 return -ENOSYS;
1098 }
1099
1100 /* In case one didn't come up */
1101 if (!cpu_isset(cpu, cpu_callin_map)) {
1102 local_irq_enable();
1103 return -EIO;
1104 }
1105
1106 local_irq_enable();
1107 /* Unleash the CPU! */
1108 cpu_set(cpu, smp_commenced_mask);
1109 while (!cpu_isset(cpu, cpu_online_map))
1110 mb();
1111 return 0;
1112}
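
/*
 * Sketch (not built): roughly how the generic boot code is expected to
 * drive the hooks above -- smp_prepare_cpus() once, then cpu_up() per
 * CPU, which lands in __cpu_up().  This loop is an illustration, not a
 * copy of init/main.c; cpu_up() is declared in <linux/cpu.h>.
 */
#if 0
static void __init smp_init_sketch(unsigned int max_cpus)
{
	unsigned int cpu;

	smp_prepare_cpus(max_cpus);		/* calls smp_boot_cpus() */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (num_online_cpus() >= max_cpus)
			break;
		if (cpu_possible(cpu) && !cpu_online(cpu))
			cpu_up(cpu);		/* ends up in __cpu_up() */
	}
}
#endif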
1113
1114void __init smp_cpus_done(unsigned int max_cpus)
1115{
1116#ifdef CONFIG_X86_IO_APIC
1117 setup_ioapic_dest();
1118#endif
1119 zap_low_mappings();
1120 /*
1121 * Disable executability of the SMP trampoline:
1122 */
1123 set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
1124}
1125
1126void __init smp_intr_init(void)
1127{
1128 /*
1129 * IRQ0 must be given a fixed assignment and initialized,
1130 * because it's used before the IO-APIC is set up.
1131 */
1132 set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
1133
1134 /*
1135 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
1136 * IPI, driven by wakeup.
1137 */
1138 set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
1139
1140 /* IPI for invalidation */
1141 set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
1142
1143 /* IPI for generic function call */
1144 set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
1145}
diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c
new file mode 100644
index 000000000000..7b3b27d64409
--- /dev/null
+++ b/arch/i386/kernel/srat.c
@@ -0,0 +1,456 @@
1/*
2 * Some of the code in this file has been gleaned from the 64 bit
3 * discontigmem support code base.
4 *
5 * Copyright (C) 2002, IBM Corp.
6 *
7 * All rights reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
17 * NON INFRINGEMENT. See the GNU General Public License for more
18 * details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * Send feedback to Pat Gaughen <gone@us.ibm.com>
25 */
26#include <linux/config.h>
27#include <linux/mm.h>
28#include <linux/bootmem.h>
29#include <linux/mmzone.h>
30#include <linux/acpi.h>
31#include <linux/nodemask.h>
32#include <asm/srat.h>
33#include <asm/topology.h>
34
35/*
36 * proximity macros and definitions
37 */
38#define NODE_ARRAY_INDEX(x) ((x) / 8) /* 8 bits/char */
39#define NODE_ARRAY_OFFSET(x) ((x) % 8) /* 8 bits/char */
40#define BMAP_SET(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit))
41#define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
42#define MAX_PXM_DOMAINS 256 /* 1 byte and no promises about values */
43/* bitmap length; _PXM is at most 255 */
44#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8)
45static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */
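/*
 * Example of the bitmap arithmetic above: for _PXM 37,
 * NODE_ARRAY_INDEX(37) == 37/8 == 4 and NODE_ARRAY_OFFSET(37) == 37%8 == 5,
 * so BMAP_SET(pxm_bitmap, 37) does pxm_bitmap[4] |= 0x20 and
 * BMAP_TEST(pxm_bitmap, 37) tests that same bit.
 */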
46
47#define MAX_CHUNKS_PER_NODE 4
48#define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
49struct node_memory_chunk_s {
50 unsigned long start_pfn;
51 unsigned long end_pfn;
52 u8 pxm; // proximity domain of node
53 u8 nid; // which cnode contains this chunk?
54 u8 bank; // which mem bank on this node
55};
56static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS];
57
58static int num_memory_chunks; /* total number of memory chunks */
59static int zholes_size_init;
60static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES];
61
62extern void * boot_ioremap(unsigned long, unsigned long);
63
64/* Identify CPU proximity domains */
65static void __init parse_cpu_affinity_structure(char *p)
66{
67 struct acpi_table_processor_affinity *cpu_affinity =
68 (struct acpi_table_processor_affinity *) p;
69
70 if (!cpu_affinity->flags.enabled)
71 return; /* empty entry */
72
73 /* mark this node as "seen" in node bitmap */
74 BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain);
75
76 printk("CPU 0x%02X in proximity domain 0x%02X\n",
77 cpu_affinity->apic_id, cpu_affinity->proximity_domain);
78}
79
80/*
81 * Identify memory proximity domains and hot-remove capabilities.
82 * Fill node memory chunk list structure.
83 */
84static void __init parse_memory_affinity_structure (char *sratp)
85{
86 unsigned long long paddr, size;
87 unsigned long start_pfn, end_pfn;
88 u8 pxm;
89 struct node_memory_chunk_s *p, *q, *pend;
90 struct acpi_table_memory_affinity *memory_affinity =
91 (struct acpi_table_memory_affinity *) sratp;
92
93 if (!memory_affinity->flags.enabled)
94 return; /* empty entry */
95
96 /* mark this node as "seen" in node bitmap */
97 BMAP_SET(pxm_bitmap, memory_affinity->proximity_domain);
98
99 /* calculate info for memory chunk structure */
100 paddr = memory_affinity->base_addr_hi;
101 paddr = (paddr << 32) | memory_affinity->base_addr_lo;
102 size = memory_affinity->length_hi;
103 size = (size << 32) | memory_affinity->length_lo;
104
105 start_pfn = paddr >> PAGE_SHIFT;
106 end_pfn = (paddr + size) >> PAGE_SHIFT;
107
108 pxm = memory_affinity->proximity_domain;
109
110 if (num_memory_chunks >= MAXCHUNKS) {
111		printk("Too many mem chunks in SRAT. Ignoring %llu MBytes at %llx\n",
112 size/(1024*1024), paddr);
113 return;
114 }
115
116 /* Insertion sort based on base address */
117 pend = &node_memory_chunk[num_memory_chunks];
118 for (p = &node_memory_chunk[0]; p < pend; p++) {
119 if (start_pfn < p->start_pfn)
120 break;
121 }
122 if (p < pend) {
123		for (q = pend - 1; q >= p; q--)
124			*(q + 1) = *q;
125 }
126 p->start_pfn = start_pfn;
127 p->end_pfn = end_pfn;
128 p->pxm = pxm;
129
130 num_memory_chunks++;
131
132 printk("Memory range 0x%lX to 0x%lX (type 0x%X) in proximity domain 0x%02X %s\n",
133 start_pfn, end_pfn,
134 memory_affinity->memory_type,
135 memory_affinity->proximity_domain,
136 (memory_affinity->flags.hot_pluggable ?
137 "enabled and removable" : "enabled" ) );
138}
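
/*
 * Worked example of the insertion sort above: with existing chunks at
 * start_pfn 0x0 and 0x40000, a new chunk at 0x20000 stops the scan at
 * the 0x40000 entry, the tail is shifted up one slot, and the new chunk
 * is written into the hole, keeping node_memory_chunk[] ordered by
 * start_pfn (node_read_chunk() and get_zholes_init() rely on that).
 */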
139
140#if MAX_NR_ZONES != 3
141#error "MAX_NR_ZONES != 3, chunk_to_zone requires review"
142#endif
143/* Take a chunk of pages from page frame cstart to cend and count the number
144 * of pages in each zone, returned via zones[].
145 */
146static __init void chunk_to_zones(unsigned long cstart, unsigned long cend,
147 unsigned long *zones)
148{
149 unsigned long max_dma;
150 extern unsigned long max_low_pfn;
151
152 int z;
153 unsigned long rend;
154
155 /* FIXME: MAX_DMA_ADDRESS and max_low_pfn are trying to provide
156	 * similarly scoped information and should be handled in a consistent
157 * manner.
158 */
159 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
160
161 /* Split the hole into the zones in which it falls. Repeatedly
162	 * take the segment in which the remaining hole starts and clamp it
163	 * to the end of that zone.
164 */
165 memset(zones, 0, MAX_NR_ZONES * sizeof(long));
166 while (cstart < cend) {
167 if (cstart < max_dma) {
168 z = ZONE_DMA;
169 rend = (cend < max_dma)? cend : max_dma;
170
171 } else if (cstart < max_low_pfn) {
172 z = ZONE_NORMAL;
173 rend = (cend < max_low_pfn)? cend : max_low_pfn;
174
175 } else {
176 z = ZONE_HIGHMEM;
177 rend = cend;
178 }
179 zones[z] += rend - cstart;
180 cstart = rend;
181 }
182}
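
/*
 * Worked example (illustrative, 4K pages, so max_dma is typically pfn
 * 0x1000 for the 16MB ISA DMA limit and max_low_pfn is 0x38000 for
 * 896MB of lowmem): a hole covering pfns 0x800-0x2000 is split as
 * zones[ZONE_DMA] += 0x800 (pfns 0x800-0x1000) and
 * zones[ZONE_NORMAL] += 0x1000 (pfns 0x1000-0x2000).
 */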
183
184/*
185 * The SRAT table always lists ascending addresses, so we can always
186 * assume that the first "start" address that you see is the real
187 * start of the node, and that the current "end" address is after
188 * the previous one.
189 */
190static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
191{
192 /*
193 * Only add present memory as told by the e820.
194 * There is no guarantee from the SRAT that the memory it
195 * enumerates is present at boot time because it represents
196 * *possible* memory hotplug areas the same as normal RAM.
197 */
198 if (memory_chunk->start_pfn >= max_pfn) {
199 printk (KERN_INFO "Ignoring SRAT pfns: 0x%08lx -> %08lx\n",
200 memory_chunk->start_pfn, memory_chunk->end_pfn);
201 return;
202 }
203 if (memory_chunk->nid != nid)
204 return;
205
206 if (!node_has_online_mem(nid))
207 node_start_pfn[nid] = memory_chunk->start_pfn;
208
209 if (node_start_pfn[nid] > memory_chunk->start_pfn)
210 node_start_pfn[nid] = memory_chunk->start_pfn;
211
212 if (node_end_pfn[nid] < memory_chunk->end_pfn)
213 node_end_pfn[nid] = memory_chunk->end_pfn;
214}
215
216/* Parse the ACPI Static Resource Affinity Table */
217static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
218{
219 u8 *start, *end, *p;
220 int i, j, nid;
221 u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */
222 u8 nid_to_pxm_map[MAX_NUMNODES];/* logical node ID to _PXM map */
223
224 start = (u8 *)(&(sratp->reserved) + 1); /* skip header */
225 p = start;
226 end = (u8 *)sratp + sratp->header.length;
227
228 memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */
229 memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
230 memset(zholes_size, 0, sizeof(zholes_size));
231
232 /* -1 in these maps means not available */
233 memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map));
234 memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map));
235
236 num_memory_chunks = 0;
237 while (p < end) {
238 switch (*p) {
239 case ACPI_SRAT_PROCESSOR_AFFINITY:
240 parse_cpu_affinity_structure(p);
241 break;
242 case ACPI_SRAT_MEMORY_AFFINITY:
243 parse_memory_affinity_structure(p);
244 break;
245 default:
246 printk("ACPI 2.0 SRAT: unknown entry skipped: type=0x%02X, len=%d\n", p[0], p[1]);
247 break;
248 }
249		if (p[1] == 0) {
250			printk("acpi20_parse_srat: Entry length value is zero;"
251			       " can't parse any further!\n");
252			break;
253		}
254		p += p[1];
255 }
256
257 if (num_memory_chunks == 0) {
258		printk("could not find any ACPI SRAT memory areas.\n");
259 goto out_fail;
260 }
261
262 /* Calculate total number of nodes in system from PXM bitmap and create
263 * a set of sequential node IDs starting at zero. (ACPI doesn't seem
264 * to specify the range of _PXM values.)
265 */
266 /*
267 * MCD - we no longer HAVE to number nodes sequentially. PXM domain
268 * numbers could go as high as 255, and MAX_NUMNODES for i386 is typically
269 * 32, so we will continue numbering them in this manner until MAX_NUMNODES
270 * approaches MAX_PXM_DOMAINS for i386.
271 */
272 nodes_clear(node_online_map);
273 for (i = 0; i < MAX_PXM_DOMAINS; i++) {
274 if (BMAP_TEST(pxm_bitmap, i)) {
275 nid = num_online_nodes();
276 pxm_to_nid_map[i] = nid;
277 nid_to_pxm_map[nid] = i;
278 node_set_online(nid);
279 }
280 }
281 BUG_ON(num_online_nodes() == 0);
282
283 /* set cnode id in memory chunk structure */
284 for (i = 0; i < num_memory_chunks; i++)
285 node_memory_chunk[i].nid = pxm_to_nid_map[node_memory_chunk[i].pxm];
286
287 printk("pxm bitmap: ");
288 for (i = 0; i < sizeof(pxm_bitmap); i++) {
289 printk("%02X ", pxm_bitmap[i]);
290 }
291 printk("\n");
292 printk("Number of logical nodes in system = %d\n", num_online_nodes());
293 printk("Number of memory chunks in system = %d\n", num_memory_chunks);
294
295 for (j = 0; j < num_memory_chunks; j++){
296 struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
297 printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
298 j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
299 node_read_chunk(chunk->nid, chunk);
300 }
301
302 for_each_online_node(nid) {
303 unsigned long start = node_start_pfn[nid];
304 unsigned long end = node_end_pfn[nid];
305
306 memory_present(nid, start, end);
307 node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
308 }
309 return 1;
310out_fail:
311 return 0;
312}
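
/*
 * Example of the PXM-to-nid compaction above: if the SRAT reports only
 * proximity domains 0x11 and 0x22, they become logical nodes 0 and 1,
 * with pxm_to_nid_map[0x11] == 0, pxm_to_nid_map[0x22] == 1,
 * nid_to_pxm_map[0] == 0x11 and nid_to_pxm_map[1] == 0x22.
 */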
313
314int __init get_memcfg_from_srat(void)
315{
316 struct acpi_table_header *header = NULL;
317 struct acpi_table_rsdp *rsdp = NULL;
318 struct acpi_table_rsdt *rsdt = NULL;
319	struct acpi_pointer rsdp_address;
320 struct acpi_table_rsdt saved_rsdt;
321 int tables = 0;
322 int i = 0;
323
324	acpi_find_root_pointer(ACPI_PHYSICAL_ADDRESSING, &rsdp_address);
325
326	if (rsdp_address.pointer_type == ACPI_PHYSICAL_POINTER) {
327		printk("%s: assigning address to rsdp\n", __FUNCTION__);
328		rsdp = (struct acpi_table_rsdp *)
329				(u32)rsdp_address.pointer.physical;
330 } else {
331 printk("%s: rsdp_address is not a physical pointer\n", __FUNCTION__);
332 goto out_err;
333 }
334 if (!rsdp) {
335 printk("%s: Didn't find ACPI root!\n", __FUNCTION__);
336 goto out_err;
337 }
338
339 printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision,
340 rsdp->oem_id);
341
342 if (strncmp(rsdp->signature, RSDP_SIG,strlen(RSDP_SIG))) {
343 printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __FUNCTION__);
344 goto out_err;
345 }
346
347 rsdt = (struct acpi_table_rsdt *)
348 boot_ioremap(rsdp->rsdt_address, sizeof(struct acpi_table_rsdt));
349
350 if (!rsdt) {
351 printk(KERN_WARNING
352 "%s: ACPI: Invalid root system description tables (RSDT)\n",
353 __FUNCTION__);
354 goto out_err;
355 }
356
357 header = & rsdt->header;
358
359 if (strncmp(header->signature, RSDT_SIG, strlen(RSDT_SIG))) {
360 printk(KERN_WARNING "ACPI: RSDT signature incorrect\n");
361 goto out_err;
362 }
363
364 /*
365	 * The number of tables is computed by taking the
366	 * size of all entries (total size of the RSDT
367	 * minus the header size) divided by the size of
368	 * each entry (4-byte table pointers).
369 */
370 tables = (header->length - sizeof(struct acpi_table_header)) / 4;
371
372 if (!tables)
373 goto out_err;
374
375 memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt));
376
377 if (saved_rsdt.header.length > sizeof(saved_rsdt)) {
378 printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n",
379 saved_rsdt.header.length);
380 goto out_err;
381 }
382
383 printk("Begin SRAT table scan....\n");
384
385 for (i = 0; i < tables; i++) {
386 /* Map in header, then map in full table length. */
387 header = (struct acpi_table_header *)
388 boot_ioremap(saved_rsdt.entry[i], sizeof(struct acpi_table_header));
389 if (!header)
390 break;
391 header = (struct acpi_table_header *)
392 boot_ioremap(saved_rsdt.entry[i], header->length);
393 if (!header)
394 break;
395
396 if (strncmp((char *) &header->signature, "SRAT", 4))
397 continue;
398
399 /* we've found the srat table. don't need to look at any more tables */
400 return acpi20_parse_srat((struct acpi_table_srat *)header);
401 }
402out_err:
403 printk("failed to get NUMA memory information from SRAT table\n");
404 return 0;
405}
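
/*
 * Worked example of the RSDT entry count above: an ACPI table header is
 * 36 bytes, so an RSDT with header.length == 52 carries
 * (52 - 36) / 4 == 4 table pointers.
 */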
406
407/* For each node run the memory list to determine whether there are
408 * any memory holes. For each hole determine which ZONEs it falls
409 * into.
410 *
411 * NOTE#1: this requires knowledge of the zone boundaries and so
412 * _cannot_ be performed before those are calculated in setup_memory.
413 *
414 * NOTE#2: we rely on the fact that the memory chunks are ordered by
415 * start pfn number during setup.
416 */
417static void __init get_zholes_init(void)
418{
419 int nid;
420 int c;
421 int first;
422 unsigned long end = 0;
423
424 for_each_online_node(nid) {
425 first = 1;
426 for (c = 0; c < num_memory_chunks; c++){
427 if (node_memory_chunk[c].nid == nid) {
428 if (first) {
429 end = node_memory_chunk[c].end_pfn;
430 first = 0;
431
432 } else {
433 /* Record any gap between this chunk
434 * and the previous chunk on this node
435 * against the zones it spans.
436 */
437 chunk_to_zones(end,
438 node_memory_chunk[c].start_pfn,
439 &zholes_size[nid * MAX_NR_ZONES]);
440 }
441 }
442 }
443 }
444}
445
446unsigned long * __init get_zholes_size(int nid)
447{
448 if (!zholes_size_init) {
449 zholes_size_init++;
450 get_zholes_init();
451 }
452 if (nid >= MAX_NUMNODES || !node_online(nid))
453		printk("%s: nid = %d is invalid/offline. num_online_nodes = %d\n",
454 __FUNCTION__, nid, num_online_nodes());
455 return &zholes_size[nid * MAX_NR_ZONES];
456}
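
/*
 * zholes_size[] is a flat [MAX_NUMNODES][MAX_NR_ZONES] array: the hole
 * count for (nid, zone) lives at index nid * MAX_NR_ZONES + zone, and
 * the pointer returned above is the MAX_NR_ZONES-long slice for one
 * node (e.g. with MAX_NR_ZONES == 3, node 2's ZONE_NORMAL slot is
 * index 7).
 */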
diff --git a/arch/i386/kernel/summit.c b/arch/i386/kernel/summit.c
new file mode 100644
index 000000000000..d0e01a3acf35
--- /dev/null
+++ b/arch/i386/kernel/summit.c
@@ -0,0 +1,180 @@
1/*
2 * arch/i386/kernel/summit.c - IBM Summit-Specific Code
3 *
4 * Written By: Matthew Dobson, IBM Corporation
5 *
6 * Copyright (c) 2003 IBM Corp.
7 *
8 * All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or (at
13 * your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
18 * NON INFRINGEMENT. See the GNU General Public License for more
19 * details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 * Send feedback to <colpatch@us.ibm.com>
26 *
27 */
28
29#include <linux/mm.h>
30#include <linux/init.h>
31#include <asm/io.h>
32#include <asm/mach-summit/mach_mpparse.h>
33
34static struct rio_table_hdr *rio_table_hdr __initdata;
35static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
36static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata;
37
38static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
39{
40 int twister = 0, node = 0;
41 int i, bus, num_buses;
42
43 for(i = 0; i < rio_table_hdr->num_rio_dev; i++){
44 if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id){
45 twister = rio_devs[i]->owner_id;
46 break;
47 }
48 }
49 if (i == rio_table_hdr->num_rio_dev){
50 printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __FUNCTION__);
51 return last_bus;
52 }
53
54 for(i = 0; i < rio_table_hdr->num_scal_dev; i++){
55 if (scal_devs[i]->node_id == twister){
56 node = scal_devs[i]->node_id;
57 break;
58 }
59 }
60 if (i == rio_table_hdr->num_scal_dev){
61 printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __FUNCTION__);
62 return last_bus;
63 }
64
65 switch (rio_devs[wpeg_num]->type){
66 case CompatWPEG:
67		/* The Compatibility Winnipeg controls the 2 legacy buses,
68 * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case
69 * a PCI-PCI bridge card is used in either slot: total 5 buses.
70 */
71 num_buses = 5;
72 break;
73 case AltWPEG:
74 /* The Alternate Winnipeg controls the 2 133MHz buses [1 slot
75 * each], their 2 "extra" buses, the 100MHz bus [2 slots] and
76 * the "extra" buses for each of those slots: total 7 buses.
77 */
78 num_buses = 7;
79 break;
80 case LookOutAWPEG:
81 case LookOutBWPEG:
82 /* A Lookout Winnipeg controls 3 100MHz buses [2 slots each]
83 * & the "extra" buses for each of those slots: total 9 buses.
84 */
85 num_buses = 9;
86 break;
87 default:
88 printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __FUNCTION__);
89 return last_bus;
90 }
91
92 for(bus = last_bus; bus < last_bus + num_buses; bus++)
93 mp_bus_id_to_node[bus] = node;
94 return bus;
95}
96
97static int __init build_detail_arrays(void)
98{
99 unsigned long ptr;
100 int i, scal_detail_size, rio_detail_size;
101
102 if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){
103 printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __FUNCTION__, MAX_NUMNODES, rio_table_hdr->num_scal_dev);
104 return 0;
105 }
106
107 switch (rio_table_hdr->version){
108 default:
109 printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __FUNCTION__, rio_table_hdr->version);
110 return 0;
111 case 2:
112 scal_detail_size = 11;
113 rio_detail_size = 13;
114 break;
115 case 3:
116 scal_detail_size = 12;
117 rio_detail_size = 15;
118 break;
119 }
120
121 ptr = (unsigned long)rio_table_hdr + 3;
122 for(i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size)
123 scal_devs[i] = (struct scal_detail *)ptr;
124
125 for(i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size)
126 rio_devs[i] = (struct rio_detail *)ptr;
127
128 return 1;
129}
130
131void __init setup_summit(void)
132{
133 unsigned long ptr;
134 unsigned short offset;
135 int i, next_wpeg, next_bus = 0;
136
137	/* The pointer to the EBDA segment is stored in the word at phys 0x40E (40:0E) */
138 ptr = *(unsigned short *)phys_to_virt(0x40Eul);
139 ptr = (unsigned long)phys_to_virt(ptr << 4);
140
141 rio_table_hdr = NULL;
142 offset = 0x180;
143 while (offset){
144 /* The block id is stored in the 2nd word */
145 if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){
146 /* set the pointer past the offset & block id */
147 rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
148 break;
149 }
150 /* The next offset is stored in the 1st word. 0 means no more */
151 offset = *((unsigned short *)(ptr + offset));
152 }
153 if (!rio_table_hdr){
154 printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __FUNCTION__);
155 return;
156 }
157
158 if (!build_detail_arrays())
159 return;
160
161 /* The first Winnipeg we're looking for has an index of 0 */
162 next_wpeg = 0;
163 do {
164 for(i = 0; i < rio_table_hdr->num_rio_dev; i++){
165 if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg){
166 /* It's the Winnipeg we're looking for! */
167 next_bus = setup_pci_node_map_for_wpeg(i, next_bus);
168 next_wpeg++;
169 break;
170 }
171 }
172 /*
173 * If we go through all Rio devices and don't find one with
174 * the next index, it means we've found all the Winnipegs,
175 * and thus all the PCI buses.
176 */
177 if (i == rio_table_hdr->num_rio_dev)
178 next_wpeg = 0;
179 } while (next_wpeg != 0);
180}
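
/*
 * Worked example of the EBDA lookup above (illustrative segment value):
 * if the BIOS stores segment 0x9FC0 in the word at physical 0x40E, the
 * EBDA starts at physical 0x9FC0 << 4 == 0x9FC00, and the Rio Grande
 * table is found by walking the chained blocks from offset 0x180 until
 * block id 0x4752 (the ASCII bytes 'R','G' read as a little-endian word).
 */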
diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c
new file mode 100644
index 000000000000..a4a61976ecb9
--- /dev/null
+++ b/arch/i386/kernel/sys_i386.c
@@ -0,0 +1,252 @@
1/*
2 * linux/arch/i386/kernel/sys_i386.c
3 *
4 * This file contains various random system calls that
5 * have a non-standard calling sequence on the Linux/i386
6 * platform.
7 */
8
9#include <linux/errno.h>
10#include <linux/sched.h>
11#include <linux/mm.h>
12#include <linux/smp.h>
13#include <linux/smp_lock.h>
14#include <linux/sem.h>
15#include <linux/msg.h>
16#include <linux/shm.h>
17#include <linux/stat.h>
18#include <linux/syscalls.h>
19#include <linux/mman.h>
20#include <linux/file.h>
21#include <linux/utsname.h>
22
23#include <asm/uaccess.h>
24#include <asm/ipc.h>
25
26/*
27 * sys_pipe() is the normal C calling standard for creating
28 * a pipe. It's not the way Unix traditionally does this, though.
29 */
30asmlinkage int sys_pipe(unsigned long __user * fildes)
31{
32 int fd[2];
33 int error;
34
35 error = do_pipe(fd);
36 if (!error) {
37 if (copy_to_user(fildes, fd, 2*sizeof(int)))
38 error = -EFAULT;
39 }
40 return error;
41}
42
43/* common code for old and new mmaps */
44static inline long do_mmap2(
45 unsigned long addr, unsigned long len,
46 unsigned long prot, unsigned long flags,
47 unsigned long fd, unsigned long pgoff)
48{
49 int error = -EBADF;
50 struct file * file = NULL;
51
52 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
53 if (!(flags & MAP_ANONYMOUS)) {
54 file = fget(fd);
55 if (!file)
56 goto out;
57 }
58
59 down_write(&current->mm->mmap_sem);
60 error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
61 up_write(&current->mm->mmap_sem);
62
63 if (file)
64 fput(file);
65out:
66 return error;
67}
68
69asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
70 unsigned long prot, unsigned long flags,
71 unsigned long fd, unsigned long pgoff)
72{
73 return do_mmap2(addr, len, prot, flags, fd, pgoff);
74}
75
76/*
77 * Perform the select(nd, in, out, ex, tv) and mmap() system
78 * calls. Linux/i386 historically couldn't handle more than
79 * 4 system call parameters, so these system calls used a memory
80 * block for parameter passing.
81 */
82
83struct mmap_arg_struct {
84 unsigned long addr;
85 unsigned long len;
86 unsigned long prot;
87 unsigned long flags;
88 unsigned long fd;
89 unsigned long offset;
90};
91
92asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
93{
94 struct mmap_arg_struct a;
95 int err = -EFAULT;
96
97 if (copy_from_user(&a, arg, sizeof(a)))
98 goto out;
99
100 err = -EINVAL;
101 if (a.offset & ~PAGE_MASK)
102 goto out;
103
104 err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
105out:
106 return err;
107}
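
/*
 * Illustrative difference between the two entry points: sys_mmap2()
 * takes the file offset in pages, old_mmap() in bytes (which must be
 * page-aligned).  With 4K pages, mapping at file offset 8192 means
 * pgoff == 2 for sys_mmap2(), while old_mmap() is handed
 * a.offset == 8192 and converts it with a.offset >> PAGE_SHIFT.
 */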
108
109
110struct sel_arg_struct {
111 unsigned long n;
112 fd_set __user *inp, *outp, *exp;
113 struct timeval __user *tvp;
114};
115
116asmlinkage int old_select(struct sel_arg_struct __user *arg)
117{
118 struct sel_arg_struct a;
119
120 if (copy_from_user(&a, arg, sizeof(a)))
121 return -EFAULT;
122 /* sys_select() does the appropriate kernel locking */
123 return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
124}
125
126/*
127 * sys_ipc() is the de-multiplexer for the SysV IPC calls..
128 *
129 * This is really horribly ugly.
130 */
131asmlinkage int sys_ipc (uint call, int first, int second,
132 int third, void __user *ptr, long fifth)
133{
134 int version, ret;
135
136 version = call >> 16; /* hack for backward compatibility */
137 call &= 0xffff;
138
139 switch (call) {
140 case SEMOP:
141 return sys_semtimedop (first, (struct sembuf __user *)ptr, second, NULL);
142 case SEMTIMEDOP:
143 return sys_semtimedop(first, (struct sembuf __user *)ptr, second,
144 (const struct timespec __user *)fifth);
145
146 case SEMGET:
147 return sys_semget (first, second, third);
148 case SEMCTL: {
149 union semun fourth;
150 if (!ptr)
151 return -EINVAL;
152 if (get_user(fourth.__pad, (void __user * __user *) ptr))
153 return -EFAULT;
154 return sys_semctl (first, second, third, fourth);
155 }
156
157 case MSGSND:
158 return sys_msgsnd (first, (struct msgbuf __user *) ptr,
159 second, third);
160 case MSGRCV:
161 switch (version) {
162 case 0: {
163 struct ipc_kludge tmp;
164 if (!ptr)
165 return -EINVAL;
166
167 if (copy_from_user(&tmp,
168 (struct ipc_kludge __user *) ptr,
169 sizeof (tmp)))
170 return -EFAULT;
171 return sys_msgrcv (first, tmp.msgp, second,
172 tmp.msgtyp, third);
173 }
174 default:
175 return sys_msgrcv (first,
176 (struct msgbuf __user *) ptr,
177 second, fifth, third);
178 }
179 case MSGGET:
180 return sys_msgget ((key_t) first, second);
181 case MSGCTL:
182 return sys_msgctl (first, second, (struct msqid_ds __user *) ptr);
183
184 case SHMAT:
185 switch (version) {
186 default: {
187 ulong raddr;
188 ret = do_shmat (first, (char __user *) ptr, second, &raddr);
189 if (ret)
190 return ret;
191 return put_user (raddr, (ulong __user *) third);
192 }
193 case 1: /* iBCS2 emulator entry point */
194 if (!segment_eq(get_fs(), get_ds()))
195 return -EINVAL;
196 /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */
197 return do_shmat (first, (char __user *) ptr, second, (ulong *) third);
198 }
199 case SHMDT:
200 return sys_shmdt ((char __user *)ptr);
201 case SHMGET:
202 return sys_shmget (first, second, third);
203 case SHMCTL:
204 return sys_shmctl (first, second,
205 (struct shmid_ds __user *) ptr);
206 default:
207 return -ENOSYS;
208 }
209}
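
/*
 * Example of the demultiplexing above, assuming the usual <linux/ipc.h>
 * call numbers (MSGRCV == 12): a libc that passes call == (1 << 16) | 12
 * selects version 1 and takes the direct-argument msgrcv path, while
 * call == 12 (version 0) goes through the struct ipc_kludge copy-in.
 */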
210
211/*
212 * Old cruft
213 */
214asmlinkage int sys_uname(struct old_utsname __user * name)
215{
216 int err;
217 if (!name)
218 return -EFAULT;
219 down_read(&uts_sem);
220 err=copy_to_user(name, &system_utsname, sizeof (*name));
221 up_read(&uts_sem);
222 return err?-EFAULT:0;
223}
224
225asmlinkage int sys_olduname(struct oldold_utsname __user * name)
226{
227 int error;
228
229 if (!name)
230 return -EFAULT;
231 if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
232 return -EFAULT;
233
234 down_read(&uts_sem);
235
236 error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
237 error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
238 error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
239 error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
240 error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
241 error |= __put_user(0,name->release+__OLD_UTS_LEN);
242 error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
243 error |= __put_user(0,name->version+__OLD_UTS_LEN);
244 error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
245 error |= __put_user(0,name->machine+__OLD_UTS_LEN);
246
247 up_read(&uts_sem);
248
249 error = error ? -EFAULT : 0;
250
251 return error;
252}
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
new file mode 100644
index 000000000000..960d8bd137d0
--- /dev/null
+++ b/arch/i386/kernel/sysenter.c
@@ -0,0 +1,65 @@
1/*
2 * linux/arch/i386/kernel/sysenter.c
3 *
4 * (C) Copyright 2002 Linus Torvalds
5 *
6 * This file contains the needed initializations to support sysenter.
7 */
8
9#include <linux/init.h>
10#include <linux/smp.h>
11#include <linux/thread_info.h>
12#include <linux/sched.h>
13#include <linux/gfp.h>
14#include <linux/string.h>
15#include <linux/elf.h>
16
17#include <asm/cpufeature.h>
18#include <asm/msr.h>
19#include <asm/pgtable.h>
20#include <asm/unistd.h>
21
22extern asmlinkage void sysenter_entry(void);
23
24void enable_sep_cpu(void *info)
25{
26 int cpu = get_cpu();
27 struct tss_struct *tss = &per_cpu(init_tss, cpu);
28
29 tss->ss1 = __KERNEL_CS;
30 tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
31 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
32 wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0);
33 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
34 put_cpu();
35}
36
37/*
38 * These symbols are defined by vsyscall.o to mark the bounds
39 * of the ELF DSO images included therein.
40 */
41extern const char vsyscall_int80_start, vsyscall_int80_end;
42extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
43
44static int __init sysenter_setup(void)
45{
46 void *page = (void *)get_zeroed_page(GFP_ATOMIC);
47
48 __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC);
49
50 if (!boot_cpu_has(X86_FEATURE_SEP)) {
51 memcpy(page,
52 &vsyscall_int80_start,
53 &vsyscall_int80_end - &vsyscall_int80_start);
54 return 0;
55 }
56
57 memcpy(page,
58 &vsyscall_sysenter_start,
59 &vsyscall_sysenter_end - &vsyscall_sysenter_start);
60
61 on_each_cpu(enable_sep_cpu, NULL, 1, 1);
62 return 0;
63}
64
65__initcall(sysenter_setup);
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
new file mode 100644
index 000000000000..9b55e30e4490
--- /dev/null
+++ b/arch/i386/kernel/time.c
@@ -0,0 +1,476 @@
1/*
2 * linux/arch/i386/kernel/time.c
3 *
4 * Copyright (C) 1991, 1992, 1995 Linus Torvalds
5 *
6 * This file contains the PC-specific time handling details:
7 * reading the RTC at bootup, etc..
8 * 1994-07-02 Alan Modra
9 * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
10 * 1995-03-26 Markus Kuhn
11 * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
12 * precision CMOS clock update
13 * 1996-05-03 Ingo Molnar
14 * fixed time warps in do_[slow|fast]_gettimeoffset()
15 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
16 * "A Kernel Model for Precision Timekeeping" by Dave Mills
17 * 1998-09-05 (Various)
18 * More robust do_fast_gettimeoffset() algorithm implemented
19 * (works with APM, Cyrix 6x86MX and Centaur C6),
20 * monotonic gettimeofday() with fast_get_timeoffset(),
21 * drift-proof precision TSC calibration on boot
22 * (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
23 * Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
24 * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
25 * 1998-12-16 Andrea Arcangeli
26 * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
27 * because it was not accounting for lost_ticks.
28 * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli
29 * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
30 * serialize accesses to xtime/lost_ticks).
31 */
32
33#include <linux/errno.h>
34#include <linux/sched.h>
35#include <linux/kernel.h>
36#include <linux/param.h>
37#include <linux/string.h>
38#include <linux/mm.h>
39#include <linux/interrupt.h>
40#include <linux/time.h>
41#include <linux/delay.h>
42#include <linux/init.h>
43#include <linux/smp.h>
44#include <linux/module.h>
45#include <linux/sysdev.h>
46#include <linux/bcd.h>
47#include <linux/efi.h>
48#include <linux/mca.h>
49
50#include <asm/io.h>
51#include <asm/smp.h>
52#include <asm/irq.h>
53#include <asm/msr.h>
54#include <asm/delay.h>
55#include <asm/mpspec.h>
56#include <asm/uaccess.h>
57#include <asm/processor.h>
58#include <asm/timer.h>
59
60#include "mach_time.h"
61
62#include <linux/timex.h>
63#include <linux/config.h>
64
65#include <asm/hpet.h>
66
67#include <asm/arch_hooks.h>
68
69#include "io_ports.h"
70
71extern spinlock_t i8259A_lock;
72int pit_latch_buggy; /* extern */
73
74#include "do_timer.h"
75
76u64 jiffies_64 = INITIAL_JIFFIES;
77
78EXPORT_SYMBOL(jiffies_64);
79
80unsigned long cpu_khz; /* Detected as we calibrate the TSC */
81
82extern unsigned long wall_jiffies;
83
84DEFINE_SPINLOCK(rtc_lock);
85
86DEFINE_SPINLOCK(i8253_lock);
87EXPORT_SYMBOL(i8253_lock);
88
89struct timer_opts *cur_timer = &timer_none;
90
91/*
92 * This is a special lock that is owned by the CPU and holds the index
93 * register we are working with. It is required for NMI access to the
94 * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details.
95 */
96volatile unsigned long cmos_lock = 0;
97EXPORT_SYMBOL(cmos_lock);
98
99/* Routines for accessing the CMOS RAM/RTC. */
100unsigned char rtc_cmos_read(unsigned char addr)
101{
102 unsigned char val;
103 lock_cmos_prefix(addr);
104 outb_p(addr, RTC_PORT(0));
105 val = inb_p(RTC_PORT(1));
106 lock_cmos_suffix(addr);
107 return val;
108}
109EXPORT_SYMBOL(rtc_cmos_read);
110
111void rtc_cmos_write(unsigned char val, unsigned char addr)
112{
113 lock_cmos_prefix(addr);
114 outb_p(addr, RTC_PORT(0));
115 outb_p(val, RTC_PORT(1));
116 lock_cmos_suffix(addr);
117}
118EXPORT_SYMBOL(rtc_cmos_write);
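
/*
 * Usage sketch (not built): reading the RTC seconds register with the
 * accessors above.  RTC_SECONDS comes from <linux/mc146818rtc.h>;
 * callers are expected to hold rtc_lock around multi-register reads,
 * as set_rtc_mmss() below does.
 */
#if 0
static unsigned char read_rtc_seconds(void)
{
	unsigned char sec;

	spin_lock_irq(&rtc_lock);
	sec = rtc_cmos_read(RTC_SECONDS);	/* BCD unless RTC_DM_BINARY */
	spin_unlock_irq(&rtc_lock);
	return sec;
}
#endif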
119
120/*
121 * This version of gettimeofday has microsecond resolution
122 * and better than microsecond precision on fast x86 machines with TSC.
123 */
124void do_gettimeofday(struct timeval *tv)
125{
126 unsigned long seq;
127 unsigned long usec, sec;
128 unsigned long max_ntp_tick;
129
130 do {
131 unsigned long lost;
132
133 seq = read_seqbegin(&xtime_lock);
134
135 usec = cur_timer->get_offset();
136 lost = jiffies - wall_jiffies;
137
138 /*
139 * If time_adjust is negative then NTP is slowing the clock
140	 * so make sure not to go into the next possible interval.
141	 * Better to lose some accuracy than have time go backwards.
142 */
143 if (unlikely(time_adjust < 0)) {
144 max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
145 usec = min(usec, max_ntp_tick);
146
147 if (lost)
148 usec += lost * max_ntp_tick;
149 }
150 else if (unlikely(lost))
151 usec += lost * (USEC_PER_SEC / HZ);
152
153 sec = xtime.tv_sec;
154 usec += (xtime.tv_nsec / 1000);
155 } while (read_seqretry(&xtime_lock, seq));
156
157 while (usec >= 1000000) {
158 usec -= 1000000;
159 sec++;
160 }
161
162 tv->tv_sec = sec;
163 tv->tv_usec = usec;
164}
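
/*
 * The read_seqbegin()/read_seqretry() pair above is the lockless reader
 * side of xtime_lock: if timer_interrupt() updates xtime while we are
 * sampling, read_seqretry() sees a changed sequence count and the whole
 * sample is retried, so readers never block the timer interrupt.
 */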
165
166EXPORT_SYMBOL(do_gettimeofday);
167
168int do_settimeofday(struct timespec *tv)
169{
170 time_t wtm_sec, sec = tv->tv_sec;
171 long wtm_nsec, nsec = tv->tv_nsec;
172
173 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
174 return -EINVAL;
175
176 write_seqlock_irq(&xtime_lock);
177 /*
178 * This is revolting. We need to set "xtime" correctly. However, the
179 * value in this location is the value at the most recent update of
180 * wall time. Discover what correction gettimeofday() would have
181 * made, and then undo it!
182 */
183 nsec -= cur_timer->get_offset() * NSEC_PER_USEC;
184 nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
185
186 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
187 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
188
189 set_normalized_timespec(&xtime, sec, nsec);
190 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
191
192 time_adjust = 0; /* stop active adjtime() */
193 time_status |= STA_UNSYNC;
194 time_maxerror = NTP_PHASE_LIMIT;
195 time_esterror = NTP_PHASE_LIMIT;
196 write_sequnlock_irq(&xtime_lock);
197 clock_was_set();
198 return 0;
199}
200
201EXPORT_SYMBOL(do_settimeofday);
202
203static int set_rtc_mmss(unsigned long nowtime)
204{
205 int retval;
206
207 WARN_ON(irqs_disabled());
208
209 /* gets recalled with irq locally disabled */
210 spin_lock_irq(&rtc_lock);
211 if (efi_enabled)
212 retval = efi_set_rtc_mmss(nowtime);
213 else
214 retval = mach_set_rtc_mmss(nowtime);
215 spin_unlock_irq(&rtc_lock);
216
217 return retval;
218}
219
220
221int timer_ack;
222
223/* monotonic_clock(): returns # of nanoseconds passed since time_init()
224 * Note: This function is required to return accurate
225 * time even in the absence of multiple timer ticks.
226 */
227unsigned long long monotonic_clock(void)
228{
229 return cur_timer->monotonic_clock();
230}
231EXPORT_SYMBOL(monotonic_clock);
232
233#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
234unsigned long profile_pc(struct pt_regs *regs)
235{
236 unsigned long pc = instruction_pointer(regs);
237
238 if (in_lock_functions(pc))
239 return *(unsigned long *)(regs->ebp + 4);
240
241 return pc;
242}
243EXPORT_SYMBOL(profile_pc);
244#endif
245
246/*
247 * timer_interrupt() needs to keep up the real-time clock,
248 * as well as call the "do_timer()" routine every clocktick
249 */
250static inline void do_timer_interrupt(int irq, void *dev_id,
251 struct pt_regs *regs)
252{
253#ifdef CONFIG_X86_IO_APIC
254 if (timer_ack) {
255 /*
256 * Subtle, when I/O APICs are used we have to ack timer IRQ
257 * manually to reset the IRR bit for do_slow_gettimeoffset().
258 * This will also deassert NMI lines for the watchdog if run
259 * on an 82489DX-based system.
260 */
261 spin_lock(&i8259A_lock);
262 outb(0x0c, PIC_MASTER_OCW3);
263 /* Ack the IRQ; AEOI will end it automatically. */
264 inb(PIC_MASTER_POLL);
265 spin_unlock(&i8259A_lock);
266 }
267#endif
268
269 do_timer_interrupt_hook(regs);
270
271
272 if (MCA_bus) {
273 /* The PS/2 uses level-triggered interrupts. You can't
274 turn them off, nor would you want to (any attempt to
275 enable edge-triggered interrupts usually gets intercepted by a
276 special hardware circuit). Hence we have to acknowledge
277 the timer interrupt. Through some incredibly stupid
278 design idea, the reset for IRQ 0 is done by setting the
279 high bit of the PPI port B (0x61). Note that some PS/2s,
280 notably the 55SX, work fine if this is removed. */
281
282 irq = inb_p( 0x61 ); /* read the current state */
283 outb_p( irq|0x80, 0x61 ); /* reset the IRQ */
284 }
285}
286
287/*
288 * This is the same as the above, except we _also_ save the current
289 * Time Stamp Counter value at the time of the timer interrupt, so that
290 * we later on can estimate the time of day more exactly.
291 */
292irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
293{
294 /*
295 * Here we are in the timer irq handler. We just have irqs locally
296 * disabled but we don't know if the timer_bh is running on the other
297	 * CPU. We need to avoid an SMP race with it. NOTE: we don't need
298	 * the irq version of write_lock because, as just said, we have irqs
299	 * locally disabled. -arca
300 */
301 write_seqlock(&xtime_lock);
302
303 cur_timer->mark_offset();
304
305 do_timer_interrupt(irq, NULL, regs);
306
307 write_sequnlock(&xtime_lock);
308 return IRQ_HANDLED;
309}
310
311/* not static: needed by APM */
312unsigned long get_cmos_time(void)
313{
314 unsigned long retval;
315
316 spin_lock(&rtc_lock);
317
318 if (efi_enabled)
319 retval = efi_get_time();
320 else
321 retval = mach_get_cmos_time();
322
323 spin_unlock(&rtc_lock);
324
325 return retval;
326}
327static void sync_cmos_clock(unsigned long dummy);
328
329static struct timer_list sync_cmos_timer =
330 TIMER_INITIALIZER(sync_cmos_clock, 0, 0);
331
332static void sync_cmos_clock(unsigned long dummy)
333{
334 struct timeval now, next;
335 int fail = 1;
336
337 /*
338 * If we have an externally synchronized Linux clock, then update
339 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
340 * called as close as possible to 500 ms before the new second starts.
341 * This code is run on a timer. If the clock is set, that timer
342 * may not expire at the correct time. Thus, we adjust...
343 */
344 if ((time_status & STA_UNSYNC) != 0)
345 /*
346 * Not synced, exit, do not restart a timer (if one is
347 * running, let it run out).
348 */
349 return;
350
351 do_gettimeofday(&now);
352 if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
353 now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
354 fail = set_rtc_mmss(now.tv_sec);
355
356 next.tv_usec = USEC_AFTER - now.tv_usec;
357 if (next.tv_usec <= 0)
358 next.tv_usec += USEC_PER_SEC;
359
360 if (!fail)
361 next.tv_sec = 659;
362 else
363 next.tv_sec = 0;
364
365 if (next.tv_usec >= USEC_PER_SEC) {
366 next.tv_sec++;
367 next.tv_usec -= USEC_PER_SEC;
368 }
369 mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
370}
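
/*
 * Worked example of the rescheduling above: on success the timer is set
 * 659 s plus (USEC_AFTER - now.tv_usec) usec ahead, i.e. roughly 11
 * minutes later and again close to the ~500 ms-before-the-second point
 * that set_rtc_mmss() wants; on failure it retries within the next
 * second.
 */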
371
372void notify_arch_cmos_timer(void)
373{
374 mod_timer(&sync_cmos_timer, jiffies + 1);
375}
376
377static long clock_cmos_diff, sleep_start;
378
379static int timer_suspend(struct sys_device *dev, u32 state)
380{
381 /*
382	 * Record the offset between system time and the CMOS clock so timer_resume() can restore it
383 */
384 clock_cmos_diff = -get_cmos_time();
385 clock_cmos_diff += get_seconds();
386 sleep_start = get_cmos_time();
387 return 0;
388}
389
390static int timer_resume(struct sys_device *dev)
391{
392 unsigned long flags;
393 unsigned long sec;
394 unsigned long sleep_length;
395
396#ifdef CONFIG_HPET_TIMER
397 if (is_hpet_enabled())
398 hpet_reenable();
399#endif
400 sec = get_cmos_time() + clock_cmos_diff;
401 sleep_length = (get_cmos_time() - sleep_start) * HZ;
402 write_seqlock_irqsave(&xtime_lock, flags);
403 xtime.tv_sec = sec;
404 xtime.tv_nsec = 0;
405 write_sequnlock_irqrestore(&xtime_lock, flags);
406 jiffies += sleep_length;
407 wall_jiffies += sleep_length;
408 return 0;
409}
410
411static struct sysdev_class timer_sysclass = {
412 .resume = timer_resume,
413 .suspend = timer_suspend,
414 set_kset_name("timer"),
415};
416
417
418/* XXX this driverfs stuff should probably go elsewhere later -john */
419static struct sys_device device_timer = {
420 .id = 0,
421 .cls = &timer_sysclass,
422};
423
424static int time_init_device(void)
425{
426 int error = sysdev_class_register(&timer_sysclass);
427 if (!error)
428 error = sysdev_register(&device_timer);
429 return error;
430}
431
432device_initcall(time_init_device);
433
434#ifdef CONFIG_HPET_TIMER
435extern void (*late_time_init)(void);
436/* Duplicate of time_init() below, with hpet_enable part added */
437static void __init hpet_time_init(void)
438{
439 xtime.tv_sec = get_cmos_time();
440 xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
441 set_normalized_timespec(&wall_to_monotonic,
442 -xtime.tv_sec, -xtime.tv_nsec);
443
444 if (hpet_enable() >= 0) {
445 printk("Using HPET for base-timer\n");
446 }
447
448 cur_timer = select_timer();
449 printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
450
451 time_init_hook();
452}
453#endif
454
455void __init time_init(void)
456{
457#ifdef CONFIG_HPET_TIMER
458 if (is_hpet_capable()) {
459 /*
460 * HPET initialization needs to do memory-mapped io. So, let
461 * us do a late initialization after mem_init().
462 */
463 late_time_init = hpet_time_init;
464 return;
465 }
466#endif
467 xtime.tv_sec = get_cmos_time();
468 xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
469 set_normalized_timespec(&wall_to_monotonic,
470 -xtime.tv_sec, -xtime.tv_nsec);
471
472 cur_timer = select_timer();
473 printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
474
475 time_init_hook();
476}
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c
new file mode 100644
index 000000000000..244a31b04be7
--- /dev/null
+++ b/arch/i386/kernel/time_hpet.c
@@ -0,0 +1,458 @@
1/*
2 * linux/arch/i386/kernel/time_hpet.c
3 * This code largely copied from arch/x86_64/kernel/time.c
4 * See that file for credits.
5 *
6 * 2003-06-30 Venkatesh Pallipadi - Additional changes for HPET support
7 */
8
9#include <linux/errno.h>
10#include <linux/kernel.h>
11#include <linux/param.h>
12#include <linux/string.h>
13#include <linux/init.h>
14#include <linux/smp.h>
15
16#include <asm/timer.h>
17#include <asm/fixmap.h>
18#include <asm/apic.h>
19
20#include <linux/timex.h>
21#include <linux/config.h>
22
23#include <asm/hpet.h>
24#include <linux/hpet.h>
25
26static unsigned long hpet_period; /* fsecs / HPET clock */
27unsigned long hpet_tick; /* hpet clks count per tick */
28unsigned long hpet_address; /* hpet memory map physical address */
29
30static int use_hpet; /* can be used for runtime check of hpet */
31static int boot_hpet_disable; /* boottime override for HPET timer */
32static void __iomem * hpet_virt_address; /* hpet kernel virtual address */
33
34#define FSEC_TO_USEC (1000000000UL)	/* femtoseconds per microsecond */
35
36int hpet_readl(unsigned long a)
37{
38 return readl(hpet_virt_address + a);
39}
40
41static void hpet_writel(unsigned long d, unsigned long a)
42{
43 writel(d, hpet_virt_address + a);
44}
45
46#ifdef CONFIG_X86_LOCAL_APIC
47/*
48 * HPET counters don't wrap around on every tick. They just change the
49 * comparator value and continue. Next tick can be caught by checking
50 * for a change in the comparator value. Used in apic.c.
51 */
52static void __init wait_hpet_tick(void)
53{
54 unsigned int start_cmp_val, end_cmp_val;
55
56 start_cmp_val = hpet_readl(HPET_T0_CMP);
57 do {
58 end_cmp_val = hpet_readl(HPET_T0_CMP);
59 } while (start_cmp_val == end_cmp_val);
60}
61#endif
62
63static int hpet_timer_stop_set_go(unsigned long tick)
64{
65 unsigned int cfg;
66
67 /*
68 * Stop the timers and reset the main counter.
69 */
70 cfg = hpet_readl(HPET_CFG);
71 cfg &= ~HPET_CFG_ENABLE;
72 hpet_writel(cfg, HPET_CFG);
73 hpet_writel(0, HPET_COUNTER);
74 hpet_writel(0, HPET_COUNTER + 4);
75
76 /*
77 * Set up timer 0, as periodic with first interrupt to happen at
78 * hpet_tick, and period also hpet_tick.
79 */
80 cfg = hpet_readl(HPET_T0_CFG);
81 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
82 HPET_TN_SETVAL | HPET_TN_32BIT;
83 hpet_writel(cfg, HPET_T0_CFG);
84
85 /*
86 * The first write after writing TN_SETVAL to the config register sets
87 * the counter value, the second write sets the threshold.
88 */
89 hpet_writel(tick, HPET_T0_CMP);
90 hpet_writel(tick, HPET_T0_CMP);
91
92 /*
93 * Go!
94 */
95 cfg = hpet_readl(HPET_CFG);
96 cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY;
97 hpet_writel(cfg, HPET_CFG);
98
99 return 0;
100}
101
102/*
103 * Check whether HPET was found by ACPI boot parse. If yes setup HPET
104 * counter 0 for kernel base timer.
105 */
106int __init hpet_enable(void)
107{
108 unsigned int id;
109 unsigned long tick_fsec_low, tick_fsec_high; /* tick in femto sec */
110 unsigned long hpet_tick_rem;
111
112 if (boot_hpet_disable)
113 return -1;
114
115 if (!hpet_address) {
116 return -1;
117 }
118 hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
119 /*
120 * Read the period, compute tick and quotient.
121 */
122 id = hpet_readl(HPET_ID);
123
124 /*
125 * We are checking for value '1' or more in number field if
126 * CONFIG_HPET_EMULATE_RTC is set because we will need an
127 * additional timer for RTC emulation.
128	 * However, we can make do with one timer otherwise,
129	 * using the single HPET timer for system time.
130 */
131 if (
132#ifdef CONFIG_HPET_EMULATE_RTC
133 !(id & HPET_ID_NUMBER) ||
134#endif
135 !(id & HPET_ID_LEGSUP))
136 return -1;
137
138 hpet_period = hpet_readl(HPET_PERIOD);
139 if ((hpet_period < HPET_MIN_PERIOD) || (hpet_period > HPET_MAX_PERIOD))
140 return -1;
141
142 /*
143 * 64 bit math
144 * First changing tick into fsec
145 * Then 64 bit div to find number of hpet clk per tick
146 */
147 ASM_MUL64_REG(tick_fsec_low, tick_fsec_high,
148 KERNEL_TICK_USEC, FSEC_TO_USEC);
149 ASM_DIV64_REG(hpet_tick, hpet_tick_rem,
150 hpet_period, tick_fsec_low, tick_fsec_high);
151
152 if (hpet_tick_rem > (hpet_period >> 1))
153 hpet_tick++; /* rounding the result */
154
155 if (hpet_timer_stop_set_go(hpet_tick))
156 return -1;
157
158 use_hpet = 1;
159
160#ifdef CONFIG_HPET
161 {
162 struct hpet_data hd;
163 unsigned int ntimer;
164
165 memset(&hd, 0, sizeof (hd));
166
167 ntimer = hpet_readl(HPET_ID);
168 ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
169 ntimer++;
170
171 /*
172 * Register with driver.
173 * Timer0 and Timer1 is used by platform.
174 */
175 hd.hd_phys_address = hpet_address;
176 hd.hd_address = hpet_virt_address;
177 hd.hd_nirqs = ntimer;
178 hd.hd_flags = HPET_DATA_PLATFORM;
179 hpet_reserve_timer(&hd, 0);
180#ifdef CONFIG_HPET_EMULATE_RTC
181 hpet_reserve_timer(&hd, 1);
182#endif
183 hd.hd_irq[0] = HPET_LEGACY_8254;
184 hd.hd_irq[1] = HPET_LEGACY_RTC;
185 if (ntimer > 2) {
186 struct hpet __iomem *hpet;
187 struct hpet_timer __iomem *timer;
188 int i;
189
190 hpet = hpet_virt_address;
191
192 for (i = 2, timer = &hpet->hpet_timers[2]; i < ntimer;
193 timer++, i++)
194 hd.hd_irq[i] = (timer->hpet_config &
195 Tn_INT_ROUTE_CNF_MASK) >>
196 Tn_INT_ROUTE_CNF_SHIFT;
197
198 }
199
200 hpet_alloc(&hd);
201 }
202#endif
203
204#ifdef CONFIG_X86_LOCAL_APIC
205 wait_timer_tick = wait_hpet_tick;
206#endif
207 return 0;
208}
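
/*
 * Worked example of the tick arithmetic above (illustrative values):
 * with HZ=1000 a kernel tick is 1000 usec, i.e.
 * 1000 * FSEC_TO_USEC == 10^12 fsec.  For the common 14.31818 MHz HPET,
 * hpet_period is about 69841279 fsec, giving
 * hpet_tick ~= 10^12 / 69841279 ~= 14318 HPET clocks per kernel tick
 * (rounded via hpet_tick_rem).
 */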
209
210int hpet_reenable(void)
211{
212 return hpet_timer_stop_set_go(hpet_tick);
213}
214
215int is_hpet_enabled(void)
216{
217 return use_hpet;
218}
219
220int is_hpet_capable(void)
221{
222 if (!boot_hpet_disable && hpet_address)
223 return 1;
224 return 0;
225}
226
227static int __init hpet_setup(char* str)
228{
229 if (str) {
230 if (!strncmp("disable", str, 7))
231 boot_hpet_disable = 1;
232 }
233 return 1;
234}
235
236__setup("hpet=", hpet_setup);
237
238#ifdef CONFIG_HPET_EMULATE_RTC
239/* HPET in LegacyReplacement Mode eats up the RTC interrupt line. When HPET
240 * is enabled, we support RTC interrupt functionality in software.
241 * RTC has 3 kinds of interrupts:
242 * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
243 * is updated
244 * 2) Alarm Interrupt - generate an interrupt at a specific time of day
245 * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
246 * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
247 * (1) and (2) above are implemented using polling at a frequency of
248 * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
249 * overhead. (DEFAULT_RTC_INT_FREQ)
250 * For (3), we use interrupts at 64Hz or user specified periodic
251 * frequency, whichever is higher.
252 */
253#include <linux/mc146818rtc.h>
254#include <linux/rtc.h>
255
256extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs);
257
258#define DEFAULT_RTC_INT_FREQ 64
259#define RTC_NUM_INTS 1
260
261static unsigned long UIE_on;
262static unsigned long prev_update_sec;
263
264static unsigned long AIE_on;
265static struct rtc_time alarm_time;
266
267static unsigned long PIE_on;
268static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
269static unsigned long PIE_count;
270
271static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */
272
273/*
274 * Timer 1 is used for the RTC. We do not use its periodic interrupt
275 * feature, even though HPET supports periodic interrupts on Timer 1.
276 * The reason is that setting up a periodic interrupt in HPET requires
277 * stopping the main counter, and doing that every time someone disables
278 * or enables the RTC would disturb the main kernel timer on Timer 0.
279 * So, for the time being, simulate the periodic interrupt in software.
280 *
281 * hpet_rtc_timer_init() is called for the first interrupt; on subsequent
282 * interrupts, reinit happens through hpet_rtc_timer_reinit().
283 */
284int hpet_rtc_timer_init(void)
285{
286 unsigned int cfg, cnt;
287 unsigned long flags;
288
289 if (!is_hpet_enabled())
290 return 0;
291 /*
292 * Set the counter 1 and enable the interrupts.
293 */
294 if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
295 hpet_rtc_int_freq = PIE_freq;
296 else
297 hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
298
299 local_irq_save(flags);
300 cnt = hpet_readl(HPET_COUNTER);
301 cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
302 hpet_writel(cnt, HPET_T1_CMP);
303 local_irq_restore(flags);
304
305 cfg = hpet_readl(HPET_T1_CFG);
306 cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT;
307 hpet_writel(cfg, HPET_T1_CFG);
308
309 return 1;
310}
311
312static void hpet_rtc_timer_reinit(void)
313{
314 unsigned int cfg, cnt;
315
316 if (!(PIE_on | AIE_on | UIE_on))
317 return;
318
319 if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
320 hpet_rtc_int_freq = PIE_freq;
321 else
322 hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
323
324 /* It is more accurate to use the comparator value than current count.*/
325 cnt = hpet_readl(HPET_T1_CMP);
326 cnt += hpet_tick*HZ/hpet_rtc_int_freq;
327 hpet_writel(cnt, HPET_T1_CMP);
328
329 cfg = hpet_readl(HPET_T1_CFG);
330 cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT;
331 hpet_writel(cfg, HPET_T1_CFG);
332
333 return;
334}
335
336/*
337 * The functions below are called from rtc driver.
338 * Return 0 if HPET is not being used.
339 * Otherwise do the necessary changes and return 1.
340 */
341int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
342{
343 if (!is_hpet_enabled())
344 return 0;
345
346 if (bit_mask & RTC_UIE)
347 UIE_on = 0;
348 if (bit_mask & RTC_PIE)
349 PIE_on = 0;
350 if (bit_mask & RTC_AIE)
351 AIE_on = 0;
352
353 return 1;
354}
355
356int hpet_set_rtc_irq_bit(unsigned long bit_mask)
357{
358 int timer_init_reqd = 0;
359
360 if (!is_hpet_enabled())
361 return 0;
362
363 if (!(PIE_on | AIE_on | UIE_on))
364 timer_init_reqd = 1;
365
366 if (bit_mask & RTC_UIE) {
367 UIE_on = 1;
368 }
369 if (bit_mask & RTC_PIE) {
370 PIE_on = 1;
371 PIE_count = 0;
372 }
373 if (bit_mask & RTC_AIE) {
374 AIE_on = 1;
375 }
376
377 if (timer_init_reqd)
378 hpet_rtc_timer_init();
379
380 return 1;
381}
382
383int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
384{
385 if (!is_hpet_enabled())
386 return 0;
387
388 alarm_time.tm_hour = hrs;
389 alarm_time.tm_min = min;
390 alarm_time.tm_sec = sec;
391
392 return 1;
393}
394
395int hpet_set_periodic_freq(unsigned long freq)
396{
397 if (!is_hpet_enabled())
398 return 0;
399
400 PIE_freq = freq;
401 PIE_count = 0;
402
403 return 1;
404}
405
406int hpet_rtc_dropped_irq(void)
407{
408 if (!is_hpet_enabled())
409 return 0;
410
411 return 1;
412}
413
414irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
415{
416 struct rtc_time curr_time;
417 unsigned long rtc_int_flag = 0;
418 int call_rtc_interrupt = 0;
419
420 hpet_rtc_timer_reinit();
421
422 if (UIE_on | AIE_on) {
423 rtc_get_rtc_time(&curr_time);
424 }
425 if (UIE_on) {
426 if (curr_time.tm_sec != prev_update_sec) {
427 /* Set update int info, call real rtc int routine */
428 call_rtc_interrupt = 1;
429 rtc_int_flag = RTC_UF;
430 prev_update_sec = curr_time.tm_sec;
431 }
432 }
433 if (PIE_on) {
434 PIE_count++;
435 if (PIE_count >= hpet_rtc_int_freq/PIE_freq) {
436 /* Set periodic int info, call real rtc int routine */
437 call_rtc_interrupt = 1;
438 rtc_int_flag |= RTC_PF;
439 PIE_count = 0;
440 }
441 }
442 if (AIE_on) {
443 if ((curr_time.tm_sec == alarm_time.tm_sec) &&
444 (curr_time.tm_min == alarm_time.tm_min) &&
445 (curr_time.tm_hour == alarm_time.tm_hour)) {
446 /* Set alarm int info, call real rtc int routine */
447 call_rtc_interrupt = 1;
448 rtc_int_flag |= RTC_AF;
449 }
450 }
451 if (call_rtc_interrupt) {
452 rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
453 rtc_interrupt(rtc_int_flag, dev_id, regs);
454 }
455 return IRQ_HANDLED;
456}
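
/*
 * Example of the software PIE division above: with PIE_freq == 2 the
 * timer still runs at hpet_rtc_int_freq == 64 Hz and PIE_count wraps at
 * 64/2 == 32, so RTC_PF is delivered on every 32nd hardware interrupt;
 * with PIE_freq == 1024 (> 64) the timer itself runs at 1024 Hz and
 * RTC_PF is delivered on every interrupt.
 */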
457#endif
458
diff --git a/arch/i386/kernel/timers/Makefile b/arch/i386/kernel/timers/Makefile
new file mode 100644
index 000000000000..8fa12be658dd
--- /dev/null
+++ b/arch/i386/kernel/timers/Makefile
@@ -0,0 +1,9 @@
1#
2# Makefile for x86 timers
3#
4
5obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o
6
7obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o
8obj-$(CONFIG_HPET_TIMER) += timer_hpet.o
9obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c
new file mode 100644
index 000000000000..f7f90005e22e
--- /dev/null
+++ b/arch/i386/kernel/timers/common.c
@@ -0,0 +1,160 @@
1/*
2 * Common functions used across the timers go here
3 */
4
5#include <linux/init.h>
6#include <linux/timex.h>
7#include <linux/errno.h>
8#include <linux/jiffies.h>
9
10#include <asm/io.h>
11#include <asm/timer.h>
12#include <asm/hpet.h>
13
14#include "mach_timer.h"
15
16/* ------ Calibrate the TSC -------
17 * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
18 * Too much 64-bit arithmetic here to do this cleanly in C, and for
19 * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
20 * output busy loop as low as possible. We avoid reading the CTC registers
21 * directly because of the awkward 8-bit access mechanism of the 82C54
22 * device.
23 */
24
25#define CALIBRATE_TIME (5 * 1000020/HZ)
26
27unsigned long __init calibrate_tsc(void)
28{
29 mach_prepare_counter();
30
31 {
32 unsigned long startlow, starthigh;
33 unsigned long endlow, endhigh;
34 unsigned long count;
35
36 rdtsc(startlow,starthigh);
37 mach_countup(&count);
38 rdtsc(endlow,endhigh);
39
40
41 /* Error: ECTCNEVERSET */
42 if (count <= 1)
43 goto bad_ctc;
44
45 /* 64-bit subtract - gcc just messes up with long longs */
46 __asm__("subl %2,%0\n\t"
47 "sbbl %3,%1"
48 :"=a" (endlow), "=d" (endhigh)
49 :"g" (startlow), "g" (starthigh),
50 "0" (endlow), "1" (endhigh));
51
52 /* Error: ECPUTOOFAST */
53 if (endhigh)
54 goto bad_ctc;
55
56 /* Error: ECPUTOOSLOW */
57 if (endlow <= CALIBRATE_TIME)
58 goto bad_ctc;
59
60 __asm__("divl %2"
61 :"=a" (endlow), "=d" (endhigh)
62 :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME));
63
64 return endlow;
65 }
66
67 /*
68 * The CTC wasn't reliable: we got a hit on the very first read,
69 * or the CPU was so fast/slow that the quotient wouldn't fit in
70 * 32 bits..
71 */
72bad_ctc:
73 return 0;
74}
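/*
 * A worked example of the returned quotient, with illustrative
 * numbers: on a 500 MHz CPU the TSC advances 500 clocks per usec, so
 * calibrate_tsc() returns about 2^32 / 500 ~= 8589934.  A TSC delta
 * can then be converted to usecs with a single mull:
 * usecs = (delta * quotient) >> 32 ~= delta / 500.
 */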
75
76#ifdef CONFIG_HPET_TIMER
77/* ------ Calibrate the TSC using HPET -------
78 * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq.
 79 * The second output, stored through parameter 1 when it is non-NULL,
 80 * is 2^32 * (1 / (tsc per HPET clk)) for use by delay_hpet().
81 * calibrate_tsc() calibrates the processor TSC by comparing
82 * it to the HPET timer of known frequency.
83 * Too much 64-bit arithmetic here to do this cleanly in C
84 */
85#define CALIBRATE_CNT_HPET (5 * hpet_tick)
86#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
87
88unsigned long __init calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
89{
90 unsigned long tsc_startlow, tsc_starthigh;
91 unsigned long tsc_endlow, tsc_endhigh;
92 unsigned long hpet_start, hpet_end;
93 unsigned long result, remain;
94
95 hpet_start = hpet_readl(HPET_COUNTER);
96 rdtsc(tsc_startlow, tsc_starthigh);
97 do {
98 hpet_end = hpet_readl(HPET_COUNTER);
99 } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET);
100 rdtsc(tsc_endlow, tsc_endhigh);
101
102 /* 64-bit subtract - gcc just messes up with long longs */
103 __asm__("subl %2,%0\n\t"
104 "sbbl %3,%1"
105 :"=a" (tsc_endlow), "=d" (tsc_endhigh)
106 :"g" (tsc_startlow), "g" (tsc_starthigh),
107 "0" (tsc_endlow), "1" (tsc_endhigh));
108
109 /* Error: ECPUTOOFAST */
110 if (tsc_endhigh)
111 goto bad_calibration;
112
113 /* Error: ECPUTOOSLOW */
114 if (tsc_endlow <= CALIBRATE_TIME_HPET)
115 goto bad_calibration;
116
117 ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET);
118 if (remain > (tsc_endlow >> 1))
119 result++; /* rounding the result */
120
121 if (tsc_hpet_quotient_ptr) {
122 unsigned long tsc_hpet_quotient;
123
124 ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0,
125 CALIBRATE_CNT_HPET);
126 if (remain > (tsc_endlow >> 1))
127 tsc_hpet_quotient++; /* rounding the result */
128 *tsc_hpet_quotient_ptr = tsc_hpet_quotient;
129 }
130
131 return result;
132bad_calibration:
133 /*
134 * the CPU was so fast/slow that the quotient wouldn't fit in
135 * 32 bits..
136 */
137 return 0;
138}
139#endif
140
141/* calculate cpu_khz */
142void __init init_cpu_khz(void)
143{
144 if (cpu_has_tsc) {
145 unsigned long tsc_quotient = calibrate_tsc();
146 if (tsc_quotient) {
147 /* report CPU clock rate in kHz.
148 * The formula is (10^3 * 2^32) / (2^32 * 1 / (clocks/us)) =
149 * clocks/msec = kHz. Our precision is about 100 ppm.
150 */
151 { unsigned long eax=0, edx=1000;
152 __asm__("divl %2"
153 :"=a" (cpu_khz), "=d" (edx)
154 :"r" (tsc_quotient),
155 "0" (eax), "1" (edx));
156 printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
157 }
158 }
159 }
160}
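/*
 * The divl above computes cpu_khz = (1000 * 2^32) / tsc_quotient.
 * A minimal C sketch of the same computation, assuming 64-bit
 * arithmetic were acceptable here (the asm avoids the long-long
 * division gcc would otherwise emit):
 *
 *	unsigned long khz_from_quotient(unsigned long quotient)
 *	{
 *		return (unsigned long)((1000ULL << 32) / quotient);
 *	}
 *
 * since quotient == 2^32 / (TSC clocks per usec), the result is
 * clocks per msec, i.e. kHz.
 */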
diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c
new file mode 100644
index 000000000000..a3d6a288088b
--- /dev/null
+++ b/arch/i386/kernel/timers/timer.c
@@ -0,0 +1,66 @@
1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/string.h>
4#include <asm/timer.h>
5
6#ifdef CONFIG_HPET_TIMER
7/*
8 * HPET memory read is slower than tsc reads, but is more dependable as it
9 * always runs at constant frequency and reduces complexity due to
10 * cpufreq. So, we prefer the HPET timer to the TSC-based one. Also, we
11 * cannot use timer_pit when HPET is active. So, we default to timer_hpet.
12 */
13#endif
14/* list of timers, ordered by preference, NULL terminated */
15static struct init_timer_opts* __initdata timers[] = {
16#ifdef CONFIG_X86_CYCLONE_TIMER
17 &timer_cyclone_init,
18#endif
19#ifdef CONFIG_HPET_TIMER
20 &timer_hpet_init,
21#endif
22#ifdef CONFIG_X86_PM_TIMER
23 &timer_pmtmr_init,
24#endif
25 &timer_tsc_init,
26 &timer_pit_init,
27 NULL,
28};
29
30static char clock_override[10] __initdata;
31
32static int __init clock_setup(char* str)
33{
34 if (str)
35 strlcpy(clock_override, str, sizeof(clock_override));
36 return 1;
37}
38__setup("clock=", clock_setup);
39
40
41/* The chosen timesource has been found to be bad.
42 * Fall back to a known good timesource (the PIT)
43 */
44void clock_fallback(void)
45{
46 cur_timer = &timer_pit;
47}
48
49/* iterates through the list of timers, returning the first
50 * one that initializes successfully.
51 */
52struct timer_opts* __init select_timer(void)
53{
54 int i = 0;
55
56 /* find most preferred working timer */
57 while (timers[i]) {
58 if (timers[i]->init)
59 if (timers[i]->init(clock_override) == 0)
60 return timers[i]->opts;
61 ++i;
62 }
63
64 panic("select_timer: Cannot find a suitable timer\n");
65 return NULL;
66}
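/*
 * Usage example: booting with "clock=pmtmr" makes the cyclone and
 * hpet init routines decline with -ENODEV (init_tsc declines too,
 * unless an active HPET forces it to stay on as the fallback), so
 * select_timer() walks down the list until init_pmtmr() succeeds.
 * With no override, the first timer whose hardware probes
 * successfully wins; init_pit() never refuses, so the PIT is the
 * fallback of last resort.
 */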
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
new file mode 100644
index 000000000000..f6f1206a11bb
--- /dev/null
+++ b/arch/i386/kernel/timers/timer_cyclone.c
@@ -0,0 +1,259 @@
1/* Cyclone-timer:
2 * This code implements timer_ops for the cyclone counter found
3 * on IBM x440, x360, and other Summit based systems.
4 *
5 * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com)
6 */
7
8
9#include <linux/spinlock.h>
10#include <linux/init.h>
11#include <linux/timex.h>
12#include <linux/errno.h>
13#include <linux/string.h>
14#include <linux/jiffies.h>
15
16#include <asm/timer.h>
17#include <asm/io.h>
18#include <asm/pgtable.h>
19#include <asm/fixmap.h>
20#include "io_ports.h"
21
22extern spinlock_t i8253_lock;
23
24/* Number of usecs that the last interrupt was delayed */
25static int delay_at_last_interrupt;
26
27#define CYCLONE_CBAR_ADDR 0xFEB00CD0
28#define CYCLONE_PMCC_OFFSET 0x51A0
29#define CYCLONE_MPMC_OFFSET 0x51D0
30#define CYCLONE_MPCS_OFFSET 0x51A8
31#define CYCLONE_TIMER_FREQ 100000000
32#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */
33int use_cyclone = 0;
34
35static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */
36static u32 last_cyclone_low;
37static u32 last_cyclone_high;
38static unsigned long long monotonic_base;
39static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
40
41/* helper macro to atomically read both cyclone counter registers */
42#define read_cyclone_counter(low,high) \
43 do{ \
44 high = cyclone_timer[1]; low = cyclone_timer[0]; \
45 } while (high != cyclone_timer[1]);
46
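/*
 * Re-reading the high word above is the usual lock-free way to read a
 * 64-bit counter through two 32-bit loads: if the high word changed
 * while the low word was being read, a carry may have propagated, so
 * the pair is thrown away and read again.
 */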
47
48static void mark_offset_cyclone(void)
49{
50 unsigned long lost, delay;
51 unsigned long delta = last_cyclone_low;
52 int count;
53 unsigned long long this_offset, last_offset;
54
55 write_seqlock(&monotonic_lock);
56 last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
57
58 spin_lock(&i8253_lock);
59 read_cyclone_counter(last_cyclone_low,last_cyclone_high);
60
61 /* read values for delay_at_last_interrupt */
62 outb_p(0x00, 0x43); /* latch the count ASAP */
63
64 count = inb_p(0x40); /* read the latched count */
65 count |= inb(0x40) << 8;
66
67 /*
68 * VIA686a test code... reset the latch if count > max + 1
69 * from timer_pit.c - cjb
70 */
71 if (count > LATCH) {
72 outb_p(0x34, PIT_MODE);
73 outb_p(LATCH & 0xff, PIT_CH0);
74 outb(LATCH >> 8, PIT_CH0);
75 count = LATCH - 1;
76 }
77 spin_unlock(&i8253_lock);
78
79 /* lost tick compensation */
80 delta = last_cyclone_low - delta;
81 delta /= (CYCLONE_TIMER_FREQ/1000000);
82 delta += delay_at_last_interrupt;
83 lost = delta/(1000000/HZ);
84 delay = delta%(1000000/HZ);
85 if (lost >= 2)
86 jiffies_64 += lost-1;
87
88 /* update the monotonic base value */
89 this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
90 monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK;
91 write_sequnlock(&monotonic_lock);
92
93 /* calculate delay_at_last_interrupt */
94 count = ((LATCH-1) - count) * TICK_SIZE;
95 delay_at_last_interrupt = (count + LATCH/2) / LATCH;
96
97
98 /* catch corner case where tick rollover occurred
99 * between cyclone and pit reads (as noted when
100 * usec delta is > 90% # of usecs/tick)
101 */
102 if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
103 jiffies_64++;
104}
105
106static unsigned long get_offset_cyclone(void)
107{
108 u32 offset;
109
110 if(!cyclone_timer)
111 return delay_at_last_interrupt;
112
113 /* Read the cyclone timer */
114 offset = cyclone_timer[0];
115
116 /* .. relative to previous jiffy */
117 offset = offset - last_cyclone_low;
118
119 /* convert cyclone ticks to microseconds */
120 /* XXX slow, can we speed this up? */
121 offset = offset/(CYCLONE_TIMER_FREQ/1000000);
122
123 /* our adjusted time offset in microseconds */
124 return delay_at_last_interrupt + offset;
125}
126
127static unsigned long long monotonic_clock_cyclone(void)
128{
129 u32 now_low, now_high;
130 unsigned long long last_offset, this_offset, base;
131 unsigned long long ret;
132 unsigned seq;
133
134 /* atomically read monotonic base & last_offset */
135 do {
136 seq = read_seqbegin(&monotonic_lock);
137 last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
138 base = monotonic_base;
139 } while (read_seqretry(&monotonic_lock, seq));
140
141
142 /* Read the cyclone counter */
143 read_cyclone_counter(now_low,now_high);
144 this_offset = ((unsigned long long)now_high<<32)|now_low;
145
146 /* convert to nanoseconds */
147 ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK);
148 return ret * (1000000000 / CYCLONE_TIMER_FREQ);
149}
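/*
 * The retry loop above is the reader side of the seqlock pattern used
 * by all the monotonic_clock_* implementations in this directory:
 * mark_offset_* updates the last counter value and monotonic_base
 * under write_seqlock(), and readers simply retry until the sequence
 * number is unchanged across their two loads, so readers never block
 * the timer interrupt.
 */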
150
151static int __init init_cyclone(char* override)
152{
153 u32* reg;
154 u32 base; /* saved cyclone base address */
155 u32 pageaddr; /* page that contains cyclone_timer register */
156 u32 offset; /* offset from pageaddr to cyclone_timer register */
157 int i;
158
159 /* check clock override */
160 if (override[0] && strncmp(override,"cyclone",7))
161 return -ENODEV;
162
163 /* make sure we're on a Summit box */
164 if(!use_cyclone) return -ENODEV;
165
166 printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
167
168 /* find base address */
169 pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK;
170 offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK);
171 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
172 reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
173 if(!reg){
174 printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
175 return -ENODEV;
176 }
177 base = *reg;
178 if(!base){
179 printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
180 return -ENODEV;
181 }
182
183 /* setup PMCC */
184 pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK;
185 offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK);
186 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
187 reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
188 if(!reg){
189 printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
190 return -ENODEV;
191 }
192 reg[0] = 0x00000001;
193
194 /* setup MPCS */
195 pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK;
196 offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK);
197 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
198 reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
199 if(!reg){
200 printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
201 return -ENODEV;
202 }
203 reg[0] = 0x00000001;
204
205 /* map in cyclone_timer */
206 pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK;
207 offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK);
208 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
209 cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
210 if(!cyclone_timer){
211 printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
212 return -ENODEV;
213 }
214
215 /* quick test to make sure it's ticking */
216 for(i=0; i<3; i++){
217 u32 old = cyclone_timer[0];
218 int stall = 100;
219 while(stall--) barrier();
220 if(cyclone_timer[0] == old){
221 printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
222 cyclone_timer = 0;
223 return -ENODEV;
224 }
225 }
226
227 init_cpu_khz();
228
229 /* Everything looks good! */
230 return 0;
231}
232
233
234static void delay_cyclone(unsigned long loops)
235{
236 unsigned long bclock, now;
237 if(!cyclone_timer)
238 return;
239 bclock = cyclone_timer[0];
240 do {
241 rep_nop();
242 now = cyclone_timer[0];
243 } while ((now-bclock) < loops);
244}
245/************************************************************/
246
247/* cyclone timer_opts struct */
248static struct timer_opts timer_cyclone = {
249 .name = "cyclone",
250 .mark_offset = mark_offset_cyclone,
251 .get_offset = get_offset_cyclone,
252 .monotonic_clock = monotonic_clock_cyclone,
253 .delay = delay_cyclone,
254};
255
256struct init_timer_opts __initdata timer_cyclone_init = {
257 .init = init_cyclone,
258 .opts = &timer_cyclone,
259};
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
new file mode 100644
index 000000000000..713134e71844
--- /dev/null
+++ b/arch/i386/kernel/timers/timer_hpet.c
@@ -0,0 +1,191 @@
1/*
2 * This code largely moved from arch/i386/kernel/time.c.
3 * See comments there for proper credits.
4 */
5
6#include <linux/spinlock.h>
7#include <linux/init.h>
8#include <linux/timex.h>
9#include <linux/errno.h>
10#include <linux/string.h>
11#include <linux/jiffies.h>
12
13#include <asm/timer.h>
14#include <asm/io.h>
15#include <asm/processor.h>
16
17#include "io_ports.h"
18#include "mach_timer.h"
19#include <asm/hpet.h>
20
21static unsigned long hpet_usec_quotient; /* convert hpet clks to usec */
22static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */
23static unsigned long hpet_last; /* hpet counter value at last tick*/
24static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
25static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
26static unsigned long long monotonic_base;
27static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
28
29/* convert from cycles(64bits) => nanoseconds (64bits)
30 * basic equation:
31 * ns = cycles / (freq / ns_per_sec)
32 * ns = cycles * (ns_per_sec / freq)
33 * ns = cycles * (10^9 / (cpu_mhz * 10^6))
34 * ns = cycles * (10^3 / cpu_mhz)
35 *
36 * Then we use scaling math (suggested by george@mvista.com) to get:
37 * ns = cycles * (10^3 * SC / cpu_mhz) / SC
38 * ns = cycles * cyc2ns_scale / SC
39 *
40 * And since SC is a constant power of two, we can convert the div
41 * into a shift.
42 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
43 */
44static unsigned long cyc2ns_scale;
45#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
46
47static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
48{
49 cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
50}
51
52static inline unsigned long long cycles_2_ns(unsigned long long cyc)
53{
54 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
55}
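/*
 * Worked example of the scaling above, with illustrative numbers: for
 * a 1000 MHz CPU, cyc2ns_scale = (1000 << 10) / 1000 = 1024, so
 * cycles_2_ns(cyc) = (cyc * 1024) >> 10 = cyc, i.e. one ns per cycle
 * as expected; for a 2000 MHz CPU the scale is 512 and two cycles
 * make one ns.
 */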
56
57static unsigned long long monotonic_clock_hpet(void)
58{
59 unsigned long long last_offset, this_offset, base;
60 unsigned seq;
61
62 /* atomically read monotonic base & last_offset */
63 do {
64 seq = read_seqbegin(&monotonic_lock);
65 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
66 base = monotonic_base;
67 } while (read_seqretry(&monotonic_lock, seq));
68
69 /* Read the Time Stamp Counter */
70 rdtscll(this_offset);
71
72 /* return the value in ns */
73 return base + cycles_2_ns(this_offset - last_offset);
74}
75
76static unsigned long get_offset_hpet(void)
77{
78 register unsigned long eax, edx;
79
80 eax = hpet_readl(HPET_COUNTER);
81 eax -= hpet_last; /* hpet delta */
82
83 /*
84 * Time offset = (hpet delta) * ( usecs per HPET clock )
85 * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
86 * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
87 *
88 * Where,
89 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
90 *
91 * Using a mull instead of a divl saves some cycles in the critical path.
92 */
93 ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax);
94
95 /* our adjusted time offset in microseconds */
96 return edx;
97}
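/*
 * Worked example of the quotient used above, assuming HZ=100 and the
 * common 14.31818 MHz HPET: usecs per tick = 10000 and hpet_tick ~=
 * 143182 HPET clocks, so hpet_usec_quotient ~= (2^32 * 10000) /
 * 143182 ~= 3.0 * 10^8.  A delta of one full tick then yields
 * (143182 * hpet_usec_quotient) >> 32 ~= 10000 usecs.
 */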
98
99static void mark_offset_hpet(void)
100{
101 unsigned long long this_offset, last_offset;
102 unsigned long offset;
103
104 write_seqlock(&monotonic_lock);
105 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
106 rdtsc(last_tsc_low, last_tsc_high);
107
108 offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
109 if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
110 int lost_ticks = (offset - hpet_last) / hpet_tick;
111 jiffies_64 += lost_ticks;
112 }
113 hpet_last = offset;
114
115 /* update the monotonic base value */
116 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
117 monotonic_base += cycles_2_ns(this_offset - last_offset);
118 write_sequnlock(&monotonic_lock);
119}
120
121static void delay_hpet(unsigned long loops)
122{
123 unsigned long hpet_start, hpet_end;
124 unsigned long eax;
125
126 /* loops is the number of cpu cycles. Convert it to hpet clocks */
127 ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops);
128
129 hpet_start = hpet_readl(HPET_COUNTER);
130 do {
131 rep_nop();
132 hpet_end = hpet_readl(HPET_COUNTER);
133 } while ((hpet_end - hpet_start) < (loops));
134}
135
136static int __init init_hpet(char* override)
137{
138 unsigned long result, remain;
139
140 /* check clock override */
141 if (override[0] && strncmp(override,"hpet",4))
142 return -ENODEV;
143
144 if (!is_hpet_enabled())
145 return -ENODEV;
146
147 printk("Using HPET for gettimeofday\n");
148 if (cpu_has_tsc) {
149 unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient);
150 if (tsc_quotient) {
151 /* report CPU clock rate in kHz.
152 * The formula is (10^3 * 2^32) / (2^32 * 1 / (clocks/us)) =
153 * clocks/msec = kHz. Our precision is about 100 ppm.
154 */
155 { unsigned long eax=0, edx=1000;
156 ASM_DIV64_REG(cpu_khz, edx, tsc_quotient,
157 eax, edx);
158 printk("Detected %lu.%03lu MHz processor.\n",
159 cpu_khz / 1000, cpu_khz % 1000);
160 }
161 set_cyc2ns_scale(cpu_khz/1000);
162 }
163 }
164
165 /*
166 * Math to calculate hpet to usec multiplier
167 * Look for the comments at get_offset_hpet()
168 */
169 ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC);
170 if (remain > (hpet_tick >> 1))
171 result++; /* rounding the result */
172 hpet_usec_quotient = result;
173
174 return 0;
175}
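/*
 * A minimal C sketch of the round-to-nearest division performed above
 * with ASM_DIV64_REG (the dividend being KERNEL_TICK_USEC << 32, per
 * the comment at get_offset_hpet()), assuming 64-bit arithmetic:
 *
 *	unsigned long div_round(unsigned long long dividend,
 *				unsigned long divisor)
 *	{
 *		unsigned long q = dividend / divisor;
 *		unsigned long r = dividend % divisor;
 *		return (r > divisor / 2) ? q + 1 : q;
 *	}
 */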
176
177/************************************************************/
178
179/* tsc timer_opts struct */
180static struct timer_opts timer_hpet = {
181 .name = "hpet",
182 .mark_offset = mark_offset_hpet,
183 .get_offset = get_offset_hpet,
184 .monotonic_clock = monotonic_clock_hpet,
185 .delay = delay_hpet,
186};
187
188struct init_timer_opts __initdata timer_hpet_init = {
189 .init = init_hpet,
190 .opts = &timer_hpet,
191};
diff --git a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c
new file mode 100644
index 000000000000..4ea2f414dbbd
--- /dev/null
+++ b/arch/i386/kernel/timers/timer_none.c
@@ -0,0 +1,39 @@
1#include <linux/init.h>
2#include <asm/timer.h>
3
4static void mark_offset_none(void)
5{
6 /* nothing needed */
7}
8
9static unsigned long get_offset_none(void)
10{
11 return 0;
12}
13
14static unsigned long long monotonic_clock_none(void)
15{
16 return 0;
17}
18
19static void delay_none(unsigned long loops)
20{
21 int d0;
22 __asm__ __volatile__(
23 "\tjmp 1f\n"
24 ".align 16\n"
25 "1:\tjmp 2f\n"
26 ".align 16\n"
27 "2:\tdecl %0\n\tjns 2b"
28 :"=&a" (d0)
29 :"0" (loops));
30}
31
32/* none timer_opts struct */
33struct timer_opts timer_none = {
34 .name = "none",
35 .mark_offset = mark_offset_none,
36 .get_offset = get_offset_none,
37 .monotonic_clock = monotonic_clock_none,
38 .delay = delay_none,
39};
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
new file mode 100644
index 000000000000..967d5453cd0e
--- /dev/null
+++ b/arch/i386/kernel/timers/timer_pit.c
@@ -0,0 +1,206 @@
1/*
2 * This code largely moved from arch/i386/kernel/time.c.
3 * See comments there for proper credits.
4 */
5
6#include <linux/spinlock.h>
7#include <linux/module.h>
8#include <linux/device.h>
9#include <linux/irq.h>
10#include <linux/sysdev.h>
11#include <linux/timex.h>
12#include <asm/delay.h>
13#include <asm/mpspec.h>
14#include <asm/timer.h>
15#include <asm/smp.h>
16#include <asm/io.h>
17#include <asm/arch_hooks.h>
18
19extern spinlock_t i8259A_lock;
20extern spinlock_t i8253_lock;
21#include "do_timer.h"
22#include "io_ports.h"
23
24static int count_p; /* counter in get_offset_pit() */
25
26static int __init init_pit(char* override)
27{
28 /* check clock override */
29 if (override[0] && strncmp(override,"pit",3))
30 printk(KERN_ERR "Warning: clock= override failed. Defaulting to PIT\n");
31
32 count_p = LATCH;
33 return 0;
34}
35
36static void mark_offset_pit(void)
37{
38 /* nothing needed */
39}
40
41static unsigned long long monotonic_clock_pit(void)
42{
43 return 0;
44}
45
46static void delay_pit(unsigned long loops)
47{
48 int d0;
49 __asm__ __volatile__(
50 "\tjmp 1f\n"
51 ".align 16\n"
52 "1:\tjmp 2f\n"
53 ".align 16\n"
54 "2:\tdecl %0\n\tjns 2b"
55 :"=&a" (d0)
56 :"0" (loops));
57}
58
59
60/* This function must be called with xtime_lock held.
61 * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs
62 *
63 * However, the pc-audio speaker driver changes the divisor so that
64 * it gets interrupted rather more often - it loads 64 into the
65 * counter rather than 11932! This has an adverse impact on
66 * do_gettimeoffset() -- it stops working! What is also not
67 * good is that the interval that our timer function gets called
68 * is no longer 10.0002 ms, but 9.9767 ms. To get around this
69 * would require using a different timing source. Maybe someone
70 * could use the RTC - I know that this can interrupt at frequencies
71 * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix
72 * it so that at startup, the timer code in sched.c would select
73 * using either the RTC or the 8253 timer. The decision would be
74 * based on whether there was any other device around that needed
75 * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz,
76 * and then do some jiggery to have a version of do_timer that
77 * advanced the clock by 1/1024 s. Every time that reached over 1/100
78 * of a second, then do all the old code. If the time was kept correct
79 * then do_gettimeoffset could just return 0 - there is no low order
80 * divider that can be accessed.
81 *
82 * Ideally, you would be able to use the RTC for the speaker driver,
83 * but it appears that the speaker driver really needs interrupts more
84 * often than every 120 us or so.
85 *
86 * Anyway, this needs more thought.... pjsg (1993-08-28)
87 *
88 * If you are really that interested, you should be reading
89 * comp.protocols.time.ntp!
90 */
91
92static unsigned long get_offset_pit(void)
93{
94 int count;
95 unsigned long flags;
96 static unsigned long jiffies_p = 0;
97
98 /*
99 * cache volatile jiffies temporarily; we have xtime_lock.
100 */
101 unsigned long jiffies_t;
102
103 spin_lock_irqsave(&i8253_lock, flags);
104 /* timer count may underflow right here */
105 outb_p(0x00, PIT_MODE); /* latch the count ASAP */
106
107 count = inb_p(PIT_CH0); /* read the latched count */
108
109 /*
110 * We do this guaranteed double memory access instead of a _p
111 * postfix in the previous port access. Wheee, hackady hack
112 */
113 jiffies_t = jiffies;
114
115 count |= inb_p(PIT_CH0) << 8;
116
117 /* VIA686a test code... reset the latch if count > max + 1 */
118 if (count > LATCH) {
119 outb_p(0x34, PIT_MODE);
120 outb_p(LATCH & 0xff, PIT_CH0);
121 outb(LATCH >> 8, PIT_CH0);
122 count = LATCH - 1;
123 }
124
125 /*
126 * avoiding timer inconsistencies (they are rare, but they happen)...
127 * there are two kinds of problems that must be avoided here:
128 * 1. the timer counter underflows
129 * 2. hardware problem with the timer, not giving us continuous time,
130 * the counter does small "jumps" upwards on some Pentium systems,
131 * (see c't 95/10 page 335 for Neptun bug.)
132 */
133
134 if( jiffies_t == jiffies_p ) {
135 if( count > count_p ) {
136 /* the nutcase */
137 count = do_timer_overflow(count);
138 }
139 } else
140 jiffies_p = jiffies_t;
141
142 count_p = count;
143
144 spin_unlock_irqrestore(&i8253_lock, flags);
145
146 count = ((LATCH-1) - count) * TICK_SIZE;
147 count = (count + LATCH/2) / LATCH;
148
149 return count;
150}
151
152
153/* tsc timer_opts struct */
154struct timer_opts timer_pit = {
155 .name = "pit",
156 .mark_offset = mark_offset_pit,
157 .get_offset = get_offset_pit,
158 .monotonic_clock = monotonic_clock_pit,
159 .delay = delay_pit,
160};
161
162struct init_timer_opts __initdata timer_pit_init = {
163 .init = init_pit,
164 .opts = &timer_pit,
165};
166
167void setup_pit_timer(void)
168{
169 extern spinlock_t i8253_lock;
170 unsigned long flags;
171
172 spin_lock_irqsave(&i8253_lock, flags);
173 outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
174 udelay(10);
175 outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
176 udelay(10);
177 outb(LATCH >> 8 , PIT_CH0); /* MSB */
178 spin_unlock_irqrestore(&i8253_lock, flags);
179}
180
181static int timer_resume(struct sys_device *dev)
182{
183 setup_pit_timer();
184 return 0;
185}
186
187static struct sysdev_class timer_sysclass = {
188 set_kset_name("timer_pit"),
189 .resume = timer_resume,
190};
191
192static struct sys_device device_timer = {
193 .id = 0,
194 .cls = &timer_sysclass,
195};
196
197static int __init init_timer_sysfs(void)
198{
199 int error = sysdev_class_register(&timer_sysclass);
200 if (!error)
201 error = sysdev_register(&device_timer);
202 return error;
203}
204
205device_initcall(init_timer_sysfs);
206
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
new file mode 100644
index 000000000000..d77f22030fe6
--- /dev/null
+++ b/arch/i386/kernel/timers/timer_pm.c
@@ -0,0 +1,258 @@
1/*
2 * (C) Dominik Brodowski <linux@brodo.de> 2003
3 *
4 * Driver to use the Power Management Timer (PMTMR) available in some
5 * southbridges as primary timing source for the Linux kernel.
6 *
7 * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
8 * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
9 *
10 * This file is licensed under the GPL v2.
11 */
12
13
14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/device.h>
17#include <linux/init.h>
18#include <asm/types.h>
19#include <asm/timer.h>
20#include <asm/smp.h>
21#include <asm/io.h>
22#include <asm/arch_hooks.h>
23
24#include <linux/timex.h>
25#include "mach_timer.h"
26
27/* Number of PMTMR ticks expected during calibration run */
28#define PMTMR_TICKS_PER_SEC 3579545
29#define PMTMR_EXPECTED_RATE \
30 ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10))
31
32
33/* The I/O port the PMTMR resides at.
34 * The location is detected during setup_arch(),
35 * in arch/i386/acpi/boot.c */
36u32 pmtmr_ioport = 0;
37
38
39/* value of the Power timer at last timer interrupt */
40static u32 offset_tick;
41static u32 offset_delay;
42
43static unsigned long long monotonic_base;
44static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
45
46#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
47
48/*helper function to safely read acpi pm timesource*/
49static inline u32 read_pmtmr(void)
50{
51 u32 v1=0,v2=0,v3=0;
52 /* It has been reported that on various broken
53 * chipsets (ICH4, PIIX4 and PIIX4E) the ACPI PM time
54 * source is not latched, so you must read it multiple
55 * times to ensure a safe value is read.
56 */
57 do {
58 v1 = inl(pmtmr_ioport);
59 v2 = inl(pmtmr_ioport);
60 v3 = inl(pmtmr_ioport);
61 } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
62 || (v3 > v1 && v3 < v2));
63
64 /* mask the output to 24 bits */
65 return v2 & ACPI_PM_MASK;
66}
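/*
 * The loop above exits only when the three reads are consistent with
 * a monotonically increasing counter (allowing for a single 24-bit
 * wraparound between reads); a glitched middle value re-enters the
 * loop, so the returned v2 is always bracketed by two sane reads.
 */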
67
68
69/*
70 * Some boards have the PMTMR running way too fast. We check
71 * the PMTMR rate against PIT channel 2 to catch these cases.
72 */
73static int verify_pmtmr_rate(void)
74{
75 u32 value1, value2;
76 unsigned long count, delta;
77
78 mach_prepare_counter();
79 value1 = read_pmtmr();
80 mach_countup(&count);
81 value2 = read_pmtmr();
82 delta = (value2 - value1) & ACPI_PM_MASK;
83
84 /* Check that the PMTMR delta is within 5% of what we expect */
85 if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 ||
86 delta > (PMTMR_EXPECTED_RATE * 21) / 20) {
87 printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE);
88 return -1;
89 }
90
91 return 0;
92}
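/*
 * Worked example of the 5% window above, assuming HZ=100 (which makes
 * mach_countup() a roughly 50 msec calibration run): the PM timer
 * should advance about 0.05 * 3579545 ~= 178977 ticks, so measured
 * deltas outside roughly 170028..187926 cause the timer to be
 * rejected.
 */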
93
94
95static int init_pmtmr(char* override)
96{
97 u32 value1, value2;
98 unsigned int i;
99
100 if (override[0] && strncmp(override,"pmtmr",5))
101 return -ENODEV;
102
103 if (!pmtmr_ioport)
104 return -ENODEV;
105
106 /* we use the TSC for delay_pmtmr, so make sure it exists */
107 if (!cpu_has_tsc)
108 return -ENODEV;
109
110 /* "verify" this timing source */
111 value1 = read_pmtmr();
112 for (i = 0; i < 10000; i++) {
113 value2 = read_pmtmr();
114 if (value2 == value1)
115 continue;
116 if (value2 > value1)
117 goto pm_good;
118 if ((value2 < value1) && ((value2) < 0xFFF))
119 goto pm_good;
120 printk(KERN_INFO "PM-Timer had inconsistent results: %#x, %#x - aborting.\n", value1, value2);
121 return -EINVAL;
122 }
123 printk(KERN_INFO "PM-Timer had no reasonable result: %#x - aborting.\n", value1);
124 return -ENODEV;
125
126pm_good:
127 if (verify_pmtmr_rate() != 0)
128 return -ENODEV;
129
130 init_cpu_khz();
131 return 0;
132}
133
134static inline u32 cyc2us(u32 cycles)
135{
136 /* The Power Management Timer ticks at 3.579545 ticks per microsecond.
137 * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
138 *
139 * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
140 * easily be multiplied with 286 (=0x11E) without having to fear
141 * u32 overflows.
142 */
143 cycles *= 286;
144 return (cycles >> 10);
145}
146
147/*
148 * this gets called during each timer interrupt
149 * - Called while holding the writer xtime_lock
150 */
151static void mark_offset_pmtmr(void)
152{
153 u32 lost, delta, last_offset;
154 static int first_run = 1;
155 last_offset = offset_tick;
156
157 write_seqlock(&monotonic_lock);
158
159 offset_tick = read_pmtmr();
160
161 /* calculate tick interval */
162 delta = (offset_tick - last_offset) & ACPI_PM_MASK;
163
164 /* convert to usecs */
165 delta = cyc2us(delta);
166
167 /* update the monotonic base value */
168 monotonic_base += delta * NSEC_PER_USEC;
169 write_sequnlock(&monotonic_lock);
170
171 /* convert to ticks */
172 delta += offset_delay;
173 lost = delta / (USEC_PER_SEC / HZ);
174 offset_delay = delta % (USEC_PER_SEC / HZ);
175
176
177 /* compensate for lost ticks */
178 if (lost >= 2)
179 jiffies_64 += lost - 1;
180
181 /* don't calculate delay for the first run,
182 or if we've got less than a tick */
183 if (first_run || (lost < 1)) {
184 first_run = 0;
185 offset_delay = 0;
186 }
187}
188
189
190static unsigned long long monotonic_clock_pmtmr(void)
191{
192 u32 last_offset, this_offset;
193 unsigned long long base, ret;
194 unsigned seq;
195
196
197 /* atomically read monotonic base & last_offset */
198 do {
199 seq = read_seqbegin(&monotonic_lock);
200 last_offset = offset_tick;
201 base = monotonic_base;
202 } while (read_seqretry(&monotonic_lock, seq));
203
204 /* Read the pmtmr */
205 this_offset = read_pmtmr();
206
207 /* convert to nanoseconds */
208 ret = (this_offset - last_offset) & ACPI_PM_MASK;
209 ret = base + (cyc2us(ret) * NSEC_PER_USEC);
210 return ret;
211}
212
213static void delay_pmtmr(unsigned long loops)
214{
215 unsigned long bclock, now;
216
217 rdtscl(bclock);
218 do
219 {
220 rep_nop();
221 rdtscl(now);
222 } while ((now-bclock) < loops);
223}
224
225
226/*
227 * get the offset (in microseconds) from the last call to mark_offset()
228 * - Called holding a reader xtime_lock
229 */
230static unsigned long get_offset_pmtmr(void)
231{
232 u32 now, offset, delta = 0;
233
234 offset = offset_tick;
235 now = read_pmtmr();
236 delta = (now - offset)&ACPI_PM_MASK;
237
238 return (unsigned long) offset_delay + cyc2us(delta);
239}
240
241
242/* acpi timer_opts struct */
243static struct timer_opts timer_pmtmr = {
244 .name = "pmtmr",
245 .mark_offset = mark_offset_pmtmr,
246 .get_offset = get_offset_pmtmr,
247 .monotonic_clock = monotonic_clock_pmtmr,
248 .delay = delay_pmtmr,
249};
250
251struct init_timer_opts __initdata timer_pmtmr_init = {
252 .init = init_pmtmr,
253 .opts = &timer_pmtmr,
254};
255
256MODULE_LICENSE("GPL");
257MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
258MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86");
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
new file mode 100644
index 000000000000..a685994e5c8e
--- /dev/null
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -0,0 +1,560 @@
1/*
2 * This code largely moved from arch/i386/kernel/time.c.
3 * See comments there for proper credits.
4 *
5 * 2004-06-25 Jesper Juhl
6 * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
7 * failing to inline.
8 */
9
10#include <linux/spinlock.h>
11#include <linux/init.h>
12#include <linux/timex.h>
13#include <linux/errno.h>
14#include <linux/cpufreq.h>
15#include <linux/string.h>
16#include <linux/jiffies.h>
17
18#include <asm/timer.h>
19#include <asm/io.h>
20/* processor.h for the tsc_disable flag */
21#include <asm/processor.h>
22
23#include "io_ports.h"
24#include "mach_timer.h"
25
26#include <asm/hpet.h>
27
28#ifdef CONFIG_HPET_TIMER
29static unsigned long hpet_usec_quotient;
30static unsigned long hpet_last;
31static struct timer_opts timer_tsc;
32#endif
33
34static inline void cpufreq_delayed_get(void);
35
36int tsc_disable __initdata = 0;
37
38extern spinlock_t i8253_lock;
39
40static int use_tsc;
41/* Number of usecs that the last interrupt was delayed */
42static int delay_at_last_interrupt;
43
44static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
45static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
46static unsigned long long monotonic_base;
47static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
48
49/* convert from cycles(64bits) => nanoseconds (64bits)
50 * basic equation:
51 * ns = cycles / (freq / ns_per_sec)
52 * ns = cycles * (ns_per_sec / freq)
53 * ns = cycles * (10^9 / (cpu_mhz * 10^6))
54 * ns = cycles * (10^3 / cpu_mhz)
55 *
56 * Then we use scaling math (suggested by george@mvista.com) to get:
57 * ns = cycles * (10^3 * SC / cpu_mhz) / SC
58 * ns = cycles * cyc2ns_scale / SC
59 *
60 * And since SC is a constant power of two, we can convert the div
61 * into a shift.
62 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
63 */
64static unsigned long cyc2ns_scale;
65#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
66
67static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
68{
69 cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
70}
71
72static inline unsigned long long cycles_2_ns(unsigned long long cyc)
73{
74 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
75}
76
77static int count2; /* counter for mark_offset_tsc() */
78
79/* Cached *multiplier* to convert TSC counts to microseconds.
80 * (see the equation below).
81 * Equal to 2^32 * (1 / (clocks per usec) ).
82 * Initialized in time_init.
83 */
84static unsigned long fast_gettimeoffset_quotient;
85
86static unsigned long get_offset_tsc(void)
87{
88 register unsigned long eax, edx;
89
90 /* Read the Time Stamp Counter */
91
92 rdtsc(eax,edx);
93
94 /* .. relative to previous jiffy (32 bits is enough) */
95 eax -= last_tsc_low; /* tsc_low delta */
96
97 /*
98 * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
99 * = (tsc_low delta) * (usecs_per_clock)
100 * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
101 *
102 * Using a mull instead of a divl saves up to 31 clock cycles
103 * in the critical path.
104 */
105
106 __asm__("mull %2"
107 :"=a" (eax), "=d" (edx)
108 :"rm" (fast_gettimeoffset_quotient),
109 "0" (eax));
110
111 /* our adjusted time offset in microseconds */
112 return delay_at_last_interrupt + edx;
113}
114
115static unsigned long long monotonic_clock_tsc(void)
116{
117 unsigned long long last_offset, this_offset, base;
118 unsigned seq;
119
120 /* atomically read monotonic base & last_offset */
121 do {
122 seq = read_seqbegin(&monotonic_lock);
123 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
124 base = monotonic_base;
125 } while (read_seqretry(&monotonic_lock, seq));
126
127 /* Read the Time Stamp Counter */
128 rdtscll(this_offset);
129
130 /* return the value in ns */
131 return base + cycles_2_ns(this_offset - last_offset);
132}
133
134/*
135 * Scheduler clock - returns current time in nanosec units.
136 */
137unsigned long long sched_clock(void)
138{
139 unsigned long long this_offset;
140
141 /*
142 * In the NUMA case we don't use the TSC, as the TSCs are not
143 * synchronized across all CPUs.
144 */
145#ifndef CONFIG_NUMA
146 if (!use_tsc)
147#endif
148 /* no locking but a rare wrong value is not a big deal */
149 return jiffies_64 * (1000000000 / HZ);
150
151 /* Read the Time Stamp Counter */
152 rdtscll(this_offset);
153
154 /* return the value in ns */
155 return cycles_2_ns(this_offset);
156}
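/*
 * When the TSC cannot be used, the jiffies fallback above has only
 * tick resolution: e.g. at HZ=100, sched_clock() advances in steps of
 * 1000000000 / 100 = 10000000 ns, i.e. 10 ms per jiffy.
 */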
157
158static void delay_tsc(unsigned long loops)
159{
160 unsigned long bclock, now;
161
162 rdtscl(bclock);
163 do
164 {
165 rep_nop();
166 rdtscl(now);
167 } while ((now-bclock) < loops);
168}
169
170#ifdef CONFIG_HPET_TIMER
171static void mark_offset_tsc_hpet(void)
172{
173 unsigned long long this_offset, last_offset;
174 unsigned long offset, temp, hpet_current;
175
176 write_seqlock(&monotonic_lock);
177 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
178 /*
179 * It is important that these two operations happen almost at
180 * the same time. We do the RDTSC stuff first, since it's
181 * faster. To avoid any inconsistencies, we need interrupts
182 * disabled locally.
183 */
184 /*
185 * Interrupts are just disabled locally since the timer irq
186 * has the SA_INTERRUPT flag set. -arca
187 */
188 /* read Pentium cycle counter */
189
190 hpet_current = hpet_readl(HPET_COUNTER);
191 rdtsc(last_tsc_low, last_tsc_high);
192
193 /* lost tick compensation */
194 offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
195 if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
196 int lost_ticks = (offset - hpet_last) / hpet_tick;
197 jiffies_64 += lost_ticks;
198 }
199 hpet_last = hpet_current;
200
201 /* update the monotonic base value */
202 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
203 monotonic_base += cycles_2_ns(this_offset - last_offset);
204 write_sequnlock(&monotonic_lock);
205
206 /* calculate delay_at_last_interrupt */
207 /*
208 * Time offset = (hpet delta) * ( usecs per HPET clock )
209 * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
210 * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
211 * Where,
212 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
213 */
214 delay_at_last_interrupt = hpet_current - offset;
215 ASM_MUL64_REG(temp, delay_at_last_interrupt,
216 hpet_usec_quotient, delay_at_last_interrupt);
217}
218#endif
219
220
221#ifdef CONFIG_CPU_FREQ
222#include <linux/workqueue.h>
223
224static unsigned int cpufreq_delayed_issched = 0;
225static unsigned int cpufreq_init = 0;
226static struct work_struct cpufreq_delayed_get_work;
227
228static void handle_cpufreq_delayed_get(void *v)
229{
230 unsigned int cpu;
231 for_each_online_cpu(cpu) {
232 cpufreq_get(cpu);
233 }
234 cpufreq_delayed_issched = 0;
235}
236
237/* if we notice lost ticks, schedule a call to cpufreq_get() to verify
238 * that the CPU frequency the timing core thinks the CPU is running
239 * at is still correct.
240 */
241static inline void cpufreq_delayed_get(void)
242{
243 if (cpufreq_init && !cpufreq_delayed_issched) {
244 cpufreq_delayed_issched = 1;
245 printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
246 schedule_work(&cpufreq_delayed_get_work);
247 }
248}
249
250/* If the CPU frequency is scaled, TSC-based delays will need a different
251 * loops_per_jiffy value to function properly.
252 */
253
254static unsigned int ref_freq = 0;
255static unsigned long loops_per_jiffy_ref = 0;
256
257#ifndef CONFIG_SMP
258static unsigned long fast_gettimeoffset_ref = 0;
259static unsigned long cpu_khz_ref = 0;
260#endif
261
262static int
263time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
264 void *data)
265{
266 struct cpufreq_freqs *freq = data;
267
268 if (val != CPUFREQ_RESUMECHANGE)
269 write_seqlock_irq(&xtime_lock);
270 if (!ref_freq) {
271 ref_freq = freq->old;
272 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
273#ifndef CONFIG_SMP
274 fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
275 cpu_khz_ref = cpu_khz;
276#endif
277 }
278
279 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
280 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
281 (val == CPUFREQ_RESUMECHANGE)) {
282 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
283 cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
284#ifndef CONFIG_SMP
285 if (cpu_khz)
286 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
287 if (use_tsc) {
288 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
289 fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
290 set_cyc2ns_scale(cpu_khz/1000);
291 }
292 }
293#endif
294 }
295
296 if (val != CPUFREQ_RESUMECHANGE)
297 write_sequnlock_irq(&xtime_lock);
298
299 return 0;
300}
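/*
 * Note the argument order in the cpufreq_scale() calls above:
 * loops_per_jiffy and cpu_khz scale proportionally with frequency,
 * so they pass (ref_freq, freq->new), while
 * fast_gettimeoffset_quotient is 2^32 / (clocks per usec) and scales
 * inversely, hence the swapped (freq->new, ref_freq).  Worked
 * example: halving the clock from 1000 MHz to 500 MHz halves
 * loops_per_jiffy and cpu_khz but doubles the quotient.
 */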
301
302static struct notifier_block time_cpufreq_notifier_block = {
303 .notifier_call = time_cpufreq_notifier
304};
305
306
307static int __init cpufreq_tsc(void)
308{
309 int ret;
310 INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
311 ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
312 CPUFREQ_TRANSITION_NOTIFIER);
313 if (!ret)
314 cpufreq_init = 1;
315 return ret;
316}
317core_initcall(cpufreq_tsc);
318
319#else /* CONFIG_CPU_FREQ */
320static inline void cpufreq_delayed_get(void) { return; }
321#endif
322
323static void mark_offset_tsc(void)
324{
325 unsigned long lost,delay;
326 unsigned long delta = last_tsc_low;
327 int count;
328 int countmp;
329 static int count1 = 0;
330 unsigned long long this_offset, last_offset;
331 static int lost_count = 0;
332
333 write_seqlock(&monotonic_lock);
334 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
335 /*
336 * It is important that these two operations happen almost at
337 * the same time. We do the RDTSC stuff first, since it's
338 * faster. To avoid any inconsistencies, we need interrupts
339 * disabled locally.
340 */
341
342 /*
343 * Interrupts are just disabled locally since the timer irq
344 * has the SA_INTERRUPT flag set. -arca
345 */
346
347 /* read Pentium cycle counter */
348
349 rdtsc(last_tsc_low, last_tsc_high);
350
351 spin_lock(&i8253_lock);
352 outb_p(0x00, PIT_MODE); /* latch the count ASAP */
353
354 count = inb_p(PIT_CH0); /* read the latched count */
355 count |= inb(PIT_CH0) << 8;
356
357 /*
358 * VIA686a test code... reset the latch if count > max + 1
359 * from timer_pit.c - cjb
360 */
361 if (count > LATCH) {
362 outb_p(0x34, PIT_MODE);
363 outb_p(LATCH & 0xff, PIT_CH0);
364 outb(LATCH >> 8, PIT_CH0);
365 count = LATCH - 1;
366 }
367
368 spin_unlock(&i8253_lock);
369
370 if (pit_latch_buggy) {
371 /* use the median of the last three latched counts */
372 if ((count2 >= count && count >= count1)
373 || (count1 >= count && count >= count2)) {
374 count2 = count1; count1 = count;
375 } else if ((count1 >= count2 && count2 >= count)
376 || (count >= count2 && count2 >= count1)) {
377 countmp = count;count = count2;
378 count2 = count1;count1 = countmp;
379 } else {
380 count2 = count1; count1 = count; count = count1;
381 }
382 }
383
384 /* lost tick compensation */
385 delta = last_tsc_low - delta;
386 {
387 register unsigned long eax, edx;
388 eax = delta;
389 __asm__("mull %2"
390 :"=a" (eax), "=d" (edx)
391 :"rm" (fast_gettimeoffset_quotient),
392 "0" (eax));
393 delta = edx;
394 }
395 delta += delay_at_last_interrupt;
396 lost = delta/(1000000/HZ);
397 delay = delta%(1000000/HZ);
398 if (lost >= 2) {
399 jiffies_64 += lost-1;
400
401 /* sanity check to ensure we're not always losing ticks */
402 if (lost_count++ > 100) {
403 printk(KERN_WARNING "Losing too many ticks!\n");
404 printk(KERN_WARNING "TSC cannot be used as a timesource.\n");
405 printk(KERN_WARNING "Possible reasons for this are:\n");
406 printk(KERN_WARNING " You're running with Speedstep,\n");
407 printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n");
408 printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n");
409 printk(KERN_WARNING "Falling back to a sane timesource now.\n");
410
411 clock_fallback();
412 }
413 /* ... but give the TSC a fair chance */
414 if (lost_count > 25)
415 cpufreq_delayed_get();
416 } else
417 lost_count = 0;
418 /* update the monotonic base value */
419 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
420 monotonic_base += cycles_2_ns(this_offset - last_offset);
421 write_sequnlock(&monotonic_lock);
422
423 /* calculate delay_at_last_interrupt */
424 count = ((LATCH-1) - count) * TICK_SIZE;
425 delay_at_last_interrupt = (count + LATCH/2) / LATCH;
426
427 /* catch corner case where tick rollover occurred
428 * between tsc and pit reads (as noted when
429 * usec delta is > 90% # of usecs/tick)
430 */
431 if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
432 jiffies_64++;
433}
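/*
 * Worked example of the lost tick compensation above, assuming HZ=100
 * (10000 usecs per tick): a TSC delta that converts to 32500 usecs
 * gives lost = 3 and delay = 2500, so jiffies_64 is advanced by the
 * two ticks the regular timer path will not account for; the 2500
 * usec remainder is only compared against delay_at_last_interrupt to
 * catch a rollover between the TSC and PIT reads.
 */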
434
435static int __init init_tsc(char* override)
436{
437
438 /* check clock override */
439 if (override[0] && strncmp(override,"tsc",3)) {
440#ifdef CONFIG_HPET_TIMER
441 if (is_hpet_enabled()) {
442 printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
443 } else
444#endif
445 {
446 return -ENODEV;
447 }
448 }
449
450 /*
451 * If we have APM enabled or the CPU clock speed is variable
452 * (CPU stops clock on HLT or slows clock to save power)
453 * then the TSC timestamps may diverge by up to 1 jiffy from
454 * 'real time' but nothing will break.
455 * The most frequent case is that the CPU is "woken" from a halt
456 * state by the timer interrupt itself, so we get 0 error. In the
457 * rare cases where a driver would "wake" the CPU and request a
458 * timestamp, the maximum error is < 1 jiffy. But timestamps are
459 * still perfectly ordered.
460 * Note that the TSC counter will be reset if APM suspends
461 * to disk; this won't break the kernel, though, 'cuz we're
462 * smart. See arch/i386/kernel/apm.c.
463 */
464 /*
465 * Firstly we have to do a CPU check for chips with
466 * a potentially buggy TSC. At this point we haven't run
467 * the ident/bugs checks so we must run this hook as it
468 * may turn off the TSC flag.
469 *
470 * NOTE: this doesn't yet handle SMP 486 machines where only
471 * some CPUs have a TSC. That's never worked and nobody has
472 * moaned if you have the only one in the world - you fix it!
473 */
474
475 count2 = LATCH; /* initialize counter for mark_offset_tsc() */
476
477 if (cpu_has_tsc) {
478 unsigned long tsc_quotient;
479#ifdef CONFIG_HPET_TIMER
480 if (is_hpet_enabled()){
481 unsigned long result, remain;
482 printk("Using TSC for gettimeofday\n");
483 tsc_quotient = calibrate_tsc_hpet(NULL);
484 timer_tsc.mark_offset = &mark_offset_tsc_hpet;
485 /*
486 * Math to calculate hpet to usec multiplier
487 * Look for the comments at get_offset_tsc_hpet()
488 */
489 ASM_DIV64_REG(result, remain, hpet_tick,
490 0, KERNEL_TICK_USEC);
491 if (remain > (hpet_tick >> 1))
492 result++; /* rounding the result */
493
494 hpet_usec_quotient = result;
495 } else
496#endif
497 {
498 tsc_quotient = calibrate_tsc();
499 }
500
501 if (tsc_quotient) {
502 fast_gettimeoffset_quotient = tsc_quotient;
503 use_tsc = 1;
504 /*
505 * We could be more selective here I suspect
506 * and just enable this for the next intel chips ?
507 */
508 /* report CPU clock rate in kHz.
509 * The formula is (10^3 * 2^32) / (2^32 * 1 / (clocks/us)) =
510 * clocks/msec = kHz. Our precision is about 100 ppm.
511 */
512 { unsigned long eax=0, edx=1000;
513 __asm__("divl %2"
514 :"=a" (cpu_khz), "=d" (edx)
515 :"r" (tsc_quotient),
516 "0" (eax), "1" (edx));
517 printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
518 }
519 set_cyc2ns_scale(cpu_khz/1000);
520 return 0;
521 }
522 }
523 return -ENODEV;
524}
525
526#ifndef CONFIG_X86_TSC
527/* disable flag for tsc. Takes effect by clearing the TSC cpu flag
528 * in cpu/common.c */
529static int __init tsc_setup(char *str)
530{
531 tsc_disable = 1;
532 return 1;
533}
534#else
535static int __init tsc_setup(char *str)
536{
537 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
538 "cannot disable TSC.\n");
539 return 1;
540}
541#endif
542__setup("notsc", tsc_setup);
543
544
545
546/************************************************************/
547
548/* tsc timer_opts struct */
549static struct timer_opts timer_tsc = {
550 .name = "tsc",
551 .mark_offset = mark_offset_tsc,
552 .get_offset = get_offset_tsc,
553 .monotonic_clock = monotonic_clock_tsc,
554 .delay = delay_tsc,
555};
556
557struct init_timer_opts __initdata timer_tsc_init = {
558 .init = init_tsc,
559 .opts = &timer_tsc,
560};
diff --git a/arch/i386/kernel/trampoline.S b/arch/i386/kernel/trampoline.S
new file mode 100644
index 000000000000..fcce0e61b0e7
--- /dev/null
+++ b/arch/i386/kernel/trampoline.S
@@ -0,0 +1,80 @@
1/*
2 *
3 * Trampoline.S Derived from Setup.S by Linus Torvalds
4 *
5 * 4 Jan 1997 Michael Chastain: changed to gnu as.
6 *
7 * This is only used for booting secondary CPUs in SMP machines
8 *
9 * Entry: CS:IP point to the start of our code, we are
10 * in real mode with no stack, but the rest of the
11 * trampoline page to make our stack and everything else
12 * is a mystery.
13 *
14 * In fact we don't actually need a stack so we don't
15 * set one up.
16 *
17 * We jump into the boot/compressed/head.S code. So you'd
18 * better be running a compressed kernel image or you
19 * won't get very far.
20 *
21 * On entry to trampoline_data, the processor is in real mode
22 * with 16-bit addressing and 16-bit data. CS has some value
23 * and IP is zero. Thus, data addresses need to be absolute
24 * (no relocation) and are taken with regard to r_base.
25 *
26 * If you work on this file, check the object module with
27 * objdump --reloc to make sure there are no relocation
28 * entries except for:
29 *
30 * TYPE VALUE
31 * R_386_32 startup_32_smp
32 * R_386_32 boot_gdt_table
33 */
34
35#include <linux/linkage.h>
36#include <asm/segment.h>
37#include <asm/page.h>
38
39.data
40
41.code16
42
43ENTRY(trampoline_data)
44r_base = .
45 wbinvd # Needed for NUMA-Q should be harmless for others
46 mov %cs, %ax # Code and data in the same place
47 mov %ax, %ds
48
49 cli # We should be safe anyway
50
51 movl $0xA5A5A5A5, trampoline_data - r_base
52 # write marker so the master knows we're running
53
54 /* With the GDT tables in a non-default location the kernel can be
55 * beyond 16MB, and lgdt will not be able to load the address, as the
56 * default operand size in real mode is 16 bit. Use lgdtl instead to
57 * force the operand size to 32 bit.
58 */
59
60 lidtl boot_idt - r_base # load idt with 0, 0
61 lgdtl boot_gdt - r_base # load gdt with whatever is appropriate
62
63 xor %ax, %ax
64 inc %ax # protected mode (PE) bit
65 lmsw %ax # into protected mode
66 # flush prefetch and jump to startup_32_smp in arch/i386/kernel/head.S
67 ljmpl $__BOOT_CS, $(startup_32_smp-__PAGE_OFFSET)
68
69 # These need to be in the same 64K segment as the above;
70 # hence we don't use the boot_gdt_descr defined in head.S
71boot_gdt:
72 .word __BOOT_DS + 7 # gdt limit
73 .long boot_gdt_table-__PAGE_OFFSET # gdt base
74
75boot_idt:
76 .word 0 # idt limit = 0
77 .long 0 # idt base = 0L
78
79.globl trampoline_end
80trampoline_end:
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
new file mode 100644
index 000000000000..6c0e383915b6
--- /dev/null
+++ b/arch/i386/kernel/traps.c
@@ -0,0 +1,1084 @@
1/*
2 * linux/arch/i386/traps.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 *
6 * Pentium III FXSR, SSE support
7 * Gareth Hughes <gareth@valinux.com>, May 2000
8 */
9
10/*
11 * 'Traps.c' handles hardware traps and faults after we have saved some
12 * state in 'asm.s'.
13 */
14#include <linux/config.h>
15#include <linux/sched.h>
16#include <linux/kernel.h>
17#include <linux/string.h>
18#include <linux/errno.h>
19#include <linux/timer.h>
20#include <linux/mm.h>
21#include <linux/init.h>
22#include <linux/delay.h>
23#include <linux/spinlock.h>
24#include <linux/interrupt.h>
25#include <linux/highmem.h>
26#include <linux/kallsyms.h>
27#include <linux/ptrace.h>
28#include <linux/utsname.h>
29#include <linux/kprobes.h>
30
31#ifdef CONFIG_EISA
32#include <linux/ioport.h>
33#include <linux/eisa.h>
34#endif
35
36#ifdef CONFIG_MCA
37#include <linux/mca.h>
38#endif
39
40#include <asm/processor.h>
41#include <asm/system.h>
42#include <asm/uaccess.h>
43#include <asm/io.h>
44#include <asm/atomic.h>
45#include <asm/debugreg.h>
46#include <asm/desc.h>
47#include <asm/i387.h>
48#include <asm/nmi.h>
49
50#include <asm/smp.h>
51#include <asm/arch_hooks.h>
52#include <asm/kdebug.h>
53
54#include <linux/irq.h>
55#include <linux/module.h>
56
57#include "mach_traps.h"
58
59asmlinkage int system_call(void);
60
61struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
62 { 0, 0 }, { 0, 0 } };
63
64/* Do we ignore FPU interrupts ? */
65char ignore_fpu_irq = 0;
66
67/*
68 * The IDT has to be page-aligned to simplify the Pentium
69 * F0 0F bug workaround.. We have a special link segment
70 * for this.
71 */
72struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, };
73
74asmlinkage void divide_error(void);
75asmlinkage void debug(void);
76asmlinkage void nmi(void);
77asmlinkage void int3(void);
78asmlinkage void overflow(void);
79asmlinkage void bounds(void);
80asmlinkage void invalid_op(void);
81asmlinkage void device_not_available(void);
82asmlinkage void coprocessor_segment_overrun(void);
83asmlinkage void invalid_TSS(void);
84asmlinkage void segment_not_present(void);
85asmlinkage void stack_segment(void);
86asmlinkage void general_protection(void);
87asmlinkage void page_fault(void);
88asmlinkage void coprocessor_error(void);
89asmlinkage void simd_coprocessor_error(void);
90asmlinkage void alignment_check(void);
91asmlinkage void spurious_interrupt_bug(void);
92asmlinkage void machine_check(void);
93
94static int kstack_depth_to_print = 24;
95struct notifier_block *i386die_chain;
96static DEFINE_SPINLOCK(die_notifier_lock);
97
98int register_die_notifier(struct notifier_block *nb)
99{
100 int err = 0;
101 unsigned long flags;
102 spin_lock_irqsave(&die_notifier_lock, flags);
103 err = notifier_chain_register(&i386die_chain, nb);
104 spin_unlock_irqrestore(&die_notifier_lock, flags);
105 return err;
106}
107
108static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
109{
110 return p > (void *)tinfo &&
111 p < (void *)tinfo + THREAD_SIZE - 3;
112}
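/*
 * The "- 3" above ensures that a full 32-bit word can still be read
 * at p without running past the end of the THREAD_SIZE stack area.
 */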
113
114static inline unsigned long print_context_stack(struct thread_info *tinfo,
115 unsigned long *stack, unsigned long ebp)
116{
117 unsigned long addr;
118
119#ifdef CONFIG_FRAME_POINTER
120 while (valid_stack_ptr(tinfo, (void *)ebp)) {
121 addr = *(unsigned long *)(ebp + 4);
122 printk(" [<%08lx>] ", addr);
123 print_symbol("%s", addr);
124 printk("\n");
125 ebp = *(unsigned long *)ebp;
126 }
127#else
128 while (valid_stack_ptr(tinfo, stack)) {
129 addr = *stack++;
130 if (__kernel_text_address(addr)) {
131 printk(" [<%08lx>]", addr);
132 print_symbol(" %s", addr);
133 printk("\n");
134 }
135 }
136#endif
137 return ebp;
138}
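/*
 * The CONFIG_FRAME_POINTER branch above walks the i386 frame chain: in
 * each frame, [ebp] holds the caller's saved ebp and [ebp+4] the return
 * address. A minimal user-space sketch of the same walk, assuming
 * gcc -m32 -fno-omit-frame-pointer (illustration only, not kernel code):
 */
#if 0
#include <stdio.h>

static void backtrace_ebp(void)
{
	unsigned long *ebp, *next;

	asm("movl %%ebp, %0" : "=r" (ebp));
	while (ebp) {
		printf("  [<%08lx>]\n", ebp[1]);	/* return address */
		next = (unsigned long *)ebp[0];		/* caller's frame */
		if (next <= ebp)	/* frames must move up the stack */
			break;
		ebp = next;
	}
}
#endif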
139
140void show_trace(struct task_struct *task, unsigned long * stack)
141{
142 unsigned long ebp;
143
144 if (!task)
145 task = current;
146
147 if (task == current) {
148 /* Grab ebp right from our regs */
149 asm ("movl %%ebp, %0" : "=r" (ebp) : );
150 } else {
151 /* ebp is the last reg pushed by switch_to */
152 ebp = *(unsigned long *) task->thread.esp;
153 }
154
155 while (1) {
156 struct thread_info *context;
157 context = (struct thread_info *)
158 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
159 ebp = print_context_stack(context, stack, ebp);
160 stack = (unsigned long*)context->previous_esp;
161 if (!stack)
162 break;
163 printk(" =======================\n");
164 }
165}
166
167void show_stack(struct task_struct *task, unsigned long *esp)
168{
169 unsigned long *stack;
170 int i;
171
172 if (esp == NULL) {
173 if (task)
174 esp = (unsigned long*)task->thread.esp;
175 else
176 esp = (unsigned long *)&esp;
177 }
178
179 stack = esp;
180 for(i = 0; i < kstack_depth_to_print; i++) {
181 if (kstack_end(stack))
182 break;
183 if (i && ((i % 8) == 0))
184 printk("\n ");
185 printk("%08lx ", *stack++);
186 }
187 printk("\nCall Trace:\n");
188 show_trace(task, esp);
189}
190
191/*
192 * The architecture-independent dump_stack generator
193 */
194void dump_stack(void)
195{
196 unsigned long stack;
197
198 show_trace(current, &stack);
199}
200
201EXPORT_SYMBOL(dump_stack);
202
203void show_registers(struct pt_regs *regs)
204{
205 int i;
206 int in_kernel = 1;
207 unsigned long esp;
208 unsigned short ss;
209
210 esp = (unsigned long) (&regs->esp);
211 ss = __KERNEL_DS;
212 if (regs->xcs & 3) {
213 in_kernel = 0;
214 esp = regs->esp;
215 ss = regs->xss & 0xffff;
216 }
217 print_modules();
218 printk("CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\nEFLAGS: %08lx"
219 " (%s) \n",
220 smp_processor_id(), 0xffff & regs->xcs, regs->eip,
221 print_tainted(), regs->eflags, system_utsname.release);
222 print_symbol("EIP is at %s\n", regs->eip);
223 printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
224 regs->eax, regs->ebx, regs->ecx, regs->edx);
225 printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
226 regs->esi, regs->edi, regs->ebp, esp);
227 printk("ds: %04x es: %04x ss: %04x\n",
228 regs->xds & 0xffff, regs->xes & 0xffff, ss);
229 printk("Process %s (pid: %d, threadinfo=%p task=%p)",
230 current->comm, current->pid, current_thread_info(), current);
231 /*
232 * When in-kernel, we also print out the stack and code at the
233 * time of the fault..
234 */
235 if (in_kernel) {
236 u8 *eip;
237
238 printk("\nStack: ");
239 show_stack(NULL, (unsigned long*)esp);
240
241 printk("Code: ");
242
243 eip = (u8 *)regs->eip - 43;
244 for (i = 0; i < 64; i++, eip++) {
245 unsigned char c;
246
247 if (eip < (u8 *)PAGE_OFFSET || __get_user(c, eip)) {
248 printk(" Bad EIP value.");
249 break;
250 }
251 if (eip == (u8 *)regs->eip)
252 printk("<%02x> ", c);
253 else
254 printk("%02x ", c);
255 }
256 }
257 printk("\n");
258}
259
260static void handle_BUG(struct pt_regs *regs)
261{
262 unsigned short ud2;
263 unsigned short line;
264 char *file;
265 char c;
266 unsigned long eip;
267
268 if (regs->xcs & 3)
269 goto no_bug; /* Not in kernel */
270
271 eip = regs->eip;
272
273 if (eip < PAGE_OFFSET)
274 goto no_bug;
275 if (__get_user(ud2, (unsigned short *)eip))
276 goto no_bug;
277 if (ud2 != 0x0b0f)
278 goto no_bug;
279 if (__get_user(line, (unsigned short *)(eip + 2)))
280 goto bug;
281 if (__get_user(file, (char **)(eip + 4)) ||
282 (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
283 file = "<bad filename>";
284
285 printk("------------[ cut here ]------------\n");
286 printk(KERN_ALERT "kernel BUG at %s:%d!\n", file, line);
287
288no_bug:
289 return;
290
291 /* Here we know it was a BUG but file-n-line is unavailable */
292bug:
293 printk("Kernel BUG\n");
294}
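/*
 * For reference, the layout decoded above is what the i386 BUG() macro
 * emits when CONFIG_DEBUG_BUGVERBOSE is set -- roughly (sketch, see
 * include/asm-i386/bug.h for the authoritative definition):
 *
 *	#define BUG()                                   \
 *		__asm__ __volatile__("ud2\n"            \
 *				     "\t.word %c0\n"    \
 *				     "\t.long %c1\n"    \
 *				     : : "i" (__LINE__), "i" (__FILE__))
 *
 * i.e. the 0x0f 0x0b opcode (read little-endian as 0x0b0f above), a
 * 16-bit line number at eip+2, and a pointer to the file name at eip+4.
 */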
295
296void die(const char * str, struct pt_regs * regs, long err)
297{
298 static struct {
299 spinlock_t lock;
300 u32 lock_owner;
301 int lock_owner_depth;
302 } die = {
303 .lock = SPIN_LOCK_UNLOCKED,
304 .lock_owner = -1,
305 .lock_owner_depth = 0
306 };
307 static int die_counter;
308
309 if (die.lock_owner != _smp_processor_id()) {
310 console_verbose();
311 spin_lock_irq(&die.lock);
312 die.lock_owner = smp_processor_id();
313 die.lock_owner_depth = 0;
314 bust_spinlocks(1);
315 }
316
317 if (++die.lock_owner_depth < 3) {
318 int nl = 0;
319 handle_BUG(regs);
320 printk(KERN_ALERT "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
321#ifdef CONFIG_PREEMPT
322 printk("PREEMPT ");
323 nl = 1;
324#endif
325#ifdef CONFIG_SMP
326 printk("SMP ");
327 nl = 1;
328#endif
329#ifdef CONFIG_DEBUG_PAGEALLOC
330 printk("DEBUG_PAGEALLOC");
331 nl = 1;
332#endif
333 if (nl)
334 printk("\n");
335 notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
336 show_registers(regs);
337 } else
338 printk(KERN_ERR "Recursive die() failure, output suppressed\n");
339
340 bust_spinlocks(0);
341 die.lock_owner = -1;
342 spin_unlock_irq(&die.lock);
343 if (in_interrupt())
344 panic("Fatal exception in interrupt");
345
346 if (panic_on_oops) {
347 printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n");
348 ssleep(5);
349 panic("Fatal exception");
350 }
351 do_exit(SIGSEGV);
352}
353
354static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
355{
356 if (!(regs->eflags & VM_MASK) && !(3 & regs->xcs))
357 die(str, regs, err);
358}
359
360static void do_trap(int trapnr, int signr, char *str, int vm86,
361 struct pt_regs * regs, long error_code, siginfo_t *info)
362{
363 if (regs->eflags & VM_MASK) {
364 if (vm86)
365 goto vm86_trap;
366 goto trap_signal;
367 }
368
369 if (!(regs->xcs & 3))
370 goto kernel_trap;
371
372 trap_signal: {
373 struct task_struct *tsk = current;
374 tsk->thread.error_code = error_code;
375 tsk->thread.trap_no = trapnr;
376 if (info)
377 force_sig_info(signr, info, tsk);
378 else
379 force_sig(signr, tsk);
380 return;
381 }
382
383 kernel_trap: {
384 if (!fixup_exception(regs))
385 die(str, regs, error_code);
386 return;
387 }
388
389 vm86_trap: {
390 int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr);
391 if (ret) goto trap_signal;
392 return;
393 }
394}
395
396#define DO_ERROR(trapnr, signr, str, name) \
397fastcall void do_##name(struct pt_regs * regs, long error_code) \
398{ \
399 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
400 == NOTIFY_STOP) \
401 return; \
402 do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
403}
404
405#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
406fastcall void do_##name(struct pt_regs * regs, long error_code) \
407{ \
408 siginfo_t info; \
409 info.si_signo = signr; \
410 info.si_errno = 0; \
411 info.si_code = sicode; \
412 info.si_addr = (void __user *)siaddr; \
413 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
414 == NOTIFY_STOP) \
415 return; \
416 do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
417}
418
419#define DO_VM86_ERROR(trapnr, signr, str, name) \
420fastcall void do_##name(struct pt_regs * regs, long error_code) \
421{ \
422 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
423 == NOTIFY_STOP) \
424 return; \
425 do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
426}
427
428#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
429fastcall void do_##name(struct pt_regs * regs, long error_code) \
430{ \
431 siginfo_t info; \
432 info.si_signo = signr; \
433 info.si_errno = 0; \
434 info.si_code = sicode; \
435 info.si_addr = (void __user *)siaddr; \
436 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
437 == NOTIFY_STOP) \
438 return; \
439 do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
440}
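/*
 * As an illustration of these macros, the DO_ERROR(10, SIGSEGV,
 * "invalid TSS", invalid_TSS) instantiation below expands to roughly:
 *
 *	fastcall void do_invalid_TSS(struct pt_regs *regs, long error_code)
 *	{
 *		if (notify_die(DIE_TRAP, "invalid TSS", regs, error_code,
 *			       10, SIGSEGV) == NOTIFY_STOP)
 *			return;
 *		do_trap(10, SIGSEGV, "invalid TSS", 0, regs,
 *			error_code, NULL);
 *	}
 */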
441
442DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip)
443#ifndef CONFIG_KPROBES
444DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
445#endif
446DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
447DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
448DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip)
449DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
450DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
451DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
452DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
453DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
454
455fastcall void do_general_protection(struct pt_regs * regs, long error_code)
456{
457 int cpu = get_cpu();
458 struct tss_struct *tss = &per_cpu(init_tss, cpu);
459 struct thread_struct *thread = &current->thread;
460
461 /*
 462 * Perform the lazy TSS I/O bitmap copy. If the TSS has an
 463 * invalid offset set (the LAZY one) and the faulting thread has
 464 * a valid I/O bitmap pointer, we copy the I/O bitmap into the TSS
 465 * and set the offset field correctly. Then we let the CPU
 466 * restart the faulting instruction.
467 */
468 if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
469 thread->io_bitmap_ptr) {
470 memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
471 thread->io_bitmap_max);
472 /*
473 * If the previously set map was extending to higher ports
474 * than the current one, pad extra space with 0xff (no access).
475 */
476 if (thread->io_bitmap_max < tss->io_bitmap_max)
477 memset((char *) tss->io_bitmap +
478 thread->io_bitmap_max, 0xff,
479 tss->io_bitmap_max - thread->io_bitmap_max);
480 tss->io_bitmap_max = thread->io_bitmap_max;
481 tss->io_bitmap_base = IO_BITMAP_OFFSET;
482 put_cpu();
483 return;
484 }
485 put_cpu();
486
487 if (regs->eflags & VM_MASK)
488 goto gp_in_vm86;
489
490 if (!(regs->xcs & 3))
491 goto gp_in_kernel;
492
493 current->thread.error_code = error_code;
494 current->thread.trap_no = 13;
495 force_sig(SIGSEGV, current);
496 return;
497
498gp_in_vm86:
499 local_irq_enable();
500 handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
501 return;
502
503gp_in_kernel:
504 if (!fixup_exception(regs)) {
505 if (notify_die(DIE_GPF, "general protection fault", regs,
506 error_code, 13, SIGSEGV) == NOTIFY_STOP)
507 return;
508 die("general protection fault", regs, error_code);
509 }
510}
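/*
 * User-visible effect of the lazy bitmap copy above: after ioperm(),
 * the thread's first port access following a context switch takes a
 * #GP that lands here and is transparently restarted once the TSS
 * bitmap is filled in. A user-space sketch (needs root, gcc -m32;
 * illustration only, glibc's <sys/io.h> wrappers assumed):
 */
#if 0
#include <stdio.h>
#include <sys/io.h>

int main(void)
{
	if (ioperm(0x80, 1, 1)) {	/* allow access to port 0x80 */
		perror("ioperm");
		return 1;
	}
	outb(0, 0x80);		/* may fault once, then gets restarted */
	return 0;
}
#endif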
511
512static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
513{
514 printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
515 printk("You probably have a hardware problem with your RAM chips\n");
516
517 /* Clear and disable the memory parity error line. */
518 clear_mem_error(reason);
519}
520
521static void io_check_error(unsigned char reason, struct pt_regs * regs)
522{
523 unsigned long i;
524
525 printk("NMI: IOCK error (debug interrupt?)\n");
526 show_registers(regs);
527
528 /* Re-enable the IOCK line, wait for a few seconds */
529 reason = (reason & 0xf) | 8;
530 outb(reason, 0x61);
531 i = 2000;
532 while (--i) udelay(1000);
533 reason &= ~8;
534 outb(reason, 0x61);
535}
536
537static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
538{
539#ifdef CONFIG_MCA
540 /* Might actually be able to figure out what the guilty party
541 * is. */
542 if( MCA_bus ) {
543 mca_handle_nmi();
544 return;
545 }
546#endif
547 printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
548 reason, smp_processor_id());
549 printk("Dazed and confused, but trying to continue\n");
550 printk("Do you have a strange power saving mode enabled?\n");
551}
552
553static DEFINE_SPINLOCK(nmi_print_lock);
554
555void die_nmi (struct pt_regs *regs, const char *msg)
556{
557 spin_lock(&nmi_print_lock);
558 /*
 559 * We are in trouble anyway, let's at least try
560 * to get a message out.
561 */
562 bust_spinlocks(1);
563 printk(msg);
564 printk(" on CPU%d, eip %08lx, registers:\n",
565 smp_processor_id(), regs->eip);
566 show_registers(regs);
567 printk("console shuts up ...\n");
568 console_silent();
569 spin_unlock(&nmi_print_lock);
570 bust_spinlocks(0);
571 do_exit(SIGSEGV);
572}
573
574static void default_do_nmi(struct pt_regs * regs)
575{
576 unsigned char reason = 0;
577
578 /* Only the BSP gets external NMIs from the system. */
579 if (!smp_processor_id())
580 reason = get_nmi_reason();
581
582 if (!(reason & 0xc0)) {
583 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT)
584 == NOTIFY_STOP)
585 return;
586#ifdef CONFIG_X86_LOCAL_APIC
587 /*
588 * Ok, so this is none of the documented NMI sources,
589 * so it must be the NMI watchdog.
590 */
591 if (nmi_watchdog) {
592 nmi_watchdog_tick(regs);
593 return;
594 }
595#endif
596 unknown_nmi_error(reason, regs);
597 return;
598 }
599 if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP)
600 return;
601 if (reason & 0x80)
602 mem_parity_error(reason, regs);
603 if (reason & 0x40)
604 io_check_error(reason, regs);
605 /*
606 * Reassert NMI in case it became active meanwhile
607 * as it's edge-triggered.
608 */
609 reassert_nmi();
610}
611
612static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
613{
614 return 0;
615}
616
617static nmi_callback_t nmi_callback = dummy_nmi_callback;
618
619fastcall void do_nmi(struct pt_regs * regs, long error_code)
620{
621 int cpu;
622
623 nmi_enter();
624
625 cpu = smp_processor_id();
626 ++nmi_count(cpu);
627
628 if (!nmi_callback(regs, cpu))
629 default_do_nmi(regs);
630
631 nmi_exit();
632}
633
634void set_nmi_callback(nmi_callback_t callback)
635{
636 nmi_callback = callback;
637}
638
639void unset_nmi_callback(void)
640{
641 nmi_callback = dummy_nmi_callback;
642}
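/*
 * Sketch of a client installing an NMI callback (the style the NMI
 * watchdog and profilers use); "my_nmi_handler" is a hypothetical name.
 * Returning nonzero claims the NMI, so default_do_nmi() is skipped.
 */
#if 0
static int my_nmi_handler(struct pt_regs *regs, int cpu)
{
	/* NMI context: no sleeping, keep this short */
	return 1;
}
/* install with set_nmi_callback(my_nmi_handler),
 * remove with unset_nmi_callback() */
#endif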
643
644#ifdef CONFIG_KPROBES
645fastcall int do_int3(struct pt_regs *regs, long error_code)
646{
647 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
648 == NOTIFY_STOP)
649 return 1;
650 /* This is an interrupt gate, because kprobes wants interrupts
651 disabled. Normal trap handlers don't. */
652 restore_interrupts(regs);
653 do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
654 return 0;
655}
656#endif
657
658/*
659 * Our handling of the processor debug registers is non-trivial.
660 * We do not clear them on entry and exit from the kernel. Therefore
661 * it is possible to get a watchpoint trap here from inside the kernel.
662 * However, the code in ./ptrace.c has ensured that the user can
663 * only set watchpoints on userspace addresses. Therefore the in-kernel
664 * watchpoint trap can only occur in code which is reading/writing
665 * from user space. Such code must not hold kernel locks (since it
666 * can equally take a page fault), therefore it is safe to call
667 * force_sig_info even though that claims and releases locks.
668 *
669 * Code in ./signal.c ensures that the debug control register
670 * is restored before we deliver any signal, and therefore that
671 * user code runs with the correct debug control register even though
672 * we clear it here.
673 *
674 * Being careful here means that we don't have to be as careful in a
675 * lot of more complicated places (task switching can be a bit lazy
676 * about restoring all the debug state, and ptrace doesn't have to
677 * find every occurrence of the TF bit that could be saved away even
678 * by user code)
679 */
680fastcall void do_debug(struct pt_regs * regs, long error_code)
681{
682 unsigned int condition;
683 struct task_struct *tsk = current;
684
685 __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
686
687 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
688 SIGTRAP) == NOTIFY_STOP)
689 return;
690 /* It's safe to allow irq's after DR6 has been saved */
691 if (regs->eflags & X86_EFLAGS_IF)
692 local_irq_enable();
693
694 /* Mask out spurious debug traps due to lazy DR7 setting */
695 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
696 if (!tsk->thread.debugreg[7])
697 goto clear_dr7;
698 }
699
700 if (regs->eflags & VM_MASK)
701 goto debug_vm86;
702
703 /* Save debug status register where ptrace can see it */
704 tsk->thread.debugreg[6] = condition;
705
706 /*
707 * Single-stepping through TF: make sure we ignore any events in
708 * kernel space (but re-enable TF when returning to user mode).
709 */
710 if (condition & DR_STEP) {
711 /*
712 * We already checked v86 mode above, so we can
713 * check for kernel mode by just checking the CPL
714 * of CS.
715 */
716 if ((regs->xcs & 3) == 0)
717 goto clear_TF_reenable;
718 }
719
720 /* Ok, finally something we can handle */
721 send_sigtrap(tsk, regs, error_code);
722
723 /* Disable additional traps. They'll be re-enabled when
724 * the signal is delivered.
725 */
726clear_dr7:
727 __asm__("movl %0,%%db7"
728 : /* no output */
729 : "r" (0));
730 return;
731
732debug_vm86:
733 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
734 return;
735
736clear_TF_reenable:
737 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
738 regs->eflags &= ~TF_MASK;
739 return;
740}
741
742/*
743 * Note that we play around with the 'TS' bit in an attempt to get
744 * the correct behaviour even in the presence of the asynchronous
745 * IRQ13 behaviour
746 */
747void math_error(void __user *eip)
748{
749 struct task_struct * task;
750 siginfo_t info;
751 unsigned short cwd, swd;
752
753 /*
754 * Save the info for the exception handler and clear the error.
755 */
756 task = current;
757 save_init_fpu(task);
758 task->thread.trap_no = 16;
759 task->thread.error_code = 0;
760 info.si_signo = SIGFPE;
761 info.si_errno = 0;
762 info.si_code = __SI_FAULT;
763 info.si_addr = eip;
764 /*
765 * (~cwd & swd) will mask out exceptions that are not set to unmasked
 766 * status. 0x3f covers the exception bits in these regs, 0x200 is the
 767 * C1 bit you need in case of a stack fault, 0x040 is the stack
 768 * fault bit. We should only be taking one exception at a time,
 769 * so if this combination doesn't produce any single exception,
 770 * then we have a bad program that isn't synchronizing its FPU usage
 771 * and it will suffer the consequences, since we won't be able to
 772 * fully reproduce the context of the exception.
773 */
774 cwd = get_fpu_cwd(task);
775 swd = get_fpu_swd(task);
776 switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
777 case 0x000:
778 default:
779 break;
780 case 0x001: /* Invalid Op */
781 case 0x041: /* Stack Fault */
782 case 0x241: /* Stack Fault | Direction */
783 info.si_code = FPE_FLTINV;
784 /* Should we clear the SF or let user space do it ???? */
785 break;
786 case 0x002: /* Denormalize */
787 case 0x010: /* Underflow */
788 info.si_code = FPE_FLTUND;
789 break;
790 case 0x004: /* Zero Divide */
791 info.si_code = FPE_FLTDIV;
792 break;
793 case 0x008: /* Overflow */
794 info.si_code = FPE_FLTOVF;
795 break;
796 case 0x020: /* Precision */
797 info.si_code = FPE_FLTRES;
798 break;
799 }
800 force_sig_info(SIGFPE, &info, task);
801}
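/*
 * Worked example of the decode above (values illustrative): with the
 * ZE (zero-divide) bit unmasked in the control word (cwd = 0x037b) and
 * a 1.0/0.0 having set ZE in the status word (swd = 0x0004):
 *
 *	((~0x037b) & 0x0004 & 0x3f) | (0x0004 & 0x240) == 0x004
 *
 * which selects the FPE_FLTDIV case.
 */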
802
803fastcall void do_coprocessor_error(struct pt_regs * regs, long error_code)
804{
805 ignore_fpu_irq = 1;
806 math_error((void __user *)regs->eip);
807}
808
809static void simd_math_error(void __user *eip)
810{
811 struct task_struct * task;
812 siginfo_t info;
813 unsigned short mxcsr;
814
815 /*
816 * Save the info for the exception handler and clear the error.
817 */
818 task = current;
819 save_init_fpu(task);
820 task->thread.trap_no = 19;
821 task->thread.error_code = 0;
822 info.si_signo = SIGFPE;
823 info.si_errno = 0;
824 info.si_code = __SI_FAULT;
825 info.si_addr = eip;
826 /*
827 * The SIMD FPU exceptions are handled a little differently, as there
828 * is only a single status/control register. Thus, to determine which
829 * unmasked exception was caught we must mask the exception mask bits
830 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
831 */
832 mxcsr = get_fpu_mxcsr(task);
833 switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
834 case 0x000:
835 default:
836 break;
837 case 0x001: /* Invalid Op */
838 info.si_code = FPE_FLTINV;
839 break;
840 case 0x002: /* Denormalize */
841 case 0x010: /* Underflow */
842 info.si_code = FPE_FLTUND;
843 break;
844 case 0x004: /* Zero Divide */
845 info.si_code = FPE_FLTDIV;
846 break;
847 case 0x008: /* Overflow */
848 info.si_code = FPE_FLTOVF;
849 break;
850 case 0x020: /* Precision */
851 info.si_code = FPE_FLTRES;
852 break;
853 }
854 force_sig_info(SIGFPE, &info, task);
855}
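/*
 * Example of the MXCSR decode (values illustrative): the exception
 * mask bits live at MXCSR[12:7] and the sticky flags at MXCSR[5:0].
 * With ZM (bit 9) unmasked and ZE (bit 2) set, say mxcsr == 0x1d84:
 *
 *	~((0x1d84 & 0x1f80) >> 7) & (0x1d84 & 0x3f) == 0x04
 *
 * which again selects the FPE_FLTDIV case.
 */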
856
857fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
858 long error_code)
859{
860 if (cpu_has_xmm) {
861 /* Handle SIMD FPU exceptions on PIII+ processors. */
862 ignore_fpu_irq = 1;
863 simd_math_error((void __user *)regs->eip);
864 } else {
865 /*
 866 * Handle the strange cache-flush-from-user-space exception
867 * in all other cases. This is undocumented behaviour.
868 */
869 if (regs->eflags & VM_MASK) {
870 handle_vm86_fault((struct kernel_vm86_regs *)regs,
871 error_code);
872 return;
873 }
874 die_if_kernel("cache flush denied", regs, error_code);
875 current->thread.trap_no = 19;
876 current->thread.error_code = error_code;
877 force_sig(SIGSEGV, current);
878 }
879}
880
881fastcall void do_spurious_interrupt_bug(struct pt_regs * regs,
882 long error_code)
883{
884#if 0
885 /* No need to warn about this any longer. */
886 printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
887#endif
888}
889
890fastcall void setup_x86_bogus_stack(unsigned char * stk)
891{
892 unsigned long *switch16_ptr, *switch32_ptr;
893 struct pt_regs *regs;
894 unsigned long stack_top, stack_bot;
895 unsigned short iret_frame16_off;
896 int cpu = smp_processor_id();
897 /* reserve the space on 32bit stack for the magic switch16 pointer */
898 memmove(stk, stk + 8, sizeof(struct pt_regs));
899 switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
900 regs = (struct pt_regs *)stk;
901 /* now the switch32 on 16bit stack */
902 stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
903 stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
904 switch32_ptr = (unsigned long *)(stack_top - 8);
905 iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
906 /* copy iret frame on 16bit stack */
907 memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
908 /* fill in the switch pointers */
909 switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
910 switch16_ptr[1] = __ESPFIX_SS;
911 switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
912 8 - CPU_16BIT_STACK_SIZE;
913 switch32_ptr[1] = __KERNEL_DS;
914}
915
916fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
917{
918 unsigned long *switch32_ptr;
919 unsigned char *stack16, *stack32;
920 unsigned long stack_top, stack_bot;
921 int len;
922 int cpu = smp_processor_id();
923 stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
924 stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
925 switch32_ptr = (unsigned long *)(stack_top - 8);
926 /* copy the data from 16bit stack to 32bit stack */
927 len = CPU_16BIT_STACK_SIZE - 8 - sp;
928 stack16 = (unsigned char *)(stack_bot + sp);
929 stack32 = (unsigned char *)
930 (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
931 memcpy(stack32, stack16, len);
932 return stack32;
933}
934
935/*
936 * 'math_state_restore()' saves the current math information in the
937 * old math state array, and gets the new ones from the current task
938 *
939 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
940 * Don't touch unless you *really* know how it works.
941 *
942 * Must be called with kernel preemption disabled (in this case,
943 * local interrupts are disabled at the call-site in entry.S).
944 */
945asmlinkage void math_state_restore(struct pt_regs regs)
946{
947 struct thread_info *thread = current_thread_info();
948 struct task_struct *tsk = thread->task;
949
950 clts(); /* Allow maths ops (or we recurse) */
951 if (!tsk_used_math(tsk))
952 init_fpu(tsk);
953 restore_fpu(tsk);
954 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
955}
956
957#ifndef CONFIG_MATH_EMULATION
958
959asmlinkage void math_emulate(long arg)
960{
961 printk("math-emulation not enabled and no coprocessor found.\n");
962 printk("killing %s.\n",current->comm);
963 force_sig(SIGFPE,current);
964 schedule();
965}
966
967#endif /* CONFIG_MATH_EMULATION */
968
969#ifdef CONFIG_X86_F00F_BUG
970void __init trap_init_f00f_bug(void)
971{
972 __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
973
974 /*
975 * Update the IDT descriptor and reload the IDT so that
976 * it uses the read-only mapped virtual address.
977 */
978 idt_descr.address = fix_to_virt(FIX_F00F_IDT);
979 __asm__ __volatile__("lidt %0" : : "m" (idt_descr));
980}
981#endif
982
983#define _set_gate(gate_addr,type,dpl,addr,seg) \
984do { \
985 int __d0, __d1; \
986 __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
987 "movw %4,%%dx\n\t" \
988 "movl %%eax,%0\n\t" \
989 "movl %%edx,%1" \
990 :"=m" (*((long *) (gate_addr))), \
991 "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
992 :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
993 "3" ((char *) (addr)),"2" ((seg) << 16)); \
994} while (0)
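/*
 * Descriptor layout built by _set_gate() (sketch): the low dword of
 * the gate is (selector << 16) | (handler & 0xffff); the high dword is
 * (handler & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8), i.e.
 * the present bit, DPL and gate type (14 = interrupt gate, 15 = trap
 * gate, 5 = task gate) over the high half of the handler offset.
 */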
995
996
997/*
998 * This needs to use 'idt_table' rather than 'idt', and
999 * thus use the _nonmapped_ version of the IDT, as the
 1000 * Pentium F0 0F bugfix may have resulted in the mapped
1001 * IDT being write-protected.
1002 */
1003void set_intr_gate(unsigned int n, void *addr)
1004{
1005 _set_gate(idt_table+n,14,0,addr,__KERNEL_CS);
1006}
1007
1008/*
 1009 * This routine sets up an interrupt gate at descriptor privilege level 3 (DPL 3).
1010 */
1011static inline void set_system_intr_gate(unsigned int n, void *addr)
1012{
1013 _set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS);
1014}
1015
1016static void __init set_trap_gate(unsigned int n, void *addr)
1017{
1018 _set_gate(idt_table+n,15,0,addr,__KERNEL_CS);
1019}
1020
1021static void __init set_system_gate(unsigned int n, void *addr)
1022{
1023 _set_gate(idt_table+n,15,3,addr,__KERNEL_CS);
1024}
1025
1026static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
1027{
1028 _set_gate(idt_table+n,5,0,0,(gdt_entry<<3));
1029}
1030
1031
1032void __init trap_init(void)
1033{
1034#ifdef CONFIG_EISA
1035 void __iomem *p = ioremap(0x0FFFD9, 4);
1036 if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) {
1037 EISA_bus = 1;
1038 }
1039 iounmap(p);
1040#endif
1041
1042#ifdef CONFIG_X86_LOCAL_APIC
1043 init_apic_mappings();
1044#endif
1045
1046 set_trap_gate(0,&divide_error);
1047 set_intr_gate(1,&debug);
1048 set_intr_gate(2,&nmi);
1049 set_system_intr_gate(3, &int3); /* int3-5 can be called from all */
1050 set_system_gate(4,&overflow);
1051 set_system_gate(5,&bounds);
1052 set_trap_gate(6,&invalid_op);
1053 set_trap_gate(7,&device_not_available);
1054 set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS);
1055 set_trap_gate(9,&coprocessor_segment_overrun);
1056 set_trap_gate(10,&invalid_TSS);
1057 set_trap_gate(11,&segment_not_present);
1058 set_trap_gate(12,&stack_segment);
1059 set_trap_gate(13,&general_protection);
1060 set_intr_gate(14,&page_fault);
1061 set_trap_gate(15,&spurious_interrupt_bug);
1062 set_trap_gate(16,&coprocessor_error);
1063 set_trap_gate(17,&alignment_check);
1064#ifdef CONFIG_X86_MCE
1065 set_trap_gate(18,&machine_check);
1066#endif
1067 set_trap_gate(19,&simd_coprocessor_error);
1068
1069 set_system_gate(SYSCALL_VECTOR,&system_call);
1070
1071 /*
1072 * Should be a barrier for any external CPU state.
1073 */
1074 cpu_init();
1075
1076 trap_init_hook();
1077}
1078
1079static int __init kstack_setup(char *s)
1080{
1081 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
1082 return 0;
1083}
1084__setup("kstack=", kstack_setup);
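/*
 * Usage: booting with e.g. "kstack=48" on the kernel command line makes
 * oops dumps print 48 stack words instead of the default 24 above.
 */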
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
new file mode 100644
index 000000000000..2f3d52dacff7
--- /dev/null
+++ b/arch/i386/kernel/vm86.c
@@ -0,0 +1,804 @@
1/*
2 * linux/kernel/vm86.c
3 *
4 * Copyright (C) 1994 Linus Torvalds
5 *
6 * 29 dec 2001 - Fixed oopses caused by unchecked access to the vm86
7 * stack - Manfred Spraul <manfreds@colorfullife.com>
8 *
9 * 22 mar 2002 - Manfred detected the stackfaults, but didn't handle
10 * them correctly. Now the emulation will be in a
11 * consistent state after stackfaults - Kasper Dupont
12 * <kasperd@daimi.au.dk>
13 *
14 * 22 mar 2002 - Added missing clear_IF in set_vflags_* Kasper Dupont
15 * <kasperd@daimi.au.dk>
16 *
17 * ?? ??? 2002 - Fixed premature returns from handle_vm86_fault
18 * caused by Kasper Dupont's changes - Stas Sergeev
19 *
20 * 4 apr 2002 - Fixed CHECK_IF_IN_TRAP broken by Stas' changes.
21 * Kasper Dupont <kasperd@daimi.au.dk>
22 *
23 * 9 apr 2002 - Changed syntax of macros in handle_vm86_fault.
24 * Kasper Dupont <kasperd@daimi.au.dk>
25 *
26 * 9 apr 2002 - Changed stack access macros to jump to a label
27 * instead of returning to userspace. This simplifies
 28 * do_int, and is needed by handle_vm86_fault. Kasper
29 * Dupont <kasperd@daimi.au.dk>
30 *
31 */
32
33#include <linux/config.h>
34#include <linux/errno.h>
35#include <linux/interrupt.h>
36#include <linux/sched.h>
37#include <linux/kernel.h>
38#include <linux/signal.h>
39#include <linux/string.h>
40#include <linux/mm.h>
41#include <linux/smp.h>
42#include <linux/smp_lock.h>
43#include <linux/highmem.h>
44#include <linux/ptrace.h>
45
46#include <asm/uaccess.h>
47#include <asm/io.h>
48#include <asm/tlbflush.h>
49#include <asm/irq.h>
50
51/*
52 * Known problems:
53 *
54 * Interrupt handling is not guaranteed:
55 * - a real x86 will disable all interrupts for one instruction
56 * after a "mov ss,xx" to make stack handling atomic even without
57 * the 'lss' instruction. We can't guarantee this in v86 mode,
58 * as the next instruction might result in a page fault or similar.
59 * - a real x86 will have interrupts disabled for one instruction
60 * past the 'sti' that enables them. We don't bother with all the
61 * details yet.
62 *
63 * Let's hope these problems do not actually matter for anything.
64 */
65
66
67#define KVM86 ((struct kernel_vm86_struct *)regs)
68#define VMPI KVM86->vm86plus
69
70
71/*
72 * 8- and 16-bit register defines..
73 */
74#define AL(regs) (((unsigned char *)&((regs)->eax))[0])
75#define AH(regs) (((unsigned char *)&((regs)->eax))[1])
76#define IP(regs) (*(unsigned short *)&((regs)->eip))
77#define SP(regs) (*(unsigned short *)&((regs)->esp))
78
79/*
80 * virtual flags (16 and 32-bit versions)
81 */
82#define VFLAGS (*(unsigned short *)&(current->thread.v86flags))
83#define VEFLAGS (current->thread.v86flags)
84
85#define set_flags(X,new,mask) \
86((X) = ((X) & ~(mask)) | ((new) & (mask)))
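/*
 * set_flags() keeps the bits of X outside mask and takes the bits
 * inside mask from new; e.g. with X = 0xff00, new = 0x00ff and
 * mask = 0x0ff0 the result is 0xf0f0 (illustrative values).
 */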
87
88#define SAFE_MASK (0xDD5)
89#define RETURN_MASK (0xDFF)
90
91#define VM86_REGS_PART2 orig_eax
92#define VM86_REGS_SIZE1 \
93 ( (unsigned)( & (((struct kernel_vm86_regs *)0)->VM86_REGS_PART2) ) )
94#define VM86_REGS_SIZE2 (sizeof(struct kernel_vm86_regs) - VM86_REGS_SIZE1)
95
96struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs));
97struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
98{
99 struct tss_struct *tss;
100 struct pt_regs *ret;
101 unsigned long tmp;
102
103 /*
104 * This gets called from entry.S with interrupts disabled, but
105 * from process context. Enable interrupts here, before trying
106 * to access user space.
107 */
108 local_irq_enable();
109
110 if (!current->thread.vm86_info) {
111 printk("no vm86_info: BAD\n");
112 do_exit(SIGSEGV);
113 }
114 set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->thread.v86mask);
115 tmp = copy_to_user(&current->thread.vm86_info->regs,regs, VM86_REGS_SIZE1);
116 tmp += copy_to_user(&current->thread.vm86_info->regs.VM86_REGS_PART2,
117 &regs->VM86_REGS_PART2, VM86_REGS_SIZE2);
118 tmp += put_user(current->thread.screen_bitmap,&current->thread.vm86_info->screen_bitmap);
119 if (tmp) {
120 printk("vm86: could not access userspace vm86_info\n");
121 do_exit(SIGSEGV);
122 }
123
124 tss = &per_cpu(init_tss, get_cpu());
125 current->thread.esp0 = current->thread.saved_esp0;
126 current->thread.sysenter_cs = __KERNEL_CS;
127 load_esp0(tss, &current->thread);
128 current->thread.saved_esp0 = 0;
129 put_cpu();
130
131 loadsegment(fs, current->thread.saved_fs);
132 loadsegment(gs, current->thread.saved_gs);
133 ret = KVM86->regs32;
134 return ret;
135}
136
137static void mark_screen_rdonly(struct task_struct * tsk)
138{
139 pgd_t *pgd;
140 pud_t *pud;
141 pmd_t *pmd;
142 pte_t *pte, *mapped;
143 int i;
144
145 preempt_disable();
146 spin_lock(&tsk->mm->page_table_lock);
147 pgd = pgd_offset(tsk->mm, 0xA0000);
148 if (pgd_none_or_clear_bad(pgd))
149 goto out;
150 pud = pud_offset(pgd, 0xA0000);
151 if (pud_none_or_clear_bad(pud))
152 goto out;
153 pmd = pmd_offset(pud, 0xA0000);
154 if (pmd_none_or_clear_bad(pmd))
155 goto out;
156 pte = mapped = pte_offset_map(pmd, 0xA0000);
157 for (i = 0; i < 32; i++) {
158 if (pte_present(*pte))
159 set_pte(pte, pte_wrprotect(*pte));
160 pte++;
161 }
162 pte_unmap(mapped);
163out:
164 spin_unlock(&tsk->mm->page_table_lock);
165 preempt_enable();
166 flush_tlb();
167}
168
169
170
171static int do_vm86_irq_handling(int subfunction, int irqnumber);
172static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
173
174asmlinkage int sys_vm86old(struct pt_regs regs)
175{
176 struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.ebx;
177 struct kernel_vm86_struct info; /* declare this _on top_,
 178 * this avoids wasting stack space.
179 * This remains on the stack until we
180 * return to 32 bit user space.
181 */
182 struct task_struct *tsk;
183 int tmp, ret = -EPERM;
184
185 tsk = current;
186 if (tsk->thread.saved_esp0)
187 goto out;
188 tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1);
189 tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
190 (long)&info.vm86plus - (long)&info.regs.VM86_REGS_PART2);
191 ret = -EFAULT;
192 if (tmp)
193 goto out;
194 memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
195 info.regs32 = &regs;
196 tsk->thread.vm86_info = v86;
197 do_sys_vm86(&info, tsk);
198 ret = 0; /* we never return here */
199out:
200 return ret;
201}
202
203
204asmlinkage int sys_vm86(struct pt_regs regs)
205{
206 struct kernel_vm86_struct info; /* declare this _on top_,
 207 * this avoids wasting stack space.
208 * This remains on the stack until we
209 * return to 32 bit user space.
210 */
211 struct task_struct *tsk;
212 int tmp, ret;
213 struct vm86plus_struct __user *v86;
214
215 tsk = current;
216 switch (regs.ebx) {
217 case VM86_REQUEST_IRQ:
218 case VM86_FREE_IRQ:
219 case VM86_GET_IRQ_BITS:
220 case VM86_GET_AND_RESET_IRQ:
221 ret = do_vm86_irq_handling(regs.ebx, (int)regs.ecx);
222 goto out;
223 case VM86_PLUS_INSTALL_CHECK:
224 /* NOTE: on old vm86 stuff this will return the error
225 from verify_area(), because the subfunction is
 226 interpreted as an (invalid) address of a vm86_struct.
227 So the installation check works.
228 */
229 ret = 0;
230 goto out;
231 }
232
233 /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
234 ret = -EPERM;
235 if (tsk->thread.saved_esp0)
236 goto out;
237 v86 = (struct vm86plus_struct __user *)regs.ecx;
238 tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1);
239 tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
240 (long)&info.regs32 - (long)&info.regs.VM86_REGS_PART2);
241 ret = -EFAULT;
242 if (tmp)
243 goto out;
244 info.regs32 = &regs;
245 info.vm86plus.is_vm86pus = 1;
246 tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
247 do_sys_vm86(&info, tsk);
248 ret = 0; /* we never return here */
249out:
250 return ret;
251}
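/*
 * User-space sketch of entering v86 mode through this entry point
 * (error handling and mapping of the low megabyte elided; glibc's
 * two-argument vm86() wrapper from <sys/vm86.h> is assumed, and
 * "enter_v86_demo" is a hypothetical helper name):
 */
#if 0
#include <sys/vm86.h>

static int enter_v86_demo(struct vm86plus_struct *v86)
{
	v86->regs.cs  = 0x1000;		/* start executing at 1000:0000 */
	v86->regs.eip = 0;
	v86->regs.ss  = 0x2000;
	v86->regs.esp = 0xfffe;
	return vm86(VM86_ENTER, v86);	/* comes back via return_to_32bit() */
}
#endif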
252
253
254static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk)
255{
256 struct tss_struct *tss;
257/*
258 * make sure the vm86() system call doesn't try to do anything silly
259 */
260 info->regs.__null_ds = 0;
261 info->regs.__null_es = 0;
262
263/* we are clearing fs,gs later just before "jmp resume_userspace",
 264 * because starting with Linux 2.1.x they are no longer saved/restored
265 */
266
267/*
268 * The eflags register is also special: we cannot trust that the user
269 * has set it up safely, so this makes sure interrupt etc flags are
270 * inherited from protected mode.
271 */
272 VEFLAGS = info->regs.eflags;
273 info->regs.eflags &= SAFE_MASK;
274 info->regs.eflags |= info->regs32->eflags & ~SAFE_MASK;
275 info->regs.eflags |= VM_MASK;
276
277 switch (info->cpu_type) {
278 case CPU_286:
279 tsk->thread.v86mask = 0;
280 break;
281 case CPU_386:
282 tsk->thread.v86mask = NT_MASK | IOPL_MASK;
283 break;
284 case CPU_486:
285 tsk->thread.v86mask = AC_MASK | NT_MASK | IOPL_MASK;
286 break;
287 default:
288 tsk->thread.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK;
289 break;
290 }
291
292/*
293 * Save old state, set default return value (%eax) to 0
294 */
295 info->regs32->eax = 0;
296 tsk->thread.saved_esp0 = tsk->thread.esp0;
297 asm volatile("movl %%fs,%0":"=m" (tsk->thread.saved_fs));
298 asm volatile("movl %%gs,%0":"=m" (tsk->thread.saved_gs));
299
300 tss = &per_cpu(init_tss, get_cpu());
301 tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
302 if (cpu_has_sep)
303 tsk->thread.sysenter_cs = 0;
304 load_esp0(tss, &tsk->thread);
305 put_cpu();
306
307 tsk->thread.screen_bitmap = info->screen_bitmap;
308 if (info->flags & VM86_SCREEN_BITMAP)
309 mark_screen_rdonly(tsk);
310 __asm__ __volatile__(
311 "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t"
312 "movl %0,%%esp\n\t"
313 "movl %1,%%ebp\n\t"
314 "jmp resume_userspace"
315 : /* no outputs */
316 :"r" (&info->regs), "r" (tsk->thread_info) : "ax");
317 /* we never return here */
318}
319
320static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval)
321{
322 struct pt_regs * regs32;
323
324 regs32 = save_v86_state(regs16);
325 regs32->eax = retval;
326 __asm__ __volatile__("movl %0,%%esp\n\t"
327 "movl %1,%%ebp\n\t"
328 "jmp resume_userspace"
329 : : "r" (regs32), "r" (current_thread_info()));
330}
331
332static inline void set_IF(struct kernel_vm86_regs * regs)
333{
334 VEFLAGS |= VIF_MASK;
335 if (VEFLAGS & VIP_MASK)
336 return_to_32bit(regs, VM86_STI);
337}
338
339static inline void clear_IF(struct kernel_vm86_regs * regs)
340{
341 VEFLAGS &= ~VIF_MASK;
342}
343
344static inline void clear_TF(struct kernel_vm86_regs * regs)
345{
346 regs->eflags &= ~TF_MASK;
347}
348
349static inline void clear_AC(struct kernel_vm86_regs * regs)
350{
351 regs->eflags &= ~AC_MASK;
352}
353
354/* It is correct to call set_IF(regs) from the set_vflags_*
355 * functions. However someone forgot to call clear_IF(regs)
356 * in the opposite case.
357 * After the command sequence CLI PUSHF STI POPF you should
 358 * end up with interrupts disabled, but you ended up with
359 * interrupts enabled.
360 * ( I was testing my own changes, but the only bug I
361 * could find was in a function I had not changed. )
362 * [KD]
363 */
364
365static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs)
366{
367 set_flags(VEFLAGS, eflags, current->thread.v86mask);
368 set_flags(regs->eflags, eflags, SAFE_MASK);
369 if (eflags & IF_MASK)
370 set_IF(regs);
371 else
372 clear_IF(regs);
373}
374
375static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs)
376{
377 set_flags(VFLAGS, flags, current->thread.v86mask);
378 set_flags(regs->eflags, flags, SAFE_MASK);
379 if (flags & IF_MASK)
380 set_IF(regs);
381 else
382 clear_IF(regs);
383}
384
385static inline unsigned long get_vflags(struct kernel_vm86_regs * regs)
386{
387 unsigned long flags = regs->eflags & RETURN_MASK;
388
389 if (VEFLAGS & VIF_MASK)
390 flags |= IF_MASK;
391 flags |= IOPL_MASK;
392 return flags | (VEFLAGS & current->thread.v86mask);
393}
394
395static inline int is_revectored(int nr, struct revectored_struct * bitmap)
396{
397 __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0"
398 :"=r" (nr)
399 :"m" (*bitmap),"r" (nr));
400 return nr;
401}
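/*
 * The btl/sbbl pair above is a branchless bit test: it returns all-ones
 * if bit nr is set in the bitmap and 0 otherwise. A portable C sketch
 * of the same test (it yields 0/1 instead of 0/-1, which is equivalent
 * under truth testing; the __map field name is assumed from
 * struct revectored_struct):
 */
#if 0
static inline int is_revectored_c(int nr, struct revectored_struct *bitmap)
{
	return (bitmap->__map[nr >> 5] >> (nr & 31)) & 1;
}
#endif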
402
403#define val_byte(val, n) (((__u8 *)&val)[n])
404
405#define pushb(base, ptr, val, err_label) \
406 do { \
407 __u8 __val = val; \
408 ptr--; \
409 if (put_user(__val, base + ptr) < 0) \
410 goto err_label; \
411 } while(0)
412
413#define pushw(base, ptr, val, err_label) \
414 do { \
415 __u16 __val = val; \
416 ptr--; \
417 if (put_user(val_byte(__val, 1), base + ptr) < 0) \
418 goto err_label; \
419 ptr--; \
420 if (put_user(val_byte(__val, 0), base + ptr) < 0) \
421 goto err_label; \
422 } while(0)
423
424#define pushl(base, ptr, val, err_label) \
425 do { \
426 __u32 __val = val; \
427 ptr--; \
428 if (put_user(val_byte(__val, 3), base + ptr) < 0) \
429 goto err_label; \
430 ptr--; \
431 if (put_user(val_byte(__val, 2), base + ptr) < 0) \
432 goto err_label; \
433 ptr--; \
434 if (put_user(val_byte(__val, 1), base + ptr) < 0) \
435 goto err_label; \
436 ptr--; \
437 if (put_user(val_byte(__val, 0), base + ptr) < 0) \
438 goto err_label; \
439 } while(0)
440
441#define popb(base, ptr, err_label) \
442 ({ \
443 __u8 __res; \
444 if (get_user(__res, base + ptr) < 0) \
445 goto err_label; \
446 ptr++; \
447 __res; \
448 })
449
450#define popw(base, ptr, err_label) \
451 ({ \
452 __u16 __res; \
453 if (get_user(val_byte(__res, 0), base + ptr) < 0) \
454 goto err_label; \
455 ptr++; \
456 if (get_user(val_byte(__res, 1), base + ptr) < 0) \
457 goto err_label; \
458 ptr++; \
459 __res; \
460 })
461
462#define popl(base, ptr, err_label) \
463 ({ \
464 __u32 __res; \
465 if (get_user(val_byte(__res, 0), base + ptr) < 0) \
466 goto err_label; \
467 ptr++; \
468 if (get_user(val_byte(__res, 1), base + ptr) < 0) \
469 goto err_label; \
470 ptr++; \
471 if (get_user(val_byte(__res, 2), base + ptr) < 0) \
472 goto err_label; \
473 ptr++; \
474 if (get_user(val_byte(__res, 3), base + ptr) < 0) \
475 goto err_label; \
476 ptr++; \
477 __res; \
478 })
479
480/* There are so many possible reasons for this function to return
481 * VM86_INTx, so adding another doesn't bother me. We can expect
482 * userspace programs to be able to handle it. (Getting a problem
483 * in userspace is always better than an Oops anyway.) [KD]
484 */
485static void do_int(struct kernel_vm86_regs *regs, int i,
486 unsigned char __user * ssp, unsigned short sp)
487{
488 unsigned long __user *intr_ptr;
489 unsigned long segoffs;
490
491 if (regs->cs == BIOSSEG)
492 goto cannot_handle;
493 if (is_revectored(i, &KVM86->int_revectored))
494 goto cannot_handle;
495 if (i==0x21 && is_revectored(AH(regs),&KVM86->int21_revectored))
496 goto cannot_handle;
497 intr_ptr = (unsigned long __user *) (i << 2);
498 if (get_user(segoffs, intr_ptr))
499 goto cannot_handle;
500 if ((segoffs >> 16) == BIOSSEG)
501 goto cannot_handle;
502 pushw(ssp, sp, get_vflags(regs), cannot_handle);
503 pushw(ssp, sp, regs->cs, cannot_handle);
504 pushw(ssp, sp, IP(regs), cannot_handle);
505 regs->cs = segoffs >> 16;
506 SP(regs) -= 6;
507 IP(regs) = segoffs & 0xffff;
508 clear_TF(regs);
509 clear_IF(regs);
510 clear_AC(regs);
511 return;
512
513cannot_handle:
514 return_to_32bit(regs, VM86_INTx + (i << 8));
515}
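/*
 * What do_int() emulates: in real mode, vector i's handler lives at
 * linear address i * 4 as a 16:16 segment:offset pair, which is what
 * the get_user(segoffs, intr_ptr) above fetches:
 *
 *	segoffs >> 16		becomes the new CS
 *	segoffs & 0xffff	becomes the new IP
 *
 * after the flags/CS/IP frame has been pushed on the vm86 stack.
 */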
516
517int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno)
518{
519 if (VMPI.is_vm86pus) {
520 if ( (trapno==3) || (trapno==1) )
521 return_to_32bit(regs, VM86_TRAP + (trapno << 8));
522 do_int(regs, trapno, (unsigned char __user *) (regs->ss << 4), SP(regs));
523 return 0;
524 }
525 if (trapno !=1)
 526 return 1; /* we leave this to be handled by the calling routine */
527 if (current->ptrace & PT_PTRACED) {
528 unsigned long flags;
529 spin_lock_irqsave(&current->sighand->siglock, flags);
530 sigdelset(&current->blocked, SIGTRAP);
531 recalc_sigpending();
532 spin_unlock_irqrestore(&current->sighand->siglock, flags);
533 }
534 send_sig(SIGTRAP, current, 1);
535 current->thread.trap_no = trapno;
536 current->thread.error_code = error_code;
537 return 0;
538}
539
540void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
541{
542 unsigned char opcode;
543 unsigned char __user *csp;
544 unsigned char __user *ssp;
545 unsigned short ip, sp;
546 int data32, pref_done;
547
548#define CHECK_IF_IN_TRAP \
549 if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \
550 newflags |= TF_MASK
551#define VM86_FAULT_RETURN do { \
552 if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \
553 return_to_32bit(regs, VM86_PICRETURN); \
554 return; } while (0)
555
556 csp = (unsigned char __user *) (regs->cs << 4);
557 ssp = (unsigned char __user *) (regs->ss << 4);
558 sp = SP(regs);
559 ip = IP(regs);
560
561 data32 = 0;
562 pref_done = 0;
563 do {
564 switch (opcode = popb(csp, ip, simulate_sigsegv)) {
565 case 0x66: /* 32-bit data */ data32=1; break;
566 case 0x67: /* 32-bit address */ break;
567 case 0x2e: /* CS */ break;
568 case 0x3e: /* DS */ break;
569 case 0x26: /* ES */ break;
570 case 0x36: /* SS */ break;
571 case 0x65: /* GS */ break;
572 case 0x64: /* FS */ break;
573 case 0xf2: /* repnz */ break;
574 case 0xf3: /* rep */ break;
575 default: pref_done = 1;
576 }
577 } while (!pref_done);
578
579 switch (opcode) {
580
581 /* pushf */
582 case 0x9c:
583 if (data32) {
584 pushl(ssp, sp, get_vflags(regs), simulate_sigsegv);
585 SP(regs) -= 4;
586 } else {
587 pushw(ssp, sp, get_vflags(regs), simulate_sigsegv);
588 SP(regs) -= 2;
589 }
590 IP(regs) = ip;
591 VM86_FAULT_RETURN;
592
593 /* popf */
594 case 0x9d:
595 {
596 unsigned long newflags;
597 if (data32) {
598 newflags=popl(ssp, sp, simulate_sigsegv);
599 SP(regs) += 4;
600 } else {
601 newflags = popw(ssp, sp, simulate_sigsegv);
602 SP(regs) += 2;
603 }
604 IP(regs) = ip;
605 CHECK_IF_IN_TRAP;
606 if (data32) {
607 set_vflags_long(newflags, regs);
608 } else {
609 set_vflags_short(newflags, regs);
610 }
611 VM86_FAULT_RETURN;
612 }
613
614 /* int xx */
615 case 0xcd: {
616 int intno=popb(csp, ip, simulate_sigsegv);
617 IP(regs) = ip;
618 if (VMPI.vm86dbg_active) {
619 if ( (1 << (intno &7)) & VMPI.vm86dbg_intxxtab[intno >> 3] )
620 return_to_32bit(regs, VM86_INTx + (intno << 8));
621 }
622 do_int(regs, intno, ssp, sp);
623 return;
624 }
625
626 /* iret */
627 case 0xcf:
628 {
629 unsigned long newip;
630 unsigned long newcs;
631 unsigned long newflags;
632 if (data32) {
633 newip=popl(ssp, sp, simulate_sigsegv);
634 newcs=popl(ssp, sp, simulate_sigsegv);
635 newflags=popl(ssp, sp, simulate_sigsegv);
636 SP(regs) += 12;
637 } else {
638 newip = popw(ssp, sp, simulate_sigsegv);
639 newcs = popw(ssp, sp, simulate_sigsegv);
640 newflags = popw(ssp, sp, simulate_sigsegv);
641 SP(regs) += 6;
642 }
643 IP(regs) = newip;
644 regs->cs = newcs;
645 CHECK_IF_IN_TRAP;
646 if (data32) {
647 set_vflags_long(newflags, regs);
648 } else {
649 set_vflags_short(newflags, regs);
650 }
651 VM86_FAULT_RETURN;
652 }
653
654 /* cli */
655 case 0xfa:
656 IP(regs) = ip;
657 clear_IF(regs);
658 VM86_FAULT_RETURN;
659
660 /* sti */
661 /*
662 * Damn. This is incorrect: the 'sti' instruction should actually
663 * enable interrupts after the /next/ instruction. Not good.
664 *
665 * Probably needs some horsing around with the TF flag. Aiee..
666 */
667 case 0xfb:
668 IP(regs) = ip;
669 set_IF(regs);
670 VM86_FAULT_RETURN;
671
672 default:
673 return_to_32bit(regs, VM86_UNKNOWN);
674 }
675
676 return;
677
678simulate_sigsegv:
679 /* FIXME: After a long discussion with Stas we finally
 680 * agreed that this is wrong. Here we should
681 * really send a SIGSEGV to the user program.
682 * But how do we create the correct context? We
683 * are inside a general protection fault handler
 684 * and have just returned from a page fault handler.
685 * The correct context for the signal handler
686 * should be a mixture of the two, but how do we
687 * get the information? [KD]
688 */
689 return_to_32bit(regs, VM86_UNKNOWN);
690}
691
692/* ---------------- vm86 special IRQ passing stuff ----------------- */
693
694#define VM86_IRQNAME "vm86irq"
695
696static struct vm86_irqs {
697 struct task_struct *tsk;
698 int sig;
699} vm86_irqs[16];
700
701static DEFINE_SPINLOCK(irqbits_lock);
702static int irqbits;
703
704#define ALLOWED_SIGS ( 1 /* 0 = don't send a signal */ \
705 | (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO) | (1 << SIGURG) \
706 | (1 << SIGUNUSED) )
707
708static irqreturn_t irq_handler(int intno, void *dev_id, struct pt_regs * regs)
709{
710 int irq_bit;
711 unsigned long flags;
712
713 spin_lock_irqsave(&irqbits_lock, flags);
714 irq_bit = 1 << intno;
715 if ((irqbits & irq_bit) || ! vm86_irqs[intno].tsk)
716 goto out;
717 irqbits |= irq_bit;
718 if (vm86_irqs[intno].sig)
719 send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1);
720 spin_unlock_irqrestore(&irqbits_lock, flags);
721 /*
722 * IRQ will be re-enabled when user asks for the irq (whether
723 * polling or as a result of the signal)
724 */
725 disable_irq(intno);
726 return IRQ_HANDLED;
727
728out:
729 spin_unlock_irqrestore(&irqbits_lock, flags);
730 return IRQ_NONE;
731}
732
733static inline void free_vm86_irq(int irqnumber)
734{
735 unsigned long flags;
736
737 free_irq(irqnumber, NULL);
738 vm86_irqs[irqnumber].tsk = NULL;
739
740 spin_lock_irqsave(&irqbits_lock, flags);
741 irqbits &= ~(1 << irqnumber);
742 spin_unlock_irqrestore(&irqbits_lock, flags);
743}
744
745void release_vm86_irqs(struct task_struct *task)
746{
747 int i;
748 for (i = FIRST_VM86_IRQ ; i <= LAST_VM86_IRQ; i++)
749 if (vm86_irqs[i].tsk == task)
750 free_vm86_irq(i);
751}
752
753static inline int get_and_reset_irq(int irqnumber)
754{
755 int bit;
756 unsigned long flags;
757
758 if (invalid_vm86_irq(irqnumber)) return 0;
759 if (vm86_irqs[irqnumber].tsk != current) return 0;
760 spin_lock_irqsave(&irqbits_lock, flags);
761 bit = irqbits & (1 << irqnumber);
762 irqbits &= ~bit;
763 spin_unlock_irqrestore(&irqbits_lock, flags);
764 if (!bit)
765 return 0;
766 enable_irq(irqnumber);
767 return 1;
768}
769
770
771static int do_vm86_irq_handling(int subfunction, int irqnumber)
772{
773 int ret;
774 switch (subfunction) {
775 case VM86_GET_AND_RESET_IRQ: {
776 return get_and_reset_irq(irqnumber);
777 }
778 case VM86_GET_IRQ_BITS: {
779 return irqbits;
780 }
781 case VM86_REQUEST_IRQ: {
782 int sig = irqnumber >> 8;
783 int irq = irqnumber & 255;
784 if (!capable(CAP_SYS_ADMIN)) return -EPERM;
785 if (!((1 << sig) & ALLOWED_SIGS)) return -EPERM;
786 if (invalid_vm86_irq(irq)) return -EPERM;
787 if (vm86_irqs[irq].tsk) return -EPERM;
788 ret = request_irq(irq, &irq_handler, 0, VM86_IRQNAME, NULL);
789 if (ret) return ret;
790 vm86_irqs[irq].sig = sig;
791 vm86_irqs[irq].tsk = current;
792 return irq;
793 }
794 case VM86_FREE_IRQ: {
795 if (invalid_vm86_irq(irqnumber)) return -EPERM;
796 if (!vm86_irqs[irqnumber].tsk) return 0;
797 if (vm86_irqs[irqnumber].tsk != current) return -EPERM;
798 free_vm86_irq(irqnumber);
799 return 0;
800 }
801 }
802 return -EINVAL;
803}
804
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
new file mode 100644
index 000000000000..e0512cc8bea7
--- /dev/null
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -0,0 +1,134 @@
1/* ld script to make i386 Linux kernel
2 * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
3 */
4
5#include <asm-generic/vmlinux.lds.h>
6#include <asm/thread_info.h>
7#include <asm/page.h>
8
9OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
10OUTPUT_ARCH(i386)
11ENTRY(startup_32)
12jiffies = jiffies_64;
13SECTIONS
14{
15 . = __PAGE_OFFSET + 0x100000;
16 /* read-only */
17 _text = .; /* Text and read-only data */
18 .text : {
19 *(.text)
20 SCHED_TEXT
21 LOCK_TEXT
22 *(.fixup)
23 *(.gnu.warning)
24 } = 0x9090
25
26 _etext = .; /* End of text section */
27
28 . = ALIGN(16); /* Exception table */
29 __start___ex_table = .;
30 __ex_table : { *(__ex_table) }
31 __stop___ex_table = .;
32
33 RODATA
34
35 /* writeable */
36 .data : { /* Data */
37 *(.data)
38 CONSTRUCTORS
39 }
40
41 . = ALIGN(4096);
42 __nosave_begin = .;
43 .data_nosave : { *(.data.nosave) }
44 . = ALIGN(4096);
45 __nosave_end = .;
46
47 . = ALIGN(4096);
48 .data.page_aligned : { *(.data.idt) }
49
50 . = ALIGN(32);
51 .data.cacheline_aligned : { *(.data.cacheline_aligned) }
52
53 _edata = .; /* End of data section */
54
55 . = ALIGN(THREAD_SIZE); /* init_task */
56 .data.init_task : { *(.data.init_task) }
57
58 /* will be freed after init */
59 . = ALIGN(4096); /* Init code and data */
60 __init_begin = .;
61 .init.text : {
62 _sinittext = .;
63 *(.init.text)
64 _einittext = .;
65 }
66 .init.data : { *(.init.data) }
67 . = ALIGN(16);
68 __setup_start = .;
69 .init.setup : { *(.init.setup) }
70 __setup_end = .;
71 __initcall_start = .;
72 .initcall.init : {
73 *(.initcall1.init)
74 *(.initcall2.init)
75 *(.initcall3.init)
76 *(.initcall4.init)
77 *(.initcall5.init)
78 *(.initcall6.init)
79 *(.initcall7.init)
80 }
81 __initcall_end = .;
82 __con_initcall_start = .;
83 .con_initcall.init : { *(.con_initcall.init) }
84 __con_initcall_end = .;
85 SECURITY_INIT
86 . = ALIGN(4);
87 __alt_instructions = .;
88 .altinstructions : { *(.altinstructions) }
89 __alt_instructions_end = .;
90 .altinstr_replacement : { *(.altinstr_replacement) }
91 /* .exit.text is discard at runtime, not link time, to deal with references
92 from .altinstructions and .eh_frame */
93 .exit.text : { *(.exit.text) }
94 .exit.data : { *(.exit.data) }
95 . = ALIGN(4096);
96 __initramfs_start = .;
97 .init.ramfs : { *(.init.ramfs) }
98 __initramfs_end = .;
99 . = ALIGN(32);
100 __per_cpu_start = .;
101 .data.percpu : { *(.data.percpu) }
102 __per_cpu_end = .;
103 . = ALIGN(4096);
104 __init_end = .;
105 /* freed after init ends here */
106
107 __bss_start = .; /* BSS */
108 .bss : {
109 *(.bss.page_aligned)
110 *(.bss)
111 }
112 . = ALIGN(4);
113 __bss_stop = .;
114
115 _end = . ;
116
117 /* This is where the kernel creates the early boot page tables */
118 . = ALIGN(4096);
119 pg0 = .;
120
121 /* Sections to be discarded */
122 /DISCARD/ : {
123 *(.exitcall.exit)
124 }
125
126 /* Stabs debugging sections. */
127 .stab 0 : { *(.stab) }
128 .stabstr 0 : { *(.stabstr) }
129 .stab.excl 0 : { *(.stab.excl) }
130 .stab.exclstr 0 : { *(.stab.exclstr) }
131 .stab.index 0 : { *(.stab.index) }
132 .stab.indexstr 0 : { *(.stab.indexstr) }
133 .comment 0 : { *(.comment) }
134}
diff --git a/arch/i386/kernel/vsyscall-int80.S b/arch/i386/kernel/vsyscall-int80.S
new file mode 100644
index 000000000000..530d0525e5e2
--- /dev/null
+++ b/arch/i386/kernel/vsyscall-int80.S
@@ -0,0 +1,53 @@
1/*
2 * Code for the vsyscall page. This version uses the old int $0x80 method.
3 *
4 * NOTE:
5 * 1) __kernel_vsyscall _must_ be first in this page.
6 * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S
7 * for details.
8 */
9
10 .text
11 .globl __kernel_vsyscall
12 .type __kernel_vsyscall,@function
13__kernel_vsyscall:
14.LSTART_vsyscall:
15 int $0x80
16 ret
17.LEND_vsyscall:
18 .size __kernel_vsyscall,.-.LSTART_vsyscall
19 .previous
20
21 .section .eh_frame,"a",@progbits
22.LSTARTFRAMEDLSI:
23 .long .LENDCIEDLSI-.LSTARTCIEDLSI
24.LSTARTCIEDLSI:
25 .long 0 /* CIE ID */
26 .byte 1 /* Version number */
27 .string "zR" /* NUL-terminated augmentation string */
28 .uleb128 1 /* Code alignment factor */
29 .sleb128 -4 /* Data alignment factor */
30 .byte 8 /* Return address register column */
31 .uleb128 1 /* Augmentation value length */
32 .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
33 .byte 0x0c /* DW_CFA_def_cfa */
34 .uleb128 4
35 .uleb128 4
36 .byte 0x88 /* DW_CFA_offset, column 0x8 */
37 .uleb128 1
38 .align 4
39.LENDCIEDLSI:
40 .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
41.LSTARTFDEDLSI:
42 .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
43 .long .LSTART_vsyscall-. /* PC-relative start address */
44 .long .LEND_vsyscall-.LSTART_vsyscall
45 .uleb128 0
46 .align 4
47.LENDFDEDLSI:
48 .previous
49
50/*
51 * Get the common code for the sigreturn entry points.
52 */
53#include "vsyscall-sigreturn.S"
diff --git a/arch/i386/kernel/vsyscall-sigreturn.S b/arch/i386/kernel/vsyscall-sigreturn.S
new file mode 100644
index 000000000000..c8fcf75b9be3
--- /dev/null
+++ b/arch/i386/kernel/vsyscall-sigreturn.S
@@ -0,0 +1,142 @@
1/*
2 * Common code for the sigreturn entry points on the vsyscall page.
3 * So far this code is the same for both int80 and sysenter versions.
4 * This file is #include'd by vsyscall-*.S to define them after the
5 * vsyscall entry point. The kernel assumes that the addresses of these
6 * routines are constant for all vsyscall implementations.
7 */
8
9#include <asm/unistd.h>
10#include <asm/asm_offsets.h>
11
12
13/* XXX
14 Should these be named "_sigtramp" or something?
15*/
16
17 .text
18 .org __kernel_vsyscall+32
19 .globl __kernel_sigreturn
20 .type __kernel_sigreturn,@function
21__kernel_sigreturn:
22.LSTART_sigreturn:
23 popl %eax /* XXX does this mean it needs unwind info? */
24 movl $__NR_sigreturn, %eax
25 int $0x80
26.LEND_sigreturn:
27 .size __kernel_sigreturn,.-.LSTART_sigreturn
28
29 .balign 32
30 .globl __kernel_rt_sigreturn
31 .type __kernel_rt_sigreturn,@function
32__kernel_rt_sigreturn:
33.LSTART_rt_sigreturn:
34 movl $__NR_rt_sigreturn, %eax
35 int $0x80
36.LEND_rt_sigreturn:
37 .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
38 .previous
39
40 .section .eh_frame,"a",@progbits
41.LSTARTFRAMEDLSI1:
42 .long .LENDCIEDLSI1-.LSTARTCIEDLSI1
43.LSTARTCIEDLSI1:
44 .long 0 /* CIE ID */
45 .byte 1 /* Version number */
46 .string "zR" /* NUL-terminated augmentation string */
47 .uleb128 1 /* Code alignment factor */
48 .sleb128 -4 /* Data alignment factor */
49 .byte 8 /* Return address register column */
50 .uleb128 1 /* Augmentation value length */
51 .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
52 .byte 0 /* DW_CFA_nop */
53 .align 4
54.LENDCIEDLSI1:
55 .long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */
56.LSTARTFDEDLSI1:
57 .long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */
58 /* HACK: The dwarf2 unwind routines will subtract 1 from the
59 return address to get an address in the middle of the
60 presumed call instruction. Since we didn't get here via
61 a call, we need to include the nop before the real start
62 to make up for it. */
63 .long .LSTART_sigreturn-1-. /* PC-relative start address */
64 .long .LEND_sigreturn-.LSTART_sigreturn+1
65 .uleb128 0 /* Augmentation */
66 /* What follows are the instructions for the table generation.
67 We record the locations of each register saved. This is
68 complicated by the fact that the "CFA" is always assumed to
69 be the value of the stack pointer in the caller. This means
70 that we must define the CFA of this body of code to be the
71 saved value of the stack pointer in the sigcontext. Which
72 also means that there is no fixed relation to the other
73 saved registers, which means that we must use DW_CFA_expression
74 to compute their addresses. It also means that when we
75 adjust the stack with the popl, we have to do it all over again. */
76
77#define do_cfa_expr(offset) \
78 .byte 0x0f; /* DW_CFA_def_cfa_expression */ \
79 .uleb128 1f-0f; /* length */ \
800: .byte 0x74; /* DW_OP_breg4 */ \
81 .sleb128 offset; /* offset */ \
82 .byte 0x06; /* DW_OP_deref */ \
831:
84
85#define do_expr(regno, offset) \
86 .byte 0x10; /* DW_CFA_expression */ \
87 .uleb128 regno; /* regno */ \
88 .uleb128 1f-0f; /* length */ \
890: .byte 0x74; /* DW_OP_breg4 */ \
90 .sleb128 offset; /* offset */ \
911:
92
93 do_cfa_expr(SIGCONTEXT_esp+4)
94 do_expr(0, SIGCONTEXT_eax+4)
95 do_expr(1, SIGCONTEXT_ecx+4)
96 do_expr(2, SIGCONTEXT_edx+4)
97 do_expr(3, SIGCONTEXT_ebx+4)
98 do_expr(5, SIGCONTEXT_ebp+4)
99 do_expr(6, SIGCONTEXT_esi+4)
100 do_expr(7, SIGCONTEXT_edi+4)
101 do_expr(8, SIGCONTEXT_eip+4)
102
103 .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */
104
105 do_cfa_expr(SIGCONTEXT_esp)
106 do_expr(0, SIGCONTEXT_eax)
107 do_expr(1, SIGCONTEXT_ecx)
108 do_expr(2, SIGCONTEXT_edx)
109 do_expr(3, SIGCONTEXT_ebx)
110 do_expr(5, SIGCONTEXT_ebp)
111 do_expr(6, SIGCONTEXT_esi)
112 do_expr(7, SIGCONTEXT_edi)
113 do_expr(8, SIGCONTEXT_eip)
114
115 .align 4
116.LENDFDEDLSI1:
117
118 .long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */
119.LSTARTFDEDLSI2:
120 .long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */
121 /* HACK: See above wrt unwind library assumptions. */
122 .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */
123 .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
124 .uleb128 0 /* Augmentation */
125 /* What follows are the instructions for the table generation.
126 We record the locations of each register saved. This is
127 slightly less complicated than the above, since we don't
128 modify the stack pointer in the process. */
129
130 do_cfa_expr(RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esp)
131 do_expr(0, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eax)
132 do_expr(1, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ecx)
133 do_expr(2, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edx)
134 do_expr(3, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebx)
135 do_expr(5, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebp)
136 do_expr(6, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esi)
137 do_expr(7, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edi)
138 do_expr(8, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eip)
139
140 .align 4
141.LENDFDEDLSI2:
142 .previous
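To make the do_cfa_expr()/do_expr() pairs above concrete: DW_OP_breg4 pushes %esp plus a signed offset, and DW_OP_deref replaces that address with the word it points to. So do_cfa_expr() defines the CFA as a value loaded through %esp, while do_expr(), which omits the deref, hands the unwinder the address at which a register is saved. A minimal illustrative C sketch (function names are made up; a real unwinder such as libgcc's is structured differently):

#include <stdint.h>

/* what {DW_OP_breg4 <ofs>, DW_OP_deref} asks the unwinder to compute */
static uint32_t cfa_from_sigcontext(uint32_t esp, int32_t ofs)
{
	uint32_t addr = esp + ofs;			/* DW_OP_breg4 <ofs> */
	return *(const uint32_t *)(uintptr_t)addr;	/* DW_OP_deref */
}

/* what {DW_OP_breg4 <ofs>} alone yields: the register's save slot */
static uint32_t reg_save_slot(uint32_t esp, int32_t ofs)
{
	return esp + ofs;				/* no deref: an address */
}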
diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S
new file mode 100644
index 000000000000..4daefb2ec1b2
--- /dev/null
+++ b/arch/i386/kernel/vsyscall-sysenter.S
@@ -0,0 +1,104 @@
1/*
2 * Code for the vsyscall page. This version uses the sysenter instruction.
3 *
4 * NOTE:
5 * 1) __kernel_vsyscall _must_ be first in this page.
6 * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S
7 * for details.
8 */
9
10 .text
11 .globl __kernel_vsyscall
12 .type __kernel_vsyscall,@function
13__kernel_vsyscall:
14.LSTART_vsyscall:
15 push %ecx
16.Lpush_ecx:
17 push %edx
18.Lpush_edx:
19 push %ebp
20.Lenter_kernel:
21 movl %esp,%ebp
22 sysenter
23
24 /* 7: align return point with nop's to make disassembly easier */
25 .space 7,0x90
26
27 /* 14: System call restart point is here! (SYSENTER_RETURN - 2) */
28 jmp .Lenter_kernel
29 /* 16: System call normal return point is here! */
30 .globl SYSENTER_RETURN /* Symbol used by entry.S. */
31SYSENTER_RETURN:
32 pop %ebp
33.Lpop_ebp:
34 pop %edx
35.Lpop_edx:
36 pop %ecx
37.Lpop_ecx:
38 ret
39.LEND_vsyscall:
40 .size __kernel_vsyscall,.-.LSTART_vsyscall
41 .previous
42
43 .section .eh_frame,"a",@progbits
44.LSTARTFRAMEDLSI:
45 .long .LENDCIEDLSI-.LSTARTCIEDLSI
46.LSTARTCIEDLSI:
47 .long 0 /* CIE ID */
48 .byte 1 /* Version number */
49 .string "zR" /* NUL-terminated augmentation string */
50 .uleb128 1 /* Code alignment factor */
51 .sleb128 -4 /* Data alignment factor */
52 .byte 8 /* Return address register column */
53 .uleb128 1 /* Augmentation value length */
54 .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
55 .byte 0x0c /* DW_CFA_def_cfa */
56 .uleb128 4
57 .uleb128 4
58 .byte 0x88 /* DW_CFA_offset, column 0x8 */
59 .uleb128 1
60 .align 4
61.LENDCIEDLSI:
62 .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
63.LSTARTFDEDLSI:
64 .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
65 .long .LSTART_vsyscall-. /* PC-relative start address */
66 .long .LEND_vsyscall-.LSTART_vsyscall
67 .uleb128 0
68 /* What follows are the instructions for the table generation.
69 We have to record all changes of the stack pointer. */
70 .byte 0x04 /* DW_CFA_advance_loc4 */
71 .long .Lpush_ecx-.LSTART_vsyscall
72 .byte 0x0e /* DW_CFA_def_cfa_offset */
73 .byte 0x08 /* RA at offset 8 now */
74 .byte 0x04 /* DW_CFA_advance_loc4 */
75 .long .Lpush_edx-.Lpush_ecx
76 .byte 0x0e /* DW_CFA_def_cfa_offset */
77 .byte 0x0c /* RA at offset 12 now */
78 .byte 0x04 /* DW_CFA_advance_loc4 */
79 .long .Lenter_kernel-.Lpush_edx
80 .byte 0x0e /* DW_CFA_def_cfa_offset */
81 .byte 0x10 /* RA at offset 16 now */
82 .byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */
83 /* Finally the epilogue. */
84 .byte 0x04 /* DW_CFA_advance_loc4 */
85 .long .Lpop_ebp-.Lenter_kernel
86 .byte 0x0e /* DW_CFA_def_cfa_offset */
87 .byte 0x0c /* RA at offset 12 now */
88 .byte 0xc5 /* DW_CFA_restore %ebp */
89 .byte 0x04 /* DW_CFA_advance_loc4 */
90 .long .Lpop_edx-.Lpop_ebp
91 .byte 0x0e /* DW_CFA_def_cfa_offset */
92 .byte 0x08 /* RA at offset 8 now */
93 .byte 0x04 /* DW_CFA_advance_loc4 */
94 .long .Lpop_ecx-.Lpop_edx
95 .byte 0x0e /* DW_CFA_def_cfa_offset */
96 .byte 0x04 /* RA at offset 4 now */
97 .align 4
98.LENDFDEDLSI:
99 .previous
100
101/*
102 * Get the common code for the sigreturn entry points.
103 */
104#include "vsyscall-sigreturn.S"
diff --git a/arch/i386/kernel/vsyscall.S b/arch/i386/kernel/vsyscall.S
new file mode 100644
index 000000000000..b403890fe39b
--- /dev/null
+++ b/arch/i386/kernel/vsyscall.S
@@ -0,0 +1,15 @@
1#include <linux/init.h>
2
3__INITDATA
4
5 .globl vsyscall_int80_start, vsyscall_int80_end
6vsyscall_int80_start:
7 .incbin "arch/i386/kernel/vsyscall-int80.so"
8vsyscall_int80_end:
9
10 .globl vsyscall_sysenter_start, vsyscall_sysenter_end
11vsyscall_sysenter_start:
12 .incbin "arch/i386/kernel/vsyscall-sysenter.so"
13vsyscall_sysenter_end:
14
15__FINIT
diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S
new file mode 100644
index 000000000000..3a8329d6536e
--- /dev/null
+++ b/arch/i386/kernel/vsyscall.lds.S
@@ -0,0 +1,65 @@
1/*
2 * Linker script for vsyscall DSO. The vsyscall page is an ELF shared
3 * object prelinked to its virtual address, and with only one read-only
4 * segment (that fits in one page). This script controls its layout.
5 */
6#include <asm/asm_offsets.h>
7
8SECTIONS
9{
10 . = VSYSCALL_BASE + SIZEOF_HEADERS;
11
12 .hash : { *(.hash) } :text
13 .dynsym : { *(.dynsym) }
14 .dynstr : { *(.dynstr) }
15 .gnu.version : { *(.gnu.version) }
16 .gnu.version_d : { *(.gnu.version_d) }
17 .gnu.version_r : { *(.gnu.version_r) }
18
19 /* This linker script is used both with -r and with -shared.
20 For the layouts to match, we need to skip more than enough
21 space for the dynamic symbol table et al. If this amount
22 is insufficient, ld -shared will barf. Just increase it here. */
23 . = VSYSCALL_BASE + 0x400;
24
25 .text : { *(.text) } :text =0x90909090
26
27 .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
28 .eh_frame : { KEEP (*(.eh_frame)) } :text
29 .dynamic : { *(.dynamic) } :text :dynamic
30 .useless : {
31 *(.got.plt) *(.got)
32 *(.data .data.* .gnu.linkonce.d.*)
33 *(.dynbss)
34 *(.bss .bss.* .gnu.linkonce.b.*)
35 } :text
36}
37
38/*
39 * We must supply the ELF program headers explicitly to get just one
40 * PT_LOAD segment, and set the flags explicitly to make segments read-only.
41 */
42PHDRS
43{
44 text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
45 dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
46 eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
47}
48
49/*
50 * This controls what symbols we export from the DSO.
51 */
52VERSION
53{
54 LINUX_2.5 {
55 global:
56 __kernel_vsyscall;
57 __kernel_sigreturn;
58 __kernel_rt_sigreturn;
59
60 local: *;
61 };
62}
63
64/* The ELF entry point can be used to set the AT_SYSINFO value. */
65ENTRY(__kernel_vsyscall);
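The 0x6474e550 above is the value <elf.h> exposes as PT_GNU_EH_FRAME, which is why the comment notes that ld does not match the name. An illustrative userspace check (not part of the patch) that the linked DSO actually carries the segment:

#include <elf.h>
#include <stdio.h>

static void show_eh_frame_hdr(const Elf32_Phdr *phdr, int phnum)
{
	int i;

	for (i = 0; i < phnum; i++)
		if (phdr[i].p_type == PT_GNU_EH_FRAME)	/* 0x6474e550 */
			printf("eh_frame_hdr segment at vaddr %#x\n",
			       (unsigned)phdr[i].p_vaddr);
}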
diff --git a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
new file mode 100644
index 000000000000..7b1932d20f96
--- /dev/null
+++ b/arch/i386/lib/Makefile
@@ -0,0 +1,10 @@
1#
2# Makefile for i386-specific library files.
3#
4
5
6lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
7 bitops.o
8
9lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
10lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
diff --git a/arch/i386/lib/bitops.c b/arch/i386/lib/bitops.c
new file mode 100644
index 000000000000..97db3853dc82
--- /dev/null
+++ b/arch/i386/lib/bitops.c
@@ -0,0 +1,70 @@
1#include <linux/bitops.h>
2#include <linux/module.h>
3
4/**
 5 * find_next_bit - find the next set bit in a memory region
 6 * @addr: The address to base the search on
 7 * @size: The maximum number of bits to search
 8 * @offset: The bit number to start searching at
9 */
10int find_next_bit(const unsigned long *addr, int size, int offset)
11{
12 const unsigned long *p = addr + (offset >> 5);
13 int set = 0, bit = offset & 31, res;
14
15 if (bit) {
16 /*
17 * Look for nonzero in the first 32 bits:
18 */
19 __asm__("bsfl %1,%0\n\t"
20 "jne 1f\n\t"
21 "movl $32, %0\n"
22 "1:"
23 : "=r" (set)
24 : "r" (*p >> bit));
25 if (set < (32 - bit))
26 return set + offset;
27 set = 32 - bit;
28 p++;
29 }
30 /*
31 * No set bit yet, search remaining full words for a bit
32 */
33 res = find_first_bit (p, size - 32 * (p - addr));
34 return (offset + set + res);
35}
36EXPORT_SYMBOL(find_next_bit);
37
38/**
 39 * find_next_zero_bit - find the next zero bit in a memory region
 40 * @addr: The address to base the search on
 41 * @size: The maximum number of bits to search
 42 * @offset: The bit number to start searching at
43 */
44int find_next_zero_bit(const unsigned long *addr, int size, int offset)
45{
46 unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
47 int set = 0, bit = offset & 31, res;
48
49 if (bit) {
50 /*
51 * Look for zero in the first 32 bits.
52 */
53 __asm__("bsfl %1,%0\n\t"
54 "jne 1f\n\t"
55 "movl $32, %0\n"
56 "1:"
57 : "=r" (set)
58 : "r" (~(*p >> bit)));
59 if (set < (32 - bit))
60 return set + offset;
61 set = 32 - bit;
62 p++;
63 }
64 /*
 65 * No zero yet, search remaining full words for a zero
66 */
67 res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr));
68 return (offset + set + res);
69}
70EXPORT_SYMBOL(find_next_zero_bit);
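For reference, the semantics the two routines above implement, minus the bsfl fast path, can be sketched portably (illustrative only, assuming 32-bit longs as on i386; like the real code, it returns a value >= size when nothing qualifies):

static int find_next_bit_ref(const unsigned long *addr, int size, int offset)
{
	int i;

	for (i = offset; i < size; i++)
		if (addr[i >> 5] & (1UL << (i & 31)))
			return i;	/* bit number of the next set bit */
	return size;			/* nothing set in [offset, size) */
}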
diff --git a/arch/i386/lib/checksum.S b/arch/i386/lib/checksum.S
new file mode 100644
index 000000000000..94c7867ddc33
--- /dev/null
+++ b/arch/i386/lib/checksum.S
@@ -0,0 +1,496 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IP/TCP/UDP checksumming routines
7 *
8 * Authors: Jorge Cwik, <jorge@laser.satlink.net>
9 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
10 * Tom May, <ftom@netcom.com>
11 * Pentium Pro/II routines:
12 * Alexander Kjeldaas <astor@guardian.no>
13 * Finn Arne Gangstad <finnag@guardian.no>
14 * Lots of code moved from tcp.c and ip.c; see those files
15 * for more names.
16 *
17 * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
18 * handling.
19 * Andi Kleen, add zeroing on error
20 * converted to pure assembler
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 */
27
28#include <linux/config.h>
29#include <asm/errno.h>
30
31/*
32 * computes a partial checksum, e.g. for TCP/UDP fragments
33 */
34
35/*
36unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
37 */
38
39.text
40.align 4
41.globl csum_partial
42
43#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
44
45 /*
46 * Experiments with Ethernet and SLIP connections show that buff
47 * is aligned on either a 2-byte or 4-byte boundary. We get at
48 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
49 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
50 * alignment for the unrolled loop.
51 */
52csum_partial:
53 pushl %esi
54 pushl %ebx
55 movl 20(%esp),%eax # Function arg: unsigned int sum
56 movl 16(%esp),%ecx # Function arg: int len
57 movl 12(%esp),%esi # Function arg: unsigned char *buff
58 testl $3, %esi # Check alignment.
59 jz 2f # Jump if alignment is ok.
60 testl $1, %esi # Check alignment.
61 jz 10f # Jump if alignment is boundary of 2bytes.
62
63 # buf is odd
64 dec %ecx
65 jl 8f
66 movzbl (%esi), %ebx
67 adcl %ebx, %eax
68 roll $8, %eax
69 inc %esi
70 testl $2, %esi
71 jz 2f
7210:
73 subl $2, %ecx # Alignment uses up two bytes.
74 jae 1f # Jump if we had at least two bytes.
75 addl $2, %ecx # ecx was < 2. Deal with it.
76 jmp 4f
771: movw (%esi), %bx
78 addl $2, %esi
79 addw %bx, %ax
80 adcl $0, %eax
812:
82 movl %ecx, %edx
83 shrl $5, %ecx
84 jz 2f
85 testl %esi, %esi
861: movl (%esi), %ebx
87 adcl %ebx, %eax
88 movl 4(%esi), %ebx
89 adcl %ebx, %eax
90 movl 8(%esi), %ebx
91 adcl %ebx, %eax
92 movl 12(%esi), %ebx
93 adcl %ebx, %eax
94 movl 16(%esi), %ebx
95 adcl %ebx, %eax
96 movl 20(%esi), %ebx
97 adcl %ebx, %eax
98 movl 24(%esi), %ebx
99 adcl %ebx, %eax
100 movl 28(%esi), %ebx
101 adcl %ebx, %eax
102 lea 32(%esi), %esi
103 dec %ecx
104 jne 1b
105 adcl $0, %eax
1062: movl %edx, %ecx
107 andl $0x1c, %edx
108 je 4f
109 shrl $2, %edx # This clears CF
1103: adcl (%esi), %eax
111 lea 4(%esi), %esi
112 dec %edx
113 jne 3b
114 adcl $0, %eax
1154: andl $3, %ecx
116 jz 7f
117 cmpl $2, %ecx
118 jb 5f
119 movw (%esi),%cx
120 leal 2(%esi),%esi
121 je 6f
122 shll $16,%ecx
1235: movb (%esi),%cl
1246: addl %ecx,%eax
125 adcl $0, %eax
1267:
127 testl $1, 12(%esp)
128 jz 8f
129 roll $8, %eax
1308:
131 popl %ebx
132 popl %esi
133 ret
134
135#else
136
137/* Version for PentiumII/PPro */
138
139csum_partial:
140 pushl %esi
141 pushl %ebx
142 movl 20(%esp),%eax # Function arg: unsigned int sum
143 movl 16(%esp),%ecx # Function arg: int len
144 movl 12(%esp),%esi # Function arg: const unsigned char *buf
145
146 testl $3, %esi
147 jnz 25f
14810:
149 movl %ecx, %edx
150 movl %ecx, %ebx
151 andl $0x7c, %ebx
152 shrl $7, %ecx
153 addl %ebx,%esi
154 shrl $2, %ebx
155 negl %ebx
156 lea 45f(%ebx,%ebx,2), %ebx
157 testl %esi, %esi
158 jmp *%ebx
159
160 # Handle 2-byte-aligned regions
16120: addw (%esi), %ax
162 lea 2(%esi), %esi
163 adcl $0, %eax
164 jmp 10b
16525:
166 testl $1, %esi
167 jz 30f
168 # buf is odd
169 dec %ecx
170 jl 90f
171 movzbl (%esi), %ebx
172 addl %ebx, %eax
173 adcl $0, %eax
174 roll $8, %eax
175 inc %esi
176 testl $2, %esi
177 jz 10b
178
17930: subl $2, %ecx
180 ja 20b
181 je 32f
182 addl $2, %ecx
183 jz 80f
184 movzbl (%esi),%ebx # csumming 1 byte, 2-aligned
185 addl %ebx, %eax
186 adcl $0, %eax
187 jmp 80f
18832:
189 addw (%esi), %ax # csumming 2 bytes, 2-aligned
190 adcl $0, %eax
191 jmp 80f
192
19340:
194 addl -128(%esi), %eax
195 adcl -124(%esi), %eax
196 adcl -120(%esi), %eax
197 adcl -116(%esi), %eax
198 adcl -112(%esi), %eax
199 adcl -108(%esi), %eax
200 adcl -104(%esi), %eax
201 adcl -100(%esi), %eax
202 adcl -96(%esi), %eax
203 adcl -92(%esi), %eax
204 adcl -88(%esi), %eax
205 adcl -84(%esi), %eax
206 adcl -80(%esi), %eax
207 adcl -76(%esi), %eax
208 adcl -72(%esi), %eax
209 adcl -68(%esi), %eax
210 adcl -64(%esi), %eax
211 adcl -60(%esi), %eax
212 adcl -56(%esi), %eax
213 adcl -52(%esi), %eax
214 adcl -48(%esi), %eax
215 adcl -44(%esi), %eax
216 adcl -40(%esi), %eax
217 adcl -36(%esi), %eax
218 adcl -32(%esi), %eax
219 adcl -28(%esi), %eax
220 adcl -24(%esi), %eax
221 adcl -20(%esi), %eax
222 adcl -16(%esi), %eax
223 adcl -12(%esi), %eax
224 adcl -8(%esi), %eax
225 adcl -4(%esi), %eax
22645:
227 lea 128(%esi), %esi
228 adcl $0, %eax
229 dec %ecx
230 jge 40b
231 movl %edx, %ecx
23250: andl $3, %ecx
233 jz 80f
234
235 # Handle the last 1-3 bytes without jumping
236 notl %ecx # 1->2, 2->1, 3->0, higher bits are masked
237 movl $0xffffff,%ebx # by the shll and shrl instructions
238 shll $3,%ecx
239 shrl %cl,%ebx
240 andl -128(%esi),%ebx # esi is 4-aligned so should be ok
241 addl %ebx,%eax
242 adcl $0,%eax
24380:
244 testl $1, 12(%esp)
245 jz 90f
246 roll $8, %eax
24790:
248 popl %ebx
249 popl %esi
250 ret
251
252#endif
253
254/*
255unsigned int csum_partial_copy_generic (const char *src, char *dst,
256 int len, int sum, int *src_err_ptr, int *dst_err_ptr)
257 */
258
259/*
260 * Copy from ds while checksumming, otherwise like csum_partial
261 *
 262 * The macros SRC and DST specify the type of access for the instructions,
 263 * so we can call a custom exception handler for all access types.
264 *
265 * FIXME: could someone double-check whether I haven't mixed up some SRC and
266 * DST definitions? It's damn hard to trigger all cases. I hope I got
267 * them all but there's no guarantee.
268 */
269
270#define SRC(y...) \
271 9999: y; \
272 .section __ex_table, "a"; \
273 .long 9999b, 6001f ; \
274 .previous
275
276#define DST(y...) \
277 9999: y; \
278 .section __ex_table, "a"; \
279 .long 9999b, 6002f ; \
280 .previous
281
282.align 4
283.globl csum_partial_copy_generic
284
285#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
286
287#define ARGBASE 16
288#define FP 12
289
290csum_partial_copy_generic:
291 subl $4,%esp
292 pushl %edi
293 pushl %esi
294 pushl %ebx
295 movl ARGBASE+16(%esp),%eax # sum
296 movl ARGBASE+12(%esp),%ecx # len
297 movl ARGBASE+4(%esp),%esi # src
298 movl ARGBASE+8(%esp),%edi # dst
299
300 testl $2, %edi # Check alignment.
301 jz 2f # Jump if alignment is ok.
302 subl $2, %ecx # Alignment uses up two bytes.
303 jae 1f # Jump if we had at least two bytes.
304 addl $2, %ecx # ecx was < 2. Deal with it.
305 jmp 4f
306SRC(1: movw (%esi), %bx )
307 addl $2, %esi
308DST( movw %bx, (%edi) )
309 addl $2, %edi
310 addw %bx, %ax
311 adcl $0, %eax
3122:
313 movl %ecx, FP(%esp)
314 shrl $5, %ecx
315 jz 2f
316 testl %esi, %esi
317SRC(1: movl (%esi), %ebx )
318SRC( movl 4(%esi), %edx )
319 adcl %ebx, %eax
320DST( movl %ebx, (%edi) )
321 adcl %edx, %eax
322DST( movl %edx, 4(%edi) )
323
324SRC( movl 8(%esi), %ebx )
325SRC( movl 12(%esi), %edx )
326 adcl %ebx, %eax
327DST( movl %ebx, 8(%edi) )
328 adcl %edx, %eax
329DST( movl %edx, 12(%edi) )
330
331SRC( movl 16(%esi), %ebx )
332SRC( movl 20(%esi), %edx )
333 adcl %ebx, %eax
334DST( movl %ebx, 16(%edi) )
335 adcl %edx, %eax
336DST( movl %edx, 20(%edi) )
337
338SRC( movl 24(%esi), %ebx )
339SRC( movl 28(%esi), %edx )
340 adcl %ebx, %eax
341DST( movl %ebx, 24(%edi) )
342 adcl %edx, %eax
343DST( movl %edx, 28(%edi) )
344
345 lea 32(%esi), %esi
346 lea 32(%edi), %edi
347 dec %ecx
348 jne 1b
349 adcl $0, %eax
3502: movl FP(%esp), %edx
351 movl %edx, %ecx
352 andl $0x1c, %edx
353 je 4f
354 shrl $2, %edx # This clears CF
355SRC(3: movl (%esi), %ebx )
356 adcl %ebx, %eax
357DST( movl %ebx, (%edi) )
358 lea 4(%esi), %esi
359 lea 4(%edi), %edi
360 dec %edx
361 jne 3b
362 adcl $0, %eax
3634: andl $3, %ecx
364 jz 7f
365 cmpl $2, %ecx
366 jb 5f
367SRC( movw (%esi), %cx )
368 leal 2(%esi), %esi
369DST( movw %cx, (%edi) )
370 leal 2(%edi), %edi
371 je 6f
372 shll $16,%ecx
373SRC(5: movb (%esi), %cl )
374DST( movb %cl, (%edi) )
3756: addl %ecx, %eax
376 adcl $0, %eax
3777:
3785000:
379
380# Exception handler:
381.section .fixup, "ax"
382
3836001:
384 movl ARGBASE+20(%esp), %ebx # src_err_ptr
385 movl $-EFAULT, (%ebx)
386
387 # zero the complete destination - computing the rest
388 # is too much work
389 movl ARGBASE+8(%esp), %edi # dst
390 movl ARGBASE+12(%esp), %ecx # len
391 xorl %eax,%eax
392 rep ; stosb
393
394 jmp 5000b
395
3966002:
397 movl ARGBASE+24(%esp), %ebx # dst_err_ptr
398 movl $-EFAULT,(%ebx)
399 jmp 5000b
400
401.previous
402
403 popl %ebx
404 popl %esi
405 popl %edi
406 popl %ecx # equivalent to addl $4,%esp
407 ret
408
409#else
410
411/* Version for PentiumII/PPro */
412
413#define ROUND1(x) \
414 SRC(movl x(%esi), %ebx ) ; \
415 addl %ebx, %eax ; \
416 DST(movl %ebx, x(%edi) ) ;
417
418#define ROUND(x) \
419 SRC(movl x(%esi), %ebx ) ; \
420 adcl %ebx, %eax ; \
421 DST(movl %ebx, x(%edi) ) ;
422
423#define ARGBASE 12
424
425csum_partial_copy_generic:
426 pushl %ebx
427 pushl %edi
428 pushl %esi
429 movl ARGBASE+4(%esp),%esi #src
430 movl ARGBASE+8(%esp),%edi #dst
431 movl ARGBASE+12(%esp),%ecx #len
432 movl ARGBASE+16(%esp),%eax #sum
433# movl %ecx, %edx
434 movl %ecx, %ebx
435 movl %esi, %edx
436 shrl $6, %ecx
437 andl $0x3c, %ebx
438 negl %ebx
439 subl %ebx, %esi
440 subl %ebx, %edi
441 lea -1(%esi),%edx
442 andl $-32,%edx
443 lea 3f(%ebx,%ebx), %ebx
444 testl %esi, %esi
445 jmp *%ebx
4461: addl $64,%esi
447 addl $64,%edi
448 SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
449 ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
450 ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
451 ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
452 ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4)
4533: adcl $0,%eax
454 addl $64, %edx
455 dec %ecx
456 jge 1b
4574: movl ARGBASE+12(%esp),%edx #len
458 andl $3, %edx
459 jz 7f
460 cmpl $2, %edx
461 jb 5f
462SRC( movw (%esi), %dx )
463 leal 2(%esi), %esi
464DST( movw %dx, (%edi) )
465 leal 2(%edi), %edi
466 je 6f
467 shll $16,%edx
4685:
469SRC( movb (%esi), %dl )
470DST( movb %dl, (%edi) )
4716: addl %edx, %eax
472 adcl $0, %eax
4737:
474.section .fixup, "ax"
4756001: movl ARGBASE+20(%esp), %ebx # src_err_ptr
476 movl $-EFAULT, (%ebx)
477 # zero the complete destination (computing the rest is too much work)
478 movl ARGBASE+8(%esp),%edi # dst
479 movl ARGBASE+12(%esp),%ecx # len
480 xorl %eax,%eax
481 rep; stosb
482 jmp 7b
4836002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr
484 movl $-EFAULT, (%ebx)
485 jmp 7b
486.previous
487
488 popl %esi
489 popl %edi
490 popl %ebx
491 ret
492
493#undef ROUND
494#undef ROUND1
495
496#endif
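As a reference point for the assembly above: csum_partial() accumulates the buffer with end-around carry. A simplified portable sketch (illustrative only; it yields a sum that folds to the same 16-bit checksum, not the bit-identical 32-bit partial sum, and it ignores the odd-address byte rotation the assembly performs):

static unsigned int csum_partial_ref(const unsigned char *buff, int len,
				     unsigned int sum)
{
	unsigned long long acc = sum;

	while (len > 1) {
		acc += *(const unsigned short *)buff;	/* 16-bit chunks */
		buff += 2;
		len -= 2;
	}
	if (len > 0)
		acc += *buff;				/* trailing byte (LE) */
	while (acc >> 32)				/* end-around carry */
		acc = (acc & 0xffffffffULL) + (acc >> 32);
	return (unsigned int)acc;
}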
diff --git a/arch/i386/lib/dec_and_lock.c b/arch/i386/lib/dec_and_lock.c
new file mode 100644
index 000000000000..ab43394dc775
--- /dev/null
+++ b/arch/i386/lib/dec_and_lock.c
@@ -0,0 +1,40 @@
1/*
2 * x86 version of "atomic_dec_and_lock()" using
3 * the atomic "cmpxchg" instruction.
4 *
 5 * (For CPUs lacking cmpxchg, we use the slow
6 * generic version, and this one never even gets
7 * compiled).
8 */
9
10#include <linux/spinlock.h>
11#include <asm/atomic.h>
12
13int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
14{
15 int counter;
16 int newcount;
17
18repeat:
19 counter = atomic_read(atomic);
20 newcount = counter-1;
21
22 if (!newcount)
23 goto slow_path;
24
25 asm volatile("lock; cmpxchgl %1,%2"
26 :"=a" (newcount)
27 :"r" (newcount), "m" (atomic->counter), "0" (counter));
28
29 /* If the above failed, "eax" will have changed */
30 if (newcount != counter)
31 goto repeat;
32 return 0;
33
34slow_path:
35 spin_lock(lock);
36 if (atomic_dec_and_test(atomic))
37 return 1;
38 spin_unlock(lock);
39 return 0;
40}
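In modern terms, the cmpxchg loop above is a compare-and-swap retry. An illustrative C11 sketch of the same semantics (not kernel code; my_lock/my_unlock are hypothetical stand-ins for spin_lock/spin_unlock):

#include <stdatomic.h>
#include <stdbool.h>

extern void my_lock(void);	/* hypothetical spin_lock() stand-in */
extern void my_unlock(void);

static bool dec_and_lock_sketch(atomic_int *count)
{
	int old = atomic_load(count);

	while (old != 1)	/* fast path unless we would hit zero */
		if (atomic_compare_exchange_weak(count, &old, old - 1))
			return false;	/* decremented; lock not taken */

	my_lock();		/* slow path, as in the original */
	if (atomic_fetch_sub(count, 1) == 1)
		return true;	/* reached zero: return holding the lock */
	my_unlock();
	return false;
}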
diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
new file mode 100644
index 000000000000..080639f262b1
--- /dev/null
+++ b/arch/i386/lib/delay.c
@@ -0,0 +1,49 @@
1/*
2 * Precise Delay Loops for i386
3 *
4 * Copyright (C) 1993 Linus Torvalds
5 * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
6 *
7 * The __delay function must _NOT_ be inlined as its execution time
8 * depends wildly on alignment on many x86 processors. The additional
 9 * jump magic is needed to get the timing stable on all the CPUs
10 * we have to worry about.
11 */
12
13#include <linux/config.h>
14#include <linux/sched.h>
15#include <linux/delay.h>
16#include <asm/processor.h>
17#include <asm/delay.h>
18#include <asm/timer.h>
19
20#ifdef CONFIG_SMP
21#include <asm/smp.h>
22#endif
23
24extern struct timer_opts* timer;
25
26void __delay(unsigned long loops)
27{
28 cur_timer->delay(loops);
29}
30
31inline void __const_udelay(unsigned long xloops)
32{
33 int d0;
34 xloops *= 4;
35 __asm__("mull %0"
36 :"=d" (xloops), "=&a" (d0)
37 :"1" (xloops),"0" (cpu_data[_smp_processor_id()].loops_per_jiffy * (HZ/4)));
38 __delay(++xloops);
39}
40
41void __udelay(unsigned long usecs)
42{
43 __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
44}
45
46void __ndelay(unsigned long nsecs)
47{
48 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
49}
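The magic constant above is 0x10c7 = 4295 = ceil(2^32 / 10^6), so the mull turns a division by one million into a multiply whose high 32 bits are kept. An illustrative restatement of __const_udelay()'s arithmetic (not kernel code; parameter names are made up):

#include <stdint.h>

static unsigned long usecs_to_loops_sketch(unsigned long usecs,
					   unsigned long loops_per_jiffy,
					   unsigned long hz)
{
	uint32_t xloops = usecs * 0x10c7;	/* ~ usecs * 2^32 / 10^6 */
	/* the "mull": a single 32x32->64 multiply; %edx holds the high half */
	uint64_t prod = (uint64_t)(xloops * 4) *
			(loops_per_jiffy * (hz / 4));
	return (unsigned long)(prod >> 32);	/* delay loops to spin */
}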
diff --git a/arch/i386/lib/getuser.S b/arch/i386/lib/getuser.S
new file mode 100644
index 000000000000..62d7f178a326
--- /dev/null
+++ b/arch/i386/lib/getuser.S
@@ -0,0 +1,70 @@
1/*
2 * __get_user functions.
3 *
4 * (C) Copyright 1998 Linus Torvalds
5 *
6 * These functions have a non-standard call interface
7 * to make them more efficient, especially as they
8 * return an error value in addition to the "real"
9 * return value.
10 */
11#include <asm/thread_info.h>
12
13
14/*
15 * __get_user_X
16 *
17 * Inputs: %eax contains the address
18 *
19 * Outputs: %eax is error code (0 or -EFAULT)
20 * %edx contains zero-extended value
21 *
22 * These functions should not modify any other registers,
23 * as they get called from within inline assembly.
24 */
25
26.text
27.align 4
28.globl __get_user_1
29__get_user_1:
30 GET_THREAD_INFO(%edx)
31 cmpl TI_addr_limit(%edx),%eax
32 jae bad_get_user
331: movzbl (%eax),%edx
34 xorl %eax,%eax
35 ret
36
37.align 4
38.globl __get_user_2
39__get_user_2:
40 addl $1,%eax
41 jc bad_get_user
42 GET_THREAD_INFO(%edx)
43 cmpl TI_addr_limit(%edx),%eax
44 jae bad_get_user
452: movzwl -1(%eax),%edx
46 xorl %eax,%eax
47 ret
48
49.align 4
50.globl __get_user_4
51__get_user_4:
52 addl $3,%eax
53 jc bad_get_user
54 GET_THREAD_INFO(%edx)
55 cmpl TI_addr_limit(%edx),%eax
56 jae bad_get_user
573: movl -3(%eax),%edx
58 xorl %eax,%eax
59 ret
60
61bad_get_user:
62 xorl %edx,%edx
63 movl $-14,%eax
64 ret
65
66.section __ex_table,"a"
67 .long 1b,bad_get_user
68 .long 2b,bad_get_user
69 .long 3b,bad_get_user
70.previous
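The addl/jc/cmpl/jae sequences above are an overflow-safe range check: add size-1 to the pointer, treat a carry as a wrapped (hence bad) address, then require the last byte to sit below the thread's addr_limit. The same logic in illustrative C (not kernel code):

static int access_ok_sketch(unsigned long addr, unsigned long size,
			    unsigned long addr_limit)
{
	unsigned long last = addr + size - 1;

	if (last < addr)		/* wrapped: the "jc bad_get_user" */
		return 0;
	return last < addr_limit;	/* the "cmpl ...; jae bad_get_user" */
}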
diff --git a/arch/i386/lib/memcpy.c b/arch/i386/lib/memcpy.c
new file mode 100644
index 000000000000..891b2359d18a
--- /dev/null
+++ b/arch/i386/lib/memcpy.c
@@ -0,0 +1,44 @@
1#include <linux/config.h>
2#include <linux/string.h>
3#include <linux/module.h>
4
5#undef memcpy
6#undef memset
7
8void *memcpy(void *to, const void *from, size_t n)
9{
10#ifdef CONFIG_X86_USE_3DNOW
11 return __memcpy3d(to, from, n);
12#else
13 return __memcpy(to, from, n);
14#endif
15}
16EXPORT_SYMBOL(memcpy);
17
18void *memset(void *s, int c, size_t count)
19{
20 return __memset(s, c, count);
21}
22EXPORT_SYMBOL(memset);
23
24void *memmove(void *dest, const void *src, size_t n)
25{
26 int d0, d1, d2;
27
28 if (dest < src) {
29 memcpy(dest,src,n);
30 } else {
31 __asm__ __volatile__(
32 "std\n\t"
33 "rep\n\t"
34 "movsb\n\t"
35 "cld"
36 : "=&c" (d0), "=&S" (d1), "=&D" (d2)
37 :"0" (n),
38 "1" (n-1+(const char *)src),
39 "2" (n-1+(char *)dest)
40 :"memory");
41 }
42 return dest;
43}
44EXPORT_SYMBOL(memmove);
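The std/rep movsb branch above copies backwards so that overlapping bytes are read before they are overwritten. A portable illustrative equivalent:

#include <stddef.h>

static void *memmove_ref(void *dest, const void *src, size_t n)
{
	unsigned char *d = dest;
	const unsigned char *s = src;

	if (d < s)
		while (n--)
			*d++ = *s++;	/* forward copy is safe */
	else
		while (n--)
			d[n] = s[n];	/* backward, like std/movsb */
	return dest;
}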
diff --git a/arch/i386/lib/mmx.c b/arch/i386/lib/mmx.c
new file mode 100644
index 000000000000..01f8b1a2cc84
--- /dev/null
+++ b/arch/i386/lib/mmx.c
@@ -0,0 +1,399 @@
1#include <linux/config.h>
2#include <linux/types.h>
3#include <linux/string.h>
4#include <linux/sched.h>
5#include <linux/hardirq.h>
6
7#include <asm/i387.h>
8
9
10/*
11 * MMX 3DNow! library helper functions
12 *
13 * To do:
 14 * We can use MMX just for prefetch in IRQs. This may be a win.
15 * (reported so on K6-III)
16 * We should use a better code neutral filler for the short jump
 17 * leal ebx,[ebx] is apparently best for K6-2, but Cyrix ??
18 * We also want to clobber the filler register so we don't get any
19 * register forwarding stalls on the filler.
20 *
21 * Add *user handling. Checksums are not a win with MMX on any CPU
 22 * tested so far with any MMX variant tried.
23 *
24 * 22/09/2000 - Arjan van de Ven
 25 * Improved for non-engineering-sample Athlons
26 *
27 */
28
29void *_mmx_memcpy(void *to, const void *from, size_t len)
30{
31 void *p;
32 int i;
33
34 if (unlikely(in_interrupt()))
35 return __memcpy(to, from, len);
36
37 p = to;
38 i = len >> 6; /* len/64 */
39
40 kernel_fpu_begin();
41
42 __asm__ __volatile__ (
43 "1: prefetch (%0)\n" /* This set is 28 bytes */
44 " prefetch 64(%0)\n"
45 " prefetch 128(%0)\n"
46 " prefetch 192(%0)\n"
47 " prefetch 256(%0)\n"
48 "2: \n"
49 ".section .fixup, \"ax\"\n"
50 "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
51 " jmp 2b\n"
52 ".previous\n"
53 ".section __ex_table,\"a\"\n"
54 " .align 4\n"
55 " .long 1b, 3b\n"
56 ".previous"
57 : : "r" (from) );
58
59
60 for(; i>5; i--)
61 {
62 __asm__ __volatile__ (
63 "1: prefetch 320(%0)\n"
64 "2: movq (%0), %%mm0\n"
65 " movq 8(%0), %%mm1\n"
66 " movq 16(%0), %%mm2\n"
67 " movq 24(%0), %%mm3\n"
68 " movq %%mm0, (%1)\n"
69 " movq %%mm1, 8(%1)\n"
70 " movq %%mm2, 16(%1)\n"
71 " movq %%mm3, 24(%1)\n"
72 " movq 32(%0), %%mm0\n"
73 " movq 40(%0), %%mm1\n"
74 " movq 48(%0), %%mm2\n"
75 " movq 56(%0), %%mm3\n"
76 " movq %%mm0, 32(%1)\n"
77 " movq %%mm1, 40(%1)\n"
78 " movq %%mm2, 48(%1)\n"
79 " movq %%mm3, 56(%1)\n"
80 ".section .fixup, \"ax\"\n"
81 "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
82 " jmp 2b\n"
83 ".previous\n"
84 ".section __ex_table,\"a\"\n"
85 " .align 4\n"
86 " .long 1b, 3b\n"
87 ".previous"
88 : : "r" (from), "r" (to) : "memory");
89 from+=64;
90 to+=64;
91 }
92
93 for(; i>0; i--)
94 {
95 __asm__ __volatile__ (
96 " movq (%0), %%mm0\n"
97 " movq 8(%0), %%mm1\n"
98 " movq 16(%0), %%mm2\n"
99 " movq 24(%0), %%mm3\n"
100 " movq %%mm0, (%1)\n"
101 " movq %%mm1, 8(%1)\n"
102 " movq %%mm2, 16(%1)\n"
103 " movq %%mm3, 24(%1)\n"
104 " movq 32(%0), %%mm0\n"
105 " movq 40(%0), %%mm1\n"
106 " movq 48(%0), %%mm2\n"
107 " movq 56(%0), %%mm3\n"
108 " movq %%mm0, 32(%1)\n"
109 " movq %%mm1, 40(%1)\n"
110 " movq %%mm2, 48(%1)\n"
111 " movq %%mm3, 56(%1)\n"
112 : : "r" (from), "r" (to) : "memory");
113 from+=64;
114 to+=64;
115 }
116 /*
117 * Now do the tail of the block
118 */
119 __memcpy(to, from, len&63);
120 kernel_fpu_end();
121 return p;
122}
123
124#ifdef CONFIG_MK7
125
126/*
127 * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and
128 * other MMX using processors do not.
129 */
130
131static void fast_clear_page(void *page)
132{
133 int i;
134
135 kernel_fpu_begin();
136
137 __asm__ __volatile__ (
138 " pxor %%mm0, %%mm0\n" : :
139 );
140
141 for(i=0;i<4096/64;i++)
142 {
143 __asm__ __volatile__ (
144 " movntq %%mm0, (%0)\n"
145 " movntq %%mm0, 8(%0)\n"
146 " movntq %%mm0, 16(%0)\n"
147 " movntq %%mm0, 24(%0)\n"
148 " movntq %%mm0, 32(%0)\n"
149 " movntq %%mm0, 40(%0)\n"
150 " movntq %%mm0, 48(%0)\n"
151 " movntq %%mm0, 56(%0)\n"
152 : : "r" (page) : "memory");
153 page+=64;
154 }
 155 /* since movntq is weakly-ordered, an "sfence" is needed to become
156 * ordered again.
157 */
158 __asm__ __volatile__ (
159 " sfence \n" : :
160 );
161 kernel_fpu_end();
162}
163
164static void fast_copy_page(void *to, void *from)
165{
166 int i;
167
168 kernel_fpu_begin();
169
170 /* maybe the prefetch stuff can go before the expensive fnsave...
171 * but that is for later. -AV
172 */
173 __asm__ __volatile__ (
174 "1: prefetch (%0)\n"
175 " prefetch 64(%0)\n"
176 " prefetch 128(%0)\n"
177 " prefetch 192(%0)\n"
178 " prefetch 256(%0)\n"
179 "2: \n"
180 ".section .fixup, \"ax\"\n"
181 "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
182 " jmp 2b\n"
183 ".previous\n"
184 ".section __ex_table,\"a\"\n"
185 " .align 4\n"
186 " .long 1b, 3b\n"
187 ".previous"
188 : : "r" (from) );
189
190 for(i=0; i<(4096-320)/64; i++)
191 {
192 __asm__ __volatile__ (
193 "1: prefetch 320(%0)\n"
194 "2: movq (%0), %%mm0\n"
195 " movntq %%mm0, (%1)\n"
196 " movq 8(%0), %%mm1\n"
197 " movntq %%mm1, 8(%1)\n"
198 " movq 16(%0), %%mm2\n"
199 " movntq %%mm2, 16(%1)\n"
200 " movq 24(%0), %%mm3\n"
201 " movntq %%mm3, 24(%1)\n"
202 " movq 32(%0), %%mm4\n"
203 " movntq %%mm4, 32(%1)\n"
204 " movq 40(%0), %%mm5\n"
205 " movntq %%mm5, 40(%1)\n"
206 " movq 48(%0), %%mm6\n"
207 " movntq %%mm6, 48(%1)\n"
208 " movq 56(%0), %%mm7\n"
209 " movntq %%mm7, 56(%1)\n"
210 ".section .fixup, \"ax\"\n"
211 "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
212 " jmp 2b\n"
213 ".previous\n"
214 ".section __ex_table,\"a\"\n"
215 " .align 4\n"
216 " .long 1b, 3b\n"
217 ".previous"
218 : : "r" (from), "r" (to) : "memory");
219 from+=64;
220 to+=64;
221 }
222 for(i=(4096-320)/64; i<4096/64; i++)
223 {
224 __asm__ __volatile__ (
225 "2: movq (%0), %%mm0\n"
226 " movntq %%mm0, (%1)\n"
227 " movq 8(%0), %%mm1\n"
228 " movntq %%mm1, 8(%1)\n"
229 " movq 16(%0), %%mm2\n"
230 " movntq %%mm2, 16(%1)\n"
231 " movq 24(%0), %%mm3\n"
232 " movntq %%mm3, 24(%1)\n"
233 " movq 32(%0), %%mm4\n"
234 " movntq %%mm4, 32(%1)\n"
235 " movq 40(%0), %%mm5\n"
236 " movntq %%mm5, 40(%1)\n"
237 " movq 48(%0), %%mm6\n"
238 " movntq %%mm6, 48(%1)\n"
239 " movq 56(%0), %%mm7\n"
240 " movntq %%mm7, 56(%1)\n"
241 : : "r" (from), "r" (to) : "memory");
242 from+=64;
243 to+=64;
244 }
 245 /* since movntq is weakly-ordered, an "sfence" is needed to become
246 * ordered again.
247 */
248 __asm__ __volatile__ (
249 " sfence \n" : :
250 );
251 kernel_fpu_end();
252}
253
254#else
255
256/*
257 * Generic MMX implementation without K7 specific streaming
258 */
259
260static void fast_clear_page(void *page)
261{
262 int i;
263
264 kernel_fpu_begin();
265
266 __asm__ __volatile__ (
267 " pxor %%mm0, %%mm0\n" : :
268 );
269
270 for(i=0;i<4096/128;i++)
271 {
272 __asm__ __volatile__ (
273 " movq %%mm0, (%0)\n"
274 " movq %%mm0, 8(%0)\n"
275 " movq %%mm0, 16(%0)\n"
276 " movq %%mm0, 24(%0)\n"
277 " movq %%mm0, 32(%0)\n"
278 " movq %%mm0, 40(%0)\n"
279 " movq %%mm0, 48(%0)\n"
280 " movq %%mm0, 56(%0)\n"
281 " movq %%mm0, 64(%0)\n"
282 " movq %%mm0, 72(%0)\n"
283 " movq %%mm0, 80(%0)\n"
284 " movq %%mm0, 88(%0)\n"
285 " movq %%mm0, 96(%0)\n"
286 " movq %%mm0, 104(%0)\n"
287 " movq %%mm0, 112(%0)\n"
288 " movq %%mm0, 120(%0)\n"
289 : : "r" (page) : "memory");
290 page+=128;
291 }
292
293 kernel_fpu_end();
294}
295
296static void fast_copy_page(void *to, void *from)
297{
298 int i;
299
300
301 kernel_fpu_begin();
302
303 __asm__ __volatile__ (
304 "1: prefetch (%0)\n"
305 " prefetch 64(%0)\n"
306 " prefetch 128(%0)\n"
307 " prefetch 192(%0)\n"
308 " prefetch 256(%0)\n"
309 "2: \n"
310 ".section .fixup, \"ax\"\n"
311 "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
312 " jmp 2b\n"
313 ".previous\n"
314 ".section __ex_table,\"a\"\n"
315 " .align 4\n"
316 " .long 1b, 3b\n"
317 ".previous"
318 : : "r" (from) );
319
320 for(i=0; i<4096/64; i++)
321 {
322 __asm__ __volatile__ (
323 "1: prefetch 320(%0)\n"
324 "2: movq (%0), %%mm0\n"
325 " movq 8(%0), %%mm1\n"
326 " movq 16(%0), %%mm2\n"
327 " movq 24(%0), %%mm3\n"
328 " movq %%mm0, (%1)\n"
329 " movq %%mm1, 8(%1)\n"
330 " movq %%mm2, 16(%1)\n"
331 " movq %%mm3, 24(%1)\n"
332 " movq 32(%0), %%mm0\n"
333 " movq 40(%0), %%mm1\n"
334 " movq 48(%0), %%mm2\n"
335 " movq 56(%0), %%mm3\n"
336 " movq %%mm0, 32(%1)\n"
337 " movq %%mm1, 40(%1)\n"
338 " movq %%mm2, 48(%1)\n"
339 " movq %%mm3, 56(%1)\n"
340 ".section .fixup, \"ax\"\n"
341 "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
342 " jmp 2b\n"
343 ".previous\n"
344 ".section __ex_table,\"a\"\n"
345 " .align 4\n"
346 " .long 1b, 3b\n"
347 ".previous"
348 : : "r" (from), "r" (to) : "memory");
349 from+=64;
350 to+=64;
351 }
352 kernel_fpu_end();
353}
354
355
356#endif
357
358/*
359 * Favour MMX for page clear and copy.
360 */
361
362static void slow_zero_page(void * page)
363{
364 int d0, d1;
365 __asm__ __volatile__( \
366 "cld\n\t" \
367 "rep ; stosl" \
368 : "=&c" (d0), "=&D" (d1)
369 :"a" (0),"1" (page),"0" (1024)
370 :"memory");
371}
372
373void mmx_clear_page(void * page)
374{
375 if(unlikely(in_interrupt()))
376 slow_zero_page(page);
377 else
378 fast_clear_page(page);
379}
380
381static void slow_copy_page(void *to, void *from)
382{
383 int d0, d1, d2;
384 __asm__ __volatile__( \
385 "cld\n\t" \
386 "rep ; movsl" \
387 : "=&c" (d0), "=&D" (d1), "=&S" (d2) \
388 : "0" (1024),"1" ((long) to),"2" ((long) from) \
389 : "memory");
390}
391
392
393void mmx_copy_page(void *to, void *from)
394{
395 if(unlikely(in_interrupt()))
396 slow_copy_page(to, from);
397 else
398 fast_copy_page(to, from);
399}
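The fixup trick used throughout this file deserves a note: "movw $0x1AEB, 1b" stores the little-endian bytes EB 1A over the first prefetch, and 0xEB is the x86 "jmp rel8" opcode, so a faulting prefetch block is replaced at runtime by a jump 26 bytes past the end of the 2-byte jmp, i.e. over the whole 28-byte prefetch group. An illustrative decoding (standalone C, not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned short patch = 0x1AEB;	/* the word the fixup stores */
	unsigned char *b = (unsigned char *)&patch;

	/* on little-endian x86 this prints "EB 1A -> jmp .+26" */
	printf("%02X %02X -> jmp .+%d\n", b[0], b[1], b[1]);
	return 0;
}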
diff --git a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
new file mode 100644
index 000000000000..a32d9f570f48
--- /dev/null
+++ b/arch/i386/lib/putuser.S
@@ -0,0 +1,87 @@
1/*
2 * __put_user functions.
3 *
4 * (C) Copyright 2005 Linus Torvalds
5 *
6 * These functions have a non-standard call interface
7 * to make them more efficient, especially as they
8 * return an error value in addition to the "real"
9 * return value.
10 */
11#include <asm/thread_info.h>
12
13
14/*
15 * __put_user_X
16 *
17 * Inputs: %eax[:%edx] contains the data
18 * %ecx contains the address
19 *
20 * Outputs: %eax is error code (0 or -EFAULT)
21 *
22 * These functions should not modify any other registers,
23 * as they get called from within inline assembly.
24 */
25
26#define ENTER pushl %ebx ; GET_THREAD_INFO(%ebx)
27#define EXIT popl %ebx ; ret
28
29.text
30.align 4
31.globl __put_user_1
32__put_user_1:
33 ENTER
34 cmpl TI_addr_limit(%ebx),%ecx
35 jae bad_put_user
361: movb %al,(%ecx)
37 xorl %eax,%eax
38 EXIT
39
40.align 4
41.globl __put_user_2
42__put_user_2:
43 ENTER
44 movl TI_addr_limit(%ebx),%ebx
45 subl $1,%ebx
46 cmpl %ebx,%ecx
47 jae bad_put_user
482: movw %ax,(%ecx)
49 xorl %eax,%eax
50 EXIT
51
52.align 4
53.globl __put_user_4
54__put_user_4:
55 ENTER
56 movl TI_addr_limit(%ebx),%ebx
57 subl $3,%ebx
58 cmpl %ebx,%ecx
59 jae bad_put_user
603: movl %eax,(%ecx)
61 xorl %eax,%eax
62 EXIT
63
64.align 4
65.globl __put_user_8
66__put_user_8:
67 ENTER
68 movl TI_addr_limit(%ebx),%ebx
69 subl $7,%ebx
70 cmpl %ebx,%ecx
71 jae bad_put_user
724: movl %eax,(%ecx)
735: movl %edx,4(%ecx)
74 xorl %eax,%eax
75 EXIT
76
77bad_put_user:
78 movl $-14,%eax
79 EXIT
80
81.section __ex_table,"a"
82 .long 1b,bad_put_user
83 .long 2b,bad_put_user
84 .long 3b,bad_put_user
85 .long 4b,bad_put_user
86 .long 5b,bad_put_user
87.previous
diff --git a/arch/i386/lib/strstr.c b/arch/i386/lib/strstr.c
new file mode 100644
index 000000000000..a3dafbf59dae
--- /dev/null
+++ b/arch/i386/lib/strstr.c
@@ -0,0 +1,31 @@
1#include <linux/string.h>
2
3char * strstr(const char * cs,const char * ct)
4{
5int d0, d1;
6register char * __res;
7__asm__ __volatile__(
8 "movl %6,%%edi\n\t"
9 "repne\n\t"
10 "scasb\n\t"
11 "notl %%ecx\n\t"
12 "decl %%ecx\n\t" /* NOTE! This also sets Z if searchstring='' */
13 "movl %%ecx,%%edx\n"
14 "1:\tmovl %6,%%edi\n\t"
15 "movl %%esi,%%eax\n\t"
16 "movl %%edx,%%ecx\n\t"
17 "repe\n\t"
18 "cmpsb\n\t"
19 "je 2f\n\t" /* also works for empty string, see above */
20 "xchgl %%eax,%%esi\n\t"
21 "incl %%esi\n\t"
22 "cmpb $0,-1(%%eax)\n\t"
23 "jne 1b\n\t"
24 "xorl %%eax,%%eax\n\t"
25 "2:"
26 :"=a" (__res), "=&c" (d0), "=&S" (d1)
27 :"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct)
28 :"dx", "di");
29return __res;
30}
31
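The repne scasb prologue above computes the needle length, after which the repe cmpsb loop is a straightforward quadratic scan. The same algorithm in portable illustrative C:

#include <stddef.h>

static char *strstr_ref(const char *cs, const char *ct)
{
	size_t i, j;

	for (i = 0; cs[i]; i++) {
		for (j = 0; ct[j] && cs[i + j] == ct[j]; j++)
			;
		if (!ct[j])
			return (char *)&cs[i];	/* full (or empty) match */
	}
	return ct[0] ? NULL : (char *)cs;	/* empty needle matches */
}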
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
new file mode 100644
index 000000000000..51aa2bbb0269
--- /dev/null
+++ b/arch/i386/lib/usercopy.c
@@ -0,0 +1,636 @@
1/*
2 * User address space access functions.
 3 * The non-inlined parts of asm-i386/uaccess.h are here.
4 *
5 * Copyright 1997 Andi Kleen <ak@muc.de>
6 * Copyright 1997 Linus Torvalds
7 */
8#include <linux/config.h>
9#include <linux/mm.h>
10#include <linux/highmem.h>
11#include <linux/blkdev.h>
12#include <linux/module.h>
13#include <asm/uaccess.h>
14#include <asm/mmx.h>
15
16static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n)
17{
18#ifdef CONFIG_X86_INTEL_USERCOPY
19 if (n >= 64 && ((a1 ^ a2) & movsl_mask.mask))
20 return 0;
21#endif
22 return 1;
23}
24#define movsl_is_ok(a1,a2,n) \
25 __movsl_is_ok((unsigned long)(a1),(unsigned long)(a2),(n))
26
27/*
28 * Copy a null terminated string from userspace.
29 */
30
31#define __do_strncpy_from_user(dst,src,count,res) \
32do { \
33 int __d0, __d1, __d2; \
34 might_sleep(); \
35 __asm__ __volatile__( \
36 " testl %1,%1\n" \
37 " jz 2f\n" \
38 "0: lodsb\n" \
39 " stosb\n" \
40 " testb %%al,%%al\n" \
41 " jz 1f\n" \
42 " decl %1\n" \
43 " jnz 0b\n" \
44 "1: subl %1,%0\n" \
45 "2:\n" \
46 ".section .fixup,\"ax\"\n" \
47 "3: movl %5,%0\n" \
48 " jmp 2b\n" \
49 ".previous\n" \
50 ".section __ex_table,\"a\"\n" \
51 " .align 4\n" \
52 " .long 0b,3b\n" \
53 ".previous" \
54 : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
55 "=&D" (__d2) \
56 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
57 : "memory"); \
58} while (0)
59
60/**
61 * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking.
62 * @dst: Destination address, in kernel space. This buffer must be at
63 * least @count bytes long.
64 * @src: Source address, in user space.
65 * @count: Maximum number of bytes to copy, including the trailing NUL.
66 *
67 * Copies a NUL-terminated string from userspace to kernel space.
68 * Caller must check the specified block with access_ok() before calling
69 * this function.
70 *
71 * On success, returns the length of the string (not including the trailing
72 * NUL).
73 *
74 * If access to userspace fails, returns -EFAULT (some data may have been
75 * copied).
76 *
77 * If @count is smaller than the length of the string, copies @count bytes
78 * and returns @count.
79 */
80long
81__strncpy_from_user(char *dst, const char __user *src, long count)
82{
83 long res;
84 __do_strncpy_from_user(dst, src, count, res);
85 return res;
86}
87
88/**
89 * strncpy_from_user: - Copy a NUL terminated string from userspace.
90 * @dst: Destination address, in kernel space. This buffer must be at
91 * least @count bytes long.
92 * @src: Source address, in user space.
93 * @count: Maximum number of bytes to copy, including the trailing NUL.
94 *
95 * Copies a NUL-terminated string from userspace to kernel space.
96 *
97 * On success, returns the length of the string (not including the trailing
98 * NUL).
99 *
100 * If access to userspace fails, returns -EFAULT (some data may have been
101 * copied).
102 *
103 * If @count is smaller than the length of the string, copies @count bytes
104 * and returns @count.
105 */
106long
107strncpy_from_user(char *dst, const char __user *src, long count)
108{
109 long res = -EFAULT;
110 if (access_ok(VERIFY_READ, src, 1))
111 __do_strncpy_from_user(dst, src, count, res);
112 return res;
113}
114
115
116/*
117 * Zero Userspace
118 */
119
120#define __do_clear_user(addr,size) \
121do { \
122 int __d0; \
123 might_sleep(); \
124 __asm__ __volatile__( \
125 "0: rep; stosl\n" \
126 " movl %2,%0\n" \
127 "1: rep; stosb\n" \
128 "2:\n" \
129 ".section .fixup,\"ax\"\n" \
130 "3: lea 0(%2,%0,4),%0\n" \
131 " jmp 2b\n" \
132 ".previous\n" \
133 ".section __ex_table,\"a\"\n" \
134 " .align 4\n" \
135 " .long 0b,3b\n" \
136 " .long 1b,2b\n" \
137 ".previous" \
138 : "=&c"(size), "=&D" (__d0) \
139 : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
140} while (0)
141
142/**
143 * clear_user: - Zero a block of memory in user space.
144 * @to: Destination address, in user space.
145 * @n: Number of bytes to zero.
146 *
147 * Zero a block of memory in user space.
148 *
149 * Returns number of bytes that could not be cleared.
150 * On success, this will be zero.
151 */
152unsigned long
153clear_user(void __user *to, unsigned long n)
154{
155 might_sleep();
156 if (access_ok(VERIFY_WRITE, to, n))
157 __do_clear_user(to, n);
158 return n;
159}
160
161/**
162 * __clear_user: - Zero a block of memory in user space, with less checking.
163 * @to: Destination address, in user space.
164 * @n: Number of bytes to zero.
165 *
166 * Zero a block of memory in user space. Caller must check
167 * the specified block with access_ok() before calling this function.
168 *
169 * Returns number of bytes that could not be cleared.
170 * On success, this will be zero.
171 */
172unsigned long
173__clear_user(void __user *to, unsigned long n)
174{
175 __do_clear_user(to, n);
176 return n;
177}
178
179/**
 180 * strnlen_user: - Get the size of a string in user space.
181 * @s: The string to measure.
182 * @n: The maximum valid length
183 *
184 * Get the size of a NUL-terminated string in user space.
185 *
186 * Returns the size of the string INCLUDING the terminating NUL.
187 * On exception, returns 0.
188 * If the string is too long, returns a value greater than @n.
189 */
190long strnlen_user(const char __user *s, long n)
191{
192 unsigned long mask = -__addr_ok(s);
193 unsigned long res, tmp;
194
195 might_sleep();
196
197 __asm__ __volatile__(
198 " testl %0, %0\n"
199 " jz 3f\n"
200 " andl %0,%%ecx\n"
201 "0: repne; scasb\n"
202 " setne %%al\n"
203 " subl %%ecx,%0\n"
204 " addl %0,%%eax\n"
205 "1:\n"
206 ".section .fixup,\"ax\"\n"
207 "2: xorl %%eax,%%eax\n"
208 " jmp 1b\n"
209 "3: movb $1,%%al\n"
210 " jmp 1b\n"
211 ".previous\n"
212 ".section __ex_table,\"a\"\n"
213 " .align 4\n"
214 " .long 0b,2b\n"
215 ".previous"
216 :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp)
217 :"0" (n), "1" (s), "2" (0), "3" (mask)
218 :"cc");
219 return res & mask;
220}
221
222#ifdef CONFIG_X86_INTEL_USERCOPY
223static unsigned long
224__copy_user_intel(void __user *to, const void *from, unsigned long size)
225{
226 int d0, d1;
227 __asm__ __volatile__(
228 " .align 2,0x90\n"
229 "1: movl 32(%4), %%eax\n"
230 " cmpl $67, %0\n"
231 " jbe 3f\n"
232 "2: movl 64(%4), %%eax\n"
233 " .align 2,0x90\n"
234 "3: movl 0(%4), %%eax\n"
235 "4: movl 4(%4), %%edx\n"
236 "5: movl %%eax, 0(%3)\n"
237 "6: movl %%edx, 4(%3)\n"
238 "7: movl 8(%4), %%eax\n"
239 "8: movl 12(%4),%%edx\n"
240 "9: movl %%eax, 8(%3)\n"
241 "10: movl %%edx, 12(%3)\n"
242 "11: movl 16(%4), %%eax\n"
243 "12: movl 20(%4), %%edx\n"
244 "13: movl %%eax, 16(%3)\n"
245 "14: movl %%edx, 20(%3)\n"
246 "15: movl 24(%4), %%eax\n"
247 "16: movl 28(%4), %%edx\n"
248 "17: movl %%eax, 24(%3)\n"
249 "18: movl %%edx, 28(%3)\n"
250 "19: movl 32(%4), %%eax\n"
251 "20: movl 36(%4), %%edx\n"
252 "21: movl %%eax, 32(%3)\n"
253 "22: movl %%edx, 36(%3)\n"
254 "23: movl 40(%4), %%eax\n"
255 "24: movl 44(%4), %%edx\n"
256 "25: movl %%eax, 40(%3)\n"
257 "26: movl %%edx, 44(%3)\n"
258 "27: movl 48(%4), %%eax\n"
259 "28: movl 52(%4), %%edx\n"
260 "29: movl %%eax, 48(%3)\n"
261 "30: movl %%edx, 52(%3)\n"
262 "31: movl 56(%4), %%eax\n"
263 "32: movl 60(%4), %%edx\n"
264 "33: movl %%eax, 56(%3)\n"
265 "34: movl %%edx, 60(%3)\n"
266 " addl $-64, %0\n"
267 " addl $64, %4\n"
268 " addl $64, %3\n"
269 " cmpl $63, %0\n"
270 " ja 1b\n"
271 "35: movl %0, %%eax\n"
272 " shrl $2, %0\n"
273 " andl $3, %%eax\n"
274 " cld\n"
275 "99: rep; movsl\n"
276 "36: movl %%eax, %0\n"
277 "37: rep; movsb\n"
278 "100:\n"
279 ".section .fixup,\"ax\"\n"
280 "101: lea 0(%%eax,%0,4),%0\n"
281 " jmp 100b\n"
282 ".previous\n"
283 ".section __ex_table,\"a\"\n"
284 " .align 4\n"
285 " .long 1b,100b\n"
286 " .long 2b,100b\n"
287 " .long 3b,100b\n"
288 " .long 4b,100b\n"
289 " .long 5b,100b\n"
290 " .long 6b,100b\n"
291 " .long 7b,100b\n"
292 " .long 8b,100b\n"
293 " .long 9b,100b\n"
294 " .long 10b,100b\n"
295 " .long 11b,100b\n"
296 " .long 12b,100b\n"
297 " .long 13b,100b\n"
298 " .long 14b,100b\n"
299 " .long 15b,100b\n"
300 " .long 16b,100b\n"
301 " .long 17b,100b\n"
302 " .long 18b,100b\n"
303 " .long 19b,100b\n"
304 " .long 20b,100b\n"
305 " .long 21b,100b\n"
306 " .long 22b,100b\n"
307 " .long 23b,100b\n"
308 " .long 24b,100b\n"
309 " .long 25b,100b\n"
310 " .long 26b,100b\n"
311 " .long 27b,100b\n"
312 " .long 28b,100b\n"
313 " .long 29b,100b\n"
314 " .long 30b,100b\n"
315 " .long 31b,100b\n"
316 " .long 32b,100b\n"
317 " .long 33b,100b\n"
318 " .long 34b,100b\n"
319 " .long 35b,100b\n"
320 " .long 36b,100b\n"
321 " .long 37b,100b\n"
322 " .long 99b,101b\n"
323 ".previous"
324 : "=&c"(size), "=&D" (d0), "=&S" (d1)
325 : "1"(to), "2"(from), "0"(size)
326 : "eax", "edx", "memory");
327 return size;
328}
329
330static unsigned long
331__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
332{
333 int d0, d1;
334 __asm__ __volatile__(
335 " .align 2,0x90\n"
336 "0: movl 32(%4), %%eax\n"
337 " cmpl $67, %0\n"
338 " jbe 2f\n"
339 "1: movl 64(%4), %%eax\n"
340 " .align 2,0x90\n"
341 "2: movl 0(%4), %%eax\n"
342 "21: movl 4(%4), %%edx\n"
343 " movl %%eax, 0(%3)\n"
344 " movl %%edx, 4(%3)\n"
345 "3: movl 8(%4), %%eax\n"
346 "31: movl 12(%4),%%edx\n"
347 " movl %%eax, 8(%3)\n"
348 " movl %%edx, 12(%3)\n"
349 "4: movl 16(%4), %%eax\n"
350 "41: movl 20(%4), %%edx\n"
351 " movl %%eax, 16(%3)\n"
352 " movl %%edx, 20(%3)\n"
353 "10: movl 24(%4), %%eax\n"
354 "51: movl 28(%4), %%edx\n"
355 " movl %%eax, 24(%3)\n"
356 " movl %%edx, 28(%3)\n"
357 "11: movl 32(%4), %%eax\n"
358 "61: movl 36(%4), %%edx\n"
359 " movl %%eax, 32(%3)\n"
360 " movl %%edx, 36(%3)\n"
361 "12: movl 40(%4), %%eax\n"
362 "71: movl 44(%4), %%edx\n"
363 " movl %%eax, 40(%3)\n"
364 " movl %%edx, 44(%3)\n"
365 "13: movl 48(%4), %%eax\n"
366 "81: movl 52(%4), %%edx\n"
367 " movl %%eax, 48(%3)\n"
368 " movl %%edx, 52(%3)\n"
369 "14: movl 56(%4), %%eax\n"
370 "91: movl 60(%4), %%edx\n"
371 " movl %%eax, 56(%3)\n"
372 " movl %%edx, 60(%3)\n"
373 " addl $-64, %0\n"
374 " addl $64, %4\n"
375 " addl $64, %3\n"
376 " cmpl $63, %0\n"
377 " ja 0b\n"
378 "5: movl %0, %%eax\n"
379 " shrl $2, %0\n"
380 " andl $3, %%eax\n"
381 " cld\n"
382 "6: rep; movsl\n"
383 " movl %%eax,%0\n"
384 "7: rep; movsb\n"
385 "8:\n"
386 ".section .fixup,\"ax\"\n"
387 "9: lea 0(%%eax,%0,4),%0\n"
388 "16: pushl %0\n"
389 " pushl %%eax\n"
390 " xorl %%eax,%%eax\n"
391 " rep; stosb\n"
392 " popl %%eax\n"
393 " popl %0\n"
394 " jmp 8b\n"
395 ".previous\n"
396 ".section __ex_table,\"a\"\n"
397 " .align 4\n"
398 " .long 0b,16b\n"
399 " .long 1b,16b\n"
400 " .long 2b,16b\n"
401 " .long 21b,16b\n"
402 " .long 3b,16b\n"
403 " .long 31b,16b\n"
404 " .long 4b,16b\n"
405 " .long 41b,16b\n"
406 " .long 10b,16b\n"
407 " .long 51b,16b\n"
408 " .long 11b,16b\n"
409 " .long 61b,16b\n"
410 " .long 12b,16b\n"
411 " .long 71b,16b\n"
412 " .long 13b,16b\n"
413 " .long 81b,16b\n"
414 " .long 14b,16b\n"
415 " .long 91b,16b\n"
416 " .long 6b,9b\n"
417 " .long 7b,16b\n"
418 ".previous"
419 : "=&c"(size), "=&D" (d0), "=&S" (d1)
420 : "1"(to), "2"(from), "0"(size)
421 : "eax", "edx", "memory");
422 return size;
423}
424#else
425/*
 426 * Leave these declared but undefined. There should not be any
 427 * references to them.
428 */
429unsigned long
430__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size);
431unsigned long
432__copy_user_intel(void __user *to, const void *from, unsigned long size);
433#endif /* CONFIG_X86_INTEL_USERCOPY */
434
435/* Generic arbitrary sized copy. */
436#define __copy_user(to,from,size) \
437do { \
438 int __d0, __d1, __d2; \
439 __asm__ __volatile__( \
440 " cmp $7,%0\n" \
441 " jbe 1f\n" \
442 " movl %1,%0\n" \
443 " negl %0\n" \
444 " andl $7,%0\n" \
445 " subl %0,%3\n" \
446 "4: rep; movsb\n" \
447 " movl %3,%0\n" \
448 " shrl $2,%0\n" \
449 " andl $3,%3\n" \
450 " .align 2,0x90\n" \
451 "0: rep; movsl\n" \
452 " movl %3,%0\n" \
453 "1: rep; movsb\n" \
454 "2:\n" \
455 ".section .fixup,\"ax\"\n" \
456 "5: addl %3,%0\n" \
457 " jmp 2b\n" \
458 "3: lea 0(%3,%0,4),%0\n" \
459 " jmp 2b\n" \
460 ".previous\n" \
461 ".section __ex_table,\"a\"\n" \
462 " .align 4\n" \
463 " .long 4b,5b\n" \
464 " .long 0b,3b\n" \
465 " .long 1b,2b\n" \
466 ".previous" \
467 : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
468 : "3"(size), "0"(size), "1"(to), "2"(from) \
469 : "memory"); \
470} while (0)
471
472#define __copy_user_zeroing(to,from,size) \
473do { \
474 int __d0, __d1, __d2; \
475 __asm__ __volatile__( \
476 " cmp $7,%0\n" \
477 " jbe 1f\n" \
478 " movl %1,%0\n" \
479 " negl %0\n" \
480 " andl $7,%0\n" \
481 " subl %0,%3\n" \
482 "4: rep; movsb\n" \
483 " movl %3,%0\n" \
484 " shrl $2,%0\n" \
485 " andl $3,%3\n" \
486 " .align 2,0x90\n" \
487 "0: rep; movsl\n" \
488 " movl %3,%0\n" \
489 "1: rep; movsb\n" \
490 "2:\n" \
491 ".section .fixup,\"ax\"\n" \
492 "5: addl %3,%0\n" \
493 " jmp 6f\n" \
494 "3: lea 0(%3,%0,4),%0\n" \
495 "6: pushl %0\n" \
496 " pushl %%eax\n" \
497 " xorl %%eax,%%eax\n" \
498 " rep; stosb\n" \
499 " popl %%eax\n" \
500 " popl %0\n" \
501 " jmp 2b\n" \
502 ".previous\n" \
503 ".section __ex_table,\"a\"\n" \
504 " .align 4\n" \
505 " .long 4b,5b\n" \
506 " .long 0b,3b\n" \
507 " .long 1b,6b\n" \
508 ".previous" \
509 : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
510 : "3"(size), "0"(size), "1"(to), "2"(from) \
511 : "memory"); \
512} while (0)
513
514
515unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n)
516{
517 BUG_ON((long) n < 0);
518#ifndef CONFIG_X86_WP_WORKS_OK
519 if (unlikely(boot_cpu_data.wp_works_ok == 0) &&
520 ((unsigned long )to) < TASK_SIZE) {
521 /*
522 * CPU does not honor the WP bit when writing
523 * from supervisory mode, and due to preemption or SMP,
524 * the page tables can change at any time.
525 * Do it manually. Manfred <manfred@colorfullife.com>
526 */
527 while (n) {
528 unsigned long offset = ((unsigned long)to)%PAGE_SIZE;
529 unsigned long len = PAGE_SIZE - offset;
530 int retval;
531 struct page *pg;
532 void *maddr;
533
534 if (len > n)
535 len = n;
536
537survive:
538 down_read(&current->mm->mmap_sem);
539 retval = get_user_pages(current, current->mm,
540 (unsigned long )to, 1, 1, 0, &pg, NULL);
541
542 if (retval == -ENOMEM && current->pid == 1) {
543 up_read(&current->mm->mmap_sem);
544 blk_congestion_wait(WRITE, HZ/50);
545 goto survive;
546 }
547
548 if (retval != 1) {
549 up_read(&current->mm->mmap_sem);
550 break;
551 }
552
553 maddr = kmap_atomic(pg, KM_USER0);
554 memcpy(maddr + offset, from, len);
555 kunmap_atomic(maddr, KM_USER0);
556 set_page_dirty_lock(pg);
557 put_page(pg);
558 up_read(&current->mm->mmap_sem);
559
560 from += len;
561 to += len;
562 n -= len;
563 }
564 return n;
565 }
566#endif
567 if (movsl_is_ok(to, from, n))
568 __copy_user(to, from, n);
569 else
570 n = __copy_user_intel(to, from, n);
571 return n;
572}
573
574unsigned long
575__copy_from_user_ll(void *to, const void __user *from, unsigned long n)
576{
577 BUG_ON((long)n < 0);
578 if (movsl_is_ok(to, from, n))
579 __copy_user_zeroing(to, from, n);
580 else
581 n = __copy_user_zeroing_intel(to, from, n);
582 return n;
583}
584
585/**
586 * copy_to_user: - Copy a block of data into user space.
587 * @to: Destination address, in user space.
588 * @from: Source address, in kernel space.
589 * @n: Number of bytes to copy.
590 *
591 * Context: User context only. This function may sleep.
592 *
593 * Copy data from kernel space to user space.
594 *
595 * Returns number of bytes that could not be copied.
596 * On success, this will be zero.
597 */
598unsigned long
599copy_to_user(void __user *to, const void *from, unsigned long n)
600{
601 might_sleep();
602 BUG_ON((long) n < 0);
603 if (access_ok(VERIFY_WRITE, to, n))
604 n = __copy_to_user(to, from, n);
605 return n;
606}
607EXPORT_SYMBOL(copy_to_user);
608
609/**
610 * copy_from_user: - Copy a block of data from user space.
611 * @to: Destination address, in kernel space.
612 * @from: Source address, in user space.
613 * @n: Number of bytes to copy.
614 *
615 * Context: User context only. This function may sleep.
616 *
617 * Copy data from user space to kernel space.
618 *
619 * Returns number of bytes that could not be copied.
620 * On success, this will be zero.
621 *
622 * If some data could not be copied, this function will pad the copied
623 * data to the requested size using zero bytes.
624 */
625unsigned long
626copy_from_user(void *to, const void __user *from, unsigned long n)
627{
628 might_sleep();
629 BUG_ON((long) n < 0);
630 if (access_ok(VERIFY_READ, from, n))
631 n = __copy_from_user(to, from, n);
632 else
633 memset(to, 0, n);
634 return n;
635}
636EXPORT_SYMBOL(copy_from_user);
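Both exported entry points report failure by returning the number of bytes left uncopied, so a caller normally folds any non-zero return into -EFAULT. A minimal sketch of such a caller, assuming a hypothetical snapshot structure and handler (dev_state and my_dev_read_state are invented for illustration):

#include <linux/errno.h>
#include <asm/uaccess.h>

struct dev_state {			/* hypothetical device snapshot */
	int irq_count;
	int last_error;
};

static int my_dev_read_state(void __user *ubuf, struct dev_state *st)
{
	/* copy_to_user() returns the count of bytes NOT copied; any
	 * non-zero result means part of 'ubuf' was not writable. */
	if (copy_to_user(ubuf, st, sizeof(*st)))
		return -EFAULT;
	return 0;
}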
diff --git a/arch/i386/mach-default/Makefile b/arch/i386/mach-default/Makefile
new file mode 100644
index 000000000000..e95bb0237921
--- /dev/null
+++ b/arch/i386/mach-default/Makefile
@@ -0,0 +1,5 @@
1#
2# Makefile for the linux kernel.
3#
4
5obj-y := setup.o topology.o
diff --git a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c
new file mode 100644
index 000000000000..0aa08eaa8932
--- /dev/null
+++ b/arch/i386/mach-default/setup.c
@@ -0,0 +1,106 @@
1/*
2 * Machine specific setup for generic
3 */
4
5#include <linux/config.h>
6#include <linux/smp.h>
7#include <linux/init.h>
8#include <linux/irq.h>
9#include <linux/interrupt.h>
10#include <asm/acpi.h>
11#include <asm/arch_hooks.h>
12
13/**
14 * pre_intr_init_hook - initialisation prior to setting up interrupt vectors
15 *
16 * Description:
17 * Perform any necessary interrupt initialisation prior to setting up
18 * the "ordinary" interrupt call gates. For legacy reasons, the ISA
19 * interrupts should be initialised here if the machine emulates a PC
20 * in any way.
21 **/
22void __init pre_intr_init_hook(void)
23{
24 init_ISA_irqs();
25}
26
27/*
28 * IRQ2 is cascade interrupt to second interrupt controller
29 */
30static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL};
31
32/**
33 * intr_init_hook - post gate setup interrupt initialisation
34 *
35 * Description:
36 * Fill in any interrupts that may have been left out by the general
37 * init_IRQ() routine. Interrupts having to do with the machine rather
38 * than the devices on the I/O bus (like APIC interrupts in Intel MP
39 * systems) are started here.
40 **/
41void __init intr_init_hook(void)
42{
43#ifdef CONFIG_X86_LOCAL_APIC
44 apic_intr_init();
45#endif
46
47 if (!acpi_ioapic)
48 setup_irq(2, &irq2);
49}
50
51/**
52 * pre_setup_arch_hook - hook called prior to any setup_arch() execution
53 *
54 * Description:
55 * Generally used to activate any machine-specific identification
56 * routines that may be needed before setup_arch() runs. On VISWS
57 * this is used to get the board revision and type.
58 **/
59void __init pre_setup_arch_hook(void)
60{
61}
62
63/**
64 * trap_init_hook - initialise system specific traps
65 *
66 * Description:
67 * Called as the final act of trap_init(). Used in VISWS to initialise
68 * the various board specific APIC traps.
69 **/
70void __init trap_init_hook(void)
71{
72}
73
74static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL};
75
76/**
77 * time_init_hook - do any specific initialisations for the system timer.
78 *
79 * Description:
80 * Must plug the system timer interrupt source at HZ into the IRQ listed
81 * in irq_vectors.h:TIMER_IRQ
82 **/
83void __init time_init_hook(void)
84{
85 setup_irq(0, &irq0);
86}
87
88#ifdef CONFIG_MCA
89/**
90 * mca_nmi_hook - hook into MCA specific NMI chain
91 *
92 * Description:
93 * The MCA (Micro Channel Architecture) has an NMI chain for NMI sources
94 * along the MCA bus. Use this to hook into that chain if you will need
95 * it.
96 **/
97void __init mca_nmi_hook(void)
98{
99 /* If I recall correctly, there's a whole bunch of other things that
100 * we can do to check for NMI problems, but that's all I know about
101 * at the moment.
102 */
103
104 printk("NMI generated from unknown source!\n");
105}
106#endif
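The functions above define the complete hook surface the generic i386 code expects from every mach-* subarchitecture; the VISWS and Voyager files later in this series implement the same names with board-specific bodies. A sketch of what a hypothetical new subarchitecture would supply, keeping the default behaviour except for its own board probe (the file name and bodies are illustrative, not from the tree):

/* hypothetical arch/i386/mach-myboard/setup.c */
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <asm/arch_hooks.h>

void __init pre_intr_init_hook(void)
{
	init_ISA_irqs();		/* same as mach-default */
}

void __init intr_init_hook(void)
{
}

void __init pre_setup_arch_hook(void)
{
	/* board revision probing would go here, cf. mach-visws */
}

void __init trap_init_hook(void)
{
}

static struct irqaction irq0 = {
	.handler = timer_interrupt,
	.flags   = SA_INTERRUPT,
	.name    = "timer",
};

void __init time_init_hook(void)
{
	setup_irq(0, &irq0);		/* plug TIMER_IRQ per irq_vectors.h */
}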
diff --git a/arch/i386/mach-default/topology.c b/arch/i386/mach-default/topology.c
new file mode 100644
index 000000000000..5b3e8817dae8
--- /dev/null
+++ b/arch/i386/mach-default/topology.c
@@ -0,0 +1,98 @@
1/*
2 * arch/i386/mach-default/topology.c - Populate driverfs with topology information
3 *
4 * Written by: Matthew Dobson, IBM Corporation
5 * Original Code: Paul Dorwin, IBM Corporation, Patrick Mochel, OSDL
6 *
7 * Copyright (C) 2002, IBM Corp.
8 *
9 * All rights reserved.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
19 * NON INFRINGEMENT. See the GNU General Public License for more
20 * details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 * Send feedback to <colpatch@us.ibm.com>
27 */
28#include <linux/init.h>
29#include <linux/smp.h>
30#include <linux/nodemask.h>
31#include <asm/cpu.h>
32
33static struct i386_cpu cpu_devices[NR_CPUS];
34
35int arch_register_cpu(int num){
36 struct node *parent = NULL;
37
38#ifdef CONFIG_NUMA
39 int node = cpu_to_node(num);
40 if (node_online(node))
41 parent = &node_devices[node].node;
42#endif /* CONFIG_NUMA */
43
44 return register_cpu(&cpu_devices[num].cpu, num, parent);
45}
46
47#ifdef CONFIG_HOTPLUG_CPU
48
49void arch_unregister_cpu(int num) {
50 struct node *parent = NULL;
51
52#ifdef CONFIG_NUMA
53 int node = cpu_to_node(num);
54 if (node_online(node))
55 parent = &node_devices[node].node;
56#endif /* CONFIG_NUMA */
57
58 return unregister_cpu(&cpu_devices[num].cpu, parent);
59}
60EXPORT_SYMBOL(arch_register_cpu);
61EXPORT_SYMBOL(arch_unregister_cpu);
62#endif /*CONFIG_HOTPLUG_CPU*/
63
64
65
66#ifdef CONFIG_NUMA
67#include <linux/mmzone.h>
68#include <asm/node.h>
69
70struct i386_node node_devices[MAX_NUMNODES];
71
72static int __init topology_init(void)
73{
74 int i;
75
76 for (i = 0; i < MAX_NUMNODES; i++) {
77 if (node_online(i))
78 arch_register_node(i);
79 }
80 for (i = 0; i < NR_CPUS; i++)
81 if (cpu_possible(i)) arch_register_cpu(i);
82 return 0;
83}
84
85#else /* !CONFIG_NUMA */
86
87static int __init topology_init(void)
88{
89 int i;
90
91 for (i = 0; i < NR_CPUS; i++)
92 if (cpu_possible(i)) arch_register_cpu(i);
93 return 0;
94}
95
96#endif /* CONFIG_NUMA */
97
98subsys_initcall(topology_init);
diff --git a/arch/i386/mach-es7000/Makefile b/arch/i386/mach-es7000/Makefile
new file mode 100644
index 000000000000..69dd4da218dc
--- /dev/null
+++ b/arch/i386/mach-es7000/Makefile
@@ -0,0 +1,6 @@
1#
2# Makefile for the linux kernel.
3#
4
5obj-$(CONFIG_X86_ES7000) := es7000plat.o
6obj-$(CONFIG_X86_GENERICARCH) := es7000plat.o
diff --git a/arch/i386/mach-es7000/es7000.h b/arch/i386/mach-es7000/es7000.h
new file mode 100644
index 000000000000..70691f0c4ce2
--- /dev/null
+++ b/arch/i386/mach-es7000/es7000.h
@@ -0,0 +1,110 @@
1/*
2 * Written by: Garry Forsgren, Unisys Corporation
3 * Natalie Protasevich, Unisys Corporation
4 * This file contains the code to configure and interface
5 * with Unisys ES7000 series hardware system manager.
6 *
7 * Copyright (c) 2003 Unisys Corporation. All Rights Reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it would be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write the Free Software Foundation, Inc., 59
19 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
20 *
21 * Contact information: Unisys Corporation, Township Line & Union Meeting
22 * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
23 *
24 * http://www.unisys.com
25 */
26
27#define MIP_REG 1
28#define MIP_PSAI_REG 4
29
30#define MIP_BUSY 1
31#define MIP_SPIN 0xf0000
32#define MIP_VALID 0x0100000000000000ULL
33#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff)
34
35#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff)
36
37struct mip_reg_info {
38 unsigned long long mip_info;
39 unsigned long long delivery_info;
40 unsigned long long host_reg;
41 unsigned long long mip_reg;
42};
43
44struct part_info {
45 unsigned char type;
46 unsigned char length;
47 unsigned char part_id;
48 unsigned char apic_mode;
49 unsigned long snum;
50 char ptype[16];
51 char sname[64];
52 char pname[64];
53};
54
55struct psai {
56 unsigned long long entry_type;
57 unsigned long long addr;
58 unsigned long long bep_addr;
59};
60
61struct es7000_mem_info {
62 unsigned char type;
63 unsigned char length;
64 unsigned char resv[6];
65 unsigned long long start;
66 unsigned long long size;
67};
68
69struct es7000_oem_table {
70 unsigned long long hdr;
71 struct mip_reg_info mip;
72 struct part_info pif;
73 struct es7000_mem_info shm;
74 struct psai psai;
75};
76
77struct acpi_table_sdt {
78 unsigned long pa;
79 unsigned long count;
80 struct {
81 unsigned long pa;
82 enum acpi_table_id id;
83 unsigned long size;
84 } entry[50];
85};
86
87struct oem_table {
88 struct acpi_table_header Header;
89 u32 OEMTableAddr;
90 u32 OEMTableSize;
91};
92
93struct mip_reg {
94 unsigned long long off_0;
95 unsigned long long off_8;
96 unsigned long long off_10;
97 unsigned long long off_18;
98 unsigned long long off_20;
99 unsigned long long off_28;
100 unsigned long long off_30;
101 unsigned long long off_38;
102};
103
104#define MIP_SW_APIC 0x1020b
105#define MIP_FUNC(VALUE) (VALUE & 0xff)
106
107extern int parse_unisys_oem (char *oemptr, int oem_entries);
108extern int find_unisys_acpi_oem_table(unsigned long *oem_addr, int *length);
109extern int es7000_start_cpu(int cpu, unsigned long eip);
110extern void es7000_sw_apic(void);
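The MIP_* macros above decode the packed 64-bit words of the OEM table. A hedged sketch of how one mip_reg_info entry comes apart, mirroring what parse_unisys_oem() does in es7000plat.c (the function name is invented):

/* illustrative decode of one packed mip_reg_info entry */
static void mip_decode_example(struct mip_reg_info *mi)
{
	unsigned long host_phys = MIP_RD_LO(mi->host_reg); /* low 32 bits */
	unsigned long mip_phys  = MIP_RD_LO(mi->mip_reg);
	int port = MIP_PORT(mi->mip_info);	/* bits 32..47: I/O port */

	/* the physical addresses are mapped with __va() before use */
	(void)host_phys; (void)mip_phys; (void)port;
}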
diff --git a/arch/i386/mach-es7000/es7000plat.c b/arch/i386/mach-es7000/es7000plat.c
new file mode 100644
index 000000000000..d5936d500479
--- /dev/null
+++ b/arch/i386/mach-es7000/es7000plat.c
@@ -0,0 +1,316 @@
1/*
2 * Written by: Garry Forsgren, Unisys Corporation
3 * Natalie Protasevich, Unisys Corporation
4 * This file contains the code to configure and interface
5 * with Unisys ES7000 series hardware system manager.
6 *
7 * Copyright (c) 2003 Unisys Corporation. All Rights Reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it would be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write the Free Software Foundation, Inc., 59
19 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
20 *
21 * Contact information: Unisys Corporation, Township Line & Union Meeting
22 * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
23 *
24 * http://www.unisys.com
25 */
26
27#include <linux/module.h>
28#include <linux/types.h>
29#include <linux/kernel.h>
30#include <linux/smp.h>
31#include <linux/string.h>
32#include <linux/spinlock.h>
33#include <linux/errno.h>
34#include <linux/notifier.h>
35#include <linux/reboot.h>
36#include <linux/init.h>
37#include <linux/acpi.h>
38#include <asm/io.h>
39#include <asm/nmi.h>
40#include <asm/smp.h>
41#include <asm/apicdef.h>
42#include "es7000.h"
43
44/*
45 * ES7000 Globals
46 */
47
48volatile unsigned long *psai = NULL;
49struct mip_reg *mip_reg;
50struct mip_reg *host_reg;
51int mip_port;
52unsigned long mip_addr, host_addr;
53
54#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_ACPI_INTERPRETER) || defined(CONFIG_ACPI_BOOT))
55
56/*
57 * GSI override for ES7000 platforms.
58 */
59
60static unsigned int base;
61
62static int
63es7000_rename_gsi(int ioapic, int gsi)
64{
65 if (!base) {
66 int i;
67 for (i = 0; i < nr_ioapics; i++)
68 base += nr_ioapic_registers[i];
69 }
70
71 if (!ioapic && (gsi < 16))
72 gsi += base;
73 return gsi;
74}
75
76#endif /* CONFIG_X86_IO_APIC && (CONFIG_ACPI_INTERPRETER || CONFIG_ACPI_BOOT) */
77
78/*
79 * Parse the OEM Table
80 */
81
82int __init
83parse_unisys_oem (char *oemptr, int oem_entries)
84{
85 int i;
86 int success = 0;
87 unsigned char type, size;
88 unsigned long val;
89 char *tp = NULL;
90 struct psai *psaip = NULL;
91 struct mip_reg_info *mi;
92 struct mip_reg *host, *mip;
93
94 tp = oemptr;
95
96 tp += 8;
97
98 for (i=0; i <= oem_entries; i++) {
99 type = *tp++;
100 size = *tp++;
101 tp -= 2;
102 switch (type) {
103 case MIP_REG:
104 mi = (struct mip_reg_info *)tp;
105 val = MIP_RD_LO(mi->host_reg);
106 host_addr = val;
107 host = (struct mip_reg *)val;
108 host_reg = __va(host);
109 val = MIP_RD_LO(mi->mip_reg);
110 mip_port = MIP_PORT(mi->mip_info);
111 mip_addr = val;
112 mip = (struct mip_reg *)val;
113 mip_reg = __va(mip);
114 Dprintk("es7000_mipcfg: host_reg = 0x%lx \n",
115 (unsigned long)host_reg);
116 Dprintk("es7000_mipcfg: mip_reg = 0x%lx \n",
117 (unsigned long)mip_reg);
118 success++;
119 break;
120 case MIP_PSAI_REG:
121 psaip = (struct psai *)tp;
122 if (tp != NULL) {
123 if (psaip->addr)
124 psai = __va(psaip->addr);
125 else
126 psai = NULL;
127 success++;
128 }
129 break;
130 default:
131 break;
132 }
133 if (i == 6) break;
134 tp += size;
135 }
136
137 if (success < 2) {
138 es7000_plat = 0;
139 } else {
140 printk("\nEnabling ES7000 specific features...\n");
141 /*
142 * Determine the generation of the ES7000 currently running.
143 *
144 * es7000_plat = 0 if the machine is NOT a Unisys ES7000 box
145 * es7000_plat = 1 if the machine is a 5xx ES7000 box
146 * es7000_plat = 2 if the machine is a x86_64 ES7000 box
147 *
148 */
149 if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2))
150 es7000_plat = 2;
151 else
152 es7000_plat = 1;
153
154 ioapic_renumber_irq = es7000_rename_gsi;
155 }
156 return es7000_plat;
157}
158
159int __init
160find_unisys_acpi_oem_table(unsigned long *oem_addr, int *length)
161{
162 struct acpi_table_rsdp *rsdp = NULL;
163 unsigned long rsdp_phys = 0;
164 struct acpi_table_header *header = NULL;
165 int i;
166 struct acpi_table_sdt sdt;
167
168 rsdp_phys = acpi_find_rsdp();
169 rsdp = __va(rsdp_phys);
170 if (rsdp->rsdt_address) {
171 struct acpi_table_rsdt *mapped_rsdt = NULL;
172 sdt.pa = rsdp->rsdt_address;
173
174 header = (struct acpi_table_header *)
175 __acpi_map_table(sdt.pa, sizeof(struct acpi_table_header));
176 if (!header)
177 return -ENODEV;
178
179 sdt.count = (header->length - sizeof(struct acpi_table_header)) >> 3;
180 mapped_rsdt = (struct acpi_table_rsdt *)
181 __acpi_map_table(sdt.pa, header->length);
182 if (!mapped_rsdt)
183 return -ENODEV;
184
185 header = &mapped_rsdt->header;
186
187 for (i = 0; i < sdt.count; i++)
188 sdt.entry[i].pa = (unsigned long) mapped_rsdt->entry[i];
189	}
190 for (i = 0; i < sdt.count; i++) {
191
192 header = (struct acpi_table_header *)
193 __acpi_map_table(sdt.entry[i].pa,
194 sizeof(struct acpi_table_header));
195 if (!header)
196 continue;
197 if (!strncmp((char *) &header->signature, "OEM1", 4)) {
198 if (!strncmp((char *) &header->oem_id, "UNISYS", 6)) {
199 void *addr;
200 struct oem_table *t;
201 acpi_table_print(header, sdt.entry[i].pa);
202 t = (struct oem_table *) __acpi_map_table(sdt.entry[i].pa, header->length);
203 addr = (void *) __acpi_map_table(t->OEMTableAddr, t->OEMTableSize);
204 *length = header->length;
205 *oem_addr = (unsigned long) addr;
206 return 0;
207 }
208 }
209 }
210 Dprintk("ES7000: did not find Unisys ACPI OEM table!\n");
211 return -1;
212}
213
214static void
215es7000_spin(int n)
216{
217 int i = 0;
218
219 while (i++ < n)
220 rep_nop();
221}
222
223static int __init
224es7000_mip_write(struct mip_reg *mip_reg)
225{
226 int status = 0;
227 int spin;
228
229 spin = MIP_SPIN;
230 while (((unsigned long long)host_reg->off_38 &
231 (unsigned long long)MIP_VALID) != 0) {
232 if (--spin <= 0) {
233			printk("es7000_mip_write: Timeout waiting for Host Valid Flag\n");
234 return -1;
235 }
236 es7000_spin(MIP_SPIN);
237 }
238
239 memcpy(host_reg, mip_reg, sizeof(struct mip_reg));
240 outb(1, mip_port);
241
242 spin = MIP_SPIN;
243
244 while (((unsigned long long)mip_reg->off_38 &
245 (unsigned long long)MIP_VALID) == 0) {
246 if (--spin <= 0) {
247			printk("es7000_mip_write: Timeout waiting for MIP Valid Flag\n");
248 return -1;
249 }
250 es7000_spin(MIP_SPIN);
251 }
252
253 status = ((unsigned long long)mip_reg->off_0 &
254 (unsigned long long)0xffff0000000000ULL) >> 48;
255 mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 &
256 (unsigned long long)~MIP_VALID);
257 return status;
258}
259
260int
261es7000_start_cpu(int cpu, unsigned long eip)
262{
263 unsigned long vect = 0, psaival = 0;
264
265 if (psai == NULL)
266 return -1;
267
268 vect = ((unsigned long)__pa(eip)/0x1000) << 16;
269 psaival = (0x1000000 | vect | cpu);
270
271 while (*psai & 0x1000000)
272 ;
273
274 *psai = psaival;
275
276 return 0;
277
278}
279
280int
281es7000_stop_cpu(int cpu)
282{
283 int startup;
284
285 if (psai == NULL)
286 return -1;
287
288	startup = (0x1000000 | cpu);
289
290 while ((*psai & 0xff00ffff) != startup)
291 ;
292
293 startup = (*psai & 0xff0000) >> 16;
294 *psai &= 0xffffff;
295
296 return 0;
297
298}
299
300void __init
301es7000_sw_apic(void)
302{
303 if (es7000_plat) {
304 int mip_status;
305 struct mip_reg es7000_mip_reg;
306
307 printk("ES7000: Enabling APIC mode.\n");
308 memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
309 es7000_mip_reg.off_0 = MIP_SW_APIC;
310 es7000_mip_reg.off_38 = (MIP_VALID);
311 while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0)
312 printk("es7000_sw_apic: command failed, status = %x\n",
313 mip_status);
314 return;
315 }
316}
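parse_unisys_oem() above walks a type/length record stream: each record begins with a one-byte type and a one-byte length, and the length counts the whole record including those two header bytes, which is why the cursor is backed up with tp -= 2 before tp += size. The same pattern in isolation, with a hypothetical per-record handler (walk_tlv and handle_record are invented for illustration):

/* hedged sketch of the type/length walk used above */
static void handle_record(unsigned char type, unsigned char *rec);

static void walk_tlv(unsigned char *p, int nrecords)
{
	int i;

	for (i = 0; i < nrecords; i++) {
		unsigned char type = p[0];
		unsigned char size = p[1];	/* includes the 2 header bytes */

		handle_record(type, p);		/* handler sees the whole record */
		p += size;			/* advance to the next record */
	}
}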
diff --git a/arch/i386/mach-generic/Makefile b/arch/i386/mach-generic/Makefile
new file mode 100644
index 000000000000..77fbc9f64fbc
--- /dev/null
+++ b/arch/i386/mach-generic/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the generic architecture
3#
4
5EXTRA_CFLAGS += -I../kernel
6
7obj-y := probe.o summit.o bigsmp.o es7000.o default.o ../mach-es7000/
diff --git a/arch/i386/mach-generic/bigsmp.c b/arch/i386/mach-generic/bigsmp.c
new file mode 100644
index 000000000000..25883b44f625
--- /dev/null
+++ b/arch/i386/mach-generic/bigsmp.c
@@ -0,0 +1,54 @@
1/*
2 * APIC driver for "bigsmp" XAPIC machines with more than 8 virtual CPUs.
3 * Drives the local APIC in "clustered mode".
4 */
5#define APIC_DEFINITION 1
6#include <linux/config.h>
7#include <linux/threads.h>
8#include <linux/cpumask.h>
9#include <asm/mpspec.h>
10#include <asm/genapic.h>
11#include <asm/fixmap.h>
12#include <asm/apicdef.h>
13#include <linux/kernel.h>
14#include <linux/smp.h>
15#include <linux/init.h>
16#include <linux/dmi.h>
17#include <asm/mach-bigsmp/mach_apic.h>
18#include <asm/mach-bigsmp/mach_apicdef.h>
19#include <asm/mach-bigsmp/mach_ipi.h>
20#include <asm/mach-default/mach_mpparse.h>
21
22static int dmi_bigsmp; /* can be set by dmi scanners */
23
24static __init int hp_ht_bigsmp(struct dmi_system_id *d)
25{
26#ifdef CONFIG_X86_GENERICARCH
27 printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
28 dmi_bigsmp = 1;
29#endif
30 return 0;
31}
32
33
34static struct dmi_system_id __initdata bigsmp_dmi_table[] = {
35 { hp_ht_bigsmp, "HP ProLiant DL760 G2", {
36 DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
37 DMI_MATCH(DMI_BIOS_VERSION, "P44-"),
38 }},
39
40 { hp_ht_bigsmp, "HP ProLiant DL740", {
41 DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
42 DMI_MATCH(DMI_BIOS_VERSION, "P47-"),
43 }},
44 { }
45};
46
47
48static __init int probe_bigsmp(void)
49{
50 dmi_check_system(bigsmp_dmi_table);
51 return dmi_bigsmp;
52}
53
54struct genapic apic_bigsmp = APIC_INIT("bigsmp", probe_bigsmp);
diff --git a/arch/i386/mach-generic/default.c b/arch/i386/mach-generic/default.c
new file mode 100644
index 000000000000..7da14e9a79c3
--- /dev/null
+++ b/arch/i386/mach-generic/default.c
@@ -0,0 +1,27 @@
1/*
2 * Default generic APIC driver. This handles up to 8 CPUs.
3 */
4#define APIC_DEFINITION 1
5#include <linux/config.h>
6#include <linux/threads.h>
7#include <linux/cpumask.h>
8#include <asm/mpspec.h>
9#include <asm/mach-default/mach_apicdef.h>
10#include <asm/genapic.h>
11#include <asm/fixmap.h>
12#include <asm/apicdef.h>
13#include <linux/kernel.h>
14#include <linux/string.h>
15#include <linux/smp.h>
16#include <linux/init.h>
17#include <asm/mach-default/mach_apic.h>
18#include <asm/mach-default/mach_ipi.h>
19#include <asm/mach-default/mach_mpparse.h>
20
21/* should be called last. */
22static __init int probe_default(void)
23{
24 return 1;
25}
26
27struct genapic apic_default = APIC_INIT("default", probe_default);
diff --git a/arch/i386/mach-generic/es7000.c b/arch/i386/mach-generic/es7000.c
new file mode 100644
index 000000000000..48d3ec37241b
--- /dev/null
+++ b/arch/i386/mach-generic/es7000.c
@@ -0,0 +1,28 @@
1/*
2 * APIC driver for the Unisys ES7000 chipset.
3 */
4#define APIC_DEFINITION 1
5#include <linux/config.h>
6#include <linux/threads.h>
7#include <linux/cpumask.h>
8#include <asm/mpspec.h>
9#include <asm/genapic.h>
10#include <asm/fixmap.h>
11#include <asm/apicdef.h>
12#include <linux/kernel.h>
13#include <linux/string.h>
14#include <linux/smp.h>
15#include <linux/init.h>
16#include <asm/mach-es7000/mach_apicdef.h>
17#include <asm/mach-es7000/mach_apic.h>
18#include <asm/mach-es7000/mach_ipi.h>
19#include <asm/mach-es7000/mach_mpparse.h>
20#include <asm/mach-es7000/mach_wakecpu.h>
21
22static __init int probe_es7000(void)
23{
24 /* probed later in mptable/ACPI hooks */
25 return 0;
26}
27
28struct genapic apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/i386/mach-generic/probe.c b/arch/i386/mach-generic/probe.c
new file mode 100644
index 000000000000..5497c65a8790
--- /dev/null
+++ b/arch/i386/mach-generic/probe.c
@@ -0,0 +1,102 @@
1/* Copyright 2003 Andi Kleen, SuSE Labs.
2 * Subject to the GNU Public License, v.2
3 *
4 * Generic x86 APIC driver probe layer.
5 */
6#include <linux/config.h>
7#include <linux/threads.h>
8#include <linux/cpumask.h>
9#include <linux/string.h>
10#include <linux/kernel.h>
11#include <linux/ctype.h>
12#include <linux/init.h>
13#include <asm/fixmap.h>
14#include <asm/mpspec.h>
15#include <asm/apicdef.h>
16#include <asm/genapic.h>
17
18extern struct genapic apic_summit;
19extern struct genapic apic_bigsmp;
20extern struct genapic apic_es7000;
21extern struct genapic apic_default;
22
23struct genapic *genapic = &apic_default;
24
25struct genapic *apic_probe[] __initdata = {
26 &apic_summit,
27 &apic_bigsmp,
28 &apic_es7000,
29 &apic_default, /* must be last */
30 NULL,
31};
32
33void __init generic_apic_probe(char *command_line)
34{
35 char *s;
36 int i;
37 int changed = 0;
38
39 s = strstr(command_line, "apic=");
40 if (s && (s == command_line || isspace(s[-1]))) {
41 char *p = strchr(s, ' '), old;
42 if (!p)
43 p = strchr(s, '\0');
44 old = *p;
45 *p = 0;
46 for (i = 0; !changed && apic_probe[i]; i++) {
47 if (!strcmp(apic_probe[i]->name, s+5)) {
48 changed = 1;
49 genapic = apic_probe[i];
50 }
51 }
52 if (!changed)
53			printk(KERN_ERR "Unknown genapic `%s' specified.\n", s + 5);
54 *p = old;
55 }
56 for (i = 0; !changed && apic_probe[i]; i++) {
57 if (apic_probe[i]->probe()) {
58 changed = 1;
59 genapic = apic_probe[i];
60 }
61 }
62 /* Not visible without early console */
63 if (!changed)
64 panic("Didn't find an APIC driver");
65
66 printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
67}
68
69/* These functions can switch the APIC even after the initial ->probe() */
70
71int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid)
72{
73 int i;
74 for (i = 0; apic_probe[i]; ++i) {
75 if (apic_probe[i]->mps_oem_check(mpc,oem,productid)) {
76 genapic = apic_probe[i];
77 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
78 genapic->name);
79 return 1;
80 }
81 }
82 return 0;
83}
84
85int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
86{
87 int i;
88 for (i = 0; apic_probe[i]; ++i) {
89 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
90 genapic = apic_probe[i];
91 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
92 genapic->name);
93 return 1;
94 }
95 }
96 return 0;
97}
98
99int hard_smp_processor_id(void)
100{
101 return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID));
102}
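Adding a driver to this probe layer takes two steps: define a struct genapic with APIC_INIT(), then list its address in apic_probe[] ahead of apic_default. A hedged sketch modelled on the summit and es7000 entries above (the myapic name is invented):

/* hypothetical mach-generic/myapic.c */
static __init int probe_myapic(void)
{
	return 0;	/* selected later via mptable/ACPI OEM checks */
}

struct genapic apic_myapic = APIC_INIT("myapic", probe_myapic);

With &apic_myapic added to apic_probe[] before &apic_default, the driver can also be forced from the kernel command line with apic=myapic, which generic_apic_probe() parses above.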
diff --git a/arch/i386/mach-generic/summit.c b/arch/i386/mach-generic/summit.c
new file mode 100644
index 000000000000..65ddf74d7f25
--- /dev/null
+++ b/arch/i386/mach-generic/summit.c
@@ -0,0 +1,27 @@
1/*
2 * APIC driver for the IBM "Summit" chipset.
3 */
4#define APIC_DEFINITION 1
5#include <linux/config.h>
6#include <linux/threads.h>
7#include <linux/cpumask.h>
8#include <asm/mpspec.h>
9#include <asm/genapic.h>
10#include <asm/fixmap.h>
11#include <asm/apicdef.h>
12#include <linux/kernel.h>
13#include <linux/string.h>
14#include <linux/smp.h>
15#include <linux/init.h>
16#include <asm/mach-summit/mach_apic.h>
17#include <asm/mach-summit/mach_apicdef.h>
18#include <asm/mach-summit/mach_ipi.h>
19#include <asm/mach-summit/mach_mpparse.h>
20
21static __init int probe_summit(void)
22{
23 /* probed later in mptable/ACPI hooks */
24 return 0;
25}
26
27struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/arch/i386/mach-visws/Makefile b/arch/i386/mach-visws/Makefile
new file mode 100644
index 000000000000..835fd96ad768
--- /dev/null
+++ b/arch/i386/mach-visws/Makefile
@@ -0,0 +1,8 @@
1#
2# Makefile for the linux kernel.
3#
4
5obj-y := setup.o traps.o reboot.o
6
7obj-$(CONFIG_X86_VISWS_APIC) += visws_apic.o
8obj-$(CONFIG_X86_LOCAL_APIC) += mpparse.o
diff --git a/arch/i386/mach-visws/mpparse.c b/arch/i386/mach-visws/mpparse.c
new file mode 100644
index 000000000000..5a22082147f4
--- /dev/null
+++ b/arch/i386/mach-visws/mpparse.c
@@ -0,0 +1,105 @@
1
2#include <linux/config.h>
3#include <linux/init.h>
4#include <linux/smp.h>
5
6#include <asm/smp.h>
7#include <asm/io.h>
8
9#include "cobalt.h"
10#include "mach_apic.h"
11
12/* Have we found an MP table */
13int smp_found_config;
14
15/*
16 * Various Linux-internal data structures created from the
17 * MP-table.
18 */
19int apic_version [MAX_APICS];
20
21int pic_mode;
22unsigned long mp_lapic_addr;
23
24/* Processor that is doing the boot up */
25unsigned int boot_cpu_physical_apicid = -1U;
26unsigned int boot_cpu_logical_apicid = -1U;
27
28/* Bitmask of physically existing CPUs */
29physid_mask_t phys_cpu_present_map;
30
31unsigned int __initdata maxcpus = NR_CPUS;
32
33/*
34 * The Visual Workstation is Intel MP compliant in the hardware
35 * sense, but it doesn't have a BIOS(-configuration table).
36 * No problem for Linux.
37 */
38
39static void __init MP_processor_info (struct mpc_config_processor *m)
40{
41 int ver, logical_apicid;
42 physid_mask_t apic_cpus;
43
44 if (!(m->mpc_cpuflag & CPU_ENABLED))
45 return;
46
47 logical_apicid = m->mpc_apicid;
48 printk(KERN_INFO "%sCPU #%d %ld:%ld APIC version %d\n",
49 m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
50 m->mpc_apicid,
51 (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
52 (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
53 m->mpc_apicver);
54
55 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
56 boot_cpu_physical_apicid = m->mpc_apicid;
57 boot_cpu_logical_apicid = logical_apicid;
58 }
59
60 ver = m->mpc_apicver;
61 if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) {
62 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
63 m->mpc_apicid, MAX_APICS);
64 return;
65 }
66
67 apic_cpus = apicid_to_cpu_present(m->mpc_apicid);
68 physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
69 /*
70 * Validate version
71 */
72 if (ver == 0x0) {
73 printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
74 "fixing up to 0x10. (tell your hw vendor)\n",
75 m->mpc_apicid);
76 ver = 0x10;
77 }
78 apic_version[m->mpc_apicid] = ver;
79}
80
81void __init find_smp_config(void)
82{
83 struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS);
84 unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
85
86 if (ncpus > CO_CPU_MAX) {
87 printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n",
88 ncpus, mp);
89
90 ncpus = CO_CPU_MAX;
91 }
92
93 if (ncpus > maxcpus)
94 ncpus = maxcpus;
95
96 smp_found_config = 1;
97 while (ncpus--)
98 MP_processor_info(mp++);
99
100 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
101}
102
103void __init get_smp_config (void)
104{
105}
diff --git a/arch/i386/mach-visws/reboot.c b/arch/i386/mach-visws/reboot.c
new file mode 100644
index 000000000000..3a81e904a7b8
--- /dev/null
+++ b/arch/i386/mach-visws/reboot.c
@@ -0,0 +1,51 @@
1#include <linux/module.h>
2#include <linux/smp.h>
3#include <linux/delay.h>
4#include <linux/platform.h>
5
6#include <asm/io.h>
7#include "piix4.h"
8
9void (*pm_power_off)(void);
10
11void machine_restart(char * __unused)
12{
13#ifdef CONFIG_SMP
14 smp_send_stop();
15#endif
16
17 /*
18 * Visual Workstations restart after this
19 * register is poked on the PIIX4
20 */
21 outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
22}
23
24EXPORT_SYMBOL(machine_restart);
25
26void machine_power_off(void)
27{
28 unsigned short pm_status;
29 extern unsigned int pci_bus0;
30
31 while ((pm_status = inw(PMSTS_PORT)) & 0x100)
32 outw(pm_status, PMSTS_PORT);
33
34 outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT);
35
36 mdelay(10);
37
38#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
39 (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
40
41 outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8);
42 outl(PIIX_SPECIAL_STOP, 0xCFC);
43}
44
45EXPORT_SYMBOL(machine_power_off);
46
47void machine_halt(void)
48{
49}
50
51EXPORT_SYMBOL(machine_halt);
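machine_power_off() above speaks PCI configuration mechanism #1 by hand: the bus/devfn/register tuple is composed into port 0xCF8 and the data moves through port 0xCFC. A hedged sketch of the matching read side, reusing the same address layout (the helper name is invented):

#include <linux/types.h>
#include <asm/io.h>

/* config-space dword read via mechanism #1, mirroring the write above */
static u32 conf1_read(unsigned int bus, unsigned int devfn, unsigned int reg)
{
	outl(0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3), 0xCF8);
	return inl(0xCFC);
}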
diff --git a/arch/i386/mach-visws/setup.c b/arch/i386/mach-visws/setup.c
new file mode 100644
index 000000000000..9f6d2d9b1be7
--- /dev/null
+++ b/arch/i386/mach-visws/setup.c
@@ -0,0 +1,134 @@
1/*
2 * Unmaintained SGI Visual Workstation support.
3 * Split out from setup.c by davej@suse.de
4 */
5
6#include <linux/smp.h>
7#include <linux/init.h>
8#include <linux/irq.h>
9#include <linux/interrupt.h>
10
11#include <asm/fixmap.h>
12#include <asm/arch_hooks.h>
13#include <asm/io.h>
14#include "cobalt.h"
15#include "piix4.h"
16
17char visws_board_type = -1;
18char visws_board_rev = -1;
19
20void __init visws_get_board_type_and_rev(void)
21{
22 int raw;
23
24 visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG)
25 >> PIIX_GPI_BD_SHIFT;
26 /*
27 * Get Board rev.
28 * First, we have to initialize the 307 part to allow us access
29 * to the GPIO registers. Let's map them at 0x0fc0 which is right
30 * after the PIIX4 PM section.
31 */
32 outb_p(SIO_DEV_SEL, SIO_INDEX);
33 outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */
34
35 outb_p(SIO_DEV_MSB, SIO_INDEX);
36 outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */
37
38 outb_p(SIO_DEV_LSB, SIO_INDEX);
39 outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */
40
41 outb_p(SIO_DEV_ENB, SIO_INDEX);
42 outb_p(1, SIO_DATA); /* Enable GPIO registers. */
43
44 /*
45 * Now, we have to map the power management section to write
46 * a bit which enables access to the GPIO registers.
47 * What lunatic came up with this shit?
48 */
49 outb_p(SIO_DEV_SEL, SIO_INDEX);
50	outb_p(SIO_PM_DEV, SIO_DATA);	/* Talk to PM regs. */
51
52 outb_p(SIO_DEV_MSB, SIO_INDEX);
53 outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */
54
55 outb_p(SIO_DEV_LSB, SIO_INDEX);
56 outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */
57
58 outb_p(SIO_DEV_ENB, SIO_INDEX);
59 outb_p(1, SIO_DATA); /* Enable PM registers. */
60
61 /*
62 * Now, write the PM register which enables the GPIO registers.
63 */
64 outb_p(SIO_PM_FER2, SIO_PM_INDEX);
65 outb_p(SIO_PM_GP_EN, SIO_PM_DATA);
66
67 /*
68 * Now, initialize the GPIO registers.
69 * We want them all to be inputs which is the
70 * power on default, so let's leave them alone.
71 * So, let's just read the board rev!
72 */
73 raw = inb_p(SIO_GP_DATA1);
74 raw &= 0x7f; /* 7 bits of valid board revision ID. */
75
76 if (visws_board_type == VISWS_320) {
77 if (raw < 0x6) {
78 visws_board_rev = 4;
79 } else if (raw < 0xc) {
80 visws_board_rev = 5;
81 } else {
82 visws_board_rev = 6;
83 }
84 } else if (visws_board_type == VISWS_540) {
85 visws_board_rev = 2;
86 } else {
87 visws_board_rev = raw;
88 }
89
90 printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n",
91 (visws_board_type == VISWS_320 ? "320" :
92 (visws_board_type == VISWS_540 ? "540" :
93 "unknown")), visws_board_rev);
94}
95
96void __init pre_intr_init_hook(void)
97{
98 init_VISWS_APIC_irqs();
99}
100
101void __init intr_init_hook(void)
102{
103#ifdef CONFIG_X86_LOCAL_APIC
104 apic_intr_init();
105#endif
106}
107
108void __init pre_setup_arch_hook(void)
109{
110 visws_get_board_type_and_rev();
111}
112
113static struct irqaction irq0 = {
114 .handler = timer_interrupt,
115 .flags = SA_INTERRUPT,
116 .name = "timer",
117};
118
119void __init time_init_hook(void)
120{
121 printk(KERN_INFO "Starting Cobalt Timer system clock\n");
122
123 /* Set the countdown value */
124 co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
125
126 /* Start the timer */
127 co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
128
129 /* Enable (unmask) the timer interrupt */
130 co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
131
132 /* Wire cpu IDT entry to s/w handler (and Cobalt APIC to IDT) */
133 setup_irq(0, &irq0);
134}
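The long outb_p() run in visws_get_board_type_and_rev() above is a single index/data protocol repeated: write a register index to SIO_INDEX, then the value to SIO_DATA. A hedged sketch of the same accesses factored into a helper (the helper is invented, not in the tree):

#include <linux/types.h>
#include <asm/io.h>

static inline void sio_write(u8 index, u8 value)
{
	outb_p(index, SIO_INDEX);
	outb_p(value, SIO_DATA);
}

/* e.g. selecting the GPIO logical device becomes:
 *	sio_write(SIO_DEV_SEL, SIO_GP_DEV);
 */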
diff --git a/arch/i386/mach-visws/traps.c b/arch/i386/mach-visws/traps.c
new file mode 100644
index 000000000000..964353992031
--- /dev/null
+++ b/arch/i386/mach-visws/traps.c
@@ -0,0 +1,69 @@
1/* VISWS traps */
2
3#include <linux/config.h>
4#include <linux/sched.h>
5#include <linux/kernel.h>
6#include <linux/init.h>
7#include <linux/pci.h>
8#include <linux/pci_ids.h>
9
10#include <asm/io.h>
11#include <asm/arch_hooks.h>
12#include <asm/apic.h>
13#include "cobalt.h"
14#include "lithium.h"
15
16
17#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4)
18#define BCD (LI_INTB | LI_INTC | LI_INTD)
19#define ALLDEVS (A01234 | BCD)
20
21static __init void lithium_init(void)
22{
23 set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS);
24 set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
25
26 if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
27 (li_pcia_read16(PCI_DEVICE_ID) != PCI_VENDOR_ID_SGI_LITHIUM)) {
28 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A');
29 panic("This machine is not SGI Visual Workstation 320/540");
30 }
31
32 if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
33 (li_pcib_read16(PCI_DEVICE_ID) != PCI_VENDOR_ID_SGI_LITHIUM)) {
34 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B');
35 panic("This machine is not SGI Visual Workstation 320/540");
36 }
37
38 li_pcia_write16(LI_PCI_INTEN, ALLDEVS);
39 li_pcib_write16(LI_PCI_INTEN, ALLDEVS);
40}
41
42static __init void cobalt_init(void)
43{
44 /*
45	 * On a normal SMP PC the local APIC is used only with SMP, but we
46	 * must set it up here regardless, to start the Cobalt clock
47 */
48 set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
49 setup_local_APIC();
50 printk(KERN_INFO "Local APIC Version %#lx, ID %#lx\n",
51 apic_read(APIC_LVR), apic_read(APIC_ID));
52
53 set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
54 set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
55 printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n",
56 co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID));
57
58 /* Enable Cobalt APIC being careful to NOT change the ID! */
59 co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE);
60
61 printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n",
62 co_apic_read(CO_APIC_ID));
63}
64
65void __init trap_init_hook(void)
66{
67 lithium_init();
68 cobalt_init();
69}
diff --git a/arch/i386/mach-visws/visws_apic.c b/arch/i386/mach-visws/visws_apic.c
new file mode 100644
index 000000000000..04e6585849a2
--- /dev/null
+++ b/arch/i386/mach-visws/visws_apic.c
@@ -0,0 +1,303 @@
1/*
2 * linux/arch/i386/mach-visws/visws_apic.c
3 *
4 * Copyright (C) 1999 Bent Hagemark, Ingo Molnar
5 *
6 * SGI Visual Workstation interrupt controller
7 *
8 * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC
9 * which serves as the main interrupt controller in the system. Non-legacy
10 * hardware in the system uses this controller directly. Legacy devices
11 * are connected to the PIIX4 which in turn has its 8259(s) connected to
12 * one of the Cobalt APIC entries.
13 *
14 * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com
15 *
16 * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru>
17 */
18
19#include <linux/config.h>
20#include <linux/kernel_stat.h>
21#include <linux/interrupt.h>
22#include <linux/irq.h>
23#include <linux/smp_lock.h>
24#include <linux/init.h>
25
26#include <asm/io.h>
27#include <asm/apic.h>
28#include <asm/i8259.h>
29
30#include "cobalt.h"
31#include "irq_vectors.h"
32
33
34static DEFINE_SPINLOCK(cobalt_lock);
35
36/*
37 * Set the given Cobalt APIC Redirection Table entry to point
38 * to the given IDT vector/index.
39 */
40static inline void co_apic_set(int entry, int irq)
41{
42 co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR));
43 co_apic_write(CO_APIC_HI(entry), 0);
44}
45
46/*
47 * Cobalt (IO)-APIC functions to handle PCI devices.
48 */
49static inline int co_apic_ide0_hack(void)
50{
51 extern char visws_board_type;
52 extern char visws_board_rev;
53
54 if (visws_board_type == VISWS_320 && visws_board_rev == 5)
55 return 5;
56 return CO_APIC_IDE0;
57}
58
59static int is_co_apic(unsigned int irq)
60{
61 if (IS_CO_APIC(irq))
62 return CO_APIC(irq);
63
64 switch (irq) {
65 case 0: return CO_APIC_CPU;
66 case CO_IRQ_IDE0: return co_apic_ide0_hack();
67 case CO_IRQ_IDE1: return CO_APIC_IDE1;
68 default: return -1;
69 }
70}
71
72
73/*
74 * This is the SGI Cobalt (IO-)APIC:
75 */
76
77static void enable_cobalt_irq(unsigned int irq)
78{
79 co_apic_set(is_co_apic(irq), irq);
80}
81
82static void disable_cobalt_irq(unsigned int irq)
83{
84 int entry = is_co_apic(irq);
85
86 co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
87 co_apic_read(CO_APIC_LO(entry));
88}
89
90/*
91 * "irq" really just serves to identify the device. Here is where we
92 * map this to the Cobalt APIC entry where it's physically wired.
93 * This is called via request_irq -> setup_irq -> irq_desc->startup()
94 */
95static unsigned int startup_cobalt_irq(unsigned int irq)
96{
97 unsigned long flags;
98
99 spin_lock_irqsave(&cobalt_lock, flags);
100 if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
101 irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
102 enable_cobalt_irq(irq);
103 spin_unlock_irqrestore(&cobalt_lock, flags);
104 return 0;
105}
106
107static void ack_cobalt_irq(unsigned int irq)
108{
109 unsigned long flags;
110
111 spin_lock_irqsave(&cobalt_lock, flags);
112 disable_cobalt_irq(irq);
113 apic_write(APIC_EOI, APIC_EIO_ACK);
114 spin_unlock_irqrestore(&cobalt_lock, flags);
115}
116
117static void end_cobalt_irq(unsigned int irq)
118{
119 unsigned long flags;
120
121 spin_lock_irqsave(&cobalt_lock, flags);
122 if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
123 enable_cobalt_irq(irq);
124 spin_unlock_irqrestore(&cobalt_lock, flags);
125}
126
127static struct hw_interrupt_type cobalt_irq_type = {
128 .typename = "Cobalt-APIC",
129 .startup = startup_cobalt_irq,
130 .shutdown = disable_cobalt_irq,
131 .enable = enable_cobalt_irq,
132 .disable = disable_cobalt_irq,
133 .ack = ack_cobalt_irq,
134 .end = end_cobalt_irq,
135};
136
137
138/*
139 * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt
140 * -- not the manner expected by the code in i8259.c.
141 *
142 * there is a 'master' physical interrupt source that gets sent to
143 * the CPU. But in the chipset there are various 'virtual' interrupts
144 * waiting to be handled. We represent this to Linux through a 'master'
145 * interrupt controller type, and through a special virtual interrupt-
146 * controller. Device drivers only see the virtual interrupt sources.
147 */
148static unsigned int startup_piix4_master_irq(unsigned int irq)
149{
150 init_8259A(0);
151
152 return startup_cobalt_irq(irq);
153}
154
155static void end_piix4_master_irq(unsigned int irq)
156{
157 unsigned long flags;
158
159 spin_lock_irqsave(&cobalt_lock, flags);
160 enable_cobalt_irq(irq);
161 spin_unlock_irqrestore(&cobalt_lock, flags);
162}
163
164static struct hw_interrupt_type piix4_master_irq_type = {
165 .typename = "PIIX4-master",
166 .startup = startup_piix4_master_irq,
167 .ack = ack_cobalt_irq,
168 .end = end_piix4_master_irq,
169};
170
171
172static struct hw_interrupt_type piix4_virtual_irq_type = {
173 .typename = "PIIX4-virtual",
174 .startup = startup_8259A_irq,
175 .shutdown = disable_8259A_irq,
176 .enable = enable_8259A_irq,
177 .disable = disable_8259A_irq,
178};
179
180
181/*
182 * PIIX4-8259 master/virtual functions to handle interrupt requests
183 * from legacy devices: floppy, parallel, serial, rtc.
184 *
185 * None of these get Cobalt APIC entries, nor do they have IDT
186 * entries. These interrupts are purely virtual and distributed from
187 * the 'master' interrupt source: CO_IRQ_8259.
188 *
189 * When the 8259 interrupts, its handler figures out which of these
190 * devices is interrupting and dispatches to its handler.
191 *
192 * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/
193 * enable_irq gets the right irq. This 'master' irq is never directly
194 * manipulated by any driver.
195 */
196static irqreturn_t piix4_master_intr(int irq, void *dev_id, struct pt_regs * regs)
197{
198 int realirq;
199 irq_desc_t *desc;
200 unsigned long flags;
201
202 spin_lock_irqsave(&i8259A_lock, flags);
203
204 /* Find out what's interrupting in the PIIX4 master 8259 */
205 outb(0x0c, 0x20); /* OCW3 Poll command */
206 realirq = inb(0x20);
207
208 /*
209 * Bit 7 == 0 means invalid/spurious
210 */
211 if (unlikely(!(realirq & 0x80)))
212 goto out_unlock;
213
214 realirq &= 7;
215
216 if (unlikely(realirq == 2)) {
217 outb(0x0c, 0xa0);
218 realirq = inb(0xa0);
219
220 if (unlikely(!(realirq & 0x80)))
221 goto out_unlock;
222
223 realirq = (realirq & 7) + 8;
224 }
225
226 /* mask and ack interrupt */
227 cached_irq_mask |= 1 << realirq;
228 if (unlikely(realirq > 7)) {
229 inb(0xa1);
230 outb(cached_slave_mask, 0xa1);
231 outb(0x60 + (realirq & 7), 0xa0);
232 outb(0x60 + 2, 0x20);
233 } else {
234 inb(0x21);
235 outb(cached_master_mask, 0x21);
236 outb(0x60 + realirq, 0x20);
237 }
238
239 spin_unlock_irqrestore(&i8259A_lock, flags);
240
241 desc = irq_desc + realirq;
242
243 /*
244 * handle this 'virtual interrupt' as a Cobalt one now.
245 */
246 kstat_cpu(smp_processor_id()).irqs[realirq]++;
247
248 if (likely(desc->action != NULL))
249 handle_IRQ_event(realirq, regs, desc->action);
250
251 if (!(desc->status & IRQ_DISABLED))
252 enable_8259A_irq(realirq);
253
254 return IRQ_HANDLED;
255
256out_unlock:
257 spin_unlock_irqrestore(&i8259A_lock, flags);
258 return IRQ_NONE;
259}
260
261static struct irqaction master_action = {
262 .handler = piix4_master_intr,
263 .name = "PIIX4-8259",
264};
265
266static struct irqaction cascade_action = {
267 .handler = no_action,
268 .name = "cascade",
269};
270
271
272void init_VISWS_APIC_irqs(void)
273{
274 int i;
275
276 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
277 irq_desc[i].status = IRQ_DISABLED;
278 irq_desc[i].action = 0;
279 irq_desc[i].depth = 1;
280
281 if (i == 0) {
282 irq_desc[i].handler = &cobalt_irq_type;
283 }
284 else if (i == CO_IRQ_IDE0) {
285 irq_desc[i].handler = &cobalt_irq_type;
286 }
287 else if (i == CO_IRQ_IDE1) {
288 irq_desc[i].handler = &cobalt_irq_type;
289 }
290 else if (i == CO_IRQ_8259) {
291 irq_desc[i].handler = &piix4_master_irq_type;
292 }
293 else if (i < CO_IRQ_APIC0) {
294 irq_desc[i].handler = &piix4_virtual_irq_type;
295 }
296 else if (IS_CO_APIC(i)) {
297 irq_desc[i].handler = &cobalt_irq_type;
298 }
299 }
300
301 setup_irq(CO_IRQ_8259, &master_action);
302 setup_irq(2, &cascade_action);
303}
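From a driver's point of view none of this plumbing is visible: a legacy device behind the PIIX4 requests its usual ISA irq number, the startup path (request_irq -> setup_irq -> irq_desc->startup()) lands in piix4_virtual_irq_type, and delivery arrives by way of piix4_master_intr() above. A hedged sketch of such a caller (handler and names invented for illustration):

#include <linux/interrupt.h>

static irqreturn_t my_rtc_intr(int irq, void *dev_id, struct pt_regs *regs)
{
	/* device-specific work would go here */
	return IRQ_HANDLED;
}

static int __init my_rtc_setup(void)
{
	/* irq 8 (RTC) never touches the Cobalt APIC directly; it is
	 * dispatched out of piix4_master_intr() above */
	return request_irq(8, my_rtc_intr, 0, "my-rtc", NULL);
}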
diff --git a/arch/i386/mach-voyager/Makefile b/arch/i386/mach-voyager/Makefile
new file mode 100644
index 000000000000..f24d29651318
--- /dev/null
+++ b/arch/i386/mach-voyager/Makefile
@@ -0,0 +1,8 @@
1#
2# Makefile for the linux kernel.
3#
4
5EXTRA_CFLAGS += -I../kernel
6obj-y := setup.o voyager_basic.o voyager_thread.o
7
8obj-$(CONFIG_SMP) += voyager_smp.o voyager_cat.o
diff --git a/arch/i386/mach-voyager/setup.c b/arch/i386/mach-voyager/setup.c
new file mode 100644
index 000000000000..df123fc487bb
--- /dev/null
+++ b/arch/i386/mach-voyager/setup.c
@@ -0,0 +1,48 @@
1/*
2 * Machine specific setup for generic
3 */
4
5#include <linux/config.h>
6#include <linux/init.h>
7#include <linux/irq.h>
8#include <linux/interrupt.h>
9#include <asm/acpi.h>
10#include <asm/arch_hooks.h>
11
12void __init pre_intr_init_hook(void)
13{
14 init_ISA_irqs();
15}
16
17/*
18 * IRQ2 is cascade interrupt to second interrupt controller
19 */
20static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL};
21
22void __init intr_init_hook(void)
23{
24#ifdef CONFIG_SMP
25 smp_intr_init();
26#endif
27
28 if (!acpi_ioapic)
29 setup_irq(2, &irq2);
30}
31
32void __init pre_setup_arch_hook(void)
33{
34 /* Voyagers run their CPUs from independent clocks, so disable
35 * the TSC code because we can't sync them */
36 tsc_disable = 1;
37}
38
39void __init trap_init_hook(void)
40{
41}
42
43static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL};
44
45void __init time_init_hook(void)
46{
47 setup_irq(0, &irq0);
48}
diff --git a/arch/i386/mach-voyager/voyager_basic.c b/arch/i386/mach-voyager/voyager_basic.c
new file mode 100644
index 000000000000..602aea240e9b
--- /dev/null
+++ b/arch/i386/mach-voyager/voyager_basic.c
@@ -0,0 +1,325 @@
1/* Copyright (C) 1999,2001
2 *
3 * Author: J.E.J.Bottomley@HansenPartnership.com
4 *
5 * linux/arch/i386/mach-voyager/voyager_basic.c
6 *
7 * This file contains all the voyager specific routines for getting
8 * initialisation of the architecture to function. For additional
9 * features see:
10 *
11 * voyager_cat.c - Voyager CAT bus interface
12 * voyager_smp.c - Voyager SMP hal (emulates linux smp.c)
13 */
14
15#include <linux/config.h>
16#include <linux/module.h>
17#include <linux/types.h>
18#include <linux/sched.h>
19#include <linux/ptrace.h>
20#include <linux/ioport.h>
21#include <linux/interrupt.h>
22#include <linux/init.h>
23#include <linux/delay.h>
24#include <linux/reboot.h>
25#include <linux/sysrq.h>
26#include <asm/io.h>
27#include <asm/voyager.h>
28#include <asm/vic.h>
29#include <linux/pm.h>
30#include <linux/irq.h>
31#include <asm/tlbflush.h>
32#include <asm/arch_hooks.h>
33
34/*
35 * Power off function, if any
36 */
37void (*pm_power_off)(void);
38
39int voyager_level = 0;
40
41struct voyager_SUS *voyager_SUS = NULL;
42
43#ifdef CONFIG_SMP
44static void
45voyager_dump(int dummy1, struct pt_regs *dummy2, struct tty_struct *dummy3)
46{
47 /* get here via a sysrq */
48 voyager_smp_dump();
49}
50
51static struct sysrq_key_op sysrq_voyager_dump_op = {
52 .handler = voyager_dump,
53 .help_msg = "Voyager",
54 .action_msg = "Dump Voyager Status",
55};
56#endif
57
58void
59voyager_detect(struct voyager_bios_info *bios)
60{
61 if(bios->len != 0xff) {
62 int class = (bios->class_1 << 8)
63 | (bios->class_2 & 0xff);
64
65 printk("Voyager System detected.\n"
66 " Class %x, Revision %d.%d\n",
67 class, bios->major, bios->minor);
68 if(class == VOYAGER_LEVEL4)
69 voyager_level = 4;
70 else if(class < VOYAGER_LEVEL5_AND_ABOVE)
71 voyager_level = 3;
72 else
73 voyager_level = 5;
74 printk(" Architecture Level %d\n", voyager_level);
75 if(voyager_level < 4)
76 printk("\n**WARNING**: Voyager HAL only supports Levels 4 and 5 Architectures at the moment\n\n");
77 /* install the power off handler */
78 pm_power_off = voyager_power_off;
79#ifdef CONFIG_SMP
80 register_sysrq_key('v', &sysrq_voyager_dump_op);
81#endif
82 } else {
83 printk("\n\n**WARNING**: No Voyager Subsystem Found\n");
84 }
85}
86
87void
88voyager_system_interrupt(int cpl, void *dev_id, struct pt_regs *regs)
89{
90 printk("Voyager: detected system interrupt\n");
91}
92
93/* Routine to read information from the extended CMOS area */
94__u8
95voyager_extended_cmos_read(__u16 addr)
96{
97 outb(addr & 0xff, 0x74);
98 outb((addr >> 8) & 0xff, 0x75);
99 return inb(0x76);
100}
101
102/* internal definitions for the SUS Click Map of memory */
103
104#define CLICK_ENTRIES 16
105#define CLICK_SIZE 4096 /* click to byte conversion for Length */
106
107typedef struct ClickMap {
108 struct Entry {
109 __u32 Address;
110 __u32 Length;
111 } Entry[CLICK_ENTRIES];
112} ClickMap_t;
113
114
115/* This routine is pretty much an awful hack to read the bios clickmap by
116 * mapping it into page 0. There are usually three regions in the map:
117 * Base Memory
118 * Extended Memory
119 * zero length marker for end of map
120 *
121 * Returns are 0 for failure and 1 for success on extracting region.
122 */
123int __init
124voyager_memory_detect(int region, __u32 *start, __u32 *length)
125{
126 int i;
127 int retval = 0;
128 __u8 cmos[4];
129 ClickMap_t *map;
130 unsigned long map_addr;
131 unsigned long old;
132
133 if(region >= CLICK_ENTRIES) {
134 printk("Voyager: Illegal ClickMap region %d\n", region);
135 return 0;
136 }
137
138 for(i = 0; i < sizeof(cmos); i++)
139 cmos[i] = voyager_extended_cmos_read(VOYAGER_MEMORY_CLICKMAP + i);
140
141 map_addr = *(unsigned long *)cmos;
142
143 /* steal page 0 for this */
144 old = pg0[0];
145 pg0[0] = ((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
146 local_flush_tlb();
147	/* the clickmap's page is now mapped at virtual page 0; keep the offset */
148 map = (ClickMap_t *)(map_addr & (~PAGE_MASK));
149
150 /* zero length is the end of the clickmap */
151 if(map->Entry[region].Length != 0) {
152 *length = map->Entry[region].Length * CLICK_SIZE;
153 *start = map->Entry[region].Address;
154 retval = 1;
155 }
156
157 /* replace the mapping */
158 pg0[0] = old;
159 local_flush_tlb();
160 return retval;
161}
162
163/* voyager specific handling code for timer interrupts. Used to hand
164 * off the timer tick to the SMP code, since the VIC doesn't have an
165 * internal timer (The QIC does, but that's another story). */
166void
167voyager_timer_interrupt(struct pt_regs *regs)
168{
169 if((jiffies & 0x3ff) == 0) {
170
171 /* There seems to be something flaky in either
172 * hardware or software that is resetting the timer 0
173	 * count to something much higher than it should be.
174 * This seems to occur in the boot sequence, just
175 * before root is mounted. Therefore, every 10
176 * seconds or so, we sanity check the timer zero count
177 * and kick it back to where it should be.
178 *
179 * FIXME: This is the most awful hack yet seen. I
180 * should work out exactly what is interfering with
181 * the timer count settings early in the boot sequence
182 * and swiftly introduce it to something sharp and
183 * pointy. */
184 __u16 val;
185 extern spinlock_t i8253_lock;
186
187 spin_lock(&i8253_lock);
188
189 outb_p(0x00, 0x43);
190 val = inb_p(0x40);
191 val |= inb(0x40) << 8;
192 spin_unlock(&i8253_lock);
193
194 if(val > LATCH) {
195 printk("\nVOYAGER: countdown timer value too high (%d), resetting\n\n", val);
196 spin_lock(&i8253_lock);
197			outb(0x34, 0x43);
198			outb_p(LATCH & 0xff, 0x40);	/* LSB */
199			outb(LATCH >> 8, 0x40);	/* MSB */
200 spin_unlock(&i8253_lock);
201 }
202 }
203#ifdef CONFIG_SMP
204 smp_vic_timer_interrupt(regs);
205#endif
206}
207
208void
209voyager_power_off(void)
210{
211 printk("VOYAGER Power Off\n");
212
213 if(voyager_level == 5) {
214 voyager_cat_power_off();
215 } else if(voyager_level == 4) {
216 /* This doesn't apparently work on most L4 machines,
217 * but the specs say to do this to get automatic power
218 * off. Unfortunately, if it doesn't power off the
219 * machine, it ends up doing a cold restart, which
220 * isn't really intended, so comment out the code */
221#if 0
222 int port;
223
224
225 /* enable the voyager Configuration Space */
226 outb((inb(VOYAGER_MC_SETUP) & 0xf0) | 0x8,
227 VOYAGER_MC_SETUP);
228 /* the port for the power off flag is an offset from the
229 floating base */
230 port = (inb(VOYAGER_SSPB_RELOCATION_PORT) << 8) + 0x21;
231 /* set the power off flag */
232 outb(inb(port) | 0x1, port);
233#endif
234 }
235 /* and wait for it to happen */
236 for(;;) {
237 __asm("cli");
238 __asm("hlt");
239 }
240}
241
242/* copied from process.c */
243static inline void
244kb_wait(void)
245{
246 int i;
247
248 for (i=0; i<0x10000; i++)
249 if ((inb_p(0x64) & 0x02) == 0)
250 break;
251}
252
253void
254machine_restart(char *cmd)
255{
256 printk("Voyager Warm Restart\n");
257 kb_wait();
258
259 if(voyager_level == 5) {
260 /* write magic values to the RTC to inform system that
261 * shutdown is beginning */
262 outb(0x8f, 0x70);
263 outb(0x5 , 0x71);
264
265 udelay(50);
266 outb(0xfe,0x64); /* pull reset low */
267 } else if(voyager_level == 4) {
268 __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT)<<8;
269 __u8 basebd = inb(VOYAGER_MC_SETUP);
270
271 outb(basebd | 0x08, VOYAGER_MC_SETUP);
272 outb(0x02, catbase + 0x21);
273 }
274 for(;;) {
275 asm("cli");
276 asm("hlt");
277 }
278}
279
280EXPORT_SYMBOL(machine_restart);
281
282void
283mca_nmi_hook(void)
284{
285 __u8 dumpval __attribute__((unused)) = inb(0xf823);
286 __u8 swnmi __attribute__((unused)) = inb(0xf813);
287
288 /* FIXME: assume dump switch pressed */
289 /* check to see if the dump switch was pressed */
290 VDEBUG(("VOYAGER: dumpval = 0x%x, swnmi = 0x%x\n", dumpval, swnmi));
291 /* clear swnmi */
292 outb(0xff, 0xf813);
293 /* tell SUS to ignore dump */
294 if(voyager_level == 5 && voyager_SUS != NULL) {
295 if(voyager_SUS->SUS_mbox == VOYAGER_DUMP_BUTTON_NMI) {
296 voyager_SUS->kernel_mbox = VOYAGER_NO_COMMAND;
297 voyager_SUS->kernel_flags |= VOYAGER_OS_IN_PROGRESS;
298 udelay(1000);
299 voyager_SUS->kernel_mbox = VOYAGER_IGNORE_DUMP;
300 voyager_SUS->kernel_flags &= ~VOYAGER_OS_IN_PROGRESS;
301 }
302 }
303 printk(KERN_ERR "VOYAGER: Dump switch pressed, printing CPU%d tracebacks\n", smp_processor_id());
304 show_stack(NULL, NULL);
305 show_state();
306}
307
308
309
310void
311machine_halt(void)
312{
313 /* treat a halt like a power off */
314 machine_power_off();
315}
316
317EXPORT_SYMBOL(machine_halt);
318
319void machine_power_off(void)
320{
321 if (pm_power_off)
322 pm_power_off();
323}
324
325EXPORT_SYMBOL(machine_power_off);
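voyager_extended_cmos_read() is the only accessor for the extended CMOS area, and voyager_memory_detect() reads four consecutive bytes from it that form the little-endian physical address of the ClickMap. A hedged sketch of that assembly written out explicitly, equivalent on x86 to the *(unsigned long *)cmos cast above (the helper name is invented):

static unsigned long voyager_read_clickmap_addr(void)
{
	unsigned long addr = 0;
	int i;

	for (i = 0; i < 4; i++)
		addr |= (unsigned long)voyager_extended_cmos_read(
			VOYAGER_MEMORY_CLICKMAP + i) << (8 * i);
	return addr;
}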
diff --git a/arch/i386/mach-voyager/voyager_cat.c b/arch/i386/mach-voyager/voyager_cat.c
new file mode 100644
index 000000000000..23967fe658d3
--- /dev/null
+++ b/arch/i386/mach-voyager/voyager_cat.c
@@ -0,0 +1,1178 @@
1/* -*- mode: c; c-basic-offset: 8 -*- */
2
3/* Copyright (C) 1999,2001
4 *
5 * Author: J.E.J.Bottomley@HansenPartnership.com
6 *
7 * linux/arch/i386/mach-voyager/voyager_cat.c
8 *
9 * This file contains all the logic for manipulating the CAT bus
10 * in a level 5 machine.
11 *
12 * The CAT bus is a serial configuration and test bus. Its primary
13 * uses are to probe the initial configuration of the system and to
14 * diagnose error conditions when a system interrupt occurs. The low
15 * level interface is fairly primitive, so most of this file consists
16 * of bit shift manipulations to send and receive packets on the
17 * serial bus */
18
19#include <linux/config.h>
20#include <linux/types.h>
21#include <linux/completion.h>
22#include <linux/sched.h>
23#include <asm/voyager.h>
24#include <asm/vic.h>
25#include <linux/ioport.h>
26#include <linux/init.h>
27#include <linux/slab.h>
28#include <linux/delay.h>
29#include <asm/io.h>
30
31#ifdef VOYAGER_CAT_DEBUG
32#define CDEBUG(x) printk x
33#else
34#define CDEBUG(x)
35#endif
36
37/* the CAT command port */
38#define CAT_CMD (sspb + 0xe)
39/* the CAT data port */
40#define CAT_DATA (sspb + 0xd)
41
42/* the internal cat functions */
43static void cat_pack(__u8 *msg, __u16 start_bit, __u8 *data,
44 __u16 num_bits);
45static void cat_unpack(__u8 *msg, __u16 start_bit, __u8 *data,
46 __u16 num_bits);
47static void cat_build_header(__u8 *header, const __u16 len,
48 const __u16 smallest_reg_bits,
49 const __u16 longest_reg_bits);
50static int cat_sendinst(voyager_module_t *modp, voyager_asic_t *asicp,
51 __u8 reg, __u8 op);
52static int cat_getdata(voyager_module_t *modp, voyager_asic_t *asicp,
53 __u8 reg, __u8 *value);
54static int cat_shiftout(__u8 *data, __u16 data_bytes, __u16 header_bytes,
55 __u8 pad_bits);
56static int cat_write(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
57 __u8 value);
58static int cat_read(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
59 __u8 *value);
60static int cat_subread(voyager_module_t *modp, voyager_asic_t *asicp,
61 __u16 offset, __u16 len, void *buf);
62static int cat_senddata(voyager_module_t *modp, voyager_asic_t *asicp,
63 __u8 reg, __u8 value);
64static int cat_disconnect(voyager_module_t *modp, voyager_asic_t *asicp);
65static int cat_connect(voyager_module_t *modp, voyager_asic_t *asicp);
66
67static inline const char *
68cat_module_name(int module_id)
69{
70 switch(module_id) {
71 case 0x10:
72 return "Processor Slot 0";
73 case 0x11:
74 return "Processor Slot 1";
75 case 0x12:
76 return "Processor Slot 2";
77 case 0x13:
78 return "Processor Slot 4";
79 case 0x14:
80 return "Memory Slot 0";
81 case 0x15:
82 return "Memory Slot 1";
83 case 0x18:
84 return "Primary Microchannel";
85 case 0x19:
86 return "Secondary Microchannel";
87 case 0x1a:
88 return "Power Supply Interface";
89 case 0x1c:
90 return "Processor Slot 5";
91 case 0x1d:
92 return "Processor Slot 6";
93 case 0x1e:
94 return "Processor Slot 7";
95 case 0x1f:
96 return "Processor Slot 8";
97 default:
98 return "Unknown Module";
99 }
100}
101
102static int sspb = 0; /* stores the super port location */
103int voyager_8slot = 0; /* set to true if a 51xx monster */
104
105voyager_module_t *voyager_cat_list;
106
107/* the I/O port assignments for the VIC and QIC */
108static struct resource vic_res = {
109 "Voyager Interrupt Controller", 0xFC00, 0xFC6F };
110static struct resource qic_res = {
111 "Quad Interrupt Controller", 0xFC70, 0xFCFF };
112
113/* This function is used to pack a data bit stream inside a message.
114 * It writes num_bits of the data buffer in msg starting at start_bit.
115 * Note: This function assumes that any unused bit in the data stream
116 * is set to zero so that the ORs will work correctly */
117#define BITS_PER_BYTE 8
118static void
119cat_pack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits)
120{
121 /* compute initial shift needed */
122 const __u16 offset = start_bit % BITS_PER_BYTE;
123 __u16 len = num_bits / BITS_PER_BYTE;
124 __u16 byte = start_bit / BITS_PER_BYTE;
125 __u16 residue = (num_bits % BITS_PER_BYTE) + offset;
126 int i;
127
128 /* adjust if we have more than a byte of residue */
129 if(residue >= BITS_PER_BYTE) {
130 residue -= BITS_PER_BYTE;
131 len++;
132 }
133
134 /* clear out the bits. We assume here that if len==0 then
135 * residue >= offset. This is always true for the catbus
136 * operations */
137 msg[byte] &= 0xff << (BITS_PER_BYTE - offset);
138 msg[byte++] |= data[0] >> offset;
139 if(len == 0)
140 return;
141 for(i = 1; i < len; i++)
142 msg[byte++] = (data[i-1] << (BITS_PER_BYTE - offset))
143 | (data[i] >> offset);
144 if(residue != 0) {
145 __u8 mask = 0xff >> residue;
146 __u8 last_byte = data[i-1] << (BITS_PER_BYTE - offset)
147 | (data[i] >> offset);
148
149 last_byte &= ~mask;
150 msg[byte] &= mask;
151 msg[byte] |= last_byte;
152 }
153 return;
154}
155/* unpack the data again (same arguments as cat_pack()). The data
156 * buffer must be zero-filled.
157 *
158 * Function: given a message string, move to start_bit and copy num_bits
159 * into data (starting at bit 0 in data).
160 */
161static void
162cat_unpack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits)
163{
164 /* compute initial shift needed */
165 const __u16 offset = start_bit % BITS_PER_BYTE;
166 __u16 len = num_bits / BITS_PER_BYTE;
167 const __u8 last_bits = num_bits % BITS_PER_BYTE;
168 __u16 byte = start_bit / BITS_PER_BYTE;
169 int i;
170
171 if(last_bits != 0)
172 len++;
173
174 /* special case: want < 8 bits from msg and we can get it from
175 * a single byte of the msg */
176 if(len == 0 && BITS_PER_BYTE - offset >= num_bits) {
177 data[0] = msg[byte] << offset;
178 data[0] &= 0xff >> (BITS_PER_BYTE - num_bits);
179 return;
180 }
181 for(i = 0; i < len; i++) {
182 /* this annoying if has to be done just in case a read of
183 * msg one beyond the array causes a panic */
184 if(offset != 0) {
185 data[i] = msg[byte++] << offset;
186 data[i] |= msg[byte] >> (BITS_PER_BYTE - offset);
187 }
188 else {
189 data[i] = msg[byte++];
190 }
191 }
192 /* do we need to truncate the final byte */
193 if(last_bits != 0) {
194 data[i-1] &= 0xff << (BITS_PER_BYTE - last_bits);
195 }
196 return;
197}
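
/* Editor's note (not in the original source): a minimal sketch of the
 * MSB-first bit-stream convention cat_pack()/cat_unpack() implement.
 * The values are hypothetical, chosen to show the byte-straddling case
 * the offset/residue arithmetic above handles:
 *
 *	__u8 msg[2] = { 0, 0 };
 *	__u8 in[2]  = { 0xA0, 0 };	(the 3-bit value 101, MSB-justified)
 *	__u8 out = 0;
 *	cat_pack(msg, 6, in, 3);	(stream bits 6..8 become 1,0,1)
 *	cat_unpack(msg, 6, &out, 3);	(out == 0xA0 again)
 *
 * Stream bits 6 and 7 land in the low two bits of msg[0] and bit 8 in
 * the MSB of msg[1].  in[] needs a second byte because cat_pack()
 * reads one byte past the last full data byte when there is a residue. */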
198
199static void
200cat_build_header(__u8 *header, const __u16 len, const __u16 smallest_reg_bits,
201 const __u16 longest_reg_bits)
202{
203 int i;
204 __u16 start_bit = (smallest_reg_bits - 1) % BITS_PER_BYTE;
205 __u8 *last_byte = &header[len - 1];
206
207 if(start_bit == 0)
208 start_bit = 1; /* must have at least one bit in the hdr */
209
210 for(i=0; i < len; i++)
211 header[i] = 0;
212
213 for(i = start_bit; i > 0; i--)
214 *last_byte = ((*last_byte) << 1) + 1;
215
216}
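
/* Editor's note (not in the original source): cat_build_header() just
 * zeroes the header and sets the low ((smallest_reg_bits - 1) % 8,
 * minimum 1) bits of the final byte.  A hypothetical example: for
 * smallest_reg_bits == 5 and len == 2 the header comes out as
 * { 0x00, 0x0f }, i.e. four 1-bits at the bottom of the last byte. */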
217
218static int
219cat_sendinst(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, __u8 op)
220{
221 __u8 parity, inst, inst_buf[4] = { 0 };
222 __u8 iseq[VOYAGER_MAX_SCAN_PATH], hseq[VOYAGER_MAX_REG_SIZE];
223 __u16 ibytes, hbytes, padbits;
224 int i;
225
226 /*
227 * Parity is the parity of the register number + 1 (READ_REGISTER
228 * and WRITE_REGISTER always add '1' to the number of bits == 1)
229 */
230 parity = (__u8)(1 + (reg & 0x01) +
231 ((__u8)(reg & 0x02) >> 1) +
232 ((__u8)(reg & 0x04) >> 2) +
233 ((__u8)(reg & 0x08) >> 3)) % 2;
234
235 inst = ((parity << 7) | (reg << 2) | op);
236
237 outb(VOYAGER_CAT_IRCYC, CAT_CMD);
238 if(!modp->scan_path_connected) {
239 if(asicp->asic_id != VOYAGER_CAT_ID) {
240 printk("**WARNING**: cat_sendinst: scan path disconnected but ASIC is not CAT\n");
241 return 1;
242 }
243 outb(VOYAGER_CAT_HEADER, CAT_DATA);
244 outb(inst, CAT_DATA);
245 if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
246 CDEBUG(("VOYAGER CAT: cat_sendinst failed to get CAT_HEADER\n"));
247 return 1;
248 }
249 return 0;
250 }
251 ibytes = modp->inst_bits / BITS_PER_BYTE;
252 if((padbits = modp->inst_bits % BITS_PER_BYTE) != 0) {
253 padbits = BITS_PER_BYTE - padbits;
254 ibytes++;
255 }
256 hbytes = modp->largest_reg / BITS_PER_BYTE;
257 if(modp->largest_reg % BITS_PER_BYTE)
258 hbytes++;
259 CDEBUG(("cat_sendinst: ibytes=%d, hbytes=%d\n", ibytes, hbytes));
260 /* initialise the instruction sequence to 0xff */
261 for(i=0; i < ibytes + hbytes; i++)
262 iseq[i] = 0xff;
263 cat_build_header(hseq, hbytes, modp->smallest_reg, modp->largest_reg);
264 cat_pack(iseq, modp->inst_bits, hseq, hbytes * BITS_PER_BYTE);
265 inst_buf[0] = inst;
266 inst_buf[1] = 0xFF >> (modp->largest_reg % BITS_PER_BYTE);
267 cat_pack(iseq, asicp->bit_location, inst_buf, asicp->ireg_length);
268#ifdef VOYAGER_CAT_DEBUG
269 printk("ins = 0x%x, iseq: ", inst);
270 for(i=0; i< ibytes + hbytes; i++)
271 printk("0x%x ", iseq[i]);
272 printk("\n");
273#endif
274 if(cat_shiftout(iseq, ibytes, hbytes, padbits)) {
275 CDEBUG(("VOYAGER CAT: cat_sendinst: cat_shiftout failed\n"));
276 return 1;
277 }
278 CDEBUG(("CAT SHIFTOUT DONE\n"));
279 return 0;
280}
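
/* Editor's note (not in the original source): the instruction byte
 * built above is (parity << 7) | (reg << 2) | op, where parity is
 * chosen so that reg's four low bits plus the parity bit always have
 * odd weight.  Worked example with hypothetical values reg == 0x3,
 * op == 0x1: parity = (1 + 1 + 1 + 0 + 0) % 2 = 1, so inst == 0x8d. */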
281
282static int
283cat_getdata(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
284 __u8 *value)
285{
286 if(!modp->scan_path_connected) {
287 if(asicp->asic_id != VOYAGER_CAT_ID) {
288 CDEBUG(("VOYAGER CAT: ERROR: cat_getdata with scan path disconnected and ASIC not CAT\n"));
289 return 1;
290 }
291 if(reg > VOYAGER_SUBADDRHI)
292 outb(VOYAGER_CAT_RUN, CAT_CMD);
293 outb(VOYAGER_CAT_DRCYC, CAT_CMD);
294 outb(VOYAGER_CAT_HEADER, CAT_DATA);
295 *value = inb(CAT_DATA);
296 outb(0xAA, CAT_DATA);
297 if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
298 CDEBUG(("cat_getdata: failed to get VOYAGER_CAT_HEADER\n"));
299 return 1;
300 }
301 return 0;
302 }
303 else {
304 __u16 sbits = modp->num_asics -1 + asicp->ireg_length;
305 __u16 sbytes = sbits / BITS_PER_BYTE;
306 __u16 tbytes;
307 __u8 string[VOYAGER_MAX_SCAN_PATH], trailer[VOYAGER_MAX_REG_SIZE];
308 __u8 padbits;
309 int i;
310
311 outb(VOYAGER_CAT_DRCYC, CAT_CMD);
312
313 if((padbits = sbits % BITS_PER_BYTE) != 0) {
314 padbits = BITS_PER_BYTE - padbits;
315 sbytes++;
316 }
317 tbytes = asicp->ireg_length / BITS_PER_BYTE;
318 if(asicp->ireg_length % BITS_PER_BYTE)
319 tbytes++;
320 CDEBUG(("cat_getdata: tbytes = %d, sbytes = %d, padbits = %d\n",
321 tbytes, sbytes, padbits));
322 cat_build_header(trailer, tbytes, 1, asicp->ireg_length);
323
324
325 for(i = tbytes - 1; i >= 0; i--) {
326 outb(trailer[i], CAT_DATA);
327 string[sbytes + i] = inb(CAT_DATA);
328 }
329
330 for(i = sbytes - 1; i >= 0; i--) {
331 outb(0xaa, CAT_DATA);
332 string[i] = inb(CAT_DATA);
333 }
334 *value = 0;
335 cat_unpack(string, padbits + (tbytes * BITS_PER_BYTE) + asicp->asic_location, value, asicp->ireg_length);
336#ifdef VOYAGER_CAT_DEBUG
337 printk("value=0x%x, string: ", *value);
338 for(i=0; i< tbytes+sbytes; i++)
339 printk("0x%x ", string[i]);
340 printk("\n");
341#endif
342
343 /* sanity check the rest of the return */
344 for(i=0; i < tbytes; i++) {
345 __u8 input = 0;
346
347 cat_unpack(string, padbits + (i * BITS_PER_BYTE), &input, BITS_PER_BYTE);
348 if(trailer[i] != input) {
349 CDEBUG(("cat_getdata: failed to sanity check rest of ret(%d) 0x%x != 0x%x\n", i, input, trailer[i]));
350 return 1;
351 }
352 }
353 CDEBUG(("cat_getdata DONE\n"));
354 return 0;
355 }
356}
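
/* Editor's note (not in the original source): in the connected-path
 * branch above, the trailer bytes are clocked out highest-index first
 * while the responses land in the top of string[], then sbytes of 0xAA
 * filler clock the remaining scan-path bits into the bottom of
 * string[]; cat_unpack() then extracts the register value from the
 * reassembled bit stream, and the trailer is re-read as a sanity
 * check. */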
357
358static int
359cat_shiftout(__u8 *data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits)
360{
361 int i;
362
363 for(i = data_bytes + header_bytes - 1; i >= header_bytes; i--)
364 outb(data[i], CAT_DATA);
365
366 for(i = header_bytes - 1; i >= 0; i--) {
367 __u8 header = 0;
368 __u8 input;
369
370 outb(data[i], CAT_DATA);
371 input = inb(CAT_DATA);
372 CDEBUG(("cat_shiftout: returned 0x%x\n", input));
373 cat_unpack(data, ((data_bytes + i) * BITS_PER_BYTE) - pad_bits,
374 &header, BITS_PER_BYTE);
375 if(input != header) {
376 CDEBUG(("VOYAGER CAT: cat_shiftout failed to return header 0x%x != 0x%x\n", input, header));
377 return 1;
378 }
379 }
380 return 0;
381}
382
383static int
384cat_senddata(voyager_module_t *modp, voyager_asic_t *asicp,
385 __u8 reg, __u8 value)
386{
387 outb(VOYAGER_CAT_DRCYC, CAT_CMD);
388 if(!modp->scan_path_connected) {
389 if(asicp->asic_id != VOYAGER_CAT_ID) {
390 CDEBUG(("VOYAGER CAT: ERROR: scan path disconnected when asic != CAT\n"));
391 return 1;
392 }
393 outb(VOYAGER_CAT_HEADER, CAT_DATA);
394 outb(value, CAT_DATA);
395 if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
396 CDEBUG(("cat_senddata: failed to get correct header response to sent data\n"));
397 return 1;
398 }
399 if(reg > VOYAGER_SUBADDRHI) {
400 outb(VOYAGER_CAT_RUN, CAT_CMD);
401 outb(VOYAGER_CAT_END, CAT_CMD);
402 outb(VOYAGER_CAT_RUN, CAT_CMD);
403 }
404
405 return 0;
406 }
407 else {
408 __u16 hbytes = asicp->ireg_length / BITS_PER_BYTE;
409 __u16 dbytes = (modp->num_asics - 1 + asicp->ireg_length)/BITS_PER_BYTE;
410 __u8 padbits, dseq[VOYAGER_MAX_SCAN_PATH],
411 hseq[VOYAGER_MAX_REG_SIZE];
412 int i;
413
414 if((padbits = (modp->num_asics - 1
415 + asicp->ireg_length) % BITS_PER_BYTE) != 0) {
416 padbits = BITS_PER_BYTE - padbits;
417 dbytes++;
418 }
419 if(asicp->ireg_length % BITS_PER_BYTE)
420 hbytes++;
421
422 cat_build_header(hseq, hbytes, 1, asicp->ireg_length);
423
424 for(i = 0; i < dbytes + hbytes; i++)
425 dseq[i] = 0xff;
426 CDEBUG(("cat_senddata: dbytes=%d, hbytes=%d, padbits=%d\n",
427 dbytes, hbytes, padbits));
428 cat_pack(dseq, modp->num_asics - 1 + asicp->ireg_length,
429 hseq, hbytes * BITS_PER_BYTE);
430 cat_pack(dseq, asicp->asic_location, &value,
431 asicp->ireg_length);
432#ifdef VOYAGER_CAT_DEBUG
433 printk("dseq ");
434 for(i=0; i<hbytes+dbytes; i++) {
435 printk("0x%x ", dseq[i]);
436 }
437 printk("\n");
438#endif
439 return cat_shiftout(dseq, dbytes, hbytes, padbits);
440 }
441}
442
443static int
444cat_write(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
445 __u8 value)
446{
447 if(cat_sendinst(modp, asicp, reg, VOYAGER_WRITE_CONFIG))
448 return 1;
449 return cat_senddata(modp, asicp, reg, value);
450}
451
452static int
453cat_read(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
454 __u8 *value)
455{
456 if(cat_sendinst(modp, asicp, reg, VOYAGER_READ_CONFIG))
457 return 1;
458 return cat_getdata(modp, asicp, reg, value);
459}
460
461static int
462cat_subaddrsetup(voyager_module_t *modp, voyager_asic_t *asicp, __u16 offset,
463 __u16 len)
464{
465 __u8 val;
466
467 if(len > 1) {
468 /* set auto increment */
469 __u8 newval;
470
471 if(cat_read(modp, asicp, VOYAGER_AUTO_INC_REG, &val)) {
472 CDEBUG(("cat_subaddrsetup: read of VOYAGER_AUTO_INC_REG failed\n"));
473 return 1;
474 }
475 CDEBUG(("cat_subaddrsetup: VOYAGER_AUTO_INC_REG = 0x%x\n", val));
476 newval = val | VOYAGER_AUTO_INC;
477 if(newval != val) {
478 if(cat_write(modp, asicp, VOYAGER_AUTO_INC_REG, newval)) {
479 CDEBUG(("cat_subaddrsetup: write to VOYAGER_AUTO_INC_REG failed\n"));
480 return 1;
481 }
482 }
483 }
484 if(cat_write(modp, asicp, VOYAGER_SUBADDRLO, (__u8)(offset &0xff))) {
485 CDEBUG(("cat_subaddrsetup: write to SUBADDRLO failed\n"));
486 return 1;
487 }
488 if(asicp->subaddr > VOYAGER_SUBADDR_LO) {
489 if(cat_write(modp, asicp, VOYAGER_SUBADDRHI, (__u8)(offset >> 8))) {
490 CDEBUG(("cat_subaddrsetup: write to SUBADDRHI failed\n"));
491 return 1;
492 }
493 cat_read(modp, asicp, VOYAGER_SUBADDRHI, &val);
494 CDEBUG(("cat_subaddrsetup: offset = %d, hi = %d\n", offset, val));
495 }
496 cat_read(modp, asicp, VOYAGER_SUBADDRLO, &val);
497 CDEBUG(("cat_subaddrsetup: offset = %d, lo = %d\n", offset, val));
498 return 0;
499}
500
501static int
502cat_subwrite(voyager_module_t *modp, voyager_asic_t *asicp, __u16 offset,
503 __u16 len, void *buf)
504{
505 int i, retval;
506
507 /* FIXME: need special actions for VOYAGER_CAT_ID here */
508 if(asicp->asic_id == VOYAGER_CAT_ID) {
509 CDEBUG(("cat_subwrite: ATTEMPT TO WRITE TO CAT ASIC\n"));
510 /* FIXME -- This is supposed to be handled better
511 * There is a problem writing to the cat asic in the
512 * PSI. The 30us delay seems to work, though */
513 udelay(30);
514 }
515
516 if((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) {
517 printk("cat_subwrite: cat_subaddrsetup FAILED\n");
518 return retval;
519 }
520
521 if(cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_WRITE_CONFIG)) {
522 printk("cat_subwrite: cat_sendinst FAILED\n");
523 return 1;
524 }
525 for(i = 0; i < len; i++) {
526 if(cat_senddata(modp, asicp, 0xFF, ((__u8 *)buf)[i])) {
527 printk("cat_subwrite: cat_sendata element at %d FAILED\n", i);
528 return 1;
529 }
530 }
531 return 0;
532}
533static int
534cat_subread(voyager_module_t *modp, voyager_asic_t *asicp, __u16 offset,
535 __u16 len, void *buf)
536{
537 int i, retval;
538
539 if((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) {
540 CDEBUG(("cat_subread: cat_subaddrsetup FAILED\n"));
541 return retval;
542 }
543
544 if(cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_READ_CONFIG)) {
545 CDEBUG(("cat_subread: cat_sendinst failed\n"));
546 return 1;
547 }
548 for(i = 0; i < len; i++) {
549 if(cat_getdata(modp, asicp, 0xFF,
550 &((__u8 *)buf)[i])) {
551 CDEBUG(("cat_subread: cat_getdata element %d failed\n", i));
552 return 1;
553 }
554 }
555 return 0;
556}
557
558
559/* buffer for storing EPROM data read in during initialisation */
560static __initdata __u8 eprom_buf[0xFFFF];
561static voyager_module_t *voyager_initial_module;
562
563/* Initialise the cat bus components. We assume this is called by the
564 * boot cpu *after* all memory initialisation has been done (so we can
565 * use kmalloc) but before smp initialisation, so we can probe the SMP
566 * configuration and pick up necessary information. */
567void
568voyager_cat_init(void)
569{
570 voyager_module_t **modpp = &voyager_initial_module;
571 voyager_asic_t **asicpp;
572 voyager_asic_t *qabc_asic = NULL;
573 int i, j;
574 unsigned long qic_addr = 0;
575 __u8 qabc_data[0x20];
576 __u8 num_submodules, val;
577 voyager_eprom_hdr_t *eprom_hdr = (voyager_eprom_hdr_t *)&eprom_buf[0];
578
579 __u8 cmos[4];
580 unsigned long addr;
581
582 /* initialise the SUS mailbox */
583 for(i=0; i<sizeof(cmos); i++)
584 cmos[i] = voyager_extended_cmos_read(VOYAGER_DUMP_LOCATION + i);
585 addr = *(unsigned long *)cmos;
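	/* Editor's note (not in the original source): the cast above
	 * assembles the mailbox address from the four CMOS bytes in
	 * host (little-endian) order; e.g. cmos[] = 00 04 00 ff reads
	 * back as addr == 0xff000400 on i386. */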
586 if((addr & 0xff000000) != 0xff000000) {
587 printk(KERN_ERR "Voyager failed to get SUS mailbox (addr = 0x%lx)\n", addr);
588 } else {
589 static struct resource res;
590
591 res.name = "voyager SUS";
592 res.start = addr;
593 res.end = addr+0x3ff;
594
595 request_resource(&iomem_resource, &res);
596 voyager_SUS = (struct voyager_SUS *)
597 ioremap(addr, 0x400);
598 printk(KERN_NOTICE "Voyager SUS mailbox version 0x%x\n",
599 voyager_SUS->SUS_version);
600 voyager_SUS->kernel_version = VOYAGER_MAILBOX_VERSION;
601 voyager_SUS->kernel_flags = VOYAGER_OS_HAS_SYSINT;
602 }
603
604 /* clear the processor counts */
605 voyager_extended_vic_processors = 0;
606 voyager_quad_processors = 0;
607
608
609
610 printk("VOYAGER: beginning CAT bus probe\n");
611 /* set up the SuperSet Port Block which tells us where the
612 * CAT communication port is */
613 sspb = inb(VOYAGER_SSPB_RELOCATION_PORT) * 0x100;
614 VDEBUG(("VOYAGER DEBUG: sspb = 0x%x\n", sspb));
615
616 /* now find out if we're 8 slot or normal */
617 if((inb(VIC_PROC_WHO_AM_I) & EIGHT_SLOT_IDENTIFIER)
618 == EIGHT_SLOT_IDENTIFIER) {
619 voyager_8slot = 1;
620 printk(KERN_NOTICE "Voyager: Eight slot 51xx configuration detected\n");
621 }
622
623 for(i = VOYAGER_MIN_MODULE;
624 i <= VOYAGER_MAX_MODULE; i++) {
625 __u8 input;
626 int asic;
627 __u16 eprom_size;
628 __u16 sp_offset;
629
630 outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT);
631 outb(i, VOYAGER_CAT_CONFIG_PORT);
632
633 /* check the presence of the module */
634 outb(VOYAGER_CAT_RUN, CAT_CMD);
635 outb(VOYAGER_CAT_IRCYC, CAT_CMD);
636 outb(VOYAGER_CAT_HEADER, CAT_DATA);
637 /* stream series of alternating 1's and 0's to stimulate
638 * response */
639 outb(0xAA, CAT_DATA);
640 input = inb(CAT_DATA);
641 outb(VOYAGER_CAT_END, CAT_CMD);
642 if(input != VOYAGER_CAT_HEADER) {
643 continue;
644 }
645 CDEBUG(("VOYAGER DEBUG: found module id 0x%x, %s\n", i,
646 cat_module_name(i)));
647 *modpp = kmalloc(sizeof(voyager_module_t), GFP_KERNEL); /*&voyager_module_storage[cat_count++];*/
648 if(*modpp == NULL) {
649 printk("**WARNING** kmalloc failure in cat_init\n");
650 continue;
651 }
652 memset(*modpp, 0, sizeof(voyager_module_t));
653 /* need temporary asic for cat_subread. It will be
654 * filled in correctly later */
655 (*modpp)->asic = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count];*/
656 if((*modpp)->asic == NULL) {
657 printk("**WARNING** kmalloc failure in cat_init\n");
658 continue;
659 }
660 memset((*modpp)->asic, 0, sizeof(voyager_asic_t));
661 (*modpp)->asic->asic_id = VOYAGER_CAT_ID;
662 (*modpp)->asic->subaddr = VOYAGER_SUBADDR_HI;
663 (*modpp)->module_addr = i;
664 (*modpp)->scan_path_connected = 0;
665 if(i == VOYAGER_PSI) {
666 /* Exception leg for modules with no EEPROM */
667 printk("Module \"%s\"\n", cat_module_name(i));
668 continue;
669 }
670
671 CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET));
672 outb(VOYAGER_CAT_RUN, CAT_CMD);
673 cat_disconnect(*modpp, (*modpp)->asic);
674 if(cat_subread(*modpp, (*modpp)->asic,
675 VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size),
676 &eprom_size)) {
677 printk("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", i);
678 outb(VOYAGER_CAT_END, CAT_CMD);
679 continue;
680 }
681 if(eprom_size > sizeof(eprom_buf)) {
682 printk("**WARNING**: Voyager EPROM buffer too small for module 0x%x data (need %d bytes)\n", i, eprom_size);
683 outb(VOYAGER_CAT_END, CAT_CMD);
684 continue;
685 }
686 outb(VOYAGER_CAT_END, CAT_CMD);
687 outb(VOYAGER_CAT_RUN, CAT_CMD);
688 CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, eprom_size));
689 if(cat_subread(*modpp, (*modpp)->asic, 0,
690 eprom_size, eprom_buf)) {
691 outb(VOYAGER_CAT_END, CAT_CMD);
692 continue;
693 }
694 outb(VOYAGER_CAT_END, CAT_CMD);
695 printk("Module \"%s\", version 0x%x, tracer 0x%x, asics %d\n",
696 cat_module_name(i), eprom_hdr->version_id,
697 *((__u32 *)eprom_hdr->tracer), eprom_hdr->num_asics);
698 (*modpp)->ee_size = eprom_hdr->ee_size;
699 (*modpp)->num_asics = eprom_hdr->num_asics;
700 asicpp = &((*modpp)->asic);
701 sp_offset = eprom_hdr->scan_path_offset;
702 /* All we really care about are the Quad cards. We
703 * identify them because they are in a processor slot
704 * and have only four asics */
705 if(i < 0x10 || (i >= 0x14 && i < 0x1c) || i > 0x1f) {
706 modpp = &((*modpp)->next);
707 continue;
708 }
709 /* Now we know it's in a processor slot; does it have
710 * a quad baseboard submodule? */
711 outb(VOYAGER_CAT_RUN, CAT_CMD);
712 cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODPRESENT,
713 &num_submodules);
714 /* lowest two bits, active low */
715 num_submodules = ~(0xfc | num_submodules);
716 CDEBUG(("VOYAGER CAT: %d submodules present\n", num_submodules));
717 if(num_submodules == 0) {
718 /* fill in the dyadic extended processors */
719 __u8 cpu = i & 0x07;
720
721 printk("Module \"%s\": Dyadic Processor Card\n",
722 cat_module_name(i));
723 voyager_extended_vic_processors |= (1<<cpu);
724 cpu += 4;
725 voyager_extended_vic_processors |= (1<<cpu);
726 outb(VOYAGER_CAT_END, CAT_CMD);
727 continue;
728 }
729
730 /* now we want to read the asics on the first submodule,
731 * which should be the quad base board */
732
733 cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, &val);
734 CDEBUG(("cat_init: SUBMODSELECT value = 0x%x\n", val));
735 val = (val & 0x7c) | VOYAGER_QUAD_BASEBOARD;
736 cat_write(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, val);
737
738 outb(VOYAGER_CAT_END, CAT_CMD);
739
740
741 CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET));
742 outb(VOYAGER_CAT_RUN, CAT_CMD);
743 cat_disconnect(*modpp, (*modpp)->asic);
744 if(cat_subread(*modpp, (*modpp)->asic,
745 VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size),
746 &eprom_size)) {
747 printk("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", i);
748 outb(VOYAGER_CAT_END, CAT_CMD);
749 continue;
750 }
751 if(eprom_size > sizeof(eprom_buf)) {
752 printk("**WARNING**: Voyager EPROM buffer too small for module 0x%x data (need %d bytes)\n", i, eprom_size);
753 outb(VOYAGER_CAT_END, CAT_CMD);
754 continue;
755 }
756 outb(VOYAGER_CAT_END, CAT_CMD);
757 outb(VOYAGER_CAT_RUN, CAT_CMD);
758 CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, eprom_size));
759 if(cat_subread(*modpp, (*modpp)->asic, 0,
760 eprom_size, eprom_buf)) {
761 outb(VOYAGER_CAT_END, CAT_CMD);
762 continue;
763 }
764 outb(VOYAGER_CAT_END, CAT_CMD);
765 /* Now do everything for the QBB submodule 1 */
766 (*modpp)->ee_size = eprom_hdr->ee_size;
767 (*modpp)->num_asics = eprom_hdr->num_asics;
768 asicpp = &((*modpp)->asic);
769 sp_offset = eprom_hdr->scan_path_offset;
770 /* get rid of the dummy CAT asic and read the real one */
771 kfree((*modpp)->asic);
772 for(asic=0; asic < (*modpp)->num_asics; asic++) {
773 int j;
774 voyager_asic_t *asicp = *asicpp
775 = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++];*/
776 voyager_sp_table_t *sp_table;
777 voyager_at_t *asic_table;
778 voyager_jtt_t *jtag_table;
779
780 if(asicp == NULL) {
781 printk("**WARNING** kmalloc failure in cat_init\n");
782 continue;
783 }
784 memset(asicp, 0, sizeof(voyager_asic_t));
785 asicpp = &(asicp->next);
786 asicp->asic_location = asic;
787 sp_table = (voyager_sp_table_t *)(eprom_buf + sp_offset);
788 asicp->asic_id = sp_table->asic_id;
789 asic_table = (voyager_at_t *)(eprom_buf + sp_table->asic_data_offset);
790 for(j=0; j<4; j++)
791 asicp->jtag_id[j] = asic_table->jtag_id[j];
792 jtag_table = (voyager_jtt_t *)(eprom_buf + asic_table->jtag_offset);
793 asicp->ireg_length = jtag_table->ireg_len;
794 asicp->bit_location = (*modpp)->inst_bits;
795 (*modpp)->inst_bits += asicp->ireg_length;
796 if(asicp->ireg_length > (*modpp)->largest_reg)
797 (*modpp)->largest_reg = asicp->ireg_length;
798 if (asicp->ireg_length < (*modpp)->smallest_reg ||
799 (*modpp)->smallest_reg == 0)
800 (*modpp)->smallest_reg = asicp->ireg_length;
801 CDEBUG(("asic 0x%x, ireg_length=%d, bit_location=%d\n",
802 asicp->asic_id, asicp->ireg_length,
803 asicp->bit_location));
804 if(asicp->asic_id == VOYAGER_QUAD_QABC) {
805 CDEBUG(("VOYAGER CAT: QABC ASIC found\n"));
806 qabc_asic = asicp;
807 }
808 sp_offset += sizeof(voyager_sp_table_t);
809 }
810 CDEBUG(("Module inst_bits = %d, largest_reg = %d, smallest_reg=%d\n",
811 (*modpp)->inst_bits, (*modpp)->largest_reg,
812 (*modpp)->smallest_reg));
813 /* OK, now we have the QUAD ASICs set up, use them.
814 * we need to:
815 *
816 * 1. Find the Memory area for the Quad CPIs.
817 * 2. Find the Extended VIC processor
818 * 3. Configure a second extended VIC processor (this
819 * cannot be done for the 51xx).
820 */
821 outb(VOYAGER_CAT_RUN, CAT_CMD);
822 cat_connect(*modpp, (*modpp)->asic);
823 CDEBUG(("CAT CONNECTED!!\n"));
824 cat_subread(*modpp, qabc_asic, 0, sizeof(qabc_data), qabc_data);
825 qic_addr = qabc_data[5] << 8;
826 qic_addr = (qic_addr | qabc_data[6]) << 8;
827 qic_addr = (qic_addr | qabc_data[7]) << 8;
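		/* Editor's note (not in the original source): the three
		 * shifts above build qic_addr = (qabc_data[5] << 24) |
		 * (qabc_data[6] << 16) | (qabc_data[7] << 8); e.g. the
		 * bytes 0x00, 0x0f, 0xc0 give qic_addr == 0x000fc000. */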
828 printk("Module \"%s\": Quad Processor Card; CPI 0x%lx, SET=0x%x\n",
829 cat_module_name(i), qic_addr, qabc_data[8]);
830#if 0 /* plumbing fails---FIXME */
831 if((qabc_data[8] & 0xf0) == 0) {
832 /* FIXME: 32 way 8 CPU slot monster cannot be
833 * plumbed this way---need to check for it */
834
835 printk("Plumbing second Extended Quad Processor\n");
836 /* second VIC line hardwired to Quad CPU 1 */
837 qabc_data[8] |= 0x20;
838 cat_subwrite(*modpp, qabc_asic, 8, 1, &qabc_data[8]);
839#ifdef VOYAGER_CAT_DEBUG
840 /* verify plumbing */
841 cat_subread(*modpp, qabc_asic, 8, 1, &qabc_data[8]);
842 if((qabc_data[8] & 0xf0) == 0) {
843 CDEBUG(("PLUMBING FAILED: 0x%x\n", qabc_data[8]));
844 }
845#endif
846 }
847#endif
848
849 {
850 struct resource *res = kmalloc(sizeof(struct resource),GFP_KERNEL);
851 memset(res, 0, sizeof(struct resource));
852 res->name = kmalloc(128, GFP_KERNEL);
853 sprintf((char *)res->name, "Voyager %s Quad CPI", cat_module_name(i));
854 res->start = qic_addr;
855 res->end = qic_addr + 0x3ff;
856 request_resource(&iomem_resource, res);
857 }
858
859 qic_addr = (unsigned long)ioremap(qic_addr, 0x400);
860
861 for(j = 0; j < 4; j++) {
862 __u8 cpu;
863
864 if(voyager_8slot) {
865 /* 8 slot has a different mapping,
866 * each slot has only one vic line, so
867 * 1 cpu in each slot must be < 8 */
868 cpu = (i & 0x07) + j*8;
869 } else {
870 cpu = (i & 0x03) + j*4;
871 }
872 if( (qabc_data[8] & (1<<j))) {
873 voyager_extended_vic_processors |= (1<<cpu);
874 }
875 if(qabc_data[8] & (1<<(j+4)) ) {
876 /* Second SET register plumbed: Quad
877 * card has two VIC connected CPUs.
878 * Secondary cannot be booted as a VIC
879 * CPU */
880 voyager_extended_vic_processors |= (1<<cpu);
881 voyager_allowed_boot_processors &= (~(1<<cpu));
882 }
883
884 voyager_quad_processors |= (1<<cpu);
885 voyager_quad_cpi_addr[cpu] = (struct voyager_qic_cpi *)
886 (qic_addr+(j<<8));
887 CDEBUG(("CPU%d: CPI address 0x%lx\n", cpu,
888 (unsigned long)voyager_quad_cpi_addr[cpu]));
889 }
890 outb(VOYAGER_CAT_END, CAT_CMD);
891
892
893
894 *asicpp = NULL;
895 modpp = &((*modpp)->next);
896 }
897 *modpp = NULL;
898 printk("CAT Bus Initialisation finished: extended procs 0x%x, quad procs 0x%x, allowed vic boot = 0x%x\n", voyager_extended_vic_processors, voyager_quad_processors, voyager_allowed_boot_processors);
899 request_resource(&ioport_resource, &vic_res);
900 if(voyager_quad_processors)
901 request_resource(&ioport_resource, &qic_res);
902 /* set up the front power switch */
903}
904
905int
906voyager_cat_readb(__u8 module, __u8 asic, int reg)
907{
908 return 0;
909}
910
911static int
912cat_disconnect(voyager_module_t *modp, voyager_asic_t *asicp)
913{
914 __u8 val;
915 int err = 0;
916
917 if(!modp->scan_path_connected)
918 return 0;
919 if(asicp->asic_id != VOYAGER_CAT_ID) {
920 CDEBUG(("cat_disconnect: ASIC is not CAT\n"));
921 return 1;
922 }
923 err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val);
924 if(err) {
925 CDEBUG(("cat_disconnect: failed to read SCANPATH\n"));
926 return err;
927 }
928 val &= VOYAGER_DISCONNECT_ASIC;
929 err = cat_write(modp, asicp, VOYAGER_SCANPATH, val);
930 if(err) {
931 CDEBUG(("cat_disconnect: failed to write SCANPATH\n"));
932 return err;
933 }
934 outb(VOYAGER_CAT_END, CAT_CMD);
935 outb(VOYAGER_CAT_RUN, CAT_CMD);
936 modp->scan_path_connected = 0;
937
938 return 0;
939}
940
941static int
942cat_connect(voyager_module_t *modp, voyager_asic_t *asicp)
943{
944 __u8 val;
945 int err = 0;
946
947 if(modp->scan_path_connected)
948 return 0;
949 if(asicp->asic_id != VOYAGER_CAT_ID) {
950 CDEBUG(("cat_connect: ASIC is not CAT\n"));
951 return 1;
952 }
953
954 err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val);
955 if(err) {
956 CDEBUG(("cat_connect: failed to read SCANPATH\n"));
957 return err;
958 }
959 val |= VOYAGER_CONNECT_ASIC;
960 err = cat_write(modp, asicp, VOYAGER_SCANPATH, val);
961 if(err) {
962 CDEBUG(("cat_connect: failed to write SCANPATH\n"));
963 return err;
964 }
965 outb(VOYAGER_CAT_END, CAT_CMD);
966 outb(VOYAGER_CAT_RUN, CAT_CMD);
967 modp->scan_path_connected = 1;
968
969 return 0;
970}
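
/* Editor's note (not in the original source): cat_connect() and
 * cat_disconnect() bracket the scan-path operations in this file --
 * callers disconnect before talking to the CAT ASIC itself (e.g. the
 * EPROM reads in voyager_cat_init()) and connect before shifting
 * instructions or data through the other ASICs on the module's scan
 * path. */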
971
972void
973voyager_cat_power_off(void)
974{
975 /* Power the machine off by writing to the PSI over the CAT
976 * bus */
977 __u8 data;
978 voyager_module_t psi = { 0 };
979 voyager_asic_t psi_asic = { 0 };
980
981 psi.asic = &psi_asic;
982 psi.asic->asic_id = VOYAGER_CAT_ID;
983 psi.asic->subaddr = VOYAGER_SUBADDR_HI;
984 psi.module_addr = VOYAGER_PSI;
985 psi.scan_path_connected = 0;
986
987 outb(VOYAGER_CAT_END, CAT_CMD);
988 /* Connect the PSI to the CAT Bus */
989 outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT);
990 outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT);
991 outb(VOYAGER_CAT_RUN, CAT_CMD);
992 cat_disconnect(&psi, &psi_asic);
993 /* Read the status */
994 cat_subread(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data);
995 outb(VOYAGER_CAT_END, CAT_CMD);
996 CDEBUG(("PSI STATUS 0x%x\n", data));
997 /* These two writes are power off prep and perform */
998 data = PSI_CLEAR;
999 outb(VOYAGER_CAT_RUN, CAT_CMD);
1000 cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data);
1001 outb(VOYAGER_CAT_END, CAT_CMD);
1002 data = PSI_POWER_DOWN;
1003 outb(VOYAGER_CAT_RUN, CAT_CMD);
1004 cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data);
1005 outb(VOYAGER_CAT_END, CAT_CMD);
1006}
1007
1008struct voyager_status voyager_status = { 0 };
1009
1010void
1011voyager_cat_psi(__u8 cmd, __u16 reg, __u8 *data)
1012{
1013 voyager_module_t psi = { 0 };
1014 voyager_asic_t psi_asic = { 0 };
1015
1016 psi.asic = &psi_asic;
1017 psi.asic->asic_id = VOYAGER_CAT_ID;
1018 psi.asic->subaddr = VOYAGER_SUBADDR_HI;
1019 psi.module_addr = VOYAGER_PSI;
1020 psi.scan_path_connected = 0;
1021
1022 outb(VOYAGER_CAT_END, CAT_CMD);
1023 /* Connect the PSI to the CAT Bus */
1024 outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT);
1025 outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT);
1026 outb(VOYAGER_CAT_RUN, CAT_CMD);
1027 cat_disconnect(&psi, &psi_asic);
1028 switch(cmd) {
1029 case VOYAGER_PSI_READ:
1030 cat_read(&psi, &psi_asic, reg, data);
1031 break;
1032 case VOYAGER_PSI_WRITE:
1033 cat_write(&psi, &psi_asic, reg, *data);
1034 break;
1035 case VOYAGER_PSI_SUBREAD:
1036 cat_subread(&psi, &psi_asic, reg, 1, data);
1037 break;
1038 case VOYAGER_PSI_SUBWRITE:
1039 cat_subwrite(&psi, &psi_asic, reg, 1, data);
1040 break;
1041 default:
1042 printk(KERN_ERR "Voyager PSI, unrecognised command %d\n", cmd);
1043 break;
1044 }
1045 outb(VOYAGER_CAT_END, CAT_CMD);
1046}
1047
1048void
1049voyager_cat_do_common_interrupt(void)
1050{
1051 /* This is caused either by a memory parity error or something
1052 * in the PSI */
1053 __u8 data;
1054 voyager_module_t psi = { 0 };
1055 voyager_asic_t psi_asic = { 0 };
1056 struct voyager_psi psi_reg;
1057 int i;
1058 re_read:
1059 psi.asic = &psi_asic;
1060 psi.asic->asic_id = VOYAGER_CAT_ID;
1061 psi.asic->subaddr = VOYAGER_SUBADDR_HI;
1062 psi.module_addr = VOYAGER_PSI;
1063 psi.scan_path_connected = 0;
1064
1065 outb(VOYAGER_CAT_END, CAT_CMD);
1066 /* Connect the PSI to the CAT Bus */
1067 outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT);
1068 outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT);
1069 outb(VOYAGER_CAT_RUN, CAT_CMD);
1070 cat_disconnect(&psi, &psi_asic);
1071 /* Read the status. NOTE: Need to read *all* the PSI regs here
1072 * otherwise the cmn int will be reasserted */
1073 for(i = 0; i < sizeof(psi_reg.regs); i++) {
1074 cat_read(&psi, &psi_asic, i, &((__u8 *)&psi_reg.regs)[i]);
1075 }
1076 outb(VOYAGER_CAT_END, CAT_CMD);
1077 if((psi_reg.regs.checkbit & 0x02) == 0) {
1078 psi_reg.regs.checkbit |= 0x02;
1079 cat_write(&psi, &psi_asic, 5, psi_reg.regs.checkbit);
1080 printk("VOYAGER RE-READ PSI\n");
1081 goto re_read;
1082 }
1083 outb(VOYAGER_CAT_RUN, CAT_CMD);
1084 for(i = 0; i < sizeof(psi_reg.subregs); i++) {
1085 /* This looks strange, but the PSI doesn't do auto increment
1086 * correctly */
1087 cat_subread(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG + i,
1088 1, &((__u8 *)&psi_reg.subregs)[i]);
1089 }
1090 outb(VOYAGER_CAT_END, CAT_CMD);
1091#ifdef VOYAGER_CAT_DEBUG
1092 printk("VOYAGER PSI: ");
1093 for(i=0; i<sizeof(psi_reg.regs); i++)
1094 printk("%02x ", ((__u8 *)&psi_reg.regs)[i]);
1095 printk("\n ");
1096 for(i=0; i<sizeof(psi_reg.subregs); i++)
1097 printk("%02x ", ((__u8 *)&psi_reg.subregs)[i]);
1098 printk("\n");
1099#endif
1100 if(psi_reg.regs.intstatus & PSI_MON) {
1101 /* switch off or power fail */
1102
1103 if(psi_reg.subregs.supply & PSI_SWITCH_OFF) {
1104 if(voyager_status.switch_off) {
1105 printk(KERN_ERR "Voyager front panel switch turned off again---Immediate power off!\n");
1106 voyager_cat_power_off();
1107 /* not reached */
1108 } else {
1109 printk(KERN_ERR "Voyager front panel switch turned off\n");
1110 voyager_status.switch_off = 1;
1111 voyager_status.request_from_kernel = 1;
1112 up(&kvoyagerd_sem);
1113 }
1114 /* Tell the hardware we're taking care of the
1115 * shutdown, otherwise it will power the box off
1116 * within 3 seconds of the switch being pressed and,
1117 * much more importantly for us, will continue to
1118 * assert the common interrupt */
1119 data = PSI_CLR_SWITCH_OFF;
1120 outb(VOYAGER_CAT_RUN, CAT_CMD);
1121 cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG,
1122 1, &data);
1123 outb(VOYAGER_CAT_END, CAT_CMD);
1124 } else {
1125
1126 VDEBUG(("Voyager ac fail reg 0x%x\n",
1127 psi_reg.subregs.ACfail));
1128 if((psi_reg.subregs.ACfail & AC_FAIL_STAT_CHANGE) == 0) {
1129 /* No further update */
1130 return;
1131 }
1132#if 0
1133 /* Don't bother trying to find out who failed.
1134 * FIXME: This probably makes the code incorrect on
1135 * anything other than a 345x */
1136 for(i=0; i< 5; i++) {
1137 if( psi_reg.subregs.ACfail &(1<<i)) {
1138 break;
1139 }
1140 }
1141 printk(KERN_NOTICE "AC FAIL IN SUPPLY %d\n", i);
1142#endif
1143 /* DON'T do this: it shuts down the AC PSI
1144 outb(VOYAGER_CAT_RUN, CAT_CMD);
1145 data = PSI_MASK_MASK | i;
1146 cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_MASK,
1147 1, &data);
1148 outb(VOYAGER_CAT_END, CAT_CMD);
1149 */
1150 printk(KERN_ERR "Voyager AC power failure\n");
1151 outb(VOYAGER_CAT_RUN, CAT_CMD);
1152 data = PSI_COLD_START;
1153 cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG,
1154 1, &data);
1155 outb(VOYAGER_CAT_END, CAT_CMD);
1156 voyager_status.power_fail = 1;
1157 voyager_status.request_from_kernel = 1;
1158 up(&kvoyagerd_sem);
1159 }
1160
1161
1162 } else if(psi_reg.regs.intstatus & PSI_FAULT) {
1163 /* Major fault! */
1164 printk(KERN_ERR "Voyager PSI Detected major fault, immediate power off!\n");
1165 voyager_cat_power_off();
1166 /* not reached */
1167 } else if(psi_reg.regs.intstatus & (PSI_DC_FAIL | PSI_ALARM
1168 | PSI_CURRENT | PSI_DVM
1169 | PSI_PSCFAULT | PSI_STAT_CHG)) {
1170 /* other psi fault */
1171
1172 printk(KERN_WARNING "Voyager PSI status 0x%x\n", psi_reg.regs.intstatus);
1173 /* clear the PSI fault */
1174 outb(VOYAGER_CAT_RUN, CAT_CMD);
1175 cat_write(&psi, &psi_asic, VOYAGER_PSI_STATUS_REG, 0);
1176 outb(VOYAGER_CAT_END, CAT_CMD);
1177 }
1178}
diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c
new file mode 100644
index 000000000000..903d739ca74a
--- /dev/null
+++ b/arch/i386/mach-voyager/voyager_smp.c
@@ -0,0 +1,1931 @@
1/* -*- mode: c; c-basic-offset: 8 -*- */
2
3/* Copyright (C) 1999,2001
4 *
5 * Author: J.E.J.Bottomley@HansenPartnership.com
6 *
7 * linux/arch/i386/kernel/voyager_smp.c
8 *
9 * This file provides all the same external entries as smp.c but uses
10 * the voyager hal to provide the functionality
11 */
12#include <linux/config.h>
13#include <linux/mm.h>
14#include <linux/kernel_stat.h>
15#include <linux/delay.h>
16#include <linux/mc146818rtc.h>
17#include <linux/cache.h>
18#include <linux/interrupt.h>
19#include <linux/smp_lock.h>
20#include <linux/init.h>
21#include <linux/kernel.h>
22#include <linux/bootmem.h>
23#include <linux/completion.h>
24#include <asm/desc.h>
25#include <asm/voyager.h>
26#include <asm/vic.h>
27#include <asm/mtrr.h>
28#include <asm/pgalloc.h>
29#include <asm/tlbflush.h>
30#include <asm/arch_hooks.h>
31
32#include <linux/irq.h>
33
34/* TLB state -- visible externally, indexed physically */
35DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0 };
36
37/* CPU IRQ affinity -- set to all ones initially */
38static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = ~0UL };
39
40/* per CPU data structure (for /proc/cpuinfo et al), visible externally
41 * indexed physically */
42struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
43
44/* physical ID of the CPU used to boot the system */
45unsigned char boot_cpu_id;
46
47/* The memory line addresses for the Quad CPIs */
48struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS] __cacheline_aligned;
49
50/* The masks for the Extended VIC processors, filled in by cat_init */
51__u32 voyager_extended_vic_processors = 0;
52
53/* Masks for the extended Quad processors which cannot be VIC booted */
54__u32 voyager_allowed_boot_processors = 0;
55
56/* The mask for the Quad Processors (both extended and non-extended) */
57__u32 voyager_quad_processors = 0;
58
59/* Total count of live CPUs, used in process.c to display
60 * the CPU information and in irq.c for the per CPU irq
61 * activity count. Finally exported by i386_ksyms.c */
62static int voyager_extended_cpus = 1;
63
64/* Have we found an SMP box - used by time.c to do the profiling
65 interrupt for timeslicing; do not set to 1 until the per CPU timer
66 interrupt is active */
67int smp_found_config = 0;
68
69/* Used for the invalidate map that's also checked in the spinlock */
70static volatile unsigned long smp_invalidate_needed;
71
72/* Bitmask of currently online CPUs - used by setup.c for
73 /proc/cpuinfo, visible externally but still physical */
74cpumask_t cpu_online_map = CPU_MASK_NONE;
75
76/* Bitmask of CPUs present in the system - exported by i386_ksyms.c, used
77 * by scheduler but indexed physically */
78cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
79
80
81/* The internal functions */
82static void send_CPI(__u32 cpuset, __u8 cpi);
83static void ack_CPI(__u8 cpi);
84static int ack_QIC_CPI(__u8 cpi);
85static void ack_special_QIC_CPI(__u8 cpi);
86static void ack_VIC_CPI(__u8 cpi);
87static void send_CPI_allbutself(__u8 cpi);
88static void enable_vic_irq(unsigned int irq);
89static void disable_vic_irq(unsigned int irq);
90static unsigned int startup_vic_irq(unsigned int irq);
91static void enable_local_vic_irq(unsigned int irq);
92static void disable_local_vic_irq(unsigned int irq);
93static void before_handle_vic_irq(unsigned int irq);
94static void after_handle_vic_irq(unsigned int irq);
95static void set_vic_irq_affinity(unsigned int irq, cpumask_t mask);
96static void ack_vic_irq(unsigned int irq);
97static void vic_enable_cpi(void);
98static void do_boot_cpu(__u8 cpuid);
99static void do_quad_bootstrap(void);
100static inline void wrapper_smp_local_timer_interrupt(struct pt_regs *);
101
102int hard_smp_processor_id(void);
103
104/* Inline functions */
105static inline void
106send_one_QIC_CPI(__u8 cpu, __u8 cpi)
107{
108 voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi =
109 (smp_processor_id() << 16) + cpi;
110}
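
/* Editor's note (not in the original source): the CPI word above
 * carries the sending CPU in bits 16 and up and the CPI number in the
 * low bits; writing it into the target's memory-mapped qic_cpi mailbox
 * is presumably what latches the interrupt on that quad CPU. */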
111
112static inline void
113send_QIC_CPI(__u32 cpuset, __u8 cpi)
114{
115 int cpu;
116
117 for_each_online_cpu(cpu) {
118 if(cpuset & (1<<cpu)) {
119#ifdef VOYAGER_DEBUG
120 if(!cpu_isset(cpu, cpu_online_map))
121 VDEBUG(("CPU%d sending cpi %d to CPU%d not in cpu_online_map\n", hard_smp_processor_id(), cpi, cpu));
122#endif
123 send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET);
124 }
125 }
126}
127
128static inline void
129send_one_CPI(__u8 cpu, __u8 cpi)
130{
131 if(voyager_quad_processors & (1<<cpu))
132 send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET);
133 else
134 send_CPI(1<<cpu, cpi);
135}
136
137static inline void
138send_CPI_allbutself(__u8 cpi)
139{
140 __u8 cpu = smp_processor_id();
141 __u32 mask = cpus_addr(cpu_online_map)[0] & ~(1 << cpu);
142 send_CPI(mask, cpi);
143}
144
145static inline int
146is_cpu_quad(void)
147{
148 __u8 cpumask = inb(VIC_PROC_WHO_AM_I);
149 return ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER);
150}
151
152static inline int
153is_cpu_extended(void)
154{
155 __u8 cpu = hard_smp_processor_id();
156
157 return(voyager_extended_vic_processors & (1<<cpu));
158}
159
160static inline int
161is_cpu_vic_boot(void)
162{
163 __u8 cpu = hard_smp_processor_id();
164
165 return(voyager_extended_vic_processors
166 & voyager_allowed_boot_processors & (1<<cpu));
167}
168
169
170static inline void
171ack_CPI(__u8 cpi)
172{
173 switch(cpi) {
174 case VIC_CPU_BOOT_CPI:
175 if(is_cpu_quad() && !is_cpu_vic_boot())
176 ack_QIC_CPI(cpi);
177 else
178 ack_VIC_CPI(cpi);
179 break;
180 case VIC_SYS_INT:
181 case VIC_CMN_INT:
182 /* These are slightly strange. Even on the Quad card,
183 * they are vectored as VIC CPIs */
184 if(is_cpu_quad())
185 ack_special_QIC_CPI(cpi);
186 else
187 ack_VIC_CPI(cpi);
188 break;
189 default:
190 printk("VOYAGER ERROR: CPI%d is in common CPI code\n", cpi);
191 break;
192 }
193}
194
195/* local variables */
196
197/* The VIC IRQ descriptors -- these look almost identical to the
198 * 8259 IRQs except that masks and things must be kept per processor
199 */
200static struct hw_interrupt_type vic_irq_type = {
201 .typename = "VIC-level",
202 .startup = startup_vic_irq,
203 .shutdown = disable_vic_irq,
204 .enable = enable_vic_irq,
205 .disable = disable_vic_irq,
206 .ack = before_handle_vic_irq,
207 .end = after_handle_vic_irq,
208 .set_affinity = set_vic_irq_affinity,
209};
210
211/* used to count up as CPUs are brought on line (starts at 0) */
212static int cpucount = 0;
213
214/* steal a page from the bottom of memory for the trampoline and
215 * squirrel its address away here. This will be in kernel virtual
216 * space */
217static __u32 trampoline_base;
218
219/* The per cpu profile stuff - used in smp_local_timer_interrupt */
220static DEFINE_PER_CPU(int, prof_multiplier) = 1;
221static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
222static DEFINE_PER_CPU(int, prof_counter) = 1;
223
224/* the map used to check if a CPU has booted */
225static __u32 cpu_booted_map;
226
227/* the synchronize flag used to hold all secondary CPUs spinning in
228 * a tight loop until the boot sequence is ready for them */
229static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
230
231/* This is for the new dynamic CPU boot code */
232cpumask_t cpu_callin_map = CPU_MASK_NONE;
233cpumask_t cpu_callout_map = CPU_MASK_NONE;
234
235/* The per processor IRQ masks (these are usually kept in sync) */
236static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned;
237
238/* the list of IRQs to be enabled by the VIC_ENABLE_IRQ_CPI */
239static __u16 vic_irq_enable_mask[NR_CPUS] __cacheline_aligned = { 0 };
240
241/* Lock for enable/disable of VIC interrupts */
242static __cacheline_aligned DEFINE_SPINLOCK(vic_irq_lock);
243
244/* The boot processor is correctly set up in PC mode when it
245 * comes up, but the secondaries need their master/slave 8259
246 * pairs initializing correctly */
247
248/* Interrupt counters (per cpu) and total - used to try to
249 * even up the interrupt handling routines */
250static long vic_intr_total = 0;
251static long vic_intr_count[NR_CPUS] __cacheline_aligned = { 0 };
252static unsigned long vic_tick[NR_CPUS] __cacheline_aligned = { 0 };
253
254/* Since we can only use CPI0, we fake all the other CPIs */
255static unsigned long vic_cpi_mailbox[NR_CPUS] __cacheline_aligned;
256
257/* debugging routine to read the isr of the cpu's pic */
258static inline __u16
259vic_read_isr(void)
260{
261 __u16 isr;
262
263 outb(0x0b, 0xa0);
264 isr = inb(0xa0) << 8;
265 outb(0x0b, 0x20);
266 isr |= inb(0x20);
267
268 return isr;
269}
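
/* Editor's note (not in the original source): 0x0b is the standard
 * 8259A OCW3 "read ISR" command; the following read of the same base
 * port (0xa0 slave, 0x20 master) then returns the in-service register
 * rather than the interrupt request register. */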
270
271static __init void
272qic_setup(void)
273{
274 if(!is_cpu_quad()) {
275 /* not a quad, no setup */
276 return;
277 }
278 outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0);
279 outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1);
280
281 if(is_cpu_extended()) {
282 /* the QIC duplicate of the VIC base register */
283 outb(VIC_DEFAULT_CPI_BASE, QIC_VIC_CPI_BASE_REGISTER);
284 outb(QIC_DEFAULT_CPI_BASE, QIC_CPI_BASE_REGISTER);
285
286 /* FIXME: should set up the QIC timer and memory parity
287 * error vectors here */
288 }
289}
290
291static __init void
292vic_setup_pic(void)
293{
294 outb(1, VIC_REDIRECT_REGISTER_1);
295 /* clear the claim registers for dynamic routing */
296 outb(0, VIC_CLAIM_REGISTER_0);
297 outb(0, VIC_CLAIM_REGISTER_1);
298
299 outb(0, VIC_PRIORITY_REGISTER);
300 /* Set the Primary and Secondary Microchannel vector
301 * bases to be the same as the ordinary interrupts
302 *
303 * FIXME: This would be more efficient using separate
304 * vectors. */
305 outb(FIRST_EXTERNAL_VECTOR, VIC_PRIMARY_MC_BASE);
306 outb(FIRST_EXTERNAL_VECTOR, VIC_SECONDARY_MC_BASE);
307 /* Now initialise the master PIC belonging to this CPU by
308 * sending the four ICWs */
309
310 /* ICW1: level triggered, ICW4 needed */
311 outb(0x19, 0x20);
312
313 /* ICW2: vector base */
314 outb(FIRST_EXTERNAL_VECTOR, 0x21);
315
316 /* ICW3: slave at line 2 */
317 outb(0x04, 0x21);
318
319 /* ICW4: 8086 mode */
320 outb(0x01, 0x21);
321
322 /* now the same for the slave PIC */
323
324 /* ICW1: level trigger, ICW4 needed */
325 outb(0x19, 0xA0);
326
327 /* ICW2: slave vector base */
328 outb(FIRST_EXTERNAL_VECTOR + 8, 0xA1);
329
330 /* ICW3: slave ID */
331 outb(0x02, 0xA1);
332
333 /* ICW4: 8086 mode */
334 outb(0x01, 0xA1);
335}
336
337static void
338do_quad_bootstrap(void)
339{
340 if(is_cpu_quad() && is_cpu_vic_boot()) {
341 int i;
342 unsigned long flags;
343 __u8 cpuid = hard_smp_processor_id();
344
345 local_irq_save(flags);
346
347 for(i = 0; i<4; i++) {
348 /* FIXME: this would be >>3 &0x7 on the 32 way */
349 if(((cpuid >> 2) & 0x03) == i)
350 /* don't lower our own mask! */
351 continue;
352
353 /* masquerade as local Quad CPU */
354 outb(QIC_CPUID_ENABLE | i, QIC_PROCESSOR_ID);
355 /* enable the startup CPI */
356 outb(QIC_BOOT_CPI_MASK, QIC_MASK_REGISTER1);
357 /* restore cpu id */
358 outb(0, QIC_PROCESSOR_ID);
359 }
360 local_irq_restore(flags);
361 }
362}
363
364
365/* Set up all the basic stuff: read the SMP config and make all the
366 * SMP information reflect only the boot cpu. All others will be
367 * brought on-line later. */
368void __init
369find_smp_config(void)
370{
371 int i;
372
373 boot_cpu_id = hard_smp_processor_id();
374
375 printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id);
376
377 /* initialize the CPU structures (moved from smp_boot_cpus) */
378 for(i=0; i<NR_CPUS; i++) {
379 cpu_irq_affinity[i] = ~0;
380 }
381 cpu_online_map = cpumask_of_cpu(boot_cpu_id);
382
383 /* The boot CPU must be extended */
384 voyager_extended_vic_processors = 1<<boot_cpu_id;
385 /* initially, all of the first 8 cpu's can boot */
386 voyager_allowed_boot_processors = 0xff;
387 /* set up everything for just this CPU, we can alter
388 * this as we start the other CPUs later */
389 /* now get the CPU disposition from the extended CMOS */
390 cpus_addr(phys_cpu_present_map)[0] = voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK);
391 cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 1) << 8;
392 cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 2) << 16;
393 cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 3) << 24;
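	/* Editor's note (not in the original source): the four reads
	 * above assemble a 32-bit little-endian mask, one bit per
	 * physical CPU slot, from consecutive extended-CMOS bytes. */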
394 printk("VOYAGER SMP: phys_cpu_present_map = 0x%lx\n", cpus_addr(phys_cpu_present_map)[0]);
395 /* Here we set up the VIC to enable SMP */
396 /* enable the CPIs by writing the base vector to their register */
397 outb(VIC_DEFAULT_CPI_BASE, VIC_CPI_BASE_REGISTER);
398 outb(1, VIC_REDIRECT_REGISTER_1);
399 /* set the claim registers for static routing --- Boot CPU gets
400 * all interrupts until all the other CPUs have started */
401 outb(0xff, VIC_CLAIM_REGISTER_0);
402 outb(0xff, VIC_CLAIM_REGISTER_1);
403 /* Set the Primary and Secondary Microchannel vector
404 * bases to be the same as the ordinary interrupts
405 *
406 * FIXME: This would be more efficient using separate
407 * vectors. */
408 outb(FIRST_EXTERNAL_VECTOR, VIC_PRIMARY_MC_BASE);
409 outb(FIRST_EXTERNAL_VECTOR, VIC_SECONDARY_MC_BASE);
410
411 /* Finally tell the firmware that we're driving */
412 outb(inb(VOYAGER_SUS_IN_CONTROL_PORT) | VOYAGER_IN_CONTROL_FLAG,
413 VOYAGER_SUS_IN_CONTROL_PORT);
414
415 current_thread_info()->cpu = boot_cpu_id;
416}
417
418/*
419 * The bootstrap kernel entry code has set these up. Save them
420 * for a given CPU, id is physical */
421void __init
422smp_store_cpu_info(int id)
423{
424 struct cpuinfo_x86 *c=&cpu_data[id];
425
426 *c = boot_cpu_data;
427
428 identify_cpu(c);
429}
430
431/* set up the trampoline and return the physical address of the code */
432static __u32 __init
433setup_trampoline(void)
434{
435 /* these two are global symbols in trampoline.S */
436 extern __u8 trampoline_end[];
437 extern __u8 trampoline_data[];
438
439 memcpy((__u8 *)trampoline_base, trampoline_data,
440 trampoline_end - trampoline_data);
441 return virt_to_phys((__u8 *)trampoline_base);
442}
443
444/* Routine initially called when a non-boot CPU is brought online */
445static void __init
446start_secondary(void *unused)
447{
448 __u8 cpuid = hard_smp_processor_id();
449 /* external functions not defined in the headers */
450 extern void calibrate_delay(void);
451
452 cpu_init();
453
454 /* OK, we're in the routine */
455 ack_CPI(VIC_CPU_BOOT_CPI);
456
457 /* set up the 8259 master/slave pair belonging to this CPU ---
458 * we won't actually receive any until the boot CPU
459 * relinquishes its static routing mask */
460 vic_setup_pic();
461
462 qic_setup();
463
464 if(is_cpu_quad() && !is_cpu_vic_boot()) {
465 /* clear the boot CPI */
466 __u8 dummy;
467
468 dummy = voyager_quad_cpi_addr[cpuid]->qic_cpi[VIC_CPU_BOOT_CPI].cpi;
469 printk("read dummy %d\n", dummy);
470 }
471
472 /* lower the mask to receive CPIs */
473 vic_enable_cpi();
474
475 VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid));
476
477 /* enable interrupts */
478 local_irq_enable();
479
480 /* get our bogomips */
481 calibrate_delay();
482
483 /* save our processor parameters */
484 smp_store_cpu_info(cpuid);
485
486 /* if we're a quad, we may need to bootstrap other CPUs */
487 do_quad_bootstrap();
488
489 /* FIXME: this is rather a poor hack to prevent the CPU
490 * activating softirqs while it's supposed to be waiting for
491 * permission to proceed. Without this, the new per CPU stuff
492 * in the softirqs will fail */
493 local_irq_disable();
494 cpu_set(cpuid, cpu_callin_map);
495
496 /* signal that we're done */
497 cpu_booted_map = 1;
498
499 while (!cpu_isset(cpuid, smp_commenced_mask))
500 rep_nop();
501 local_irq_enable();
502
503 local_flush_tlb();
504
505 cpu_set(cpuid, cpu_online_map);
506 wmb();
507 cpu_idle();
508}
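
/* Editor's note (not in the original source): the boot handshake above
 * is two flags -- the secondary sets cpu_booted_map to tell the boot
 * CPU it arrived (polled in do_boot_cpu() below), then spins on its
 * bit in smp_commenced_mask before entering cpu_idle(). */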
509
510
511/* Routine to kick start the given CPU and wait for it to report ready
512 * (or timeout in startup). When this routine returns, the requested
513 * CPU is either fully running and configured or known to be dead.
514 *
515 * We call this routine sequentially 1 CPU at a time, so no need for
516 * locking */
517
518static void __init
519do_boot_cpu(__u8 cpu)
520{
521 struct task_struct *idle;
522 int timeout;
523 unsigned long flags;
524 int quad_boot = (1<<cpu) & voyager_quad_processors
525 & ~( voyager_extended_vic_processors
526 & voyager_allowed_boot_processors);
527
528 /* For the 486, we can't use the 4Mb page table trick, so
529 * must map a region of memory */
530#ifdef CONFIG_M486
531 int i;
532 unsigned long *page_table_copies = (unsigned long *)
533 __get_free_page(GFP_KERNEL);
534#endif
535 pgd_t orig_swapper_pg_dir0;
536
537 /* This is an area in head.S which was used to set up the
538 * initial kernel stack. We need to alter this to give the
539 * booting CPU a new stack (taken from its idle process) */
540 extern struct {
541 __u8 *esp;
542 unsigned short ss;
543 } stack_start;
544 /* This is the format of the CPI IDT gate (in real mode) which
545 * we're hijacking to boot the CPU */
546 union IDTFormat {
547 struct seg {
548 __u16 Offset;
549 __u16 Segment;
550 } idt;
551 __u32 val;
552 } hijack_source;
553
554 __u32 *hijack_vector;
555 __u32 start_phys_address = setup_trampoline();
556
557 /* There's a clever trick to this: The linux trampoline is
558 * compiled to begin at absolute location zero, so make the
559 * address zero but have the data segment selector compensate
560 * for the actual address */
561 hijack_source.idt.Offset = start_phys_address & 0x000F;
562 hijack_source.idt.Segment = (start_phys_address >> 4) & 0xFFFF;
563
564 cpucount++;
565 idle = fork_idle(cpu);
566 if(IS_ERR(idle))
567 panic("failed fork for CPU%d", cpu);
568 idle->thread.eip = (unsigned long) start_secondary;
569 /* init_tasks (in sched.c) is indexed logically */
570 stack_start.esp = (void *) idle->thread.esp;
571
572 irq_ctx_init(cpu);
573
574 /* Note: Don't modify initial ss override */
575 VDEBUG(("VOYAGER SMP: Booting CPU%d at 0x%lx[%x:%x], stack %p\n", cpu,
576 (unsigned long)hijack_source.val, hijack_source.idt.Segment,
577 hijack_source.idt.Offset, stack_start.esp));
578 /* set the original swapper_pg_dir[0] to map 0 to 4Mb transparently
579 * (so that the booting CPU can find start_32) */
580 orig_swapper_pg_dir0 = swapper_pg_dir[0];
581#ifdef CONFIG_M486
582 if(page_table_copies == NULL)
583 panic("No free memory for 486 page tables\n");
584 for(i = 0; i < PAGE_SIZE/sizeof(unsigned long); i++)
585 page_table_copies[i] = (i * PAGE_SIZE)
586 | _PAGE_RW | _PAGE_USER | _PAGE_PRESENT;
587
588 ((unsigned long *)swapper_pg_dir)[0] =
589 ((virt_to_phys(page_table_copies)) & PAGE_MASK)
590 | _PAGE_RW | _PAGE_USER | _PAGE_PRESENT;
591#else
592 ((unsigned long *)swapper_pg_dir)[0] =
593 (virt_to_phys(pg0) & PAGE_MASK)
594 | _PAGE_RW | _PAGE_USER | _PAGE_PRESENT;
595#endif
596
597 if(quad_boot) {
598 printk("CPU%d: non-extended Quad boot\n", cpu);
599 hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_CPI + QIC_DEFAULT_CPI_BASE)*4);
600 *hijack_vector = hijack_source.val;
601 } else {
602 printk("CPU%d: extended VIC boot\n", cpu);
603 hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_CPI + VIC_DEFAULT_CPI_BASE)*4);
604 *hijack_vector = hijack_source.val;
605 /* VIC errata, may also receive interrupt at this address */
606 hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_ERRATA_CPI + VIC_DEFAULT_CPI_BASE)*4);
607 *hijack_vector = hijack_source.val;
608 }
609 /* All non-boot CPUs start with interrupts fully masked. Need
610 * to lower the mask of the CPI we're about to send. We do
611 * this in the VIC by masquerading as the processor we're
612 * about to boot and lowering its interrupt mask */
613 local_irq_save(flags);
614 if(quad_boot) {
615 send_one_QIC_CPI(cpu, VIC_CPU_BOOT_CPI);
616 } else {
617 outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID);
618 /* here we're altering registers belonging to `cpu' */
619
620 outb(VIC_BOOT_INTERRUPT_MASK, 0x21);
621 /* now go back to our original identity */
622 outb(boot_cpu_id, VIC_PROCESSOR_ID);
623
624 /* and boot the CPU */
625
626 send_CPI((1<<cpu), VIC_CPU_BOOT_CPI);
627 }
628 cpu_booted_map = 0;
629 local_irq_restore(flags);
630
631 /* now wait for it to become ready (or timeout) */
632 for(timeout = 0; timeout < 50000; timeout++) {
633 if(cpu_booted_map)
634 break;
635 udelay(100);
636 }
637 /* reset the page table */
638 swapper_pg_dir[0] = orig_swapper_pg_dir0;
639 local_flush_tlb();
640#ifdef CONFIG_M486
641 free_page((unsigned long)page_table_copies);
642#endif
643
644 if (cpu_booted_map) {
645 VDEBUG(("CPU%d: Booted successfully, back in CPU %d\n",
646 cpu, smp_processor_id()));
647
648 printk("CPU%d: ", cpu);
649 print_cpu_info(&cpu_data[cpu]);
650 wmb();
651 cpu_set(cpu, cpu_callout_map);
652 }
653 else {
654 printk("CPU%d FAILED TO BOOT: ", cpu);
655 if (*((volatile unsigned char *)phys_to_virt(start_phys_address))==0xA5)
656 printk("Stuck.\n");
657 else
658 printk("Not responding.\n");
659
660 cpucount--;
661 }
662}
663
664void __init
665smp_boot_cpus(void)
666{
667 int i;
668
669	/* CAT BUS initialisation must be done after the memory is set up */
670 /* FIXME: The L4 has a catbus too, it just needs to be
671 * accessed in a totally different way */
672 if(voyager_level == 5) {
673 voyager_cat_init();
674
675 /* now that the cat has probed the Voyager System Bus, sanity
676 * check the cpu map */
677 if( ((voyager_quad_processors | voyager_extended_vic_processors)
678 & cpus_addr(phys_cpu_present_map)[0]) != cpus_addr(phys_cpu_present_map)[0]) {
679 /* should panic */
680 printk("\n\n***WARNING*** Sanity check of CPU present map FAILED\n");
681 }
682 } else if(voyager_level == 4)
683 voyager_extended_vic_processors = cpus_addr(phys_cpu_present_map)[0];
684
685 /* this sets up the idle task to run on the current cpu */
686 voyager_extended_cpus = 1;
687 /* Remove the global_irq_holder setting, it triggers a BUG() on
688 * schedule at the moment */
689 //global_irq_holder = boot_cpu_id;
690
691 /* FIXME: Need to do something about this but currently only works
692 * on CPUs with a tsc which none of mine have.
693 smp_tune_scheduling();
694 */
695 smp_store_cpu_info(boot_cpu_id);
696 printk("CPU%d: ", boot_cpu_id);
697 print_cpu_info(&cpu_data[boot_cpu_id]);
698
699 if(is_cpu_quad()) {
700 /* booting on a Quad CPU */
701 printk("VOYAGER SMP: Boot CPU is Quad\n");
702 qic_setup();
703 do_quad_bootstrap();
704 }
705
706 /* enable our own CPIs */
707 vic_enable_cpi();
708
709 cpu_set(boot_cpu_id, cpu_online_map);
710 cpu_set(boot_cpu_id, cpu_callout_map);
711
712 /* loop over all the extended VIC CPUs and boot them. The
713 * Quad CPUs must be bootstrapped by their extended VIC cpu */
714 for(i = 0; i < NR_CPUS; i++) {
715 if(i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map))
716 continue;
717 do_boot_cpu(i);
718 /* This udelay seems to be needed for the Quad boots
719 * don't remove unless you know what you're doing */
720 udelay(1000);
721 }
722	/* we could compute the total bogomips here, but why bother?
723 * Code added from smpboot.c */
724 {
725 unsigned long bogosum = 0;
726 for (i = 0; i < NR_CPUS; i++)
727 if (cpu_isset(i, cpu_online_map))
728 bogosum += cpu_data[i].loops_per_jiffy;
729 printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
730 cpucount+1,
731 bogosum/(500000/HZ),
732 (bogosum/(5000/HZ))%100);
733 }
734 voyager_extended_cpus = hweight32(voyager_extended_vic_processors);
735 printk("VOYAGER: Extended (interrupt handling CPUs): %d, non-extended: %d\n", voyager_extended_cpus, num_booting_cpus() - voyager_extended_cpus);
736 /* that's it, switch to symmetric mode */
737 outb(0, VIC_PRIORITY_REGISTER);
738 outb(0, VIC_CLAIM_REGISTER_0);
739 outb(0, VIC_CLAIM_REGISTER_1);
740
741 VDEBUG(("VOYAGER SMP: Booted with %d CPUs\n", num_booting_cpus()));
742}
743
744/* Reload the secondary CPU's task structure (this function does not
745 * return) */
746void __init
747initialize_secondary(void)
748{
749#if 0
750 // AC kernels only
751 set_current(hard_get_current());
752#endif
753
754 /*
755 * We don't actually need to load the full TSS,
756 * basically just the stack pointer and the eip.
757 */
758
759 asm volatile(
760 "movl %0,%%esp\n\t"
761 "jmp *%1"
762 :
763 :"r" (current->thread.esp),"r" (current->thread.eip));
764}
765
766/* handle a Voyager SYS_INT -- If we don't, the base board will
767 * panic the system.
768 *
769 * System interrupts occur because some problem was detected on the
770 * various busses. To find out what, you have to probe all the
771 * hardware via the CAT bus. FIXME: At the moment we do nothing. */
772fastcall void
773smp_vic_sys_interrupt(struct pt_regs *regs)
774{
775 ack_CPI(VIC_SYS_INT);
776 printk("Voyager SYSTEM INTERRUPT\n");
777}
778
779/* Handle a voyager CMN_INT; These interrupts occur either because of
780 * a system status change or because a single bit memory error
781 * occurred. FIXME: At the moment, ignore all this. */
782fastcall void
783smp_vic_cmn_interrupt(struct pt_regs *regs)
784{
785 static __u8 in_cmn_int = 0;
786 static DEFINE_SPINLOCK(cmn_int_lock);
787
788 /* common ints are broadcast, so make sure we only do this once */
789 _raw_spin_lock(&cmn_int_lock);
790 if(in_cmn_int)
791 goto unlock_end;
792
793 in_cmn_int++;
794 _raw_spin_unlock(&cmn_int_lock);
795
796 VDEBUG(("Voyager COMMON INTERRUPT\n"));
797
798 if(voyager_level == 5)
799 voyager_cat_do_common_interrupt();
800
801 _raw_spin_lock(&cmn_int_lock);
802 in_cmn_int = 0;
803 unlock_end:
804 _raw_spin_unlock(&cmn_int_lock);
805 ack_CPI(VIC_CMN_INT);
806}
807
808/*
809 * Reschedule call back. Nothing to do, all the work is done
810 * automatically when we return from the interrupt. */
811static void
812smp_reschedule_interrupt(void)
813{
814 /* do nothing */
815}
816
817static struct mm_struct * flush_mm;
818static unsigned long flush_va;
819static DEFINE_SPINLOCK(tlbstate_lock);
820#define FLUSH_ALL 0xffffffff
821
822/*
823 * We cannot call mmdrop() because we are in interrupt context;
824 * instead we update mm->cpu_vm_mask.
825 *
826 * We need to reload %cr3 since the page tables may be going
827 * away from under us..
828 */
829static inline void
830leave_mm (unsigned long cpu)
831{
832 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
833 BUG();
834 cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
835 load_cr3(swapper_pg_dir);
836}
837
838
839/*
840 * Invalidate call-back
841 */
842static void
843smp_invalidate_interrupt(void)
844{
845 __u8 cpu = smp_processor_id();
846
847 if (!test_bit(cpu, &smp_invalidate_needed))
848 return;
849 /* This will flood messages. Don't uncomment unless you see
850	 * problems with cross cpu invalidation
851 VDEBUG(("VOYAGER SMP: CPU%d received INVALIDATE_CPI\n",
852 smp_processor_id()));
853 */
854
855 if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
856 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
857 if (flush_va == FLUSH_ALL)
858 local_flush_tlb();
859 else
860 __flush_tlb_one(flush_va);
861 } else
862 leave_mm(cpu);
863 }
864 smp_mb__before_clear_bit();
865 clear_bit(cpu, &smp_invalidate_needed);
866 smp_mb__after_clear_bit();
867}
868
869/* All the new flush operations for 2.4 */
870
871
872/* This routine is called with a physical cpu mask */
873static void
874flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
875 unsigned long va)
876{
877 int stuck = 50000;
878
879 if (!cpumask)
880 BUG();
881 if ((cpumask & cpus_addr(cpu_online_map)[0]) != cpumask)
882 BUG();
883 if (cpumask & (1 << smp_processor_id()))
884 BUG();
885 if (!mm)
886 BUG();
887
888 spin_lock(&tlbstate_lock);
889
890 flush_mm = mm;
891 flush_va = va;
892 atomic_set_mask(cpumask, &smp_invalidate_needed);
893 /*
894 * We have to send the CPI only to
895 * CPUs affected.
896 */
897 send_CPI(cpumask, VIC_INVALIDATE_CPI);
898
899 while (smp_invalidate_needed) {
900 mb();
901 if(--stuck == 0) {
902 printk("***WARNING*** Stuck doing invalidate CPI (CPU%d)\n", smp_processor_id());
903 break;
904 }
905 }
906
907 /* Uncomment only to debug invalidation problems
908 VDEBUG(("VOYAGER SMP: Completed invalidate CPI (CPU%d)\n", cpu));
909 */
910
911 flush_mm = NULL;
912 flush_va = 0;
913 spin_unlock(&tlbstate_lock);
914}
915
916void
917flush_tlb_current_task(void)
918{
919 struct mm_struct *mm = current->mm;
920 unsigned long cpu_mask;
921
922 preempt_disable();
923
924 cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
925 local_flush_tlb();
926 if (cpu_mask)
927 flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
928
929 preempt_enable();
930}
931
932
933void
934flush_tlb_mm (struct mm_struct * mm)
935{
936 unsigned long cpu_mask;
937
938 preempt_disable();
939
940 cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
941
942 if (current->active_mm == mm) {
943 if (current->mm)
944 local_flush_tlb();
945 else
946 leave_mm(smp_processor_id());
947 }
948 if (cpu_mask)
949 flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
950
951 preempt_enable();
952}
953
954void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
955{
956 struct mm_struct *mm = vma->vm_mm;
957 unsigned long cpu_mask;
958
959 preempt_disable();
960
961 cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
962 if (current->active_mm == mm) {
963 if(current->mm)
964 __flush_tlb_one(va);
965 else
966 leave_mm(smp_processor_id());
967 }
968
969 if (cpu_mask)
970 flush_tlb_others(cpu_mask, mm, va);
971
972 preempt_enable();
973}
974
975/* enable the requested IRQs */
976static void
977smp_enable_irq_interrupt(void)
978{
979 __u8 irq;
980 __u8 cpu = get_cpu();
981
982 VDEBUG(("VOYAGER SMP: CPU%d enabling irq mask 0x%x\n", cpu,
983 vic_irq_enable_mask[cpu]));
984
985 spin_lock(&vic_irq_lock);
986 for(irq = 0; irq < 16; irq++) {
987 if(vic_irq_enable_mask[cpu] & (1<<irq))
988 enable_local_vic_irq(irq);
989 }
990 vic_irq_enable_mask[cpu] = 0;
991 spin_unlock(&vic_irq_lock);
992
993 put_cpu_no_resched();
994}
995
996/*
997 * CPU halt call-back
998 */
999static void
1000smp_stop_cpu_function(void *dummy)
1001{
1002 VDEBUG(("VOYAGER SMP: CPU%d is STOPPING\n", smp_processor_id()));
1003 cpu_clear(smp_processor_id(), cpu_online_map);
1004 local_irq_disable();
1005 for(;;)
1006 __asm__("hlt");
1007}
1008
1009static DEFINE_SPINLOCK(call_lock);
1010
1011struct call_data_struct {
1012 void (*func) (void *info);
1013 void *info;
1014 volatile unsigned long started;
1015 volatile unsigned long finished;
1016 int wait;
1017};
1018
1019static struct call_data_struct * call_data;
1020
1021/* execute a function on another CPU. The function to be called must
1022 * be previously set up. This is used to schedule a function for
1023 * execution on all CPUs - set up the function, then broadcast a
1024 * function_interrupt CPI to come here on each CPU */
1025static void
1026smp_call_function_interrupt(void)
1027{
1028 void (*func) (void *info) = call_data->func;
1029 void *info = call_data->info;
1030 /* must take copy of wait because call_data may be replaced
1031 * unless the function is waiting for us to finish */
1032 int wait = call_data->wait;
1033 __u8 cpu = smp_processor_id();
1034
1035 /*
1036 * Notify initiating CPU that I've grabbed the data and am
1037 * about to execute the function
1038 */
1039 mb();
1040 if(!test_and_clear_bit(cpu, &call_data->started)) {
1041 /* If the bit wasn't set, this could be a replay */
1042		printk(KERN_WARNING "VOYAGER SMP: CPU %d received call function with no call pending\n", cpu);
1043 return;
1044 }
1045 /*
1046 * At this point the info structure may be out of scope unless wait==1
1047 */
1048 irq_enter();
1049 (*func)(info);
1050 irq_exit();
1051 if (wait) {
1052 mb();
1053 clear_bit(cpu, &call_data->finished);
1054 }
1055}
1056
1057/* Call this function on all CPUs using the function_interrupt above
1058 <func> The function to run. This must be fast and non-blocking.
1059 <info> An arbitrary pointer to pass to the function.
1060 <retry> If true, keep retrying until ready.
1061 <wait> If true, wait until function has completed on other CPUs.
1062 [RETURNS] 0 on success, else a negative status code. Does not return until
1063 remote CPUs are nearly ready to execute <<func>> or have already executed it.
1064*/
1065int
1066smp_call_function (void (*func) (void *info), void *info, int retry,
1067 int wait)
1068{
1069 struct call_data_struct data;
1070 __u32 mask = cpus_addr(cpu_online_map)[0];
1071
1072 mask &= ~(1<<smp_processor_id());
1073
1074 if (!mask)
1075 return 0;
1076
1077 /* Can deadlock when called with interrupts disabled */
1078 WARN_ON(irqs_disabled());
1079
1080 data.func = func;
1081 data.info = info;
1082 data.started = mask;
1083 data.wait = wait;
1084 if (wait)
1085 data.finished = mask;
1086
1087 spin_lock(&call_lock);
1088 call_data = &data;
1089 wmb();
1090 /* Send a message to all other CPUs and wait for them to respond */
1091 send_CPI_allbutself(VIC_CALL_FUNCTION_CPI);
1092
1093 /* Wait for response */
1094 while (data.started)
1095 barrier();
1096
1097 if (wait)
1098 while (data.finished)
1099 barrier();
1100
1101 spin_unlock(&call_lock);
1102
1103 return 0;
1104}
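
/* A hypothetical usage sketch (names invented for illustration): run a
 * fast, non-blocking function on every other CPU and wait. The calling
 * CPU is excluded from the mask above, so it must call the function
 * itself if it also needs the effect:
 *
 *	static atomic_t hits = ATOMIC_INIT(0);
 *
 *	static void bump_counter(void *info)
 *	{
 *		atomic_inc((atomic_t *)info);
 *	}
 *
 *	smp_call_function(bump_counter, &hits, 1, 1);
 *	bump_counter(&hits);
 */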
1105
1106/* Sorry about the name. In an APIC based system, the APICs
1107 * themselves are programmed to send a timer interrupt. This is used
1108 * by linux to reschedule the processor. Voyager doesn't have this,
1109 * so we use the system clock to interrupt one processor, which in
1110 * turn, broadcasts a timer CPI to all the others --- we receive that
1111 * CPI here. We don't actually use this for counting, so losing
1112 * ticks doesn't matter.
1113 *
1114 * FIXME: For those CPU's which actually have a local APIC, we could
1115 * try to use it to trigger this interrupt instead of having to
1116 * broadcast the timer tick. Unfortunately, all my pentium DYADs have
1117 * no local APIC, so I can't do this
1118 *
1119 * This function is currently a placeholder and is unused in the code */
1120fastcall void
1121smp_apic_timer_interrupt(struct pt_regs *regs)
1122{
1123 wrapper_smp_local_timer_interrupt(regs);
1124}
1125
1126/* All of the QUAD interrupt GATES */
1127fastcall void
1128smp_qic_timer_interrupt(struct pt_regs *regs)
1129{
1130 ack_QIC_CPI(QIC_TIMER_CPI);
1131 wrapper_smp_local_timer_interrupt(regs);
1132}
1133
1134fastcall void
1135smp_qic_invalidate_interrupt(struct pt_regs *regs)
1136{
1137 ack_QIC_CPI(QIC_INVALIDATE_CPI);
1138 smp_invalidate_interrupt();
1139}
1140
1141fastcall void
1142smp_qic_reschedule_interrupt(struct pt_regs *regs)
1143{
1144 ack_QIC_CPI(QIC_RESCHEDULE_CPI);
1145 smp_reschedule_interrupt();
1146}
1147
1148fastcall void
1149smp_qic_enable_irq_interrupt(struct pt_regs *regs)
1150{
1151 ack_QIC_CPI(QIC_ENABLE_IRQ_CPI);
1152 smp_enable_irq_interrupt();
1153}
1154
1155fastcall void
1156smp_qic_call_function_interrupt(struct pt_regs *regs)
1157{
1158 ack_QIC_CPI(QIC_CALL_FUNCTION_CPI);
1159 smp_call_function_interrupt();
1160}
1161
1162fastcall void
1163smp_vic_cpi_interrupt(struct pt_regs *regs)
1164{
1165 __u8 cpu = smp_processor_id();
1166
1167 if(is_cpu_quad())
1168 ack_QIC_CPI(VIC_CPI_LEVEL0);
1169 else
1170 ack_VIC_CPI(VIC_CPI_LEVEL0);
1171
1172 if(test_and_clear_bit(VIC_TIMER_CPI, &vic_cpi_mailbox[cpu]))
1173 wrapper_smp_local_timer_interrupt(regs);
1174 if(test_and_clear_bit(VIC_INVALIDATE_CPI, &vic_cpi_mailbox[cpu]))
1175 smp_invalidate_interrupt();
1176 if(test_and_clear_bit(VIC_RESCHEDULE_CPI, &vic_cpi_mailbox[cpu]))
1177 smp_reschedule_interrupt();
1178 if(test_and_clear_bit(VIC_ENABLE_IRQ_CPI, &vic_cpi_mailbox[cpu]))
1179 smp_enable_irq_interrupt();
1180 if(test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu]))
1181 smp_call_function_interrupt();
1182}
1183
1184static void
1185do_flush_tlb_all(void* info)
1186{
1187 unsigned long cpu = smp_processor_id();
1188
1189 __flush_tlb_all();
1190 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
1191 leave_mm(cpu);
1192}
1193
1194
1195/* flush the TLB of every active CPU in the system */
1196void
1197flush_tlb_all(void)
1198{
1199 on_each_cpu(do_flush_tlb_all, 0, 1, 1);
1200}
1201
1202/* used to set up the trampoline for other CPUs when the memory manager
1203 * is sorted out */
1204void __init
1205smp_alloc_memory(void)
1206{
1207 trampoline_base = (__u32)alloc_bootmem_low_pages(PAGE_SIZE);
1208 if(__pa(trampoline_base) >= 0x93000)
1209 BUG();
1210}
1211
1212/* send a reschedule CPI to one CPU by physical CPU number*/
1213void
1214smp_send_reschedule(int cpu)
1215{
1216 send_one_CPI(cpu, VIC_RESCHEDULE_CPI);
1217}
1218
1219
1220int
1221hard_smp_processor_id(void)
1222{
1223 __u8 i;
1224 __u8 cpumask = inb(VIC_PROC_WHO_AM_I);
1225 if((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER)
1226 return cpumask & 0x1F;
1227
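	/* otherwise the answer is one bit per extended processor, e.g.
	 * 0x08 (only bit 3 set) means we are physical CPU 3 */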
1228 for(i = 0; i < 8; i++) {
1229 if(cpumask & (1<<i))
1230 return i;
1231 }
1232 printk("** WARNING ** Illegal cpuid returned by VIC: %d", cpumask);
1233 return 0;
1234}
1235
1236/* broadcast a halt to all other CPUs */
1237void
1238smp_send_stop(void)
1239{
1240 smp_call_function(smp_stop_cpu_function, NULL, 1, 1);
1241}
1242
1243/* this function is triggered in time.c when a clock tick fires;
1244 * we need to re-broadcast the tick to all CPUs */
1245void
1246smp_vic_timer_interrupt(struct pt_regs *regs)
1247{
1248 send_CPI_allbutself(VIC_TIMER_CPI);
1249 smp_local_timer_interrupt(regs);
1250}
1251
1252static inline void
1253wrapper_smp_local_timer_interrupt(struct pt_regs *regs)
1254{
1255 irq_enter();
1256 smp_local_timer_interrupt(regs);
1257 irq_exit();
1258}
1259
1260/* local (per CPU) timer interrupt. It does both profiling and
1261 * process statistics/rescheduling.
1262 *
1263 * We do profiling in every local tick, statistics/rescheduling
1264 * happen only every 'profiling multiplier' ticks. The default
1265 * multiplier is 1 and it can be changed by writing the new multiplier
1266 * value into /proc/profile.
1267 */
1268void
1269smp_local_timer_interrupt(struct pt_regs * regs)
1270{
1271 int cpu = smp_processor_id();
1272 long weight;
1273
1274 profile_tick(CPU_PROFILING, regs);
1275 if (--per_cpu(prof_counter, cpu) <= 0) {
1276 /*
1277 * The multiplier may have changed since the last time we got
1278 * to this point as a result of the user writing to
1279 * /proc/profile. In this case we need to adjust the APIC
1280 * timer accordingly.
1281 *
1282 * Interrupts are already masked off at this point.
1283 */
1284 per_cpu(prof_counter,cpu) = per_cpu(prof_multiplier, cpu);
1285 if (per_cpu(prof_counter, cpu) !=
1286 per_cpu(prof_old_multiplier, cpu)) {
1287 /* FIXME: need to update the vic timer tick here */
1288 per_cpu(prof_old_multiplier, cpu) =
1289 per_cpu(prof_counter, cpu);
1290 }
1291
1292 update_process_times(user_mode(regs));
1293 }
1294
1295 if( ((1<<cpu) & voyager_extended_vic_processors) == 0)
1296 /* only extended VIC processors participate in
1297 * interrupt distribution */
1298 return;
1299
1300 /*
1301 * We take the 'long' return path, and there every subsystem
1302	 * grabs the appropriate locks (kernel lock/ irq lock).
1303 *
1304 * we might want to decouple profiling from the 'long path',
1305 * and do the profiling totally in assembly.
1306 *
1307 * Currently this isn't too much of an issue (performance wise),
1308 * we can take more than 100K local irqs per second on a 100 MHz P5.
1309 */
1310
1311 if((++vic_tick[cpu] & 0x7) != 0)
1312 return;
1313 /* get here every 16 ticks (about every 1/6 of a second) */
1314
1315 /* Change our priority to give someone else a chance at getting
1316 * the IRQ. The algorithm goes like this:
1317 *
1318 * In the VIC, the dynamically routed interrupt is always
1319 * handled by the lowest priority eligible (i.e. receiving
1320 * interrupts) CPU. If >1 eligible CPUs are equal lowest, the
1321 * lowest processor number gets it.
1322 *
1323 * The priority of a CPU is controlled by a special per-CPU
1324	 * VIC priority register which is 3 bits wide, 0 being the lowest
1325	 * and 7 the highest priority.
1326 *
1327 * Therefore we subtract the average number of interrupts from
1328 * the number we've fielded. If this number is negative, we
1329 * lower the activity count and if it is positive, we raise
1330 * it.
1331 *
1332 * I'm afraid this still leads to odd looking interrupt counts:
1333 * the totals are all roughly equal, but the individual ones
1334 * look rather skewed.
1335 *
1336 * FIXME: This algorithm is total crap when mixed with SMP
1337 * affinity code since we now try to even up the interrupt
1338 * counts when an affinity binding is keeping them on a
1339 * particular CPU*/
1340 weight = (vic_intr_count[cpu]*voyager_extended_cpus
1341 - vic_intr_total) >> 4;
1342 weight += 4;
1343 if(weight > 7)
1344 weight = 7;
1345 if(weight < 0)
1346 weight = 0;
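	/* worked example with made-up counts: given 4 extended CPUs,
	 * vic_intr_count[cpu] == 300 and vic_intr_total == 1000, this is
	 * (300*4 - 1000) >> 4 == 12, then 16 after the +4 bias, which
	 * clamps to the maximum priority of 7 */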
1347
1348 outb((__u8)weight, VIC_PRIORITY_REGISTER);
1349
1350#ifdef VOYAGER_DEBUG
1351 if((vic_tick[cpu] & 0xFFF) == 0) {
1352 /* print this message roughly every 25 secs */
1353 printk("VOYAGER SMP: vic_tick[%d] = %lu, weight = %ld\n",
1354 cpu, vic_tick[cpu], weight);
1355 }
1356#endif
1357}
1358
1359/* setup the profiling timer */
1360int
1361setup_profiling_timer(unsigned int multiplier)
1362{
1363 int i;
1364
1365	if (!multiplier)
1366 return -EINVAL;
1367
1368 /*
1369 * Set the new multiplier for each CPU. CPUs don't start using the
1370 * new values until the next timer interrupt in which they do process
1371 * accounting.
1372 */
1373 for (i = 0; i < NR_CPUS; ++i)
1374 per_cpu(prof_multiplier, i) = multiplier;
1375
1376 return 0;
1377}
1378
1379
1380/* The CPIs are handled in the per cpu 8259s, so they must be
1381 * enabled to be received: FIX: enabling the CPIs in the early
1382 * boot sequence interferes with bug checking; enable them later
1383 * on in smp_init */
1384#define VIC_SET_GATE(cpi, vector) \
1385 set_intr_gate((cpi) + VIC_DEFAULT_CPI_BASE, (vector))
1386#define QIC_SET_GATE(cpi, vector) \
1387 set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector))
1388
1389void __init
1390smp_intr_init(void)
1391{
1392 int i;
1393
1394 /* initialize the per cpu irq mask to all disabled */
1395 for(i = 0; i < NR_CPUS; i++)
1396 vic_irq_mask[i] = 0xFFFF;
1397
1398 VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt);
1399
1400 VIC_SET_GATE(VIC_SYS_INT, vic_sys_interrupt);
1401 VIC_SET_GATE(VIC_CMN_INT, vic_cmn_interrupt);
1402
1403 QIC_SET_GATE(QIC_TIMER_CPI, qic_timer_interrupt);
1404 QIC_SET_GATE(QIC_INVALIDATE_CPI, qic_invalidate_interrupt);
1405 QIC_SET_GATE(QIC_RESCHEDULE_CPI, qic_reschedule_interrupt);
1406 QIC_SET_GATE(QIC_ENABLE_IRQ_CPI, qic_enable_irq_interrupt);
1407 QIC_SET_GATE(QIC_CALL_FUNCTION_CPI, qic_call_function_interrupt);
1408
1409
1410 /* now put the VIC descriptor into the first 48 IRQs
1411 *
1412 * This is for later: first 16 correspond to PC IRQs; next 16
1413 * are Primary MC IRQs and final 16 are Secondary MC IRQs */
1414 for(i = 0; i < 48; i++)
1415 irq_desc[i].handler = &vic_irq_type;
1416}
1417
1418/* send a CPI at level cpi to a set of cpus in cpuset (set 1 bit per
1419 * processor to receive the CPI) */
1420static void
1421send_CPI(__u32 cpuset, __u8 cpi)
1422{
1423 int cpu;
1424 __u32 quad_cpuset = (cpuset & voyager_quad_processors);
1425
1426 if(cpi < VIC_START_FAKE_CPI) {
1427 /* fake CPI are only used for booting, so send to the
1428 * extended quads as well---Quads must be VIC booted */
1429 outb((__u8)(cpuset), VIC_CPI_Registers[cpi]);
1430 return;
1431 }
1432 if(quad_cpuset)
1433 send_QIC_CPI(quad_cpuset, cpi);
1434 cpuset &= ~quad_cpuset;
1435	cpuset &= 0xff; /* only first 8 CPUs valid for VIC CPI */
1436 if(cpuset == 0)
1437 return;
1438 for_each_online_cpu(cpu) {
1439 if(cpuset & (1<<cpu))
1440 set_bit(cpi, &vic_cpi_mailbox[cpu]);
1441 }
1442 if(cpuset)
1443 outb((__u8)cpuset, VIC_CPI_Registers[VIC_CPI_LEVEL0]);
1444}
1445
1446/* Acknowledge receipt of CPI in the QIC, clear in QIC hardware and
1447 * set the cache line to shared by reading it.
1448 *
1449 * DON'T make this inline otherwise the cache line read will be
1450 * optimised away
1451 * */
1452static int
1453ack_QIC_CPI(__u8 cpi) {
1454 __u8 cpu = hard_smp_processor_id();
1455
1456 cpi &= 7;
1457
1458 outb(1<<cpi, QIC_INTERRUPT_CLEAR1);
1459 return voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi;
1460}
1461
1462static void
1463ack_special_QIC_CPI(__u8 cpi)
1464{
1465 switch(cpi) {
1466 case VIC_CMN_INT:
1467 outb(QIC_CMN_INT, QIC_INTERRUPT_CLEAR0);
1468 break;
1469 case VIC_SYS_INT:
1470 outb(QIC_SYS_INT, QIC_INTERRUPT_CLEAR0);
1471 break;
1472 }
1473 /* also clear at the VIC, just in case (nop for non-extended proc) */
1474 ack_VIC_CPI(cpi);
1475}
1476
1477/* Acknowledge receipt of CPI in the VIC (essentially an EOI) */
1478static void
1479ack_VIC_CPI(__u8 cpi)
1480{
1481#ifdef VOYAGER_DEBUG
1482 unsigned long flags;
1483 __u16 isr;
1484 __u8 cpu = smp_processor_id();
1485
1486 local_irq_save(flags);
1487 isr = vic_read_isr();
1488 if((isr & (1<<(cpi &7))) == 0) {
1489 printk("VOYAGER SMP: CPU%d lost CPI%d\n", cpu, cpi);
1490 }
1491#endif
1492 /* send specific EOI; the two system interrupts have
1493 * bit 4 set for a separate vector but behave as the
1494 * corresponding 3 bit intr */
1495 outb_p(0x60|(cpi & 7),0x20);
1496
1497#ifdef VOYAGER_DEBUG
1498 if((vic_read_isr() & (1<<(cpi &7))) != 0) {
1499 printk("VOYAGER SMP: CPU%d still asserting CPI%d\n", cpu, cpi);
1500 }
1501 local_irq_restore(flags);
1502#endif
1503}
1504
1505/* cribbed with thanks from irq.c */
1506#define __byte(x,y) (((unsigned char *)&(y))[x])
1507#define cached_21(cpu) (__byte(0,vic_irq_mask[cpu]))
1508#define cached_A1(cpu) (__byte(1,vic_irq_mask[cpu]))
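
/* e.g. with vic_irq_mask[cpu] == 0xFFFE, cached_21(cpu) is 0xFE (only
 * IRQ0 unmasked on the master 8259) and cached_A1(cpu) is 0xFF (all
 * slave IRQs masked); i386 is little-endian, so byte 0 is the low byte */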
1509
1510static unsigned int
1511startup_vic_irq(unsigned int irq)
1512{
1513 enable_vic_irq(irq);
1514
1515 return 0;
1516}
1517
1518/* The enable and disable routines. This is where we run into
1519 * conflicting architectural philosophy. Fundamentally, the voyager
1520 * architecture does not expect to have to disable interrupts globally
1521 * (the IRQ controllers belong to each CPU). The processor masquerade
1522 * which is used to start the system shouldn't be used in a running OS
1523 * since it will cause great confusion if two separate CPUs drive to
1524 * the same IRQ controller (I know, I've tried it).
1525 *
1526 * The solution is a variant on the NCR lazy SPL design:
1527 *
1528 * 1) To disable an interrupt, do nothing (other than set the
1529 * IRQ_DISABLED flag). This dares the interrupt actually to arrive.
1530 *
1531 * 2) If the interrupt dares to come in, raise the local mask against
1532 * it (this will result in all the CPU masks being raised
1533 * eventually).
1534 *
1535 * 3) To enable the interrupt, lower the mask on the local CPU and
1536 * broadcast an Interrupt enable CPI which causes all other CPUs to
1537 * adjust their masks accordingly. */
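
/* A concrete (hypothetical) sequence: the generic irq code disables
 * IRQ14 and merely sets IRQ_DISABLED; nothing is written to any 8259.
 * If IRQ14 then arrives on some CPU, before_handle_vic_irq() below sees
 * IRQ_DISABLED, masks IRQ14 in that CPU's local 8259 and marks it
 * IRQ_REPLAY. When the irq is re-enabled, enable_vic_irq() lowers the
 * local mask and sends VIC_ENABLE_IRQ_CPI so the others lower theirs. */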
1538
1539static void
1540enable_vic_irq(unsigned int irq)
1541{
1542	/* linux doesn't do processor-irq affinity, so enable on
1543	 * all CPUs we know about */
1544 int cpu = smp_processor_id(), real_cpu;
1545 __u16 mask = (1<<irq);
1546 __u32 processorList = 0;
1547 unsigned long flags;
1548
1549 VDEBUG(("VOYAGER: enable_vic_irq(%d) CPU%d affinity 0x%lx\n",
1550 irq, cpu, cpu_irq_affinity[cpu]));
1551 spin_lock_irqsave(&vic_irq_lock, flags);
1552 for_each_online_cpu(real_cpu) {
1553 if(!(voyager_extended_vic_processors & (1<<real_cpu)))
1554 continue;
1555 if(!(cpu_irq_affinity[real_cpu] & mask)) {
1556 /* irq has no affinity for this CPU, ignore */
1557 continue;
1558 }
1559 if(real_cpu == cpu) {
1560 enable_local_vic_irq(irq);
1561 }
1562 else if(vic_irq_mask[real_cpu] & mask) {
1563 vic_irq_enable_mask[real_cpu] |= mask;
1564 processorList |= (1<<real_cpu);
1565 }
1566 }
1567 spin_unlock_irqrestore(&vic_irq_lock, flags);
1568 if(processorList)
1569 send_CPI(processorList, VIC_ENABLE_IRQ_CPI);
1570}
1571
1572static void
1573disable_vic_irq(unsigned int irq)
1574{
1575 /* lazy disable, do nothing */
1576}
1577
1578static void
1579enable_local_vic_irq(unsigned int irq)
1580{
1581 __u8 cpu = smp_processor_id();
1582 __u16 mask = ~(1 << irq);
1583 __u16 old_mask = vic_irq_mask[cpu];
1584
1585 vic_irq_mask[cpu] &= mask;
1586 if(vic_irq_mask[cpu] == old_mask)
1587 return;
1588
1589 VDEBUG(("VOYAGER DEBUG: Enabling irq %d in hardware on CPU %d\n",
1590 irq, cpu));
1591
1592 if (irq & 8) {
1593 outb_p(cached_A1(cpu),0xA1);
1594 (void)inb_p(0xA1);
1595 }
1596 else {
1597 outb_p(cached_21(cpu),0x21);
1598 (void)inb_p(0x21);
1599 }
1600}
1601
1602static void
1603disable_local_vic_irq(unsigned int irq)
1604{
1605 __u8 cpu = smp_processor_id();
1606 __u16 mask = (1 << irq);
1607 __u16 old_mask = vic_irq_mask[cpu];
1608
1609 if(irq == 7)
1610 return;
1611
1612 vic_irq_mask[cpu] |= mask;
1613 if(old_mask == vic_irq_mask[cpu])
1614 return;
1615
1616 VDEBUG(("VOYAGER DEBUG: Disabling irq %d in hardware on CPU %d\n",
1617 irq, cpu));
1618
1619 if (irq & 8) {
1620 outb_p(cached_A1(cpu),0xA1);
1621 (void)inb_p(0xA1);
1622 }
1623 else {
1624 outb_p(cached_21(cpu),0x21);
1625 (void)inb_p(0x21);
1626 }
1627}
1628
1629/* The VIC is level triggered, so the ack can only be issued after the
1630 * interrupt completes. However, we do Voyager lazy interrupt
1631 * handling here: It is an extremely expensive operation to mask an
1632 * interrupt in the vic, so we merely set a flag (IRQ_DISABLED). If
1633 * this interrupt actually comes in, then we mask and ack here to push
1634 * the interrupt off to another CPU */
1635static void
1636before_handle_vic_irq(unsigned int irq)
1637{
1638 irq_desc_t *desc = irq_desc + irq;
1639 __u8 cpu = smp_processor_id();
1640
1641 _raw_spin_lock(&vic_irq_lock);
1642 vic_intr_total++;
1643 vic_intr_count[cpu]++;
1644
1645 if(!(cpu_irq_affinity[cpu] & (1<<irq))) {
1646 /* The irq is not in our affinity mask, push it off
1647 * onto another CPU */
1648 VDEBUG(("VOYAGER DEBUG: affinity triggered disable of irq %d on cpu %d\n",
1649 irq, cpu));
1650 disable_local_vic_irq(irq);
1651 /* set IRQ_INPROGRESS to prevent the handler in irq.c from
1652 * actually calling the interrupt routine */
1653 desc->status |= IRQ_REPLAY | IRQ_INPROGRESS;
1654 } else if(desc->status & IRQ_DISABLED) {
1655 /* Damn, the interrupt actually arrived, do the lazy
1656 * disable thing. The interrupt routine in irq.c will
1657 * not handle a IRQ_DISABLED interrupt, so nothing more
1658 * need be done here */
1659 VDEBUG(("VOYAGER DEBUG: lazy disable of irq %d on CPU %d\n",
1660 irq, cpu));
1661 disable_local_vic_irq(irq);
1662 desc->status |= IRQ_REPLAY;
1663 } else {
1664 desc->status &= ~IRQ_REPLAY;
1665 }
1666
1667 _raw_spin_unlock(&vic_irq_lock);
1668}
1669
1670/* Finish the VIC interrupt: basically mask */
1671static void
1672after_handle_vic_irq(unsigned int irq)
1673{
1674 irq_desc_t *desc = irq_desc + irq;
1675
1676 _raw_spin_lock(&vic_irq_lock);
1677 {
1678 unsigned int status = desc->status & ~IRQ_INPROGRESS;
1679#ifdef VOYAGER_DEBUG
1680 __u16 isr;
1681#endif
1682
1683 desc->status = status;
1684 if ((status & IRQ_DISABLED))
1685 disable_local_vic_irq(irq);
1686#ifdef VOYAGER_DEBUG
1687 /* DEBUG: before we ack, check what's in progress */
1688 isr = vic_read_isr();
1689		if(!(isr & (1<<irq)) && !(status & IRQ_REPLAY)) {
1690 int i;
1691 __u8 cpu = smp_processor_id();
1692 __u8 real_cpu;
1693			cpumask_t mask = cpu_online_map; /* check all online CPUs */
1694
1695 printk("VOYAGER SMP: CPU%d lost interrupt %d\n",
1696 cpu, irq);
1697			for_each_cpu_mask(real_cpu, mask) {
1698
1699 outb(VIC_CPU_MASQUERADE_ENABLE | real_cpu,
1700 VIC_PROCESSOR_ID);
1701 isr = vic_read_isr();
1702 if(isr & (1<<irq)) {
1703 printk("VOYAGER SMP: CPU%d ack irq %d\n",
1704 real_cpu, irq);
1705 ack_vic_irq(irq);
1706 }
1707 outb(cpu, VIC_PROCESSOR_ID);
1708 }
1709 }
1710#endif /* VOYAGER_DEBUG */
1711 /* as soon as we ack, the interrupt is eligible for
1712 * receipt by another CPU so everything must be in
1713 * order here */
1714 ack_vic_irq(irq);
1715 if(status & IRQ_REPLAY) {
1716 /* replay is set if we disable the interrupt
1717 * in the before_handle_vic_irq() routine, so
1718 * clear the in progress bit here to allow the
1719 * next CPU to handle this correctly */
1720 desc->status &= ~(IRQ_REPLAY | IRQ_INPROGRESS);
1721 }
1722#ifdef VOYAGER_DEBUG
1723 isr = vic_read_isr();
1724 if((isr & (1<<irq)) != 0)
1725 printk("VOYAGER SMP: after_handle_vic_irq() after ack irq=%d, isr=0x%x\n",
1726 irq, isr);
1727#endif /* VOYAGER_DEBUG */
1728 }
1729 _raw_spin_unlock(&vic_irq_lock);
1730
1731 /* All code after this point is out of the main path - the IRQ
1732 * may be intercepted by another CPU if reasserted */
1733}
1734
1735
1736/* Linux processor - interrupt affinity manipulations.
1737 *
1738 * For each processor, we maintain a 32 bit irq affinity mask.
1739 * Initially it is set to all 1's so every processor accepts every
1740 * interrupt. In this call, we change the processor's affinity mask:
1741 *
1742 * Change from enable to disable:
1743 *
1744 * If the interrupt ever comes in to the processor, we will disable it
1745 * and ack it to push it off to another CPU, so just accept the mask here.
1746 *
1747 * Change from disable to enable:
1748 *
1749 * change the mask and then do an interrupt enable CPI to re-enable on
1750 * the selected processors */
1751
1752void
1753set_vic_irq_affinity(unsigned int irq, cpumask_t mask)
1754{
1755 /* Only extended processors handle interrupts */
1756 unsigned long real_mask;
1757 unsigned long irq_mask = 1 << irq;
1758 int cpu;
1759
1760 real_mask = cpus_addr(mask)[0] & voyager_extended_vic_processors;
1761
1762 if(cpus_addr(mask)[0] == 0)
1763 /* can't have no cpu's to accept the interrupt -- extremely
1764 * bad things will happen */
1765 return;
1766
1767 if(irq == 0)
1768 /* can't change the affinity of the timer IRQ. This
1769 * is due to the constraint in the voyager
1770		 * architecture that the CPI also comes in on an IRQ
1771		 * line and we have chosen IRQ0 for this. If you
1772		 * raise the mask on this interrupt, the processor
1773		 * will no longer be able to accept VIC CPIs */
1774 return;
1775
1776 if(irq >= 32)
1777 /* You can only have 32 interrupts in a voyager system
1778 * (and 32 only if you have a secondary microchannel
1779 * bus) */
1780 return;
1781
1782 for_each_online_cpu(cpu) {
1783 unsigned long cpu_mask = 1 << cpu;
1784
1785 if(cpu_mask & real_mask) {
1786 /* enable the interrupt for this cpu */
1787 cpu_irq_affinity[cpu] |= irq_mask;
1788 } else {
1789 /* disable the interrupt for this cpu */
1790 cpu_irq_affinity[cpu] &= ~irq_mask;
1791 }
1792 }
1793 /* this is magic, we now have the correct affinity maps, so
1794 * enable the interrupt. This will send an enable CPI to
1795 * those cpu's who need to enable it in their local masks,
1796	 * causing them to correct for the new affinity. If the
1797 * interrupt is currently globally disabled, it will simply be
1798 * disabled again as it comes in (voyager lazy disable). If
1799 * the affinity map is tightened to disable the interrupt on a
1800 * cpu, it will be pushed off when it comes in */
1801 enable_vic_irq(irq);
1802}
1803
1804static void
1805ack_vic_irq(unsigned int irq)
1806{
1807 if (irq & 8) {
1808 outb(0x62,0x20); /* Specific EOI to cascade */
1809 outb(0x60|(irq & 7),0xA0);
1810 } else {
1811 outb(0x60 | (irq & 7),0x20);
1812 }
1813}
1814
1815/* enable the CPIs. In the VIC, the CPIs are delivered by the 8259
1816 * but are not vectored by it. This means that the 8259 mask must be
1817 * lowered to receive them */
1818static __init void
1819vic_enable_cpi(void)
1820{
1821 __u8 cpu = smp_processor_id();
1822
1823 /* just take a copy of the current mask (nop for boot cpu) */
1824 vic_irq_mask[cpu] = vic_irq_mask[boot_cpu_id];
1825
1826 enable_local_vic_irq(VIC_CPI_LEVEL0);
1827 enable_local_vic_irq(VIC_CPI_LEVEL1);
1828 /* for sys int and cmn int */
1829 enable_local_vic_irq(7);
1830
1831 if(is_cpu_quad()) {
1832 outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0);
1833 outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1);
1834 VDEBUG(("VOYAGER SMP: QIC ENABLE CPI: CPU%d: MASK 0x%x\n",
1835 cpu, QIC_CPI_ENABLE));
1836 }
1837
1838 VDEBUG(("VOYAGER SMP: ENABLE CPI: CPU%d: MASK 0x%x\n",
1839 cpu, vic_irq_mask[cpu]));
1840}
1841
1842void
1843voyager_smp_dump(void)
1844{
1845 int old_cpu = smp_processor_id(), cpu;
1846
1847 /* dump the interrupt masks of each processor */
1848 for_each_online_cpu(cpu) {
1849 __u16 imr, isr, irr;
1850 unsigned long flags;
1851
1852 local_irq_save(flags);
1853 outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID);
1854 imr = (inb(0xa1) << 8) | inb(0x21);
1855 outb(0x0a, 0xa0);
1856 irr = inb(0xa0) << 8;
1857 outb(0x0a, 0x20);
1858 irr |= inb(0x20);
1859 outb(0x0b, 0xa0);
1860 isr = inb(0xa0) << 8;
1861 outb(0x0b, 0x20);
1862 isr |= inb(0x20);
1863 outb(old_cpu, VIC_PROCESSOR_ID);
1864 local_irq_restore(flags);
1865 printk("\tCPU%d: mask=0x%x, IMR=0x%x, IRR=0x%x, ISR=0x%x\n",
1866 cpu, vic_irq_mask[cpu], imr, irr, isr);
1867#if 0
1868		/* These lines are put in to try to unstick an un-ack'd irq */
1869 if(isr != 0) {
1870 int irq;
1871 for(irq=0; irq<16; irq++) {
1872 if(isr & (1<<irq)) {
1873 printk("\tCPU%d: ack irq %d\n",
1874 cpu, irq);
1875 local_irq_save(flags);
1876 outb(VIC_CPU_MASQUERADE_ENABLE | cpu,
1877 VIC_PROCESSOR_ID);
1878 ack_vic_irq(irq);
1879 outb(old_cpu, VIC_PROCESSOR_ID);
1880 local_irq_restore(flags);
1881 }
1882 }
1883 }
1884#endif
1885 }
1886}
1887
1888void
1889smp_voyager_power_off(void *dummy)
1890{
1891 if(smp_processor_id() == boot_cpu_id)
1892 voyager_power_off();
1893 else
1894 smp_stop_cpu_function(NULL);
1895}
1896
1897void __init
1898smp_prepare_cpus(unsigned int max_cpus)
1899{
1900 /* FIXME: ignore max_cpus for now */
1901 smp_boot_cpus();
1902}
1903
1904void __devinit smp_prepare_boot_cpu(void)
1905{
1906 cpu_set(smp_processor_id(), cpu_online_map);
1907 cpu_set(smp_processor_id(), cpu_callout_map);
1908}
1909
1910int __devinit
1911__cpu_up(unsigned int cpu)
1912{
1913 /* This only works at boot for x86. See "rewrite" above. */
1914 if (cpu_isset(cpu, smp_commenced_mask))
1915 return -ENOSYS;
1916
1917 /* In case one didn't come up */
1918 if (!cpu_isset(cpu, cpu_callin_map))
1919 return -EIO;
1920 /* Unleash the CPU! */
1921 cpu_set(cpu, smp_commenced_mask);
1922 while (!cpu_isset(cpu, cpu_online_map))
1923 mb();
1924 return 0;
1925}
1926
1927void __init
1928smp_cpus_done(unsigned int max_cpus)
1929{
1930 zap_low_mappings();
1931}
diff --git a/arch/i386/mach-voyager/voyager_thread.c b/arch/i386/mach-voyager/voyager_thread.c
new file mode 100644
index 000000000000..9980eef31fda
--- /dev/null
+++ b/arch/i386/mach-voyager/voyager_thread.c
@@ -0,0 +1,167 @@
1/* -*- mode: c; c-basic-offset: 8 -*- */
2
3/* Copyright (C) 2001
4 *
5 * Author: J.E.J.Bottomley@HansenPartnership.com
6 *
7 * linux/arch/i386/kernel/voyager_thread.c
8 *
9 * This module provides the machine status monitor thread for the
10 * voyager architecture. This allows us to monitor the machine
11 * environment (temp, voltage, fan function) and the front panel and
12 * internal UPS. If a fault is detected, this thread takes corrective
13 * action (usually just informing init)
14 * */
15
16#include <linux/module.h>
17#include <linux/config.h>
18#include <linux/mm.h>
19#include <linux/kernel_stat.h>
20#include <linux/delay.h>
21#include <linux/mc146818rtc.h>
22#include <linux/smp_lock.h>
23#include <linux/init.h>
24#include <linux/bootmem.h>
25#include <linux/kmod.h>
26#include <linux/completion.h>
27#include <linux/sched.h>
28#include <asm/desc.h>
29#include <asm/voyager.h>
30#include <asm/vic.h>
31#include <asm/mtrr.h>
32#include <asm/msr.h>
33
34#include <linux/irq.h>
35
36#define THREAD_NAME "kvoyagerd"
37
38/* external variables */
39int kvoyagerd_running = 0;
40DECLARE_MUTEX_LOCKED(kvoyagerd_sem);
41
42static int thread(void *);
43
44static __u8 set_timeout = 0;
45
46/* Start the machine monitor thread. Return 0 if OK, 1 if fail */
47static int __init
48voyager_thread_start(void)
49{
50 if(kernel_thread(thread, NULL, CLONE_KERNEL) < 0) {
51 /* This is serious, but not fatal */
52 printk(KERN_ERR "Voyager: Failed to create system monitor thread!!!\n");
53 return 1;
54 }
55 return 0;
56}
57
58static int
59execute(const char *string)
60{
61 int ret;
62
63 char *envp[] = {
64 "HOME=/",
65 "TERM=linux",
66 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
67 NULL,
68 };
69 char *argv[] = {
70 "/bin/bash",
71 "-c",
72 (char *)string,
73 NULL,
74 };
75
76 if ((ret = call_usermodehelper(argv[0], argv, envp, 1)) != 0) {
77 printk(KERN_ERR "Voyager failed to run \"%s\": %i\n",
78 string, ret);
79 }
80 return ret;
81}
82
83static void
84check_from_kernel(void)
85{
86 if(voyager_status.switch_off) {
87
88		/* FIXME: This should be configurable via proc */
89 execute("umask 600; echo 0 > /etc/initrunlvl; kill -HUP 1");
90 } else if(voyager_status.power_fail) {
91 VDEBUG(("Voyager daemon detected AC power failure\n"));
92
93		/* FIXME: This should be configurable via proc */
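		/* this relies on the SysV init power protocol (an
		 * assumption worth noting): 'F' in /etc/powerstatus means
		 * power is failing, 'O' means it is back, and SIGPWR
		 * tells init to re-read the file */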
94 execute("umask 600; echo F > /etc/powerstatus; kill -PWR 1");
95 set_timeout = 1;
96 }
97}
98
99static void
100check_continuing_condition(void)
101{
102 if(voyager_status.power_fail) {
103 __u8 data;
104 voyager_cat_psi(VOYAGER_PSI_SUBREAD,
105 VOYAGER_PSI_AC_FAIL_REG, &data);
106 if((data & 0x1f) == 0) {
107 /* all power restored */
108 printk(KERN_NOTICE "VOYAGER AC power restored, cancelling shutdown\n");
109			/* FIXME: should be user configurable */
110 execute("umask 600; echo O > /etc/powerstatus; kill -PWR 1");
111 set_timeout = 0;
112 }
113 }
114}
115
116static void
117wakeup(unsigned long unused)
118{
119 up(&kvoyagerd_sem);
120}
121
122static int
123thread(void *unused)
124{
125 struct timer_list wakeup_timer;
126
127 kvoyagerd_running = 1;
128
129 reparent_to_init();
130 daemonize(THREAD_NAME);
131
132 set_timeout = 0;
133
134 init_timer(&wakeup_timer);
135
136 sigfillset(&current->blocked);
137 current->signal->tty = NULL;
138
139 printk(KERN_NOTICE "Voyager starting monitor thread\n");
140
141 for(;;) {
142 down_interruptible(&kvoyagerd_sem);
143 VDEBUG(("Voyager Daemon awoken\n"));
144 if(voyager_status.request_from_kernel == 0) {
145 /* probably awoken from timeout */
146 check_continuing_condition();
147 } else {
148 check_from_kernel();
149 voyager_status.request_from_kernel = 0;
150 }
151 if(set_timeout) {
152 del_timer(&wakeup_timer);
153 wakeup_timer.expires = HZ + jiffies;
154 wakeup_timer.function = wakeup;
155 add_timer(&wakeup_timer);
156 }
157 }
158}
159
160static void __exit
161voyager_thread_stop(void)
162{
163 /* FIXME: do nothing at the moment */
164}
165
166module_init(voyager_thread_start);
167//module_exit(voyager_thread_stop);
diff --git a/arch/i386/math-emu/Makefile b/arch/i386/math-emu/Makefile
new file mode 100644
index 000000000000..9c943fa6ce6b
--- /dev/null
+++ b/arch/i386/math-emu/Makefile
@@ -0,0 +1,30 @@
1#
2# Makefile for wm-FPU-emu
3#
4
5#DEBUG = -DDEBUGGING
6DEBUG =
7PARANOID = -DPARANOID
8CFLAGS := $(CFLAGS) $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION)
9
10EXTRA_AFLAGS := $(PARANOID)
11
12# From 'C' language sources:
13C_OBJS =fpu_entry.o errors.o \
14 fpu_arith.o fpu_aux.o fpu_etc.o fpu_tags.o fpu_trig.o \
15 load_store.o get_address.o \
16 poly_atan.o poly_l2.o poly_2xm1.o poly_sin.o poly_tan.o \
17 reg_add_sub.o reg_compare.o reg_constant.o reg_convert.o \
18 reg_ld_str.o reg_divide.o reg_mul.o
19
20# From 80x86 assembler sources:
21A_OBJS =reg_u_add.o reg_u_div.o reg_u_mul.o reg_u_sub.o \
22 div_small.o reg_norm.o reg_round.o \
23 wm_shrx.o wm_sqrt.o \
24 div_Xsig.o polynom_Xsig.o round_Xsig.o \
25 shr_Xsig.o mul_Xsig.o
26
27obj-y =$(C_OBJS) $(A_OBJS)
28
29proto:
30 cproto -e -DMAKING_PROTO *.c >fpu_proto.h
diff --git a/arch/i386/math-emu/README b/arch/i386/math-emu/README
new file mode 100644
index 000000000000..e6235491d6eb
--- /dev/null
+++ b/arch/i386/math-emu/README
@@ -0,0 +1,427 @@
1 +---------------------------------------------------------------------------+
2 | wm-FPU-emu an FPU emulator for 80386 and 80486SX microprocessors. |
3 | |
4 | Copyright (C) 1992,1993,1994,1995,1996,1997,1999 |
5 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
6 | Australia. E-mail billm@melbpc.org.au |
7 | |
8 | This program is free software; you can redistribute it and/or modify |
9 | it under the terms of the GNU General Public License version 2 as |
10 | published by the Free Software Foundation. |
11 | |
12 | This program is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | GNU General Public License for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with this program; if not, write to the Free Software |
19 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
20 | |
21 +---------------------------------------------------------------------------+
22
23
24
25wm-FPU-emu is an FPU emulator for Linux. It is derived from wm-emu387
26which was my 80387 emulator for early versions of djgpp (gcc under
27msdos); wm-emu387 was in turn based upon emu387 which was written by
28DJ Delorie for djgpp. The interface to the Linux kernel is based upon
29the original Linux math emulator by Linus Torvalds.
30
31My target FPU for wm-FPU-emu is that described in the Intel486
32Programmer's Reference Manual (1992 edition). Unfortunately, numerous
33facets of the functioning of the FPU are not well covered in the
34Reference Manual. The information in the manual has been supplemented
35with measurements on real 80486's. Unfortunately, it is simply not
36possible to be sure that all of the peculiarities of the 80486 have
37been discovered, so there are always likely to be obscure differences
38between the detailed behaviour of the emulator and that of a real 80486.
39
40wm-FPU-emu does not implement all of the behaviour of the 80486 FPU,
41but is very close. See "Limitations" later in this file for a list of
42some differences.
43
44Please report bugs, etc to me at:
45 billm@melbpc.org.au
46or b.metzenthen@medoto.unimelb.edu.au
47
48For more information on the emulator and on floating point topics, see
49my web pages, currently at http://www.suburbia.net/~billm/
50
51
52--Bill Metzenthen
53 December 1999
54
55
56----------------------- Internals of wm-FPU-emu -----------------------
57
58Numeric algorithms:
59(1) Add, subtract, and multiply. Nothing remarkable in these.
60(2) Divide has been tuned to get reasonable performance. The algorithm
61 is not the obvious one which most people seem to use, but is designed
62 to take advantage of the characteristics of the 80386. I expect that
63 it has been invented many times before I discovered it, but I have not
64 seen it. It is based upon one of those ideas which one carries around
65 for years without ever bothering to check it out.
66(3) The sqrt function has been tuned to get good performance. It is based
67 upon Newton's classic method. Performance was improved by capitalizing
68 upon the properties of Newton's method, and the code is once again
69 structured taking account of the 80386 characteristics.
70(4) The trig, log, and exp functions are based in each case upon quasi-
71 "optimal" polynomial approximations. My definition of "optimal" was
72 based upon getting good accuracy with reasonable speed.
73(5) The argument reducing code for the trig function effectively uses
74 a value of pi which is accurate to more than 128 bits. As a consequence,
75   the reduced argument is accurate to more than 64 bits for arguments up
76   to a few pi, and for most other arguments as well, even those
77   approaching 2^63. This is far superior to an 80486, which uses a
78   value of pi accurate to only 66 bits; see the small experiment below.
79
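The cost of skimping on pi precision is easy to demonstrate with a tiny
userspace experiment (mine, not part of the emulator); it reduces the
near-multiple-of-pi argument discussed later in this file using only the
53-bit pi of a C double, where the reduction error is roughly x * 2^-53,
i.e. hundreds of radians:

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        const double pi = 3.14159265358979323846;
        double x = 8227740058411162616.0;  /* close to a multiple of pi */

        /* meaningless result: a 53-bit pi is nowhere near accurate
         * enough to reduce an argument of this size */
        printf("fmod(x, pi) = %.17g\n", fmod(x, pi));
        return 0;
    }

(Build with something like "gcc reduce.c -lm".) The emulator avoids this
by carrying pi to more than 128 bits during reduction.
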
80The code of the emulator is complicated slightly by the need to
81account for a limited form of re-entrancy. Normally, the emulator will
82emulate each FPU instruction to completion without interruption.
83However, it may happen that when the emulator is accessing the user
84memory space, swapping may be needed. In this case the emulator may be
85temporarily suspended while disk i/o takes place. During this time
86another process may use the emulator, thereby perhaps changing static
87variables. The code which accesses user memory is confined to five
88files:
89 fpu_entry.c
90 reg_ld_str.c
91 load_store.c
92 get_address.c
93 errors.c
94As from version 1.12 of the emulator, no static variables are used
95(apart from those in the kernel's per-process tables). The emulator is
96therefore now fully re-entrant, rather than having just the restricted
97form of re-entrancy which is required by the Linux kernel.
98
99----------------------- Limitations of wm-FPU-emu -----------------------
100
101There are a number of differences between the current wm-FPU-emu
102(version 2.01) and the 80486 FPU (apart from bugs). The differences
103are fewer than those which applied to the 1.xx series of the emulator.
104Some of the more important differences are listed below:
105
106The Roundup flag does not have much meaning for the transcendental
107functions and its 80486 value with these functions is likely to differ
108from its emulator value.
109
110In a few rare cases the Underflow flag obtained with the emulator will
111be different from that obtained with an 80486. This occurs when the
112following conditions apply simultaneously:
113(a) the operands have a higher precision than the current setting of the
114 precision control (PC) flags.
115(b) the underflow exception is masked.
116(c) the magnitude of the exact result (before rounding) is less than 2^-16382.
117(d) the magnitude of the final result (after rounding) is exactly 2^-16382.
118(e) the magnitude of the exact result would be exactly 2^-16382 if the
119 operands were rounded to the current precision before the arithmetic
120 operation was performed.
121If all of these apply, the emulator will set the Underflow flag but a real
12280486 will not.
123
124NOTE: Certain formats of Extended Real are UNSUPPORTED. They are
125unsupported by the 80486. They are the Pseudo-NaNs, Pseudoinfinities,
126and Unnormals. None of these will be generated by an 80486 or by the
127emulator. Do not use them. The emulator treats them differently in
128detail from the way an 80486 does.
129
130Self modifying code can cause the emulator to fail. An example of such
131code is:
132 movl %esp,[%ebx]
133 fld1
134The FPU instruction may be (usually will be) loaded into the pre-fetch
135queue of the CPU before the mov instruction is executed. If the
136destination of the 'movl' overlaps the FPU instruction then the bytes
137in the prefetch queue and memory will be inconsistent when the FPU
138instruction is executed. The emulator will be invoked but will not be
139able to find the instruction which caused the device-not-present
140exception. For this case, the emulator cannot emulate the behaviour of
141an 80486DX.
142
143Handling of the address size override prefix byte (0x67) has not been
144extensively tested yet. A major problem exists because using it in
145vm86 mode can cause a general protection fault. Address offsets
146greater than 0xffff appear to be illegal in vm86 mode but are quite
147acceptable (and work) in real mode. A small test program developed to
148check the addressing, and which runs successfully in real mode,
149crashes dosemu under Linux and also brings Windows down with a general
150protection fault message when run under the MS-DOS prompt of Windows
1513.1. (The program simply reads data from a valid address).
152
153The emulator supports 16-bit protected mode, with one difference from
154an 80486DX. An 80486DX will allow some floating point instructions to
155write a few bytes below the lowest address of the stack. The emulator
156will not allow this in 16-bit protected mode: no instructions are
157allowed to write outside the bounds set by the protection.
158
159----------------------- Performance of wm-FPU-emu -----------------------
160
161Speed.
162-----
163
164The speed of floating point computation with the emulator will depend
165upon instruction mix. Relative performance is best for the instructions
166which require most computation. The simple instructions are adversely
167affected by the FPU instruction trap overhead.
168
169
170Timing: Some simple timing tests have been made on the emulator functions.
171The times include load/store instructions. All times are in microseconds
172measured on a 33MHz 386 with 64k cache. The Turbo C tests were under
173ms-dos, the next two columns are for emulators running with the djgpp
174ms-dos extender. The final column is for wm-FPU-emu in Linux 0.97,
175using libm4.0 (hard).
176
177function Turbo C djgpp 1.06 WM-emu387 wm-FPU-emu
178
179 + 60.5 154.8 76.5 139.4
180 - 61.1-65.5 157.3-160.8 76.2-79.5 142.9-144.7
181 * 71.0 190.8 79.6 146.6
182 / 61.2-75.0 261.4-266.9 75.3-91.6 142.2-158.1
183
184 sin() 310.8 4692.0 319.0 398.5
185 cos() 284.4 4855.2 308.0 388.7
186 tan() 495.0 8807.1 394.9 504.7
187 atan() 328.9 4866.4 601.1 419.5-491.9
188
189 sqrt() 128.7 crashed 145.2 227.0
190 log() 413.1-419.1 5103.4-5354.21 254.7-282.2 409.4-437.1
191 exp() 479.1 6619.2 469.1 850.8
192
193
194The performance under Linux is improved by the use of look-ahead code.
195The following results show the improvement which is obtained under
196Linux due to the look-ahead code. Also given are the times for the
197original Linux emulator with the 4.1 'soft' lib.
198
199 [ Linus' note: I changed look-ahead to be the default under linux, as
200 there was no reason not to use it after I had edited it to be
201 disabled during tracing ]
202
203 wm-FPU-emu w original w
204 look-ahead 'soft' lib
205 + 106.4 190.2
206 - 108.6-111.6 192.4-216.2
207 * 113.4 193.1
208 / 108.8-124.4 700.1-706.2
209
210 sin() 390.5 2642.0
211 cos() 381.5 2767.4
212 tan() 496.5 3153.3
213 atan() 367.2-435.5 2439.4-3396.8
214
215 sqrt() 195.1 4732.5
216 log() 358.0-387.5 3359.2-3390.3
217 exp() 619.3 4046.4
218
219
220These figures are now somewhat out-of-date. The emulator has become
221progressively slower for most functions as more of the 80486 features
222have been implemented.
223
224
225----------------------- Accuracy of wm-FPU-emu -----------------------
226
227
228The accuracy of the emulator is in almost all cases equal to or better
229than that of an Intel 80486 FPU.
230
231The results of the basic arithmetic functions (+,-,*,/), and fsqrt
232match those of an 80486 FPU. They are the best possible; the error for
233these never exceeds 1/2 an lsb. The fprem and fprem1 instructions
234return exact results; they have no error.
235
236
237The following table compares the emulator accuracy for the sqrt(),
238trig and log functions against the Turbo C "emulator". For this table,
239each function was tested at about 400 points. Ideal worst-case results
240would be 64 bits. The reduced Turbo C accuracy of cos() and tan() for
241arguments greater than pi/4 can be thought of as being related to the
242precision of the argument x; e.g. an argument of pi/2-(1e-10) which is
243accurate to 64 bits can result in a relative accuracy in cos() of
244about 64 + log2(cos(x)) = 31 bits.
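
As a quick check of that estimate (my arithmetic, not from the original
tables): cos(pi/2 - 1e-10) = sin(1e-10), which is about 1e-10, and
log2(1e-10) is about -33.2, so 64 - 33.2 gives roughly 31 bits, in line
with the 31.9-bit Turbo C entries in the table. In C:

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        double x = 1.5707963267948966 - 1e-10;  /* approx pi/2 - 1e-10 */

        /* prints a value near 31 */
        printf("64 + log2(cos(x)) = %.1f\n", 64.0 + log2(cos(x)));
        return 0;
    }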
245
246
247Function Tested x range Worst result Turbo C
248 (relative bits)
249
250sqrt(x) 1 .. 2 64.1 63.2
251atan(x) 1e-10 .. 200 64.2 62.8
252cos(x) 0 .. pi/2-(1e-10) 64.4 (x <= pi/4) 62.4
253 64.1 (x = pi/2-(1e-10)) 31.9
254sin(x) 1e-10 .. pi/2 64.0 62.8
255tan(x) 1e-10 .. pi/2-(1e-10) 64.0 (x <= pi/4) 62.1
256 64.1 (x = pi/2-(1e-10)) 31.9
257exp(x) 0 .. 1 63.1 ** 62.9
258log(x) 1+1e-6 .. 2 63.8 ** 62.1
259
260** The accuracy for exp() and log() is low because the FPU (emulator)
261does not compute them directly; two operations are required.
262
263
264The emulator passes the "paranoia" tests (compiled with gcc 2.3.3 or
265later) for 'float' variables (24 bit precision numbers) when precision
266control is set to 24, 53 or 64 bits, and for 'double' variables (53
267bit precision numbers) when precision control is set to 53 bits (a
268properly performing FPU cannot pass the 'paranoia' tests for 'double'
269variables when precision control is set to 64 bits).
270
271The code for reducing the argument for the trig functions (fsin, fcos,
272fptan and fsincos) has been improved and now effectively uses a value
273for pi which is accurate to more than 128 bits precision. As a
274consequence, the accuracy of these functions for large arguments has
275been dramatically improved (and is now very much better than an 80486
276FPU). There is also now no degradation of accuracy for fcos and fptan
277for operands close to pi/2. Measured results are (note that the
278definition of accuracy has changed slightly from that used for the
279above table):
280
281Function Tested x range Worst result
282 (absolute bits)
283
284cos(x) 0 .. 9.22e+18 62.0
285sin(x) 1e-16 .. 9.22e+18 62.1
286tan(x) 1e-16 .. 9.22e+18 61.8
287
288It is possible with some effort to find very large arguments which
289give much degraded precision. For example, the integer number
290 8227740058411162616.0
291is within about 10e-7 of a multiple of pi. To find the tan (for
292example) of this number to 64 bits precision it would be necessary to
293have a value of pi which had about 150 bits precision. The FPU
294emulator computes the result to about 42.6 bits precision (the correct
295result is about -9.739715e-8). On the other hand, an 80486 FPU returns
2960.01059, which in relative terms is hopelessly inaccurate.
297
298For arguments close to critical angles (which occur at multiples of
299pi/2) the emulator is more accurate than an 80486 FPU. For very large
300arguments, the emulator is far more accurate.
301
302
303Prior to version 1.20 of the emulator, the accuracy of the results for
304the transcendental functions (in their principal range) was not as
305good as the results from an 80486 FPU. From version 1.20, the accuracy
306has been considerably improved and these functions now give measured
307worst-case results which are better than the worst-case results given
308by an 80486 FPU.
309
310The following table gives the measured results for the emulator. The
311number of randomly selected arguments in each case is about half a
312million. The group of three columns gives the frequency of the given
313accuracy in number of times per million; thus the second of these
314columns shows that an accuracy of between 63.80 and 63.89 bits was
315found at a rate of 133 times per one million measurements for fsin.
316The results show that the fsin, fcos and fptan instructions return
317results which are in error (i.e. less accurate than the best possible
318result (which is 64 bits)) for about one per cent of all arguments
319between -pi/2 and +pi/2. The other instructions have a lower
320frequency of results which are in error. The last two columns give
321the worst accuracy which was found (in bits) and the approximate value
322of the argument which produced it.
323
324                                  frequency (per M)
325                                -------------------   ---------------
326instr     arg range     # tests   63.7  63.8   63.9    worst    at arg
327                                  bits  bits   bits     bits
328-----     ------------  -------   ----  ----  -----    -----   --------
329fsin      (0,pi/2)       547756      0   133  10673    63.89   0.451317
330fcos      (0,pi/2)       547563      0   126  10532    63.85   0.700801
331fptan     (0,pi/2)       536274     11   267  10059    63.74   0.784876
332fpatan    4 quadrants    517087      0     8   1855    63.88   0.435121 (4q)
333fyl2x     (0,20)         541861      0     0   1323    63.94   1.40923 (x)
334fyl2xp1   (-.293,.414)   520256      0     0   5678    63.93   0.408542 (x)
335f2xm1     (-1,1)         538847      4   481   6488    63.79   0.167709
336
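
Measurements like those above amount to comparing each result against a
much more precise reference and taking -log2 of the relative error. A
sketch of such a harness (an assumed reconstruction, not the author's
actual test code; GNU MPFR provides the reference values):

  #include <math.h>
  #include <mpfr.h>
  #include <stdio.h>

  /* Bits of accuracy of 'computed' as an approximation to sin(arg). */
  static double bits_of_accuracy(long double computed, long double arg)
  {
      mpfr_t x, ref;
      long double r;
      double bits;

      mpfr_init2(x, 160);                  /* well beyond 64 bits */
      mpfr_init2(ref, 160);
      mpfr_set_ld(x, arg, MPFR_RNDN);
      mpfr_sin(ref, x, MPFR_RNDN);         /* high-precision reference */
      r = mpfr_get_ld(ref, MPFR_RNDN);
      bits = -log2(fabsl((computed - r) / r));
      mpfr_clear(x);
      mpfr_clear(ref);
      return bits > 64.0 ? 64.0 : bits;    /* cap: 64 bits is "exact" */
  }

  int main(void)
  {
      long double a = 0.451317L;           /* the worst fsin argument above */
      printf("%.2f bits\n", bits_of_accuracy(sinl(a), a));
      return 0;
  }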
337
338Tests performed on an 80486 FPU showed results of lower accuracy. The
339following table gives the results which were obtained with an AMD
340486DX2/66 (other tests indicate that an Intel 486DX produces
341identical results). The tests were basically the same as those used
342to measure the emulator (the values, being random, were in general not
343the same). The total number of tests for each instruction is given
344at the end of the table; in each case about 100k tests were performed.
345Another line of figures at the end of the table shows that most of the
346instructions return results which are in error for more than 10
347percent of the arguments tested.
348
349The numbers in the body of the table give the approx number of times a
350result of the given accuracy in bits (given in the left-most column)
351was obtained per one million arguments. For three of the instructions,
352two columns of results are given: * The second column for f2xm1 gives
353the number of cases where the results of the first column were for a
354positive argument; this shows that this instruction gives better
355results for positive arguments than it does for negative ones. * In the
356cases of fcos and fptan, the first column gives the results after all
357cases with arguments greater than 1.5 were removed from the results
358given in the second column. Unlike the emulator, an 80486 FPU returns
359results of relatively poor accuracy for these instructions when the
360argument approaches pi/2. The table does not show those cases where the
361accuracy of the results was less than 62 bits, which occurs quite
362often for fsin and fptan when the argument approaches pi/2. This poor
363accuracy is discussed above in relation to the Turbo C "emulator", and
364the accuracy of the value of pi.
365
366
367bits  f2xm1  f2xm1 fpatan   fcos   fcos  fyl2x fyl2xp1   fsin  fptan  fptan
36862.0      0      0      0      0    437      0      0      0      0    925
36962.1      0      0     10      0    894      0      0      0      0   1023
37062.2     14      0      0      0   1033      0      0      0      0    945
37162.3     57      0      0      0   1202      0      0      0      0   1023
37262.4    385      0      0     10   1292      0     23      0      0   1178
37362.5   1140      0      0    119   1649      0     39      0      0   1149
37462.6   2037      0      0    189   1620      0     16      0      0   1169
37562.7   5086     14      0    646   2315     10    101     35     39   1402
37662.8   8818     86      0    984   3050     59    287    131    224   2036
37762.9  11340   1355      0   2126   4153     79    605    357    321   1948
37863.0  15557   4750      0   3319   5376    246   1281    862    808   2688
37963.1  20016   8288      0   4620   6628    511   2569   1723   1510   3302
38063.2  24945  11127     10   6588   8098   1120   4470   2968   2990   4724
38163.3  25686  12382     69   8774  10682   1906   6775   4482   5474   7236
38263.4  29219  14722     79  11109  12311   3094   9414   7259   8912  10587
38363.5  30458  14936    393  13802  15014   5874  12666   9609  13762  15262
38463.6  32439  16448   1277  17945  19028  10226  15537  14657  19158  20346
38563.7  35031  16805   4067  23003  23947  18910  20116  21333  25001  26209
38663.8  33251  15820   7673  24781  25675  24617  25354  24440  29433  30329
38763.9  33293  16833  18529  28318  29233  31267  31470  27748  29676  30601
388
389Per cent with error:
390 30.9 3.2 18.5 9.8 13.1 11.6 17.4
391Total arguments tested:
392      70194  70099 101784 100641 100641 101799 128853 114893 102675 102675
393
394
395------------------------- Contributors -------------------------------
396
397A number of people have contributed to the development of the
398emulator, often by just reporting bugs, sometimes with suggested
399fixes, and a few kind people have provided me with access in one way
400or another to an 80486 machine. Contributors include (to those people
401who I may have forgotten, please forgive me):
402
403Linus Torvalds
404Tommy.Thorn@daimi.aau.dk
405Andrew.Tridgell@anu.edu.au
406Nick Holloway, alfie@dcs.warwick.ac.uk
407Hermano Moura, moura@dcs.gla.ac.uk
408Jon Jagger, J.Jagger@scp.ac.uk
409Lennart Benschop
410Brian Gallew, geek+@CMU.EDU
411Thomas Staniszewski, ts3v+@andrew.cmu.edu
412Martin Howell, mph@plasma.apana.org.au
413M Saggaf, alsaggaf@athena.mit.edu
414Peter Barker, PETER@socpsy.sci.fau.edu
415tom@vlsivie.tuwien.ac.at
416Dan Russel, russed@rpi.edu
417Daniel Carosone, danielce@ee.mu.oz.au
418cae@jpmorgan.com
419Hamish Coleman, t933093@minyos.xx.rmit.oz.au
420Bruce Evans, bde@kralizec.zeta.org.au
421Timo Korvola, Timo.Korvola@hut.fi
422Rick Lyons, rick@razorback.brisnet.org.au
423Rick, jrs@world.std.com
424
425...and numerous others who responded to my request for help with
426a real 80486.
427
diff --git a/arch/i386/math-emu/control_w.h b/arch/i386/math-emu/control_w.h
new file mode 100644
index 000000000000..ae2274dbd305
--- /dev/null
+++ b/arch/i386/math-emu/control_w.h
@@ -0,0 +1,45 @@
1/*---------------------------------------------------------------------------+
2 | control_w.h |
3 | |
4 | Copyright (C) 1992,1993 |
5 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
6 | Australia. E-mail billm@vaxc.cc.monash.edu.au |
7 | |
8 +---------------------------------------------------------------------------*/
9
10#ifndef _CONTROLW_H_
11#define _CONTROLW_H_
12
13#ifdef __ASSEMBLY__
14#define _Const_(x) $##x
15#else
16#define _Const_(x) x
17#endif
18
19#define CW_RC _Const_(0x0C00) /* rounding control */
20#define CW_PC _Const_(0x0300) /* precision control */
21
22#define CW_Precision _Const_(0x0020) /* loss of precision mask */
23#define CW_Underflow _Const_(0x0010) /* underflow mask */
24#define CW_Overflow _Const_(0x0008) /* overflow mask */
25#define CW_ZeroDiv _Const_(0x0004) /* divide by zero mask */
26#define CW_Denormal _Const_(0x0002) /* denormalized operand mask */
27#define CW_Invalid _Const_(0x0001) /* invalid operation mask */
28
29#define CW_Exceptions _Const_(0x003f) /* all masks */
30
31#define RC_RND _Const_(0x0000)
32#define RC_DOWN _Const_(0x0400)
33#define RC_UP _Const_(0x0800)
34#define RC_CHOP _Const_(0x0C00)
35
36/* p 15-5: Precision control bits affect only the following:
37 ADD, SUB(R), MUL, DIV(R), and SQRT */
38#define PR_24_BITS _Const_(0x000)
39#define PR_53_BITS _Const_(0x200)
40#define PR_64_BITS _Const_(0x300)
41#define PR_RESERVED_BITS _Const_(0x100)
42/* FULL_PRECISION simulates all exceptions masked */
43#define FULL_PRECISION (PR_64_BITS | RC_RND | 0x3f)
44
45#endif /* _CONTROLW_H_ */
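
A hypothetical standalone user of the definitions above (illustrative
only), decoding the two fields the way the emulator's own code does;
0x037f is the control word that finit installs in fpu_aux.c later in
this patch:

  #include <stdio.h>
  #include "control_w.h"

  int main(void)
  {
      unsigned short cw = 0x037f;          /* default after finit */

      switch (cw & CW_RC) {                /* rounding control */
      case RC_RND:  puts("round to nearest"); break;
      case RC_DOWN: puts("round toward -infinity"); break;
      case RC_UP:   puts("round toward +infinity"); break;
      case RC_CHOP: puts("truncate"); break;
      }
      switch (cw & CW_PC) {                /* precision control */
      case PR_24_BITS: puts("24-bit significand"); break;
      case PR_53_BITS: puts("53-bit significand"); break;
      case PR_64_BITS: puts("64-bit significand"); break;
      }
      return 0;
  }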
diff --git a/arch/i386/math-emu/div_Xsig.S b/arch/i386/math-emu/div_Xsig.S
new file mode 100644
index 000000000000..f77ba3058b31
--- /dev/null
+++ b/arch/i386/math-emu/div_Xsig.S
@@ -0,0 +1,365 @@
1 .file "div_Xsig.S"
2/*---------------------------------------------------------------------------+
3 | div_Xsig.S |
4 | |
5 | Division subroutine for 96 bit quantities |
6 | |
7 | Copyright (C) 1994,1995 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@jacobi.maths.monash.edu.au |
10 | |
11 | |
12 +---------------------------------------------------------------------------*/
13
14/*---------------------------------------------------------------------------+
15 | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and |
16 | put the 96 bit result at the location d. |
17 | |
18 | The result may not be accurate to 96 bits. It is intended for use where |
19 | a result better than 64 bits is required. The result should usually be |
20 | good to at least 94 bits. |
21 | The returned result is actually divided by one half. This is done to |
22 | prevent overflow. |
23 | |
24 | .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd |
25 | |
26 | void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) |
27 | |
28 +---------------------------------------------------------------------------*/
29
30#include "exception.h"
31#include "fpu_emu.h"
32
33
34#define XsigLL(x) (x)
35#define XsigL(x) 4(x)
36#define XsigH(x) 8(x)
37
38
39#ifndef NON_REENTRANT_FPU
40/*
41 Local storage on the stack:
42 Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
43 */
44#define FPU_accum_3 -4(%ebp)
45#define FPU_accum_2 -8(%ebp)
46#define FPU_accum_1 -12(%ebp)
47#define FPU_accum_0 -16(%ebp)
48#define FPU_result_3 -20(%ebp)
49#define FPU_result_2 -24(%ebp)
50#define FPU_result_1 -28(%ebp)
51
52#else
53.data
54/*
55 Local storage in a static area:
56 Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
57 */
58 .align 4,0
59FPU_accum_3:
60 .long 0
61FPU_accum_2:
62 .long 0
63FPU_accum_1:
64 .long 0
65FPU_accum_0:
66 .long 0
67FPU_result_3:
68 .long 0
69FPU_result_2:
70 .long 0
71FPU_result_1:
72 .long 0
73#endif /* NON_REENTRANT_FPU */
74
75
76.text
77ENTRY(div_Xsig)
78 pushl %ebp
79 movl %esp,%ebp
80#ifndef NON_REENTRANT_FPU
81 subl $28,%esp
82#endif /* NON_REENTRANT_FPU */
83
84 pushl %esi
85 pushl %edi
86 pushl %ebx
87
88 movl PARAM1,%esi /* pointer to num */
89 movl PARAM2,%ebx /* pointer to denom */
90
91#ifdef PARANOID
92 testl $0x80000000, XsigH(%ebx) /* Divisor */
93 je L_bugged
94#endif /* PARANOID */
95
96
97/*---------------------------------------------------------------------------+
98 | Divide: Return arg1/arg2 to arg3. |
99 | |
100 | The maximum returned value is (ignoring exponents) |
101 | .ffffffff ffffffff |
102 | ------------------ = 1.ffffffff fffffffe |
103 | .80000000 00000000 |
104 | and the minimum is |
105 | .80000000 00000000 |
106 | ------------------ = .80000000 00000001 (rounded) |
107 | .ffffffff ffffffff |
108 | |
109 +---------------------------------------------------------------------------*/
110
111 /* Save extended dividend in local register */
112
113 /* Divide by 2 to prevent overflow */
114 clc
115 movl XsigH(%esi),%eax
116 rcrl %eax
117 movl %eax,FPU_accum_3
118 movl XsigL(%esi),%eax
119 rcrl %eax
120 movl %eax,FPU_accum_2
121 movl XsigLL(%esi),%eax
122 rcrl %eax
123 movl %eax,FPU_accum_1
124 movl $0,%eax
125 rcrl %eax
126 movl %eax,FPU_accum_0
127
128 movl FPU_accum_2,%eax /* Get the current num */
129 movl FPU_accum_3,%edx
130
131/*----------------------------------------------------------------------*/
132/* Initialization done.
133 Do the first 32 bits. */
134
135 /* We will divide by a number which is too large */
136 movl XsigH(%ebx),%ecx
137 addl $1,%ecx
138 jnc LFirst_div_not_1
139
140 /* here we need to divide by 100000000h,
141 i.e., no division at all.. */
142 mov %edx,%eax
143 jmp LFirst_div_done
144
145LFirst_div_not_1:
146 divl %ecx /* Divide the numerator by the augmented
147 denom ms dw */
148
149LFirst_div_done:
150 movl %eax,FPU_result_3 /* Put the result in the answer */
151
152 mull XsigH(%ebx) /* mul by the ms dw of the denom */
153
154 subl %eax,FPU_accum_2 /* Subtract from the num local reg */
155 sbbl %edx,FPU_accum_3
156
157 movl FPU_result_3,%eax /* Get the result back */
158 mull XsigL(%ebx) /* now mul the ls dw of the denom */
159
160 subl %eax,FPU_accum_1 /* Subtract from the num local reg */
161 sbbl %edx,FPU_accum_2
162 sbbl $0,FPU_accum_3
163 je LDo_2nd_32_bits /* Must check for non-zero result here */
164
165#ifdef PARANOID
166 jb L_bugged_1
167#endif /* PARANOID */
168
169 /* need to subtract the denom once more */
170 incl FPU_result_3 /* Correct the answer */
171
172 movl XsigL(%ebx),%eax
173 movl XsigH(%ebx),%edx
174 subl %eax,FPU_accum_1 /* Subtract from the num local reg */
175 sbbl %edx,FPU_accum_2
176
177#ifdef PARANOID
178 sbbl $0,FPU_accum_3
179 jne L_bugged_1 /* Must check for non-zero result here */
180#endif /* PARANOID */
181
182/*----------------------------------------------------------------------*/
183/* Half of the main problem is done, there is just a reduced numerator
184 to handle now.
185 Work with the second 32 bits, FPU_accum_0 not used from now on */
186LDo_2nd_32_bits:
187 movl FPU_accum_2,%edx /* get the reduced num */
188 movl FPU_accum_1,%eax
189
190 /* need to check for possible subsequent overflow */
191 cmpl XsigH(%ebx),%edx
192 jb LDo_2nd_div
193 ja LPrevent_2nd_overflow
194
195 cmpl XsigL(%ebx),%eax
196 jb LDo_2nd_div
197
198LPrevent_2nd_overflow:
199/* The numerator is greater or equal, would cause overflow */
200 /* prevent overflow */
201 subl XsigL(%ebx),%eax
202 sbbl XsigH(%ebx),%edx
203 movl %edx,FPU_accum_2
204 movl %eax,FPU_accum_1
205
206 incl FPU_result_3 /* Reflect the subtraction in the answer */
207
208#ifdef PARANOID
209 je L_bugged_2 /* Can't bump the result to 1.0 */
210#endif /* PARANOID */
211
212LDo_2nd_div:
213 cmpl $0,%ecx /* augmented denom msw */
214 jnz LSecond_div_not_1
215
216 /* %ecx == 0, we are dividing by 1.0 */
217 mov %edx,%eax
218 jmp LSecond_div_done
219
220LSecond_div_not_1:
221 divl %ecx /* Divide the numerator by the denom ms dw */
222
223LSecond_div_done:
224 movl %eax,FPU_result_2 /* Put the result in the answer */
225
226 mull XsigH(%ebx) /* mul by the ms dw of the denom */
227
228 subl %eax,FPU_accum_1 /* Subtract from the num local reg */
229 sbbl %edx,FPU_accum_2
230
231#ifdef PARANOID
232 jc L_bugged_2
233#endif /* PARANOID */
234
235 movl FPU_result_2,%eax /* Get the result back */
236 mull XsigL(%ebx) /* now mul the ls dw of the denom */
237
238 subl %eax,FPU_accum_0 /* Subtract from the num local reg */
239 sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */
240 sbbl $0,FPU_accum_2
241
242#ifdef PARANOID
243 jc L_bugged_2
244#endif /* PARANOID */
245
246 jz LDo_3rd_32_bits
247
248#ifdef PARANOID
249 cmpl $1,FPU_accum_2
250 jne L_bugged_2
251#endif /* PARANOID */
252
253 /* need to subtract the denom once more */
254 movl XsigL(%ebx),%eax
255 movl XsigH(%ebx),%edx
256 subl %eax,FPU_accum_0 /* Subtract from the num local reg */
257 sbbl %edx,FPU_accum_1
258 sbbl $0,FPU_accum_2
259
260#ifdef PARANOID
261 jc L_bugged_2
262 jne L_bugged_2
263#endif /* PARANOID */
264
265 addl $1,FPU_result_2 /* Correct the answer */
266 adcl $0,FPU_result_3
267
268#ifdef PARANOID
269 jc L_bugged_2 /* Must check for non-zero result here */
270#endif /* PARANOID */
271
272/*----------------------------------------------------------------------*/
273/* The division is essentially finished here, we just need to perform
274 tidying operations.
275 Deal with the 3rd 32 bits */
276LDo_3rd_32_bits:
277 /* We use an approximation for the third 32 bits.
278 To take account of the 3rd 32 bits of the divisor
279 (call them del), we subtract del * (a/b) */
280
281 movl FPU_result_3,%eax /* a/b */
282 mull XsigLL(%ebx) /* del */
283
284 subl %edx,FPU_accum_1
285
286 /* A borrow indicates that the result is negative */
287 jnb LTest_over
288
289 movl XsigH(%ebx),%edx
290 addl %edx,FPU_accum_1
291
292 subl $1,FPU_result_2 /* Adjust the answer */
293 sbbl $0,FPU_result_3
294
295 /* The above addition might not have been enough, check again. */
296 movl FPU_accum_1,%edx /* get the reduced num */
297 cmpl XsigH(%ebx),%edx /* denom */
298 jb LDo_3rd_div
299
300 movl XsigH(%ebx),%edx
301 addl %edx,FPU_accum_1
302
303 subl $1,FPU_result_2 /* Adjust the answer */
304 sbbl $0,FPU_result_3
305 jmp LDo_3rd_div
306
307LTest_over:
308 movl FPU_accum_1,%edx /* get the reduced num */
309
310 /* need to check for possible subsequent overflow */
311 cmpl XsigH(%ebx),%edx /* denom */
312 jb LDo_3rd_div
313
314 /* prevent overflow */
315 subl XsigH(%ebx),%edx
316 movl %edx,FPU_accum_1
317
318 addl $1,FPU_result_2 /* Reflect the subtraction in the answer */
319 adcl $0,FPU_result_3
320
321LDo_3rd_div:
322 movl FPU_accum_0,%eax
323 movl FPU_accum_1,%edx
324 divl XsigH(%ebx)
325
326 movl %eax,FPU_result_1 /* Rough estimate of third word */
327
328 movl PARAM3,%esi /* pointer to answer */
329
330 movl FPU_result_1,%eax
331 movl %eax,XsigLL(%esi)
332 movl FPU_result_2,%eax
333 movl %eax,XsigL(%esi)
334 movl FPU_result_3,%eax
335 movl %eax,XsigH(%esi)
336
337L_exit:
338 popl %ebx
339 popl %edi
340 popl %esi
341
342 leave
343 ret
344
345
346#ifdef PARANOID
347/* The logic is wrong if we got here */
348L_bugged:
349 pushl EX_INTERNAL|0x240
350 call EXCEPTION
351 pop %ebx
352 jmp L_exit
353
354L_bugged_1:
355 pushl EX_INTERNAL|0x241
356 call EXCEPTION
357 pop %ebx
358 jmp L_exit
359
360L_bugged_2:
361 pushl EX_INTERNAL|0x242
362 call EXCEPTION
363 pop %ebx
364 jmp L_exit
365#endif /* PARANOID */
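
The estimation trick in the first two 32-bit steps above is the classic
schoolbook one: divide by the divisor's most significant word plus one
so the 32-bit quotient estimate can never overshoot, then multiply back,
subtract, and correct upward. A scaled-down C model of one such step
(illustrative only; GCC's unsigned __int128 stands in for the carry
juggling the assembly does by hand):

  #include <stdint.h>
  #include <stdio.h>

  /* One quotient digit of (num << 32) / den, for num < den and den
     normalised (top bit set), as the routine above requires. */
  static uint32_t div_digit(uint64_t num, uint64_t den)
  {
      uint32_t den_msw = (uint32_t)(den >> 32);
      uint64_t q;
      unsigned __int128 rem;

      if (den_msw == 0xffffffffu)       /* augmented msw would wrap: */
          q = num >> 32;                /* dividing by ~1.0 */
      else
          q = num / (den_msw + 1ull);   /* estimate, never too large */

      rem = ((unsigned __int128)num << 32) - (unsigned __int128)q * den;
      while (rem >= den) {              /* at most two correction rounds */
          rem -= den;
          q++;
      }
      return (uint32_t)q;
  }

  int main(void)
  {
      uint64_t den = 0xb504f333f9de6484ull;  /* ~0.7071 as a fraction */
      uint64_t num = 0x8000000000000000ull;  /* 0.5 on the same scale */
      printf("%08x\n", (unsigned)div_digit(num, den)); /* b504f333 */
      return 0;
  }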
diff --git a/arch/i386/math-emu/div_small.S b/arch/i386/math-emu/div_small.S
new file mode 100644
index 000000000000..47099628fa4c
--- /dev/null
+++ b/arch/i386/math-emu/div_small.S
@@ -0,0 +1,47 @@
1 .file "div_small.S"
2/*---------------------------------------------------------------------------+
3 | div_small.S |
4 | |
5 | Divide a 64 bit integer by a 32 bit integer & return remainder. |
6 | |
7 | Copyright (C) 1992,1995 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@jacobi.maths.monash.edu.au |
10 | |
11 | |
12 +---------------------------------------------------------------------------*/
13
14/*---------------------------------------------------------------------------+
15 | unsigned long FPU_div_small(unsigned long long *x, unsigned long y) |
16 +---------------------------------------------------------------------------*/
17
18#include "fpu_emu.h"
19
20.text
21ENTRY(FPU_div_small)
22 pushl %ebp
23 movl %esp,%ebp
24
25 pushl %esi
26
27 movl PARAM1,%esi /* pointer to num */
28 movl PARAM2,%ecx /* The denominator */
29
30 movl 4(%esi),%eax /* Get the current num msw */
31 xorl %edx,%edx
32 divl %ecx
33
34 movl %eax,4(%esi)
35
36 movl (%esi),%eax /* Get the num lsw */
37 divl %ecx
38
39 movl %eax,(%esi)
40
41 movl %edx,%eax /* Return the remainder in eax */
42
43 popl %esi
44
45 leave
46 ret
47
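
In C terms the routine above is a 64-by-32 schoolbook division done with
two chained divl instructions, the remainder of the high step flowing
into the low step. A model (illustrative, not the kernel's calling
convention):

  #include <stdint.h>
  #include <stdio.h>

  static uint32_t div_small_model(uint64_t *x, uint32_t y)
  {
      uint64_t acc  = *x >> 32;               /* high word first */
      uint32_t q_hi = (uint32_t)(acc / y);

      acc = ((acc % y) << 32) | (uint32_t)*x; /* remainder flows down */
      *x  = ((uint64_t)q_hi << 32) | (uint32_t)(acc / y);
      return (uint32_t)(acc % y);             /* remainder, like %eax */
  }

  int main(void)
  {
      uint64_t x = 1000000000000000003ull;
      uint32_t r = div_small_model(&x, 10);
      printf("q=%llu r=%u\n", (unsigned long long)x, r); /* q=1e17 r=3 */
      return 0;
  }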
diff --git a/arch/i386/math-emu/errors.c b/arch/i386/math-emu/errors.c
new file mode 100644
index 000000000000..a1b0d22f6978
--- /dev/null
+++ b/arch/i386/math-emu/errors.c
@@ -0,0 +1,739 @@
1/*---------------------------------------------------------------------------+
2 | errors.c |
3 | |
4 | The error handling functions for wm-FPU-emu |
5 | |
6 | Copyright (C) 1992,1993,1994,1996 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@jacobi.maths.monash.edu.au |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13/*---------------------------------------------------------------------------+
14 | Note: |
15 | The file contains code which accesses user memory. |
16 | Emulator static data may change when user memory is accessed, due to |
17 | other processes using the emulator while swapping is in progress. |
18 +---------------------------------------------------------------------------*/
19
20#include <linux/signal.h>
21
22#include <asm/uaccess.h>
23
24#include "fpu_emu.h"
25#include "fpu_system.h"
26#include "exception.h"
27#include "status_w.h"
28#include "control_w.h"
29#include "reg_constant.h"
30#include "version.h"
31
32/* */
33#undef PRINT_MESSAGES
34/* */
35
36
37#if 0
38void Un_impl(void)
39{
40 u_char byte1, FPU_modrm;
41 unsigned long address = FPU_ORIG_EIP;
42
43 RE_ENTRANT_CHECK_OFF;
44 /* No need to check access_ok(), we have previously fetched these bytes. */
45 printk("Unimplemented FPU Opcode at eip=%p : ", (void __user *) address);
46 if ( FPU_CS == __USER_CS )
47 {
48 while ( 1 )
49 {
50 FPU_get_user(byte1, (u_char __user *) address);
51 if ( (byte1 & 0xf8) == 0xd8 ) break;
52 printk("[%02x]", byte1);
53 address++;
54 }
55 printk("%02x ", byte1);
56 FPU_get_user(FPU_modrm, 1 + (u_char __user *) address);
57
58 if (FPU_modrm >= 0300)
59 printk("%02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7);
60 else
61 printk("/%d\n", (FPU_modrm >> 3) & 7);
62 }
63 else
64 {
65 printk("cs selector = %04x\n", FPU_CS);
66 }
67
68 RE_ENTRANT_CHECK_ON;
69
70 EXCEPTION(EX_Invalid);
71
72}
73#endif /* 0 */
74
75
76/*
77 Called for opcodes which are illegal and which are known to result in a
78 SIGILL with a real 80486.
79 */
80void FPU_illegal(void)
81{
82 math_abort(FPU_info,SIGILL);
83}
84
85
86
87void FPU_printall(void)
88{
89 int i;
90 static const char *tag_desc[] = { "Valid", "Zero", "ERROR", "Empty",
91 "DeNorm", "Inf", "NaN" };
92 u_char byte1, FPU_modrm;
93 unsigned long address = FPU_ORIG_EIP;
94
95 RE_ENTRANT_CHECK_OFF;
96 /* No need to check access_ok(), we have previously fetched these bytes. */
97 printk("At %p:", (void *) address);
98 if ( FPU_CS == __USER_CS )
99 {
100#define MAX_PRINTED_BYTES 20
101 for ( i = 0; i < MAX_PRINTED_BYTES; i++ )
102 {
103 FPU_get_user(byte1, (u_char __user *) address);
104 if ( (byte1 & 0xf8) == 0xd8 )
105 {
106 printk(" %02x", byte1);
107 break;
108 }
109 printk(" [%02x]", byte1);
110 address++;
111 }
112 if ( i == MAX_PRINTED_BYTES )
113 printk(" [more..]\n");
114 else
115 {
116 FPU_get_user(FPU_modrm, 1 + (u_char __user *) address);
117
118 if (FPU_modrm >= 0300)
119 printk(" %02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7);
120 else
121 printk(" /%d, mod=%d rm=%d\n",
122 (FPU_modrm >> 3) & 7, (FPU_modrm >> 6) & 3, FPU_modrm & 7);
123 }
124 }
125 else
126 {
127 printk("%04x\n", FPU_CS);
128 }
129
130 partial_status = status_word();
131
132#ifdef DEBUGGING
133if ( partial_status & SW_Backward ) printk("SW: backward compatibility\n");
134if ( partial_status & SW_C3 ) printk("SW: condition bit 3\n");
135if ( partial_status & SW_C2 ) printk("SW: condition bit 2\n");
136if ( partial_status & SW_C1 ) printk("SW: condition bit 1\n");
137if ( partial_status & SW_C0 ) printk("SW: condition bit 0\n");
138if ( partial_status & SW_Summary ) printk("SW: exception summary\n");
139if ( partial_status & SW_Stack_Fault ) printk("SW: stack fault\n");
140if ( partial_status & SW_Precision ) printk("SW: loss of precision\n");
141if ( partial_status & SW_Underflow ) printk("SW: underflow\n");
142if ( partial_status & SW_Overflow ) printk("SW: overflow\n");
143if ( partial_status & SW_Zero_Div ) printk("SW: divide by zero\n");
144if ( partial_status & SW_Denorm_Op ) printk("SW: denormalized operand\n");
145if ( partial_status & SW_Invalid ) printk("SW: invalid operation\n");
146#endif /* DEBUGGING */
147
148 printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n",
149 partial_status & 0x8000 ? 1 : 0, /* busy */
150 (partial_status & 0x3800) >> 11, /* stack top pointer */
151 partial_status & 0x80 ? 1 : 0, /* Error summary status */
152 partial_status & 0x40 ? 1 : 0, /* Stack flag */
153 partial_status & SW_C3?1:0, partial_status & SW_C2?1:0, /* cc */
154 partial_status & SW_C1?1:0, partial_status & SW_C0?1:0, /* cc */
155 partial_status & SW_Precision?1:0, partial_status & SW_Underflow?1:0,
156 partial_status & SW_Overflow?1:0, partial_status & SW_Zero_Div?1:0,
157 partial_status & SW_Denorm_Op?1:0, partial_status & SW_Invalid?1:0);
158
159printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d ef=%d%d%d%d%d%d\n",
160 control_word & 0x1000 ? 1 : 0,
161 (control_word & 0x800) >> 11, (control_word & 0x400) >> 10,
162 (control_word & 0x200) >> 9, (control_word & 0x100) >> 8,
163 control_word & 0x80 ? 1 : 0,
164 control_word & SW_Precision?1:0, control_word & SW_Underflow?1:0,
165 control_word & SW_Overflow?1:0, control_word & SW_Zero_Div?1:0,
166 control_word & SW_Denorm_Op?1:0, control_word & SW_Invalid?1:0);
167
168 for ( i = 0; i < 8; i++ )
169 {
170 FPU_REG *r = &st(i);
171 u_char tagi = FPU_gettagi(i);
172 switch (tagi)
173 {
174 case TAG_Empty:
175 continue;
176 break;
177 case TAG_Zero:
178 case TAG_Special:
179 tagi = FPU_Special(r);
180 case TAG_Valid:
181 printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i,
182 getsign(r) ? '-' : '+',
183 (long)(r->sigh >> 16),
184 (long)(r->sigh & 0xFFFF),
185 (long)(r->sigl >> 16),
186 (long)(r->sigl & 0xFFFF),
187 exponent(r) - EXP_BIAS + 1);
188 break;
189 default:
190 printk("Whoops! Error in errors.c: tag%d is %d ", i, tagi);
191 continue;
192 break;
193 }
194 printk("%s\n", tag_desc[(int) (unsigned) tagi]);
195 }
196
197 RE_ENTRANT_CHECK_ON;
198
199}
200
201static struct {
202 int type;
203 const char *name;
204} exception_names[] = {
205 { EX_StackOver, "stack overflow" },
206 { EX_StackUnder, "stack underflow" },
207 { EX_Precision, "loss of precision" },
208 { EX_Underflow, "underflow" },
209 { EX_Overflow, "overflow" },
210 { EX_ZeroDiv, "divide by zero" },
211 { EX_Denormal, "denormalized operand" },
212 { EX_Invalid, "invalid operation" },
213 { EX_INTERNAL, "INTERNAL BUG in "FPU_VERSION },
214 { 0, NULL }
215};
216
217/*
218 EX_INTERNAL is always given with a code which indicates where the
219 error was detected.
220
221 Internal error types:
222 0x14 in fpu_etc.c
223 0x1nn in a *.c file:
224 0x101 in reg_add_sub.c
225 0x102 in reg_mul.c
226 0x104 in poly_atan.c
227 0x105 in reg_mul.c
228 0x107 in fpu_trig.c
229 0x108 in reg_compare.c
230 0x109 in reg_compare.c
231 0x110 in reg_add_sub.c
232 0x111 in fpe_entry.c
233 0x112 in fpu_trig.c
234 0x113 in errors.c
235 0x115 in fpu_trig.c
236 0x116 in fpu_trig.c
237 0x117 in fpu_trig.c
238 0x118 in fpu_trig.c
239 0x119 in fpu_trig.c
240 0x120 in poly_atan.c
241 0x121 in reg_compare.c
242 0x122 in reg_compare.c
243 0x123 in reg_compare.c
244 0x125 in fpu_trig.c
245 0x126 in fpu_entry.c
246 0x127 in poly_2xm1.c
247 0x128 in fpu_entry.c
248 0x129 in fpu_entry.c
249 0x130 in get_address.c
250 0x131 in get_address.c
251 0x132 in get_address.c
252 0x133 in get_address.c
253 0x140 in load_store.c
254 0x141 in load_store.c
255 0x150 in poly_sin.c
256 0x151 in poly_sin.c
257 0x160 in reg_ld_str.c
258 0x161 in reg_ld_str.c
259 0x162 in reg_ld_str.c
260 0x163 in reg_ld_str.c
261 0x164 in reg_ld_str.c
262 0x170 in fpu_tags.c
263 0x171 in fpu_tags.c
264 0x172 in fpu_tags.c
265 0x180 in reg_convert.c
266 0x2nn in an *.S file:
267 0x201 in reg_u_add.S
268 0x202 in reg_u_div.S
269 0x203 in reg_u_div.S
270 0x204 in reg_u_div.S
271 0x205 in reg_u_mul.S
272 0x206 in reg_u_sub.S
273 0x207 in wm_sqrt.S
274 0x208 in reg_div.S
275 0x209 in reg_u_sub.S
276 0x210 in reg_u_sub.S
277 0x211 in reg_u_sub.S
278 0x212 in reg_u_sub.S
279 0x213 in wm_sqrt.S
280 0x214 in wm_sqrt.S
281 0x215 in wm_sqrt.S
282 0x220 in reg_norm.S
283 0x221 in reg_norm.S
284 0x230 in reg_round.S
285 0x231 in reg_round.S
286 0x232 in reg_round.S
287 0x233 in reg_round.S
288 0x234 in reg_round.S
289 0x235 in reg_round.S
290 0x236 in reg_round.S
291 0x240 in div_Xsig.S
292 0x241 in div_Xsig.S
293 0x242 in div_Xsig.S
294 */
295
296asmlinkage void FPU_exception(int n)
297{
298 int i, int_type;
299
300 int_type = 0; /* Needed only to stop compiler warnings */
301 if ( n & EX_INTERNAL )
302 {
303 int_type = n - EX_INTERNAL;
304 n = EX_INTERNAL;
305 /* Set lots of exception bits! */
306 partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward);
307 }
308 else
309 {
310 /* Extract only the bits which we use to set the status word */
311 n &= (SW_Exc_Mask);
312 /* Set the corresponding exception bit */
313 partial_status |= n;
314 /* Set summary bits iff exception isn't masked */
315 if ( partial_status & ~control_word & CW_Exceptions )
316 partial_status |= (SW_Summary | SW_Backward);
317 if ( n & (SW_Stack_Fault | EX_Precision) )
318 {
319 if ( !(n & SW_C1) )
320 /* This bit distinguishes over- from underflow for a stack fault,
321 and roundup from round-down for precision loss. */
322 partial_status &= ~SW_C1;
323 }
324 }
325
326 RE_ENTRANT_CHECK_OFF;
327 if ( (~control_word & n & CW_Exceptions) || (n == EX_INTERNAL) )
328 {
329#ifdef PRINT_MESSAGES
330 /* My message from the sponsor */
331 printk(FPU_VERSION" "__DATE__" (C) W. Metzenthen.\n");
332#endif /* PRINT_MESSAGES */
333
334 /* Get a name string for error reporting */
335 for (i=0; exception_names[i].type; i++)
336 if ( (exception_names[i].type & n) == exception_names[i].type )
337 break;
338
339 if (exception_names[i].type)
340 {
341#ifdef PRINT_MESSAGES
342 printk("FP Exception: %s!\n", exception_names[i].name);
343#endif /* PRINT_MESSAGES */
344 }
345 else
346 printk("FPU emulator: Unknown Exception: 0x%04x!\n", n);
347
348 if ( n == EX_INTERNAL )
349 {
350 printk("FPU emulator: Internal error type 0x%04x\n", int_type);
351 FPU_printall();
352 }
353#ifdef PRINT_MESSAGES
354 else
355 FPU_printall();
356#endif /* PRINT_MESSAGES */
357
358 /*
359 * The 80486 generates an interrupt on the next non-control FPU
360 * instruction. So we need some means of flagging it.
361 * We use the ES (Error Summary) bit for this.
362 */
363 }
364 RE_ENTRANT_CHECK_ON;
365
366#ifdef __DEBUG__
367 math_abort(FPU_info,SIGFPE);
368#endif /* __DEBUG__ */
369
370}
371
372
373/* Real operation attempted on a NaN. */
374/* Returns < 0 if the exception is unmasked */
375int real_1op_NaN(FPU_REG *a)
376{
377 int signalling, isNaN;
378
379 isNaN = (exponent(a) == EXP_OVER) && (a->sigh & 0x80000000);
380
381 /* The default result for the case of two "equal" NaNs (signs may
382 differ) is chosen to reproduce 80486 behaviour */
383 signalling = isNaN && !(a->sigh & 0x40000000);
384
385 if ( !signalling )
386 {
387 if ( !isNaN ) /* pseudo-NaN, or other unsupported? */
388 {
389 if ( control_word & CW_Invalid )
390 {
391 /* Masked response */
392 reg_copy(&CONST_QNaN, a);
393 }
394 EXCEPTION(EX_Invalid);
395 return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
396 }
397 return TAG_Special;
398 }
399
400 if ( control_word & CW_Invalid )
401 {
402 /* The masked response */
403 if ( !(a->sigh & 0x80000000) ) /* pseudo-NaN ? */
404 {
405 reg_copy(&CONST_QNaN, a);
406 }
407 /* ensure a Quiet NaN */
408 a->sigh |= 0x40000000;
409 }
410
411 EXCEPTION(EX_Invalid);
412
413 return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
414}
415
416
417/* Real operation attempted on two operands, one a NaN. */
418/* Returns < 0 if the exception is unmasked */
419int real_2op_NaN(FPU_REG const *b, u_char tagb,
420 int deststnr,
421 FPU_REG const *defaultNaN)
422{
423 FPU_REG *dest = &st(deststnr);
424 FPU_REG const *a = dest;
425 u_char taga = FPU_gettagi(deststnr);
426 FPU_REG const *x;
427 int signalling, unsupported;
428
429 if ( taga == TAG_Special )
430 taga = FPU_Special(a);
431 if ( tagb == TAG_Special )
432 tagb = FPU_Special(b);
433
434 /* TW_NaN is also used for unsupported data types. */
435 unsupported = ((taga == TW_NaN)
436 && !((exponent(a) == EXP_OVER) && (a->sigh & 0x80000000)))
437 || ((tagb == TW_NaN)
438 && !((exponent(b) == EXP_OVER) && (b->sigh & 0x80000000)));
439 if ( unsupported )
440 {
441 if ( control_word & CW_Invalid )
442 {
443 /* Masked response */
444 FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
445 }
446 EXCEPTION(EX_Invalid);
447 return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
448 }
449
450 if (taga == TW_NaN)
451 {
452 x = a;
453 if (tagb == TW_NaN)
454 {
455 signalling = !(a->sigh & b->sigh & 0x40000000);
456 if ( significand(b) > significand(a) )
457 x = b;
458 else if ( significand(b) == significand(a) )
459 {
460 /* The default result for the case of two "equal" NaNs (signs may
461 differ) is chosen to reproduce 80486 behaviour */
462 x = defaultNaN;
463 }
464 }
465 else
466 {
467 /* return the quiet version of the NaN in a */
468 signalling = !(a->sigh & 0x40000000);
469 }
470 }
471 else
472#ifdef PARANOID
473 if (tagb == TW_NaN)
474#endif /* PARANOID */
475 {
476 signalling = !(b->sigh & 0x40000000);
477 x = b;
478 }
479#ifdef PARANOID
480 else
481 {
482 signalling = 0;
483 EXCEPTION(EX_INTERNAL|0x113);
484 x = &CONST_QNaN;
485 }
486#endif /* PARANOID */
487
488 if ( (!signalling) || (control_word & CW_Invalid) )
489 {
490 if ( ! x )
491 x = b;
492
493 if ( !(x->sigh & 0x80000000) ) /* pseudo-NaN ? */
494 x = &CONST_QNaN;
495
496 FPU_copy_to_regi(x, TAG_Special, deststnr);
497
498 if ( !signalling )
499 return TAG_Special;
500
501 /* ensure a Quiet NaN */
502 dest->sigh |= 0x40000000;
503 }
504
505 EXCEPTION(EX_Invalid);
506
507 return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
508}
509
510
511/* Invalid arith operation on Valid registers */
512/* Returns < 0 if the exception is unmasked */
513asmlinkage int arith_invalid(int deststnr)
514{
515
516 EXCEPTION(EX_Invalid);
517
518 if ( control_word & CW_Invalid )
519 {
520 /* The masked response */
521 FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
522 }
523
524 return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Valid;
525
526}
527
528
529/* Divide a finite number by zero */
530asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign)
531{
532 FPU_REG *dest = &st(deststnr);
533 int tag = TAG_Valid;
534
535 if ( control_word & CW_ZeroDiv )
536 {
537 /* The masked response */
538 FPU_copy_to_regi(&CONST_INF, TAG_Special, deststnr);
539 setsign(dest, sign);
540 tag = TAG_Special;
541 }
542
543 EXCEPTION(EX_ZeroDiv);
544
545 return (!(control_word & CW_ZeroDiv) ? FPU_Exception : 0) | tag;
546
547}
548
549
550/* This may be called often, so keep it lean */
551int set_precision_flag(int flags)
552{
553 if ( control_word & CW_Precision )
554 {
555 partial_status &= ~(SW_C1 & flags);
556 partial_status |= flags; /* The masked response */
557 return 0;
558 }
559 else
560 {
561 EXCEPTION(flags);
562 return 1;
563 }
564}
565
566
567/* This may be called often, so keep it lean */
568asmlinkage void set_precision_flag_up(void)
569{
570 if ( control_word & CW_Precision )
571 partial_status |= (SW_Precision | SW_C1); /* The masked response */
572 else
573 EXCEPTION(EX_Precision | SW_C1);
574}
575
576
577/* This may be called often, so keep it lean */
578asmlinkage void set_precision_flag_down(void)
579{
580 if ( control_word & CW_Precision )
581 { /* The masked response */
582 partial_status &= ~SW_C1;
583 partial_status |= SW_Precision;
584 }
585 else
586 EXCEPTION(EX_Precision);
587}
588
589
590asmlinkage int denormal_operand(void)
591{
592 if ( control_word & CW_Denormal )
593 { /* The masked response */
594 partial_status |= SW_Denorm_Op;
595 return TAG_Special;
596 }
597 else
598 {
599 EXCEPTION(EX_Denormal);
600 return TAG_Special | FPU_Exception;
601 }
602}
603
604
605asmlinkage int arith_overflow(FPU_REG *dest)
606{
607 int tag = TAG_Valid;
608
609 if ( control_word & CW_Overflow )
610 {
611 /* The masked response */
612/* ###### The response here depends upon the rounding mode */
613 reg_copy(&CONST_INF, dest);
614 tag = TAG_Special;
615 }
616 else
617 {
618 /* Subtract the magic number from the exponent */
619 addexponent(dest, (-3 * (1 << 13)));
620 }
621
622 EXCEPTION(EX_Overflow);
623 if ( control_word & CW_Overflow )
624 {
625 /* The overflow exception is masked. */
626 /* By definition, precision is lost.
627 The roundup bit (C1) is also set because we have
628 "rounded" upwards to Infinity. */
629 EXCEPTION(EX_Precision | SW_C1);
630 return tag;
631 }
632
633 return tag;
634
635}
636
637
638asmlinkage int arith_underflow(FPU_REG *dest)
639{
640 int tag = TAG_Valid;
641
642 if ( control_word & CW_Underflow )
643 {
644 /* The masked response */
645 if ( exponent16(dest) <= EXP_UNDER - 63 )
646 {
647 reg_copy(&CONST_Z, dest);
648 partial_status &= ~SW_C1; /* Round down. */
649 tag = TAG_Zero;
650 }
651 else
652 {
653 stdexp(dest);
654 }
655 }
656 else
657 {
658 /* Add the magic number to the exponent. */
659 addexponent(dest, (3 * (1 << 13)) + EXTENDED_Ebias);
660 }
661
662 EXCEPTION(EX_Underflow);
663 if ( control_word & CW_Underflow )
664 {
665 /* The underflow exception is masked. */
666 EXCEPTION(EX_Precision);
667 return tag;
668 }
669
670 return tag;
671
672}
673
674
675void FPU_stack_overflow(void)
676{
677
678 if ( control_word & CW_Invalid )
679 {
680 /* The masked response */
681 top--;
682 FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
683 }
684
685 EXCEPTION(EX_StackOver);
686
687 return;
688
689}
690
691
692void FPU_stack_underflow(void)
693{
694
695 if ( control_word & CW_Invalid )
696 {
697 /* The masked response */
698 FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
699 }
700
701 EXCEPTION(EX_StackUnder);
702
703 return;
704
705}
706
707
708void FPU_stack_underflow_i(int i)
709{
710
711 if ( control_word & CW_Invalid )
712 {
713 /* The masked response */
714 FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
715 }
716
717 EXCEPTION(EX_StackUnder);
718
719 return;
720
721}
722
723
724void FPU_stack_underflow_pop(int i)
725{
726
727 if ( control_word & CW_Invalid )
728 {
729 /* The masked response */
730 FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
731 FPU_pop();
732 }
733
734 EXCEPTION(EX_StackUnder);
735
736 return;
737
738}
739
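
The NaN tests in real_1op_NaN() and real_2op_NaN() above key off two
significand bits of the extended format: the explicit integer bit (bit
31 of sigh) must be set for a genuine NaN, and bit 30 distinguishes
quiet (set) from signalling (clear). A standalone illustration (GCC
builtin and little-endian x86 layout assumed):

  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
      long double q = __builtin_nanl("");   /* a quiet NaN */
      unsigned char b[16] = { 0 };
      uint32_t sigh;

      memcpy(b, &q, 10);                    /* the 10 significant bytes */
      memcpy(&sigh, b + 4, 4);              /* high word of significand */
      printf("integer bit=%u quiet bit=%u\n",
             (sigh >> 31) & 1u, (sigh >> 30) & 1u);  /* 1 and 1 */
      return 0;
  }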
diff --git a/arch/i386/math-emu/exception.h b/arch/i386/math-emu/exception.h
new file mode 100644
index 000000000000..b463f21a811e
--- /dev/null
+++ b/arch/i386/math-emu/exception.h
@@ -0,0 +1,53 @@
1/*---------------------------------------------------------------------------+
2 | exception.h |
3 | |
4 | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
5 | Australia. E-mail billm@vaxc.cc.monash.edu.au |
6 | |
7 +---------------------------------------------------------------------------*/
8
9#ifndef _EXCEPTION_H_
10#define _EXCEPTION_H_
11
12
13#ifdef __ASSEMBLY__
14#define Const_(x) $##x
15#else
16#define Const_(x) x
17#endif
18
19#ifndef SW_C1
20#include "fpu_emu.h"
21#endif /* SW_C1 */
22
23#define FPU_BUSY Const_(0x8000) /* FPU busy bit (8087 compatibility) */
24#define EX_ErrorSummary Const_(0x0080) /* Error summary status */
25/* Special exceptions: */
26#define EX_INTERNAL Const_(0x8000) /* Internal error in wm-FPU-emu */
27#define EX_StackOver Const_(0x0041|SW_C1) /* stack overflow */
28#define EX_StackUnder Const_(0x0041) /* stack underflow */
29/* Exception flags: */
30#define EX_Precision Const_(0x0020) /* loss of precision */
31#define EX_Underflow Const_(0x0010) /* underflow */
32#define EX_Overflow Const_(0x0008) /* overflow */
33#define EX_ZeroDiv Const_(0x0004) /* divide by zero */
34#define EX_Denormal Const_(0x0002) /* denormalized operand */
35#define EX_Invalid Const_(0x0001) /* invalid operation */
36
37
38#define PRECISION_LOST_UP Const_((EX_Precision | SW_C1))
39#define PRECISION_LOST_DOWN Const_(EX_Precision)
40
41
42#ifndef __ASSEMBLY__
43
44#ifdef DEBUG
45#define EXCEPTION(x) { printk("exception in %s at line %d\n", \
46 __FILE__, __LINE__); FPU_exception(x); }
47#else
48#define EXCEPTION(x) FPU_exception(x)
49#endif
50
51#endif /* __ASSEMBLY__ */
52
53#endif /* _EXCEPTION_H_ */
diff --git a/arch/i386/math-emu/fpu_arith.c b/arch/i386/math-emu/fpu_arith.c
new file mode 100644
index 000000000000..6972dec01af6
--- /dev/null
+++ b/arch/i386/math-emu/fpu_arith.c
@@ -0,0 +1,174 @@
1/*---------------------------------------------------------------------------+
2 | fpu_arith.c |
3 | |
4 | Code to implement the FPU register/register arithmetic instructions |
5 | |
6 | Copyright (C) 1992,1993,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "fpu_system.h"
14#include "fpu_emu.h"
15#include "control_w.h"
16#include "status_w.h"
17
18
19void fadd__(void)
20{
21 /* fadd st,st(i) */
22 int i = FPU_rm;
23 clear_C1();
24 FPU_add(&st(i), FPU_gettagi(i), 0, control_word);
25}
26
27
28void fmul__(void)
29{
30 /* fmul st,st(i) */
31 int i = FPU_rm;
32 clear_C1();
33 FPU_mul(&st(i), FPU_gettagi(i), 0, control_word);
34}
35
36
37
38void fsub__(void)
39{
40 /* fsub st,st(i) */
41 clear_C1();
42 FPU_sub(0, FPU_rm, control_word);
43}
44
45
46void fsubr_(void)
47{
48 /* fsubr st,st(i) */
49 clear_C1();
50 FPU_sub(REV, FPU_rm, control_word);
51}
52
53
54void fdiv__(void)
55{
56 /* fdiv st,st(i) */
57 clear_C1();
58 FPU_div(0, FPU_rm, control_word);
59}
60
61
62void fdivr_(void)
63{
64 /* fdivr st,st(i) */
65 clear_C1();
66 FPU_div(REV, FPU_rm, control_word);
67}
68
69
70
71void fadd_i(void)
72{
73 /* fadd st(i),st */
74 int i = FPU_rm;
75 clear_C1();
76 FPU_add(&st(i), FPU_gettagi(i), i, control_word);
77}
78
79
80void fmul_i(void)
81{
82 /* fmul st(i),st */
83 clear_C1();
84 FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word);
85}
86
87
88void fsubri(void)
89{
90 /* fsubr st(i),st */
91 clear_C1();
92 FPU_sub(DEST_RM, FPU_rm, control_word);
93}
94
95
96void fsub_i(void)
97{
98 /* fsub st(i),st */
99 clear_C1();
100 FPU_sub(REV|DEST_RM, FPU_rm, control_word);
101}
102
103
104void fdivri(void)
105{
106 /* fdivr st(i),st */
107 clear_C1();
108 FPU_div(DEST_RM, FPU_rm, control_word);
109}
110
111
112void fdiv_i(void)
113{
114 /* fdiv st(i),st */
115 clear_C1();
116 FPU_div(REV|DEST_RM, FPU_rm, control_word);
117}
118
119
120
121void faddp_(void)
122{
123 /* faddp st(i),st */
124 int i = FPU_rm;
125 clear_C1();
126 if ( FPU_add(&st(i), FPU_gettagi(i), i, control_word) >= 0 )
127 FPU_pop();
128}
129
130
131void fmulp_(void)
132{
133 /* fmulp st(i),st */
134 clear_C1();
135 if ( FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word) >= 0 )
136 FPU_pop();
137}
138
139
140
141void fsubrp(void)
142{
143 /* fsubrp st(i),st */
144 clear_C1();
145 if ( FPU_sub(DEST_RM, FPU_rm, control_word) >= 0 )
146 FPU_pop();
147}
148
149
150void fsubp_(void)
151{
152 /* fsubp st(i),st */
153 clear_C1();
154 if ( FPU_sub(REV|DEST_RM, FPU_rm, control_word) >= 0 )
155 FPU_pop();
156}
157
158
159void fdivrp(void)
160{
161 /* fdivrp st(i),st */
162 clear_C1();
163 if ( FPU_div(DEST_RM, FPU_rm, control_word) >= 0 )
164 FPU_pop();
165}
166
167
168void fdivp_(void)
169{
170 /* fdivp st(i),st */
171 clear_C1();
172 if ( FPU_div(REV|DEST_RM, FPU_rm, control_word) >= 0 )
173 FPU_pop();
174}
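
The pattern above: FPU_sub() and FPU_div() take a flags argument built
from REV and DEST_RM (values from fpu_emu.h later in this patch). The
base operation is st(0) op st(rm) with the result going to st(0); REV
swaps the operands and DEST_RM redirects the result to st(rm). A
throwaway demonstration of the encoding (standalone, not emulator code):

  #include <stdio.h>

  #define REV     0x10    /* values as in fpu_emu.h */
  #define DEST_RM 0x20

  static void describe(const char *fn, int flags, int rm)
  {
      const char *a = (flags & REV) ? "st(i)" : "st(0)";
      const char *b = (flags & REV) ? "st(0)" : "st(i)";
      printf("%-7s st(%d) = %s - %s   (i=%d)\n",
             fn, (flags & DEST_RM) ? rm : 0, a, b, rm);
  }

  int main(void)
  {
      describe("fsub__", 0, 3);             /* st(0) = st(0) - st(3) */
      describe("fsubr_", REV, 3);           /* st(0) = st(3) - st(0) */
      describe("fsubri", DEST_RM, 3);       /* st(3) = st(0) - st(3) */
      describe("fsub_i", REV | DEST_RM, 3); /* st(3) = st(3) - st(0) */
      return 0;
  }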
diff --git a/arch/i386/math-emu/fpu_asm.h b/arch/i386/math-emu/fpu_asm.h
new file mode 100644
index 000000000000..9ba12416df12
--- /dev/null
+++ b/arch/i386/math-emu/fpu_asm.h
@@ -0,0 +1,32 @@
1/*---------------------------------------------------------------------------+
2 | fpu_asm.h |
3 | |
4 | Copyright (C) 1992,1995,1997 |
5 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
6 | Australia. E-mail billm@suburbia.net |
7 | |
8 +---------------------------------------------------------------------------*/
9
10#ifndef _FPU_ASM_H_
11#define _FPU_ASM_H_
12
13#include <linux/linkage.h>
14
15#define EXCEPTION FPU_exception
16
17
18#define PARAM1 8(%ebp)
19#define PARAM2 12(%ebp)
20#define PARAM3 16(%ebp)
21#define PARAM4 20(%ebp)
22#define PARAM5 24(%ebp)
23#define PARAM6 28(%ebp)
24#define PARAM7 32(%ebp)
25
26#define SIGL_OFFSET 0
27#define EXP(x) 8(x)
28#define SIG(x) SIGL_OFFSET##(x)
29#define SIGL(x) SIGL_OFFSET##(x)
30#define SIGH(x) 4(x)
31
32#endif /* _FPU_ASM_H_ */
diff --git a/arch/i386/math-emu/fpu_aux.c b/arch/i386/math-emu/fpu_aux.c
new file mode 100644
index 000000000000..20886cfb9f76
--- /dev/null
+++ b/arch/i386/math-emu/fpu_aux.c
@@ -0,0 +1,204 @@
1/*---------------------------------------------------------------------------+
2 | fpu_aux.c |
3 | |
4 | Code to implement some of the FPU auxiliary instructions. |
5 | |
6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "fpu_system.h"
14#include "exception.h"
15#include "fpu_emu.h"
16#include "status_w.h"
17#include "control_w.h"
18
19
20static void fnop(void)
21{
22}
23
24static void fclex(void)
25{
26 partial_status &= ~(SW_Backward|SW_Summary|SW_Stack_Fault|SW_Precision|
27 SW_Underflow|SW_Overflow|SW_Zero_Div|SW_Denorm_Op|
28 SW_Invalid);
29 no_ip_update = 1;
30}
31
32/* Needs to be externally visible */
33void finit(void)
34{
35 control_word = 0x037f;
36 partial_status = 0;
37 top = 0; /* We don't keep top in the status word internally. */
38 fpu_tag_word = 0xffff;
39 /* The behaviour is different from that detailed in
40 Section 15.1.6 of the Intel manual */
41 operand_address.offset = 0;
42 operand_address.selector = 0;
43 instruction_address.offset = 0;
44 instruction_address.selector = 0;
45 instruction_address.opcode = 0;
46 no_ip_update = 1;
47}
48
49/*
50 * These are nops on the i387..
51 */
52#define feni fnop
53#define fdisi fnop
54#define fsetpm fnop
55
56static FUNC const finit_table[] = {
57 feni, fdisi, fclex, finit,
58 fsetpm, FPU_illegal, FPU_illegal, FPU_illegal
59};
60
61void finit_(void)
62{
63 (finit_table[FPU_rm])();
64}
65
66
67static void fstsw_ax(void)
68{
69 *(short *) &FPU_EAX = status_word();
70 no_ip_update = 1;
71}
72
73static FUNC const fstsw_table[] = {
74 fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal,
75 FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
76};
77
78void fstsw_(void)
79{
80 (fstsw_table[FPU_rm])();
81}
82
83
84static FUNC const fp_nop_table[] = {
85 fnop, FPU_illegal, FPU_illegal, FPU_illegal,
86 FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
87};
88
89void fp_nop(void)
90{
91 (fp_nop_table[FPU_rm])();
92}
93
94
95void fld_i_(void)
96{
97 FPU_REG *st_new_ptr;
98 int i;
99 u_char tag;
100
101 if ( STACK_OVERFLOW )
102 { FPU_stack_overflow(); return; }
103
104 /* fld st(i) */
105 i = FPU_rm;
106 if ( NOT_EMPTY(i) )
107 {
108 reg_copy(&st(i), st_new_ptr);
109 tag = FPU_gettagi(i);
110 push();
111 FPU_settag0(tag);
112 }
113 else
114 {
115 if ( control_word & CW_Invalid )
116 {
117 /* The masked response */
118 FPU_stack_underflow();
119 }
120 else
121 EXCEPTION(EX_StackUnder);
122 }
123
124}
125
126
127void fxch_i(void)
128{
129 /* fxch st(i) */
130 FPU_REG t;
131 int i = FPU_rm;
132 FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
133 long tag_word = fpu_tag_word;
134 int regnr = top & 7, regnri = ((regnr + i) & 7);
135 u_char st0_tag = (tag_word >> (regnr*2)) & 3;
136 u_char sti_tag = (tag_word >> (regnri*2)) & 3;
137
138 if ( st0_tag == TAG_Empty )
139 {
140 if ( sti_tag == TAG_Empty )
141 {
142 FPU_stack_underflow();
143 FPU_stack_underflow_i(i);
144 return;
145 }
146 if ( control_word & CW_Invalid )
147 {
148 /* Masked response */
149 FPU_copy_to_reg0(sti_ptr, sti_tag);
150 }
151 FPU_stack_underflow_i(i);
152 return;
153 }
154 if ( sti_tag == TAG_Empty )
155 {
156 if ( control_word & CW_Invalid )
157 {
158 /* Masked response */
159 FPU_copy_to_regi(st0_ptr, st0_tag, i);
160 }
161 FPU_stack_underflow();
162 return;
163 }
164 clear_C1();
165
166 reg_copy(st0_ptr, &t);
167 reg_copy(sti_ptr, st0_ptr);
168 reg_copy(&t, sti_ptr);
169
170 tag_word &= ~(3 << (regnr*2)) & ~(3 << (regnri*2));
171 tag_word |= (sti_tag << (regnr*2)) | (st0_tag << (regnri*2));
172 fpu_tag_word = tag_word;
173}
174
175
176void ffree_(void)
177{
178 /* ffree st(i) */
179 FPU_settagi(FPU_rm, TAG_Empty);
180}
181
182
183void ffreep(void)
184{
185 /* ffree st(i) + pop - unofficial code */
186 FPU_settagi(FPU_rm, TAG_Empty);
187 FPU_pop();
188}
189
190
191void fst_i_(void)
192{
193 /* fst st(i) */
194 FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
195}
196
197
198void fstp_i(void)
199{
200 /* fstp st(i) */
201 FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
202 FPU_pop();
203}
204
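
fxch_i() above works on the tag word directly: two tag bits per physical
register, with st(i) living in physical register (top + i) & 7. A toy
model of that bookkeeping (standalone sketch, constants as in fpu_emu.h):

  #include <stdio.h>

  #define TAG_Valid 0
  #define TAG_Zero  1
  #define TAG_Empty 3     /* 0xffff = all empty, as finit leaves it */

  static int top = 6;             /* physical register holding st(0) */
  static long tag_word = 0xffff;

  static unsigned get_tag(int st_i)       /* cf. FPU_gettagi() */
  {
      int regnr = (top + st_i) & 7;
      return (tag_word >> (regnr * 2)) & 3;
  }

  static void set_tag(int st_i, unsigned tag)
  {
      int regnr = (top + st_i) & 7;
      tag_word = (tag_word & ~(3L << (regnr * 2)))
               | ((long)tag << (regnr * 2));
  }

  int main(void)
  {
      set_tag(0, TAG_Valid);
      set_tag(1, TAG_Zero);
      printf("tags: st(0)=%u st(1)=%u st(2)=%u\n",
             get_tag(0), get_tag(1), get_tag(2));   /* 0 1 3 */
      return 0;
  }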
diff --git a/arch/i386/math-emu/fpu_emu.h b/arch/i386/math-emu/fpu_emu.h
new file mode 100644
index 000000000000..d62b20a3e660
--- /dev/null
+++ b/arch/i386/math-emu/fpu_emu.h
@@ -0,0 +1,217 @@
1/*---------------------------------------------------------------------------+
2 | fpu_emu.h |
3 | |
4 | Copyright (C) 1992,1993,1994,1997 |
5 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
6 | Australia. E-mail billm@suburbia.net |
7 | |
8 +---------------------------------------------------------------------------*/
9
10
11#ifndef _FPU_EMU_H_
12#define _FPU_EMU_H_
13
14/*
15 * Define PECULIAR_486 to get a closer approximation to 80486 behaviour,
16 * rather than behaviour which appears to be cleaner.
17 * This is a matter of opinion: for all I know, the 80486 may simply
18 * be complying with the IEEE spec. Maybe one day I'll get to see the
19 * spec...
20 */
21#define PECULIAR_486
22
23#ifdef __ASSEMBLY__
24#include "fpu_asm.h"
25#define Const(x) $##x
26#else
27#define Const(x) x
28#endif
29
30#define EXP_BIAS Const(0)
31#define EXP_OVER Const(0x4000) /* smallest invalid large exponent */
32#define EXP_UNDER Const(-0x3fff) /* largest invalid small exponent */
33#define EXP_WAY_UNDER Const(-0x6000) /* Below the smallest denormal, but
34 still a 16 bit nr. */
35#define EXP_Infinity EXP_OVER
36#define EXP_NaN EXP_OVER
37
38#define EXTENDED_Ebias Const(0x3fff)
39#define EXTENDED_Emin (-0x3ffe) /* smallest valid exponent */
40
41#define SIGN_POS Const(0)
42#define SIGN_NEG Const(0x80)
43
44#define SIGN_Positive Const(0)
45#define SIGN_Negative Const(0x8000)
46
47
48/* Keep the order TAG_Valid, TAG_Zero, TW_Denormal */
49/* The following fold to 2 (Special) in the Tag Word */
50#define TW_Denormal Const(4) /* De-normal */
51#define TW_Infinity Const(5) /* + or - infinity */
52#define TW_NaN Const(6) /* Not a Number */
53#define TW_Unsupported Const(7) /* Not supported by an 80486 */
54
55#define TAG_Valid Const(0) /* valid */
56#define TAG_Zero Const(1) /* zero */
57#define TAG_Special Const(2) /* De-normal, + or - infinity,
58 or Not a Number */
59#define TAG_Empty Const(3) /* empty */
60
61#define LOADED_DATA Const(10101) /* Special st() number to identify
62 loaded data (not on stack). */
63
64/* A few flags (must be >= 0x10). */
65#define REV 0x10
66#define DEST_RM 0x20
67#define LOADED 0x40
68
69#define FPU_Exception Const(0x80000000) /* Added to tag returns. */
70
71
72#ifndef __ASSEMBLY__
73
74#include "fpu_system.h"
75
76#include <asm/sigcontext.h> /* for struct _fpstate */
77#include <asm/math_emu.h>
78#include <linux/linkage.h>
79
80/*
81#define RE_ENTRANT_CHECKING
82 */
83
84#ifdef RE_ENTRANT_CHECKING
85extern u_char emulating;
86# define RE_ENTRANT_CHECK_OFF emulating = 0
87# define RE_ENTRANT_CHECK_ON emulating = 1
88#else
89# define RE_ENTRANT_CHECK_OFF
90# define RE_ENTRANT_CHECK_ON
91#endif /* RE_ENTRANT_CHECKING */
92
93#define FWAIT_OPCODE 0x9b
94#define OP_SIZE_PREFIX 0x66
95#define ADDR_SIZE_PREFIX 0x67
96#define PREFIX_CS 0x2e
97#define PREFIX_DS 0x3e
98#define PREFIX_ES 0x26
99#define PREFIX_SS 0x36
100#define PREFIX_FS 0x64
101#define PREFIX_GS 0x65
102#define PREFIX_REPE 0xf3
103#define PREFIX_REPNE 0xf2
104#define PREFIX_LOCK 0xf0
105#define PREFIX_CS_ 1
106#define PREFIX_DS_ 2
107#define PREFIX_ES_ 3
108#define PREFIX_FS_ 4
109#define PREFIX_GS_ 5
110#define PREFIX_SS_ 6
111#define PREFIX_DEFAULT 7
112
113struct address {
114 unsigned int offset;
115 unsigned int selector:16;
116 unsigned int opcode:11;
117 unsigned int empty:5;
118};
119struct fpu__reg {
120 unsigned sigl;
121 unsigned sigh;
122 short exp;
123};
124
125typedef void (*FUNC)(void);
126typedef struct fpu__reg FPU_REG;
127typedef void (*FUNC_ST0)(FPU_REG *st0_ptr, u_char st0_tag);
128typedef struct { u_char address_size, operand_size, segment; }
129 overrides;
130/* This structure is 32 bits: */
131typedef struct { overrides override;
132 u_char default_mode; } fpu_addr_modes;
133/* PROTECTED has a restricted meaning in the emulator; it is used
134 to signal that the emulator needs to do special things to ensure
135 that protection is respected in a segmented model. */
136#define PROTECTED 4
137#define SIXTEEN 1 /* We rely upon this being 1 (true) */
138#define VM86 SIXTEEN
139#define PM16 (SIXTEEN | PROTECTED)
140#define SEG32 PROTECTED
141extern u_char const data_sizes_16[32];
142
143#define register_base ((u_char *) registers )
144#define fpu_register(x) ( * ((FPU_REG *)( register_base + 10 * (x & 7) )) )
145#define st(x) ( * ((FPU_REG *)( register_base + 10 * ((top+x) & 7) )) )
146
147#define STACK_OVERFLOW (FPU_stackoverflow(&st_new_ptr))
148#define NOT_EMPTY(i) (!FPU_empty_i(i))
149
150#define NOT_EMPTY_ST0 (st0_tag ^ TAG_Empty)
151
152#define poppop() { FPU_pop(); FPU_pop(); }
153
154/* push() does not affect the tags */
155#define push() { top--; }
156
157#define signbyte(a) (((u_char *)(a))[9])
158#define getsign(a) (signbyte(a) & 0x80)
159#define setsign(a,b) { if (b) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; }
160#define copysign(a,b) { if (getsign(a)) signbyte(b) |= 0x80; \
161 else signbyte(b) &= 0x7f; }
162#define changesign(a) { signbyte(a) ^= 0x80; }
163#define setpositive(a) { signbyte(a) &= 0x7f; }
164#define setnegative(a) { signbyte(a) |= 0x80; }
165#define signpositive(a) ( (signbyte(a) & 0x80) == 0 )
166#define signnegative(a) (signbyte(a) & 0x80)
167
168static inline void reg_copy(FPU_REG const *x, FPU_REG *y)
169{
170 *(short *)&(y->exp) = *(const short *)&(x->exp);
171 *(long long *)&(y->sigl) = *(const long long *)&(x->sigl);
172}
173
174#define exponent(x) (((*(short *)&((x)->exp)) & 0x7fff) - EXTENDED_Ebias)
175#define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \
176 ((y) + EXTENDED_Ebias) & 0x7fff; }
177#define exponent16(x) (*(short *)&((x)->exp))
178#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (y); }
179#define addexponent(x,y) { (*(short *)&((x)->exp)) += (y); }
180#define stdexp(x) { (*(short *)&((x)->exp)) += EXTENDED_Ebias; }
181
182#define isdenormal(ptr) (exponent(ptr) == EXP_BIAS+EXP_UNDER)
183
184#define significand(x) ( ((unsigned long long *)&((x)->sigl))[0] )
185
186
187/*----- Prototypes for functions written in assembler -----*/
188/* extern void reg_move(FPU_REG *a, FPU_REG *b); */
189
190asmlinkage int FPU_normalize(FPU_REG *x);
191asmlinkage int FPU_normalize_nuo(FPU_REG *x);
192asmlinkage int FPU_u_sub(FPU_REG const *arg1, FPU_REG const *arg2,
193 FPU_REG *answ, unsigned int control_w, u_char sign,
194 int expa, int expb);
195asmlinkage int FPU_u_mul(FPU_REG const *arg1, FPU_REG const *arg2,
196 FPU_REG *answ, unsigned int control_w, u_char sign,
197 int expon);
198asmlinkage int FPU_u_div(FPU_REG const *arg1, FPU_REG const *arg2,
199 FPU_REG *answ, unsigned int control_w, u_char sign);
200asmlinkage int FPU_u_add(FPU_REG const *arg1, FPU_REG const *arg2,
201 FPU_REG *answ, unsigned int control_w, u_char sign,
202 int expa, int expb);
203asmlinkage int wm_sqrt(FPU_REG *n, int dummy1, int dummy2,
204 unsigned int control_w, u_char sign);
205asmlinkage unsigned FPU_shrx(void *l, unsigned x);
206asmlinkage unsigned FPU_shrxs(void *v, unsigned x);
207asmlinkage unsigned long FPU_div_small(unsigned long long *x, unsigned long y);
208asmlinkage int FPU_round(FPU_REG *arg, unsigned int extent, int dummy,
209 unsigned int control_w, u_char sign);
210
211#ifndef MAKING_PROTO
212#include "fpu_proto.h"
213#endif
214
215#endif /* __ASSEMBLY__ */
216
217#endif /* _FPU_EMU_H_ */
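
The 10-byte fpu__reg layout above (sigl, sigh, then a 16-bit
sign+exponent word) coincides with the hardware's 80-bit extended
format, so the sign and exponent macros can be sanity-checked from
ordinary C (little-endian x86 assumed; illustrative only):

  #include <stdio.h>
  #include <string.h>

  struct fpu__reg { unsigned sigl, sigh; short exp; };   /* as above */

  int main(void)
  {
      struct fpu__reg r;
      long double val = -1.0L;      /* x86 extended precision */

      memcpy(&r, &val, 10);         /* the 10 value bytes */
      printf("sigh=%08x sigl=%08x exp=%04x sign=%d\n",
             r.sigh, r.sigl, (unsigned short)r.exp,
             (((unsigned char *)&r)[9] & 0x80) != 0);
      /* expect: sigh=80000000 sigl=00000000 exp=bfff sign=1 */
      return 0;
  }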
diff --git a/arch/i386/math-emu/fpu_entry.c b/arch/i386/math-emu/fpu_entry.c
new file mode 100644
index 000000000000..d93f16ef828f
--- /dev/null
+++ b/arch/i386/math-emu/fpu_entry.c
@@ -0,0 +1,760 @@
1/*---------------------------------------------------------------------------+
2 | fpu_entry.c |
3 | |
4 | The entry functions for wm-FPU-emu |
5 | |
6 | Copyright (C) 1992,1993,1994,1996,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | See the files "README" and "COPYING" for further copyright and warranty |
11 | information. |
12 | |
13 +---------------------------------------------------------------------------*/
14
15/*---------------------------------------------------------------------------+
16 | Note: |
17 | The file contains code which accesses user memory. |
18 | Emulator static data may change when user memory is accessed, due to |
19 | other processes using the emulator while swapping is in progress. |
20 +---------------------------------------------------------------------------*/
21
22/*---------------------------------------------------------------------------+
23 | math_emulate(), restore_i387_soft() and save_i387_soft() are the only |
24 | entry points for wm-FPU-emu. |
25 +---------------------------------------------------------------------------*/
26
27#include <linux/signal.h>
28#include <linux/ptrace.h>
29
30#include <asm/uaccess.h>
31#include <asm/desc.h>
32
33#include "fpu_system.h"
34#include "fpu_emu.h"
35#include "exception.h"
36#include "control_w.h"
37#include "status_w.h"
38
39#define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */
40
41#ifndef NO_UNDOC_CODE /* Un-documented FPU op-codes supported by default. */
42
43/* WARNING: These codes are not documented by Intel in their 80486 manual
44 and may not work on FPU clones or later Intel FPUs. */
45
46/* Changes to support the un-doc codes provided by Linus Torvalds. */
47
48#define _d9_d8_ fstp_i /* unofficial code (19) */
49#define _dc_d0_ fcom_st /* unofficial code (14) */
50#define _dc_d8_ fcompst /* unofficial code (1c) */
51#define _dd_c8_ fxch_i /* unofficial code (0d) */
52#define _de_d0_ fcompst /* unofficial code (16) */
53#define _df_c0_ ffreep /* unofficial code (07) ffree + pop */
54#define _df_c8_ fxch_i /* unofficial code (0f) */
55#define _df_d0_ fstp_i /* unofficial code (17) */
56#define _df_d8_ fstp_i /* unofficial code (1f) */
57
58static FUNC const st_instr_table[64] = {
59 fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_,
60 fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_,
61 fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_,
62 fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_,
63 fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
64 fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
65 fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
66 fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
67};
68
69#else /* Support only documented FPU op-codes */
70
71static FUNC const st_instr_table[64] = {
72 fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__,
73 fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__,
74 fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__,
75 fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__,
76 fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
77 fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
78 fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
79 fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
80};
81
82#endif /* NO_UNDOC_CODE */
83
84
85#define _NONE_ 0 /* Take no special action */
86#define _REG0_ 1 /* Need to check for not empty st(0) */
87#define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */
88#define _REGi_ 0 /* Uses st(rm) */
89#define _PUSH_ 3 /* Need to check for space to push onto stack */
90#define _null_ 4 /* Function illegal or not implemented */
91#define _REGIi 5 /* Uses st(0) and st(rm), result to st(rm) */
92#define _REGIp 6 /* Uses st(0) and st(rm), result to st(rm) then pop */
93#define _REGIc 0 /* Compare st(0) and st(rm) */
94#define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later */
95
96#ifndef NO_UNDOC_CODE
97
98/* Un-documented FPU op-codes supported by default. (see above) */
99
100static u_char const type_table[64] = {
101 _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_,
102 _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_,
103 _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
104 _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
105 _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
106 _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
107 _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
108 _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
109};
110
111#else /* Support only documented FPU op-codes */
112
113static u_char const type_table[64] = {
114 _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_,
115 _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
116 _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_,
117 _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_,
118 _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
119 _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
120 _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
121 _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
122};
123
124#endif /* NO_UNDOC_CODE */
125
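/* [Editor's sketch -- not part of the original file] Both 64-entry tables
   are indexed by bits 3..5 of the mod/rm byte combined with the low three
   bits of the first opcode byte, the same value math_emulate() computes
   below as (FPU_modrm & 0x38) | (byte1 & 7).  A stand-alone illustration: */
#if 0	/* build as an ordinary user-space program */
#include <stdio.h>

int main(void)
{
	unsigned char byte1 = 0xd9;	/* first opcode byte, 0xd8..0xdf */
	unsigned char modrm = 0xe1;	/* d9 e1 is fabs */
	unsigned index = (modrm & 0x38) | (byte1 & 7);
	/* index 33 selects FPU_etc in st_instr_table; rm = 1 then picks fabs */
	printf("index %u, rm %u\n", index, modrm & 7);
	return 0;
}
#endif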
126
127#ifdef RE_ENTRANT_CHECKING
128u_char emulating=0;
129#endif /* RE_ENTRANT_CHECKING */
130
131static int valid_prefix(u_char *Byte, u_char __user **fpu_eip,
132 overrides *override);
133
134asmlinkage void math_emulate(long arg)
135{
136 u_char FPU_modrm, byte1;
137 unsigned short code;
138 fpu_addr_modes addr_modes;
139 int unmasked;
140 FPU_REG loaded_data;
141 FPU_REG *st0_ptr;
142 u_char loaded_tag, st0_tag;
143 void __user *data_address;
144 struct address data_sel_off;
145 struct address entry_sel_off;
146 unsigned long code_base = 0;
147 unsigned long code_limit = 0; /* Initialized to stop compiler warnings */
148 struct desc_struct code_descriptor;
149
150#ifdef RE_ENTRANT_CHECKING
151 if ( emulating )
152 {
153 printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n");
154 }
155 RE_ENTRANT_CHECK_ON;
156#endif /* RE_ENTRANT_CHECKING */
157
158 if (!used_math())
159 {
160 finit();
161 set_used_math();
162 }
163
164 SETUP_DATA_AREA(arg);
165
166 FPU_ORIG_EIP = FPU_EIP;
167
168 if ( (FPU_EFLAGS & 0x00020000) != 0 )
169 {
170 /* Virtual 8086 mode */
171 addr_modes.default_mode = VM86;
172 FPU_EIP += code_base = FPU_CS << 4;
173 code_limit = code_base + 0xffff; /* Assumes code_base <= 0xffff0000 */
174 }
175 else if ( FPU_CS == __USER_CS && FPU_DS == __USER_DS )
176 {
177 addr_modes.default_mode = 0;
178 }
179 else if ( FPU_CS == __KERNEL_CS )
180 {
181 printk("math_emulate: %04x:%08lx\n",FPU_CS,FPU_EIP);
182 panic("Math emulation needed in kernel");
183 }
184 else
185 {
186
187 if ( (FPU_CS & 4) != 4 ) /* Must be in the LDT */
188 {
189 /* Can only handle segmented addressing via the LDT
190 for now, and it must be 16 bit */
191 printk("FPU emulator: Unsupported addressing mode\n");
192 math_abort(FPU_info, SIGILL);
193 }
194
195 code_descriptor = LDT_DESCRIPTOR(FPU_CS);
196 if ( SEG_D_SIZE(code_descriptor) )
197 {
198	  /* The above test may be wrong; the book is not clear */
199 /* Segmented 32 bit protected mode */
200 addr_modes.default_mode = SEG32;
201 }
202 else
203 {
204 /* 16 bit protected mode */
205 addr_modes.default_mode = PM16;
206 }
207 FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor);
208 code_limit = code_base
209 + (SEG_LIMIT(code_descriptor)+1) * SEG_GRANULARITY(code_descriptor)
210 - 1;
211 if ( code_limit < code_base ) code_limit = 0xffffffff;
212 }
213
214 FPU_lookahead = 1;
215 if (current->ptrace & PT_PTRACED)
216 FPU_lookahead = 0;
217
218 if ( !valid_prefix(&byte1, (u_char __user **)&FPU_EIP,
219 &addr_modes.override) )
220 {
221 RE_ENTRANT_CHECK_OFF;
222 printk("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n"
223 "FPU emulator: self-modifying code! (emulation impossible)\n",
224 byte1);
225 RE_ENTRANT_CHECK_ON;
226 EXCEPTION(EX_INTERNAL|0x126);
227 math_abort(FPU_info,SIGILL);
228 }
229
230do_another_FPU_instruction:
231
232 no_ip_update = 0;
233
234 FPU_EIP++; /* We have fetched the prefix and first code bytes. */
235
236 if ( addr_modes.default_mode )
237 {
238 /* This checks for the minimum instruction bytes.
239 We also need to check any extra (address mode) code access. */
240 if ( FPU_EIP > code_limit )
241 math_abort(FPU_info,SIGSEGV);
242 }
243
244 if ( (byte1 & 0xf8) != 0xd8 )
245 {
246 if ( byte1 == FWAIT_OPCODE )
247 {
248 if (partial_status & SW_Summary)
249 goto do_the_FPU_interrupt;
250 else
251 goto FPU_fwait_done;
252 }
253#ifdef PARANOID
254 EXCEPTION(EX_INTERNAL|0x128);
255 math_abort(FPU_info,SIGILL);
256#endif /* PARANOID */
257 }
258
259 RE_ENTRANT_CHECK_OFF;
260 FPU_code_access_ok(1);
261 FPU_get_user(FPU_modrm, (u_char __user *) FPU_EIP);
262 RE_ENTRANT_CHECK_ON;
263 FPU_EIP++;
264
265 if (partial_status & SW_Summary)
266 {
267 /* Ignore the error for now if the current instruction is a no-wait
268 control instruction */
269 /* The 80486 manual contradicts itself on this topic,
270 but a real 80486 uses the following instructions:
271	 fninit, fnstenv, fnsave, fnstsw, fnstcw, fnclex.
272 */
273 code = (FPU_modrm << 8) | byte1;
274 if ( ! ( (((code & 0xf803) == 0xe003) || /* fnclex, fninit, fnstsw */
275 (((code & 0x3003) == 0x3001) && /* fnsave, fnstcw, fnstenv,
276 fnstsw */
277 ((code & 0xc000) != 0xc000))) ) )
278 {
279 /*
280 * We need to simulate the action of the kernel to FPU
281 * interrupts here.
282 */
283 do_the_FPU_interrupt:
284
285 FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */
286
287 RE_ENTRANT_CHECK_OFF;
288 current->thread.trap_no = 16;
289 current->thread.error_code = 0;
290 send_sig(SIGFPE, current, 1);
291 return;
292 }
293 }
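      /* [Editor's sketch -- not part of the original file] A stand-alone
	 check of the no-wait filter above: fnstsw %ax arrives as df e0,
	 so code = 0xe0df, which satisfies (code & 0xf803) == 0xe003 and
	 is therefore exempted from raising the pending exception: */
#if 0	/* build as an ordinary user-space program */
#include <stdio.h>

int main(void)
{
	unsigned char byte1 = 0xdf, modrm = 0xe0;	/* fnstsw %ax */
	unsigned code = (modrm << 8) | byte1;
	int no_wait = ((code & 0xf803) == 0xe003) ||
	    (((code & 0x3003) == 0x3001) && ((code & 0xc000) != 0xc000));
	printf("code %#06x, no-wait: %s\n", code, no_wait ? "yes" : "no");
	return 0;	/* prints: code 0xe0df, no-wait: yes */
}
#endif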
294
295 entry_sel_off.offset = FPU_ORIG_EIP;
296 entry_sel_off.selector = FPU_CS;
297 entry_sel_off.opcode = (byte1 << 8) | FPU_modrm;
298
299 FPU_rm = FPU_modrm & 7;
300
301 if ( FPU_modrm < 0300 )
302 {
303 /* All of these instructions use the mod/rm byte to get a data address */
304
305 if ( (addr_modes.default_mode & SIXTEEN)
306 ^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX) )
307 data_address = FPU_get_address_16(FPU_modrm, &FPU_EIP, &data_sel_off,
308 addr_modes);
309 else
310 data_address = FPU_get_address(FPU_modrm, &FPU_EIP, &data_sel_off,
311 addr_modes);
312
313 if ( addr_modes.default_mode )
314 {
315 if ( FPU_EIP-1 > code_limit )
316 math_abort(FPU_info,SIGSEGV);
317 }
318
319 if ( !(byte1 & 1) )
320 {
321 unsigned short status1 = partial_status;
322
323 st0_ptr = &st(0);
324 st0_tag = FPU_gettag0();
325
326 /* Stack underflow has priority */
327 if ( NOT_EMPTY_ST0 )
328 {
329 if ( addr_modes.default_mode & PROTECTED )
330 {
331 /* This table works for 16 and 32 bit protected mode */
332 if ( access_limit < data_sizes_16[(byte1 >> 1) & 3] )
333 math_abort(FPU_info,SIGSEGV);
334 }
335
336 unmasked = 0; /* Do this here to stop compiler warnings. */
337 switch ( (byte1 >> 1) & 3 )
338 {
339 case 0:
340 unmasked = FPU_load_single((float __user *)data_address,
341 &loaded_data);
342 loaded_tag = unmasked & 0xff;
343 unmasked &= ~0xff;
344 break;
345 case 1:
346 loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data);
347 break;
348 case 2:
349 unmasked = FPU_load_double((double __user *)data_address,
350 &loaded_data);
351 loaded_tag = unmasked & 0xff;
352 unmasked &= ~0xff;
353 break;
354 case 3:
355 default: /* Used here to suppress gcc warnings. */
356 loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data);
357 break;
358 }
359
360	  /* No more access to user memory; it is safe
361 to use static data now */
362
363 /* NaN operands have the next priority. */
364 /* We have to delay looking at st(0) until after
365 loading the data, because that data might contain an SNaN */
366 if ( ((st0_tag == TAG_Special) && isNaN(st0_ptr)) ||
367 ((loaded_tag == TAG_Special) && isNaN(&loaded_data)) )
368 {
369 /* Restore the status word; we might have loaded a
370 denormal. */
371 partial_status = status1;
372 if ( (FPU_modrm & 0x30) == 0x10 )
373 {
374 /* fcom or fcomp */
375 EXCEPTION(EX_Invalid);
376 setcc(SW_C3 | SW_C2 | SW_C0);
377 if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) )
378 FPU_pop(); /* fcomp, masked, so we pop. */
379 }
380 else
381 {
382 if ( loaded_tag == TAG_Special )
383 loaded_tag = FPU_Special(&loaded_data);
384#ifdef PECULIAR_486
385 /* This is not really needed, but gives behaviour
386 identical to an 80486 */
387 if ( (FPU_modrm & 0x28) == 0x20 )
388 /* fdiv or fsub */
389 real_2op_NaN(&loaded_data, loaded_tag, 0, &loaded_data);
390 else
391#endif /* PECULIAR_486 */
392 /* fadd, fdivr, fmul, or fsubr */
393 real_2op_NaN(&loaded_data, loaded_tag, 0, st0_ptr);
394 }
395 goto reg_mem_instr_done;
396 }
397
398 if ( unmasked && !((FPU_modrm & 0x30) == 0x10) )
399 {
400 /* Is not a comparison instruction. */
401 if ( (FPU_modrm & 0x38) == 0x38 )
402 {
403 /* fdivr */
404 if ( (st0_tag == TAG_Zero) &&
405 ((loaded_tag == TAG_Valid)
406 || (loaded_tag == TAG_Special
407 && isdenormal(&loaded_data))) )
408 {
409 if ( FPU_divide_by_zero(0, getsign(&loaded_data))
410 < 0 )
411 {
412 /* We use the fact here that the unmasked
413 exception in the loaded data was for a
414 denormal operand */
415 /* Restore the state of the denormal op bit */
416 partial_status &= ~SW_Denorm_Op;
417 partial_status |= status1 & SW_Denorm_Op;
418 }
419 else
420 setsign(st0_ptr, getsign(&loaded_data));
421 }
422 }
423 goto reg_mem_instr_done;
424 }
425
426 switch ( (FPU_modrm >> 3) & 7 )
427 {
428 case 0: /* fadd */
429 clear_C1();
430 FPU_add(&loaded_data, loaded_tag, 0, control_word);
431 break;
432 case 1: /* fmul */
433 clear_C1();
434 FPU_mul(&loaded_data, loaded_tag, 0, control_word);
435 break;
436 case 2: /* fcom */
437 FPU_compare_st_data(&loaded_data, loaded_tag);
438 break;
439 case 3: /* fcomp */
440 if ( !FPU_compare_st_data(&loaded_data, loaded_tag)
441 && !unmasked )
442 FPU_pop();
443 break;
444 case 4: /* fsub */
445 clear_C1();
446 FPU_sub(LOADED|loaded_tag, (int)&loaded_data, control_word);
447 break;
448 case 5: /* fsubr */
449 clear_C1();
450 FPU_sub(REV|LOADED|loaded_tag, (int)&loaded_data, control_word);
451 break;
452 case 6: /* fdiv */
453 clear_C1();
454 FPU_div(LOADED|loaded_tag, (int)&loaded_data, control_word);
455 break;
456 case 7: /* fdivr */
457 clear_C1();
458 if ( st0_tag == TAG_Zero )
459 partial_status = status1; /* Undo any denorm tag,
460 zero-divide has priority. */
461 FPU_div(REV|LOADED|loaded_tag, (int)&loaded_data, control_word);
462 break;
463 }
464 }
465 else
466 {
467 if ( (FPU_modrm & 0x30) == 0x10 )
468 {
469 /* The instruction is fcom or fcomp */
470 EXCEPTION(EX_StackUnder);
471 setcc(SW_C3 | SW_C2 | SW_C0);
472 if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) )
473 FPU_pop(); /* fcomp */
474 }
475 else
476 FPU_stack_underflow();
477 }
478 reg_mem_instr_done:
479 operand_address = data_sel_off;
480 }
481 else
482 {
483 if ( !(no_ip_update =
484 FPU_load_store(((FPU_modrm & 0x38) | (byte1 & 6)) >> 1,
485 addr_modes, data_address)) )
486 {
487 operand_address = data_sel_off;
488 }
489 }
490
491 }
492 else
493 {
494 /* None of these instructions access user memory */
495 u_char instr_index = (FPU_modrm & 0x38) | (byte1 & 7);
496
497#ifdef PECULIAR_486
498 /* This is supposed to be undefined, but a real 80486 seems
499 to do this: */
500 operand_address.offset = 0;
501 operand_address.selector = FPU_DS;
502#endif /* PECULIAR_486 */
503
504 st0_ptr = &st(0);
505 st0_tag = FPU_gettag0();
506 switch ( type_table[(int) instr_index] )
507 {
508 case _NONE_: /* also _REGIc: _REGIn */
509 break;
510 case _REG0_:
511 if ( !NOT_EMPTY_ST0 )
512 {
513 FPU_stack_underflow();
514 goto FPU_instruction_done;
515 }
516 break;
517 case _REGIi:
518 if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
519 {
520 FPU_stack_underflow_i(FPU_rm);
521 goto FPU_instruction_done;
522 }
523 break;
524 case _REGIp:
525 if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
526 {
527 FPU_stack_underflow_pop(FPU_rm);
528 goto FPU_instruction_done;
529 }
530 break;
531 case _REGI_:
532 if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
533 {
534 FPU_stack_underflow();
535 goto FPU_instruction_done;
536 }
537 break;
538 case _PUSH_: /* Only used by the fld st(i) instruction */
539 break;
540 case _null_:
541 FPU_illegal();
542 goto FPU_instruction_done;
543 default:
544 EXCEPTION(EX_INTERNAL|0x111);
545 goto FPU_instruction_done;
546 }
547 (*st_instr_table[(int) instr_index])();
548
549FPU_instruction_done:
550 ;
551 }
552
553 if ( ! no_ip_update )
554 instruction_address = entry_sel_off;
555
556FPU_fwait_done:
557
558#ifdef DEBUG
559 RE_ENTRANT_CHECK_OFF;
560 FPU_printall();
561 RE_ENTRANT_CHECK_ON;
562#endif /* DEBUG */
563
564 if (FPU_lookahead && !need_resched())
565 {
566 FPU_ORIG_EIP = FPU_EIP - code_base;
567 if ( valid_prefix(&byte1, (u_char __user **)&FPU_EIP,
568 &addr_modes.override) )
569 goto do_another_FPU_instruction;
570 }
571
572 if ( addr_modes.default_mode )
573 FPU_EIP -= code_base;
574
575 RE_ENTRANT_CHECK_OFF;
576}
577
578
579/* Support for prefix bytes is not yet complete. To properly handle
580 all prefix bytes, further changes are needed in the emulator code
581 which accesses user address space. Access to separate segments is
582 important for msdos emulation. */
583static int valid_prefix(u_char *Byte, u_char __user **fpu_eip,
584 overrides *override)
585{
586 u_char byte;
587 u_char __user *ip = *fpu_eip;
588
589 *override = (overrides) { 0, 0, PREFIX_DEFAULT }; /* defaults */
590
591 RE_ENTRANT_CHECK_OFF;
592 FPU_code_access_ok(1);
593 FPU_get_user(byte, ip);
594 RE_ENTRANT_CHECK_ON;
595
596 while ( 1 )
597 {
598 switch ( byte )
599 {
600 case ADDR_SIZE_PREFIX:
601 override->address_size = ADDR_SIZE_PREFIX;
602 goto do_next_byte;
603
604 case OP_SIZE_PREFIX:
605 override->operand_size = OP_SIZE_PREFIX;
606 goto do_next_byte;
607
608 case PREFIX_CS:
609 override->segment = PREFIX_CS_;
610 goto do_next_byte;
611 case PREFIX_ES:
612 override->segment = PREFIX_ES_;
613 goto do_next_byte;
614 case PREFIX_SS:
615 override->segment = PREFIX_SS_;
616 goto do_next_byte;
617 case PREFIX_FS:
618 override->segment = PREFIX_FS_;
619 goto do_next_byte;
620 case PREFIX_GS:
621 override->segment = PREFIX_GS_;
622 goto do_next_byte;
623 case PREFIX_DS:
624 override->segment = PREFIX_DS_;
625 goto do_next_byte;
626
627/* lock is not a valid prefix for FPU instructions;
628 let the cpu handle it to generate a SIGILL. */
629/* case PREFIX_LOCK: */
630
631 /* rep.. prefixes have no meaning for FPU instructions */
632 case PREFIX_REPE:
633 case PREFIX_REPNE:
634
635 do_next_byte:
636 ip++;
637 RE_ENTRANT_CHECK_OFF;
638 FPU_code_access_ok(1);
639 FPU_get_user(byte, ip);
640 RE_ENTRANT_CHECK_ON;
641 break;
642 case FWAIT_OPCODE:
643 *Byte = byte;
644 return 1;
645 default:
646 if ( (byte & 0xf8) == 0xd8 )
647 {
648 *Byte = byte;
649 *fpu_eip = ip;
650 return 1;
651 }
652 else
653 {
654 /* Not a valid sequence of prefix bytes followed by
655 an FPU instruction. */
656 *Byte = byte; /* Needed for error message. */
657 return 0;
658 }
659 }
660 }
661}
662
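/* [Editor's sketch -- not part of the original file] valid_prefix() accepts
   zero or more prefix bytes followed by FWAIT or a 0xd8..0xdf opcode.  A
   stand-alone scanner over an in-memory byte stream showing that shape
   (the prefix values are the usual x86 encodings, listed here by hand;
   only a few of the prefixes are checked, for brevity): */
#if 0	/* build as an ordinary user-space program */
#include <stdio.h>

int main(void)
{
	unsigned char stream[] = { 0x66, 0x2e, 0xd9, 0xc0 };	/* opsize, cs, fld st(0) */
	unsigned i = 0;
	while (stream[i] == 0x66 || stream[i] == 0x67 ||	/* size overrides */
	       stream[i] == 0x2e || stream[i] == 0x3e)		/* cs/ds overrides */
		i++;
	if ((stream[i] & 0xf8) == 0xd8 || stream[i] == 0x9b)	/* FPU op or fwait */
		printf("FPU opcode %#04x after %u prefix byte(s)\n", stream[i], i);
	return 0;
}
#endif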
663
664void math_abort(struct info * info, unsigned int signal)
665{
666 FPU_EIP = FPU_ORIG_EIP;
667 current->thread.trap_no = 16;
668 current->thread.error_code = 0;
669 send_sig(signal,current,1);
670 RE_ENTRANT_CHECK_OFF;
671 __asm__("movl %0,%%esp ; ret": :"g" (((long) info)-4));
672#ifdef PARANOID
673 printk("ERROR: wm-FPU-emu math_abort failed!\n");
674#endif /* PARANOID */
675}
676
677
678
679#define S387 ((struct i387_soft_struct *)s387)
680#define sstatus_word() \
681 ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top))
682
683int restore_i387_soft(void *s387, struct _fpstate __user *buf)
684{
685 u_char __user *d = (u_char __user *)buf;
686 int offset, other, i, tags, regnr, tag, newtop;
687
688 RE_ENTRANT_CHECK_OFF;
689 FPU_access_ok(VERIFY_READ, d, 7*4 + 8*10);
690 if (__copy_from_user(&S387->cwd, d, 7*4))
691 return -1;
692 RE_ENTRANT_CHECK_ON;
693
694 d += 7*4;
695
696 S387->ftop = (S387->swd >> SW_Top_Shift) & 7;
697 offset = (S387->ftop & 7) * 10;
698 other = 80 - offset;
699
700 RE_ENTRANT_CHECK_OFF;
701 /* Copy all registers in stack order. */
702 if (__copy_from_user(((u_char *)&S387->st_space)+offset, d, other))
703 return -1;
704 if ( offset )
705 if (__copy_from_user((u_char *)&S387->st_space, d+other, offset))
706 return -1;
707 RE_ENTRANT_CHECK_ON;
708
709 /* The tags may need to be corrected now. */
710 tags = S387->twd;
711 newtop = S387->ftop;
712 for ( i = 0; i < 8; i++ )
713 {
714 regnr = (i+newtop) & 7;
715 if ( ((tags >> ((regnr & 7)*2)) & 3) != TAG_Empty )
716 {
717	  /* The loaded data overrides all other cases. */
718 tag = FPU_tagof((FPU_REG *)((u_char *)S387->st_space + 10*regnr));
719 tags &= ~(3 << (regnr*2));
720 tags |= (tag & 3) << (regnr*2);
721 }
722 }
723 S387->twd = tags;
724
725 return 0;
726}
727
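/* [Editor's sketch -- not part of the original file] The offset/other split
   above rotates the eight 10-byte registers so that st(0) (register ftop)
   is copied first and the rest wrap around.  The arithmetic in isolation: */
#if 0	/* build as an ordinary user-space program */
#include <stdio.h>

int main(void)
{
	int ftop = 3;			/* example top-of-stack index */
	int offset = (ftop & 7) * 10;	/* byte offset of st(0) in st_space */
	int other = 80 - offset;	/* bytes from st(0) to the array end */
	printf("copy %d bytes starting at st(0), then wrap %d\n", other, offset);
	return 0;			/* 50 then 30 for ftop == 3 */
}
#endif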
728
729int save_i387_soft(void *s387, struct _fpstate __user * buf)
730{
731 u_char __user *d = (u_char __user *)buf;
732 int offset = (S387->ftop & 7) * 10, other = 80 - offset;
733
734 RE_ENTRANT_CHECK_OFF;
735 FPU_access_ok(VERIFY_WRITE, d, 7*4 + 8*10);
736#ifdef PECULIAR_486
737 S387->cwd &= ~0xe080;
738 /* An 80486 sets nearly all of the reserved bits to 1. */
739 S387->cwd |= 0xffff0040;
740 S387->swd = sstatus_word() | 0xffff0000;
741 S387->twd |= 0xffff0000;
742 S387->fcs &= ~0xf8000000;
743 S387->fos |= 0xffff0000;
744#endif /* PECULIAR_486 */
745 __copy_to_user(d, &S387->cwd, 7*4);
746 RE_ENTRANT_CHECK_ON;
747
748 d += 7*4;
749
750 RE_ENTRANT_CHECK_OFF;
751 /* Copy all registers in stack order. */
752 if (__copy_to_user(d, ((u_char *)&S387->st_space)+offset, other))
753 return -1;
754 if ( offset )
755 if (__copy_to_user(d+other, (u_char *)&S387->st_space, offset))
756      return -1;
757 RE_ENTRANT_CHECK_ON;
758
759 return 1;
760}
diff --git a/arch/i386/math-emu/fpu_etc.c b/arch/i386/math-emu/fpu_etc.c
new file mode 100644
index 000000000000..e3b5d465587f
--- /dev/null
+++ b/arch/i386/math-emu/fpu_etc.c
@@ -0,0 +1,143 @@
1/*---------------------------------------------------------------------------+
2 | fpu_etc.c |
3 | |
4 | Implement a few FPU instructions. |
5 | |
6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "fpu_system.h"
14#include "exception.h"
15#include "fpu_emu.h"
16#include "status_w.h"
17#include "reg_constant.h"
18
19
20static void fchs(FPU_REG *st0_ptr, u_char st0tag)
21{
22 if ( st0tag ^ TAG_Empty )
23 {
24 signbyte(st0_ptr) ^= SIGN_NEG;
25 clear_C1();
26 }
27 else
28 FPU_stack_underflow();
29}
30
31
32static void fabs(FPU_REG *st0_ptr, u_char st0tag)
33{
34 if ( st0tag ^ TAG_Empty )
35 {
36 setpositive(st0_ptr);
37 clear_C1();
38 }
39 else
40 FPU_stack_underflow();
41}
42
43
44static void ftst_(FPU_REG *st0_ptr, u_char st0tag)
45{
46 switch (st0tag)
47 {
48 case TAG_Zero:
49 setcc(SW_C3);
50 break;
51 case TAG_Valid:
52 if (getsign(st0_ptr) == SIGN_POS)
53 setcc(0);
54 else
55 setcc(SW_C0);
56 break;
57 case TAG_Special:
58 switch ( FPU_Special(st0_ptr) )
59 {
60 case TW_Denormal:
61 if (getsign(st0_ptr) == SIGN_POS)
62 setcc(0);
63 else
64 setcc(SW_C0);
65 if ( denormal_operand() < 0 )
66 {
67#ifdef PECULIAR_486
68 /* This is weird! */
69 if (getsign(st0_ptr) == SIGN_POS)
70 setcc(SW_C3);
71#endif /* PECULIAR_486 */
72 return;
73 }
74 break;
75 case TW_NaN:
76 setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */
77 EXCEPTION(EX_Invalid);
78 break;
79 case TW_Infinity:
80 if (getsign(st0_ptr) == SIGN_POS)
81 setcc(0);
82 else
83 setcc(SW_C0);
84 break;
85 default:
86 setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */
87 EXCEPTION(EX_INTERNAL|0x14);
88 break;
89 }
90 break;
91 case TAG_Empty:
92 setcc(SW_C0|SW_C2|SW_C3);
93 EXCEPTION(EX_StackUnder);
94 break;
95 }
96}
97
98
99static void fxam(FPU_REG *st0_ptr, u_char st0tag)
100{
101 int c = 0;
102 switch (st0tag)
103 {
104 case TAG_Empty:
105 c = SW_C3|SW_C0;
106 break;
107 case TAG_Zero:
108 c = SW_C3;
109 break;
110 case TAG_Valid:
111 c = SW_C2;
112 break;
113 case TAG_Special:
114 switch ( FPU_Special(st0_ptr) )
115 {
116 case TW_Denormal:
117 c = SW_C2|SW_C3; /* Denormal */
118 break;
119 case TW_NaN:
120 /* We also use NaN for unsupported types. */
121 if ( (st0_ptr->sigh & 0x80000000) && (exponent(st0_ptr) == EXP_OVER) )
122 c = SW_C0;
123 break;
124 case TW_Infinity:
125 c = SW_C2|SW_C0;
126 break;
127 }
128 }
129 if ( getsign(st0_ptr) == SIGN_NEG )
130 c |= SW_C1;
131 setcc(c);
132}
133
134
135static FUNC_ST0 const fp_etc_table[] = {
136 fchs, fabs, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal,
137 ftst_, fxam, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal
138};
139
140void FPU_etc(void)
141{
142 (fp_etc_table[FPU_rm])(&st(0), FPU_gettag0());
143}
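/* [Editor's sketch -- not part of the original file] FPU_etc() dispatches on
   the low three bits of the mod/rm byte, so d9 e0..e5 select fchs, fabs,
   ftst and fxam.  The indexing in isolation (names replace the pointers): */
#if 0	/* build as an ordinary user-space program */
#include <stdio.h>

int main(void)
{
	static const char *names[8] = {
		"fchs", "fabs", "(illegal)", "(illegal)",
		"ftst", "fxam", "(illegal)", "(illegal)"
	};
	unsigned char modrm = 0xe5;		/* d9 e5 is fxam */
	printf("%s\n", names[modrm & 7]);	/* prints fxam */
	return 0;
}
#endif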
diff --git a/arch/i386/math-emu/fpu_proto.h b/arch/i386/math-emu/fpu_proto.h
new file mode 100644
index 000000000000..37a8a7fe7e2b
--- /dev/null
+++ b/arch/i386/math-emu/fpu_proto.h
@@ -0,0 +1,140 @@
1#ifndef _FPU_PROTO_H
2#define _FPU_PROTO_H
3
4/* errors.c */
5extern void FPU_illegal(void);
6extern void FPU_printall(void);
7asmlinkage void FPU_exception(int n);
8extern int real_1op_NaN(FPU_REG *a);
9extern int real_2op_NaN(FPU_REG const *b, u_char tagb, int deststnr,
10 FPU_REG const *defaultNaN);
11asmlinkage int arith_invalid(int deststnr);
12asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign);
13extern int set_precision_flag(int flags);
14asmlinkage void set_precision_flag_up(void);
15asmlinkage void set_precision_flag_down(void);
16asmlinkage int denormal_operand(void);
17asmlinkage int arith_overflow(FPU_REG *dest);
18asmlinkage int arith_underflow(FPU_REG *dest);
19extern void FPU_stack_overflow(void);
20extern void FPU_stack_underflow(void);
21extern void FPU_stack_underflow_i(int i);
22extern void FPU_stack_underflow_pop(int i);
23/* fpu_arith.c */
24extern void fadd__(void);
25extern void fmul__(void);
26extern void fsub__(void);
27extern void fsubr_(void);
28extern void fdiv__(void);
29extern void fdivr_(void);
30extern void fadd_i(void);
31extern void fmul_i(void);
32extern void fsubri(void);
33extern void fsub_i(void);
34extern void fdivri(void);
35extern void fdiv_i(void);
36extern void faddp_(void);
37extern void fmulp_(void);
38extern void fsubrp(void);
39extern void fsubp_(void);
40extern void fdivrp(void);
41extern void fdivp_(void);
42/* fpu_aux.c */
43extern void finit(void);
44extern void finit_(void);
45extern void fstsw_(void);
46extern void fp_nop(void);
47extern void fld_i_(void);
48extern void fxch_i(void);
49extern void ffree_(void);
50extern void ffreep(void);
51extern void fst_i_(void);
52extern void fstp_i(void);
53/* fpu_entry.c */
54asmlinkage extern void math_emulate(long arg);
55extern void math_abort(struct info *info, unsigned int signal);
56/* fpu_etc.c */
57extern void FPU_etc(void);
58/* fpu_tags.c */
59extern int FPU_gettag0(void);
60extern int FPU_gettagi(int stnr);
61extern int FPU_gettag(int regnr);
62extern void FPU_settag0(int tag);
63extern void FPU_settagi(int stnr, int tag);
64extern void FPU_settag(int regnr, int tag);
65extern int FPU_Special(FPU_REG const *ptr);
66extern int isNaN(FPU_REG const *ptr);
67extern void FPU_pop(void);
68extern int FPU_empty_i(int stnr);
69extern int FPU_stackoverflow(FPU_REG **st_new_ptr);
70extern void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr);
71extern void FPU_copy_to_reg1(FPU_REG const *r, u_char tag);
72extern void FPU_copy_to_reg0(FPU_REG const *r, u_char tag);
73/* fpu_trig.c */
74extern void FPU_triga(void);
75extern void FPU_trigb(void);
76/* get_address.c */
77extern void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip,
78 struct address *addr, fpu_addr_modes addr_modes);
79extern void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
80 struct address *addr, fpu_addr_modes addr_modes);
81/* load_store.c */
82extern int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
83 void __user *data_address);
84/* poly_2xm1.c */
85extern int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result);
86/* poly_atan.c */
87extern void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, FPU_REG *st1_ptr,
88 u_char st1_tag);
89/* poly_l2.c */
90extern void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign);
91extern int poly_l2p1(u_char s0, u_char s1, FPU_REG *r0, FPU_REG *r1,
92 FPU_REG *d);
93/* poly_sin.c */
94extern void poly_sine(FPU_REG *st0_ptr);
95extern void poly_cos(FPU_REG *st0_ptr);
96/* poly_tan.c */
97extern void poly_tan(FPU_REG *st0_ptr);
98/* reg_add_sub.c */
99extern int FPU_add(FPU_REG const *b, u_char tagb, int destrnr, int control_w);
100extern int FPU_sub(int flags, int rm, int control_w);
101/* reg_compare.c */
102extern int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag);
103extern void fcom_st(void);
104extern void fcompst(void);
105extern void fcompp(void);
106extern void fucom_(void);
107extern void fucomp(void);
108extern void fucompp(void);
109/* reg_constant.c */
110extern void fconst(void);
111/* reg_ld_str.c */
112extern int FPU_load_extended(long double __user *s, int stnr);
113extern int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data);
114extern int FPU_load_single(float __user *single, FPU_REG *loaded_data);
115extern int FPU_load_int64(long long __user *_s);
116extern int FPU_load_int32(long __user *_s, FPU_REG *loaded_data);
117extern int FPU_load_int16(short __user *_s, FPU_REG *loaded_data);
118extern int FPU_load_bcd(u_char __user *s);
119extern int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag,
120 long double __user *d);
121extern int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat);
122extern int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single);
123extern int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d);
124extern int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d);
125extern int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d);
126extern int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d);
127extern int FPU_round_to_int(FPU_REG *r, u_char tag);
128extern u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s);
129extern void frstor(fpu_addr_modes addr_modes, u_char __user *data_address);
130extern u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d);
131extern void fsave(fpu_addr_modes addr_modes, u_char __user *data_address);
132extern int FPU_tagof(FPU_REG *ptr);
133/* reg_mul.c */
134extern int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w);
135
136extern int FPU_div(int flags, int regrm, int control_w);
137/* reg_convert.c */
138extern int FPU_to_exp16(FPU_REG const *a, FPU_REG *x);
139#endif /* _FPU_PROTO_H */
140
diff --git a/arch/i386/math-emu/fpu_system.h b/arch/i386/math-emu/fpu_system.h
new file mode 100644
index 000000000000..bf26341c8bde
--- /dev/null
+++ b/arch/i386/math-emu/fpu_system.h
@@ -0,0 +1,89 @@
1/*---------------------------------------------------------------------------+
2 | fpu_system.h |
3 | |
4 | Copyright (C) 1992,1994,1997 |
5 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
6 | Australia. E-mail billm@suburbia.net |
7 | |
8 +---------------------------------------------------------------------------*/
9
10#ifndef _FPU_SYSTEM_H
11#define _FPU_SYSTEM_H
12
13/* system dependent definitions */
14
15#include <linux/sched.h>
16#include <linux/kernel.h>
17#include <linux/mm.h>
18
19/* This sets the pointer FPU_info to point to the argument part
20 of the stack frame of math_emulate() */
21#define SETUP_DATA_AREA(arg) FPU_info = (struct info *) &arg
22
23/* s is always from a cpu register, and the cpu does bounds checking
24 * during register load --> no further bounds checks needed */
25#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3])
26#define SEG_D_SIZE(x) ((x).b & (3 << 21))
27#define SEG_G_BIT(x) ((x).b & (1 << 23))
28#define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1)
29#define SEG_286_MODE(x) ((x).b & ( 0xff000000 | 0xf0000 | (1 << 23)))
30#define SEG_BASE_ADDR(s) (((s).b & 0xff000000) \
31 | (((s).b & 0xff) << 16) | ((s).a >> 16))
32#define SEG_LIMIT(s) (((s).b & 0xff0000) | ((s).a & 0xffff))
33#define SEG_EXECUTE_ONLY(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 11))
34#define SEG_WRITE_PERM(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 9))
35#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \
36 == (1 << 10))
37
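/* [Editor's sketch -- not part of the original file] The SEG_* macros above
   reassemble a base address that the descriptor scatters across both words
   and scale the limit by the granularity bit.  A stand-alone decode of a
   made-up flat code descriptor using the same bit arithmetic: */
#if 0	/* build as an ordinary user-space program */
#include <stdio.h>

struct desc { unsigned a, b; };		/* low and high 32 bits */

int main(void)
{
	struct desc d = { 0x0000ffff, 0x00cf9a00 };	/* flat 4 GiB code segment */
	unsigned base  = (d.b & 0xff000000) | ((d.b & 0xff) << 16) | (d.a >> 16);
	unsigned gran  = (d.b & (1 << 23)) ? 4096 : 1;
	unsigned limit = (d.b & 0xff0000) | (d.a & 0xffff);
	/* prints: base 0, top 0xffffffff */
	printf("base %#x, top %#llx\n", base,
	       ((unsigned long long)limit + 1) * gran - 1);
	return 0;
}
#endif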
38#define I387 (current->thread.i387)
39#define FPU_info (I387.soft.info)
40
41#define FPU_CS (*(unsigned short *) &(FPU_info->___cs))
42#define FPU_SS (*(unsigned short *) &(FPU_info->___ss))
43#define FPU_DS (*(unsigned short *) &(FPU_info->___ds))
44#define FPU_EAX (FPU_info->___eax)
45#define FPU_EFLAGS (FPU_info->___eflags)
46#define FPU_EIP (FPU_info->___eip)
47#define FPU_ORIG_EIP (FPU_info->___orig_eip)
48
49#define FPU_lookahead (I387.soft.lookahead)
50
51/* nz if ip_offset and cs_selector are not to be set for the current
52 instruction. */
53#define no_ip_update (*(u_char *)&(I387.soft.no_update))
54#define FPU_rm (*(u_char *)&(I387.soft.rm))
55
56/* Number of bytes of data which can be legally accessed by the current
57 instruction. This only needs to hold a number <= 108, so a byte will do. */
58#define access_limit (*(u_char *)&(I387.soft.alimit))
59
60#define partial_status (I387.soft.swd)
61#define control_word (I387.soft.cwd)
62#define fpu_tag_word (I387.soft.twd)
63#define registers (I387.soft.st_space)
64#define top (I387.soft.ftop)
65
66#define instruction_address (*(struct address *)&I387.soft.fip)
67#define operand_address (*(struct address *)&I387.soft.foo)
68
69#define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \
70 math_abort(FPU_info,SIGSEGV)
71
72#undef FPU_IGNORE_CODE_SEGV
73#ifdef FPU_IGNORE_CODE_SEGV
74/* access_ok() is very expensive, and causes the emulator to run
75 about 20% slower if applied to the code. Anyway, errors due to bad
76 code addresses should be much rarer than errors due to bad data
77 addresses. */
78#define FPU_code_access_ok(z)
79#else
80/* A simpler test than access_ok() can probably be done for
81 FPU_code_access_ok() because the only possible error is to step
82 past the upper boundary of a legal code area. */
83#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user *)FPU_EIP,z)
84#endif
85
86#define FPU_get_user(x,y) get_user((x),(y))
87#define FPU_put_user(x,y) put_user((x),(y))
88
89#endif
diff --git a/arch/i386/math-emu/fpu_tags.c b/arch/i386/math-emu/fpu_tags.c
new file mode 100644
index 000000000000..cb436fe20e4c
--- /dev/null
+++ b/arch/i386/math-emu/fpu_tags.c
@@ -0,0 +1,127 @@
1/*---------------------------------------------------------------------------+
2 | fpu_tags.c |
3 | |
4 | Set FPU register tags. |
5 | |
6 | Copyright (C) 1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@jacobi.maths.monash.edu.au |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "fpu_emu.h"
14#include "fpu_system.h"
15#include "exception.h"
16
17
18void FPU_pop(void)
19{
20 fpu_tag_word |= 3 << ((top & 7)*2);
21 top++;
22}
23
24
25int FPU_gettag0(void)
26{
27 return (fpu_tag_word >> ((top & 7)*2)) & 3;
28}
29
30
31int FPU_gettagi(int stnr)
32{
33 return (fpu_tag_word >> (((top+stnr) & 7)*2)) & 3;
34}
35
36
37int FPU_gettag(int regnr)
38{
39 return (fpu_tag_word >> ((regnr & 7)*2)) & 3;
40}
41
42
43void FPU_settag0(int tag)
44{
45 int regnr = top;
46 regnr &= 7;
47 fpu_tag_word &= ~(3 << (regnr*2));
48 fpu_tag_word |= (tag & 3) << (regnr*2);
49}
50
51
52void FPU_settagi(int stnr, int tag)
53{
54 int regnr = stnr+top;
55 regnr &= 7;
56 fpu_tag_word &= ~(3 << (regnr*2));
57 fpu_tag_word |= (tag & 3) << (regnr*2);
58}
59
60
61void FPU_settag(int regnr, int tag)
62{
63 regnr &= 7;
64 fpu_tag_word &= ~(3 << (regnr*2));
65 fpu_tag_word |= (tag & 3) << (regnr*2);
66}
67
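/* [Editor's sketch -- not part of the original file] All of the routines
   above manipulate one packed layout: two tag bits per register, with
   register N occupying bits 2N and 2N+1.  The mask-and-insert step alone: */
#if 0	/* build as an ordinary user-space program */
#include <stdio.h>

int main(void)
{
	unsigned tag_word = 0xffff;	/* all eight registers empty (tag 3) */
	int regnr = 5, tag = 0;		/* mark register 5 valid (tag 0) */
	tag_word &= ~(3 << (regnr * 2));		/* clear the old field */
	tag_word |= (tag & 3) << (regnr * 2);		/* install the new tag */
	printf("%#06x\n", tag_word);			/* prints 0xf3ff */
	return 0;
}
#endif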
68
69int FPU_Special(FPU_REG const *ptr)
70{
71 int exp = exponent(ptr);
72
73 if ( exp == EXP_BIAS+EXP_UNDER )
74 return TW_Denormal;
75 else if ( exp != EXP_BIAS+EXP_OVER )
76 return TW_NaN;
77 else if ( (ptr->sigh == 0x80000000) && (ptr->sigl == 0) )
78 return TW_Infinity;
79 return TW_NaN;
80}
81
82
83int isNaN(FPU_REG const *ptr)
84{
85 return ( (exponent(ptr) == EXP_BIAS+EXP_OVER)
86 && !((ptr->sigh == 0x80000000) && (ptr->sigl == 0)) );
87}
88
89
90int FPU_empty_i(int stnr)
91{
92 int regnr = (top+stnr) & 7;
93
94 return ((fpu_tag_word >> (regnr*2)) & 3) == TAG_Empty;
95}
96
97
98int FPU_stackoverflow(FPU_REG **st_new_ptr)
99{
100 *st_new_ptr = &st(-1);
101
102 return ((fpu_tag_word >> (((top - 1) & 7)*2)) & 3) != TAG_Empty;
103}
104
105
106void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr)
107{
108 reg_copy(r, &st(stnr));
109 FPU_settagi(stnr, tag);
110}
111
112void FPU_copy_to_reg1(FPU_REG const *r, u_char tag)
113{
114 reg_copy(r, &st(1));
115 FPU_settagi(1, tag);
116}
117
118void FPU_copy_to_reg0(FPU_REG const *r, u_char tag)
119{
120 int regnr = top;
121 regnr &= 7;
122
123 reg_copy(r, &st(0));
124
125 fpu_tag_word &= ~(3 << (regnr*2));
126 fpu_tag_word |= (tag & 3) << (regnr*2);
127}
diff --git a/arch/i386/math-emu/fpu_trig.c b/arch/i386/math-emu/fpu_trig.c
new file mode 100644
index 000000000000..403cbde1d425
--- /dev/null
+++ b/arch/i386/math-emu/fpu_trig.c
@@ -0,0 +1,1845 @@
1/*---------------------------------------------------------------------------+
2 | fpu_trig.c |
3 | |
4 | Implementation of the FPU "transcendental" functions. |
5 | |
6 | Copyright (C) 1992,1993,1994,1997,1999 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@melbpc.org.au |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "fpu_system.h"
14#include "exception.h"
15#include "fpu_emu.h"
16#include "status_w.h"
17#include "control_w.h"
18#include "reg_constant.h"
19
20static void rem_kernel(unsigned long long st0, unsigned long long *y,
21 unsigned long long st1,
22 unsigned long long q, int n);
23
24#define BETTER_THAN_486
25
26#define FCOS 4
27
28/* Used only by fptan, fsin, fcos, and fsincos. */
29/* This routine produces very accurate results, similar to
30 using a value of pi with more than 128 bits precision. */
31/* Limited measurements show no results worse than 64 bit precision
32 except for the results for arguments close to 2^63, where the
33 precision of the result sometimes degrades to about 63.9 bits */
34static int trig_arg(FPU_REG *st0_ptr, int even)
35{
36 FPU_REG tmp;
37 u_char tmptag;
38 unsigned long long q;
39 int old_cw = control_word, saved_status = partial_status;
40 int tag, st0_tag = TAG_Valid;
41
42 if ( exponent(st0_ptr) >= 63 )
43 {
44 partial_status |= SW_C2; /* Reduction incomplete. */
45 return -1;
46 }
47
48 control_word &= ~CW_RC;
49 control_word |= RC_CHOP;
50
51 setpositive(st0_ptr);
52 tag = FPU_u_div(st0_ptr, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f,
53 SIGN_POS);
54
55 FPU_round_to_int(&tmp, tag); /* Fortunately, this can't overflow
56 to 2^64 */
57 q = significand(&tmp);
58 if ( q )
59 {
60 rem_kernel(significand(st0_ptr),
61 &significand(&tmp),
62 significand(&CONST_PI2),
63 q, exponent(st0_ptr) - exponent(&CONST_PI2));
64 setexponent16(&tmp, exponent(&CONST_PI2));
65 st0_tag = FPU_normalize(&tmp);
66 FPU_copy_to_reg0(&tmp, st0_tag);
67 }
68
69 if ( (even && !(q & 1)) || (!even && (q & 1)) )
70 {
71 st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2, FULL_PRECISION);
72
73#ifdef BETTER_THAN_486
74 /* So far, the results are exact but based upon a 64 bit
75 precision approximation to pi/2. The technique used
76 now is equivalent to using an approximation to pi/2 which
77 is accurate to about 128 bits. */
78 if ( (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64) || (q > 1) )
79 {
80 /* This code gives the effect of having pi/2 to better than
81 128 bits precision. */
82
83 significand(&tmp) = q + 1;
84 setexponent16(&tmp, 63);
85 FPU_normalize(&tmp);
86 tmptag =
87 FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION, SIGN_POS,
88 exponent(&CONST_PI2extra) + exponent(&tmp));
89 setsign(&tmp, getsign(&CONST_PI2extra));
90 st0_tag = FPU_add(&tmp, tmptag, 0, FULL_PRECISION);
91 if ( signnegative(st0_ptr) )
92 {
93 /* CONST_PI2extra is negative, so the result of the addition
94 can be negative. This means that the argument is actually
95 in a different quadrant. The correction is always < pi/2,
96 so it can't overflow into yet another quadrant. */
97 setpositive(st0_ptr);
98 q++;
99 }
100 }
101#endif /* BETTER_THAN_486 */
102 }
103#ifdef BETTER_THAN_486
104 else
105 {
106 /* So far, the results are exact but based upon a 64 bit
107 precision approximation to pi/2. The technique used
108 now is equivalent to using an approximation to pi/2 which
109 is accurate to about 128 bits. */
110 if ( ((q > 0) && (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64))
111 || (q > 1) )
112 {
113	  /* This code gives the effect of having pi/2 to better than
114 128 bits precision. */
115
116 significand(&tmp) = q;
117 setexponent16(&tmp, 63);
118 FPU_normalize(&tmp); /* This must return TAG_Valid */
119 tmptag = FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION,
120 SIGN_POS,
121 exponent(&CONST_PI2extra) + exponent(&tmp));
122 setsign(&tmp, getsign(&CONST_PI2extra));
123 st0_tag = FPU_sub(LOADED|(tmptag & 0x0f), (int)&tmp,
124 FULL_PRECISION);
125 if ( (exponent(st0_ptr) == exponent(&CONST_PI2)) &&
126 ((st0_ptr->sigh > CONST_PI2.sigh)
127 || ((st0_ptr->sigh == CONST_PI2.sigh)
128 && (st0_ptr->sigl > CONST_PI2.sigl))) )
129 {
130 /* CONST_PI2extra is negative, so the result of the
131 subtraction can be larger than pi/2. This means
132 that the argument is actually in a different quadrant.
133 The correction is always < pi/2, so it can't overflow
134 into yet another quadrant. */
135 st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2,
136 FULL_PRECISION);
137 q++;
138 }
139 }
140 }
141#endif /* BETTER_THAN_486 */
142
143 FPU_settag0(st0_tag);
144 control_word = old_cw;
145 partial_status = saved_status & ~SW_C2; /* Reduction complete. */
146
147 return (q & 3) | even;
148}
149
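/* [Editor's sketch -- not part of the original file] A double-precision
   analogue of the reduction trig_arg() performs: divide by pi/2 with
   truncation, keep the integer quotient as the quadrant, and continue
   with the remainder.  (The emulator does this on 64-bit significands
   with a 128-bit-accurate pi/2; doubles are used here only to show the
   shape of the computation.) */
#if 0	/* build as an ordinary user-space program */
#include <stdio.h>

int main(void)
{
	const double pi2 = 1.57079632679489661923;	/* pi/2 */
	double arg = 10.0;
	unsigned long long q = (unsigned long long)(arg / pi2);	/* chop, like RC_CHOP */
	double rem = arg - (double)q * pi2;
	/* prints: quadrant 2, reduced arg 0.575222 */
	printf("quadrant %llu, reduced arg %.6f\n", q & 3, rem);
	return 0;
}
#endif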
150
151/* Convert a long to register */
152static void convert_l2reg(long const *arg, int deststnr)
153{
154 int tag;
155 long num = *arg;
156 u_char sign;
157 FPU_REG *dest = &st(deststnr);
158
159 if (num == 0)
160 {
161 FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
162 return;
163 }
164
165 if (num > 0)
166 { sign = SIGN_POS; }
167 else
168 { num = -num; sign = SIGN_NEG; }
169
170 dest->sigh = num;
171 dest->sigl = 0;
172 setexponent16(dest, 31);
173 tag = FPU_normalize(dest);
174 FPU_settagi(deststnr, tag);
175 setsign(dest, sign);
176 return;
177}
178
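/* [Editor's sketch -- not part of the original file] convert_l2reg() parks
   the magnitude in the high 32 bits with exponent 31, then normalizes.
   The same arithmetic on a plain 64-bit significand: */
#if 0	/* build as an ordinary user-space program */
#include <stdio.h>

int main(void)
{
	long num = 5;
	unsigned long long sig = (unsigned long long)num << 32;	/* sigh=num, sigl=0 */
	int exp = 31;
	while (!(sig & 0x8000000000000000ULL)) {	/* normalize */
		sig <<= 1;
		exp--;
	}
	/* prints: exp 2, sig 0xa000000000000000  (1.01b * 2^2 == 5) */
	printf("exp %d, sig %#llx\n", exp, sig);
	return 0;
}
#endif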
179
180static void single_arg_error(FPU_REG *st0_ptr, u_char st0_tag)
181{
182 if ( st0_tag == TAG_Empty )
183 FPU_stack_underflow(); /* Puts a QNaN in st(0) */
184 else if ( st0_tag == TW_NaN )
185 real_1op_NaN(st0_ptr); /* return with a NaN in st(0) */
186#ifdef PARANOID
187 else
188 EXCEPTION(EX_INTERNAL|0x0112);
189#endif /* PARANOID */
190}
191
192
193static void single_arg_2_error(FPU_REG *st0_ptr, u_char st0_tag)
194{
195 int isNaN;
196
197 switch ( st0_tag )
198 {
199 case TW_NaN:
200 isNaN = (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000);
201 if ( isNaN && !(st0_ptr->sigh & 0x40000000) ) /* Signaling ? */
202 {
203 EXCEPTION(EX_Invalid);
204 if ( control_word & CW_Invalid )
205 {
206 /* The masked response */
207 /* Convert to a QNaN */
208 st0_ptr->sigh |= 0x40000000;
209 push();
210 FPU_copy_to_reg0(st0_ptr, TAG_Special);
211 }
212 }
213 else if ( isNaN )
214 {
215 /* A QNaN */
216 push();
217 FPU_copy_to_reg0(st0_ptr, TAG_Special);
218 }
219 else
220 {
221 /* pseudoNaN or other unsupported */
222 EXCEPTION(EX_Invalid);
223 if ( control_word & CW_Invalid )
224 {
225 /* The masked response */
226 FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
227 push();
228 FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
229 }
230 }
231 break; /* return with a NaN in st(0) */
232#ifdef PARANOID
233 default:
234 EXCEPTION(EX_INTERNAL|0x0112);
235#endif /* PARANOID */
236 }
237}
238
239
240/*---------------------------------------------------------------------------*/
241
242static void f2xm1(FPU_REG *st0_ptr, u_char tag)
243{
244 FPU_REG a;
245
246 clear_C1();
247
248 if ( tag == TAG_Valid )
249 {
250 /* For an 80486 FPU, the result is undefined if the arg is >= 1.0 */
251 if ( exponent(st0_ptr) < 0 )
252 {
253 denormal_arg:
254
255 FPU_to_exp16(st0_ptr, &a);
256
257 /* poly_2xm1(x) requires 0 < st(0) < 1. */
258 poly_2xm1(getsign(st0_ptr), &a, st0_ptr);
259 }
260 set_precision_flag_up(); /* 80486 appears to always do this */
261 return;
262 }
263
264 if ( tag == TAG_Zero )
265 return;
266
267 if ( tag == TAG_Special )
268 tag = FPU_Special(st0_ptr);
269
270 switch ( tag )
271 {
272 case TW_Denormal:
273 if ( denormal_operand() < 0 )
274 return;
275 goto denormal_arg;
276 case TW_Infinity:
277 if ( signnegative(st0_ptr) )
278 {
279 /* -infinity gives -1 (p16-10) */
280 FPU_copy_to_reg0(&CONST_1, TAG_Valid);
281 setnegative(st0_ptr);
282 }
283 return;
284 default:
285 single_arg_error(st0_ptr, tag);
286 }
287}
288
289
290static void fptan(FPU_REG *st0_ptr, u_char st0_tag)
291{
292 FPU_REG *st_new_ptr;
293 int q;
294 u_char arg_sign = getsign(st0_ptr);
295
296 /* Stack underflow has higher priority */
297 if ( st0_tag == TAG_Empty )
298 {
299 FPU_stack_underflow(); /* Puts a QNaN in st(0) */
300 if ( control_word & CW_Invalid )
301 {
302 st_new_ptr = &st(-1);
303 push();
304 FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */
305 }
306 return;
307 }
308
309 if ( STACK_OVERFLOW )
310 { FPU_stack_overflow(); return; }
311
312 if ( st0_tag == TAG_Valid )
313 {
314 if ( exponent(st0_ptr) > -40 )
315 {
316 if ( (q = trig_arg(st0_ptr, 0)) == -1 )
317 {
318 /* Operand is out of range */
319 return;
320 }
321
322 poly_tan(st0_ptr);
323 setsign(st0_ptr, (q & 1) ^ (arg_sign != 0));
324 set_precision_flag_up(); /* We do not really know if up or down */
325 }
326 else
327 {
328 /* For a small arg, the result == the argument */
329 /* Underflow may happen */
330
331 denormal_arg:
332
333 FPU_to_exp16(st0_ptr, st0_ptr);
334
335 st0_tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign);
336 FPU_settag0(st0_tag);
337 }
338 push();
339 FPU_copy_to_reg0(&CONST_1, TAG_Valid);
340 return;
341 }
342
343 if ( st0_tag == TAG_Zero )
344 {
345 push();
346 FPU_copy_to_reg0(&CONST_1, TAG_Valid);
347 setcc(0);
348 return;
349 }
350
351 if ( st0_tag == TAG_Special )
352 st0_tag = FPU_Special(st0_ptr);
353
354 if ( st0_tag == TW_Denormal )
355 {
356 if ( denormal_operand() < 0 )
357 return;
358
359 goto denormal_arg;
360 }
361
362 if ( st0_tag == TW_Infinity )
363 {
364 /* The 80486 treats infinity as an invalid operand */
365 if ( arith_invalid(0) >= 0 )
366 {
367 st_new_ptr = &st(-1);
368 push();
369 arith_invalid(0);
370 }
371 return;
372 }
373
374 single_arg_2_error(st0_ptr, st0_tag);
375}
376
377
378static void fxtract(FPU_REG *st0_ptr, u_char st0_tag)
379{
380 FPU_REG *st_new_ptr;
381 u_char sign;
382 register FPU_REG *st1_ptr = st0_ptr; /* anticipate */
383
384 if ( STACK_OVERFLOW )
385 { FPU_stack_overflow(); return; }
386
387 clear_C1();
388
389 if ( st0_tag == TAG_Valid )
390 {
391 long e;
392
393 push();
394 sign = getsign(st1_ptr);
395 reg_copy(st1_ptr, st_new_ptr);
396 setexponent16(st_new_ptr, exponent(st_new_ptr));
397
398 denormal_arg:
399
400 e = exponent16(st_new_ptr);
401 convert_l2reg(&e, 1);
402 setexponentpos(st_new_ptr, 0);
403 setsign(st_new_ptr, sign);
404 FPU_settag0(TAG_Valid); /* Needed if arg was a denormal */
405 return;
406 }
407 else if ( st0_tag == TAG_Zero )
408 {
409 sign = getsign(st0_ptr);
410
411 if ( FPU_divide_by_zero(0, SIGN_NEG) < 0 )
412 return;
413
414 push();
415 FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
416 setsign(st_new_ptr, sign);
417 return;
418 }
419
420 if ( st0_tag == TAG_Special )
421 st0_tag = FPU_Special(st0_ptr);
422
423 if ( st0_tag == TW_Denormal )
424 {
425 if (denormal_operand() < 0 )
426 return;
427
428 push();
429 sign = getsign(st1_ptr);
430 FPU_to_exp16(st1_ptr, st_new_ptr);
431 goto denormal_arg;
432 }
433 else if ( st0_tag == TW_Infinity )
434 {
435 sign = getsign(st0_ptr);
436 setpositive(st0_ptr);
437 push();
438 FPU_copy_to_reg0(&CONST_INF, TAG_Special);
439 setsign(st_new_ptr, sign);
440 return;
441 }
442 else if ( st0_tag == TW_NaN )
443 {
444 if ( real_1op_NaN(st0_ptr) < 0 )
445 return;
446
447 push();
448 FPU_copy_to_reg0(st0_ptr, TAG_Special);
449 return;
450 }
451 else if ( st0_tag == TAG_Empty )
452 {
453 /* Is this the correct behaviour? */
454 if ( control_word & EX_Invalid )
455 {
456 FPU_stack_underflow();
457 push();
458 FPU_stack_underflow();
459 }
460 else
461 EXCEPTION(EX_StackUnder);
462 }
463#ifdef PARANOID
464 else
465 EXCEPTION(EX_INTERNAL | 0x119);
466#endif /* PARANOID */
467}
468
469
470static void fdecstp(void)
471{
472 clear_C1();
473 top--;
474}
475
476static void fincstp(void)
477{
478 clear_C1();
479 top++;
480}
481
482
483static void fsqrt_(FPU_REG *st0_ptr, u_char st0_tag)
484{
485 int expon;
486
487 clear_C1();
488
489 if ( st0_tag == TAG_Valid )
490 {
491 u_char tag;
492
493 if (signnegative(st0_ptr))
494 {
495 arith_invalid(0); /* sqrt(negative) is invalid */
496 return;
497 }
498
499 /* make st(0) in [1.0 .. 4.0) */
500 expon = exponent(st0_ptr);
501
502 denormal_arg:
503
504 setexponent16(st0_ptr, (expon & 1));
505
506 /* Do the computation, the sign of the result will be positive. */
507 tag = wm_sqrt(st0_ptr, 0, 0, control_word, SIGN_POS);
508 addexponent(st0_ptr, expon >> 1);
509 FPU_settag0(tag);
510 return;
511 }
512
513 if ( st0_tag == TAG_Zero )
514 return;
515
516 if ( st0_tag == TAG_Special )
517 st0_tag = FPU_Special(st0_ptr);
518
519 if ( st0_tag == TW_Infinity )
520 {
521 if ( signnegative(st0_ptr) )
522 arith_invalid(0); /* sqrt(-Infinity) is invalid */
523 return;
524 }
525 else if ( st0_tag == TW_Denormal )
526 {
527 if (signnegative(st0_ptr))
528 {
529 arith_invalid(0); /* sqrt(negative) is invalid */
530 return;
531 }
532
533 if ( denormal_operand() < 0 )
534 return;
535
536 FPU_to_exp16(st0_ptr, st0_ptr);
537
538 expon = exponent16(st0_ptr);
539
540 goto denormal_arg;
541 }
542
543 single_arg_error(st0_ptr, st0_tag);
544
545}
546
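/* [Editor's sketch -- not part of the original file] An analogue in doubles
   of the range reduction fsqrt_() uses: fold the exponent's parity into the
   significand so the root kernel only sees a narrow interval, then restore
   half of the even exponent afterwards. */
#if 0	/* build as an ordinary user-space program; link with -lm */
#include <stdio.h>
#include <math.h>

int main(void)
{
	double x = 48.0;
	int e;
	double m = frexp(x, &e);	/* x = m * 2^e, m in [0.5, 1) */
	m = ldexp(m, e & 1);		/* keep the exponent's parity in m */
	e &= ~1;			/* even remainder of the exponent */
	double r = ldexp(sqrt(m), e / 2);
	printf("%f %f\n", r, sqrt(x));	/* both print 6.928203 */
	return 0;
}
#endif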
547
548static void frndint_(FPU_REG *st0_ptr, u_char st0_tag)
549{
550 int flags, tag;
551
552 if ( st0_tag == TAG_Valid )
553 {
554 u_char sign;
555
556 denormal_arg:
557
558 sign = getsign(st0_ptr);
559
560 if (exponent(st0_ptr) > 63)
561 return;
562
563 if ( st0_tag == TW_Denormal )
564 {
565 if (denormal_operand() < 0 )
566 return;
567 }
568
569 /* Fortunately, this can't overflow to 2^64 */
570 if ( (flags = FPU_round_to_int(st0_ptr, st0_tag)) )
571 set_precision_flag(flags);
572
573 setexponent16(st0_ptr, 63);
574 tag = FPU_normalize(st0_ptr);
575 setsign(st0_ptr, sign);
576 FPU_settag0(tag);
577 return;
578 }
579
580 if ( st0_tag == TAG_Zero )
581 return;
582
583 if ( st0_tag == TAG_Special )
584 st0_tag = FPU_Special(st0_ptr);
585
586 if ( st0_tag == TW_Denormal )
587 goto denormal_arg;
588 else if ( st0_tag == TW_Infinity )
589 return;
590 else
591 single_arg_error(st0_ptr, st0_tag);
592}
593
594
595static int fsin(FPU_REG *st0_ptr, u_char tag)
596{
597 u_char arg_sign = getsign(st0_ptr);
598
599 if ( tag == TAG_Valid )
600 {
601 int q;
602
603 if ( exponent(st0_ptr) > -40 )
604 {
605 if ( (q = trig_arg(st0_ptr, 0)) == -1 )
606 {
607 /* Operand is out of range */
608 return 1;
609 }
610
611 poly_sine(st0_ptr);
612
613 if (q & 2)
614 changesign(st0_ptr);
615
616 setsign(st0_ptr, getsign(st0_ptr) ^ arg_sign);
617
618 /* We do not really know if up or down */
619 set_precision_flag_up();
620 return 0;
621 }
622 else
623 {
624 /* For a small arg, the result == the argument */
625 set_precision_flag_up(); /* Must be up. */
626 return 0;
627 }
628 }
629
630 if ( tag == TAG_Zero )
631 {
632 setcc(0);
633 return 0;
634 }
635
636 if ( tag == TAG_Special )
637 tag = FPU_Special(st0_ptr);
638
639 if ( tag == TW_Denormal )
640 {
641 if ( denormal_operand() < 0 )
642 return 1;
643
644 /* For a small arg, the result == the argument */
645 /* Underflow may happen */
646 FPU_to_exp16(st0_ptr, st0_ptr);
647
648 tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign);
649
650 FPU_settag0(tag);
651
652 return 0;
653 }
654 else if ( tag == TW_Infinity )
655 {
656 /* The 80486 treats infinity as an invalid operand */
657 arith_invalid(0);
658 return 1;
659 }
660 else
661 {
662 single_arg_error(st0_ptr, tag);
663 return 1;
664 }
665}
666
667
668static int f_cos(FPU_REG *st0_ptr, u_char tag)
669{
670 u_char st0_sign;
671
672 st0_sign = getsign(st0_ptr);
673
674 if ( tag == TAG_Valid )
675 {
676 int q;
677
678 if ( exponent(st0_ptr) > -40 )
679 {
680 if ( (exponent(st0_ptr) < 0)
681 || ((exponent(st0_ptr) == 0)
682 && (significand(st0_ptr) <= 0xc90fdaa22168c234LL)) )
683 {
684 poly_cos(st0_ptr);
685
686 /* We do not really know if up or down */
687 set_precision_flag_down();
688
689 return 0;
690 }
691 else if ( (q = trig_arg(st0_ptr, FCOS)) != -1 )
692 {
693 poly_sine(st0_ptr);
694
695 if ((q+1) & 2)
696 changesign(st0_ptr);
697
698 /* We do not really know if up or down */
699 set_precision_flag_down();
700
701 return 0;
702 }
703 else
704 {
705 /* Operand is out of range */
706 return 1;
707 }
708 }
709 else
710 {
711 denormal_arg:
712
713 setcc(0);
714 FPU_copy_to_reg0(&CONST_1, TAG_Valid);
715#ifdef PECULIAR_486
716 set_precision_flag_down(); /* 80486 appears to do this. */
717#else
718 set_precision_flag_up(); /* Must be up. */
719#endif /* PECULIAR_486 */
720 return 0;
721 }
722 }
723 else if ( tag == TAG_Zero )
724 {
725 FPU_copy_to_reg0(&CONST_1, TAG_Valid);
726 setcc(0);
727 return 0;
728 }
729
730 if ( tag == TAG_Special )
731 tag = FPU_Special(st0_ptr);
732
733 if ( tag == TW_Denormal )
734 {
735 if ( denormal_operand() < 0 )
736 return 1;
737
738 goto denormal_arg;
739 }
740 else if ( tag == TW_Infinity )
741 {
742 /* The 80486 treats infinity as an invalid operand */
743 arith_invalid(0);
744 return 1;
745 }
746 else
747 {
748 single_arg_error(st0_ptr, tag); /* requires st0_ptr == &st(0) */
749 return 1;
750 }
751}
752
753
754static void fcos(FPU_REG *st0_ptr, u_char st0_tag)
755{
756 f_cos(st0_ptr, st0_tag);
757}
758
759
760static void fsincos(FPU_REG *st0_ptr, u_char st0_tag)
761{
762 FPU_REG *st_new_ptr;
763 FPU_REG arg;
764 u_char tag;
765
766 /* Stack underflow has higher priority */
767 if ( st0_tag == TAG_Empty )
768 {
769 FPU_stack_underflow(); /* Puts a QNaN in st(0) */
770 if ( control_word & CW_Invalid )
771 {
772 st_new_ptr = &st(-1);
773 push();
774 FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */
775 }
776 return;
777 }
778
779 if ( STACK_OVERFLOW )
780 { FPU_stack_overflow(); return; }
781
782 if ( st0_tag == TAG_Special )
783 tag = FPU_Special(st0_ptr);
784 else
785 tag = st0_tag;
786
787 if ( tag == TW_NaN )
788 {
789 single_arg_2_error(st0_ptr, TW_NaN);
790 return;
791 }
792 else if ( tag == TW_Infinity )
793 {
794 /* The 80486 treats infinity as an invalid operand */
795 if ( arith_invalid(0) >= 0 )
796 {
797 /* Masked response */
798 push();
799 arith_invalid(0);
800 }
801 return;
802 }
803
804 reg_copy(st0_ptr, &arg);
805 if ( !fsin(st0_ptr, st0_tag) )
806 {
807 push();
808 FPU_copy_to_reg0(&arg, st0_tag);
809 f_cos(&st(0), st0_tag);
810 }
811 else
812 {
813 /* An error, so restore st(0) */
814 FPU_copy_to_reg0(&arg, st0_tag);
815 }
816}
817
818
819/*---------------------------------------------------------------------------*/
820/* The following all require two arguments: st(0) and st(1) */
821
822/* A lean, mean kernel for the fprem instructions. This relies upon
823 the division and rounding to an integer in do_fprem giving an
824 exact result. Because of this, rem_kernel() needs to deal only with
825 the least significant 64 bits, the more significant bits of the
826 result must be zero.
827 */
828static void rem_kernel(unsigned long long st0, unsigned long long *y,
829 unsigned long long st1,
830 unsigned long long q, int n)
831{
832 int dummy;
833 unsigned long long x;
834
835 x = st0 << n;
836
837 /* Do the required multiplication and subtraction in the one operation */
838
839 /* lsw x -= lsw st1 * lsw q */
840 asm volatile ("mull %4; subl %%eax,%0; sbbl %%edx,%1"
841 :"=m" (((unsigned *)&x)[0]), "=m" (((unsigned *)&x)[1]),
842 "=a" (dummy)
843 :"2" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[0])
844 :"%dx");
845 /* msw x -= msw st1 * lsw q */
846 asm volatile ("mull %3; subl %%eax,%0"
847 :"=m" (((unsigned *)&x)[1]), "=a" (dummy)
848 :"1" (((unsigned *)&st1)[1]), "m" (((unsigned *)&q)[0])
849 :"%dx");
850 /* msw x -= lsw st1 * msw q */
851 asm volatile ("mull %3; subl %%eax,%0"
852 :"=m" (((unsigned *)&x)[1]), "=a" (dummy)
853 :"1" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[1])
854 :"%dx");
855
856 *y = x;
857}
858
859
860/* Remainder of st(0) / st(1) */
861/* This routine produces exact results, i.e. there is never any
862 rounding or truncation, etc of the result. */
863static void do_fprem(FPU_REG *st0_ptr, u_char st0_tag, int round)
864{
865 FPU_REG *st1_ptr = &st(1);
866 u_char st1_tag = FPU_gettagi(1);
867
868 if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
869 {
870 FPU_REG tmp, st0, st1;
871 u_char st0_sign, st1_sign;
872 u_char tmptag;
873 int tag;
874 int old_cw;
875 int expdif;
876 long long q;
877 unsigned short saved_status;
878 int cc;
879
880 fprem_valid:
881 /* Convert registers for internal use. */
882 st0_sign = FPU_to_exp16(st0_ptr, &st0);
883 st1_sign = FPU_to_exp16(st1_ptr, &st1);
884 expdif = exponent16(&st0) - exponent16(&st1);
885
886 old_cw = control_word;
887 cc = 0;
888
889 /* We want the status following the denorm tests, but don't want
890 the status changed by the arithmetic operations. */
891 saved_status = partial_status;
892 control_word &= ~CW_RC;
893 control_word |= RC_CHOP;
894
895 if ( expdif < 64 )
896 {
897 /* This should be the most common case */
898
899 if ( expdif > -2 )
900 {
901 u_char sign = st0_sign ^ st1_sign;
902 tag = FPU_u_div(&st0, &st1, &tmp,
903 PR_64_BITS | RC_CHOP | 0x3f,
904 sign);
905 setsign(&tmp, sign);
906
907 if ( exponent(&tmp) >= 0 )
908 {
909 FPU_round_to_int(&tmp, tag); /* Fortunately, this can't
910 overflow to 2^64 */
911 q = significand(&tmp);
912
913 rem_kernel(significand(&st0),
914 &significand(&tmp),
915 significand(&st1),
916 q, expdif);
917
918 setexponent16(&tmp, exponent16(&st1));
919 }
920 else
921 {
922 reg_copy(&st0, &tmp);
923 q = 0;
924 }
925
926 if ( (round == RC_RND) && (tmp.sigh & 0xc0000000) )
927 {
928 /* We may need to subtract st(1) once more,
929 to get a result <= 1/2 of st(1). */
930 unsigned long long x;
931 expdif = exponent16(&st1) - exponent16(&tmp);
932 if ( expdif <= 1 )
933 {
934 if ( expdif == 0 )
935 x = significand(&st1) - significand(&tmp);
936 else /* expdif is 1 */
937 x = (significand(&st1) << 1) - significand(&tmp);
938 if ( (x < significand(&tmp)) ||
939 /* or equi-distant (from 0 & st(1)) and q is odd */
940 ((x == significand(&tmp)) && (q & 1) ) )
941 {
942 st0_sign = ! st0_sign;
943 significand(&tmp) = x;
944 q++;
945 }
946 }
947 }
948
949 if (q & 4) cc |= SW_C0;
950 if (q & 2) cc |= SW_C3;
951 if (q & 1) cc |= SW_C1;
952 }
953 else
954 {
955 control_word = old_cw;
956 setcc(0);
957 return;
958 }
959 }
960 else
961 {
962 /* There is a large exponent difference ( >= 64 ) */
963 /* To make much sense, the code in this section should
964 be done at high precision. */
965 int exp_1, N;
966 u_char sign;
967
968 /* prevent overflow here */
969 /* N is 'a number between 32 and 63' (p26-113) */
970 reg_copy(&st0, &tmp);
971 tmptag = st0_tag;
972 N = (expdif & 0x0000001f) + 32; /* This choice gives results
973 identical to an AMD 486 */
974 setexponent16(&tmp, N);
975 exp_1 = exponent16(&st1);
976 setexponent16(&st1, 0);
977 expdif -= N;
978
979 sign = getsign(&tmp) ^ st1_sign;
980 tag = FPU_u_div(&tmp, &st1, &tmp, PR_64_BITS | RC_CHOP | 0x3f,
981 sign);
982 setsign(&tmp, sign);
983
984 FPU_round_to_int(&tmp, tag); /* Fortunately, this can't
985 overflow to 2^64 */
986
987 rem_kernel(significand(&st0),
988 &significand(&tmp),
989 significand(&st1),
990 significand(&tmp),
991 exponent(&tmp)
992 );
993 setexponent16(&tmp, exp_1 + expdif);
994
995 /* It is possible for the operation to be complete here.
996 What does the IEEE standard say? The Intel 80486 manual
997 implies that the operation will never be completed at this
998 point, and the behaviour of a real 80486 confirms this.
999 */
1000 if ( !(tmp.sigh | tmp.sigl) )
1001 {
1002 /* The result is zero */
1003 control_word = old_cw;
1004 partial_status = saved_status;
1005 FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
1006 setsign(&st0, st0_sign);
1007#ifdef PECULIAR_486
1008 setcc(SW_C2);
1009#else
1010 setcc(0);
1011#endif /* PECULIAR_486 */
1012 return;
1013 }
1014 cc = SW_C2;
1015 }
1016
1017 control_word = old_cw;
1018 partial_status = saved_status;
1019 tag = FPU_normalize_nuo(&tmp);
1020 reg_copy(&tmp, st0_ptr);
1021
1022 /* The only condition to be looked for is underflow,
1023 and it can occur here only if underflow is unmasked. */
1024 if ( (exponent16(&tmp) <= EXP_UNDER) && (tag != TAG_Zero)
1025 && !(control_word & CW_Underflow) )
1026 {
1027 setcc(cc);
1028 tag = arith_underflow(st0_ptr);
1029 setsign(st0_ptr, st0_sign);
1030 FPU_settag0(tag);
1031 return;
1032 }
1033 else if ( (exponent16(&tmp) > EXP_UNDER) || (tag == TAG_Zero) )
1034 {
1035 stdexp(st0_ptr);
1036 setsign(st0_ptr, st0_sign);
1037 }
1038 else
1039 {
1040 tag = FPU_round(st0_ptr, 0, 0, FULL_PRECISION, st0_sign);
1041 }
1042 FPU_settag0(tag);
1043 setcc(cc);
1044
1045 return;
1046 }
1047
1048 if ( st0_tag == TAG_Special )
1049 st0_tag = FPU_Special(st0_ptr);
1050 if ( st1_tag == TAG_Special )
1051 st1_tag = FPU_Special(st1_ptr);
1052
1053 if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
1054 || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
1055 || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
1056 {
1057 if ( denormal_operand() < 0 )
1058 return;
1059 goto fprem_valid;
1060 }
1061 else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
1062 {
1063 FPU_stack_underflow();
1064 return;
1065 }
1066 else if ( st0_tag == TAG_Zero )
1067 {
1068 if ( st1_tag == TAG_Valid )
1069 {
1070 setcc(0); return;
1071 }
1072 else if ( st1_tag == TW_Denormal )
1073 {
1074 if ( denormal_operand() < 0 )
1075 return;
1076 setcc(0); return;
1077 }
1078 else if ( st1_tag == TAG_Zero )
1079 { arith_invalid(0); return; } /* fprem(?,0) always invalid */
1080 else if ( st1_tag == TW_Infinity )
1081 { setcc(0); return; }
1082 }
1083 else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
1084 {
1085 if ( st1_tag == TAG_Zero )
1086 {
1087 arith_invalid(0); /* fprem(Valid,Zero) is invalid */
1088 return;
1089 }
1090 else if ( st1_tag != TW_NaN )
1091 {
1092 if ( ((st0_tag == TW_Denormal) || (st1_tag == TW_Denormal))
1093 && (denormal_operand() < 0) )
1094 return;
1095
1096 if ( st1_tag == TW_Infinity )
1097 {
1098 /* fprem(Valid,Infinity) is o.k. */
1099 setcc(0); return;
1100 }
1101 }
1102 }
1103 else if ( st0_tag == TW_Infinity )
1104 {
1105 if ( st1_tag != TW_NaN )
1106 {
1107 arith_invalid(0); /* fprem(Infinity,?) is invalid */
1108 return;
1109 }
1110 }
1111
1112 /* One of the registers must contain a NaN if we got here. */
1113
1114#ifdef PARANOID
1115 if ( (st0_tag != TW_NaN) && (st1_tag != TW_NaN) )
1116 EXCEPTION(EX_INTERNAL | 0x118);
1117#endif /* PARANOID */
1118
1119 real_2op_NaN(st1_ptr, st1_tag, 0, st1_ptr);
1120
1121}
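/* Editorial sketch (not the emulator's code path; helper name
   hypothetical, assumes <math.h>, the default rounding mode and an
   in-range quotient): the visible result of fprem/fprem1 on finite
   operands in the complete-reduction case above (expdif < 64).
   'nearest' selects fprem1 semantics; SW_C0/SW_C3/SW_C1 are the
   status-word bits set from the low quotient bits, as done above. */
#include <math.h>
static double fprem_sketch(double st0, double st1, int nearest, int *cc)
{
  double q = nearest ? nearbyint(st0 / st1) : trunc(st0 / st1);
  long long n = (long long)fabs(q);
  *cc = ((n & 4) ? SW_C0 : 0) | ((n & 2) ? SW_C3 : 0) | ((n & 1) ? SW_C1 : 0);
  return st0 - q * st1;   /* exact when computed in sufficient precision */
}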
1122
1123
1124/* ST(1) <- ST(1) * log2 ST(0); pop ST */
1125static void fyl2x(FPU_REG *st0_ptr, u_char st0_tag)
1126{
1127 FPU_REG *st1_ptr = &st(1), exponent;
1128 u_char st1_tag = FPU_gettagi(1);
1129 u_char sign;
1130 int e, tag;
1131
1132 clear_C1();
1133
1134 if ( (st0_tag == TAG_Valid) && (st1_tag == TAG_Valid) )
1135 {
1136 both_valid:
1137 /* Both regs are Valid or Denormal */
1138 if ( signpositive(st0_ptr) )
1139 {
1140 if ( st0_tag == TW_Denormal )
1141 FPU_to_exp16(st0_ptr, st0_ptr);
1142 else
1143 /* Convert st(0) for internal use. */
1144 setexponent16(st0_ptr, exponent(st0_ptr));
1145
1146 if ( (st0_ptr->sigh == 0x80000000) && (st0_ptr->sigl == 0) )
1147 {
1148 /* Special case. The result can be precise. */
1149 u_char esign;
1150 e = exponent16(st0_ptr);
1151 if ( e >= 0 )
1152 {
1153 exponent.sigh = e;
1154 esign = SIGN_POS;
1155 }
1156 else
1157 {
1158 exponent.sigh = -e;
1159 esign = SIGN_NEG;
1160 }
1161 exponent.sigl = 0;
1162 setexponent16(&exponent, 31);
1163 tag = FPU_normalize_nuo(&exponent);
1164 stdexp(&exponent);
1165 setsign(&exponent, esign);
1166 tag = FPU_mul(&exponent, tag, 1, FULL_PRECISION);
1167 if ( tag >= 0 )
1168 FPU_settagi(1, tag);
1169 }
1170 else
1171 {
1172 /* The usual case */
1173 sign = getsign(st1_ptr);
1174 if ( st1_tag == TW_Denormal )
1175 FPU_to_exp16(st1_ptr, st1_ptr);
1176 else
1177 /* Convert st(1) for internal use. */
1178 setexponent16(st1_ptr, exponent(st1_ptr));
1179 poly_l2(st0_ptr, st1_ptr, sign);
1180 }
1181 }
1182 else
1183 {
1184 /* negative */
1185 if ( arith_invalid(1) < 0 )
1186 return;
1187 }
1188
1189 FPU_pop();
1190
1191 return;
1192 }
1193
1194 if ( st0_tag == TAG_Special )
1195 st0_tag = FPU_Special(st0_ptr);
1196 if ( st1_tag == TAG_Special )
1197 st1_tag = FPU_Special(st1_ptr);
1198
1199 if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
1200 {
1201 FPU_stack_underflow_pop(1);
1202 return;
1203 }
1204 else if ( (st0_tag <= TW_Denormal) && (st1_tag <= TW_Denormal) )
1205 {
1206 if ( st0_tag == TAG_Zero )
1207 {
1208 if ( st1_tag == TAG_Zero )
1209 {
1210 /* Both args zero is invalid */
1211 if ( arith_invalid(1) < 0 )
1212 return;
1213 }
1214 else
1215 {
1216 u_char sign;
1217 sign = getsign(st1_ptr)^SIGN_NEG;
1218 if ( FPU_divide_by_zero(1, sign) < 0 )
1219 return;
1220
1221 setsign(st1_ptr, sign);
1222 }
1223 }
1224 else if ( st1_tag == TAG_Zero )
1225 {
1226 /* st(1) contains zero, st(0) valid and != 0 */
1227 /* Zero is the valid answer */
1228 sign = getsign(st1_ptr);
1229
1230 if ( signnegative(st0_ptr) )
1231 {
1232 /* log(negative) */
1233 if ( arith_invalid(1) < 0 )
1234 return;
1235 }
1236 else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1237 return;
1238 else
1239 {
1240 if ( exponent(st0_ptr) < 0 )
1241 sign ^= SIGN_NEG;
1242
1243 FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
1244 setsign(st1_ptr, sign);
1245 }
1246 }
1247 else
1248 {
1249 /* One or both operands are denormals. */
1250 if ( denormal_operand() < 0 )
1251 return;
1252 goto both_valid;
1253 }
1254 }
1255 else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) )
1256 {
1257 if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
1258 return;
1259 }
1260 /* One or both arg must be an infinity */
1261 else if ( st0_tag == TW_Infinity )
1262 {
1263 if ( (signnegative(st0_ptr)) || (st1_tag == TAG_Zero) )
1264 {
1265 /* log(-infinity) or 0*log(infinity) */
1266 if ( arith_invalid(1) < 0 )
1267 return;
1268 }
1269 else
1270 {
1271 u_char sign = getsign(st1_ptr);
1272
1273 if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
1274 return;
1275
1276 FPU_copy_to_reg1(&CONST_INF, TAG_Special);
1277 setsign(st1_ptr, sign);
1278 }
1279 }
1280 /* st(1) must be infinity here */
1281 else if ( ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal))
1282 && ( signpositive(st0_ptr) ) )
1283 {
1284 if ( exponent(st0_ptr) >= 0 )
1285 {
1286 if ( (exponent(st0_ptr) == 0) &&
1287 (st0_ptr->sigh == 0x80000000) &&
1288 (st0_ptr->sigl == 0) )
1289 {
1290 /* st(0) holds 1.0 */
1291 /* infinity*log(1) */
1292 if ( arith_invalid(1) < 0 )
1293 return;
1294 }
1295 /* else st(0) is positive and > 1.0 */
1296 }
1297 else
1298 {
1299 /* st(0) is positive and < 1.0 */
1300
1301 if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1302 return;
1303
1304 changesign(st1_ptr);
1305 }
1306 }
1307 else
1308 {
1309 /* st(0) must be zero or negative */
1310 if ( st0_tag == TAG_Zero )
1311 {
1312 /* This should be invalid, but a real 80486 is happy with it. */
1313
1314#ifndef PECULIAR_486
1315 sign = getsign(st1_ptr);
1316 if ( FPU_divide_by_zero(1, sign) < 0 )
1317 return;
1318#endif /* PECULIAR_486 */
1319
1320 changesign(st1_ptr);
1321 }
1322 else if ( arith_invalid(1) < 0 ) /* log(negative) */
1323 return;
1324 }
1325
1326 FPU_pop();
1327}
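/* Editorial sketch (hypothetical helper, assuming C99 <math.h>): on an
   ordinary positive finite st(0) the operation above reduces to
   st(1) * log2(st(0)) with st(0) popped; the power-of-two special case
   is exact because log2(2^e) == e. */
#include <math.h>
static double fyl2x_sketch(double st0, double st1)
{
  return st1 * log2(st0);
}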
1328
1329
1330static void fpatan(FPU_REG *st0_ptr, u_char st0_tag)
1331{
1332 FPU_REG *st1_ptr = &st(1);
1333 u_char st1_tag = FPU_gettagi(1);
1334 int tag;
1335
1336 clear_C1();
1337 if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
1338 {
1339 valid_atan:
1340
1341 poly_atan(st0_ptr, st0_tag, st1_ptr, st1_tag);
1342
1343 FPU_pop();
1344
1345 return;
1346 }
1347
1348 if ( st0_tag == TAG_Special )
1349 st0_tag = FPU_Special(st0_ptr);
1350 if ( st1_tag == TAG_Special )
1351 st1_tag = FPU_Special(st1_ptr);
1352
1353 if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
1354 || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
1355 || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
1356 {
1357 if ( denormal_operand() < 0 )
1358 return;
1359
1360 goto valid_atan;
1361 }
1362 else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
1363 {
1364 FPU_stack_underflow_pop(1);
1365 return;
1366 }
1367 else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) )
1368 {
1369 if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) >= 0 )
1370 FPU_pop();
1371 return;
1372 }
1373 else if ( (st0_tag == TW_Infinity) || (st1_tag == TW_Infinity) )
1374 {
1375 u_char sign = getsign(st1_ptr);
1376 if ( st0_tag == TW_Infinity )
1377 {
1378 if ( st1_tag == TW_Infinity )
1379 {
1380 if ( signpositive(st0_ptr) )
1381 {
1382 FPU_copy_to_reg1(&CONST_PI4, TAG_Valid);
1383 }
1384 else
1385 {
1386 setpositive(st1_ptr);
1387 tag = FPU_u_add(&CONST_PI4, &CONST_PI2, st1_ptr,
1388 FULL_PRECISION, SIGN_POS,
1389 exponent(&CONST_PI4), exponent(&CONST_PI2));
1390 if ( tag >= 0 )
1391 FPU_settagi(1, tag);
1392 }
1393 }
1394 else
1395 {
1396 if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
1397 return;
1398
1399 if ( signpositive(st0_ptr) )
1400 {
1401 FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
1402 setsign(st1_ptr, sign); /* An 80486 preserves the sign */
1403 FPU_pop();
1404 return;
1405 }
1406 else
1407 {
1408 FPU_copy_to_reg1(&CONST_PI, TAG_Valid);
1409 }
1410 }
1411 }
1412 else
1413 {
1414 /* st(1) is infinity, st(0) not infinity */
1415 if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1416 return;
1417
1418 FPU_copy_to_reg1(&CONST_PI2, TAG_Valid);
1419 }
1420 setsign(st1_ptr, sign);
1421 }
1422 else if ( st1_tag == TAG_Zero )
1423 {
1424 /* st(0) must be valid or zero */
1425 u_char sign = getsign(st1_ptr);
1426
1427 if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1428 return;
1429
1430 if ( signpositive(st0_ptr) )
1431 {
1432 /* An 80486 preserves the sign */
1433 FPU_pop();
1434 return;
1435 }
1436
1437 FPU_copy_to_reg1(&CONST_PI, TAG_Valid);
1438 setsign(st1_ptr, sign);
1439 }
1440 else if ( st0_tag == TAG_Zero )
1441 {
1442 /* st(1) must be TAG_Valid here */
1443 u_char sign = getsign(st1_ptr);
1444
1445 if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
1446 return;
1447
1448 FPU_copy_to_reg1(&CONST_PI2, TAG_Valid);
1449 setsign(st1_ptr, sign);
1450 }
1451#ifdef PARANOID
1452 else
1453 EXCEPTION(EX_INTERNAL | 0x125);
1454#endif /* PARANOID */
1455
1456 FPU_pop();
1457 set_precision_flag_up(); /* We do not really know if up or down */
1458}
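/* Editorial sketch (hypothetical helper, assuming C99 <math.h>): on
   finite operands fpatan's visible result is atan2(st(1), st(0)); the
   zero and infinity cases above reproduce atan2's special values
   (0, pi/4, pi/2, 3*pi/4, pi, with the sign taken from st(1)). */
#include <math.h>
static double fpatan_sketch(double st0, double st1)
{
  return atan2(st1, st0);
}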
1459
1460
1461static void fprem(FPU_REG *st0_ptr, u_char st0_tag)
1462{
1463 do_fprem(st0_ptr, st0_tag, RC_CHOP);
1464}
1465
1466
1467static void fprem1(FPU_REG *st0_ptr, u_char st0_tag)
1468{
1469 do_fprem(st0_ptr, st0_tag, RC_RND);
1470}
1471
1472
1473static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag)
1474{
1475 u_char sign, sign1;
1476 FPU_REG *st1_ptr = &st(1), a, b;
1477 u_char st1_tag = FPU_gettagi(1);
1478
1479 clear_C1();
1480 if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
1481 {
1482 valid_yl2xp1:
1483
1484 sign = getsign(st0_ptr);
1485 sign1 = getsign(st1_ptr);
1486
1487 FPU_to_exp16(st0_ptr, &a);
1488 FPU_to_exp16(st1_ptr, &b);
1489
1490 if ( poly_l2p1(sign, sign1, &a, &b, st1_ptr) )
1491 return;
1492
1493 FPU_pop();
1494 return;
1495 }
1496
1497 if ( st0_tag == TAG_Special )
1498 st0_tag = FPU_Special(st0_ptr);
1499 if ( st1_tag == TAG_Special )
1500 st1_tag = FPU_Special(st1_ptr);
1501
1502 if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
1503 || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
1504 || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
1505 {
1506 if ( denormal_operand() < 0 )
1507 return;
1508
1509 goto valid_yl2xp1;
1510 }
1511 else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
1512 {
1513 FPU_stack_underflow_pop(1);
1514 return;
1515 }
1516 else if ( st0_tag == TAG_Zero )
1517 {
1518 switch ( st1_tag )
1519 {
1520 case TW_Denormal:
1521 if ( denormal_operand() < 0 )
1522 return;
1523
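      /* fall through to the TAG_Zero/TAG_Valid handling */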
1524 case TAG_Zero:
1525 case TAG_Valid:
1526 setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr));
1527 FPU_copy_to_reg1(st0_ptr, st0_tag);
1528 break;
1529
1530 case TW_Infinity:
1531 /* Infinity*log(1) */
1532 if ( arith_invalid(1) < 0 )
1533 return;
1534 break;
1535
1536 case TW_NaN:
1537 if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
1538 return;
1539 break;
1540
1541 default:
1542#ifdef PARANOID
1543 EXCEPTION(EX_INTERNAL | 0x116);
1544 return;
1545#endif /* PARANOID */
1546 break;
1547 }
1548 }
1549 else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
1550 {
1551 switch ( st1_tag )
1552 {
1553 case TAG_Zero:
1554 if ( signnegative(st0_ptr) )
1555 {
1556 if ( exponent(st0_ptr) >= 0 )
1557 {
1558 /* st(0) holds <= -1.0 */
1559#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
1560 changesign(st1_ptr);
1561#else
1562 if ( arith_invalid(1) < 0 )
1563 return;
1564#endif /* PECULIAR_486 */
1565 }
1566 else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1567 return;
1568 else
1569 changesign(st1_ptr);
1570 }
1571 else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1572 return;
1573 break;
1574
1575 case TW_Infinity:
1576 if ( signnegative(st0_ptr) )
1577 {
1578 if ( (exponent(st0_ptr) >= 0) &&
1579 !((st0_ptr->sigh == 0x80000000) &&
1580 (st0_ptr->sigl == 0)) )
1581 {
1582 /* st(0) holds < -1.0 */
1583#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
1584 changesign(st1_ptr);
1585#else
1586 if ( arith_invalid(1) < 0 ) return;
1587#endif /* PECULIAR_486 */
1588 }
1589 else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1590 return;
1591 else
1592 changesign(st1_ptr);
1593 }
1594 else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1595 return;
1596 break;
1597
1598 case TW_NaN:
1599 if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
1600 return;
1601 }
1602
1603 }
1604 else if ( st0_tag == TW_NaN )
1605 {
1606 if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
1607 return;
1608 }
1609 else if ( st0_tag == TW_Infinity )
1610 {
1611 if ( st1_tag == TW_NaN )
1612 {
1613 if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
1614 return;
1615 }
1616 else if ( signnegative(st0_ptr) )
1617 {
1618#ifndef PECULIAR_486
1619 /* This should have higher priority than denormals, but... */
1620 if ( arith_invalid(1) < 0 ) /* log(-infinity) */
1621 return;
1622#endif /* PECULIAR_486 */
1623 if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
1624 return;
1625#ifdef PECULIAR_486
1626 /* Denormal operands actually get higher priority */
1627 if ( arith_invalid(1) < 0 ) /* log(-infinity) */
1628 return;
1629#endif /* PECULIAR_486 */
1630 }
1631 else if ( st1_tag == TAG_Zero )
1632 {
1633 /* log(infinity) */
1634 if ( arith_invalid(1) < 0 )
1635 return;
1636 }
1637
1638 /* st(1) must be valid here. */
1639
1640 else if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
1641 return;
1642
1643 /* The Manual says that log(Infinity) is invalid, but a real
1644 80486 sensibly says that it is o.k. */
1645 else
1646 {
1647 u_char sign = getsign(st1_ptr);
1648 FPU_copy_to_reg1(&CONST_INF, TAG_Special);
1649 setsign(st1_ptr, sign);
1650 }
1651 }
1652#ifdef PARANOID
1653 else
1654 {
1655 EXCEPTION(EX_INTERNAL | 0x117);
1656 return;
1657 }
1658#endif /* PARANOID */
1659
1660 FPU_pop();
1661 return;
1662
1663}
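/* Editorial sketch (hypothetical helper, assuming C99 <math.h>):
   fyl2xp1 computes st(1) * log2(st(0) + 1) without the cancellation a
   literal log2(1 + x) would suffer for tiny x; log1p offers the same
   protection in C. */
#include <math.h>
static double fyl2xp1_sketch(double st0, double st1)
{
  return st1 * log1p(st0) / log(2.0);   /* log2(1+x) == log1p(x)/ln(2) */
}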
1664
1665
1666static void fscale(FPU_REG *st0_ptr, u_char st0_tag)
1667{
1668 FPU_REG *st1_ptr = &st(1);
1669 u_char st1_tag = FPU_gettagi(1);
1670 int old_cw = control_word;
1671 u_char sign = getsign(st0_ptr);
1672
1673 clear_C1();
1674 if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
1675 {
1676 long scale;
1677 FPU_REG tmp;
1678
1679 /* Convert register for internal use. */
1680 setexponent16(st0_ptr, exponent(st0_ptr));
1681
1682 valid_scale:
1683
1684 if ( exponent(st1_ptr) > 30 )
1685 {
1686 /* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */
1687
1688 if ( signpositive(st1_ptr) )
1689 {
1690 EXCEPTION(EX_Overflow);
1691 FPU_copy_to_reg0(&CONST_INF, TAG_Special);
1692 }
1693 else
1694 {
1695 EXCEPTION(EX_Underflow);
1696 FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
1697 }
1698 setsign(st0_ptr, sign);
1699 return;
1700 }
1701
1702 control_word &= ~CW_RC;
1703 control_word |= RC_CHOP;
1704 reg_copy(st1_ptr, &tmp);
1705 FPU_round_to_int(&tmp, st1_tag); /* This can never overflow here */
1706 control_word = old_cw;
1707 scale = signnegative(st1_ptr) ? -tmp.sigl : tmp.sigl;
1708 scale += exponent16(st0_ptr);
1709
1710 setexponent16(st0_ptr, scale);
1711
1712 /* Use FPU_round() to properly detect under/overflow etc */
1713 FPU_round(st0_ptr, 0, 0, control_word, sign);
1714
1715 return;
1716 }
1717
1718 if ( st0_tag == TAG_Special )
1719 st0_tag = FPU_Special(st0_ptr);
1720 if ( st1_tag == TAG_Special )
1721 st1_tag = FPU_Special(st1_ptr);
1722
1723 if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
1724 {
1725 switch ( st1_tag )
1726 {
1727 case TAG_Valid:
1728 /* st(0) must be a denormal */
1729 if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1730 return;
1731
1732 FPU_to_exp16(st0_ptr, st0_ptr); /* Will not be left on stack */
1733 goto valid_scale;
1734
1735 case TAG_Zero:
1736 if ( st0_tag == TW_Denormal )
1737 denormal_operand();
1738 return;
1739
1740 case TW_Denormal:
1741 denormal_operand();
1742 return;
1743
1744 case TW_Infinity:
1745 if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
1746 return;
1747
1748 if ( signpositive(st1_ptr) )
1749 FPU_copy_to_reg0(&CONST_INF, TAG_Special);
1750 else
1751 FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
1752 setsign(st0_ptr, sign);
1753 return;
1754
1755 case TW_NaN:
1756 real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
1757 return;
1758 }
1759 }
1760 else if ( st0_tag == TAG_Zero )
1761 {
1762 switch ( st1_tag )
1763 {
1764 case TAG_Valid:
1765 case TAG_Zero:
1766 return;
1767
1768 case TW_Denormal:
1769 denormal_operand();
1770 return;
1771
1772 case TW_Infinity:
1773 if ( signpositive(st1_ptr) )
1774 arith_invalid(0); /* Zero scaled by +Infinity */
1775 return;
1776
1777 case TW_NaN:
1778 real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
1779 return;
1780 }
1781 }
1782 else if ( st0_tag == TW_Infinity )
1783 {
1784 switch ( st1_tag )
1785 {
1786 case TAG_Valid:
1787 case TAG_Zero:
1788 return;
1789
1790 case TW_Denormal:
1791 denormal_operand();
1792 return;
1793
1794 case TW_Infinity:
1795 if ( signnegative(st1_ptr) )
1796 arith_invalid(0); /* Infinity scaled by -Infinity */
1797 return;
1798
1799 case TW_NaN:
1800 real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
1801 return;
1802 }
1803 }
1804 else if ( st0_tag == TW_NaN )
1805 {
1806 if ( st1_tag != TAG_Empty )
1807 { real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); return; }
1808 }
1809
1810#ifdef PARANOID
1811 if ( !((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) )
1812 {
1813 EXCEPTION(EX_INTERNAL | 0x115);
1814 return;
1815 }
1816#endif
1817
1818 /* At least one of st(0), st(1) must be empty */
1819 FPU_stack_underflow();
1820
1821}
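/* Editorial sketch (hypothetical helper, assuming <math.h> and an
   in-range st(1)): on finite operands fscale yields
   st(0) * 2^trunc(st(1)); the C cast truncates toward zero, matching
   the RC_CHOP rounding forced above. */
#include <math.h>
static double fscale_sketch(double st0, double st1)
{
  return ldexp(st0, (int)st1);
}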
1822
1823
1824/*---------------------------------------------------------------------------*/
1825
1826static FUNC_ST0 const trig_table_a[] = {
1827 f2xm1, fyl2x, fptan, fpatan,
1828 fxtract, fprem1, (FUNC_ST0)fdecstp, (FUNC_ST0)fincstp
1829};
1830
1831void FPU_triga(void)
1832{
1833 (trig_table_a[FPU_rm])(&st(0), FPU_gettag0());
1834}
1835
1836
1837static FUNC_ST0 const trig_table_b[] =
1838 {
1839 fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, (FUNC_ST0)fsin, fcos
1840 };
1841
1842void FPU_trigb(void)
1843{
1844 (trig_table_b[FPU_rm])(&st(0), FPU_gettag0());
1845}
diff --git a/arch/i386/math-emu/get_address.c b/arch/i386/math-emu/get_address.c
new file mode 100644
index 000000000000..91175738e948
--- /dev/null
+++ b/arch/i386/math-emu/get_address.c
@@ -0,0 +1,449 @@
1/*---------------------------------------------------------------------------+
2 | get_address.c |
3 | |
4 | Get the effective address from an FPU instruction. |
5 | |
6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13/*---------------------------------------------------------------------------+
14 | Note: |
15 | The file contains code which accesses user memory. |
16 | Emulator static data may change when user memory is accessed, due to |
17 | other processes using the emulator while swapping is in progress. |
18 +---------------------------------------------------------------------------*/
19
20
21#include <linux/stddef.h>
22
23#include <asm/uaccess.h>
24#include <asm/desc.h>
25
26#include "fpu_system.h"
27#include "exception.h"
28#include "fpu_emu.h"
29
30
31#define FPU_WRITE_BIT 0x10
32
33static int reg_offset[] = {
34 offsetof(struct info,___eax),
35 offsetof(struct info,___ecx),
36 offsetof(struct info,___edx),
37 offsetof(struct info,___ebx),
38 offsetof(struct info,___esp),
39 offsetof(struct info,___ebp),
40 offsetof(struct info,___esi),
41 offsetof(struct info,___edi)
42};
43
44#define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info))
45
46static int reg_offset_vm86[] = {
47 offsetof(struct info,___cs),
48 offsetof(struct info,___vm86_ds),
49 offsetof(struct info,___vm86_es),
50 offsetof(struct info,___vm86_fs),
51 offsetof(struct info,___vm86_gs),
52 offsetof(struct info,___ss),
53 offsetof(struct info,___vm86_ds)
54 };
55
56#define VM86_REG_(x) (*(unsigned short *) \
57 (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info))
58
59/* These are dummies; fs and gs are not saved on the stack. */
60#define ___FS ___ds
61#define ___GS ___ds
62
63static int reg_offset_pm[] = {
64 offsetof(struct info,___cs),
65 offsetof(struct info,___ds),
66 offsetof(struct info,___es),
67 offsetof(struct info,___FS),
68 offsetof(struct info,___GS),
69 offsetof(struct info,___ss),
70 offsetof(struct info,___ds)
71 };
72
73#define PM_REG_(x) (*(unsigned short *) \
74 (reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info))
75
76
77/* Decode the SIB byte. This function assumes mod != 3 */
78static int sib(int mod, unsigned long *fpu_eip)
79{
80 u_char ss,index,base;
81 long offset;
82
83 RE_ENTRANT_CHECK_OFF;
84 FPU_code_access_ok(1);
85 FPU_get_user(base, (u_char __user *) (*fpu_eip)); /* The SIB byte */
86 RE_ENTRANT_CHECK_ON;
87 (*fpu_eip)++;
88 ss = base >> 6;
89 index = (base >> 3) & 7;
90 base &= 7;
91
92 if ((mod == 0) && (base == 5))
93 offset = 0; /* No base register */
94 else
95 offset = REG_(base);
96
97 if (index == 4)
98 {
99 /* No index register */
100 /* A non-zero ss is illegal */
101 if ( ss )
102 EXCEPTION(EX_Invalid);
103 }
104 else
105 {
106 offset += (REG_(index)) << ss;
107 }
108
109 if (mod == 1)
110 {
111 /* 8 bit signed displacement */
112 long displacement;
113 RE_ENTRANT_CHECK_OFF;
114 FPU_code_access_ok(1);
115 FPU_get_user(displacement, (signed char __user *) (*fpu_eip));
116 offset += displacement;
117 RE_ENTRANT_CHECK_ON;
118 (*fpu_eip)++;
119 }
120 else if (mod == 2 || base == 5) /* The second condition also has mod==0 */
121 {
122 /* 32 bit displacement */
123 long displacement;
124 RE_ENTRANT_CHECK_OFF;
125 FPU_code_access_ok(4);
126 FPU_get_user(displacement, (long __user *) (*fpu_eip));
127 offset += displacement;
128 RE_ENTRANT_CHECK_ON;
129 (*fpu_eip) += 4;
130 }
131
132 return offset;
133}
134
135
136static unsigned long vm86_segment(u_char segment,
137 struct address *addr)
138{
139 segment--;
140#ifdef PARANOID
141 if ( segment > PREFIX_SS_ )
142 {
143 EXCEPTION(EX_INTERNAL|0x130);
144 math_abort(FPU_info,SIGSEGV);
145 }
146#endif /* PARANOID */
147 addr->selector = VM86_REG_(segment);
148 return (unsigned long)VM86_REG_(segment) << 4;
149}
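/* Editorial sketch (hypothetical helper): in vm86 mode a segment
   register holds a real-mode paragraph number, so the linear base is
   simply selector * 16, as computed above. */
static unsigned long vm86_base_sketch(unsigned short selector)
{
  return (unsigned long)selector << 4;
}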
150
151
152/* This should work for 16 and 32 bit protected mode. */
153static long pm_address(u_char FPU_modrm, u_char segment,
154 struct address *addr, long offset)
155{
156 struct desc_struct descriptor;
157 unsigned long base_address, limit, address, seg_top;
158 unsigned short selector;
159
160 segment--;
161
162#ifdef PARANOID
163 /* segment is unsigned, so this also detects if segment was 0: */
164 if ( segment > PREFIX_SS_ )
165 {
166 EXCEPTION(EX_INTERNAL|0x132);
167 math_abort(FPU_info,SIGSEGV);
168 }
169#endif /* PARANOID */
170
171 switch ( segment )
172 {
173 /* fs and gs aren't used by the kernel, so they still have their
174 user-space values. */
175 case PREFIX_FS_-1:
176 /* Declaring selector as a 16 bit quantity gets gcc 2.8.0 to use a
177 16 bit register in the assembler statement. */
178
179 __asm__("mov %%fs,%0":"=r" (selector));
180 addr->selector = selector;
181 break;
182 case PREFIX_GS_-1:
183 /* Declaring selector as a 16 bit quantity gets gcc 2.8.0 to use a
184 16 bit register in the assembler statement. */
185 __asm__("mov %%gs,%0":"=r" (selector));
186 addr->selector = selector;
187 break;
188 default:
189 addr->selector = PM_REG_(segment);
190 }
191
192 descriptor = LDT_DESCRIPTOR(PM_REG_(segment));
193 base_address = SEG_BASE_ADDR(descriptor);
194 address = base_address + offset;
195 limit = base_address
196 + (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1;
197 if ( limit < base_address ) limit = 0xffffffff;
198
199 if ( SEG_EXPAND_DOWN(descriptor) )
200 {
201 if ( SEG_G_BIT(descriptor) )
202 seg_top = 0xffffffff;
203 else
204 {
205 seg_top = base_address + (1 << 20);
206 if ( seg_top < base_address ) seg_top = 0xffffffff;
207 }
208 access_limit =
209 (address <= limit) || (address >= seg_top) ? 0 :
210 ((seg_top-address) >= 255 ? 255 : seg_top-address);
211 }
212 else
213 {
214 access_limit =
215 (address > limit) || (address < base_address) ? 0 :
216 ((limit-address) >= 254 ? 255 : limit-address+1);
217 }
218 if ( SEG_EXECUTE_ONLY(descriptor) ||
219 (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT)) )
220 {
221 access_limit = 0;
222 }
223 return address;
224}
225
226
227/*
228 MOD R/M byte: MOD == 3 has a special use for the FPU
229 SIB byte used iff R/M = 100b
230
231 7 6 5 4 3 2 1 0
232 ..... ......... .........
233 MOD OPCODE(2) R/M
234
235
236 SIB byte
237
238 7 6 5 4 3 2 1 0
239 ..... ......... .........
240 SS INDEX BASE
241
242*/
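/* Editorial sketch (hypothetical helper; registers passed as an array
   purely for illustration): how the SIB fields drawn above combine
   into an effective address, mirroring sib() including its two special
   encodings. */
static unsigned long sib_ea_sketch(unsigned mod, unsigned char sib_byte,
                                   const unsigned long reg[8], long disp)
{
  unsigned ss    = (sib_byte >> 6) & 3;    /* scale factor is 1 << ss */
  unsigned index = (sib_byte >> 3) & 7;
  unsigned base  =  sib_byte       & 7;
  unsigned long ea = (mod == 0 && base == 5) ? 0 : reg[base];
  if (index != 4)                          /* index 100b means no index */
    ea += reg[index] << ss;
  return ea + disp;
}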
243
244void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip,
245 struct address *addr,
246 fpu_addr_modes addr_modes)
247{
248 u_char mod;
249 unsigned rm = FPU_modrm & 7;
250 long *cpu_reg_ptr;
251 int address = 0; /* Initialized just to stop compiler warnings. */
252
253 /* Memory accessed via the cs selector is write protected
254 in `non-segmented' 32 bit protected mode. */
255 if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
256 && (addr_modes.override.segment == PREFIX_CS_) )
257 {
258 math_abort(FPU_info,SIGSEGV);
259 }
260
261 addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */
262
263 mod = (FPU_modrm >> 6) & 3;
264
265 if (rm == 4 && mod != 3)
266 {
267 address = sib(mod, fpu_eip);
268 }
269 else
270 {
271 cpu_reg_ptr = & REG_(rm);
272 switch (mod)
273 {
274 case 0:
275 if (rm == 5)
276 {
277 /* Special case: disp32 */
278 RE_ENTRANT_CHECK_OFF;
279 FPU_code_access_ok(4);
280 FPU_get_user(address, (unsigned long __user *) (*fpu_eip));
281 (*fpu_eip) += 4;
282 RE_ENTRANT_CHECK_ON;
283 addr->offset = address;
284 return (void __user *) address;
285 }
286 else
287 {
288 address = *cpu_reg_ptr; /* Just return the contents
289 of the cpu register */
290 addr->offset = address;
291 return (void __user *) address;
292 }
293 case 1:
294 /* 8 bit signed displacement */
295 RE_ENTRANT_CHECK_OFF;
296 FPU_code_access_ok(1);
297 FPU_get_user(address, (signed char __user *) (*fpu_eip));
298 RE_ENTRANT_CHECK_ON;
299 (*fpu_eip)++;
300 break;
301 case 2:
302 /* 32 bit displacement */
303 RE_ENTRANT_CHECK_OFF;
304 FPU_code_access_ok(4);
305 FPU_get_user(address, (long __user *) (*fpu_eip));
306 (*fpu_eip) += 4;
307 RE_ENTRANT_CHECK_ON;
308 break;
309 case 3:
310 /* Not legal for the FPU */
311 EXCEPTION(EX_Invalid);
312 }
313 address += *cpu_reg_ptr;
314 }
315
316 addr->offset = address;
317
318 switch ( addr_modes.default_mode )
319 {
320 case 0:
321 break;
322 case VM86:
323 address += vm86_segment(addr_modes.override.segment, addr);
324 break;
325 case PM16:
326 case SEG32:
327 address = pm_address(FPU_modrm, addr_modes.override.segment,
328 addr, address);
329 break;
330 default:
331 EXCEPTION(EX_INTERNAL|0x133);
332 }
333
334 return (void __user *)address;
335}
336
337
338void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
339 struct address *addr,
340 fpu_addr_modes addr_modes)
341{
342 u_char mod;
343 unsigned rm = FPU_modrm & 7;
344 int address = 0; /* Default used for mod == 0 */
345
346 /* Memory accessed via the cs selector is write protected
347 in `non-segmented' 32 bit protected mode. */
348 if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
349 && (addr_modes.override.segment == PREFIX_CS_) )
350 {
351 math_abort(FPU_info,SIGSEGV);
352 }
353
354 addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */
355
356 mod = (FPU_modrm >> 6) & 3;
357
358 switch (mod)
359 {
360 case 0:
361 if (rm == 6)
362 {
363 /* Special case: disp16 */
364 RE_ENTRANT_CHECK_OFF;
365 FPU_code_access_ok(2);
366 FPU_get_user(address, (unsigned short __user *) (*fpu_eip));
367 (*fpu_eip) += 2;
368 RE_ENTRANT_CHECK_ON;
369 goto add_segment;
370 }
371 break;
372 case 1:
373 /* 8 bit signed displacement */
374 RE_ENTRANT_CHECK_OFF;
375 FPU_code_access_ok(1);
376 FPU_get_user(address, (signed char __user *) (*fpu_eip));
377 RE_ENTRANT_CHECK_ON;
378 (*fpu_eip)++;
379 break;
380 case 2:
381 /* 16 bit displacement */
382 RE_ENTRANT_CHECK_OFF;
383 FPU_code_access_ok(2);
384 FPU_get_user(address, (unsigned short __user *) (*fpu_eip));
385 (*fpu_eip) += 2;
386 RE_ENTRANT_CHECK_ON;
387 break;
388 case 3:
389 /* Not legal for the FPU */
390 EXCEPTION(EX_Invalid);
391 break;
392 }
393 switch ( rm )
394 {
395 case 0:
396 address += FPU_info->___ebx + FPU_info->___esi;
397 break;
398 case 1:
399 address += FPU_info->___ebx + FPU_info->___edi;
400 break;
401 case 2:
402 address += FPU_info->___ebp + FPU_info->___esi;
403 if ( addr_modes.override.segment == PREFIX_DEFAULT )
404 addr_modes.override.segment = PREFIX_SS_;
405 break;
406 case 3:
407 address += FPU_info->___ebp + FPU_info->___edi;
408 if ( addr_modes.override.segment == PREFIX_DEFAULT )
409 addr_modes.override.segment = PREFIX_SS_;
410 break;
411 case 4:
412 address += FPU_info->___esi;
413 break;
414 case 5:
415 address += FPU_info->___edi;
416 break;
417 case 6:
418 address += FPU_info->___ebp;
419 if ( addr_modes.override.segment == PREFIX_DEFAULT )
420 addr_modes.override.segment = PREFIX_SS_;
421 break;
422 case 7:
423 address += FPU_info->___ebx;
424 break;
425 }
426
427 add_segment:
428 address &= 0xffff;
429
430 addr->offset = address;
431
432 switch ( addr_modes.default_mode )
433 {
434 case 0:
435 break;
436 case VM86:
437 address += vm86_segment(addr_modes.override.segment, addr);
438 break;
439 case PM16:
440 case SEG32:
441 address = pm_address(FPU_modrm, addr_modes.override.segment,
442 addr, address);
443 break;
444 default:
445 EXCEPTION(EX_INTERNAL|0x131);
446 }
447
448 return (void __user *)address ;
449}
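/* Editorial note: the rm switch above is the classic 16-bit
   effective-address table (rm 6 with mod 00 is the disp16 special case
   handled earlier). A hypothetical summary, not in the source: */
static const char *const ea16_base_sketch[8] = {
  "bx+si", "bx+di", "bp+si", "bp+di", "si", "di", "bp", "bx"
};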
diff --git a/arch/i386/math-emu/load_store.c b/arch/i386/math-emu/load_store.c
new file mode 100644
index 000000000000..85314be2fef8
--- /dev/null
+++ b/arch/i386/math-emu/load_store.c
@@ -0,0 +1,270 @@
1/*---------------------------------------------------------------------------+
2 | load_store.c |
3 | |
4 | This file contains most of the code to interpret the FPU instructions |
5 | which load and store from user memory. |
6 | |
7 | Copyright (C) 1992,1993,1994,1997 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@suburbia.net |
10 | |
11 | |
12 +---------------------------------------------------------------------------*/
13
14/*---------------------------------------------------------------------------+
15 | Note: |
16 | The file contains code which accesses user memory. |
17 | Emulator static data may change when user memory is accessed, due to |
18 | other processes using the emulator while swapping is in progress. |
19 +---------------------------------------------------------------------------*/
20
21#include <asm/uaccess.h>
22
23#include "fpu_system.h"
24#include "exception.h"
25#include "fpu_emu.h"
26#include "status_w.h"
27#include "control_w.h"
28
29
30#define _NONE_ 0 /* st0_ptr etc not needed */
31#define _REG0_ 1 /* Will be storing st(0) */
32#define _PUSH_ 3 /* Need to check for space to push onto stack */
33#define _null_ 4 /* Function illegal or not implemented */
34
35#define pop_0() { FPU_settag0(TAG_Empty); top++; }
36
37
38static u_char const type_table[32] = {
39 _PUSH_, _PUSH_, _PUSH_, _PUSH_,
40 _null_, _null_, _null_, _null_,
41 _REG0_, _REG0_, _REG0_, _REG0_,
42 _REG0_, _REG0_, _REG0_, _REG0_,
43 _NONE_, _null_, _NONE_, _PUSH_,
44 _NONE_, _PUSH_, _null_, _PUSH_,
45 _NONE_, _null_, _NONE_, _REG0_,
46 _NONE_, _REG0_, _NONE_, _REG0_
47 };
48
49u_char const data_sizes_16[32] = {
50 4, 4, 8, 2, 0, 0, 0, 0,
51 4, 4, 8, 2, 4, 4, 8, 2,
52 14, 0, 94, 10, 2, 10, 0, 8,
53 14, 0, 94, 10, 2, 10, 2, 8
54};
55
56static u_char const data_sizes_32[32] = {
57 4, 4, 8, 2, 0, 0, 0, 0,
58 4, 4, 8, 2, 4, 4, 8, 2,
59 28, 0,108, 10, 2, 10, 0, 8,
60 28, 0,108, 10, 2, 10, 2, 8
61};
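/* Editorial sketch (hypothetical helper): 'type' is the 5-bit index
   built from the instruction, and the operand-size mode picks the
   table; e.g. type 002 (fld m64real) is tagged _PUSH_ and transfers
   8 bytes in either mode. */
static u_char transfer_size_sketch(u_char type, int seg32)
{
  return seg32 ? data_sizes_32[type] : data_sizes_16[type];
}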
62
63int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
64 void __user *data_address)
65{
66 FPU_REG loaded_data;
67 FPU_REG *st0_ptr;
68 u_char st0_tag = TAG_Empty; /* This is just to stop a gcc warning. */
69 u_char loaded_tag;
70
71 st0_ptr = NULL; /* Initialized just to stop compiler warnings. */
72
73 if ( addr_modes.default_mode & PROTECTED )
74 {
75 if ( addr_modes.default_mode == SEG32 )
76 {
77 if ( access_limit < data_sizes_32[type] )
78 math_abort(FPU_info,SIGSEGV);
79 }
80 else if ( addr_modes.default_mode == PM16 )
81 {
82 if ( access_limit < data_sizes_16[type] )
83 math_abort(FPU_info,SIGSEGV);
84 }
85#ifdef PARANOID
86 else
87 EXCEPTION(EX_INTERNAL|0x140);
88#endif /* PARANOID */
89 }
90
91 switch ( type_table[type] )
92 {
93 case _NONE_:
94 break;
95 case _REG0_:
96 st0_ptr = &st(0); /* Some of these instructions pop after
97 storing */
98 st0_tag = FPU_gettag0();
99 break;
100 case _PUSH_:
101 {
102 if ( FPU_gettagi(-1) != TAG_Empty )
103 { FPU_stack_overflow(); return 0; }
104 top--;
105 st0_ptr = &st(0);
106 }
107 break;
108 case _null_:
109 FPU_illegal();
110 return 0;
111#ifdef PARANOID
112 default:
113 EXCEPTION(EX_INTERNAL|0x141);
114 return 0;
115#endif /* PARANOID */
116 }
117
118 switch ( type )
119 {
120 case 000: /* fld m32real */
121 clear_C1();
122 loaded_tag = FPU_load_single((float __user *)data_address, &loaded_data);
123 if ( (loaded_tag == TAG_Special)
124 && isNaN(&loaded_data)
125 && (real_1op_NaN(&loaded_data) < 0) )
126 {
127 top++;
128 break;
129 }
130 FPU_copy_to_reg0(&loaded_data, loaded_tag);
131 break;
132 case 001: /* fild m32int */
133 clear_C1();
134 loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data);
135 FPU_copy_to_reg0(&loaded_data, loaded_tag);
136 break;
137 case 002: /* fld m64real */
138 clear_C1();
139 loaded_tag = FPU_load_double((double __user *)data_address, &loaded_data);
140 if ( (loaded_tag == TAG_Special)
141 && isNaN(&loaded_data)
142 && (real_1op_NaN(&loaded_data) < 0) )
143 {
144 top++;
145 break;
146 }
147 FPU_copy_to_reg0(&loaded_data, loaded_tag);
148 break;
149 case 003: /* fild m16int */
150 clear_C1();
151 loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data);
152 FPU_copy_to_reg0(&loaded_data, loaded_tag);
153 break;
154 case 010: /* fst m32real */
155 clear_C1();
156 FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address);
157 break;
158 case 011: /* fist m32int */
159 clear_C1();
160 FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address);
161 break;
162 case 012: /* fst m64real */
163 clear_C1();
164 FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address);
165 break;
166 case 013: /* fist m16int */
167 clear_C1();
168 FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address);
169 break;
170 case 014: /* fstp m32real */
171 clear_C1();
172 if ( FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address) )
173 pop_0(); /* pop only if the number was actually stored
174 (see the 80486 manual p16-28) */
175 break;
176 case 015: /* fistp m32int */
177 clear_C1();
178 if ( FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address) )
179 pop_0(); /* pop only if the number was actually stored
180 (see the 80486 manual p16-28) */
181 break;
182 case 016: /* fstp m64real */
183 clear_C1();
184 if ( FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address) )
185 pop_0(); /* pop only if the number was actually stored
186 (see the 80486 manual p16-28) */
187 break;
188 case 017: /* fistp m16int */
189 clear_C1();
190 if ( FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address) )
191 pop_0(); /* pop only if the number was actually stored
192 (see the 80486 manual p16-28) */
193 break;
194 case 020: /* fldenv m14/28byte */
195 fldenv(addr_modes, (u_char __user *)data_address);
196 /* Ensure that the values just loaded are not changed by
197 fix-up operations. */
198 return 1;
199 case 022: /* frstor m94/108byte */
200 frstor(addr_modes, (u_char __user *)data_address);
201 /* Ensure that the values just loaded are not changed by
202 fix-up operations. */
203 return 1;
204 case 023: /* fbld m80dec */
205 clear_C1();
206 loaded_tag = FPU_load_bcd((u_char __user *)data_address);
207 FPU_settag0(loaded_tag);
208 break;
209 case 024: /* fldcw */
210 RE_ENTRANT_CHECK_OFF;
211 FPU_access_ok(VERIFY_READ, data_address, 2);
212 FPU_get_user(control_word, (unsigned short __user *) data_address);
213 RE_ENTRANT_CHECK_ON;
214 if ( partial_status & ~control_word & CW_Exceptions )
215 partial_status |= (SW_Summary | SW_Backward);
216 else
217 partial_status &= ~(SW_Summary | SW_Backward);
218#ifdef PECULIAR_486
219 control_word |= 0x40; /* An 80486 appears to always set this bit */
220#endif /* PECULIAR_486 */
221 return 1;
222 case 025: /* fld m80real */
223 clear_C1();
224 loaded_tag = FPU_load_extended((long double __user *)data_address, 0);
225 FPU_settag0(loaded_tag);
226 break;
227 case 027: /* fild m64int */
228 clear_C1();
229 loaded_tag = FPU_load_int64((long long __user *)data_address);
230 FPU_settag0(loaded_tag);
231 break;
232 case 030: /* fstenv m14/28byte */
233 fstenv(addr_modes, (u_char __user *)data_address);
234 return 1;
235 case 032: /* fsave */
236 fsave(addr_modes, (u_char __user *)data_address);
237 return 1;
238 case 033: /* fbstp m80dec */
239 clear_C1();
240 if ( FPU_store_bcd(st0_ptr, st0_tag, (u_char __user *)data_address) )
241 pop_0(); /* pop only if the number was actually stored
242 (see the 80486 manual p16-28) */
243 break;
244 case 034: /* fstcw m16int */
245 RE_ENTRANT_CHECK_OFF;
246 FPU_access_ok(VERIFY_WRITE,data_address,2);
247 FPU_put_user(control_word, (unsigned short __user *) data_address);
248 RE_ENTRANT_CHECK_ON;
249 return 1;
250 case 035: /* fstp m80real */
251 clear_C1();
252 if ( FPU_store_extended(st0_ptr, st0_tag, (long double __user *)data_address) )
253 pop_0(); /* pop only if the number was actually stored
254 (see the 80486 manual p16-28) */
255 break;
256 case 036: /* fstsw m2byte */
257 RE_ENTRANT_CHECK_OFF;
258 FPU_access_ok(VERIFY_WRITE,data_address,2);
259 FPU_put_user(status_word(),(unsigned short __user *) data_address);
260 RE_ENTRANT_CHECK_ON;
261 return 1;
262 case 037: /* fistp m64int */
263 clear_C1();
264 if ( FPU_store_int64(st0_ptr, st0_tag, (long long __user *)data_address) )
265 pop_0(); /* pop only if the number was actually stored
266 (see the 80486 manual p16-28) */
267 break;
268 }
269 return 0;
270}
diff --git a/arch/i386/math-emu/mul_Xsig.S b/arch/i386/math-emu/mul_Xsig.S
new file mode 100644
index 000000000000..717785a53eb4
--- /dev/null
+++ b/arch/i386/math-emu/mul_Xsig.S
@@ -0,0 +1,176 @@
1/*---------------------------------------------------------------------------+
2 | mul_Xsig.S |
3 | |
4 | Multiply a 12 byte fixed point number by another fixed point number. |
5 | |
6 | Copyright (C) 1992,1994,1995 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@jacobi.maths.monash.edu.au |
9 | |
10 | Call from C as: |
11 | void mul32_Xsig(Xsig *x, unsigned b) |
12 | |
13 | void mul64_Xsig(Xsig *x, unsigned long long *b) |
14 | |
15 | void mul_Xsig_Xsig(Xsig *x, unsigned *b) |
16 | |
17 | The result is neither rounded nor normalized, and the ls bit or so may |
18 | be wrong. |
19 | |
20 +---------------------------------------------------------------------------*/
21 .file "mul_Xsig.S"
22
23
24#include "fpu_emu.h"
25
26.text
27ENTRY(mul32_Xsig)
28 pushl %ebp
29 movl %esp,%ebp
30 subl $16,%esp
31 pushl %esi
32
33 movl PARAM1,%esi
34 movl PARAM2,%ecx
35
36 xor %eax,%eax
37 movl %eax,-4(%ebp)
38 movl %eax,-8(%ebp)
39
40 movl (%esi),%eax /* lsl of Xsig */
41 mull %ecx /* msl of b */
42 movl %edx,-12(%ebp)
43
44 movl 4(%esi),%eax /* midl of Xsig */
45 mull %ecx /* msl of b */
46 addl %eax,-12(%ebp)
47 adcl %edx,-8(%ebp)
48 adcl $0,-4(%ebp)
49
50 movl 8(%esi),%eax /* msl of Xsig */
51 mull %ecx /* msl of b */
52 addl %eax,-8(%ebp)
53 adcl %edx,-4(%ebp)
54
55 movl -12(%ebp),%eax
56 movl %eax,(%esi)
57 movl -8(%ebp),%eax
58 movl %eax,4(%esi)
59 movl -4(%ebp),%eax
60 movl %eax,8(%esi)
61
62 popl %esi
63 leave
64 ret
65
66
67ENTRY(mul64_Xsig)
68 pushl %ebp
69 movl %esp,%ebp
70 subl $16,%esp
71 pushl %esi
72
73 movl PARAM1,%esi
74 movl PARAM2,%ecx
75
76 xor %eax,%eax
77 movl %eax,-4(%ebp)
78 movl %eax,-8(%ebp)
79
80 movl (%esi),%eax /* lsl of Xsig */
81 mull 4(%ecx) /* msl of b */
82 movl %edx,-12(%ebp)
83
84 movl 4(%esi),%eax /* midl of Xsig */
85 mull (%ecx) /* lsl of b */
86 addl %edx,-12(%ebp)
87 adcl $0,-8(%ebp)
88 adcl $0,-4(%ebp)
89
90 movl 4(%esi),%eax /* midl of Xsig */
91 mull 4(%ecx) /* msl of b */
92 addl %eax,-12(%ebp)
93 adcl %edx,-8(%ebp)
94 adcl $0,-4(%ebp)
95
96 movl 8(%esi),%eax /* msl of Xsig */
97 mull (%ecx) /* lsl of b */
98 addl %eax,-12(%ebp)
99 adcl %edx,-8(%ebp)
100 adcl $0,-4(%ebp)
101
102 movl 8(%esi),%eax /* msl of Xsig */
103 mull 4(%ecx) /* msl of b */
104 addl %eax,-8(%ebp)
105 adcl %edx,-4(%ebp)
106
107 movl -12(%ebp),%eax
108 movl %eax,(%esi)
109 movl -8(%ebp),%eax
110 movl %eax,4(%esi)
111 movl -4(%ebp),%eax
112 movl %eax,8(%esi)
113
114 popl %esi
115 leave
116 ret
117
118
119
120ENTRY(mul_Xsig_Xsig)
121 pushl %ebp
122 movl %esp,%ebp
123 subl $16,%esp
124 pushl %esi
125
126 movl PARAM1,%esi
127 movl PARAM2,%ecx
128
129 xor %eax,%eax
130 movl %eax,-4(%ebp)
131 movl %eax,-8(%ebp)
132
133 movl (%esi),%eax /* lsl of Xsig */
134 mull 8(%ecx) /* msl of b */
135 movl %edx,-12(%ebp)
136
137 movl 4(%esi),%eax /* midl of Xsig */
138 mull 4(%ecx) /* midl of b */
139 addl %edx,-12(%ebp)
140 adcl $0,-8(%ebp)
141 adcl $0,-4(%ebp)
142
143 movl 8(%esi),%eax /* msl of Xsig */
144 mull (%ecx) /* lsl of b */
145 addl %edx,-12(%ebp)
146 adcl $0,-8(%ebp)
147 adcl $0,-4(%ebp)
148
149 movl 4(%esi),%eax /* midl of Xsig */
150 mull 8(%ecx) /* msl of b */
151 addl %eax,-12(%ebp)
152 adcl %edx,-8(%ebp)
153 adcl $0,-4(%ebp)
154
155 movl 8(%esi),%eax /* msl of Xsig */
156 mull 4(%ecx) /* midl of b */
157 addl %eax,-12(%ebp)
158 adcl %edx,-8(%ebp)
159 adcl $0,-4(%ebp)
160
161 movl 8(%esi),%eax /* msl of Xsig */
162 mull 8(%ecx) /* msl of b */
163 addl %eax,-8(%ebp)
164 adcl %edx,-4(%ebp)
165
166 movl -12(%ebp),%edx
167 movl %edx,(%esi)
168 movl -8(%ebp),%edx
169 movl %edx,4(%esi)
170 movl -4(%ebp),%edx
171 movl %edx,8(%esi)
172
173 popl %esi
174 leave
175 ret
176
diff --git a/arch/i386/math-emu/poly.h b/arch/i386/math-emu/poly.h
new file mode 100644
index 000000000000..4db798114923
--- /dev/null
+++ b/arch/i386/math-emu/poly.h
@@ -0,0 +1,121 @@
1/*---------------------------------------------------------------------------+
2 | poly.h |
3 | |
4 | Header file for the FPU-emu poly*.c source files. |
5 | |
6 | Copyright (C) 1994,1999 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@melbpc.org.au |
9 | |
10 | Declarations and definitions for functions operating on Xsig (12-byte |
11 | extended-significand) quantities. |
12 | |
13 +---------------------------------------------------------------------------*/
14
15#ifndef _POLY_H
16#define _POLY_H
17
18/* This 12-byte structure is used to improve the accuracy of computation
19 of transcendental functions.
20 It is intended to give results better than 8-byte computation
21 allows; 9 bytes would probably be sufficient.
22 */
23typedef struct {
24 unsigned long lsw;
25 unsigned long midw;
26 unsigned long msw;
27} Xsig;
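/* Editorial sketch (hypothetical helper, assuming C99 hex-float
   constants): with the binary point taken at the far left, as the
   poly*.c code does, an Xsig represents
   (msw*2^64 + midw*2^32 + lsw) / 2^96. */
static long double xsig_value_sketch(const Xsig *x)
{
  return ((long double)x->msw  * 0x1p64L +
          (long double)x->midw * 0x1p32L +
          (long double)x->lsw) * 0x1p-96L;
}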
28
29asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b,
30 unsigned long long *result);
31asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x,
32 const unsigned long long terms[], const int n);
33
34asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult);
35asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult);
36asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult);
37
38asmlinkage void shr_Xsig(Xsig *, const int n);
39asmlinkage int round_Xsig(Xsig *);
40asmlinkage int norm_Xsig(Xsig *);
41asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);
42
43/* Macro to extract the most significant 32 bits from a long long */
44#define LL_MSW(x) (((unsigned long *)&x)[1])
45
46/* Macro to initialize an Xsig struct */
47#define MK_XSIG(a,b,c) { c, b, a }
48
49/* Macro to access the 8 ms bytes of an Xsig as a long long */
50#define XSIG_LL(x) (*(unsigned long long *)&x.midw)
51
52
53/*
54 Need to run gcc with optimizations on to get these to
55 actually be in-line.
56 */
57
58/* Multiply two fixed-point 32 bit numbers, producing a 32 bit result.
59 The answer is the ms word of the product. */
60/* Some versions of gcc make it difficult to stop eax from being clobbered.
61 Merely specifying that it is used doesn't work...
62 */
63static inline unsigned long mul_32_32(const unsigned long arg1,
64 const unsigned long arg2)
65{
66 int retval;
67 asm volatile ("mull %2; movl %%edx,%%eax" \
68 :"=a" (retval) \
69 :"0" (arg1), "g" (arg2) \
70 :"dx");
71 return retval;
72}
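/* Editorial usage note (hypothetical example, not in the source): both
   operands are 0.32 fixed-point fractions, so e.g. 0.5 * 0.5: */
static unsigned long mul_32_32_example(void)
{
  return mul_32_32(0x80000000UL, 0x80000000UL);   /* 0x40000000 == 0.25 */
}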
73
74
75/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */
76static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2)
77{
78 asm volatile ("movl %1,%%edi; movl %2,%%esi;\n"
79 "movl (%%esi),%%eax; addl %%eax,(%%edi);\n"
80 "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n"
81 "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n"
82 :"=g" (*dest):"g" (dest), "g" (x2)
83 :"ax","si","di");
84}
85
86
87/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */
88/* Note: the constraints in the asm statement didn't always work properly
89 with gcc 2.5.8. Changing from using edi to using ecx got around the
90 problem, but keep fingers crossed! */
91static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp)
92{
93 asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n"
94 "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n"
95 "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n"
96 "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n"
97 "jnc 0f;\n"
98 "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n"
99 "movl %4,%%ecx; incl (%%ecx)\n"
100 "movl $1,%%eax; jmp 1f;\n"
101 "0: xorl %%eax,%%eax;\n"
102 "1:\n"
103 :"=g" (*exp), "=g" (*dest)
104 :"g" (dest), "g" (x2), "g" (exp)
105 :"cx","si","ax");
106}
107
108
109/* Negate (subtract from 1.0) the 12 byte Xsig */
110/* This is faster in a loop on my 386 than using the "neg" instruction. */
111static inline void negate_Xsig(Xsig *x)
112{
113 asm volatile("movl %1,%%esi;\n"
114 "xorl %%ecx,%%ecx;\n"
115 "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n"
116 "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n"
117 "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n"
118 :"=g" (*x):"g" (x):"si","ax","cx");
119}
120
121#endif /* _POLY_H */
diff --git a/arch/i386/math-emu/poly_2xm1.c b/arch/i386/math-emu/poly_2xm1.c
new file mode 100644
index 000000000000..9766ad5e9743
--- /dev/null
+++ b/arch/i386/math-emu/poly_2xm1.c
@@ -0,0 +1,156 @@
1/*---------------------------------------------------------------------------+
2 | poly_2xm1.c |
3 | |
4 | Function to compute 2^x-1 by a polynomial approximation. |
5 | |
6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "exception.h"
14#include "reg_constant.h"
15#include "fpu_emu.h"
16#include "fpu_system.h"
17#include "control_w.h"
18#include "poly.h"
19
20
21#define HIPOWER 11
22static const unsigned long long lterms[HIPOWER] =
23{
24 0x0000000000000000LL, /* This term done separately as 12 bytes */
25 0xf5fdeffc162c7543LL,
26 0x1c6b08d704a0bfa6LL,
27 0x0276556df749cc21LL,
28 0x002bb0ffcf14f6b8LL,
29 0x0002861225ef751cLL,
30 0x00001ffcbfcd5422LL,
31 0x00000162c005d5f1LL,
32 0x0000000da96ccb1bLL,
33 0x0000000078d1b897LL,
34 0x000000000422b029LL
35};
36
37static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194);
38
39/* Four slices: 0.0 : 0.25 : 0.50 : 0.75 : 1.0,
40 These numbers are 2^(1/4), 2^(1/2), and 2^(3/4)
41 */
42static const Xsig shiftterm0 = MK_XSIG(0, 0, 0);
43static const Xsig shiftterm1 = MK_XSIG(0x9837f051, 0x8db8a96f, 0x46ad2318);
44static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3);
45static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9);
46
47static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1,
48 &shiftterm2, &shiftterm3 };
49
50
51/*--- poly_2xm1() -----------------------------------------------------------+
52 | Requires st(0) which is TAG_Valid and < 1. |
53 +---------------------------------------------------------------------------*/
54int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result)
55{
56 long int exponent, shift;
57 unsigned long long Xll;
58 Xsig accumulator, Denom, argSignif;
59 u_char tag;
60
61 exponent = exponent16(arg);
62
63#ifdef PARANOID
64 if ( exponent >= 0 ) /* Don't want a |number| >= 1.0 */
65 {
66 /* Number negative, too large, or not Valid. */
67 EXCEPTION(EX_INTERNAL|0x127);
68 return 1;
69 }
70#endif /* PARANOID */
71
72 argSignif.lsw = 0;
73 XSIG_LL(argSignif) = Xll = significand(arg);
74
75 if ( exponent == -1 )
76 {
77 shift = (argSignif.msw & 0x40000000) ? 3 : 2;
78 /* subtract 0.5 or 0.75 */
79 exponent -= 2;
80 XSIG_LL(argSignif) <<= 2;
81 Xll <<= 2;
82 }
83 else if ( exponent == -2 )
84 {
85 shift = 1;
86 /* subtract 0.25 */
87 exponent--;
88 XSIG_LL(argSignif) <<= 1;
89 Xll <<= 1;
90 }
91 else
92 shift = 0;
93
94 if ( exponent < -2 )
95 {
96 /* Shift the argument right by the required places. */
97 if ( FPU_shrx(&Xll, -2-exponent) >= 0x80000000U )
98 Xll++; /* round up */
99 }
100
101 accumulator.lsw = accumulator.midw = accumulator.msw = 0;
102 polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER-1);
103 mul_Xsig_Xsig(&accumulator, &argSignif);
104 shr_Xsig(&accumulator, 3);
105
106 mul_Xsig_Xsig(&argSignif, &hiterm); /* The leading term */
107 add_two_Xsig(&accumulator, &argSignif, &exponent);
108
109 if ( shift )
110 {
111 /* The argument is large, use the identity:
112 f(x+a) = f(a) * (f(x) + 1) - 1;
113 */
114 shr_Xsig(&accumulator, - exponent);
115 accumulator.msw |= 0x80000000; /* add 1.0 */
116 mul_Xsig_Xsig(&accumulator, shiftterm[shift]);
117 accumulator.msw &= 0x3fffffff; /* subtract 1.0 */
118 exponent = 1;
119 }
120
121 if ( sign != SIGN_POS )
122 {
123 /* The argument is negative, use the identity:
124 f(-x) = -f(x) / (1 + f(x))
125 */
126 Denom.lsw = accumulator.lsw;
127 XSIG_LL(Denom) = XSIG_LL(accumulator);
128 if ( exponent < 0 )
129 shr_Xsig(&Denom, - exponent);
130 else if ( exponent > 0 )
131 {
132 /* exponent must be 1 here */
133 XSIG_LL(Denom) <<= 1;
134 if ( Denom.lsw & 0x80000000 )
135 XSIG_LL(Denom) |= 1;
136 (Denom.lsw) <<= 1;
137 }
138 Denom.msw |= 0x80000000; /* add 1.0 */
139 div_Xsig(&accumulator, &Denom, &accumulator);
140 }
141
142 /* Convert to 64 bit signed-compatible */
143 exponent += round_Xsig(&accumulator);
144
145 result = &st(0);
146 significand(result) = XSIG_LL(accumulator);
147 setexponent16(result, exponent);
148
149 tag = FPU_round(result, 1, 0, FULL_PRECISION, sign);
150
151 setsign(result, sign);
152 FPU_settag0(tag);
153
154 return 0;
155
156}
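/* Editorial sketch (hypothetical helper, assuming C99 <math.h>): the
   two identities used above for f(x) = 2^x - 1. The shift path uses
   f(x+a) = f(a)*(f(x)+1) - 1 to reduce the argument below 0.25, and
   the negative path uses f(-x) = -f(x)/(1 + f(x)). */
#include <math.h>
static double f2xm1_identity_check(double x)
{
  double fx = exp2(fabs(x)) - 1.0;          /* f(|x|) */
  return (x < 0) ? -fx / (1.0 + fx) : fx;   /* f(-x) via the identity */
}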
diff --git a/arch/i386/math-emu/poly_atan.c b/arch/i386/math-emu/poly_atan.c
new file mode 100644
index 000000000000..82f702952f69
--- /dev/null
+++ b/arch/i386/math-emu/poly_atan.c
@@ -0,0 +1,229 @@
1/*---------------------------------------------------------------------------+
2 | poly_atan.c |
3 | |
4 | Compute the arctan of a FPU_REG, using a polynomial approximation. |
5 | |
6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "exception.h"
14#include "reg_constant.h"
15#include "fpu_emu.h"
16#include "fpu_system.h"
17#include "status_w.h"
18#include "control_w.h"
19#include "poly.h"
20
21
22#define HIPOWERon 6 /* odd poly, negative terms */
23static const unsigned long long oddnegterms[HIPOWERon] =
24{
25 0x0000000000000000LL, /* Dummy (not for - 1.0) */
26 0x015328437f756467LL,
27 0x0005dda27b73dec6LL,
28 0x0000226bf2bfb91aLL,
29 0x000000ccc439c5f7LL,
30 0x0000000355438407LL
31} ;
32
33#define HIPOWERop 6 /* odd poly, positive terms */
34static const unsigned long long oddplterms[HIPOWERop] =
35{
36/* 0xaaaaaaaaaaaaaaabLL, transferred to fixedpterm[] */
37 0x0db55a71875c9ac2LL,
38 0x0029fce2d67880b0LL,
39 0x0000dfd3908b4596LL,
40 0x00000550fd61dab4LL,
41 0x0000001c9422b3f9LL,
42 0x000000003e3301e1LL
43};
44
45static const unsigned long long denomterm = 0xebd9b842c5c53a0eLL;
46
47static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa);
48
49static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b);
50
51
52/*--- poly_atan() -----------------------------------------------------------+
53 | |
54 +---------------------------------------------------------------------------*/
55void poly_atan(FPU_REG *st0_ptr, u_char st0_tag,
56 FPU_REG *st1_ptr, u_char st1_tag)
57{
58 u_char transformed, inverted,
59 sign1, sign2;
60 int exponent;
61 long int dummy_exp;
62 Xsig accumulator, Numer, Denom, accumulatore, argSignif,
63 argSq, argSqSq;
64 u_char tag;
65
66 sign1 = getsign(st0_ptr);
67 sign2 = getsign(st1_ptr);
68 if ( st0_tag == TAG_Valid )
69 {
70 exponent = exponent(st0_ptr);
71 }
72 else
73 {
74 /* This gives non-compatible stack contents... */
75 FPU_to_exp16(st0_ptr, st0_ptr);
76 exponent = exponent16(st0_ptr);
77 }
78 if ( st1_tag == TAG_Valid )
79 {
80 exponent -= exponent(st1_ptr);
81 }
82 else
83 {
84 /* This gives non-compatible stack contents... */
85 FPU_to_exp16(st1_ptr, st1_ptr);
86 exponent -= exponent16(st1_ptr);
87 }
88
89 if ( (exponent < 0) || ((exponent == 0) &&
90 ((st0_ptr->sigh < st1_ptr->sigh) ||
91 ((st0_ptr->sigh == st1_ptr->sigh) &&
92 (st0_ptr->sigl < st1_ptr->sigl))) ) )
93 {
94 inverted = 1;
95 Numer.lsw = Denom.lsw = 0;
96 XSIG_LL(Numer) = significand(st0_ptr);
97 XSIG_LL(Denom) = significand(st1_ptr);
98 }
99 else
100 {
101 inverted = 0;
102 exponent = -exponent;
103 Numer.lsw = Denom.lsw = 0;
104 XSIG_LL(Numer) = significand(st1_ptr);
105 XSIG_LL(Denom) = significand(st0_ptr);
106 }
107 div_Xsig(&Numer, &Denom, &argSignif);
108 exponent += norm_Xsig(&argSignif);
109
110 if ( (exponent >= -1)
111 || ((exponent == -2) && (argSignif.msw > 0xd413ccd0)) )
112 {
113 /* The argument is greater than sqrt(2)-1 (=0.414213562...) */
114 /* Convert the argument by an identity for atan */
115 transformed = 1;
116
117 if ( exponent >= 0 )
118 {
119#ifdef PARANOID
120 if ( !( (exponent == 0) &&
121 (argSignif.lsw == 0) && (argSignif.midw == 0) &&
122 (argSignif.msw == 0x80000000) ) )
123 {
124 EXCEPTION(EX_INTERNAL|0x104); /* There must be a logic error */
125 return;
126 }
127#endif /* PARANOID */
128 argSignif.msw = 0; /* Make the transformed arg -> 0.0 */
129 }
130 else
131 {
132 Numer.lsw = Denom.lsw = argSignif.lsw;
133 XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif);
134
135 if ( exponent < -1 )
136 shr_Xsig(&Numer, -1-exponent);
137 negate_Xsig(&Numer);
138
139 shr_Xsig(&Denom, -exponent);
140 Denom.msw |= 0x80000000;
141
142 div_Xsig(&Numer, &Denom, &argSignif);
143
144 exponent = -1 + norm_Xsig(&argSignif);
145 }
146 }
147 else
148 {
149 transformed = 0;
150 }
151
152 argSq.lsw = argSignif.lsw; argSq.midw = argSignif.midw;
153 argSq.msw = argSignif.msw;
154 mul_Xsig_Xsig(&argSq, &argSq);
155
156 argSqSq.lsw = argSq.lsw; argSqSq.midw = argSq.midw; argSqSq.msw = argSq.msw;
157 mul_Xsig_Xsig(&argSqSq, &argSqSq);
158
159 accumulatore.lsw = argSq.lsw;
160 XSIG_LL(accumulatore) = XSIG_LL(argSq);
161
162 shr_Xsig(&argSq, 2*(-1-exponent-1));
163 shr_Xsig(&argSqSq, 4*(-1-exponent-1));
164
165 /* Now have argSq etc with binary point at the left
166 .1xxxxxxxx */
167
168 /* Do the basic fixed point polynomial evaluation */
169 accumulator.msw = accumulator.midw = accumulator.lsw = 0;
170 polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq),
171 oddplterms, HIPOWERop-1);
172 mul64_Xsig(&accumulator, &XSIG_LL(argSq));
173 negate_Xsig(&accumulator);
174 polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, HIPOWERon-1);
175 negate_Xsig(&accumulator);
176 add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp);
177
178 mul64_Xsig(&accumulatore, &denomterm);
179 shr_Xsig(&accumulatore, 1 + 2*(-1-exponent));
180 accumulatore.msw |= 0x80000000;
181
182 div_Xsig(&accumulator, &accumulatore, &accumulator);
183
184 mul_Xsig_Xsig(&accumulator, &argSignif);
185 mul_Xsig_Xsig(&accumulator, &argSq);
186
187 shr_Xsig(&accumulator, 3);
188 negate_Xsig(&accumulator);
189 add_Xsig_Xsig(&accumulator, &argSignif);
190
191 if ( transformed )
192 {
193 /* compute pi/4 - accumulator */
194 shr_Xsig(&accumulator, -1-exponent);
195 negate_Xsig(&accumulator);
196 add_Xsig_Xsig(&accumulator, &pi_signif);
197 exponent = -1;
198 }
199
200 if ( inverted )
201 {
202 /* compute pi/2 - accumulator */
203 shr_Xsig(&accumulator, -exponent);
204 negate_Xsig(&accumulator);
205 add_Xsig_Xsig(&accumulator, &pi_signif);
206 exponent = 0;
207 }
208
209 if ( sign1 )
210 {
211 /* compute pi - accumulator */
212 shr_Xsig(&accumulator, 1 - exponent);
213 negate_Xsig(&accumulator);
214 add_Xsig_Xsig(&accumulator, &pi_signif);
215 exponent = 1;
216 }
217
218 exponent += round_Xsig(&accumulator);
219
220 significand(st1_ptr) = XSIG_LL(accumulator);
221 setexponent16(st1_ptr, exponent);
222
223 tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign2);
224 FPU_settagi(1, tag);
225
226 set_precision_flag_up(); /* We do not really know if up or down,
227 use this as the default. */
228
229}
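
Editor's note: poly_atan() performs three nested reductions before the polynomial runs: take the reciprocal of the quotient so it is at most 1.0 (the inverted flag), fold arguments above sqrt(2)-1 back down with atan(q) = pi/4 - atan((1-q)/(1+q)) (the transformed flag), then undo both via the pi/4 and pi/2 corrections, with a further pi correction when st(0) is negative. A double-precision outline for positive operands only; atan_core() is a hypothetical stand-in for the fixed-point polynomial:

	#include <math.h>

	static double atan_core(double t)	/* valid for 0 <= t <= sqrt(2)-1 */
	{
		return atan(t);			/* stand-in for the polynomial */
	}

	double fpatan_outline(double y, double x)	/* atan(y/x), y,x > 0 */
	{
		int inverted = y > x;		/* keep the quotient <= 1.0 */
		double q = inverted ? x / y : y / x;
		double r;

		if (q > 0.41421356237309503)	/* sqrt(2) - 1, cf. 0xd413ccd0 */
			r = M_PI_4 - atan_core((1.0 - q) / (1.0 + q));
		else
			r = atan_core(q);

		return inverted ? M_PI_2 - r : r;
	}
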
diff --git a/arch/i386/math-emu/poly_l2.c b/arch/i386/math-emu/poly_l2.c
new file mode 100644
index 000000000000..dd00e1d5b074
--- /dev/null
+++ b/arch/i386/math-emu/poly_l2.c
@@ -0,0 +1,272 @@
1/*---------------------------------------------------------------------------+
2 | poly_l2.c |
3 | |
4 | Compute the base 2 log of a FPU_REG, using a polynomial approximation. |
5 | |
6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13
14#include "exception.h"
15#include "reg_constant.h"
16#include "fpu_emu.h"
17#include "fpu_system.h"
18#include "control_w.h"
19#include "poly.h"
20
21
22static void log2_kernel(FPU_REG const *arg, u_char argsign,
23 Xsig *accum_result, long int *expon);
24
25
26/*--- poly_l2() -------------------------------------------------------------+
27 | Base 2 logarithm by a polynomial approximation. |
28 +---------------------------------------------------------------------------*/
29void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign)
30{
31 long int exponent, expon, expon_expon;
32 Xsig accumulator, expon_accum, yaccum;
33 u_char sign, argsign;
34 FPU_REG x;
35 int tag;
36
37 exponent = exponent16(st0_ptr);
38
39 /* From st0_ptr, make a number > sqrt(2)/2 and < sqrt(2) */
40 if ( st0_ptr->sigh > (unsigned)0xb504f334 )
41 {
42 /* Treat as sqrt(2)/2 < st0_ptr < 1 */
43 significand(&x) = - significand(st0_ptr);
44 setexponent16(&x, -1);
45 exponent++;
46 argsign = SIGN_NEG;
47 }
48 else
49 {
50 /* Treat as 1 <= st0_ptr < sqrt(2) */
51 x.sigh = st0_ptr->sigh - 0x80000000;
52 x.sigl = st0_ptr->sigl;
53 setexponent16(&x, 0);
54 argsign = SIGN_POS;
55 }
56 tag = FPU_normalize_nuo(&x);
57
58 if ( tag == TAG_Zero )
59 {
60 expon = 0;
61 accumulator.msw = accumulator.midw = accumulator.lsw = 0;
62 }
63 else
64 {
65 log2_kernel(&x, argsign, &accumulator, &expon);
66 }
67
68 if ( exponent < 0 )
69 {
70 sign = SIGN_NEG;
71 exponent = -exponent;
72 }
73 else
74 sign = SIGN_POS;
75 expon_accum.msw = exponent; expon_accum.midw = expon_accum.lsw = 0;
76 if ( exponent )
77 {
78 expon_expon = 31 + norm_Xsig(&expon_accum);
79 shr_Xsig(&accumulator, expon_expon - expon);
80
81 if ( sign ^ argsign )
82 negate_Xsig(&accumulator);
83 add_Xsig_Xsig(&accumulator, &expon_accum);
84 }
85 else
86 {
87 expon_expon = expon;
88 sign = argsign;
89 }
90
91 yaccum.lsw = 0; XSIG_LL(yaccum) = significand(st1_ptr);
92 mul_Xsig_Xsig(&accumulator, &yaccum);
93
94 expon_expon += round_Xsig(&accumulator);
95
96 if ( accumulator.msw == 0 )
97 {
98 FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
99 return;
100 }
101
102 significand(st1_ptr) = XSIG_LL(accumulator);
103 setexponent16(st1_ptr, expon_expon + exponent16(st1_ptr) + 1);
104
105 tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign ^ st1_sign);
106 FPU_settagi(1, tag);
107
108 set_precision_flag_up(); /* 80486 appears to always do this */
109
110 return;
111
112}
113
114
115/*--- poly_l2p1() -----------------------------------------------------------+
116 | Base 2 logarithm by a polynomial approximation. |
117 | log2(x+1) |
118 +---------------------------------------------------------------------------*/
119int poly_l2p1(u_char sign0, u_char sign1,
120 FPU_REG *st0_ptr, FPU_REG *st1_ptr, FPU_REG *dest)
121{
122 u_char tag;
123 long int exponent;
124 Xsig accumulator, yaccum;
125
126 if ( exponent16(st0_ptr) < 0 )
127 {
128 log2_kernel(st0_ptr, sign0, &accumulator, &exponent);
129
130 yaccum.lsw = 0;
131 XSIG_LL(yaccum) = significand(st1_ptr);
132 mul_Xsig_Xsig(&accumulator, &yaccum);
133
134 exponent += round_Xsig(&accumulator);
135
136 exponent += exponent16(st1_ptr) + 1;
137 if ( exponent < EXP_WAY_UNDER ) exponent = EXP_WAY_UNDER;
138
139 significand(dest) = XSIG_LL(accumulator);
140 setexponent16(dest, exponent);
141
142 tag = FPU_round(dest, 1, 0, FULL_PRECISION, sign0 ^ sign1);
143 FPU_settagi(1, tag);
144
145 if ( tag == TAG_Valid )
146 set_precision_flag_up(); /* 80486 appears to always do this */
147 }
148 else
149 {
150 /* The magnitude of st0_ptr is far too large. */
151
152 if ( sign0 != SIGN_POS )
153 {
154 /* Trying to get the log of a negative number. */
155#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
156 changesign(st1_ptr);
157#else
158 if ( arith_invalid(1) < 0 )
159 return 1;
160#endif /* PECULIAR_486 */
161 }
162
163 /* 80486 appears to do this */
164 if ( sign0 == SIGN_NEG )
165 set_precision_flag_down();
166 else
167 set_precision_flag_up();
168 }
169
170 if ( exponent(dest) <= EXP_UNDER )
171 EXCEPTION(EX_Underflow);
172
173 return 0;
174
175}
176
177
178
179
180#undef HIPOWER
181#define HIPOWER 10
182static const unsigned long long logterms[HIPOWER] =
183{
184 0x2a8eca5705fc2ef0LL,
185 0xf6384ee1d01febceLL,
186 0x093bb62877cdf642LL,
187 0x006985d8a9ec439bLL,
188 0x0005212c4f55a9c8LL,
189 0x00004326a16927f0LL,
190 0x0000038d1d80a0e7LL,
191 0x0000003141cc80c6LL,
192 0x00000002b1668c9fLL,
193 0x000000002c7a46aaLL
194};
195
196static const unsigned long leadterm = 0xb8000000;
197
198
199/*--- log2_kernel() ---------------------------------------------------------+
200 | Base 2 logarithm by a polynomial approximation. |
201 | log2(x+1) |
202 +---------------------------------------------------------------------------*/
203static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig *accum_result,
204 long int *expon)
205{
206 long int exponent, adj;
207 unsigned long long Xsq;
208 Xsig accumulator, Numer, Denom, argSignif, arg_signif;
209
210 exponent = exponent16(arg);
211 Numer.lsw = Denom.lsw = 0;
212 XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg);
213 if ( argsign == SIGN_POS )
214 {
215 shr_Xsig(&Denom, 2 - (1 + exponent));
216 Denom.msw |= 0x80000000;
217 div_Xsig(&Numer, &Denom, &argSignif);
218 }
219 else
220 {
221 shr_Xsig(&Denom, 1 - (1 + exponent));
222 negate_Xsig(&Denom);
223 if ( Denom.msw & 0x80000000 )
224 {
225 div_Xsig(&Numer, &Denom, &argSignif);
226 exponent ++;
227 }
228 else
229 {
230 /* Denom must be 1.0 */
231 argSignif.lsw = Numer.lsw; argSignif.midw = Numer.midw;
232 argSignif.msw = Numer.msw;
233 }
234 }
235
236#ifndef PECULIAR_486
237 /* Should check here that |local_arg| is within the valid range */
238 if ( exponent >= -2 )
239 {
240 if ( (exponent > -2) ||
241 (argSignif.msw > (unsigned)0xafb0ccc0) )
242 {
243 /* The argument is too large */
244 }
245 }
246#endif /* PECULIAR_486 */
247
248 arg_signif.lsw = argSignif.lsw; XSIG_LL(arg_signif) = XSIG_LL(argSignif);
249 adj = norm_Xsig(&argSignif);
250 accumulator.lsw = argSignif.lsw; XSIG_LL(accumulator) = XSIG_LL(argSignif);
251 mul_Xsig_Xsig(&accumulator, &accumulator);
252 shr_Xsig(&accumulator, 2*(-1 - (1 + exponent + adj)));
253 Xsq = XSIG_LL(accumulator);
254 if ( accumulator.lsw & 0x80000000 )
255 Xsq++;
256
257 accumulator.msw = accumulator.midw = accumulator.lsw = 0;
258 /* Do the basic fixed point polynomial evaluation */
259 polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER-1);
260
261 mul_Xsig_Xsig(&accumulator, &argSignif);
262 shr_Xsig(&accumulator, 6 - adj);
263
264 mul32_Xsig(&arg_signif, leadterm);
265 add_two_Xsig(&accumulator, &arg_signif, &exponent);
266
267 *expon = exponent + 1;
268 accum_result->lsw = accumulator.lsw;
269 accum_result->midw = accumulator.midw;
270 accum_result->msw = accumulator.msw;
271
272}
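
Editor's note: both poly_l2() and poly_l2p1() funnel into log2_kernel(), which is an atanh-style series: with t = (x-1)/(x+1) one has ln(x) = 2*atanh(t) = 2*(t + t^3/3 + t^5/5 + ...), hence log2(x) = (2/ln 2)*t*(1 + t^2/3 + ...). The 2/ln 2 factor appears split between leadterm and the low-order coefficient of logterms[]. A rough double-precision equivalent, assuming ten series terms to mirror logterms[]:

	#include <math.h>

	double log2_outline(double v)		/* v > 0 */
	{
		int e, k;
		double x, t, t2, sum = 0.0;

		x = frexp(v, &e);		/* v = x * 2^e, 0.5 <= x < 1 */
		if (x < M_SQRT1_2) {		/* recentre about 1.0, as above */
			x *= 2.0;
			e--;
		}
		t = (x - 1.0) / (x + 1.0);	/* |t| <= 3 - 2*sqrt(2) */
		t2 = t * t;
		for (k = 9; k >= 0; k--)	/* Horner over ten series terms */
			sum = (sum + 1.0 / (2 * k + 3)) * t2;
		return (double)e + (2.0 / M_LN2) * (t + t * sum);
	}
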
diff --git a/arch/i386/math-emu/poly_sin.c b/arch/i386/math-emu/poly_sin.c
new file mode 100644
index 000000000000..a36313fb06f1
--- /dev/null
+++ b/arch/i386/math-emu/poly_sin.c
@@ -0,0 +1,397 @@
1/*---------------------------------------------------------------------------+
2 | poly_sin.c |
3 | |
4 | Computation of an approximation of the sin function and the cosine |
5 | function by a polynomial. |
6 | |
7 | Copyright (C) 1992,1993,1994,1997,1999 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
9 | E-mail billm@melbpc.org.au |
10 | |
11 | |
12 +---------------------------------------------------------------------------*/
13
14
15#include "exception.h"
16#include "reg_constant.h"
17#include "fpu_emu.h"
18#include "fpu_system.h"
19#include "control_w.h"
20#include "poly.h"
21
22
23#define N_COEFF_P 4
24#define N_COEFF_N 4
25
26static const unsigned long long pos_terms_l[N_COEFF_P] =
27{
28 0xaaaaaaaaaaaaaaabLL,
29 0x00d00d00d00cf906LL,
30 0x000006b99159a8bbLL,
31 0x000000000d7392e6LL
32};
33
34static const unsigned long long neg_terms_l[N_COEFF_N] =
35{
36 0x2222222222222167LL,
37 0x0002e3bc74aab624LL,
38 0x0000000b09229062LL,
39 0x00000000000c7973LL
40};
41
42
43
44#define N_COEFF_PH 4
45#define N_COEFF_NH 4
46static const unsigned long long pos_terms_h[N_COEFF_PH] =
47{
48 0x0000000000000000LL,
49 0x05b05b05b05b0406LL,
50 0x000049f93edd91a9LL,
51 0x00000000c9c9ed62LL
52};
53
54static const unsigned long long neg_terms_h[N_COEFF_NH] =
55{
56 0xaaaaaaaaaaaaaa98LL,
57 0x001a01a01a019064LL,
58 0x0000008f76c68a77LL,
59 0x0000000000d58f5eLL
60};
61
62
63/*--- poly_sine() -----------------------------------------------------------+
64 | |
65 +---------------------------------------------------------------------------*/
66void poly_sine(FPU_REG *st0_ptr)
67{
68 int exponent, echange;
69 Xsig accumulator, argSqrd, argTo4;
70 unsigned long fix_up, adj;
71 unsigned long long fixed_arg;
72 FPU_REG result;
73
74 exponent = exponent(st0_ptr);
75
76 accumulator.lsw = accumulator.midw = accumulator.msw = 0;
77
78 /* Split into two ranges, for arguments below and above 1.0 */
79 /* The boundary between upper and lower is approx 0.88309101259 */
80 if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) )
81 {
82 /* The argument is <= 0.88309101259 */
83
84 argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0;
85 mul64_Xsig(&argSqrd, &significand(st0_ptr));
86 shr_Xsig(&argSqrd, 2*(-1-exponent));
87 argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
88 argTo4.lsw = argSqrd.lsw;
89 mul_Xsig_Xsig(&argTo4, &argTo4);
90
91 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
92 N_COEFF_N-1);
93 mul_Xsig_Xsig(&accumulator, &argSqrd);
94 negate_Xsig(&accumulator);
95
96 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
97 N_COEFF_P-1);
98
99 shr_Xsig(&accumulator, 2); /* Divide by four */
100 accumulator.msw |= 0x80000000; /* Add 1.0 */
101
102 mul64_Xsig(&accumulator, &significand(st0_ptr));
103 mul64_Xsig(&accumulator, &significand(st0_ptr));
104 mul64_Xsig(&accumulator, &significand(st0_ptr));
105
106 /* Divide by four, FPU_REG compatible, etc */
107 exponent = 3*exponent;
108
109 /* The minimum exponent difference is 3 */
110 shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
111
112 negate_Xsig(&accumulator);
113 XSIG_LL(accumulator) += significand(st0_ptr);
114
115 echange = round_Xsig(&accumulator);
116
117 setexponentpos(&result, exponent(st0_ptr) + echange);
118 }
119 else
120 {
121 /* The argument is > 0.88309101259 */
122 /* We use sin(st(0)) = cos(pi/2-st(0)) */
123
124 fixed_arg = significand(st0_ptr);
125
126 if ( exponent == 0 )
127 {
128 /* The argument is >= 1.0 */
129
130 /* Put the binary point at the left. */
131 fixed_arg <<= 1;
132 }
133 /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
134 fixed_arg = 0x921fb54442d18469LL - fixed_arg;
135 /* There is a special case which arises due to rounding, to fix here. */
136 if ( fixed_arg == 0xffffffffffffffffLL )
137 fixed_arg = 0;
138
139 XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
140 mul64_Xsig(&argSqrd, &fixed_arg);
141
142 XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw;
143 mul_Xsig_Xsig(&argTo4, &argTo4);
144
145 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
146 N_COEFF_NH-1);
147 mul_Xsig_Xsig(&accumulator, &argSqrd);
148 negate_Xsig(&accumulator);
149
150 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
151 N_COEFF_PH-1);
152 negate_Xsig(&accumulator);
153
154 mul64_Xsig(&accumulator, &fixed_arg);
155 mul64_Xsig(&accumulator, &fixed_arg);
156
157 shr_Xsig(&accumulator, 3);
158 negate_Xsig(&accumulator);
159
160 add_Xsig_Xsig(&accumulator, &argSqrd);
161
162 shr_Xsig(&accumulator, 1);
163
164 accumulator.lsw |= 1; /* A zero accumulator here would cause problems */
165 negate_Xsig(&accumulator);
166
167 /* The basic computation is complete. Now fix the answer to
168 compensate for the error due to the approximation used for
169 pi/2
170 */
171
172 /* This has an exponent of -65 */
173 fix_up = 0x898cc517;
174 /* The fix-up needs to be improved for larger args */
175 if ( argSqrd.msw & 0xffc00000 )
176 {
177 /* Get about 32 bit precision in these: */
178 fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
179 }
180 fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
181
182 adj = accumulator.lsw; /* temp save */
183 accumulator.lsw -= fix_up;
184 if ( accumulator.lsw > adj )
185 XSIG_LL(accumulator) --;
186
187 echange = round_Xsig(&accumulator);
188
189 setexponentpos(&result, echange - 1);
190 }
191
192 significand(&result) = XSIG_LL(accumulator);
193 setsign(&result, getsign(st0_ptr));
194 FPU_copy_to_reg0(&result, TAG_Valid);
195
196#ifdef PARANOID
197 if ( (exponent(&result) >= 0)
198 && (significand(&result) > 0x8000000000000000LL) )
199 {
200 EXCEPTION(EX_INTERNAL|0x150);
201 }
202#endif /* PARANOID */
203
204}
205
206
207
208/*--- poly_cos() ------------------------------------------------------------+
209 | |
210 +---------------------------------------------------------------------------*/
211void poly_cos(FPU_REG *st0_ptr)
212{
213 FPU_REG result;
214 long int exponent, exp2, echange;
215 Xsig accumulator, argSqrd, fix_up, argTo4;
216 unsigned long long fixed_arg;
217
218#ifdef PARANOID
219 if ( (exponent(st0_ptr) > 0)
220 || ((exponent(st0_ptr) == 0)
221 && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) )
222 {
223 EXCEPTION(EX_Invalid);
224 FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
225 return;
226 }
227#endif /* PARANOID */
228
229 exponent = exponent(st0_ptr);
230
231 accumulator.lsw = accumulator.midw = accumulator.msw = 0;
232
233 if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) )
234 {
235 /* arg is < 0.687705 */
236
237 argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl;
238 argSqrd.lsw = 0;
239 mul64_Xsig(&argSqrd, &significand(st0_ptr));
240
241 if ( exponent < -1 )
242 {
243 /* shift the argument right by the required places */
244 shr_Xsig(&argSqrd, 2*(-1-exponent));
245 }
246
247 argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
248 argTo4.lsw = argSqrd.lsw;
249 mul_Xsig_Xsig(&argTo4, &argTo4);
250
251 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
252 N_COEFF_NH-1);
253 mul_Xsig_Xsig(&accumulator, &argSqrd);
254 negate_Xsig(&accumulator);
255
256 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
257 N_COEFF_PH-1);
258 negate_Xsig(&accumulator);
259
260 mul64_Xsig(&accumulator, &significand(st0_ptr));
261 mul64_Xsig(&accumulator, &significand(st0_ptr));
262 shr_Xsig(&accumulator, -2*(1+exponent));
263
264 shr_Xsig(&accumulator, 3);
265 negate_Xsig(&accumulator);
266
267 add_Xsig_Xsig(&accumulator, &argSqrd);
268
269 shr_Xsig(&accumulator, 1);
270
271 /* It doesn't matter if accumulator is all zero here, the
272 following code will work ok */
273 negate_Xsig(&accumulator);
274
275 if ( accumulator.lsw & 0x80000000 )
276 XSIG_LL(accumulator) ++;
277 if ( accumulator.msw == 0 )
278 {
279 /* The result is 1.0 */
280 FPU_copy_to_reg0(&CONST_1, TAG_Valid);
281 return;
282 }
283 else
284 {
285 significand(&result) = XSIG_LL(accumulator);
286
287 /* will be a valid positive nr with expon = -1 */
288 setexponentpos(&result, -1);
289 }
290 }
291 else
292 {
293 fixed_arg = significand(st0_ptr);
294
295 if ( exponent == 0 )
296 {
297 /* The argument is >= 1.0 */
298
299 /* Put the binary point at the left. */
300 fixed_arg <<= 1;
301 }
302 /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
303 fixed_arg = 0x921fb54442d18469LL - fixed_arg;
304 /* There is a special case which arises due to rounding, to fix here. */
305 if ( fixed_arg == 0xffffffffffffffffLL )
306 fixed_arg = 0;
307
308 exponent = -1;
309 exp2 = -1;
310
311 /* A shift is needed here only for a narrow range of arguments,
312 i.e. for fixed_arg approx 2^-32, but we pick up more... */
313 if ( !(LL_MSW(fixed_arg) & 0xffff0000) )
314 {
315 fixed_arg <<= 16;
316 exponent -= 16;
317 exp2 -= 16;
318 }
319
320 XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
321 mul64_Xsig(&argSqrd, &fixed_arg);
322
323 if ( exponent < -1 )
324 {
325 /* shift the argument right by the required places */
326 shr_Xsig(&argSqrd, 2*(-1-exponent));
327 }
328
329 argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
330 argTo4.lsw = argSqrd.lsw;
331 mul_Xsig_Xsig(&argTo4, &argTo4);
332
333 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
334 N_COEFF_N-1);
335 mul_Xsig_Xsig(&accumulator, &argSqrd);
336 negate_Xsig(&accumulator);
337
338 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
339 N_COEFF_P-1);
340
341 shr_Xsig(&accumulator, 2); /* Divide by four */
342 accumulator.msw |= 0x80000000; /* Add 1.0 */
343
344 mul64_Xsig(&accumulator, &fixed_arg);
345 mul64_Xsig(&accumulator, &fixed_arg);
346 mul64_Xsig(&accumulator, &fixed_arg);
347
348 /* Divide by four, FPU_REG compatible, etc */
349 exponent = 3*exponent;
350
351 /* The minimum exponent difference is 3 */
352 shr_Xsig(&accumulator, exp2 - exponent);
353
354 negate_Xsig(&accumulator);
355 XSIG_LL(accumulator) += fixed_arg;
356
357 /* The basic computation is complete. Now fix the answer to
358 compensate for the error due to the approximation used for
359 pi/2
360 */
361
362 /* This has an exponent of -65 */
363 XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
364 fix_up.lsw = 0;
365
366 /* The fix-up needs to be improved for larger args */
367 if ( argSqrd.msw & 0xffc00000 )
368 {
369 /* Get about 32 bit precision in these: */
370 fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
371 fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
372 }
373
374 exp2 += norm_Xsig(&accumulator);
375 shr_Xsig(&accumulator, 1); /* Prevent overflow */
376 exp2++;
377 shr_Xsig(&fix_up, 65 + exp2);
378
379 add_Xsig_Xsig(&accumulator, &fix_up);
380
381 echange = round_Xsig(&accumulator);
382
383 setexponentpos(&result, exp2 + echange);
384 significand(&result) = XSIG_LL(accumulator);
385 }
386
387 FPU_copy_to_reg0(&result, TAG_Valid);
388
389#ifdef PARANOID
390 if ( (exponent(&result) >= 0)
391 && (significand(&result) > 0x8000000000000000LL) )
392 {
393 EXCEPTION(EX_INTERNAL|0x151);
394 }
395#endif /* PARANOID */
396
397}
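
Editor's note: the large-argument branches of poly_sine() and poly_cos() rest on two facts: sin(x) = cos(pi/2 - x), and pi/2 is carried to only 64 bits (0x921fb54442d18469 above), so the discarded tail delta (the 0x898cc517... constant, exponent -65 per the comments) must be re-applied. The fix_up arithmetic is the first-order expansion cos(f + delta) ~= cos(f) - delta*sin(f), with sin(f) ~= f*(1 - f*f/6) once f grows. A double sketch of the algebra; in doubles the correction underflows, it only matters at the emulator's 96-bit precision:

	#include <math.h>

	double sin_large_outline(double x)	/* roughly 0.883 < x <= pi/2 */
	{
		const double pi2_64 = M_PI_2;		/* stand-in: 64-bit pi/2 */
		const double delta = 0x0.898cc517p-64;	/* ~2.9e-20 tail of pi/2 */
		double f = pi2_64 - x;			/* reduced argument */
		double c = cos(f);			/* stand-in for the poly */

		/* sin(x) = cos(f + delta) ~= cos(f) - delta*f*(1 - f*f/6) */
		return c - delta * f * (1.0 - f * f / 6.0);
	}
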
diff --git a/arch/i386/math-emu/poly_tan.c b/arch/i386/math-emu/poly_tan.c
new file mode 100644
index 000000000000..8df3e03b6e6f
--- /dev/null
+++ b/arch/i386/math-emu/poly_tan.c
@@ -0,0 +1,222 @@
1/*---------------------------------------------------------------------------+
2 | poly_tan.c |
3 | |
4 | Compute the tan of a FPU_REG, using a polynomial approximation. |
5 | |
6 | Copyright (C) 1992,1993,1994,1997,1999 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@melbpc.org.au |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "exception.h"
14#include "reg_constant.h"
15#include "fpu_emu.h"
16#include "fpu_system.h"
17#include "control_w.h"
18#include "poly.h"
19
20
21#define HiPOWERop 3 /* odd poly, positive terms */
22static const unsigned long long oddplterm[HiPOWERop] =
23{
24 0x0000000000000000LL,
25 0x0051a1cf08fca228LL,
26 0x0000000071284ff7LL
27};
28
29#define HiPOWERon 2 /* odd poly, negative terms */
30static const unsigned long long oddnegterm[HiPOWERon] =
31{
32 0x1291a9a184244e80LL,
33 0x0000583245819c21LL
34};
35
36#define HiPOWERep 2 /* even poly, positive terms */
37static const unsigned long long evenplterm[HiPOWERep] =
38{
39 0x0e848884b539e888LL,
40 0x00003c7f18b887daLL
41};
42
43#define HiPOWERen 2 /* even poly, negative terms */
44static const unsigned long long evennegterm[HiPOWERen] =
45{
46 0xf1f0200fd51569ccLL,
47 0x003afb46105c4432LL
48};
49
50static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL;
51
52
53/*--- poly_tan() ------------------------------------------------------------+
54 | |
55 +---------------------------------------------------------------------------*/
56void poly_tan(FPU_REG *st0_ptr)
57{
58 long int exponent;
59 int invert;
60 Xsig argSq, argSqSq, accumulatoro, accumulatore, accum,
61 argSignif, fix_up;
62 unsigned long adj;
63
64 exponent = exponent(st0_ptr);
65
66#ifdef PARANOID
67 if ( signnegative(st0_ptr) ) /* Can't hack a number < 0.0 */
68 { arith_invalid(0); return; } /* Need a positive number */
69#endif /* PARANOID */
70
71 /* Split the problem into two domains, smaller and larger than pi/4 */
72 if ( (exponent == 0) || ((exponent == -1) && (st0_ptr->sigh > 0xc90fdaa2)) )
73 {
74 /* The argument is greater than (approx) pi/4 */
75 invert = 1;
76 accum.lsw = 0;
77 XSIG_LL(accum) = significand(st0_ptr);
78
79 if ( exponent == 0 )
80 {
81 /* The argument is >= 1.0 */
82 /* Put the binary point at the left. */
83 XSIG_LL(accum) <<= 1;
84 }
85 /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
86 XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum);
87 /* This is a special case which arises due to rounding. */
88 if ( XSIG_LL(accum) == 0xffffffffffffffffLL )
89 {
90 FPU_settag0(TAG_Valid);
91 significand(st0_ptr) = 0x8a51e04daabda360LL;
92 setexponent16(st0_ptr, (0x41 + EXTENDED_Ebias) | SIGN_Negative);
93 return;
94 }
95
96 argSignif.lsw = accum.lsw;
97 XSIG_LL(argSignif) = XSIG_LL(accum);
98 exponent = -1 + norm_Xsig(&argSignif);
99 }
100 else
101 {
102 invert = 0;
103 argSignif.lsw = 0;
104 XSIG_LL(accum) = XSIG_LL(argSignif) = significand(st0_ptr);
105
106 if ( exponent < -1 )
107 {
108 /* shift the argument right by the required places */
109 if ( FPU_shrx(&XSIG_LL(accum), -1-exponent) >= 0x80000000U )
110 XSIG_LL(accum) ++; /* round up */
111 }
112 }
113
114 XSIG_LL(argSq) = XSIG_LL(accum); argSq.lsw = accum.lsw;
115 mul_Xsig_Xsig(&argSq, &argSq);
116 XSIG_LL(argSqSq) = XSIG_LL(argSq); argSqSq.lsw = argSq.lsw;
117 mul_Xsig_Xsig(&argSqSq, &argSqSq);
118
119 /* Compute the negative terms for the numerator polynomial */
120 accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0;
121 polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, HiPOWERon-1);
122 mul_Xsig_Xsig(&accumulatoro, &argSq);
123 negate_Xsig(&accumulatoro);
124 /* Add the positive terms */
125 polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, HiPOWERop-1);
126
127
128 /* Compute the positive terms for the denominator polynomial */
129 accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0;
130 polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, HiPOWERep-1);
131 mul_Xsig_Xsig(&accumulatore, &argSq);
132 negate_Xsig(&accumulatore);
133 /* Add the negative terms */
134 polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, HiPOWERen-1);
135 /* Multiply by arg^2 */
136 mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
137 mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
138 /* de-normalize and divide by 2 */
139 shr_Xsig(&accumulatore, -2*(1+exponent) + 1);
140 negate_Xsig(&accumulatore); /* This does 1 - accumulator */
141
142 /* Now find the ratio. */
143 if ( accumulatore.msw == 0 )
144 {
 145	      /* accumulatoro must contain 1.0 here (actually, 0), but it
 146		 really doesn't matter what value we use because it will
 147		 have negligible effect in later calculations
 148		 */
149 XSIG_LL(accum) = 0x8000000000000000LL;
150 accum.lsw = 0;
151 }
152 else
153 {
154 div_Xsig(&accumulatoro, &accumulatore, &accum);
155 }
156
157 /* Multiply by 1/3 * arg^3 */
158 mul64_Xsig(&accum, &XSIG_LL(argSignif));
159 mul64_Xsig(&accum, &XSIG_LL(argSignif));
160 mul64_Xsig(&accum, &XSIG_LL(argSignif));
161 mul64_Xsig(&accum, &twothirds);
162 shr_Xsig(&accum, -2*(exponent+1));
163
164 /* tan(arg) = arg + accum */
165 add_two_Xsig(&accum, &argSignif, &exponent);
166
167 if ( invert )
168 {
169 /* We now have the value of tan(pi_2 - arg) where pi_2 is an
170 approximation for pi/2
171 */
172 /* The next step is to fix the answer to compensate for the
173 error due to the approximation used for pi/2
174 */
175
176 /* This is (approx) delta, the error in our approx for pi/2
177 (see above). It has an exponent of -65
178 */
179 XSIG_LL(fix_up) = 0x898cc51701b839a2LL;
180 fix_up.lsw = 0;
181
182 if ( exponent == 0 )
183 adj = 0xffffffff; /* We want approx 1.0 here, but
184 this is close enough. */
185 else if ( exponent > -30 )
186 {
187 adj = accum.msw >> -(exponent+1); /* tan */
188 adj = mul_32_32(adj, adj); /* tan^2 */
189 }
190 else
191 adj = 0;
192 adj = mul_32_32(0x898cc517, adj); /* delta * tan^2 */
193
194 fix_up.msw += adj;
195 if ( !(fix_up.msw & 0x80000000) ) /* did fix_up overflow ? */
196 {
197 /* Yes, we need to add an msb */
198 shr_Xsig(&fix_up, 1);
199 fix_up.msw |= 0x80000000;
200 shr_Xsig(&fix_up, 64 + exponent);
201 }
202 else
203 shr_Xsig(&fix_up, 65 + exponent);
204
205 add_two_Xsig(&accum, &fix_up, &exponent);
206
207 /* accum now contains tan(pi/2 - arg).
208 Use tan(arg) = 1.0 / tan(pi/2 - arg)
209 */
210 accumulatoro.lsw = accumulatoro.midw = 0;
211 accumulatoro.msw = 0x80000000;
212 div_Xsig(&accumulatoro, &accum, &accum);
213 exponent = - exponent - 1;
214 }
215
216 /* Transfer the result */
217 round_Xsig(&accum);
218 FPU_settag0(TAG_Valid);
219 significand(st0_ptr) = XSIG_LL(accum);
220 setexponent16(st0_ptr, exponent + EXTENDED_Ebias); /* Result is positive. */
221
222}
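
Editor's note: poly_tan() evaluates a rational fit, roughly tan(x) ~= x + (2/3)*x^3 * N(x^2)/D(x^2), with N built from the odd tables, D from the even ones, and the twothirds constant supplying the 2/3. Arguments above pi/4 are bounced through tan(x) = 1/tan(pi/2 - x) (the invert path, with the same pi/2 tail fix-up as the sine code). A double sketch with the rational part collapsed to its truncated Taylor equivalent; the coefficients here are stand-ins, not the real tables:

	#include <math.h>

	double tan_outline(double x)		/* 0 <= x < pi/2 */
	{
		int invert = x > M_PI_4;
		double t, r;

		if (invert)
			x = M_PI_2 - x;		/* tan(x) = 1/tan(pi/2 - x) */

		t = x * x;
		/* stand-in for the oddplterm/evennegterm ratio: */
		r = x + x * t / 3.0 * (1.0 + 2.0 * t / 5.0);

		return invert ? 1.0 / r : r;
	}
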
diff --git a/arch/i386/math-emu/polynom_Xsig.S b/arch/i386/math-emu/polynom_Xsig.S
new file mode 100644
index 000000000000..17315c89ff3d
--- /dev/null
+++ b/arch/i386/math-emu/polynom_Xsig.S
@@ -0,0 +1,135 @@
1/*---------------------------------------------------------------------------+
2 | polynomial_Xsig.S |
3 | |
4 | Fixed point arithmetic polynomial evaluation. |
5 | |
6 | Copyright (C) 1992,1993,1994,1995 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@jacobi.maths.monash.edu.au |
9 | |
10 | Call from C as: |
 11 | void polynomial_Xsig(Xsig *accum, unsigned long long *x, |
12 | unsigned long long terms[], int n) |
13 | |
14 | Computes: |
15 | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x |
16 | and adds the result to the 12 byte Xsig. |
17 | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
18 | precision. |
19 | |
20 | This function must be used carefully: most overflow of intermediate |
21 | results is controlled, but overflow of the result is not. |
22 | |
23 +---------------------------------------------------------------------------*/
24 .file "polynomial_Xsig.S"
25
26#include "fpu_emu.h"
27
28
29#define TERM_SIZE $8
30#define SUM_MS -20(%ebp) /* sum ms long */
31#define SUM_MIDDLE -24(%ebp) /* sum middle long */
32#define SUM_LS -28(%ebp) /* sum ls long */
33#define ACCUM_MS -4(%ebp) /* accum ms long */
34#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */
35#define ACCUM_LS -12(%ebp) /* accum ls long */
36#define OVERFLOWED -16(%ebp) /* addition overflow flag */
37
38.text
39ENTRY(polynomial_Xsig)
40 pushl %ebp
41 movl %esp,%ebp
42 subl $32,%esp
43 pushl %esi
44 pushl %edi
45 pushl %ebx
46
47 movl PARAM2,%esi /* x */
48 movl PARAM3,%edi /* terms */
49
50 movl TERM_SIZE,%eax
51 mull PARAM4 /* n */
52 addl %eax,%edi
53
 54	movl 4(%edi),%edx /* terms[n] ms long */
 55	movl %edx,SUM_MS
 56	movl (%edi),%edx /* terms[n] ls long */
 57	movl %edx,SUM_MIDDLE
58 xor %eax,%eax
59 movl %eax,SUM_LS
60 movb %al,OVERFLOWED
61
62 subl TERM_SIZE,%edi
63 decl PARAM4
64 js L_accum_done
65
66L_accum_loop:
67 xor %eax,%eax
68 movl %eax,ACCUM_MS
69 movl %eax,ACCUM_MIDDLE
70
71 movl SUM_MIDDLE,%eax
72 mull (%esi) /* x ls long */
73 movl %edx,ACCUM_LS
74
75 movl SUM_MIDDLE,%eax
76 mull 4(%esi) /* x ms long */
77 addl %eax,ACCUM_LS
78 adcl %edx,ACCUM_MIDDLE
79 adcl $0,ACCUM_MS
80
81 movl SUM_MS,%eax
82 mull (%esi) /* x ls long */
83 addl %eax,ACCUM_LS
84 adcl %edx,ACCUM_MIDDLE
85 adcl $0,ACCUM_MS
86
87 movl SUM_MS,%eax
88 mull 4(%esi) /* x ms long */
89 addl %eax,ACCUM_MIDDLE
90 adcl %edx,ACCUM_MS
91
92 testb $0xff,OVERFLOWED
93 jz L_no_overflow
94
95 movl (%esi),%eax
96 addl %eax,ACCUM_MIDDLE
97 movl 4(%esi),%eax
98 adcl %eax,ACCUM_MS /* This could overflow too */
99
100L_no_overflow:
101
102/*
103 * Now put the sum of next term and the accumulator
104 * into the sum register
105 */
106 movl ACCUM_LS,%eax
107 addl (%edi),%eax /* term ls long */
108 movl %eax,SUM_LS
109 movl ACCUM_MIDDLE,%eax
110 adcl (%edi),%eax /* term ls long */
111 movl %eax,SUM_MIDDLE
112 movl ACCUM_MS,%eax
113 adcl 4(%edi),%eax /* term ms long */
114 movl %eax,SUM_MS
115 sbbb %al,%al
116 movb %al,OVERFLOWED /* Used in the next iteration */
117
118 subl TERM_SIZE,%edi
119 decl PARAM4
120 jns L_accum_loop
121
122L_accum_done:
123 movl PARAM1,%edi /* accum */
124 movl SUM_LS,%eax
125 addl %eax,(%edi)
126 movl SUM_MIDDLE,%eax
127 adcl %eax,4(%edi)
128 movl SUM_MS,%eax
129 adcl %eax,8(%edi)
130
131 popl %ebx
132 popl %edi
133 popl %esi
134 leave
135 ret
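
Editor's note: the assembly above is Horner's rule at 96-bit fixed point. A C restatement of the arithmetic the header specifies, with gcc's unsigned __int128 standing in for the hand-scheduled 32x32->64 multiply/carry chains; a reference sketch, not a drop-in replacement:

	typedef struct { unsigned int lsw, midw, msw; } Xsig;

	#define MASK96 ((((unsigned __int128)1) << 96) - 1)

	/* n is the index of the highest term; callers pass HIPOWER-1. */
	void polynomial_Xsig_ref(Xsig *accum, const unsigned long long *x,
				 const unsigned long long terms[], int n)
	{
		unsigned __int128 sum = (unsigned __int128)terms[n] << 32;
		unsigned __int128 t;
		unsigned long long hi;
		unsigned int lo;
		int i, overflowed = 0;

		for (i = n - 1; i >= 0; i--) {
			hi = (unsigned long long)(sum >> 32);
			lo = (unsigned int)sum;
			/* top 96 bits of the 160-bit product sum * x,
			   truncating sub-terms much as the assembly does */
			t = ((unsigned __int128)hi * *x >> 32)
			  + ((unsigned __int128)lo * *x >> 64);
			if (overflowed)		/* 2^96 bit was lost: add x<<32 */
				t += (unsigned __int128)*x << 32;
			t += (unsigned __int128)terms[i] << 32;
			overflowed = (t >> 96) != 0;	/* the OVERFLOWED flag */
			sum = t & MASK96;
		}
		t = ((unsigned __int128)accum->msw << 64)
		  | ((unsigned __int128)accum->midw << 32) | accum->lsw;
		t = (t + sum) & MASK96;	/* final add; result overflow uncontrolled */
		accum->lsw = (unsigned int)t;
		accum->midw = (unsigned int)(t >> 32);
		accum->msw = (unsigned int)(t >> 64);
	}
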
diff --git a/arch/i386/math-emu/reg_add_sub.c b/arch/i386/math-emu/reg_add_sub.c
new file mode 100644
index 000000000000..7cd3b37ac084
--- /dev/null
+++ b/arch/i386/math-emu/reg_add_sub.c
@@ -0,0 +1,374 @@
1/*---------------------------------------------------------------------------+
2 | reg_add_sub.c |
3 | |
4 | Functions to add or subtract two registers and put the result in a third. |
5 | |
6 | Copyright (C) 1992,1993,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13/*---------------------------------------------------------------------------+
14 | For each function, the destination may be any FPU_REG, including one of |
15 | the source FPU_REGs. |
16 | Each function returns 0 if the answer is o.k., otherwise a non-zero |
17 | value is returned, indicating either an exception condition or an |
18 | internal error. |
19 +---------------------------------------------------------------------------*/
20
21#include "exception.h"
22#include "reg_constant.h"
23#include "fpu_emu.h"
24#include "control_w.h"
25#include "fpu_system.h"
26
27static
28int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa,
29 FPU_REG const *b, u_char tagb, u_char signb,
30 FPU_REG *dest, int deststnr, int control_w);
31
32/*
33 Operates on st(0) and st(n), or on st(0) and temporary data.
34 The destination must be one of the source st(x).
35 */
36int FPU_add(FPU_REG const *b, u_char tagb, int deststnr, int control_w)
37{
38 FPU_REG *a = &st(0);
39 FPU_REG *dest = &st(deststnr);
40 u_char signb = getsign(b);
41 u_char taga = FPU_gettag0();
42 u_char signa = getsign(a);
43 u_char saved_sign = getsign(dest);
44 int diff, tag, expa, expb;
45
46 if ( !(taga | tagb) )
47 {
48 expa = exponent(a);
49 expb = exponent(b);
50
51 valid_add:
52 /* Both registers are valid */
53 if (!(signa ^ signb))
54 {
55 /* signs are the same */
56 tag = FPU_u_add(a, b, dest, control_w, signa, expa, expb);
57 }
58 else
59 {
60 /* The signs are different, so do a subtraction */
61 diff = expa - expb;
62 if (!diff)
63 {
64 diff = a->sigh - b->sigh; /* This works only if the ms bits
65 are identical. */
66 if (!diff)
67 {
68 diff = a->sigl > b->sigl;
69 if (!diff)
70 diff = -(a->sigl < b->sigl);
71 }
72 }
73
74 if (diff > 0)
75 {
76 tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb);
77 }
78 else if ( diff < 0 )
79 {
80 tag = FPU_u_sub(b, a, dest, control_w, signb, expb, expa);
81 }
82 else
83 {
84 FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
85 /* sign depends upon rounding mode */
86 setsign(dest, ((control_w & CW_RC) != RC_DOWN)
87 ? SIGN_POS : SIGN_NEG);
88 return TAG_Zero;
89 }
90 }
91
92 if ( tag < 0 )
93 {
94 setsign(dest, saved_sign);
95 return tag;
96 }
97 FPU_settagi(deststnr, tag);
98 return tag;
99 }
100
101 if ( taga == TAG_Special )
102 taga = FPU_Special(a);
103 if ( tagb == TAG_Special )
104 tagb = FPU_Special(b);
105
106 if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
107 || ((taga == TW_Denormal) && (tagb == TAG_Valid))
108 || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
109 {
110 FPU_REG x, y;
111
112 if ( denormal_operand() < 0 )
113 return FPU_Exception;
114
115 FPU_to_exp16(a, &x);
116 FPU_to_exp16(b, &y);
117 a = &x;
118 b = &y;
119 expa = exponent16(a);
120 expb = exponent16(b);
121 goto valid_add;
122 }
123
124 if ( (taga == TW_NaN) || (tagb == TW_NaN) )
125 {
126 if ( deststnr == 0 )
127 return real_2op_NaN(b, tagb, deststnr, a);
128 else
129 return real_2op_NaN(a, taga, deststnr, a);
130 }
131
132 return add_sub_specials(a, taga, signa, b, tagb, signb,
133 dest, deststnr, control_w);
134}
135
136
137/* Subtract b from a. (a-b) -> dest */
138int FPU_sub(int flags, int rm, int control_w)
139{
140 FPU_REG const *a, *b;
141 FPU_REG *dest;
142 u_char taga, tagb, signa, signb, saved_sign, sign;
143 int diff, tag = 0, expa, expb, deststnr;
144
145 a = &st(0);
146 taga = FPU_gettag0();
147
148 deststnr = 0;
149 if ( flags & LOADED )
150 {
151 b = (FPU_REG *)rm;
152 tagb = flags & 0x0f;
153 }
154 else
155 {
156 b = &st(rm);
157 tagb = FPU_gettagi(rm);
158
159 if ( flags & DEST_RM )
160 deststnr = rm;
161 }
162
163 signa = getsign(a);
164 signb = getsign(b);
165
166 if ( flags & REV )
167 {
168 signa ^= SIGN_NEG;
169 signb ^= SIGN_NEG;
170 }
171
172 dest = &st(deststnr);
173 saved_sign = getsign(dest);
174
175 if ( !(taga | tagb) )
176 {
177 expa = exponent(a);
178 expb = exponent(b);
179
180 valid_subtract:
181 /* Both registers are valid */
182
183 diff = expa - expb;
184
185 if (!diff)
186 {
187 diff = a->sigh - b->sigh; /* Works only if ms bits are identical */
188 if (!diff)
189 {
190 diff = a->sigl > b->sigl;
191 if (!diff)
192 diff = -(a->sigl < b->sigl);
193 }
194 }
195
196 switch ( (((int)signa)*2 + signb) / SIGN_NEG )
197 {
198 case 0: /* P - P */
199 case 3: /* N - N */
200 if (diff > 0)
201 {
202 /* |a| > |b| */
203 tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb);
204 }
205 else if ( diff == 0 )
206 {
207 FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
208
209 /* sign depends upon rounding mode */
210 setsign(dest, ((control_w & CW_RC) != RC_DOWN)
211 ? SIGN_POS : SIGN_NEG);
212 return TAG_Zero;
213 }
214 else
215 {
216 sign = signa ^ SIGN_NEG;
217 tag = FPU_u_sub(b, a, dest, control_w, sign, expb, expa);
218 }
219 break;
220 case 1: /* P - N */
221 tag = FPU_u_add(a, b, dest, control_w, SIGN_POS, expa, expb);
222 break;
223 case 2: /* N - P */
224 tag = FPU_u_add(a, b, dest, control_w, SIGN_NEG, expa, expb);
225 break;
226#ifdef PARANOID
227 default:
228 EXCEPTION(EX_INTERNAL|0x111);
229 return -1;
230#endif
231 }
232 if ( tag < 0 )
233 {
234 setsign(dest, saved_sign);
235 return tag;
236 }
237 FPU_settagi(deststnr, tag);
238 return tag;
239 }
240
241 if ( taga == TAG_Special )
242 taga = FPU_Special(a);
243 if ( tagb == TAG_Special )
244 tagb = FPU_Special(b);
245
246 if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
247 || ((taga == TW_Denormal) && (tagb == TAG_Valid))
248 || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
249 {
250 FPU_REG x, y;
251
252 if ( denormal_operand() < 0 )
253 return FPU_Exception;
254
255 FPU_to_exp16(a, &x);
256 FPU_to_exp16(b, &y);
257 a = &x;
258 b = &y;
259 expa = exponent16(a);
260 expb = exponent16(b);
261
262 goto valid_subtract;
263 }
264
265 if ( (taga == TW_NaN) || (tagb == TW_NaN) )
266 {
267 FPU_REG const *d1, *d2;
268 if ( flags & REV )
269 {
270 d1 = b;
271 d2 = a;
272 }
273 else
274 {
275 d1 = a;
276 d2 = b;
277 }
278 if ( flags & LOADED )
279 return real_2op_NaN(b, tagb, deststnr, d1);
280 if ( flags & DEST_RM )
281 return real_2op_NaN(a, taga, deststnr, d2);
282 else
283 return real_2op_NaN(b, tagb, deststnr, d2);
284 }
285
286 return add_sub_specials(a, taga, signa, b, tagb, signb ^ SIGN_NEG,
287 dest, deststnr, control_w);
288}
289
290
291static
292int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa,
293 FPU_REG const *b, u_char tagb, u_char signb,
294 FPU_REG *dest, int deststnr, int control_w)
295{
296 if ( ((taga == TW_Denormal) || (tagb == TW_Denormal))
297 && (denormal_operand() < 0) )
298 return FPU_Exception;
299
300 if (taga == TAG_Zero)
301 {
302 if (tagb == TAG_Zero)
303 {
304 /* Both are zero, result will be zero. */
305 u_char different_signs = signa ^ signb;
306
307 FPU_copy_to_regi(a, TAG_Zero, deststnr);
308 if ( different_signs )
309 {
310 /* Signs are different. */
311 /* Sign of answer depends upon rounding mode. */
312 setsign(dest, ((control_w & CW_RC) != RC_DOWN)
313 ? SIGN_POS : SIGN_NEG);
314 }
315 else
316 setsign(dest, signa); /* signa may differ from the sign of a. */
317 return TAG_Zero;
318 }
319 else
320 {
321 reg_copy(b, dest);
322 if ( (tagb == TW_Denormal) && (b->sigh & 0x80000000) )
323 {
324 /* A pseudoDenormal, convert it. */
325 addexponent(dest, 1);
326 tagb = TAG_Valid;
327 }
328 else if ( tagb > TAG_Empty )
329 tagb = TAG_Special;
330 setsign(dest, signb); /* signb may differ from the sign of b. */
331 FPU_settagi(deststnr, tagb);
332 return tagb;
333 }
334 }
335 else if (tagb == TAG_Zero)
336 {
337 reg_copy(a, dest);
338 if ( (taga == TW_Denormal) && (a->sigh & 0x80000000) )
339 {
340 /* A pseudoDenormal */
341 addexponent(dest, 1);
342 taga = TAG_Valid;
343 }
344 else if ( taga > TAG_Empty )
345 taga = TAG_Special;
346 setsign(dest, signa); /* signa may differ from the sign of a. */
347 FPU_settagi(deststnr, taga);
348 return taga;
349 }
350 else if (taga == TW_Infinity)
351 {
352 if ( (tagb != TW_Infinity) || (signa == signb) )
353 {
354 FPU_copy_to_regi(a, TAG_Special, deststnr);
355 setsign(dest, signa); /* signa may differ from the sign of a. */
356 return taga;
357 }
358 /* Infinity-Infinity is undefined. */
359 return arith_invalid(deststnr);
360 }
361 else if (tagb == TW_Infinity)
362 {
363 FPU_copy_to_regi(b, TAG_Special, deststnr);
364 setsign(dest, signb); /* signb may differ from the sign of b. */
365 return tagb;
366 }
367
368#ifdef PARANOID
369 EXCEPTION(EX_INTERNAL|0x101);
370#endif
371
372 return FPU_Exception;
373}
374
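
Editor's note on the sign dispatch in FPU_sub() above: assuming SIGN_POS is 0 and SIGN_NEG is the single bit 0x80 (their definitions in this emulator's fpu_emu.h), the expression (signa*2 + signb)/SIGN_NEG packs the two signs into 0..3, which is why the four cases read P-P, P-N, N-P, N-N:

	/* Hypothetical helper showing the encoding: */
	static int sign_pair(unsigned char signa, unsigned char signb)
	{
		return ((int)signa * 2 + signb) / 0x80; /* 0:P-P 1:P-N 2:N-P 3:N-N */
	}
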
diff --git a/arch/i386/math-emu/reg_compare.c b/arch/i386/math-emu/reg_compare.c
new file mode 100644
index 000000000000..f37c5b5a35ad
--- /dev/null
+++ b/arch/i386/math-emu/reg_compare.c
@@ -0,0 +1,381 @@
1/*---------------------------------------------------------------------------+
2 | reg_compare.c |
3 | |
4 | Compare two floating point registers |
5 | |
6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13/*---------------------------------------------------------------------------+
14 | compare() is the core FPU_REG comparison function |
15 +---------------------------------------------------------------------------*/
16
17#include "fpu_system.h"
18#include "exception.h"
19#include "fpu_emu.h"
20#include "control_w.h"
21#include "status_w.h"
22
23
24static int compare(FPU_REG const *b, int tagb)
25{
26 int diff, exp0, expb;
27 u_char st0_tag;
28 FPU_REG *st0_ptr;
29 FPU_REG x, y;
30 u_char st0_sign, signb = getsign(b);
31
32 st0_ptr = &st(0);
33 st0_tag = FPU_gettag0();
34 st0_sign = getsign(st0_ptr);
35
36 if ( tagb == TAG_Special )
37 tagb = FPU_Special(b);
38 if ( st0_tag == TAG_Special )
39 st0_tag = FPU_Special(st0_ptr);
40
41 if ( ((st0_tag != TAG_Valid) && (st0_tag != TW_Denormal))
42 || ((tagb != TAG_Valid) && (tagb != TW_Denormal)) )
43 {
44 if ( st0_tag == TAG_Zero )
45 {
46 if ( tagb == TAG_Zero ) return COMP_A_eq_B;
47 if ( tagb == TAG_Valid )
48 return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
49 if ( tagb == TW_Denormal )
50 return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
51 | COMP_Denormal;
52 }
53 else if ( tagb == TAG_Zero )
54 {
55 if ( st0_tag == TAG_Valid )
56 return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
57 if ( st0_tag == TW_Denormal )
58 return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
59 | COMP_Denormal;
60 }
61
62 if ( st0_tag == TW_Infinity )
63 {
64 if ( (tagb == TAG_Valid) || (tagb == TAG_Zero) )
65 return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
66 else if ( tagb == TW_Denormal )
67 return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
68 | COMP_Denormal;
69 else if ( tagb == TW_Infinity )
70 {
71 /* The 80486 book says that infinities can be equal! */
72 return (st0_sign == signb) ? COMP_A_eq_B :
73 ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
74 }
75 /* Fall through to the NaN code */
76 }
77 else if ( tagb == TW_Infinity )
78 {
79 if ( (st0_tag == TAG_Valid) || (st0_tag == TAG_Zero) )
80 return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
81 if ( st0_tag == TW_Denormal )
82 return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
83 | COMP_Denormal;
84 /* Fall through to the NaN code */
85 }
86
87 /* The only possibility now should be that one of the arguments
88 is a NaN */
89 if ( (st0_tag == TW_NaN) || (tagb == TW_NaN) )
90 {
91 int signalling = 0, unsupported = 0;
92 if ( st0_tag == TW_NaN )
93 {
94 signalling = (st0_ptr->sigh & 0xc0000000) == 0x80000000;
95 unsupported = !((exponent(st0_ptr) == EXP_OVER)
96 && (st0_ptr->sigh & 0x80000000));
97 }
98 if ( tagb == TW_NaN )
99 {
100 signalling |= (b->sigh & 0xc0000000) == 0x80000000;
101 unsupported |= !((exponent(b) == EXP_OVER)
102 && (b->sigh & 0x80000000));
103 }
104 if ( signalling || unsupported )
105 return COMP_No_Comp | COMP_SNaN | COMP_NaN;
106 else
107 /* Neither is a signaling NaN */
108 return COMP_No_Comp | COMP_NaN;
109 }
110
111 EXCEPTION(EX_Invalid);
112 }
113
114 if (st0_sign != signb)
115 {
116 return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
117 | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
118 COMP_Denormal : 0);
119 }
120
121 if ( (st0_tag == TW_Denormal) || (tagb == TW_Denormal) )
122 {
123 FPU_to_exp16(st0_ptr, &x);
124 FPU_to_exp16(b, &y);
125 st0_ptr = &x;
126 b = &y;
127 exp0 = exponent16(st0_ptr);
128 expb = exponent16(b);
129 }
130 else
131 {
132 exp0 = exponent(st0_ptr);
133 expb = exponent(b);
134 }
135
136#ifdef PARANOID
137 if (!(st0_ptr->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
138 if (!(b->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
139#endif /* PARANOID */
140
141 diff = exp0 - expb;
142 if ( diff == 0 )
143 {
144 diff = st0_ptr->sigh - b->sigh; /* Works only if ms bits are
145 identical */
146 if ( diff == 0 )
147 {
148 diff = st0_ptr->sigl > b->sigl;
149 if ( diff == 0 )
150 diff = -(st0_ptr->sigl < b->sigl);
151 }
152 }
153
154 if ( diff > 0 )
155 {
156 return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
157 | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
158 COMP_Denormal : 0);
159 }
160 if ( diff < 0 )
161 {
162 return ((st0_sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
163 | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
164 COMP_Denormal : 0);
165 }
166
167 return COMP_A_eq_B
168 | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
169 COMP_Denormal : 0);
170
171}
172
173
174/* This function requires that st(0) is not empty */
175int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
176{
177 int f = 0, c;
178
179 c = compare(loaded_data, loaded_tag);
180
181 if (c & COMP_NaN)
182 {
183 EXCEPTION(EX_Invalid);
184 f = SW_C3 | SW_C2 | SW_C0;
185 }
186 else
187 switch (c & 7)
188 {
189 case COMP_A_lt_B:
190 f = SW_C0;
191 break;
192 case COMP_A_eq_B:
193 f = SW_C3;
194 break;
195 case COMP_A_gt_B:
196 f = 0;
197 break;
198 case COMP_No_Comp:
199 f = SW_C3 | SW_C2 | SW_C0;
200 break;
201#ifdef PARANOID
202 default:
203 EXCEPTION(EX_INTERNAL|0x121);
204 f = SW_C3 | SW_C2 | SW_C0;
205 break;
206#endif /* PARANOID */
207 }
208 setcc(f);
209 if (c & COMP_Denormal)
210 {
211 return denormal_operand() < 0;
212 }
213 return 0;
214}
215
216
217static int compare_st_st(int nr)
218{
219 int f = 0, c;
220 FPU_REG *st_ptr;
221
222 if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) )
223 {
224 setcc(SW_C3 | SW_C2 | SW_C0);
225 /* Stack fault */
226 EXCEPTION(EX_StackUnder);
227 return !(control_word & CW_Invalid);
228 }
229
230 st_ptr = &st(nr);
231 c = compare(st_ptr, FPU_gettagi(nr));
232 if (c & COMP_NaN)
233 {
234 setcc(SW_C3 | SW_C2 | SW_C0);
235 EXCEPTION(EX_Invalid);
236 return !(control_word & CW_Invalid);
237 }
238 else
239 switch (c & 7)
240 {
241 case COMP_A_lt_B:
242 f = SW_C0;
243 break;
244 case COMP_A_eq_B:
245 f = SW_C3;
246 break;
247 case COMP_A_gt_B:
248 f = 0;
249 break;
250 case COMP_No_Comp:
251 f = SW_C3 | SW_C2 | SW_C0;
252 break;
253#ifdef PARANOID
254 default:
255 EXCEPTION(EX_INTERNAL|0x122);
256 f = SW_C3 | SW_C2 | SW_C0;
257 break;
258#endif /* PARANOID */
259 }
260 setcc(f);
261 if (c & COMP_Denormal)
262 {
263 return denormal_operand() < 0;
264 }
265 return 0;
266}
267
268
269static int compare_u_st_st(int nr)
270{
271 int f = 0, c;
272 FPU_REG *st_ptr;
273
274 if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) )
275 {
276 setcc(SW_C3 | SW_C2 | SW_C0);
277 /* Stack fault */
278 EXCEPTION(EX_StackUnder);
279 return !(control_word & CW_Invalid);
280 }
281
282 st_ptr = &st(nr);
283 c = compare(st_ptr, FPU_gettagi(nr));
284 if (c & COMP_NaN)
285 {
286 setcc(SW_C3 | SW_C2 | SW_C0);
287 if (c & COMP_SNaN) /* This is the only difference between
288 un-ordered and ordinary comparisons */
289 {
290 EXCEPTION(EX_Invalid);
291 return !(control_word & CW_Invalid);
292 }
293 return 0;
294 }
295 else
296 switch (c & 7)
297 {
298 case COMP_A_lt_B:
299 f = SW_C0;
300 break;
301 case COMP_A_eq_B:
302 f = SW_C3;
303 break;
304 case COMP_A_gt_B:
305 f = 0;
306 break;
307 case COMP_No_Comp:
308 f = SW_C3 | SW_C2 | SW_C0;
309 break;
310#ifdef PARANOID
311 default:
312 EXCEPTION(EX_INTERNAL|0x123);
313 f = SW_C3 | SW_C2 | SW_C0;
314 break;
315#endif /* PARANOID */
316 }
317 setcc(f);
318 if (c & COMP_Denormal)
319 {
320 return denormal_operand() < 0;
321 }
322 return 0;
323}
324
325/*---------------------------------------------------------------------------*/
326
327void fcom_st(void)
328{
329 /* fcom st(i) */
330 compare_st_st(FPU_rm);
331}
332
333
334void fcompst(void)
335{
336 /* fcomp st(i) */
337 if ( !compare_st_st(FPU_rm) )
338 FPU_pop();
339}
340
341
342void fcompp(void)
343{
344 /* fcompp */
345 if (FPU_rm != 1)
346 {
347 FPU_illegal();
348 return;
349 }
350 if ( !compare_st_st(1) )
351 poppop();
352}
353
354
355void fucom_(void)
356{
357 /* fucom st(i) */
358 compare_u_st_st(FPU_rm);
359
360}
361
362
363void fucomp(void)
364{
365 /* fucomp st(i) */
366 if ( !compare_u_st_st(FPU_rm) )
367 FPU_pop();
368}
369
370
371void fucompp(void)
372{
373 /* fucompp */
374 if (FPU_rm == 1)
375 {
376 if ( !compare_u_st_st(1) )
377 poppop();
378 }
379 else
380 FPU_illegal();
381}
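
Editor's note: the three switches in reg_compare.c all encode Intel's FCOM condition-code convention, C3 C2 C0 = 000 for ST > operand, 001 for ST < operand, 100 for equality, 111 for an unordered result. The shared mapping could be factored out once; a sketch using the file's own macros:

	static int comp_to_cc(int c)
	{
		if (c & COMP_NaN)
			return SW_C3 | SW_C2 | SW_C0;	/* unordered: 1 1 1 */
		switch (c & 7) {
		case COMP_A_lt_B: return SW_C0;		/* 0 0 1 */
		case COMP_A_eq_B: return SW_C3;		/* 1 0 0 */
		case COMP_A_gt_B: return 0;		/* 0 0 0 */
		default:          return SW_C3 | SW_C2 | SW_C0;
		}
	}
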
diff --git a/arch/i386/math-emu/reg_constant.c b/arch/i386/math-emu/reg_constant.c
new file mode 100644
index 000000000000..a85015801969
--- /dev/null
+++ b/arch/i386/math-emu/reg_constant.c
@@ -0,0 +1,120 @@
1/*---------------------------------------------------------------------------+
2 | reg_constant.c |
3 | |
4 | All of the constant FPU_REGs |
5 | |
6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "fpu_system.h"
14#include "fpu_emu.h"
15#include "status_w.h"
16#include "reg_constant.h"
17#include "control_w.h"
18
19
20#define MAKE_REG(s,e,l,h) { l, h, \
21 ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }
22
23FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000);
24#if 0
25FPU_REG const CONST_2 = MAKE_REG(POS, 1, 0x00000000, 0x80000000);
26FPU_REG const CONST_HALF = MAKE_REG(POS, -1, 0x00000000, 0x80000000);
27#endif /* 0 */
28static FPU_REG const CONST_L2T = MAKE_REG(POS, 1, 0xcd1b8afe, 0xd49a784b);
29static FPU_REG const CONST_L2E = MAKE_REG(POS, 0, 0x5c17f0bc, 0xb8aa3b29);
30FPU_REG const CONST_PI = MAKE_REG(POS, 1, 0x2168c235, 0xc90fdaa2);
31FPU_REG const CONST_PI2 = MAKE_REG(POS, 0, 0x2168c235, 0xc90fdaa2);
32FPU_REG const CONST_PI4 = MAKE_REG(POS, -1, 0x2168c235, 0xc90fdaa2);
33static FPU_REG const CONST_LG2 = MAKE_REG(POS, -2, 0xfbcff799, 0x9a209a84);
34static FPU_REG const CONST_LN2 = MAKE_REG(POS, -1, 0xd1cf79ac, 0xb17217f7);
35
36/* Extra bits to take pi/2 to more than 128 bits precision. */
37FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66,
38 0xfc8f8cbb, 0xece675d1);
39
40/* Only the sign (and tag) is used in internal zeroes */
41FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0);
42
43/* Only the sign and significand (and tag) are used in internal NaNs */
44/* The 80486 never generates one of these
45FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000);
46 */
47/* This is the real indefinite QNaN */
48FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000);
49
50/* Only the sign (and tag) is used in internal infinities */
51FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000);
52
53
54static void fld_const(FPU_REG const *c, int adj, u_char tag)
55{
56 FPU_REG *st_new_ptr;
57
58 if ( STACK_OVERFLOW )
59 {
60 FPU_stack_overflow();
61 return;
62 }
63 push();
64 reg_copy(c, st_new_ptr);
65 st_new_ptr->sigl += adj; /* For all our fldxxx constants, we don't need to
66 borrow or carry. */
67 FPU_settag0(tag);
68 clear_C1();
69}
70
71/* A fast way to find out whether x is one of RC_DOWN or RC_CHOP
72 (and not one of RC_RND or RC_UP).
73 */
74#define DOWN_OR_CHOP(x) (x & RC_DOWN)
75
76static void fld1(int rc)
77{
78 fld_const(&CONST_1, 0, TAG_Valid);
79}
80
81static void fldl2t(int rc)
82{
83 fld_const(&CONST_L2T, (rc == RC_UP) ? 1 : 0, TAG_Valid);
84}
85
86static void fldl2e(int rc)
87{
88 fld_const(&CONST_L2E, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
89}
90
91static void fldpi(int rc)
92{
93 fld_const(&CONST_PI, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
94}
95
96static void fldlg2(int rc)
97{
98 fld_const(&CONST_LG2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
99}
100
101static void fldln2(int rc)
102{
103 fld_const(&CONST_LN2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
104}
105
106static void fldz(int rc)
107{
108 fld_const(&CONST_Z, 0, TAG_Zero);
109}
110
111typedef void (*FUNC_RC)(int);
112
113static FUNC_RC constants_table[] = {
114 fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, (FUNC_RC)FPU_illegal
115};
116
117void fconst(void)
118{
119 (constants_table[FPU_rm])(control_word & CW_RC);
120}
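
Editor's note: the adj argument to fld_const() above implements correct directed rounding of the built-in constants. Each table entry is the round-to-nearest 64-bit significand, and adj nudges the last bit when the current rounding mode would have chosen the neighbouring value. Worked example for fldpi(): pi's significand continues 0xc90fdaa22168c234 c4c6628b..., and since the tail 0xc4c6... exceeds half an ulp the stored value is the rounded-up 0x...c235, so round-down and chop need adj = -1 to recover the truncated 0x...c234. Conversely log2(10) continues 0xd49a784bcd1b8afe 492b..., a tail below half an ulp, so CONST_L2T holds the truncated value and only round-up needs adj = +1, which is exactly how fldl2t() differs from the other loaders.
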
diff --git a/arch/i386/math-emu/reg_constant.h b/arch/i386/math-emu/reg_constant.h
new file mode 100644
index 000000000000..1bffaec3a134
--- /dev/null
+++ b/arch/i386/math-emu/reg_constant.h
@@ -0,0 +1,25 @@
1/*---------------------------------------------------------------------------+
2 | reg_constant.h |
3 | |
4 | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
5 | Australia. E-mail billm@vaxc.cc.monash.edu.au |
6 | |
7 +---------------------------------------------------------------------------*/
8
9#ifndef _REG_CONSTANT_H_
10#define _REG_CONSTANT_H_
11
12#include "fpu_emu.h"
13
14extern FPU_REG const CONST_1;
15extern FPU_REG const CONST_PI;
16extern FPU_REG const CONST_PI2;
17extern FPU_REG const CONST_PI2extra;
18extern FPU_REG const CONST_PI4;
19extern FPU_REG const CONST_Z;
20extern FPU_REG const CONST_PINF;
21extern FPU_REG const CONST_INF;
22extern FPU_REG const CONST_MINF;
23extern FPU_REG const CONST_QNaN;
24
25#endif /* _REG_CONSTANT_H_ */
diff --git a/arch/i386/math-emu/reg_convert.c b/arch/i386/math-emu/reg_convert.c
new file mode 100644
index 000000000000..45a258752703
--- /dev/null
+++ b/arch/i386/math-emu/reg_convert.c
@@ -0,0 +1,53 @@
1/*---------------------------------------------------------------------------+
2 | reg_convert.c |
3 | |
4 | Convert register representation. |
5 | |
6 | Copyright (C) 1992,1993,1994,1996,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13#include "exception.h"
14#include "fpu_emu.h"
15
16
17int FPU_to_exp16(FPU_REG const *a, FPU_REG *x)
18{
19 int sign = getsign(a);
20
21 *(long long *)&(x->sigl) = *(const long long *)&(a->sigl);
22
23 /* Set up the exponent as a 16 bit quantity. */
24 setexponent16(x, exponent(a));
25
26 if ( exponent16(x) == EXP_UNDER )
27 {
28 /* The number is a de-normal or pseudodenormal. */
29 /* We only deal with the significand and exponent. */
30
31 if (x->sigh & 0x80000000)
32 {
33 /* Is a pseudodenormal. */
34 /* This is non-80486 behaviour because the number
35 loses its 'denormal' identity. */
36 addexponent(x, 1);
37 }
38 else
39 {
40 /* Is a denormal. */
41 addexponent(x, 1);
42 FPU_normalize_nuo(x);
43 }
44 }
45
46 if ( !(x->sigh & 0x80000000) )
47 {
48 EXCEPTION(EX_INTERNAL | 0x180);
49 }
50
51 return sign;
52}
53
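
[Editorial note] FPU_to_exp16() hands callers a register whose exponent has been widened to a signed 16-bit quantity and whose significand is guaranteed normalized. The following toy sketch (editorial; the struct and function names are invented for the demo) shows the normalize step applied to a denormal:

#include <stdio.h>
#include <stdint.h>

struct toy_reg {
	uint64_t sig;	/* 64-bit significand with explicit integer bit */
	int16_t  exp;	/* 16-bit two's-complement exponent */
};

static void toy_normalize(struct toy_reg *x)
{
	if (!x->sig)		/* guard: the real path never sees zero here */
		return;
	while (!(x->sig & 0x8000000000000000ULL)) {
		x->sig <<= 1;	/* shift up until the integer bit is set */
		x->exp--;	/* and keep the value unchanged */
	}
}

int main(void)
{
	struct toy_reg x = { 0x0000000180000000ULL, 0 }; /* denormal-like */
	toy_normalize(&x);
	printf("sig=%016llx exp=%d\n", (unsigned long long)x.sig, x.exp);
	return 0;
}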
diff --git a/arch/i386/math-emu/reg_divide.c b/arch/i386/math-emu/reg_divide.c
new file mode 100644
index 000000000000..5cee7ff920d9
--- /dev/null
+++ b/arch/i386/math-emu/reg_divide.c
@@ -0,0 +1,207 @@
1/*---------------------------------------------------------------------------+
2 | reg_divide.c |
3 | |
4 | Divide one FPU_REG by another and put the result in a destination FPU_REG.|
5 | |
6 | Copyright (C) 1996 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@jacobi.maths.monash.edu.au |
9 | |
10 | Return value is the tag of the answer, or-ed with FPU_Exception if |
11 | one was raised, or -1 on internal error. |
12 | |
13 +---------------------------------------------------------------------------*/
14
15/*---------------------------------------------------------------------------+
16 | The destination may be any FPU_REG, including one of the source FPU_REGs. |
17 +---------------------------------------------------------------------------*/
18
19#include "exception.h"
20#include "reg_constant.h"
21#include "fpu_emu.h"
22#include "fpu_system.h"
23
24/*
25 Divide one register by another and put the result into a third register.
26 */
27int FPU_div(int flags, int rm, int control_w)
28{
29 FPU_REG x, y;
30 FPU_REG const *a, *b, *st0_ptr, *st_ptr;
31 FPU_REG *dest;
32 u_char taga, tagb, signa, signb, sign, saved_sign;
33 int tag, deststnr;
34
35 if ( flags & DEST_RM )
36 deststnr = rm;
37 else
38 deststnr = 0;
39
40 if ( flags & REV )
41 {
42 b = &st(0);
43 st0_ptr = b;
44 tagb = FPU_gettag0();
45 if ( flags & LOADED )
46 {
47 a = (FPU_REG *)rm;
48 taga = flags & 0x0f;
49 }
50 else
51 {
52 a = &st(rm);
53 st_ptr = a;
54 taga = FPU_gettagi(rm);
55 }
56 }
57 else
58 {
59 a = &st(0);
60 st0_ptr = a;
61 taga = FPU_gettag0();
62 if ( flags & LOADED )
63 {
64 b = (FPU_REG *)rm;
65 tagb = flags & 0x0f;
66 }
67 else
68 {
69 b = &st(rm);
70 st_ptr = b;
71 tagb = FPU_gettagi(rm);
72 }
73 }
74
75 signa = getsign(a);
76 signb = getsign(b);
77
78 sign = signa ^ signb;
79
80 dest = &st(deststnr);
81 saved_sign = getsign(dest);
82
83 if ( !(taga | tagb) )
84 {
85 /* Both regs Valid, this should be the most common case. */
86 reg_copy(a, &x);
87 reg_copy(b, &y);
88 setpositive(&x);
89 setpositive(&y);
90 tag = FPU_u_div(&x, &y, dest, control_w, sign);
91
92 if ( tag < 0 )
93 return tag;
94
95 FPU_settagi(deststnr, tag);
96 return tag;
97 }
98
99 if ( taga == TAG_Special )
100 taga = FPU_Special(a);
101 if ( tagb == TAG_Special )
102 tagb = FPU_Special(b);
103
104 if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
105 || ((taga == TW_Denormal) && (tagb == TAG_Valid))
106 || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
107 {
108 if ( denormal_operand() < 0 )
109 return FPU_Exception;
110
111 FPU_to_exp16(a, &x);
112 FPU_to_exp16(b, &y);
113 tag = FPU_u_div(&x, &y, dest, control_w, sign);
114 if ( tag < 0 )
115 return tag;
116
117 FPU_settagi(deststnr, tag);
118 return tag;
119 }
120 else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) )
121 {
122 if ( tagb != TAG_Zero )
123 {
124 /* Want to find Zero/Valid */
125 if ( tagb == TW_Denormal )
126 {
127 if ( denormal_operand() < 0 )
128 return FPU_Exception;
129 }
130
131 /* The result is zero. */
132 FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
133 setsign(dest, sign);
134 return TAG_Zero;
135 }
136 /* We have an exception condition, either 0/0 or Valid/Zero. */
137 if ( taga == TAG_Zero )
138 {
139 /* 0/0 */
140 return arith_invalid(deststnr);
141 }
142 /* Valid/Zero */
143 return FPU_divide_by_zero(deststnr, sign);
144 }
145 /* Must have infinities, NaNs, etc */
146 else if ( (taga == TW_NaN) || (tagb == TW_NaN) )
147 {
148 if ( flags & LOADED )
149 return real_2op_NaN((FPU_REG *)rm, flags & 0x0f, 0, st0_ptr);
150
151 if ( flags & DEST_RM )
152 {
153 int tag;
154 tag = FPU_gettag0();
155 if ( tag == TAG_Special )
156 tag = FPU_Special(st0_ptr);
157 return real_2op_NaN(st0_ptr, tag, rm, (flags & REV) ? st0_ptr : &st(rm));
158 }
159 else
160 {
161 int tag;
162 tag = FPU_gettagi(rm);
163 if ( tag == TAG_Special )
164 tag = FPU_Special(&st(rm));
165 return real_2op_NaN(&st(rm), tag, 0, (flags & REV) ? st0_ptr : &st(rm));
166 }
167 }
168 else if (taga == TW_Infinity)
169 {
170 if (tagb == TW_Infinity)
171 {
172 /* infinity/infinity */
173 return arith_invalid(deststnr);
174 }
175 else
176 {
177 /* tagb must be Valid or Zero */
178 if ( (tagb == TW_Denormal) && (denormal_operand() < 0) )
179 return FPU_Exception;
180
181				/* Infinity divided by Zero or Valid does
182				   not raise an exception, but returns Infinity */
183 FPU_copy_to_regi(a, TAG_Special, deststnr);
184 setsign(dest, sign);
185 return taga;
186 }
187 }
188 else if (tagb == TW_Infinity)
189 {
190 if ( (taga == TW_Denormal) && (denormal_operand() < 0) )
191 return FPU_Exception;
192
193 /* The result is zero. */
194 FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
195 setsign(dest, sign);
196 return TAG_Zero;
197 }
198#ifdef PARANOID
199 else
200 {
201 EXCEPTION(EX_INTERNAL|0x102);
202 return FPU_Exception;
203 }
204#endif /* PARANOID */
205
206 return 0;
207}
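
[Editorial note] The first forty lines of FPU_div() are pure operand selection driven by the flags word: REV swaps which operand is st(0), LOADED means rm actually carries a pointer to an already-converted FPU_REG, and DEST_RM sends the result to st(rm) instead of st(0). A hedged sketch of just that selection logic (editorial; the flag values below are demo placeholders, not the constants from fpu_emu.h):

#include <stdio.h>

#define REV     0x01	/* demo encodings, assumed */
#define DEST_RM 0x02
#define LOADED  0x04

static void pick_operands(int flags, int rm,
			  const char **a, const char **b, int *deststnr)
{
	const char *st0   = "st(0)";
	const char *other = (flags & LOADED) ? "loaded operand" : "st(rm)";

	if (flags & REV) { *a = other; *b = st0; }	/* reversed divide */
	else             { *a = st0;   *b = other; }
	*deststnr = (flags & DEST_RM) ? rm : 0;
}

int main(void)
{
	const char *a, *b;
	int dest;

	pick_operands(REV | DEST_RM, 3, &a, &b, &dest);
	printf("%s / %s -> st(%d)\n", a, b, dest);
	return 0;
}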
diff --git a/arch/i386/math-emu/reg_ld_str.c b/arch/i386/math-emu/reg_ld_str.c
new file mode 100644
index 000000000000..f06ed41d191d
--- /dev/null
+++ b/arch/i386/math-emu/reg_ld_str.c
@@ -0,0 +1,1370 @@
1/*---------------------------------------------------------------------------+
2 | reg_ld_str.c |
3 | |
4 | All of the functions which transfer data between user memory and FPU_REGs.|
5 | |
6 | Copyright (C) 1992,1993,1994,1996,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | |
11 +---------------------------------------------------------------------------*/
12
13/*---------------------------------------------------------------------------+
14 | Note: |
15 | The file contains code which accesses user memory. |
16 | Emulator static data may change when user memory is accessed, due to |
17 | other processes using the emulator while swapping is in progress. |
18 +---------------------------------------------------------------------------*/
19
20#include "fpu_emu.h"
21
22#include <asm/uaccess.h>
23
24#include "fpu_system.h"
25#include "exception.h"
26#include "reg_constant.h"
27#include "control_w.h"
28#include "status_w.h"
29
30
31#define DOUBLE_Emax 1023 /* largest valid exponent */
32#define DOUBLE_Ebias 1023
33#define DOUBLE_Emin (-1022) /* smallest valid exponent */
34
35#define SINGLE_Emax 127 /* largest valid exponent */
36#define SINGLE_Ebias 127
37#define SINGLE_Emin (-126) /* smallest valid exponent */
38
39
40static u_char normalize_no_excep(FPU_REG *r, int exp, int sign)
41{
42 u_char tag;
43
44 setexponent16(r, exp);
45
46 tag = FPU_normalize_nuo(r);
47 stdexp(r);
48 if ( sign )
49 setnegative(r);
50
51 return tag;
52}
53
54
55int FPU_tagof(FPU_REG *ptr)
56{
57 int exp;
58
59 exp = exponent16(ptr) & 0x7fff;
60 if ( exp == 0 )
61 {
62 if ( !(ptr->sigh | ptr->sigl) )
63 {
64 return TAG_Zero;
65 }
66 /* The number is a de-normal or pseudodenormal. */
67 return TAG_Special;
68 }
69
70 if ( exp == 0x7fff )
71 {
72 /* Is an Infinity, a NaN, or an unsupported data type. */
73 return TAG_Special;
74 }
75
76 if ( !(ptr->sigh & 0x80000000) )
77 {
78 /* Unsupported data type. */
79 /* Valid numbers have the ms bit set to 1. */
80 /* Unnormal. */
81 return TAG_Special;
82 }
83
84 return TAG_Valid;
85}
86
87
88/* Get a long double from user memory */
89int FPU_load_extended(long double __user *s, int stnr)
90{
91 FPU_REG *sti_ptr = &st(stnr);
92
93 RE_ENTRANT_CHECK_OFF;
94 FPU_access_ok(VERIFY_READ, s, 10);
95 __copy_from_user(sti_ptr, s, 10);
96 RE_ENTRANT_CHECK_ON;
97
98 return FPU_tagof(sti_ptr);
99}
100
101
102/* Get a double from user memory */
103int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data)
104{
105 int exp, tag, negative;
106 unsigned m64, l64;
107
108 RE_ENTRANT_CHECK_OFF;
109 FPU_access_ok(VERIFY_READ, dfloat, 8);
110 FPU_get_user(m64, 1 + (unsigned long __user *) dfloat);
111 FPU_get_user(l64, (unsigned long __user *) dfloat);
112 RE_ENTRANT_CHECK_ON;
113
114 negative = (m64 & 0x80000000) ? SIGN_Negative : SIGN_Positive;
115 exp = ((m64 & 0x7ff00000) >> 20) - DOUBLE_Ebias + EXTENDED_Ebias;
116 m64 &= 0xfffff;
117 if ( exp > DOUBLE_Emax + EXTENDED_Ebias )
118 {
119 /* Infinity or NaN */
120 if ((m64 == 0) && (l64 == 0))
121 {
122 /* +- infinity */
123 loaded_data->sigh = 0x80000000;
124 loaded_data->sigl = 0x00000000;
125 exp = EXP_Infinity + EXTENDED_Ebias;
126 tag = TAG_Special;
127 }
128 else
129 {
130 /* Must be a signaling or quiet NaN */
131 exp = EXP_NaN + EXTENDED_Ebias;
132 loaded_data->sigh = (m64 << 11) | 0x80000000;
133 loaded_data->sigh |= l64 >> 21;
134 loaded_data->sigl = l64 << 11;
135 tag = TAG_Special; /* The calling function must look for NaNs */
136 }
137 }
138 else if ( exp < DOUBLE_Emin + EXTENDED_Ebias )
139 {
140 /* Zero or de-normal */
141 if ((m64 == 0) && (l64 == 0))
142 {
143 /* Zero */
144 reg_copy(&CONST_Z, loaded_data);
145 exp = 0;
146 tag = TAG_Zero;
147 }
148 else
149 {
150 /* De-normal */
151 loaded_data->sigh = m64 << 11;
152 loaded_data->sigh |= l64 >> 21;
153 loaded_data->sigl = l64 << 11;
154
155 return normalize_no_excep(loaded_data, DOUBLE_Emin, negative)
156 | (denormal_operand() < 0 ? FPU_Exception : 0);
157 }
158 }
159 else
160 {
161 loaded_data->sigh = (m64 << 11) | 0x80000000;
162 loaded_data->sigh |= l64 >> 21;
163 loaded_data->sigl = l64 << 11;
164
165 tag = TAG_Valid;
166 }
167
168 setexponent16(loaded_data, exp | negative);
169
170 return tag;
171}
172
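
[Editorial note] For a normal double, the conversion above amounts to shifting the 52-bit mantissa left by 11, setting the explicit integer bit, and rebasing the exponent from the double bias (1023) to the extended bias (16383). A self-contained sketch of that bit surgery (editorial; the variable names are local to the demo):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	double d = 1.5;
	uint64_t bits;

	memcpy(&bits, &d, 8);

	uint32_t m64 = (uint32_t)(bits >> 32) & 0xfffff; /* top 20 mantissa bits */
	uint32_t l64 = (uint32_t)bits;			 /* low 32 mantissa bits */
	int exp = (int)((bits >> 52) & 0x7ff) - 1023 + 16383;

	uint32_t sigh = (m64 << 11) | 0x80000000u | (l64 >> 21);
	uint32_t sigl = l64 << 11;

	/* 1.5 -> exp=0x3fff, significand 0xc0000000:00000000 */
	printf("exp=%#x sigh=%08x sigl=%08x\n",
	       (unsigned)exp, (unsigned)sigh, (unsigned)sigl);
	return 0;
}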
173
174/* Get a float from user memory */
175int FPU_load_single(float __user *single, FPU_REG *loaded_data)
176{
177 unsigned m32;
178 int exp, tag, negative;
179
180 RE_ENTRANT_CHECK_OFF;
181 FPU_access_ok(VERIFY_READ, single, 4);
182 FPU_get_user(m32, (unsigned long __user *) single);
183 RE_ENTRANT_CHECK_ON;
184
185 negative = (m32 & 0x80000000) ? SIGN_Negative : SIGN_Positive;
186
187 if (!(m32 & 0x7fffffff))
188 {
189 /* Zero */
190 reg_copy(&CONST_Z, loaded_data);
191 addexponent(loaded_data, negative);
192 return TAG_Zero;
193 }
194 exp = ((m32 & 0x7f800000) >> 23) - SINGLE_Ebias + EXTENDED_Ebias;
195 m32 = (m32 & 0x7fffff) << 8;
196 if ( exp < SINGLE_Emin + EXTENDED_Ebias )
197 {
198 /* De-normals */
199 loaded_data->sigh = m32;
200 loaded_data->sigl = 0;
201
202 return normalize_no_excep(loaded_data, SINGLE_Emin, negative)
203 | (denormal_operand() < 0 ? FPU_Exception : 0);
204 }
205 else if ( exp > SINGLE_Emax + EXTENDED_Ebias )
206 {
207 /* Infinity or NaN */
208 if ( m32 == 0 )
209 {
210 /* +- infinity */
211 loaded_data->sigh = 0x80000000;
212 loaded_data->sigl = 0x00000000;
213 exp = EXP_Infinity + EXTENDED_Ebias;
214 tag = TAG_Special;
215 }
216 else
217 {
218 /* Must be a signaling or quiet NaN */
219 exp = EXP_NaN + EXTENDED_Ebias;
220 loaded_data->sigh = m32 | 0x80000000;
221 loaded_data->sigl = 0;
222 tag = TAG_Special; /* The calling function must look for NaNs */
223 }
224 }
225 else
226 {
227 loaded_data->sigh = m32 | 0x80000000;
228 loaded_data->sigl = 0;
229 tag = TAG_Valid;
230 }
231
232 setexponent16(loaded_data, exp | negative); /* Set the sign. */
233
234 return tag;
235}
236
237
238/* Get a long long from user memory */
239int FPU_load_int64(long long __user *_s)
240{
241 long long s;
242 int sign;
243 FPU_REG *st0_ptr = &st(0);
244
245 RE_ENTRANT_CHECK_OFF;
246 FPU_access_ok(VERIFY_READ, _s, 8);
247 copy_from_user(&s,_s,8);
248 RE_ENTRANT_CHECK_ON;
249
250 if (s == 0)
251 {
252 reg_copy(&CONST_Z, st0_ptr);
253 return TAG_Zero;
254 }
255
256 if (s > 0)
257 sign = SIGN_Positive;
258 else
259 {
260 s = -s;
261 sign = SIGN_Negative;
262 }
263
264 significand(st0_ptr) = s;
265
266 return normalize_no_excep(st0_ptr, 63, sign);
267}
268
269
270/* Get a long from user memory */
271int FPU_load_int32(long __user *_s, FPU_REG *loaded_data)
272{
273 long s;
274 int negative;
275
276 RE_ENTRANT_CHECK_OFF;
277 FPU_access_ok(VERIFY_READ, _s, 4);
278 FPU_get_user(s, _s);
279 RE_ENTRANT_CHECK_ON;
280
281 if (s == 0)
282 { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; }
283
284 if (s > 0)
285 negative = SIGN_Positive;
286 else
287 {
288 s = -s;
289 negative = SIGN_Negative;
290 }
291
292 loaded_data->sigh = s;
293 loaded_data->sigl = 0;
294
295 return normalize_no_excep(loaded_data, 31, negative);
296}
297
298
299/* Get a short from user memory */
300int FPU_load_int16(short __user *_s, FPU_REG *loaded_data)
301{
302 int s, negative;
303
304 RE_ENTRANT_CHECK_OFF;
305 FPU_access_ok(VERIFY_READ, _s, 2);
306 /* Cast as short to get the sign extended. */
307 FPU_get_user(s, _s);
308 RE_ENTRANT_CHECK_ON;
309
310 if (s == 0)
311 { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; }
312
313 if (s > 0)
314 negative = SIGN_Positive;
315 else
316 {
317 s = -s;
318 negative = SIGN_Negative;
319 }
320
321 loaded_data->sigh = s << 16;
322 loaded_data->sigl = 0;
323
324 return normalize_no_excep(loaded_data, 15, negative);
325}
326
327
328/* Get a packed bcd array from user memory */
329int FPU_load_bcd(u_char __user *s)
330{
331 FPU_REG *st0_ptr = &st(0);
332 int pos;
333 u_char bcd;
334 long long l=0;
335 int sign;
336
337 RE_ENTRANT_CHECK_OFF;
338 FPU_access_ok(VERIFY_READ, s, 10);
339 RE_ENTRANT_CHECK_ON;
340 for ( pos = 8; pos >= 0; pos--)
341 {
342 l *= 10;
343 RE_ENTRANT_CHECK_OFF;
344 FPU_get_user(bcd, s+pos);
345 RE_ENTRANT_CHECK_ON;
346 l += bcd >> 4;
347 l *= 10;
348 l += bcd & 0x0f;
349 }
350
351 RE_ENTRANT_CHECK_OFF;
352 FPU_get_user(sign, s+9);
353 sign = sign & 0x80 ? SIGN_Negative : SIGN_Positive;
354 RE_ENTRANT_CHECK_ON;
355
356 if ( l == 0 )
357 {
358 reg_copy(&CONST_Z, st0_ptr);
359 addexponent(st0_ptr, sign); /* Set the sign. */
360 return TAG_Zero;
361 }
362 else
363 {
364 significand(st0_ptr) = l;
365 return normalize_no_excep(st0_ptr, 63, sign);
366 }
367}
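
[Editorial note] The loop above walks the nine packed-BCD bytes from most significant (offset 8) down to least, peeling two decimal digits per byte, with the sign carried in bit 7 of the tenth byte. The same decode in standalone C (editorial; the buffer holds a made-up value, +123):

#include <stdio.h>

int main(void)
{
	unsigned char s[10] = { 0x23, 0x01 };	/* ...000123, sign byte 0 */
	long long l = 0;
	int pos;

	for (pos = 8; pos >= 0; pos--) {
		l = l * 10 + (s[pos] >> 4);	/* high nibble digit */
		l = l * 10 + (s[pos] & 0x0f);	/* low nibble digit */
	}
	if (s[9] & 0x80)
		l = -l;

	printf("%lld\n", l);	/* prints 123 */
	return 0;
}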
368
369/*===========================================================================*/
370
371/* Put a long double into user memory */
372int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, long double __user *d)
373{
374 /*
375 The only exception raised by an attempt to store to an
376 extended format is the Invalid Stack exception, i.e.
377 attempting to store from an empty register.
378 */
379
380 if ( st0_tag != TAG_Empty )
381 {
382 RE_ENTRANT_CHECK_OFF;
383 FPU_access_ok(VERIFY_WRITE, d, 10);
384
385 FPU_put_user(st0_ptr->sigl, (unsigned long __user *) d);
386 FPU_put_user(st0_ptr->sigh, (unsigned long __user *) ((u_char __user *)d + 4));
387 FPU_put_user(exponent16(st0_ptr), (unsigned short __user *) ((u_char __user *)d + 8));
388 RE_ENTRANT_CHECK_ON;
389
390 return 1;
391 }
392
393 /* Empty register (stack underflow) */
394 EXCEPTION(EX_StackUnder);
395 if ( control_word & CW_Invalid )
396 {
397 /* The masked response */
398 /* Put out the QNaN indefinite */
399 RE_ENTRANT_CHECK_OFF;
400 FPU_access_ok(VERIFY_WRITE,d,10);
401 FPU_put_user(0, (unsigned long __user *) d);
402 FPU_put_user(0xc0000000, 1 + (unsigned long __user *) d);
403 FPU_put_user(0xffff, 4 + (short __user *) d);
404 RE_ENTRANT_CHECK_ON;
405 return 1;
406 }
407 else
408 return 0;
409
410}
411
412
413/* Put a double into user memory */
414int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat)
415{
416 unsigned long l[2];
417 unsigned long increment = 0; /* avoid gcc warnings */
418 int precision_loss;
419 int exp;
420 FPU_REG tmp;
421
422 if ( st0_tag == TAG_Valid )
423 {
424 reg_copy(st0_ptr, &tmp);
425 exp = exponent(&tmp);
426
427 if ( exp < DOUBLE_Emin ) /* It may be a denormal */
428 {
429 addexponent(&tmp, -DOUBLE_Emin + 52); /* largest exp to be 51 */
430
431 denormal_arg:
432
433 if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) )
434 {
435#ifdef PECULIAR_486
436 /* Did it round to a non-denormal ? */
437			/* This behaviour might be regarded as peculiar; it appears
438 that the 80486 rounds to the dest precision, then
439 converts to decide underflow. */
440 if ( !((tmp.sigh == 0x00100000) && (tmp.sigl == 0) &&
441 (st0_ptr->sigl & 0x000007ff)) )
442#endif /* PECULIAR_486 */
443 {
444 EXCEPTION(EX_Underflow);
445 /* This is a special case: see sec 16.2.5.1 of
446 the 80486 book */
447 if ( !(control_word & CW_Underflow) )
448 return 0;
449 }
450 EXCEPTION(precision_loss);
451 if ( !(control_word & CW_Precision) )
452 return 0;
453 }
454 l[0] = tmp.sigl;
455 l[1] = tmp.sigh;
456 }
457 else
458 {
459 if ( tmp.sigl & 0x000007ff )
460 {
461 precision_loss = 1;
462 switch (control_word & CW_RC)
463 {
464 case RC_RND:
465 /* Rounding can get a little messy.. */
466 increment = ((tmp.sigl & 0x7ff) > 0x400) | /* nearest */
467 ((tmp.sigl & 0xc00) == 0xc00); /* odd -> even */
468 break;
469 case RC_DOWN: /* towards -infinity */
470 increment = signpositive(&tmp) ? 0 : tmp.sigl & 0x7ff;
471 break;
472 case RC_UP: /* towards +infinity */
473 increment = signpositive(&tmp) ? tmp.sigl & 0x7ff : 0;
474 break;
475 case RC_CHOP:
476 increment = 0;
477 break;
478 }
479
480 /* Truncate the mantissa */
481 tmp.sigl &= 0xfffff800;
482
483 if ( increment )
484 {
485 if ( tmp.sigl >= 0xfffff800 )
486 {
487 /* the sigl part overflows */
488 if ( tmp.sigh == 0xffffffff )
489 {
490 /* The sigh part overflows */
491 tmp.sigh = 0x80000000;
492 exp++;
493 if (exp >= EXP_OVER)
494 goto overflow;
495 }
496 else
497 {
498 tmp.sigh ++;
499 }
500 tmp.sigl = 0x00000000;
501 }
502 else
503 {
504 /* We only need to increment sigl */
505 tmp.sigl += 0x00000800;
506 }
507 }
508 }
509 else
510 precision_loss = 0;
511
512 l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21);
513 l[1] = ((tmp.sigh >> 11) & 0xfffff);
514
515 if ( exp > DOUBLE_Emax )
516 {
517 overflow:
518 EXCEPTION(EX_Overflow);
519 if ( !(control_word & CW_Overflow) )
520 return 0;
521 set_precision_flag_up();
522 if ( !(control_word & CW_Precision) )
523 return 0;
524
525 /* This is a special case: see sec 16.2.5.1 of the 80486 book */
526 /* Overflow to infinity */
527 l[0] = 0x00000000; /* Set to */
528 l[1] = 0x7ff00000; /* + INF */
529 }
530 else
531 {
532 if ( precision_loss )
533 {
534 if ( increment )
535 set_precision_flag_up();
536 else
537 set_precision_flag_down();
538 }
539 /* Add the exponent */
540 l[1] |= (((exp+DOUBLE_Ebias) & 0x7ff) << 20);
541 }
542 }
543 }
544 else if (st0_tag == TAG_Zero)
545 {
546 /* Number is zero */
547 l[0] = 0;
548 l[1] = 0;
549 }
550 else if ( st0_tag == TAG_Special )
551 {
552 st0_tag = FPU_Special(st0_ptr);
553 if ( st0_tag == TW_Denormal )
554 {
555 /* A denormal will always underflow. */
556#ifndef PECULIAR_486
557 /* An 80486 is supposed to be able to generate
558 a denormal exception here, but... */
559 /* Underflow has priority. */
560 if ( control_word & CW_Underflow )
561 denormal_operand();
562#endif /* PECULIAR_486 */
563 reg_copy(st0_ptr, &tmp);
564 goto denormal_arg;
565 }
566 else if (st0_tag == TW_Infinity)
567 {
568 l[0] = 0;
569 l[1] = 0x7ff00000;
570 }
571 else if (st0_tag == TW_NaN)
572 {
573 /* Is it really a NaN ? */
574 if ( (exponent(st0_ptr) == EXP_OVER)
575 && (st0_ptr->sigh & 0x80000000) )
576 {
577 /* See if we can get a valid NaN from the FPU_REG */
578 l[0] = (st0_ptr->sigl >> 11) | (st0_ptr->sigh << 21);
579 l[1] = ((st0_ptr->sigh >> 11) & 0xfffff);
580 if ( !(st0_ptr->sigh & 0x40000000) )
581 {
582 /* It is a signalling NaN */
583 EXCEPTION(EX_Invalid);
584 if ( !(control_word & CW_Invalid) )
585 return 0;
586 l[1] |= (0x40000000 >> 11);
587 }
588 l[1] |= 0x7ff00000;
589 }
590 else
591 {
592 /* It is an unsupported data type */
593 EXCEPTION(EX_Invalid);
594 if ( !(control_word & CW_Invalid) )
595 return 0;
596 l[0] = 0;
597 l[1] = 0xfff80000;
598 }
599 }
600 }
601 else if ( st0_tag == TAG_Empty )
602 {
603 /* Empty register (stack underflow) */
604 EXCEPTION(EX_StackUnder);
605 if ( control_word & CW_Invalid )
606 {
607 /* The masked response */
608 /* Put out the QNaN indefinite */
609 RE_ENTRANT_CHECK_OFF;
610 FPU_access_ok(VERIFY_WRITE,dfloat,8);
611 FPU_put_user(0, (unsigned long __user *) dfloat);
612 FPU_put_user(0xfff80000, 1 + (unsigned long __user *) dfloat);
613 RE_ENTRANT_CHECK_ON;
614 return 1;
615 }
616 else
617 return 0;
618 }
619 if ( getsign(st0_ptr) )
620 l[1] |= 0x80000000;
621
622 RE_ENTRANT_CHECK_OFF;
623 FPU_access_ok(VERIFY_WRITE,dfloat,8);
624 FPU_put_user(l[0], (unsigned long __user *)dfloat);
625 FPU_put_user(l[1], 1 + (unsigned long __user *)dfloat);
626 RE_ENTRANT_CHECK_ON;
627
628 return 1;
629}
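
[Editorial note] The rounding decision above inspects the 11 low significand bits that cannot be represented in a double. A standalone sketch of that decision table (editorial; the masks are copied from the code above, but the rc values 0-3 are demo encodings rather than the shifted control-word RC_* constants):

#include <stdio.h>
#include <stdint.h>

static int need_increment(unsigned rc, int positive, uint32_t sigl)
{
	uint32_t lost = sigl & 0x7ff;	/* bits that don't fit a double */

	switch (rc) {
	case 0: /* nearest, ties to even */
		return (lost > 0x400) || ((sigl & 0xc00) == 0xc00);
	case 1: /* towards -infinity */
		return positive ? 0 : (lost != 0);
	case 2: /* towards +infinity */
		return positive ? (lost != 0) : 0;
	default: /* chop */
		return 0;
	}
}

int main(void)
{
	printf("%d\n", need_increment(0, 1, 0x401)); /* just over half: 1 */
	return 0;
}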
630
631
632/* Put a float into user memory */
633int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single)
634{
635 long templ = 0;
636 unsigned long increment = 0; /* avoid gcc warnings */
637 int precision_loss;
638 int exp;
639 FPU_REG tmp;
640
641 if ( st0_tag == TAG_Valid )
642 {
643
644 reg_copy(st0_ptr, &tmp);
645 exp = exponent(&tmp);
646
647 if ( exp < SINGLE_Emin )
648 {
649 addexponent(&tmp, -SINGLE_Emin + 23); /* largest exp to be 22 */
650
651 denormal_arg:
652
653 if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) )
654 {
655#ifdef PECULIAR_486
656 /* Did it round to a non-denormal ? */
657			/* This behaviour might be regarded as peculiar; it appears
658 that the 80486 rounds to the dest precision, then
659 converts to decide underflow. */
660 if ( !((tmp.sigl == 0x00800000) &&
661 ((st0_ptr->sigh & 0x000000ff) || st0_ptr->sigl)) )
662#endif /* PECULIAR_486 */
663 {
664 EXCEPTION(EX_Underflow);
665 /* This is a special case: see sec 16.2.5.1 of
666 the 80486 book */
667 if ( !(control_word & CW_Underflow) )
668 return 0;
669 }
670 EXCEPTION(precision_loss);
671 if ( !(control_word & CW_Precision) )
672 return 0;
673 }
674 templ = tmp.sigl;
675 }
676 else
677 {
678 if ( tmp.sigl | (tmp.sigh & 0x000000ff) )
679 {
680 unsigned long sigh = tmp.sigh;
681 unsigned long sigl = tmp.sigl;
682
683 precision_loss = 1;
684 switch (control_word & CW_RC)
685 {
686 case RC_RND:
687 increment = ((sigh & 0xff) > 0x80) /* more than half */
688 || (((sigh & 0xff) == 0x80) && sigl) /* more than half */
689 || ((sigh & 0x180) == 0x180); /* round to even */
690 break;
691 case RC_DOWN: /* towards -infinity */
692 increment = signpositive(&tmp)
693 ? 0 : (sigl | (sigh & 0xff));
694 break;
695 case RC_UP: /* towards +infinity */
696 increment = signpositive(&tmp)
697 ? (sigl | (sigh & 0xff)) : 0;
698 break;
699 case RC_CHOP:
700 increment = 0;
701 break;
702 }
703
704 /* Truncate part of the mantissa */
705 tmp.sigl = 0;
706
707 if (increment)
708 {
709 if ( sigh >= 0xffffff00 )
710 {
711 /* The sigh part overflows */
712 tmp.sigh = 0x80000000;
713 exp++;
714 if ( exp >= EXP_OVER )
715 goto overflow;
716 }
717 else
718 {
719 tmp.sigh &= 0xffffff00;
720 tmp.sigh += 0x100;
721 }
722 }
723 else
724 {
725 tmp.sigh &= 0xffffff00; /* Finish the truncation */
726 }
727 }
728 else
729 precision_loss = 0;
730
731 templ = (tmp.sigh >> 8) & 0x007fffff;
732
733 if ( exp > SINGLE_Emax )
734 {
735 overflow:
736 EXCEPTION(EX_Overflow);
737 if ( !(control_word & CW_Overflow) )
738 return 0;
739 set_precision_flag_up();
740 if ( !(control_word & CW_Precision) )
741 return 0;
742
743 /* This is a special case: see sec 16.2.5.1 of the 80486 book. */
744 /* Masked response is overflow to infinity. */
745 templ = 0x7f800000;
746 }
747 else
748 {
749 if ( precision_loss )
750 {
751 if ( increment )
752 set_precision_flag_up();
753 else
754 set_precision_flag_down();
755 }
756 /* Add the exponent */
757 templ |= ((exp+SINGLE_Ebias) & 0xff) << 23;
758 }
759 }
760 }
761 else if (st0_tag == TAG_Zero)
762 {
763 templ = 0;
764 }
765 else if ( st0_tag == TAG_Special )
766 {
767 st0_tag = FPU_Special(st0_ptr);
768 if (st0_tag == TW_Denormal)
769 {
770 reg_copy(st0_ptr, &tmp);
771
772 /* A denormal will always underflow. */
773#ifndef PECULIAR_486
774 /* An 80486 is supposed to be able to generate
775 a denormal exception here, but... */
776 /* Underflow has priority. */
777 if ( control_word & CW_Underflow )
778 denormal_operand();
779#endif /* PECULIAR_486 */
780 goto denormal_arg;
781 }
782 else if (st0_tag == TW_Infinity)
783 {
784 templ = 0x7f800000;
785 }
786 else if (st0_tag == TW_NaN)
787 {
788 /* Is it really a NaN ? */
789 if ( (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000) )
790 {
791 /* See if we can get a valid NaN from the FPU_REG */
792 templ = st0_ptr->sigh >> 8;
793 if ( !(st0_ptr->sigh & 0x40000000) )
794 {
795 /* It is a signalling NaN */
796 EXCEPTION(EX_Invalid);
797 if ( !(control_word & CW_Invalid) )
798 return 0;
799 templ |= (0x40000000 >> 8);
800 }
801 templ |= 0x7f800000;
802 }
803 else
804 {
805 /* It is an unsupported data type */
806 EXCEPTION(EX_Invalid);
807 if ( !(control_word & CW_Invalid) )
808 return 0;
809 templ = 0xffc00000;
810 }
811 }
812#ifdef PARANOID
813 else
814 {
815 EXCEPTION(EX_INTERNAL|0x164);
816 return 0;
817 }
818#endif
819 }
820 else if ( st0_tag == TAG_Empty )
821 {
822 /* Empty register (stack underflow) */
823 EXCEPTION(EX_StackUnder);
824      if ( control_word & CW_Invalid )
825 {
826 /* The masked response */
827 /* Put out the QNaN indefinite */
828 RE_ENTRANT_CHECK_OFF;
829 FPU_access_ok(VERIFY_WRITE,single,4);
830 FPU_put_user(0xffc00000, (unsigned long __user *) single);
831 RE_ENTRANT_CHECK_ON;
832 return 1;
833 }
834 else
835 return 0;
836 }
837#ifdef PARANOID
838 else
839 {
840 EXCEPTION(EX_INTERNAL|0x163);
841 return 0;
842 }
843#endif
844 if ( getsign(st0_ptr) )
845 templ |= 0x80000000;
846
847 RE_ENTRANT_CHECK_OFF;
848 FPU_access_ok(VERIFY_WRITE,single,4);
849 FPU_put_user(templ,(unsigned long __user *) single);
850 RE_ENTRANT_CHECK_ON;
851
852 return 1;
853}
854
855
856/* Put a long long into user memory */
857int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d)
858{
859 FPU_REG t;
860 long long tll;
861 int precision_loss;
862
863 if ( st0_tag == TAG_Empty )
864 {
865 /* Empty register (stack underflow) */
866 EXCEPTION(EX_StackUnder);
867 goto invalid_operand;
868 }
869 else if ( st0_tag == TAG_Special )
870 {
871 st0_tag = FPU_Special(st0_ptr);
872 if ( (st0_tag == TW_Infinity) ||
873 (st0_tag == TW_NaN) )
874 {
875 EXCEPTION(EX_Invalid);
876 goto invalid_operand;
877 }
878 }
879
880 reg_copy(st0_ptr, &t);
881 precision_loss = FPU_round_to_int(&t, st0_tag);
882 ((long *)&tll)[0] = t.sigl;
883 ((long *)&tll)[1] = t.sigh;
884 if ( (precision_loss == 1) ||
885 ((t.sigh & 0x80000000) &&
886 !((t.sigh == 0x80000000) && (t.sigl == 0) &&
887 signnegative(&t))) )
888 {
889 EXCEPTION(EX_Invalid);
890 /* This is a special case: see sec 16.2.5.1 of the 80486 book */
891 invalid_operand:
892      if ( control_word & CW_Invalid )
893 {
894 /* Produce something like QNaN "indefinite" */
895 tll = 0x8000000000000000LL;
896 }
897 else
898 return 0;
899 }
900 else
901 {
902 if ( precision_loss )
903 set_precision_flag(precision_loss);
904 if ( signnegative(&t) )
905 tll = - tll;
906 }
907
908 RE_ENTRANT_CHECK_OFF;
909 FPU_access_ok(VERIFY_WRITE,d,8);
910 copy_to_user(d, &tll, 8);
911 RE_ENTRANT_CHECK_ON;
912
913 return 1;
914}
915
916
917/* Put a long into user memory */
918int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d)
919{
920 FPU_REG t;
921 int precision_loss;
922
923 if ( st0_tag == TAG_Empty )
924 {
925 /* Empty register (stack underflow) */
926 EXCEPTION(EX_StackUnder);
927 goto invalid_operand;
928 }
929 else if ( st0_tag == TAG_Special )
930 {
931 st0_tag = FPU_Special(st0_ptr);
932 if ( (st0_tag == TW_Infinity) ||
933 (st0_tag == TW_NaN) )
934 {
935 EXCEPTION(EX_Invalid);
936 goto invalid_operand;
937 }
938 }
939
940 reg_copy(st0_ptr, &t);
941 precision_loss = FPU_round_to_int(&t, st0_tag);
942 if (t.sigh ||
943 ((t.sigl & 0x80000000) &&
944 !((t.sigl == 0x80000000) && signnegative(&t))) )
945 {
946 EXCEPTION(EX_Invalid);
947 /* This is a special case: see sec 16.2.5.1 of the 80486 book */
948 invalid_operand:
949      if ( control_word & CW_Invalid )
950 {
951 /* Produce something like QNaN "indefinite" */
952 t.sigl = 0x80000000;
953 }
954 else
955 return 0;
956 }
957 else
958 {
959 if ( precision_loss )
960 set_precision_flag(precision_loss);
961 if ( signnegative(&t) )
962 t.sigl = -(long)t.sigl;
963 }
964
965 RE_ENTRANT_CHECK_OFF;
966 FPU_access_ok(VERIFY_WRITE,d,4);
967 FPU_put_user(t.sigl, (unsigned long __user *) d);
968 RE_ENTRANT_CHECK_ON;
969
970 return 1;
971}
972
973
974/* Put a short into user memory */
975int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d)
976{
977 FPU_REG t;
978 int precision_loss;
979
980 if ( st0_tag == TAG_Empty )
981 {
982 /* Empty register (stack underflow) */
983 EXCEPTION(EX_StackUnder);
984 goto invalid_operand;
985 }
986 else if ( st0_tag == TAG_Special )
987 {
988 st0_tag = FPU_Special(st0_ptr);
989 if ( (st0_tag == TW_Infinity) ||
990 (st0_tag == TW_NaN) )
991 {
992 EXCEPTION(EX_Invalid);
993 goto invalid_operand;
994 }
995 }
996
997 reg_copy(st0_ptr, &t);
998 precision_loss = FPU_round_to_int(&t, st0_tag);
999 if (t.sigh ||
1000 ((t.sigl & 0xffff8000) &&
1001 !((t.sigl == 0x8000) && signnegative(&t))) )
1002 {
1003 EXCEPTION(EX_Invalid);
1004 /* This is a special case: see sec 16.2.5.1 of the 80486 book */
1005 invalid_operand:
1006      if ( control_word & CW_Invalid )
1007 {
1008 /* Produce something like QNaN "indefinite" */
1009 t.sigl = 0x8000;
1010 }
1011 else
1012 return 0;
1013 }
1014 else
1015 {
1016 if ( precision_loss )
1017 set_precision_flag(precision_loss);
1018 if ( signnegative(&t) )
1019 t.sigl = -t.sigl;
1020 }
1021
1022 RE_ENTRANT_CHECK_OFF;
1023 FPU_access_ok(VERIFY_WRITE,d,2);
1024 FPU_put_user((short)t.sigl, d);
1025 RE_ENTRANT_CHECK_ON;
1026
1027 return 1;
1028}
1029
1030
1031/* Put a packed bcd array into user memory */
1032int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d)
1033{
1034 FPU_REG t;
1035 unsigned long long ll;
1036 u_char b;
1037 int i, precision_loss;
1038 u_char sign = (getsign(st0_ptr) == SIGN_NEG) ? 0x80 : 0;
1039
1040 if ( st0_tag == TAG_Empty )
1041 {
1042 /* Empty register (stack underflow) */
1043 EXCEPTION(EX_StackUnder);
1044 goto invalid_operand;
1045 }
1046 else if ( st0_tag == TAG_Special )
1047 {
1048 st0_tag = FPU_Special(st0_ptr);
1049 if ( (st0_tag == TW_Infinity) ||
1050 (st0_tag == TW_NaN) )
1051 {
1052 EXCEPTION(EX_Invalid);
1053 goto invalid_operand;
1054 }
1055 }
1056
1057 reg_copy(st0_ptr, &t);
1058 precision_loss = FPU_round_to_int(&t, st0_tag);
1059 ll = significand(&t);
1060
1061  /* Check for overflow by comparing with 999999999999999999 decimal. */
1062 if ( (t.sigh > 0x0de0b6b3) ||
1063 ((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff)) )
1064 {
1065 EXCEPTION(EX_Invalid);
1066 /* This is a special case: see sec 16.2.5.1 of the 80486 book */
1067 invalid_operand:
1068 if ( control_word & CW_Invalid )
1069 {
1070 /* Produce the QNaN "indefinite" */
1071 RE_ENTRANT_CHECK_OFF;
1072 FPU_access_ok(VERIFY_WRITE,d,10);
1073 for ( i = 0; i < 7; i++)
1074 FPU_put_user(0, d+i); /* These bytes "undefined" */
1075 FPU_put_user(0xc0, d+7); /* This byte "undefined" */
1076 FPU_put_user(0xff, d+8);
1077 FPU_put_user(0xff, d+9);
1078 RE_ENTRANT_CHECK_ON;
1079 return 1;
1080 }
1081 else
1082 return 0;
1083 }
1084 else if ( precision_loss )
1085 {
1086 /* Precision loss doesn't stop the data transfer */
1087 set_precision_flag(precision_loss);
1088 }
1089
1090 RE_ENTRANT_CHECK_OFF;
1091 FPU_access_ok(VERIFY_WRITE,d,10);
1092 RE_ENTRANT_CHECK_ON;
1093 for ( i = 0; i < 9; i++)
1094 {
1095 b = FPU_div_small(&ll, 10);
1096 b |= (FPU_div_small(&ll, 10)) << 4;
1097 RE_ENTRANT_CHECK_OFF;
1098 FPU_put_user(b, d+i);
1099 RE_ENTRANT_CHECK_ON;
1100 }
1101 RE_ENTRANT_CHECK_OFF;
1102 FPU_put_user(sign, d+9);
1103 RE_ENTRANT_CHECK_ON;
1104
1105 return 1;
1106}
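
[Editorial note] Encoding reverses the load loop: each of the nine output bytes packs two decimal digits, least significant pair first, and the tenth byte carries the sign. A sketch in plain C (editorial; ordinary division stands in for FPU_div_small(), which in the source divides a 64-bit quantity in place and returns the remainder):

#include <stdio.h>

int main(void)
{
	unsigned long long ll = 123;
	unsigned char d[10] = { 0 };
	int i;

	for (i = 0; i < 9; i++) {
		unsigned char b = ll % 10; ll /= 10;	  /* low digit */
		b |= (unsigned char)((ll % 10) << 4); ll /= 10; /* high digit */
		d[i] = b;
	}
	d[9] = 0;	/* sign byte: 0x80 would mark a negative value */

	printf("%02x %02x\n", d[0], d[1]);	/* prints: 23 01 */
	return 0;
}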
1107
1108/*===========================================================================*/
1109
1110/* r gets mangled: its significand is replaced by the integer value and
1111   its sign field keeps the sign; the result is NOT normalized. */
1112/* The return value (in eax) is zero if the result is exact,
1113 if bits are changed due to rounding, truncation, etc, then
1114 a non-zero value is returned */
1115/* Overflow is signalled by a non-zero return value (in eax).
1116 In the case of overflow, the returned significand always has the
1117 largest possible value */
1118int FPU_round_to_int(FPU_REG *r, u_char tag)
1119{
1120 u_char very_big;
1121 unsigned eax;
1122
1123 if (tag == TAG_Zero)
1124 {
1125 /* Make sure that zero is returned */
1126 significand(r) = 0;
1127 return 0; /* o.k. */
1128 }
1129
1130 if (exponent(r) > 63)
1131 {
1132 r->sigl = r->sigh = ~0; /* The largest representable number */
1133 return 1; /* overflow */
1134 }
1135
1136 eax = FPU_shrxs(&r->sigl, 63 - exponent(r));
1137 very_big = !(~(r->sigh) | ~(r->sigl)); /* test for 0xfff...fff */
1138#define half_or_more (eax & 0x80000000)
1139#define frac_part (eax)
1140#define more_than_half ((eax & 0x80000001) == 0x80000001)
1141 switch (control_word & CW_RC)
1142 {
1143 case RC_RND:
1144 if ( more_than_half /* nearest */
1145 || (half_or_more && (r->sigl & 1)) ) /* odd -> even */
1146 {
1147 if ( very_big ) return 1; /* overflow */
1148 significand(r) ++;
1149 return PRECISION_LOST_UP;
1150 }
1151 break;
1152 case RC_DOWN:
1153 if (frac_part && getsign(r))
1154 {
1155 if ( very_big ) return 1; /* overflow */
1156 significand(r) ++;
1157 return PRECISION_LOST_UP;
1158 }
1159 break;
1160 case RC_UP:
1161 if (frac_part && !getsign(r))
1162 {
1163 if ( very_big ) return 1; /* overflow */
1164 significand(r) ++;
1165 return PRECISION_LOST_UP;
1166 }
1167 break;
1168 case RC_CHOP:
1169 break;
1170 }
1171
1172 return eax ? PRECISION_LOST_DOWN : 0;
1173
1174}
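
[Editorial note] The three macros above encode the round-to-nearest-even test: round up when the discarded fraction is strictly more than half an ulp, or exactly half while the kept significand is odd. A sketch of that decision (editorial; the layout of eax, with the first lost bit in the MSB and the lower lost bits folded into the LSB as a sticky bit, is an assumption read off the macros rather than stated in the source):

#include <stdio.h>
#include <stdint.h>

static int round_nearest_even(uint64_t *sig, uint32_t frac)
{
	int half_or_more   = (frac & 0x80000000u) != 0;
	int more_than_half = (frac & 0x80000001u) == 0x80000001u;

	if (more_than_half || (half_or_more && (*sig & 1))) {
		(*sig)++;		/* round up (ties go to even) */
		return 1;		/* precision lost, rounded up */
	}
	return frac ? -1 : 0;		/* lost down, or exact */
}

int main(void)
{
	uint64_t sig = 3;

	/* a tie against an odd significand rounds up: prints "1 sig=4" */
	printf("%d sig=%llu\n", round_nearest_even(&sig, 0x80000000u),
	       (unsigned long long)sig);
	return 0;
}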
1175
1176/*===========================================================================*/
1177
1178u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s)
1179{
1180 unsigned short tag_word = 0;
1181 u_char tag;
1182 int i;
1183
1184 if ( (addr_modes.default_mode == VM86) ||
1185 ((addr_modes.default_mode == PM16)
1186 ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) )
1187 {
1188 RE_ENTRANT_CHECK_OFF;
1189 FPU_access_ok(VERIFY_READ, s, 0x0e);
1190 FPU_get_user(control_word, (unsigned short __user *) s);
1191 FPU_get_user(partial_status, (unsigned short __user *) (s+2));
1192 FPU_get_user(tag_word, (unsigned short __user *) (s+4));
1193 FPU_get_user(instruction_address.offset, (unsigned short __user *) (s+6));
1194 FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+8));
1195 FPU_get_user(operand_address.offset, (unsigned short __user *) (s+0x0a));
1196 FPU_get_user(operand_address.selector, (unsigned short __user *) (s+0x0c));
1197 RE_ENTRANT_CHECK_ON;
1198 s += 0x0e;
1199 if ( addr_modes.default_mode == VM86 )
1200 {
1201 instruction_address.offset
1202 += (instruction_address.selector & 0xf000) << 4;
1203 operand_address.offset += (operand_address.selector & 0xf000) << 4;
1204 }
1205 }
1206 else
1207 {
1208 RE_ENTRANT_CHECK_OFF;
1209 FPU_access_ok(VERIFY_READ, s, 0x1c);
1210 FPU_get_user(control_word, (unsigned short __user *) s);
1211 FPU_get_user(partial_status, (unsigned short __user *) (s+4));
1212 FPU_get_user(tag_word, (unsigned short __user *) (s+8));
1213 FPU_get_user(instruction_address.offset, (unsigned long __user *) (s+0x0c));
1214 FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+0x10));
1215 FPU_get_user(instruction_address.opcode, (unsigned short __user *) (s+0x12));
1216 FPU_get_user(operand_address.offset, (unsigned long __user *) (s+0x14));
1217 FPU_get_user(operand_address.selector, (unsigned long __user *) (s+0x18));
1218 RE_ENTRANT_CHECK_ON;
1219 s += 0x1c;
1220 }
1221
1222#ifdef PECULIAR_486
1223 control_word &= ~0xe080;
1224#endif /* PECULIAR_486 */
1225
1226 top = (partial_status >> SW_Top_Shift) & 7;
1227
1228 if ( partial_status & ~control_word & CW_Exceptions )
1229 partial_status |= (SW_Summary | SW_Backward);
1230 else
1231 partial_status &= ~(SW_Summary | SW_Backward);
1232
1233 for ( i = 0; i < 8; i++ )
1234 {
1235 tag = tag_word & 3;
1236 tag_word >>= 2;
1237
1238 if ( tag == TAG_Empty )
1239 /* New tag is empty. Accept it */
1240 FPU_settag(i, TAG_Empty);
1241 else if ( FPU_gettag(i) == TAG_Empty )
1242 {
1243 /* Old tag is empty and new tag is not empty. New tag is determined
1244 by old reg contents */
1245 if ( exponent(&fpu_register(i)) == - EXTENDED_Ebias )
1246 {
1247 if ( !(fpu_register(i).sigl | fpu_register(i).sigh) )
1248 FPU_settag(i, TAG_Zero);
1249 else
1250 FPU_settag(i, TAG_Special);
1251 }
1252 else if ( exponent(&fpu_register(i)) == 0x7fff - EXTENDED_Ebias )
1253 {
1254 FPU_settag(i, TAG_Special);
1255 }
1256 else if ( fpu_register(i).sigh & 0x80000000 )
1257 FPU_settag(i, TAG_Valid);
1258 else
1259 FPU_settag(i, TAG_Special); /* An Un-normal */
1260 }
1261 /* Else old tag is not empty and new tag is not empty. Old tag
1262 remains correct */
1263 }
1264
1265 return s;
1266}
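
[Editorial note] The tag-word loop above consumes two bits per register, register 0 in the least significant pair. A minimal decode demo (editorial; the example tag word is invented, with registers 0-3 tagged valid, zero, special, empty and the rest empty):

#include <stdio.h>

int main(void)
{
	unsigned short tag_word = 0xffe4; /* regs 0-3: 00,01,10,11; rest 11 */
	int i;

	for (i = 0; i < 8; i++) {
		printf("reg %d tag %d\n", i, tag_word & 3);
		tag_word >>= 2;	/* next register's two tag bits */
	}
	return 0;
}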
1267
1268
1269void frstor(fpu_addr_modes addr_modes, u_char __user *data_address)
1270{
1271 int i, regnr;
1272 u_char __user *s = fldenv(addr_modes, data_address);
1273 int offset = (top & 7) * 10, other = 80 - offset;
1274
1275 /* Copy all registers in stack order. */
1276 RE_ENTRANT_CHECK_OFF;
1277 FPU_access_ok(VERIFY_READ,s,80);
1278 __copy_from_user(register_base+offset, s, other);
1279 if ( offset )
1280 __copy_from_user(register_base, s+other, offset);
1281 RE_ENTRANT_CHECK_ON;
1282
1283 for ( i = 0; i < 8; i++ )
1284 {
1285 regnr = (i+top) & 7;
1286 if ( FPU_gettag(regnr) != TAG_Empty )
1287	/* The loaded data overrides all other cases. */
1288 FPU_settag(regnr, FPU_tagof(&st(i)));
1289 }
1290
1291}
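
[Editorial note] frstor() (and fsave() below) keep registers in memory in physical order but transfer them in stack order, so the 80-byte copy is split at top, ten bytes per register. A sketch of the rotation (editorial; plain memcpy stands in for the __copy_from_user()/__copy_to_user() pair):

#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char image[80], regs[80];
	int top = 3;
	int offset = (top & 7) * 10, other = 80 - offset;

	memset(image, 0, sizeof(image));
	image[0] = 0xaa;			/* first byte of st(0) */

	memcpy(regs + offset, image, other);	/* st(0).. into regs top..7 */
	memcpy(regs, image + other, offset);	/* wrap around to regs 0..top-1 */

	printf("st(0) landed at physical reg %d: %02x\n", top, regs[offset]);
	return 0;
}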
1292
1293
1294u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d)
1295{
1296 if ( (addr_modes.default_mode == VM86) ||
1297 ((addr_modes.default_mode == PM16)
1298 ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) )
1299 {
1300 RE_ENTRANT_CHECK_OFF;
1301 FPU_access_ok(VERIFY_WRITE,d,14);
1302#ifdef PECULIAR_486
1303 FPU_put_user(control_word & ~0xe080, (unsigned long __user *) d);
1304#else
1305 FPU_put_user(control_word, (unsigned short __user *) d);
1306#endif /* PECULIAR_486 */
1307 FPU_put_user(status_word(), (unsigned short __user *) (d+2));
1308 FPU_put_user(fpu_tag_word, (unsigned short __user *) (d+4));
1309 FPU_put_user(instruction_address.offset, (unsigned short __user *) (d+6));
1310 FPU_put_user(operand_address.offset, (unsigned short __user *) (d+0x0a));
1311 if ( addr_modes.default_mode == VM86 )
1312 {
1313 FPU_put_user((instruction_address.offset & 0xf0000) >> 4,
1314 (unsigned short __user *) (d+8));
1315 FPU_put_user((operand_address.offset & 0xf0000) >> 4,
1316 (unsigned short __user *) (d+0x0c));
1317 }
1318 else
1319 {
1320 FPU_put_user(instruction_address.selector, (unsigned short __user *) (d+8));
1321 FPU_put_user(operand_address.selector, (unsigned short __user *) (d+0x0c));
1322 }
1323 RE_ENTRANT_CHECK_ON;
1324 d += 0x0e;
1325 }
1326 else
1327 {
1328 RE_ENTRANT_CHECK_OFF;
1329 FPU_access_ok(VERIFY_WRITE, d, 7*4);
1330#ifdef PECULIAR_486
1331 control_word &= ~0xe080;
1332 /* An 80486 sets nearly all of the reserved bits to 1. */
1333 control_word |= 0xffff0040;
1334 partial_status = status_word() | 0xffff0000;
1335 fpu_tag_word |= 0xffff0000;
1336 I387.soft.fcs &= ~0xf8000000;
1337 I387.soft.fos |= 0xffff0000;
1338#endif /* PECULIAR_486 */
1339 __copy_to_user(d, &control_word, 7*4);
1340 RE_ENTRANT_CHECK_ON;
1341 d += 0x1c;
1342 }
1343
1344 control_word |= CW_Exceptions;
1345 partial_status &= ~(SW_Summary | SW_Backward);
1346
1347 return d;
1348}
1349
1350
1351void fsave(fpu_addr_modes addr_modes, u_char __user *data_address)
1352{
1353 u_char __user *d;
1354 int offset = (top & 7) * 10, other = 80 - offset;
1355
1356 d = fstenv(addr_modes, data_address);
1357
1358 RE_ENTRANT_CHECK_OFF;
1359 FPU_access_ok(VERIFY_WRITE,d,80);
1360
1361 /* Copy all registers in stack order. */
1362 __copy_to_user(d, register_base+offset, other);
1363 if ( offset )
1364 __copy_to_user(d+other, register_base, offset);
1365 RE_ENTRANT_CHECK_ON;
1366
1367 finit();
1368}
1369
1370/*===========================================================================*/
diff --git a/arch/i386/math-emu/reg_mul.c b/arch/i386/math-emu/reg_mul.c
new file mode 100644
index 000000000000..40f50b61bc67
--- /dev/null
+++ b/arch/i386/math-emu/reg_mul.c
@@ -0,0 +1,132 @@
1/*---------------------------------------------------------------------------+
2 | reg_mul.c |
3 | |
4 | Multiply one FPU_REG by another, put the result in a destination FPU_REG. |
5 | |
6 | Copyright (C) 1992,1993,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
8 | E-mail billm@suburbia.net |
9 | |
10 | Returns the tag of the result if no exceptions or errors occurred. |
11 | |
12 +---------------------------------------------------------------------------*/
13
14/*---------------------------------------------------------------------------+
15 | The destination may be any FPU_REG, including one of the source FPU_REGs. |
16 +---------------------------------------------------------------------------*/
17
18#include "fpu_emu.h"
19#include "exception.h"
20#include "reg_constant.h"
21#include "fpu_system.h"
22
23
24/*
25 Multiply two registers to give a register result.
26   The sources are st(deststnr) and (b,tagb).
27 The destination is st(deststnr).
28 */
29/* This routine must be called with non-empty source registers */
30int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w)
31{
32 FPU_REG *a = &st(deststnr);
33 FPU_REG *dest = a;
34 u_char taga = FPU_gettagi(deststnr);
35 u_char saved_sign = getsign(dest);
36 u_char sign = (getsign(a) ^ getsign(b));
37 int tag;
38
39
40 if ( !(taga | tagb) )
41 {
42 /* Both regs Valid, this should be the most common case. */
43
44 tag = FPU_u_mul(a, b, dest, control_w, sign, exponent(a) + exponent(b));
45 if ( tag < 0 )
46 {
47 setsign(dest, saved_sign);
48 return tag;
49 }
50 FPU_settagi(deststnr, tag);
51 return tag;
52 }
53
54 if ( taga == TAG_Special )
55 taga = FPU_Special(a);
56 if ( tagb == TAG_Special )
57 tagb = FPU_Special(b);
58
59 if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
60 || ((taga == TW_Denormal) && (tagb == TAG_Valid))
61 || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
62 {
63 FPU_REG x, y;
64 if ( denormal_operand() < 0 )
65 return FPU_Exception;
66
67 FPU_to_exp16(a, &x);
68 FPU_to_exp16(b, &y);
69 tag = FPU_u_mul(&x, &y, dest, control_w, sign,
70 exponent16(&x) + exponent16(&y));
71 if ( tag < 0 )
72 {
73 setsign(dest, saved_sign);
74 return tag;
75 }
76 FPU_settagi(deststnr, tag);
77 return tag;
78 }
79 else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) )
80 {
81 if ( ((tagb == TW_Denormal) || (taga == TW_Denormal))
82 && (denormal_operand() < 0) )
83 return FPU_Exception;
84
85 /* Must have either both arguments == zero, or
86 one valid and the other zero.
87 The result is therefore zero. */
88 FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
89	   /* The 80486 book says that the answer is always +0, but a real
90	      80486 gives the zero the sign of the product (signa ^ signb);
91	      IEEE-754 apparently requires this behaviour too. */
92 setsign(dest, sign);
93 return TAG_Zero;
94 }
95 /* Must have infinities, NaNs, etc */
96 else if ( (taga == TW_NaN) || (tagb == TW_NaN) )
97 {
98 return real_2op_NaN(b, tagb, deststnr, &st(0));
99 }
100 else if ( ((taga == TW_Infinity) && (tagb == TAG_Zero))
101 || ((tagb == TW_Infinity) && (taga == TAG_Zero)) )
102 {
103 return arith_invalid(deststnr); /* Zero*Infinity is invalid */
104 }
105 else if ( ((taga == TW_Denormal) || (tagb == TW_Denormal))
106 && (denormal_operand() < 0) )
107 {
108 return FPU_Exception;
109 }
110 else if (taga == TW_Infinity)
111 {
112 FPU_copy_to_regi(a, TAG_Special, deststnr);
113 setsign(dest, sign);
114 return TAG_Special;
115 }
116 else if (tagb == TW_Infinity)
117 {
118 FPU_copy_to_regi(b, TAG_Special, deststnr);
119 setsign(dest, sign);
120 return TAG_Special;
121 }
122
123#ifdef PARANOID
124 else
125 {
126 EXCEPTION(EX_INTERNAL|0x102);
127 return FPU_Exception;
128 }
129#endif /* PARANOID */
130
131 return 0;
132}
diff --git a/arch/i386/math-emu/reg_norm.S b/arch/i386/math-emu/reg_norm.S
new file mode 100644
index 000000000000..8b6352efceef
--- /dev/null
+++ b/arch/i386/math-emu/reg_norm.S
@@ -0,0 +1,147 @@
1/*---------------------------------------------------------------------------+
2 | reg_norm.S |
3 | |
4 | Copyright (C) 1992,1993,1994,1995,1997 |
5 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
6 | Australia. E-mail billm@suburbia.net |
7 | |
8 | Normalize the value in a FPU_REG. |
9 | |
10 | Call from C as: |
11 | int FPU_normalize(FPU_REG *n) |
12 | |
13 | int FPU_normalize_nuo(FPU_REG *n) |
14 | |
15 | Return value is the tag of the answer, or-ed with FPU_Exception if |
16 | one was raised, or -1 on internal error. |
17 | |
18 +---------------------------------------------------------------------------*/
19
20#include "fpu_emu.h"
21
22
23.text
24ENTRY(FPU_normalize)
25 pushl %ebp
26 movl %esp,%ebp
27 pushl %ebx
28
29 movl PARAM1,%ebx
30
31 movl SIGH(%ebx),%edx
32 movl SIGL(%ebx),%eax
33
34 orl %edx,%edx /* ms bits */
35 js L_done /* Already normalized */
36 jnz L_shift_1 /* Shift left 1 - 31 bits */
37
38 orl %eax,%eax
39 jz L_zero /* The contents are zero */
40
41 movl %eax,%edx
42 xorl %eax,%eax
43 subw $32,EXP(%ebx) /* This can cause an underflow */
44
45/* We need to shift left by 1 - 31 bits */
46L_shift_1:
47 bsrl %edx,%ecx /* get the required shift in %ecx */
48 subl $31,%ecx
49 negl %ecx
50 shld %cl,%eax,%edx
51 shl %cl,%eax
52 subw %cx,EXP(%ebx) /* This can cause an underflow */
53
54 movl %edx,SIGH(%ebx)
55 movl %eax,SIGL(%ebx)
56
57L_done:
58 cmpw EXP_OVER,EXP(%ebx)
59 jge L_overflow
60
61 cmpw EXP_UNDER,EXP(%ebx)
62 jle L_underflow
63
64L_exit_valid:
65 movl TAG_Valid,%eax
66
67 /* Convert the exponent to 80x87 form. */
68 addw EXTENDED_Ebias,EXP(%ebx)
69 andw $0x7fff,EXP(%ebx)
70
71L_exit:
72 popl %ebx
73 leave
74 ret
75
76
77L_zero:
78 movw $0,EXP(%ebx)
79 movl TAG_Zero,%eax
80 jmp L_exit
81
82L_underflow:
83 /* Convert the exponent to 80x87 form. */
84 addw EXTENDED_Ebias,EXP(%ebx)
85 push %ebx
86 call arith_underflow
87 pop %ebx
88 jmp L_exit
89
90L_overflow:
91 /* Convert the exponent to 80x87 form. */
92 addw EXTENDED_Ebias,EXP(%ebx)
93 push %ebx
94 call arith_overflow
95 pop %ebx
96 jmp L_exit
97
98
99
100/* Normalise without reporting underflow or overflow */
101ENTRY(FPU_normalize_nuo)
102 pushl %ebp
103 movl %esp,%ebp
104 pushl %ebx
105
106 movl PARAM1,%ebx
107
108 movl SIGH(%ebx),%edx
109 movl SIGL(%ebx),%eax
110
111 orl %edx,%edx /* ms bits */
112 js L_exit_nuo_valid /* Already normalized */
113 jnz L_nuo_shift_1 /* Shift left 1 - 31 bits */
114
115 orl %eax,%eax
116 jz L_exit_nuo_zero /* The contents are zero */
117
118 movl %eax,%edx
119 xorl %eax,%eax
120 subw $32,EXP(%ebx) /* This can cause an underflow */
121
122/* We need to shift left by 1 - 31 bits */
123L_nuo_shift_1:
124 bsrl %edx,%ecx /* get the required shift in %ecx */
125 subl $31,%ecx
126 negl %ecx
127 shld %cl,%eax,%edx
128 shl %cl,%eax
129 subw %cx,EXP(%ebx) /* This can cause an underflow */
130
131 movl %edx,SIGH(%ebx)
132 movl %eax,SIGL(%ebx)
133
134L_exit_nuo_valid:
135 movl TAG_Valid,%eax
136
137 popl %ebx
138 leave
139 ret
140
141L_exit_nuo_zero:
142 movl TAG_Zero,%eax
143 movw EXP_UNDER,EXP(%ebx)
144
145 popl %ebx
146 leave
147 ret
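
[Editorial note] In C terms, FPU_normalize_nuo amounts to a count-leading-zeros on the 64-bit significand followed by a shift and an exponent adjustment, with no underflow or overflow reporting. A sketch (editorial; __builtin_clzll is a GCC builtin, and the zero case only returns the tag here, whereas the asm also stores EXP_UNDER):

#include <stdio.h>
#include <stdint.h>

static int normalize_nuo(uint64_t *sig, int16_t *exp)
{
	if (*sig == 0)
		return 1;	/* TAG_Zero; the asm also sets EXP_UNDER */

	int shift = __builtin_clzll(*sig);	/* 0..63 leading zero bits */
	*sig <<= shift;
	*exp -= shift;
	return 0;		/* TAG_Valid */
}

int main(void)
{
	uint64_t sig = 0x0000000000000001ULL;
	int16_t exp = 63;
	int tag = normalize_nuo(&sig, &exp);

	/* prints: tag=0 sig=8000000000000000 exp=0 */
	printf("tag=%d sig=%016llx exp=%d\n", tag,
	       (unsigned long long)sig, exp);
	return 0;
}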
diff --git a/arch/i386/math-emu/reg_round.S b/arch/i386/math-emu/reg_round.S
new file mode 100644
index 000000000000..d1d4e48b4f67
--- /dev/null
+++ b/arch/i386/math-emu/reg_round.S
@@ -0,0 +1,708 @@
1 .file "reg_round.S"
2/*---------------------------------------------------------------------------+
3 | reg_round.S |
4 | |
5 | Rounding/truncation/etc for FPU basic arithmetic functions. |
6 | |
7 | Copyright (C) 1993,1995,1997 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@suburbia.net |
10 | |
11 | This code has four possible entry points. |
12 | The following must be entered by a jmp instruction: |
13 | fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. |
14 | |
15 | The FPU_round entry point is intended to be used by C code. |
16 | From C, call as: |
17 | int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
18 | |
19 | Return value is the tag of the answer, or-ed with FPU_Exception if |
20 | one was raised, or -1 on internal error. |
21 | |
22 | For correct "up" and "down" rounding, the argument must have the correct |
23 | sign. |
24 | |
25 +---------------------------------------------------------------------------*/
26
27/*---------------------------------------------------------------------------+
28 | Four entry points. |
29 | |
30 | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: |
31 | %eax:%ebx 64 bit significand |
32 | %edx 32 bit extension of the significand |
33 | %edi pointer to an FPU_REG for the result to be stored |
34 | stack calling function must have set up a C stack frame and |
35 | pushed %esi, %edi, and %ebx |
36 | |
37 | Needed just for the fpu_reg_round_sqrt entry point: |
38 | %cx A control word in the same format as the FPU control word. |
39 | Otherwise, PARAM4 must give such a value. |
40 | |
41 | |
42 | The significand and its extension are assumed to be exact in the |
43 | following sense: |
44 | If the significand by itself is the exact result then the significand |
45 | extension (%edx) must contain 0, otherwise the significand extension |
46 | must be non-zero. |
47 | If the significand extension is non-zero then the significand is |
48 | smaller than the magnitude of the correct exact result by an amount |
49 | greater than zero and less than one ls bit of the significand. |
50 | The significand extension is only required to have three possible |
51 | non-zero values: |
52 | less than 0x80000000 <=> the significand is less than 1/2 an ls |
53 | bit smaller than the magnitude of the |
54 | true exact result. |
55 | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit |
56 | smaller than the magnitude of the true |
57 | exact result. |
58 | greater than 0x80000000 <=> the significand is more than 1/2 an ls |
59 | bit smaller than the magnitude of the |
60 | true exact result. |
61 | |
62 +---------------------------------------------------------------------------*/
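
[Editorial note] A restatement in C of the extension convention just defined, useful when reading the rounding paths below; the function is a demo, not part of the emulator:

#include <stdio.h>
#include <stdint.h>

static const char *classify(uint32_t ext)	/* the %edx extension word */
{
	if (ext == 0)           return "exact";
	if (ext < 0x80000000u)  return "less than half an ls bit low";
	if (ext == 0x80000000u) return "exactly half an ls bit low";
	return "more than half an ls bit low";
}

int main(void)
{
	printf("%s\n", classify(0x80000000u));
	return 0;
}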
63
64/*---------------------------------------------------------------------------+
65 | The code in this module has become quite complex, but it should handle |
66 | all of the FPU flags which are set at this stage of the basic arithmetic |
67 | computations. |
68 | There are a few rare cases where the results are not set identically to |
69 | a real FPU. These require a bit more thought because at this stage the |
70 | results of the code here appear to be more consistent... |
71 | This may be changed in a future version. |
72 +---------------------------------------------------------------------------*/
73
74
75#include "fpu_emu.h"
76#include "exception.h"
77#include "control_w.h"
78
79/* Flags for FPU_bits_lost */
80#define LOST_DOWN $1
81#define LOST_UP $2
82
83/* Flags for FPU_denormal */
84#define DENORMAL $1
85#define UNMASKED_UNDERFLOW $2
86
87
88#ifndef NON_REENTRANT_FPU
89/* Make the code re-entrant by putting
90 local storage on the stack: */
91#define FPU_bits_lost (%esp)
92#define FPU_denormal 1(%esp)
93
94#else
95/* Not re-entrant, so we can gain speed by putting
96 local storage in a static area: */
97.data
98 .align 4,0
99FPU_bits_lost:
100 .byte 0
101FPU_denormal:
102 .byte 0
103#endif /* NON_REENTRANT_FPU */
104
105
106.text
107.globl fpu_reg_round
108.globl fpu_Arith_exit
109
110/* Entry point when called from C */
111ENTRY(FPU_round)
112 pushl %ebp
113 movl %esp,%ebp
114 pushl %esi
115 pushl %edi
116 pushl %ebx
117
118 movl PARAM1,%edi
119 movl SIGH(%edi),%eax
120 movl SIGL(%edi),%ebx
121 movl PARAM2,%edx
122
123fpu_reg_round: /* Normal entry point */
124 movl PARAM4,%ecx
125
126#ifndef NON_REENTRANT_FPU
127 pushl %ebx /* adjust the stack pointer */
128#endif /* NON_REENTRANT_FPU */
129
130#ifdef PARANOID
131/* Cannot use this here yet */
132/* orl %eax,%eax */
133/* jns L_entry_bugged */
134#endif /* PARANOID */
135
136 cmpw EXP_UNDER,EXP(%edi)
137 jle L_Make_denorm /* The number is a de-normal */
138
139 movb $0,FPU_denormal /* 0 -> not a de-normal */
140
141Denorm_done:
142 movb $0,FPU_bits_lost /* No bits yet lost in rounding */
143
144 movl %ecx,%esi
145 andl CW_PC,%ecx
146 cmpl PR_64_BITS,%ecx
147 je LRound_To_64
148
149 cmpl PR_53_BITS,%ecx
150 je LRound_To_53
151
152 cmpl PR_24_BITS,%ecx
153 je LRound_To_24
154
155#ifdef PECULIAR_486
156/* With the precision control bits set to 01 "(reserved)", a real 80486
157 behaves as if the precision control bits were set to 11 "64 bits" */
158 cmpl PR_RESERVED_BITS,%ecx
159 je LRound_To_64
160#ifdef PARANOID
161 jmp L_bugged_denorm_486
162#endif /* PARANOID */
163#else
164#ifdef PARANOID
165 jmp L_bugged_denorm /* There is no bug, just a bad control word */
166#endif /* PARANOID */
167#endif /* PECULIAR_486 */
168
169
170/* Round etc to 24 bit precision */
171LRound_To_24:
172 movl %esi,%ecx
173 andl CW_RC,%ecx
174 cmpl RC_RND,%ecx
175 je LRound_nearest_24
176
177 cmpl RC_CHOP,%ecx
178 je LCheck_truncate_24
179
180 cmpl RC_UP,%ecx /* Towards +infinity */
181 je LUp_24
182
183 cmpl RC_DOWN,%ecx /* Towards -infinity */
184 je LDown_24
185
186#ifdef PARANOID
187 jmp L_bugged_round24
188#endif /* PARANOID */
189
190LUp_24:
191 cmpb SIGN_POS,PARAM5
192 jne LCheck_truncate_24 /* If negative then up==truncate */
193
194 jmp LCheck_24_round_up
195
196LDown_24:
197 cmpb SIGN_POS,PARAM5
198 je LCheck_truncate_24 /* If positive then down==truncate */
199
200LCheck_24_round_up:
201 movl %eax,%ecx
202 andl $0x000000ff,%ecx
203 orl %ebx,%ecx
204 orl %edx,%ecx
205 jnz LDo_24_round_up
206 jmp L_Re_normalise
207
208LRound_nearest_24:
209 /* Do rounding of the 24th bit if needed (nearest or even) */
210 movl %eax,%ecx
211 andl $0x000000ff,%ecx
212 cmpl $0x00000080,%ecx
213 jc LCheck_truncate_24 /* less than half, no increment needed */
214
215 jne LGreater_Half_24 /* greater than half, increment needed */
216
217 /* Possibly half, we need to check the ls bits */
218 orl %ebx,%ebx
219 jnz LGreater_Half_24 /* greater than half, increment needed */
220
221 orl %edx,%edx
222 jnz LGreater_Half_24 /* greater than half, increment needed */
223
224 /* Exactly half, increment only if 24th bit is 1 (round to even) */
225 testl $0x00000100,%eax
226 jz LDo_truncate_24
227
228LGreater_Half_24: /* Rounding: increment at the 24th bit */
229LDo_24_round_up:
230 andl $0xffffff00,%eax /* Truncate to 24 bits */
231 xorl %ebx,%ebx
232 movb LOST_UP,FPU_bits_lost
233 addl $0x00000100,%eax
234 jmp LCheck_Round_Overflow
235
236LCheck_truncate_24:
237 movl %eax,%ecx
238 andl $0x000000ff,%ecx
239 orl %ebx,%ecx
240 orl %edx,%ecx
241 jz L_Re_normalise /* No truncation needed */
242
243LDo_truncate_24:
244 andl $0xffffff00,%eax /* Truncate to 24 bits */
245 xorl %ebx,%ebx
246 movb LOST_DOWN,FPU_bits_lost
247 jmp L_Re_normalise
248
249
250/* Round etc to 53 bit precision */
251LRound_To_53:
252 movl %esi,%ecx
253 andl CW_RC,%ecx
254 cmpl RC_RND,%ecx
255 je LRound_nearest_53
256
257 cmpl RC_CHOP,%ecx
258 je LCheck_truncate_53
259
260 cmpl RC_UP,%ecx /* Towards +infinity */
261 je LUp_53
262
263 cmpl RC_DOWN,%ecx /* Towards -infinity */
264 je LDown_53
265
266#ifdef PARANOID
267 jmp L_bugged_round53
268#endif /* PARANOID */
269
270LUp_53:
271 cmpb SIGN_POS,PARAM5
272 jne LCheck_truncate_53 /* If negative then up==truncate */
273
274 jmp LCheck_53_round_up
275
276LDown_53:
277 cmpb SIGN_POS,PARAM5
278 je LCheck_truncate_53 /* If positive then down==truncate */
279
280LCheck_53_round_up:
281 movl %ebx,%ecx
282 andl $0x000007ff,%ecx
283 orl %edx,%ecx
284 jnz LDo_53_round_up
285 jmp L_Re_normalise
286
287LRound_nearest_53:
288 /* Do rounding of the 53rd bit if needed (nearest or even) */
289 movl %ebx,%ecx
290 andl $0x000007ff,%ecx
291 cmpl $0x00000400,%ecx
292 jc LCheck_truncate_53 /* less than half, no increment needed */
293
294 jnz LGreater_Half_53 /* greater than half, increment needed */
295
296 /* Possibly half, we need to check the ls bits */
297 orl %edx,%edx
298 jnz LGreater_Half_53 /* greater than half, increment needed */
299
300 /* Exactly half, increment only if 53rd bit is 1 (round to even) */
301 testl $0x00000800,%ebx
302 jz LTruncate_53
303
304LGreater_Half_53: /* Rounding: increment at the 53rd bit */
305LDo_53_round_up:
306 movb LOST_UP,FPU_bits_lost
307 andl $0xfffff800,%ebx /* Truncate to 53 bits */
308 addl $0x00000800,%ebx
309 adcl $0,%eax
310 jmp LCheck_Round_Overflow
311
312LCheck_truncate_53:
313 movl %ebx,%ecx
314 andl $0x000007ff,%ecx
315 orl %edx,%ecx
316 jz L_Re_normalise
317
318LTruncate_53:
319 movb LOST_DOWN,FPU_bits_lost
320 andl $0xfffff800,%ebx /* Truncate to 53 bits */
321 jmp L_Re_normalise
322
323
324/* Round etc to 64 bit precision */
325LRound_To_64:
326 movl %esi,%ecx
327 andl CW_RC,%ecx
328 cmpl RC_RND,%ecx
329 je LRound_nearest_64
330
331 cmpl RC_CHOP,%ecx
332 je LCheck_truncate_64
333
334 cmpl RC_UP,%ecx /* Towards +infinity */
335 je LUp_64
336
337 cmpl RC_DOWN,%ecx /* Towards -infinity */
338 je LDown_64
339
340#ifdef PARANOID
341 jmp L_bugged_round64
342#endif /* PARANOID */
343
344LUp_64:
345 cmpb SIGN_POS,PARAM5
346 jne LCheck_truncate_64 /* If negative then up==truncate */
347
348 orl %edx,%edx
349 jnz LDo_64_round_up
350 jmp L_Re_normalise
351
352LDown_64:
353 cmpb SIGN_POS,PARAM5
354 je LCheck_truncate_64 /* If positive then down==truncate */
355
356 orl %edx,%edx
357 jnz LDo_64_round_up
358 jmp L_Re_normalise
359
360LRound_nearest_64:
361 cmpl $0x80000000,%edx
362 jc LCheck_truncate_64
363
364 jne LDo_64_round_up
365
366 /* Now test for round-to-even */
367 testb $1,%bl
368 jz LCheck_truncate_64
369
370LDo_64_round_up:
371 movb LOST_UP,FPU_bits_lost
372 addl $1,%ebx
373 adcl $0,%eax
374
375LCheck_Round_Overflow:
376 jnc L_Re_normalise
377
378 /* Overflow, adjust the result (significand to 1.0) */
379 rcrl $1,%eax
380 rcrl $1,%ebx
381 incw EXP(%edi)
382 jmp L_Re_normalise
383
384LCheck_truncate_64:
385 orl %edx,%edx
386 jz L_Re_normalise
387
388LTruncate_64:
389 movb LOST_DOWN,FPU_bits_lost
390
391L_Re_normalise:
392 testb $0xff,FPU_denormal
393 jnz Normalise_result
394
395L_Normalised:
396 movl TAG_Valid,%edx
397
398L_deNormalised:
399 cmpb LOST_UP,FPU_bits_lost
400 je L_precision_lost_up
401
402 cmpb LOST_DOWN,FPU_bits_lost
403 je L_precision_lost_down
404
405L_no_precision_loss:
406 /* store the result */
407
408L_Store_significand:
409 movl %eax,SIGH(%edi)
410 movl %ebx,SIGL(%edi)
411
412 cmpw EXP_OVER,EXP(%edi)
413 jge L_overflow
414
415 movl %edx,%eax
416
417 /* Convert the exponent to 80x87 form. */
418 addw EXTENDED_Ebias,EXP(%edi)
419 andw $0x7fff,EXP(%edi)
420
421fpu_reg_round_signed_special_exit:
422
423 cmpb SIGN_POS,PARAM5
424 je fpu_reg_round_special_exit
425
426 orw $0x8000,EXP(%edi) /* Negative sign for the result. */
427
428fpu_reg_round_special_exit:
429
430#ifndef NON_REENTRANT_FPU
431 popl %ebx /* adjust the stack pointer */
432#endif /* NON_REENTRANT_FPU */
433
434fpu_Arith_exit:
435 popl %ebx
436 popl %edi
437 popl %esi
438 leave
439 ret
440
441
442/*
443 * Set the FPU status flags to represent precision loss due to
444 * round-up.
445 */
446L_precision_lost_up:
447 push %edx
448 push %eax
449 call set_precision_flag_up
450 popl %eax
451 popl %edx
452 jmp L_no_precision_loss
453
454/*
455 * Set the FPU status flags to represent precision loss due to
456 * truncation.
457 */
458L_precision_lost_down:
459 push %edx
460 push %eax
461 call set_precision_flag_down
462 popl %eax
463 popl %edx
464 jmp L_no_precision_loss
465
466
467/*
468 * The number is a denormal (which might get rounded up to a normal)
469 * Shift the number right the required number of bits, which will
470 * have to be undone later...
471 */
472L_Make_denorm:
473 /* The action to be taken depends upon whether the underflow
474 exception is masked */
475 testb CW_Underflow,%cl /* Underflow mask. */
476 jz Unmasked_underflow /* Do not make a denormal. */
477
478 movb DENORMAL,FPU_denormal
479
480 pushl %ecx /* Save */
481 movw EXP_UNDER+1,%cx
482 subw EXP(%edi),%cx
483
484	cmpw	$64,%cx		/* Shifts of 64 bits or more are handled below */
485 jnc Denorm_shift_more_than_63
486
487 cmpw $32,%cx /* shrd only works for 0..31 bits */
488 jnc Denorm_shift_more_than_32
489
490/*
491 * We got here without jumps by assuming that the most common requirement
492 * is for a small de-normalising shift.
493 * Shift by [1..31] bits
494 */
495 addw %cx,EXP(%edi)
496 orl %edx,%edx /* extension */
497 setne %ch /* Save whether %edx is non-zero */
498 xorl %edx,%edx
499 shrd %cl,%ebx,%edx
500 shrd %cl,%eax,%ebx
501 shr %cl,%eax
502 orb %ch,%dl
503 popl %ecx
504 jmp Denorm_done
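/*
 * Each denormalising shift path keeps "sticky" rounding information:
 * any non-zero bits pushed out of the bottom are OR-ed back into the
 * new extension word so the rounding code above still sees them.  The
 * [1..31] bit case just performed, sketched in C (a hedged
 * illustration; hi, lo, ext and n are not names from this file):
 *
 *	int sticky = (ext != 0);		// old extension bits
 *	ext = (lo << (32 - n)) | sticky;	// new extension word
 *	lo  = (lo >> n) | (hi << (32 - n));
 *	hi  = hi >> n;
 */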
505
506/* Shift by [32..63] bits */
507Denorm_shift_more_than_32:
508 addw %cx,EXP(%edi)
509 subb $32,%cl
510 orl %edx,%edx
511 setne %ch
512 orb %ch,%bl
513 xorl %edx,%edx
514 shrd %cl,%ebx,%edx
515 shrd %cl,%eax,%ebx
516 shr %cl,%eax
517 orl %edx,%edx /* test these 32 bits */
518 setne %cl
519 orb %ch,%bl
520 orb %cl,%bl
521 movl %ebx,%edx
522 movl %eax,%ebx
523 xorl %eax,%eax
524 popl %ecx
525 jmp Denorm_done
526
527/* Shift by [64..) bits */
528Denorm_shift_more_than_63:
529 cmpw $64,%cx
530 jne Denorm_shift_more_than_64
531
532/* Exactly 64 bit shift */
533 addw %cx,EXP(%edi)
534 xorl %ecx,%ecx
535 orl %edx,%edx
536 setne %cl
537 orl %ebx,%ebx
538 setne %ch
539 orb %ch,%cl
540 orb %cl,%al
541 movl %eax,%edx
542 xorl %eax,%eax
543 xorl %ebx,%ebx
544 popl %ecx
545 jmp Denorm_done
546
547Denorm_shift_more_than_64:
548 movw EXP_UNDER+1,EXP(%edi)
549/* This is easy: %eax must be non-zero here, so at least one '1' bit is shifted out.. */
550 movl $1,%edx
551 xorl %eax,%eax
552 xorl %ebx,%ebx
553 popl %ecx
554 jmp Denorm_done
555
556
557Unmasked_underflow:
558 movb UNMASKED_UNDERFLOW,FPU_denormal
559 jmp Denorm_done
560
561
562/* Undo the de-normalisation. */
563Normalise_result:
564 cmpb UNMASKED_UNDERFLOW,FPU_denormal
565 je Signal_underflow
566
567/* The number must be a denormal if we got here. */
568#ifdef PARANOID
569 /* But check it... just in case. */
570 cmpw EXP_UNDER+1,EXP(%edi)
571 jne L_norm_bugged
572#endif /* PARANOID */
573
574#ifdef PECULIAR_486
575 /*
576 * This implements a special feature of 80486 behaviour.
577 * Underflow will be signalled even if the number is
578 * not a denormal after rounding.
579 * This difference occurs only for masked underflow, and not
580 * in the unmasked case.
581 * Actual 80486 behaviour differs from this in some circumstances.
582 */
583 orl %eax,%eax /* ms bits */
584 js LPseudoDenormal /* Will be masked underflow */
585#else
586 orl %eax,%eax /* ms bits */
587 js L_Normalised /* No longer a denormal */
588#endif /* PECULIAR_486 */
589
590 jnz LDenormal_adj_exponent
591
592 orl %ebx,%ebx
593 jz L_underflow_to_zero /* The contents are zero */
594
595LDenormal_adj_exponent:
596 decw EXP(%edi)
597
598LPseudoDenormal:
599 testb $0xff,FPU_bits_lost /* bits lost == underflow */
600 movl TAG_Special,%edx
601 jz L_deNormalised
602
603 /* There must be a masked underflow */
604 push %eax
605 pushl EX_Underflow
606 call EXCEPTION
607 popl %eax
608 popl %eax
609 movl TAG_Special,%edx
610 jmp L_deNormalised
611
612
613/*
614 * The operations resulted in a number too small to represent.
615 * Masked response.
616 */
617L_underflow_to_zero:
618 push %eax
619 call set_precision_flag_down
620 popl %eax
621
622 push %eax
623 pushl EX_Underflow
624 call EXCEPTION
625 popl %eax
626 popl %eax
627
628/* Reduce the exponent to EXP_UNDER */
629 movw EXP_UNDER,EXP(%edi)
630 movl TAG_Zero,%edx
631 jmp L_Store_significand
632
633
634/* The operations resulted in a number too large to represent. */
635L_overflow:
636 addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */
637 push %edi
638 call arith_overflow
639 pop %edi
640 jmp fpu_reg_round_signed_special_exit
641
642
643Signal_underflow:
644 /* The number may have been changed to a non-denormal */
645 /* by the rounding operations. */
646 cmpw EXP_UNDER,EXP(%edi)
647 jle Do_unmasked_underflow
648
649 jmp L_Normalised
650
651Do_unmasked_underflow:
652 /* Increase the exponent by the magic number */
653 addw $(3*(1<<13)),EXP(%edi)
654 push %eax
655 pushl EX_Underflow
656 call EXCEPTION
657 popl %eax
658 popl %eax
659 jmp L_Normalised
660
661
662#ifdef PARANOID
663#ifdef PECULIAR_486
664L_bugged_denorm_486:
665 pushl EX_INTERNAL|0x236
666 call EXCEPTION
667 popl %ebx
668 jmp L_exception_exit
669#else
670L_bugged_denorm:
671 pushl EX_INTERNAL|0x230
672 call EXCEPTION
673 popl %ebx
674 jmp L_exception_exit
675#endif /* PECULIAR_486 */
676
677L_bugged_round24:
678 pushl EX_INTERNAL|0x231
679 call EXCEPTION
680 popl %ebx
681 jmp L_exception_exit
682
683L_bugged_round53:
684 pushl EX_INTERNAL|0x232
685 call EXCEPTION
686 popl %ebx
687 jmp L_exception_exit
688
689L_bugged_round64:
690 pushl EX_INTERNAL|0x233
691 call EXCEPTION
692 popl %ebx
693 jmp L_exception_exit
694
695L_norm_bugged:
696 pushl EX_INTERNAL|0x234
697 call EXCEPTION
698 popl %ebx
699 jmp L_exception_exit
700
701L_entry_bugged:
702 pushl EX_INTERNAL|0x235
703 call EXCEPTION
704 popl %ebx
705L_exception_exit:
706 mov $-1,%eax
707 jmp fpu_reg_round_special_exit
708#endif /* PARANOID */
diff --git a/arch/i386/math-emu/reg_u_add.S b/arch/i386/math-emu/reg_u_add.S
new file mode 100644
index 000000000000..47c4c2434d85
--- /dev/null
+++ b/arch/i386/math-emu/reg_u_add.S
@@ -0,0 +1,167 @@
1 .file "reg_u_add.S"
2/*---------------------------------------------------------------------------+
3 | reg_u_add.S |
4 | |
5 | Add two valid (TAG_Valid) FPU_REG numbers, of the same sign, and put the |
6 | result in a destination FPU_REG. |
7 | |
8 | Copyright (C) 1992,1993,1995,1997 |
9 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
10 | E-mail billm@suburbia.net |
11 | |
12 | Call from C as: |
13 | int FPU_u_add(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, |
14 | int control_w) |
15 | Return value is the tag of the answer, or-ed with FPU_Exception if |
16 | one was raised, or -1 on internal error. |
17 | |
18 +---------------------------------------------------------------------------*/
19
20/*
21 | Kernel addition routine FPU_u_add(reg *arg1, reg *arg2, reg *answ).
22 | Takes two valid reg f.p. numbers (TAG_Valid), which are
23 | treated as unsigned numbers,
24 | and returns their sum as a TAG_Valid or TAG_Special f.p. number.
25 | The returned number is normalized.
26 | Basic checks are performed if PARANOID is defined.
27 */
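/*
 * In outline, with both significands normalised and of the same sign,
 * the routine does the following (a hedged C sketch; sig1, sig2, ext
 * and shift_right_sticky() are illustrative names, not identifiers
 * from this file):
 *
 *	shift = exp1 - exp2;			// swap the args first if < 0
 *	ext   = shift_right_sticky(&sig2, shift);
 *	sum   = sig1 + sig2;			// 64-bit significand add
 *	if (carry_out) {			// sum >= 2.0: halve it and
 *		ext = (ext >> 1) | (ext & 1)	// keep the lost bit sticky
 *		      | ((unsigned)(sum & 1) << 31);
 *		sum = (sum >> 1) | (1ULL << 63);
 *		exp++;
 *	}
 *	jump to fpu_reg_round;			// round to control-word precision
 */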
28
29#include "exception.h"
30#include "fpu_emu.h"
31#include "control_w.h"
32
33.text
34ENTRY(FPU_u_add)
35 pushl %ebp
36 movl %esp,%ebp
37 pushl %esi
38 pushl %edi
39 pushl %ebx
40
41 movl PARAM1,%esi /* source 1 */
42 movl PARAM2,%edi /* source 2 */
43
44 movl PARAM6,%ecx
45 movl %ecx,%edx
46 subl PARAM7,%ecx /* exp1 - exp2 */
47 jge L_arg1_larger
48
49 /* num1 is smaller */
50 movl SIGL(%esi),%ebx
51 movl SIGH(%esi),%eax
52
53 movl %edi,%esi
54 movl PARAM7,%edx
55 negw %cx
56 jmp L_accum_loaded
57
58L_arg1_larger:
59 /* num1 has larger or equal exponent */
60 movl SIGL(%edi),%ebx
61 movl SIGH(%edi),%eax
62
63L_accum_loaded:
64 movl PARAM3,%edi /* destination */
65 movw %dx,EXP(%edi) /* Copy exponent to destination */
66
67 xorl %edx,%edx /* clear the extension */
68
69#ifdef PARANOID
70 testl $0x80000000,%eax
71 je L_bugged
72
73 testl $0x80000000,SIGH(%esi)
74 je L_bugged
75#endif /* PARANOID */
76
77/* The number to be shifted is in %eax:%ebx:%edx */
78 cmpw $32,%cx /* shrd only works for 0..31 bits */
79 jnc L_more_than_31
80
81/* less than 32 bits */
82 shrd %cl,%ebx,%edx
83 shrd %cl,%eax,%ebx
84 shr %cl,%eax
85 jmp L_shift_done
86
87L_more_than_31:
88 cmpw $64,%cx
89 jnc L_more_than_63
90
91 subb $32,%cl
92 jz L_exactly_32
93
94 shrd %cl,%eax,%edx
95 shr %cl,%eax
96 orl %ebx,%ebx
97 jz L_more_31_no_low /* none of the lowest bits is set */
98
99 orl $1,%edx /* record the fact in the extension */
100
101L_more_31_no_low:
102 movl %eax,%ebx
103 xorl %eax,%eax
104 jmp L_shift_done
105
106L_exactly_32:
107 movl %ebx,%edx
108 movl %eax,%ebx
109 xorl %eax,%eax
110 jmp L_shift_done
111
112L_more_than_63:
113 cmpw $65,%cx
114 jnc L_more_than_64
115
116 movl %eax,%edx
117 orl %ebx,%ebx
118 jz L_more_63_no_low
119
120 orl $1,%edx
121 jmp L_more_63_no_low
122
123L_more_than_64:
124	movl	$1,%edx	/* The shifted number always contains at least one '1' bit */
125
126L_more_63_no_low:
127 xorl %ebx,%ebx
128 xorl %eax,%eax
129
130L_shift_done:
131 /* Now do the addition */
132 addl SIGL(%esi),%ebx
133 adcl SIGH(%esi),%eax
134 jnc L_round_the_result
135
136 /* Overflow, adjust the result */
137 rcrl $1,%eax
138 rcrl $1,%ebx
139 rcrl $1,%edx
140 jnc L_no_bit_lost
141
142 orl $1,%edx
143
144L_no_bit_lost:
145 incw EXP(%edi)
146
147L_round_the_result:
148 jmp fpu_reg_round /* Round the result */
149
150
151
152#ifdef PARANOID
153/* If we ever get here then we have problems! */
154L_bugged:
155 pushl EX_INTERNAL|0x201
156 call EXCEPTION
157 pop %ebx
158 movl $-1,%eax
159 jmp L_exit
160
161L_exit:
162 popl %ebx
163 popl %edi
164 popl %esi
165 leave
166 ret
167#endif /* PARANOID */
diff --git a/arch/i386/math-emu/reg_u_div.S b/arch/i386/math-emu/reg_u_div.S
new file mode 100644
index 000000000000..cc00654b6f9a
--- /dev/null
+++ b/arch/i386/math-emu/reg_u_div.S
@@ -0,0 +1,471 @@
1 .file "reg_u_div.S"
2/*---------------------------------------------------------------------------+
3 | reg_u_div.S |
4 | |
5 | Divide one FPU_REG by another and put the result in a destination FPU_REG.|
6 | |
7 | Copyright (C) 1992,1993,1995,1997 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
9 | E-mail billm@suburbia.net |
10 | |
11 | |
12 +---------------------------------------------------------------------------*/
13
14/*---------------------------------------------------------------------------+
15 | Call from C as: |
16 | int FPU_u_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest, |
17 | unsigned int control_word, char *sign) |
18 | |
19 | Does not compute the destination exponent, but does adjust it. |
20 | |
21 | Return value is the tag of the answer, or-ed with FPU_Exception if |
22 | one was raised, or -1 on internal error. |
23 +---------------------------------------------------------------------------*/
24
25#include "exception.h"
26#include "fpu_emu.h"
27#include "control_w.h"
28
29
30/* #define dSIGL(x) (x) */
31/* #define dSIGH(x) 4(x) */
32
33
34#ifndef NON_REENTRANT_FPU
35/*
36 Local storage on the stack:
37 Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
38 Overflow flag: ovfl_flag
39 */
40#define FPU_accum_3 -4(%ebp)
41#define FPU_accum_2 -8(%ebp)
42#define FPU_accum_1 -12(%ebp)
43#define FPU_accum_0 -16(%ebp)
44#define FPU_result_1 -20(%ebp)
45#define FPU_result_2 -24(%ebp)
46#define FPU_ovfl_flag -28(%ebp)
47
48#else
49.data
50/*
51 Local storage in a static area:
52 Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
53 Overflow flag: ovfl_flag
54 */
55 .align 4,0
56FPU_accum_3:
57 .long 0
58FPU_accum_2:
59 .long 0
60FPU_accum_1:
61 .long 0
62FPU_accum_0:
63 .long 0
64FPU_result_1:
65 .long 0
66FPU_result_2:
67 .long 0
68FPU_ovfl_flag:
69 .byte 0
70#endif /* NON_REENTRANT_FPU */
71
72#define REGA PARAM1
73#define REGB PARAM2
74#define DEST PARAM3
75
76.text
77ENTRY(FPU_u_div)
78 pushl %ebp
79 movl %esp,%ebp
80#ifndef NON_REENTRANT_FPU
81 subl $28,%esp
82#endif /* NON_REENTRANT_FPU */
83
84 pushl %esi
85 pushl %edi
86 pushl %ebx
87
88 movl REGA,%esi
89 movl REGB,%ebx
90 movl DEST,%edi
91
92 movswl EXP(%esi),%edx
93 movswl EXP(%ebx),%eax
94 subl %eax,%edx
95 addl EXP_BIAS,%edx
96
97 /* A denormal and a large number can cause an exponent underflow */
98 cmpl EXP_WAY_UNDER,%edx
99 jg xExp_not_underflow
100
101	/* Set to a really low value to allow correct handling */
102 movl EXP_WAY_UNDER,%edx
103
104xExp_not_underflow:
105
106 movw %dx,EXP(%edi)
107
108#ifdef PARANOID
109/* testl $0x80000000, SIGH(%esi) // Dividend */
110/* je L_bugged */
111 testl $0x80000000, SIGH(%ebx) /* Divisor */
112 je L_bugged
113#endif /* PARANOID */
114
115/* Check if the divisor can be treated as having just 32 bits */
116 cmpl $0,SIGL(%ebx)
117 jnz L_Full_Division /* Can't do a quick divide */
118
119/* We should be able to zip through the division here */
120 movl SIGH(%ebx),%ecx /* The divisor */
121 movl SIGH(%esi),%edx /* Dividend */
122 movl SIGL(%esi),%eax /* Dividend */
123
124 cmpl %ecx,%edx
125 setaeb FPU_ovfl_flag /* Keep a record */
126 jb L_no_adjust
127
128 subl %ecx,%edx /* Prevent the overflow */
129
130L_no_adjust:
131 /* Divide the 64 bit number by the 32 bit denominator */
132 divl %ecx
133 movl %eax,FPU_result_2
134
135 /* Work on the remainder of the first division */
136 xorl %eax,%eax
137 divl %ecx
138 movl %eax,FPU_result_1
139
140 /* Work on the remainder of the 64 bit division */
141 xorl %eax,%eax
142 divl %ecx
143
144 testb $255,FPU_ovfl_flag /* was the num > denom ? */
145 je L_no_overflow
146
147 /* Do the shifting here */
148 /* increase the exponent */
149 incw EXP(%edi)
150
151 /* shift the mantissa right one bit */
152 stc /* To set the ms bit */
153 rcrl FPU_result_2
154 rcrl FPU_result_1
155 rcrl %eax
156
157L_no_overflow:
158 jmp LRound_precision /* Do the rounding as required */
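/*
 * The quick path above is ordinary long division of a 96-bit dividend
 * (the 64-bit significand plus a zero extension) by a 32-bit divisor:
 * each divl uses the previous remainder as the high half of the next
 * step.  A hedged C sketch (hi, lo, d and q2..q0 are illustrative
 * names):
 *
 *	// Precondition: hi < d.  The code guarantees this by subtracting
 *	// d once when needed and recording the fact in FPU_ovfl_flag.
 *	unsigned long long n = ((unsigned long long)hi << 32) | lo;
 *	unsigned int q2 = n / d;
 *	n = (n % d) << 32;		// bring down 32 zero bits
 *	unsigned int q1 = n / d;
 *	n = (n % d) << 32;
 *	unsigned int q0 = n / d;	// feeds the rounding word
 */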
159
160
161/*---------------------------------------------------------------------------+
162 | Divide: Return arg1/arg2 to arg3. |
163 | |
164 | This routine does not use the exponents of arg1 and arg2, but does |
165 | adjust the exponent of arg3. |
166 | |
167 | The maximum returned value is (ignoring exponents) |
168 | .ffffffff ffffffff |
169 | ------------------ = 1.ffffffff fffffffe |
170 | .80000000 00000000 |
171 | and the minimum is |
172 | .80000000 00000000 |
173 | ------------------ = .80000000 00000001 (rounded) |
174 | .ffffffff ffffffff |
175 | |
176 +---------------------------------------------------------------------------*/
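/*
 * With a full 64-bit divisor the code below avoids a 64/64 divide by
 * dividing by only the top word of the divisor, augmented by one, and
 * then correcting.  A hedged C sketch of one stage (q, rem and div are
 * illustrative names):
 *
 *	q    = rem_high64 / (div_msw + 1);	// never an overestimate
 *	rem -= (unsigned long long)q * div;	// 64x32 multiply-subtract
 *	if (rem >= div) {			// the PARANOID checks assert
 *		q++;				// that one fixup suffices
 *		rem -= div;
 *	}
 */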
177
178
179L_Full_Division:
180 /* Save extended dividend in local register */
181 movl SIGL(%esi),%eax
182 movl %eax,FPU_accum_2
183 movl SIGH(%esi),%eax
184 movl %eax,FPU_accum_3
185 xorl %eax,%eax
186 movl %eax,FPU_accum_1 /* zero the extension */
187 movl %eax,FPU_accum_0 /* zero the extension */
188
189 movl SIGL(%esi),%eax /* Get the current num */
190 movl SIGH(%esi),%edx
191
192/*----------------------------------------------------------------------*/
193/* Initialization done.
194 Do the first 32 bits. */
195
196 movb $0,FPU_ovfl_flag
197 cmpl SIGH(%ebx),%edx /* Test for imminent overflow */
198 jb LLess_than_1
199 ja LGreater_than_1
200
201 cmpl SIGL(%ebx),%eax
202 jb LLess_than_1
203
204LGreater_than_1:
205/* The dividend is greater than or equal, which would cause overflow */
206 setaeb FPU_ovfl_flag /* Keep a record */
207
208 subl SIGL(%ebx),%eax
209 sbbl SIGH(%ebx),%edx /* Prevent the overflow */
210 movl %eax,FPU_accum_2
211 movl %edx,FPU_accum_3
212
213LLess_than_1:
214/* At this point, we have a dividend < divisor, with a record of
215 adjustment in FPU_ovfl_flag */
216
217 /* We will divide by a number which is too large */
218 movl SIGH(%ebx),%ecx
219 addl $1,%ecx
220 jnc LFirst_div_not_1
221
222 /* here we need to divide by 100000000h,
223 i.e., no division at all.. */
224 mov %edx,%eax
225 jmp LFirst_div_done
226
227LFirst_div_not_1:
228 divl %ecx /* Divide the numerator by the augmented
229 denom ms dw */
230
231LFirst_div_done:
232 movl %eax,FPU_result_2 /* Put the result in the answer */
233
234 mull SIGH(%ebx) /* mul by the ms dw of the denom */
235
236 subl %eax,FPU_accum_2 /* Subtract from the num local reg */
237 sbbl %edx,FPU_accum_3
238
239 movl FPU_result_2,%eax /* Get the result back */
240 mull SIGL(%ebx) /* now mul the ls dw of the denom */
241
242 subl %eax,FPU_accum_1 /* Subtract from the num local reg */
243 sbbl %edx,FPU_accum_2
244 sbbl $0,FPU_accum_3
245 je LDo_2nd_32_bits /* Must check for non-zero result here */
246
247#ifdef PARANOID
248 jb L_bugged_1
249#endif /* PARANOID */
250
251	/* need to subtract the denom once more */
252 incl FPU_result_2 /* Correct the answer */
253
254 movl SIGL(%ebx),%eax
255 movl SIGH(%ebx),%edx
256 subl %eax,FPU_accum_1 /* Subtract from the num local reg */
257 sbbl %edx,FPU_accum_2
258
259#ifdef PARANOID
260 sbbl $0,FPU_accum_3
261 jne L_bugged_1 /* Must check for non-zero result here */
262#endif /* PARANOID */
263
264/*----------------------------------------------------------------------*/
265/* Half of the main problem is done, there is just a reduced numerator
266 to handle now.
267 Work with the second 32 bits, FPU_accum_0 not used from now on */
268LDo_2nd_32_bits:
269 movl FPU_accum_2,%edx /* get the reduced num */
270 movl FPU_accum_1,%eax
271
272 /* need to check for possible subsequent overflow */
273 cmpl SIGH(%ebx),%edx
274 jb LDo_2nd_div
275 ja LPrevent_2nd_overflow
276
277 cmpl SIGL(%ebx),%eax
278 jb LDo_2nd_div
279
280LPrevent_2nd_overflow:
281/* The numerator is greater than or equal, which would cause overflow */
282 /* prevent overflow */
283 subl SIGL(%ebx),%eax
284 sbbl SIGH(%ebx),%edx
285 movl %edx,FPU_accum_2
286 movl %eax,FPU_accum_1
287
288 incl FPU_result_2 /* Reflect the subtraction in the answer */
289
290#ifdef PARANOID
291 je L_bugged_2 /* Can't bump the result to 1.0 */
292#endif /* PARANOID */
293
294LDo_2nd_div:
295 cmpl $0,%ecx /* augmented denom msw */
296 jnz LSecond_div_not_1
297
298 /* %ecx == 0, we are dividing by 1.0 */
299 mov %edx,%eax
300 jmp LSecond_div_done
301
302LSecond_div_not_1:
303 divl %ecx /* Divide the numerator by the denom ms dw */
304
305LSecond_div_done:
306 movl %eax,FPU_result_1 /* Put the result in the answer */
307
308 mull SIGH(%ebx) /* mul by the ms dw of the denom */
309
310 subl %eax,FPU_accum_1 /* Subtract from the num local reg */
311 sbbl %edx,FPU_accum_2
312
313#ifdef PARANOID
314 jc L_bugged_2
315#endif /* PARANOID */
316
317 movl FPU_result_1,%eax /* Get the result back */
318 mull SIGL(%ebx) /* now mul the ls dw of the denom */
319
320 subl %eax,FPU_accum_0 /* Subtract from the num local reg */
321 sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */
322 sbbl $0,FPU_accum_2
323
324#ifdef PARANOID
325 jc L_bugged_2
326#endif /* PARANOID */
327
328 jz LDo_3rd_32_bits
329
330#ifdef PARANOID
331 cmpl $1,FPU_accum_2
332 jne L_bugged_2
333#endif /* PARANOID */
334
335	/* need to subtract the denom once more */
336 movl SIGL(%ebx),%eax
337 movl SIGH(%ebx),%edx
338 subl %eax,FPU_accum_0 /* Subtract from the num local reg */
339 sbbl %edx,FPU_accum_1
340 sbbl $0,FPU_accum_2
341
342#ifdef PARANOID
343 jc L_bugged_2
344 jne L_bugged_2
345#endif /* PARANOID */
346
347 addl $1,FPU_result_1 /* Correct the answer */
348 adcl $0,FPU_result_2
349
350#ifdef PARANOID
351 jc L_bugged_2 /* Must check for non-zero result here */
352#endif /* PARANOID */
353
354/*----------------------------------------------------------------------*/
355/* The division is essentially finished here, we just need to perform
356 tidying operations.
357 Deal with the 3rd 32 bits */
358LDo_3rd_32_bits:
359 movl FPU_accum_1,%edx /* get the reduced num */
360 movl FPU_accum_0,%eax
361
362 /* need to check for possible subsequent overflow */
363 cmpl SIGH(%ebx),%edx /* denom */
364 jb LRound_prep
365 ja LPrevent_3rd_overflow
366
367 cmpl SIGL(%ebx),%eax /* denom */
368 jb LRound_prep
369
370LPrevent_3rd_overflow:
371 /* prevent overflow */
372 subl SIGL(%ebx),%eax
373 sbbl SIGH(%ebx),%edx
374 movl %edx,FPU_accum_1
375 movl %eax,FPU_accum_0
376
377 addl $1,FPU_result_1 /* Reflect the subtraction in the answer */
378 adcl $0,FPU_result_2
379 jne LRound_prep
380 jnc LRound_prep
381
382 /* This is a tricky spot, there is an overflow of the answer */
383 movb $255,FPU_ovfl_flag /* Overflow -> 1.000 */
384
385LRound_prep:
386/*
387 * Prepare for rounding.
388 * To test for rounding, we just need to compare 2*accum with the
389 * denom.
390 */
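/*
 * Rather than passing the exact remainder on, the comparison result is
 * encoded into %eax in a form fpu_reg_round can treat as ordinary
 * guard/sticky bits:
 *
 *	2*rem <  denom  ->  %eax = 0x70000000	(below half way)
 *	2*rem == denom  ->  %eax = 0x80000000	(exactly half way)
 *	2*rem >  denom  ->  %eax = 0xff000000	(above half way)
 */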
391 movl FPU_accum_0,%ecx
392 movl FPU_accum_1,%edx
393 movl %ecx,%eax
394 orl %edx,%eax
395 jz LRound_ovfl /* The accumulator contains zero. */
396
397 /* Multiply by 2 */
398 clc
399 rcll $1,%ecx
400 rcll $1,%edx
401 jc LRound_large /* No need to compare, denom smaller */
402
403 subl SIGL(%ebx),%ecx
404 sbbl SIGH(%ebx),%edx
405 jnc LRound_not_small
406
407 movl $0x70000000,%eax /* Denom was larger */
408 jmp LRound_ovfl
409
410LRound_not_small:
411 jnz LRound_large
412
413 movl $0x80000000,%eax /* Remainder was exactly 1/2 denom */
414 jmp LRound_ovfl
415
416LRound_large:
417 movl $0xff000000,%eax /* Denom was smaller */
418
419LRound_ovfl:
420/* We are now ready to deal with rounding, but first we must get
421 the bits properly aligned */
422 testb $255,FPU_ovfl_flag /* was the num > denom ? */
423 je LRound_precision
424
425 incw EXP(%edi)
426
427 /* shift the mantissa right one bit */
428 stc /* Will set the ms bit */
429 rcrl FPU_result_2
430 rcrl FPU_result_1
431 rcrl %eax
432
433/* Round the result as required */
434LRound_precision:
435 decw EXP(%edi) /* binary point between 1st & 2nd bits */
436
437 movl %eax,%edx
438 movl FPU_result_1,%ebx
439 movl FPU_result_2,%eax
440 jmp fpu_reg_round
441
442
443#ifdef PARANOID
444/* The logic is wrong if we got here */
445L_bugged:
446 pushl EX_INTERNAL|0x202
447 call EXCEPTION
448 pop %ebx
449 jmp L_exit
450
451L_bugged_1:
452 pushl EX_INTERNAL|0x203
453 call EXCEPTION
454 pop %ebx
455 jmp L_exit
456
457L_bugged_2:
458 pushl EX_INTERNAL|0x204
459 call EXCEPTION
460 pop %ebx
461 jmp L_exit
462
463L_exit:
464 movl $-1,%eax
465 popl %ebx
466 popl %edi
467 popl %esi
468
469 leave
470 ret
471#endif /* PARANOID */
diff --git a/arch/i386/math-emu/reg_u_mul.S b/arch/i386/math-emu/reg_u_mul.S
new file mode 100644
index 000000000000..973f12af97df
--- /dev/null
+++ b/arch/i386/math-emu/reg_u_mul.S
@@ -0,0 +1,148 @@
1 .file "reg_u_mul.S"
2/*---------------------------------------------------------------------------+
3 | reg_u_mul.S |
4 | |
5 | Core multiplication routine |
6 | |
7 | Copyright (C) 1992,1993,1995,1997 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
9 | E-mail billm@suburbia.net |
10 | |
11 | |
12 +---------------------------------------------------------------------------*/
13
14/*---------------------------------------------------------------------------+
15 | Basic multiplication routine. |
16 | Does not check the resulting exponent for overflow/underflow |
17 | |
18 | FPU_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw); |
19 | |
20 | Internal working is at approx 128 bits. |
21 | Result is rounded to nearest 53 or 64 bits, using "nearest or even". |
22 +---------------------------------------------------------------------------*/
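/*
 * The 64x64 -> 128 bit product below is built from four 32x32 -> 64 bit
 * partial products in the usual schoolbook pattern.  A hedged C sketch
 * (a_hi/a_lo etc. are illustrative names for the significand halves;
 * "carry" denotes the carry out of the preceding addition):
 *
 *	u64 p0 = (u64)a_lo * b_lo;
 *	u64 p1 = (u64)a_lo * b_hi;
 *	u64 p2 = (u64)a_hi * b_lo;
 *	u64 p3 = (u64)a_hi * b_hi;
 *
 *	acc0 = (u32)p0;
 *	acc1 = (u32)(p0 >> 32) + (u32)p1 + (u32)p2;
 *	acc2 = (u32)(p1 >> 32) + (u32)(p2 >> 32) + (u32)p3 + carry;
 *	acc3 = (u32)(p3 >> 32) + carry;
 */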
23
24#include "exception.h"
25#include "fpu_emu.h"
26#include "control_w.h"
27
28
29
30#ifndef NON_REENTRANT_FPU
31/* Local storage on the stack: */
32#define FPU_accum_0 -4(%ebp) /* ms word */
33#define FPU_accum_1 -8(%ebp)
34
35#else
36/* Local storage in a static area: */
37.data
38 .align 4,0
39FPU_accum_0:
40 .long 0
41FPU_accum_1:
42 .long 0
43#endif /* NON_REENTRANT_FPU */
44
45
46.text
47ENTRY(FPU_u_mul)
48 pushl %ebp
49 movl %esp,%ebp
50#ifndef NON_REENTRANT_FPU
51 subl $8,%esp
52#endif /* NON_REENTRANT_FPU */
53
54 pushl %esi
55 pushl %edi
56 pushl %ebx
57
58 movl PARAM1,%esi
59 movl PARAM2,%edi
60
61#ifdef PARANOID
62 testl $0x80000000,SIGH(%esi)
63 jz L_bugged
64 testl $0x80000000,SIGH(%edi)
65 jz L_bugged
66#endif /* PARANOID */
67
68 xorl %ecx,%ecx
69 xorl %ebx,%ebx
70
71 movl SIGL(%esi),%eax
72 mull SIGL(%edi)
73 movl %eax,FPU_accum_0
74 movl %edx,FPU_accum_1
75
76 movl SIGL(%esi),%eax
77 mull SIGH(%edi)
78 addl %eax,FPU_accum_1
79 adcl %edx,%ebx
80/* adcl $0,%ecx // overflow here is not possible */
81
82 movl SIGH(%esi),%eax
83 mull SIGL(%edi)
84 addl %eax,FPU_accum_1
85 adcl %edx,%ebx
86 adcl $0,%ecx
87
88 movl SIGH(%esi),%eax
89 mull SIGH(%edi)
90 addl %eax,%ebx
91 adcl %edx,%ecx
92
93 /* Get the sum of the exponents. */
94 movl PARAM6,%eax
95 subl EXP_BIAS-1,%eax
96
97 /* Two denormals can cause an exponent underflow */
98 cmpl EXP_WAY_UNDER,%eax
99 jg Exp_not_underflow
100
101	/* Set to a really low value to allow correct handling */
102 movl EXP_WAY_UNDER,%eax
103
104Exp_not_underflow:
105
106/* Have now finished with the sources */
107 movl PARAM3,%edi /* Point to the destination */
108 movw %ax,EXP(%edi)
109
110/* Now make sure that the result is normalized */
111 testl $0x80000000,%ecx
112 jnz LResult_Normalised
113
114 /* Normalize by shifting left one bit */
115 shll $1,FPU_accum_0
116 rcll $1,FPU_accum_1
117 rcll $1,%ebx
118 rcll $1,%ecx
119 decw EXP(%edi)
120
121LResult_Normalised:
122 movl FPU_accum_0,%eax
123 movl FPU_accum_1,%edx
124 orl %eax,%eax
125 jz L_extent_zero
126
127 orl $1,%edx
128
129L_extent_zero:
130 movl %ecx,%eax
131 jmp fpu_reg_round
132
133
134#ifdef PARANOID
135L_bugged:
136 pushl EX_INTERNAL|0x205
137 call EXCEPTION
138 pop %ebx
139 jmp L_exit
140
141L_exit:
142 popl %ebx
143 popl %edi
144 popl %esi
145 leave
146 ret
147#endif /* PARANOID */
148
diff --git a/arch/i386/math-emu/reg_u_sub.S b/arch/i386/math-emu/reg_u_sub.S
new file mode 100644
index 000000000000..1b6c24801d22
--- /dev/null
+++ b/arch/i386/math-emu/reg_u_sub.S
@@ -0,0 +1,272 @@
1 .file "reg_u_sub.S"
2/*---------------------------------------------------------------------------+
3 | reg_u_sub.S |
4 | |
5 | Core floating point subtraction routine. |
6 | |
7 | Copyright (C) 1992,1993,1995,1997 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
9 | E-mail billm@suburbia.net |
10 | |
11 | Call from C as: |
12 | int FPU_u_sub(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, |
13 | int control_w) |
14 | Return value is the tag of the answer, or-ed with FPU_Exception if |
15 | one was raised, or -1 on internal error. |
16 | |
17 +---------------------------------------------------------------------------*/
18
19/*
20 | Kernel subtraction routine FPU_u_sub(reg *arg1, reg *arg2, reg *answ).
21 | Takes two valid reg f.p. numbers (TAG_Valid), which are
22 | treated as unsigned numbers,
23 | and returns their difference as a TAG_Valid or TAG_Zero f.p.
24 | number.
25 | The first number (arg1) must be the larger.
26 | The returned number is normalized.
27 | Basic checks are performed if PARANOID is defined.
28 */
29
30#include "exception.h"
31#include "fpu_emu.h"
32#include "control_w.h"
33
34.text
35ENTRY(FPU_u_sub)
36 pushl %ebp
37 movl %esp,%ebp
38 pushl %esi
39 pushl %edi
40 pushl %ebx
41
42 movl PARAM1,%esi /* source 1 */
43 movl PARAM2,%edi /* source 2 */
44
45 movl PARAM6,%ecx
46 subl PARAM7,%ecx /* exp1 - exp2 */
47
48#ifdef PARANOID
49 /* source 2 is always smaller than source 1 */
50 js L_bugged_1
51
52	testl	$0x80000000,SIGH(%edi)	/* The args are assumed to be normalized */
53 je L_bugged_2
54
55 testl $0x80000000,SIGH(%esi)
56 je L_bugged_2
57#endif /* PARANOID */
58
59/*--------------------------------------+
60 | Form a register holding the |
61 | smaller number |
62 +--------------------------------------*/
63 movl SIGH(%edi),%eax /* register ms word */
64 movl SIGL(%edi),%ebx /* register ls word */
65
66 movl PARAM3,%edi /* destination */
67 movl PARAM6,%edx
68 movw %dx,EXP(%edi) /* Copy exponent to destination */
69
70 xorl %edx,%edx /* register extension */
71
72/*--------------------------------------+
73 | Shift the temporary register |
74 | right the required number of |
75 | places. |
76 +--------------------------------------*/
77
78 cmpw $32,%cx /* shrd only works for 0..31 bits */
79 jnc L_more_than_31
80
81/* less than 32 bits */
82 shrd %cl,%ebx,%edx
83 shrd %cl,%eax,%ebx
84 shr %cl,%eax
85 jmp L_shift_done
86
87L_more_than_31:
88 cmpw $64,%cx
89 jnc L_more_than_63
90
91 subb $32,%cl
92 jz L_exactly_32
93
94 shrd %cl,%eax,%edx
95 shr %cl,%eax
96 orl %ebx,%ebx
97 jz L_more_31_no_low /* none of the lowest bits is set */
98
99 orl $1,%edx /* record the fact in the extension */
100
101L_more_31_no_low:
102 movl %eax,%ebx
103 xorl %eax,%eax
104 jmp L_shift_done
105
106L_exactly_32:
107 movl %ebx,%edx
108 movl %eax,%ebx
109 xorl %eax,%eax
110 jmp L_shift_done
111
112L_more_than_63:
113 cmpw $65,%cx
114 jnc L_more_than_64
115
116 /* Shift right by 64 bits */
117 movl %eax,%edx
118 orl %ebx,%ebx
119 jz L_more_63_no_low
120
121 orl $1,%edx
122 jmp L_more_63_no_low
123
124L_more_than_64:
125 jne L_more_than_65
126
127 /* Shift right by 65 bits */
128 /* Carry is clear if we get here */
129 movl %eax,%edx
130 rcrl %edx
131 jnc L_shift_65_nc
132
133 orl $1,%edx
134 jmp L_more_63_no_low
135
136L_shift_65_nc:
137 orl %ebx,%ebx
138 jz L_more_63_no_low
139
140 orl $1,%edx
141 jmp L_more_63_no_low
142
143L_more_than_65:
144	movl	$1,%edx	/* The shifted number always contains at least one '1' bit */
145
146L_more_63_no_low:
147 xorl %ebx,%ebx
148 xorl %eax,%eax
149
150L_shift_done:
151L_subtr:
152/*------------------------------+
153 | Do the subtraction |
154 +------------------------------*/
155 xorl %ecx,%ecx
156 subl %edx,%ecx
157 movl %ecx,%edx
158 movl SIGL(%esi),%ecx
159 sbbl %ebx,%ecx
160 movl %ecx,%ebx
161 movl SIGH(%esi),%ecx
162 sbbl %eax,%ecx
163 movl %ecx,%eax
164
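/*
 * The sequence above subtracts the full 96-bit shifted value from arg1
 * by starting the borrow chain in the extension word.  A hedged C
 * sketch (the operand names are illustrative, not from this file):
 *
 *	d_ext  = 0U - ext;
 *	borrow = (ext != 0);
 *	d_lo   = sig1_lo - lo - borrow;
 *	borrow = (sig1_lo < lo + borrow);	// informally; see sbbl
 *	d_hi   = sig1_hi - hi - borrow;		// a final borrow cannot
 *						// occur: arg1 is larger
 */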
165#ifdef PARANOID
166 /* We can never get a borrow */
167 jc L_bugged
168#endif /* PARANOID */
169
170/*--------------------------------------+
171 | Normalize the result |
172 +--------------------------------------*/
173 testl $0x80000000,%eax
174 jnz L_round /* no shifting needed */
175
176 orl %eax,%eax
177 jnz L_shift_1 /* shift left 1 - 31 bits */
178
179 orl %ebx,%ebx
180 jnz L_shift_32 /* shift left 32 - 63 bits */
181
182/*
183 * A rare case: the only non-zero pattern which can reach here is
184 *     1000000 .... 0000
185 *    -0111111 .... 1111 1
186 *    --------------------
187 *     0000000 .... 0000 1
188 */
189
190 cmpl $0x80000000,%edx
191 jnz L_must_be_zero
192
193 /* Shift left 64 bits */
194 subw $64,EXP(%edi)
195 xchg %edx,%eax
196 jmp fpu_reg_round
197
198L_must_be_zero:
199#ifdef PARANOID
200 orl %edx,%edx
201 jnz L_bugged_3
202#endif /* PARANOID */
203
204 /* The result is zero */
205 movw $0,EXP(%edi) /* exponent */
206 movl $0,SIGL(%edi)
207 movl $0,SIGH(%edi)
208 movl TAG_Zero,%eax
209 jmp L_exit
210
211L_shift_32:
212 movl %ebx,%eax
213 movl %edx,%ebx
214 movl $0,%edx
215 subw $32,EXP(%edi) /* Can get underflow here */
216
217/* We need to shift left by 1 - 31 bits */
218L_shift_1:
219 bsrl %eax,%ecx /* get the required shift in %ecx */
220 subl $31,%ecx
221 negl %ecx
222 shld %cl,%ebx,%eax
223 shld %cl,%edx,%ebx
224 shl %cl,%edx
225 subw %cx,EXP(%edi) /* Can get underflow here */
226
227L_round:
228 jmp fpu_reg_round /* Round the result */
229
230
231#ifdef PARANOID
232L_bugged_1:
233 pushl EX_INTERNAL|0x206
234 call EXCEPTION
235 pop %ebx
236 jmp L_error_exit
237
238L_bugged_2:
239 pushl EX_INTERNAL|0x209
240 call EXCEPTION
241 pop %ebx
242 jmp L_error_exit
243
244L_bugged_3:
245 pushl EX_INTERNAL|0x210
246 call EXCEPTION
247 pop %ebx
248 jmp L_error_exit
249
250L_bugged_4:
251 pushl EX_INTERNAL|0x211
252 call EXCEPTION
253 pop %ebx
254 jmp L_error_exit
255
256L_bugged:
257 pushl EX_INTERNAL|0x212
258 call EXCEPTION
259 pop %ebx
260 jmp L_error_exit
261
262L_error_exit:
263 movl $-1,%eax
264
265#endif /* PARANOID */
266
267L_exit:
268 popl %ebx
269 popl %edi
270 popl %esi
271 leave
272 ret
diff --git a/arch/i386/math-emu/round_Xsig.S b/arch/i386/math-emu/round_Xsig.S
new file mode 100644
index 000000000000..bbe0e87718e4
--- /dev/null
+++ b/arch/i386/math-emu/round_Xsig.S
@@ -0,0 +1,141 @@
1/*---------------------------------------------------------------------------+
2 | round_Xsig.S |
3 | |
4 | Copyright (C) 1992,1993,1994,1995 |
5 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
6 | Australia. E-mail billm@jacobi.maths.monash.edu.au |
7 | |
8 | Normalize and round a 12 byte quantity. |
9 | Call from C as: |
10 | int round_Xsig(Xsig *n) |
11 | |
12 | Normalize a 12 byte quantity. |
13 | Call from C as: |
14 | int norm_Xsig(Xsig *n) |
15 | |
16 | Each function returns the size of the shift (nr of bits). |
17 | |
18 +---------------------------------------------------------------------------*/
19 .file "round_Xsig.S"
20
21#include "fpu_emu.h"
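/*
 * A hedged C sketch of what round_Xsig does (x->msw/midw/lsw and
 * shift_left_1() are illustrative names, not identifiers from this
 * file; the returned count is negative for left shifts):
 *
 *	int shift = 0;
 *	while (!(x->msw & 0x80000000)) {	// normalise
 *		shift_left_1(x);
 *		shift--;
 *	}
 *	if (x->lsw & 0x80000000) {		// round: low word >= half
 *		if (++x->midw == 0 && ++x->msw == 0) {
 *			x->msw = 0x80000000;	// rounding overflowed
 *			shift++;
 *		}
 *	}
 *	return shift;
 */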
22
23
24.text
25ENTRY(round_Xsig)
26 pushl %ebp
27 movl %esp,%ebp
28 pushl %ebx /* Reserve some space */
29 pushl %ebx
30 pushl %esi
31
32 movl PARAM1,%esi
33
34 movl 8(%esi),%edx
35 movl 4(%esi),%ebx
36 movl (%esi),%eax
37
38 movl $0,-4(%ebp)
39
40 orl %edx,%edx /* ms bits */
41 js L_round /* Already normalized */
42 jnz L_shift_1 /* Shift left 1 - 31 bits */
43
44 movl %ebx,%edx
45 movl %eax,%ebx
46 xorl %eax,%eax
47 movl $-32,-4(%ebp)
48
49/* We need to shift left by 1 - 31 bits */
50L_shift_1:
51 bsrl %edx,%ecx /* get the required shift in %ecx */
52 subl $31,%ecx
53 negl %ecx
54 subl %ecx,-4(%ebp)
55 shld %cl,%ebx,%edx
56 shld %cl,%eax,%ebx
57 shl %cl,%eax
58
59L_round:
60 testl $0x80000000,%eax
61 jz L_exit
62
63 addl $1,%ebx
64 adcl $0,%edx
65 jnz L_exit
66
67 movl $0x80000000,%edx
68 incl -4(%ebp)
69
70L_exit:
71 movl %edx,8(%esi)
72 movl %ebx,4(%esi)
73 movl %eax,(%esi)
74
75 movl -4(%ebp),%eax
76
77 popl %esi
78 popl %ebx
79 leave
80 ret
81
82
83
84
85ENTRY(norm_Xsig)
86 pushl %ebp
87 movl %esp,%ebp
88 pushl %ebx /* Reserve some space */
89 pushl %ebx
90 pushl %esi
91
92 movl PARAM1,%esi
93
94 movl 8(%esi),%edx
95 movl 4(%esi),%ebx
96 movl (%esi),%eax
97
98 movl $0,-4(%ebp)
99
100 orl %edx,%edx /* ms bits */
101 js L_n_exit /* Already normalized */
102 jnz L_n_shift_1 /* Shift left 1 - 31 bits */
103
104 movl %ebx,%edx
105 movl %eax,%ebx
106 xorl %eax,%eax
107 movl $-32,-4(%ebp)
108
109 orl %edx,%edx /* ms bits */
110 js L_n_exit /* Normalized now */
111 jnz L_n_shift_1 /* Shift left 1 - 31 bits */
112
113 movl %ebx,%edx
114 movl %eax,%ebx
115 xorl %eax,%eax
116 addl $-32,-4(%ebp)
117	jmp	L_n_exit	/* Might not be normalized,
118				   but we shift no further. */
119
120/* We need to shift left by 1 - 31 bits */
121L_n_shift_1:
122 bsrl %edx,%ecx /* get the required shift in %ecx */
123 subl $31,%ecx
124 negl %ecx
125 subl %ecx,-4(%ebp)
126 shld %cl,%ebx,%edx
127 shld %cl,%eax,%ebx
128 shl %cl,%eax
129
130L_n_exit:
131 movl %edx,8(%esi)
132 movl %ebx,4(%esi)
133 movl %eax,(%esi)
134
135 movl -4(%ebp),%eax
136
137 popl %esi
138 popl %ebx
139 leave
140 ret
141
diff --git a/arch/i386/math-emu/shr_Xsig.S b/arch/i386/math-emu/shr_Xsig.S
new file mode 100644
index 000000000000..31cdd118e918
--- /dev/null
+++ b/arch/i386/math-emu/shr_Xsig.S
@@ -0,0 +1,87 @@
1 .file "shr_Xsig.S"
2/*---------------------------------------------------------------------------+
3 | shr_Xsig.S |
4 | |
5 | 12 byte right shift function |
6 | |
7 | Copyright (C) 1992,1994,1995 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@jacobi.maths.monash.edu.au |
10 | |
11 | Call from C as: |
12 | void shr_Xsig(Xsig *arg, unsigned nr) |
13 | |
14 | Extended shift right function. |
15 | Fastest for small shifts. |
16 | Shifts the 12 byte quantity pointed to by the first arg (arg) |
17 | right by the number of bits specified by the second arg (nr). |
18 | |
19 +---------------------------------------------------------------------------*/
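/*
 * Equivalent C for the shift ranges handled below (a hedged sketch;
 * x[] names the three 32-bit words, least significant first):
 *
 *	void shr_xsig_sketch(unsigned int x[3], unsigned int nr)
 *	{
 *		if (nr >= 96) {			// everything shifted out
 *			x[0] = x[1] = x[2] = 0;
 *			return;
 *		}
 *		while (nr >= 32) {		// whole-word steps
 *			x[0] = x[1]; x[1] = x[2]; x[2] = 0;
 *			nr -= 32;
 *		}
 *		if (nr) {			// the remaining 1..31 bits
 *			x[0] = (x[0] >> nr) | (x[1] << (32 - nr));
 *			x[1] = (x[1] >> nr) | (x[2] << (32 - nr));
 *			x[2] >>= nr;
 *		}
 *	}
 */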
20
21#include "fpu_emu.h"
22
23.text
24ENTRY(shr_Xsig)
25 push %ebp
26 movl %esp,%ebp
27 pushl %esi
28 movl PARAM2,%ecx
29 movl PARAM1,%esi
30 cmpl $32,%ecx /* shrd only works for 0..31 bits */
31 jnc L_more_than_31
32
33/* less than 32 bits */
34 pushl %ebx
35 movl (%esi),%eax /* lsl */
36 movl 4(%esi),%ebx /* midl */
37 movl 8(%esi),%edx /* msl */
38 shrd %cl,%ebx,%eax
39 shrd %cl,%edx,%ebx
40 shr %cl,%edx
41 movl %eax,(%esi)
42 movl %ebx,4(%esi)
43 movl %edx,8(%esi)
44 popl %ebx
45 popl %esi
46 leave
47 ret
48
49L_more_than_31:
50 cmpl $64,%ecx
51 jnc L_more_than_63
52
53 subb $32,%cl
54 movl 4(%esi),%eax /* midl */
55 movl 8(%esi),%edx /* msl */
56 shrd %cl,%edx,%eax
57 shr %cl,%edx
58 movl %eax,(%esi)
59 movl %edx,4(%esi)
60 movl $0,8(%esi)
61 popl %esi
62 leave
63 ret
64
65L_more_than_63:
66 cmpl $96,%ecx
67 jnc L_more_than_95
68
69 subb $64,%cl
70 movl 8(%esi),%eax /* msl */
71 shr %cl,%eax
72 xorl %edx,%edx
73 movl %eax,(%esi)
74 movl %edx,4(%esi)
75 movl %edx,8(%esi)
76 popl %esi
77 leave
78 ret
79
80L_more_than_95:
81 xorl %eax,%eax
82 movl %eax,(%esi)
83 movl %eax,4(%esi)
84 movl %eax,8(%esi)
85 popl %esi
86 leave
87 ret
diff --git a/arch/i386/math-emu/status_w.h b/arch/i386/math-emu/status_w.h
new file mode 100644
index 000000000000..78d7b7689dd6
--- /dev/null
+++ b/arch/i386/math-emu/status_w.h
@@ -0,0 +1,65 @@
1/*---------------------------------------------------------------------------+
2 | status_w.h |
3 | |
4 | Copyright (C) 1992,1993 |
5 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
6 | Australia. E-mail billm@vaxc.cc.monash.edu.au |
7 | |
8 +---------------------------------------------------------------------------*/
9
10#ifndef _STATUS_H_
11#define _STATUS_H_
12
13#include "fpu_emu.h" /* for definition of PECULIAR_486 */
14
15#ifdef __ASSEMBLY__
16#define Const__(x) $##x
17#else
18#define Const__(x) x
19#endif
20
21#define SW_Backward Const__(0x8000) /* backward compatibility */
22#define SW_C3 Const__(0x4000) /* condition bit 3 */
23#define SW_Top Const__(0x3800) /* top of stack */
24#define SW_Top_Shift Const__(11) /* shift for top of stack bits */
25#define SW_C2 Const__(0x0400) /* condition bit 2 */
26#define SW_C1 Const__(0x0200) /* condition bit 1 */
27#define SW_C0 Const__(0x0100) /* condition bit 0 */
28#define SW_Summary Const__(0x0080) /* exception summary */
29#define SW_Stack_Fault Const__(0x0040) /* stack fault */
30#define SW_Precision Const__(0x0020) /* loss of precision */
31#define SW_Underflow Const__(0x0010) /* underflow */
32#define SW_Overflow Const__(0x0008) /* overflow */
33#define SW_Zero_Div Const__(0x0004) /* divide by zero */
34#define SW_Denorm_Op Const__(0x0002) /* denormalized operand */
35#define SW_Invalid Const__(0x0001) /* invalid operation */
36
37#define SW_Exc_Mask Const__(0x27f) /* Status word exception bit mask */
38
39#ifndef __ASSEMBLY__
40
41#define COMP_A_gt_B 1
42#define COMP_A_eq_B 2
43#define COMP_A_lt_B 3
44#define COMP_No_Comp 4
45#define COMP_Denormal 0x20
46#define COMP_NaN 0x40
47#define COMP_SNaN 0x80
48
49#define status_word() \
50 ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top))
51#define setcc(cc) ({ \
52 partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); \
53 partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); })
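/*
 * setcc() is typically used to report a comparison result through the
 * x87 condition code bits, e.g. (a hedged example of the standard FCOM
 * encodings, not code from this file):
 *
 *	setcc(0);			C3=0 C2=0 C0=0  ->  st(0) >  source
 *	setcc(SW_C0);			C3=0 C2=0 C0=1  ->  st(0) <  source
 *	setcc(SW_C3);			C3=1 C2=0 C0=0  ->  st(0) == source
 *	setcc(SW_C0|SW_C2|SW_C3);	C3=1 C2=1 C0=1  ->  unordered
 */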
54
55#ifdef PECULIAR_486
56 /* Default, this conveys no information, but an 80486 does it. */
57 /* Clear the SW_C1 bit, "other bits undefined". */
58# define clear_C1() { partial_status &= ~SW_C1; }
59# else
60# define clear_C1()
61#endif /* PECULIAR_486 */
62
63#endif /* __ASSEMBLY__ */
64
65#endif /* _STATUS_H_ */
diff --git a/arch/i386/math-emu/version.h b/arch/i386/math-emu/version.h
new file mode 100644
index 000000000000..a0d73a1d2b67
--- /dev/null
+++ b/arch/i386/math-emu/version.h
@@ -0,0 +1,12 @@
1/*---------------------------------------------------------------------------+
2 | version.h |
3 | |
4 | |
5 | Copyright (C) 1992,1993,1994,1996,1997,1999 |
6 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
7 | E-mail billm@melbpc.org.au |
8 | |
9 | |
10 +---------------------------------------------------------------------------*/
11
12#define FPU_VERSION "wm-FPU-emu version 2.01"
diff --git a/arch/i386/math-emu/wm_shrx.S b/arch/i386/math-emu/wm_shrx.S
new file mode 100644
index 000000000000..518428317985
--- /dev/null
+++ b/arch/i386/math-emu/wm_shrx.S
@@ -0,0 +1,204 @@
1 .file "wm_shrx.S"
2/*---------------------------------------------------------------------------+
3 | wm_shrx.S |
4 | |
5 | 64 bit right shift functions |
6 | |
7 | Copyright (C) 1992,1995 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@jacobi.maths.monash.edu.au |
10 | |
11 | Call from C as: |
12 | unsigned FPU_shrx(void *arg1, unsigned arg2) |
13 | and |
14 | unsigned FPU_shrxs(void *arg1, unsigned arg2) |
15 | |
16 +---------------------------------------------------------------------------*/
17
18#include "fpu_emu.h"
19
20.text
21/*---------------------------------------------------------------------------+
22 | unsigned FPU_shrx(void *arg1, unsigned arg2) |
23 | |
24 | Extended shift right function. |
25 | Fastest for small shifts. |
26 | Shifts the 64 bit quantity pointed to by the first arg (arg1) |
27 | right by the number of bits specified by the second arg (arg2). |
28 | Forms a 96 bit quantity from the 64 bit arg and eax: |
29 | [ 64 bit arg ][ eax ] |
30 | shift right ---------> |
31 | The eax register is initialized to 0 before the shifting. |
32 | Results returned in the 64 bit arg and eax. |
33 +---------------------------------------------------------------------------*/
34
35ENTRY(FPU_shrx)
36 push %ebp
37 movl %esp,%ebp
38 pushl %esi
39 movl PARAM2,%ecx
40 movl PARAM1,%esi
41 cmpl $32,%ecx /* shrd only works for 0..31 bits */
42 jnc L_more_than_31
43
44/* less than 32 bits */
45 pushl %ebx
46 movl (%esi),%ebx /* lsl */
47 movl 4(%esi),%edx /* msl */
48 xorl %eax,%eax /* extension */
49 shrd %cl,%ebx,%eax
50 shrd %cl,%edx,%ebx
51 shr %cl,%edx
52 movl %ebx,(%esi)
53 movl %edx,4(%esi)
54 popl %ebx
55 popl %esi
56 leave
57 ret
58
59L_more_than_31:
60 cmpl $64,%ecx
61 jnc L_more_than_63
62
63 subb $32,%cl
64 movl (%esi),%eax /* lsl */
65 movl 4(%esi),%edx /* msl */
66 shrd %cl,%edx,%eax
67 shr %cl,%edx
68 movl %edx,(%esi)
69 movl $0,4(%esi)
70 popl %esi
71 leave
72 ret
73
74L_more_than_63:
75 cmpl $96,%ecx
76 jnc L_more_than_95
77
78 subb $64,%cl
79 movl 4(%esi),%eax /* msl */
80 shr %cl,%eax
81 xorl %edx,%edx
82 movl %edx,(%esi)
83 movl %edx,4(%esi)
84 popl %esi
85 leave
86 ret
87
88L_more_than_95:
89 xorl %eax,%eax
90 movl %eax,(%esi)
91 movl %eax,4(%esi)
92 popl %esi
93 leave
94 ret
95
96
97/*---------------------------------------------------------------------------+
98 | unsigned FPU_shrxs(void *arg1, unsigned arg2) |
99 | |
100 | Extended shift right function (optimized for small floating point |
101 | integers). |
102 | Shifts the 64 bit quantity pointed to by the first arg (arg1) |
103 | right by the number of bits specified by the second arg (arg2). |
104 | Forms a 96 bit quantity from the 64 bit arg and eax: |
105 | [ 64 bit arg ][ eax ] |
106 | shift right ---------> |
107 | The eax register is initialized to 0 before the shifting. |
108 | The lower 8 bits of eax are lost and replaced by a flag which is |
109 | set (to 0x01) if any bit, apart from the first one, is set in the |
110 | part which has been shifted out of the arg. |
111 | Results returned in the 64 bit arg and eax. |
112 +---------------------------------------------------------------------------*/
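/*
 * A hedged C sketch of the flag computation (ext and lower_bits are
 * illustrative names): after the shift, %eax holds the most significant
 * 32 bits that fell out of the argument, and its low byte is replaced:
 *
 *	sticky = ((ext & 0x7fffffff) != 0) || (lower_bits != 0);
 *	ext    = (ext & 0xffffff00) | sticky;
 */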
113ENTRY(FPU_shrxs)
114 push %ebp
115 movl %esp,%ebp
116 pushl %esi
117 pushl %ebx
118 movl PARAM2,%ecx
119 movl PARAM1,%esi
120	cmpl	$64,%ecx	/* Shifts of 64 bits or more are handled below */
121 jnc Ls_more_than_63
122
123 cmpl $32,%ecx /* shrd only works for 0..31 bits */
124 jc Ls_less_than_32
125
126/* We got here without jumps by assuming that the most common requirement
127 is for small integers */
128/* Shift by [32..63] bits */
129 subb $32,%cl
130 movl (%esi),%eax /* lsl */
131 movl 4(%esi),%edx /* msl */
132 xorl %ebx,%ebx
133 shrd %cl,%eax,%ebx
134 shrd %cl,%edx,%eax
135 shr %cl,%edx
136 orl %ebx,%ebx /* test these 32 bits */
137 setne %bl
138 test $0x7fffffff,%eax /* and 31 bits here */
139 setne %bh
140 orw %bx,%bx /* Any of the 63 bit set ? */
141 setne %al
142 movl %edx,(%esi)
143 movl $0,4(%esi)
144 popl %ebx
145 popl %esi
146 leave
147 ret
148
149/* Shift by [0..31] bits */
150Ls_less_than_32:
151 movl (%esi),%ebx /* lsl */
152 movl 4(%esi),%edx /* msl */
153 xorl %eax,%eax /* extension */
154 shrd %cl,%ebx,%eax
155 shrd %cl,%edx,%ebx
156 shr %cl,%edx
157 test $0x7fffffff,%eax /* only need to look at eax here */
158 setne %al
159 movl %ebx,(%esi)
160 movl %edx,4(%esi)
161 popl %ebx
162 popl %esi
163 leave
164 ret
165
166/* Shift by [64..95] bits */
167Ls_more_than_63:
168 cmpl $96,%ecx
169 jnc Ls_more_than_95
170
171 subb $64,%cl
172 movl (%esi),%ebx /* lsl */
173 movl 4(%esi),%eax /* msl */
174 xorl %edx,%edx /* extension */
175 shrd %cl,%ebx,%edx
176 shrd %cl,%eax,%ebx
177 shr %cl,%eax
178 orl %ebx,%edx
179 setne %bl
180 test $0x7fffffff,%eax /* only need to look at eax here */
181 setne %bh
182 orw %bx,%bx
183 setne %al
184 xorl %edx,%edx
185 movl %edx,(%esi) /* set to zero */
186 movl %edx,4(%esi) /* set to zero */
187 popl %ebx
188 popl %esi
189 leave
190 ret
191
192Ls_more_than_95:
193/* Shift by [96..inf) bits */
194 xorl %eax,%eax
195 movl (%esi),%ebx
196 orl 4(%esi),%ebx
197 setne %al
198 xorl %ebx,%ebx
199 movl %ebx,(%esi)
200 movl %ebx,4(%esi)
201 popl %ebx
202 popl %esi
203 leave
204 ret
diff --git a/arch/i386/math-emu/wm_sqrt.S b/arch/i386/math-emu/wm_sqrt.S
new file mode 100644
index 000000000000..d258f59564e1
--- /dev/null
+++ b/arch/i386/math-emu/wm_sqrt.S
@@ -0,0 +1,470 @@
1 .file "wm_sqrt.S"
2/*---------------------------------------------------------------------------+
3 | wm_sqrt.S |
4 | |
5 | Fixed point arithmetic square root evaluation. |
6 | |
7 | Copyright (C) 1992,1993,1995,1997 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@suburbia.net |
10 | |
11 | Call from C as: |
12 | int wm_sqrt(FPU_REG *n, unsigned int control_word) |
13 | |
14 +---------------------------------------------------------------------------*/
15
16/*---------------------------------------------------------------------------+
17 | wm_sqrt(FPU_REG *n, unsigned int control_word) |
18 | returns the square root of n in n. |
19 | |
20 | Use Newton's method to compute the square root of a number, which must |
21 | be in the range [1.0 .. 4.0), to 64 bits accuracy. |
22 | Does not check the sign or tag of the argument. |
23 | Sets the exponent, but not the sign or tag of the result. |
24 | |
25 | The guess is kept in %esi:%edi |
26 +---------------------------------------------------------------------------*/
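/*
 * Newton's method for sqrt(n) iterates g <- (g + n/g) / 2, roughly
 * doubling the number of correct bits per step.  The same scheme in
 * floating-point C, as a hedged sketch of what the fixed-point code
 * below does (initial_estimate() is an illustrative name):
 *
 *	double g = initial_estimate(n);		// the (2 + x) / 3 guess
 *	for (int i = 0; i < 5; i++)
 *		g = (g + n / g) / 2.0;		// one Newton step
 */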
27
28#include "exception.h"
29#include "fpu_emu.h"
30
31
32#ifndef NON_REENTRANT_FPU
33/* Local storage on the stack: */
34#define FPU_accum_3 -4(%ebp) /* ms word */
35#define FPU_accum_2 -8(%ebp)
36#define FPU_accum_1 -12(%ebp)
37#define FPU_accum_0 -16(%ebp)
38
39/*
40 * The de-normalised argument:
41 * sq_2 sq_1 sq_0
42 * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
43 * ^ binary point here
44 */
45#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
46#define FPU_fsqrt_arg_1 -24(%ebp)
47#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */
48
49#else
50/* Local storage in a static area: */
51.data
52 .align 4,0
53FPU_accum_3:
54 .long 0 /* ms word */
55FPU_accum_2:
56 .long 0
57FPU_accum_1:
58 .long 0
59FPU_accum_0:
60 .long 0
61
62/* The de-normalised argument:
63 sq_2 sq_1 sq_0
64 b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
65 ^ binary point here
66 */
67FPU_fsqrt_arg_2:
68 .long 0 /* ms word */
69FPU_fsqrt_arg_1:
70 .long 0
71FPU_fsqrt_arg_0:
72 .long 0 /* ls word, at most the ms bit is set */
73#endif /* NON_REENTRANT_FPU */
74
75
76.text
77ENTRY(wm_sqrt)
78 pushl %ebp
79 movl %esp,%ebp
80#ifndef NON_REENTRANT_FPU
81 subl $28,%esp
82#endif /* NON_REENTRANT_FPU */
83 pushl %esi
84 pushl %edi
85 pushl %ebx
86
87 movl PARAM1,%esi
88
89 movl SIGH(%esi),%eax
90 movl SIGL(%esi),%ecx
91 xorl %edx,%edx
92
93/* We use a rough linear estimate for the first guess.. */
94
95 cmpw EXP_BIAS,EXP(%esi)
96 jnz sqrt_arg_ge_2
97
98 shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
99 rcrl $1,%ecx
100 rcrl $1,%edx
101
102sqrt_arg_ge_2:
103/* From here on, n is never accessed directly again until it is
104 replaced by the answer. */
105
106 movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
107 movl %ecx,FPU_fsqrt_arg_1
108 movl %edx,FPU_fsqrt_arg_0
109
110/* Make a linear first estimate */
111 shrl $1,%eax
112 addl $0x40000000,%eax
113 movl $0xaaaaaaaa,%ecx
114 mull %ecx
115 shll %edx /* max result was 7fff... */
116 testl $0x80000000,%edx /* but min was 3fff... */
117 jnz sqrt_prelim_no_adjust
118
119 movl $0x80000000,%edx /* round up */
120
121sqrt_prelim_no_adjust:
122 movl %edx,%esi /* Our first guess */
123
124/* We have now computed (approx) (2 + x) / 3, which forms the basis
125 for a few iterations of Newton's method */
126
127 movl FPU_fsqrt_arg_2,%ecx /* ms word */
128
129/*
130 * From our initial estimate, three iterations are enough to get us
131 * to 30 bits or so. This will then allow two iterations at better
132 * precision to complete the process.
133 */
134
135/* Compute (g + n/g)/2 at each iteration (g is the guess). */
136 shrl %ecx /* Doing this first will prevent a divide */
137 /* overflow later. */
138
139 movl %ecx,%edx /* msw of the arg / 2 */
140 divl %esi /* current estimate */
141 shrl %esi /* divide by 2 */
142 addl %eax,%esi /* the new estimate */
143
144 movl %ecx,%edx
145 divl %esi
146 shrl %esi
147 addl %eax,%esi
148
149 movl %ecx,%edx
150 divl %esi
151 shrl %esi
152 addl %eax,%esi
153
154/*
155 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
156 * we improve it to 60 bits or so.
157 *
158 * The strategy from now on is to compute new estimates from
159 * guess := guess + (n - guess^2) / (2 * guess)
160 */
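/*
 * This is the same Newton step as before, rewritten so that only the
 * small residual needs to be computed at high precision:
 *
 *	(g + n/g) / 2  =  g + (n - g^2) / (2*g)
 *
 * With g already accurate to about 30 bits, n - g^2 is tiny, so the
 * division by 2*g does not itself need to be very precise.
 */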
161
162/* First, find the square of the guess */
163 movl %esi,%eax
164 mull %esi
165/* guess^2 now in %edx:%eax */
166
167 movl FPU_fsqrt_arg_1,%ecx
168 subl %ecx,%eax
169 movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
170 sbbl %ecx,%edx
171 jnc sqrt_stage_2_positive
172
173/* Subtraction gives a negative result,
174 negate the result before division. */
175 notl %edx
176 notl %eax
177 addl $1,%eax
178 adcl $0,%edx
179
180 divl %esi
181 movl %eax,%ecx
182
183 movl %edx,%eax
184 divl %esi
185 jmp sqrt_stage_2_finish
186
187sqrt_stage_2_positive:
188 divl %esi
189 movl %eax,%ecx
190
191 movl %edx,%eax
192 divl %esi
193
194 notl %ecx
195 notl %eax
196 addl $1,%eax
197 adcl $0,%ecx
198
199sqrt_stage_2_finish:
200 sarl $1,%ecx /* divide by 2 */
201 rcrl $1,%eax
202
203 /* Form the new estimate in %esi:%edi */
204 movl %eax,%edi
205 addl %ecx,%esi
206
207 jnz sqrt_stage_2_done /* result should be [1..2) */
208
209#ifdef PARANOID
210/* It should be possible to get here only if the arg is ffff....ffff */
211 cmp $0xffffffff,FPU_fsqrt_arg_1
212 jnz sqrt_stage_2_error
213#endif /* PARANOID */
214
215/* The best rounded result. */
216 xorl %eax,%eax
217 decl %eax
218 movl %eax,%edi
219 movl %eax,%esi
220 movl $0x7fffffff,%eax
221 jmp sqrt_round_result
222
223#ifdef PARANOID
224sqrt_stage_2_error:
225 pushl EX_INTERNAL|0x213
226 call EXCEPTION
227#endif /* PARANOID */
228
229sqrt_stage_2_done:
230
231/* Now the square root has been computed to better than 60 bits. */
232
233/* Find the square of the guess. */
234 movl %edi,%eax /* ls word of guess */
235 mull %edi
236 movl %edx,FPU_accum_1
237
238 movl %esi,%eax
239 mull %esi
240 movl %edx,FPU_accum_3
241 movl %eax,FPU_accum_2
242
243 movl %edi,%eax
244 mull %esi
245 addl %eax,FPU_accum_1
246 adcl %edx,FPU_accum_2
247 adcl $0,FPU_accum_3
248
249/* movl %esi,%eax */
250/* mull %edi */
251 addl %eax,FPU_accum_1
252 adcl %edx,FPU_accum_2
253 adcl $0,FPU_accum_3
254
255/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
256
257 movl FPU_fsqrt_arg_0,%eax /* get normalized n */
258 subl %eax,FPU_accum_1
259 movl FPU_fsqrt_arg_1,%eax
260 sbbl %eax,FPU_accum_2
261 movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
262 sbbl %eax,FPU_accum_3
263 jnc sqrt_stage_3_positive
264
265/* Subtraction gives a negative result,
266 negate the result before division */
267 notl FPU_accum_1
268 notl FPU_accum_2
269 notl FPU_accum_3
270 addl $1,FPU_accum_1
271 adcl $0,FPU_accum_2
272
273#ifdef PARANOID
274 adcl $0,FPU_accum_3 /* This must be zero */
275 jz sqrt_stage_3_no_error
276
277sqrt_stage_3_error:
278 pushl EX_INTERNAL|0x207
279 call EXCEPTION
280
281sqrt_stage_3_no_error:
282#endif /* PARANOID */
283
284 movl FPU_accum_2,%edx
285 movl FPU_accum_1,%eax
286 divl %esi
287 movl %eax,%ecx
288
289 movl %edx,%eax
290 divl %esi
291
292 sarl $1,%ecx /* divide by 2 */
293 rcrl $1,%eax
294
295 /* prepare to round the result */
296
297 addl %ecx,%edi
298 adcl $0,%esi
299
300 jmp sqrt_stage_3_finished
301
302sqrt_stage_3_positive:
303 movl FPU_accum_2,%edx
304 movl FPU_accum_1,%eax
305 divl %esi
306 movl %eax,%ecx
307
308 movl %edx,%eax
309 divl %esi
310
311 sarl $1,%ecx /* divide by 2 */
312 rcrl $1,%eax
313
314 /* prepare to round the result */
315
316 notl %eax /* Negate the correction term */
317 notl %ecx
318 addl $1,%eax
319 adcl $0,%ecx /* carry here ==> correction == 0 */
320 adcl $0xffffffff,%esi
321
322 addl %ecx,%edi
323 adcl $0,%esi
324
325sqrt_stage_3_finished:
326
327/*
328 * The result in %esi:%edi:%eax should be good to about 90 bits here,
329 * and the rounding information here does not have sufficient accuracy
330 * in a few rare cases.
331 */
332 cmpl $0xffffffe0,%eax
333 ja sqrt_near_exact_x
334
335 cmpl $0x00000020,%eax
336 jb sqrt_near_exact
337
338 cmpl $0x7fffffe0,%eax
339 jb sqrt_round_result
340
341 cmpl $0x80000020,%eax
342 jb sqrt_get_more_precision
343
344sqrt_round_result:
345/* Set up for rounding operations */
346 movl %eax,%edx
347 movl %esi,%eax
348 movl %edi,%ebx
349 movl PARAM1,%edi
350 movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
351 jmp fpu_reg_round
352
353
354sqrt_near_exact_x:
355/* First, the estimate must be rounded up. */
356 addl $1,%edi
357 adcl $0,%esi
358
359sqrt_near_exact:
360/*
361 * This is an easy case because x^1/2 is monotonic.
362 * We need just find the square of our estimate, compare it
363 * with the argument, and deduce whether our estimate is
364 * above, below, or exact. We use the fact that the estimate
365 * is known to be accurate to about 90 bits.
366 */
367 movl %edi,%eax /* ls word of guess */
368 mull %edi
369 movl %edx,%ebx /* 2nd ls word of square */
370 movl %eax,%ecx /* ls word of square */
371
372 movl %edi,%eax
373 mull %esi
374 addl %eax,%ebx
375 addl %eax,%ebx
376
377#ifdef PARANOID
378 cmp $0xffffffb0,%ebx
379 jb sqrt_near_exact_ok
380
381 cmp $0x00000050,%ebx
382 ja sqrt_near_exact_ok
383
384 pushl EX_INTERNAL|0x214
385 call EXCEPTION
386
387sqrt_near_exact_ok:
388#endif /* PARANOID */
389
390 or %ebx,%ebx
391 js sqrt_near_exact_small
392
393 jnz sqrt_near_exact_large
394
395 or %ebx,%edx
396 jnz sqrt_near_exact_large
397
398/* Our estimate is exactly the right answer */
399 xorl %eax,%eax
400 jmp sqrt_round_result
401
402sqrt_near_exact_small:
403/* Our estimate is too small */
404 movl $0x000000ff,%eax
405 jmp sqrt_round_result
406
407sqrt_near_exact_large:
408/* Our estimate is too large, we need to decrement it */
409 subl $1,%edi
410 sbbl $0,%esi
411 movl $0xffffff00,%eax
412 jmp sqrt_round_result
413
414
415sqrt_get_more_precision:
416/* This case is almost the same as the above, except we start
417 with an extra bit of precision in the estimate. */
418 stc /* The extra bit. */
419 rcll $1,%edi /* Shift the estimate left one bit */
420 rcll $1,%esi
421
422 movl %edi,%eax /* ls word of guess */
423 mull %edi
424 movl %edx,%ebx /* 2nd ls word of square */
425 movl %eax,%ecx /* ls word of square */
426
427 movl %edi,%eax
428 mull %esi
429 addl %eax,%ebx
430 addl %eax,%ebx
431
432/* Put our estimate back to its original value */
433 stc /* The ms bit. */
434 rcrl $1,%esi /* Shift the estimate right one bit */
435 rcrl $1,%edi
436
437#ifdef PARANOID
438 cmp $0xffffff60,%ebx
439 jb sqrt_more_prec_ok
440
441 cmp $0x000000a0,%ebx
442 ja sqrt_more_prec_ok
443
444 pushl EX_INTERNAL|0x215
445 call EXCEPTION
446
447sqrt_more_prec_ok:
448#endif /* PARANOID */
449
450 or %ebx,%ebx
451 js sqrt_more_prec_small
452
453 jnz sqrt_more_prec_large
454
455 or %ebx,%ecx
456 jnz sqrt_more_prec_large
457
458/* Our estimate is exactly the right answer */
459 movl $0x80000000,%eax
460 jmp sqrt_round_result
461
462sqrt_more_prec_small:
463/* Our estimate is too small */
464 movl $0x800000ff,%eax
465 jmp sqrt_round_result
466
467sqrt_more_prec_large:
468/* Our estimate is too large */
469 movl $0x7fffff00,%eax
470 jmp sqrt_round_result
diff --git a/arch/i386/mm/Makefile b/arch/i386/mm/Makefile
new file mode 100644
index 000000000000..fc3272506846
--- /dev/null
+++ b/arch/i386/mm/Makefile
@@ -0,0 +1,10 @@
1#
2# Makefile for the linux i386-specific parts of the memory manager.
3#
4
5obj-y := init.o pgtable.o fault.o ioremap.o extable.o pageattr.o mmap.o
6
7obj-$(CONFIG_DISCONTIGMEM) += discontig.o
8obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
9obj-$(CONFIG_HIGHMEM) += highmem.o
10obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap.o
diff --git a/arch/i386/mm/boot_ioremap.c b/arch/i386/mm/boot_ioremap.c
new file mode 100644
index 000000000000..523b30634e0a
--- /dev/null
+++ b/arch/i386/mm/boot_ioremap.c
@@ -0,0 +1,97 @@
1/*
2 * arch/i386/mm/boot_ioremap.c
3 *
4 * Re-map functions for early boot-time before paging_init() when the
5 * boot-time pagetables are still in use
6 *
7 * Written by Dave Hansen <haveblue@us.ibm.com>
8 */
9
10
11/*
12 * We need to use the 2-level pagetable functions, but CONFIG_X86_PAE
13 * keeps that from happening. If anyone has a better way, I'm listening.
14 *
15 * boot_pte_t is defined only if this all works correctly
16 */
17
18#include <linux/config.h>
19#undef CONFIG_X86_PAE
20#include <asm/page.h>
21#include <asm/pgtable.h>
22#include <asm/tlbflush.h>
23#include <linux/init.h>
24#include <linux/stddef.h>
25
26/*
27 * I'm cheating here. It is known that the two boot PTE pages are
28 * allocated next to each other. I'm pretending that they're just
29 * one big array.
30 */
31
32#define BOOT_PTE_PTRS (PTRS_PER_PTE*2)
33#define boot_pte_index(address) \
34 (((address) >> PAGE_SHIFT) & (BOOT_PTE_PTRS - 1))
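/*
 * Editorial sketch (assumed non-PAE figures: PAGE_SHIFT == 12,
 * PTRS_PER_PTE == 1024): BOOT_PTE_PTRS is then 2048, so the macro
 * masks a virtual address down to one of the 2048 PTE slots covering
 * the first 8MB, e.g.
 *	boot_pte_index(0xc05b2000) == (0xc05b2 & 0x7ff) == 0x5b2
 */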
35
36static inline boot_pte_t* boot_vaddr_to_pte(void *address)
37{
38 boot_pte_t* boot_pg = (boot_pte_t*)pg0;
39 return &boot_pg[boot_pte_index((unsigned long)address)];
40}
41
42/*
43 * This is only for a caller who is clever enough to page-align
44 * phys_addr and virtual_source, and who also has a preference
45 * about the virtual address from which to steal ptes
46 */
47static void __boot_ioremap(unsigned long phys_addr, unsigned long nrpages,
48 void* virtual_source)
49{
50 boot_pte_t* pte;
51 int i;
52 char *vaddr = virtual_source;
53
54 pte = boot_vaddr_to_pte(virtual_source);
55 for (i=0; i < nrpages; i++, phys_addr += PAGE_SIZE, pte++) {
56 set_pte(pte, pfn_pte(phys_addr>>PAGE_SHIFT, PAGE_KERNEL));
57 __flush_tlb_one(&vaddr[i*PAGE_SIZE]);
58 }
59}
60
61/* the virtual space we're going to remap comes from this array */
62#define BOOT_IOREMAP_PAGES 4
63#define BOOT_IOREMAP_SIZE (BOOT_IOREMAP_PAGES*PAGE_SIZE)
64static __initdata char boot_ioremap_space[BOOT_IOREMAP_SIZE]
65 __attribute__ ((aligned (PAGE_SIZE)));
66
67/*
68 * This only applies to things which need to ioremap before paging_init();
69 * bt_ioremap() and plain ioremap() are both useless at this point.
70 *
71 * When used, we're still using the boot-time pagetables, which only
72 * have 2 PTE pages mapping the first 8MB
73 *
74 * There is no unmap. The boot-time PTE pages aren't used after boot.
75 * If you really want the space back, just remap it yourself.
76 * boot_ioremap(&boot_ioremap_space - PAGE_OFFSET, BOOT_IOREMAP_SIZE)
77 */
78__init void* boot_ioremap(unsigned long phys_addr, unsigned long size)
79{
80 unsigned long last_addr, offset;
81 unsigned int nrpages;
82
83 last_addr = phys_addr + size - 1;
84
85 /* page align the requested address */
86 offset = phys_addr & ~PAGE_MASK;
87 phys_addr &= PAGE_MASK;
88 size = PAGE_ALIGN(last_addr) - phys_addr;
89
90 nrpages = size >> PAGE_SHIFT;
91 if (nrpages > BOOT_IOREMAP_PAGES)
92 return NULL;
93
94 __boot_ioremap(phys_addr, nrpages, boot_ioremap_space);
95
96 return &boot_ioremap_space[offset];
97}
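/*
 * Editorial usage sketch (hypothetical caller, not in this file): an
 * early-boot table parser could map a two-page firmware table with
 *
 *	void *tbl = boot_ioremap(table_phys, 2 * PAGE_SIZE);
 *	if (!tbl)
 *		return;	(more than BOOT_IOREMAP_PAGES would be needed)
 *
 * where table_phys is a placeholder physical address obtained from
 * firmware; the returned pointer is already offset within the page.
 */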
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
new file mode 100644
index 000000000000..1726b4096b10
--- /dev/null
+++ b/arch/i386/mm/discontig.c
@@ -0,0 +1,383 @@
1/*
2 * Written by: Patricia Gaughen <gone@us.ibm.com>, IBM Corporation
3 * August 2002: added remote node KVA remap - Martin J. Bligh
4 *
5 * Copyright (C) 2002, IBM Corp.
6 *
7 * All rights reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
17 * NON INFRINGEMENT. See the GNU General Public License for more
18 * details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25#include <linux/config.h>
26#include <linux/mm.h>
27#include <linux/bootmem.h>
28#include <linux/mmzone.h>
29#include <linux/highmem.h>
30#include <linux/initrd.h>
31#include <linux/nodemask.h>
32#include <asm/e820.h>
33#include <asm/setup.h>
34#include <asm/mmzone.h>
35#include <bios_ebda.h>
36
37struct pglist_data *node_data[MAX_NUMNODES];
38bootmem_data_t node0_bdata;
39
40/*
41 * numa interface - we expect the numa architecture specific code to have
42 * populated the following initialisation data.
43 *
44 * 1) node_online_map - the map of all nodes configured (online) in the system
45 * 2) physnode_map - the mapping between a pfn and owning node
46 * 3) node_start_pfn - the starting page frame number for a node
47 * 4) node_end_pfn - the ending page frame number for a node
48 */
49
50/*
51 * physnode_map keeps track of the physical memory layout of a generic
52 * numa node on a 256Mb break (each element of the array will
53 * represent 256Mb of memory and will be marked by the node id). So,
54 * if the first gig is on node 0, and the second gig is on node 1
55 * physnode_map will contain:
56 *
57 * physnode_map[0-3] = 0;
58 * physnode_map[4-7] = 1;
59 * physnode_map[8- ] = -1;
60 */
61s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1};
62
63void memory_present(int nid, unsigned long start, unsigned long end)
64{
65 unsigned long pfn;
66
67 printk(KERN_INFO "Node: %d, start_pfn: %ld, end_pfn: %ld\n",
68 nid, start, end);
69 printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid);
70 printk(KERN_DEBUG " ");
71 for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) {
72 physnode_map[pfn / PAGES_PER_ELEMENT] = nid;
73 printk("%ld ", pfn);
74 }
75 printk("\n");
76}
77
78unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
79 unsigned long end_pfn)
80{
81 unsigned long nr_pages = end_pfn - start_pfn;
82
83 if (!nr_pages)
84 return 0;
85
86 return (nr_pages + 1) * sizeof(struct page);
87}
88
89unsigned long node_start_pfn[MAX_NUMNODES];
90unsigned long node_end_pfn[MAX_NUMNODES];
91
92extern unsigned long find_max_low_pfn(void);
93extern void find_max_pfn(void);
94extern void one_highpage_init(struct page *, int, int);
95
96extern struct e820map e820;
97extern unsigned long init_pg_tables_end;
98extern unsigned long highend_pfn, highstart_pfn;
99extern unsigned long max_low_pfn;
100extern unsigned long totalram_pages;
101extern unsigned long totalhigh_pages;
102
103#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
104
105unsigned long node_remap_start_pfn[MAX_NUMNODES];
106unsigned long node_remap_size[MAX_NUMNODES];
107unsigned long node_remap_offset[MAX_NUMNODES];
108void *node_remap_start_vaddr[MAX_NUMNODES];
109void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
110
111/*
112 * FLAT - support for basic PC memory model with discontig enabled, essentially
113 * a single node with all available processors in it with a flat
114 * memory map.
115 */
116int __init get_memcfg_numa_flat(void)
117{
118 printk("NUMA - single node, flat memory mode\n");
119
120 /* Run the memory configuration and find the top of memory. */
121 find_max_pfn();
122 node_start_pfn[0] = 0;
123 node_end_pfn[0] = max_pfn;
124 memory_present(0, 0, max_pfn);
125
126 /* Indicate there is one node available. */
127 nodes_clear(node_online_map);
128 node_set_online(0);
129 return 1;
130}
131
132/*
133 * Find the highest page frame number we have available for the node
134 */
135static void __init find_max_pfn_node(int nid)
136{
137 if (node_end_pfn[nid] > max_pfn)
138 node_end_pfn[nid] = max_pfn;
139 /*
140 * if a user has given mem=XXXX, then we need to make sure
141 * that the node _starts_ before that, too, not just ends
142 */
143 if (node_start_pfn[nid] > max_pfn)
144 node_start_pfn[nid] = max_pfn;
145 if (node_start_pfn[nid] > node_end_pfn[nid])
146 BUG();
147}
148
149/*
150 * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
151 * method. For node zero take this from the bottom of memory, for
152 * subsequent nodes place them at node_remap_start_vaddr which contains
153 * node local data in physically node local memory. See setup_memory()
154 * for details.
155 */
156static void __init allocate_pgdat(int nid)
157{
158 if (nid && node_has_online_mem(nid))
159 NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
160 else {
161 NODE_DATA(nid) = (pg_data_t *)(__va(min_low_pfn << PAGE_SHIFT));
162 min_low_pfn += PFN_UP(sizeof(pg_data_t));
163 }
164}
165
166void __init remap_numa_kva(void)
167{
168 void *vaddr;
169 unsigned long pfn;
170 int node;
171
172 for_each_online_node(node) {
173 if (node == 0)
174 continue;
175 for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
176 vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
177 set_pmd_pfn((ulong) vaddr,
178 node_remap_start_pfn[node] + pfn,
179 PAGE_KERNEL_LARGE);
180 }
181 }
182}
183
184static unsigned long calculate_numa_remap_pages(void)
185{
186 int nid;
187 unsigned long size, reserve_pages = 0;
188
189 for_each_online_node(nid) {
190 if (nid == 0)
191 continue;
192 if (!node_remap_size[nid])
193 continue;
194
195 /*
196 * The acpi/srat node info can show hot-add memory zones
197 * where memory could be added but not currently present.
198 */
199 if (node_start_pfn[nid] > max_pfn)
200 continue;
201 if (node_end_pfn[nid] > max_pfn)
202 node_end_pfn[nid] = max_pfn;
203
204 /* ensure the remap includes space for the pgdat. */
205 size = node_remap_size[nid] + sizeof(pg_data_t);
206
207 /* convert size to large (pmd size) pages, rounding up */
208 size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
209 /* now the roundup is correct, convert to PAGE_SIZE pages */
210 size = size * PTRS_PER_PTE;
211 printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
212 size, nid);
213 node_remap_size[nid] = size;
214 reserve_pages += size;
215 node_remap_offset[nid] = reserve_pages;
216 printk("Shrinking node %d from %ld pages to %ld pages\n",
217 nid, node_end_pfn[nid], node_end_pfn[nid] - size);
218 node_end_pfn[nid] -= size;
219 node_remap_start_pfn[nid] = node_end_pfn[nid];
220 }
221 printk("Reserving total of %ld pages for numa KVA remap\n",
222 reserve_pages);
223 return reserve_pages;
224}
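/*
 * Editorial arithmetic sketch (assumed non-PAE figures): with
 * PTRS_PER_PTE == 1024 and PAGE_SIZE == 4096, LARGE_PAGE_BYTES is
 * 4MB. A node whose lmem_map needs, say, 3.5MB thus rounds up to one
 * pmd-sized page: (0x380000 + 0x3fffff) / 0x400000 == 1, then
 * 1 * 1024 == 1024 4K pages reserved at the top of that node.
 */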
225
226extern void setup_bootmem_allocator(void);
227unsigned long __init setup_memory(void)
228{
229 int nid;
230 unsigned long system_start_pfn, system_max_low_pfn;
231 unsigned long reserve_pages;
232
233 /*
234 * When mapping a NUMA machine we allocate the node_mem_map arrays
235 * from node local memory. They are then mapped directly into KVA
236 * between zone normal and vmalloc space. Calculate the size of
237 * this space and use it to adjust the boundary between ZONE_NORMAL
238 * and ZONE_HIGHMEM.
239 */
240 find_max_pfn();
241 get_memcfg_numa();
242
243 reserve_pages = calculate_numa_remap_pages();
244
245 /* partially used pages are not usable - thus round upwards */
246 system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
247
248 system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages;
249 printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n",
250 reserve_pages, max_low_pfn + reserve_pages);
251 printk("max_pfn = %ld\n", max_pfn);
252#ifdef CONFIG_HIGHMEM
253 highstart_pfn = highend_pfn = max_pfn;
254 if (max_pfn > system_max_low_pfn)
255 highstart_pfn = system_max_low_pfn;
256 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
257 pages_to_mb(highend_pfn - highstart_pfn));
258#endif
259 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
260 pages_to_mb(system_max_low_pfn));
261 printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n",
262 min_low_pfn, max_low_pfn, highstart_pfn);
263
264 printk("Low memory ends at vaddr %08lx\n",
265 (ulong) pfn_to_kaddr(max_low_pfn));
266 for_each_online_node(nid) {
267 node_remap_start_vaddr[nid] = pfn_to_kaddr(
268 (highstart_pfn + reserve_pages) - node_remap_offset[nid]);
269 allocate_pgdat(nid);
270 printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
271 (ulong) node_remap_start_vaddr[nid],
272 (ulong) pfn_to_kaddr(highstart_pfn + reserve_pages
273 - node_remap_offset[nid] + node_remap_size[nid]));
274 }
275 printk("High memory starts at vaddr %08lx\n",
276 (ulong) pfn_to_kaddr(highstart_pfn));
277 vmalloc_earlyreserve = reserve_pages * PAGE_SIZE;
278 for_each_online_node(nid)
279 find_max_pfn_node(nid);
280
281 memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
282 NODE_DATA(0)->bdata = &node0_bdata;
283 setup_bootmem_allocator();
284 return max_low_pfn;
285}
286
287void __init zone_sizes_init(void)
288{
289 int nid;
290
291 /*
292 * Insert nodes into pgdat_list backward so they appear in order.
293 * Clobber node 0's links and NULL out pgdat_list before starting.
294 */
295 pgdat_list = NULL;
296 for (nid = MAX_NUMNODES - 1; nid >= 0; nid--) {
297 if (!node_online(nid))
298 continue;
299 NODE_DATA(nid)->pgdat_next = pgdat_list;
300 pgdat_list = NODE_DATA(nid);
301 }
302
303 for_each_online_node(nid) {
304 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
305 unsigned long *zholes_size;
306 unsigned int max_dma;
307
308 unsigned long low = max_low_pfn;
309 unsigned long start = node_start_pfn[nid];
310 unsigned long high = node_end_pfn[nid];
311
312 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
313
314 if (node_has_online_mem(nid)) {
315 if (start > low) {
316#ifdef CONFIG_HIGHMEM
317 BUG_ON(start > high);
318 zones_size[ZONE_HIGHMEM] = high - start;
319#endif
320 } else {
321 if (low < max_dma)
322 zones_size[ZONE_DMA] = low;
323 else {
324 BUG_ON(max_dma > low);
325 BUG_ON(low > high);
326 zones_size[ZONE_DMA] = max_dma;
327 zones_size[ZONE_NORMAL] = low - max_dma;
328#ifdef CONFIG_HIGHMEM
329 zones_size[ZONE_HIGHMEM] = high - low;
330#endif
331 }
332 }
333 }
334
335 zholes_size = get_zholes_size(nid);
336 /*
337 * We let the lmem_map for node 0 be allocated from the
338 * normal bootmem allocator, but other nodes come from the
339 * remapped KVA area - mbligh
340 */
341 if (!nid)
342 free_area_init_node(nid, NODE_DATA(nid),
343 zones_size, start, zholes_size);
344 else {
345 unsigned long lmem_map;
346 lmem_map = (unsigned long)node_remap_start_vaddr[nid];
347 lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1;
348 lmem_map &= PAGE_MASK;
349 NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map;
350 free_area_init_node(nid, NODE_DATA(nid), zones_size,
351 start, zholes_size);
352 }
353 }
354 return;
355}
356
357void __init set_highmem_pages_init(int bad_ppro)
358{
359#ifdef CONFIG_HIGHMEM
360 struct zone *zone;
361
362 for_each_zone(zone) {
363 unsigned long node_pfn, node_high_size, zone_start_pfn;
364 struct page * zone_mem_map;
365
366 if (!is_highmem(zone))
367 continue;
368
369 printk("Initializing %s for node %d\n", zone->name,
370 zone->zone_pgdat->node_id);
371
372 node_high_size = zone->spanned_pages;
373 zone_mem_map = zone->zone_mem_map;
374 zone_start_pfn = zone->zone_start_pfn;
375
376 for (node_pfn = 0; node_pfn < node_high_size; node_pfn++) {
377 one_highpage_init((struct page *)(zone_mem_map + node_pfn),
378 zone_start_pfn + node_pfn, bad_ppro);
379 }
380 }
381 totalram_pages += totalhigh_pages;
382#endif
383}
diff --git a/arch/i386/mm/extable.c b/arch/i386/mm/extable.c
new file mode 100644
index 000000000000..f706449319c4
--- /dev/null
+++ b/arch/i386/mm/extable.c
@@ -0,0 +1,36 @@
1/*
2 * linux/arch/i386/mm/extable.c
3 */
4
5#include <linux/config.h>
6#include <linux/module.h>
7#include <linux/spinlock.h>
8#include <asm/uaccess.h>
9
10int fixup_exception(struct pt_regs *regs)
11{
12 const struct exception_table_entry *fixup;
13
14#ifdef CONFIG_PNPBIOS
15 if (unlikely((regs->xcs & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3)))
16 {
17 extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
18 extern u32 pnp_bios_is_utter_crap;
19 pnp_bios_is_utter_crap = 1;
20 printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n");
21 __asm__ volatile(
22 "movl %0, %%esp\n\t"
23 "jmp *%1\n\t"
24 : : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip));
25 panic("do_trap: can't hit this");
26 }
27#endif
28
29 fixup = search_exception_tables(regs->eip);
30 if (fixup) {
31 regs->eip = fixup->fixup;
32 return 1;
33 }
34
35 return 0;
36}
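/*
 * Editorial sketch (hedged): each exception_table_entry pairs the
 * address of an instruction that may fault with a recovery address,
 * conceptually
 *
 *	struct exception_table_entry { unsigned long insn, fixup; };
 *
 * so on a match the code above simply resumes execution at the fixup
 * address by rewriting regs->eip.
 */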
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
new file mode 100644
index 000000000000..a509237c4815
--- /dev/null
+++ b/arch/i386/mm/fault.c
@@ -0,0 +1,552 @@
1/*
2 * linux/arch/i386/mm/fault.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 */
6
7#include <linux/signal.h>
8#include <linux/sched.h>
9#include <linux/kernel.h>
10#include <linux/errno.h>
11#include <linux/string.h>
12#include <linux/types.h>
13#include <linux/ptrace.h>
14#include <linux/mman.h>
15#include <linux/mm.h>
16#include <linux/smp.h>
17#include <linux/smp_lock.h>
18#include <linux/interrupt.h>
19#include <linux/init.h>
20#include <linux/tty.h>
21#include <linux/vt_kern.h> /* For unblank_screen() */
22#include <linux/highmem.h>
23#include <linux/module.h>
24
25#include <asm/system.h>
26#include <asm/uaccess.h>
27#include <asm/desc.h>
28#include <asm/kdebug.h>
29
30extern void die(const char *,struct pt_regs *,long);
31
32/*
33 * Unlock any spinlocks which will prevent us from getting the
34 * message out
35 */
36void bust_spinlocks(int yes)
37{
38 int loglevel_save = console_loglevel;
39
40 if (yes) {
41 oops_in_progress = 1;
42 return;
43 }
44#ifdef CONFIG_VT
45 unblank_screen();
46#endif
47 oops_in_progress = 0;
48 /*
49 * OK, the message is on the console. Now we call printk()
50 * without oops_in_progress set so that printk will give klogd
51 * a poke. Hold onto your hats...
52 */
53 console_loglevel = 15; /* NMI oopser may have shut the console up */
54 printk(" ");
55 console_loglevel = loglevel_save;
56}
57
58/*
59 * Return EIP plus the CS segment base. The segment limit is also
60 * adjusted, clamped to the kernel/user address space (whichever is
61 * appropriate), and returned in *eip_limit.
62 *
63 * The segment is checked, because it might have been changed by another
64 * task between the original faulting instruction and here.
65 *
66 * If CS is no longer a valid code segment, or if EIP is beyond the
67 * limit, or if it is a kernel address when CS is not a kernel segment,
68 * then the returned value will be greater than *eip_limit.
69 *
70 * This is slow, but is very rarely executed.
71 */
72static inline unsigned long get_segment_eip(struct pt_regs *regs,
73 unsigned long *eip_limit)
74{
75 unsigned long eip = regs->eip;
76 unsigned seg = regs->xcs & 0xffff;
77 u32 seg_ar, seg_limit, base, *desc;
78
79 /* The standard kernel/user address space limit. */
80 *eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg;
81
82 /* Unlikely, but must come before segment checks. */
83 if (unlikely((regs->eflags & VM_MASK) != 0))
84 return eip + (seg << 4);
85
86 /* By far the most common cases. */
87 if (likely(seg == __USER_CS || seg == __KERNEL_CS))
88 return eip;
89
90 /* Check the segment exists, is within the current LDT/GDT size,
91 that kernel/user (ring 0..3) has the appropriate privilege,
92 that it's a code segment, and get the limit. */
93 __asm__ ("larl %3,%0; lsll %3,%1"
94 : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
95 if ((~seg_ar & 0x9800) || eip > seg_limit) {
96 *eip_limit = 0;
97 return 1; /* So that returned eip > *eip_limit. */
98 }
99
100 /* Get the GDT/LDT descriptor base.
101 When you look for races in this code remember that
102 LDT and other horrors are only used in user space. */
103 if (seg & (1<<2)) {
104 /* Must lock the LDT while reading it. */
105 down(&current->mm->context.sem);
106 desc = current->mm->context.ldt;
107 desc = (void *)desc + (seg & ~7);
108 } else {
109 /* Must disable preemption while reading the GDT. */
110 desc = (u32 *)&per_cpu(cpu_gdt_table, get_cpu());
111 desc = (void *)desc + (seg & ~7);
112 }
113
114 /* Decode the code segment base from the descriptor */
115 base = get_desc_base((unsigned long *)desc);
116
117 if (seg & (1<<2)) {
118 up(&current->mm->context.sem);
119 } else
120 put_cpu();
121
122 /* Adjust EIP and segment limit, and clamp at the kernel limit.
123 It's legitimate for segments to wrap at 0xffffffff. */
124 seg_limit += base;
125 if (seg_limit < *eip_limit && seg_limit >= base)
126 *eip_limit = seg_limit;
127 return eip + base;
128}
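/*
 * Editorial example (hedged, values hypothetical): in vm86 mode with
 * CS == 0xb800 and EIP == 0x0010, the early return above yields the
 * linear address (0xb800 << 4) + 0x10 == 0xb8010, while *eip_limit
 * keeps the user-space limit set at the top of the function.
 */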
129
130/*
131 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
132 * Check that here and ignore it.
133 */
134static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
135{
136 unsigned long limit;
137 unsigned long instr = get_segment_eip (regs, &limit);
138 int scan_more = 1;
139 int prefetch = 0;
140 int i;
141
142 for (i = 0; scan_more && i < 15; i++) {
143 unsigned char opcode;
144 unsigned char instr_hi;
145 unsigned char instr_lo;
146
147 if (instr > limit)
148 break;
149 if (__get_user(opcode, (unsigned char *) instr))
150 break;
151
152 instr_hi = opcode & 0xf0;
153 instr_lo = opcode & 0x0f;
154 instr++;
155
156 switch (instr_hi) {
157 case 0x20:
158 case 0x30:
159 /* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */
160 scan_more = ((instr_lo & 7) == 0x6);
161 break;
162
163 case 0x60:
164 /* 0x64 thru 0x67 are valid prefixes in all modes. */
165 scan_more = (instr_lo & 0xC) == 0x4;
166 break;
167 case 0xF0:
168 /* 0xF0, 0xF2, and 0xF3 are valid prefixes */
169 scan_more = !instr_lo || (instr_lo>>1) == 1;
170 break;
171 case 0x00:
172 /* Prefetch instruction is 0x0F0D or 0x0F18 */
173 scan_more = 0;
174 if (instr > limit)
175 break;
176 if (__get_user(opcode, (unsigned char *) instr))
177 break;
178 prefetch = (instr_lo == 0xF) &&
179 (opcode == 0x0D || opcode == 0x18);
180 break;
181 default:
182 scan_more = 0;
183 break;
184 }
185 }
186 return prefetch;
187}
188
189static inline int is_prefetch(struct pt_regs *regs, unsigned long addr,
190 unsigned long error_code)
191{
192 if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
193 boot_cpu_data.x86 >= 6)) {
194 /* Catch an obscure case of prefetch inside an NX page. */
195 if (nx_enabled && (error_code & 16))
196 return 0;
197 return __is_prefetch(regs, addr);
198 }
199 return 0;
200}
201
202fastcall void do_invalid_op(struct pt_regs *, unsigned long);
203
204/*
205 * This routine handles page faults. It determines the address,
206 * and the problem, and then passes it off to one of the appropriate
207 * routines.
208 *
209 * error_code:
210 * bit 0 == 0 means no page found, 1 means protection fault
211 * bit 1 == 0 means read, 1 means write
212 * bit 2 == 0 means kernel, 1 means user-mode
213 */
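/*
 * Editorial decoding sketch (not in the original): error_code == 6
 * (binary 110) is a user-mode write to a not-present page - the
 * common demand-paging/COW case - while 0 is a kernel read of a
 * not-present page, e.g. the vmalloc fault handled near the bottom
 * of this function.
 */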
214fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code)
215{
216 struct task_struct *tsk;
217 struct mm_struct *mm;
218 struct vm_area_struct * vma;
219 unsigned long address;
220 unsigned long page;
221 int write;
222 siginfo_t info;
223
224 /* get the address */
225 __asm__("movl %%cr2,%0":"=r" (address));
226
227 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
228 SIGSEGV) == NOTIFY_STOP)
229 return;
230 /* It's safe to allow irq's after cr2 has been saved */
231 if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
232 local_irq_enable();
233
234 tsk = current;
235
236 info.si_code = SEGV_MAPERR;
237
238 /*
239 * We fault-in kernel-space virtual memory on-demand. The
240 * 'reference' page table is init_mm.pgd.
241 *
242 * NOTE! We MUST NOT take any locks for this case. We may
243 * be in an interrupt or a critical region, and should
244 * only copy the information from the master page table,
245 * nothing more.
246 *
247 * This verifies that the fault happens in kernel space
248 * (error_code & 4) == 0, and that the fault was not a
249 * protection error (error_code & 1) == 0.
250 */
251 if (unlikely(address >= TASK_SIZE)) {
252 if (!(error_code & 5))
253 goto vmalloc_fault;
254 /*
255 * Don't take the mm semaphore here. If we fixup a prefetch
256 * fault we could otherwise deadlock.
257 */
258 goto bad_area_nosemaphore;
259 }
260
261 mm = tsk->mm;
262
263 /*
264 * If we're in an interrupt, have no user context or are running in an
265 * atomic region then we must not take the fault..
266 */
267 if (in_atomic() || !mm)
268 goto bad_area_nosemaphore;
269
270 /* When running in the kernel we expect faults to occur only to
271 * addresses in user space. All other faults represent errors in the
272 * kernel and should generate an OOPS. Unfortunately, in the case of an
273 * erroneous fault occurring in a code path which already holds mmap_sem
274 * we will deadlock attempting to validate the fault against the
275 * address space. Luckily the kernel only validly references user
276 * space from well defined areas of code, which are listed in the
277 * exceptions table.
278 *
279 * As the vast majority of faults will be valid we will only perform
280 * the source reference check when there is a possibility of a deadlock.
281 * Attempt to lock the address space, if we cannot we then validate the
282 * source. If this is invalid we can skip the address space check,
283 * thus avoiding the deadlock.
284 */
285 if (!down_read_trylock(&mm->mmap_sem)) {
286 if ((error_code & 4) == 0 &&
287 !search_exception_tables(regs->eip))
288 goto bad_area_nosemaphore;
289 down_read(&mm->mmap_sem);
290 }
291
292 vma = find_vma(mm, address);
293 if (!vma)
294 goto bad_area;
295 if (vma->vm_start <= address)
296 goto good_area;
297 if (!(vma->vm_flags & VM_GROWSDOWN))
298 goto bad_area;
299 if (error_code & 4) {
300 /*
301 * accessing the stack below %esp is always a bug.
302 * The "+ 32" is there due to some instructions (like
303 * pusha) doing post-decrement on the stack and that
304 * doesn't show up until later..
305 */
306 if (address + 32 < regs->esp)
307 goto bad_area;
308 }
309 if (expand_stack(vma, address))
310 goto bad_area;
311/*
312 * Ok, we have a good vm_area for this memory access, so
313 * we can handle it..
314 */
315good_area:
316 info.si_code = SEGV_ACCERR;
317 write = 0;
318 switch (error_code & 3) {
319 default: /* 3: write, present */
320#ifdef TEST_VERIFY_AREA
321 if (regs->cs == KERNEL_CS)
322 printk("WP fault at %08lx\n", regs->eip);
323#endif
324 /* fall through */
325 case 2: /* write, not present */
326 if (!(vma->vm_flags & VM_WRITE))
327 goto bad_area;
328 write++;
329 break;
330 case 1: /* read, present */
331 goto bad_area;
332 case 0: /* read, not present */
333 if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
334 goto bad_area;
335 }
336
337 survive:
338 /*
339 * If for any reason at all we couldn't handle the fault,
340 * make sure we exit gracefully rather than endlessly redo
341 * the fault.
342 */
343 switch (handle_mm_fault(mm, vma, address, write)) {
344 case VM_FAULT_MINOR:
345 tsk->min_flt++;
346 break;
347 case VM_FAULT_MAJOR:
348 tsk->maj_flt++;
349 break;
350 case VM_FAULT_SIGBUS:
351 goto do_sigbus;
352 case VM_FAULT_OOM:
353 goto out_of_memory;
354 default:
355 BUG();
356 }
357
358 /*
359 * Did it hit the DOS screen memory VA from vm86 mode?
360 */
361 if (regs->eflags & VM_MASK) {
362 unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
363 if (bit < 32)
364 tsk->thread.screen_bitmap |= 1 << bit;
365 }
366 up_read(&mm->mmap_sem);
367 return;
368
369/*
370 * Something tried to access memory that isn't in our memory map..
371 * Fix it, but check if it's kernel or user first..
372 */
373bad_area:
374 up_read(&mm->mmap_sem);
375
376bad_area_nosemaphore:
377 /* User mode accesses just cause a SIGSEGV */
378 if (error_code & 4) {
379 /*
380 * Valid to do another page fault here because this one came
381 * from user space.
382 */
383 if (is_prefetch(regs, address, error_code))
384 return;
385
386 tsk->thread.cr2 = address;
387 /* Kernel addresses are always protection faults */
388 tsk->thread.error_code = error_code | (address >= TASK_SIZE);
389 tsk->thread.trap_no = 14;
390 info.si_signo = SIGSEGV;
391 info.si_errno = 0;
392 /* info.si_code has been set above */
393 info.si_addr = (void __user *)address;
394 force_sig_info(SIGSEGV, &info, tsk);
395 return;
396 }
397
398#ifdef CONFIG_X86_F00F_BUG
399 /*
400 * Pentium F0 0F C7 C8 bug workaround.
401 */
402 if (boot_cpu_data.f00f_bug) {
403 unsigned long nr;
404
405 nr = (address - idt_descr.address) >> 3;
406
407 if (nr == 6) {
408 do_invalid_op(regs, 0);
409 return;
410 }
411 }
412#endif
413
414no_context:
415 /* Are we prepared to handle this kernel fault? */
416 if (fixup_exception(regs))
417 return;
418
419 /*
420 * Valid to do another page fault here, because if this fault
421 * had been triggered by is_prefetch fixup_exception would have
422 * handled it.
423 */
424 if (is_prefetch(regs, address, error_code))
425 return;
426
427/*
428 * Oops. The kernel tried to access some bad page. We'll have to
429 * terminate things with extreme prejudice.
430 */
431
432 bust_spinlocks(1);
433
434#ifdef CONFIG_X86_PAE
435 if (error_code & 16) {
436 pte_t *pte = lookup_address(address);
437
438 if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
439 printk(KERN_CRIT "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", current->uid);
440 }
441#endif
442 if (address < PAGE_SIZE)
443 printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
444 else
445 printk(KERN_ALERT "Unable to handle kernel paging request");
446 printk(" at virtual address %08lx\n",address);
447 printk(KERN_ALERT " printing eip:\n");
448 printk("%08lx\n", regs->eip);
449 asm("movl %%cr3,%0":"=r" (page));
450 page = ((unsigned long *) __va(page))[address >> 22];
451 printk(KERN_ALERT "*pde = %08lx\n", page);
452 /*
453 * We must not directly access the pte in the highpte
454 * case, the page table might be allocated in highmem.
455 * And let's rather not kmap-atomic the pte, just in case
456 * it's allocated already.
457 */
458#ifndef CONFIG_HIGHPTE
459 if (page & 1) {
460 page &= PAGE_MASK;
461 address &= 0x003ff000;
462 page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
463 printk(KERN_ALERT "*pte = %08lx\n", page);
464 }
465#endif
466 die("Oops", regs, error_code);
467 bust_spinlocks(0);
468 do_exit(SIGKILL);
469
470/*
471 * We ran out of memory, or some other thing happened to us that made
472 * us unable to handle the page fault gracefully.
473 */
474out_of_memory:
475 up_read(&mm->mmap_sem);
476 if (tsk->pid == 1) {
477 yield();
478 down_read(&mm->mmap_sem);
479 goto survive;
480 }
481 printk("VM: killing process %s\n", tsk->comm);
482 if (error_code & 4)
483 do_exit(SIGKILL);
484 goto no_context;
485
486do_sigbus:
487 up_read(&mm->mmap_sem);
488
489 /* Kernel mode? Handle exceptions or die */
490 if (!(error_code & 4))
491 goto no_context;
492
493 /* User space => ok to do another page fault */
494 if (is_prefetch(regs, address, error_code))
495 return;
496
497 tsk->thread.cr2 = address;
498 tsk->thread.error_code = error_code;
499 tsk->thread.trap_no = 14;
500 info.si_signo = SIGBUS;
501 info.si_errno = 0;
502 info.si_code = BUS_ADRERR;
503 info.si_addr = (void __user *)address;
504 force_sig_info(SIGBUS, &info, tsk);
505 return;
506
507vmalloc_fault:
508 {
509 /*
510 * Synchronize this task's top level page-table
511 * with the 'reference' page table.
512 *
513 * Do _not_ use "tsk" here. We might be inside
514 * an interrupt in the middle of a task switch..
515 */
516 int index = pgd_index(address);
517 unsigned long pgd_paddr;
518 pgd_t *pgd, *pgd_k;
519 pud_t *pud, *pud_k;
520 pmd_t *pmd, *pmd_k;
521 pte_t *pte_k;
522
523 asm("movl %%cr3,%0":"=r" (pgd_paddr));
524 pgd = index + (pgd_t *)__va(pgd_paddr);
525 pgd_k = init_mm.pgd + index;
526
527 if (!pgd_present(*pgd_k))
528 goto no_context;
529
530 /*
531 * set_pgd(pgd, *pgd_k); here would be useless on PAE
532 * and redundant with the set_pmd() on non-PAE. As would
533 * set_pud.
534 */
535
536 pud = pud_offset(pgd, address);
537 pud_k = pud_offset(pgd_k, address);
538 if (!pud_present(*pud_k))
539 goto no_context;
540
541 pmd = pmd_offset(pud, address);
542 pmd_k = pmd_offset(pud_k, address);
543 if (!pmd_present(*pmd_k))
544 goto no_context;
545 set_pmd(pmd, *pmd_k);
546
547 pte_k = pte_offset_kernel(pmd_k, address);
548 if (!pte_present(*pte_k))
549 goto no_context;
550 return;
551 }
552}
diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c
new file mode 100644
index 000000000000..fc4c4cad4e98
--- /dev/null
+++ b/arch/i386/mm/highmem.c
@@ -0,0 +1,89 @@
1#include <linux/highmem.h>
2
3void *kmap(struct page *page)
4{
5 might_sleep();
6 if (!PageHighMem(page))
7 return page_address(page);
8 return kmap_high(page);
9}
10
11void kunmap(struct page *page)
12{
13 if (in_interrupt())
14 BUG();
15 if (!PageHighMem(page))
16 return;
17 kunmap_high(page);
18}
19
20/*
21 * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
22 * no global lock is needed and because the kmap code must perform a global TLB
23 * invalidation when the kmap pool wraps.
24 *
25 * However, when holding an atomic kmap it is not legal to sleep, so atomic
26 * kmaps are appropriate for short, tight code paths only.
27 */
28void *kmap_atomic(struct page *page, enum km_type type)
29{
30 enum fixed_addresses idx;
31 unsigned long vaddr;
32
33 /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
34 inc_preempt_count();
35 if (!PageHighMem(page))
36 return page_address(page);
37
38 idx = type + KM_TYPE_NR*smp_processor_id();
39 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
40#ifdef CONFIG_DEBUG_HIGHMEM
41 if (!pte_none(*(kmap_pte-idx)))
42 BUG();
43#endif
44 set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
45 __flush_tlb_one(vaddr);
46
47 return (void*) vaddr;
48}
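/*
 * Editorial usage sketch (hypothetical caller): zeroing a possibly
 * highmem page from a context that must not sleep:
 *
 *	void *kaddr = kmap_atomic(page, KM_USER0);
 *	memset(kaddr, 0, PAGE_SIZE);
 *	kunmap_atomic(kaddr, KM_USER0);
 *
 * The km_type slot must match between map and unmap, and sleeping is
 * not allowed in between.
 */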
49
50void kunmap_atomic(void *kvaddr, enum km_type type)
51{
52#ifdef CONFIG_DEBUG_HIGHMEM
53 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
54 enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
55
56 if (vaddr < FIXADDR_START) { // FIXME
57 dec_preempt_count();
58 preempt_check_resched();
59 return;
60 }
61
62 if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
63 BUG();
64
65 /*
66 * force other mappings to Oops if they try to access
67 * this pte without first remapping it
68 */
69 pte_clear(&init_mm, vaddr, kmap_pte-idx);
70 __flush_tlb_one(vaddr);
71#endif
72
73 dec_preempt_count();
74 preempt_check_resched();
75}
76
77struct page *kmap_atomic_to_page(void *ptr)
78{
79 unsigned long idx, vaddr = (unsigned long)ptr;
80 pte_t *pte;
81
82 if (vaddr < FIXADDR_START)
83 return virt_to_page(ptr);
84
85 idx = virt_to_fix(vaddr);
86 pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
87 return pte_page(*pte);
88}
89
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
new file mode 100644
index 000000000000..a8c45143088b
--- /dev/null
+++ b/arch/i386/mm/hugetlbpage.c
@@ -0,0 +1,431 @@
1/*
2 * IA-32 Huge TLB Page Support for Kernel.
3 *
4 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
5 */
6
7#include <linux/config.h>
8#include <linux/init.h>
9#include <linux/fs.h>
10#include <linux/mm.h>
11#include <linux/hugetlb.h>
12#include <linux/pagemap.h>
13#include <linux/smp_lock.h>
14#include <linux/slab.h>
15#include <linux/err.h>
16#include <linux/sysctl.h>
17#include <asm/mman.h>
18#include <asm/tlb.h>
19#include <asm/tlbflush.h>
20
21static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
22{
23 pgd_t *pgd;
24 pud_t *pud;
25 pmd_t *pmd = NULL;
26
27 pgd = pgd_offset(mm, addr);
28 pud = pud_alloc(mm, pgd, addr);
29 pmd = pmd_alloc(mm, pud, addr);
30 return (pte_t *) pmd;
31}
32
33static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
34{
35 pgd_t *pgd;
36 pud_t *pud;
37 pmd_t *pmd = NULL;
38
39 pgd = pgd_offset(mm, addr);
40 pud = pud_offset(pgd, addr);
41 pmd = pmd_offset(pud, addr);
42 return (pte_t *) pmd;
43}
44
45static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page, pte_t * page_table, int write_access)
46{
47 pte_t entry;
48
49 add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
50 if (write_access) {
51 entry =
52 pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
53 } else
54 entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
55 entry = pte_mkyoung(entry);
56 mk_pte_huge(entry);
57 set_pte(page_table, entry);
58}
59
60/*
61 * This function checks for proper alignment of input addr and len parameters.
62 */
63int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
64{
65 if (len & ~HPAGE_MASK)
66 return -EINVAL;
67 if (addr & ~HPAGE_MASK)
68 return -EINVAL;
69 return 0;
70}
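/*
 * Editorial example (assuming non-PAE 4MB huge pages, so
 * ~HPAGE_MASK == 0x3fffff): addr 0x40000000 with len 0x800000
 * passes, while len 0x500000 fails with -EINVAL because
 * 0x500000 & 0x3fffff is nonzero.
 */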
71
72int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
73 struct vm_area_struct *vma)
74{
75 pte_t *src_pte, *dst_pte, entry;
76 struct page *ptepage;
77 unsigned long addr = vma->vm_start;
78 unsigned long end = vma->vm_end;
79
80 while (addr < end) {
81 dst_pte = huge_pte_alloc(dst, addr);
82 if (!dst_pte)
83 goto nomem;
84 src_pte = huge_pte_offset(src, addr);
85 entry = *src_pte;
86 ptepage = pte_page(entry);
87 get_page(ptepage);
88 set_pte(dst_pte, entry);
89 add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
90 addr += HPAGE_SIZE;
91 }
92 return 0;
93
94nomem:
95 return -ENOMEM;
96}
97
98int
99follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
100 struct page **pages, struct vm_area_struct **vmas,
101 unsigned long *position, int *length, int i)
102{
103 unsigned long vpfn, vaddr = *position;
104 int remainder = *length;
105
106 WARN_ON(!is_vm_hugetlb_page(vma));
107
108 vpfn = vaddr/PAGE_SIZE;
109 while (vaddr < vma->vm_end && remainder) {
110
111 if (pages) {
112 pte_t *pte;
113 struct page *page;
114
115 pte = huge_pte_offset(mm, vaddr);
116
117 /* hugetlb should be locked, and hence, prefaulted */
118 WARN_ON(!pte || pte_none(*pte));
119
120 page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
121
122 WARN_ON(!PageCompound(page));
123
124 get_page(page);
125 pages[i] = page;
126 }
127
128 if (vmas)
129 vmas[i] = vma;
130
131 vaddr += PAGE_SIZE;
132 ++vpfn;
133 --remainder;
134 ++i;
135 }
136
137 *length = remainder;
138 *position = vaddr;
139
140 return i;
141}
142
143#if 0 /* This is just for testing */
144struct page *
145follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
146{
147 unsigned long start = address;
148 int length = 1;
149 int nr;
150 struct page *page;
151 struct vm_area_struct *vma;
152
153 vma = find_vma(mm, addr);
154 if (!vma || !is_vm_hugetlb_page(vma))
155 return ERR_PTR(-EINVAL);
156
157 pte = huge_pte_offset(mm, address);
158
159 /* hugetlb should be locked, and hence, prefaulted */
160 WARN_ON(!pte || pte_none(*pte));
161
162 page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
163
164 WARN_ON(!PageCompound(page));
165
166 return page;
167}
168
169int pmd_huge(pmd_t pmd)
170{
171 return 0;
172}
173
174struct page *
175follow_huge_pmd(struct mm_struct *mm, unsigned long address,
176 pmd_t *pmd, int write)
177{
178 return NULL;
179}
180
181#else
182
183struct page *
184follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
185{
186 return ERR_PTR(-EINVAL);
187}
188
189int pmd_huge(pmd_t pmd)
190{
191 return !!(pmd_val(pmd) & _PAGE_PSE);
192}
193
194struct page *
195follow_huge_pmd(struct mm_struct *mm, unsigned long address,
196 pmd_t *pmd, int write)
197{
198 struct page *page;
199
200 page = pte_page(*(pte_t *)pmd);
201 if (page)
202 page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
203 return page;
204}
205#endif
206
207void unmap_hugepage_range(struct vm_area_struct *vma,
208 unsigned long start, unsigned long end)
209{
210 struct mm_struct *mm = vma->vm_mm;
211 unsigned long address;
212 pte_t pte, *ptep;
213 struct page *page;
214
215 BUG_ON(start & (HPAGE_SIZE - 1));
216 BUG_ON(end & (HPAGE_SIZE - 1));
217
218 for (address = start; address < end; address += HPAGE_SIZE) {
219 ptep = huge_pte_offset(mm, address);
220 if (!ptep)
221 continue;
222 pte = ptep_get_and_clear(mm, address, ptep);
223 if (pte_none(pte))
224 continue;
225 page = pte_page(pte);
226 put_page(page);
227 }
228 add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
229 flush_tlb_range(vma, start, end);
230}
231
232int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
233{
234 struct mm_struct *mm = current->mm;
235 unsigned long addr;
236 int ret = 0;
237
238 BUG_ON(vma->vm_start & ~HPAGE_MASK);
239 BUG_ON(vma->vm_end & ~HPAGE_MASK);
240
241 spin_lock(&mm->page_table_lock);
242 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
243 unsigned long idx;
244 pte_t *pte = huge_pte_alloc(mm, addr);
245 struct page *page;
246
247 if (!pte) {
248 ret = -ENOMEM;
249 goto out;
250 }
251
252 if (!pte_none(*pte)) {
253 pmd_t *pmd = (pmd_t *) pte;
254
255 page = pmd_page(*pmd);
256 pmd_clear(pmd);
257 mm->nr_ptes--;
258 dec_page_state(nr_page_table_pages);
259 page_cache_release(page);
260 }
261
262 idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
263 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
264 page = find_get_page(mapping, idx);
265 if (!page) {
266 /* charge the fs quota first */
267 if (hugetlb_get_quota(mapping)) {
268 ret = -ENOMEM;
269 goto out;
270 }
271 page = alloc_huge_page();
272 if (!page) {
273 hugetlb_put_quota(mapping);
274 ret = -ENOMEM;
275 goto out;
276 }
277 ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
278 if (!ret) {
279 unlock_page(page);
280 } else {
281 hugetlb_put_quota(mapping);
282 free_huge_page(page);
283 goto out;
284 }
285 }
286 set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
287 }
288out:
289 spin_unlock(&mm->page_table_lock);
290 return ret;
291}
292
293/* x86_64 also uses this file */
294
295#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
296static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
297 unsigned long addr, unsigned long len,
298 unsigned long pgoff, unsigned long flags)
299{
300 struct mm_struct *mm = current->mm;
301 struct vm_area_struct *vma;
302 unsigned long start_addr;
303
304 start_addr = mm->free_area_cache;
305
306full_search:
307 addr = ALIGN(start_addr, HPAGE_SIZE);
308
309 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
310 /* At this point: (!vma || addr < vma->vm_end). */
311 if (TASK_SIZE - len < addr) {
312 /*
313 * Start a new search - just in case we missed
314 * some holes.
315 */
316 if (start_addr != TASK_UNMAPPED_BASE) {
317 start_addr = TASK_UNMAPPED_BASE;
318 goto full_search;
319 }
320 return -ENOMEM;
321 }
322 if (!vma || addr + len <= vma->vm_start) {
323 mm->free_area_cache = addr + len;
324 return addr;
325 }
326 addr = ALIGN(vma->vm_end, HPAGE_SIZE);
327 }
328}
329
330static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
331 unsigned long addr0, unsigned long len,
332 unsigned long pgoff, unsigned long flags)
333{
334 struct mm_struct *mm = current->mm;
335 struct vm_area_struct *vma, *prev_vma;
336 unsigned long base = mm->mmap_base, addr = addr0;
337 int first_time = 1;
338
339 /* don't allow allocations above current base */
340 if (mm->free_area_cache > base)
341 mm->free_area_cache = base;
342
343try_again:
344 /* make sure it can fit in the remaining address space */
345 if (mm->free_area_cache < len)
346 goto fail;
347
348 /* either no address requested or can't fit in requested address hole */
349 addr = (mm->free_area_cache - len) & HPAGE_MASK;
350 do {
351 /*
352 * Lookup failure means no vma is above this address,
353 * i.e. return with success:
354 */
355 if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
356 return addr;
357
358 /*
359 * new region fits between prev_vma->vm_end and
360 * vma->vm_start, use it:
361 */
362 if (addr + len <= vma->vm_start &&
363 (!prev_vma || (addr >= prev_vma->vm_end)))
364 /* remember the address as a hint for next time */
365 return (mm->free_area_cache = addr);
366 else
367 /* pull free_area_cache down to the first hole */
368 if (mm->free_area_cache == vma->vm_end)
369 mm->free_area_cache = vma->vm_start;
370
371 /* try just below the current vma->vm_start */
372 addr = (vma->vm_start - len) & HPAGE_MASK;
373 } while (len <= vma->vm_start);
374
375fail:
376 /*
377 * if hint left us with no space for the requested
378 * mapping then try again:
379 */
380 if (first_time) {
381 mm->free_area_cache = base;
382 first_time = 0;
383 goto try_again;
384 }
385 /*
386 * A failed mmap() very likely causes application failure,
387 * so fall back to the bottom-up function here. This scenario
388 * can happen with large stack limits and large mmap()
389 * allocations.
390 */
391 mm->free_area_cache = TASK_UNMAPPED_BASE;
392 addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
393 len, pgoff, flags);
394
395 /*
396 * Restore the topdown base:
397 */
398 mm->free_area_cache = base;
399
400 return addr;
401}
402
403unsigned long
404hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
405 unsigned long len, unsigned long pgoff, unsigned long flags)
406{
407 struct mm_struct *mm = current->mm;
408 struct vm_area_struct *vma;
409
410 if (len & ~HPAGE_MASK)
411 return -EINVAL;
412 if (len > TASK_SIZE)
413 return -ENOMEM;
414
415 if (addr) {
416 addr = ALIGN(addr, HPAGE_SIZE);
417 vma = find_vma(mm, addr);
418 if (TASK_SIZE - len >= addr &&
419 (!vma || addr + len <= vma->vm_start))
420 return addr;
421 }
422 if (mm->get_unmapped_area == arch_get_unmapped_area)
423 return hugetlb_get_unmapped_area_bottomup(file, addr, len,
424 pgoff, flags);
425 else
426 return hugetlb_get_unmapped_area_topdown(file, addr, len,
427 pgoff, flags);
428}
429
430#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
431
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
new file mode 100644
index 000000000000..7a7ea3737265
--- /dev/null
+++ b/arch/i386/mm/init.c
@@ -0,0 +1,696 @@
1/*
2 * linux/arch/i386/mm/init.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7 */
8
9#include <linux/config.h>
10#include <linux/module.h>
11#include <linux/signal.h>
12#include <linux/sched.h>
13#include <linux/kernel.h>
14#include <linux/errno.h>
15#include <linux/string.h>
16#include <linux/types.h>
17#include <linux/ptrace.h>
18#include <linux/mman.h>
19#include <linux/mm.h>
20#include <linux/hugetlb.h>
21#include <linux/swap.h>
22#include <linux/smp.h>
23#include <linux/init.h>
24#include <linux/highmem.h>
25#include <linux/pagemap.h>
26#include <linux/bootmem.h>
27#include <linux/slab.h>
28#include <linux/proc_fs.h>
29#include <linux/efi.h>
30
31#include <asm/processor.h>
32#include <asm/system.h>
33#include <asm/uaccess.h>
34#include <asm/pgtable.h>
35#include <asm/dma.h>
36#include <asm/fixmap.h>
37#include <asm/e820.h>
38#include <asm/apic.h>
39#include <asm/tlb.h>
40#include <asm/tlbflush.h>
41#include <asm/sections.h>
42
43unsigned int __VMALLOC_RESERVE = 128 << 20;
44
45DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
46unsigned long highstart_pfn, highend_pfn;
47
48static int noinline do_test_wp_bit(void);
49
50/*
51 * Creates a middle page table and puts a pointer to it in the
52 * given global directory entry. This only returns the pgd entry
53 * in non-PAE compilation mode, since the middle layer is folded.
54 */
55static pmd_t * __init one_md_table_init(pgd_t *pgd)
56{
57 pud_t *pud;
58 pmd_t *pmd_table;
59
60#ifdef CONFIG_X86_PAE
61 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
62 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
63 pud = pud_offset(pgd, 0);
64 if (pmd_table != pmd_offset(pud, 0))
65 BUG();
66#else
67 pud = pud_offset(pgd, 0);
68 pmd_table = pmd_offset(pud, 0);
69#endif
70
71 return pmd_table;
72}
73
74/*
75 * Create a page table and place a pointer to it in a middle page
76 * directory entry.
77 */
78static pte_t * __init one_page_table_init(pmd_t *pmd)
79{
80 if (pmd_none(*pmd)) {
81 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
82 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
83 if (page_table != pte_offset_kernel(pmd, 0))
84 BUG();
85
86 return page_table;
87 }
88
89 return pte_offset_kernel(pmd, 0);
90}
91
92/*
93 * This function initializes a certain range of kernel virtual memory
94 * with new bootmem page tables, everywhere page tables are missing in
95 * the given range.
96 */
97
98/*
99 * NOTE: The pagetables are allocated contiguously in physical space,
100 * so we can cache the place of the first one and move around without
101 * checking the pgd every time.
102 */
103static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
104{
105 pgd_t *pgd;
106 pud_t *pud;
107 pmd_t *pmd;
108 int pgd_idx, pmd_idx;
109 unsigned long vaddr;
110
111 vaddr = start;
112 pgd_idx = pgd_index(vaddr);
113 pmd_idx = pmd_index(vaddr);
114 pgd = pgd_base + pgd_idx;
115
116 for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
117 if (pgd_none(*pgd))
118 one_md_table_init(pgd);
119 pud = pud_offset(pgd, vaddr);
120 pmd = pmd_offset(pud, vaddr);
121 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
122 if (pmd_none(*pmd))
123 one_page_table_init(pmd);
124
125 vaddr += PMD_SIZE;
126 }
127 pmd_idx = 0;
128 }
129}
130
131static inline int is_kernel_text(unsigned long addr)
132{
133 if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
134 return 1;
135 return 0;
136}
137
138/*
139 * This maps the physical memory to kernel virtual address space, a total
140 * of max_low_pfn pages, by creating page tables starting from address
141 * PAGE_OFFSET.
142 */
143static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
144{
145 unsigned long pfn;
146 pgd_t *pgd;
147 pmd_t *pmd;
148 pte_t *pte;
149 int pgd_idx, pmd_idx, pte_ofs;
150
151 pgd_idx = pgd_index(PAGE_OFFSET);
152 pgd = pgd_base + pgd_idx;
153 pfn = 0;
154
155 for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
156 pmd = one_md_table_init(pgd);
157 if (pfn >= max_low_pfn)
158 continue;
159 for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
160 unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
161
162 /* Map with big pages if possible, otherwise create normal page tables. */
163 if (cpu_has_pse) {
164 unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
165
166 if (is_kernel_text(address) || is_kernel_text(address2))
167 set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
168 else
169 set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
170 pfn += PTRS_PER_PTE;
171 } else {
172 pte = one_page_table_init(pmd);
173
174 for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
175 if (is_kernel_text(address))
176 set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
177 else
178 set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
179 }
180 }
181 }
182 }
183}
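/*
 * Editorial arithmetic sketch: on a 512MB machine (max_low_pfn ==
 * 0x20000) with PSE available, the loop above covers all of lowmem
 * with just 0x20000 / 1024 == 128 4MB pmd entries starting at
 * PAGE_OFFSET.
 */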
184
185static inline int page_kills_ppro(unsigned long pagenr)
186{
187 if (pagenr >= 0x70000 && pagenr <= 0x7003F)
188 return 1;
189 return 0;
190}
191
192extern int is_available_memory(efi_memory_desc_t *);
193
194static inline int page_is_ram(unsigned long pagenr)
195{
196 int i;
197 unsigned long addr, end;
198
199 if (efi_enabled) {
200 efi_memory_desc_t *md;
201
202 for (i = 0; i < memmap.nr_map; i++) {
203 md = &memmap.map[i];
204 if (!is_available_memory(md))
205 continue;
206 addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
207 end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;
208
209 if ((pagenr >= addr) && (pagenr < end))
210 return 1;
211 }
212 return 0;
213 }
214
215 for (i = 0; i < e820.nr_map; i++) {
216
217 if (e820.map[i].type != E820_RAM) /* not usable memory */
218 continue;
219 /*
220 * !!!FIXME!!! Some BIOSen report areas as RAM that
221 * are not. Notably the 640->1Mb area. We need a sanity
222 * check here.
223 */
224 addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
225 end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
226 if ((pagenr >= addr) && (pagenr < end))
227 return 1;
228 }
229 return 0;
230}
231
232#ifdef CONFIG_HIGHMEM
233pte_t *kmap_pte;
234pgprot_t kmap_prot;
235
236#define kmap_get_fixmap_pte(vaddr) \
237 pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))
238
239static void __init kmap_init(void)
240{
241 unsigned long kmap_vstart;
242
243 /* cache the first kmap pte */
244 kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
245 kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
246
247 kmap_prot = PAGE_KERNEL;
248}
249
250static void __init permanent_kmaps_init(pgd_t *pgd_base)
251{
252 pgd_t *pgd;
253 pud_t *pud;
254 pmd_t *pmd;
255 pte_t *pte;
256 unsigned long vaddr;
257
258 vaddr = PKMAP_BASE;
259 page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
260
261 pgd = swapper_pg_dir + pgd_index(vaddr);
262 pud = pud_offset(pgd, vaddr);
263 pmd = pmd_offset(pud, vaddr);
264 pte = pte_offset_kernel(pmd, vaddr);
265 pkmap_page_table = pte;
266}
267
268void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
269{
270 if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
271 ClearPageReserved(page);
272 set_bit(PG_highmem, &page->flags);
273 set_page_count(page, 1);
274 __free_page(page);
275 totalhigh_pages++;
276 } else
277 SetPageReserved(page);
278}
279
280#ifndef CONFIG_DISCONTIGMEM
281static void __init set_highmem_pages_init(int bad_ppro)
282{
283 int pfn;
284 for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
285 one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
286 totalram_pages += totalhigh_pages;
287}
288#else
289extern void set_highmem_pages_init(int);
290#endif /* !CONFIG_DISCONTIGMEM */
291
292#else
293#define kmap_init() do { } while (0)
294#define permanent_kmaps_init(pgd_base) do { } while (0)
295#define set_highmem_pages_init(bad_ppro) do { } while (0)
296#endif /* CONFIG_HIGHMEM */
297
298unsigned long long __PAGE_KERNEL = _PAGE_KERNEL;
299unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
300
301#ifndef CONFIG_DISCONTIGMEM
302#define remap_numa_kva() do {} while (0)
303#else
304extern void __init remap_numa_kva(void);
305#endif
306
307static void __init pagetable_init (void)
308{
309 unsigned long vaddr;
310 pgd_t *pgd_base = swapper_pg_dir;
311
312#ifdef CONFIG_X86_PAE
313 int i;
314 /* Init entries of the first-level page table to the zero page */
315 for (i = 0; i < PTRS_PER_PGD; i++)
316 set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
317#endif
318
319 /* Enable PSE if available */
320 if (cpu_has_pse) {
321 set_in_cr4(X86_CR4_PSE);
322 }
323
324 /* Enable PGE if available */
325 if (cpu_has_pge) {
326 set_in_cr4(X86_CR4_PGE);
327 __PAGE_KERNEL |= _PAGE_GLOBAL;
328 __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
329 }
330
331 kernel_physical_mapping_init(pgd_base);
332 remap_numa_kva();
333
334 /*
335 * Fixed mappings, only the page table structure has to be
336 * created - mappings will be set by set_fixmap():
337 */
338 vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
339 page_table_range_init(vaddr, 0, pgd_base);
340
341 permanent_kmaps_init(pgd_base);
342
343#ifdef CONFIG_X86_PAE
344 /*
345 * Add low memory identity-mappings - SMP needs it when
346 * starting up on an AP from real-mode. In the non-PAE
347 * case we already have these mappings through head.S.
348 * All user-space mappings are explicitly cleared after
349 * SMP startup.
350 */
351 pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
352#endif
353}
354
355#if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND)
356/*
357 * Swap suspend & friends need this for resume because things like the intel-agp
358 * driver might have split up a kernel 4MB mapping.
359 */
360char __nosavedata swsusp_pg_dir[PAGE_SIZE]
361 __attribute__ ((aligned (PAGE_SIZE)));
362
363static inline void save_pg_dir(void)
364{
365 memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
366}
367#else
368static inline void save_pg_dir(void)
369{
370}
371#endif
372
373void zap_low_mappings (void)
374{
375 int i;
376
377 save_pg_dir();
378
379 /*
380 * Zap initial low-memory mappings.
381 *
382 * Note that "pgd_clear()" doesn't do it for
383 * us, because pgd_clear() is a no-op on i386.
384 */
385 for (i = 0; i < USER_PTRS_PER_PGD; i++)
386#ifdef CONFIG_X86_PAE
387 set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
388#else
389 set_pgd(swapper_pg_dir+i, __pgd(0));
390#endif
391 flush_tlb_all();
392}
393
394static int disable_nx __initdata = 0;
395u64 __supported_pte_mask = ~_PAGE_NX;
396
397/*
398 * noexec = on|off
399 *
400 * Control non executable mappings.
401 *
402 * on Enable
403 * off Disable
404 */
405void __init noexec_setup(const char *str)
406{
407 if (!strncmp(str, "on",2) && cpu_has_nx) {
408 __supported_pte_mask |= _PAGE_NX;
409 disable_nx = 0;
410 } else if (!strncmp(str,"off",3)) {
411 disable_nx = 1;
412 __supported_pte_mask &= ~_PAGE_NX;
413 }
414}
415
416int nx_enabled = 0;
417#ifdef CONFIG_X86_PAE
418
419static void __init set_nx(void)
420{
421 unsigned int v[4], l, h;
422
423 if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
424 cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
425 if ((v[3] & (1 << 20)) && !disable_nx) {
426 rdmsr(MSR_EFER, l, h);
427 l |= EFER_NX;
428 wrmsr(MSR_EFER, l, h);
429 nx_enabled = 1;
430 __supported_pte_mask |= _PAGE_NX;
431 }
432 }
433}
434
435/*
436 * Enables/disables executability of a given kernel page and
437 * returns the previous setting.
438 */
439int __init set_kernel_exec(unsigned long vaddr, int enable)
440{
441 pte_t *pte;
442 int ret = 1;
443
444 if (!nx_enabled)
445 goto out;
446
447 pte = lookup_address(vaddr);
448 BUG_ON(!pte);
449
450 if (!pte_exec_kernel(*pte))
451 ret = 0;
452
453 if (enable)
454 pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
455 else
456 pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
457 __flush_tlb_all();
458out:
459 return ret;
460}
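/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * the return value supports a save/restore pattern, e.g.
 *
 *	int was_exec = set_kernel_exec(vaddr, 1);
 *	... touch the code page at vaddr ...
 *	set_kernel_exec(vaddr, was_exec);
 */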
461
462#endif
463
464/*
465 * paging_init() sets up the page tables - note that the first 8MB are
466 * already mapped by head.S.
467 *
468 * This routines also unmaps the page at virtual kernel address 0, so
469 * that we can trap those pesky NULL-reference errors in the kernel.
470 */
471void __init paging_init(void)
472{
473#ifdef CONFIG_X86_PAE
474 set_nx();
475 if (nx_enabled)
476 printk("NX (Execute Disable) protection: active\n");
477#endif
478
479 pagetable_init();
480
481 load_cr3(swapper_pg_dir);
482
483#ifdef CONFIG_X86_PAE
484 /*
485 * We will bail out later - printk doesn't work right now so
486 * the user would just see a hanging kernel.
487 */
488 if (cpu_has_pae)
489 set_in_cr4(X86_CR4_PAE);
490#endif
491 __flush_tlb_all();
492
493 kmap_init();
494}
495
496/*
497 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
498 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
499 * used to involve black magic jumps to work around some nasty CPU bugs,
500 * but fortunately the switch to using exceptions got rid of all that.
501 */
502
503static void __init test_wp_bit(void)
504{
505 printk("Checking if this processor honours the WP bit even in supervisor mode... ");
506
507 /* Any page-aligned address will do, the test is non-destructive */
508 __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
509 boot_cpu_data.wp_works_ok = do_test_wp_bit();
510 clear_fixmap(FIX_WP_TEST);
511
512 if (!boot_cpu_data.wp_works_ok) {
513 printk("No.\n");
514#ifdef CONFIG_X86_WP_WORKS_OK
515 panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
516#endif
517 } else {
518 printk("Ok.\n");
519 }
520}
521
522static void __init set_max_mapnr_init(void)
523{
524#ifdef CONFIG_HIGHMEM
525 num_physpages = highend_pfn;
526#else
527 num_physpages = max_low_pfn;
528#endif
529#ifndef CONFIG_DISCONTIGMEM
530 max_mapnr = num_physpages;
531#endif
532}
533
534static struct kcore_list kcore_mem, kcore_vmalloc;
535
536void __init mem_init(void)
537{
538 extern int ppro_with_ram_bug(void);
539 int codesize, reservedpages, datasize, initsize;
540 int tmp;
541 int bad_ppro;
542
543#ifndef CONFIG_DISCONTIGMEM
544 if (!mem_map)
545 BUG();
546#endif
547
548 bad_ppro = ppro_with_ram_bug();
549
550#ifdef CONFIG_HIGHMEM
551 /* check that fixmap and pkmap do not overlap */
552 if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
553 printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n");
554 printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
555 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START);
556 BUG();
557 }
558#endif
559
560 set_max_mapnr_init();
561
562#ifdef CONFIG_HIGHMEM
563 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
564#else
565 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
566#endif
567
568 /* this will put all low memory onto the freelists */
569 totalram_pages += free_all_bootmem();
570
571 reservedpages = 0;
572 for (tmp = 0; tmp < max_low_pfn; tmp++)
573 /*
574 * Only count reserved RAM pages
575 */
576 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
577 reservedpages++;
578
579 set_highmem_pages_init(bad_ppro);
580
581 codesize = (unsigned long) &_etext - (unsigned long) &_text;
582 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
583 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
584
585 kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
586 kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
587 VMALLOC_END-VMALLOC_START);
588
589 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
590 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
591 num_physpages << (PAGE_SHIFT-10),
592 codesize >> 10,
593 reservedpages << (PAGE_SHIFT-10),
594 datasize >> 10,
595 initsize >> 10,
596 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
597 );
598
599#ifdef CONFIG_X86_PAE
600 if (!cpu_has_pae)
601 panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
602#endif
603 if (boot_cpu_data.wp_works_ok < 0)
604 test_wp_bit();
605
606 /*
607 * Subtle. SMP is doing its boot stuff late (because it has to
608 * fork idle threads) - but it also needs low mappings for the
609 * protected-mode entry to work. We zap these entries only after
610 * the WP-bit has been tested.
611 */
612#ifndef CONFIG_SMP
613 zap_low_mappings();
614#endif
615}
616
617kmem_cache_t *pgd_cache;
618kmem_cache_t *pmd_cache;
619
620void __init pgtable_cache_init(void)
621{
622 if (PTRS_PER_PMD > 1) {
623 pmd_cache = kmem_cache_create("pmd",
624 PTRS_PER_PMD*sizeof(pmd_t),
625 PTRS_PER_PMD*sizeof(pmd_t),
626 0,
627 pmd_ctor,
628 NULL);
629 if (!pmd_cache)
630 panic("pgtable_cache_init(): cannot create pmd cache");
631 }
632 pgd_cache = kmem_cache_create("pgd",
633 PTRS_PER_PGD*sizeof(pgd_t),
634 PTRS_PER_PGD*sizeof(pgd_t),
635 0,
636 pgd_ctor,
637 PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
638 if (!pgd_cache)
639 panic("pgtable_cache_init(): Cannot create pgd cache");
640}
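/*
 * Descriptive note: PTRS_PER_PMD > 1 only under PAE, so the pmd cache
 * exists only there; conversely the pgd list bookkeeping in pgd_ctor and
 * pgd_dtor is needed only in the non-PAE (PTRS_PER_PMD == 1) case, which
 * is why the destructor is passed conditionally above.
 */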
641
642/*
643 * This function cannot be __init, since exceptions don't work in that
644 * section. Put this after the callers, so that it cannot be inlined.
645 */
646static int noinline do_test_wp_bit(void)
647{
648 char tmp_reg;
649 int flag;
650
651 __asm__ __volatile__(
652 " movb %0,%1 \n"
653 "1: movb %1,%0 \n"
654 " xorl %2,%2 \n"
655 "2: \n"
656 ".section __ex_table,\"a\"\n"
657 " .align 4 \n"
658 " .long 1b,2b \n"
659 ".previous \n"
660 :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
661 "=q" (tmp_reg),
662 "=r" (flag)
663 :"2" (1)
664 :"memory");
665
666 return flag;
667}
668
669void free_initmem(void)
670{
671 unsigned long addr;
672
673 addr = (unsigned long)(&__init_begin);
674 for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
675 ClearPageReserved(virt_to_page(addr));
676 set_page_count(virt_to_page(addr), 1);
677 memset((void *)addr, 0xcc, PAGE_SIZE);
678 free_page(addr);
679 totalram_pages++;
680 }
681 printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (__init_end - __init_begin) >> 10);
682}
683
684#ifdef CONFIG_BLK_DEV_INITRD
685void free_initrd_mem(unsigned long start, unsigned long end)
686{
687 if (start < end)
688 printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
689 for (; start < end; start += PAGE_SIZE) {
690 ClearPageReserved(virt_to_page(start));
691 set_page_count(virt_to_page(start), 1);
692 free_page(start);
693 totalram_pages++;
694 }
695}
696#endif
diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c
new file mode 100644
index 000000000000..db06f7399913
--- /dev/null
+++ b/arch/i386/mm/ioremap.c
@@ -0,0 +1,320 @@
1/*
2 * arch/i386/mm/ioremap.c
3 *
4 * Re-map IO memory to kernel address space so that we can access it.
5 * This is needed for high PCI addresses that aren't mapped in the
6 * 640k-1MB IO memory area on PC's
7 *
8 * (C) Copyright 1995 1996 Linus Torvalds
9 */
10
11#include <linux/vmalloc.h>
12#include <linux/init.h>
13#include <linux/slab.h>
14#include <asm/io.h>
15#include <asm/fixmap.h>
16#include <asm/cacheflush.h>
17#include <asm/tlbflush.h>
18#include <asm/pgtable.h>
19
20#define ISA_START_ADDRESS 0xa0000
21#define ISA_END_ADDRESS 0x100000
22
23static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
24 unsigned long end, unsigned long phys_addr, unsigned long flags)
25{
26 pte_t *pte;
27 unsigned long pfn;
28
29 pfn = phys_addr >> PAGE_SHIFT;
30 pte = pte_alloc_kernel(&init_mm, pmd, addr);
31 if (!pte)
32 return -ENOMEM;
33 do {
34 BUG_ON(!pte_none(*pte));
35 set_pte(pte, pfn_pte(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW |
36 _PAGE_DIRTY | _PAGE_ACCESSED | flags)));
37 pfn++;
38 } while (pte++, addr += PAGE_SIZE, addr != end);
39 return 0;
40}
41
42static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
43 unsigned long end, unsigned long phys_addr, unsigned long flags)
44{
45 pmd_t *pmd;
46 unsigned long next;
47
48 phys_addr -= addr;
49 pmd = pmd_alloc(&init_mm, pud, addr);
50 if (!pmd)
51 return -ENOMEM;
52 do {
53 next = pmd_addr_end(addr, end);
54 if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, flags))
55 return -ENOMEM;
56 } while (pmd++, addr = next, addr != end);
57 return 0;
58}
59
60static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
61 unsigned long end, unsigned long phys_addr, unsigned long flags)
62{
63 pud_t *pud;
64 unsigned long next;
65
66 phys_addr -= addr;
67 pud = pud_alloc(&init_mm, pgd, addr);
68 if (!pud)
69 return -ENOMEM;
70 do {
71 next = pud_addr_end(addr, end);
72 if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, flags))
73 return -ENOMEM;
74 } while (pud++, addr = next, addr != end);
75 return 0;
76}
77
78static int ioremap_page_range(unsigned long addr,
79 unsigned long end, unsigned long phys_addr, unsigned long flags)
80{
81 pgd_t *pgd;
82 unsigned long next;
83 int err;
84
85 BUG_ON(addr >= end);
86 flush_cache_all();
87 phys_addr -= addr;
88 pgd = pgd_offset_k(addr);
89 spin_lock(&init_mm.page_table_lock);
90 do {
91 next = pgd_addr_end(addr, end);
92 err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags);
93 if (err)
94 break;
95 } while (pgd++, addr = next, addr != end);
96 spin_unlock(&init_mm.page_table_lock);
97 flush_tlb_all();
98 return err;
99}
100
101/*
102 * Generic mapping function (not visible outside):
103 */
104
105/*
106 * Remap an arbitrary physical address space into the kernel virtual
107 * address space. Needed when the kernel wants to access high addresses
108 * directly.
109 *
110 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
111 * have to convert them into an offset in a page-aligned mapping, but the
112 * caller shouldn't need to know that small detail.
113 */
114void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
115{
116 void __iomem * addr;
117 struct vm_struct * area;
118 unsigned long offset, last_addr;
119
120 /* Don't allow wraparound or zero size */
121 last_addr = phys_addr + size - 1;
122 if (!size || last_addr < phys_addr)
123 return NULL;
124
125 /*
126 * Don't remap the low PCI/ISA area, it's always mapped..
127 */
128 if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
129 return (void __iomem *) phys_to_virt(phys_addr);
130
131 /*
132 * Don't allow anybody to remap normal RAM that we're using..
133 */
134 if (phys_addr <= virt_to_phys(high_memory - 1)) {
135 char *t_addr, *t_end;
136 struct page *page;
137
138 t_addr = __va(phys_addr);
139 t_end = t_addr + (size - 1);
140
141 for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
142 if(!PageReserved(page))
143 return NULL;
144 }
145
146 /*
147 * Mappings have to be page-aligned
148 */
149 offset = phys_addr & ~PAGE_MASK;
150 phys_addr &= PAGE_MASK;
151 size = PAGE_ALIGN(last_addr+1) - phys_addr;
152
153 /*
154 * Ok, go for it..
155 */
156 area = get_vm_area(size, VM_IOREMAP | (flags << 20));
157 if (!area)
158 return NULL;
159 area->phys_addr = phys_addr;
160 addr = (void __iomem *) area->addr;
161 if (ioremap_page_range((unsigned long) addr,
162 (unsigned long) addr + size, phys_addr, flags)) {
163 vunmap((void __force *) addr);
164 return NULL;
165 }
166 return (void __iomem *) (offset + (char __iomem *)addr);
167}
168
169
170/**
171 * ioremap_nocache - map bus memory into CPU space
172 * @offset: bus address of the memory
173 * @size: size of the resource to map
174 *
175 * ioremap_nocache performs a platform specific sequence of operations to
176 * make bus memory CPU accessible via the readb/readw/readl/writeb/
177 * writew/writel functions and the other mmio helpers. The returned
178 * address is not guaranteed to be usable directly as a virtual
179 * address.
180 *
181 * This version of ioremap ensures that the memory is marked uncachable
182 * on the CPU as well as honouring existing caching rules from things like
183 * the PCI bus. Note that there are other caches and buffers on many
184 * busses. In particular, driver authors should read up on PCI writes.
185 *
186 * It's useful if some control registers are in such an area and
187 * write combining or read caching is not desirable:
188 *
189 * Must be freed with iounmap.
190 */
191
192void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
193{
194 unsigned long last_addr;
195 void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
196 if (!p)
197 return p;
198
199 /* Guaranteed to be > phys_addr, as per __ioremap() */
200 last_addr = phys_addr + size - 1;
201
202 if (last_addr < virt_to_phys(high_memory) - 1) {
203 struct page *ppage = virt_to_page(__va(phys_addr));
204 unsigned long npages;
205
206 phys_addr &= PAGE_MASK;
207
208 /* This might overflow and become zero.. */
209 last_addr = PAGE_ALIGN(last_addr);
210
211 /* .. but that's ok, because modulo-2**n arithmetic will make
212 * the page-aligned "last - first" come out right.
213 */
214 npages = (last_addr - phys_addr) >> PAGE_SHIFT;
215
216 if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) {
217 iounmap(p);
218 p = NULL;
219 }
220 global_flush_tlb();
221 }
222
223 return p;
224}
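/*
 * Illustrative usage sketch (hypothetical driver; the BAR number and
 * register offset are made up):
 *
 *	void __iomem *regs = ioremap_nocache(pci_resource_start(pdev, 0),
 *					     pci_resource_len(pdev, 0));
 *	if (regs) {
 *		writel(1, regs + 0x10);
 *		iounmap(regs);
 *	}
 */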
225
226void iounmap(volatile void __iomem *addr)
227{
228 struct vm_struct *p;
229 if ((void __force *) addr <= high_memory)
230 return;
231
232 /*
233 * __ioremap special-cases the PCI/ISA range by not instantiating a
234 * vm_area and by simply returning an address into the kernel mapping
235 * of ISA space. So handle that here.
236 */
237 if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
238 addr < phys_to_virt(ISA_END_ADDRESS))
239 return;
240
241 p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
242 if (!p) {
243 printk("__iounmap: bad address %p\n", addr);
244 return;
245 }
246
247 if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) {
248 /* p->size includes the guard page, but cpa doesn't like that */
249 change_page_attr(virt_to_page(__va(p->phys_addr)),
250 p->size >> PAGE_SHIFT,
251 PAGE_KERNEL);
252 global_flush_tlb();
253 }
254 kfree(p);
255}
256
257void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
258{
259 unsigned long offset, last_addr;
260 unsigned int nrpages;
261 enum fixed_addresses idx;
262
263 /* Don't allow wraparound or zero size */
264 last_addr = phys_addr + size - 1;
265 if (!size || last_addr < phys_addr)
266 return NULL;
267
268 /*
269 * Don't remap the low PCI/ISA area, it's always mapped..
270 */
271 if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
272 return phys_to_virt(phys_addr);
273
274 /*
275 * Mappings have to be page-aligned
276 */
277 offset = phys_addr & ~PAGE_MASK;
278 phys_addr &= PAGE_MASK;
279	size = PAGE_ALIGN(last_addr+1) - phys_addr;
280
281 /*
282 * Mappings have to fit in the FIX_BTMAP area.
283 */
284 nrpages = size >> PAGE_SHIFT;
285 if (nrpages > NR_FIX_BTMAPS)
286 return NULL;
287
288 /*
289 * Ok, go for it..
290 */
291 idx = FIX_BTMAP_BEGIN;
292 while (nrpages > 0) {
293 set_fixmap(idx, phys_addr);
294 phys_addr += PAGE_SIZE;
295 --idx;
296 --nrpages;
297 }
298 return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
299}
300
301void __init bt_iounmap(void *addr, unsigned long size)
302{
303 unsigned long virt_addr;
304 unsigned long offset;
305 unsigned int nrpages;
306 enum fixed_addresses idx;
307
308 virt_addr = (unsigned long)addr;
309 if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))
310 return;
311 offset = virt_addr & ~PAGE_MASK;
312	nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT;
313
314 idx = FIX_BTMAP_BEGIN;
315 while (nrpages > 0) {
316 clear_fixmap(idx);
317 --idx;
318 --nrpages;
319 }
320}
diff --git a/arch/i386/mm/mmap.c b/arch/i386/mm/mmap.c
new file mode 100644
index 000000000000..e4730a1a43dd
--- /dev/null
+++ b/arch/i386/mm/mmap.c
@@ -0,0 +1,76 @@
1/*
2 * linux/arch/i386/mm/mmap.c
3 *
4 * flexible mmap layout support
5 *
6 * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
7 * All Rights Reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 *
23 *
24 * Started by Ingo Molnar <mingo@elte.hu>
25 */
26
27#include <linux/personality.h>
28#include <linux/mm.h>
29#include <linux/random.h>
30
31/*
32 * Top of mmap area (just below the process stack).
33 *
34 * Leave at least a ~128 MB hole.
35 */
36#define MIN_GAP (128*1024*1024)
37#define MAX_GAP (TASK_SIZE/6*5)
38
39static inline unsigned long mmap_base(struct mm_struct *mm)
40{
41 unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
42 unsigned long random_factor = 0;
43
44 if (current->flags & PF_RANDOMIZE)
45 random_factor = get_random_int() % (1024*1024);
46
47 if (gap < MIN_GAP)
48 gap = MIN_GAP;
49 else if (gap > MAX_GAP)
50 gap = MAX_GAP;
51
52 return PAGE_ALIGN(TASK_SIZE - gap - random_factor);
53}
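/*
 * Worked example (illustrative, assuming the default 3GB TASK_SIZE and an
 * 8MB stack rlimit): 8MB < MIN_GAP, so gap becomes 128MB and mmap_base is
 * PAGE_ALIGN(0xC0000000 - 0x08000000) = 0xB8000000, lowered by up to a
 * further 1MB when PF_RANDOMIZE is set.
 */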
54
55/*
56 * This function, called very early during the creation of a new
57 * process VM image, sets up which VM layout function to use:
58 */
59void arch_pick_mmap_layout(struct mm_struct *mm)
60{
61 /*
62 * Fall back to the standard layout if the personality
63 * bit is set, or if the expected stack growth is unlimited:
64 */
65 if (sysctl_legacy_va_layout ||
66 (current->personality & ADDR_COMPAT_LAYOUT) ||
67 current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
68 mm->mmap_base = TASK_UNMAPPED_BASE;
69 mm->get_unmapped_area = arch_get_unmapped_area;
70 mm->unmap_area = arch_unmap_area;
71 } else {
72 mm->mmap_base = mmap_base(mm);
73 mm->get_unmapped_area = arch_get_unmapped_area_topdown;
74 mm->unmap_area = arch_unmap_area_topdown;
75 }
76}
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
new file mode 100644
index 000000000000..cb3da6baa704
--- /dev/null
+++ b/arch/i386/mm/pageattr.c
@@ -0,0 +1,221 @@
1/*
2 * Copyright 2002 Andi Kleen, SuSE Labs.
3 * Thanks to Ben LaHaise for precious feedback.
4 */
5
6#include <linux/config.h>
7#include <linux/mm.h>
8#include <linux/sched.h>
9#include <linux/highmem.h>
10#include <linux/module.h>
11#include <linux/slab.h>
12#include <asm/uaccess.h>
13#include <asm/processor.h>
14#include <asm/tlbflush.h>
15
16static DEFINE_SPINLOCK(cpa_lock);
17static struct list_head df_list = LIST_HEAD_INIT(df_list);
18
19
20pte_t *lookup_address(unsigned long address)
21{
22 pgd_t *pgd = pgd_offset_k(address);
23 pud_t *pud;
24 pmd_t *pmd;
25 if (pgd_none(*pgd))
26 return NULL;
27 pud = pud_offset(pgd, address);
28 if (pud_none(*pud))
29 return NULL;
30 pmd = pmd_offset(pud, address);
31 if (pmd_none(*pmd))
32 return NULL;
33 if (pmd_large(*pmd))
34 return (pte_t *)pmd;
35 return pte_offset_kernel(pmd, address);
36}
37
38static struct page *split_large_page(unsigned long address, pgprot_t prot)
39{
40 int i;
41 unsigned long addr;
42 struct page *base;
43 pte_t *pbase;
44
45 spin_unlock_irq(&cpa_lock);
46 base = alloc_pages(GFP_KERNEL, 0);
47 spin_lock_irq(&cpa_lock);
48 if (!base)
49 return NULL;
50
51 address = __pa(address);
52 addr = address & LARGE_PAGE_MASK;
53 pbase = (pte_t *)page_address(base);
54 for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
55 pbase[i] = pfn_pte(addr >> PAGE_SHIFT,
56 addr == address ? prot : PAGE_KERNEL);
57 }
58 return base;
59}
60
61static void flush_kernel_map(void *dummy)
62{
63 /* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */
64 if (boot_cpu_data.x86_model >= 4)
65 asm volatile("wbinvd":::"memory");
66 /* Flush all to work around Errata in early athlons regarding
67 * large page flushing.
68 */
69 __flush_tlb_all();
70}
71
72static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
73{
74 struct page *page;
75 unsigned long flags;
76
77 set_pte_atomic(kpte, pte); /* change init_mm */
78 if (PTRS_PER_PMD > 1)
79 return;
80
81 spin_lock_irqsave(&pgd_lock, flags);
82 for (page = pgd_list; page; page = (struct page *)page->index) {
83 pgd_t *pgd;
84 pud_t *pud;
85 pmd_t *pmd;
86 pgd = (pgd_t *)page_address(page) + pgd_index(address);
87 pud = pud_offset(pgd, address);
88 pmd = pmd_offset(pud, address);
89 set_pte_atomic((pte_t *)pmd, pte);
90 }
91 spin_unlock_irqrestore(&pgd_lock, flags);
92}
93
94/*
95 * No more special protections in this 2/4MB area - revert to a
96 * large page again.
97 */
98static inline void revert_page(struct page *kpte_page, unsigned long address)
99{
100 pte_t *linear = (pte_t *)
101 pmd_offset(pud_offset(pgd_offset_k(address), address), address);
102 set_pmd_pte(linear, address,
103 pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
104 PAGE_KERNEL_LARGE));
105}
106
107static int
108__change_page_attr(struct page *page, pgprot_t prot)
109{
110 pte_t *kpte;
111 unsigned long address;
112 struct page *kpte_page;
113
114 BUG_ON(PageHighMem(page));
115 address = (unsigned long)page_address(page);
116
117 kpte = lookup_address(address);
118 if (!kpte)
119 return -EINVAL;
120 kpte_page = virt_to_page(kpte);
121 if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
122 if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
123 set_pte_atomic(kpte, mk_pte(page, prot));
124 } else {
125 struct page *split = split_large_page(address, prot);
126 if (!split)
127 return -ENOMEM;
128 set_pmd_pte(kpte,address,mk_pte(split, PAGE_KERNEL));
129 kpte_page = split;
130 }
131 get_page(kpte_page);
132 } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
133 set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
134 __put_page(kpte_page);
135 } else
136 BUG();
137
138 /*
139 * If the pte was reserved, it means it was created at boot
140 * time (not via split_large_page) and in turn we must not
141 * replace it with a largepage.
142 */
143 if (!PageReserved(kpte_page)) {
144 /* memleak and potential failed 2M page regeneration */
145 BUG_ON(!page_count(kpte_page));
146
147 if (cpu_has_pse && (page_count(kpte_page) == 1)) {
148 list_add(&kpte_page->lru, &df_list);
149 revert_page(kpte_page, address);
150 }
151 }
152 return 0;
153}
154
155static inline void flush_map(void)
156{
157 on_each_cpu(flush_kernel_map, NULL, 1, 1);
158}
159
160/*
161 * Change the page attributes of a page in the linear mapping.
162 *
163 * This should be used when a page is mapped with a different caching policy
164 * than write-back somewhere - some CPUs do not like it when mappings with
165 * different caching policies exist. This changes the page attributes of the
166 * kernel linear mapping too.
167 *
168 * The caller needs to ensure that there are no conflicting mappings elsewhere.
169 * This function only deals with the kernel linear map.
170 *
171 * Caller must call global_flush_tlb() after this.
172 */
173int change_page_attr(struct page *page, int numpages, pgprot_t prot)
174{
175 int err = 0;
176 int i;
177 unsigned long flags;
178
179 spin_lock_irqsave(&cpa_lock, flags);
180 for (i = 0; i < numpages; i++, page++) {
181 err = __change_page_attr(page, prot);
182 if (err)
183 break;
184 }
185 spin_unlock_irqrestore(&cpa_lock, flags);
186 return err;
187}
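/*
 * Illustrative usage sketch (hypothetical caller, not part of this file),
 * showing the pairing the comment above requires:
 *
 *	if (change_page_attr(page, 1, PAGE_KERNEL_NOCACHE) == 0)
 *		global_flush_tlb();
 */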
188
189void global_flush_tlb(void)
190{
191 LIST_HEAD(l);
192 struct page *pg, *next;
193
194 BUG_ON(irqs_disabled());
195
196 spin_lock_irq(&cpa_lock);
197 list_splice_init(&df_list, &l);
198 spin_unlock_irq(&cpa_lock);
199 flush_map();
200 list_for_each_entry_safe(pg, next, &l, lru)
201 __free_page(pg);
202}
203
204#ifdef CONFIG_DEBUG_PAGEALLOC
205void kernel_map_pages(struct page *page, int numpages, int enable)
206{
207 if (PageHighMem(page))
208 return;
209 /* the return value is ignored - the calls cannot fail,
210 * large pages are disabled at boot time.
211 */
212 change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
213 /* we should perform an IPI and flush all tlbs,
214	 * but that can deadlock, so flush only the current cpu.
215 */
216 __flush_tlb_all();
217}
218#endif
219
220EXPORT_SYMBOL(change_page_attr);
221EXPORT_SYMBOL(global_flush_tlb);
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
new file mode 100644
index 000000000000..0742d54f8bb0
--- /dev/null
+++ b/arch/i386/mm/pgtable.c
@@ -0,0 +1,260 @@
1/*
2 * linux/arch/i386/mm/pgtable.c
3 */
4
5#include <linux/config.h>
6#include <linux/sched.h>
7#include <linux/kernel.h>
8#include <linux/errno.h>
9#include <linux/mm.h>
10#include <linux/swap.h>
11#include <linux/smp.h>
12#include <linux/highmem.h>
13#include <linux/slab.h>
14#include <linux/pagemap.h>
15#include <linux/spinlock.h>
16
17#include <asm/system.h>
18#include <asm/pgtable.h>
19#include <asm/pgalloc.h>
20#include <asm/fixmap.h>
21#include <asm/e820.h>
22#include <asm/tlb.h>
23#include <asm/tlbflush.h>
24
25void show_mem(void)
26{
27 int total = 0, reserved = 0;
28 int shared = 0, cached = 0;
29 int highmem = 0;
30 struct page *page;
31 pg_data_t *pgdat;
32 unsigned long i;
33
34 printk("Mem-info:\n");
35 show_free_areas();
36 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
37 for_each_pgdat(pgdat) {
38 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
39 page = pgdat->node_mem_map + i;
40 total++;
41 if (PageHighMem(page))
42 highmem++;
43 if (PageReserved(page))
44 reserved++;
45 else if (PageSwapCache(page))
46 cached++;
47 else if (page_count(page))
48 shared += page_count(page) - 1;
49 }
50 }
51 printk("%d pages of RAM\n", total);
52 printk("%d pages of HIGHMEM\n",highmem);
53 printk("%d reserved pages\n",reserved);
54 printk("%d pages shared\n",shared);
55 printk("%d pages swap cached\n",cached);
56}
57
58/*
59 * Associate a virtual page frame with a given physical page frame
60 * and protection flags for that frame.
61 */
62static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
63{
64 pgd_t *pgd;
65 pud_t *pud;
66 pmd_t *pmd;
67 pte_t *pte;
68
69 pgd = swapper_pg_dir + pgd_index(vaddr);
70 if (pgd_none(*pgd)) {
71 BUG();
72 return;
73 }
74 pud = pud_offset(pgd, vaddr);
75 if (pud_none(*pud)) {
76 BUG();
77 return;
78 }
79 pmd = pmd_offset(pud, vaddr);
80 if (pmd_none(*pmd)) {
81 BUG();
82 return;
83 }
84 pte = pte_offset_kernel(pmd, vaddr);
85 /* <pfn,flags> stored as-is, to permit clearing entries */
86 set_pte(pte, pfn_pte(pfn, flags));
87
88 /*
89 * It's enough to flush this one mapping.
90 * (PGE mappings get flushed as well)
91 */
92 __flush_tlb_one(vaddr);
93}
94
95/*
96 * Associate a large virtual page frame with a given physical page frame
97 * and protection flags for that frame. pfn is for the base of the page,
98 * vaddr is what the page gets mapped to - both must be properly aligned.
99 * The pmd must already be instantiated. Assumes PAE mode.
100 */
101void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
102{
103 pgd_t *pgd;
104 pud_t *pud;
105 pmd_t *pmd;
106
107 if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */
108 printk ("set_pmd_pfn: vaddr misaligned\n");
109 return; /* BUG(); */
110 }
111 if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */
112 printk ("set_pmd_pfn: pfn misaligned\n");
113 return; /* BUG(); */
114 }
115 pgd = swapper_pg_dir + pgd_index(vaddr);
116 if (pgd_none(*pgd)) {
117 printk ("set_pmd_pfn: pgd_none\n");
118 return; /* BUG(); */
119 }
120 pud = pud_offset(pgd, vaddr);
121 pmd = pmd_offset(pud, vaddr);
122 set_pmd(pmd, pfn_pmd(pfn, flags));
123 /*
124 * It's enough to flush this one mapping.
125 * (PGE mappings get flushed as well)
126 */
127 __flush_tlb_one(vaddr);
128}
129
130void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
131{
132 unsigned long address = __fix_to_virt(idx);
133
134 if (idx >= __end_of_fixed_addresses) {
135 BUG();
136 return;
137 }
138 set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
139}
140
141pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
142{
143 return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
144}
145
146struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
147{
148 struct page *pte;
149
150#ifdef CONFIG_HIGHPTE
151 pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
152#else
153 pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
154#endif
155 return pte;
156}
157
158void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
159{
160 memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
161}
162
163/*
164 * List of all pgd's needed for non-PAE so it can invalidate entries
165 * in both cached and uncached pgd's; not needed for PAE since the
166 * kernel pmd is shared. If PAE were not to share the pmd a similar
167 * tactic would be needed. This is essentially codepath-based locking
168 * against pageattr.c; it is the unique case in which a valid change
169 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
170 * vmalloc faults work because attached pagetables are never freed.
171 * The locking scheme was chosen on the basis of manfred's
172 * recommendations and having no core impact whatsoever.
173 * -- wli
174 */
175DEFINE_SPINLOCK(pgd_lock);
176struct page *pgd_list;
177
178static inline void pgd_list_add(pgd_t *pgd)
179{
180 struct page *page = virt_to_page(pgd);
181 page->index = (unsigned long)pgd_list;
182 if (pgd_list)
183 pgd_list->private = (unsigned long)&page->index;
184 pgd_list = page;
185 page->private = (unsigned long)&pgd_list;
186}
187
188static inline void pgd_list_del(pgd_t *pgd)
189{
190 struct page *next, **pprev, *page = virt_to_page(pgd);
191 next = (struct page *)page->index;
192 pprev = (struct page **)page->private;
193 *pprev = next;
194 if (next)
195 next->private = (unsigned long)pprev;
196}
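/*
 * As the two helpers above show, the list is threaded through struct page
 * itself: page->index is the next pointer and page->private holds the
 * address of whichever link currently points at this page, giving O(1)
 * unlink.
 */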
197
198void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
199{
200 unsigned long flags;
201
202 if (PTRS_PER_PMD == 1)
203 spin_lock_irqsave(&pgd_lock, flags);
204
205 memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
206 swapper_pg_dir + USER_PTRS_PER_PGD,
207 (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
208
209 if (PTRS_PER_PMD > 1)
210 return;
211
212 pgd_list_add(pgd);
213 spin_unlock_irqrestore(&pgd_lock, flags);
214 memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
215}
216
217/* never called when PTRS_PER_PMD > 1 */
218void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
219{
220 unsigned long flags; /* can be called from interrupt context */
221
222 spin_lock_irqsave(&pgd_lock, flags);
223 pgd_list_del(pgd);
224 spin_unlock_irqrestore(&pgd_lock, flags);
225}
226
227pgd_t *pgd_alloc(struct mm_struct *mm)
228{
229 int i;
230 pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
231
232 if (PTRS_PER_PMD == 1 || !pgd)
233 return pgd;
234
235 for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
236 pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
237 if (!pmd)
238 goto out_oom;
239 set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
240 }
241 return pgd;
242
243out_oom:
244 for (i--; i >= 0; i--)
245 kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
246 kmem_cache_free(pgd_cache, pgd);
247 return NULL;
248}
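/*
 * Descriptive note: the "1 +" in __pgd(1 + __pa(pmd)) sets _PAGE_PRESENT
 * (bit 0) in the pgd entry; the matching "- 1" in the out_oom path above
 * and in pgd_free() below strips it before converting back to a virtual
 * address.
 */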
249
250void pgd_free(pgd_t *pgd)
251{
252 int i;
253
254 /* in the PAE case user pgd entries are overwritten before usage */
255 if (PTRS_PER_PMD > 1)
256 for (i = 0; i < USER_PTRS_PER_PGD; ++i)
257 kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
258 /* in the non-PAE case, clear_page_range() clears user pgd entries */
259 kmem_cache_free(pgd_cache, pgd);
260}
diff --git a/arch/i386/oprofile/Kconfig b/arch/i386/oprofile/Kconfig
new file mode 100644
index 000000000000..5ade19801b97
--- /dev/null
+++ b/arch/i386/oprofile/Kconfig
@@ -0,0 +1,23 @@
1
2menu "Profiling support"
3 depends on EXPERIMENTAL
4
5config PROFILING
6 bool "Profiling support (EXPERIMENTAL)"
7 help
8 Say Y here to enable the extended profiling support mechanisms used
9 by profilers such as OProfile.
10
11
12config OPROFILE
13 tristate "OProfile system profiling (EXPERIMENTAL)"
14 depends on PROFILING
15 help
16 OProfile is a profiling system capable of profiling the
17 whole system, include the kernel, kernel modules, libraries,
18 and applications.
19
20 If unsure, say N.
21
22endmenu
23
diff --git a/arch/i386/oprofile/Makefile b/arch/i386/oprofile/Makefile
new file mode 100644
index 000000000000..30f3eb366667
--- /dev/null
+++ b/arch/i386/oprofile/Makefile
@@ -0,0 +1,12 @@
1obj-$(CONFIG_OPROFILE) += oprofile.o
2
3DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
4 oprof.o cpu_buffer.o buffer_sync.o \
5 event_buffer.o oprofile_files.o \
6 oprofilefs.o oprofile_stats.o \
7 timer_int.o )
8
9oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
10oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \
11 op_model_ppro.o op_model_p4.o
12oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o
diff --git a/arch/i386/oprofile/backtrace.c b/arch/i386/oprofile/backtrace.c
new file mode 100644
index 000000000000..52d72e074f7f
--- /dev/null
+++ b/arch/i386/oprofile/backtrace.c
@@ -0,0 +1,111 @@
1/**
2 * @file backtrace.c
3 *
4 * @remark Copyright 2002 OProfile authors
5 * @remark Read the file COPYING
6 *
7 * @author John Levon
8 * @author David Smith
9 */
10
11#include <linux/oprofile.h>
12#include <linux/sched.h>
13#include <linux/mm.h>
14#include <asm/ptrace.h>
15
16struct frame_head {
17 struct frame_head * ebp;
18 unsigned long ret;
19} __attribute__((packed));
20
21static struct frame_head *
22dump_backtrace(struct frame_head * head)
23{
24 oprofile_add_trace(head->ret);
25
26 /* frame pointers should strictly progress back up the stack
27 * (towards higher addresses) */
28 if (head >= head->ebp)
29 return NULL;
30
31 return head->ebp;
32}
33
34/* check that the page(s) containing the frame head are present */
35static int pages_present(struct frame_head * head)
36{
37 struct mm_struct * mm = current->mm;
38
39 /* FIXME: only necessary once per page */
40 if (!check_user_page_readable(mm, (unsigned long)head))
41 return 0;
42
43 return check_user_page_readable(mm, (unsigned long)(head + 1));
44}
45
46/*
47 * | | /\ Higher addresses
48 * | |
49 * --------------- stack base (address of current_thread_info)
50 * | thread info |
51 * . .
52 * | stack |
53 * --------------- saved regs->ebp value if valid (frame_head address)
54 * . .
55 * --------------- struct pt_regs stored on stack (struct pt_regs *)
56 * | |
57 * . .
58 * | |
59 * --------------- %esp
60 * | |
61 * | | \/ Lower addresses
62 *
63 * Thus, &pt_regs <-> stack base restricts the valid(ish) ebp values
64 */
65#ifdef CONFIG_FRAME_POINTER
66static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
67{
68 unsigned long headaddr = (unsigned long)head;
69 unsigned long stack = (unsigned long)regs;
70 unsigned long stack_base = (stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE;
71
72 return headaddr > stack && headaddr < stack_base;
73}
74#else
75/* without fp, it's just junk */
76static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
77{
78 return 0;
79}
80#endif
81
82
83void
84x86_backtrace(struct pt_regs * const regs, unsigned int depth)
85{
86 struct frame_head *head;
87
88#ifdef CONFIG_X86_64
89 head = (struct frame_head *)regs->rbp;
90#else
91 head = (struct frame_head *)regs->ebp;
92#endif
93
94 if (!user_mode(regs)) {
95 while (depth-- && valid_kernel_stack(head, regs))
96 head = dump_backtrace(head);
97 return;
98 }
99
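	/* pages_present() walks user pagetables, hence the lock; a
	 * trylock, presumably, because this path runs from NMI context
	 * where spinning on a lock the interrupted code already holds
	 * would deadlock.
	 */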
100#ifdef CONFIG_SMP
101 if (!spin_trylock(&current->mm->page_table_lock))
102 return;
103#endif
104
105 while (depth-- && head && pages_present(head))
106 head = dump_backtrace(head);
107
108#ifdef CONFIG_SMP
109 spin_unlock(&current->mm->page_table_lock);
110#endif
111}
diff --git a/arch/i386/oprofile/init.c b/arch/i386/oprofile/init.c
new file mode 100644
index 000000000000..c90332de582b
--- /dev/null
+++ b/arch/i386/oprofile/init.c
@@ -0,0 +1,48 @@
1/**
2 * @file init.c
3 *
4 * @remark Copyright 2002 OProfile authors
5 * @remark Read the file COPYING
6 *
7 * @author John Levon <levon@movementarian.org>
8 */
9
10#include <linux/oprofile.h>
11#include <linux/init.h>
12#include <linux/errno.h>
13
14/* We support CPUs that have performance counters, such as the
15 * Pentium Pro, with the NMI mode driver.
16 */
17
18extern int nmi_init(struct oprofile_operations * ops);
19extern int nmi_timer_init(struct oprofile_operations * ops);
20extern void nmi_exit(void);
21extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
22
23
24int __init oprofile_arch_init(struct oprofile_operations * ops)
25{
26 int ret;
27
28 ret = -ENODEV;
29
30#ifdef CONFIG_X86_LOCAL_APIC
31 ret = nmi_init(ops);
32#endif
33#ifdef CONFIG_X86_IO_APIC
34 if (ret < 0)
35 ret = nmi_timer_init(ops);
36#endif
37 ops->backtrace = x86_backtrace;
38
39 return ret;
40}
41
42
43void oprofile_arch_exit(void)
44{
45#ifdef CONFIG_X86_LOCAL_APIC
46 nmi_exit();
47#endif
48}
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
new file mode 100644
index 000000000000..3492d961d3f1
--- /dev/null
+++ b/arch/i386/oprofile/nmi_int.c
@@ -0,0 +1,427 @@
1/**
2 * @file nmi_int.c
3 *
4 * @remark Copyright 2002 OProfile authors
5 * @remark Read the file COPYING
6 *
7 * @author John Levon <levon@movementarian.org>
8 */
9
10#include <linux/init.h>
11#include <linux/notifier.h>
12#include <linux/smp.h>
13#include <linux/oprofile.h>
14#include <linux/sysdev.h>
15#include <linux/slab.h>
16#include <asm/nmi.h>
17#include <asm/msr.h>
18#include <asm/apic.h>
19
20#include "op_counter.h"
21#include "op_x86_model.h"
22
23static struct op_x86_model_spec const * model;
24static struct op_msrs cpu_msrs[NR_CPUS];
25static unsigned long saved_lvtpc[NR_CPUS];
26
27static int nmi_start(void);
28static void nmi_stop(void);
29
30/* 0 == registered but off, 1 == registered and on */
31static int nmi_enabled = 0;
32
33#ifdef CONFIG_PM
34
35static int nmi_suspend(struct sys_device *dev, u32 state)
36{
37 if (nmi_enabled == 1)
38 nmi_stop();
39 return 0;
40}
41
42
43static int nmi_resume(struct sys_device *dev)
44{
45 if (nmi_enabled == 1)
46 nmi_start();
47 return 0;
48}
49
50
51static struct sysdev_class oprofile_sysclass = {
52 set_kset_name("oprofile"),
53 .resume = nmi_resume,
54 .suspend = nmi_suspend,
55};
56
57
58static struct sys_device device_oprofile = {
59 .id = 0,
60 .cls = &oprofile_sysclass,
61};
62
63
64static int __init init_driverfs(void)
65{
66 int error;
67 if (!(error = sysdev_class_register(&oprofile_sysclass)))
68 error = sysdev_register(&device_oprofile);
69 return error;
70}
71
72
73static void exit_driverfs(void)
74{
75 sysdev_unregister(&device_oprofile);
76 sysdev_class_unregister(&oprofile_sysclass);
77}
78
79#else
80#define init_driverfs() do { } while (0)
81#define exit_driverfs() do { } while (0)
82#endif /* CONFIG_PM */
83
84
85static int nmi_callback(struct pt_regs * regs, int cpu)
86{
87 return model->check_ctrs(regs, &cpu_msrs[cpu]);
88}
89
90
91static void nmi_cpu_save_registers(struct op_msrs * msrs)
92{
93 unsigned int const nr_ctrs = model->num_counters;
94 unsigned int const nr_ctrls = model->num_controls;
95 struct op_msr * counters = msrs->counters;
96 struct op_msr * controls = msrs->controls;
97 unsigned int i;
98
99 for (i = 0; i < nr_ctrs; ++i) {
100 rdmsr(counters[i].addr,
101 counters[i].saved.low,
102 counters[i].saved.high);
103 }
104
105 for (i = 0; i < nr_ctrls; ++i) {
106 rdmsr(controls[i].addr,
107 controls[i].saved.low,
108 controls[i].saved.high);
109 }
110}
111
112
113static void nmi_save_registers(void * dummy)
114{
115 int cpu = smp_processor_id();
116 struct op_msrs * msrs = &cpu_msrs[cpu];
117 model->fill_in_addresses(msrs);
118 nmi_cpu_save_registers(msrs);
119}
120
121
122static void free_msrs(void)
123{
124 int i;
125 for (i = 0; i < NR_CPUS; ++i) {
126 kfree(cpu_msrs[i].counters);
127 cpu_msrs[i].counters = NULL;
128 kfree(cpu_msrs[i].controls);
129 cpu_msrs[i].controls = NULL;
130 }
131}
132
133
134static int allocate_msrs(void)
135{
136 int success = 1;
137 size_t controls_size = sizeof(struct op_msr) * model->num_controls;
138 size_t counters_size = sizeof(struct op_msr) * model->num_counters;
139
140 int i;
141 for (i = 0; i < NR_CPUS; ++i) {
142 if (!cpu_online(i))
143 continue;
144
145 cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL);
146 if (!cpu_msrs[i].counters) {
147 success = 0;
148 break;
149 }
150 cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL);
151 if (!cpu_msrs[i].controls) {
152 success = 0;
153 break;
154 }
155 }
156
157 if (!success)
158 free_msrs();
159
160 return success;
161}
162
163
164static void nmi_cpu_setup(void * dummy)
165{
166 int cpu = smp_processor_id();
167 struct op_msrs * msrs = &cpu_msrs[cpu];
168 spin_lock(&oprofilefs_lock);
169 model->setup_ctrs(msrs);
170 spin_unlock(&oprofilefs_lock);
171 saved_lvtpc[cpu] = apic_read(APIC_LVTPC);
172 apic_write(APIC_LVTPC, APIC_DM_NMI);
173}
174
175
176static int nmi_setup(void)
177{
178 if (!allocate_msrs())
179 return -ENOMEM;
180
181	/* We need to be careful to install our NMI handler
182	 * without actually triggering any NMIs, as doing so
183	 * would break the core code horrifically.
184	 */
185
186 if (reserve_lapic_nmi() < 0) {
187 free_msrs();
188 return -EBUSY;
189 }
190 /* We need to serialize save and setup for HT because the subset
191 * of msrs are distinct for save and setup operations
192 */
193 on_each_cpu(nmi_save_registers, NULL, 0, 1);
194 on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
195 set_nmi_callback(nmi_callback);
196 nmi_enabled = 1;
197 return 0;
198}
199
200
201static void nmi_restore_registers(struct op_msrs * msrs)
202{
203 unsigned int const nr_ctrs = model->num_counters;
204 unsigned int const nr_ctrls = model->num_controls;
205 struct op_msr * counters = msrs->counters;
206 struct op_msr * controls = msrs->controls;
207 unsigned int i;
208
209 for (i = 0; i < nr_ctrls; ++i) {
210 wrmsr(controls[i].addr,
211 controls[i].saved.low,
212 controls[i].saved.high);
213 }
214
215 for (i = 0; i < nr_ctrs; ++i) {
216 wrmsr(counters[i].addr,
217 counters[i].saved.low,
218 counters[i].saved.high);
219 }
220}
221
222
223static void nmi_cpu_shutdown(void * dummy)
224{
225 unsigned int v;
226 int cpu = smp_processor_id();
227 struct op_msrs * msrs = &cpu_msrs[cpu];
228
229	/* Restoring APIC_LVTPC can trigger an APIC error because the delivery
230	 * mode and vector nr combination can be illegal. That's by design: at
231	 * power-on the APIC LVTs contain a zero vector nr, which is legal only
232	 * for NMI delivery mode. So inhibit APIC errors before restoring LVTPC.
233	 */
234 v = apic_read(APIC_LVTERR);
235 apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
236 apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
237 apic_write(APIC_LVTERR, v);
238 nmi_restore_registers(msrs);
239}
240
241
242static void nmi_shutdown(void)
243{
244 nmi_enabled = 0;
245 on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
246 unset_nmi_callback();
247 release_lapic_nmi();
248 free_msrs();
249}
250
251
252static void nmi_cpu_start(void * dummy)
253{
254 struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()];
255 model->start(msrs);
256}
257
258
259static int nmi_start(void)
260{
261 on_each_cpu(nmi_cpu_start, NULL, 0, 1);
262 return 0;
263}
264
265
266static void nmi_cpu_stop(void * dummy)
267{
268 struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()];
269 model->stop(msrs);
270}
271
272
273static void nmi_stop(void)
274{
275 on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
276}
277
278
279struct op_counter_config counter_config[OP_MAX_COUNTER];
280
281static int nmi_create_files(struct super_block * sb, struct dentry * root)
282{
283 unsigned int i;
284
285 for (i = 0; i < model->num_counters; ++i) {
286 struct dentry * dir;
287 char buf[2];
288
289 snprintf(buf, 2, "%d", i);
290 dir = oprofilefs_mkdir(sb, root, buf);
291 oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
292 oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
293 oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
294 oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
295 oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
296 oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
297 }
298
299 return 0;
300}
301
302
303static int __init p4_init(char ** cpu_type)
304{
305 __u8 cpu_model = boot_cpu_data.x86_model;
306
307 if (cpu_model > 4)
308 return 0;
309
310#ifndef CONFIG_SMP
311 *cpu_type = "i386/p4";
312 model = &op_p4_spec;
313 return 1;
314#else
315 switch (smp_num_siblings) {
316 case 1:
317 *cpu_type = "i386/p4";
318 model = &op_p4_spec;
319 return 1;
320
321 case 2:
322 *cpu_type = "i386/p4-ht";
323 model = &op_p4_ht2_spec;
324 return 1;
325 }
326#endif
327
328 printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
329 printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
330 return 0;
331}
332
333
334static int __init ppro_init(char ** cpu_type)
335{
336 __u8 cpu_model = boot_cpu_data.x86_model;
337
338 if (cpu_model > 0xd)
339 return 0;
340
341 if (cpu_model == 9) {
342 *cpu_type = "i386/p6_mobile";
343 } else if (cpu_model > 5) {
344 *cpu_type = "i386/piii";
345 } else if (cpu_model > 2) {
346 *cpu_type = "i386/pii";
347 } else {
348 *cpu_type = "i386/ppro";
349 }
350
351 model = &op_ppro_spec;
352 return 1;
353}
354
355/* in order to get driverfs right */
356static int using_nmi;
357
358int __init nmi_init(struct oprofile_operations *ops)
359{
360 __u8 vendor = boot_cpu_data.x86_vendor;
361 __u8 family = boot_cpu_data.x86;
362 char *cpu_type;
363
364 if (!cpu_has_apic)
365 return -ENODEV;
366
367 switch (vendor) {
368 case X86_VENDOR_AMD:
369 /* Needs to be at least an Athlon (or hammer in 32bit mode) */
370
371 switch (family) {
372 default:
373 return -ENODEV;
374 case 6:
375 model = &op_athlon_spec;
376 cpu_type = "i386/athlon";
377 break;
378 case 0xf:
379 model = &op_athlon_spec;
380 /* Actually it could be i386/hammer too, but give
381 user space an consistent name. */
382 cpu_type = "x86-64/hammer";
383 break;
384 }
385 break;
386
387 case X86_VENDOR_INTEL:
388 switch (family) {
389 /* Pentium IV */
390 case 0xf:
391 if (!p4_init(&cpu_type))
392 return -ENODEV;
393 break;
394
395 /* A P6-class processor */
396 case 6:
397 if (!ppro_init(&cpu_type))
398 return -ENODEV;
399 break;
400
401 default:
402 return -ENODEV;
403 }
404 break;
405
406 default:
407 return -ENODEV;
408 }
409
410 init_driverfs();
411 using_nmi = 1;
412 ops->create_files = nmi_create_files;
413 ops->setup = nmi_setup;
414 ops->shutdown = nmi_shutdown;
415 ops->start = nmi_start;
416 ops->stop = nmi_stop;
417 ops->cpu_type = cpu_type;
418 printk(KERN_INFO "oprofile: using NMI interrupt.\n");
419 return 0;
420}
421
422
423void nmi_exit(void)
424{
425 if (using_nmi)
426 exit_driverfs();
427}
diff --git a/arch/i386/oprofile/nmi_timer_int.c b/arch/i386/oprofile/nmi_timer_int.c
new file mode 100644
index 000000000000..b2e462abf337
--- /dev/null
+++ b/arch/i386/oprofile/nmi_timer_int.c
@@ -0,0 +1,55 @@
1/**
2 * @file nmi_timer_int.c
3 *
4 * @remark Copyright 2003 OProfile authors
5 * @remark Read the file COPYING
6 *
7 * @author Zwane Mwaikambo <zwane@linuxpower.ca>
8 */
9
10#include <linux/init.h>
11#include <linux/smp.h>
12#include <linux/irq.h>
13#include <linux/oprofile.h>
14#include <linux/rcupdate.h>
15
16
17#include <asm/nmi.h>
18#include <asm/apic.h>
19#include <asm/ptrace.h>
20
21static int nmi_timer_callback(struct pt_regs * regs, int cpu)
22{
23 oprofile_add_sample(regs, 0);
24 return 1;
25}
26
27static int timer_start(void)
28{
29 disable_timer_nmi_watchdog();
30 set_nmi_callback(nmi_timer_callback);
31 return 0;
32}
33
34
35static void timer_stop(void)
36{
37 enable_timer_nmi_watchdog();
38 unset_nmi_callback();
39 synchronize_kernel();
40}
41
42
43int __init nmi_timer_init(struct oprofile_operations * ops)
44{
45 extern int nmi_active;
46
47 if (nmi_active <= 0)
48 return -ENODEV;
49
50 ops->start = timer_start;
51 ops->stop = timer_stop;
52 ops->cpu_type = "timer";
53 printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
54 return 0;
55}
diff --git a/arch/i386/oprofile/op_counter.h b/arch/i386/oprofile/op_counter.h
new file mode 100644
index 000000000000..2880b15c4675
--- /dev/null
+++ b/arch/i386/oprofile/op_counter.h
@@ -0,0 +1,29 @@
1/**
2 * @file op_counter.h
3 *
4 * @remark Copyright 2002 OProfile authors
5 * @remark Read the file COPYING
6 *
7 * @author John Levon
8 */
9
10#ifndef OP_COUNTER_H
11#define OP_COUNTER_H
12
13#define OP_MAX_COUNTER 8
14
15/* Per-perfctr configuration as set via
16 * oprofilefs.
17 */
18struct op_counter_config {
19 unsigned long count;
20 unsigned long enabled;
21 unsigned long event;
22 unsigned long kernel;
23 unsigned long user;
24 unsigned long unit_mask;
25};
26
27extern struct op_counter_config counter_config[];
28
29#endif /* OP_COUNTER_H */
diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c
new file mode 100644
index 000000000000..3ad9a72a5036
--- /dev/null
+++ b/arch/i386/oprofile/op_model_athlon.c
@@ -0,0 +1,149 @@
1/**
2 * @file op_model_athlon.c
3 * athlon / K7 model-specific MSR operations
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author John Levon
9 * @author Philippe Elie
10 * @author Graydon Hoare
11 */
12
13#include <linux/oprofile.h>
14#include <asm/ptrace.h>
15#include <asm/msr.h>
16
17#include "op_x86_model.h"
18#include "op_counter.h"
19
20#define NUM_COUNTERS 4
21#define NUM_CONTROLS 4
22
23#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
24#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
25#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
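/* The overflow test relies on CTR_WRITE storing the negated count: the
 * counter then counts upward towards zero, so bit 31 going clear after
 * "count" events is exactly what CTR_OVERFLOWED detects (assuming counts
 * below 2^31).
 */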
26
27#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
28#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
29#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
30#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
31#define CTRL_CLEAR(x) (x &= (1<<21))
32#define CTRL_SET_ENABLE(val) (val |= 1<<20)
33#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
34#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
35#define CTRL_SET_UM(val, m) (val |= (m << 8))
36#define CTRL_SET_EVENT(val, e) (val |= e)
37
38static unsigned long reset_value[NUM_COUNTERS];
39
40static void athlon_fill_in_addresses(struct op_msrs * const msrs)
41{
42 msrs->counters[0].addr = MSR_K7_PERFCTR0;
43 msrs->counters[1].addr = MSR_K7_PERFCTR1;
44 msrs->counters[2].addr = MSR_K7_PERFCTR2;
45 msrs->counters[3].addr = MSR_K7_PERFCTR3;
46
47 msrs->controls[0].addr = MSR_K7_EVNTSEL0;
48 msrs->controls[1].addr = MSR_K7_EVNTSEL1;
49 msrs->controls[2].addr = MSR_K7_EVNTSEL2;
50 msrs->controls[3].addr = MSR_K7_EVNTSEL3;
51}
52
53
54static void athlon_setup_ctrs(struct op_msrs const * const msrs)
55{
56 unsigned int low, high;
57 int i;
58
59 /* clear all counters */
60 for (i = 0 ; i < NUM_CONTROLS; ++i) {
61 CTRL_READ(low, high, msrs, i);
62 CTRL_CLEAR(low);
63 CTRL_WRITE(low, high, msrs, i);
64 }
65
66 /* avoid a false detection of ctr overflows in NMI handler */
67 for (i = 0; i < NUM_COUNTERS; ++i) {
68 CTR_WRITE(1, msrs, i);
69 }
70
71 /* enable active counters */
72 for (i = 0; i < NUM_COUNTERS; ++i) {
73 if (counter_config[i].enabled) {
74 reset_value[i] = counter_config[i].count;
75
76 CTR_WRITE(counter_config[i].count, msrs, i);
77
78 CTRL_READ(low, high, msrs, i);
79 CTRL_CLEAR(low);
80 CTRL_SET_ENABLE(low);
81 CTRL_SET_USR(low, counter_config[i].user);
82 CTRL_SET_KERN(low, counter_config[i].kernel);
83 CTRL_SET_UM(low, counter_config[i].unit_mask);
84 CTRL_SET_EVENT(low, counter_config[i].event);
85 CTRL_WRITE(low, high, msrs, i);
86 } else {
87 reset_value[i] = 0;
88 }
89 }
90}
91
92
93static int athlon_check_ctrs(struct pt_regs * const regs,
94 struct op_msrs const * const msrs)
95{
96 unsigned int low, high;
97 int i;
98
99 for (i = 0 ; i < NUM_COUNTERS; ++i) {
100 CTR_READ(low, high, msrs, i);
101 if (CTR_OVERFLOWED(low)) {
102 oprofile_add_sample(regs, i);
103 CTR_WRITE(reset_value[i], msrs, i);
104 }
105 }
106
107 /* See op_model_ppro.c */
108 return 1;
109}
110
111
112static void athlon_start(struct op_msrs const * const msrs)
113{
114 unsigned int low, high;
115 int i;
116 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
117 if (reset_value[i]) {
118 CTRL_READ(low, high, msrs, i);
119 CTRL_SET_ACTIVE(low);
120 CTRL_WRITE(low, high, msrs, i);
121 }
122 }
123}
124
125
126static void athlon_stop(struct op_msrs const * const msrs)
127{
128 unsigned int low, high;
129 int i;
130
131 /* Subtle: stop on all counters to avoid race with
132 * setting our pm callback */
133 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
134 CTRL_READ(low, high, msrs, i);
135 CTRL_SET_INACTIVE(low);
136 CTRL_WRITE(low, high, msrs, i);
137 }
138}
139
140
141struct op_x86_model_spec const op_athlon_spec = {
142 .num_counters = NUM_COUNTERS,
143 .num_controls = NUM_CONTROLS,
144 .fill_in_addresses = &athlon_fill_in_addresses,
145 .setup_ctrs = &athlon_setup_ctrs,
146 .check_ctrs = &athlon_check_ctrs,
147 .start = &athlon_start,
148 .stop = &athlon_stop
149};
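
Note how CTR_WRITE above stores -(unsigned int)(l): the counter is armed with the two's complement of the reset value, so it wraps to zero (raising the NMI) after exactly `count` further events, and CTR_OVERFLOWED treats a cleared bit 31 as "has wrapped". A small stand-alone sketch of the same arithmetic (illustrative, not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned int count = 100000;
	unsigned int ctr = -(unsigned int)count;	/* what CTR_WRITE stores */
	unsigned int i;

	printf("armed: 0x%08x, bit31=%u\n", ctr, (ctr >> 31) & 1);
	for (i = 0; i < count; i++)
		ctr++;					/* one event each */
	printf("after %u events: 0x%08x, overflowed=%d\n",
	       count, ctr, !(ctr & (1U << 31)));	/* same test as CTR_OVERFLOWED */
	return 0;
}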
diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c
new file mode 100644
index 000000000000..ac8a066035c2
--- /dev/null
+++ b/arch/i386/oprofile/op_model_p4.c
@@ -0,0 +1,725 @@
1/**
2 * @file op_model_p4.c
3 * P4 model-specific MSR operations
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author Graydon Hoare
9 */
10
11#include <linux/oprofile.h>
12#include <linux/smp.h>
13#include <asm/msr.h>
14#include <asm/ptrace.h>
15#include <asm/fixmap.h>
16#include <asm/apic.h>
17
18#include "op_x86_model.h"
19#include "op_counter.h"
20
21#define NUM_EVENTS 39
22
23#define NUM_COUNTERS_NON_HT 8
24#define NUM_ESCRS_NON_HT 45
25#define NUM_CCCRS_NON_HT 18
26#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
27
28#define NUM_COUNTERS_HT2 4
29#define NUM_ESCRS_HT2 23
30#define NUM_CCCRS_HT2 9
31#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
32
33static unsigned int num_counters = NUM_COUNTERS_NON_HT;
34
35
36/* this has to be checked dynamically since the
37 hyper-threadedness of a chip is discovered at
38 kernel boot-time. */
39static inline void setup_num_counters(void)
40{
41#ifdef CONFIG_SMP
42 if (smp_num_siblings == 2)
43 num_counters = NUM_COUNTERS_HT2;
44#endif
45}
46
47static inline int addr_increment(void)
48{
49#ifdef CONFIG_SMP
50 return smp_num_siblings == 2 ? 2 : 1;
51#else
52 return 1;
53#endif
54}
55
56
57/* tables to simulate simplified hardware view of p4 registers */
58struct p4_counter_binding {
59 int virt_counter;
60 int counter_address;
61 int cccr_address;
62};
63
64struct p4_event_binding {
65 int escr_select; /* value to put in CCCR */
66 int event_select; /* value to put in ESCR */
67 struct {
68 int virt_counter; /* for this counter... */
69 int escr_address; /* use this ESCR */
70 } bindings[2];
71};
72
73/* nb: these CTR_* defines duplicate the defines in
74 event/i386.p4*events. */
75
76
77#define CTR_BPU_0 (1 << 0)
78#define CTR_MS_0 (1 << 1)
79#define CTR_FLAME_0 (1 << 2)
80#define CTR_IQ_4 (1 << 3)
81#define CTR_BPU_2 (1 << 4)
82#define CTR_MS_2 (1 << 5)
83#define CTR_FLAME_2 (1 << 6)
84#define CTR_IQ_5 (1 << 7)
85
86static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
87 { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 },
88 { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 },
89 { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
90 { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 },
91 { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 },
92 { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 },
93 { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
94 { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 }
95};
96
97#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
98
99/* All the CCCRs we don't use. */
100static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
101 MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3,
102 MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3,
103 MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3,
104 MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1,
105 MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3
106};
107
108/* p4 event codes in libop/op_event.h are indices into this table. */
109
110static struct p4_event_binding p4_events[NUM_EVENTS] = {
111
112 { /* BRANCH_RETIRED */
113 0x05, 0x06,
114 { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
115 {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
116 },
117
118 { /* MISPRED_BRANCH_RETIRED */
119 0x04, 0x03,
120 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
121 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
122 },
123
124 { /* TC_DELIVER_MODE */
125 0x01, 0x01,
126 { { CTR_MS_0, MSR_P4_TC_ESCR0},
127 { CTR_MS_2, MSR_P4_TC_ESCR1} }
128 },
129
130 { /* BPU_FETCH_REQUEST */
131 0x00, 0x03,
132 { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
133 { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
134 },
135
136 { /* ITLB_REFERENCE */
137 0x03, 0x18,
138 { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
139 { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
140 },
141
142 { /* MEMORY_CANCEL */
143 0x05, 0x02,
144 { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
145 { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
146 },
147
148 { /* MEMORY_COMPLETE */
149 0x02, 0x08,
150 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
151 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
152 },
153
154 { /* LOAD_PORT_REPLAY */
155 0x02, 0x04,
156 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
157 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
158 },
159
160 { /* STORE_PORT_REPLAY */
161 0x02, 0x05,
162 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
163 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
164 },
165
166 { /* MOB_LOAD_REPLAY */
167 0x02, 0x03,
168 { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
169 { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
170 },
171
172 { /* PAGE_WALK_TYPE */
173 0x04, 0x01,
174 { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
175 { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
176 },
177
178 { /* BSQ_CACHE_REFERENCE */
179 0x07, 0x0c,
180 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
181 { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
182 },
183
184 { /* IOQ_ALLOCATION */
185 0x06, 0x03,
186 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
187 { 0, 0 } }
188 },
189
190 { /* IOQ_ACTIVE_ENTRIES */
191 0x06, 0x1a,
192 { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
193 { 0, 0 } }
194 },
195
196 { /* FSB_DATA_ACTIVITY */
197 0x06, 0x17,
198 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
199 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
200 },
201
202 { /* BSQ_ALLOCATION */
203 0x07, 0x05,
204 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
205 { 0, 0 } }
206 },
207
208 { /* BSQ_ACTIVE_ENTRIES */
209 0x07, 0x06,
210 { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
211 { 0, 0 } }
212 },
213
214 { /* X87_ASSIST */
215 0x05, 0x03,
216 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
217 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
218 },
219
220 { /* SSE_INPUT_ASSIST */
221 0x01, 0x34,
222 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
223 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
224 },
225
226 { /* PACKED_SP_UOP */
227 0x01, 0x08,
228 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
229 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
230 },
231
232 { /* PACKED_DP_UOP */
233 0x01, 0x0c,
234 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
235 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
236 },
237
238 { /* SCALAR_SP_UOP */
239 0x01, 0x0a,
240 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
241 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
242 },
243
244 { /* SCALAR_DP_UOP */
245 0x01, 0x0e,
246 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
247 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
248 },
249
250 { /* 64BIT_MMX_UOP */
251 0x01, 0x02,
252 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
253 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
254 },
255
256 { /* 128BIT_MMX_UOP */
257 0x01, 0x1a,
258 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
259 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
260 },
261
262 { /* X87_FP_UOP */
263 0x01, 0x04,
264 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
265 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
266 },
267
268 { /* X87_SIMD_MOVES_UOP */
269 0x01, 0x2e,
270 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
271 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
272 },
273
274 { /* MACHINE_CLEAR */
275 0x05, 0x02,
276 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
277 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
278 },
279
280 { /* GLOBAL_POWER_EVENTS */
281 0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
282 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
283 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
284 },
285
286 { /* TC_MS_XFER */
287 0x00, 0x05,
288 { { CTR_MS_0, MSR_P4_MS_ESCR0},
289 { CTR_MS_2, MSR_P4_MS_ESCR1} }
290 },
291
292 { /* UOP_QUEUE_WRITES */
293 0x00, 0x09,
294 { { CTR_MS_0, MSR_P4_MS_ESCR0},
295 { CTR_MS_2, MSR_P4_MS_ESCR1} }
296 },
297
298 { /* FRONT_END_EVENT */
299 0x05, 0x08,
300 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
301 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
302 },
303
304 { /* EXECUTION_EVENT */
305 0x05, 0x0c,
306 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
307 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
308 },
309
310 { /* REPLAY_EVENT */
311 0x05, 0x09,
312 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
313 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
314 },
315
316 { /* INSTR_RETIRED */
317 0x04, 0x02,
318 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
319 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
320 },
321
322 { /* UOPS_RETIRED */
323 0x04, 0x01,
324 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
325 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
326 },
327
328 { /* UOP_TYPE */
329 0x02, 0x02,
330 { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
331 { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
332 },
333
334 { /* RETIRED_MISPRED_BRANCH_TYPE */
335 0x02, 0x05,
336 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
337 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
338 },
339
340 { /* RETIRED_BRANCH_TYPE */
341 0x02, 0x04,
342 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
343 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
344 }
345};
346
347
348#define MISC_PMC_ENABLED_P(x) ((x) & (1 << 7))
349
350#define ESCR_RESERVED_BITS 0x80000003
351#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
352#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
353#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
354#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
355#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
356#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
357#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
358#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
359#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
360
361#define CCCR_RESERVED_BITS 0x38030FFF
362#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
363#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
364#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
365#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
366#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
367#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
368#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
369#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
370#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
371#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
372#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
373
374#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
375#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
376#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
377
378
379/* this assigns a "stagger" to the current CPU, which is used throughout
380 the code in this module as an extra array offset, to select the "even"
381 or "odd" part of all the divided resources. */
382static unsigned int get_stagger(void)
383{
384#ifdef CONFIG_SMP
385 int cpu = smp_processor_id();
386 return (cpu != first_cpu(cpu_sibling_map[cpu]));
387#endif
388 return 0;
389}
390
391
392/* finally, mediate access to a real hardware counter
393 by passing a "virtual" counter number to this macro,
394 along with your stagger setting. */
395#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
396
397static unsigned long reset_value[NUM_COUNTERS_NON_HT];
398
399
400static void p4_fill_in_addresses(struct op_msrs * const msrs)
401{
402 unsigned int i;
403 unsigned int addr, stag;
404
405 setup_num_counters();
406 stag = get_stagger();
407
408 /* the counter registers we pay attention to */
409 for (i = 0; i < num_counters; ++i) {
410 msrs->counters[i].addr =
411 p4_counters[VIRT_CTR(stag, i)].counter_address;
412 }
413
414 /* FIXME: bad feeling, we don't save the 10 counters we don't use. */
415
416 /* 18 CCCR registers */
417 for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
418 addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
419 msrs->controls[i].addr = addr;
420 }
421
422 /* 43 ESCR registers in three or four discontiguous groups */
423 for (addr = MSR_P4_BSU_ESCR0 + stag;
424 addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
425 msrs->controls[i].addr = addr;
426 }
427
428 /* some models have no IQ_ESCR0/1, so we save BSU_ESCR0/1 a second time
429 * to avoid a special case in nmi_{save|restore}_registers() */
430 if (boot_cpu_data.x86_model >= 0x3) {
431 for (addr = MSR_P4_BSU_ESCR0 + stag;
432 addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
433 msrs->controls[i].addr = addr;
434 }
435 } else {
436 for (addr = MSR_P4_IQ_ESCR0 + stag;
437 addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
438 msrs->controls[i].addr = addr;
439 }
440 }
441
442 for (addr = MSR_P4_RAT_ESCR0 + stag;
443 addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
444 msrs->controls[i].addr = addr;
445 }
446
447 for (addr = MSR_P4_MS_ESCR0 + stag;
448 addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
449 msrs->controls[i].addr = addr;
450 }
451
452 for (addr = MSR_P4_IX_ESCR0 + stag;
453 addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
454 msrs->controls[i].addr = addr;
455 }
456
457 /* there are 2 remaining non-contiguously located ESCRs */
458
459 if (num_counters == NUM_COUNTERS_NON_HT) {
460 /* standard non-HT CPUs handle both remaining ESCRs */
461 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
462 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
463
464 } else if (stag == 0) {
465 /* HT CPUs give the first remainder to the even thread, as
466 the 32nd control register */
467 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
468
469 } else {
470 /* and two copies of the second to the odd thread,
471 for the 22nd and 23rd control registers */
472 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
473 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
474 }
475}
476
477
478static void pmc_setup_one_p4_counter(unsigned int ctr)
479{
480 int i;
481 int const maxbind = 2;
482 unsigned int cccr = 0;
483 unsigned int escr = 0;
484 unsigned int high = 0;
485 unsigned int counter_bit;
486 struct p4_event_binding *ev = NULL;
487 unsigned int stag;
488
489 stag = get_stagger();
490
491 /* convert from counter *number* to counter *bit* */
492 counter_bit = 1 << VIRT_CTR(stag, ctr);
493
494 /* find our event binding structure. */
495 if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
496 printk(KERN_ERR
497 "oprofile: P4 event code 0x%lx out of range\n",
498 counter_config[ctr].event);
499 return;
500 }
501
502 ev = &(p4_events[counter_config[ctr].event - 1]);
503
504 for (i = 0; i < maxbind; i++) {
505 if (ev->bindings[i].virt_counter & counter_bit) {
506
507 /* modify ESCR */
508 ESCR_READ(escr, high, ev, i);
509 ESCR_CLEAR(escr);
510 if (stag == 0) {
511 ESCR_SET_USR_0(escr, counter_config[ctr].user);
512 ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
513 } else {
514 ESCR_SET_USR_1(escr, counter_config[ctr].user);
515 ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
516 }
517 ESCR_SET_EVENT_SELECT(escr, ev->event_select);
518 ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
519 ESCR_WRITE(escr, high, ev, i);
520
521 /* modify CCCR */
522 CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
523 CCCR_CLEAR(cccr);
524 CCCR_SET_REQUIRED_BITS(cccr);
525 CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
526 if (stag == 0) {
527 CCCR_SET_PMI_OVF_0(cccr);
528 } else {
529 CCCR_SET_PMI_OVF_1(cccr);
530 }
531 CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
532 return;
533 }
534 }
535
536 printk(KERN_ERR
537 "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
538 counter_config[ctr].event, stag, ctr);
539}
540
541
542static void p4_setup_ctrs(struct op_msrs const * const msrs)
543{
544 unsigned int i;
545 unsigned int low, high;
546 unsigned int addr;
547 unsigned int stag;
548
549 stag = get_stagger();
550
551 rdmsr(MSR_IA32_MISC_ENABLE, low, high);
552 if (!MISC_PMC_ENABLED_P(low)) {
553 printk(KERN_ERR "oprofile: P4 PMC not available\n");
554 return;
555 }
556
557 /* clear the cccrs we will use */
558 for (i = 0 ; i < num_counters ; i++) {
559 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
560 CCCR_CLEAR(low);
561 CCCR_SET_REQUIRED_BITS(low);
562 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
563 }
564
565 /* clear cccrs outside our concern */
566 for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
567 rdmsr(p4_unused_cccr[i], low, high);
568 CCCR_CLEAR(low);
569 CCCR_SET_REQUIRED_BITS(low);
570 wrmsr(p4_unused_cccr[i], low, high);
571 }
572
573 /* clear all escrs (including those outside our concern) */
574 for (addr = MSR_P4_BSU_ESCR0 + stag;
575 addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) {
576 wrmsr(addr, 0, 0);
577 }
578
579 /* On older models clear also MSR_P4_IQ_ESCR0/1 */
580 if (boot_cpu_data.x86_model < 0x3) {
581 wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
582 wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
583 }
584
585 for (addr = MSR_P4_RAT_ESCR0 + stag;
586 addr <= MSR_P4_SSU_ESCR0; addr += addr_increment()) {
587 wrmsr(addr, 0, 0);
588 }
589
590 for (addr = MSR_P4_MS_ESCR0 + stag;
591 addr <= MSR_P4_TC_ESCR1; addr += addr_increment()) {
592 wrmsr(addr, 0, 0);
593 }
594
595 for (addr = MSR_P4_IX_ESCR0 + stag;
596 addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()) {
597 wrmsr(addr, 0, 0);
598 }
599
600 if (num_counters == NUM_COUNTERS_NON_HT) {
601 wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
602 wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
603 } else if (stag == 0) {
604 wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
605 } else {
606 wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
607 }
608
609 /* setup all counters */
610 for (i = 0 ; i < num_counters ; ++i) {
611 if (counter_config[i].enabled) {
612 reset_value[i] = counter_config[i].count;
613 pmc_setup_one_p4_counter(i);
614 CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
615 } else {
616 reset_value[i] = 0;
617 }
618 }
619}
620
621
622static int p4_check_ctrs(struct pt_regs * const regs,
623 struct op_msrs const * const msrs)
624{
625 unsigned long ctr, low, high, stag, real;
626 int i;
627
628 stag = get_stagger();
629
630 for (i = 0; i < num_counters; ++i) {
631
632 if (!reset_value[i])
633 continue;
634
635 /*
636 * there is some eccentricity in the hardware which
637 * requires that we perform 2 extra corrections:
638 *
639 * - check both the CCCR:OVF flag for overflow and the
640 * counter high bit for un-flagged overflows.
641 *
642 * - write the counter back twice to ensure it gets
643 * updated properly.
644 *
645 * the former seems to be related to extra NMIs happening
646 * during the current NMI; the latter is reported as errata
647 * N15 in intel doc 249199-029, pentium 4 specification
648 * update, though their suggested work-around does not
649 * appear to solve the problem.
650 */
651
652 real = VIRT_CTR(stag, i);
653
654 CCCR_READ(low, high, real);
655 CTR_READ(ctr, high, real);
656 if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
657 oprofile_add_sample(regs, i);
658 CTR_WRITE(reset_value[i], real);
659 CCCR_CLEAR_OVF(low);
660 CCCR_WRITE(low, high, real);
661 CTR_WRITE(reset_value[i], real);
662 }
663 }
664
665 /* P4 quirk: you have to re-unmask the apic vector */
666 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
667
668 /* See op_model_ppro.c */
669 return 1;
670}
671
672
673static void p4_start(struct op_msrs const * const msrs)
674{
675 unsigned int low, high, stag;
676 int i;
677
678 stag = get_stagger();
679
680 for (i = 0; i < num_counters; ++i) {
681 if (!reset_value[i])
682 continue;
683 CCCR_READ(low, high, VIRT_CTR(stag, i));
684 CCCR_SET_ENABLE(low);
685 CCCR_WRITE(low, high, VIRT_CTR(stag, i));
686 }
687}
688
689
690static void p4_stop(struct op_msrs const * const msrs)
691{
692 unsigned int low, high, stag;
693 int i;
694
695 stag = get_stagger();
696
697 for (i = 0; i < num_counters; ++i) {
698 CCCR_READ(low, high, VIRT_CTR(stag, i));
699 CCCR_SET_DISABLE(low);
700 CCCR_WRITE(low, high, VIRT_CTR(stag, i));
701 }
702}
703
704
705#ifdef CONFIG_SMP
706struct op_x86_model_spec const op_p4_ht2_spec = {
707 .num_counters = NUM_COUNTERS_HT2,
708 .num_controls = NUM_CONTROLS_HT2,
709 .fill_in_addresses = &p4_fill_in_addresses,
710 .setup_ctrs = &p4_setup_ctrs,
711 .check_ctrs = &p4_check_ctrs,
712 .start = &p4_start,
713 .stop = &p4_stop
714};
715#endif
716
717struct op_x86_model_spec const op_p4_spec = {
718 .num_counters = NUM_COUNTERS_NON_HT,
719 .num_controls = NUM_CONTROLS_NON_HT,
720 .fill_in_addresses = &p4_fill_in_addresses,
721 .setup_ctrs = &p4_setup_ctrs,
722 .check_ctrs = &p4_check_ctrs,
723 .start = &p4_start,
724 .stop = &p4_stop
725};
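
The stagger/VIRT_CTR pair is what splits the physical counter set between hyper-threaded siblings: with num_counters == 4, the sibling with stagger 0 lands on p4_counters[0..3] and the sibling with stagger 1 on p4_counters[4..7], so the two logical CPUs never program the same physical counter. A quick sketch of the arithmetic (illustrative only):

/* same computation as VIRT_CTR(), pulled out for illustration */
static int virt_ctr(unsigned int stagger, int i)
{
	return i + 4 /* num_counters on HT */ * stagger;
}
/* virt_ctr(0, 2) == 2 -> p4_counters[2] (CTR_FLAME_0 / MSR_P4_FLAME_PERFCTR0)
 * virt_ctr(1, 2) == 6 -> p4_counters[6] (CTR_FLAME_2 / MSR_P4_FLAME_PERFCTR2) */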
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c
new file mode 100644
index 000000000000..d719015fc044
--- /dev/null
+++ b/arch/i386/oprofile/op_model_ppro.c
@@ -0,0 +1,143 @@
1/**
2 * @file op_model_ppro.c
3 * pentium pro / P6 model-specific MSR operations
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author John Levon
9 * @author Philippe Elie
10 * @author Graydon Hoare
11 */
12
13#include <linux/oprofile.h>
14#include <asm/ptrace.h>
15#include <asm/msr.h>
16#include <asm/apic.h>
17
18#include "op_x86_model.h"
19#include "op_counter.h"
20
21#define NUM_COUNTERS 2
22#define NUM_CONTROLS 2
23
24#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
25#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
26#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
27
28#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
29#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
30#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
31#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
32#define CTRL_CLEAR(x) (x &= (1<<21))
33#define CTRL_SET_ENABLE(val) (val |= 1<<20)
34#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
35#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
36#define CTRL_SET_UM(val, m) (val |= (m << 8))
37#define CTRL_SET_EVENT(val, e) (val |= e)
38
39static unsigned long reset_value[NUM_COUNTERS];
40
41static void ppro_fill_in_addresses(struct op_msrs * const msrs)
42{
43 msrs->counters[0].addr = MSR_P6_PERFCTR0;
44 msrs->counters[1].addr = MSR_P6_PERFCTR1;
45
46 msrs->controls[0].addr = MSR_P6_EVNTSEL0;
47 msrs->controls[1].addr = MSR_P6_EVNTSEL1;
48}
49
50
51static void ppro_setup_ctrs(struct op_msrs const * const msrs)
52{
53 unsigned int low, high;
54 int i;
55
56 /* clear all counters */
57 for (i = 0 ; i < NUM_CONTROLS; ++i) {
58 CTRL_READ(low, high, msrs, i);
59 CTRL_CLEAR(low);
60 CTRL_WRITE(low, high, msrs, i);
61 }
62
63 /* avoid a false detection of ctr overflows in NMI handler */
64 for (i = 0; i < NUM_COUNTERS; ++i) {
65 CTR_WRITE(1, msrs, i);
66 }
67
68 /* enable active counters */
69 for (i = 0; i < NUM_COUNTERS; ++i) {
70 if (counter_config[i].enabled) {
71 reset_value[i] = counter_config[i].count;
72
73 CTR_WRITE(counter_config[i].count, msrs, i);
74
75 CTRL_READ(low, high, msrs, i);
76 CTRL_CLEAR(low);
77 CTRL_SET_ENABLE(low);
78 CTRL_SET_USR(low, counter_config[i].user);
79 CTRL_SET_KERN(low, counter_config[i].kernel);
80 CTRL_SET_UM(low, counter_config[i].unit_mask);
81 CTRL_SET_EVENT(low, counter_config[i].event);
82 CTRL_WRITE(low, high, msrs, i);
83 }
84 }
85}
86
87
88static int ppro_check_ctrs(struct pt_regs * const regs,
89 struct op_msrs const * const msrs)
90{
91 unsigned int low, high;
92 int i;
93
94 for (i = 0 ; i < NUM_COUNTERS; ++i) {
95 CTR_READ(low, high, msrs, i);
96 if (CTR_OVERFLOWED(low)) {
97 oprofile_add_sample(regs, i);
98 CTR_WRITE(reset_value[i], msrs, i);
99 }
100 }
101
102 /* Only the P6-based Pentium M needs to re-unmask the apic vector, but it
103 * doesn't hurt the other P6 variants */
104 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
105
106 /* We can't work out if we really handled an interrupt. We
107 * might have caught a *second* counter just after it overflowed;
108 * the interrupt for *this* counter then arrives, we find no
109 * counter that has overflowed, and we would return 0 and get
110 * dazed + confused. Instead we always assume we found an
111 * overflow. This sucks.
112 */
113 return 1;
114}
115
116
117static void ppro_start(struct op_msrs const * const msrs)
118{
119 unsigned int low, high;
120 CTRL_READ(low, high, msrs, 0);
121 CTRL_SET_ACTIVE(low);
122 CTRL_WRITE(low, high, msrs, 0);
123}
124
125
126static void ppro_stop(struct op_msrs const * const msrs)
127{
128 unsigned int low, high;
129 CTRL_READ(low, high, msrs, 0);
130 CTRL_SET_INACTIVE(low);
131 CTRL_WRITE(low, high, msrs, 0);
132}
133
134
135struct op_x86_model_spec const op_ppro_spec = {
136 .num_counters = NUM_COUNTERS,
137 .num_controls = NUM_CONTROLS,
138 .fill_in_addresses = &ppro_fill_in_addresses,
139 .setup_ctrs = &ppro_setup_ctrs,
140 .check_ctrs = &ppro_check_ctrs,
141 .start = &ppro_start,
142 .stop = &ppro_stop
143};
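
A worked example of how the CTRL_* macros above assemble an event-select word, assuming the P6 event code 0xC0 (INST_RETIRED) with unit mask 0, counting in both user and kernel mode; the closing note also explains why ppro_start()/ppro_stop() only ever touch counter 0:

/* Illustrative only (event code assumed):
 *
 *   unsigned int val = 0;
 *   CTRL_SET_ENABLE(val);         bit 20: APIC interrupt on overflow
 *   CTRL_SET_USR(val, 1);         bit 16
 *   CTRL_SET_KERN(val, 1);        bit 17
 *   CTRL_SET_UM(val, 0);          bits 8-15
 *   CTRL_SET_EVENT(val, 0xc0);    bits 0-7
 *
 * val is now 0x001300c0. ppro_start() later ORs in bit 22 via
 * CTRL_SET_ACTIVE, giving 0x005300c0. On P6-family parts the enable bit
 * in EVNTSEL0 gates both counters (the same bit in EVNTSEL1 has no
 * effect), which is why start/stop only read-modify-write MSR 0.
 */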
diff --git a/arch/i386/oprofile/op_x86_model.h b/arch/i386/oprofile/op_x86_model.h
new file mode 100644
index 000000000000..123b7e90a9ee
--- /dev/null
+++ b/arch/i386/oprofile/op_x86_model.h
@@ -0,0 +1,50 @@
1/**
2 * @file op_x86_model.h
3 * interface to x86 model-specific MSR operations
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author Graydon Hoare
9 */
10
11#ifndef OP_X86_MODEL_H
12#define OP_X86_MODEL_H
13
14struct op_saved_msr {
15 unsigned int high;
16 unsigned int low;
17};
18
19struct op_msr {
20 unsigned long addr;
21 struct op_saved_msr saved;
22};
23
24struct op_msrs {
25 struct op_msr * counters;
26 struct op_msr * controls;
27};
28
29struct pt_regs;
30
31/* The model vtable abstracts the differences in
32 * perfctr support across the various x86 CPU models.
33 */
34struct op_x86_model_spec {
35 unsigned int const num_counters;
36 unsigned int const num_controls;
37 void (*fill_in_addresses)(struct op_msrs * const msrs);
38 void (*setup_ctrs)(struct op_msrs const * const msrs);
39 int (*check_ctrs)(struct pt_regs * const regs,
40 struct op_msrs const * const msrs);
41 void (*start)(struct op_msrs const * const msrs);
42 void (*stop)(struct op_msrs const * const msrs);
43};
44
45extern struct op_x86_model_spec const op_ppro_spec;
46extern struct op_x86_model_spec const op_p4_spec;
47extern struct op_x86_model_spec const op_p4_ht2_spec;
48extern struct op_x86_model_spec const op_athlon_spec;
49
50#endif /* OP_X86_MODEL_H */
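
The NMI driver selects one of the four specs at init time and drives everything through this vtable. A simplified, hypothetical call sequence (not the actual nmi_int.c code) might look like:

static void example_bringup(struct op_x86_model_spec const *model,
			    struct op_msrs *msrs)
{
	model->fill_in_addresses(msrs);	/* learn which MSRs this CPU uses */
	model->setup_ctrs(msrs);	/* program events from counter_config[] */
	model->start(msrs);		/* flip the enable bits */
	/* ...the NMI handler later calls model->check_ctrs(regs, msrs)... */
	model->stop(msrs);
}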
diff --git a/arch/i386/pci/Makefile b/arch/i386/pci/Makefile
new file mode 100644
index 000000000000..1bff03f36965
--- /dev/null
+++ b/arch/i386/pci/Makefile
@@ -0,0 +1,14 @@
1obj-y := i386.o
2
3obj-$(CONFIG_PCI_BIOS) += pcbios.o
4obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
5obj-$(CONFIG_PCI_DIRECT) += direct.o
6
7pci-y := fixup.o
8pci-$(CONFIG_ACPI_PCI) += acpi.o
9pci-y += legacy.o irq.o
10
11pci-$(CONFIG_X86_VISWS) := visws.o fixup.o
12pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o
13
14obj-y += $(pci-y) common.o
diff --git a/arch/i386/pci/acpi.c b/arch/i386/pci/acpi.c
new file mode 100644
index 000000000000..2db65ec45dc3
--- /dev/null
+++ b/arch/i386/pci/acpi.c
@@ -0,0 +1,53 @@
1#include <linux/pci.h>
2#include <linux/acpi.h>
3#include <linux/init.h>
4#include <linux/irq.h>
5#include <asm/hw_irq.h>
6#include "pci.h"
7
8struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum)
9{
10 if (domain != 0) {
11 printk(KERN_WARNING "PCI: Multiple domains not supported\n");
12 return NULL;
13 }
14
15 return pcibios_scan_root(busnum);
16}
17
18extern int pci_routeirq;
19static int __init pci_acpi_init(void)
20{
21 struct pci_dev *dev = NULL;
22
23 if (pcibios_scanned)
24 return 0;
25
26 if (acpi_noirq)
27 return 0;
28
29 printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
30 acpi_irq_penalty_init();
31 pcibios_scanned++;
32 pcibios_enable_irq = acpi_pci_irq_enable;
33
34 if (pci_routeirq) {
35 /*
36 * PCI IRQ routing is set up by pci_enable_device(), but we
37 * also do it here in case there are still broken drivers that
38 * don't use pci_enable_device().
39 */
40 printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n");
41 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL)
42 acpi_pci_irq_enable(dev);
43 } else
44 printk(KERN_INFO "PCI: If a device doesn't work, try \"pci=routeirq\". If it helps, post a report\n");
45
46#ifdef CONFIG_X86_IO_APIC
47 if (acpi_ioapic)
48 print_IO_APIC();
49#endif
50
51 return 0;
52}
53subsys_initcall(pci_acpi_init);
diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
new file mode 100644
index 000000000000..720975e1af50
--- /dev/null
+++ b/arch/i386/pci/common.c
@@ -0,0 +1,251 @@
1/*
2 * Low-Level PCI Support for PC
3 *
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5 */
6
7#include <linux/sched.h>
8#include <linux/pci.h>
9#include <linux/ioport.h>
10#include <linux/init.h>
11
12#include <asm/acpi.h>
13#include <asm/segment.h>
14#include <asm/io.h>
15#include <asm/smp.h>
16
17#include "pci.h"
18
19#ifdef CONFIG_PCI_BIOS
20extern void pcibios_sort(void);
21#endif
22
23unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
24 PCI_PROBE_MMCONF;
25
26int pci_routeirq;
27int pcibios_last_bus = -1;
28struct pci_bus *pci_root_bus = NULL;
29struct pci_raw_ops *raw_pci_ops;
30
31static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
32{
33 return raw_pci_ops->read(0, bus->number, devfn, where, size, value);
34}
35
36static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
37{
38 return raw_pci_ops->write(0, bus->number, devfn, where, size, value);
39}
40
41struct pci_ops pci_root_ops = {
42 .read = pci_read,
43 .write = pci_write,
44};
45
46/*
47 * legacy, numa, and acpi all want to call pcibios_scan_root
48 * from their initcalls. This flag ensures the scan happens only once.
49 */
50int pcibios_scanned;
51
52/*
53 * This interrupt-safe spinlock protects all accesses to PCI
54 * configuration space.
55 */
56DEFINE_SPINLOCK(pci_config_lock);
57
58/*
59 * Several buggy motherboards address only 16 devices and mirror
60 * them to the next 16 IDs. We try to detect this `feature' on all
61 * primary buses (those containing host bridges, as they are
62 * expected to be unique) and remove the ghost devices.
63 */
64
65static void __devinit pcibios_fixup_ghosts(struct pci_bus *b)
66{
67 struct list_head *ln, *mn;
68 struct pci_dev *d, *e;
69 int mirror = PCI_DEVFN(16,0);
70 int seen_host_bridge = 0;
71 int i;
72
73 DBG("PCI: Scanning for ghost devices on bus %d\n", b->number);
74 list_for_each(ln, &b->devices) {
75 d = pci_dev_b(ln);
76 if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST)
77 seen_host_bridge++;
78 for (mn=ln->next; mn != &b->devices; mn=mn->next) {
79 e = pci_dev_b(mn);
80 if (e->devfn != d->devfn + mirror ||
81 e->vendor != d->vendor ||
82 e->device != d->device ||
83 e->class != d->class)
84 continue;
85 for(i=0; i<PCI_NUM_RESOURCES; i++)
86 if (e->resource[i].start != d->resource[i].start ||
87 e->resource[i].end != d->resource[i].end ||
88 e->resource[i].flags != d->resource[i].flags)
89 continue;
90 break;
91 }
92 if (mn == &b->devices)
93 return;
94 }
95 if (!seen_host_bridge)
96 return;
97 printk(KERN_WARNING "PCI: Ignoring ghost devices on bus %02x\n", b->number);
98
99 ln = &b->devices;
100 while (ln->next != &b->devices) {
101 d = pci_dev_b(ln->next);
102 if (d->devfn >= mirror) {
103 list_del(&d->global_list);
104 list_del(&d->bus_list);
105 kfree(d);
106 } else
107 ln = ln->next;
108 }
109}
110
111/*
112 * Called after each bus is probed, but before its children
113 * are examined.
114 */
115
116void __devinit pcibios_fixup_bus(struct pci_bus *b)
117{
118 pcibios_fixup_ghosts(b);
119 pci_read_bridge_bases(b);
120}
121
122
123struct pci_bus * __devinit pcibios_scan_root(int busnum)
124{
125 struct pci_bus *bus = NULL;
126
127 while ((bus = pci_find_next_bus(bus)) != NULL) {
128 if (bus->number == busnum) {
129 /* Already scanned */
130 return bus;
131 }
132 }
133
134 printk("PCI: Probing PCI hardware (bus %02x)\n", busnum);
135
136 return pci_scan_bus(busnum, &pci_root_ops, NULL);
137}
138
139extern u8 pci_cache_line_size;
140
141static int __init pcibios_init(void)
142{
143 struct cpuinfo_x86 *c = &boot_cpu_data;
144
145 if (!raw_pci_ops) {
146 printk("PCI: System does not support PCI\n");
147 return 0;
148 }
149
150 /*
151 * Assume PCI cacheline size of 32 bytes for all x86s except K7/K8
152 * and P4. It's also good for 386/486s (which actually have 16-byte
153 * lines), as quite a few PCI devices do not support smaller values.
154 */
155 pci_cache_line_size = 32 >> 2;
156 if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD)
157 pci_cache_line_size = 64 >> 2; /* K7 & K8 */
158 else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL)
159 pci_cache_line_size = 128 >> 2; /* P4 */
160
161 pcibios_resource_survey();
162
163#ifdef CONFIG_PCI_BIOS
164 if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT))
165 pcibios_sort();
166#endif
167 return 0;
168}
169
170subsys_initcall(pcibios_init);
171
172char * __devinit pcibios_setup(char *str)
173{
174 if (!strcmp(str, "off")) {
175 pci_probe = 0;
176 return NULL;
177 }
178#ifdef CONFIG_PCI_BIOS
179 else if (!strcmp(str, "bios")) {
180 pci_probe = PCI_PROBE_BIOS;
181 return NULL;
182 } else if (!strcmp(str, "nobios")) {
183 pci_probe &= ~PCI_PROBE_BIOS;
184 return NULL;
185 } else if (!strcmp(str, "nosort")) {
186 pci_probe |= PCI_NO_SORT;
187 return NULL;
188 } else if (!strcmp(str, "biosirq")) {
189 pci_probe |= PCI_BIOS_IRQ_SCAN;
190 return NULL;
191 }
192#endif
193#ifdef CONFIG_PCI_DIRECT
194 else if (!strcmp(str, "conf1")) {
195 pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS;
196 return NULL;
197 }
198 else if (!strcmp(str, "conf2")) {
199 pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS;
200 return NULL;
201 }
202#endif
203#ifdef CONFIG_PCI_MMCONFIG
204 else if (!strcmp(str, "nommconf")) {
205 pci_probe &= ~PCI_PROBE_MMCONF;
206 return NULL;
207 }
208#endif
209 else if (!strcmp(str, "noacpi")) {
210 acpi_noirq_set();
211 return NULL;
212 }
213#ifndef CONFIG_X86_VISWS
214 else if (!strcmp(str, "usepirqmask")) {
215 pci_probe |= PCI_USE_PIRQ_MASK;
216 return NULL;
217 } else if (!strncmp(str, "irqmask=", 8)) {
218 pcibios_irq_mask = simple_strtol(str+8, NULL, 0);
219 return NULL;
220 } else if (!strncmp(str, "lastbus=", 8)) {
221 pcibios_last_bus = simple_strtol(str+8, NULL, 0);
222 return NULL;
223 }
224#endif
225 else if (!strcmp(str, "rom")) {
226 pci_probe |= PCI_ASSIGN_ROMS;
227 return NULL;
228 } else if (!strcmp(str, "assign-busses")) {
229 pci_probe |= PCI_ASSIGN_ALL_BUSSES;
230 return NULL;
231 } else if (!strcmp(str, "routeirq")) {
232 pci_routeirq = 1;
233 return NULL;
234 }
235 return str;
236}
237
238unsigned int pcibios_assign_all_busses(void)
239{
240 return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
241}
242
243int pcibios_enable_device(struct pci_dev *dev, int mask)
244{
245 int err;
246
247 if ((err = pcibios_enable_resources(dev, mask)) < 0)
248 return err;
249
250 return pcibios_enable_irq(dev);
251}
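
The `>> 2` in pcibios_init() is because the PCI cache line size register is programmed in units of 32-bit dwords, not bytes. Checking the three values being stored (a throwaway user-space sketch):

#include <stdio.h>

int main(void)
{
	/* PCI_CACHE_LINE_SIZE takes dwords, hence the >> 2 above */
	unsigned int bytes[] = { 32, 64, 128 };	/* default, K7/K8, P4 */
	unsigned int i;

	for (i = 0; i < 3; i++)
		printf("%3u-byte line -> register value %u\n",
		       bytes[i], bytes[i] >> 2);
	return 0;
}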
diff --git a/arch/i386/pci/direct.c b/arch/i386/pci/direct.c
new file mode 100644
index 000000000000..30b7e9b4f6a2
--- /dev/null
+++ b/arch/i386/pci/direct.c
@@ -0,0 +1,289 @@
1/*
2 * direct.c - Low-level direct PCI config space access
3 */
4
5#include <linux/pci.h>
6#include <linux/init.h>
7#include "pci.h"
8
9/*
10 * Functions for accessing PCI configuration space with type 1 accesses
11 */
12
13#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
14 (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
15
16static int pci_conf1_read(unsigned int seg, unsigned int bus,
17 unsigned int devfn, int reg, int len, u32 *value)
18{
19 unsigned long flags;
20
21 if (!value || (bus > 255) || (devfn > 255) || (reg > 255))
22 return -EINVAL;
23
24 spin_lock_irqsave(&pci_config_lock, flags);
25
26 outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8);
27
28 switch (len) {
29 case 1:
30 *value = inb(0xCFC + (reg & 3));
31 break;
32 case 2:
33 *value = inw(0xCFC + (reg & 2));
34 break;
35 case 4:
36 *value = inl(0xCFC);
37 break;
38 }
39
40 spin_unlock_irqrestore(&pci_config_lock, flags);
41
42 return 0;
43}
44
45static int pci_conf1_write(unsigned int seg, unsigned int bus,
46 unsigned int devfn, int reg, int len, u32 value)
47{
48 unsigned long flags;
49
50 if ((bus > 255) || (devfn > 255) || (reg > 255))
51 return -EINVAL;
52
53 spin_lock_irqsave(&pci_config_lock, flags);
54
55 outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8);
56
57 switch (len) {
58 case 1:
59 outb((u8)value, 0xCFC + (reg & 3));
60 break;
61 case 2:
62 outw((u16)value, 0xCFC + (reg & 2));
63 break;
64 case 4:
65 outl((u32)value, 0xCFC);
66 break;
67 }
68
69 spin_unlock_irqrestore(&pci_config_lock, flags);
70
71 return 0;
72}
73
74#undef PCI_CONF1_ADDRESS
75
76struct pci_raw_ops pci_direct_conf1 = {
77 .read = pci_conf1_read,
78 .write = pci_conf1_write,
79};
80
81
82/*
83 * Functions for accessing PCI configuration space with type 2 accesses
84 */
85
86#define PCI_CONF2_ADDRESS(dev, reg) (u16)(0xC000 | (dev << 8) | reg)
87
88static int pci_conf2_read(unsigned int seg, unsigned int bus,
89 unsigned int devfn, int reg, int len, u32 *value)
90{
91 unsigned long flags;
92 int dev, fn;
93
94 if (!value || (bus > 255) || (devfn > 255) || (reg > 255))
95 return -EINVAL;
96
97 dev = PCI_SLOT(devfn);
98 fn = PCI_FUNC(devfn);
99
100 if (dev & 0x10)
101 return PCIBIOS_DEVICE_NOT_FOUND;
102
103 spin_lock_irqsave(&pci_config_lock, flags);
104
105 outb((u8)(0xF0 | (fn << 1)), 0xCF8);
106 outb((u8)bus, 0xCFA);
107
108 switch (len) {
109 case 1:
110 *value = inb(PCI_CONF2_ADDRESS(dev, reg));
111 break;
112 case 2:
113 *value = inw(PCI_CONF2_ADDRESS(dev, reg));
114 break;
115 case 4:
116 *value = inl(PCI_CONF2_ADDRESS(dev, reg));
117 break;
118 }
119
120 outb(0, 0xCF8);
121
122 spin_unlock_irqrestore(&pci_config_lock, flags);
123
124 return 0;
125}
126
127static int pci_conf2_write(unsigned int seg, unsigned int bus,
128 unsigned int devfn, int reg, int len, u32 value)
129{
130 unsigned long flags;
131 int dev, fn;
132
133 if ((bus > 255) || (devfn > 255) || (reg > 255))
134 return -EINVAL;
135
136 dev = PCI_SLOT(devfn);
137 fn = PCI_FUNC(devfn);
138
139 if (dev & 0x10)
140 return PCIBIOS_DEVICE_NOT_FOUND;
141
142 spin_lock_irqsave(&pci_config_lock, flags);
143
144 outb((u8)(0xF0 | (fn << 1)), 0xCF8);
145 outb((u8)bus, 0xCFA);
146
147 switch (len) {
148 case 1:
149 outb((u8)value, PCI_CONF2_ADDRESS(dev, reg));
150 break;
151 case 2:
152 outw((u16)value, PCI_CONF2_ADDRESS(dev, reg));
153 break;
154 case 4:
155 outl((u32)value, PCI_CONF2_ADDRESS(dev, reg));
156 break;
157 }
158
159 outb(0, 0xCF8);
160
161 spin_unlock_irqrestore(&pci_config_lock, flags);
162
163 return 0;
164}
165
166#undef PCI_CONF2_ADDRESS
167
168static struct pci_raw_ops pci_direct_conf2 = {
169 .read = pci_conf2_read,
170 .write = pci_conf2_write,
171};
172
173
174/*
175 * Before we decide to use direct hardware access mechanisms, we try to do some
176 * trivial checks to ensure it at least _seems_ to be working -- we just test
177 * whether bus 00 contains a host bridge (this is similar to checking
178 * techniques used in XFree86, but ours should be more reliable since we
179 * attempt to make use of direct access hints provided by the PCI BIOS).
180 *
181 * This should be close to trivial, but it isn't, because there are buggy
182 * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID.
183 */
184static int __init pci_sanity_check(struct pci_raw_ops *o)
185{
186 u32 x = 0;
187 int devfn;
188
189 if (pci_probe & PCI_NO_CHECKS)
190 return 1;
191
192 for (devfn = 0; devfn < 0x100; devfn++) {
193 if (o->read(0, 0, devfn, PCI_CLASS_DEVICE, 2, &x))
194 continue;
195 if (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA)
196 return 1;
197
198 if (o->read(0, 0, devfn, PCI_VENDOR_ID, 2, &x))
199 continue;
200 if (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ)
201 return 1;
202 }
203
204 DBG("PCI: Sanity check failed\n");
205 return 0;
206}
207
208static int __init pci_check_type1(void)
209{
210 unsigned long flags;
211 unsigned int tmp;
212 int works = 0;
213
214 local_irq_save(flags);
215
216 outb(0x01, 0xCFB);
217 tmp = inl(0xCF8);
218 outl(0x80000000, 0xCF8);
219 if (inl(0xCF8) == 0x80000000 && pci_sanity_check(&pci_direct_conf1)) {
220 works = 1;
221 }
222 outl(tmp, 0xCF8);
223 local_irq_restore(flags);
224
225 return works;
226}
227
228static int __init pci_check_type2(void)
229{
230 unsigned long flags;
231 int works = 0;
232
233 local_irq_save(flags);
234
235 outb(0x00, 0xCFB);
236 outb(0x00, 0xCF8);
237 outb(0x00, 0xCFA);
238 if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00 &&
239 pci_sanity_check(&pci_direct_conf2)) {
240 works = 1;
241 }
242
243 local_irq_restore(flags);
244
245 return works;
246}
247
248static int __init pci_direct_init(void)
249{
250 struct resource *region, *region2;
251
252 if ((pci_probe & PCI_PROBE_CONF1) == 0)
253 goto type2;
254 region = request_region(0xCF8, 8, "PCI conf1");
255 if (!region)
256 goto type2;
257
258 if (pci_check_type1()) {
259 printk(KERN_INFO "PCI: Using configuration type 1\n");
260 raw_pci_ops = &pci_direct_conf1;
261 return 0;
262 }
263 release_resource(region);
264
265 type2:
266 if ((pci_probe & PCI_PROBE_CONF2) == 0)
267 goto out;
268 region = request_region(0xCF8, 4, "PCI conf2");
269 if (!region)
270 goto out;
271 region2 = request_region(0xC000, 0x1000, "PCI conf2");
272 if (!region2)
273 goto fail2;
274
275 if (pci_check_type2()) {
276 printk(KERN_INFO "PCI: Using configuration type 2\n");
277 raw_pci_ops = &pci_direct_conf2;
278 return 0;
279 }
280
281 release_resource(region2);
282 fail2:
283 release_resource(region);
284
285 out:
286 return 0;
287}
288
289arch_initcall(pci_direct_init);
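
PCI_CONF1_ADDRESS packs bus, devfn and register into the single dword written to the 0xCF8 address port. Working it through for an arbitrary example, bus 0, device 0x1f, function 0, register 0x3c (a stand-alone sketch):

#include <stdio.h>

#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
	(0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))

int main(void)
{
	unsigned int devfn = (0x1f << 3) | 0;	/* PCI_DEVFN(0x1f, 0) */
	unsigned int reg = 0x3c;

	/* prints 0x8000f83c: the value the kernel writes to port 0xCF8 */
	printf("0xCF8 <- 0x%08x\n", PCI_CONF1_ADDRESS(0, devfn, reg));
	/* a 1-byte read then comes from 0xCFC + (reg & 3), here 0xCFC */
	printf("data port: 0x%x\n", 0xCFC + (reg & 3));
	return 0;
}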
diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c
new file mode 100644
index 000000000000..be52c5ac4e05
--- /dev/null
+++ b/arch/i386/pci/fixup.c
@@ -0,0 +1,386 @@
1/*
2 * Exceptions for specific devices. Usually work-arounds for fatal design flaws.
3 */
4
5#include <linux/pci.h>
6#include <linux/init.h>
7#include "pci.h"
8
9
10static void __devinit pci_fixup_i450nx(struct pci_dev *d)
11{
12 /*
13 * i450NX -- Find and scan all secondary buses on all PXB's.
14 */
15 int pxb, reg;
16 u8 busno, suba, subb;
17
18 printk(KERN_WARNING "PCI: Searching for i450NX host bridges on %s\n", pci_name(d));
19 reg = 0xd0;
20 for(pxb=0; pxb<2; pxb++) {
21 pci_read_config_byte(d, reg++, &busno);
22 pci_read_config_byte(d, reg++, &suba);
23 pci_read_config_byte(d, reg++, &subb);
24 DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
25 if (busno)
26 pci_scan_bus(busno, &pci_root_ops, NULL); /* Bus A */
27 if (suba < subb)
28 pci_scan_bus(suba+1, &pci_root_ops, NULL); /* Bus B */
29 }
30 pcibios_last_bus = -1;
31}
32DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx);
33
34static void __devinit pci_fixup_i450gx(struct pci_dev *d)
35{
36 /*
37 * i450GX and i450KX -- Find and scan all secondary buses.
38 * (called separately for each PCI bridge found)
39 */
40 u8 busno;
41 pci_read_config_byte(d, 0x4a, &busno);
42 printk(KERN_INFO "PCI: i440KX/GX host bridge %s: secondary bus %02x\n", pci_name(d), busno);
43 pci_scan_bus(busno, &pci_root_ops, NULL);
44 pcibios_last_bus = -1;
45}
46DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx);
47
48static void __devinit pci_fixup_umc_ide(struct pci_dev *d)
49{
50 /*
51 * UM8886BF IDE controller sets region type bits incorrectly,
52 * so the regions look like memory even though they are I/O.
53 */
54 int i;
55
56 printk(KERN_WARNING "PCI: Fixing base address flags for device %s\n", pci_name(d));
57 for(i=0; i<4; i++)
58 d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO;
59}
60DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide);
61
62static void __devinit pci_fixup_ncr53c810(struct pci_dev *d)
63{
64 /*
65 * NCR 53C810 returns class code 0 (at least on some systems).
66 * Fix class to be PCI_CLASS_STORAGE_SCSI
67 */
68 if (!d->class) {
69 printk(KERN_WARNING "PCI: fixing NCR 53C810 class code for %s\n", pci_name(d));
70 d->class = PCI_CLASS_STORAGE_SCSI << 8;
71 }
72}
73DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810);
74
75static void __devinit pci_fixup_ide_bases(struct pci_dev *d)
76{
77 int i;
78
79 /*
80 * PCI IDE controllers use non-standard I/O port decoding, respect it.
81 */
82 if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE)
83 return;
84 DBG("PCI: IDE base address fixup for %s\n", pci_name(d));
85 for(i=0; i<4; i++) {
86 struct resource *r = &d->resource[i];
87 if ((r->start & ~0x80) == 0x374) {
88 r->start |= 2;
89 r->end = r->start;
90 }
91 }
92}
93DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases);
94
95static void __devinit pci_fixup_ide_trash(struct pci_dev *d)
96{
97 int i;
98
99 /*
100 * Runs the fixup only for the first IDE controller
101 * (Shai Fultheim - shai@ftcon.com)
102 */
103 static int called = 0;
104 if (called)
105 return;
106 called = 1;
107
108 /*
109 * There exist PCI IDE controllers which have utter garbage
110 * in first four base registers. Ignore that.
111 */
112 DBG("PCI: IDE base address trash cleared for %s\n", pci_name(d));
113 for(i=0; i<4; i++)
114 d->resource[i].start = d->resource[i].end = d->resource[i].flags = 0;
115}
116DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5513, pci_fixup_ide_trash);
117DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_10, pci_fixup_ide_trash);
118DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_11, pci_fixup_ide_trash);
119DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_9, pci_fixup_ide_trash);
120
121static void __devinit pci_fixup_latency(struct pci_dev *d)
122{
123 /*
124 * SiS 5597 and 5598 chipsets require latency timer set to
125 * at most 32 to avoid lockups.
126 */
127 DBG("PCI: Setting max latency to 32\n");
128 pcibios_max_latency = 32;
129}
130DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency);
131DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency);
132
133static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d)
134{
135 /*
136 * PIIX4 ACPI device: hardwired IRQ9
137 */
138 d->irq = 9;
139}
140DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, pci_fixup_piix4_acpi);
141
142/*
143 * Addresses issues with problems in the memory write queue timer in
144 * certain VIA Northbridges. This bugfix is per VIA's specifications,
145 * except for the KL133/KM133: clearing bit 5 on those Northbridges seems
146 * to trigger a bug in its integrated ProSavage video card, which
147 * causes screen corruption. We only clear bits 6 and 7 for that chipset,
148 * until VIA can provide us with definitive information on why screen
149 * corruption occurs, and what exactly those bits do.
150 *
151 * VIA 8363,8622,8361 Northbridges:
152 * - bits 5, 6, 7 at offset 0x55 need to be turned off
153 * VIA 8367 (KT266x) Northbridges:
154 * - bits 5, 6, 7 at offset 0x95 need to be turned off
155 * VIA 8363 rev 0x81/0x84 (KL133/KM133) Northbridges:
156 * - bits 6, 7 at offset 0x55 need to be turned off
157 */
158
159#define VIA_8363_KL133_REVISION_ID 0x81
160#define VIA_8363_KM133_REVISION_ID 0x84
161
162static void __devinit pci_fixup_via_northbridge_bug(struct pci_dev *d)
163{
164 u8 v;
165 u8 revision;
166 int where = 0x55;
167 int mask = 0x1f; /* clear bits 5, 6, 7 by default */
168
169 pci_read_config_byte(d, PCI_REVISION_ID, &revision);
170
171 if (d->device == PCI_DEVICE_ID_VIA_8367_0) {
172 /* fix PCI bus latency issues caused by a NB BIOS error:
173 it appears the BIOS on bug-free^Wreduced kt266x boards
174 forces NB latency to zero */
175 pci_write_config_byte(d, PCI_LATENCY_TIMER, 0);
176
177 where = 0x95; /* the memory write queue timer register is
178 different for the KT266x's: 0x95 not 0x55 */
179 } else if (d->device == PCI_DEVICE_ID_VIA_8363_0 &&
180 (revision == VIA_8363_KL133_REVISION_ID ||
181 revision == VIA_8363_KM133_REVISION_ID)) {
182 mask = 0x3f; /* clear only bits 6 and 7; clearing bit 5
183 causes screen corruption on the KL133/KM133 */
184 }
185
186 pci_read_config_byte(d, where, &v);
187 if (v & ~mask) {
188 printk(KERN_WARNING "Disabling VIA memory write queue (PCI ID %04x, rev %02x): [%02x] %02x & %02x -> %02x\n", \
189 d->device, revision, where, v, mask, v & mask);
190 v &= mask;
191 pci_write_config_byte(d, where, v);
192 }
193}
194DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_northbridge_bug);
195DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug);
196DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug);
197DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug);
198
199/*
200 * For some reason Intel decided that certain parts of their
201 * 815, 845 and some other chipsets must look like PCI-to-PCI bridges
202 * while they are obviously not. The 82801 family (AA, AB, BAM/CAM,
203 * BA/CA/DB and E) PCI bridges are actually HUB-to-PCI ones, according
204 * to Intel terminology. These devices forward all addresses from the
205 * system to the PCI bus no matter what their window settings are, so they
206 * are "transparent" (subtractive decoding) from the programmer's point of view.
207 */
208static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev)
209{
210 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
211 (dev->device & 0xff00) == 0x2400)
212 dev->transparent = 1;
213}
214DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge);
215
216/*
217 * Fixup for C1 Halt Disconnect problem on nForce2 systems.
218 *
219 * From information provided by "Allen Martin" <AMartin@nvidia.com>:
220 *
221 * A hang is caused when the CPU generates a very fast CONNECT/HALT cycle
222 * sequence. Workaround is to set the SYSTEM_IDLE_TIMEOUT to 80 ns.
223 * This allows the state-machine and timer to return to a proper state within
224 * 80 ns of the CONNECT and probe appearing together. Since the CPU will not
225 * issue another HALT within 80 ns of the initial HALT, the failure condition
226 * is avoided.
227 */
228static void __init pci_fixup_nforce2(struct pci_dev *dev)
229{
230 u32 val;
231
232 /*
233 * Chip Old value New value
234 * C17 0x1F0FFF01 0x1F01FF01
235 * C18D 0x9F0FFF01 0x9F01FF01
236 *
237 * Northbridge chip version may be determined by
238 * reading the PCI revision ID (0xC1 or greater is C18D).
239 */
240 pci_read_config_dword(dev, 0x6c, &val);
241
242 /*
243 * Apply fixup if needed, but don't touch disconnect state
244 */
245 if ((val & 0x00FF0000) != 0x00010000) {
246 printk(KERN_WARNING "PCI: nForce2 C1 Halt Disconnect fixup\n");
247 pci_write_config_dword(dev, 0x6c, (val & 0xFF00FFFF) | 0x00010000);
248 }
249}
250DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, pci_fixup_nforce2);
251
252/* Max PCI Express root ports */
253#define MAX_PCIEROOT 6
254static int quirk_aspm_offset[MAX_PCIEROOT << 3];
255
256#define GET_INDEX(a, b) (((a - PCI_DEVICE_ID_INTEL_MCH_PA) << 3) + b)
257
258static int quirk_pcie_aspm_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
259{
260 return raw_pci_ops->read(0, bus->number, devfn, where, size, value);
261}
262
263/*
264 * Replace the original pci bus ops for write with a new one that will filter
265 * the request to ensure ASPM cannot be enabled.
266 */
267static int quirk_pcie_aspm_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
268{
269 u8 offset;
270
271 offset = quirk_aspm_offset[GET_INDEX(bus->self->device, devfn)];
272
273 if ((offset) && (where == offset))
274 value = value & 0xfffffffc;
275
276 return raw_pci_ops->write(0, bus->number, devfn, where, size, value);
277}
278
279static struct pci_ops quirk_pcie_aspm_ops = {
280 .read = quirk_pcie_aspm_read,
281 .write = quirk_pcie_aspm_write,
282};
283
284/*
285 * Prevents PCI Express ASPM (Active State Power Management) being enabled.
286 *
287 * Save the register offset, where the ASPM control bits are located,
288 * for each PCI Express device that is in the device list of
289 * the root port in an array for fast indexing. Replace the bus ops
290 * with the modified one.
291 */
292static void pcie_rootport_aspm_quirk(struct pci_dev *pdev)
293{
294 int cap_base, i;
295 struct pci_bus *pbus;
296 struct pci_dev *dev;
297
298 if ((pbus = pdev->subordinate) == NULL)
299 return;
300
301 /*
302 * Check if the DID of pdev matches one of the six root ports. This
303 * check is needed in the case this function is called directly by the
304 * hot-plug driver.
305 */
306 if ((pdev->device < PCI_DEVICE_ID_INTEL_MCH_PA) ||
307 (pdev->device > PCI_DEVICE_ID_INTEL_MCH_PC1))
308 return;
309
310 if (list_empty(&pbus->devices)) {
311 /*
312 * If no device is attached to the root port at power-up or
313 * after hot-remove, pbus->devices is empty and this code
314 * will set the offsets to zero and the bus ops to the parent's
315 * unmodified bus ops.
316 */
317 for (i = GET_INDEX(pdev->device, 0); i <= GET_INDEX(pdev->device, 7); ++i)
318 quirk_aspm_offset[i] = 0;
319
320 pbus->ops = pbus->parent->ops;
321 } else {
322 /*
323 * If devices are attached to the root port at power-up or
324 * after hot-add, the code loops through the device list of
325 * each root port to save the register offsets and replace the
326 * bus ops.
327 */
328 list_for_each_entry(dev, &pbus->devices, bus_list) {
329 /* There are 0 to 8 devices attached to this bus */
330 cap_base = pci_find_capability(dev, PCI_CAP_ID_EXP);
331 quirk_aspm_offset[GET_INDEX(pdev->device, dev->devfn)] = cap_base + 0x10;
332 }
333 pbus->ops = &quirk_pcie_aspm_ops;
334 }
335}
336DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PA, pcie_rootport_aspm_quirk );
337DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PA1, pcie_rootport_aspm_quirk );
338DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PB, pcie_rootport_aspm_quirk );
339DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PB1, pcie_rootport_aspm_quirk );
340DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC, pcie_rootport_aspm_quirk );
341DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC1, pcie_rootport_aspm_quirk );
342
343/*
344 * Fixup to mark boot BIOS video selected by BIOS before it changes
345 *
346 * From information provided by "Jon Smirl" <jonsmirl@gmail.com>
347 *
348 * The standard boot ROM sequence for an x86 machine uses the BIOS
349 * to select an initial video card for boot display. This boot video
350 * card will have its BIOS copied to C0000 in system RAM.
351 * IORESOURCE_ROM_SHADOW is used to associate the boot video
352 * card with this copy. On laptops this copy has to be used since
353 * the main ROM may be compressed or combined with another image.
354 * See pci_map_rom() for use of this flag. IORESOURCE_ROM_SHADOW
355 * is marked here since the boot video device will be the only enabled
356 * video device at this point.
357 */
358
359static void __devinit pci_fixup_video(struct pci_dev *pdev)
360{
361 struct pci_dev *bridge;
362 struct pci_bus *bus;
363 u16 config;
364
365 if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
366 return;
367
368 /* Is VGA routed to us? */
369 bus = pdev->bus;
370 while (bus) {
371 bridge = bus->self;
372 if (bridge) {
373 pci_read_config_word(bridge, PCI_BRIDGE_CONTROL,
374 &config);
375 if (!(config & PCI_BRIDGE_CTL_VGA))
376 return;
377 }
378 bus = bus->parent;
379 }
380 pci_read_config_word(pdev, PCI_COMMAND, &config);
381 if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
382 pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW;
383 printk(KERN_DEBUG "Boot video device is %s\n", pci_name(pdev));
384 }
385}
386DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video);
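
All of the quirks in this file hang off the same mechanism: DECLARE_PCI_FIXUP_HEADER/DECLARE_PCI_FIXUP_FINAL record a (vendor, device, hook) triple in a linker section that the PCI core walks as each device is discovered. A minimal hypothetical fixup of the same shape (the IDs and IRQ value are made up for illustration):

static void __devinit pci_fixup_example(struct pci_dev *d)
{
	/* hypothetical quirk: pretend this device reports the wrong IRQ */
	printk(KERN_INFO "PCI: example fixup for %s\n", pci_name(d));
	d->irq = 10;	/* made-up value */
}
DECLARE_PCI_FIXUP_HEADER(0x1234, 0x5678, pci_fixup_example);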
diff --git a/arch/i386/pci/i386.c b/arch/i386/pci/i386.c
new file mode 100644
index 000000000000..c205ea7e233b
--- /dev/null
+++ b/arch/i386/pci/i386.c
@@ -0,0 +1,304 @@
1/*
2 * Low-Level PCI Access for i386 machines
3 *
4 * Copyright 1993, 1994 Drew Eckhardt
5 * Visionary Computing
6 * (Unix and Linux consulting and custom programming)
7 * Drew@Colorado.EDU
8 * +1 (303) 786-7975
9 *
10 * Drew's work was sponsored by:
11 * iX Multiuser Multitasking Magazine
12 * Hannover, Germany
13 * hm@ix.de
14 *
15 * Copyright 1997--2000 Martin Mares <mj@ucw.cz>
16 *
17 * For more information, please consult the following manuals (look at
18 * http://www.pcisig.com/ for how to get them):
19 *
20 * PCI BIOS Specification
21 * PCI Local Bus Specification
22 * PCI to PCI Bridge Specification
23 * PCI System Design Guide
24 *
25 */
26
27#include <linux/types.h>
28#include <linux/kernel.h>
29#include <linux/pci.h>
30#include <linux/init.h>
31#include <linux/ioport.h>
32#include <linux/errno.h>
33
34#include "pci.h"
35
36/*
37 * We need to avoid collisions with `mirrored' VGA ports
38 * and other strange ISA hardware, so we always want the
39 * addresses to be allocated in the 0x000-0x0ff region
40 * modulo 0x400.
41 *
42 * Why? Because some silly external IO cards only decode
43 * the low 10 bits of the IO address. The 0x00-0xff region
44 * is reserved for motherboard devices that decode all 16
45 * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
46 * but we want to try to avoid allocating at 0x2900-0x2bff
47 * which might be mirrored at 0x0100-0x03ff.
48 */
49void
50pcibios_align_resource(void *data, struct resource *res,
51 unsigned long size, unsigned long align)
52{
53 if (res->flags & IORESOURCE_IO) {
54 unsigned long start = res->start;
55
56 if (start & 0x300) {
57 start = (start + 0x3ff) & ~0x3ff;
58 res->start = start;
59 }
60 }
61}
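/*
 * Worked example of the rounding above (an illustrative sketch, with a
 * made-up address): a BAR assigned at 0x2920 has low ten bits 0x120,
 * so a card decoding only ten address bits would alias it into the ISA
 * 0x0100-0x03ff range.  0x2920 & 0x300 is non-zero, so the code rounds
 * it up: (0x2920 + 0x3ff) & ~0x3ff == 0x2c00, whose low ten bits land
 * back in the safe 0x000-0x0ff region.
 */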
62
63
64/*
65 * Handle resources of PCI devices. If the world were perfect, we could
66 * just allocate all the resource regions and do nothing more. It isn't.
67 * On the other hand, we cannot just re-allocate all devices, as it would
68 * require us to know lots of host bridge internals. So we attempt to
69 * keep as much of the original configuration as possible, but tweak it
70 * when it's found to be wrong.
71 *
72 * Known BIOS problems we have to work around:
73 * - I/O or memory regions not configured
74 * - regions configured, but not enabled in the command register
75 * - bogus I/O addresses above 64K used
76 * - expansion ROMs left enabled (this may sound harmless, but given
77 * the fact the PCI specs explicitly allow address decoders to be
78 * shared between expansion ROMs and other resource regions, it's
79 * at least dangerous)
80 *
81 * Our solution:
82 * (1) Allocate resources for all buses behind PCI-to-PCI bridges.
83 * This gives us fixed barriers on where we can allocate.
84 * (2) Allocate resources for all enabled devices. If there is
85 * a collision, just mark the resource as unallocated. Also
86 * disable expansion ROMs during this step.
87 * (3) Try to allocate resources for disabled devices. If the
88 * resources were assigned correctly, everything goes well,
89 * if they weren't, they won't disturb allocation of other
90 * resources.
91 * (4) Assign new addresses to resources which were either
92 * not configured at all or misconfigured. If explicitly
93 * requested by the user, configure expansion ROM address
94 * as well.
95 */
96
97static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
98{
99 struct pci_bus *bus;
100 struct pci_dev *dev;
101 int idx;
102 struct resource *r, *pr;
103
104 /* Depth-First Search on bus tree */
105 list_for_each_entry(bus, bus_list, node) {
106 if ((dev = bus->self)) {
107 for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
108 r = &dev->resource[idx];
109 if (!r->start)
110 continue;
111 pr = pci_find_parent_resource(dev, r);
112 if (!pr || request_resource(pr, r) < 0)
113 printk(KERN_ERR "PCI: Cannot allocate resource region %d of bridge %s\n", idx, pci_name(dev));
114 }
115 }
116 pcibios_allocate_bus_resources(&bus->children);
117 }
118}
119
120static void __init pcibios_allocate_resources(int pass)
121{
122 struct pci_dev *dev = NULL;
123 int idx, disabled;
124 u16 command;
125 struct resource *r, *pr;
126
127 for_each_pci_dev(dev) {
128 pci_read_config_word(dev, PCI_COMMAND, &command);
129 for(idx = 0; idx < 6; idx++) {
130 r = &dev->resource[idx];
131 if (r->parent) /* Already allocated */
132 continue;
133 if (!r->start) /* Address not assigned at all */
134 continue;
135 if (r->flags & IORESOURCE_IO)
136 disabled = !(command & PCI_COMMAND_IO);
137 else
138 disabled = !(command & PCI_COMMAND_MEMORY);
139 if (pass == disabled) {
140 DBG("PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d)\n",
141 r->start, r->end, r->flags, disabled, pass);
142 pr = pci_find_parent_resource(dev, r);
143 if (!pr || request_resource(pr, r) < 0) {
144 printk(KERN_ERR "PCI: Cannot allocate resource region %d of device %s\n", idx, pci_name(dev));
145 /* We'll assign a new address later */
146 r->end -= r->start;
147 r->start = 0;
148 }
149 }
150 }
151 if (!pass) {
152 r = &dev->resource[PCI_ROM_RESOURCE];
153 if (r->flags & IORESOURCE_ROM_ENABLE) {
154 /* Turn the ROM off, leave the resource region, but keep it unregistered. */
155 u32 reg;
156 DBG("PCI: Switching off ROM of %s\n", pci_name(dev));
157 r->flags &= ~IORESOURCE_ROM_ENABLE;
158 pci_read_config_dword(dev, dev->rom_base_reg, &reg);
159 pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE);
160 }
161 }
162 }
163}
164
165static int __init pcibios_assign_resources(void)
166{
167 struct pci_dev *dev = NULL;
168 int idx;
169 struct resource *r;
170
171 for_each_pci_dev(dev) {
172 int class = dev->class >> 8;
173
174 /* Don't touch classless devices and host bridges */
175 if (!class || class == PCI_CLASS_BRIDGE_HOST)
176 continue;
177
178 for(idx=0; idx<6; idx++) {
179 r = &dev->resource[idx];
180
181 /*
182 * Don't touch IDE controllers and I/O ports of video cards!
183 */
184 if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) ||
185 (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO)))
186 continue;
187
188 /*
189 * We shall assign a new address to this resource, either because
190 * the BIOS forgot to do so or because we have decided the old
191 * address was unusable for some reason.
192 */
193 if (!r->start && r->end)
194 pci_assign_resource(dev, idx);
195 }
196
197 if (pci_probe & PCI_ASSIGN_ROMS) {
198 r = &dev->resource[PCI_ROM_RESOURCE];
199 r->end -= r->start;
200 r->start = 0;
201 if (r->end)
202 pci_assign_resource(dev, PCI_ROM_RESOURCE);
203 }
204 }
205 return 0;
206}
207
208void __init pcibios_resource_survey(void)
209{
210 DBG("PCI: Allocating resources\n");
211 pcibios_allocate_bus_resources(&pci_root_buses);
212 pcibios_allocate_resources(0);
213 pcibios_allocate_resources(1);
214}
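/*
 * Note on the two passes above (a sketch of the control flow): inside
 * pcibios_allocate_resources(), 'disabled' is 0 for a resource whose
 * decode is enabled in PCI_COMMAND and 1 otherwise, so the
 * 'pass == disabled' test makes pass 0 claim the regions of enabled
 * devices first and pass 1 then try the disabled ones -- steps (2)
 * and (3) of the strategy comment above.
 */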
215
216/**
217 * Called from fs_initcall (one level below subsys_initcall) so that
218 * the motherboard gets a chance to reserve its resources first.
219 */
220fs_initcall(pcibios_assign_resources);
221
222int pcibios_enable_resources(struct pci_dev *dev, int mask)
223{
224 u16 cmd, old_cmd;
225 int idx;
226 struct resource *r;
227
228 pci_read_config_word(dev, PCI_COMMAND, &cmd);
229 old_cmd = cmd;
230 for(idx=0; idx<6; idx++) {
231 /* Only set up the requested stuff */
232 if (!(mask & (1<<idx)))
233 continue;
234
235 r = &dev->resource[idx];
236 if (!r->start && r->end) {
237 printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", pci_name(dev));
238 return -EINVAL;
239 }
240 if (r->flags & IORESOURCE_IO)
241 cmd |= PCI_COMMAND_IO;
242 if (r->flags & IORESOURCE_MEM)
243 cmd |= PCI_COMMAND_MEMORY;
244 }
245 if (dev->resource[PCI_ROM_RESOURCE].start)
246 cmd |= PCI_COMMAND_MEMORY;
247 if (cmd != old_cmd) {
248 printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd);
249 pci_write_config_word(dev, PCI_COMMAND, cmd);
250 }
251 return 0;
252}
253
254/*
255 * If we set up a device for bus mastering, we need to check the latency
256 * timer as certain crappy BIOSes forget to set it properly.
257 */
258unsigned int pcibios_max_latency = 255;
259
260void pcibios_set_master(struct pci_dev *dev)
261{
262 u8 lat;
263 pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
264 if (lat < 16)
265 lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
266 else if (lat > pcibios_max_latency)
267 lat = pcibios_max_latency;
268 else
269 return;
270 printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", pci_name(dev), lat);
271 pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
272}
273
274int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
275 enum pci_mmap_state mmap_state, int write_combine)
276{
277 unsigned long prot;
278
279 /* I/O space cannot be accessed via normal processor loads and
280 * stores on this platform.
281 */
282 if (mmap_state == pci_mmap_io)
283 return -EINVAL;
284
285 /* Leave vm_pgoff as-is, the PCI space address is the physical
286 * address on this platform.
287 */
288 vma->vm_flags |= (VM_SHM | VM_LOCKED | VM_IO);
289
290 prot = pgprot_val(vma->vm_page_prot);
291 if (boot_cpu_data.x86 > 3)
292 prot |= _PAGE_PCD | _PAGE_PWT;
293 vma->vm_page_prot = __pgprot(prot);
294
295 /* Write-combine setting is ignored, it is changed via the mtrr
296 * interfaces on this platform.
297 */
298 if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
299 vma->vm_end - vma->vm_start,
300 vma->vm_page_prot))
301 return -EAGAIN;
302
303 return 0;
304}
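/*
 * Note on the protection bits above (illustrative): _PAGE_PCD and
 * _PAGE_PWT make the mapping cache-disabled and write-through, which
 * is what a memory-mapped register BAR needs; the boot_cpu_data.x86 > 3
 * check skips them on the 386, where those PTE bits do not exist.
 */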
diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c
new file mode 100644
index 000000000000..1128451b5d74
--- /dev/null
+++ b/arch/i386/pci/irq.c
@@ -0,0 +1,1119 @@
1/*
2 * Low-Level PCI Support for PC -- Routing of Interrupts
3 *
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5 */
6
7#include <linux/config.h>
8#include <linux/types.h>
9#include <linux/kernel.h>
10#include <linux/pci.h>
11#include <linux/init.h>
12#include <linux/slab.h>
13#include <linux/interrupt.h>
14#include <linux/irq.h>
15#include <linux/dmi.h>
16#include <asm/io.h>
17#include <asm/smp.h>
18#include <asm/io_apic.h>
19#include <asm/hw_irq.h>
20#include <linux/acpi.h>
21
22#include "pci.h"
23
24#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
25#define PIRQ_VERSION 0x0100
26
27static int broken_hp_bios_irq9;
28static int acer_tm360_irqrouting;
29
30static struct irq_routing_table *pirq_table;
31
32static int pirq_enable_irq(struct pci_dev *dev);
33
34/*
35 * Never use: 0, 1, 2 (timer, keyboard, and cascade)
36 * Avoid using: 13, 14 and 15 (FP error and IDE).
37 * Penalize: 3, 4, 6, 7, 12 (known ISA uses: serial, floppy, parallel and mouse)
38 */
39unsigned int pcibios_irq_mask = 0xfff8;
40
41static int pirq_penalty[16] = {
42 1000000, 1000000, 1000000, 1000, 1000, 0, 1000, 1000,
43 0, 0, 0, 0, 1000, 100000, 100000, 100000
44};
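/*
 * Decoding the table above (illustrative): entries 0-2 carry the
 * 1000000 "never use" weight (timer, keyboard, cascade), entries
 * 13-15 carry 100000 (FP error and IDE), entries 3, 4, 6, 7 and 12
 * carry the mild 1000 ISA penalty, and the rest start out free at 0.
 */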
45
46struct irq_router {
47 char *name;
48 u16 vendor, device;
49 int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
50 int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new);
51};
52
53struct irq_router_handler {
54 u16 vendor;
55 int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device);
56};
57
58int (*pcibios_enable_irq)(struct pci_dev *dev) = NULL;
59
60/*
61 * Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table.
62 */
63
64static struct irq_routing_table * __init pirq_find_routing_table(void)
65{
66 u8 *addr;
67 struct irq_routing_table *rt;
68 int i;
69 u8 sum;
70
71 for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) {
72 rt = (struct irq_routing_table *) addr;
73 if (rt->signature != PIRQ_SIGNATURE ||
74 rt->version != PIRQ_VERSION ||
75 rt->size % 16 ||
76 rt->size < sizeof(struct irq_routing_table))
77 continue;
78 sum = 0;
79 for(i=0; i<rt->size; i++)
80 sum += addr[i];
81 if (!sum) {
82 DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt);
83 return rt;
84 }
85 }
86 return NULL;
87}
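/*
 * A note on the scan above (sketching the $PIR convention): the
 * signature constant assembles the bytes '$', 'P', 'I', 'R' in
 * little-endian order, and a valid table's checksum byte is chosen so
 * that all of its bytes sum to zero mod 256 -- hence the simple u8
 * accumulator that must end up at 0.
 */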
88
89/*
90 * If we have an IRQ routing table, use it to search for peer host
91 * bridges. It's a gross hack, but since there is no other known
92 * way to get a list of buses, we have to go this way.
93 */
94
95static void __init pirq_peer_trick(void)
96{
97 struct irq_routing_table *rt = pirq_table;
98 u8 busmap[256];
99 int i;
100 struct irq_info *e;
101
102 memset(busmap, 0, sizeof(busmap));
103 for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) {
104 e = &rt->slots[i];
105#ifdef DEBUG
106 {
107 int j;
108 DBG("%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot);
109 for(j=0; j<4; j++)
110 DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap);
111 DBG("\n");
112 }
113#endif
114 busmap[e->bus] = 1;
115 }
116 for(i = 1; i < 256; i++) {
117 if (!busmap[i] || pci_find_bus(0, i))
118 continue;
119 if (pci_scan_bus(i, &pci_root_ops, NULL))
120 printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i);
121 }
122 pcibios_last_bus = -1;
123}
124
125/*
126 * Code for querying and setting of IRQ routes on various interrupt routers.
127 */
128
129void eisa_set_level_irq(unsigned int irq)
130{
131 unsigned char mask = 1 << (irq & 7);
132 unsigned int port = 0x4d0 + (irq >> 3);
133 unsigned char val;
134 static u16 eisa_irq_mask;
135
136 if (irq >= 16 || (1 << irq) & eisa_irq_mask)
137 return;
138
139 eisa_irq_mask |= (1 << irq);
140 printk("PCI: setting IRQ %u as level-triggered\n", irq);
141 val = inb(port);
142 if (!(val & mask)) {
143 DBG(" -> edge");
144 outb(val | mask, port);
145 }
146}
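/*
 * Example of the ELCR arithmetic above (an illustrative sketch): for
 * IRQ 11 the mask is 1 << (11 & 7) == 0x08 and the port is
 * 0x4d0 + (11 >> 3) == 0x4d1, i.e. bit 3 of the second edge/level
 * control register gets set to make the line level-triggered.
 */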
147
148/*
149 * Common IRQ routing practice: nybbles in config space,
150 * offset by some magic constant.
151 */
152static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr)
153{
154 u8 x;
155 unsigned reg = offset + (nr >> 1);
156
157 pci_read_config_byte(router, reg, &x);
158 return (nr & 1) ? (x >> 4) : (x & 0xf);
159}
160
161static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val)
162{
163 u8 x;
164 unsigned reg = offset + (nr >> 1);
165
166 pci_read_config_byte(router, reg, &x);
167 x = (nr & 1) ? ((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val);
168 pci_write_config_byte(router, reg, x);
169}
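/*
 * Example of the nybble helpers above (an illustrative sketch; the
 * offset and link values are hypothetical): for a router keeping its
 * nybbles at offset 0x55, link 3 lives in config byte
 * 0x55 + (3 >> 1) == 0x56, and since nr is odd it sits in the upper
 * four bits:
 *
 *	irq = read_config_nybble(router, 0x55, 3);	// reads reg 0x56, returns x >> 4
 *	write_config_nybble(router, 0x55, 3, 11);	// routes the link to IRQ 11
 *
 * The write touches only that nibble, so the other link sharing the
 * byte keeps its routing.
 */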
170
171/*
172 * ALI pirq entries are damn ugly, and completely undocumented.
173 * This has been figured out from pirq tables, and it's not a pretty
174 * picture.
175 */
176static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
177{
178 static unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
179
180 return irqmap[read_config_nybble(router, 0x48, pirq-1)];
181}
182
183static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
184{
185 static unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
186 unsigned int val = irqmap[irq];
187
188 if (val) {
189 write_config_nybble(router, 0x48, pirq-1, val);
190 return 1;
191 }
192 return 0;
193}
194
195/*
196 * The Intel PIIX4 pirq rules are fairly simple: "pirq" is
197 * just a pointer to the config space.
198 */
199static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
200{
201 u8 x;
202
203 pci_read_config_byte(router, pirq, &x);
204 return (x < 16) ? x : 0;
205}
206
207static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
208{
209 pci_write_config_byte(router, pirq, irq);
210 return 1;
211}
212
213/*
214 * The VIA pirq rules are nibble-based, like ALI,
215 * but without the ugly irq number munging.
216 * However, PIRQD is in the upper instead of lower 4 bits.
217 */
218static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
219{
220 return read_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq);
221}
222
223static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
224{
225 write_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq, irq);
226 return 1;
227}
228
229/*
230 * ITE 8330G pirq rules are nibble-based
231 * FIXME: pirqmap may be { 1, 0, 3, 2 },
232 * 2+3 are both mapped to irq 9 on my system
233 */
234static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
235{
236 static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
237 return read_config_nybble(router,0x43, pirqmap[pirq-1]);
238}
239
240static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
241{
242 static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
243 write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
244 return 1;
245}
246
247/*
248 * OPTI: the high four bits are the nibble pointer...
249 * I wonder what the low bits do?
250 */
251static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
252{
253 return read_config_nybble(router, 0xb8, pirq >> 4);
254}
255
256static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
257{
258 write_config_nybble(router, 0xb8, pirq >> 4, irq);
259 return 1;
260}
261
262/*
263 * Cyrix: nibble offset 0x5C
264 * 0x5C bits 7:4 is INTB bits 3:0 is INTA
265 * 0x5D bits 7:4 is INTD bits 3:0 is INTC
266 */
267static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
268{
269 return read_config_nybble(router, 0x5C, (pirq-1)^1);
270}
271
272static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
273{
274 write_config_nybble(router, 0x5C, (pirq-1)^1, irq);
275 return 1;
276}
277
278/*
279 * PIRQ routing for SiS 85C503 router used in several SiS chipsets.
280 * We have to deal with the following issues here:
281 * - vendors have different ideas about the meaning of link values
282 * - some onboard devices (integrated in the chipset) have special
283 * links and are thus routed differently (i.e. not via PCI INTA-INTD)
284 * - different revision of the router have a different layout for
285 * the routing registers, particularly for the onchip devices
286 *
287 * For all routing registers the common thing is we have one byte
288 * per routeable link which is defined as:
289 * bit 7 IRQ mapping enabled (0) or disabled (1)
290 * bits [6:4] reserved (sometimes used for onchip devices)
291 * bits [3:0] IRQ to map to
292 * allowed: 3-7, 9-12, 14-15
293 * reserved: 0, 1, 2, 8, 13
294 *
295 * The config-space registers located at 0x41/0x42/0x43/0x44 are
296 * always used to route the normal PCI INT A/B/C/D respectively.
297 * Apparently there are systems implementing PCI routing table using
298 * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D.
299 * We try our best to handle both link mappings.
300 *
301 * Currently (2003-05-21) it appears most SiS chipsets follow the
302 * definition of routing registers from the SiS-5595 southbridge.
303 * According to the SiS 5595 datasheets the revision IDs of the
304 * router (ISA-bridge) should be 0x01 or 0xb0.
305 *
306 * Furthermore we've also seen lspci dumps with revision 0x00 and 0xb1.
307 * Looks like these are used in a number of SiS 5xx/6xx/7xx chipsets.
308 * They seem to work with the current routing code. However there is
309 * some concern because of the two USB-OHCI HCs (original SiS 5595
310 * had only one). YMMV.
311 *
312 * Onchip routing for router rev-id 0x01/0xb0 and probably 0x00/0xb1:
313 *
314 * 0x61: IDEIRQ:
315 * bits [6:5] must be written 01
316 * bit 4 channel-select primary (0), secondary (1)
317 *
318 * 0x62: USBIRQ:
319 * bit 6 OHCI function disabled (0), enabled (1)
320 *
321 * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved
322 *
323 * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved
324 *
325 * We support USBIRQ (in addition to INTA-INTD) and keep the
326 * IDE, ACPI and DAQ routing untouched as set by the BIOS.
327 *
328 * Currently the only reported exception is the new SiS 65x chipset
329 * which includes the SiS 69x southbridge. Here we have the 85C503
330 * router revision 0x04 and there are changes in the register layout
331 * mostly related to the different USB HCs with USB 2.0 support.
332 *
333 * Onchip routing for router rev-id 0x04 (trial-and-error observation)
334 *
335 * 0x60/0x61/0x62/0x63: 1xEHCI and 3xOHCI (companion) USB-HCs
336 * bit 6-4 are probably unused, not like 5595
337 */
338
339#define PIRQ_SIS_IRQ_MASK 0x0f
340#define PIRQ_SIS_IRQ_DISABLE 0x80
341#define PIRQ_SIS_USB_ENABLE 0x40
342
343static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
344{
345 u8 x;
346 int reg;
347
348 reg = pirq;
349 if (reg >= 0x01 && reg <= 0x04)
350 reg += 0x40;
351 pci_read_config_byte(router, reg, &x);
352 return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK);
353}
354
355static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
356{
357 u8 x;
358 int reg;
359
360 reg = pirq;
361 if (reg >= 0x01 && reg <= 0x04)
362 reg += 0x40;
363 pci_read_config_byte(router, reg, &x);
364 x &= ~(PIRQ_SIS_IRQ_MASK | PIRQ_SIS_IRQ_DISABLE);
365	x |= irq ? irq : PIRQ_SIS_IRQ_DISABLE;
366 pci_write_config_byte(router, reg, x);
367 return 1;
368}
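/*
 * Example of the link remapping above (illustrative): a routing table
 * that uses link value 0x02 for INTB is folded onto config register
 * 0x02 + 0x40 == 0x42, the same register a table using link 0x42
 * addresses directly -- so both BIOS conventions described in the big
 * comment end up at one place.
 */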
369
370
371/*
372 * VLSI: nibble offset 0x74 - educated guess due to routing table and
373 * config space of VLSI 82C534 PCI-bridge/router (1004:0102)
374 * Tested on HP OmniBook 800 covering PIRQ 1, 2, 4, 8 for onboard
375 * devices, PIRQ 3 for non-pci(!) soundchip and (untested) PIRQ 6
376 * for the busbridge to the docking station.
377 */
378
379static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
380{
381 if (pirq > 8) {
382 printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
383 return 0;
384 }
385 return read_config_nybble(router, 0x74, pirq-1);
386}
387
388static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
389{
390 if (pirq > 8) {
391 printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
392 return 0;
393 }
394 write_config_nybble(router, 0x74, pirq-1, irq);
395 return 1;
396}
397
398/*
399 * ServerWorks: PCI interrupts mapped to system IRQ lines through Index
400 * and Redirect I/O registers (0x0c00 and 0x0c01). The Index register
401 * format is (PCIIRQ## | 0x10), e.g.: PCIIRQ10=0x1a. The Redirect
402 * register is a straight binary coding of desired PIC IRQ (low nibble).
403 *
404 * The 'link' value in the PIRQ table is already in the correct format
405 * for the Index register. There are some special index values:
406 * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1,
407 * and 0x03 for SMBus.
408 */
409static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
410{
411 outb_p(pirq, 0xc00);
412 return inb(0xc01) & 0xf;
413}
414
415static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
416{
417 outb_p(pirq, 0xc00);
418 outb_p(irq, 0xc01);
419 return 1;
420}
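/*
 * Example of the index encoding described above (illustrative): a
 * 'link' value of 0x1a means PCIIRQ10 (0x0a | 0x10); writing it to
 * index port 0x0c00 and reading redirect port 0x0c01 returns, in the
 * low nibble, the PIC IRQ currently routed to that line.
 */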
421
422/* Support for AMD756 PCI IRQ Routing
423 * Jhon H. Caicedo <jhcaiced@osso.org.co>
424 * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... (jhcaiced)
425 * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced)
426 * The AMD756 pirq rules are nibble-based
427 * offset 0x56 0-3 PIRQA 4-7 PIRQB
428 * offset 0x57 0-3 PIRQC 4-7 PIRQD
429 */
430static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
431{
432 u8 irq;
433 irq = 0;
434 if (pirq <= 4)
435 {
436 irq = read_config_nybble(router, 0x56, pirq - 1);
437 }
438 printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n",
439 dev->vendor, dev->device, pirq, irq);
440 return irq;
441}
442
443static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
444{
445 printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n",
446 dev->vendor, dev->device, pirq, irq);
447 if (pirq <= 4)
448 {
449 write_config_nybble(router, 0x56, pirq - 1, irq);
450 }
451 return 1;
452}
453
454#ifdef CONFIG_PCI_BIOS
455
456static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
457{
458 struct pci_dev *bridge;
459 int pin = pci_get_interrupt_pin(dev, &bridge);
460 return pcibios_set_irq_routing(bridge, pin, irq);
461}
462
463#endif
464
465static __init int intel_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
466{
467 static struct pci_device_id pirq_440gx[] = {
468 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0) },
469 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2) },
470 { },
471 };
472
473 /* 440GX has a proprietary PIRQ router -- don't use it */
474 if (pci_dev_present(pirq_440gx))
475 return 0;
476
477 switch(device)
478 {
479 case PCI_DEVICE_ID_INTEL_82371FB_0:
480 case PCI_DEVICE_ID_INTEL_82371SB_0:
481 case PCI_DEVICE_ID_INTEL_82371AB_0:
482 case PCI_DEVICE_ID_INTEL_82371MX:
483 case PCI_DEVICE_ID_INTEL_82443MX_0:
484 case PCI_DEVICE_ID_INTEL_82801AA_0:
485 case PCI_DEVICE_ID_INTEL_82801AB_0:
486 case PCI_DEVICE_ID_INTEL_82801BA_0:
487 case PCI_DEVICE_ID_INTEL_82801BA_10:
488 case PCI_DEVICE_ID_INTEL_82801CA_0:
489 case PCI_DEVICE_ID_INTEL_82801CA_12:
490 case PCI_DEVICE_ID_INTEL_82801DB_0:
491 case PCI_DEVICE_ID_INTEL_82801E_0:
492 case PCI_DEVICE_ID_INTEL_82801EB_0:
493 case PCI_DEVICE_ID_INTEL_ESB_1:
494 case PCI_DEVICE_ID_INTEL_ICH6_0:
495 case PCI_DEVICE_ID_INTEL_ICH6_1:
496 case PCI_DEVICE_ID_INTEL_ICH7_0:
497 case PCI_DEVICE_ID_INTEL_ICH7_1:
498 r->name = "PIIX/ICH";
499 r->get = pirq_piix_get;
500 r->set = pirq_piix_set;
501 return 1;
502 }
503 return 0;
504}
505
506static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
507{
508 /* FIXME: We should move some of the quirk fixup stuff here */
509 switch(device)
510 {
511 case PCI_DEVICE_ID_VIA_82C586_0:
512 case PCI_DEVICE_ID_VIA_82C596:
513 case PCI_DEVICE_ID_VIA_82C686:
514 case PCI_DEVICE_ID_VIA_8231:
515 /* FIXME: add new ones for 8233/5 */
516 r->name = "VIA";
517 r->get = pirq_via_get;
518 r->set = pirq_via_set;
519 return 1;
520 }
521 return 0;
522}
523
524static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
525{
526 switch(device)
527 {
528 case PCI_DEVICE_ID_VLSI_82C534:
529 r->name = "VLSI 82C534";
530 r->get = pirq_vlsi_get;
531 r->set = pirq_vlsi_set;
532 return 1;
533 }
534 return 0;
535}
536
537
538static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
539{
540 switch(device)
541 {
542 case PCI_DEVICE_ID_SERVERWORKS_OSB4:
543 case PCI_DEVICE_ID_SERVERWORKS_CSB5:
544 r->name = "ServerWorks";
545 r->get = pirq_serverworks_get;
546 r->set = pirq_serverworks_set;
547 return 1;
548 }
549 return 0;
550}
551
552static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
553{
554 if (device != PCI_DEVICE_ID_SI_503)
555 return 0;
556
557 r->name = "SIS";
558 r->get = pirq_sis_get;
559 r->set = pirq_sis_set;
560 return 1;
561}
562
563static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
564{
565 switch(device)
566 {
567 case PCI_DEVICE_ID_CYRIX_5520:
568 r->name = "NatSemi";
569 r->get = pirq_cyrix_get;
570 r->set = pirq_cyrix_set;
571 return 1;
572 }
573 return 0;
574}
575
576static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
577{
578 switch(device)
579 {
580 case PCI_DEVICE_ID_OPTI_82C700:
581 r->name = "OPTI";
582 r->get = pirq_opti_get;
583 r->set = pirq_opti_set;
584 return 1;
585 }
586 return 0;
587}
588
589static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
590{
591 switch(device)
592 {
593 case PCI_DEVICE_ID_ITE_IT8330G_0:
594 r->name = "ITE";
595 r->get = pirq_ite_get;
596 r->set = pirq_ite_set;
597 return 1;
598 }
599 return 0;
600}
601
602static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
603{
604 switch(device)
605 {
606 case PCI_DEVICE_ID_AL_M1533:
607 case PCI_DEVICE_ID_AL_M1563:
608 printk("PCI: Using ALI IRQ Router\n");
609 r->name = "ALI";
610 r->get = pirq_ali_get;
611 r->set = pirq_ali_set;
612 return 1;
613 }
614 return 0;
615}
616
617static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
618{
619 switch(device)
620 {
621 case PCI_DEVICE_ID_AMD_VIPER_740B:
622 r->name = "AMD756";
623 break;
624 case PCI_DEVICE_ID_AMD_VIPER_7413:
625 r->name = "AMD766";
626 break;
627 case PCI_DEVICE_ID_AMD_VIPER_7443:
628 r->name = "AMD768";
629 break;
630 default:
631 return 0;
632 }
633 r->get = pirq_amd756_get;
634 r->set = pirq_amd756_set;
635 return 1;
636}
637
638static __initdata struct irq_router_handler pirq_routers[] = {
639 { PCI_VENDOR_ID_INTEL, intel_router_probe },
640 { PCI_VENDOR_ID_AL, ali_router_probe },
641 { PCI_VENDOR_ID_ITE, ite_router_probe },
642 { PCI_VENDOR_ID_VIA, via_router_probe },
643 { PCI_VENDOR_ID_OPTI, opti_router_probe },
644 { PCI_VENDOR_ID_SI, sis_router_probe },
645 { PCI_VENDOR_ID_CYRIX, cyrix_router_probe },
646 { PCI_VENDOR_ID_VLSI, vlsi_router_probe },
647 { PCI_VENDOR_ID_SERVERWORKS, serverworks_router_probe },
648 { PCI_VENDOR_ID_AMD, amd_router_probe },
649 /* Someone with docs needs to add the ATI Radeon IGP */
650 { 0, NULL }
651};
652static struct irq_router pirq_router;
653static struct pci_dev *pirq_router_dev;
654
655
656/*
657 * FIXME: should we have an option to say "generic for
658 * chipset" ?
659 */
660
661static void __init pirq_find_router(struct irq_router *r)
662{
663 struct irq_routing_table *rt = pirq_table;
664 struct irq_router_handler *h;
665
666#ifdef CONFIG_PCI_BIOS
667 if (!rt->signature) {
668 printk(KERN_INFO "PCI: Using BIOS for IRQ routing\n");
669 r->set = pirq_bios_set;
670 r->name = "BIOS";
671 return;
672 }
673#endif
674
675 /* Default unless a driver reloads it */
676 r->name = "default";
677 r->get = NULL;
678 r->set = NULL;
679
680 DBG("PCI: Attempting to find IRQ router for %04x:%04x\n",
681 rt->rtr_vendor, rt->rtr_device);
682
683 pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn);
684 if (!pirq_router_dev) {
685 DBG("PCI: Interrupt router not found at %02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
686 return;
687 }
688
689	for (h = pirq_routers; h->vendor; h++) {
690 /* First look for a router match */
691 if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device))
692 break;
693 /* Fall back to a device match */
694 if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device))
695 break;
696 }
697 printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
698 pirq_router.name,
699 pirq_router_dev->vendor,
700 pirq_router_dev->device,
701 pci_name(pirq_router_dev));
702}
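/*
 * A note on the probe order above (a sketch): each handler is tried
 * first against the vendor/device the $PIR table names for the router
 * and then against the IDs the router device itself reports, so
 * either source of identification is enough to pick a handler.
 */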
703
704static struct irq_info *pirq_get_info(struct pci_dev *dev)
705{
706 struct irq_routing_table *rt = pirq_table;
707 int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info);
708 struct irq_info *info;
709
710 for (info = rt->slots; entries--; info++)
711 if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
712 return info;
713 return NULL;
714}
715
716static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
717{
718 u8 pin;
719 struct irq_info *info;
720 int i, pirq, newirq;
721 int irq = 0;
722 u32 mask;
723 struct irq_router *r = &pirq_router;
724 struct pci_dev *dev2 = NULL;
725 char *msg = NULL;
726
727 /* Find IRQ pin */
728 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
729 if (!pin) {
730 DBG(" -> no interrupt pin\n");
731 return 0;
732 }
733 pin = pin - 1;
734
735 /* Find IRQ routing entry */
736
737 if (!pirq_table)
738 return 0;
739
740 DBG("IRQ for %s[%c]", pci_name(dev), 'A' + pin);
741 info = pirq_get_info(dev);
742 if (!info) {
743 DBG(" -> not found in routing table\n");
744 return 0;
745 }
746 pirq = info->irq[pin].link;
747 mask = info->irq[pin].bitmap;
748 if (!pirq) {
749 DBG(" -> not routed\n");
750 return 0;
751 }
752 DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs);
753 mask &= pcibios_irq_mask;
754
755 /* Work around broken HP Pavilion Notebooks which assign USB to
756 IRQ 9 even though it is actually wired to IRQ 11 */
757
758 if (broken_hp_bios_irq9 && pirq == 0x59 && dev->irq == 9) {
759 dev->irq = 11;
760 pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 11);
761 r->set(pirq_router_dev, dev, pirq, 11);
762 }
763
764 /* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */
765 if (acer_tm360_irqrouting && dev->irq == 11 && dev->vendor == PCI_VENDOR_ID_O2) {
766 pirq = 0x68;
767 mask = 0x400;
768 dev->irq = r->get(pirq_router_dev, dev, pirq);
769 pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
770 }
771
772 /*
773 * Find the best IRQ to assign: use the one
774 * reported by the device if possible.
775 */
776 newirq = dev->irq;
777 if (!((1 << newirq) & mask)) {
778		if (pci_probe & PCI_USE_PIRQ_MASK) newirq = 0;
779		else printk(KERN_WARNING "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n", newirq, pci_name(dev));
780 }
781 if (!newirq && assign) {
782 for (i = 0; i < 16; i++) {
783 if (!(mask & (1 << i)))
784 continue;
785 if (pirq_penalty[i] < pirq_penalty[newirq] && can_request_irq(i, SA_SHIRQ))
786 newirq = i;
787 }
788 }
789 DBG(" -> newirq=%d", newirq);
790
791 /* Check if it is hardcoded */
792 if ((pirq & 0xf0) == 0xf0) {
793 irq = pirq & 0xf;
794 DBG(" -> hardcoded IRQ %d\n", irq);
795 msg = "Hardcoded";
796	} else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) &&
797	    ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) {
798 DBG(" -> got IRQ %d\n", irq);
799 msg = "Found";
800 } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
801 DBG(" -> assigning IRQ %d", newirq);
802 if (r->set(pirq_router_dev, dev, pirq, newirq)) {
803 eisa_set_level_irq(newirq);
804 DBG(" ... OK\n");
805 msg = "Assigned";
806 irq = newirq;
807 }
808 }
809
810 if (!irq) {
811 DBG(" ... failed\n");
812 if (newirq && mask == (1 << newirq)) {
813 msg = "Guessed";
814 irq = newirq;
815 } else
816 return 0;
817 }
818 printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, pci_name(dev));
819
820 /* Update IRQ for all devices with the same pirq value */
821 while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) {
822 pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);
823 if (!pin)
824 continue;
825 pin--;
826 info = pirq_get_info(dev2);
827 if (!info)
828 continue;
829 if (info->irq[pin].link == pirq) {
830 /* We refuse to override the dev->irq information. Give a warning! */
831			if (dev2->irq && dev2->irq != irq &&
832			    (!(pci_probe & PCI_USE_PIRQ_MASK) ||
833			     ((1 << dev2->irq) & mask))) {
834#ifndef CONFIG_PCI_MSI
835 printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n",
836 pci_name(dev2), dev2->irq, irq);
837#endif
838 continue;
839 }
840 dev2->irq = irq;
841 pirq_penalty[irq]++;
842 if (dev != dev2)
843 printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, pci_name(dev2));
844 }
845 }
846 return 1;
847}
848
849static void __init pcibios_fixup_irqs(void)
850{
851 struct pci_dev *dev = NULL;
852 u8 pin;
853
854 DBG("PCI: IRQ fixup\n");
855 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
856 /*
857		 * If the BIOS has set an out-of-range IRQ number, just ignore it.
858		 * Also keep track of which IRQs are already in use.
859 */
860 if (dev->irq >= 16) {
861 DBG("%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq);
862 dev->irq = 0;
863 }
864 /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */
865 if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000)
866 pirq_penalty[dev->irq] = 0;
867 pirq_penalty[dev->irq]++;
868 }
869
870 dev = NULL;
871 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
872 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
873#ifdef CONFIG_X86_IO_APIC
874 /*
875 * Recalculate IRQ numbers if we use the I/O APIC.
876 */
877 if (io_apic_assign_pci_irqs)
878 {
879 int irq;
880
881 if (pin) {
882 pin--; /* interrupt pins are numbered starting from 1 */
883 irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
884 /*
885 * Busses behind bridges are typically not listed in the MP-table.
886 * In this case we have to look up the IRQ based on the parent bus,
887 * parent slot, and pin number. The SMP code detects such bridged
888 * busses itself so we should get into this branch reliably.
889 */
890 if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
891 struct pci_dev * bridge = dev->bus->self;
892
893 pin = (pin + PCI_SLOT(dev->devfn)) % 4;
894 irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
895 PCI_SLOT(bridge->devfn), pin);
896 if (irq >= 0)
897 printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
898 pci_name(bridge), 'A' + pin, irq);
899 }
900 if (irq >= 0) {
901 if (use_pci_vector() &&
902 !platform_legacy_irq(irq))
903 irq = IO_APIC_VECTOR(irq);
904
905 printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
906 pci_name(dev), 'A' + pin, irq);
907 dev->irq = irq;
908 }
909 }
910 }
911#endif
912 /*
913 * Still no IRQ? Try to lookup one...
914 */
915 if (pin && !dev->irq)
916 pcibios_lookup_irq(dev, 0);
917 }
918}
919
920/*
921 * Work around broken HP Pavilion Notebooks which assign USB to
922 * IRQ 9 even though it is actually wired to IRQ 11
923 */
924static int __init fix_broken_hp_bios_irq9(struct dmi_system_id *d)
925{
926 if (!broken_hp_bios_irq9) {
927 broken_hp_bios_irq9 = 1;
928 printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
929 }
930 return 0;
931}
932
933/*
934 * Work around broken Acer TravelMate 360 Notebooks which assign
935 * Cardbus to IRQ 11 even though it is actually wired to IRQ 10
936 */
937static int __init fix_acer_tm360_irqrouting(struct dmi_system_id *d)
938{
939 if (!acer_tm360_irqrouting) {
940 acer_tm360_irqrouting = 1;
941 printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
942 }
943 return 0;
944}
945
946static struct dmi_system_id __initdata pciirq_dmi_table[] = {
947 {
948 .callback = fix_broken_hp_bios_irq9,
949 .ident = "HP Pavilion N5400 Series Laptop",
950 .matches = {
951 DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
952 DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"),
953 DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook Model GE"),
954 DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"),
955 },
956 },
957 {
958 .callback = fix_acer_tm360_irqrouting,
959 .ident = "Acer TravelMate 36x Laptop",
960 .matches = {
961 DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
962 DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
963 },
964 },
965 { }
966};
967
968static int __init pcibios_irq_init(void)
969{
970 DBG("PCI: IRQ init\n");
971
972 if (pcibios_enable_irq || raw_pci_ops == NULL)
973 return 0;
974
975 dmi_check_system(pciirq_dmi_table);
976
977 pirq_table = pirq_find_routing_table();
978
979#ifdef CONFIG_PCI_BIOS
980 if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN))
981 pirq_table = pcibios_get_irq_routing_table();
982#endif
983 if (pirq_table) {
984 pirq_peer_trick();
985 pirq_find_router(&pirq_router);
986 if (pirq_table->exclusive_irqs) {
987 int i;
988 for (i=0; i<16; i++)
989 if (!(pirq_table->exclusive_irqs & (1 << i)))
990 pirq_penalty[i] += 100;
991 }
992 /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */
993 if (io_apic_assign_pci_irqs)
994 pirq_table = NULL;
995 }
996
997 pcibios_enable_irq = pirq_enable_irq;
998
999 pcibios_fixup_irqs();
1000 return 0;
1001}
1002
1003subsys_initcall(pcibios_irq_init);
1004
1005
1006static void pirq_penalize_isa_irq(int irq)
1007{
1008 /*
1009 * If any ISAPnP device reports an IRQ in its list of possible
1010 * IRQ's, we try to avoid assigning it to PCI devices.
1011 */
1012 if (irq < 16)
1013 pirq_penalty[irq] += 100;
1014}
1015
1016void pcibios_penalize_isa_irq(int irq)
1017{
1018#ifdef CONFIG_ACPI_PCI
1019 if (!acpi_noirq)
1020 acpi_penalize_isa_irq(irq);
1021 else
1022#endif
1023 pirq_penalize_isa_irq(irq);
1024}
1025
1026static int pirq_enable_irq(struct pci_dev *dev)
1027{
1028 u8 pin;
1029 extern int via_interrupt_line_quirk;
1030 struct pci_dev *temp_dev;
1031
1032 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
1033 if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
1034 char *msg = "";
1035
1036 pin--; /* interrupt pins are numbered starting from 1 */
1037
1038 if (io_apic_assign_pci_irqs) {
1039 int irq;
1040
1041 irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
1042 /*
1043 * Busses behind bridges are typically not listed in the MP-table.
1044 * In this case we have to look up the IRQ based on the parent bus,
1045 * parent slot, and pin number. The SMP code detects such bridged
1046 * busses itself so we should get into this branch reliably.
1047 */
1048 temp_dev = dev;
1049 while (irq < 0 && dev->bus->parent) { /* go back to the bridge */
1050 struct pci_dev * bridge = dev->bus->self;
1051
1052 pin = (pin + PCI_SLOT(dev->devfn)) % 4;
1053 irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
1054 PCI_SLOT(bridge->devfn), pin);
1055 if (irq >= 0)
1056 printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
1057 pci_name(bridge), 'A' + pin, irq);
1058 dev = bridge;
1059 }
1060 dev = temp_dev;
1061 if (irq >= 0) {
1062#ifdef CONFIG_PCI_MSI
1063 if (!platform_legacy_irq(irq))
1064 irq = IO_APIC_VECTOR(irq);
1065#endif
1066 printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
1067 pci_name(dev), 'A' + pin, irq);
1068 dev->irq = irq;
1069 return 0;
1070 } else
1071 msg = " Probably buggy MP table.";
1072 } else if (pci_probe & PCI_BIOS_IRQ_SCAN)
1073 msg = "";
1074 else
1075 msg = " Please try using pci=biosirq.";
1076
1077		/* With IDE legacy devices the IRQ lookup failure is not a problem. */
1078 if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
1079 return 0;
1080
1081 printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
1082 'A' + pin, pci_name(dev), msg);
1083 }
1084	/* VIA bridges use the interrupt line for APIC/PCI steering across
1085	   the V-Link */
1086 else if (via_interrupt_line_quirk)
1087 pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq & 15);
1088 return 0;
1089}
1090
1091int pci_vector_resources(int last, int nr_released)
1092{
1093 int count = nr_released;
1094
1095 int next = last;
1096 int offset = (last % 8);
1097
1098 while (next < FIRST_SYSTEM_VECTOR) {
1099 next += 8;
1100#ifdef CONFIG_X86_64
1101 if (next == IA32_SYSCALL_VECTOR)
1102 continue;
1103#else
1104 if (next == SYSCALL_VECTOR)
1105 continue;
1106#endif
1107 count++;
1108 if (next >= FIRST_SYSTEM_VECTOR) {
1109			if (offset % 8) {
1110 next = FIRST_DEVICE_VECTOR + offset;
1111 offset++;
1112 continue;
1113 }
1114 count--;
1115 }
1116 }
1117
1118 return count;
1119}
diff --git a/arch/i386/pci/legacy.c b/arch/i386/pci/legacy.c
new file mode 100644
index 000000000000..1492e3753869
--- /dev/null
+++ b/arch/i386/pci/legacy.c
@@ -0,0 +1,54 @@
1/*
2 * legacy.c - traditional, old school PCI bus probing
3 */
4#include <linux/init.h>
5#include <linux/pci.h>
6#include "pci.h"
7
8/*
9 * Discover remaining PCI buses in case there are peer host bridges.
10 * We use the number of the last PCI bus provided by the PCI BIOS.
11 */
12static void __devinit pcibios_fixup_peer_bridges(void)
13{
14 int n, devfn;
15
16 if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff)
17 return;
18 DBG("PCI: Peer bridge fixup\n");
19
20 for (n=0; n <= pcibios_last_bus; n++) {
21 u32 l;
22 if (pci_find_bus(0, n))
23 continue;
24 for (devfn = 0; devfn < 256; devfn += 8) {
25 if (!raw_pci_ops->read(0, n, devfn, PCI_VENDOR_ID, 2, &l) &&
26 l != 0x0000 && l != 0xffff) {
27 DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l);
28 printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n);
29 pci_scan_bus(n, &pci_root_ops, NULL);
30 break;
31 }
32 }
33 }
34}
35
36static int __init pci_legacy_init(void)
37{
38 if (!raw_pci_ops) {
39 printk("PCI: System does not support PCI\n");
40 return 0;
41 }
42
43 if (pcibios_scanned++)
44 return 0;
45
46 printk("PCI: Probing PCI hardware\n");
47 pci_root_bus = pcibios_scan_root(0);
48
49 pcibios_fixup_peer_bridges();
50
51 return 0;
52}
53
54subsys_initcall(pci_legacy_init);
diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c
new file mode 100644
index 000000000000..021a50aa51f4
--- /dev/null
+++ b/arch/i386/pci/mmconfig.c
@@ -0,0 +1,122 @@
1/*
2 * Copyright (C) 2004 Matthew Wilcox <matthew@wil.cx>
3 * Copyright (C) 2004 Intel Corp.
4 *
5 * This code is released under the GNU General Public License version 2.
6 */
7
8/*
9 * mmconfig.c - Low-level direct PCI config space access via MMCONFIG
10 */
11
12#include <linux/pci.h>
13#include <linux/init.h>
14#include "pci.h"
15
16/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
17u32 pci_mmcfg_base_addr;
18
19#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
20
21/* The base address of the last MMCONFIG device accessed */
22static u32 mmcfg_last_accessed_device;
23
24/*
25 * Functions for accessing PCI configuration space with MMCONFIG accesses
26 */
27
28static inline void pci_exp_set_dev_base(int bus, int devfn)
29{
30 u32 dev_base = pci_mmcfg_base_addr | (bus << 20) | (devfn << 12);
31 if (dev_base != mmcfg_last_accessed_device) {
32 mmcfg_last_accessed_device = dev_base;
33 set_fixmap_nocache(FIX_PCIE_MCFG, dev_base);
34 }
35}
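/*
 * Worked example of the window computation above (illustrative): for
 * bus 2, device 3, function 1, devfn is (3 << 3) | 1 == 0x19, so the
 * fixmap is pointed at pci_mmcfg_base_addr | (2 << 20) | (0x19 << 12),
 * i.e. base + 0x219000, and 'reg' then indexes into that 4K page.
 */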
36
37static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
38 unsigned int devfn, int reg, int len, u32 *value)
39{
40 unsigned long flags;
41
42 if (!value || (bus > 255) || (devfn > 255) || (reg > 4095))
43 return -EINVAL;
44
45 spin_lock_irqsave(&pci_config_lock, flags);
46
47 pci_exp_set_dev_base(bus, devfn);
48
49 switch (len) {
50 case 1:
51 *value = readb(mmcfg_virt_addr + reg);
52 break;
53 case 2:
54 *value = readw(mmcfg_virt_addr + reg);
55 break;
56 case 4:
57 *value = readl(mmcfg_virt_addr + reg);
58 break;
59 }
60
61 spin_unlock_irqrestore(&pci_config_lock, flags);
62
63 return 0;
64}
65
66static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
67 unsigned int devfn, int reg, int len, u32 value)
68{
69 unsigned long flags;
70
71 if ((bus > 255) || (devfn > 255) || (reg > 4095))
72 return -EINVAL;
73
74 spin_lock_irqsave(&pci_config_lock, flags);
75
76 pci_exp_set_dev_base(bus, devfn);
77
78 switch (len) {
79 case 1:
80 writeb(value, mmcfg_virt_addr + reg);
81 break;
82 case 2:
83 writew(value, mmcfg_virt_addr + reg);
84 break;
85 case 4:
86 writel(value, mmcfg_virt_addr + reg);
87 break;
88 }
89
90 spin_unlock_irqrestore(&pci_config_lock, flags);
91
92 return 0;
93}
94
95static struct pci_raw_ops pci_mmcfg = {
96 .read = pci_mmcfg_read,
97 .write = pci_mmcfg_write,
98};
99
100static int __init pci_mmcfg_init(void)
101{
102 if ((pci_probe & PCI_PROBE_MMCONF) == 0)
103 goto out;
104 if (!pci_mmcfg_base_addr)
105 goto out;
106
107 /* Kludge for now. Don't use mmconfig on AMD systems because
108 those have some busses where mmconfig doesn't work,
109 and we don't parse ACPI MCFG well enough to handle that.
110 Remove when proper handling is added. */
111 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
112 goto out;
113
114 printk(KERN_INFO "PCI: Using MMCONFIG\n");
115 raw_pci_ops = &pci_mmcfg;
116 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
117
118 out:
119 return 0;
120}
121
122arch_initcall(pci_mmcfg_init);
diff --git a/arch/i386/pci/numa.c b/arch/i386/pci/numa.c
new file mode 100644
index 000000000000..9e3695461899
--- /dev/null
+++ b/arch/i386/pci/numa.c
@@ -0,0 +1,130 @@
1/*
2 * numa.c - Low-level PCI access for NUMA-Q machines
3 */
4
5#include <linux/pci.h>
6#include <linux/init.h>
7#include <linux/nodemask.h>
8#include "pci.h"
9
10#define BUS2QUAD(global) (mp_bus_id_to_node[global])
11#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
12#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
13
14#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \
15 (0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3))
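/*
 * Example of the CF8 encoding above (an illustrative sketch): a local
 * bus number of 1, devfn 0x08 (device 1, function 0) and reg 0x3d give
 * 0x80000000 | (1 << 16) | (0x08 << 8) | 0x3c == 0x8001083c; the low
 * two bits of reg instead select the byte lane on the 0xCFC data port.
 */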
16
17static int pci_conf1_mq_read(unsigned int seg, unsigned int bus,
18 unsigned int devfn, int reg, int len, u32 *value)
19{
20 unsigned long flags;
21
22 if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
23 return -EINVAL;
24
25 spin_lock_irqsave(&pci_config_lock, flags);
26
27 outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus));
28
29 switch (len) {
30 case 1:
31 *value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus));
32 break;
33 case 2:
34 *value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus));
35 break;
36 case 4:
37 *value = inl_quad(0xCFC, BUS2QUAD(bus));
38 break;
39 }
40
41 spin_unlock_irqrestore(&pci_config_lock, flags);
42
43 return 0;
44}
45
46static int pci_conf1_mq_write(unsigned int seg, unsigned int bus,
47 unsigned int devfn, int reg, int len, u32 value)
48{
49 unsigned long flags;
50
51 if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
52 return -EINVAL;
53
54 spin_lock_irqsave(&pci_config_lock, flags);
55
56 outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus));
57
58 switch (len) {
59 case 1:
60 outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus));
61 break;
62 case 2:
63 outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus));
64 break;
65 case 4:
66 outl_quad((u32)value, 0xCFC, BUS2QUAD(bus));
67 break;
68 }
69
70 spin_unlock_irqrestore(&pci_config_lock, flags);
71
72 return 0;
73}
74
75#undef PCI_CONF1_MQ_ADDRESS
76
77static struct pci_raw_ops pci_direct_conf1_mq = {
78 .read = pci_conf1_mq_read,
79 .write = pci_conf1_mq_write
80};
81
82
83static void __devinit pci_fixup_i450nx(struct pci_dev *d)
84{
85 /*
86 * i450NX -- Find and scan all secondary buses on all PXB's.
87 */
88 int pxb, reg;
89 u8 busno, suba, subb;
90 int quad = BUS2QUAD(d->bus->number);
91
92 printk("PCI: Searching for i450NX host bridges on %s\n", pci_name(d));
93 reg = 0xd0;
94 for(pxb=0; pxb<2; pxb++) {
95 pci_read_config_byte(d, reg++, &busno);
96 pci_read_config_byte(d, reg++, &suba);
97 pci_read_config_byte(d, reg++, &subb);
98 DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
99 if (busno)
100 pci_scan_bus(QUADLOCAL2BUS(quad,busno), &pci_root_ops, NULL); /* Bus A */
101 if (suba < subb)
102 pci_scan_bus(QUADLOCAL2BUS(quad,suba+1), &pci_root_ops, NULL); /* Bus B */
103 }
104 pcibios_last_bus = -1;
105}
106DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx);
107
108static int __init pci_numa_init(void)
109{
110 int quad;
111
112 raw_pci_ops = &pci_direct_conf1_mq;
113
114 if (pcibios_scanned++)
115 return 0;
116
117 pci_root_bus = pcibios_scan_root(0);
118 if (num_online_nodes() > 1)
119 for_each_online_node(quad) {
120 if (quad == 0)
121 continue;
122 printk("Scanning PCI bus %d for quad %d\n",
123 QUADLOCAL2BUS(quad,0), quad);
124 pci_scan_bus(QUADLOCAL2BUS(quad,0),
125 &pci_root_ops, NULL);
126 }
127 return 0;
128}
129
130subsys_initcall(pci_numa_init);
diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c
new file mode 100644
index 000000000000..141421b673b0
--- /dev/null
+++ b/arch/i386/pci/pcbios.c
@@ -0,0 +1,487 @@
1/*
2 * BIOS32 and PCI BIOS handling.
3 */
4
5#include <linux/pci.h>
6#include <linux/init.h>
7#include "pci.h"
8#include "pci-functions.h"
9
10
11/* BIOS32 signature: "_32_" */
12#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
13
14/* PCI signature: "PCI " */
15#define PCI_SIGNATURE (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24))
16
17/* PCI service signature: "$PCI" */
18#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24))
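/*
 * Illustrative note: each signature above packs four ASCII bytes in
 * little-endian order, so e.g. BIOS32_SIGNATURE compares equal to the
 * string "_32_" read from memory as a 32-bit word.
 */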
19
20/* PCI BIOS hardware mechanism flags */
21#define PCIBIOS_HW_TYPE1 0x01
22#define PCIBIOS_HW_TYPE2 0x02
23#define PCIBIOS_HW_TYPE1_SPEC 0x10
24#define PCIBIOS_HW_TYPE2_SPEC 0x20
25
26/*
27 * This is the standard structure used to identify the entry point
28 * to the BIOS32 Service Directory, as documented in
29 * Standard BIOS 32-bit Service Directory Proposal
30 * Revision 0.4 May 24, 1993
31 * Phoenix Technologies Ltd.
32 * Norwood, MA
33 * and the PCI BIOS specification.
34 */
35
36union bios32 {
37 struct {
38 unsigned long signature; /* _32_ */
39 unsigned long entry; /* 32 bit physical address */
40 unsigned char revision; /* Revision level, 0 */
41 unsigned char length; /* Length in paragraphs should be 01 */
42 unsigned char checksum; /* All bytes must add up to zero */
43 unsigned char reserved[5]; /* Must be zero */
44 } fields;
45 char chars[16];
46};
47
48/*
49 * Physical address of the service directory. I don't know if we're
50 * allowed to have more than one of these or not, so just in case
51 * we'll make pcibios_present() take a memory start parameter and store
52 * the array there.
53 */
54
55static struct {
56 unsigned long address;
57 unsigned short segment;
58} bios32_indirect = { 0, __KERNEL_CS };
59
60/*
61 * Returns the entry point for the given service, 0 on error.
62 */
63
64static unsigned long bios32_service(unsigned long service)
65{
66 unsigned char return_code; /* %al */
67 unsigned long address; /* %ebx */
68 unsigned long length; /* %ecx */
69 unsigned long entry; /* %edx */
70 unsigned long flags;
71
72 local_irq_save(flags);
73 __asm__("lcall *(%%edi); cld"
74 : "=a" (return_code),
75 "=b" (address),
76 "=c" (length),
77 "=d" (entry)
78 : "0" (service),
79 "1" (0),
80 "D" (&bios32_indirect));
81 local_irq_restore(flags);
82
83 switch (return_code) {
84 case 0:
85 return address + entry;
86 case 0x80: /* Not present */
87 printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service);
88 return 0;
89 default: /* Shouldn't happen */
90 printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n",
91 service, return_code);
92 return 0;
93 }
94}
95
96static struct {
97 unsigned long address;
98 unsigned short segment;
99} pci_indirect = { 0, __KERNEL_CS };
100
101static int pci_bios_present;
102
103static int __devinit check_pcibios(void)
104{
105 u32 signature, eax, ebx, ecx;
106 u8 status, major_ver, minor_ver, hw_mech;
107 unsigned long flags, pcibios_entry;
108
109 if ((pcibios_entry = bios32_service(PCI_SERVICE))) {
110 pci_indirect.address = pcibios_entry + PAGE_OFFSET;
111
112 local_irq_save(flags);
113 __asm__(
114 "lcall *(%%edi); cld\n\t"
115 "jc 1f\n\t"
116 "xor %%ah, %%ah\n"
117 "1:"
118 : "=d" (signature),
119 "=a" (eax),
120 "=b" (ebx),
121 "=c" (ecx)
122 : "1" (PCIBIOS_PCI_BIOS_PRESENT),
123 "D" (&pci_indirect)
124 : "memory");
125 local_irq_restore(flags);
126
127 status = (eax >> 8) & 0xff;
128 hw_mech = eax & 0xff;
129 major_ver = (ebx >> 8) & 0xff;
130 minor_ver = ebx & 0xff;
131 if (pcibios_last_bus < 0)
132 pcibios_last_bus = ecx & 0xff;
133 DBG("PCI: BIOS probe returned s=%02x hw=%02x ver=%02x.%02x l=%02x\n",
134 status, hw_mech, major_ver, minor_ver, pcibios_last_bus);
135 if (status || signature != PCI_SIGNATURE) {
136 printk (KERN_ERR "PCI: BIOS BUG #%x[%08x] found\n",
137 status, signature);
138 return 0;
139 }
140 printk(KERN_INFO "PCI: PCI BIOS revision %x.%02x entry at 0x%lx, last bus=%d\n",
141 major_ver, minor_ver, pcibios_entry, pcibios_last_bus);
142#ifdef CONFIG_PCI_DIRECT
143 if (!(hw_mech & PCIBIOS_HW_TYPE1))
144 pci_probe &= ~PCI_PROBE_CONF1;
145 if (!(hw_mech & PCIBIOS_HW_TYPE2))
146 pci_probe &= ~PCI_PROBE_CONF2;
147#endif
148 return 1;
149 }
150 return 0;
151}
152
153static int __devinit pci_bios_find_device (unsigned short vendor, unsigned short device_id,
154 unsigned short index, unsigned char *bus, unsigned char *device_fn)
155{
156 unsigned short bx;
157 unsigned short ret;
158
159 __asm__("lcall *(%%edi); cld\n\t"
160 "jc 1f\n\t"
161 "xor %%ah, %%ah\n"
162 "1:"
163 : "=b" (bx),
164 "=a" (ret)
165 : "1" (PCIBIOS_FIND_PCI_DEVICE),
166 "c" (device_id),
167 "d" (vendor),
168 "S" ((int) index),
169 "D" (&pci_indirect));
170 *bus = (bx >> 8) & 0xff;
171 *device_fn = bx & 0xff;
172 return (int) (ret & 0xff00) >> 8;
173}
174
static int pci_bios_read(unsigned int seg, unsigned int bus,
			 unsigned int devfn, int reg, int len, u32 *value)
{
	unsigned long result = 0;
	unsigned long flags;
	unsigned long bx = (bus << 8) | devfn;

	if (!value || (bus > 255) || (devfn > 255) || (reg > 255))
		return -EINVAL;

	spin_lock_irqsave(&pci_config_lock, flags);

	switch (len) {
	case 1:
		__asm__("lcall *(%%esi); cld\n\t"
			"jc 1f\n\t"
			"xor %%ah, %%ah\n"
			"1:"
			: "=c" (*value),
			  "=a" (result)
			: "1" (PCIBIOS_READ_CONFIG_BYTE),
			  "b" (bx),
			  "D" ((long)reg),
			  "S" (&pci_indirect));
		break;
	case 2:
		__asm__("lcall *(%%esi); cld\n\t"
			"jc 1f\n\t"
			"xor %%ah, %%ah\n"
			"1:"
			: "=c" (*value),
			  "=a" (result)
			: "1" (PCIBIOS_READ_CONFIG_WORD),
			  "b" (bx),
			  "D" ((long)reg),
			  "S" (&pci_indirect));
		break;
	case 4:
		__asm__("lcall *(%%esi); cld\n\t"
			"jc 1f\n\t"
			"xor %%ah, %%ah\n"
			"1:"
			: "=c" (*value),
			  "=a" (result)
			: "1" (PCIBIOS_READ_CONFIG_DWORD),
			  "b" (bx),
			  "D" ((long)reg),
			  "S" (&pci_indirect));
		break;
	}

	spin_unlock_irqrestore(&pci_config_lock, flags);

	return (int)((result & 0xff00) >> 8);
}

static int pci_bios_write(unsigned int seg, unsigned int bus,
			  unsigned int devfn, int reg, int len, u32 value)
{
	unsigned long result = 0;
	unsigned long flags;
	unsigned long bx = (bus << 8) | devfn;

	if ((bus > 255) || (devfn > 255) || (reg > 255))
		return -EINVAL;

	spin_lock_irqsave(&pci_config_lock, flags);

	switch (len) {
	case 1:
		__asm__("lcall *(%%esi); cld\n\t"
			"jc 1f\n\t"
			"xor %%ah, %%ah\n"
			"1:"
			: "=a" (result)
			: "0" (PCIBIOS_WRITE_CONFIG_BYTE),
			  "c" (value),
			  "b" (bx),
			  "D" ((long)reg),
			  "S" (&pci_indirect));
		break;
	case 2:
		__asm__("lcall *(%%esi); cld\n\t"
			"jc 1f\n\t"
			"xor %%ah, %%ah\n"
			"1:"
			: "=a" (result)
			: "0" (PCIBIOS_WRITE_CONFIG_WORD),
			  "c" (value),
			  "b" (bx),
			  "D" ((long)reg),
			  "S" (&pci_indirect));
		break;
	case 4:
		__asm__("lcall *(%%esi); cld\n\t"
			"jc 1f\n\t"
			"xor %%ah, %%ah\n"
			"1:"
			: "=a" (result)
			: "0" (PCIBIOS_WRITE_CONFIG_DWORD),
			  "c" (value),
			  "b" (bx),
			  "D" ((long)reg),
			  "S" (&pci_indirect));
		break;
	}

	spin_unlock_irqrestore(&pci_config_lock, flags);

	return (int)((result & 0xff00) >> 8);
}

/*
 * Function table for BIOS32 access
 */

static struct pci_raw_ops pci_bios_access = {
	.read = pci_bios_read,
	.write = pci_bios_write
};

/*
 * Try to find PCI BIOS.
 */

static struct pci_raw_ops * __devinit pci_find_bios(void)
{
	union bios32 *check;
	unsigned char sum;
	int i, length;

	/*
	 * Follow the standard procedure for locating the BIOS32 Service
	 * directory by scanning the permissible address range from
	 * 0xe0000 through 0xfffff for a valid BIOS32 structure.
	 */

	for (check = (union bios32 *) __va(0xe0000);
	     check <= (union bios32 *) __va(0xffff0);
	     ++check) {
		if (check->fields.signature != BIOS32_SIGNATURE)
			continue;
		length = check->fields.length * 16;
		if (!length)
			continue;
		sum = 0;
		for (i = 0; i < length; ++i)
			sum += check->chars[i];
		if (sum != 0)
			continue;
		if (check->fields.revision != 0) {
			printk("PCI: unsupported BIOS32 revision %d at 0x%p\n",
				check->fields.revision, check);
			continue;
		}
		DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check);
		if (check->fields.entry >= 0x100000) {
			printk("PCI: BIOS32 entry (0x%p) in high memory, cannot use.\n", check);
			return NULL;
		} else {
			unsigned long bios32_entry = check->fields.entry;
			DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry);
			bios32_indirect.address = bios32_entry + PAGE_OFFSET;
			if (check_pcibios())
				return &pci_bios_access;
		}
		break;	/* Hopefully more than one BIOS32 cannot happen... */
	}

	return NULL;
}

/*
 * Sort the device list according to PCI BIOS. Nasty hack, but since some
 * fool forgot to define the `correct' device order in the PCI BIOS specs
 * and we want to be (possibly bug-to-bug ;-]) compatible with older kernels
 * which used BIOS ordering, we are bound to do this...
 */

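/*
 * Take devices off the head of pci_devices one at a time; for each,
 * have the BIOS enumerate every device with the same vendor/device ID
 * and move those onto sorted_devices in the order the BIOS reports
 * them.
 */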
void __devinit pcibios_sort(void)
{
	LIST_HEAD(sorted_devices);
	struct list_head *ln;
	struct pci_dev *dev, *d;
	int idx, found;
	unsigned char bus, devfn;

	DBG("PCI: Sorting device list...\n");
	while (!list_empty(&pci_devices)) {
		ln = pci_devices.next;
		dev = pci_dev_g(ln);
		idx = found = 0;
		while (pci_bios_find_device(dev->vendor, dev->device, idx, &bus, &devfn) == PCIBIOS_SUCCESSFUL) {
			idx++;
			list_for_each(ln, &pci_devices) {
				d = pci_dev_g(ln);
				if (d->bus->number == bus && d->devfn == devfn) {
					list_del(&d->global_list);
					list_add_tail(&d->global_list, &sorted_devices);
					if (d == dev)
						found = 1;
					break;
				}
			}
			if (ln == &pci_devices) {
				printk(KERN_WARNING "PCI: BIOS reporting unknown device %02x:%02x\n", bus, devfn);
				/*
				 * We must not continue scanning as several buggy BIOSes
				 * return garbage after the last device. Grr.
				 */
				break;
			}
		}
		if (!found) {
			printk(KERN_WARNING "PCI: Device %s not found by BIOS\n",
				pci_name(dev));
			list_del(&dev->global_list);
			list_add_tail(&dev->global_list, &sorted_devices);
		}
	}
	list_splice(&sorted_devices, &pci_devices);
}

/*
 * BIOS Functions for IRQ Routing
 */

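/*
 * Buffer descriptor handed to the BIOS: table size in bytes followed
 * by a far pointer to the table (32-bit offset, then 16-bit segment),
 * which is the layout PCI BIOS 2.1 defines -- hence the packed
 * attribute.
 */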
struct irq_routing_options {
	u16 size;
	struct irq_info *table;
	u16 segment;
} __attribute__((packed));

struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void)
{
	struct irq_routing_options opt;
	struct irq_routing_table *rt = NULL;
	int ret, map;
	unsigned long page;

	if (!pci_bios_present)
		return NULL;
	page = __get_free_page(GFP_KERNEL);
	if (!page)
		return NULL;
	opt.table = (struct irq_info *) page;
	opt.size = PAGE_SIZE;
	opt.segment = __KERNEL_DS;

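	/*
	 * PCIBIOS_GET_ROUTING_OPTIONS takes the option buffer in ES:EDI,
	 * so %es is loaded with the kernel data segment for the duration
	 * of the far call (saved and restored via the push/pop below).
	 */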
	DBG("PCI: Fetching IRQ routing table... ");
	__asm__("push %%es\n\t"
		"push %%ds\n\t"
		"pop %%es\n\t"
		"lcall *(%%esi); cld\n\t"
		"pop %%es\n\t"
		"jc 1f\n\t"
		"xor %%ah, %%ah\n"
		"1:"
		: "=a" (ret),
		  "=b" (map),
		  "=m" (opt)
		: "0" (PCIBIOS_GET_ROUTING_OPTIONS),
		  "1" (0),
		  "D" ((long) &opt),
		  "S" (&pci_indirect),
		  "m" (opt)
		: "memory");
	DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map);
	if (ret & 0xff00)
		printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", (ret >> 8) & 0xff);
	else if (opt.size) {
		rt = kmalloc(sizeof(struct irq_routing_table) + opt.size, GFP_KERNEL);
		if (rt) {
			memset(rt, 0, sizeof(struct irq_routing_table));
			rt->size = opt.size + sizeof(struct irq_routing_table);
			rt->exclusive_irqs = map;
			memcpy(rt->slots, (void *) page, opt.size);
			printk(KERN_INFO "PCI: Using BIOS Interrupt Routing Table\n");
		}
	}
	free_page(page);
	return rt;
}

int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq)
{
	int ret;

	__asm__("lcall *(%%esi); cld\n\t"
		"jc 1f\n\t"
		"xor %%ah, %%ah\n"
		"1:"
		: "=a" (ret)
		: "0" (PCIBIOS_SET_PCI_HW_INT),
		  "b" ((dev->bus->number << 8) | dev->devfn),
		  "c" ((irq << 8) | (pin + 10)),
		  "S" (&pci_indirect));
	return !(ret & 0xff00);
}

static int __init pci_pcbios_init(void)
{
	if ((pci_probe & PCI_PROBE_BIOS)
		&& ((raw_pci_ops = pci_find_bios()))) {
		pci_probe |= PCI_BIOS_SORT;
		pci_bios_present = 1;
	}
	return 0;
}

arch_initcall(pci_pcbios_init);
diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h
new file mode 100644
index 000000000000..a8fc80ca69f3
--- /dev/null
+++ b/arch/i386/pci/pci.h
@@ -0,0 +1,74 @@
/*
 * Low-Level PCI Access for i386 machines.
 *
 * (c) 1999 Martin Mares <mj@ucw.cz>
 */

#undef DEBUG

#ifdef DEBUG
#define DBG(x...) printk(x)
#else
#define DBG(x...)
#endif

#define PCI_PROBE_BIOS		0x0001
#define PCI_PROBE_CONF1		0x0002
#define PCI_PROBE_CONF2		0x0004
#define PCI_PROBE_MMCONF	0x0008
#define PCI_PROBE_MASK		0x000f

#define PCI_NO_SORT		0x0100
#define PCI_BIOS_SORT		0x0200
#define PCI_NO_CHECKS		0x0400
#define PCI_USE_PIRQ_MASK	0x0800
#define PCI_ASSIGN_ROMS		0x1000
#define PCI_BIOS_IRQ_SCAN	0x2000
#define PCI_ASSIGN_ALL_BUSSES	0x4000

extern unsigned int pci_probe;

/* pci-i386.c */

extern unsigned int pcibios_max_latency;

void pcibios_resource_survey(void);
int pcibios_enable_resources(struct pci_dev *, int);

/* pci-pc.c */

extern int pcibios_last_bus;
extern struct pci_bus *pci_root_bus;
extern struct pci_ops pci_root_ops;

/* pci-irq.c */

struct irq_info {
	u8 bus, devfn;			/* Bus, device and function */
	struct {
		u8 link;		/* IRQ line ID, chipset dependent, 0=not routed */
		u16 bitmap;		/* Available IRQs */
	} __attribute__((packed)) irq[4];
	u8 slot;			/* Slot number, 0=onboard */
	u8 rfu;
} __attribute__((packed));

struct irq_routing_table {
	u32 signature;			/* PIRQ_SIGNATURE should be here */
	u16 version;			/* PIRQ_VERSION */
	u16 size;			/* Table size in bytes */
	u8 rtr_bus, rtr_devfn;		/* Where the interrupt router lies */
	u16 exclusive_irqs;		/* IRQs devoted exclusively to PCI usage */
	u16 rtr_vendor, rtr_device;	/* Vendor and device ID of interrupt router */
	u32 miniport_data;		/* Crap */
	u8 rfu[11];
	u8 checksum;			/* Modulo 256 checksum must give zero */
	struct irq_info slots[0];
} __attribute__((packed));

extern unsigned int pcibios_irq_mask;

extern int pcibios_scanned;
extern spinlock_t pci_config_lock;

extern int (*pcibios_enable_irq)(struct pci_dev *dev);
diff --git a/arch/i386/pci/visws.c b/arch/i386/pci/visws.c
new file mode 100644
index 000000000000..6a9248784439
--- /dev/null
+++ b/arch/i386/pci/visws.c
@@ -0,0 +1,110 @@
/*
 * Low-Level PCI Support for SGI Visual Workstation
 *
 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
 */

#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/init.h>

#include "cobalt.h"
#include "lithium.h"

#include "pci.h"

extern struct pci_raw_ops pci_direct_conf1;

static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }

int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq;

void __init pcibios_penalize_isa_irq(int irq) {}

unsigned int pci_bus0, pci_bus1;

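/*
 * Standard PCI-to-PCI bridge swizzle: a device's INTA..INTD lines
 * rotate by its slot number at each bridge, so walking up the bridge
 * chain in visws_swizzle() yields the pin as seen at the host bridge.
 */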
static inline u8 bridge_swizzle(u8 pin, u8 slot)
{
	return (((pin - 1) + slot) % 4) + 1;
}

static u8 __init visws_swizzle(struct pci_dev *dev, u8 *pinp)
{
	u8 pin = *pinp;

	while (dev->bus->self) {	/* Move up the chain of bridges. */
		pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn));
		dev = dev->bus->self;
	}
	*pinp = pin;

	return PCI_SLOT(dev->devfn);
}

static int __init visws_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
{
	int irq, bus = dev->bus->number;

	pin--;

	/* Nothing useful at PIIX4 pin 1 */
	if (bus == pci_bus0 && slot == 4 && pin == 0)
		return -1;

	/* PIIX4 USB is on Bus 0, Slot 4, Line 3 */
	if (bus == pci_bus0 && slot == 4 && pin == 3) {
		irq = CO_IRQ(CO_APIC_PIIX4_USB);
		goto out;
	}

	/* First pin spread down 1 APIC entry per slot */
	if (pin == 0) {
		irq = CO_IRQ((bus == pci_bus0 ? CO_APIC_PCIB_BASE0 :
				CO_APIC_PCIA_BASE0) + slot);
		goto out;
	}

	/* Lines 1, 2 and 3 from any slot are shared in this twirly pattern. */
	if (bus == pci_bus1) {
		/* lines 1-3 from devices 0 and 1 rotate over 2 APIC entries */
		irq = CO_IRQ(CO_APIC_PCIA_BASE123 + ((slot + (pin - 1)) % 2));
	} else { /* bus == pci_bus0 */
		/* lines 1-3 from devices 0-3 rotate over 3 APIC entries */
		if (slot == 0)
			slot = 3;	/* same pattern */
		irq = CO_IRQ(CO_APIC_PCIA_BASE123 + ((3 - slot) + (pin - 1) % 3));
	}
out:
	printk(KERN_DEBUG "PCI: Bus %d Slot %d Line %d -> IRQ %d\n", bus, slot, pin, irq);
	return irq;
}

void __init pcibios_update_irq(struct pci_dev *dev, int irq)
{
	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
}

static int __init pcibios_init(void)
{
	/* The VISWS supports configuration access type 1 only */
	pci_probe = (pci_probe | PCI_PROBE_CONF1) &
		    ~(PCI_PROBE_BIOS | PCI_PROBE_CONF2);

	pci_bus0 = li_pcib_read16(LI_PCI_BUSNUM) & 0xff;
	pci_bus1 = li_pcia_read16(LI_PCI_BUSNUM) & 0xff;

	printk(KERN_INFO "PCI: Lithium bridge A bus: %u, "
		"bridge B (PIIX4) bus: %u\n", pci_bus1, pci_bus0);

	raw_pci_ops = &pci_direct_conf1;
	pci_scan_bus(pci_bus0, &pci_root_ops, NULL);
	pci_scan_bus(pci_bus1, &pci_root_ops, NULL);
	pci_fixup_irqs(visws_swizzle, visws_map_irq);
	pcibios_resource_survey();
	return 0;
}

subsys_initcall(pcibios_init);
diff --git a/arch/i386/power/Makefile b/arch/i386/power/Makefile
new file mode 100644
index 000000000000..8cfa4e8a719d
--- /dev/null
+++ b/arch/i386/power/Makefile
@@ -0,0 +1,2 @@
obj-$(CONFIG_PM)		+= cpu.o
obj-$(CONFIG_SOFTWARE_SUSPEND)	+= swsusp.o
diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c
new file mode 100644
index 000000000000..cf337c673d92
--- /dev/null
+++ b/arch/i386/power/cpu.c
@@ -0,0 +1,152 @@
/*
 * Suspend support specific for i386.
 *
 * Distribute under GPLv2
 *
 * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
 * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
 */

#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/poll.h>
#include <linux/delay.h>
#include <linux/sysrq.h>
#include <linux/proc_fs.h>
#include <linux/irq.h>
#include <linux/pm.h>
#include <linux/device.h>
#include <linux/suspend.h>
#include <linux/acpi.h>
#include <asm/uaccess.h>
#include <asm/acpi.h>
#include <asm/tlbflush.h>

static struct saved_context saved_context;

unsigned long saved_context_ebx;
unsigned long saved_context_esp, saved_context_ebp;
unsigned long saved_context_esi, saved_context_edi;
unsigned long saved_context_eflags;

extern void enable_sep_cpu(void *);

void __save_processor_state(struct saved_context *ctxt)
{
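	/* Flush the live FPU state out to the task's save area so it is
	   captured along with everything else; do_fpu_end() pairs with
	   this on the resume path. */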
	kernel_fpu_begin();

	/*
	 * descriptor tables
	 */
	asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit));
	asm volatile ("sidt %0" : "=m" (ctxt->idt_limit));
	asm volatile ("sldt %0" : "=m" (ctxt->ldt));
	asm volatile ("str %0" : "=m" (ctxt->tr));

	/*
	 * segment registers
	 */
	asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
	asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
	asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
	asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));

	/*
	 * control registers
	 */
	asm volatile ("movl %%cr0, %0" : "=r" (ctxt->cr0));
	asm volatile ("movl %%cr2, %0" : "=r" (ctxt->cr2));
	asm volatile ("movl %%cr3, %0" : "=r" (ctxt->cr3));
	asm volatile ("movl %%cr4, %0" : "=r" (ctxt->cr4));
}

void save_processor_state(void)
{
	__save_processor_state(&saved_context);
}

static void do_fpu_end(void)
{
	/* Restore FPU regs if necessary. Do it out of line so that gcc
	   does not move the cr0 load to some stupid place. */
	kernel_fpu_end();
	mxcsr_feature_mask_init();
}

static void fix_processor_context(void)
{
	int cpu = smp_processor_id();
	struct tss_struct *t = &per_cpu(init_tss, cpu);

	/*
	 * This just modifies memory and should not be necessary -- but it
	 * is, because 386 hardware has the concept of a busy TSS or some
	 * similar stupidity.
	 */
	set_tss_desc(cpu, t);
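	/* Clear the busy bit in the TSS descriptor's type field (bit 9
	   of the high dword), so the ltr in load_TR_desc() below does
	   not fault on a TSS still marked busy from before the suspend. */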
	per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TSS].b &= 0xfffffdff;

	load_TR_desc();				/* This does ltr */
	load_LDT(&current->active_mm->context);	/* This does lldt */

	/*
	 * Now maybe reload the debug registers
	 */
	if (current->thread.debugreg[7]) {
		loaddebug(&current->thread, 0);
		loaddebug(&current->thread, 1);
		loaddebug(&current->thread, 2);
		loaddebug(&current->thread, 3);
		/* no 4 and 5 */
		loaddebug(&current->thread, 6);
		loaddebug(&current->thread, 7);
	}
}

void __restore_processor_state(struct saved_context *ctxt)
{
	/*
	 * control registers
	 */
	asm volatile ("movl %0, %%cr4" :: "r" (ctxt->cr4));
	asm volatile ("movl %0, %%cr3" :: "r" (ctxt->cr3));
	asm volatile ("movl %0, %%cr2" :: "r" (ctxt->cr2));
	asm volatile ("movl %0, %%cr0" :: "r" (ctxt->cr0));

	/*
	 * segment registers
	 */
	asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
	asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
	asm volatile ("movw %0, %%gs" :: "r" (ctxt->gs));
	asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));

	/*
	 * now restore the descriptor tables to their proper values
	 * ltr is done in fix_processor_context().
	 */
	asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit));
	asm volatile ("lidt %0" :: "m" (ctxt->idt_limit));
	asm volatile ("lldt %0" :: "m" (ctxt->ldt));

	/*
	 * sysenter MSRs
	 */
	if (boot_cpu_has(X86_FEATURE_SEP))
		enable_sep_cpu(NULL);

	fix_processor_context();
	do_fpu_end();
}

void restore_processor_state(void)
{
	__restore_processor_state(&saved_context);
}

/* Needed by apm.c */
EXPORT_SYMBOL(save_processor_state);
EXPORT_SYMBOL(restore_processor_state);
diff --git a/arch/i386/power/swsusp.S b/arch/i386/power/swsusp.S
new file mode 100644
index 000000000000..c4105286ff26
--- /dev/null
+++ b/arch/i386/power/swsusp.S
@@ -0,0 +1,73 @@
.text

/* Originally gcc generated, modified by hand.
 *
 * This code may not use any stack, nor any variable that is not
 * "NoSave": it is rewriting one kernel image with another, so what is
 * a stack page in the "old" image may well be a data page in the
 * "new" image, and overwriting your own stack out from under yourself
 * is a bad idea.
 */

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/asm_offsets.h>

	.text

ENTRY(swsusp_arch_suspend)

	movl %esp, saved_context_esp
	movl %ebx, saved_context_ebx
	movl %ebp, saved_context_ebp
	movl %esi, saved_context_esi
	movl %edi, saved_context_edi
	pushfl ; popl saved_context_eflags

	call swsusp_save
	ret

ENTRY(swsusp_arch_resume)
	movl $swsusp_pg_dir-__PAGE_OFFSET, %ecx
	movl %ecx, %cr3

	movl pagedir_nosave, %edx
	.p2align 4,,7

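/*
 * pagedir_nosave heads a linked list of page backup entries; each
 * iteration copies one 4 KB page (1024 dwords via rep movsl) from its
 * suspend-time copy at pbe_address back to its original home at
 * pbe_orig_address.
 */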
copy_loop:
	testl %edx, %edx
	jz done

	movl pbe_address(%edx), %esi
	movl pbe_orig_address(%edx), %edi

	movl $1024, %ecx
	rep
	movsl

	movl pbe_next(%edx), %edx
	jmp copy_loop
	.p2align 4,,7

done:
	/* Flush the TLB, including "global" things (vmalloc) */
	movl mmu_cr4_features, %eax
	movl %eax, %edx
	andl $~(1<<7), %edx	# PGE
	movl %edx, %cr4		# turn off PGE
	movl %cr3, %ecx		# flush TLB
	movl %ecx, %cr3
	movl %eax, %cr4		# turn PGE back on

	movl saved_context_esp, %esp
	movl saved_context_ebp, %ebp
	movl saved_context_ebx, %ebx
	movl saved_context_esi, %esi
	movl saved_context_edi, %edi

	pushl saved_context_eflags ; popfl

	xorl %eax, %eax

	ret